fix: streamout

This commit is contained in:
Samuliak 2024-08-19 13:07:30 +02:00
parent 9aa72e6295
commit e2f66b8aa3
3 changed files with 27 additions and 13 deletions

View File

@ -3284,8 +3284,6 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La
continue;
uint32 u32Offset = streamWrite->exportArrayBase + i;
// HACK: disable streamout temporarily, since it causes GPU hangs
continue;
src->addFmt("sb[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset);
src->add(" = ");
@ -3399,8 +3397,6 @@ static void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, La
continue;
uint32 u32Offset = cfInstruction->exportArrayBase + i;
// HACK: disable streamout temporarily, since it causes GPU hangs
continue;
src->addFmt("sb[sbBase{} + {}]", streamoutBufferIndex, u32Offset);
src->add(" = ");

View File

@ -634,7 +634,9 @@ void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32
// Copies a range out of the streamout (XFB) ring buffer into the main buffer cache.
//
// srcOffset: offset into m_xfbRingBuffer where the copy starts.
// dstOffset: offset into the buffer returned by m_memoryManager->GetBufferCache().
// size:      length of the range to copy (passed straight to the Metal blit,
//            which measures it in bytes).
//
// The copy is recorded on the encoder returned by GetBlitCommandEncoder();
// nothing in this function waits for it to execute.
void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size)
{
	auto blitCommandEncoder = GetBlitCommandEncoder();

	blitCommandEncoder->copyFromBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
}
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
@ -669,17 +671,18 @@ RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, ui
// Marks a streamout buffer slot as enabled and records where it starts in the
// streamout ring buffer.
//
// bufferIndex:      slot in m_state.m_streamoutState.buffers to update.
//                   NOTE(review): not range-checked here; callers are assumed
//                   to pass a value below LATTE_NUM_STREAMOUT_BUFFER - confirm.
// ringBufferOffset: offset stored for this slot; it is later uploaded to the
//                   shader's streamout buffer base uniform.
// rangeAddr, rangeSize: currently unused by the Metal backend.
void MetalRenderer::streamout_setupXfbBuffer(uint32 bufferIndex, sint32 ringBufferOffset, uint32 rangeAddr, uint32 rangeSize)
{
	auto& streamoutBuffer = m_state.m_streamoutState.buffers[bufferIndex];
	streamoutBuffer.enabled = true;
	streamoutBuffer.ringBufferOffset = ringBufferOffset;
}
// Intentionally a no-op in the Metal backend: this function has no
// per-sequence streamout setup to perform.
void MetalRenderer::streamout_begin()
{
	// Do nothing
}
// Intentionally a no-op in the Metal backend: this function has no
// per-drawcall streamout teardown to perform.
void MetalRenderer::streamout_rendererFinishDrawcall()
{
	// Do nothing
}
void MetalRenderer::draw_beginSequence()
@ -966,6 +969,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
encoderState.m_renderPipelineState = renderPipelineState;
}
// Prepare streamout
m_state.m_streamoutState.verticesPerInstance = count;
LatteStreamout_PrepareDrawcall(count, instanceCount);
// Uniform buffers, textures and samplers
BindStageResources(renderCommandEncoder, vertexShader);
BindStageResources(renderCommandEncoder, pixelShader);
@ -981,6 +988,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
renderCommandEncoder->drawPrimitives(mtlPrimitiveType, baseVertex, count, instanceCount, baseInstance);
}
LatteStreamout_FinishDrawcall(false);
LatteGPUState.drawCallCounter++;
}
@ -1498,20 +1507,17 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
{
LatteMRT::GetCurrentFragCoordScale(GET_UNIFORM_DATA_PTR(shader->uniform.loc_fragCoordScale));
}
// TODO: uncomment
/*
if (shader->uniform.loc_verticesPerInstance >= 0)
{
*(int*)(supportBufferData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance;
*(int*)(supportBufferData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_state.m_streamoutState.verticesPerInstance;
for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++)
{
if (shader->uniform.loc_streamoutBufferBase[b] >= 0)
{
*(uint32*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_streamoutBufferBase[b]) = m_streamoutState.buffer[b].ringBufferOffset;
*(uint32*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_streamoutBufferBase[b]) = m_state.m_streamoutState.buffers[b].ringBufferOffset;
}
}
}
*/
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
size_t size = shader->uniform.uniformRangeSize;

View File

@ -80,6 +80,16 @@ struct MetalEncoderState
size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
};
// Streamout (XFB) state tracked by the Metal renderer.
//
// Every member has a default initializer so that a default-constructed
// instance (e.g. a member declared without `{}`) never holds indeterminate
// values.
struct MetalStreamoutState
{
	// Per-slot state, one entry per streamout buffer.
	struct
	{
		// Set to true once the slot has been set up for streamout.
		bool enabled = false;
		// Offset of this slot's data inside the streamout ring buffer.
		uint32 ringBufferOffset = 0;
	} buffers[LATTE_NUM_STREAMOUT_BUFFER];

	// Vertex count of the current draw; uploaded to the shader's
	// verticesPerInstance uniform.
	sint32 verticesPerInstance = 0;
};
struct MetalState
{
MetalEncoderState m_encoderState{};
@ -99,6 +109,8 @@ struct MetalState
MTL::Viewport m_viewport;
MTL::ScissorRect m_scissor;
MetalStreamoutState m_streamoutState;
};
struct MetalCommandBuffer