mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-01-12 09:59:11 +01:00
Latte: Small refactor for shader decompiler
Latte: Small refactor for shader decompiler
This commit is contained in:
parent
4a564e2447
commit
f8ea594447
@ -549,7 +549,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
|
||||
// hash stride for streamout buffers
|
||||
for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
||||
{
|
||||
if(!vertexShader->streamoutBufferWriteMask2[i])
|
||||
if(!vertexShader->streamoutBufferWriteMask[i])
|
||||
continue;
|
||||
uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4];
|
||||
auxHash = std::rotl<uint64>(auxHash, 7);
|
||||
@ -617,7 +617,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
||||
// copy texture info
|
||||
shader->textureUnitMask2 = decompilerOutput.textureUnitMask;
|
||||
// copy streamout info
|
||||
shader->streamoutBufferWriteMask2 = decompilerOutput.streamoutBufferWriteMask;
|
||||
shader->streamoutBufferWriteMask = decompilerOutput.streamoutBufferWriteMask;
|
||||
shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any();
|
||||
// copy uniform offsets
|
||||
// for OpenGL these are retrieved in _prepareSeparableUniforms()
|
||||
@ -746,9 +746,7 @@ LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash,
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);
|
||||
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteFetchShader* fetchShaderList[1];
|
||||
fetchShaderList[0] = fetchShader;
|
||||
LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), options, &decompilerOutput);
|
||||
LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShader, options, &decompilerOutput);
|
||||
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||
vsAuxHash = vertexShader->auxHash;
|
||||
if (vertexShader->hasError == false)
|
||||
@ -779,9 +777,7 @@ LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHas
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);
|
||||
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteFetchShader* fetchShaderList[1];
|
||||
fetchShaderList[0] = _activeFetchShader;
|
||||
LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, options, &decompilerOutput);
|
||||
LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, _activeVertexShader->ringParameterCount, options, &decompilerOutput);
|
||||
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||
if (geometryShader->hasError == false)
|
||||
{
|
||||
@ -810,7 +806,7 @@ LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash,
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);
|
||||
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), options, &decompilerOutput);
|
||||
LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, options, &decompilerOutput);
|
||||
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||
psAuxHash = pixelShader->auxHash;
|
||||
LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader);
|
||||
|
@ -651,15 +651,12 @@ bool LatteShaderCache_readSeparableVertexShader(MemStreamReader& streamReader, u
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);
|
||||
// decompile vertex shader
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteFetchShader* fetchShaderList[1];
|
||||
fetchShaderList[0] = fetchShader;
|
||||
LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShaderList, 1, lcr->GetSpecialStateValues(), options, &decompilerOutput);
|
||||
LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShader, options, &decompilerOutput);
|
||||
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||
// compile
|
||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, vertexShader);
|
||||
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_VERTEX, vertexShaderData.data(), vertexShaderData.size());
|
||||
LatteShaderCache_loadOrCompileSeparableShader(vertexShader, shaderBaseHash, shaderAuxHash);
|
||||
catchOpenGLError();
|
||||
LatteSHRC_RegisterShader(vertexShader, shaderBaseHash, shaderAuxHash);
|
||||
return true;
|
||||
}
|
||||
@ -696,7 +693,7 @@ bool LatteShaderCache_readSeparableGeometryShader(MemStreamReader& streamReader,
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);
|
||||
// decompile geometry shader
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), lcr->GetSpecialStateValues(), vsRingParameterCount, options, &decompilerOutput);
|
||||
LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), vsRingParameterCount, options, &decompilerOutput);
|
||||
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||
// compile
|
||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, geometryShader);
|
||||
@ -734,13 +731,12 @@ bool LatteShaderCache_readSeparablePixelShader(MemStreamReader& streamReader, ui
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);
|
||||
// decompile pixel shader
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), lcr->GetSpecialStateValues(), options, &decompilerOutput);
|
||||
LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), options, &decompilerOutput);
|
||||
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||
// compile
|
||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, pixelShader);
|
||||
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_PIXEL, pixelShaderData.data(), pixelShaderData.size());
|
||||
LatteShaderCache_loadOrCompileSeparableShader(pixelShader, shaderBaseHash, shaderAuxHash);
|
||||
catchOpenGLError();
|
||||
LatteSHRC_RegisterShader(pixelShader, shaderBaseHash, shaderAuxHash);
|
||||
return true;
|
||||
}
|
||||
|
@ -101,16 +101,16 @@ void LatteStreamout_PrepareDrawcall(uint32 count, uint32 instanceCount)
|
||||
if (geometryShader)
|
||||
{
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
cemu_assert_debug(vertexShader->streamoutBufferWriteMask2.any() == false);
|
||||
cemu_assert_debug(vertexShader->streamoutBufferWriteMask.any() == false);
|
||||
#endif
|
||||
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
||||
if (geometryShader->streamoutBufferWriteMask2[i])
|
||||
if (geometryShader->streamoutBufferWriteMask[i])
|
||||
streamoutWriteMask |= (1 << i);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
||||
if (vertexShader->streamoutBufferWriteMask2[i])
|
||||
if (vertexShader->streamoutBufferWriteMask[i])
|
||||
streamoutWriteMask |= (1 << i);
|
||||
}
|
||||
activeStreamoutOperation.streamoutWriteMask = streamoutWriteMask;
|
||||
|
@ -1071,34 +1071,24 @@ void LatteDecompiler_InitContext(LatteDecompilerShaderContext& dCtx, const Latte
|
||||
dCtx.output = output;
|
||||
dCtx.shaderType = shaderType;
|
||||
dCtx.options = &options;
|
||||
output->shaderType = shaderType;
|
||||
dCtx.shaderBaseHash = shaderBaseHash;
|
||||
dCtx.contextRegisters = contextRegisters;
|
||||
dCtx.contextRegistersNew = (LatteContextRegister*)contextRegisters;
|
||||
|
||||
// set context parameters (redundant stuff since options can be accessed directly)
|
||||
dCtx.usesGeometryShader = options.usesGeometryShader;
|
||||
dCtx.useTFViaSSBO = options.useTFViaSSBO;
|
||||
output->shaderType = shaderType;
|
||||
}
|
||||
|
||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader* fetchShader, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
{
|
||||
cemu_assert_debug(fetchShader);
|
||||
cemu_assert_debug((programSize & 3) == 0);
|
||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||
// prepare decompiler context
|
||||
LatteDecompilerShaderContext shaderContext = { 0 };
|
||||
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Vertex, shaderBaseHash, contextRegisters);
|
||||
cemu_assert_debug(fetchShaderCount == 1);
|
||||
for (sint32 i = 0; i < fetchShaderCount; i++)
|
||||
{
|
||||
shaderContext.fetchShaderList[i] = fetchShaderList[i];
|
||||
}
|
||||
shaderContext.fetchShaderCount = fetchShaderCount;
|
||||
shaderContext.fetchShader = fetchShader;
|
||||
// prepare shader (deprecated)
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
||||
shader->shaderType = LatteConst::ShaderType::Vertex;
|
||||
shader->compatibleFetchShader = shaderContext.fetchShaderList[0];
|
||||
shaderContext.shaderType = LatteConst::ShaderType::Vertex;
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Vertex);
|
||||
shader->compatibleFetchShader = shaderContext.fetchShader;
|
||||
output->shaderType = LatteConst::ShaderType::Vertex;
|
||||
shaderContext.shader = shader;
|
||||
output->shader = shader;
|
||||
@ -1112,20 +1102,16 @@ void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contex
|
||||
performanceMonitor.gpuTime_shaderCreate.endMeasuring();
|
||||
}
|
||||
|
||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
{
|
||||
cemu_assert_debug((programSize & 3) == 0);
|
||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||
// prepare decompiler context
|
||||
LatteDecompilerShaderContext shaderContext = { 0 };
|
||||
shaderContext.fetchShaderCount = 0;
|
||||
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Geometry, shaderBaseHash, contextRegisters);
|
||||
// prepare shader
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
||||
shaderContext.output = output;
|
||||
shader->shaderType = LatteConst::ShaderType::Geometry;
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Geometry);
|
||||
shader->ringParameterCountFromPrevStage = vsRingParameterCount;
|
||||
shaderContext.shaderType = LatteConst::ShaderType::Geometry;
|
||||
output->shaderType = LatteConst::ShaderType::Geometry;
|
||||
shaderContext.shader = shader;
|
||||
output->shader = shader;
|
||||
@ -1147,7 +1133,7 @@ void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* cont
|
||||
performanceMonitor.gpuTime_shaderCreate.endMeasuring();
|
||||
}
|
||||
|
||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
{
|
||||
cemu_assert_debug((programSize & 3) == 0);
|
||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||
@ -1156,10 +1142,7 @@ void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* context
|
||||
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Pixel, shaderBaseHash, contextRegisters);
|
||||
shaderContext.contextRegisters = contextRegisters;
|
||||
// prepare shader
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
||||
shaderContext.output = output;
|
||||
shader->shaderType = LatteConst::ShaderType::Pixel;
|
||||
shaderContext.shaderType = LatteConst::ShaderType::Pixel;
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Pixel);
|
||||
output->shaderType = LatteConst::ShaderType::Pixel;
|
||||
shaderContext.shader = shader;
|
||||
output->shader = shader;
|
||||
|
@ -148,6 +148,8 @@ struct LatteDecompilerShaderResourceMapping
|
||||
|
||||
struct LatteDecompilerShader
|
||||
{
|
||||
LatteDecompilerShader(LatteConst::ShaderType shaderType) : shaderType(shaderType) {}
|
||||
|
||||
LatteDecompilerShader* next;
|
||||
LatteConst::ShaderType shaderType;
|
||||
uint64 baseHash;
|
||||
@ -167,21 +169,21 @@ struct LatteDecompilerShader
|
||||
Latte::E_DIM textureUnitDim[LATTE_NUM_MAX_TEX_UNITS]; // dimension of texture unit, from the currently set texture
|
||||
bool textureIsIntegerFormat[LATTE_NUM_MAX_TEX_UNITS]{};
|
||||
// analyzer stage (uniforms)
|
||||
uint8 uniformMode; // determines how uniforms are managed within the shader (see GPU7_DECOMPILER_UNIFORM_MODE_* constants)
|
||||
uint8 uniformMode; // determines how uniforms are managed within the shader (see LATTE_DECOMPILER_UNIFORM_MODE_* constants)
|
||||
uint64 uniformDataHash64[2]; // used to avoid redundant calls to glUniform*
|
||||
std::vector<LatteDecompilerRemappedUniformEntry_t> list_remappedUniformEntries;
|
||||
// analyzer stage (textures)
|
||||
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
|
||||
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]; // GPU7_SAMPLER_NONE means undefined
|
||||
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
|
||||
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS];
|
||||
|
||||
// analyzer stage (pixel outputs)
|
||||
uint32 pixelColorOutputMask; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments, may differ from export index inside the pixel shader)
|
||||
uint32 pixelColorOutputMask; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)
|
||||
// analyzer stage (geometry shader parameters/inputs)
|
||||
uint32 ringParameterCount;
|
||||
uint32 ringParameterCountFromPrevStage; // used in geometry shader to hold VS ringParameterCount
|
||||
// analyzer stage (misc)
|
||||
std::bitset<LATTE_NUM_STREAMOUT_BUFFER> streamoutBufferWriteMask2;
|
||||
std::bitset<LATTE_NUM_STREAMOUT_BUFFER> streamoutBufferWriteMask;
|
||||
bool hasStreamoutBufferWrite;
|
||||
// output code
|
||||
class StringBuf* strBuf_shaderSource{nullptr};
|
||||
@ -283,9 +285,9 @@ struct LatteDecompilerOutput_t
|
||||
|
||||
struct LatteDecompilerSubroutineInfo;
|
||||
|
||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader* fetchShader, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
|
||||
// specialized shader parsers
|
||||
|
||||
|
@ -630,7 +630,7 @@ namespace LatteDecompiler
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry && decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
|
||||
decompilerContext->hasUniformVarBlock = true; // uf_pointSize
|
||||
if (decompilerContext->analyzer.useSSBOForStreamout &&
|
||||
(decompilerContext->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) ||
|
||||
(decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
|
||||
(decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
|
||||
{
|
||||
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase*
|
||||
@ -735,7 +735,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
||||
// analyze render state
|
||||
shaderContext->analyzer.isPointsPrimitive = shaderContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE() == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS;
|
||||
shaderContext->analyzer.hasStreamoutEnable = shaderContext->contextRegisters[mmVGT_STRMOUT_EN] != 0; // set if the shader is used for transform feedback operations
|
||||
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex && shaderContext->usesGeometryShader == false)
|
||||
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader)
|
||||
shaderContext->analyzer.outputPointSize = shaderContext->analyzer.isPointsPrimitive;
|
||||
else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
@ -746,10 +746,9 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
||||
// analyze input attributes for vertex/geometry shader
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
for (sint32 f = 0; f < shaderContext->fetchShaderCount; f++)
|
||||
if(shaderContext->fetchShader)
|
||||
{
|
||||
LatteFetchShader* parsedFetchShader = (LatteFetchShader*)shaderContext->fetchShaderList[f];
|
||||
|
||||
LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
|
||||
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
||||
{
|
||||
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
||||
@ -938,9 +937,9 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
||||
// analyze input attributes again (if shader has relative GPR read)
|
||||
if(shaderContext->analyzer.usesRelativeGPRRead && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry) )
|
||||
{
|
||||
for (sint32 f = 0; f < shaderContext->fetchShaderCount; f++)
|
||||
if(shaderContext->fetchShader)
|
||||
{
|
||||
LatteFetchShader* parsedFetchShader = (LatteFetchShader*)shaderContext->fetchShaderList[f];
|
||||
LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
|
||||
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
||||
{
|
||||
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
||||
@ -1077,7 +1076,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
||||
if(list_subroutineAddrs.empty() == false)
|
||||
forceLogDebug_printf("Todo - analyze shader subroutine CF stack");
|
||||
// TF mode
|
||||
if (shaderContext->useTFViaSSBO && shaderContext->output->streamoutBufferWriteMask.any())
|
||||
if (shaderContext->options->useTFViaSSBO && shaderContext->output->streamoutBufferWriteMask.any())
|
||||
{
|
||||
shaderContext->analyzer.useSSBOForStreamout = true;
|
||||
}
|
||||
|
@ -3363,7 +3363,7 @@ void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDeco
|
||||
if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
|
||||
continue;
|
||||
|
||||
if (shaderContext->useTFViaSSBO)
|
||||
if (shaderContext->options->useTFViaSSBO)
|
||||
{
|
||||
uint32 u32Offset = streamWrite->exportArrayBase + i;
|
||||
src->addFmt("sb_buffer[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset);
|
||||
@ -3483,7 +3483,7 @@ void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDeco
|
||||
if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
|
||||
continue;
|
||||
|
||||
if (shaderContext->useTFViaSSBO)
|
||||
if (shaderContext->options->useTFViaSSBO)
|
||||
{
|
||||
uint32 u32Offset = cfInstruction->exportArrayBase + i;
|
||||
src->addFmt("sb_buffer[sbBase{} + {}]", streamoutBufferIndex, u32Offset);
|
||||
@ -3831,7 +3831,7 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade
|
||||
//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works
|
||||
|
||||
if( LatteGPUState.glVendor == GLVENDOR_NVIDIA && !ActiveSettings::DumpShadersEnabled())
|
||||
fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage
|
||||
fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage (OpenGL)
|
||||
else
|
||||
fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" _CRLF);
|
||||
}
|
||||
@ -3841,51 +3841,6 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade
|
||||
}
|
||||
}
|
||||
|
||||
// Debug aid for pixel shaders: appends a comment section to the shader source
// buffer that lists the color buffers found in the context registers.
//
// shaderContext       - decompiler context; only contextRegisters is read
// fCStr_shaderSource  - string buffer the comment lines are appended to
//
// The whole body is compiled out unless CEMU_DEBUG_ASSERT is defined.
void _addPixelShaderExtraDebugInfo(LatteDecompilerShaderContext* shaderContext, StringBuf* fCStr_shaderSource)
{
#ifdef CEMU_DEBUG_ASSERT
	// value OR'ed into the reported format, indexed by bits 12..14 of the color info register
	// (indices without an entry leave the format untouched)
	static constexpr uint32 formatFlagByInfoField[8] = { 0x000, 0x200, 0x000, 0x000, 0x100, 0x300, 0x400, 0x800 };

	fCStr_shaderSource->add("// Color buffers:" _CRLF);
	for (uint32 slot = 0; slot < 8; slot++)
	{
		uint32 baseReg = shaderContext->contextRegisters[mmCB_COLOR0_BASE + slot];
		if (baseReg == MPTR_NULL)
			continue; // no buffer address set for this slot, nothing to report

		uint32 sizeReg = shaderContext->contextRegisters[mmCB_COLOR0_SIZE + slot];
		uint32 infoReg = shaderContext->contextRegisters[mmCB_COLOR0_INFO + slot];

		MPTR physAddr = baseReg;

		// bits 2..7 hold the base format value, bits 8..11 the tile mode
		uint32 format = ((infoReg >> 2) & 0x3F) | formatFlagByInfoField[(infoReg >> 12) & 7];
		uint32 tileMode = (infoReg >> 8) & 0xF;

		// size register: low 16 bits are reported as width, high 16 bits as height
		uint32 width = sizeReg & 0xFFFF;
		uint32 height = (sizeReg >> 16) & 0xFFFF;

		fCStr_shaderSource->addFmt("// Color{}: {}x{} at 0x{:08x} fmt {:04x} tm {}" _CRLF, slot, width, height, physAddr, format, tileMode);
	}
#endif
}
|
||||
|
||||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp"
|
||||
|
||||
void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* shaderContext, LatteParsedFetchShaderAttribute_t& attrib)
|
||||
@ -3954,18 +3909,14 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
src->add("#extension GL_ARB_texture_gather : enable" _CRLF);
|
||||
src->add("#extension GL_ARB_separate_shader_objects : enable" _CRLF);
|
||||
|
||||
if (shaderContext->analyzer.hasStreamoutWrite || shaderContext->usesGeometryShader )
|
||||
if (shaderContext->analyzer.hasStreamoutWrite || shaderContext->options->usesGeometryShader )
|
||||
src->add("#extension GL_ARB_enhanced_layouts : enable" _CRLF);
|
||||
|
||||
// debug info
|
||||
src->addFmt("// shader %08x%08x" _CRLF, (uint32)(shaderContext->shaderBaseHash >> 32), (uint32)(shaderContext->shaderBaseHash & 0xFFFFFFFF));
|
||||
src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash);
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false");
|
||||
src->addFmt(_CRLF);
|
||||
|
||||
if( shader->shaderType == LatteConst::ShaderType::Pixel )
|
||||
_addPixelShaderExtraDebugInfo(shaderContext, src);
|
||||
|
||||
#endif
|
||||
// header part (definitions for inputs and outputs)
|
||||
LatteDecompiler::emitHeader(shaderContext);
|
||||
@ -3982,7 +3933,6 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
{
|
||||
if (shaderContext->analyzer.usesRelativeGPRRead || (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i & 7))) != 0)
|
||||
{
|
||||
//fCStr_appendFormatted(fCStr_shaderSource, "ivec4 R{}i, R{}i, R{}i, R{}i;" STR_LINEBREAK, i*4+0, i*4+1, i*4+2, i*4+3);
|
||||
if (shaderContext->typeTracker.genIntReg)
|
||||
src->addFmt("ivec4 R{}i = ivec4(0);" _CRLF, i);
|
||||
else if (shaderContext->typeTracker.genFloatReg)
|
||||
@ -4035,7 +3985,6 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
src->add("bool predResult = true;" _CRLF);
|
||||
if(shaderContext->analyzer.modifiesPixelActiveState )
|
||||
{
|
||||
// cemu_assert_debug(shaderContext->analyzer.activeStackMaxDepth == 0);
|
||||
src->addFmt("bool activeMaskStack[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+1);
|
||||
src->addFmt("bool activeMaskStackC[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+2);
|
||||
for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth; i++)
|
||||
@ -4058,8 +4007,11 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
}
|
||||
}
|
||||
// helper variables for cube maps (todo: Only emit when used)
|
||||
src->addFmt("vec3 cubeMapSTM;" _CRLF);
|
||||
src->addFmt("int cubeMapFaceId;" _CRLF);
|
||||
if (shaderContext->analyzer.hasRedcCUBE)
|
||||
{
|
||||
src->add("vec3 cubeMapSTM;" _CRLF);
|
||||
src->add("int cubeMapFaceId;" _CRLF);
|
||||
}
|
||||
for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++)
|
||||
{
|
||||
if(!shaderContext->output->textureUnitMask[i])
|
||||
@ -4106,25 +4058,18 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
||||
if (shaderContext->fetchShaderCount == 1)
|
||||
LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
|
||||
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
||||
{
|
||||
LatteFetchShader* parsedFetchShader = shaderContext->fetchShaderList[0];
|
||||
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
||||
{
|
||||
for(sint32 i=0; i<bufferGroup.attribCount; i++)
|
||||
LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
|
||||
}
|
||||
for (auto& bufferGroup : parsedFetchShader->bufferGroupsInvalid)
|
||||
{
|
||||
// these attributes point to non-existent buffers
|
||||
// todo - figure out how the hardware actually handles this, currently we assume the input values are zero
|
||||
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
||||
LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
|
||||
}
|
||||
for(sint32 i=0; i<bufferGroup.attribCount; i++)
|
||||
LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
|
||||
}
|
||||
else
|
||||
for (auto& bufferGroup : parsedFetchShader->bufferGroupsInvalid)
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
// these attributes point to non-existent buffers
|
||||
// todo - figure out how the hardware actually handles this, currently we assume the input values are zero
|
||||
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
||||
LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
|
||||
}
|
||||
}
|
||||
else if (shader->shaderType == LatteConst::ShaderType::Pixel)
|
||||
@ -4172,7 +4117,7 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (shaderContext->usesGeometryShader)
|
||||
if (shaderContext->options->usesGeometryShader)
|
||||
{
|
||||
// import from geometry shader
|
||||
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
|
||||
@ -4216,7 +4161,7 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
// vertex shader should write renderstate point size at the end if required but not modified by shader
|
||||
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
|
||||
{
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->usesGeometryShader == false)
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false)
|
||||
src->add("gl_PointSize = uf_pointSize;" _CRLF);
|
||||
}
|
||||
// end of shader main
|
||||
|
@ -95,7 +95,7 @@ namespace LatteDecompiler
|
||||
}
|
||||
if (decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
|
||||
{
|
||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->usesGeometryShader) ||
|
||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
|
||||
decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
uniformCurrentOffset = (uniformCurrentOffset + 3)&~3;
|
||||
@ -135,7 +135,7 @@ namespace LatteDecompiler
|
||||
}
|
||||
// define uf_verticesPerInstance + uf_streamoutBufferBaseX
|
||||
if (decompilerContext->analyzer.useSSBOForStreamout &&
|
||||
(shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) ||
|
||||
(shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) ||
|
||||
(shader->shaderType == LatteConst::ShaderType::Geometry) )
|
||||
{
|
||||
shaderSrc->add("uniform int uf_verticesPerInstance;" _CRLF);
|
||||
@ -298,7 +298,7 @@ namespace LatteDecompiler
|
||||
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
if (decompilerContext->usesGeometryShader)
|
||||
if (decompilerContext->options->usesGeometryShader)
|
||||
src->add("#define V2G_LAYOUT layout(location = 0)" _CRLF);
|
||||
}
|
||||
}
|
||||
@ -322,7 +322,7 @@ namespace LatteDecompiler
|
||||
src->add("#define XFB_BLOCK_LAYOUT(__bufferIndex, __stride, __location) layout(xfb_buffer = __bufferIndex, xfb_stride = __stride)" _CRLF);
|
||||
|
||||
src->add("#define SET_POSITION(_v) gl_Position = _v\r\n");
|
||||
if (decompilerContext->usesGeometryShader)
|
||||
if (decompilerContext->options->usesGeometryShader)
|
||||
src->add("#define V2G_LAYOUT" _CRLF);
|
||||
}
|
||||
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
|
||||
@ -430,7 +430,7 @@ namespace LatteDecompiler
|
||||
{
|
||||
auto src = decompilerContext->shaderSource;
|
||||
// per-vertex output (VS or GS)
|
||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) ||
|
||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
|
||||
(decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
|
||||
{
|
||||
src->add("out gl_PerVertex" _CRLF);
|
||||
@ -441,7 +441,7 @@ namespace LatteDecompiler
|
||||
src->add("};" _CRLF);
|
||||
}
|
||||
// varyings (variables passed from vertex to pixel shader, only if geometry stage is disabled
|
||||
if (decompilerContext->usesGeometryShader == false)
|
||||
if (decompilerContext->options->usesGeometryShader == false)
|
||||
{
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
|
||||
{
|
||||
@ -537,7 +537,7 @@ namespace LatteDecompiler
|
||||
// streamout buffer (transform feedback)
|
||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) && decompilerContext->analyzer.hasStreamoutEnable)
|
||||
{
|
||||
if (decompilerContext->useTFViaSSBO)
|
||||
if (decompilerContext->options->useTFViaSSBO)
|
||||
{
|
||||
if (decompilerContext->analyzer.useSSBOForStreamout && decompilerContext->analyzer.hasStreamoutWrite)
|
||||
{
|
||||
|
@ -150,11 +150,10 @@ struct LatteDecompilerShaderContext
|
||||
uint32* contextRegisters; // deprecated
|
||||
struct LatteContextRegister* contextRegistersNew;
|
||||
uint64 shaderBaseHash;
|
||||
StringBuf* shaderSource; // move to output struct
|
||||
StringBuf* shaderSource;
|
||||
std::vector<LatteDecompilerCFInstruction> cfInstructions;
|
||||
// fetch shader (required for vertex shader)
|
||||
LatteFetchShader* fetchShaderList[32];
|
||||
sint32 fetchShaderCount;
|
||||
LatteFetchShader* fetchShader{};
|
||||
// geometry copy shader (only present when geometry shader is active)
|
||||
LatteParsedGSCopyShader* parsedGSCopyShader;
|
||||
// state
|
||||
@ -217,8 +216,6 @@ struct LatteDecompilerShaderContext
|
||||
sint32 currentBindingPointVK{};
|
||||
|
||||
// misc
|
||||
bool usesGeometryShader; // for VS
|
||||
bool useTFViaSSBO;
|
||||
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user