mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-11-22 17:19:18 +01:00
Merge pull request #682 from Exzap/shader-improvements-2
Utilize VK_KHR_SHADER_FLOAT_CONTROLS if available
This commit is contained in:
commit
3acdd47eaf
@ -1,20 +1,25 @@
|
|||||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||||
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
|
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
|
||||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||||
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
|
|
||||||
#include "Cafe/HW/Latte/ISA/LatteReg.h"
|
#include "Cafe/HW/Latte/ISA/LatteReg.h"
|
||||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
||||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||||
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||||
|
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
|
||||||
#include "Cafe/GraphicPack/GraphicPack2.h"
|
#include "Cafe/GraphicPack/GraphicPack2.h"
|
||||||
#include "util/helpers/StringParser.h"
|
#include "util/helpers/StringParser.h"
|
||||||
#include "config/ActiveSettings.h"
|
#include "config/ActiveSettings.h"
|
||||||
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
|
|
||||||
#include "util/Zir/Core/ZpIRDebug.h"
|
|
||||||
#include "util/containers/flat_hash_map.hpp"
|
#include "util/containers/flat_hash_map.hpp"
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
|
|
||||||
|
// experimental new decompiler (WIP)
|
||||||
|
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
|
||||||
|
#include "util/Zir/Core/ZpIRDebug.h"
|
||||||
|
#include "Cafe/HW/Latte/Transcompiler/LatteTC.h"
|
||||||
|
#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h"
|
||||||
|
|
||||||
struct _ShaderHashCache
|
struct _ShaderHashCache
|
||||||
{
|
{
|
||||||
uint64 prevHash1;
|
uint64 prevHash1;
|
||||||
@ -544,7 +549,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
|
|||||||
// hash stride for streamout buffers
|
// hash stride for streamout buffers
|
||||||
for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
||||||
{
|
{
|
||||||
if(!vertexShader->streamoutBufferWriteMask2[i])
|
if(!vertexShader->streamoutBufferWriteMask[i])
|
||||||
continue;
|
continue;
|
||||||
uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4];
|
uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4];
|
||||||
auxHash = std::rotl<uint64>(auxHash, 7);
|
auxHash = std::rotl<uint64>(auxHash, 7);
|
||||||
@ -612,7 +617,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
|||||||
// copy texture info
|
// copy texture info
|
||||||
shader->textureUnitMask2 = decompilerOutput.textureUnitMask;
|
shader->textureUnitMask2 = decompilerOutput.textureUnitMask;
|
||||||
// copy streamout info
|
// copy streamout info
|
||||||
shader->streamoutBufferWriteMask2 = decompilerOutput.streamoutBufferWriteMask;
|
shader->streamoutBufferWriteMask = decompilerOutput.streamoutBufferWriteMask;
|
||||||
shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any();
|
shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any();
|
||||||
// copy uniform offsets
|
// copy uniform offsets
|
||||||
// for OpenGL these are retrieved in _prepareSeparableUniforms()
|
// for OpenGL these are retrieved in _prepareSeparableUniforms()
|
||||||
@ -672,10 +677,18 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
|||||||
return shader;
|
return shader;
|
||||||
}
|
}
|
||||||
|
|
||||||
#include "Cafe/HW/Latte/Transcompiler/LatteTC.h"
|
void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled)
|
||||||
#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h"
|
{
|
||||||
|
options.usesGeometryShader = geometryShaderEnabled;
|
||||||
|
options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false;
|
||||||
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||||
|
{
|
||||||
|
options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO();
|
||||||
|
options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
|
LatteDecompilerShader* LatteShader_CompileSeparableVertexShader2(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
|
||||||
{
|
{
|
||||||
/* Analyze shader to gather general information about inputs/outputs */
|
/* Analyze shader to gather general information about inputs/outputs */
|
||||||
Latte::ShaderDescription shaderDescription;
|
Latte::ShaderDescription shaderDescription;
|
||||||
@ -725,14 +738,15 @@ LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash,
|
|||||||
// compile new vertex shader (relies partially on current state)
|
// compile new vertex shader (relies partially on current state)
|
||||||
LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
|
LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
|
||||||
{
|
{
|
||||||
// new decompiler
|
// new decompiler test
|
||||||
//LatteShader_compileSeparableVertexShader(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);
|
//LatteShader_CompileSeparableVertexShader2(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);
|
||||||
|
|
||||||
// legacy decompiler
|
// legacy decompiler
|
||||||
|
LatteDecompilerOptions options;
|
||||||
|
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);
|
||||||
|
|
||||||
LatteDecompilerOutput_t decompilerOutput{};
|
LatteDecompilerOutput_t decompilerOutput{};
|
||||||
LatteFetchShader* fetchShaderList[1];
|
LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShader, options, &decompilerOutput);
|
||||||
fetchShaderList[0] = fetchShader;
|
|
||||||
LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
|
|
||||||
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||||
vsAuxHash = vertexShader->auxHash;
|
vsAuxHash = vertexShader->auxHash;
|
||||||
if (vertexShader->hasError == false)
|
if (vertexShader->hasError == false)
|
||||||
@ -759,10 +773,11 @@ LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash,
|
|||||||
|
|
||||||
LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHash, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
|
LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHash, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
|
||||||
{
|
{
|
||||||
|
LatteDecompilerOptions options;
|
||||||
|
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);
|
||||||
|
|
||||||
LatteDecompilerOutput_t decompilerOutput{};
|
LatteDecompilerOutput_t decompilerOutput{};
|
||||||
LatteFetchShader* fetchShaderList[1];
|
LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, _activeVertexShader->ringParameterCount, options, &decompilerOutput);
|
||||||
fetchShaderList[0] = _activeFetchShader;
|
|
||||||
LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, &decompilerOutput);
|
|
||||||
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||||
if (geometryShader->hasError == false)
|
if (geometryShader->hasError == false)
|
||||||
{
|
{
|
||||||
@ -787,8 +802,11 @@ LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHas
|
|||||||
|
|
||||||
LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, uint64& psAuxHash, uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
|
LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, uint64& psAuxHash, uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
|
||||||
{
|
{
|
||||||
|
LatteDecompilerOptions options;
|
||||||
|
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);
|
||||||
|
|
||||||
LatteDecompilerOutput_t decompilerOutput{};
|
LatteDecompilerOutput_t decompilerOutput{};
|
||||||
LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
|
LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, options, &decompilerOutput);
|
||||||
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||||
psAuxHash = pixelShader->auxHash;
|
psAuxHash = pixelShader->auxHash;
|
||||||
LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader);
|
LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader);
|
||||||
|
@ -94,6 +94,7 @@ extern uint64 _shaderBaseHash_vs;
|
|||||||
extern uint64 _shaderBaseHash_gs;
|
extern uint64 _shaderBaseHash_gs;
|
||||||
extern uint64 _shaderBaseHash_ps;
|
extern uint64 _shaderBaseHash_ps;
|
||||||
|
|
||||||
|
void LatteShader_GetDecompilerOptions(struct LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled);
|
||||||
LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompilerOutput_t& decompilerOutput, uint64 baseHash, bool calculateAuxHash, uint64 optionalAuxHash, uint32* contextRegister);
|
LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompilerOutput_t& decompilerOutput, uint64 baseHash, bool calculateAuxHash, uint64 optionalAuxHash, uint32* contextRegister);
|
||||||
|
|
||||||
void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync);
|
void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync);
|
||||||
|
@ -641,22 +641,22 @@ bool LatteShaderCache_readSeparableVertexShader(MemStreamReader& streamReader, u
|
|||||||
return false;
|
return false;
|
||||||
if (streamReader.hasError() || !streamReader.isEndOfStream())
|
if (streamReader.hasError() || !streamReader.isEndOfStream())
|
||||||
return false;
|
return false;
|
||||||
// update PS inputs (influence VS shader outputs)
|
// update PS inputs (affects VS shader outputs)
|
||||||
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
||||||
// get fetch shader
|
// get fetch shader
|
||||||
LatteFetchShader::CacheHash fsHash = LatteFetchShader::CalculateCacheHash((uint32*)fetchShaderData.data(), fetchShaderData.size());
|
LatteFetchShader::CacheHash fsHash = LatteFetchShader::CalculateCacheHash((uint32*)fetchShaderData.data(), fetchShaderData.size());
|
||||||
LatteFetchShader* fetchShader = LatteShaderRecompiler_createFetchShader(fsHash, lcr->GetRawView(), (uint32*)fetchShaderData.data(), fetchShaderData.size());
|
LatteFetchShader* fetchShader = LatteShaderRecompiler_createFetchShader(fsHash, lcr->GetRawView(), (uint32*)fetchShaderData.data(), fetchShaderData.size());
|
||||||
|
// determine decompiler options
|
||||||
|
LatteDecompilerOptions options;
|
||||||
|
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);
|
||||||
// decompile vertex shader
|
// decompile vertex shader
|
||||||
LatteDecompilerOutput_t decompilerOutput{};
|
LatteDecompilerOutput_t decompilerOutput{};
|
||||||
LatteFetchShader* fetchShaderList[1];
|
LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShader, options, &decompilerOutput);
|
||||||
fetchShaderList[0] = fetchShader;
|
|
||||||
LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShaderList, 1, lcr->GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
|
|
||||||
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||||
// compile
|
// compile
|
||||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, vertexShader);
|
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, vertexShader);
|
||||||
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_VERTEX, vertexShaderData.data(), vertexShaderData.size());
|
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_VERTEX, vertexShaderData.data(), vertexShaderData.size());
|
||||||
LatteShaderCache_loadOrCompileSeparableShader(vertexShader, shaderBaseHash, shaderAuxHash);
|
LatteShaderCache_loadOrCompileSeparableShader(vertexShader, shaderBaseHash, shaderAuxHash);
|
||||||
catchOpenGLError();
|
|
||||||
LatteSHRC_RegisterShader(vertexShader, shaderBaseHash, shaderAuxHash);
|
LatteSHRC_RegisterShader(vertexShader, shaderBaseHash, shaderAuxHash);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -688,15 +688,17 @@ bool LatteShaderCache_readSeparableGeometryShader(MemStreamReader& streamReader,
|
|||||||
return false;
|
return false;
|
||||||
// update PS inputs
|
// update PS inputs
|
||||||
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
||||||
|
// determine decompiler options
|
||||||
|
LatteDecompilerOptions options;
|
||||||
|
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);
|
||||||
// decompile geometry shader
|
// decompile geometry shader
|
||||||
LatteDecompilerOutput_t decompilerOutput{};
|
LatteDecompilerOutput_t decompilerOutput{};
|
||||||
LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), lcr->GetSpecialStateValues(), vsRingParameterCount, &decompilerOutput);
|
LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), vsRingParameterCount, options, &decompilerOutput);
|
||||||
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||||
// compile
|
// compile
|
||||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, geometryShader);
|
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, geometryShader);
|
||||||
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_GEOMETRY, geometryShaderData.data(), geometryShaderData.size());
|
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_GEOMETRY, geometryShaderData.data(), geometryShaderData.size());
|
||||||
LatteShaderCache_loadOrCompileSeparableShader(geometryShader, shaderBaseHash, shaderAuxHash);
|
LatteShaderCache_loadOrCompileSeparableShader(geometryShader, shaderBaseHash, shaderAuxHash);
|
||||||
catchOpenGLError();
|
|
||||||
LatteSHRC_RegisterShader(geometryShader, shaderBaseHash, shaderAuxHash);
|
LatteSHRC_RegisterShader(geometryShader, shaderBaseHash, shaderAuxHash);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -724,15 +726,17 @@ bool LatteShaderCache_readSeparablePixelShader(MemStreamReader& streamReader, ui
|
|||||||
return false;
|
return false;
|
||||||
// update PS inputs
|
// update PS inputs
|
||||||
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
||||||
|
// determine decompiler options
|
||||||
|
LatteDecompilerOptions options;
|
||||||
|
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);
|
||||||
// decompile pixel shader
|
// decompile pixel shader
|
||||||
LatteDecompilerOutput_t decompilerOutput{};
|
LatteDecompilerOutput_t decompilerOutput{};
|
||||||
LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), lcr->GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
|
LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), options, &decompilerOutput);
|
||||||
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||||
// compile
|
// compile
|
||||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, pixelShader);
|
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, pixelShader);
|
||||||
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_PIXEL, pixelShaderData.data(), pixelShaderData.size());
|
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_PIXEL, pixelShaderData.data(), pixelShaderData.size());
|
||||||
LatteShaderCache_loadOrCompileSeparableShader(pixelShader, shaderBaseHash, shaderAuxHash);
|
LatteShaderCache_loadOrCompileSeparableShader(pixelShader, shaderBaseHash, shaderAuxHash);
|
||||||
catchOpenGLError();
|
|
||||||
LatteSHRC_RegisterShader(pixelShader, shaderBaseHash, shaderAuxHash);
|
LatteSHRC_RegisterShader(pixelShader, shaderBaseHash, shaderAuxHash);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -101,16 +101,16 @@ void LatteStreamout_PrepareDrawcall(uint32 count, uint32 instanceCount)
|
|||||||
if (geometryShader)
|
if (geometryShader)
|
||||||
{
|
{
|
||||||
#ifdef CEMU_DEBUG_ASSERT
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
cemu_assert_debug(vertexShader->streamoutBufferWriteMask2.any() == false);
|
cemu_assert_debug(vertexShader->streamoutBufferWriteMask.any() == false);
|
||||||
#endif
|
#endif
|
||||||
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
||||||
if (geometryShader->streamoutBufferWriteMask2[i])
|
if (geometryShader->streamoutBufferWriteMask[i])
|
||||||
streamoutWriteMask |= (1 << i);
|
streamoutWriteMask |= (1 << i);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
||||||
if (vertexShader->streamoutBufferWriteMask2[i])
|
if (vertexShader->streamoutBufferWriteMask[i])
|
||||||
streamoutWriteMask |= (1 << i);
|
streamoutWriteMask |= (1 << i);
|
||||||
}
|
}
|
||||||
activeStreamoutOperation.streamoutWriteMask = streamoutWriteMask;
|
activeStreamoutOperation.streamoutWriteMask = streamoutWriteMask;
|
||||||
|
@ -1066,44 +1066,32 @@ void _LatteDecompiler_Process(LatteDecompilerShaderContext* shaderContext, uint8
|
|||||||
_LatteDecompiler_GenerateDataForFastAccess(shaderContext->shader);
|
_LatteDecompiler_GenerateDataForFastAccess(shaderContext->shader);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LatteDecompiler_InitContext(LatteDecompilerShaderContext& dCtx, LatteDecompilerOutput_t* output, LatteConst::ShaderType shaderType, uint64 shaderBaseHash, uint32* contextRegisters)
|
void LatteDecompiler_InitContext(LatteDecompilerShaderContext& dCtx, const LatteDecompilerOptions& options, LatteDecompilerOutput_t* output, LatteConst::ShaderType shaderType, uint64 shaderBaseHash, uint32* contextRegisters)
|
||||||
{
|
{
|
||||||
dCtx.output = output;
|
dCtx.output = output;
|
||||||
dCtx.shaderType = shaderType;
|
dCtx.shaderType = shaderType;
|
||||||
output->shaderType = shaderType;
|
dCtx.options = &options;
|
||||||
dCtx.shaderBaseHash = shaderBaseHash;
|
dCtx.shaderBaseHash = shaderBaseHash;
|
||||||
dCtx.contextRegisters = contextRegisters;
|
dCtx.contextRegisters = contextRegisters;
|
||||||
dCtx.contextRegistersNew = (LatteContextRegister*)contextRegisters;
|
dCtx.contextRegistersNew = (LatteContextRegister*)contextRegisters;
|
||||||
|
output->shaderType = shaderType;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output, bool useTFViaSSBO)
|
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader* fetchShader, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||||
{
|
{
|
||||||
|
cemu_assert_debug(fetchShader);
|
||||||
cemu_assert_debug((programSize & 3) == 0);
|
cemu_assert_debug((programSize & 3) == 0);
|
||||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||||
// prepare decompiler context
|
// prepare decompiler context
|
||||||
LatteDecompilerShaderContext shaderContext = { 0 };
|
LatteDecompilerShaderContext shaderContext = { 0 };
|
||||||
LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Vertex, shaderBaseHash, contextRegisters);
|
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Vertex, shaderBaseHash, contextRegisters);
|
||||||
cemu_assert_debug(fetchShaderCount == 1);
|
shaderContext.fetchShader = fetchShader;
|
||||||
for (sint32 i = 0; i < fetchShaderCount; i++)
|
|
||||||
{
|
|
||||||
shaderContext.fetchShaderList[i] = fetchShaderList[i];
|
|
||||||
}
|
|
||||||
shaderContext.fetchShaderCount = fetchShaderCount;
|
|
||||||
// ugly hack to get tf mode from Vulkan renderer
|
|
||||||
shaderContext.useTFViaSSBO = useTFViaSSBO;
|
|
||||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
|
||||||
{
|
|
||||||
shaderContext.useTFViaSSBO = VulkanRenderer::GetInstance()->useTFViaSSBO();
|
|
||||||
}
|
|
||||||
// prepare shader (deprecated)
|
// prepare shader (deprecated)
|
||||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Vertex);
|
||||||
shader->shaderType = LatteConst::ShaderType::Vertex;
|
shader->compatibleFetchShader = shaderContext.fetchShader;
|
||||||
shader->compatibleFetchShader = shaderContext.fetchShaderList[0];
|
|
||||||
shaderContext.shaderType = LatteConst::ShaderType::Vertex;
|
|
||||||
output->shaderType = LatteConst::ShaderType::Vertex;
|
output->shaderType = LatteConst::ShaderType::Vertex;
|
||||||
shaderContext.shader = shader;
|
shaderContext.shader = shader;
|
||||||
output->shader = shader;
|
output->shader = shader;
|
||||||
shaderContext.usesGeometryShader = usesGeometryShader;
|
|
||||||
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
||||||
{
|
{
|
||||||
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
||||||
@ -1114,24 +1102,19 @@ void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contex
|
|||||||
performanceMonitor.gpuTime_shaderCreate.endMeasuring();
|
performanceMonitor.gpuTime_shaderCreate.endMeasuring();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOutput_t* output, bool useTFViaSSBO)
|
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||||
{
|
{
|
||||||
cemu_assert_debug((programSize & 3) == 0);
|
cemu_assert_debug((programSize & 3) == 0);
|
||||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||||
// prepare decompiler context
|
// prepare decompiler context
|
||||||
LatteDecompilerShaderContext shaderContext = { 0 };
|
LatteDecompilerShaderContext shaderContext = { 0 };
|
||||||
shaderContext.fetchShaderCount = 0;
|
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Geometry, shaderBaseHash, contextRegisters);
|
||||||
LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Geometry, shaderBaseHash, contextRegisters);
|
|
||||||
// prepare shader
|
// prepare shader
|
||||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Geometry);
|
||||||
shaderContext.output = output;
|
|
||||||
shader->shaderType = LatteConst::ShaderType::Geometry;
|
|
||||||
shader->ringParameterCountFromPrevStage = vsRingParameterCount;
|
shader->ringParameterCountFromPrevStage = vsRingParameterCount;
|
||||||
shaderContext.shaderType = LatteConst::ShaderType::Geometry;
|
|
||||||
output->shaderType = LatteConst::ShaderType::Geometry;
|
output->shaderType = LatteConst::ShaderType::Geometry;
|
||||||
shaderContext.shader = shader;
|
shaderContext.shader = shader;
|
||||||
output->shader = shader;
|
output->shader = shader;
|
||||||
shaderContext.usesGeometryShader = true;
|
|
||||||
if (gsCopyProgramData == NULL)
|
if (gsCopyProgramData == NULL)
|
||||||
{
|
{
|
||||||
shader->hasError = true;
|
shader->hasError = true;
|
||||||
@ -1145,34 +1128,24 @@ void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* cont
|
|||||||
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
||||||
shader->textureUsesDepthCompare[i] = false;
|
shader->textureUsesDepthCompare[i] = false;
|
||||||
}
|
}
|
||||||
// ugly hack to get tf mode from Vulkan renderer
|
|
||||||
shaderContext.useTFViaSSBO = useTFViaSSBO;
|
|
||||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
|
||||||
{
|
|
||||||
shaderContext.useTFViaSSBO = VulkanRenderer::GetInstance()->useTFViaSSBO();
|
|
||||||
}
|
|
||||||
// parse & compile
|
// parse & compile
|
||||||
_LatteDecompiler_Process(&shaderContext, programData, programSize);
|
_LatteDecompiler_Process(&shaderContext, programData, programSize);
|
||||||
performanceMonitor.gpuTime_shaderCreate.endMeasuring();
|
performanceMonitor.gpuTime_shaderCreate.endMeasuring();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output)
|
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||||
{
|
{
|
||||||
cemu_assert_debug((programSize & 3) == 0);
|
cemu_assert_debug((programSize & 3) == 0);
|
||||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||||
// prepare decompiler context
|
// prepare decompiler context
|
||||||
LatteDecompilerShaderContext shaderContext = { 0 };
|
LatteDecompilerShaderContext shaderContext = { 0 };
|
||||||
LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Pixel, shaderBaseHash, contextRegisters);
|
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Pixel, shaderBaseHash, contextRegisters);
|
||||||
shaderContext.contextRegisters = contextRegisters;
|
shaderContext.contextRegisters = contextRegisters;
|
||||||
// prepare shader
|
// prepare shader
|
||||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
LatteDecompilerShader* shader = new LatteDecompilerShader(LatteConst::ShaderType::Pixel);
|
||||||
shaderContext.output = output;
|
|
||||||
shader->shaderType = LatteConst::ShaderType::Pixel;
|
|
||||||
shaderContext.shaderType = LatteConst::ShaderType::Pixel;
|
|
||||||
output->shaderType = LatteConst::ShaderType::Pixel;
|
output->shaderType = LatteConst::ShaderType::Pixel;
|
||||||
shaderContext.shader = shader;
|
shaderContext.shader = shader;
|
||||||
output->shader = shader;
|
output->shader = shader;
|
||||||
shaderContext.usesGeometryShader = usesGeometryShader;
|
|
||||||
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
||||||
{
|
{
|
||||||
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
||||||
|
@ -148,50 +148,52 @@ struct LatteDecompilerShaderResourceMapping
|
|||||||
|
|
||||||
struct LatteDecompilerShader
|
struct LatteDecompilerShader
|
||||||
{
|
{
|
||||||
LatteDecompilerShader* next;
|
LatteDecompilerShader(LatteConst::ShaderType shaderType) : shaderType(shaderType) {}
|
||||||
|
|
||||||
|
LatteDecompilerShader* next{nullptr};
|
||||||
LatteConst::ShaderType shaderType;
|
LatteConst::ShaderType shaderType;
|
||||||
uint64 baseHash;
|
uint64 baseHash{0};
|
||||||
uint64 auxHash;
|
uint64 auxHash{0};
|
||||||
// vertex shader
|
// vertex shader
|
||||||
struct LatteFetchShader* compatibleFetchShader{};
|
struct LatteFetchShader* compatibleFetchShader{};
|
||||||
// error tracking
|
// error tracking
|
||||||
bool hasError; // if set, the shader cannot be used
|
bool hasError{false}; // if set, the shader cannot be used
|
||||||
// optimized access / iteration
|
// optimized access / iteration
|
||||||
// list of uniform buffers used
|
// list of uniform buffers used
|
||||||
uint8 uniformBufferList[LATTE_NUM_MAX_UNIFORM_BUFFERS];
|
uint8 uniformBufferList[LATTE_NUM_MAX_UNIFORM_BUFFERS];
|
||||||
uint8 uniformBufferListCount;
|
uint8 uniformBufferListCount{ 0 };
|
||||||
// list of used texture units (faster access than iterating textureUnitMask)
|
// list of used texture units (faster access than iterating textureUnitMask)
|
||||||
uint8 textureUnitList[LATTE_NUM_MAX_TEX_UNITS];
|
uint8 textureUnitList[LATTE_NUM_MAX_TEX_UNITS];
|
||||||
uint8 textureUnitListCount;
|
uint8 textureUnitListCount{ 0 };
|
||||||
// input
|
// input
|
||||||
Latte::E_DIM textureUnitDim[LATTE_NUM_MAX_TEX_UNITS]; // dimension of texture unit, from the currently set texture
|
Latte::E_DIM textureUnitDim[LATTE_NUM_MAX_TEX_UNITS]{}; // dimension of texture unit, from the currently set texture
|
||||||
bool textureIsIntegerFormat[LATTE_NUM_MAX_TEX_UNITS]{};
|
bool textureIsIntegerFormat[LATTE_NUM_MAX_TEX_UNITS]{};
|
||||||
// analyzer stage (uniforms)
|
// analyzer stage (uniforms)
|
||||||
uint8 uniformMode; // determines how uniforms are managed within the shader (see GPU7_DECOMPILER_UNIFORM_MODE_* constants)
|
uint8 uniformMode{0}; // determines how uniforms are managed within the shader (see LATTE_DECOMPILER_UNIFORM_MODE_* constants)
|
||||||
uint64 uniformDataHash64[2]; // used to avoid redundant calls to glUniform*
|
uint64 uniformDataHash64[2]{0}; // used to avoid redundant calls to glUniform*
|
||||||
std::vector<LatteDecompilerRemappedUniformEntry_t> list_remappedUniformEntries;
|
std::vector<LatteDecompilerRemappedUniformEntry_t> list_remappedUniformEntries;
|
||||||
// analyzer stage (textures)
|
// analyzer stage (textures)
|
||||||
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
|
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
|
||||||
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]; // GPU7_SAMPLER_NONE means undefined
|
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
|
||||||
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS];
|
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
|
||||||
|
|
||||||
// analyzer stage (pixel outputs)
|
// analyzer stage (pixel outputs)
|
||||||
uint32 pixelColorOutputMask; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments, may differ from export index inside the pixel shader)
|
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)
|
||||||
// analyzer stage (geometry shader parameters/inputs)
|
// analyzer stage (geometry shader parameters/inputs)
|
||||||
uint32 ringParameterCount;
|
uint32 ringParameterCount{ 0 };
|
||||||
uint32 ringParameterCountFromPrevStage; // used in geometry shader to hold VS ringParameterCount
|
uint32 ringParameterCountFromPrevStage{ 0 }; // used in geometry shader to hold VS ringParameterCount
|
||||||
// analyzer stage (misc)
|
// analyzer stage (misc)
|
||||||
std::bitset<LATTE_NUM_STREAMOUT_BUFFER> streamoutBufferWriteMask2;
|
std::bitset<LATTE_NUM_STREAMOUT_BUFFER> streamoutBufferWriteMask;
|
||||||
bool hasStreamoutBufferWrite;
|
bool hasStreamoutBufferWrite{ false };
|
||||||
// output code
|
// output code
|
||||||
class StringBuf* strBuf_shaderSource{nullptr};
|
class StringBuf* strBuf_shaderSource{ nullptr };
|
||||||
// separable shaders
|
// separable shaders
|
||||||
RendererShader* shader;
|
RendererShader* shader{ nullptr };
|
||||||
bool isCustomShader;
|
bool isCustomShader{ false };
|
||||||
|
|
||||||
uint32 outputParameterMask;
|
uint32 outputParameterMask{ 0 };
|
||||||
// resource mapping (binding points)
|
// resource mapping (binding points)
|
||||||
LatteDecompilerShaderResourceMapping resourceMapping;
|
LatteDecompilerShaderResourceMapping resourceMapping{};
|
||||||
// uniforms
|
// uniforms
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
@ -208,7 +210,7 @@ struct LatteDecompilerShader
|
|||||||
sint32 loc_verticesPerInstance;
|
sint32 loc_verticesPerInstance;
|
||||||
sint32 loc_streamoutBufferBase[LATTE_NUM_STREAMOUT_BUFFER];
|
sint32 loc_streamoutBufferBase[LATTE_NUM_STREAMOUT_BUFFER];
|
||||||
sint32 uniformRangeSize; // entire size of uniform variable block
|
sint32 uniformRangeSize; // entire size of uniform variable block
|
||||||
}uniform;
|
}uniform{ 0 };
|
||||||
// fast access
|
// fast access
|
||||||
struct _RemappedUniformBufferGroup
|
struct _RemappedUniformBufferGroup
|
||||||
{
|
{
|
||||||
@ -218,7 +220,7 @@ struct LatteDecompilerShader
|
|||||||
std::vector<LatteFastAccessRemappedUniformEntry_buffer_t> entries;
|
std::vector<LatteFastAccessRemappedUniformEntry_buffer_t> entries;
|
||||||
};
|
};
|
||||||
std::vector<LatteFastAccessRemappedUniformEntry_register_t> list_remappedUniformEntries_register;
|
std::vector<LatteFastAccessRemappedUniformEntry_register_t> list_remappedUniformEntries_register;
|
||||||
std::vector<_RemappedUniformBufferGroup> list_remappedUniformEntries_bufferGroups;
|
std::vector<_RemappedUniformBufferGroup> list_remappedUniformEntries_bufferGroups;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LatteDecompilerOutputUniformOffsets
|
struct LatteDecompilerOutputUniformOffsets
|
||||||
@ -250,6 +252,17 @@ struct LatteDecompilerOutputUniformOffsets
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct LatteDecompilerOptions
|
||||||
|
{
|
||||||
|
bool usesGeometryShader{ false };
|
||||||
|
// Vulkan-specific
|
||||||
|
bool useTFViaSSBO{ false };
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
bool hasRoundingModeRTEFloat32{ false };
|
||||||
|
}spirvInstrinsics;
|
||||||
|
};
|
||||||
|
|
||||||
struct LatteDecompilerOutput_t
|
struct LatteDecompilerOutput_t
|
||||||
{
|
{
|
||||||
LatteDecompilerShader* shader;
|
LatteDecompilerShader* shader;
|
||||||
@ -272,9 +285,9 @@ struct LatteDecompilerOutput_t
|
|||||||
|
|
||||||
struct LatteDecompilerSubroutineInfo;
|
struct LatteDecompilerSubroutineInfo;
|
||||||
|
|
||||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output, bool useTFViaSSBO = false);
|
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader* fetchShader, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOutput_t* output, bool useTFViaSSBO = false);
|
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output);
|
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||||
|
|
||||||
// specialized shader parsers
|
// specialized shader parsers
|
||||||
|
|
||||||
|
@ -630,7 +630,7 @@ namespace LatteDecompiler
|
|||||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry && decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
|
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry && decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
|
||||||
decompilerContext->hasUniformVarBlock = true; // uf_pointSize
|
decompilerContext->hasUniformVarBlock = true; // uf_pointSize
|
||||||
if (decompilerContext->analyzer.useSSBOForStreamout &&
|
if (decompilerContext->analyzer.useSSBOForStreamout &&
|
||||||
(decompilerContext->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) ||
|
(decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
|
||||||
(decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
|
(decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
|
||||||
{
|
{
|
||||||
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase*
|
decompilerContext->hasUniformVarBlock = true; // uf_verticesPerInstance and uf_streamoutBufferBase*
|
||||||
@ -735,7 +735,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||||||
// analyze render state
|
// analyze render state
|
||||||
shaderContext->analyzer.isPointsPrimitive = shaderContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE() == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS;
|
shaderContext->analyzer.isPointsPrimitive = shaderContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE() == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS;
|
||||||
shaderContext->analyzer.hasStreamoutEnable = shaderContext->contextRegisters[mmVGT_STRMOUT_EN] != 0; // set if the shader is used for transform feedback operations
|
shaderContext->analyzer.hasStreamoutEnable = shaderContext->contextRegisters[mmVGT_STRMOUT_EN] != 0; // set if the shader is used for transform feedback operations
|
||||||
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex && shaderContext->usesGeometryShader == false)
|
if (shaderContext->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader)
|
||||||
shaderContext->analyzer.outputPointSize = shaderContext->analyzer.isPointsPrimitive;
|
shaderContext->analyzer.outputPointSize = shaderContext->analyzer.isPointsPrimitive;
|
||||||
else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry)
|
else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||||
{
|
{
|
||||||
@ -746,10 +746,9 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||||||
// analyze input attributes for vertex/geometry shader
|
// analyze input attributes for vertex/geometry shader
|
||||||
if (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)
|
if (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)
|
||||||
{
|
{
|
||||||
for (sint32 f = 0; f < shaderContext->fetchShaderCount; f++)
|
if(shaderContext->fetchShader)
|
||||||
{
|
{
|
||||||
LatteFetchShader* parsedFetchShader = (LatteFetchShader*)shaderContext->fetchShaderList[f];
|
LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
|
||||||
|
|
||||||
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
||||||
{
|
{
|
||||||
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
||||||
@ -938,9 +937,9 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||||||
// analyze input attributes again (if shader has relative GPR read)
|
// analyze input attributes again (if shader has relative GPR read)
|
||||||
if(shaderContext->analyzer.usesRelativeGPRRead && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry) )
|
if(shaderContext->analyzer.usesRelativeGPRRead && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry) )
|
||||||
{
|
{
|
||||||
for (sint32 f = 0; f < shaderContext->fetchShaderCount; f++)
|
if(shaderContext->fetchShader)
|
||||||
{
|
{
|
||||||
LatteFetchShader* parsedFetchShader = (LatteFetchShader*)shaderContext->fetchShaderList[f];
|
LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
|
||||||
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
||||||
{
|
{
|
||||||
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
||||||
@ -1077,7 +1076,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||||||
if(list_subroutineAddrs.empty() == false)
|
if(list_subroutineAddrs.empty() == false)
|
||||||
forceLogDebug_printf("Todo - analyze shader subroutine CF stack");
|
forceLogDebug_printf("Todo - analyze shader subroutine CF stack");
|
||||||
// TF mode
|
// TF mode
|
||||||
if (shaderContext->useTFViaSSBO && shaderContext->output->streamoutBufferWriteMask.any())
|
if (shaderContext->options->useTFViaSSBO && shaderContext->output->streamoutBufferWriteMask.any())
|
||||||
{
|
{
|
||||||
shaderContext->analyzer.useSSBOForStreamout = true;
|
shaderContext->analyzer.useSSBOForStreamout = true;
|
||||||
}
|
}
|
||||||
|
@ -3363,7 +3363,7 @@ void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDeco
|
|||||||
if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
|
if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (shaderContext->useTFViaSSBO)
|
if (shaderContext->options->useTFViaSSBO)
|
||||||
{
|
{
|
||||||
uint32 u32Offset = streamWrite->exportArrayBase + i;
|
uint32 u32Offset = streamWrite->exportArrayBase + i;
|
||||||
src->addFmt("sb_buffer[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset);
|
src->addFmt("sb_buffer[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset);
|
||||||
@ -3483,7 +3483,7 @@ void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDeco
|
|||||||
if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
|
if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (shaderContext->useTFViaSSBO)
|
if (shaderContext->options->useTFViaSSBO)
|
||||||
{
|
{
|
||||||
uint32 u32Offset = cfInstruction->exportArrayBase + i;
|
uint32 u32Offset = cfInstruction->exportArrayBase + i;
|
||||||
src->addFmt("sb_buffer[sbBase{} + {}]", streamoutBufferIndex, u32Offset);
|
src->addFmt("sb_buffer[sbBase{} + {}]", streamoutBufferIndex, u32Offset);
|
||||||
@ -3831,7 +3831,7 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade
|
|||||||
//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works
|
//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works
|
||||||
|
|
||||||
if( LatteGPUState.glVendor == GLVENDOR_NVIDIA && !ActiveSettings::DumpShadersEnabled())
|
if( LatteGPUState.glVendor == GLVENDOR_NVIDIA && !ActiveSettings::DumpShadersEnabled())
|
||||||
fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage
|
fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage (OpenGL)
|
||||||
else
|
else
|
||||||
fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" _CRLF);
|
fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" _CRLF);
|
||||||
}
|
}
|
||||||
@ -3841,51 +3841,6 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void _addPixelShaderExtraDebugInfo(LatteDecompilerShaderContext* shaderContext, StringBuf* fCStr_shaderSource)
|
|
||||||
{
|
|
||||||
#ifdef CEMU_DEBUG_ASSERT
|
|
||||||
fCStr_shaderSource->add("// Color buffers:" _CRLF);
|
|
||||||
for(uint32 i=0; i<8; i++)
|
|
||||||
{
|
|
||||||
uint32 regColorBuffer = shaderContext->contextRegisters[mmCB_COLOR0_BASE+i];
|
|
||||||
uint32 regColorSize = shaderContext->contextRegisters[mmCB_COLOR0_SIZE+i];
|
|
||||||
uint32 regColorInfo = shaderContext->contextRegisters[mmCB_COLOR0_INFO+i];
|
|
||||||
uint32 regColorView = shaderContext->contextRegisters[mmCB_COLOR0_VIEW+i];
|
|
||||||
MPTR colorBufferPhysMem = regColorBuffer;
|
|
||||||
if( regColorBuffer == MPTR_NULL )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
uint32 colorBufferFormat = (regColorInfo>>2)&0x3F; // format
|
|
||||||
uint32 colorBufferTileMode = 0;
|
|
||||||
colorBufferTileMode = (regColorInfo >> 8) & 0xF;
|
|
||||||
switch ( (regColorInfo >> 12) & 7 )
|
|
||||||
{
|
|
||||||
case 4:
|
|
||||||
colorBufferFormat |= 0x100;
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
colorBufferFormat |= 0x200;
|
|
||||||
break;
|
|
||||||
case 5:
|
|
||||||
colorBufferFormat |= 0x300;
|
|
||||||
break;
|
|
||||||
case 6:
|
|
||||||
colorBufferFormat |= 0x400;
|
|
||||||
break;
|
|
||||||
case 7:
|
|
||||||
colorBufferFormat |= 0x800;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32 colorBufferWidth = (regColorSize>>0)&0xFFFF;
|
|
||||||
uint32 colorBufferHeight = (regColorSize>>16)&0xFFFF;
|
|
||||||
fCStr_shaderSource->addFmt("// Color{}: {}x{} at 0x{:08x} fmt {:04x} tm {}" _CRLF, i, colorBufferWidth, colorBufferHeight, colorBufferPhysMem, colorBufferFormat, colorBufferTileMode);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp"
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp"
|
||||||
|
|
||||||
void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* shaderContext, LatteParsedFetchShaderAttribute_t& attrib)
|
void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* shaderContext, LatteParsedFetchShaderAttribute_t& attrib)
|
||||||
@ -3954,18 +3909,14 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
|||||||
src->add("#extension GL_ARB_texture_gather : enable" _CRLF);
|
src->add("#extension GL_ARB_texture_gather : enable" _CRLF);
|
||||||
src->add("#extension GL_ARB_separate_shader_objects : enable" _CRLF);
|
src->add("#extension GL_ARB_separate_shader_objects : enable" _CRLF);
|
||||||
|
|
||||||
if (shaderContext->analyzer.hasStreamoutWrite || shaderContext->usesGeometryShader )
|
if (shaderContext->analyzer.hasStreamoutWrite || shaderContext->options->usesGeometryShader )
|
||||||
src->add("#extension GL_ARB_enhanced_layouts : enable" _CRLF);
|
src->add("#extension GL_ARB_enhanced_layouts : enable" _CRLF);
|
||||||
|
|
||||||
// debug info
|
// debug info
|
||||||
src->addFmt("// shader %08x%08x" _CRLF, (uint32)(shaderContext->shaderBaseHash >> 32), (uint32)(shaderContext->shaderBaseHash & 0xFFFFFFFF));
|
src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash);
|
||||||
#ifdef CEMU_DEBUG_ASSERT
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false");
|
src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false");
|
||||||
src->addFmt(_CRLF);
|
src->addFmt(_CRLF);
|
||||||
|
|
||||||
if( shader->shaderType == LatteConst::ShaderType::Pixel )
|
|
||||||
_addPixelShaderExtraDebugInfo(shaderContext, src);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// header part (definitions for inputs and outputs)
|
// header part (definitions for inputs and outputs)
|
||||||
LatteDecompiler::emitHeader(shaderContext);
|
LatteDecompiler::emitHeader(shaderContext);
|
||||||
@ -3982,7 +3933,6 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
|||||||
{
|
{
|
||||||
if (shaderContext->analyzer.usesRelativeGPRRead || (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i & 7))) != 0)
|
if (shaderContext->analyzer.usesRelativeGPRRead || (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i & 7))) != 0)
|
||||||
{
|
{
|
||||||
//fCStr_appendFormatted(fCStr_shaderSource, "ivec4 R{}i, R{}i, R{}i, R{}i;" STR_LINEBREAK, i*4+0, i*4+1, i*4+2, i*4+3);
|
|
||||||
if (shaderContext->typeTracker.genIntReg)
|
if (shaderContext->typeTracker.genIntReg)
|
||||||
src->addFmt("ivec4 R{}i = ivec4(0);" _CRLF, i);
|
src->addFmt("ivec4 R{}i = ivec4(0);" _CRLF, i);
|
||||||
else if (shaderContext->typeTracker.genFloatReg)
|
else if (shaderContext->typeTracker.genFloatReg)
|
||||||
@ -4035,7 +3985,6 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
|||||||
src->add("bool predResult = true;" _CRLF);
|
src->add("bool predResult = true;" _CRLF);
|
||||||
if(shaderContext->analyzer.modifiesPixelActiveState )
|
if(shaderContext->analyzer.modifiesPixelActiveState )
|
||||||
{
|
{
|
||||||
// cemu_assert_debug(shaderContext->analyzer.activeStackMaxDepth == 0);
|
|
||||||
src->addFmt("bool activeMaskStack[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+1);
|
src->addFmt("bool activeMaskStack[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+1);
|
||||||
src->addFmt("bool activeMaskStackC[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+2);
|
src->addFmt("bool activeMaskStackC[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+2);
|
||||||
for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth; i++)
|
for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth; i++)
|
||||||
@ -4058,8 +4007,11 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// helper variables for cube maps (todo: Only emit when used)
|
// helper variables for cube maps (todo: Only emit when used)
|
||||||
src->addFmt("vec3 cubeMapSTM;" _CRLF);
|
if (shaderContext->analyzer.hasRedcCUBE)
|
||||||
src->addFmt("int cubeMapFaceId;" _CRLF);
|
{
|
||||||
|
src->add("vec3 cubeMapSTM;" _CRLF);
|
||||||
|
src->add("int cubeMapFaceId;" _CRLF);
|
||||||
|
}
|
||||||
for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++)
|
for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++)
|
||||||
{
|
{
|
||||||
if(!shaderContext->output->textureUnitMask[i])
|
if(!shaderContext->output->textureUnitMask[i])
|
||||||
@ -4106,25 +4058,18 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
|||||||
cemu_assert_unimplemented();
|
cemu_assert_unimplemented();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shaderContext->fetchShaderCount == 1)
|
LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
|
||||||
|
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
||||||
{
|
{
|
||||||
LatteFetchShader* parsedFetchShader = shaderContext->fetchShaderList[0];
|
for(sint32 i=0; i<bufferGroup.attribCount; i++)
|
||||||
for(auto& bufferGroup : parsedFetchShader->bufferGroups)
|
LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
|
||||||
{
|
|
||||||
for(sint32 i=0; i<bufferGroup.attribCount; i++)
|
|
||||||
LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
|
|
||||||
}
|
|
||||||
for (auto& bufferGroup : parsedFetchShader->bufferGroupsInvalid)
|
|
||||||
{
|
|
||||||
// these attributes point to non-existent buffers
|
|
||||||
// todo - figure out how the hardware actually handles this, currently we assume the input values are zero
|
|
||||||
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
|
||||||
LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
for (auto& bufferGroup : parsedFetchShader->bufferGroupsInvalid)
|
||||||
{
|
{
|
||||||
cemu_assert_unimplemented();
|
// these attributes point to non-existent buffers
|
||||||
|
// todo - figure out how the hardware actually handles this, currently we assume the input values are zero
|
||||||
|
for (sint32 i = 0; i < bufferGroup.attribCount; i++)
|
||||||
|
LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (shader->shaderType == LatteConst::ShaderType::Pixel)
|
else if (shader->shaderType == LatteConst::ShaderType::Pixel)
|
||||||
@ -4172,7 +4117,7 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shaderContext->usesGeometryShader)
|
if (shaderContext->options->usesGeometryShader)
|
||||||
{
|
{
|
||||||
// import from geometry shader
|
// import from geometry shader
|
||||||
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
|
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
|
||||||
@ -4216,7 +4161,7 @@ void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext,
|
|||||||
// vertex shader should write renderstate point size at the end if required but not modified by shader
|
// vertex shader should write renderstate point size at the end if required but not modified by shader
|
||||||
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
|
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
|
||||||
{
|
{
|
||||||
if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->usesGeometryShader == false)
|
if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false)
|
||||||
src->add("gl_PointSize = uf_pointSize;" _CRLF);
|
src->add("gl_PointSize = uf_pointSize;" _CRLF);
|
||||||
}
|
}
|
||||||
// end of shader main
|
// end of shader main
|
||||||
|
@ -95,7 +95,7 @@ namespace LatteDecompiler
|
|||||||
}
|
}
|
||||||
if (decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
|
if (decompilerContext->analyzer.outputPointSize && decompilerContext->analyzer.writesPointSize == false)
|
||||||
{
|
{
|
||||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->usesGeometryShader) ||
|
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
|
||||||
decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||||
{
|
{
|
||||||
uniformCurrentOffset = (uniformCurrentOffset + 3)&~3;
|
uniformCurrentOffset = (uniformCurrentOffset + 3)&~3;
|
||||||
@ -135,7 +135,7 @@ namespace LatteDecompiler
|
|||||||
}
|
}
|
||||||
// define uf_verticesPerInstance + uf_streamoutBufferBaseX
|
// define uf_verticesPerInstance + uf_streamoutBufferBaseX
|
||||||
if (decompilerContext->analyzer.useSSBOForStreamout &&
|
if (decompilerContext->analyzer.useSSBOForStreamout &&
|
||||||
(shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) ||
|
(shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) ||
|
||||||
(shader->shaderType == LatteConst::ShaderType::Geometry) )
|
(shader->shaderType == LatteConst::ShaderType::Geometry) )
|
||||||
{
|
{
|
||||||
shaderSrc->add("uniform int uf_verticesPerInstance;" _CRLF);
|
shaderSrc->add("uniform int uf_verticesPerInstance;" _CRLF);
|
||||||
@ -298,7 +298,7 @@ namespace LatteDecompiler
|
|||||||
|
|
||||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||||
{
|
{
|
||||||
if (decompilerContext->usesGeometryShader)
|
if (decompilerContext->options->usesGeometryShader)
|
||||||
src->add("#define V2G_LAYOUT layout(location = 0)" _CRLF);
|
src->add("#define V2G_LAYOUT layout(location = 0)" _CRLF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -306,6 +306,14 @@ namespace LatteDecompiler
|
|||||||
{
|
{
|
||||||
src->add("#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale.xy,gl_FragCoord.z, 1.0/gl_FragCoord.w)" _CRLF);
|
src->add("#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale.xy,gl_FragCoord.z, 1.0/gl_FragCoord.w)" _CRLF);
|
||||||
}
|
}
|
||||||
|
if (decompilerContext->options->spirvInstrinsics.hasRoundingModeRTEFloat32)
|
||||||
|
{
|
||||||
|
src->add("#extension GL_EXT_spirv_intrinsics: enable" _CRLF);
|
||||||
|
// set RoundingModeRTE
|
||||||
|
src->add("spirv_execution_mode(4462, 16);" _CRLF);
|
||||||
|
src->add("spirv_execution_mode(4462, 32);" _CRLF);
|
||||||
|
src->add("spirv_execution_mode(4462, 64);" _CRLF);
|
||||||
|
}
|
||||||
src->add("#else" _CRLF);
|
src->add("#else" _CRLF);
|
||||||
// OpenGL defines
|
// OpenGL defines
|
||||||
src->add("#define ATTR_LAYOUT(__vkSet, __location) layout(location = __location)" _CRLF);
|
src->add("#define ATTR_LAYOUT(__vkSet, __location) layout(location = __location)" _CRLF);
|
||||||
@ -317,7 +325,7 @@ namespace LatteDecompiler
|
|||||||
src->add("#define XFB_BLOCK_LAYOUT(__bufferIndex, __stride, __location) layout(xfb_buffer = __bufferIndex, xfb_stride = __stride)" _CRLF);
|
src->add("#define XFB_BLOCK_LAYOUT(__bufferIndex, __stride, __location) layout(xfb_buffer = __bufferIndex, xfb_stride = __stride)" _CRLF);
|
||||||
|
|
||||||
src->add("#define SET_POSITION(_v) gl_Position = _v\r\n");
|
src->add("#define SET_POSITION(_v) gl_Position = _v\r\n");
|
||||||
if (decompilerContext->usesGeometryShader)
|
if (decompilerContext->options->usesGeometryShader)
|
||||||
src->add("#define V2G_LAYOUT" _CRLF);
|
src->add("#define V2G_LAYOUT" _CRLF);
|
||||||
}
|
}
|
||||||
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
|
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
|
||||||
@ -425,7 +433,7 @@ namespace LatteDecompiler
|
|||||||
{
|
{
|
||||||
auto src = decompilerContext->shaderSource;
|
auto src = decompilerContext->shaderSource;
|
||||||
// per-vertex output (VS or GS)
|
// per-vertex output (VS or GS)
|
||||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->usesGeometryShader == false) ||
|
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
|
||||||
(decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
|
(decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
|
||||||
{
|
{
|
||||||
src->add("out gl_PerVertex" _CRLF);
|
src->add("out gl_PerVertex" _CRLF);
|
||||||
@ -436,7 +444,7 @@ namespace LatteDecompiler
|
|||||||
src->add("};" _CRLF);
|
src->add("};" _CRLF);
|
||||||
}
|
}
|
||||||
// varyings (variables passed from vertex to pixel shader, only if geometry stage is disabled
|
// varyings (variables passed from vertex to pixel shader, only if geometry stage is disabled
|
||||||
if (decompilerContext->usesGeometryShader == false)
|
if (decompilerContext->options->usesGeometryShader == false)
|
||||||
{
|
{
|
||||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
|
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
|
||||||
{
|
{
|
||||||
@ -532,7 +540,7 @@ namespace LatteDecompiler
|
|||||||
// streamout buffer (transform feedback)
|
// streamout buffer (transform feedback)
|
||||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) && decompilerContext->analyzer.hasStreamoutEnable)
|
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) && decompilerContext->analyzer.hasStreamoutEnable)
|
||||||
{
|
{
|
||||||
if (decompilerContext->useTFViaSSBO)
|
if (decompilerContext->options->useTFViaSSBO)
|
||||||
{
|
{
|
||||||
if (decompilerContext->analyzer.useSSBOForStreamout && decompilerContext->analyzer.hasStreamoutWrite)
|
if (decompilerContext->analyzer.useSSBOForStreamout && decompilerContext->analyzer.hasStreamoutWrite)
|
||||||
{
|
{
|
||||||
|
@ -70,8 +70,6 @@ struct LatteDecompilerTEXInstruction
|
|||||||
uint8 nfa{};
|
uint8 nfa{};
|
||||||
uint8 isSigned{};
|
uint8 isSigned{};
|
||||||
}memRead;
|
}memRead;
|
||||||
// custom shadow function
|
|
||||||
sint32 shadowFunctionIndex{};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LatteDecompilerCFInstruction
|
struct LatteDecompilerCFInstruction
|
||||||
@ -116,7 +114,7 @@ struct LatteDecompilerCFInstruction
|
|||||||
|
|
||||||
~LatteDecompilerCFInstruction()
|
~LatteDecompilerCFInstruction()
|
||||||
{
|
{
|
||||||
cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we dont accidentally added the wrong instruction type
|
cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type
|
||||||
}
|
}
|
||||||
|
|
||||||
#if BOOST_OS_WINDOWS
|
#if BOOST_OS_WINDOWS
|
||||||
@ -148,14 +146,14 @@ struct LatteDecompilerShaderContext
|
|||||||
LatteDecompilerOutput_t* output;
|
LatteDecompilerOutput_t* output;
|
||||||
LatteDecompilerShader* shader;
|
LatteDecompilerShader* shader;
|
||||||
LatteConst::ShaderType shaderType;
|
LatteConst::ShaderType shaderType;
|
||||||
|
const class LatteDecompilerOptions* options;
|
||||||
uint32* contextRegisters; // deprecated
|
uint32* contextRegisters; // deprecated
|
||||||
struct LatteContextRegister* contextRegistersNew;
|
struct LatteContextRegister* contextRegistersNew;
|
||||||
uint64 shaderBaseHash;
|
uint64 shaderBaseHash;
|
||||||
StringBuf* shaderSource; // move to output struct
|
StringBuf* shaderSource;
|
||||||
std::vector<LatteDecompilerCFInstruction> cfInstructions;
|
std::vector<LatteDecompilerCFInstruction> cfInstructions;
|
||||||
// fetch shader (required for vertex shader)
|
// fetch shader (required for vertex shader)
|
||||||
LatteFetchShader* fetchShaderList[32];
|
LatteFetchShader* fetchShader{};
|
||||||
sint32 fetchShaderCount;
|
|
||||||
// geometry copy shader (only present when geometry shader is active)
|
// geometry copy shader (only present when geometry shader is active)
|
||||||
LatteParsedGSCopyShader* parsedGSCopyShader;
|
LatteParsedGSCopyShader* parsedGSCopyShader;
|
||||||
// state
|
// state
|
||||||
@ -217,10 +215,7 @@ struct LatteDecompilerShaderContext
|
|||||||
bool hasUniformVarBlock;
|
bool hasUniformVarBlock;
|
||||||
sint32 currentBindingPointVK{};
|
sint32 currentBindingPointVK{};
|
||||||
|
|
||||||
// unsorted
|
// misc
|
||||||
bool usesGeometryShader; // for VS
|
|
||||||
bool useTFViaSSBO;
|
|
||||||
sint32 currentShadowFunctionIndex;
|
|
||||||
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
|
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ uint32 RendererShader::GeneratePrecompiledCacheId()
|
|||||||
v += (EMULATOR_VERSION_MINOR * 100u);
|
v += (EMULATOR_VERSION_MINOR * 100u);
|
||||||
|
|
||||||
// settings that can influence shaders
|
// settings that can influence shaders
|
||||||
v += (uint32)g_current_game_profile->GetAccurateShaderMul() * 133; // this option modifies shaders
|
v += (uint32)g_current_game_profile->GetAccurateShaderMul() * 133;
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
@ -1,17 +1,14 @@
|
|||||||
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
|
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
|
||||||
|
|
||||||
#include <glslang/Public/ShaderLang.h>
|
|
||||||
#include <glslang/SPIRV/GlslangToSpv.h>
|
|
||||||
|
|
||||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
|
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||||
|
|
||||||
#include "config/ActiveSettings.h"
|
#include "config/ActiveSettings.h"
|
||||||
#include "config/CemuConfig.h"
|
#include "config/CemuConfig.h"
|
||||||
#include "util/helpers/ConcurrentQueue.h"
|
#include "util/helpers/ConcurrentQueue.h"
|
||||||
|
|
||||||
#include "Cemu/FileCache/FileCache.h"
|
#include "Cemu/FileCache/FileCache.h"
|
||||||
|
|
||||||
|
#include <glslang/Public/ShaderLang.h>
|
||||||
|
#include <glslang/SPIRV/GlslangToSpv.h>
|
||||||
|
|
||||||
bool s_isLoadingShadersVk{ false };
|
bool s_isLoadingShadersVk{ false };
|
||||||
class FileCache* s_spirvCache{nullptr};
|
class FileCache* s_spirvCache{nullptr};
|
||||||
|
|
||||||
|
@ -37,13 +37,13 @@ extern std::atomic_int g_compiling_pipelines;
|
|||||||
|
|
||||||
const std::vector<const char*> kOptionalDeviceExtensions =
|
const std::vector<const char*> kOptionalDeviceExtensions =
|
||||||
{
|
{
|
||||||
//VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
|
|
||||||
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
|
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
|
||||||
VK_NV_FILL_RECTANGLE_EXTENSION_NAME,
|
VK_NV_FILL_RECTANGLE_EXTENSION_NAME,
|
||||||
VK_EXT_PIPELINE_CREATION_FEEDBACK_EXTENSION_NAME,
|
VK_EXT_PIPELINE_CREATION_FEEDBACK_EXTENSION_NAME,
|
||||||
VK_EXT_FILTER_CUBIC_EXTENSION_NAME, // not supported by any device yet
|
VK_EXT_FILTER_CUBIC_EXTENSION_NAME, // not supported by any device yet
|
||||||
VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
|
VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
|
||||||
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
|
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
|
||||||
|
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::vector<const char*> kRequiredDeviceExtensions =
|
const std::vector<const char*> kRequiredDeviceExtensions =
|
||||||
@ -236,25 +236,51 @@ void VulkanRenderer::DetermineVendor()
|
|||||||
|
|
||||||
void VulkanRenderer::GetDeviceFeatures()
|
void VulkanRenderer::GetDeviceFeatures()
|
||||||
{
|
{
|
||||||
|
/* Get Vulkan features via GetPhysicalDeviceFeatures2 */
|
||||||
|
void* prevStruct = nullptr;
|
||||||
VkPhysicalDeviceCustomBorderColorFeaturesEXT bcf{};
|
VkPhysicalDeviceCustomBorderColorFeaturesEXT bcf{};
|
||||||
bcf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
|
bcf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
|
||||||
|
bcf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
|
||||||
|
prevStruct = &bcf;
|
||||||
|
|
||||||
VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT pcc{};
|
VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT pcc{};
|
||||||
pcc.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT;
|
pcc.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT;
|
||||||
pcc.pNext = &bcf;
|
pcc.pNext = prevStruct;
|
||||||
|
prevStruct = &pcc;
|
||||||
|
|
||||||
VkPhysicalDeviceFeatures2 physicalDeviceFeatures2{};
|
VkPhysicalDeviceFeatures2 physicalDeviceFeatures2{};
|
||||||
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
||||||
physicalDeviceFeatures2.pNext = &pcc;
|
physicalDeviceFeatures2.pNext = prevStruct;
|
||||||
|
|
||||||
vkGetPhysicalDeviceFeatures2(m_physicalDevice, &physicalDeviceFeatures2);
|
vkGetPhysicalDeviceFeatures2(m_physicalDevice, &physicalDeviceFeatures2);
|
||||||
|
|
||||||
|
/* Get Vulkan device properties and limits */
|
||||||
|
VkPhysicalDeviceFloatControlsPropertiesKHR pfcp{};
|
||||||
|
prevStruct = nullptr;
|
||||||
|
if (m_featureControl.deviceExtensions.shader_float_controls)
|
||||||
|
{
|
||||||
|
pfcp.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
|
||||||
|
pfcp.pNext = prevStruct;
|
||||||
|
prevStruct = &pfcp;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkPhysicalDeviceProperties2 prop2{};
|
||||||
|
prop2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||||
|
prop2.pNext = prevStruct;
|
||||||
|
|
||||||
|
vkGetPhysicalDeviceProperties2(m_physicalDevice, &prop2);
|
||||||
|
|
||||||
|
/* Determine which subfeatures we can use */
|
||||||
|
|
||||||
m_featureControl.deviceExtensions.pipeline_creation_cache_control = pcc.pipelineCreationCacheControl;
|
m_featureControl.deviceExtensions.pipeline_creation_cache_control = pcc.pipelineCreationCacheControl;
|
||||||
m_featureControl.deviceExtensions.custom_border_color_without_format = m_featureControl.deviceExtensions.custom_border_color && bcf.customBorderColorWithoutFormat;
|
m_featureControl.deviceExtensions.custom_border_color_without_format = m_featureControl.deviceExtensions.custom_border_color && bcf.customBorderColorWithoutFormat;
|
||||||
|
m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32 = m_featureControl.deviceExtensions.shader_float_controls && pfcp.shaderRoundingModeRTEFloat32;
|
||||||
|
if(!m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32)
|
||||||
|
cemuLog_log(LogType::Force, "Shader round mode control not available on this device or driver. Some rendering issues might occur.");
|
||||||
|
|
||||||
if (!m_featureControl.deviceExtensions.pipeline_creation_cache_control)
|
if (!m_featureControl.deviceExtensions.pipeline_creation_cache_control)
|
||||||
{
|
{
|
||||||
forceLogDebug_printf("VK_EXT_pipeline_creation_cache_control not supported. Cannot use asynchronous shader and pipeline compilation");
|
cemuLog_log(LogType::Force, "VK_EXT_pipeline_creation_cache_control not supported. Cannot use asynchronous shader and pipeline compilation");
|
||||||
// if async shader compilation is enabled show warning message
|
// if async shader compilation is enabled show warning message
|
||||||
if (GetConfig().async_compile)
|
if (GetConfig().async_compile)
|
||||||
wxMessageBox(_("The currently installed graphics driver does not support the Vulkan extension necessary for asynchronous shader compilation. Asynchronous compilation cannot be used.\n \nRequired extension: VK_EXT_pipeline_creation_cache_control\n\nInstalling the latest graphics driver may solve this error."), _("Information"), wxOK | wxCENTRE);
|
wxMessageBox(_("The currently installed graphics driver does not support the Vulkan extension necessary for asynchronous shader compilation. Asynchronous compilation cannot be used.\n \nRequired extension: VK_EXT_pipeline_creation_cache_control\n\nInstalling the latest graphics driver may solve this error."), _("Information"), wxOK | wxCENTRE);
|
||||||
@ -270,13 +296,11 @@ void VulkanRenderer::GetDeviceFeatures()
|
|||||||
forceLog_printf("VK_EXT_custom_border_color not supported. Cannot emulate arbitrary border color");
|
forceLog_printf("VK_EXT_custom_border_color not supported. Cannot emulate arbitrary border color");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// retrieve limits
|
|
||||||
VkPhysicalDeviceProperties2 p2{};
|
// get limits
|
||||||
p2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(prop2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4);
|
||||||
vkGetPhysicalDeviceProperties2(m_physicalDevice, &p2);
|
m_featureControl.limits.nonCoherentAtomSize = std::max(prop2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4);
|
||||||
m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(p2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4);
|
cemuLog_log(LogType::Force, fmt::format("VulkanLimits: UBAlignment {0} nonCoherentAtomSize {1}", prop2.properties.limits.minUniformBufferOffsetAlignment, prop2.properties.limits.nonCoherentAtomSize));
|
||||||
m_featureControl.limits.nonCoherentAtomSize = std::max(p2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4);
|
|
||||||
cemuLog_log(LogType::Force, fmt::format("VulkanLimits: UBAlignment {0} nonCoherentAtomSize {1}", p2.properties.limits.minUniformBufferOffsetAlignment, p2.properties.limits.nonCoherentAtomSize));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
VulkanRenderer::VulkanRenderer()
|
VulkanRenderer::VulkanRenderer()
|
||||||
@ -1025,6 +1049,8 @@ VkDeviceCreateInfo VulkanRenderer::CreateDeviceCreateInfo(const std::vector<VkDe
|
|||||||
used_extensions.emplace_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
|
used_extensions.emplace_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
|
||||||
if (m_featureControl.deviceExtensions.dynamic_rendering)
|
if (m_featureControl.deviceExtensions.dynamic_rendering)
|
||||||
used_extensions.emplace_back(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
|
used_extensions.emplace_back(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
|
||||||
|
if (m_featureControl.deviceExtensions.shader_float_controls)
|
||||||
|
used_extensions.emplace_back(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
|
||||||
|
|
||||||
VkDeviceCreateInfo createInfo{};
|
VkDeviceCreateInfo createInfo{};
|
||||||
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||||
@ -1100,6 +1126,7 @@ bool VulkanRenderer::CheckDeviceExtensionSupport(const VkPhysicalDevice device,
|
|||||||
info.deviceExtensions.driver_properties = isExtensionAvailable(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
|
info.deviceExtensions.driver_properties = isExtensionAvailable(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
|
||||||
info.deviceExtensions.external_memory_host = isExtensionAvailable(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
info.deviceExtensions.external_memory_host = isExtensionAvailable(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
||||||
info.deviceExtensions.synchronization2 = isExtensionAvailable(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
|
info.deviceExtensions.synchronization2 = isExtensionAvailable(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
|
||||||
|
info.deviceExtensions.shader_float_controls = isExtensionAvailable(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
|
||||||
info.deviceExtensions.dynamic_rendering = false; // isExtensionAvailable(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
|
info.deviceExtensions.dynamic_rendering = false; // isExtensionAvailable(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
|
||||||
// dynamic rendering doesn't provide any benefits for us right now. Driver implementations are very unoptimized as of Feb 2022
|
// dynamic rendering doesn't provide any benefits for us right now. Driver implementations are very unoptimized as of Feb 2022
|
||||||
|
|
||||||
@ -1209,7 +1236,7 @@ bool VulkanRenderer::IsDeviceSuitable(VkSurfaceKHR surface, const VkPhysicalDevi
|
|||||||
vkGetPhysicalDeviceProperties(device, &properties);
|
vkGetPhysicalDeviceProperties(device, &properties);
|
||||||
uint32 vkVersionMajor = VK_API_VERSION_MAJOR(properties.apiVersion);
|
uint32 vkVersionMajor = VK_API_VERSION_MAJOR(properties.apiVersion);
|
||||||
uint32 vkVersionMinor = VK_API_VERSION_MINOR(properties.apiVersion);
|
uint32 vkVersionMinor = VK_API_VERSION_MINOR(properties.apiVersion);
|
||||||
if (vkVersionMajor < 1 || vkVersionMinor < 1)
|
if (vkVersionMajor < 1 || (vkVersionMajor == 1 && vkVersionMinor < 1))
|
||||||
return false; // minimum required version is Vulkan 1.1
|
return false; // minimum required version is Vulkan 1.1
|
||||||
|
|
||||||
FeatureControl info;
|
FeatureControl info;
|
||||||
@ -1724,7 +1751,6 @@ ImTextureID VulkanRenderer::GenerateTexture(const std::vector<uint8>& data, cons
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
// g_imgui_textures.emplace_back(texture);
|
|
||||||
std::vector <uint8> tmp(size.x * size.y * 4);
|
std::vector <uint8> tmp(size.x * size.y * 4);
|
||||||
for (size_t i = 0; i < data.size() / 3; ++i)
|
for (size_t i = 0; i < data.size() / 3; ++i)
|
||||||
{
|
{
|
||||||
|
@ -463,8 +463,14 @@ private:
|
|||||||
bool external_memory_host = false; // VK_EXT_external_memory_host
|
bool external_memory_host = false; // VK_EXT_external_memory_host
|
||||||
bool synchronization2 = false; // VK_KHR_synchronization2
|
bool synchronization2 = false; // VK_KHR_synchronization2
|
||||||
bool dynamic_rendering = false; // VK_KHR_dynamic_rendering
|
bool dynamic_rendering = false; // VK_KHR_dynamic_rendering
|
||||||
|
bool shader_float_controls = false; // VK_KHR_shader_float_controls
|
||||||
}deviceExtensions;
|
}deviceExtensions;
|
||||||
|
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
bool shaderRoundingModeRTEFloat32{ false };
|
||||||
|
}shaderFloatControls; // from VK_KHR_shader_float_controls
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
bool debug_utils = false; // VK_EXT_DEBUG_UTILS
|
bool debug_utils = false; // VK_EXT_DEBUG_UTILS
|
||||||
@ -482,8 +488,8 @@ private:
|
|||||||
uint32 nonCoherentAtomSize = 256;
|
uint32 nonCoherentAtomSize = 256;
|
||||||
}limits;
|
}limits;
|
||||||
|
|
||||||
bool debugMarkersSupported = false; // frame debugger is attached
|
bool debugMarkersSupported{ false }; // frame debugger is attached
|
||||||
bool disableMultithreadedCompilation = false; // for old nvidia drivers
|
bool disableMultithreadedCompilation{ false }; // for old nvidia drivers
|
||||||
|
|
||||||
}m_featureControl{};
|
}m_featureControl{};
|
||||||
static bool CheckDeviceExtensionSupport(const VkPhysicalDevice device, FeatureControl& info);
|
static bool CheckDeviceExtensionSupport(const VkPhysicalDevice device, FeatureControl& info);
|
||||||
@ -936,12 +942,10 @@ private:
|
|||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
bool GetDisableMultithreadedCompilation() { return m_featureControl.disableMultithreadedCompilation; }
|
bool GetDisableMultithreadedCompilation() const { return m_featureControl.disableMultithreadedCompilation; }
|
||||||
bool useTFViaSSBO() { return m_featureControl.mode.useTFEmulationViaSSBO; }
|
bool UseTFViaSSBO() const { return m_featureControl.mode.useTFEmulationViaSSBO; }
|
||||||
bool IsDebugUtilsEnabled() const
|
bool HasSPRIVRoundingModeRTE32() const { return m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32; }
|
||||||
{
|
bool IsDebugUtilsEnabled() const { return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils; }
|
||||||
return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
@ -1579,7 +1579,7 @@ void VulkanRenderer::draw_updateVertexBuffersDirectAccess()
|
|||||||
|
|
||||||
if (bufferAddress == MPTR_NULL)
|
if (bufferAddress == MPTR_NULL)
|
||||||
{
|
{
|
||||||
cemu_assert_unimplemented();
|
bufferAddress = 0x10000000;
|
||||||
}
|
}
|
||||||
if (m_state.currentVertexBinding[bufferIndex].offset == bufferAddress)
|
if (m_state.currentVertexBinding[bufferIndex].offset == bufferAddress)
|
||||||
continue;
|
continue;
|
||||||
|
@ -228,7 +228,7 @@ void TextureRelationViewerWindow::_setTextureRelationListItemTexture(wxListCtrl*
|
|||||||
uiList->SetItem(rowIndex, columnIndex, tempStr);
|
uiList->SetItem(rowIndex, columnIndex, tempStr);
|
||||||
columnIndex++;
|
columnIndex++;
|
||||||
// tilemode
|
// tilemode
|
||||||
sprintf(tempStr, "%d", texInfo->tileMode);
|
sprintf(tempStr, "%d", (int)texInfo->tileMode);
|
||||||
uiList->SetItem(rowIndex, columnIndex, tempStr);
|
uiList->SetItem(rowIndex, columnIndex, tempStr);
|
||||||
columnIndex++;
|
columnIndex++;
|
||||||
// sliceRange
|
// sliceRange
|
||||||
|
Loading…
Reference in New Issue
Block a user