mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-01-24 07:41:13 +01:00
Latte/Vulkan: Set shader rounding mode via VK_KHR_SHADER_FLOAT_CONTROLS
This commit is contained in:
parent
43c9a4eccd
commit
4a564e2447
@ -1,20 +1,25 @@
|
||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
|
||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
|
||||
#include "Cafe/HW/Latte/ISA/LatteReg.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
|
||||
#include "Cafe/GraphicPack/GraphicPack2.h"
|
||||
#include "util/helpers/StringParser.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
|
||||
#include "util/Zir/Core/ZpIRDebug.h"
|
||||
#include "util/containers/flat_hash_map.hpp"
|
||||
#include <cinttypes>
|
||||
|
||||
// experimental new decompiler (WIP)
|
||||
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
|
||||
#include "util/Zir/Core/ZpIRDebug.h"
|
||||
#include "Cafe/HW/Latte/Transcompiler/LatteTC.h"
|
||||
#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h"
|
||||
|
||||
struct _ShaderHashCache
|
||||
{
|
||||
uint64 prevHash1;
|
||||
@ -672,10 +677,18 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
||||
return shader;
|
||||
}
|
||||
|
||||
#include "Cafe/HW/Latte/Transcompiler/LatteTC.h"
|
||||
#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h"
|
||||
/**
 * Fills `options` with the decompiler settings that apply to the active renderer.
 *
 * @param options               Receives the settings. usesGeometryShader, useTFViaSSBO and
 *                              spirvInstrinsics.hasRoundingModeRTEFloat32 are all written on every
 *                              call, so an instance that is reused across calls cannot carry over
 *                              a value from an earlier call.
 * @param shaderType            Not read by this function; every shader stage gets the same options.
 * @param geometryShaderEnabled Copied to options.usesGeometryShader.
 */
void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled)
{
	options.usesGeometryShader = geometryShaderEnabled;
	// Vulkan-specific settings are switched off first so that they stay off for any other renderer
	options.useTFViaSSBO = false;
	options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false;
	if (g_renderer->GetType() == RendererAPI::Vulkan)
	{
		// both values are capabilities reported by the Vulkan renderer instance
		options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO();
		options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32();
	}
}
|
||||
|
||||
LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
|
||||
LatteDecompilerShader* LatteShader_CompileSeparableVertexShader2(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
|
||||
{
|
||||
/* Analyze shader to gather general information about inputs/outputs */
|
||||
Latte::ShaderDescription shaderDescription;
|
||||
@ -725,14 +738,17 @@ LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash,
|
||||
// compile new vertex shader (relies partially on current state)
|
||||
LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
|
||||
{
|
||||
// new decompiler
|
||||
//LatteShader_compileSeparableVertexShader(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);
|
||||
// new decompiler test
|
||||
//LatteShader_CompileSeparableVertexShader2(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);
|
||||
|
||||
// legacy decompiler
|
||||
LatteDecompilerOptions options;
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);
|
||||
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteFetchShader* fetchShaderList[1];
|
||||
fetchShaderList[0] = fetchShader;
|
||||
LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
|
||||
LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), options, &decompilerOutput);
|
||||
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||
vsAuxHash = vertexShader->auxHash;
|
||||
if (vertexShader->hasError == false)
|
||||
@ -759,10 +775,13 @@ LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash,
|
||||
|
||||
LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHash, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
|
||||
{
|
||||
LatteDecompilerOptions options;
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);
|
||||
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteFetchShader* fetchShaderList[1];
|
||||
fetchShaderList[0] = _activeFetchShader;
|
||||
LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, &decompilerOutput);
|
||||
LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, options, &decompilerOutput);
|
||||
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||
if (geometryShader->hasError == false)
|
||||
{
|
||||
@ -787,8 +806,11 @@ LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHas
|
||||
|
||||
LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, uint64& psAuxHash, uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
|
||||
{
|
||||
LatteDecompilerOptions options;
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);
|
||||
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
|
||||
LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), options, &decompilerOutput);
|
||||
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
|
||||
psAuxHash = pixelShader->auxHash;
|
||||
LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader);
|
||||
|
@ -94,6 +94,7 @@ extern uint64 _shaderBaseHash_vs;
|
||||
extern uint64 _shaderBaseHash_gs;
|
||||
extern uint64 _shaderBaseHash_ps;
|
||||
|
||||
void LatteShader_GetDecompilerOptions(struct LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled);
|
||||
LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompilerOutput_t& decompilerOutput, uint64 baseHash, bool calculateAuxHash, uint64 optionalAuxHash, uint32* contextRegister);
|
||||
|
||||
void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync);
|
||||
|
@ -641,16 +641,19 @@ bool LatteShaderCache_readSeparableVertexShader(MemStreamReader& streamReader, u
|
||||
return false;
|
||||
if (streamReader.hasError() || !streamReader.isEndOfStream())
|
||||
return false;
|
||||
// update PS inputs (influence VS shader outputs)
|
||||
// update PS inputs (affects VS shader outputs)
|
||||
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
||||
// get fetch shader
|
||||
LatteFetchShader::CacheHash fsHash = LatteFetchShader::CalculateCacheHash((uint32*)fetchShaderData.data(), fetchShaderData.size());
|
||||
LatteFetchShader* fetchShader = LatteShaderRecompiler_createFetchShader(fsHash, lcr->GetRawView(), (uint32*)fetchShaderData.data(), fetchShaderData.size());
|
||||
// determine decompiler options
|
||||
LatteDecompilerOptions options;
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);
|
||||
// decompile vertex shader
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteFetchShader* fetchShaderList[1];
|
||||
fetchShaderList[0] = fetchShader;
|
||||
LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShaderList, 1, lcr->GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
|
||||
LatteDecompiler_DecompileVertexShader(shaderBaseHash, lcr->GetRawView(), vertexShaderData.data(), vertexShaderData.size(), fetchShaderList, 1, lcr->GetSpecialStateValues(), options, &decompilerOutput);
|
||||
LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||
// compile
|
||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, vertexShader);
|
||||
@ -688,15 +691,17 @@ bool LatteShaderCache_readSeparableGeometryShader(MemStreamReader& streamReader,
|
||||
return false;
|
||||
// update PS inputs
|
||||
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
||||
// determine decompiler options
|
||||
LatteDecompilerOptions options;
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);
|
||||
// decompile geometry shader
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), lcr->GetSpecialStateValues(), vsRingParameterCount, &decompilerOutput);
|
||||
LatteDecompiler_DecompileGeometryShader(shaderBaseHash, lcr->GetRawView(), geometryShaderData.data(), geometryShaderData.size(), geometryCopyShaderData.data(), geometryCopyShaderData.size(), lcr->GetSpecialStateValues(), vsRingParameterCount, options, &decompilerOutput);
|
||||
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||
// compile
|
||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, geometryShader);
|
||||
LatteShader_DumpRawShader(shaderBaseHash, shaderAuxHash, SHADER_DUMP_TYPE_GEOMETRY, geometryShaderData.data(), geometryShaderData.size());
|
||||
LatteShaderCache_loadOrCompileSeparableShader(geometryShader, shaderBaseHash, shaderAuxHash);
|
||||
catchOpenGLError();
|
||||
LatteSHRC_RegisterShader(geometryShader, shaderBaseHash, shaderAuxHash);
|
||||
return true;
|
||||
}
|
||||
@ -724,9 +729,12 @@ bool LatteShaderCache_readSeparablePixelShader(MemStreamReader& streamReader, ui
|
||||
return false;
|
||||
// update PS inputs
|
||||
LatteShader_UpdatePSInputs(lcr->GetRawView());
|
||||
// determine decompiler options
|
||||
LatteDecompilerOptions options;
|
||||
LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);
|
||||
// decompile pixel shader
|
||||
LatteDecompilerOutput_t decompilerOutput{};
|
||||
LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), lcr->GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
|
||||
LatteDecompiler_DecompilePixelShader(shaderBaseHash, lcr->GetRawView(), pixelShaderData.data(), pixelShaderData.size(), lcr->GetSpecialStateValues(), options, &decompilerOutput);
|
||||
LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, shaderBaseHash, false, shaderAuxHash, lcr->GetRawView());
|
||||
// compile
|
||||
LatteShader_DumpShader(shaderBaseHash, shaderAuxHash, pixelShader);
|
||||
|
@ -1066,35 +1066,34 @@ void _LatteDecompiler_Process(LatteDecompilerShaderContext* shaderContext, uint8
|
||||
_LatteDecompiler_GenerateDataForFastAccess(shaderContext->shader);
|
||||
}
|
||||
|
||||
// Initializes the fields of a decompiler context that are common to all shader stages.
// This span is a diff view showing two signatures: the first one (without `options`) is the
// version being replaced, the second one is the signature that the body below belongs to
// (the body reads `options`, which only the second signature declares).
//   dCtx             - context to fill in
//   options          - decompiler options; only its address is stored (see below)
//   output           - decompiler output; its shaderType field is set here
//   shaderType       - stage being decompiled, stored in both dCtx and output
//   shaderBaseHash   - stored in dCtx.shaderBaseHash
//   contextRegisters - register array, stored both as a raw uint32 pointer and as LatteContextRegister*
void LatteDecompiler_InitContext(LatteDecompilerShaderContext& dCtx, LatteDecompilerOutput_t* output, LatteConst::ShaderType shaderType, uint64 shaderBaseHash, uint32* contextRegisters)
|
||||
void LatteDecompiler_InitContext(LatteDecompilerShaderContext& dCtx, const LatteDecompilerOptions& options, LatteDecompilerOutput_t* output, LatteConst::ShaderType shaderType, uint64 shaderBaseHash, uint32* contextRegisters)
|
||||
{
|
||||
dCtx.output = output;
|
||||
dCtx.shaderType = shaderType;
|
||||
// stores a pointer to the caller's options object, not a copy
// NOTE(review): the options object therefore has to outlive every use of dCtx.options - confirm at the call sites
|
||||
dCtx.options = &options;
|
||||
output->shaderType = shaderType;
|
||||
dCtx.shaderBaseHash = shaderBaseHash;
|
||||
dCtx.contextRegisters = contextRegisters;
|
||||
// same pointer as above, reinterpreted as the structured register type
|
||||
dCtx.contextRegistersNew = (LatteContextRegister*)contextRegisters;
|
||||
|
||||
// set context parameters (redundant stuff since options can be accessed directly)
|
||||
dCtx.usesGeometryShader = options.usesGeometryShader;
|
||||
dCtx.useTFViaSSBO = options.useTFViaSSBO;
|
||||
}
|
||||
|
||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output, bool useTFViaSSBO)
|
||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
{
|
||||
cemu_assert_debug((programSize & 3) == 0);
|
||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||
// prepare decompiler context
|
||||
LatteDecompilerShaderContext shaderContext = { 0 };
|
||||
LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Vertex, shaderBaseHash, contextRegisters);
|
||||
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Vertex, shaderBaseHash, contextRegisters);
|
||||
cemu_assert_debug(fetchShaderCount == 1);
|
||||
for (sint32 i = 0; i < fetchShaderCount; i++)
|
||||
{
|
||||
shaderContext.fetchShaderList[i] = fetchShaderList[i];
|
||||
}
|
||||
shaderContext.fetchShaderCount = fetchShaderCount;
|
||||
// ugly hack to get tf mode from Vulkan renderer
|
||||
shaderContext.useTFViaSSBO = useTFViaSSBO;
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
{
|
||||
shaderContext.useTFViaSSBO = VulkanRenderer::GetInstance()->useTFViaSSBO();
|
||||
}
|
||||
// prepare shader (deprecated)
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
||||
shader->shaderType = LatteConst::ShaderType::Vertex;
|
||||
@ -1103,7 +1102,6 @@ void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contex
|
||||
output->shaderType = LatteConst::ShaderType::Vertex;
|
||||
shaderContext.shader = shader;
|
||||
output->shader = shader;
|
||||
shaderContext.usesGeometryShader = usesGeometryShader;
|
||||
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
||||
{
|
||||
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
||||
@ -1114,14 +1112,14 @@ void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contex
|
||||
performanceMonitor.gpuTime_shaderCreate.endMeasuring();
|
||||
}
|
||||
|
||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOutput_t* output, bool useTFViaSSBO)
|
||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
{
|
||||
cemu_assert_debug((programSize & 3) == 0);
|
||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||
// prepare decompiler context
|
||||
LatteDecompilerShaderContext shaderContext = { 0 };
|
||||
shaderContext.fetchShaderCount = 0;
|
||||
LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Geometry, shaderBaseHash, contextRegisters);
|
||||
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Geometry, shaderBaseHash, contextRegisters);
|
||||
// prepare shader
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
||||
shaderContext.output = output;
|
||||
@ -1131,7 +1129,6 @@ void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* cont
|
||||
output->shaderType = LatteConst::ShaderType::Geometry;
|
||||
shaderContext.shader = shader;
|
||||
output->shader = shader;
|
||||
shaderContext.usesGeometryShader = true;
|
||||
if (gsCopyProgramData == NULL)
|
||||
{
|
||||
shader->hasError = true;
|
||||
@ -1145,24 +1142,18 @@ void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* cont
|
||||
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
||||
shader->textureUsesDepthCompare[i] = false;
|
||||
}
|
||||
// ugly hack to get tf mode from Vulkan renderer
|
||||
shaderContext.useTFViaSSBO = useTFViaSSBO;
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
{
|
||||
shaderContext.useTFViaSSBO = VulkanRenderer::GetInstance()->useTFViaSSBO();
|
||||
}
|
||||
// parse & compile
|
||||
_LatteDecompiler_Process(&shaderContext, programData, programSize);
|
||||
performanceMonitor.gpuTime_shaderCreate.endMeasuring();
|
||||
}
|
||||
|
||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output)
|
||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output)
|
||||
{
|
||||
cemu_assert_debug((programSize & 3) == 0);
|
||||
performanceMonitor.gpuTime_shaderCreate.beginMeasuring();
|
||||
// prepare decompiler context
|
||||
LatteDecompilerShaderContext shaderContext = { 0 };
|
||||
LatteDecompiler_InitContext(shaderContext, output, LatteConst::ShaderType::Pixel, shaderBaseHash, contextRegisters);
|
||||
LatteDecompiler_InitContext(shaderContext, options, output, LatteConst::ShaderType::Pixel, shaderBaseHash, contextRegisters);
|
||||
shaderContext.contextRegisters = contextRegisters;
|
||||
// prepare shader
|
||||
LatteDecompilerShader* shader = new LatteDecompilerShader();
|
||||
@ -1172,7 +1163,6 @@ void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* context
|
||||
output->shaderType = LatteConst::ShaderType::Pixel;
|
||||
shaderContext.shader = shader;
|
||||
output->shader = shader;
|
||||
shaderContext.usesGeometryShader = usesGeometryShader;
|
||||
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
||||
{
|
||||
shader->textureUnitSamplerAssignment[i] = LATTE_DECOMPILER_SAMPLER_NONE;
|
||||
|
@ -218,7 +218,7 @@ struct LatteDecompilerShader
|
||||
std::vector<LatteFastAccessRemappedUniformEntry_buffer_t> entries;
|
||||
};
|
||||
std::vector<LatteFastAccessRemappedUniformEntry_register_t> list_remappedUniformEntries_register;
|
||||
std::vector<_RemappedUniformBufferGroup> list_remappedUniformEntries_bufferGroups;
|
||||
std::vector<_RemappedUniformBufferGroup> list_remappedUniformEntries_bufferGroups;
|
||||
};
|
||||
|
||||
struct LatteDecompilerOutputUniformOffsets
|
||||
@ -250,6 +250,17 @@ struct LatteDecompilerOutputUniformOffsets
|
||||
}
|
||||
};
|
||||
|
||||
// Settings handed to the shader decompiler. All members default to false.
struct LatteDecompilerOptions
|
||||
{
|
||||
// whether a geometry shader is in use
|
||||
bool usesGeometryShader{ false };
|
||||
// Vulkan-specific
|
||||
// NOTE(review): presumably selects transform feedback emulation via SSBO - confirm against the Vulkan renderer
|
||||
bool useTFViaSSBO{ false };
|
||||
// availability flags for features emitted through GL_EXT_spirv_intrinsics
|
||||
struct
|
||||
{
|
||||
// set when the 32-bit float round-to-nearest-even rounding mode (RoundingModeRTE) may be requested in generated shaders
|
||||
bool hasRoundingModeRTEFloat32{ false };
|
||||
}spirvInstrinsics;
|
||||
};
|
||||
|
||||
struct LatteDecompilerOutput_t
|
||||
{
|
||||
LatteDecompilerShader* shader;
|
||||
@ -272,9 +283,9 @@ struct LatteDecompilerOutput_t
|
||||
|
||||
struct LatteDecompilerSubroutineInfo;
|
||||
|
||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output, bool useTFViaSSBO = false);
|
||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOutput_t* output, bool useTFViaSSBO = false);
|
||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, bool usesGeometryShader, LatteDecompilerOutput_t* output);
|
||||
void LatteDecompiler_DecompileVertexShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, struct LatteFetchShader** fetchShaderList, sint32 fetchShaderCount, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
void LatteDecompiler_DecompileGeometryShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint8* gsCopyProgramData, uint32 gsCopyProgramSize, uint32* hleSpecialState, uint32 vsRingParameterCount, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
void LatteDecompiler_DecompilePixelShader(uint64 shaderBaseHash, uint32* contextRegisters, uint8* programData, uint32 programSize, uint32* hleSpecialState, LatteDecompilerOptions& options, LatteDecompilerOutput_t* output);
|
||||
|
||||
// specialized shader parsers
|
||||
|
||||
|
@ -306,6 +306,11 @@ namespace LatteDecompiler
|
||||
{
|
||||
src->add("#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale.xy,gl_FragCoord.z, 1.0/gl_FragCoord.w)" _CRLF);
|
||||
}
|
||||
if (decompilerContext->options->spirvInstrinsics.hasRoundingModeRTEFloat32)
|
||||
{
|
||||
src->add("#extension GL_EXT_spirv_intrinsics: enable" _CRLF);
|
||||
src->add("spirv_execution_mode(4462, 32);" _CRLF); // RoundingModeRTE 32
|
||||
}
|
||||
src->add("#else" _CRLF);
|
||||
// OpenGL defines
|
||||
src->add("#define ATTR_LAYOUT(__vkSet, __location) layout(location = __location)" _CRLF);
|
||||
|
@ -70,8 +70,6 @@ struct LatteDecompilerTEXInstruction
|
||||
uint8 nfa{};
|
||||
uint8 isSigned{};
|
||||
}memRead;
|
||||
// custom shadow function
|
||||
sint32 shadowFunctionIndex{};
|
||||
};
|
||||
|
||||
struct LatteDecompilerCFInstruction
|
||||
@ -116,7 +114,7 @@ struct LatteDecompilerCFInstruction
|
||||
|
||||
~LatteDecompilerCFInstruction()
|
||||
{
|
||||
cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we dont accidentally added the wrong instruction type
|
||||
cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type
|
||||
}
|
||||
|
||||
#if BOOST_OS_WINDOWS
|
||||
@ -148,6 +146,7 @@ struct LatteDecompilerShaderContext
|
||||
LatteDecompilerOutput_t* output;
|
||||
LatteDecompilerShader* shader;
|
||||
LatteConst::ShaderType shaderType;
|
||||
const class LatteDecompilerOptions* options;
|
||||
uint32* contextRegisters; // deprecated
|
||||
struct LatteContextRegister* contextRegistersNew;
|
||||
uint64 shaderBaseHash;
|
||||
@ -217,10 +216,9 @@ struct LatteDecompilerShaderContext
|
||||
bool hasUniformVarBlock;
|
||||
sint32 currentBindingPointVK{};
|
||||
|
||||
// unsorted
|
||||
// misc
|
||||
bool usesGeometryShader; // for VS
|
||||
bool useTFViaSSBO;
|
||||
sint32 currentShadowFunctionIndex;
|
||||
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
|
||||
};
|
||||
|
||||
|
@ -1,16 +1,18 @@
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "config/CemuConfig.h"
|
||||
#include "util/helpers/ConcurrentQueue.h"
|
||||
#include "Cemu/FileCache/FileCache.h"
|
||||
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include <glslang/SPIRV/GlslangToSpv.h>
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||
// required for modifying SPIR-V
|
||||
#include <glslang/SPIRV/SpvBuilder.h>
|
||||
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "config/CemuConfig.h"
|
||||
#include "util/helpers/ConcurrentQueue.h"
|
||||
|
||||
#include "Cemu/FileCache/FileCache.h"
|
||||
|
||||
bool s_isLoadingShadersVk{ false };
|
||||
class FileCache* s_spirvCache{nullptr};
|
||||
|
@ -37,13 +37,13 @@ extern std::atomic_int g_compiling_pipelines;
|
||||
|
||||
// Names of the Vulkan device extensions treated as optional.
// NOTE(review): presumably each entry is only enabled when the device reports it as available - confirm
// against CheckDeviceExtensionSupport / CreateDeviceCreateInfo.
const std::vector<const char*> kOptionalDeviceExtensions =
|
||||
{
|
||||
//VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
|
||||
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME,
|
||||
VK_NV_FILL_RECTANGLE_EXTENSION_NAME,
|
||||
VK_EXT_PIPELINE_CREATION_FEEDBACK_EXTENSION_NAME,
|
||||
VK_EXT_FILTER_CUBIC_EXTENSION_NAME, // not supported by any device yet
|
||||
VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
|
||||
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,
|
||||
// provides the float controls properties (e.g. shaderRoundingModeRTEFloat32)
|
||||
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME
|
||||
};
|
||||
|
||||
const std::vector<const char*> kRequiredDeviceExtensions =
|
||||
@ -236,25 +236,51 @@ void VulkanRenderer::DetermineVendor()
|
||||
|
||||
void VulkanRenderer::GetDeviceFeatures()
|
||||
{
|
||||
/* Get Vulkan features via GetPhysicalDeviceFeatures2 */
|
||||
void* prevStruct = nullptr;
|
||||
VkPhysicalDeviceCustomBorderColorFeaturesEXT bcf{};
|
||||
bcf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
|
||||
bcf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
|
||||
prevStruct = &bcf;
|
||||
|
||||
VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT pcc{};
|
||||
pcc.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT;
|
||||
pcc.pNext = &bcf;
|
||||
pcc.pNext = prevStruct;
|
||||
prevStruct = &pcc;
|
||||
|
||||
VkPhysicalDeviceFeatures2 physicalDeviceFeatures2{};
|
||||
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
||||
physicalDeviceFeatures2.pNext = &pcc;
|
||||
physicalDeviceFeatures2.pNext = prevStruct;
|
||||
|
||||
vkGetPhysicalDeviceFeatures2(m_physicalDevice, &physicalDeviceFeatures2);
|
||||
|
||||
/* Get Vulkan device properties and limits */
|
||||
VkPhysicalDeviceFloatControlsPropertiesKHR pfcp{};
|
||||
prevStruct = nullptr;
|
||||
if (m_featureControl.deviceExtensions.shader_float_controls)
|
||||
{
|
||||
pfcp.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
|
||||
pfcp.pNext = prevStruct;
|
||||
prevStruct = &pfcp;
|
||||
}
|
||||
|
||||
VkPhysicalDeviceProperties2 prop2{};
|
||||
prop2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||
prop2.pNext = prevStruct;
|
||||
|
||||
vkGetPhysicalDeviceProperties2(m_physicalDevice, &prop2);
|
||||
|
||||
/* Determine which subfeatures we can use */
|
||||
|
||||
m_featureControl.deviceExtensions.pipeline_creation_cache_control = pcc.pipelineCreationCacheControl;
|
||||
m_featureControl.deviceExtensions.custom_border_color_without_format = m_featureControl.deviceExtensions.custom_border_color && bcf.customBorderColorWithoutFormat;
|
||||
m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32 = m_featureControl.deviceExtensions.shader_float_controls && pfcp.shaderRoundingModeRTEFloat32;
|
||||
if(!m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32)
|
||||
cemuLog_log(LogType::Force, "Shader round mode control not available on this device or driver. Some rendering issues might occur.");
|
||||
|
||||
if (!m_featureControl.deviceExtensions.pipeline_creation_cache_control)
|
||||
{
|
||||
forceLogDebug_printf("VK_EXT_pipeline_creation_cache_control not supported. Cannot use asynchronous shader and pipeline compilation");
|
||||
cemuLog_log(LogType::Force, "VK_EXT_pipeline_creation_cache_control not supported. Cannot use asynchronous shader and pipeline compilation");
|
||||
// if async shader compilation is enabled show warning message
|
||||
if (GetConfig().async_compile)
|
||||
wxMessageBox(_("The currently installed graphics driver does not support the Vulkan extension necessary for asynchronous shader compilation. Asynchronous compilation cannot be used.\n \nRequired extension: VK_EXT_pipeline_creation_cache_control\n\nInstalling the latest graphics driver may solve this error."), _("Information"), wxOK | wxCENTRE);
|
||||
@ -270,13 +296,11 @@ void VulkanRenderer::GetDeviceFeatures()
|
||||
forceLog_printf("VK_EXT_custom_border_color not supported. Cannot emulate arbitrary border color");
|
||||
}
|
||||
}
|
||||
// retrieve limits
|
||||
VkPhysicalDeviceProperties2 p2{};
|
||||
p2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
||||
vkGetPhysicalDeviceProperties2(m_physicalDevice, &p2);
|
||||
m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(p2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4);
|
||||
m_featureControl.limits.nonCoherentAtomSize = std::max(p2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4);
|
||||
cemuLog_log(LogType::Force, fmt::format("VulkanLimits: UBAlignment {0} nonCoherentAtomSize {1}", p2.properties.limits.minUniformBufferOffsetAlignment, p2.properties.limits.nonCoherentAtomSize));
|
||||
|
||||
// get limits
|
||||
m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(prop2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4);
|
||||
m_featureControl.limits.nonCoherentAtomSize = std::max(prop2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4);
|
||||
cemuLog_log(LogType::Force, fmt::format("VulkanLimits: UBAlignment {0} nonCoherentAtomSize {1}", prop2.properties.limits.minUniformBufferOffsetAlignment, prop2.properties.limits.nonCoherentAtomSize));
|
||||
}
|
||||
|
||||
VulkanRenderer::VulkanRenderer()
|
||||
@ -1025,6 +1049,8 @@ VkDeviceCreateInfo VulkanRenderer::CreateDeviceCreateInfo(const std::vector<VkDe
|
||||
used_extensions.emplace_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
|
||||
if (m_featureControl.deviceExtensions.dynamic_rendering)
|
||||
used_extensions.emplace_back(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
|
||||
if (m_featureControl.deviceExtensions.shader_float_controls)
|
||||
used_extensions.emplace_back(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
|
||||
|
||||
VkDeviceCreateInfo createInfo{};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
@ -1100,6 +1126,7 @@ bool VulkanRenderer::CheckDeviceExtensionSupport(const VkPhysicalDevice device,
|
||||
info.deviceExtensions.driver_properties = isExtensionAvailable(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
|
||||
info.deviceExtensions.external_memory_host = isExtensionAvailable(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
||||
info.deviceExtensions.synchronization2 = isExtensionAvailable(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
|
||||
info.deviceExtensions.shader_float_controls = isExtensionAvailable(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
|
||||
info.deviceExtensions.dynamic_rendering = false; // isExtensionAvailable(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
|
||||
// dynamic rendering doesn't provide any benefits for us right now. Driver implementations are very unoptimized as of Feb 2022
|
||||
|
||||
@ -1209,7 +1236,7 @@ bool VulkanRenderer::IsDeviceSuitable(VkSurfaceKHR surface, const VkPhysicalDevi
|
||||
vkGetPhysicalDeviceProperties(device, &properties);
|
||||
uint32 vkVersionMajor = VK_API_VERSION_MAJOR(properties.apiVersion);
|
||||
uint32 vkVersionMinor = VK_API_VERSION_MINOR(properties.apiVersion);
|
||||
if (vkVersionMajor < 1 || vkVersionMinor < 1)
|
||||
if (vkVersionMajor < 1 || (vkVersionMajor == 1 && vkVersionMinor < 1))
|
||||
return false; // minimum required version is Vulkan 1.1
|
||||
|
||||
FeatureControl info;
|
||||
@ -1724,7 +1751,6 @@ ImTextureID VulkanRenderer::GenerateTexture(const std::vector<uint8>& data, cons
|
||||
{
|
||||
try
|
||||
{
|
||||
// g_imgui_textures.emplace_back(texture);
|
||||
std::vector <uint8> tmp(size.x * size.y * 4);
|
||||
for (size_t i = 0; i < data.size() / 3; ++i)
|
||||
{
|
||||
|
@ -463,8 +463,14 @@ private:
|
||||
bool external_memory_host = false; // VK_EXT_external_memory_host
|
||||
bool synchronization2 = false; // VK_KHR_synchronization2
|
||||
bool dynamic_rendering = false; // VK_KHR_dynamic_rendering
|
||||
bool shader_float_controls = false; // VK_KHR_shader_float_controls
|
||||
}deviceExtensions;
|
||||
|
||||
struct
|
||||
{
|
||||
bool shaderRoundingModeRTEFloat32{ false };
|
||||
}shaderFloatControls; // from VK_KHR_shader_float_controls
|
||||
|
||||
struct
|
||||
{
|
||||
bool debug_utils = false; // VK_EXT_DEBUG_UTILS
|
||||
@ -482,8 +488,8 @@ private:
|
||||
uint32 nonCoherentAtomSize = 256;
|
||||
}limits;
|
||||
|
||||
bool debugMarkersSupported = false; // frame debugger is attached
|
||||
bool disableMultithreadedCompilation = false; // for old nvidia drivers
|
||||
bool debugMarkersSupported{ false }; // frame debugger is attached
|
||||
bool disableMultithreadedCompilation{ false }; // for old nvidia drivers
|
||||
|
||||
}m_featureControl{};
|
||||
static bool CheckDeviceExtensionSupport(const VkPhysicalDevice device, FeatureControl& info);
|
||||
@ -936,12 +942,10 @@ private:
|
||||
|
||||
|
||||
public:
|
||||
bool GetDisableMultithreadedCompilation() { return m_featureControl.disableMultithreadedCompilation; }
|
||||
bool useTFViaSSBO() { return m_featureControl.mode.useTFEmulationViaSSBO; }
|
||||
bool IsDebugUtilsEnabled() const
|
||||
{
|
||||
return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils;
|
||||
}
|
||||
bool GetDisableMultithreadedCompilation() const { return m_featureControl.disableMultithreadedCompilation; }
|
||||
bool UseTFViaSSBO() const { return m_featureControl.mode.useTFEmulationViaSSBO; }
|
||||
bool HasSPRIVRoundingModeRTE32() const { return m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32; }
|
||||
bool IsDebugUtilsEnabled() const { return m_featureControl.debugMarkersSupported && m_featureControl.instanceExtensions.debug_utils; }
|
||||
|
||||
private:
|
||||
|
||||
|
@ -1579,7 +1579,7 @@ void VulkanRenderer::draw_updateVertexBuffersDirectAccess()
|
||||
|
||||
if (bufferAddress == MPTR_NULL)
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
bufferAddress = 0x10000000;
|
||||
}
|
||||
if (m_state.currentVertexBinding[bufferIndex].offset == bufferAddress)
|
||||
continue;
|
||||
|
Loading…
x
Reference in New Issue
Block a user