From 504e0488a8a90bc6ca0e4f070fc15c57a13b449e Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sun, 12 Mar 2023 10:30:53 +0100 Subject: [PATCH] Remove shaderMulAccuracy "min" option It's less accurate and it doesn't actually perform better on most hardware. --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 2 ++ .../LegacyShaderDecompiler/LatteDecompiler.h | 2 ++ .../LatteDecompilerEmitGLSL.cpp | 18 +++++------------- src/config/CemuConfig.h | 8 +++----- src/gui/GameProfileWindow.cpp | 8 ++++---- 5 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index fe8056da..00a49f02 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -11,6 +11,7 @@ #include "Cafe/GraphicPack/GraphicPack2.h" #include "util/helpers/StringParser.h" #include "config/ActiveSettings.h" +#include "Cafe/GameProfile/GameProfile.h" #include "util/containers/flat_hash_map.hpp" #include @@ -686,6 +687,7 @@ void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteCons options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO(); options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32(); } + options.strictMul = g_current_game_profile->GetAccurateShaderMul() != AccurateShaderMulOption::False; } LatteDecompilerShader* LatteShader_CompileSeparableVertexShader2(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 3264f5d3..f7a0ea5f 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -255,6 +255,8 @@ struct LatteDecompilerOutputUniformOffsets struct LatteDecompilerOptions { bool usesGeometryShader{ false }; + // floating point math + bool strictMul{}; // if true, 0*anything=0 rule is emulated // Vulkan-specific bool useTFViaSSBO{ false }; struct diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp index 19c2cd87..67743528 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp @@ -9,9 +9,6 @@ #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h" #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/Core/FetchShader.h" - -#include "Cafe/GameProfile/GameProfile.h" - #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "config/ActiveSettings.h" #include "util/helpers/StringBuf.h" @@ -1122,7 +1119,7 @@ void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderContext, Lat { useDefaultMul = true; } - if (g_current_game_profile->GetAccurateShaderMul() != AccurateShaderMulOption::False && useDefaultMul == false) + if (shaderContext->options->strictMul && useDefaultMul == false) { src->add("mul_nonIEEE("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -1652,7 +1649,7 @@ void _emitALUOP3InstructionCode(LatteDecompilerShaderContext* shaderContext, Lat if (aluInstruction->opcode == ALU_OP3_INST_MULADD_IEEE) useDefaultMul = true; - if (g_current_game_profile->GetAccurateShaderMul() != AccurateShaderMulOption::False && useDefaultMul == false) + if (shaderContext->options->strictMul && useDefaultMul == false) { src->add("mul_nonIEEE("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -3843,8 +3840,6 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade "v.z = -1.0;\r\n" "}\r\n" - - "return v;\r\n" "}\r\n"); } @@ -3860,7 +3855,7 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade "return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0));\r\n" "}\r\n"); // mul non-ieee way (0*NaN/INF => 0.0) - if (g_current_game_profile->GetAccurateShaderMul() == AccurateShaderMulOption::True) + if (shaderContext->options->strictMul) { // things we tried: //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(a*b,0.0,a==0.0||b==0.0); }" STR_LINEBREAK); @@ -3868,17 +3863,14 @@ void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext* shade //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works + // for "min" it used to be: float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); } + if( LatteGPUState.glVendor == GLVENDOR_NVIDIA && !ActiveSettings::DumpShadersEnabled()) fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage (OpenGL) else fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" _CRLF); // DXKV-like: fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return (b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b); }" _CRLF); - - } - else - { - fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); }" _CRLF); } } diff --git a/src/config/CemuConfig.h b/src/config/CemuConfig.h index 76f1c64c..baf47377 100644 --- a/src/config/CemuConfig.h +++ b/src/config/CemuConfig.h @@ -115,11 +115,10 @@ ENABLE_ENUM_ITERATORS(PrecompiledShaderOption, PrecompiledShaderOption::Auto, Pr enum class AccurateShaderMulOption { - False = 0, // ignore non-ieee MUL special cases - True = 1, // fully emulate non-ieee MUL special cases - Min = 2, // similar to true, but avoids conditionals (instead relies on min() and abs()) + False = 0, // always use standard multiplication + True = 1 // fully emulate non-ieee MUL special cases (0*anything = 0) }; -ENABLE_ENUM_ITERATORS(AccurateShaderMulOption, AccurateShaderMulOption::False, AccurateShaderMulOption::Min); +ENABLE_ENUM_ITERATORS(AccurateShaderMulOption, AccurateShaderMulOption::False, AccurateShaderMulOption::True); enum class CPUMode { @@ -213,7 +212,6 @@ struct fmt::formatter : formatter { { case AccurateShaderMulOption::True: name = "true"; break; case AccurateShaderMulOption::False: name = "false"; break; - case AccurateShaderMulOption::Min: name = "min"; break; default: name = "unknown"; break; } return formatter::format(name, ctx); diff --git a/src/gui/GameProfileWindow.cpp b/src/gui/GameProfileWindow.cpp index 330e290e..4d56e9cd 100644 --- a/src/gui/GameProfileWindow.cpp +++ b/src/gui/GameProfileWindow.cpp @@ -118,7 +118,7 @@ GameProfileWindow::GameProfileWindow(wxWindow* parent, uint64_t title_id) first_row->Add(new wxStaticText(panel, wxID_ANY, _("Shader multiplication accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); - wxString mul_values[] = { _("false"), _("true"), _("minimal") }; + wxString mul_values[] = { _("false"), _("true")}; m_shader_mul_accuracy = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(mul_values), mul_values); m_shader_mul_accuracy->SetToolTip(_("EXPERT OPTION\nControls the accuracy of floating point multiplication in shaders.\n\nRecommended: true")); first_row->Add(m_shader_mul_accuracy, 0, wxALL, 5); @@ -268,10 +268,7 @@ void GameProfileWindow::ApplyProfile() m_graphic_api->SetSelection(0); // selecting "" else m_graphic_api->SetSelection(1 + m_game_profile.m_graphics_api.value()); // "", OpenGL, Vulkan - //m_extended_texture_readback->SetValue(m_game_profile.m_extendedTextureReadback); - //m_precompiled->SetSelection((int)m_game_profile.m_precompiledShaders.value()); m_shader_mul_accuracy->SetSelection((int)m_game_profile.m_accurateShaderMul); - //m_cache_accuracy->SetSelection((int)m_game_profile.m_gpuBufferCacheAccuracy.value()); //// audio //m_disable_audio->Set3StateValue(GetCheckboxState(m_game_profile.disableAudio)); @@ -331,6 +328,9 @@ void GameProfileWindow::SaveProfile() // gpu m_game_profile.m_accurateShaderMul = (AccurateShaderMulOption)m_shader_mul_accuracy->GetSelection(); + if (m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::False && m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::True) + m_game_profile.m_accurateShaderMul = AccurateShaderMulOption::True; // force a legal value + if (m_graphic_api->GetSelection() == 0) m_game_profile.m_graphics_api = {}; else