diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index 7db650e431..8a34f1debf 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -744,6 +744,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
                            bool per_pixel_depth, bool use_dual_source);
 static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
 static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data);
+static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
 static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
                        bool use_dual_source);
 static void WriteBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data);
@@ -1148,6 +1149,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
 
   if (uid_data->logic_op_enable)
     WriteLogicOp(out, uid_data);
+  else if (uid_data->emulate_logic_op_with_blend)
+    WriteLogicOpBlend(out, uid_data);
 
   // Write the color and alpha values to the framebuffer
   // If using shader blend, we still use the separate alpha
@@ -1803,6 +1806,31 @@ static void WriteLogicOp(ShaderCode& out, const pixel_shader_uid_data* uid_data)
   out.Write("\tprev = ({}) & 0xff;\n", logic_op_mode[uid_data->logic_op_mode]);
 }
 
+// Emits shader code adjusting `prev` for logic ops whose blend approximation needs shader help:
+// Clear/NoOp force 0, CopyInverted XORs with 255, Set/Invert force 255; others are untouched.
+static void WriteLogicOpBlend(ShaderCode& out, const pixel_shader_uid_data* uid_data)
+{
+  switch (static_cast<LogicOp>(uid_data->logic_op_mode))
+  {
+  case LogicOp::Clear:
+  case LogicOp::NoOp:
+    out.Write("\tprev = int4(0, 0, 0, 0);\n");
+    break;
+  case LogicOp::Copy:
+    // Do nothing!
+    break;
+  case LogicOp::CopyInverted:
+    out.Write("\tprev ^= 255;\n");
+    break;
+  case LogicOp::Set:
+  case LogicOp::Invert:  // In cooperation with blend
+    out.Write("\tprev = int4(255, 255, 255, 255);\n");
+    break;
+  default:
+    break;
+  }
+}
+
 static void WriteColor(ShaderCode& out, APIType api_type, const pixel_shader_uid_data* uid_data,
                        bool use_dual_source)
 {
diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h
index 1cfe5a0ebc..84f1e27f74 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.h
+++ b/Source/Core/VideoCommon/PixelShaderGen.h
@@ -58,8 +58,9 @@ struct pixel_shader_uid_data
   DstBlendFactor blend_dst_factor_alpha : 3;  // Only used with shader_framebuffer_fetch blend
   u32 blend_subtract : 1;                     // Only used with shader_framebuffer_fetch blend
   u32 blend_subtract_alpha : 1;               // Only used with shader_framebuffer_fetch blend
+  u32 emulate_logic_op_with_blend : 1;        // Only used with logic op blend emulation
   u32 logic_op_enable : 1;  // Only used with shader_framebuffer_fetch logic ops
-  u32 logic_op_mode : 4;    // Only used with shader_framebuffer_fetch logic ops
+  u32 logic_op_mode : 4;  // Only used with shader_framebuffer_fetch logic ops and blend emulation
   u32 texMtxInfo_n_projection : 8;  // 8x1 bit
 
   u32 tevindref_bi0 : 3;
diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp
index 85e9adee15..d04733349f 100644
--- a/Source/Core/VideoCommon/RenderState.cpp
+++ b/Source/Core/VideoCommon/RenderState.cpp
@@ -185,6 +185,7 @@ void BlendingState::ApproximateLogicOpWithBlending()
 {
   struct LogicOpApproximation
   {
+    bool blendEnable;  // If false, blendenable and the blend factors are left untouched
     bool subtract;
     SrcBlendFactor srcfactor;
     DstBlendFactor dstfactor;
@@ -193,31 +194,73 @@ void BlendingState::ApproximateLogicOpWithBlending()
   // but INVSRCCLR and INVDSTCLR were also aliased and were mixed.
   // Thus, NOR, EQUIV, INVERT, COPY_INVERTED, and OR_INVERTED duplicate(d) other values.
   static constexpr std::array<LogicOpApproximation, 16> approximations = {{
-      {false, SrcBlendFactor::Zero, DstBlendFactor::Zero},            // CLEAR
-      {false, SrcBlendFactor::DstClr, DstBlendFactor::Zero},          // AND
-      {true, SrcBlendFactor::One, DstBlendFactor::InvSrcClr},         // AND_REVERSE
-      {false, SrcBlendFactor::One, DstBlendFactor::Zero},             // COPY
-      {true, SrcBlendFactor::DstClr, DstBlendFactor::One},            // AND_INVERTED
-      {false, SrcBlendFactor::Zero, DstBlendFactor::One},             // NOOP
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // XOR
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::One},        // OR
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // NOR
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero},       // EQUIV
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // INVERT
-      {false, SrcBlendFactor::One, DstBlendFactor::InvDstAlpha},      // OR_REVERSE
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // COPY_INVERTED
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::One},        // OR_INVERTED
-      {false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},  // NAND
-      {false, SrcBlendFactor::One, DstBlendFactor::One},              // SET
+      // clang-format off
+      {false, false, SrcBlendFactor::One,       DstBlendFactor::Zero},         // CLEAR (Shader outputs 0)
+      {true,  false, SrcBlendFactor::DstClr,    DstBlendFactor::Zero},         // AND
+      {true,  true,  SrcBlendFactor::One,       DstBlendFactor::InvSrcClr},    // AND_REVERSE
+      {false, false, SrcBlendFactor::One,       DstBlendFactor::Zero},         // COPY
+      {true,  true,  SrcBlendFactor::DstClr,    DstBlendFactor::One},          // AND_INVERTED
+      {true,  false, SrcBlendFactor::Zero,      DstBlendFactor::One},          // NOOP
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},    // XOR
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::One},          // OR
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},    // NOR
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero},         // EQUIV
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::Zero},         // INVERT (Shader outputs 255)
+      {true,  false, SrcBlendFactor::One,       DstBlendFactor::InvDstAlpha},  // OR_REVERSE
+      {false, false, SrcBlendFactor::One,       DstBlendFactor::Zero},         // COPY_INVERTED (Shader inverts)
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::One},          // OR_INVERTED
+      {true,  false, SrcBlendFactor::InvDstClr, DstBlendFactor::InvSrcClr},    // NAND
+      {false, false, SrcBlendFactor::One,       DstBlendFactor::Zero},         // SET (Shader outputs 255)
+      // clang-format on
   }};
 
   logicopenable = false;
-  blendenable = true;
-  subtract = approximations[u32(logicmode.Value())].subtract;
-  srcfactor = approximations[u32(logicmode.Value())].srcfactor;
-  srcfactoralpha = approximations[u32(logicmode.Value())].srcfactor;
-  dstfactor = approximations[u32(logicmode.Value())].dstfactor;
-  dstfactoralpha = approximations[u32(logicmode.Value())].dstfactor;
+  usedualsrc = false;
+  const LogicOpApproximation& approximation = approximations[static_cast<u32>(logicmode.Value())];
+  if (approximation.blendEnable)
+  {
+    blendenable = true;
+    subtract = approximation.subtract;
+    srcfactor = approximation.srcfactor;
+    srcfactoralpha = approximation.srcfactor;
+    dstfactor = approximation.dstfactor;
+    dstfactoralpha = approximation.dstfactor;
+  }
+}
+
+// Returns true for the logic ops (Clear, Set, NoOp, Invert, CopyInverted, Copy) that callers
+// treat as exactly reproduced, i.e. for which no "incorrect rendering" warning is logged.
+bool BlendingState::LogicOpApproximationIsExact()
+{
+  switch (logicmode.Value())
+  {
+  case LogicOp::Clear:
+  case LogicOp::Set:
+  case LogicOp::NoOp:
+  case LogicOp::Invert:
+  case LogicOp::CopyInverted:
+  case LogicOp::Copy:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Returns true for the logic ops (Clear, Set, NoOp, Invert, CopyInverted) for which callers
+// also ask the pixel shader to adjust its output (emulate_logic_op_with_blend).
+bool BlendingState::LogicOpApproximationWantsShaderHelp()
+{
+  switch (logicmode.Value())
+  {
+  case LogicOp::Clear:
+  case LogicOp::Set:
+  case LogicOp::NoOp:
+  case LogicOp::Invert:
+  case LogicOp::CopyInverted:
+    return true;
+  default:
+    return false;
+  }
 }
 
 void SamplerState::Generate(const BPMemory& bp, u32 index)
diff --git a/Source/Core/VideoCommon/RenderState.h b/Source/Core/VideoCommon/RenderState.h
index 49590d55b5..4dc305638e 100644
--- a/Source/Core/VideoCommon/RenderState.h
+++ b/Source/Core/VideoCommon/RenderState.h
@@ -109,6 +109,8 @@ union BlendingState
   // HACK: Replaces logical operations with blend operations.
   // Will not be bit-correct, and in some cases not even remotely in the same ballpark.
   void ApproximateLogicOpWithBlending();
+  bool LogicOpApproximationIsExact();
+  bool LogicOpApproximationWantsShaderHelp();
 
   BlendingState() = default;
   BlendingState(const BlendingState&) = default;
diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp
index 59ae917686..9b6b1afa9b 100644
--- a/Source/Core/VideoCommon/ShaderCache.cpp
+++ b/Source/Core/VideoCommon/ShaderCache.cpp
@@ -601,16 +601,6 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
   config.depth_state = depth_state;
   config.blending_state = blending_state;
   config.framebuffer_state = g_framebuffer_manager->GetEFBFramebufferState();
-
-  // We can use framebuffer fetch to emulate logic ops in the fragment shader.
-  if (config.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
-      !g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
-  {
-    WARN_LOG_FMT(VIDEO,
-                 "Approximating logic op with blending, this will produce incorrect rendering.");
-    config.blending_state.ApproximateLogicOpWithBlending();
-  }
-
   return config;
 }
 
@@ -628,6 +618,22 @@ static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in)
     ps->ztest = EmulatedZ::Early;
   }
 
+  // If framebuffer fetch is available, we can emulate logic ops in the fragment shader
+  // and don't need the below blend approximation
+  if (blend.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
+      !g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
+  {
+    if (!blend.LogicOpApproximationIsExact())
+      WARN_LOG_FMT(VIDEO,
+                   "Approximating logic op with blending, this will produce incorrect rendering.");
+    if (blend.LogicOpApproximationWantsShaderHelp())
+    {
+      ps->emulate_logic_op_with_blend = true;
+      ps->logic_op_mode = static_cast<u32>(blend.logicmode.Value());
+    }
+    blend.ApproximateLogicOpWithBlending();
+  }
+
   const bool benefits_from_ps_dual_source_off =
       (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
        g_ActiveConfig.backend_info.bSupportsFramebufferFetch) ||
@@ -775,6 +781,18 @@ static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in)
   memcpy(&out, &in, sizeof(out));  // Copy padding
   if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader)
     out.vertex_format = nullptr;
+
+  // If framebuffer fetch is available, we can emulate logic ops in the fragment shader
+  // and don't need the below blend approximation
+  if (out.blending_state.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp &&
+      !g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
+  {
+    if (!out.blending_state.LogicOpApproximationIsExact())
+      WARN_LOG_FMT(VIDEO,
+                   "Approximating logic op with blending, this will produce incorrect rendering.");
+    out.blending_state.ApproximateLogicOpWithBlending();
+  }
+
   if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
   {
     // Always blend in shader
diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp
index d625ce9c42..21309bc8b3 100644
--- a/Source/Core/VideoCommon/UberShaderPixel.cpp
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp
@@ -1092,6 +1092,26 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
               " TevResult &= 0xff;\n"
               " }}\n");
   }
+  else if (!host_config.backend_logic_op)
+  {
+    out.Write(" // Helpers for logic op blending approximations\n"
+              " if (logic_op_enable) {{\n"
+              " switch (logic_op_mode) {{\n");
+    out.Write(" case {}: // Clear\n", static_cast<u32>(LogicOp::Clear));
+    out.Write(" TevResult = int4(0, 0, 0, 0);\n"
+              " break;\n");
+    out.Write(" case {}: // Copy Inverted\n", static_cast<u32>(LogicOp::CopyInverted));
+    out.Write(" TevResult ^= 0xff;\n"
+              " break;\n");
+    out.Write(" case {}: // Set\n", static_cast<u32>(LogicOp::Set));
+    out.Write(" case {}: // Invert\n", static_cast<u32>(LogicOp::Invert));
+    out.Write(" TevResult = int4(255, 255, 255, 255);\n"
+              " break;\n");
+    out.Write(" default:\n"
+              " break;\n"
+              " }}\n"
+              " }}\n");
+  }
 
   // Some backends require that the shader outputs be uint when writing to a uint render target for
   // logic op.