From 0327e6acb4a4f883003fb00b04a8092a1914090a Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Mon, 24 Jan 2022 22:05:31 -0800
Subject: [PATCH 1/2] Use the same logic for lerp bias for color and alpha

It doesn't make sense for alpha to add the bias ONLY when dividing by 2, while color doesn't apply the bias for divide by 2 only; hardware testing indicates that alpha should have the bias.

This fixes the menus in Mario Kart Wii (https://bugs.dolphin-emu.org/issues/11909) but reintroduces the white rectangle in Fortune Street.

This reverts commit 5aaa5141ed76acc3fffc70561a6b6e7fe3b9b470 (and several other matching changes elsewhere).
---
 Source/Core/VideoBackends/Software/Tev.cpp  |  2 +-
 Source/Core/VideoCommon/PixelShaderGen.cpp  | 12 ++++----
 Source/Core/VideoCommon/UberShaderPixel.cpp | 32 +++++++++++----------
 3 files changed, 25 insertions(+), 21 deletions(-)
diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp
index 28c6c5f29d..ee0a789ec6 100644
--- a/Source/Core/VideoBackends/Software/Tev.cpp
+++ b/Source/Core/VideoBackends/Software/Tev.cpp
@@ -297,7 +297,7 @@ void Tev::DrawAlphaRegular(const TevStageCombiner::AlphaCombiner& ac, const Inpu
 
   s32 temp = InputReg.a * (256 - c) + (InputReg.b * c);
   temp <<= m_ScaleLShiftLUT[u32(ac.scale.Value())];
-  temp += (ac.scale != TevScale::Divide2) ? 0 : (ac.op == TevOp::Sub) ? 127 : 128;
+  temp += (ac.scale == TevScale::Divide2) ? 0 : (ac.op == TevOp::Sub) ? 127 : 128;
   temp = ac.op == TevOp::Sub ? (-temp >> 8) : (temp >> 8);
 
   s32 result =
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index 673c97ec9b..af21e64156 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -852,7 +852,7 @@ uint WrapCoord(int coord, uint wrap, int size) {{
 static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
                        APIType api_type, bool stereo);
 static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op,
-                            bool clamp, TevScale scale, bool alpha);
+                            bool clamp, TevScale scale);
 static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, APIType api_type,
                            bool per_pixel_depth, bool use_dual_source);
 static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data);
@@ -1677,7 +1677,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
   out.Write("\t{} = clamp(", tev_c_output_table[cc.dest]);
   if (cc.bias != TevBias::Compare)
   {
-    WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.scale, false);
+    WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.scale);
   }
   else
   {
@@ -1710,7 +1710,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
   out.Write("\t{} = clamp(", tev_a_output_table[ac.dest]);
   if (ac.bias != TevBias::Compare)
   {
-    WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.scale, true);
+    WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.scale);
   }
   else
   {
@@ -1742,7 +1742,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
 }
 
 static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op,
-                            bool clamp, TevScale scale, bool alpha)
+                            bool clamp, TevScale scale)
 {
   static constexpr Common::EnumMap<const char*, TevScale::Divide2> tev_scale_table_left{
       "",       // Scale1
@@ -1780,12 +1780,14 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBia
   // - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255
   // - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy
   // - a rounding bias is added before dividing by 256
+  // TODO: Is the rounding bias still added when the scale is divide by 2?  Currently we do not
+  // apply it.
   out.Write("(((tevin_d.{}{}){})", components, tev_bias_table[bias], tev_scale_table_left[scale]);
   out.Write(" {} ", tev_op_table[op]);
   out.Write("(((((tevin_a.{0}<<8) + "
             "(tevin_b.{0}-tevin_a.{0})*(tevin_c.{0}+(tevin_c.{0}>>7))){1}){2})>>8)",
             components, tev_scale_table_left[scale],
-            ((scale == TevScale::Divide2) == alpha) ? tev_lerp_bias[op] : "");
+            (scale != TevScale::Divide2) ? tev_lerp_bias[op] : "");
   out.Write("){}", tev_scale_table_right[scale]);
 }
 
diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp
index ebf4afa55f..0ce13725d6 100644
--- a/Source/Core/VideoCommon/UberShaderPixel.cpp
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp
@@ -334,8 +334,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
   const auto WriteTevLerp = [&out](std::string_view components) {
     out.Write(
         "// TEV's Linear Interpolate, plus bias, add/subtract and scale\n"
-        "int{0} tevLerp{0}(int{0} A, int{0} B, int{0} C, int{0} D, uint bias, bool op, bool alpha, "
-        "uint shift) {{\n"
+        "int{0} tevLerp{0}(int{0} A, int{0} B, int{0} C, int{0} D, uint bias, bool op, "
+        "uint scale) {{\n"
         " // Scale C from 0..255 to 0..256\n"
         "  C += C >> 7;\n"
         "\n"
@@ -344,12 +344,14 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
         "  else if (bias == 2u) D -= 128;\n"
         "\n"
         "  int{0} lerp = (A << 8) + (B - A)*C;\n"
-        "  if (shift != 3u) {{\n"
-        "    lerp = lerp << shift;\n"
-        "    D = D << shift;\n"
+        "  if (scale != 3u) {{\n"
+        "    lerp = lerp << scale;\n"
+        "    D = D << scale;\n"
         "  }}\n"
         "\n"
-        "  if ((shift == 3u) == alpha)\n"
+        "  // TODO: Is this rounding bias still added when the scale is divide by 2?  Currently we "
+        "do not apply it.\n"
+        "  if (scale != 3u)\n"
         "    lerp = lerp + (op ? 127 : 128);\n"
         "\n"
         "  int{0} result = lerp >> 8;\n"
@@ -360,9 +362,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
         "  else // Add\n"
         "    result = D + result;\n"
         "\n"
-        "  // Most of the Shift was moved inside the lerp for improved precision\n"
+        "  // Most of the Scale was moved inside the lerp for improved precision\n"
         "  // But we still do the divide by 2 here\n"
-        "  if (shift == 3u)\n"
+        "  if (scale == 3u)\n"
         "    result = result >> 1;\n"
         "  return result;\n"
         "}}\n\n",
@@ -810,13 +812,13 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
             BitfieldExtract<&TevStageCombiner::ColorCombiner::op>("ss.cc"));
   out.Write("      bool color_clamp = bool({});\n",
             BitfieldExtract<&TevStageCombiner::ColorCombiner::clamp>("ss.cc"));
-  out.Write("      uint color_shift = {};\n",
+  out.Write("      uint color_scale = {};\n",
             BitfieldExtract<&TevStageCombiner::ColorCombiner::scale>("ss.cc"));
   out.Write("      uint color_dest = {};\n",
             BitfieldExtract<&TevStageCombiner::ColorCombiner::dest>("ss.cc"));
 
   out.Write(
-      "      uint color_compare_op = color_shift << 1 | uint(color_op);\n"
+      "      uint color_compare_op = color_scale << 1 | uint(color_op);\n"
       "\n"
       "      int3 color_A = selectColorInput(s, ss, {0}colors_0, {0}colors_1, color_a) & "
       "int3(255, 255, 255);\n"
@@ -831,8 +833,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
   out.Write(
       "      int3 color;\n"
       "      if (color_bias != 3u) {{ // Normal mode\n"
-      "        color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, false, "
-      "color_shift);\n"
+      "        color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, "
+      "color_scale);\n"
       "      }} else {{ // Compare mode\n"
       "        // op 6 and 7 do a select per color channel\n"
       "        if (color_compare_op == 6u) {{\n"
@@ -880,13 +882,13 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
             BitfieldExtract<&TevStageCombiner::AlphaCombiner::op>("ss.ac"));
   out.Write("      bool alpha_clamp = bool({});\n",
             BitfieldExtract<&TevStageCombiner::AlphaCombiner::clamp>("ss.ac"));
-  out.Write("      uint alpha_shift = {};\n",
+  out.Write("      uint alpha_scale = {};\n",
             BitfieldExtract<&TevStageCombiner::AlphaCombiner::scale>("ss.ac"));
   out.Write("      uint alpha_dest = {};\n",
             BitfieldExtract<&TevStageCombiner::AlphaCombiner::dest>("ss.ac"));
 
   out.Write(
-      "      uint alpha_compare_op = alpha_shift << 1 | uint(alpha_op);\n"
+      "      uint alpha_compare_op = alpha_scale << 1 | uint(alpha_op);\n"
       "\n"
       "      int alpha_A;\n"
       "      int alpha_B;\n"
@@ -904,7 +906,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
             "      int alpha;\n"
             "      if (alpha_bias != 3u) {{ // Normal mode\n"
             "        alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, "
-            "true, alpha_shift);\n"
+            "alpha_scale);\n"
             "      }} else {{ // Compare mode\n"
             "        if (alpha_compare_op == 6u) {{\n"
             "          // TevCompareMode::A8, TevComparison::GT\n"

From 444f6fd0cb84a673329cf33eacf8907bf96b5951 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Mon, 24 Jan 2022 22:48:43 -0800
Subject: [PATCH 2/2] Treat alpha as 0 if alpha is 1 for blending

This removes the white box in fortune street again, without causing Mario Kart Wii to regress.
---
 Source/Core/VideoBackends/Software/Tev.cpp  | 13 +++++++++++++
 Source/Core/VideoCommon/PixelShaderGen.cpp  | 12 ++++++++++++
 Source/Core/VideoCommon/UberShaderPixel.cpp |  3 +++
 3 files changed, 28 insertions(+)

diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp
index ee0a789ec6..6905920405 100644
--- a/Source/Core/VideoBackends/Software/Tev.cpp
+++ b/Source/Core/VideoBackends/Software/Tev.cpp
@@ -714,6 +714,19 @@ void Tev::Draw()
   if (!TevAlphaTest(output[ALP_C]))
     return;
 
+  // Hardware testing indicates that an alpha of 1 can pass an alpha test,
+  // but doesn't do anything in blending
+  // This situation is important for Mario Kart Wii's menus (they will render incorrectly if the
+  // alpha test for the FMV in the background fails, since they depend on depth for drawing a yellow
+  // border) and Fortune Street's gameplay (where a rectangle with an alpha value of 1 is drawn over
+  // the center of the screen several times, but those rectangles shouldn't be visible).
+  // Blending seems to result in no changes to the output with an alpha of 1, even if the input
+  // color is white.
+  // TODO: Investigate this further: we might be handling blending incorrectly in general (though
+  // there might not be any good way of changing blending behavior)
+  if (output[ALP_C] == 1)
+    output[ALP_C] = 0;
+
   // z texture
   if (bpmem.ztex2.op != ZTexOp::Disabled)
   {
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index af21e64156..ddb03f8e96 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -1234,6 +1234,18 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
                    use_dual_source || use_shader_blend);
   }
 
+  // This situation is important for Mario Kart Wii's menus (they will render incorrectly if the
+  // alpha test for the FMV in the background fails, since they depend on depth for drawing a yellow
+  // border) and Fortune Street's gameplay (where a rectangle with an alpha value of 1 is drawn over
+  // the center of the screen several times, but those rectangles shouldn't be visible).
+  // Blending seems to result in no changes to the output with an alpha of 1, even if the input
+  // color is white.
+  // TODO: Investigate this further: we might be handling blending incorrectly in general (though
+  // there might not be any good way of changing blending behavior)
+  out.Write("\t// Hardware testing indicates that an alpha of 1 can pass an alpha test,\n"
+            "\t// but doesn't do anything in blending\n"
+            "\tif (prev.a == 1) prev.a = 0;\n");
+
   if (uid_data->zfreeze)
   {
     out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp
index 0ce13725d6..fa6ad7bc75 100644
--- a/Source/Core/VideoCommon/UberShaderPixel.cpp
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp
@@ -1032,6 +1032,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
             "  }}\n"
             "\n");
 
+  out.Write("  // Hardware testing indicates that an alpha of 1 can pass an alpha test,\n"
+            "  // but doesn't do anything in blending\n"
+            "  if (TevResult.a == 1) TevResult.a = 0;\n");
   // =========
   // Dithering
   // =========