Restructure parameters to TextureConverterShaderGen/TextureConversionShader

This will be used by a later refactoring to increase accuracy.
2025-04-13 09:51:31 +02:00 · 2022-02-07 13:37:28 -08:00 · 2022-02-07 13:37:28 -08:00 · 791bd16b28
commit 791bd16b28
parent d20094efa2
7 changed files with 89 additions and 76 deletions
--- a/Source/Core/VideoBackends/Null/TextureCache.h
+++ b/Source/Core/VideoBackends/Null/TextureCache.h
@ -14,7 +14,7 @@ protected:
               u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
               const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
               float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
-               const EFBCopyFilterCoefficients& filter_coefficients) override
+               const std::array<u32, 3>& filter_coefficients) override
  {
  }

@ -22,7 +22,7 @@ protected:
                           const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
                           bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
                           float gamma, bool clamp_top, bool clamp_bottom,
-                           const EFBCopyFilterCoefficients& filter_coefficients) override
+                           const std::array<u32, 3>& filter_coefficients) override
  {
  }
 };
--- a/Source/Core/VideoBackends/Software/TextureCache.h
+++ b/Source/Core/VideoBackends/Software/TextureCache.h
@ -14,7 +14,7 @@ protected:
               u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
               const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
               float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
-               const EFBCopyFilterCoefficients& filter_coefficients) override
+               const std::array<u32, 3>& filter_coefficients) override
  {
    TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
                           src_rect, scale_by_half, y_scale, gamma);
@ -23,7 +23,7 @@ protected:
                           const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
                           bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
                           float gamma, bool clamp_top, bool clamp_bottom,
-                           const EFBCopyFilterCoefficients& filter_coefficients) override
+                           const std::array<u32, 3>& filter_coefficients) override
  {
    // TODO: If we ever want to "fake" vram textures, we would need to implement this
  }
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@ -1978,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
  }
 }

-EFBCopyFilterCoefficients
+std::array<u32, 3>
 TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
 {
  // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
  // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
-  return EFBCopyFilterCoefficients{
-      static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
-          64.0f,
-      static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
-                         static_cast<u32>(coefficients[4])) /
-          64.0f,
-      static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
-          64.0f,
+  return {
+      static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
+      static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
+          static_cast<u32>(coefficients[4]),
+      static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6]),
  };
 }

-EFBCopyFilterCoefficients
+std::array<u32, 3>
 TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
 {
  // If the user disables the copy filter, only apply it to the VRAM copy.
  // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
-  EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients);
+  std::array<u32, 3> res = GetRAMCopyFilterCoefficients(coefficients);
  if (!g_ActiveConfig.bDisableCopyFilter)
    return res;

  // Disabling the copy filter in options should not ignore the values the game sets completely,
  // as some games use the filter coefficients to control the brightness of the screen. Instead,
  // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
-  res.middle = res.upper + res.middle + res.lower;
-  res.upper = 0.0f;
-  res.lower = 0.0f;
+  res[1] = res[0] + res[1] + res[2];
+  res[0] = 0;
+  res[2] = 0;
  return res;
 }

-bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients)
+bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients)
 {
  // If the top/bottom coefficients are zero, no point sampling/blending from these rows.
-  return coefficients.upper != 0 || coefficients.lower != 0;
+  return coefficients[0] != 0 || coefficients[2] != 0;
+}
+
+bool TextureCacheBase::CopyFilterCanOverflow(const std::array<u32, 3>& coefficients)
+{
+  // Normally, the copy filter coefficients will sum to at most 64.  If the sum is higher than that,
+  // colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping
+  // breaks (as colors end up >= 512, which wraps back to 0).
+  return coefficients[0] + coefficients[1] + coefficients[2] >= 128;
 }

 void TextureCacheBase::CopyRenderTargetToTexture(
@ -2255,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture(

  if (copy_to_ram)
  {
-    EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
+    const std::array<u32, 3> coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
    PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
    EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
-                         NeedsCopyFilterInShader(coefficients));
+                         AllCopyFilterCoefsNeeded(coefficients),
+                         CopyFilterCanOverflow(coefficients), gamma != 1.0);

    std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
    if (staging_texture)
@ -2716,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
                                           bool scale_by_half, bool linear_filter,
                                           EFBCopyFormat dst_format, bool is_intensity, float gamma,
                                           bool clamp_top, bool clamp_bottom,
-                                           const EFBCopyFilterCoefficients& filter_coefficients)
+                                           const std::array<u32, 3>& filter_coefficients)
 {
  // Flush EFB pokes first, as they're expected to be included.
  g_framebuffer_manager->FlushEFBPokes();

  // Get the pipeline which we will be using. If the compilation failed, this will be null.
-  const AbstractPipeline* copy_pipeline =
-      g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
-          dst_format, is_depth_copy, is_intensity, scale_by_half,
-          NeedsCopyFilterInShader(filter_coefficients)));
+  const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline(
+      TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
+                                               scale_by_half, 1.0f / gamma, filter_coefficients));
  if (!copy_pipeline)
  {
    WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
@ -2746,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
  struct Uniforms
  {
    float src_left, src_top, src_width, src_height;
-    float filter_coefficients[3];
+    std::array<u32, 3> filter_coefficients;
    float gamma_rcp;
    float clamp_top;
    float clamp_bottom;
@ -2761,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
  uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
  uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
  uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
-  uniforms.filter_coefficients[0] = filter_coefficients.upper;
-  uniforms.filter_coefficients[1] = filter_coefficients.middle;
-  uniforms.filter_coefficients[2] = filter_coefficients.lower;
+  uniforms.filter_coefficients = filter_coefficients;
  uniforms.gamma_rcp = 1.0f / gamma;
  //   NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
  //         which returns random garbage from the empty bus (confirmed by hardware tests).
@ -2795,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
                               u32 memory_stride, const MathUtil::Rectangle<int>& src_rect,
                               bool scale_by_half, bool linear_filter, float y_scale, float gamma,
                               bool clamp_top, bool clamp_bottom,
-                               const EFBCopyFilterCoefficients& filter_coefficients)
+                               const std::array<u32, 3>& filter_coefficients)
 {
  // Flush EFB pokes first, as they're expected to be included.
  g_framebuffer_manager->FlushEFBPokes();
@ -2826,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
    float gamma_rcp;
    float clamp_top;
    float clamp_bottom;
-    float filter_coefficients[3];
+    std::array<u32, 3> filter_coefficients;
    u32 padding;
  };
  Uniforms encoder_params;
@ -2847,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
  encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height;
  const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1;
  encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height;
-  encoder_params.filter_coefficients[0] = filter_coefficients.upper;
-  encoder_params.filter_coefficients[1] = filter_coefficients.middle;
-  encoder_params.filter_coefficients[2] = filter_coefficients.lower;
+  encoder_params.filter_coefficients = filter_coefficients;
  g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));

  // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@ -57,23 +57,30 @@ struct TextureAndTLUTFormat
 struct EFBCopyParams
 {
  EFBCopyParams(PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, bool yuv_,
-                bool copy_filter_)
+                bool all_copy_filter_coefs_needed_, bool copy_filter_can_overflow_,
+                bool apply_gamma_)
      : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
-        copy_filter(copy_filter_)
+        all_copy_filter_coefs_needed(all_copy_filter_coefs_needed_),
+        copy_filter_can_overflow(copy_filter_can_overflow_), apply_gamma(apply_gamma_)
  {
  }

  bool operator<(const EFBCopyParams& rhs) const
  {
-    return std::tie(efb_format, copy_format, depth, yuv, copy_filter) <
-           std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter);
+    return std::tie(efb_format, copy_format, depth, yuv, all_copy_filter_coefs_needed,
+                    copy_filter_can_overflow,
+                    apply_gamma) < std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv,
+                                            rhs.all_copy_filter_coefs_needed,
+                                            rhs.copy_filter_can_overflow, rhs.apply_gamma);
  }

  PixelFormat efb_format;
  EFBCopyFormat copy_format;
  bool depth;
  bool yuv;
-  bool copy_filter;
+  bool all_copy_filter_coefs_needed;
+  bool copy_filter_can_overflow;
+  bool apply_gamma;
 };

 template <>
@ -89,19 +96,13 @@ struct fmt::formatter<EFBCopyParams>
    else
      copy_format = fmt::to_string(uid.copy_format);
    return fmt::format_to(ctx.out(),
-                          "format: {}, copy format: {}, depth: {}, yuv: {}, copy filter: {}",
-                          uid.efb_format, copy_format, uid.depth, uid.yuv, uid.copy_filter);
+                          "format: {}, copy format: {}, depth: {}, yuv: {}, apply_gamma: {}, "
+                          "all_copy_filter_coefs_needed: {}, copy_filter_can_overflow: {}",
+                          uid.efb_format, copy_format, uid.depth, uid.yuv, uid.apply_gamma,
+                          uid.all_copy_filter_coefs_needed, uid.copy_filter_can_overflow);
  }
 };

-// Reduced version of the full coefficient array, with a single value for each row.
-struct EFBCopyFilterCoefficients
-{
-  float upper;
-  float middle;
-  float lower;
-};
-
 class TextureCacheBase
 {
 private:
@ -267,8 +268,8 @@ public:
  // Save States
  void DoState(PointerWrap& p);

-  // Returns false if the top/bottom row coefficients are zero.
-  static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients);
+  static bool AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients);
+  static bool CopyFilterCanOverflow(const std::array<u32, 3>& coefficients);

 protected:
  // Decodes the specified data to the GPU texture specified by entry.
@ -285,12 +286,12 @@ protected:
                       u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
                       const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
                       bool linear_filter, float y_scale, float gamma, bool clamp_top,
-                       bool clamp_bottom, const EFBCopyFilterCoefficients& filter_coefficients);
+                       bool clamp_bottom, const std::array<u32, 3>& filter_coefficients);
  virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                                   const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
                                   bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
                                   float gamma, bool clamp_top, bool clamp_bottom,
-                                   const EFBCopyFilterCoefficients& filter_coefficients);
+                                   const std::array<u32, 3>& filter_coefficients);

  alignas(16) u8* temp = nullptr;
  size_t temp_size = 0;
@ -338,9 +339,9 @@ private:
  void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);

  // Precomputing the coefficients for the previous, current, and next lines for the copy filter.
-  static EFBCopyFilterCoefficients
+  static std::array<u32, 3>
  GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
-  static EFBCopyFilterCoefficients
+  static std::array<u32, 3>
  GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);

  // Flushes a pending EFB copy to RAM from the host to the guest RAM.
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@ -62,7 +62,7 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
             "  float y_scale;\n"
             "  float gamma_rcp;\n"
             "  float2 clamp_tb;\n"
-             "  float3 filter_coefficients;\n"
+             "  uint3 filter_coefficients;\n"
             "}};\n");
  if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
  {
@ -151,7 +151,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
  // The filter is only applied to the RGB channels, the alpha channel is left intact.
  code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"
             "{{\n");
-  if (params.copy_filter)
+  if (params.all_copy_filter_coefs_needed)
  {
    code.Write("  float4 prev_row = ");
    WriteSampleOp(-1);
@ -162,9 +162,9 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
               "  float4 next_row = ");
    WriteSampleOp(1);
    code.Write(";\n"
-               "  return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
-               "                      current_row.rgb * filter_coefficients[1] +\n"
-               "                      next_row.rgb * filter_coefficients[2], \n"
+               "  return float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n"
+               "                      current_row.rgb * filter_coefficients[1] / 64.0 +\n"
+               "                      next_row.rgb * filter_coefficients[2] / 64.0, \n"
               "                    float3(1, 1, 1)), current_row.a);\n");
  }
  else
@ -172,7 +172,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
    code.Write("  float4 current_row = ");
    WriteSampleOp(0);
    code.Write(";\n"
-               "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
+               "return float4(min(current_row.rgb * filter_coefficients[1] / 64.0, float3(1, 1, 1)),\n"
               "              current_row.a);\n");
  }
  code.Write("}}\n");
--- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
+++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
@ -6,13 +6,15 @@
 #include "Common/Assert.h"
 #include "Common/CommonTypes.h"
 #include "VideoCommon/BPMemory.h"
+#include "VideoCommon/TextureCacheBase.h"
 #include "VideoCommon/VideoCommon.h"
 #include "VideoCommon/VideoConfig.h"

 namespace TextureConversionShaderGen
 {
 TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
-                         bool scale_by_half, bool copy_filter)
+                         bool scale_by_half, float gamma_rcp,
+                         const std::array<u32, 3>& filter_coefficients)
 {
  TCShaderUid out;

@ -22,7 +24,11 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
  uid_data->is_depth_copy = is_depth_copy;
  uid_data->is_intensity = is_intensity;
  uid_data->scale_by_half = scale_by_half;
-  uid_data->copy_filter = copy_filter;
+  uid_data->all_copy_filter_coefs_needed =
+      TextureCacheBase::AllCopyFilterCoefsNeeded(filter_coefficients);
+  uid_data->copy_filter_can_overflow = TextureCacheBase::CopyFilterCanOverflow(filter_coefficients);
+  // If the gamma is needed, then include that too.
+  uid_data->apply_gamma = gamma_rcp != 1.0f;

  return out;
 }
@ -31,7 +37,7 @@ static void WriteHeader(APIType api_type, ShaderCode& out)
 {
  out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
            "  float2 src_offset, src_size;\n"
-            "  float3 filter_coefficients;\n"
+            "  uint3 filter_coefficients;\n"
            "  float gamma_rcp;\n"
            "  float2 clamp_tb;\n"
            "  float pixel_height;\n"
@ -98,22 +104,22 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)

  // The copy filter applies to both color and depth copies. This has been verified on hardware.
  // The filter is only applied to the RGB channels, the alpha channel is left intact.
-  if (uid_data->copy_filter)
+  if (uid_data->all_copy_filter_coefs_needed)
  {
    out.Write("  float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
              "  float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
              "  float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
-              "  float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
-              "                               current_row.rgb * filter_coefficients[1] +\n"
-              "                               next_row.rgb * filter_coefficients[2], \n"
+              "  float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n"
+              "                               current_row.rgb * filter_coefficients[1] / 64.0 +\n"
+              "                               next_row.rgb * filter_coefficients[2] / 64.0, \n"
              "                             float3(1, 1, 1)), current_row.a);\n");
  }
  else
  {
    out.Write(
        "  float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
-        "  float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
-        "                         current_row.a);\n");
+        "  float4 texcol = float4(min(current_row.rgb * filter_coefficients[1] / 64.0,\n"
+        "                         float3(1, 1, 1)), current_row.a);\n");
  }

  if (uid_data->is_depth_copy)
--- a/Source/Core/VideoCommon/TextureConverterShaderGen.h
+++ b/Source/Core/VideoCommon/TextureConverterShaderGen.h
@ -25,7 +25,9 @@ struct UidData
  u32 is_depth_copy : 1;
  u32 is_intensity : 1;
  u32 scale_by_half : 1;
-  u32 copy_filter : 1;
+  u32 all_copy_filter_coefs_needed : 1;
+  u32 copy_filter_can_overflow : 1;
+  u32 apply_gamma : 1;
 };
 #pragma pack()

@ -35,7 +37,8 @@ ShaderCode GenerateVertexShader(APIType api_type);
 ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data);

 TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
-                         bool scale_by_half, bool copy_filter);
+                         bool scale_by_half, float gamma_rcp,
+                         const std::array<u32, 3>& filter_coefficients);

 }  // namespace TextureConversionShaderGen

@ -53,8 +56,10 @@ struct fmt::formatter<TextureConversionShaderGen::UidData>
      dst_format = fmt::to_string(uid.dst_format);
    return fmt::format_to(ctx.out(),
                          "dst_format: {}, efb_has_alpha: {}, is_depth_copy: {}, is_intensity: {}, "
-                          "scale_by_half: {}, copy_filter: {}",
+                          "scale_by_half: {}, all_copy_filter_coefs_needed: {}, "
+                          "copy_filter_can_overflow: {}, apply_gamma: {}",
                          dst_format, uid.efb_has_alpha, uid.is_depth_copy, uid.is_intensity,
-                          uid.scale_by_half, uid.copy_filter);
+                          uid.scale_by_half, uid.all_copy_filter_coefs_needed,
+                          uid.copy_filter_can_overflow, uid.apply_gamma);
  }
 };