VideoSoftware: Implement xfb copy filter (Deflickering/Brightness)

2025-03-12 06:39:14 +01:00 · 2016-05-18 08:21:02 +12:00 · 2016-05-18 08:21:02 +12:00 · fc96479f12
commit fc96479f12
parent 0cd46f4d21
5 changed files with 116 additions and 37 deletions
--- a/Source/Core/VideoBackends/Software/EfbInterface.cpp
+++ b/Source/Core/VideoBackends/Software/EfbInterface.cpp
@ -469,10 +469,42 @@ u32 GetColor(u16 x, u16 y)
  return GetPixelColor(offset);
 }
-// For internal used only, return a non-normalized value, which saves work later.
+static u32 VerticalFilter(const std::array<u32, 3>& colors,
-yuv444 GetColorYUV(u16 x, u16 y)
+                          const std::array<u8, 7>& filterCoefficients)
 {
  // Blends three vertically adjacent pixels into a single packed color using the seven copy
  // filter coefficients.
  //   colors:             packed 32-bit colors; [0] is the pixel above, [1] is the current pixel
  //                       and [2] is the pixel below (see the coefficient mapping in the loop).
  //   filterCoefficients: the seven filter weights, indexed 0..6.
  // Returns the filtered color packed with the same byte layout as the inputs; its alpha byte is
  // always 0.

  // Split each packed u32 into four bytes so that channels can be indexed with the *_C constants.
  u8 in_colors[3][4];
  std::memcpy(&in_colors, colors.data(), sizeof(in_colors));
  // Alpha channel is not used
  u8 out_color[4];
  out_color[ALP_C] = 0;
  // All Coefficients should sum to 64, otherwise the total brightness will change, which many games
  // do on purpose to implement a brightness filter across the whole copy.
  // Every channel index from BLU_C up to and including RED_C is filtered independently.
  for (int i = BLU_C; i <= RED_C; i++)
  {
    // TODO: implement support for multisampling.
    // In non-multisampling mode:
    //   * Coefficients 2, 3 and 4 sample from the current pixel.
    //   * Coefficients 0 and 1 sample from the pixel above this one
    //   * Coefficients 5 and 6 sample from the pixel below this one
    // The weighted sum is accumulated in an int so it cannot wrap in 8 bits before normalizing.
    int sum =
        in_colors[0][i] * (filterCoefficients[0] + filterCoefficients[1]) +
        in_colors[1][i] * (filterCoefficients[2] + filterCoefficients[3] + filterCoefficients[4]) +
        in_colors[2][i] * (filterCoefficients[5] + filterCoefficients[6]);
    // TODO: this clamping behavior appears to be correct, but isn't confirmed on hardware.
    // The shift by 6 divides by 64, the total the coefficients are expected to sum to; totals
    // above 64 can push the result past 8 bits, hence the clamp.
    out_color[i] = std::min(255, sum >> 6);  // clamp larger values to 255
  }
  // Repack the four channel bytes into a single u32.
  u32 out_color32;
  std::memcpy(&out_color32, out_color, sizeof(out_color32));
  return out_color32;
 }
 // For internal use only, return a non-normalized value, which saves work later.
 static yuv444 ConvertColorToYUV(u32 color)
 {
  const u32 color = GetColor(x, y);
  const u8 red = static_cast<u8>(color >> 24);
  const u8 green = static_cast<u8>(color >> 16);
  const u8 blue = static_cast<u8>(color >> 8);
@ -497,7 +529,9 @@ u8* GetPixelPointer(u16 x, u16 y, bool depth)
  return &efb[GetColorOffset(x, y)];
 }
-void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale)
+void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale,
               bool clamp_top, bool clamp_bottom, float Gamma,
               const std::array<u8, 7>& filterCoefficients)
 {
  if (!xfb_in_ram)
  {
@ -523,13 +557,29 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec
  source.resize(EFB_WIDTH * EFB_HEIGHT);
  yuv422_packed* src_ptr = &source[0];
-  for (float y = source_rect.top; y < source_rect.bottom; y++)
+  for (int y = source_rect.top; y < source_rect.bottom; y++)
  {
-    // Get a scanline of YUV pixels in 4:4:4 format
+    // Clamping behavior
    //   NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
    //         which returns random garbage from the empty bus (confirmed by hardware tests).
    //
    //         In our implementation, the garbage just so happens to be the top or bottom row.
    //         Statistically, that could happen.
    u16 y_prev = static_cast<u16>(std::max(clamp_top ? source_rect.top : 0, y - 1));
    u16 y_next = static_cast<u16>(std::min(clamp_bottom ? source_rect.bottom : EFB_HEIGHT, y + 1));
    // Get a scanline of YUV pixels in 4:4:4 format
    for (int i = 1, x = left; x < right; i++, x++)
    {
-      scanline[i] = GetColorYUV(x, y);
+      // Get RGB colors
      std::array<u32, 3> colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}};
      // Vertical Filter (Multisampling resolve, deflicker, brightness)
      u32 filtered = VerticalFilter(colors, filterCoefficients);
      // TODO: Gamma correction happens here.
      scanline[i] = ConvertColorToYUV(filtered);
    }
    // Flipper clamps the border colors
--- a/Source/Core/VideoBackends/Software/EfbInterface.h
+++ b/Source/Core/VideoBackends/Software/EfbInterface.h
@ -4,6 +4,8 @@
 #pragma once
 #include <array>
 #include "Common/CommonTypes.h"
 #include "VideoCommon/PerfQueryBase.h"
 #include "VideoCommon/VideoCommon.h"
@ -52,12 +54,13 @@ void SetColor(u16 x, u16 y, u8* color);
 void SetDepth(u16 x, u16 y, u32 depth);
 u32 GetColor(u16 x, u16 y);
 yuv444 GetColorYUV(u16 x, u16 y);
 u32 GetDepth(u16 x, u16 y);
 u8* GetPixelPointer(u16 x, u16 y, bool depth);
-void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale);
+void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale,
               bool clamp_top, bool clamp_bottom, float Gamma,
               const std::array<u8, 7>& filterCoefficients);
 extern u32 perf_values[PQ_NUM_MEMBERS];
 inline void IncPerfCounterQuadCount(PerfQueryType type)
--- a/Source/Core/VideoBackends/Software/TextureEncoder.cpp
+++ b/Source/Core/VideoBackends/Software/TextureEncoder.cpp
@ -1473,7 +1473,11 @@ void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_pe
 {
  if (params.copy_format == EFBCopyFormat::XFB)
  {
-    EfbInterface::EncodeXFB(dst, native_width, src_rect, params.y_scale);
+    static constexpr std::array<float, 4> gamma_LUT = {1.0f, 1.7f, 2.2f, 1.0f};
    EfbInterface::EncodeXFB(dst, native_width, src_rect, params.y_scale,
                            !!bpmem.triggerEFBCopy.clamp_top, !!bpmem.triggerEFBCopy.clamp_bottom,
                            gamma_LUT[bpmem.triggerEFBCopy.gamma],
                            bpmem.copyfilter.GetCoefficients());
  }
  else
  {
--- a/Source/Core/VideoCommon/BPMemory.h
+++ b/Source/Core/VideoCommon/BPMemory.h
@ -4,6 +4,7 @@
 #pragma once
 #include <array>
 #include <string>
 #include "Common/BitField.h"
@ -944,8 +945,8 @@ union UPE_Copy
 {
  u32 Hex;
-  BitField<0, 1, u32> clamp0;               // if set clamp top
+  BitField<0, 1, u32> clamp_top;            // if set clamp top
-  BitField<1, 1, u32> clamp1;               // if set clamp bottom
+  BitField<1, 1, u32> clamp_bottom;         // if set clamp bottom
  BitField<2, 1, u32> yuv;                  // if set, color conversion from RGB to YUV
  BitField<3, 4, u32> target_pixel_format;  // realformat is (fmt/2)+((fmt&1)*8).... for some reason
                                            // the msb is the lsb (pattern: cycling right shift)
@ -967,6 +968,27 @@ union UPE_Copy
  }
 };
 union CopyFilterCoefficients
 {
  u64 Hex;
  BitField<0, 6, u64> w0;
  BitField<6, 6, u64> w1;
  BitField<12, 6, u64> w2;
  BitField<18, 6, u64> w3;
  BitField<32, 6, u64> w4;
  BitField<38, 6, u64> w5;
  BitField<44, 6, u64> w6;
  std::array<u8, 7> GetCoefficients() const
  {
    return {
        static_cast<u8>(w0), static_cast<u8>(w1), static_cast<u8>(w2), static_cast<u8>(w3),
        static_cast<u8>(w4), static_cast<u8>(w5), static_cast<u8>(w6),
    };
  }
 };
 union BPU_PreloadTileInfo
 {
  u32 hex;
@ -1036,29 +1058,29 @@ struct BPMemory
                                 // 2 channel, 16 when dest is RGBA
  // also, doubles whenever mipmap box filter option is set (excent on RGBA). Probably to do with
  // number of bytes to look at when smoothing
-  u32 dispcopyyscale;              // 4e
+  u32 dispcopyyscale;                 // 4e
-  u32 clearcolorAR;                // 4f
+  u32 clearcolorAR;                   // 4f
-  u32 clearcolorGB;                // 50
+  u32 clearcolorGB;                   // 50
-  u32 clearZValue;                 // 51
+  u32 clearZValue;                    // 51
-  UPE_Copy triggerEFBCopy;         // 52
+  UPE_Copy triggerEFBCopy;            // 52
-  u32 copyfilter[2];               // 53,54
+  CopyFilterCoefficients copyfilter;  // 53,54
-  u32 boundbox0;                   // 55
+  u32 boundbox0;                      // 55
-  u32 boundbox1;                   // 56
+  u32 boundbox1;                      // 56
-  u32 unknown7[2];                 // 57,58
+  u32 unknown7[2];                    // 57,58
-  X10Y10 scissorOffset;            // 59
+  X10Y10 scissorOffset;               // 59
-  u32 unknown8[6];                 // 5a,5b,5c,5d, 5e,5f
+  u32 unknown8[6];                    // 5a,5b,5c,5d, 5e,5f
-  BPS_TmemConfig tmem_config;      // 60-66
+  BPS_TmemConfig tmem_config;         // 60-66
-  u32 metric;                      // 67
+  u32 metric;                         // 67
-  FieldMode fieldmode;             // 68
+  FieldMode fieldmode;                // 68
-  u32 unknown10[7];                // 69-6F
+  u32 unknown10[7];                   // 69-6F
-  u32 unknown11[16];               // 70-7F
+  u32 unknown11[16];                  // 70-7F
-  FourTexUnits tex[2];             // 80-bf
+  FourTexUnits tex[2];                // 80-bf
-  TevStageCombiner combiners[16];  // 0xC0-0xDF
+  TevStageCombiner combiners[16];     // 0xC0-0xDF
-  TevReg tevregs[4];               // 0xE0
+  TevReg tevregs[4];                  // 0xE0
-  FogRangeParams fogRange;         // 0xE8
+  FogRangeParams fogRange;            // 0xE8
-  FogParams fog;                   // 0xEE,0xEF,0xF0,0xF1,0xF2
+  FogParams fog;                      // 0xEE,0xEF,0xF0,0xF1,0xF2
-  AlphaTest alpha_test;            // 0xF3
+  AlphaTest alpha_test;               // 0xF3
-  ZTex1 ztex1;                     // 0xf4,0xf5
+  ZTex1 ztex1;                        // 0xf4,0xf5
  ZTex2 ztex2;
  TevKSel tevksel[8];  // 0xf6,0xf7,f8,f9,fa,fb,fc,fd
  u32 bpMask;          // 0xFE
--- a/Source/Core/VideoCommon/BPStructs.cpp
+++ b/Source/Core/VideoCommon/BPStructs.cpp
@ -1015,9 +1015,9 @@ void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
        "Copy to XFB: %s\n"
        "Intensity format: %s\n"
        "Automatic color conversion: %s",
-        (copy.clamp0 && copy.clamp1) ?
+        (copy.clamp_top && copy.clamp_bottom) ?
            "Top and Bottom" :
-            (copy.clamp0) ? "Top only" : (copy.clamp1) ? "Bottom only" : "None",
+            (copy.clamp_top) ? "Top only" : (copy.clamp_bottom) ? "Bottom only" : "None",
        no_yes[copy.yuv], static_cast<int>(copy.tp_realFormat()),
        (copy.gamma == 0) ?
            "1.0" :