diff --git a/Source/Core/VideoBackends/Software/EfbInterface.cpp b/Source/Core/VideoBackends/Software/EfbInterface.cpp index a94ac467b8..12c208f70c 100644 --- a/Source/Core/VideoBackends/Software/EfbInterface.cpp +++ b/Source/Core/VideoBackends/Software/EfbInterface.cpp @@ -469,10 +469,42 @@ u32 GetColor(u16 x, u16 y) return GetPixelColor(offset); } -// For internal used only, return a non-normalized value, which saves work later. -yuv444 GetColorYUV(u16 x, u16 y) +static u32 VerticalFilter(const std::array& colors, + const std::array& filterCoefficients) +{ + u8 in_colors[3][4]; + std::memcpy(&in_colors, colors.data(), sizeof(in_colors)); + + // Alpha channel is not used + u8 out_color[4]; + out_color[ALP_C] = 0; + + // All Coefficients should sum to 64, otherwise the total brightness will change, which many games + // do on purpose to implement a brightness filter across the whole copy. + for (int i = BLU_C; i <= RED_C; i++) + { + // TODO: implement support for multisampling. + // In non-multisampling mode: + // * Coefficients 2, 3 and 4 sample from the current pixel. + // * Coefficients 0 and 1 sample from the pixel above this one + // * Coefficients 5 and 6 sample from the pixel below this one + int sum = + in_colors[0][i] * (filterCoefficients[0] + filterCoefficients[1]) + + in_colors[1][i] * (filterCoefficients[2] + filterCoefficients[3] + filterCoefficients[4]) + + in_colors[2][i] * (filterCoefficients[5] + filterCoefficients[6]); + + // TODO: this clamping behavior appears to be correct, but isn't confirmed on hardware. + out_color[i] = std::min(255, sum >> 6); // clamp larger values to 255 + } + + u32 out_color32; + std::memcpy(&out_color32, out_color, sizeof(out_color32)); + return out_color32; +} + +// For internal used only, return a non-normalized value, which saves work later. +static yuv444 ConvertColorToYUV(u32 color) { - const u32 color = GetColor(x, y); const u8 red = static_cast(color >> 24); const u8 green = static_cast(color >> 16); const u8 blue = static_cast(color >> 8); @@ -497,7 +529,9 @@ u8* GetPixelPointer(u16 x, u16 y, bool depth) return &efb[GetColorOffset(x, y)]; } -void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale) +void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale, + bool clamp_top, bool clamp_bottom, float Gamma, + const std::array& filterCoefficients) { if (!xfb_in_ram) { @@ -523,13 +557,29 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec source.resize(EFB_WIDTH * EFB_HEIGHT); yuv422_packed* src_ptr = &source[0]; - for (float y = source_rect.top; y < source_rect.bottom; y++) + for (int y = source_rect.top; y < source_rect.bottom; y++) { - // Get a scanline of YUV pixels in 4:4:4 format + // Clamping behavior + // NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB, + // which returns random garbage from the empty bus (confirmed by hardware tests). + // + // In our implementation, the garbage just so happens to be the top or bottom row. + // Statistically, that could happen. + u16 y_prev = static_cast(std::max(clamp_top ? source_rect.top : 0, y - 1)); + u16 y_next = static_cast(std::min(clamp_bottom ? source_rect.bottom : EFB_HEIGHT, y + 1)); + // Get a scanline of YUV pixels in 4:4:4 format for (int i = 1, x = left; x < right; i++, x++) { - scanline[i] = GetColorYUV(x, y); + // Get RGB colors + std::array colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}}; + + // Vertical Filter (Multisampling resolve, deflicker, brightness) + u32 filtered = VerticalFilter(colors, filterCoefficients); + + // TODO: Gamma correction happens here. + + scanline[i] = ConvertColorToYUV(filtered); } // Flipper clamps the border colors diff --git a/Source/Core/VideoBackends/Software/EfbInterface.h b/Source/Core/VideoBackends/Software/EfbInterface.h index a95d6b8aeb..9d0706a83c 100644 --- a/Source/Core/VideoBackends/Software/EfbInterface.h +++ b/Source/Core/VideoBackends/Software/EfbInterface.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "Common/CommonTypes.h" #include "VideoCommon/PerfQueryBase.h" #include "VideoCommon/VideoCommon.h" @@ -52,12 +54,13 @@ void SetColor(u16 x, u16 y, u8* color); void SetDepth(u16 x, u16 y, u32 depth); u32 GetColor(u16 x, u16 y); -yuv444 GetColorYUV(u16 x, u16 y); u32 GetDepth(u16 x, u16 y); u8* GetPixelPointer(u16 x, u16 y, bool depth); -void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale); +void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale, + bool clamp_top, bool clamp_bottom, float Gamma, + const std::array& filterCoefficients); extern u32 perf_values[PQ_NUM_MEMBERS]; inline void IncPerfCounterQuadCount(PerfQueryType type) diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.cpp b/Source/Core/VideoBackends/Software/TextureEncoder.cpp index 751b148e18..5308dca705 100644 --- a/Source/Core/VideoBackends/Software/TextureEncoder.cpp +++ b/Source/Core/VideoBackends/Software/TextureEncoder.cpp @@ -1473,7 +1473,11 @@ void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_pe { if (params.copy_format == EFBCopyFormat::XFB) { - EfbInterface::EncodeXFB(dst, native_width, src_rect, params.y_scale); + static constexpr std::array gamma_LUT = {1.0f, 1.7f, 2.2f, 1.0f}; + EfbInterface::EncodeXFB(dst, native_width, src_rect, params.y_scale, + !!bpmem.triggerEFBCopy.clamp_top, !!bpmem.triggerEFBCopy.clamp_bottom, + gamma_LUT[bpmem.triggerEFBCopy.gamma], + bpmem.copyfilter.GetCoefficients()); } else { diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 0157b5aed3..4a1cea9d48 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "Common/BitField.h" @@ -944,8 +945,8 @@ union UPE_Copy { u32 Hex; - BitField<0, 1, u32> clamp0; // if set clamp top - BitField<1, 1, u32> clamp1; // if set clamp bottom + BitField<0, 1, u32> clamp_top; // if set clamp top + BitField<1, 1, u32> clamp_bottom; // if set clamp bottom BitField<2, 1, u32> yuv; // if set, color conversion from RGB to YUV BitField<3, 4, u32> target_pixel_format; // realformat is (fmt/2)+((fmt&1)*8).... for some reason // the msb is the lsb (pattern: cycling right shift) @@ -967,6 +968,27 @@ union UPE_Copy } }; +union CopyFilterCoefficients +{ + u64 Hex; + + BitField<0, 6, u64> w0; + BitField<6, 6, u64> w1; + BitField<12, 6, u64> w2; + BitField<18, 6, u64> w3; + BitField<32, 6, u64> w4; + BitField<38, 6, u64> w5; + BitField<44, 6, u64> w6; + + std::array GetCoefficients() const + { + return { + static_cast(w0), static_cast(w1), static_cast(w2), static_cast(w3), + static_cast(w4), static_cast(w5), static_cast(w6), + }; + } +}; + union BPU_PreloadTileInfo { u32 hex; @@ -1036,29 +1058,29 @@ struct BPMemory // 2 channel, 16 when dest is RGBA // also, doubles whenever mipmap box filter option is set (excent on RGBA). Probably to do with // number of bytes to look at when smoothing - u32 dispcopyyscale; // 4e - u32 clearcolorAR; // 4f - u32 clearcolorGB; // 50 - u32 clearZValue; // 51 - UPE_Copy triggerEFBCopy; // 52 - u32 copyfilter[2]; // 53,54 - u32 boundbox0; // 55 - u32 boundbox1; // 56 - u32 unknown7[2]; // 57,58 - X10Y10 scissorOffset; // 59 - u32 unknown8[6]; // 5a,5b,5c,5d, 5e,5f - BPS_TmemConfig tmem_config; // 60-66 - u32 metric; // 67 - FieldMode fieldmode; // 68 - u32 unknown10[7]; // 69-6F - u32 unknown11[16]; // 70-7F - FourTexUnits tex[2]; // 80-bf - TevStageCombiner combiners[16]; // 0xC0-0xDF - TevReg tevregs[4]; // 0xE0 - FogRangeParams fogRange; // 0xE8 - FogParams fog; // 0xEE,0xEF,0xF0,0xF1,0xF2 - AlphaTest alpha_test; // 0xF3 - ZTex1 ztex1; // 0xf4,0xf5 + u32 dispcopyyscale; // 4e + u32 clearcolorAR; // 4f + u32 clearcolorGB; // 50 + u32 clearZValue; // 51 + UPE_Copy triggerEFBCopy; // 52 + CopyFilterCoefficients copyfilter; // 53,54 + u32 boundbox0; // 55 + u32 boundbox1; // 56 + u32 unknown7[2]; // 57,58 + X10Y10 scissorOffset; // 59 + u32 unknown8[6]; // 5a,5b,5c,5d, 5e,5f + BPS_TmemConfig tmem_config; // 60-66 + u32 metric; // 67 + FieldMode fieldmode; // 68 + u32 unknown10[7]; // 69-6F + u32 unknown11[16]; // 70-7F + FourTexUnits tex[2]; // 80-bf + TevStageCombiner combiners[16]; // 0xC0-0xDF + TevReg tevregs[4]; // 0xE0 + FogRangeParams fogRange; // 0xE8 + FogParams fog; // 0xEE,0xEF,0xF0,0xF1,0xF2 + AlphaTest alpha_test; // 0xF3 + ZTex1 ztex1; // 0xf4,0xf5 ZTex2 ztex2; TevKSel tevksel[8]; // 0xf6,0xf7,f8,f9,fa,fb,fc,fd u32 bpMask; // 0xFE diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 4647c3de4e..143132ecaf 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -1015,9 +1015,9 @@ void GetBPRegInfo(const u8* data, std::string* name, std::string* desc) "Copy to XFB: %s\n" "Intensity format: %s\n" "Automatic color conversion: %s", - (copy.clamp0 && copy.clamp1) ? + (copy.clamp_top && copy.clamp_bottom) ? "Top and Bottom" : - (copy.clamp0) ? "Top only" : (copy.clamp1) ? "Bottom only" : "None", + (copy.clamp_top) ? "Top only" : (copy.clamp_bottom) ? "Bottom only" : "None", no_yes[copy.yuv], static_cast(copy.tp_realFormat()), (copy.gamma == 0) ? "1.0" :