From 791bd16b281d3b123a2b9cf2d5c215d4ab235c1d Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Mon, 7 Feb 2022 13:37:28 -0800 Subject: [PATCH] Restructure parameters to TextureConverterShaderGen/TextureConversionShader This will be used for later refactoring for increased accuracy. --- Source/Core/VideoBackends/Null/TextureCache.h | 4 +- .../VideoBackends/Software/TextureCache.h | 4 +- Source/Core/VideoCommon/TextureCacheBase.cpp | 65 ++++++++++--------- Source/Core/VideoCommon/TextureCacheBase.h | 43 ++++++------ .../VideoCommon/TextureConversionShader.cpp | 12 ++-- .../VideoCommon/TextureConverterShaderGen.cpp | 24 ++++--- .../VideoCommon/TextureConverterShaderGen.h | 13 ++-- 7 files changed, 89 insertions(+), 76 deletions(-) diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h index 5b2b73c365..2b95586f44 100644 --- a/Source/Core/VideoBackends/Null/TextureCache.h +++ b/Source/Core/VideoBackends/Null/TextureCache.h @@ -14,7 +14,7 @@ protected: u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) override + const std::array& filter_coefficients) override { } @@ -22,7 +22,7 @@ protected: const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) override + const std::array& filter_coefficients) override { } }; diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h index 9ffa8fa4f4..a7d241197f 100644 --- a/Source/Core/VideoBackends/Software/TextureCache.h +++ b/Source/Core/VideoBackends/Software/TextureCache.h @@ -14,7 +14,7 @@ protected: u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, 
const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) override + const std::array& filter_coefficients) override { TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, scale_by_half, y_scale, gamma); @@ -23,7 +23,7 @@ protected: const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) override + const std::array& filter_coefficients) override { // TODO: If we ever want to "fake" vram textures, we would need to implement this } diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index a14c63af5b..2f896027ad 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -1978,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry) } } -EFBCopyFilterCoefficients +std::array TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) { // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below. 
- return EFBCopyFilterCoefficients{ - static_cast(static_cast(coefficients[0]) + static_cast(coefficients[1])) / - 64.0f, - static_cast(static_cast(coefficients[2]) + static_cast(coefficients[3]) + - static_cast(coefficients[4])) / - 64.0f, - static_cast(static_cast(coefficients[5]) + static_cast(coefficients[6])) / - 64.0f, + return { + static_cast(coefficients[0]) + static_cast(coefficients[1]), + static_cast(coefficients[2]) + static_cast(coefficients[3]) + + static_cast(coefficients[4]), + static_cast(coefficients[5]) + static_cast(coefficients[6]), }; } -EFBCopyFilterCoefficients +std::array TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) { // If the user disables the copy filter, only apply it to the VRAM copy. // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected. - EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients); + std::array res = GetRAMCopyFilterCoefficients(coefficients); if (!g_ActiveConfig.bDisableCopyFilter) return res; // Disabling the copy filter in options should not ignore the values the game sets completely, // as some games use the filter coefficients to control the brightness of the screen. Instead, // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect. - res.middle = res.upper + res.middle + res.lower; - res.upper = 0.0f; - res.lower = 0.0f; + res[1] = res[0] + res[1] + res[2]; + res[0] = 0; + res[2] = 0; return res; } -bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients) +bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array& coefficients) { // If the top/bottom coefficients are zero, no point sampling/blending from these rows. 
- return coefficients.upper != 0 || coefficients.lower != 0; + return coefficients[0] != 0 || coefficients[2] != 0; +} + +bool TextureCacheBase::CopyFilterCanOverflow(const std::array& coefficients) +{ + // Normally, the copy filter coefficients will sum to at most 64. If the sum is higher than that, + // colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping + // breaks (as colors end up >= 512, which wraps back to 0). + return coefficients[0] + coefficients[1] + coefficients[2] >= 128; } void TextureCacheBase::CopyRenderTargetToTexture( @@ -2255,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture( if (copy_to_ram) { - EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); + const std::array coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); PixelFormat srcFormat = bpmem.zcontrol.pixel_format; EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, - NeedsCopyFilterInShader(coefficients)); + AllCopyFilterCoefsNeeded(coefficients), + CopyFilterCanOverflow(coefficients), gamma != 1.0); std::unique_ptr staging_texture = GetEFBCopyStagingTexture(); if (staging_texture) @@ -2716,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) + const std::array& filter_coefficients) { // Flush EFB pokes first, as they're expected to be included. g_framebuffer_manager->FlushEFBPokes(); // Get the pipeline which we will be using. If the compilation failed, this will be null. 
- const AbstractPipeline* copy_pipeline = - g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid( - dst_format, is_depth_copy, is_intensity, scale_by_half, - NeedsCopyFilterInShader(filter_coefficients))); + const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline( + TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, + scale_by_half, 1.0f / gamma, filter_coefficients)); if (!copy_pipeline) { WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline."); @@ -2746,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop struct Uniforms { float src_left, src_top, src_width, src_height; - float filter_coefficients[3]; + std::array filter_coefficients; float gamma_rcp; float clamp_top; float clamp_bottom; @@ -2761,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop uniforms.src_top = framebuffer_rect.top * rcp_efb_height; uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width; uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height; - uniforms.filter_coefficients[0] = filter_coefficients.upper; - uniforms.filter_coefficients[1] = filter_coefficients.middle; - uniforms.filter_coefficients[2] = filter_coefficients.lower; + uniforms.filter_coefficients = filter_coefficients; uniforms.gamma_rcp = 1.0f / gamma; // NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB, // which returns random garbage from the empty bus (confirmed by hardware tests). 
@@ -2795,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& u32 memory_stride, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients) + const std::array& filter_coefficients) { // Flush EFB pokes first, as they're expected to be included. g_framebuffer_manager->FlushEFBPokes(); @@ -2826,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& float gamma_rcp; float clamp_top; float clamp_bottom; - float filter_coefficients[3]; + std::array filter_coefficients; u32 padding; }; Uniforms encoder_params; @@ -2847,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& encoder_params.clamp_top = (static_cast(top_coord) + .5f) * rcp_efb_height; const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1; encoder_params.clamp_bottom = (static_cast(bottom_coord) + .5f) * rcp_efb_height; - encoder_params.filter_coefficients[0] = filter_coefficients.upper; - encoder_params.filter_coefficients[1] = filter_coefficients.middle; - encoder_params.filter_coefficients[2] = filter_coefficients.lower; + encoder_params.filter_coefficients = filter_coefficients; g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params)); // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left. 
diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 287f3b840f..4fe11a64f4 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -57,23 +57,30 @@ struct TextureAndTLUTFormat struct EFBCopyParams { EFBCopyParams(PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, bool yuv_, - bool copy_filter_) + bool all_copy_filter_coefs_needed_, bool copy_filter_can_overflow_, + bool apply_gamma_) : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_), - copy_filter(copy_filter_) + all_copy_filter_coefs_needed(all_copy_filter_coefs_needed_), + copy_filter_can_overflow(copy_filter_can_overflow_), apply_gamma(apply_gamma_) { } bool operator<(const EFBCopyParams& rhs) const { - return std::tie(efb_format, copy_format, depth, yuv, copy_filter) < - std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter); + return std::tie(efb_format, copy_format, depth, yuv, all_copy_filter_coefs_needed, + copy_filter_can_overflow, + apply_gamma) < std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, + rhs.all_copy_filter_coefs_needed, + rhs.copy_filter_can_overflow, rhs.apply_gamma); } PixelFormat efb_format; EFBCopyFormat copy_format; bool depth; bool yuv; - bool copy_filter; + bool all_copy_filter_coefs_needed; + bool copy_filter_can_overflow; + bool apply_gamma; }; template <> @@ -89,19 +96,13 @@ struct fmt::formatter else copy_format = fmt::to_string(uid.copy_format); return fmt::format_to(ctx.out(), - "format: {}, copy format: {}, depth: {}, yuv: {}, copy filter: {}", - uid.efb_format, copy_format, uid.depth, uid.yuv, uid.copy_filter); + "format: {}, copy format: {}, depth: {}, yuv: {}, apply_gamma: {}, " + "all_copy_filter_coefs_needed: {}, copy_filter_can_overflow: {}", + uid.efb_format, copy_format, uid.depth, uid.yuv, uid.apply_gamma, + uid.all_copy_filter_coefs_needed, uid.copy_filter_can_overflow); } }; -// 
Reduced version of the full coefficient array, with a single value for each row. -struct EFBCopyFilterCoefficients -{ - float upper; - float middle; - float lower; -}; - class TextureCacheBase { private: @@ -267,8 +268,8 @@ public: // Save States void DoState(PointerWrap& p); - // Returns false if the top/bottom row coefficients are zero. - static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients); + static bool AllCopyFilterCoefsNeeded(const std::array& coefficients); + static bool CopyFilterCanOverflow(const std::array& coefficients); protected: // Decodes the specified data to the GPU texture specified by entry. @@ -285,12 +286,12 @@ protected: u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, float y_scale, float gamma, bool clamp_top, - bool clamp_bottom, const EFBCopyFilterCoefficients& filter_coefficients); + bool clamp_bottom, const std::array& filter_coefficients); virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const MathUtil::Rectangle& src_rect, bool scale_by_half, bool linear_filter, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const EFBCopyFilterCoefficients& filter_coefficients); + const std::array& filter_coefficients); alignas(16) u8* temp = nullptr; size_t temp_size = 0; @@ -338,9 +339,9 @@ private: void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y); // Precomputing the coefficients for the previous, current, and next lines for the copy filter. - static EFBCopyFilterCoefficients + static std::array GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); - static EFBCopyFilterCoefficients + static std::array GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); // Flushes a pending EFB copy to RAM from the host to the guest RAM. 
diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index e7a2d4a392..fc27c9cc99 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -62,7 +62,7 @@ static void WriteHeader(ShaderCode& code, APIType api_type) " float y_scale;\n" " float gamma_rcp;\n" " float2 clamp_tb;\n" - " float3 filter_coefficients;\n" + " uint3 filter_coefficients;\n" "}};\n"); if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { @@ -151,7 +151,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A // The filter is only applied to the RGB channels, the alpha channel is left intact. code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n" "{{\n"); - if (params.copy_filter) + if (params.all_copy_filter_coefs_needed) { code.Write(" float4 prev_row = "); WriteSampleOp(-1); @@ -162,9 +162,9 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A " float4 next_row = "); WriteSampleOp(1); code.Write(";\n" - " return float4(min(prev_row.rgb * filter_coefficients[0] +\n" - " current_row.rgb * filter_coefficients[1] +\n" - " next_row.rgb * filter_coefficients[2], \n" + " return float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n" + " current_row.rgb * filter_coefficients[1] / 64.0 +\n" + " next_row.rgb * filter_coefficients[2] / 64.0, \n" " float3(1, 1, 1)), current_row.a);\n"); } else @@ -172,7 +172,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A code.Write(" float4 current_row = "); WriteSampleOp(0); code.Write(";\n" - "return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" + "return float4(min(current_row.rgb * filter_coefficients[1] / 64.0, float3(1, 1, 1)),\n" " current_row.a);\n"); } code.Write("}}\n"); diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp 
b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index 0667f8c621..6b10a34aed 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -6,13 +6,15 @@ #include "Common/Assert.h" #include "Common/CommonTypes.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" namespace TextureConversionShaderGen { TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, - bool scale_by_half, bool copy_filter) + bool scale_by_half, float gamma_rcp, + const std::array& filter_coefficients) { TCShaderUid out; @@ -22,7 +24,11 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i uid_data->is_depth_copy = is_depth_copy; uid_data->is_intensity = is_intensity; uid_data->scale_by_half = scale_by_half; - uid_data->copy_filter = copy_filter; + uid_data->all_copy_filter_coefs_needed = + TextureCacheBase::AllCopyFilterCoefsNeeded(filter_coefficients); + uid_data->copy_filter_can_overflow = TextureCacheBase::CopyFilterCanOverflow(filter_coefficients); + // If the gamma is needed, then include that too. + uid_data->apply_gamma = gamma_rcp != 1.0f; return out; } @@ -31,7 +37,7 @@ static void WriteHeader(APIType api_type, ShaderCode& out) { out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n" " float2 src_offset, src_size;\n" - " float3 filter_coefficients;\n" + " uint3 filter_coefficients;\n" " float gamma_rcp;\n" " float2 clamp_tb;\n" " float pixel_height;\n" @@ -98,22 +104,22 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) // The copy filter applies to both color and depth copies. This has been verified on hardware. // The filter is only applied to the RGB channels, the alpha channel is left intact. 
- if (uid_data->copy_filter) + if (uid_data->all_copy_filter_coefs_needed) { out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n" " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" " float4 next_row = SampleEFB(v_tex0, 1.0f);\n" - " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n" - " current_row.rgb * filter_coefficients[1] +\n" - " next_row.rgb * filter_coefficients[2], \n" + " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n" + " current_row.rgb * filter_coefficients[1] / 64.0 +\n" + " next_row.rgb * filter_coefficients[2] / 64.0, \n" " float3(1, 1, 1)), current_row.a);\n"); } else { out.Write( " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" - " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" - " current_row.a);\n"); + " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1] / 64.0,\n" + " float3(1, 1, 1)), current_row.a);\n"); } if (uid_data->is_depth_copy) diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.h b/Source/Core/VideoCommon/TextureConverterShaderGen.h index 54665104f6..10745cb3dc 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.h +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.h @@ -25,7 +25,9 @@ struct UidData u32 is_depth_copy : 1; u32 is_intensity : 1; u32 scale_by_half : 1; - u32 copy_filter : 1; + u32 all_copy_filter_coefs_needed : 1; + u32 copy_filter_can_overflow : 1; + u32 apply_gamma : 1; }; #pragma pack() @@ -35,7 +37,8 @@ ShaderCode GenerateVertexShader(APIType api_type); ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data); TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, - bool scale_by_half, bool copy_filter); + bool scale_by_half, float gamma_rcp, + const std::array& filter_coefficients); } // namespace TextureConversionShaderGen @@ -53,8 +56,10 @@ struct fmt::formatter dst_format = fmt::to_string(uid.dst_format); return 
fmt::format_to(ctx.out(), "dst_format: {}, efb_has_alpha: {}, is_depth_copy: {}, is_intensity: {}, " - "scale_by_half: {}, copy_filter: {}", + "scale_by_half: {}, all_copy_filter_coefs_needed: {}, " + "copy_filter_can_overflow: {}, apply_gamma: {}", dst_format, uid.efb_has_alpha, uid.is_depth_copy, uid.is_intensity, - uid.scale_by_half, uid.copy_filter); + uid.scale_by_half, uid.all_copy_filter_coefs_needed, + uid.copy_filter_can_overflow, uid.apply_gamma); } };