Restructure parameters to TextureConverterShaderGen/TextureConversionShader

This will be used for later refactoring for increased accuracy.
This commit is contained in:
Pokechu22 2022-02-07 13:37:28 -08:00
parent d20094efa2
commit 791bd16b28
7 changed files with 89 additions and 76 deletions

View File

@ -14,7 +14,7 @@ protected:
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
}
@ -22,7 +22,7 @@ protected:
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
}
};

View File

@ -14,7 +14,7 @@ protected:
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
src_rect, scale_by_half, y_scale, gamma);
@ -23,7 +23,7 @@ protected:
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients) override
const std::array<u32, 3>& filter_coefficients) override
{
// TODO: If we ever want to "fake" vram textures, we would need to implement this
}

View File

@ -1978,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
}
}
EFBCopyFilterCoefficients
std::array<u32, 3>
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
return EFBCopyFilterCoefficients{
static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4])) /
64.0f,
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
64.0f,
return {
static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
static_cast<u32>(coefficients[4]),
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6]),
};
}
EFBCopyFilterCoefficients
std::array<u32, 3>
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
{
// If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients);
std::array<u32, 3> res = GetRAMCopyFilterCoefficients(coefficients);
if (!g_ActiveConfig.bDisableCopyFilter)
return res;
// Disabling the copy filter in options should not ignore the values the game sets completely,
// as some games use the filter coefficients to control the brightness of the screen. Instead,
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
res.middle = res.upper + res.middle + res.lower;
res.upper = 0.0f;
res.lower = 0.0f;
res[1] = res[0] + res[1] + res[2];
res[0] = 0;
res[2] = 0;
return res;
}
bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients)
bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients)
{
// If the top/bottom coefficients are zero, no point sampling/blending from these rows.
return coefficients.upper != 0 || coefficients.lower != 0;
return coefficients[0] != 0 || coefficients[2] != 0;
}
bool TextureCacheBase::CopyFilterCanOverflow(const std::array<u32, 3>& coefficients)
{
// Normally, the copy filter coefficients will sum to at most 64. If the sum is higher than that,
// colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping
// breaks (as colors end up >= 512, which wraps back to 0).
return coefficients[0] + coefficients[1] + coefficients[2] >= 128;
}
void TextureCacheBase::CopyRenderTargetToTexture(
@ -2255,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (copy_to_ram)
{
EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
const std::array<u32, 3> coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
AllCopyFilterCoefsNeeded(coefficients),
CopyFilterCanOverflow(coefficients), gamma != 1.0);
std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
if (staging_texture)
@ -2716,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
bool scale_by_half, bool linear_filter,
EFBCopyFormat dst_format, bool is_intensity, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
const std::array<u32, 3>& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
// Get the pipeline which we will be using. If the compilation failed, this will be null.
const AbstractPipeline* copy_pipeline =
g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
dst_format, is_depth_copy, is_intensity, scale_by_half,
NeedsCopyFilterInShader(filter_coefficients)));
const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline(
TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half, 1.0f / gamma, filter_coefficients));
if (!copy_pipeline)
{
WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
@ -2746,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
struct Uniforms
{
float src_left, src_top, src_width, src_height;
float filter_coefficients[3];
std::array<u32, 3> filter_coefficients;
float gamma_rcp;
float clamp_top;
float clamp_bottom;
@ -2761,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
uniforms.filter_coefficients[0] = filter_coefficients.upper;
uniforms.filter_coefficients[1] = filter_coefficients.middle;
uniforms.filter_coefficients[2] = filter_coefficients.lower;
uniforms.filter_coefficients = filter_coefficients;
uniforms.gamma_rcp = 1.0f / gamma;
// NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
// which returns random garbage from the empty bus (confirmed by hardware tests).
@ -2795,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
u32 memory_stride, const MathUtil::Rectangle<int>& src_rect,
bool scale_by_half, bool linear_filter, float y_scale, float gamma,
bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients)
const std::array<u32, 3>& filter_coefficients)
{
// Flush EFB pokes first, as they're expected to be included.
g_framebuffer_manager->FlushEFBPokes();
@ -2826,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
float gamma_rcp;
float clamp_top;
float clamp_bottom;
float filter_coefficients[3];
std::array<u32, 3> filter_coefficients;
u32 padding;
};
Uniforms encoder_params;
@ -2847,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height;
const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1;
encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height;
encoder_params.filter_coefficients[0] = filter_coefficients.upper;
encoder_params.filter_coefficients[1] = filter_coefficients.middle;
encoder_params.filter_coefficients[2] = filter_coefficients.lower;
encoder_params.filter_coefficients = filter_coefficients;
g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));
// Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.

View File

@ -57,23 +57,30 @@ struct TextureAndTLUTFormat
struct EFBCopyParams
{
EFBCopyParams(PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, bool yuv_,
bool copy_filter_)
bool all_copy_filter_coefs_needed_, bool copy_filter_can_overflow_,
bool apply_gamma_)
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
copy_filter(copy_filter_)
all_copy_filter_coefs_needed(all_copy_filter_coefs_needed_),
copy_filter_can_overflow(copy_filter_can_overflow_), apply_gamma(apply_gamma_)
{
}
bool operator<(const EFBCopyParams& rhs) const
{
return std::tie(efb_format, copy_format, depth, yuv, copy_filter) <
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter);
return std::tie(efb_format, copy_format, depth, yuv, all_copy_filter_coefs_needed,
copy_filter_can_overflow,
apply_gamma) < std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv,
rhs.all_copy_filter_coefs_needed,
rhs.copy_filter_can_overflow, rhs.apply_gamma);
}
PixelFormat efb_format;
EFBCopyFormat copy_format;
bool depth;
bool yuv;
bool copy_filter;
bool all_copy_filter_coefs_needed;
bool copy_filter_can_overflow;
bool apply_gamma;
};
template <>
@ -89,19 +96,13 @@ struct fmt::formatter<EFBCopyParams>
else
copy_format = fmt::to_string(uid.copy_format);
return fmt::format_to(ctx.out(),
"format: {}, copy format: {}, depth: {}, yuv: {}, copy filter: {}",
uid.efb_format, copy_format, uid.depth, uid.yuv, uid.copy_filter);
"format: {}, copy format: {}, depth: {}, yuv: {}, apply_gamma: {}, "
"all_copy_filter_coefs_needed: {}, copy_filter_can_overflow: {}",
uid.efb_format, copy_format, uid.depth, uid.yuv, uid.apply_gamma,
uid.all_copy_filter_coefs_needed, uid.copy_filter_can_overflow);
}
};
// Reduced version of the full coefficient array, with a single value for each row.
struct EFBCopyFilterCoefficients
{
float upper;
float middle;
float lower;
};
class TextureCacheBase
{
private:
@ -267,8 +268,8 @@ public:
// Save States
void DoState(PointerWrap& p);
// Returns false if the top/bottom row coefficients are zero.
static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients);
static bool AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients);
static bool CopyFilterCanOverflow(const std::array<u32, 3>& coefficients);
protected:
// Decodes the specified data to the GPU texture specified by entry.
@ -285,12 +286,12 @@ protected:
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, float y_scale, float gamma, bool clamp_top,
bool clamp_bottom, const EFBCopyFilterCoefficients& filter_coefficients);
bool clamp_bottom, const std::array<u32, 3>& filter_coefficients);
virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
float gamma, bool clamp_top, bool clamp_bottom,
const EFBCopyFilterCoefficients& filter_coefficients);
const std::array<u32, 3>& filter_coefficients);
alignas(16) u8* temp = nullptr;
size_t temp_size = 0;
@ -338,9 +339,9 @@ private:
void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);
// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
static EFBCopyFilterCoefficients
static std::array<u32, 3>
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
static EFBCopyFilterCoefficients
static std::array<u32, 3>
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
// Flushes a pending EFB copy to RAM from the host to the guest RAM.

View File

@ -62,7 +62,7 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
" float y_scale;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float3 filter_coefficients;\n"
" uint3 filter_coefficients;\n"
"}};\n");
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
{
@ -151,7 +151,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
// The filter is only applied to the RGB channels, the alpha channel is left intact.
code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"
"{{\n");
if (params.copy_filter)
if (params.all_copy_filter_coefs_needed)
{
code.Write(" float4 prev_row = ");
WriteSampleOp(-1);
@ -162,9 +162,9 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
" float4 next_row = ");
WriteSampleOp(1);
code.Write(";\n"
" return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
" return float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n"
" current_row.rgb * filter_coefficients[1] / 64.0 +\n"
" next_row.rgb * filter_coefficients[2] / 64.0, \n"
" float3(1, 1, 1)), current_row.a);\n");
}
else
@ -172,7 +172,7 @@ static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, A
code.Write(" float4 current_row = ");
WriteSampleOp(0);
code.Write(";\n"
"return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
"return float4(min(current_row.rgb * filter_coefficients[1] / 64.0, float3(1, 1, 1)),\n"
" current_row.a);\n");
}
code.Write("}}\n");

View File

@ -6,13 +6,15 @@
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
namespace TextureConversionShaderGen
{
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half, bool copy_filter)
bool scale_by_half, float gamma_rcp,
const std::array<u32, 3>& filter_coefficients)
{
TCShaderUid out;
@ -22,7 +24,11 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
uid_data->is_depth_copy = is_depth_copy;
uid_data->is_intensity = is_intensity;
uid_data->scale_by_half = scale_by_half;
uid_data->copy_filter = copy_filter;
uid_data->all_copy_filter_coefs_needed =
TextureCacheBase::AllCopyFilterCoefsNeeded(filter_coefficients);
uid_data->copy_filter_can_overflow = TextureCacheBase::CopyFilterCanOverflow(filter_coefficients);
// If the gamma is needed, then include that too.
uid_data->apply_gamma = gamma_rcp != 1.0f;
return out;
}
@ -31,7 +37,7 @@ static void WriteHeader(APIType api_type, ShaderCode& out)
{
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
" float2 src_offset, src_size;\n"
" float3 filter_coefficients;\n"
" uint3 filter_coefficients;\n"
" float gamma_rcp;\n"
" float2 clamp_tb;\n"
" float pixel_height;\n"
@ -98,22 +104,22 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
// The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact.
if (uid_data->copy_filter)
if (uid_data->all_copy_filter_coefs_needed)
{
out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], \n"
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n"
" current_row.rgb * filter_coefficients[1] / 64.0 +\n"
" next_row.rgb * filter_coefficients[2] / 64.0, \n"
" float3(1, 1, 1)), current_row.a);\n");
}
else
{
out.Write(
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
" float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
" current_row.a);\n");
" float4 texcol = float4(min(current_row.rgb * filter_coefficients[1] / 64.0,\n"
" float3(1, 1, 1)), current_row.a);\n");
}
if (uid_data->is_depth_copy)

View File

@ -25,7 +25,9 @@ struct UidData
u32 is_depth_copy : 1;
u32 is_intensity : 1;
u32 scale_by_half : 1;
u32 copy_filter : 1;
u32 all_copy_filter_coefs_needed : 1;
u32 copy_filter_can_overflow : 1;
u32 apply_gamma : 1;
};
#pragma pack()
@ -35,7 +37,8 @@ ShaderCode GenerateVertexShader(APIType api_type);
ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data);
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half, bool copy_filter);
bool scale_by_half, float gamma_rcp,
const std::array<u32, 3>& filter_coefficients);
} // namespace TextureConversionShaderGen
@ -53,8 +56,10 @@ struct fmt::formatter<TextureConversionShaderGen::UidData>
dst_format = fmt::to_string(uid.dst_format);
return fmt::format_to(ctx.out(),
"dst_format: {}, efb_has_alpha: {}, is_depth_copy: {}, is_intensity: {}, "
"scale_by_half: {}, copy_filter: {}",
"scale_by_half: {}, all_copy_filter_coefs_needed: {}, "
"copy_filter_can_overflow: {}, apply_gamma: {}",
dst_format, uid.efb_has_alpha, uid.is_depth_copy, uid.is_intensity,
uid.scale_by_half, uid.copy_filter);
uid.scale_by_half, uid.all_copy_filter_coefs_needed,
uid.copy_filter_can_overflow, uid.apply_gamma);
}
};