diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index 6b10a34aed..3bee37060a 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -84,11 +84,25 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) WriteHeader(api_type, out); out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n" - " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), {}));\n" - "}}\n", + out.Write("uint4 SampleEFB(float3 uv, float y_offset) {{\n" + " float4 tex_sample = texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * " + "pixel_height), clamp_tb.x, clamp_tb.y), {}));\n", mono_depth ? "0.0" : "uv.z"); + if (uid_data->is_depth_copy) + { + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + out.Write(" tex_sample.x = 1.0 - tex_sample.x;\n"); + + out.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n" + " return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n" + "}}\n"); + } + else + { + out.Write(" return uint4(tex_sample * 255.0);\n" + "}}\n"); + } + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { out.Write("VARYING_LOCATION(0) in VertexData {{\n" @@ -99,6 +113,7 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) { out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n"); } + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n" "void main()\n{{\n"); @@ -106,191 +121,117 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) // The filter is only applied to the RGB channels, the alpha channel is left intact. if (uid_data->all_copy_filter_coefs_needed) { - out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n" - " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" - " float4 next_row = SampleEFB(v_tex0, 1.0f);\n" - " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] / 64.0 +\n" - " current_row.rgb * filter_coefficients[1] / 64.0 +\n" - " next_row.rgb * filter_coefficients[2] / 64.0, \n" - " float3(1, 1, 1)), current_row.a);\n"); + out.Write(" uint4 prev_row = SampleEFB(v_tex0, -1.0f);\n" + " uint4 current_row = SampleEFB(v_tex0, 0.0f);\n" + " uint4 next_row = SampleEFB(v_tex0, 1.0f);\n" + " uint3 combined_rows = prev_row.rgb * filter_coefficients[0] +\n" + " current_row.rgb * filter_coefficients[1] +\n" + " next_row.rgb * filter_coefficients[2];\n"); } else { - out.Write( - " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" - " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1] / 64.0,\n" - " float3(1, 1, 1)), current_row.a);\n"); + out.Write(" uint4 current_row = SampleEFB(v_tex0, 0.0f);\n" + " uint3 combined_rows = current_row.rgb * filter_coefficients[1];\n"); + } + out.Write(" // Shift right by 6 to divide by 64, as filter coefficients\n" + " // that sum to 64 result in no change in brightness\n" + " uint4 texcol_raw = uint4(combined_rows.rgb >> 6, {});\n", + uid_data->efb_has_alpha ? "current_row.a" : "255"); + + if (uid_data->copy_filter_can_overflow) + out.Write(" texcol_raw &= 0x1ffu;\n"); + // Note that overflow occurs when the sum of values is >= 128, but this max situation can be hit + // on >= 64, so we always include it. + out.Write(" texcol_raw = min(texcol_raw, uint4(255, 255, 255, 255));\n"); + + if (uid_data->apply_gamma) + { + out.Write(" texcol_raw = uint4(round(pow(abs(float4(texcol_raw) / 255.0),\n" + " float4(gamma_rcp, gamma_rcp, gamma_rcp, 1.0)) * 255.0));\n"); } - if (uid_data->is_depth_copy) + if (uid_data->is_intensity) { - if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) - out.Write("texcol.x = 1.0 - texcol.x;\n"); - - out.Write(" int depth = int(texcol.x * 16777216.0);\n" - - // Convert to Z24 format - " int4 workspace;\n" - " workspace.r = (depth >> 16) & 255;\n" - " workspace.g = (depth >> 8) & 255;\n" - " workspace.b = depth & 255;\n" - - // Convert to Z4 format - " workspace.a = (depth >> 16) & 0xF0;\n" - - // Normalize components to [0.0..1.0] - " texcol = float4(workspace) / 255.0;\n"); - switch (uid_data->dst_format) - { - case EFBCopyFormat::R4: // Z4 - out.Write(" ocol0 = texcol.aaaa;\n"); - break; - - case EFBCopyFormat::R8_0x1: // Z8 - case EFBCopyFormat::R8: // Z8H - out.Write(" ocol0 = texcol.rrrr;\n"); - break; - - case EFBCopyFormat::RA8: // Z16 - out.Write(" ocol0 = texcol.gggr;\n"); - break; - - case EFBCopyFormat::RG8: // Z16 (reverse order) - out.Write(" ocol0 = texcol.rrrg;\n"); - break; - - case EFBCopyFormat::RGBA8: // Z24X8 - out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n"); - break; - - case EFBCopyFormat::G8: // Z8M - out.Write(" ocol0 = texcol.gggg;\n"); - break; - - case EFBCopyFormat::B8: // Z8L - out.Write(" ocol0 = texcol.bbbb;\n"); - break; - - case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits - // expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits - // stored as alpha) - // Used e.g. in Zelda: Skyward Sword - out.Write(" ocol0 = texcol.gggb;\n"); - break; - - default: - ERROR_LOG_FMT(VIDEO, "Unknown copy zbuf format: {}", uid_data->dst_format); - out.Write(" ocol0 = float4(texcol.bgr, 0.0);\n"); - break; - } + out.Write(" // Intensity/YUV format conversion constants determined by hardware testing\n" + " const float4 y_const = float4( 66, 129, 25, 16);\n" + " const float4 u_const = float4(-38, -74, 112, 128);\n" + " const float4 v_const = float4(112, -94, -18, 128);\n" + " // Intensity/YUV format conversion\n" + " texcol_raw.rgb = uint3(dot(y_const, float4(texcol_raw.rgb, 256)),\n" + " dot(u_const, float4(texcol_raw.rgb, 256)),\n" + " dot(v_const, float4(texcol_raw.rgb, 256)));\n" + " // Divide by 256 and round .5 and higher up\n" + " texcol_raw.rgb = (texcol_raw.rgb >> 8) + ((texcol_raw.rgb >> 7) & 1);\n"); } - else if (uid_data->is_intensity) + + switch (uid_data->dst_format) { - if (!uid_data->efb_has_alpha) - out.Write(" texcol.a = 1.0;\n"); + case EFBCopyFormat::R4: // R4 + out.Write(" float red = float(texcol_raw.r & 0xF0u) / 240.0;\n" + " ocol0 = float4(red, red, red, red);\n"); + break; - bool has_four_bits = - (uid_data->dst_format == EFBCopyFormat::R4 || uid_data->dst_format == EFBCopyFormat::RA4); - bool has_alpha = - (uid_data->dst_format == EFBCopyFormat::RA4 || uid_data->dst_format == EFBCopyFormat::RA8); + case EFBCopyFormat::R8_0x1: // R8 + case EFBCopyFormat::R8: // R8 + out.Write(" ocol0 = float4(texcol_raw).rrrr / 255.0;\n"); + break; - switch (uid_data->dst_format) - { - case EFBCopyFormat::R4: // I4 - case EFBCopyFormat::R8_0x1: // I8 - case EFBCopyFormat::R8: // I8 - case EFBCopyFormat::RA4: // IA4 - case EFBCopyFormat::RA8: // IA8 - if (has_four_bits) - out.Write(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n"); + case EFBCopyFormat::RA4: // RA4 + out.Write(" float2 red_alpha = float2(texcol_raw.ra & 0xF0u) / 240.0;\n" + " ocol0 = red_alpha.rrrg;\n"); + break; - // TODO - verify these coefficients - out.Write(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n" - " float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n" - " ocol0 = float4(intensity, intensity, intensity, {});\n", - has_alpha ? "texcol.a" : "intensity"); - break; + case EFBCopyFormat::RA8: // RA8 + out.Write(" ocol0 = float4(texcol_raw).rrra / 255.0;\n"); + break; - default: - ERROR_LOG_FMT(VIDEO, "Unknown copy intensity format: {}", uid_data->dst_format); - out.Write(" ocol0 = texcol;\n"); - break; - } - } - else - { - if (!uid_data->efb_has_alpha) - out.Write(" texcol.a = 1.0;\n"); + case EFBCopyFormat::A8: // A8 + out.Write(" ocol0 = float4(texcol_raw).aaaa / 255.0;\n"); + break; - switch (uid_data->dst_format) - { - case EFBCopyFormat::R4: // R4 - out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n" - " ocol0 = float4(red, red, red, red);\n"); - break; + case EFBCopyFormat::G8: // G8 + out.Write(" ocol0 = float4(texcol_raw).gggg / 255.0;\n"); + break; - case EFBCopyFormat::R8_0x1: // R8 - case EFBCopyFormat::R8: // R8 - out.Write(" ocol0 = texcol.rrrr;\n"); - break; + case EFBCopyFormat::B8: // B8 + out.Write(" ocol0 = float4(texcol_raw).bbbb / 255.0;\n"); + break; - case EFBCopyFormat::RA4: // RA4 - out.Write(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n" - " ocol0 = red_alpha.rrrg;\n"); - break; + case EFBCopyFormat::RG8: // RG8 + out.Write(" ocol0 = float4(texcol_raw).rrrg / 255.0;\n"); + break; - case EFBCopyFormat::RA8: // RA8 - out.Write(" ocol0 = texcol.rrra;\n"); - break; + case EFBCopyFormat::GB8: // GB8 + out.Write(" ocol0 = float4(texcol_raw).gggb / 255.0;\n"); + break; - case EFBCopyFormat::A8: // A8 - out.Write(" ocol0 = texcol.aaaa;\n"); - break; + case EFBCopyFormat::RGB565: // RGB565 + out.Write(" float2 red_blue = float2(texcol_raw.rb & 0xF8u) / 248.0;\n" + " float green = float(texcol_raw.g & 0xFCu) / 252.0;\n" + " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n"); + break; - case EFBCopyFormat::G8: // G8 - out.Write(" ocol0 = texcol.gggg;\n"); - break; + case EFBCopyFormat::RGB5A3: // RGB5A3 + // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection + // will need to be implemented once we move away from floats. + out.Write(" float3 color = float3(texcol_raw.rgb & 0xF8u) / 248.0;\n" + " float alpha = float(texcol_raw.a & 0xE0u) / 224.0;\n" + " ocol0 = float4(color, alpha);\n"); + break; - case EFBCopyFormat::B8: // B8 - out.Write(" ocol0 = texcol.bbbb;\n"); - break; + case EFBCopyFormat::RGBA8: // RGBA8 + out.Write(" ocol0 = float4(texcol_raw.rgba) / 255.0;\n"); + break; - case EFBCopyFormat::RG8: // RG8 - out.Write(" ocol0 = texcol.rrrg;\n"); - break; + case EFBCopyFormat::XFB: + out.Write(" ocol0 = float4(float3(texcol_raw.rgb) / 255.0, 1.0);\n"); + break; - case EFBCopyFormat::GB8: // GB8 - out.Write(" ocol0 = texcol.gggb;\n"); - break; - - case EFBCopyFormat::RGB565: // RGB565 - out.Write(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n" - " float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n" - " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n"); - break; - - case EFBCopyFormat::RGB5A3: // RGB5A3 - // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection - // will need to be implemented once we move away from floats. - out.Write(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n" - " float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n" - " ocol0 = float4(color, alpha);\n"); - break; - - case EFBCopyFormat::RGBA8: // RGBA8 - out.Write(" ocol0 = texcol;\n"); - break; - - case EFBCopyFormat::XFB: - out.Write(" ocol0 = float4(pow(abs(texcol.rgb), float3(gamma_rcp, gamma_rcp, gamma_rcp)), " - "1.0f);\n"); - break; - - default: - ERROR_LOG_FMT(VIDEO, "Unknown copy color format: {}", uid_data->dst_format); - out.Write(" ocol0 = texcol;\n"); - break; - } + default: + ERROR_LOG_FMT(VIDEO, "Unknown copy/intensity color format: {} {}", uid_data->dst_format, + uid_data->is_intensity); + out.Write(" ocol0 = float4(texcol_raw.rgba) / 255.0;\n"); + break; } out.Write("}}\n");