From 2edf81cdb054c1db7a32e45c87514f8d4c11f5d9 Mon Sep 17 00:00:00 2001 From: Filoppi Date: Tue, 27 Jun 2023 03:23:08 +0300 Subject: [PATCH 1/5] Video: implement output resampling (upscaling/downscaling) methods --- .../Sys/Shaders/default_pre_post_process.glsl | 289 +++++++++++++++++- Data/Sys/Shaders/sharp_bilinear.glsl | 47 --- Source/Core/Core/Config/GraphicsSettings.cpp | 2 + Source/Core/Core/Config/GraphicsSettings.h | 2 + .../Config/Graphics/EnhancementsWidget.cpp | 47 +++ .../Config/Graphics/EnhancementsWidget.h | 1 + Source/Core/VideoCommon/PostProcessing.cpp | 41 ++- Source/Core/VideoCommon/VideoConfig.cpp | 1 + Source/Core/VideoCommon/VideoConfig.h | 12 + 9 files changed, 371 insertions(+), 71 deletions(-) delete mode 100644 Data/Sys/Shaders/sharp_bilinear.glsl diff --git a/Data/Sys/Shaders/default_pre_post_process.glsl b/Data/Sys/Shaders/default_pre_post_process.glsl index 58ed50d53c..1a1c3ecbe1 100644 --- a/Data/Sys/Shaders/default_pre_post_process.glsl +++ b/Data/Sys/Shaders/default_pre_post_process.glsl @@ -1,4 +1,4 @@ -// References: +// Color Space references: // https://www.unravel.com.au/understanding-color-spaces // SMPTE 170M - BT.601 (NTSC-M) -> BT.709 @@ -21,8 +21,8 @@ mat3 from_PAL = transpose(mat3( float3 LinearTosRGBGamma(float3 color) { - float a = 0.055; - + const float a = 0.055; + for (int i = 0; i < 3; ++i) { float x = color[i]; @@ -36,17 +36,284 @@ float3 LinearTosRGBGamma(float3 color) return color; } +// Non filtered gamma corrected sample (nearest neighbor) +float4 QuickSample(float3 uvw, float gamma) +{ + float4 color = texture(samp1, uvw); + color.rgb = pow(color.rgb, float3(gamma)); + return color; +} + +float4 QuickSample(float2 uv, float w, float gamma) +{ + return QuickSample(float3(uv, w), gamma); +} + +float4 BilinearSample(float3 uvw, float gamma) +{ + // This emulates the (bi)linear filtering done directly from GPUs HW. + // Note that GPUs might natively filter red green and blue differently, but we don't do it. 
+ // They might also use different filtering between upscaling and downscaling. + + float2 source_size = GetResolution(); + float2 inverted_source_size = GetInvResolution(); + float2 pixel = (uvw.xy * source_size) - 0.5; // Try to find the matching pixel top left corner + + // Find the integer and floating point parts + float2 int_pixel = floor(pixel); + float2 frac_pixel = fract(pixel); + + // Take 4 samples around the original uvw + float4 c11 = QuickSample((int_pixel + float2(0.5, 0.5)) * inverted_source_size, uvw.z, gamma); + float4 c21 = QuickSample((int_pixel + float2(1.5, 0.5)) * inverted_source_size, uvw.z, gamma); + float4 c12 = QuickSample((int_pixel + float2(0.5, 1.5)) * inverted_source_size, uvw.z, gamma); + float4 c22 = QuickSample((int_pixel + float2(1.5, 1.5)) * inverted_source_size, uvw.z, gamma); + + // Blend the 4 samples by their weight + return lerp(lerp(c11, c21, frac_pixel.x), lerp(c12, c22, frac_pixel.x), frac_pixel.y); +} + +// Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang +// by Themaister, Public Domain license +// Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale, +// giving a sharper image than plain bilinear. +float4 SharpBilinearSample(float3 uvw, float gamma) +{ + float2 source_size = GetResolution(); + float2 inverted_source_size = GetInvResolution(); + float2 target_size = GetWindowResolution(); + float2 texel = uvw.xy * source_size; + float2 texel_floored = floor(texel); + float2 s = fract(texel); + float scale = ceil(max(target_size.x * inverted_source_size.x, target_size.y * inverted_source_size.y)); + float region_range = 0.5 - (0.5 / scale); + + // Figure out where in the texel to sample to get correct pre-scaled bilinear. 
+ + float2 center_dist = s - 0.5; + float2 f = ((center_dist - clamp(center_dist, -region_range, region_range)) * scale) + 0.5; + + float2 mod_texel = texel_floored + f; + + uvw.xy = mod_texel * inverted_source_size; + return BilinearSample(uvw, gamma); +} + +float4 Cubic(float v) +{ + float4 n = float4(1.0, 2.0, 3.0, 4.0) - v; + float4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = 6.0 - x - y - z; + return float4(x, y, z, w) * (1.0 / 6.0); +} + +// https://stackoverflow.com/questions/13501081/efficient-bicubic-filtering-code-in-glsl +float4 BicubicSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float gamma) +{ + float2 pixel = (uvw.xy * in_source_resolution) - 0.5; + float2 int_pixel = floor(pixel); + float2 frac_pixel = fract(pixel); + + float4 xcubic = Cubic(frac_pixel.x); + float4 ycubic = Cubic(frac_pixel.y); + + float4 c = float4(int_pixel.x - 0.5, int_pixel.x + 1.5, int_pixel.y - 0.5, int_pixel.y + 1.5); + float4 s = float4(xcubic.x + xcubic.y, xcubic.z + xcubic.w, ycubic.x + ycubic.y, ycubic.z + ycubic.w); + float4 offset = c + float4(xcubic.y, xcubic.w, ycubic.y, ycubic.w) / s; + + offset *= float4(in_inverted_source_resolution.x, in_inverted_source_resolution.x, in_inverted_source_resolution.y, in_inverted_source_resolution.y); + + float4 sample0 = QuickSample(offset.xz, uvw.z, gamma); + float4 sample1 = QuickSample(offset.yz, uvw.z, gamma); + float4 sample2 = QuickSample(offset.xw, uvw.z, gamma); + float4 sample3 = QuickSample(offset.yw, uvw.z, gamma); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return lerp(lerp(sample3, sample2, sx), lerp(sample1, sample0, sx), sy); +} + +float4 CubicHermite(float4 A, float4 B, float4 C, float4 D, float t) +{ + float t2 = t * t; + float t3 = t * t * t; + float4 a = (-A / 2.0) + ((3.0 * B) / 2.0) - ((3.0 * C) / 2.0) + (D / 2.0); + float4 b = A - ((5.0 * B) / 2.0 ) + (2.0 * C) - (D / 2.0); + 
float4 c = (-A / 2.0) + (C / 2.0); + float4 d = B; + + return (a * t3) + (b * t2) + (c * t) + d; +} + +float4 BicubicHermiteSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float gamma) +{ + float2 pixel = (uvw.xy * in_source_resolution) + 0.5; + float2 frac_pixel = fract(pixel); + float2 uv = (floor(pixel) * in_inverted_source_resolution) - (in_inverted_source_resolution / 2.0); + + float2 inverted_source_resolution_double = in_inverted_source_resolution * 2.0; + + float4 c00 = QuickSample(uv + float2(-in_inverted_source_resolution.x, -in_inverted_source_resolution.y), uvw.z, gamma); + float4 c10 = QuickSample(uv + float2( 0.0, -in_inverted_source_resolution.y), uvw.z, gamma); + float4 c20 = QuickSample(uv + float2( in_inverted_source_resolution.x, -in_inverted_source_resolution.y), uvw.z, gamma); + float4 c30 = QuickSample(uv + float2( inverted_source_resolution_double.x, -in_inverted_source_resolution.y), uvw.z, gamma); + + float4 c01 = QuickSample(uv + float2(-in_inverted_source_resolution.x, 0.0), uvw.z, gamma); + float4 c11 = QuickSample(uv + float2( 0.0, 0.0), uvw.z, gamma); + float4 c21 = QuickSample(uv + float2( in_inverted_source_resolution.x, 0.0), uvw.z, gamma); + float4 c31 = QuickSample(uv + float2( inverted_source_resolution_double.x, 0.0), uvw.z, gamma); + + float4 c02 = QuickSample(uv + float2(-in_inverted_source_resolution.x, in_inverted_source_resolution.y), uvw.z, gamma); + float4 c12 = QuickSample(uv + float2( 0.0, in_inverted_source_resolution.y), uvw.z, gamma); + float4 c22 = QuickSample(uv + float2( in_inverted_source_resolution.x, in_inverted_source_resolution.y), uvw.z, gamma); + float4 c32 = QuickSample(uv + float2( inverted_source_resolution_double.x, in_inverted_source_resolution.y), uvw.z, gamma); + + float4 c03 = QuickSample(uv + float2(-in_inverted_source_resolution.x, inverted_source_resolution_double.y), uvw.z, gamma); + float4 c13 = QuickSample(uv + float2( 0.0, 
inverted_source_resolution_double.y), uvw.z, gamma); + float4 c23 = QuickSample(uv + float2( in_inverted_source_resolution.x, inverted_source_resolution_double.y), uvw.z, gamma); + float4 c33 = QuickSample(uv + float2( inverted_source_resolution_double.x, inverted_source_resolution_double.y), uvw.z, gamma); + + float4 cp0x = CubicHermite(c00, c10, c20, c30, frac_pixel.x); + float4 cp1x = CubicHermite(c01, c11, c21, c31, frac_pixel.x); + float4 cp2x = CubicHermite(c02, c12, c22, c32, frac_pixel.x); + float4 cp3x = CubicHermite(c03, c13, c23, c33, frac_pixel.x); + + return CubicHermite(cp0x, cp1x, cp2x, cp3x, frac_pixel.y); +} + +float CatmullRom(float B, float C, float x) +{ + float f = x; + + if (f < 0.0) + f = -f; + + if (f < 1.0) + { + return ((12 - 9 * B - 6 * C) * (f * f * f) + + (-18 + 12 * B + 6 * C) * (f * f) + + (6 - 2 * B)) / 6.0; + } + else if (f >= 1.0 && f < 2.0) + { + return ((-B - 6 * C) * (f * f * f) + + (6 * B + 30 * C) * (f * f) + + ( - (12 * B) - 48 * C) * f + + 8 * B + 24 * C) / 6.0; + } + else + { + return 0.0; + } +} + +// https://www.codeproject.com/Articles/236394/Bi-Cubic-and-Bi-Linear-Interpolation-with-GLSL +// https://github.com/ValveSoftware/gamescope/pull/740 +float4 BicubicCatmullRomSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float gamma) +{ + const float offset = 0.5; + float2 pixel = (uvw.xy * in_source_resolution) - offset; + float2 int_pixel = floor(pixel); + float2 frac_pixel = fract(pixel); + float2 int_uvw = (int_pixel + offset) * in_inverted_source_resolution; + + // B and C can be any value between 0 and 1, + // though they are meant to be 0 and 0.5 for Catmull-Rom. + // https://en.wikipedia.org/wiki/Mitchell%E2%80%93Netravali_filters + // https://guideencodemoe-mkdocs.readthedocs.io/encoding/resampling/ + const float B = 0.0; + const float C = 0.5; + + // Take 16 (4x4) samples, each with a different weight. + // This loop can be replaced with any other bicubic formula (e.g. Hermite). 
+ float4 color_sum = float4(0.0, 0.0, 0.0, 0.0); + float4 color_denominator = float4(0.0, 0.0, 0.0, 0.0); + for (int m = -1; m <= 2; m++) + { + for (int n = -1; n <= 2; n++) + { + float4 color = QuickSample(int_uvw + (float2(m, n) * in_inverted_source_resolution), uvw.z, gamma); + float f1 = CatmullRom(B, C, float(m) - frac_pixel.x); + float f2 = CatmullRom(B, C, -float(n) + frac_pixel.y); + float4 cooef1 = float4(f1, f1, f1, f1); + float4 cooef2 = float4(f2, f2, f2, f2); + color_sum += color * (cooef2 * cooef1); + color_denominator += cooef2 * cooef1; + } + } + return color_sum / color_denominator; +} + +// Returns an accurate (gamma corrected) sample of a gamma space space texture. +// Outputs in linear space for simplicity. +float4 LinearGammaCorrectedSample(float gamma) +{ + float3 uvw = v_tex0; + float4 color = float4(0, 0, 0, 1); + + if (resampling_method <= 1) // Bilinear + { + color = BilinearSample(uvw, gamma); + } + else if (resampling_method == 2) // "Simple" Bicubic + { + color = BicubicSample(uvw, GetResolution(), GetInvResolution(), gamma); + } + else if (resampling_method == 3) // Hermite + { + color = BicubicHermiteSample(uvw, GetResolution(), GetInvResolution(), gamma); + } + else if (resampling_method == 4) // Catmull-Rom + { + color = BicubicCatmullRomSample(uvw, GetResolution(), GetInvResolution(), gamma); + } + else if (resampling_method == 5) // Nearest Neighbor + { + color = QuickSample(uvw, gamma); + } + else if (resampling_method == 6) // Sharp Bilinear + { + color = SharpBilinearSample(uvw, gamma); + } + + return color; +} + void main() { - // Note: sampling in gamma space is "wrong" if the source - // and target resolution don't match exactly. - // Fortunately at the moment here they always should but to do this correctly, - // we'd need to sample from 4 pixels, de-apply the gamma from each of these, - // and then do linear sampling on their corrected value. 
- float4 color = Sample(); + // This tries to fall back on GPU HW sampling if it can (it won't be gamma corrected). + bool raw_resampling = resampling_method <= 0; + bool needs_rescaling = GetResolution() != GetWindowResolution(); - // Convert to linear space to do any other kind of operation - color.rgb = pow(color.rgb, float3(game_gamma)); + bool needs_resampling = needs_rescaling && (OptionEnabled(hdr_output) || OptionEnabled(correct_gamma) || !raw_resampling); + + float4 color; + + if (needs_resampling) + { + // Doing linear sampling in "gamma space" on linear texture formats isn't correct. + // If the source and target resolutions don't match, the GPU will return a color + // that is the average of 4 gamma space colors, but gamma space colors can't be blended together, + // gamma neeeds to be de-applied first. This makes a big difference if colors change + // drastically between two pixels. + + color = LinearGammaCorrectedSample(game_gamma); + } + else + { + // Default GPU HW sampling. Bilinear is identical to Nearest Neighbor if the input and output resolutions match. + if (needs_rescaling) + color = texture(samp0, v_tex0); + else + color = texture(samp1, v_tex0); + + // Convert to linear before doing any other of follow up operations. + color.rgb = pow(color.rgb, float3(game_gamma)); + } if (OptionEnabled(correct_color_space)) { diff --git a/Data/Sys/Shaders/sharp_bilinear.glsl b/Data/Sys/Shaders/sharp_bilinear.glsl deleted file mode 100644 index e94748b959..0000000000 --- a/Data/Sys/Shaders/sharp_bilinear.glsl +++ /dev/null @@ -1,47 +0,0 @@ -// Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang -// by Themaister, Public Domain license -// Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale, -// giving a sharper image than plain bilinear. 
- -/* -[configuration] -[OptionRangeFloat] -GUIName = Prescale Factor (set to 0 for automatic) -OptionName = PRESCALE_FACTOR -MinValue = 0.0 -MaxValue = 16.0 -StepAmount = 1.0 -DefaultValue = 0.0 -[/configuration] -*/ - -float CalculatePrescale(float config_scale) { - if (config_scale == 0.0) { - float2 source_size = GetResolution(); - float2 window_size = GetWindowResolution(); - return ceil(max(window_size.x / source_size.x, window_size.y / source_size.y)); - } else { - return config_scale; - } -} - -void main() -{ - float2 source_size = GetResolution(); - float2 texel = GetCoordinates() * source_size; - float2 texel_floored = floor(texel); - float2 s = fract(texel); - float config_scale = GetOption(PRESCALE_FACTOR); - float scale = CalculatePrescale(config_scale); - float region_range = 0.5 - 0.5 / scale; - - // Figure out where in the texel to sample to get correct pre-scaled bilinear. - // Uses the hardware bilinear interpolator to avoid having to sample 4 times manually. - - float2 center_dist = s - 0.5; - float2 f = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5; - - float2 mod_texel = texel_floored + f; - - SetOutput(SampleLocation(mod_texel / source_size)); -} diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 9a0dfbcd26..ad0059ca6d 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -118,6 +118,8 @@ const Info GFX_DRIVER_LIB_NAME{{System::GFX, "Settings", "DriverLib const Info GFX_ENHANCE_FORCE_TEXTURE_FILTERING{ {System::GFX, "Enhancements", "ForceTextureFiltering"}, TextureFilteringMode::Default}; const Info GFX_ENHANCE_MAX_ANISOTROPY{{System::GFX, "Enhancements", "MaxAnisotropy"}, 0}; +const Info GFX_ENHANCE_OUTPUT_RESAMPLING{ + {System::GFX, "Enhancements", "OutputResampling"}, OutputResamplingMode::Default}; const Info GFX_ENHANCE_POST_SHADER{ {System::GFX, "Enhancements", "PostProcessingShader"}, ""}; 
const Info GFX_ENHANCE_FORCE_TRUE_COLOR{{System::GFX, "Enhancements", "ForceTrueColor"}, diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 009b219136..165114e419 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -11,6 +11,7 @@ enum class AspectMode : int; enum class ShaderCompilationMode : int; enum class StereoMode : int; enum class TextureFilteringMode : int; +enum class OutputResamplingMode : int; enum class ColorCorrectionRegion : int; enum class TriState : int; @@ -101,6 +102,7 @@ extern const Info GFX_MODS_ENABLE; extern const Info GFX_ENHANCE_FORCE_TEXTURE_FILTERING; extern const Info GFX_ENHANCE_MAX_ANISOTROPY; // NOTE - this is x in (1 << x) +extern const Info GFX_ENHANCE_OUTPUT_RESAMPLING; extern const Info GFX_ENHANCE_POST_SHADER; extern const Info GFX_ENHANCE_FORCE_TRUE_COLOR; extern const Info GFX_ENHANCE_DISABLE_COPY_FILTER; diff --git a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp index 47e22ade53..67fd7da7b9 100644 --- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp @@ -105,6 +105,22 @@ void EnhancementsWidget::CreateWidgets() m_texture_filtering_combo->addItem(tr("Force Linear and 16x Anisotropic"), TEXTURE_FILTERING_FORCE_LINEAR_ANISO_16X); + m_output_resampling_combo = new ToolTipComboBox(); + m_output_resampling_combo->addItem(tr("Default"), + static_cast(OutputResamplingMode::Default)); + m_output_resampling_combo->addItem(tr("Bilinear"), + static_cast(OutputResamplingMode::Bilinear)); + m_output_resampling_combo->addItem(tr("Bicubic"), + static_cast(OutputResamplingMode::Bicubic)); + m_output_resampling_combo->addItem(tr("Hermite"), + static_cast(OutputResamplingMode::Hermite)); + m_output_resampling_combo->addItem(tr("Catmull-Rom"), + 
static_cast(OutputResamplingMode::CatmullRom)); + m_output_resampling_combo->addItem(tr("Nearest Neighbor"), + static_cast(OutputResamplingMode::NearestNeighbor)); + m_output_resampling_combo->addItem(tr("Sharp Bilinear"), + static_cast(OutputResamplingMode::SharpBilinear)); + m_configure_color_correction = new ToolTipPushButton(tr("Configure")); m_pp_effect = new ToolTipComboBox(); @@ -136,6 +152,10 @@ void EnhancementsWidget::CreateWidgets() enhancements_layout->addWidget(m_texture_filtering_combo, row, 1, 1, -1); ++row; + enhancements_layout->addWidget(new QLabel(tr("Output Resampling:")), row, 0); + enhancements_layout->addWidget(m_output_resampling_combo, row, 1, 1, -1); + ++row; + enhancements_layout->addWidget(new QLabel(tr("Color Correction:")), row, 0); enhancements_layout->addWidget(m_configure_color_correction, row, 1, 1, -1); ++row; @@ -195,6 +215,8 @@ void EnhancementsWidget::ConnectWidgets() [this](int) { SaveSettings(); }); connect(m_texture_filtering_combo, qOverload(&QComboBox::currentIndexChanged), [this](int) { SaveSettings(); }); + connect(m_output_resampling_combo, qOverload(&QComboBox::currentIndexChanged), + [this](int) { SaveSettings(); }); connect(m_pp_effect, qOverload(&QComboBox::currentIndexChanged), [this](int) { SaveSettings(); }); connect(m_3d_mode, qOverload(&QComboBox::currentIndexChanged), [this] { @@ -325,6 +347,14 @@ void EnhancementsWidget::LoadSettings() break; } + // Resampling + const OutputResamplingMode output_resampling_mode = + Config::Get(Config::GFX_ENHANCE_OUTPUT_RESAMPLING); + m_output_resampling_combo->setCurrentIndex(static_cast(output_resampling_mode)); + + m_output_resampling_combo->setEnabled(g_Config.backend_info.bSupportsPostProcessing); + + // Color Correction m_configure_color_correction->setEnabled(g_Config.backend_info.bSupportsPostProcessing); // Post Processing Shader @@ -413,6 +443,10 @@ void EnhancementsWidget::SaveSettings() break; } + const int output_resampling_selection = 
m_output_resampling_combo->currentData().toInt(); + Config::SetBaseOrCurrent(Config::GFX_ENHANCE_OUTPUT_RESAMPLING, + static_cast(output_resampling_selection)); + const bool anaglyph = g_Config.stereo_mode == StereoMode::Anaglyph; const bool passive = g_Config.stereo_mode == StereoMode::Passive; Config::SetBaseOrCurrent(Config::GFX_ENHANCE_POST_SHADER, @@ -455,6 +489,16 @@ void EnhancementsWidget::AddDescriptions() "scaling filter selected by the game.

Any option except 'Default' will alter the look " "of the game's textures and might cause issues in a small number of " "games.

If unsure, select 'Default'."); + static const char TR_OUTPUT_RESAMPLING_DESCRIPTION[] = QT_TR_NOOP( + "Affects how the game output image is upscaled or downscaled to the window resolution.
" + "\"Default\" will rely on the GPU internal bilinear sampler which isn't gamma corrected." + "
\"Bilinear\" (gamma corrected) is a good compromise between quality and performance." + "
\"Bicubic\" is smoother than \"Bilinear\"." + "
\"Hermite\" might offer the best quality when upscaling," + " at a slightly bigger perform cost.
\"Catmull-Rom\" is best for downscaling." + "
\"Nearest Neighbor\" doesn't do any resampling, select if you like a pixelated look." + "
\"Sharp Bilinear\" works best with 2D games at low resolutions, use if you like a" + " sharp look.

If unsure, select 'Default'."); static const char TR_COLOR_CORRECTION_DESCRIPTION[] = QT_TR_NOOP("A group of features to make the colors more accurate, matching the color space " "Wii and GC games were meant for."); @@ -537,6 +581,9 @@ void EnhancementsWidget::AddDescriptions() m_texture_filtering_combo->SetTitle(tr("Texture Filtering")); m_texture_filtering_combo->SetDescription(tr(TR_FORCE_TEXTURE_FILTERING_DESCRIPTION)); + m_output_resampling_combo->SetTitle(tr("Output Resampling")); + m_output_resampling_combo->SetDescription(tr(TR_OUTPUT_RESAMPLING_DESCRIPTION)); + m_configure_color_correction->SetTitle(tr("Color Correction")); m_configure_color_correction->SetDescription(tr(TR_COLOR_CORRECTION_DESCRIPTION)); diff --git a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h index 4d2cdcd5b5..34879d17b7 100644 --- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h @@ -39,6 +39,7 @@ private: ConfigChoice* m_ir_combo; ToolTipComboBox* m_aa_combo; ToolTipComboBox* m_texture_filtering_combo; + ToolTipComboBox* m_output_resampling_combo; ToolTipComboBox* m_pp_effect; ToolTipPushButton* m_configure_color_correction; QPushButton* m_configure_pp_effect; diff --git a/Source/Core/VideoCommon/PostProcessing.cpp b/Source/Core/VideoCommon/PostProcessing.cpp index c3a245629f..1de4c311c2 100644 --- a/Source/Core/VideoCommon/PostProcessing.cpp +++ b/Source/Core/VideoCommon/PostProcessing.cpp @@ -486,23 +486,29 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, MathUtil::Rectangle src_rect = src; g_gfx->SetSamplerState(0, RenderState::GetLinearSamplerState()); + g_gfx->SetSamplerState(1, RenderState::GetPointSamplerState()); g_gfx->SetTexture(0, src_tex); + g_gfx->SetTexture(1, src_tex); - const bool is_color_correction_active = IsColorCorrectionActive(); + const bool needs_color_correction = 
IsColorCorrectionActive(); + // Rely on the default (bi)linear sampler with the default mode + // (it might not be gamma corrected). + const bool needs_resampling = + g_ActiveConfig.output_resampling_mode > OutputResamplingMode::Default; const bool needs_intermediary_buffer = NeedsIntermediaryBuffer(); + const bool needs_default_pipeline = needs_color_correction || needs_resampling; const AbstractPipeline* final_pipeline = m_pipeline.get(); std::vector* uniform_staging_buffer = &m_default_uniform_staging_buffer; bool default_uniform_staging_buffer = true; + const MathUtil::Rectangle present_rect = g_presenter->GetTargetRectangle(); // Intermediary pass. - // We draw to a high quality intermediary texture for two reasons: + // We draw to a high quality intermediary texture for a couple reasons: + // -Consistently do high quality gamma corrected resampling (upscaling/downscaling) // -Keep quality for gamma and gamut conversions, and HDR output // (low bit depths lose too much quality with gamma conversions) - // -We make a texture of the exact same res as the source one, - // because all the post process shaders we already had assume that - // the source texture size (EFB) is different from the swap chain - // texture size (which matches the window size). - if (m_default_pipeline && is_color_correction_active && needs_intermediary_buffer) + // -Keep the post process phase in linear space, to better operate with colors + if (m_default_pipeline && needs_default_pipeline && needs_intermediary_buffer) { AbstractFramebuffer* const previous_framebuffer = g_gfx->GetCurrentFramebuffer(); @@ -512,13 +518,18 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, // so it would be a waste to allocate two layers (see "bUsesExplictQuadBuffering"). const u32 target_layers = copy_all_layers ? src_tex->GetLayers() : 1; + const u32 target_width = + needs_resampling ? 
present_rect.GetWidth() : static_cast(src_rect.GetWidth()); + const u32 target_height = + needs_resampling ? present_rect.GetHeight() : static_cast(src_rect.GetHeight()); + if (!m_intermediary_frame_buffer || !m_intermediary_color_texture || - m_intermediary_color_texture.get()->GetWidth() != static_cast(src_rect.GetWidth()) || - m_intermediary_color_texture.get()->GetHeight() != static_cast(src_rect.GetHeight()) || + m_intermediary_color_texture.get()->GetWidth() != target_width || + m_intermediary_color_texture.get()->GetHeight() != target_height || m_intermediary_color_texture.get()->GetLayers() != target_layers) { const TextureConfig intermediary_color_texture_config( - src_rect.GetWidth(), src_rect.GetHeight(), 1, target_layers, src_tex->GetSamples(), + target_width, target_height, 1, target_layers, src_tex->GetSamples(), s_intermediary_buffer_format, AbstractTextureFlag_RenderTarget); m_intermediary_color_texture = g_gfx->CreateTexture(intermediary_color_texture_config, "Intermediary post process texture"); @@ -530,7 +541,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, g_gfx->SetFramebuffer(m_intermediary_frame_buffer.get()); FillUniformBuffer(src_rect, src_tex, src_layer, g_gfx->GetCurrentFramebuffer()->GetRect(), - g_presenter->GetTargetRectangle(), uniform_staging_buffer->data(), + present_rect, uniform_staging_buffer->data(), !default_uniform_staging_buffer); g_vertex_manager->UploadUtilityUniforms(uniform_staging_buffer->data(), static_cast(uniform_staging_buffer->size())); @@ -544,6 +555,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, src_rect = m_intermediary_color_texture->GetRect(); src_tex = m_intermediary_color_texture.get(); g_gfx->SetTexture(0, src_tex); + g_gfx->SetTexture(1, src_tex); // The "m_intermediary_color_texture" has already copied // from the specified source layer onto its first one. 
// If we query for a layer that the source texture doesn't have, @@ -557,7 +569,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, // If we have no custom user shader selected, and color correction // is active, directly run the fixed pipeline shader instead of // doing two passes, with the second one doing nothing useful. - if (m_default_pipeline && is_color_correction_active) + if (m_default_pipeline && needs_default_pipeline) { final_pipeline = m_default_pipeline.get(); } @@ -580,7 +592,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, if (final_pipeline) { FillUniformBuffer(src_rect, src_tex, src_layer, g_gfx->GetCurrentFramebuffer()->GetRect(), - g_presenter->GetTargetRectangle(), uniform_staging_buffer->data(), + present_rect, uniform_staging_buffer->data(), !default_uniform_staging_buffer); g_vertex_manager->UploadUtilityUniforms(uniform_staging_buffer->data(), static_cast(uniform_staging_buffer->size())); @@ -610,6 +622,7 @@ std::string PostProcessing::GetUniformBufferHeader(bool user_post_process) const ss << " int src_layer;\n"; ss << " uint time;\n"; + ss << " int resampling_method;\n"; ss << " int correct_color_space;\n"; ss << " int game_color_space;\n"; ss << " int correct_gamma;\n"; @@ -816,6 +829,7 @@ struct BuiltinUniforms std::array src_rect; s32 src_layer; u32 time; + s32 resampling_method; s32 correct_color_space; s32 game_color_space; s32 correct_gamma; @@ -861,6 +875,7 @@ void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle& src, builtin_uniforms.src_layer = static_cast(src_layer); builtin_uniforms.time = static_cast(m_timer.ElapsedMs()); + builtin_uniforms.resampling_method = static_cast(g_ActiveConfig.output_resampling_mode); // Color correction related uniforms. // These are mainly used by the "m_default_pixel_shader", // but should also be accessible to all other shaders. 
diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 42bf178047..83dc5eb53e 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -133,6 +133,7 @@ void VideoConfig::Refresh() texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING); iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY); + output_resampling_mode = Config::Get(Config::GFX_ENHANCE_OUTPUT_RESAMPLING); sPostProcessingShader = Config::Get(Config::GFX_ENHANCE_POST_SHADER); bForceTrueColor = Config::Get(Config::GFX_ENHANCE_FORCE_TRUE_COLOR); bDisableCopyFilter = Config::Get(Config::GFX_ENHANCE_DISABLE_COPY_FILTER); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 20f4aa8a28..affbf78dec 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -52,6 +52,17 @@ enum class TextureFilteringMode : int Linear, }; +enum class OutputResamplingMode : int +{ + Default, + Bilinear, + Bicubic, + Hermite, + CatmullRom, + NearestNeighbor, + SharpBilinear, +}; + enum class ColorCorrectionRegion : int { SMPTE_NTSCM, @@ -103,6 +114,7 @@ struct VideoConfig final bool bSSAA = false; int iEFBScale = 0; TextureFilteringMode texture_filtering_mode = TextureFilteringMode::Default; + OutputResamplingMode output_resampling_mode = OutputResamplingMode::Default; int iMaxAnisotropy = 0; std::string sPostProcessingShader; bool bForceTrueColor = false; From ca93a5191fef40947a6fac935ce814fca8a2a295 Mon Sep 17 00:00:00 2001 From: Sam Belliveau Date: Wed, 28 Jun 2023 16:14:26 -0400 Subject: [PATCH 2/5] Video: Added Box Resampling Edited by Filoppi --- .../Sys/Shaders/default_pre_post_process.glsl | 125 ++++++++++++++++-- .../Config/Graphics/EnhancementsWidget.cpp | 8 +- Source/Core/VideoCommon/VideoConfig.h | 1 + 3 files changed, 122 insertions(+), 12 deletions(-) diff --git a/Data/Sys/Shaders/default_pre_post_process.glsl 
b/Data/Sys/Shaders/default_pre_post_process.glsl index 1a1c3ecbe1..91bec1a42e 100644 --- a/Data/Sys/Shaders/default_pre_post_process.glsl +++ b/Data/Sys/Shaders/default_pre_post_process.glsl @@ -54,7 +54,7 @@ float4 BilinearSample(float3 uvw, float gamma) // This emulates the (bi)linear filtering done directly from GPUs HW. // Note that GPUs might natively filter red green and blue differently, but we don't do it. // They might also use different filtering between upscaling and downscaling. - + float2 source_size = GetResolution(); float2 inverted_source_size = GetInvResolution(); float2 pixel = (uvw.xy * source_size) - 0.5; // Try to find the matching pixel top left corner @@ -99,6 +99,107 @@ float4 SharpBilinearSample(float3 uvw, float gamma) return BilinearSample(uvw, gamma); } +// By Sam Belliveau. Public Domain license. +// Effectively a more accurate sharp bilinear filter when upscaling, +// that also works as a mathematically perfect downscale filter. +// https://entropymine.com/imageworsener/pixelmixing/ +// https://github.com/obsproject/obs-studio/pull/1715 +// https://legacy.imagemagick.org/Usage/filter/ +float4 BoxResample(float3 uvw, float gamma) +{ + // Determine the sizes of the source and target images. + float2 source_size = GetResolution(); + float2 inv_source_size = GetInvResolution(); + float2 inv_target_size = GetInvWindowResolution(); + + // Determine the range of the source image that the target pixel will cover. + // We shift by one output pixel because that's a prerequisite of the algorithm. + float2 range = source_size * inv_target_size; + float2 beg = (uvw.xy - inv_target_size) * source_size; + float2 end = beg + range; + + // Compute the top-left and bottom-right corners of the pixel box. + float2 f_beg = floor(beg); + float2 f_end = floor(end); + + // Compute how much of the start and end pixels are covered horizontally & vertically. 
+ float area_w = 1.0 - fract(beg.x); + float area_n = 1.0 - fract(beg.y); + float area_e = fract(end.x); + float area_s = fract(end.y); + + // Compute the areas of the corner pixels in the pixel box. + float area_nw = area_n * area_w; + float area_ne = area_n * area_e; + float area_sw = area_s * area_w; + float area_se = area_s * area_e; + + // Initialize the color accumulator. + float4 avg_color = float4(0.0, 0.0, 0.0, 0.0); + + // Prevents rounding errors + const float offset = 0.5; + + // Accumulate corner pixels. + avg_color += area_nw * QuickSample(float2(f_beg.x + offset, f_beg.y + offset) * inv_source_size, uvw.z, gamma); + avg_color += area_ne * QuickSample(float2(f_end.x + offset, f_beg.y + offset) * inv_source_size, uvw.z, gamma); + avg_color += area_sw * QuickSample(float2(f_beg.x + offset, f_end.y + offset) * inv_source_size, uvw.z, gamma); + avg_color += area_se * QuickSample(float2(f_end.x + offset, f_end.y + offset) * inv_source_size, uvw.z, gamma); + + // Determine the size of the pixel box. + int x_range = int(f_end.x - f_beg.x + 0.5); + int y_range = int(f_end.y - f_beg.y + 0.5); + + // Workaround to compile the shader with DX11/12. + // If this isn't done, it will complain that the loop could have too many iterations. + // This number should be enough to guarantee downscaling from very high to very small resolutions. + const int max_iterations = 16; + + // Fix up the average calculations in case we reached the upper limit + x_range = min(x_range, max_iterations); + y_range = min(y_range, max_iterations); + + // Accumulate top and bottom edge pixels.
+ for (int ix = 0; ix < max_iterations; ++ix) + { + if (ix < x_range) + { + float x = f_beg.x + 1.0 + float(ix); + avg_color += area_n * QuickSample(float2(x + offset, f_beg.y + offset) * inv_source_size, uvw.z, gamma); + avg_color += area_s * QuickSample(float2(x + offset, f_end.y + offset) * inv_source_size, uvw.z, gamma); + } + } + + // Accumulate left and right edge pixels and all the pixels in between. + for (int iy = 0; iy < max_iterations; ++iy) + { + if (iy < y_range) + { + float y = f_beg.y + 1.0 + float(iy); + + avg_color += area_w * QuickSample(float2(f_beg.x + offset, y + offset) * inv_source_size, uvw.z, gamma); + avg_color += area_e * QuickSample(float2(f_end.x + offset, y + offset) * inv_source_size, uvw.z, gamma); + + for (int ix = 0; ix < max_iterations; ++ix) + { + if (ix < x_range) + { + float x = f_beg.x + 1.0 + float(ix); + avg_color += QuickSample(float2(x + offset, y + offset) * inv_source_size, uvw.z, gamma); + } + } + } + } + + // Compute the area of the pixel box that was sampled. + float area_corners = area_nw + area_ne + area_sw + area_se; + float area_edges = float(x_range) * (area_n + area_s) + float(y_range) * (area_w + area_e); + float area_center = float(x_range) * float(y_range); + + // Return the normalized average color. 
+ return avg_color / (area_corners + area_edges + area_center); +} + float4 Cubic(float v) { float4 n = float4(1.0, 2.0, 3.0, 4.0) - v; @@ -119,7 +220,7 @@ float4 BicubicSample(float3 uvw, float2 in_source_resolution, float2 in_inverted float4 xcubic = Cubic(frac_pixel.x); float4 ycubic = Cubic(frac_pixel.y); - + float4 c = float4(int_pixel.x - 0.5, int_pixel.x + 1.5, int_pixel.y - 0.5, int_pixel.y + 1.5); float4 s = float4(xcubic.x + xcubic.y, xcubic.z + xcubic.w, ycubic.x + ycubic.y, ycubic.z + ycubic.w); float4 offset = c + float4(xcubic.y, xcubic.w, ycubic.y, ycubic.w) / s; @@ -154,24 +255,24 @@ float4 BicubicHermiteSample(float3 uvw, float2 in_source_resolution, float2 in_i float2 pixel = (uvw.xy * in_source_resolution) + 0.5; float2 frac_pixel = fract(pixel); float2 uv = (floor(pixel) * in_inverted_source_resolution) - (in_inverted_source_resolution / 2.0); - + float2 inverted_source_resolution_double = in_inverted_source_resolution * 2.0; float4 c00 = QuickSample(uv + float2(-in_inverted_source_resolution.x, -in_inverted_source_resolution.y), uvw.z, gamma); float4 c10 = QuickSample(uv + float2( 0.0, -in_inverted_source_resolution.y), uvw.z, gamma); float4 c20 = QuickSample(uv + float2( in_inverted_source_resolution.x, -in_inverted_source_resolution.y), uvw.z, gamma); float4 c30 = QuickSample(uv + float2( inverted_source_resolution_double.x, -in_inverted_source_resolution.y), uvw.z, gamma); - + float4 c01 = QuickSample(uv + float2(-in_inverted_source_resolution.x, 0.0), uvw.z, gamma); float4 c11 = QuickSample(uv + float2( 0.0, 0.0), uvw.z, gamma); float4 c21 = QuickSample(uv + float2( in_inverted_source_resolution.x, 0.0), uvw.z, gamma); float4 c31 = QuickSample(uv + float2( inverted_source_resolution_double.x, 0.0), uvw.z, gamma); - + float4 c02 = QuickSample(uv + float2(-in_inverted_source_resolution.x, in_inverted_source_resolution.y), uvw.z, gamma); float4 c12 = QuickSample(uv + float2( 0.0, in_inverted_source_resolution.y), uvw.z, gamma); float4 c22 = 
QuickSample(uv + float2( in_inverted_source_resolution.x, in_inverted_source_resolution.y), uvw.z, gamma); float4 c32 = QuickSample(uv + float2( inverted_source_resolution_double.x, in_inverted_source_resolution.y), uvw.z, gamma); - + float4 c03 = QuickSample(uv + float2(-in_inverted_source_resolution.x, inverted_source_resolution_double.y), uvw.z, gamma); float4 c13 = QuickSample(uv + float2( 0.0, inverted_source_resolution_double.y), uvw.z, gamma); float4 c23 = QuickSample(uv + float2( in_inverted_source_resolution.x, inverted_source_resolution_double.y), uvw.z, gamma); @@ -254,7 +355,7 @@ float4 LinearGammaCorrectedSample(float gamma) { float3 uvw = v_tex0; float4 color = float4(0, 0, 0, 1); - + if (resampling_method <= 1) // Bilinear { color = BilinearSample(uvw, gamma); @@ -279,6 +380,10 @@ float4 LinearGammaCorrectedSample(float gamma) { color = SharpBilinearSample(uvw, gamma); } + else if (resampling_method == 7) // BoxSampling + { + color = BoxResample(uvw, gamma); + } return color; } @@ -310,7 +415,7 @@ void main() color = texture(samp0, v_tex0); else color = texture(samp1, v_tex0); - + // Convert to linear before doing any other of follow up operations. 
color.rgb = pow(color.rgb, float3(game_gamma)); } @@ -324,13 +429,13 @@ void main() else if (game_color_space == 2) color.rgb = color.rgb * from_PAL; } - + if (OptionEnabled(hdr_output)) { float hdr_paper_white = hdr_paper_white_nits / hdr_sdr_white_nits; color.rgb *= hdr_paper_white; } - + if (OptionEnabled(linear_space_output)) { // Nothing to do here diff --git a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp index 67fd7da7b9..ee525ef001 100644 --- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp @@ -120,6 +120,8 @@ void EnhancementsWidget::CreateWidgets() static_cast(OutputResamplingMode::NearestNeighbor)); m_output_resampling_combo->addItem(tr("Sharp Bilinear"), static_cast(OutputResamplingMode::SharpBilinear)); + m_output_resampling_combo->addItem(tr("Box Resampling"), + static_cast(OutputResamplingMode::BoxResampling)); m_configure_color_correction = new ToolTipPushButton(tr("Configure")); @@ -497,8 +499,10 @@ void EnhancementsWidget::AddDescriptions() "
\"Hermite\" might offer the best quality when upscaling," " at a slightly bigger perform cost.
\"Catmull-Rom\" is best for downscaling." "
\"Nearest Neighbor\" doesn't do any resampling, select if you like a pixelated look." - "
\"Sharp Bilinear\" works best with 2D games at low resolutions, use if you like a" - " sharp look.

If unsure, select 'Default'."); + "
\"Sharp Bilinear\" works best with 2D games at low resolutions, use if you like a sharp" + " look." + "
\"Box Resampling\" is most expensive but also most accurate downscaling method." + "
If unsure, select 'Default'."); static const char TR_COLOR_CORRECTION_DESCRIPTION[] = QT_TR_NOOP("A group of features to make the colors more accurate, matching the color space " "Wii and GC games were meant for."); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index affbf78dec..dcdc631a40 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -61,6 +61,7 @@ enum class OutputResamplingMode : int CatmullRom, NearestNeighbor, SharpBilinear, + BoxResampling, }; enum class ColorCorrectionRegion : int From 39d96a21a8428a6372784ea2c23a687e120e22ad Mon Sep 17 00:00:00 2001 From: Sam Belliveau Date: Thu, 3 Aug 2023 18:53:21 -0400 Subject: [PATCH 3/5] Video: Improved Tooltips & Bicubic (#5) Edited by Filoppi --- .../Sys/Shaders/default_pre_post_process.glsl | 301 +++++++----------- .../Config/Graphics/EnhancementsWidget.cpp | 59 ++-- Source/Core/VideoCommon/VideoConfig.h | 7 +- 3 files changed, 159 insertions(+), 208 deletions(-) diff --git a/Data/Sys/Shaders/default_pre_post_process.glsl b/Data/Sys/Shaders/default_pre_post_process.glsl index 91bec1a42e..1f5ea2a201 100644 --- a/Data/Sys/Shaders/default_pre_post_process.glsl +++ b/Data/Sys/Shaders/default_pre_post_process.glsl @@ -1,3 +1,5 @@ +/***** COLOR CORRECTION *****/ + // Color Space references: // https://www.unravel.com.au/understanding-color-spaces @@ -36,6 +38,8 @@ float3 LinearTosRGBGamma(float3 color) return color; } +/***** COLOR SAMPLING *****/ + // Non filtered gamma corrected sample (nearest neighbor) float4 QuickSample(float3 uvw, float gamma) { @@ -43,20 +47,24 @@ float4 QuickSample(float3 uvw, float gamma) color.rgb = pow(color.rgb, float3(gamma)); return color; } - float4 QuickSample(float2 uv, float w, float gamma) { return QuickSample(float3(uv, w), gamma); } +float4 QuickSampleByPixel(float2 xy, float w, float gamma) +{ + float3 uvw = float3(xy * GetInvResolution(), w); + return QuickSample(uvw, gamma); +} + 
+/***** Bilinear Interpolation *****/ float4 BilinearSample(float3 uvw, float gamma) { // This emulates the (bi)linear filtering done directly from GPUs HW. // Note that GPUs might natively filter red green and blue differently, but we don't do it. // They might also use different filtering between upscaling and downscaling. - float2 source_size = GetResolution(); - float2 inverted_source_size = GetInvResolution(); float2 pixel = (uvw.xy * source_size) - 0.5; // Try to find the matching pixel top left corner // Find the integer and floating point parts @@ -64,15 +72,84 @@ float4 BilinearSample(float3 uvw, float gamma) float2 frac_pixel = fract(pixel); // Take 4 samples around the original uvw - float4 c11 = QuickSample((int_pixel + float2(0.5, 0.5)) * inverted_source_size, uvw.z, gamma); - float4 c21 = QuickSample((int_pixel + float2(1.5, 0.5)) * inverted_source_size, uvw.z, gamma); - float4 c12 = QuickSample((int_pixel + float2(0.5, 1.5)) * inverted_source_size, uvw.z, gamma); - float4 c22 = QuickSample((int_pixel + float2(1.5, 1.5)) * inverted_source_size, uvw.z, gamma); + float4 c11 = QuickSampleByPixel(int_pixel + float2(0.5, 0.5), uvw.z, gamma); + float4 c21 = QuickSampleByPixel(int_pixel + float2(1.5, 0.5), uvw.z, gamma); + float4 c12 = QuickSampleByPixel(int_pixel + float2(0.5, 1.5), uvw.z, gamma); + float4 c22 = QuickSampleByPixel(int_pixel + float2(1.5, 1.5), uvw.z, gamma); // Blend the 4 samples by their weight return lerp(lerp(c11, c21, frac_pixel.x), lerp(c12, c22, frac_pixel.x), frac_pixel.y); } +/***** Bicubic Interpolation *****/ + +// Formula derived from: +// https://en.wikipedia.org/wiki/Mitchell%E2%80%93Netravali_filters#Definition +// Values from: +// https://guideencodemoe-mkdocs.readthedocs.io/encoding/resampling/#mitchell-netravali-bicubic +// Other references: +// https://www.codeproject.com/Articles/236394/Bi-Cubic-and-Bi-Linear-Interpolation-with-GLSL +// https://github.com/ValveSoftware/gamescope/pull/740 +// 
https://stackoverflow.com/questions/13501081/efficient-bicubic-filtering-code-in-glsl +#define CUBIC_COEFF_GEN(B, C) \ + (mat4(/* t^0 */ ((B) / 6.0), (-(B) / 3.0 + 1.0), ((B) / 6.0), (0.0), \ + /* t^1 */ (-(B) / 2.0 - (C)), (0.0), ((B) / 2.0 + (C)), (0.0), \ + /* t^2 */ ((B) / 2.0 + 2.0 * (C)), (2.0 * (B) + (C)-3.0), \ + (-5.0 * (B) / 2.0 - 2.0 * (C) + 3.0), (-(C)), \ + /* t^3 */ (-(B) / 6.0 - (C)), (-3.0 * (B) / 2.0 - (C) + 2.0), \ + (3.0 * (B) / 2.0 + (C)-2.0), ((B) / 6.0 + (C)))) + +float4 CubicCoeffs(float t, mat4 coeffs) +{ + return coeffs * float4(1.0, t, t * t, t * t * t); +} + +float4 CubicMix(float4 c0, float4 c1, float4 c2, float4 c3, float4 coeffs) +{ + return c0 * coeffs[0] + c1 * coeffs[1] + c2 * coeffs[2] + c3 * coeffs[3]; +} + +// By Sam Belliveau. Public Domain license. +// Simple 16 tap, gamma correct, implementation of bicubic filtering. +float4 BicubicSample(float3 uvw, float gamma, mat4 coeffs) +{ + float2 pixel = (uvw.xy * GetResolution()) - 0.5; + float2 int_pixel = floor(pixel); + float2 frac_pixel = fract(pixel); + + float4 c00 = QuickSampleByPixel(int_pixel + float2(-0.5, -0.5), uvw.z, gamma); + float4 c10 = QuickSampleByPixel(int_pixel + float2(+0.5, -0.5), uvw.z, gamma); + float4 c20 = QuickSampleByPixel(int_pixel + float2(+1.5, -0.5), uvw.z, gamma); + float4 c30 = QuickSampleByPixel(int_pixel + float2(+2.5, -0.5), uvw.z, gamma); + + float4 c01 = QuickSampleByPixel(int_pixel + float2(-0.5, +0.5), uvw.z, gamma); + float4 c11 = QuickSampleByPixel(int_pixel + float2(+0.5, +0.5), uvw.z, gamma); + float4 c21 = QuickSampleByPixel(int_pixel + float2(+1.5, +0.5), uvw.z, gamma); + float4 c31 = QuickSampleByPixel(int_pixel + float2(+2.5, +0.5), uvw.z, gamma); + + float4 c02 = QuickSampleByPixel(int_pixel + float2(-0.5, +1.5), uvw.z, gamma); + float4 c12 = QuickSampleByPixel(int_pixel + float2(+0.5, +1.5), uvw.z, gamma); + float4 c22 = QuickSampleByPixel(int_pixel + float2(+1.5, +1.5), uvw.z, gamma); + float4 c32 = QuickSampleByPixel(int_pixel + 
float2(+2.5, +1.5), uvw.z, gamma); + + float4 c03 = QuickSampleByPixel(int_pixel + float2(-0.5, +2.5), uvw.z, gamma); + float4 c13 = QuickSampleByPixel(int_pixel + float2(+0.5, +2.5), uvw.z, gamma); + float4 c23 = QuickSampleByPixel(int_pixel + float2(+1.5, +2.5), uvw.z, gamma); + float4 c33 = QuickSampleByPixel(int_pixel + float2(+2.5, +2.5), uvw.z, gamma); + + float4 cx = CubicCoeffs(frac_pixel.x, coeffs); + float4 cy = CubicCoeffs(frac_pixel.y, coeffs); + + float4 x0 = CubicMix(c00, c10, c20, c30, cx); + float4 x1 = CubicMix(c01, c11, c21, c31, cx); + float4 x2 = CubicMix(c02, c12, c22, c32, cx); + float4 x3 = CubicMix(c03, c13, c23, c33, cx); + + return CubicMix(x0, x1, x2, x3, cy); +} + +/***** Sharp Bilinear Filtering *****/ + // Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang // by Themaister, Public Domain license // Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale, @@ -99,23 +176,24 @@ float4 SharpBilinearSample(float3 uvw, float gamma) return BilinearSample(uvw, gamma); } +/***** Area Sampling *****/ + // By Sam Belliveau. Public Domain license. // Effectively a more accurate sharp bilinear filter when upscaling, // that also works as a mathematically perfect downscale filter. // https://entropymine.com/imageworsener/pixelmixing/ // https://github.com/obsproject/obs-studio/pull/1715 // https://legacy.imagemagick.org/Usage/filter/ -float4 BoxResample(float3 uvw, float gamma) +float4 AreaSampling(float3 uvw, float gamma) { // Determine the sizes of the source and target images. float2 source_size = GetResolution(); - float2 inv_source_size = GetInvResolution(); - float2 inv_target_size = GetInvWindowResolution(); + float2 inverted_target_size = GetInvWindowResolution(); // Determine the range of the source image that the target pixel will cover. // We shift by one output pixel because that's a prerequisite of the algorithm. 
- float2 range = source_size * inv_target_size; - float2 beg = (uvw.xy - inv_target_size) * source_size; + float2 range = source_size * inverted_target_size; + float2 beg = (uvw.xy - inverted_target_size) * source_size; float2 end = beg + range; // Compute the top-left and bottom-right corners of the pixel box. @@ -141,11 +219,11 @@ float4 BoxResample(float3 uvw, float gamma) const float offset = 0.5; // Accumulate corner pixels. - avg_color += area_nw * QuickSample(float2(f_beg.x + offset, f_beg.y + offset) * inv_source_size, uvw.z, gamma); - avg_color += area_ne * QuickSample(float2(f_end.x + offset, f_beg.y + offset) * inv_source_size, uvw.z, gamma); - avg_color += area_sw * QuickSample(float2(f_beg.x + offset, f_end.y + offset) * inv_source_size, uvw.z, gamma); - avg_color += area_se * QuickSample(float2(f_end.x + offset, f_end.y + offset) * inv_source_size, uvw.z, gamma); - + avg_color += area_nw * QuickSampleByPixel(float2(f_beg.x + offset, f_beg.y + offset), uvw.z, gamma); + avg_color += area_ne * QuickSampleByPixel(float2(f_end.x + offset, f_beg.y + offset), uvw.z, gamma); + avg_color += area_sw * QuickSampleByPixel(float2(f_beg.x + offset, f_end.y + offset), uvw.z, gamma); + avg_color += area_se * QuickSampleByPixel(float2(f_end.x + offset, f_end.y + offset), uvw.z, gamma); + // Determine the size of the pixel box. 
int x_range = int(f_end.x - f_beg.x + 0.5); int y_range = int(f_end.y - f_beg.y + 0.5); @@ -165,8 +243,8 @@ float4 BoxResample(float3 uvw, float gamma) if (ix < x_range) { float x = f_beg.x + 1.0 + float(ix); - avg_color += area_n * QuickSample(float2(x + offset, f_beg.y + offset) * inv_source_size, uvw.z, gamma); - avg_color += area_s * QuickSample(float2(x + offset, f_end.y + offset) * inv_source_size, uvw.z, gamma); + avg_color += area_n * QuickSampleByPixel(float2(x + offset, f_beg.y + offset), uvw.z, gamma); + avg_color += area_s * QuickSampleByPixel(float2(x + offset, f_end.y + offset), uvw.z, gamma); } } @@ -177,15 +255,15 @@ float4 BoxResample(float3 uvw, float gamma) { float y = f_beg.y + 1.0 + float(iy); - avg_color += area_w * QuickSample(float2(f_beg.x + offset, y + offset) * inv_source_size, uvw.z, gamma); - avg_color += area_e * QuickSample(float2(f_end.x + offset, y + offset) * inv_source_size, uvw.z, gamma); + avg_color += area_w * QuickSampleByPixel(float2(f_beg.x + offset, y + offset), uvw.z, gamma); + avg_color += area_e * QuickSampleByPixel(float2(f_end.x + offset, y + offset), uvw.z, gamma); for (int ix = 0; ix < max_iterations; ++ix) { if (ix < x_range) { float x = f_beg.x + 1.0 + float(ix); - avg_color += QuickSample(float2(x + offset, y + offset) * inv_source_size, uvw.z, gamma); + avg_color += QuickSampleByPixel(float2(x + offset, y + offset), uvw.z, gamma); } } } @@ -200,154 +278,7 @@ float4 BoxResample(float3 uvw, float gamma) return avg_color / (area_corners + area_edges + area_center); } -float4 Cubic(float v) -{ - float4 n = float4(1.0, 2.0, 3.0, 4.0) - v; - float4 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return float4(x, y, z, w) * (1.0 / 6.0); -} - -// https://stackoverflow.com/questions/13501081/efficient-bicubic-filtering-code-in-glsl -float4 BicubicSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float 
gamma) -{ - float2 pixel = (uvw.xy * in_source_resolution) - 0.5; - float2 int_pixel = floor(pixel); - float2 frac_pixel = fract(pixel); - - float4 xcubic = Cubic(frac_pixel.x); - float4 ycubic = Cubic(frac_pixel.y); - - float4 c = float4(int_pixel.x - 0.5, int_pixel.x + 1.5, int_pixel.y - 0.5, int_pixel.y + 1.5); - float4 s = float4(xcubic.x + xcubic.y, xcubic.z + xcubic.w, ycubic.x + ycubic.y, ycubic.z + ycubic.w); - float4 offset = c + float4(xcubic.y, xcubic.w, ycubic.y, ycubic.w) / s; - - offset *= float4(in_inverted_source_resolution.x, in_inverted_source_resolution.x, in_inverted_source_resolution.y, in_inverted_source_resolution.y); - - float4 sample0 = QuickSample(offset.xz, uvw.z, gamma); - float4 sample1 = QuickSample(offset.yz, uvw.z, gamma); - float4 sample2 = QuickSample(offset.xw, uvw.z, gamma); - float4 sample3 = QuickSample(offset.yw, uvw.z, gamma); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return lerp(lerp(sample3, sample2, sx), lerp(sample1, sample0, sx), sy); -} - -float4 CubicHermite(float4 A, float4 B, float4 C, float4 D, float t) -{ - float t2 = t * t; - float t3 = t * t * t; - float4 a = (-A / 2.0) + ((3.0 * B) / 2.0) - ((3.0 * C) / 2.0) + (D / 2.0); - float4 b = A - ((5.0 * B) / 2.0 ) + (2.0 * C) - (D / 2.0); - float4 c = (-A / 2.0) + (C / 2.0); - float4 d = B; - - return (a * t3) + (b * t2) + (c * t) + d; -} - -float4 BicubicHermiteSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float gamma) -{ - float2 pixel = (uvw.xy * in_source_resolution) + 0.5; - float2 frac_pixel = fract(pixel); - float2 uv = (floor(pixel) * in_inverted_source_resolution) - (in_inverted_source_resolution / 2.0); - - float2 inverted_source_resolution_double = in_inverted_source_resolution * 2.0; - - float4 c00 = QuickSample(uv + float2(-in_inverted_source_resolution.x, -in_inverted_source_resolution.y), uvw.z, gamma); - float4 c10 = QuickSample(uv + float2( 0.0, -in_inverted_source_resolution.y), uvw.z, 
gamma); - float4 c20 = QuickSample(uv + float2( in_inverted_source_resolution.x, -in_inverted_source_resolution.y), uvw.z, gamma); - float4 c30 = QuickSample(uv + float2( inverted_source_resolution_double.x, -in_inverted_source_resolution.y), uvw.z, gamma); - - float4 c01 = QuickSample(uv + float2(-in_inverted_source_resolution.x, 0.0), uvw.z, gamma); - float4 c11 = QuickSample(uv + float2( 0.0, 0.0), uvw.z, gamma); - float4 c21 = QuickSample(uv + float2( in_inverted_source_resolution.x, 0.0), uvw.z, gamma); - float4 c31 = QuickSample(uv + float2( inverted_source_resolution_double.x, 0.0), uvw.z, gamma); - - float4 c02 = QuickSample(uv + float2(-in_inverted_source_resolution.x, in_inverted_source_resolution.y), uvw.z, gamma); - float4 c12 = QuickSample(uv + float2( 0.0, in_inverted_source_resolution.y), uvw.z, gamma); - float4 c22 = QuickSample(uv + float2( in_inverted_source_resolution.x, in_inverted_source_resolution.y), uvw.z, gamma); - float4 c32 = QuickSample(uv + float2( inverted_source_resolution_double.x, in_inverted_source_resolution.y), uvw.z, gamma); - - float4 c03 = QuickSample(uv + float2(-in_inverted_source_resolution.x, inverted_source_resolution_double.y), uvw.z, gamma); - float4 c13 = QuickSample(uv + float2( 0.0, inverted_source_resolution_double.y), uvw.z, gamma); - float4 c23 = QuickSample(uv + float2( in_inverted_source_resolution.x, inverted_source_resolution_double.y), uvw.z, gamma); - float4 c33 = QuickSample(uv + float2( inverted_source_resolution_double.x, inverted_source_resolution_double.y), uvw.z, gamma); - - float4 cp0x = CubicHermite(c00, c10, c20, c30, frac_pixel.x); - float4 cp1x = CubicHermite(c01, c11, c21, c31, frac_pixel.x); - float4 cp2x = CubicHermite(c02, c12, c22, c32, frac_pixel.x); - float4 cp3x = CubicHermite(c03, c13, c23, c33, frac_pixel.x); - - return CubicHermite(cp0x, cp1x, cp2x, cp3x, frac_pixel.y); -} - -float CatmullRom(float B, float C, float x) -{ - float f = x; - - if (f < 0.0) - f = -f; - - if (f < 1.0) - { - 
return ((12 - 9 * B - 6 * C) * (f * f * f) + - (-18 + 12 * B + 6 * C) * (f * f) + - (6 - 2 * B)) / 6.0; - } - else if (f >= 1.0 && f < 2.0) - { - return ((-B - 6 * C) * (f * f * f) - + (6 * B + 30 * C) * (f * f) + - ( - (12 * B) - 48 * C) * f + - 8 * B + 24 * C) / 6.0; - } - else - { - return 0.0; - } -} - -// https://www.codeproject.com/Articles/236394/Bi-Cubic-and-Bi-Linear-Interpolation-with-GLSL -// https://github.com/ValveSoftware/gamescope/pull/740 -float4 BicubicCatmullRomSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float gamma) -{ - const float offset = 0.5; - float2 pixel = (uvw.xy * in_source_resolution) - offset; - float2 int_pixel = floor(pixel); - float2 frac_pixel = fract(pixel); - float2 int_uvw = (int_pixel + offset) * in_inverted_source_resolution; - - // B and C can be any value between 0 and 1, - // though they are meant to be 0 and 0.5 for Catmull-Rom. - // https://en.wikipedia.org/wiki/Mitchell%E2%80%93Netravali_filters - // https://guideencodemoe-mkdocs.readthedocs.io/encoding/resampling/ - const float B = 0.0; - const float C = 0.5; - - // Take 16 (4x4) samples, each with a different weight. - // This loop can be replaced with any other bicubic formula (e.g. Hermite). - float4 color_sum = float4(0.0, 0.0, 0.0, 0.0); - float4 color_denominator = float4(0.0, 0.0, 0.0, 0.0); - for (int m = -1; m <= 2; m++) - { - for (int n = -1; n <= 2; n++) - { - float4 color = QuickSample(int_uvw + (float2(m, n) * in_inverted_source_resolution), uvw.z, gamma); - float f1 = CatmullRom(B, C, float(m) - frac_pixel.x); - float f2 = CatmullRom(B, C, -float(n) + frac_pixel.y); - float4 cooef1 = float4(f1, f1, f1, f1); - float4 cooef2 = float4(f2, f2, f2, f2); - color_sum += color * (cooef2 * cooef1); - color_denominator += cooef2 * cooef1; - } - } - return color_sum / color_denominator; -} +/***** Main Functions *****/ // Returns an accurate (gamma corrected) sample of a gamma space space texture. 
// Outputs in linear space for simplicity. @@ -360,29 +291,33 @@ float4 LinearGammaCorrectedSample(float gamma) { color = BilinearSample(uvw, gamma); } - else if (resampling_method == 2) // "Simple" Bicubic + else if (resampling_method == 2) // Bicubic: B-Spline { - color = BicubicSample(uvw, GetResolution(), GetInvResolution(), gamma); + color = BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(1.0, 0.0)); } - else if (resampling_method == 3) // Hermite + else if (resampling_method == 3) // Bicubic: Mitchell-Netravali { - color = BicubicHermiteSample(uvw, GetResolution(), GetInvResolution(), gamma); + color = BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(1.0 / 3.0, 1.0 / 3.0)); } - else if (resampling_method == 4) // Catmull-Rom + else if (resampling_method == 4) // Bicubic: Catmull-Rom { - color = BicubicCatmullRomSample(uvw, GetResolution(), GetInvResolution(), gamma); + color = BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(0.0, 0.5)); } - else if (resampling_method == 5) // Nearest Neighbor - { - color = QuickSample(uvw, gamma); - } - else if (resampling_method == 6) // Sharp Bilinear + else if (resampling_method == 5) // Sharp Bilinear { color = SharpBilinearSample(uvw, gamma); } - else if (resampling_method == 7) // BoxSampling + else if (resampling_method == 6) // Area Sampling { - color = BoxResample(uvw, gamma); + color = AreaSampling(uvw, gamma); + } + else if (resampling_method == 7) // Nearest Neighbor + { + color = QuickSample(uvw, gamma); + } + else if (resampling_method == 8) // Bicubic: Hermite + { + color = BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(0.0, 0.0)); } return color; diff --git a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp index ee525ef001..f181780749 100644 --- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp @@ -110,18 +110,16 @@ void EnhancementsWidget::CreateWidgets() 
static_cast(OutputResamplingMode::Default)); m_output_resampling_combo->addItem(tr("Bilinear"), static_cast(OutputResamplingMode::Bilinear)); - m_output_resampling_combo->addItem(tr("Bicubic"), - static_cast(OutputResamplingMode::Bicubic)); - m_output_resampling_combo->addItem(tr("Hermite"), - static_cast(OutputResamplingMode::Hermite)); - m_output_resampling_combo->addItem(tr("Catmull-Rom"), + m_output_resampling_combo->addItem(tr("Bicubic: B-Spline"), + static_cast(OutputResamplingMode::BSpline)); + m_output_resampling_combo->addItem(tr("Bicubic: Mitchell-Netravali"), + static_cast(OutputResamplingMode::MitchellNetravali)); + m_output_resampling_combo->addItem(tr("Bicubic: Catmull-Rom"), static_cast(OutputResamplingMode::CatmullRom)); - m_output_resampling_combo->addItem(tr("Nearest Neighbor"), - static_cast(OutputResamplingMode::NearestNeighbor)); m_output_resampling_combo->addItem(tr("Sharp Bilinear"), static_cast(OutputResamplingMode::SharpBilinear)); - m_output_resampling_combo->addItem(tr("Box Resampling"), - static_cast(OutputResamplingMode::BoxResampling)); + m_output_resampling_combo->addItem(tr("Area Sampling"), + static_cast(OutputResamplingMode::AreaSampling)); m_configure_color_correction = new ToolTipPushButton(tr("Configure")); @@ -491,18 +489,37 @@ void EnhancementsWidget::AddDescriptions() "scaling filter selected by the game.

Any option except 'Default' will alter the look " "of the game's textures and might cause issues in a small number of " "games.

If unsure, select 'Default'."); - static const char TR_OUTPUT_RESAMPLING_DESCRIPTION[] = QT_TR_NOOP( - "Affects how the game output image is upscaled or downscaled to the window resolution.
" - "\"Default\" will rely on the GPU internal bilinear sampler which isn't gamma corrected." - "
\"Bilinear\" (gamma corrected) is a good compromise between quality and performance." - "
\"Bicubic\" is smoother than \"Bilinear\"." - "
\"Hermite\" might offer the best quality when upscaling," - " at a slightly bigger perform cost.
\"Catmull-Rom\" is best for downscaling." - "
\"Nearest Neighbor\" doesn't do any resampling, select if you like a pixelated look." - "
\"Sharp Bilinear\" works best with 2D games at low resolutions, use if you like a sharp" - " look." - "
\"Box Resampling\" is most expensive but also most accurate downscaling method." - "
If unsure, select 'Default'."); + static const char TR_OUTPUT_RESAMPLING_DESCRIPTION[] = + QT_TR_NOOP("Affects how the game output is scaled to the window resolution." + "
The performance mostly depends on the number of samples each method uses." + "
Compared to SSAA, resampling is useful in case the output window" + "
resolution isn't a multiple of the native emulation resolution." + + "

Default - [fastest]" + "
Internal GPU bilinear sampler which is not gamma corrected." + "
This setting might be ignored if gamma correction is forced on." + + "

Bilinear - [4 samples]" + "
Gamma corrected linear interpolation between pixels." + + "

Bicubic - [16 samples]" + "
Gamma corrected cubic interpolation between pixels." + "
Good when rescaling between close resolutions, e.g. 1080p and 1440p." + "
Comes in various flavors:" + "
B-Spline: Blurry, but avoids all lobing artifacts" + "
Mitchell-Netravali: Good middle ground between blurry and lobing" + "
Catmull-Rom: Sharper, but can cause lobing artifacts" + + "

Sharp Bilinear - [1-4 samples]" + "
Similarly to \"Nearest Neighbor\", it maintains a sharp look," + "
but also does some blending to avoid shimmering." + "
Works best with 2D games at low resolutions." + + "

Area Sampling - [up to 324 samples]" + "
Weights pixels by the percentage of area they occupy. Gamma corrected." + "
Best for downscaling by more than 2x." + + "

If unsure, select 'Default'."); static const char TR_COLOR_CORRECTION_DESCRIPTION[] = QT_TR_NOOP("A group of features to make the colors more accurate, matching the color space " "Wii and GC games were meant for."); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index dcdc631a40..6e34a43934 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -56,12 +56,11 @@ enum class OutputResamplingMode : int { Default, Bilinear, - Bicubic, - Hermite, + BSpline, + MitchellNetravali, CatmullRom, - NearestNeighbor, SharpBilinear, - BoxResampling, + AreaSampling, }; enum class ColorCorrectionRegion : int From fc3f7866f87f07eab34288f84821c5d429664227 Mon Sep 17 00:00:00 2001 From: Filoppi Date: Fri, 4 Aug 2023 14:40:55 +0300 Subject: [PATCH 4/5] Video: Fix area/box resampling shifting the output by about one pixel when upscaling and downscaling --- .../Sys/Shaders/default_pre_post_process.glsl | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/Data/Sys/Shaders/default_pre_post_process.glsl b/Data/Sys/Shaders/default_pre_post_process.glsl index 1f5ea2a201..795518d616 100644 --- a/Data/Sys/Shaders/default_pre_post_process.glsl +++ b/Data/Sys/Shaders/default_pre_post_process.glsl @@ -43,6 +43,17 @@ float3 LinearTosRGBGamma(float3 color) // Non filtered gamma corrected sample (nearest neighbor) float4 QuickSample(float3 uvw, float gamma) { +#if 0 // Test sampling range + const float threshold = 0.00000001; + float2 xy = uvw.xy * GetResolution(); + // Sampling outside the valid range, draw in yellow + if (xy.x < (0.0 - threshold) || xy.x > (GetResolution().x + threshold) || xy.y < (0.0 - threshold) || xy.y > (GetResolution().y + threshold)) + return float4(1.0, 1.0, 0.0, 1); + // Sampling at the edges, draw in purple + if (xy.x < 1.0 || xy.x > (GetResolution().x - 1.0) || xy.y < 1.0 || xy.y > (GetResolution().y - 1.0)) + return float4(0.5, 0, 0.5, 1); +#endif + 
float4 color = texture(samp1, uvw); color.rgb = pow(color.rgb, float3(gamma)); return color; @@ -178,7 +189,7 @@ float4 SharpBilinearSample(float3 uvw, float gamma) /***** Area Sampling *****/ -// By Sam Belliveau. Public Domain license. +// By Sam Belliveau and Filippo Tarpini. Public Domain license. // Effectively a more accurate sharp bilinear filter when upscaling, // that also works as a mathematically perfect downscale filter. // https://entropymine.com/imageworsener/pixelmixing/ @@ -191,9 +202,11 @@ float4 AreaSampling(float3 uvw, float gamma) float2 inverted_target_size = GetInvWindowResolution(); // Determine the range of the source image that the target pixel will cover. - // We shift by one output pixel because that's a prerequisite of the algorithm. - float2 range = source_size * inverted_target_size; - float2 beg = (uvw.xy - inverted_target_size) * source_size; + // Workaround: shift the resolution by 1/4 pixel to align the results with other sampling algorithms, + // otherwise the results would be offsetted, and we'd be sampling from coordinates outside the valid range. + float2 adjusted_source_size = source_size - 0.25; + float2 range = adjusted_source_size * inverted_target_size; + float2 beg = (uvw.xy * adjusted_source_size) - (range * 0.5); float2 end = beg + range; // Compute the top-left and bottom-right corners of the pixel box. @@ -215,15 +228,15 @@ float4 AreaSampling(float3 uvw, float gamma) // Initialize the color accumulator. float4 avg_color = float4(0.0, 0.0, 0.0, 0.0); - // Presents rounding errors - const float offset = 0.5; + // Prevents rounding errors due to the coordinates flooring above + const float2 offset = float2(0.5, 0.5); // Accumulate corner pixels. 
- avg_color += area_nw * QuickSampleByPixel(float2(f_beg.x + offset, f_beg.y + offset), uvw.z, gamma); - avg_color += area_ne * QuickSampleByPixel(float2(f_end.x + offset, f_beg.y + offset), uvw.z, gamma); - avg_color += area_sw * QuickSampleByPixel(float2(f_beg.x + offset, f_end.y + offset), uvw.z, gamma); - avg_color += area_se * QuickSampleByPixel(float2(f_end.x + offset, f_end.y + offset), uvw.z, gamma); - + avg_color += area_nw * QuickSampleByPixel(float2(f_beg.x, f_beg.y) + offset, uvw.z, gamma); + avg_color += area_ne * QuickSampleByPixel(float2(f_end.x, f_beg.y) + offset, uvw.z, gamma); + avg_color += area_sw * QuickSampleByPixel(float2(f_beg.x, f_end.y) + offset, uvw.z, gamma); + avg_color += area_se * QuickSampleByPixel(float2(f_end.x, f_end.y) + offset, uvw.z, gamma); + // Determine the size of the pixel box. int x_range = int(f_end.x - f_beg.x + 0.5); int y_range = int(f_end.y - f_beg.y + 0.5); @@ -231,6 +244,7 @@ float4 AreaSampling(float3 uvw, float gamma) // Workaround to compile the shader with DX11/12. // If this isn't done, it will complain that the loop could have too many iterations. // This number should be enough to guarantee downscaling from very high to very small resolutions. + // Note that this number might be referenced in the UI. 
const int max_iterations = 16; // Fix up the average calculations in case we reached the upper limit @@ -243,8 +257,8 @@ float4 AreaSampling(float3 uvw, float gamma) if (ix < x_range) { float x = f_beg.x + 1.0 + float(ix); - avg_color += area_n * QuickSampleByPixel(float2(x + offset, f_beg.y + offset), uvw.z, gamma); - avg_color += area_s * QuickSampleByPixel(float2(x + offset, f_end.y + offset), uvw.z, gamma); + avg_color += area_n * QuickSampleByPixel(float2(x, f_beg.y) + offset, uvw.z, gamma); + avg_color += area_s * QuickSampleByPixel(float2(x, f_end.y) + offset, uvw.z, gamma); } } @@ -255,15 +269,15 @@ float4 AreaSampling(float3 uvw, float gamma) { float y = f_beg.y + 1.0 + float(iy); - avg_color += area_w * QuickSampleByPixel(float2(f_beg.x + offset, y + offset), uvw.z, gamma); - avg_color += area_e * QuickSampleByPixel(float2(f_end.x + offset, y + offset), uvw.z, gamma); + avg_color += area_w * QuickSampleByPixel(float2(f_beg.x, y) + offset, uvw.z, gamma); + avg_color += area_e * QuickSampleByPixel(float2(f_end.x, y) + offset, uvw.z, gamma); for (int ix = 0; ix < max_iterations; ++ix) { if (ix < x_range) { float x = f_beg.x + 1.0 + float(ix); - avg_color += QuickSampleByPixel(float2(x + offset, y + offset), uvw.z, gamma); + avg_color += QuickSampleByPixel(float2(x, y) + offset, uvw.z, gamma); } } } From 80b453082dff9545e7b040fbe546055dac310b5c Mon Sep 17 00:00:00 2001 From: Filoppi Date: Sun, 6 Aug 2023 03:11:19 +0300 Subject: [PATCH 5/5] Video: fix post process shaders with parameters failing to build on OpenGL This was because the shader uniforms between the pixel and vertex shaders were willingly left different, to avoid filling the vertex shader with unnecessary params. Turns out all backends are fine with this except OGL. The new behaviour is now much more consistent and well explained, the "default" shaders are the ones that always run, and the non default ones are the user selected ones (if any). 
--- Source/Core/VideoCommon/PostProcessing.cpp | 66 ++++++++++++++-------- Source/Core/VideoCommon/PostProcessing.h | 3 +- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/Source/Core/VideoCommon/PostProcessing.cpp b/Source/Core/VideoCommon/PostProcessing.cpp index 1de4c311c2..49b05e6762 100644 --- a/Source/Core/VideoCommon/PostProcessing.cpp +++ b/Source/Core/VideoCommon/PostProcessing.cpp @@ -419,9 +419,9 @@ std::vector PostProcessing::GetPassiveShaderList() bool PostProcessing::Initialize(AbstractTextureFormat format) { m_framebuffer_format = format; - // CompilePixelShader must be run first if configuration options are used. + // CompilePixelShader() must be run first if configuration options are used. // Otherwise the UBO has a different member list between vertex and pixel - // shaders, which is a link error. + // shaders, which is a link error on some backends. if (!CompilePixelShader() || !CompileVertexShader() || !CompilePipeline()) return false; @@ -541,8 +541,8 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, g_gfx->SetFramebuffer(m_intermediary_frame_buffer.get()); FillUniformBuffer(src_rect, src_tex, src_layer, g_gfx->GetCurrentFramebuffer()->GetRect(), - present_rect, uniform_staging_buffer->data(), - !default_uniform_staging_buffer); + present_rect, uniform_staging_buffer->data(), !default_uniform_staging_buffer, + true); g_vertex_manager->UploadUtilityUniforms(uniform_staging_buffer->data(), static_cast(uniform_staging_buffer->size())); @@ -592,8 +592,8 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, if (final_pipeline) { FillUniformBuffer(src_rect, src_tex, src_layer, g_gfx->GetCurrentFramebuffer()->GetRect(), - present_rect, uniform_staging_buffer->data(), - !default_uniform_staging_buffer); + present_rect, uniform_staging_buffer->data(), !default_uniform_staging_buffer, + false); g_vertex_manager->UploadUtilityUniforms(uniform_staging_buffer->data(), 
static_cast(uniform_staging_buffer->size())); @@ -621,6 +621,9 @@ std::string PostProcessing::GetUniformBufferHeader(bool user_post_process) const // The first (but not necessarily only) source layer we target ss << " int src_layer;\n"; ss << " uint time;\n"; + ss << " int graphics_api;\n"; + // If true, it's an intermediary buffer (including the first), if false, it's the final one + ss << " int intermediary_buffer;\n"; ss << " int resampling_method;\n"; ss << " int correct_color_space;\n"; @@ -755,6 +758,7 @@ void SetOutput(float4 color) #define GetOption(x) (x) #define OptionEnabled(x) ((x) != 0) +#define OptionDisabled(x) ((x) == 0) )"; return ss.str(); @@ -765,13 +769,9 @@ std::string PostProcessing::GetFooter() const return {}; } -bool PostProcessing::CompileVertexShader() +std::string GetVertexShaderBody() { std::ostringstream ss; - // We never need the user selected post process custom uniforms in the vertex shader - const bool user_post_process = false; - ss << GetUniformBufferHeader(user_post_process); - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) { ss << "VARYING_LOCATION(0) out VertexData {\n"; @@ -792,21 +792,34 @@ bool PostProcessing::CompileVertexShader() // Vulkan Y needs to be inverted on every pass if (g_ActiveConfig.backend_info.api_type == APIType::Vulkan) + { ss << " opos.y = -opos.y;\n"; - - std::string s2 = ss.str(); - s2 += "}\n"; - m_default_vertex_shader = g_gfx->CreateShaderFromSource(ShaderStage::Vertex, s2, - "Default post-processing vertex shader"); - - // OpenGL Y needs to be inverted once only (in the last pass) - if (g_ActiveConfig.backend_info.api_type == APIType::OpenGL) - ss << " opos.y = -opos.y;\n"; + } + // OpenGL Y needs to be inverted in all passes except the last one + else if (g_ActiveConfig.backend_info.api_type == APIType::OpenGL) + { + ss << " if (intermediary_buffer != 0)\n"; + ss << " opos.y = -opos.y;\n"; + } ss << "}\n"; + return ss.str(); +} +bool PostProcessing::CompileVertexShader() +{ + 
std::ostringstream ss_default; + ss_default << GetUniformBufferHeader(false); + ss_default << GetVertexShaderBody(); + m_default_vertex_shader = g_gfx->CreateShaderFromSource(ShaderStage::Vertex, ss_default.str(), + "Default post-processing vertex shader"); + + std::ostringstream ss; + ss << GetUniformBufferHeader(true); + ss << GetVertexShaderBody(); m_vertex_shader = g_gfx->CreateShaderFromSource(ShaderStage::Vertex, ss.str(), "Post-processing vertex shader"); + if (!m_default_vertex_shader || !m_vertex_shader) { PanicAlertFmt("Failed to compile post-processing vertex shader"); @@ -829,6 +842,8 @@ struct BuiltinUniforms std::array src_rect; s32 src_layer; u32 time; + s32 graphics_api; + s32 intermediary_buffer; s32 resampling_method; s32 correct_color_space; s32 game_color_space; @@ -853,7 +868,7 @@ void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle& src, const AbstractTexture* src_tex, int src_layer, const MathUtil::Rectangle& dst, const MathUtil::Rectangle& wnd, u8* buffer, - bool user_post_process) + bool user_post_process, bool intermediary_buffer) { const float rcp_src_width = 1.0f / src_tex->GetWidth(); const float rcp_src_height = 1.0f / src_tex->GetHeight(); @@ -874,6 +889,8 @@ void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle& src, static_cast(src.GetHeight()) * rcp_src_height}; builtin_uniforms.src_layer = static_cast(src_layer); builtin_uniforms.time = static_cast(m_timer.ElapsedMs()); + builtin_uniforms.graphics_api = static_cast(g_ActiveConfig.backend_info.api_type); + builtin_uniforms.intermediary_buffer = static_cast(intermediary_buffer); builtin_uniforms.resampling_method = static_cast(g_ActiveConfig.output_resampling_mode); // Color correction related uniforms. 
@@ -898,6 +915,8 @@ void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle& src, std::memcpy(buffer, &builtin_uniforms, sizeof(builtin_uniforms)); buffer += sizeof(builtin_uniforms); + // Don't include the custom pp shader options if they are not necessary, + // having mismatching uniforms between different shaders can cause issues on some backends if (!user_post_process) return; @@ -1015,8 +1034,7 @@ bool PostProcessing::CompilePipeline() const bool needs_intermediary_buffer = NeedsIntermediaryBuffer(); AbstractPipelineConfig config = {}; - config.vertex_shader = - needs_intermediary_buffer ? m_vertex_shader.get() : m_default_vertex_shader.get(); + config.vertex_shader = m_default_vertex_shader.get(); // This geometry shader will take care of reading both layer 0 and 1 on the source texture, // and writing to both layer 0 and 1 on the render target. config.geometry_shader = UseGeometryShaderForPostProcess(needs_intermediary_buffer) ? @@ -1033,7 +1051,7 @@ bool PostProcessing::CompilePipeline() if (config.pixel_shader) m_default_pipeline = g_gfx->CreatePipeline(config); - config.vertex_shader = m_default_vertex_shader.get(); + config.vertex_shader = m_vertex_shader.get(); config.geometry_shader = UseGeometryShaderForPostProcess(false) ? 
g_shader_cache->GetTexcoordGeometryShader() : nullptr; diff --git a/Source/Core/VideoCommon/PostProcessing.h b/Source/Core/VideoCommon/PostProcessing.h index b8b648d2d0..b636f3b133 100644 --- a/Source/Core/VideoCommon/PostProcessing.h +++ b/Source/Core/VideoCommon/PostProcessing.h @@ -124,7 +124,8 @@ protected: size_t CalculateUniformsSize(bool user_post_process) const; void FillUniformBuffer(const MathUtil::Rectangle& src, const AbstractTexture* src_tex, int src_layer, const MathUtil::Rectangle& dst, - const MathUtil::Rectangle& wnd, u8* buffer, bool user_post_process); + const MathUtil::Rectangle& wnd, u8* buffer, bool user_post_process, + bool intermediary_buffer); // Timer for determining our time value Common::Timer m_timer;