From 2edf81cdb054c1db7a32e45c87514f8d4c11f5d9 Mon Sep 17 00:00:00 2001
From: Filoppi <filippotarpini@hotmail.it>
Date: Tue, 27 Jun 2023 03:23:08 +0300
Subject: [PATCH] Video: implement output resampling (upscaling/downscaling)
 methods

---
 .../Sys/Shaders/default_pre_post_process.glsl | 289 +++++++++++++++++-
 Data/Sys/Shaders/sharp_bilinear.glsl          |  47 ---
 Source/Core/Core/Config/GraphicsSettings.cpp  |   2 +
 Source/Core/Core/Config/GraphicsSettings.h    |   2 +
 .../Config/Graphics/EnhancementsWidget.cpp    |  47 +++
 .../Config/Graphics/EnhancementsWidget.h      |   1 +
 Source/Core/VideoCommon/PostProcessing.cpp    |  41 ++-
 Source/Core/VideoCommon/VideoConfig.cpp       |   1 +
 Source/Core/VideoCommon/VideoConfig.h         |  12 +
 9 files changed, 371 insertions(+), 71 deletions(-)
 delete mode 100644 Data/Sys/Shaders/sharp_bilinear.glsl

diff --git a/Data/Sys/Shaders/default_pre_post_process.glsl b/Data/Sys/Shaders/default_pre_post_process.glsl
index 58ed50d53c..1a1c3ecbe1 100644
--- a/Data/Sys/Shaders/default_pre_post_process.glsl
+++ b/Data/Sys/Shaders/default_pre_post_process.glsl
@@ -1,4 +1,4 @@
-// References:
+// Color Space references:
 // https://www.unravel.com.au/understanding-color-spaces
 
 // SMPTE 170M - BT.601 (NTSC-M) -> BT.709
@@ -21,8 +21,8 @@ mat3 from_PAL = transpose(mat3(
 
 float3 LinearTosRGBGamma(float3 color)
 {
-	float a = 0.055;
-	
+	const float a = 0.055;
+
 	for (int i = 0; i < 3; ++i)
 	{
 		float x = color[i];
@@ -36,17 +36,284 @@ float3 LinearTosRGBGamma(float3 color)
 	return color;
 }
 
+// Unfiltered (nearest neighbor) sample of samp1 at uvw, with rgb raised to the power of "gamma".
+float4 QuickSample(float3 uvw, float gamma)
+{
+	float4 color = texture(samp1, uvw);
+	color.rgb = pow(color.rgb, float3(gamma)); // Alpha is left untouched
+	return color;
+}
+
+float4 QuickSample(float2 uv, float w, float gamma) // Same, with uv and the third coordinate split
+{
+	return QuickSample(float3(uv, w), gamma);
+}
+
+float4 BilinearSample(float3 uvw, float gamma)
+{
+	// This emulates the (bi)linear filtering GPUs do in HW, except that the gamma is applied to each
+	// of the 4 samples (by QuickSample) before they are blended together. Note that GPUs might natively
+	// filter red, green and blue differently, or filter upscaling and downscaling differently; we don't.
+	
+	float2 source_size = GetResolution();
+	float2 inverted_source_size = GetInvResolution();
+	float2 pixel = (uvw.xy * source_size) - 0.5; // Shifted so that floor() gives the top left sample
+
+	// Split into the integer part (top left sample) and the fractional part (blend weights)
+	float2 int_pixel = floor(pixel);
+	float2 frac_pixel = fract(pixel);
+
+	// Take 4 samples, at their pixel centers, around the original uvw
+	float4 c11 = QuickSample((int_pixel + float2(0.5, 0.5)) * inverted_source_size, uvw.z, gamma);
+	float4 c21 = QuickSample((int_pixel + float2(1.5, 0.5)) * inverted_source_size, uvw.z, gamma);
+	float4 c12 = QuickSample((int_pixel + float2(0.5, 1.5)) * inverted_source_size, uvw.z, gamma);
+	float4 c22 = QuickSample((int_pixel + float2(1.5, 1.5)) * inverted_source_size, uvw.z, gamma);
+
+	// Blend the two rows horizontally first, then blend their results vertically
+	return lerp(lerp(c11, c21, frac_pixel.x), lerp(c12, c22, frac_pixel.x), frac_pixel.y);
+}
+
+// Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang
+// by Themaister, Public Domain license
+// Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale,
+// giving a sharper image than plain bilinear.
+float4 SharpBilinearSample(float3 uvw, float gamma)
+{
+	float2 source_size = GetResolution();
+	float2 inverted_source_size = GetInvResolution();
+	float2 target_size = GetWindowResolution();
+	float2 texel = uvw.xy * source_size;
+	float2 texel_floored = floor(texel);
+	float2 s = fract(texel);
+	float scale = ceil(max(target_size.x * inverted_source_size.x, target_size.y * inverted_source_size.y)); // Largest target/source ratio, rounded up
+	float region_range = 0.5 - (0.5 / scale); // Half size of the flat region around each texel center
+
+	// Figure out where in the texel to sample to get correct pre-scaled bilinear.
+	// f stays at 0.5 (texel center) within region_range of the center, and ramps to 0.0 or 1.0 outside.
+	float2 center_dist = s - 0.5;
+	float2 f = ((center_dist - clamp(center_dist, -region_range, region_range)) * scale) + 0.5;
+
+	float2 mod_texel = texel_floored + f;
+
+	uvw.xy = mod_texel * inverted_source_size;
+	return BilinearSample(uvw, gamma); // Manual bilinear (4 samples) at the adjusted position
+}
+
+float4 Cubic(float v) // Cubic B-spline weights of 4 consecutive samples, for a fractional position v
+{
+	float4 n = float4(1.0, 2.0, 3.0, 4.0) - v;
+	float4 s = n * n * n;
+	float x = s.x;
+	float y = s.y - 4.0 * s.x;
+	float z = s.z - 4.0 * s.y + 6.0 * s.x;
+	float w = 6.0 - x - y - z; // x + y + z + w == 6.0, so the returned weights add up to 1.0
+	return float4(x, y, z, w) * (1.0 / 6.0);
+}
+
+// Bicubic (B-spline) from 4 bilinear taps: https://stackoverflow.com/questions/13501081/efficient-bicubic-filtering-code-in-glsl
+float4 BicubicSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float gamma)
+{
+	float2 pixel = (uvw.xy * in_source_resolution) - 0.5;
+	float2 int_pixel = floor(pixel);
+	float2 frac_pixel = fract(pixel);
+
+	float4 xcubic = Cubic(frac_pixel.x);
+	float4 ycubic = Cubic(frac_pixel.y);
+	// Merge each pair of neighboring weights into one tap: s is the pair weight, offset its position.
+	float4 c = float4(int_pixel.x - 0.5, int_pixel.x + 1.5, int_pixel.y - 0.5, int_pixel.y + 1.5);
+	float4 s = float4(xcubic.x + xcubic.y, xcubic.z + xcubic.w, ycubic.x + ycubic.y, ycubic.z + ycubic.w);
+	float4 offset = c + float4(xcubic.y, xcubic.w, ycubic.y, ycubic.w) / s;
+
+	offset *= float4(in_inverted_source_resolution.x, in_inverted_source_resolution.x, in_inverted_source_resolution.y, in_inverted_source_resolution.y);
+	// Each tap must blend its two texels per axis, so it can't be a QuickSample() (nearest neighbor).
+	float4 sample0 = BilinearSample(float3(offset.xz, uvw.z), gamma);
+	float4 sample1 = BilinearSample(float3(offset.yz, uvw.z), gamma);
+	float4 sample2 = BilinearSample(float3(offset.xw, uvw.z), gamma);
+	float4 sample3 = BilinearSample(float3(offset.yw, uvw.z), gamma);
+	// NOTE: BilinearSample() reads GetResolution(), which is what the caller passes as in_source_resolution.
+	float sx = s.x / (s.x + s.y);
+	float sy = s.z / (s.z + s.w);
+
+	return lerp(lerp(sample3, sample2, sx), lerp(sample1, sample0, sx), sy);
+}
+
+float4 CubicHermite(float4 A, float4 B, float4 C, float4 D, float t) // Cubic curve going from B (t = 0) to C (t = 1)
+{
+	float t2 = t * t;
+	float t3 = t * t * t;
+	float4 a = (-A / 2.0) + ((3.0 * B) / 2.0) - ((3.0 * C) / 2.0) + (D / 2.0);
+	float4 b = A - ((5.0 * B) / 2.0 ) + (2.0 * C) - (D / 2.0);
+	float4 c = (-A / 2.0) + (C / 2.0); // Slope at t = 0: half the difference between C and A
+	float4 d = B; // Value at t = 0
+
+	return (a * t3) + (b * t2) + (c * t) + d;
+}
+
+float4 BicubicHermiteSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float gamma)
+{
+	float2 pixel = (uvw.xy * in_source_resolution) + 0.5;
+	float2 frac_pixel = fract(pixel);
+	float2 uv = (floor(pixel) * in_inverted_source_resolution) - (in_inverted_source_resolution / 2.0); // Center of the pixel at floor(pixel) - 1
+	// Bicubic: 4x4 samples (cXY = column X, row Y, c11 being the one at uv) blended with CubicHermite.
+	float2 inverted_source_resolution_double = in_inverted_source_resolution * 2.0;
+
+	float4 c00 = QuickSample(uv + float2(-in_inverted_source_resolution.x,			-in_inverted_source_resolution.y), uvw.z, gamma);
+	float4 c10 = QuickSample(uv + float2(	0.0,																	-in_inverted_source_resolution.y), uvw.z, gamma);
+	float4 c20 = QuickSample(uv + float2(	in_inverted_source_resolution.x,			-in_inverted_source_resolution.y), uvw.z, gamma);
+	float4 c30 = QuickSample(uv + float2(	inverted_source_resolution_double.x,	-in_inverted_source_resolution.y), uvw.z, gamma);
+	
+	float4 c01 = QuickSample(uv + float2(-in_inverted_source_resolution.x,			0.0), uvw.z, gamma);
+	float4 c11 = QuickSample(uv + float2(	0.0,																	0.0), uvw.z, gamma);
+	float4 c21 = QuickSample(uv + float2(	in_inverted_source_resolution.x,			0.0), uvw.z, gamma);
+	float4 c31 = QuickSample(uv + float2(	inverted_source_resolution_double.x,	0.0), uvw.z, gamma);
+	
+	float4 c02 = QuickSample(uv + float2(-in_inverted_source_resolution.x,			in_inverted_source_resolution.y), uvw.z, gamma);
+	float4 c12 = QuickSample(uv + float2(	0.0,																	in_inverted_source_resolution.y), uvw.z, gamma);
+	float4 c22 = QuickSample(uv + float2(	in_inverted_source_resolution.x,			in_inverted_source_resolution.y), uvw.z, gamma);
+	float4 c32 = QuickSample(uv + float2(	inverted_source_resolution_double.x,	in_inverted_source_resolution.y), uvw.z, gamma);
+	
+	float4 c03 = QuickSample(uv + float2(-in_inverted_source_resolution.x,			inverted_source_resolution_double.y), uvw.z, gamma);
+	float4 c13 = QuickSample(uv + float2(	0.0,																	inverted_source_resolution_double.y), uvw.z, gamma);
+	float4 c23 = QuickSample(uv + float2(	in_inverted_source_resolution.x,			inverted_source_resolution_double.y), uvw.z, gamma);
+	float4 c33 = QuickSample(uv + float2(	inverted_source_resolution_double.x,	inverted_source_resolution_double.y), uvw.z, gamma);
+	// Interpolate each of the 4 rows horizontally, then interpolate their results vertically.
+	float4 cp0x = CubicHermite(c00, c10, c20, c30, frac_pixel.x);
+	float4 cp1x = CubicHermite(c01, c11, c21, c31, frac_pixel.x);
+	float4 cp2x = CubicHermite(c02, c12, c22, c32, frac_pixel.x);
+	float4 cp3x = CubicHermite(c03, c13, c23, c33, frac_pixel.x);
+
+	return CubicHermite(cp0x, cp1x, cp2x, cp3x, frac_pixel.y);
+}
+
+// Mitchell-Netravali (BC-spline) kernel weight for a sample at distance x.
+// B = 0.0 and C = 0.5 give Catmull-Rom. The kernel is symmetric and 0.0 for |x| >= 2.0.
+// Only float literals are used: GLSL ES has no implicit int -> float conversion.
+float CatmullRom(float B, float C, float x)
+{
+	float f = abs(x);
+
+	if (f < 1.0)
+	{
+		return ((12.0 - 9.0 * B - 6.0 * C) * (f * f * f) +
+			(-18.0 + 12.0 * B + 6.0 * C) * (f * f) +
+			(6.0 - 2.0 * B)) / 6.0;
+	}
+	else if (f < 2.0)
+	{
+		return ((-B - 6.0 * C) * (f * f * f) +
+			(6.0 * B + 30.0 * C) * (f * f) +
+			(-12.0 * B - 48.0 * C) * f +
+			8.0 * B + 24.0 * C) / 6.0;
+	}
+	else
+	{
+		return 0.0;
+	}
+}
+
+// https://www.codeproject.com/Articles/236394/Bi-Cubic-and-Bi-Linear-Interpolation-with-GLSL
+// https://github.com/ValveSoftware/gamescope/pull/740
+float4 BicubicCatmullRomSample(float3 uvw, float2 in_source_resolution, float2 in_inverted_source_resolution, float gamma)
+{
+	const float offset = 0.5;
+	float2 pixel = (uvw.xy * in_source_resolution) - offset;
+	float2 int_pixel = floor(pixel);
+	float2 frac_pixel = fract(pixel);
+	float2 int_uvw = (int_pixel + offset) * in_inverted_source_resolution; // Center of the pixel at int_pixel
+
+	// B and C can be any value between 0 and 1,
+	// though they are meant to be 0 and 0.5 for Catmull-Rom.
+	// https://en.wikipedia.org/wiki/Mitchell%E2%80%93Netravali_filters
+	// https://guideencodemoe-mkdocs.readthedocs.io/encoding/resampling/
+	const float B = 0.0;
+	const float C = 0.5;
+
+	// Take 16 (4x4) samples around int_uvw, each weighted by the product of its horizontal and
+	// vertical kernel values, then normalize by the sum of the weights (any other kernel would work).
+	float4 color_sum = float4(0.0, 0.0, 0.0, 0.0);
+	float4 color_denominator = float4(0.0, 0.0, 0.0, 0.0);
+	for (int m = -1; m <= 2; m++)
+	{
+		for (int n = -1; n <= 2; n++)
+		{
+			float4 color = QuickSample(int_uvw + (float2(m, n) * in_inverted_source_resolution), uvw.z, gamma);
+			float f1 = CatmullRom(B, C, float(m) - frac_pixel.x); // Horizontal distance from the sample
+			float f2 = CatmullRom(B, C, -float(n) + frac_pixel.y); // Vertical one (the kernel ignores the sign)
+			float4 cooef1 = float4(f1, f1, f1, f1);
+			float4 cooef2 = float4(f2, f2, f2, f2);
+			color_sum += color * (cooef2 * cooef1);
+			color_denominator += cooef2 * cooef1;
+		}
+	}
+	return color_sum / color_denominator;
+}
+
+// Returns an accurate (gamma corrected) sample of a gamma space texture,
+// taken with the method selected by "resampling_method". Outputs in linear space for simplicity.
+float4 LinearGammaCorrectedSample(float gamma)
+{
+	float3 uvw = v_tex0;
+	float4 color = float4(0, 0, 0, 1); // Returned as is (opaque black) if no branch below matches
+	// The values compared against follow the order of the C++ "OutputResamplingMode" enum.
+	if (resampling_method <= 1) // Bilinear (also reached with 0, "Default", when main() forces resampling)
+	{
+		color = BilinearSample(uvw, gamma);
+	}
+	else if (resampling_method == 2) // "Simple" Bicubic
+	{
+		color = BicubicSample(uvw, GetResolution(), GetInvResolution(), gamma);
+	}
+	else if (resampling_method == 3) // Hermite
+	{
+		color = BicubicHermiteSample(uvw, GetResolution(), GetInvResolution(), gamma);
+	}
+	else if (resampling_method == 4) // Catmull-Rom
+	{
+		color = BicubicCatmullRomSample(uvw, GetResolution(), GetInvResolution(), gamma);
+	}
+	else if (resampling_method == 5) // Nearest Neighbor
+	{
+		color = QuickSample(uvw, gamma);
+	}
+	else if (resampling_method == 6) // Sharp Bilinear
+	{
+		color = SharpBilinearSample(uvw, gamma);
+	}
+
+	return color;
+}
+
 void main()
 {
-	// Note: sampling in gamma space is "wrong" if the source
-	// and target resolution don't match exactly.
-	// Fortunately at the moment here they always should but to do this correctly,
-	// we'd need to sample from 4 pixels, de-apply the gamma from each of these,
-	// and then do linear sampling on their corrected value.
-	float4 color = Sample();
+	// This tries to fall back on GPU HW sampling if it can (it won't be gamma corrected).
+	bool raw_resampling = resampling_method <= 0;
+	bool needs_rescaling = GetResolution() != GetWindowResolution();
 
-	// Convert to linear space to do any other kind of operation
-	color.rgb = pow(color.rgb, float3(game_gamma));
+	bool needs_resampling = needs_rescaling && (OptionEnabled(hdr_output) || OptionEnabled(correct_gamma) || !raw_resampling);
+
+	float4 color;
+
+	if (needs_resampling)
+	{
+		// Doing linear sampling in "gamma space" on linear texture formats isn't correct.
+		// If the source and target resolutions don't match, the GPU will return a color
+		// that is the average of 4 gamma space colors, but gamma space colors can't be blended together:
+		// gamma needs to be de-applied first. This makes a big difference if colors change
+		// drastically between two pixels.
+
+		color = LinearGammaCorrectedSample(game_gamma);
+	}
+	else
+	{
+		// Default GPU HW sampling. Bilinear is identical to Nearest Neighbor if the input and output resolutions match.
+		if (needs_rescaling)
+			color = texture(samp0, v_tex0);
+		else
+			color = texture(samp1, v_tex0);
+		
+		// Convert to linear space before doing any follow-up operations.
+		color.rgb = pow(color.rgb, float3(game_gamma));
+	}
 
 	if (OptionEnabled(correct_color_space))
 	{
diff --git a/Data/Sys/Shaders/sharp_bilinear.glsl b/Data/Sys/Shaders/sharp_bilinear.glsl
deleted file mode 100644
index e94748b959..0000000000
--- a/Data/Sys/Shaders/sharp_bilinear.glsl
+++ /dev/null
@@ -1,47 +0,0 @@
-// Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang
-// by Themaister, Public Domain license
-// Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale,
-// giving a sharper image than plain bilinear.
-
-/*
-[configuration]
-[OptionRangeFloat]
-GUIName = Prescale Factor (set to 0 for automatic)
-OptionName = PRESCALE_FACTOR
-MinValue = 0.0
-MaxValue = 16.0
-StepAmount = 1.0
-DefaultValue = 0.0
-[/configuration]
-*/
-
-float CalculatePrescale(float config_scale) {
-  if (config_scale == 0.0) {
-    float2 source_size = GetResolution();
-    float2 window_size = GetWindowResolution();
-    return ceil(max(window_size.x / source_size.x, window_size.y / source_size.y));
-  } else {
-    return config_scale;
-  }
-}
-
-void main()
-{
-  float2 source_size = GetResolution();
-  float2 texel = GetCoordinates() * source_size;
-  float2 texel_floored = floor(texel);
-  float2 s = fract(texel);
-  float config_scale = GetOption(PRESCALE_FACTOR);
-  float scale = CalculatePrescale(config_scale);
-  float region_range = 0.5 - 0.5 / scale;
-
-  // Figure out where in the texel to sample to get correct pre-scaled bilinear.
-  // Uses the hardware bilinear interpolator to avoid having to sample 4 times manually.
-
-  float2 center_dist = s - 0.5;
-  float2 f = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
-
-  float2 mod_texel = texel_floored + f;
-
-  SetOutput(SampleLocation(mod_texel / source_size));
-}
diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp
index 9a0dfbcd26..ad0059ca6d 100644
--- a/Source/Core/Core/Config/GraphicsSettings.cpp
+++ b/Source/Core/Core/Config/GraphicsSettings.cpp
@@ -118,6 +118,8 @@ const Info<std::string> GFX_DRIVER_LIB_NAME{{System::GFX, "Settings", "DriverLib
 const Info<TextureFilteringMode> GFX_ENHANCE_FORCE_TEXTURE_FILTERING{
     {System::GFX, "Enhancements", "ForceTextureFiltering"}, TextureFilteringMode::Default};
 const Info<int> GFX_ENHANCE_MAX_ANISOTROPY{{System::GFX, "Enhancements", "MaxAnisotropy"}, 0};
+const Info<OutputResamplingMode> GFX_ENHANCE_OUTPUT_RESAMPLING{
+    {System::GFX, "Enhancements", "OutputResampling"}, OutputResamplingMode::Default};
 const Info<std::string> GFX_ENHANCE_POST_SHADER{
     {System::GFX, "Enhancements", "PostProcessingShader"}, ""};
 const Info<bool> GFX_ENHANCE_FORCE_TRUE_COLOR{{System::GFX, "Enhancements", "ForceTrueColor"},
diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h
index 009b219136..165114e419 100644
--- a/Source/Core/Core/Config/GraphicsSettings.h
+++ b/Source/Core/Core/Config/GraphicsSettings.h
@@ -11,6 +11,7 @@ enum class AspectMode : int;
 enum class ShaderCompilationMode : int;
 enum class StereoMode : int;
 enum class TextureFilteringMode : int;
+enum class OutputResamplingMode : int;
 enum class ColorCorrectionRegion : int;
 enum class TriState : int;
 
@@ -101,6 +102,7 @@ extern const Info<bool> GFX_MODS_ENABLE;
 
 extern const Info<TextureFilteringMode> GFX_ENHANCE_FORCE_TEXTURE_FILTERING;
 extern const Info<int> GFX_ENHANCE_MAX_ANISOTROPY;  // NOTE - this is x in (1 << x)
+extern const Info<OutputResamplingMode> GFX_ENHANCE_OUTPUT_RESAMPLING;
 extern const Info<std::string> GFX_ENHANCE_POST_SHADER;
 extern const Info<bool> GFX_ENHANCE_FORCE_TRUE_COLOR;
 extern const Info<bool> GFX_ENHANCE_DISABLE_COPY_FILTER;
diff --git a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp
index 47e22ade53..67fd7da7b9 100644
--- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp
+++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp
@@ -105,6 +105,22 @@ void EnhancementsWidget::CreateWidgets()
   m_texture_filtering_combo->addItem(tr("Force Linear and 16x Anisotropic"),
                                      TEXTURE_FILTERING_FORCE_LINEAR_ANISO_16X);
 
+  m_output_resampling_combo = new ToolTipComboBox();
+  m_output_resampling_combo->addItem(tr("Default"),
+                                     static_cast<int>(OutputResamplingMode::Default));
+  m_output_resampling_combo->addItem(tr("Bilinear"),
+                                     static_cast<int>(OutputResamplingMode::Bilinear));
+  m_output_resampling_combo->addItem(tr("Bicubic"),
+                                     static_cast<int>(OutputResamplingMode::Bicubic));
+  m_output_resampling_combo->addItem(tr("Hermite"),
+                                     static_cast<int>(OutputResamplingMode::Hermite));
+  m_output_resampling_combo->addItem(tr("Catmull-Rom"),
+                                     static_cast<int>(OutputResamplingMode::CatmullRom));
+  m_output_resampling_combo->addItem(tr("Nearest Neighbor"),
+                                     static_cast<int>(OutputResamplingMode::NearestNeighbor));
+  m_output_resampling_combo->addItem(tr("Sharp Bilinear"),
+                                     static_cast<int>(OutputResamplingMode::SharpBilinear));
+
   m_configure_color_correction = new ToolTipPushButton(tr("Configure"));
 
   m_pp_effect = new ToolTipComboBox();
@@ -136,6 +152,10 @@ void EnhancementsWidget::CreateWidgets()
   enhancements_layout->addWidget(m_texture_filtering_combo, row, 1, 1, -1);
   ++row;
 
+  enhancements_layout->addWidget(new QLabel(tr("Output Resampling:")), row, 0);
+  enhancements_layout->addWidget(m_output_resampling_combo, row, 1, 1, -1);
+  ++row;
+
   enhancements_layout->addWidget(new QLabel(tr("Color Correction:")), row, 0);
   enhancements_layout->addWidget(m_configure_color_correction, row, 1, 1, -1);
   ++row;
@@ -195,6 +215,8 @@ void EnhancementsWidget::ConnectWidgets()
           [this](int) { SaveSettings(); });
   connect(m_texture_filtering_combo, qOverload<int>(&QComboBox::currentIndexChanged),
           [this](int) { SaveSettings(); });
+  connect(m_output_resampling_combo, qOverload<int>(&QComboBox::currentIndexChanged),
+          [this](int) { SaveSettings(); });
   connect(m_pp_effect, qOverload<int>(&QComboBox::currentIndexChanged),
           [this](int) { SaveSettings(); });
   connect(m_3d_mode, qOverload<int>(&QComboBox::currentIndexChanged), [this] {
@@ -325,6 +347,14 @@ void EnhancementsWidget::LoadSettings()
     break;
   }
 
+  // Resampling
+  const OutputResamplingMode output_resampling_mode =
+      Config::Get(Config::GFX_ENHANCE_OUTPUT_RESAMPLING);
+  m_output_resampling_combo->setCurrentIndex(static_cast<int>(output_resampling_mode));
+
+  m_output_resampling_combo->setEnabled(g_Config.backend_info.bSupportsPostProcessing);
+
+  // Color Correction
   m_configure_color_correction->setEnabled(g_Config.backend_info.bSupportsPostProcessing);
 
   // Post Processing Shader
@@ -413,6 +443,10 @@ void EnhancementsWidget::SaveSettings()
     break;
   }
 
+  const int output_resampling_selection = m_output_resampling_combo->currentData().toInt();
+  Config::SetBaseOrCurrent(Config::GFX_ENHANCE_OUTPUT_RESAMPLING,
+                           static_cast<OutputResamplingMode>(output_resampling_selection));
+
   const bool anaglyph = g_Config.stereo_mode == StereoMode::Anaglyph;
   const bool passive = g_Config.stereo_mode == StereoMode::Passive;
   Config::SetBaseOrCurrent(Config::GFX_ENHANCE_POST_SHADER,
@@ -455,6 +489,16 @@ void EnhancementsWidget::AddDescriptions()
       "scaling filter selected by the game.<br><br>Any option except 'Default' will alter the look "
       "of the game's textures and might cause issues in a small number of "
       "games.<br><br><dolphin_emphasis>If unsure, select 'Default'.</dolphin_emphasis>");
+  static const char TR_OUTPUT_RESAMPLING_DESCRIPTION[] = QT_TR_NOOP(
+      "Affects how the game output image is upscaled or downscaled to the window resolution.<br>"
+      "\"Default\" will rely on the GPU internal bilinear sampler which isn't gamma corrected."
+      "<br>\"Bilinear\" (gamma corrected) is a good compromise between quality and performance."
+      "<br>\"Bicubic\" is smoother than \"Bilinear\"."
+      "<br>\"Hermite\" might offer the best quality when upscaling,"
+      " at a slightly bigger performance cost.<br>\"Catmull-Rom\" is best for downscaling."
+      "<br>\"Nearest Neighbor\" doesn't do any resampling, select if you like a pixelated look."
+      "<br>\"Sharp Bilinear\" works best with 2D games at low resolutions, use if you like a"
+      " sharp look.<br><br><dolphin_emphasis>If unsure, select 'Default'.</dolphin_emphasis>");
   static const char TR_COLOR_CORRECTION_DESCRIPTION[] =
       QT_TR_NOOP("A group of features to make the colors more accurate, matching the color space "
                  "Wii and GC games were meant for.");
@@ -537,6 +581,9 @@ void EnhancementsWidget::AddDescriptions()
   m_texture_filtering_combo->SetTitle(tr("Texture Filtering"));
   m_texture_filtering_combo->SetDescription(tr(TR_FORCE_TEXTURE_FILTERING_DESCRIPTION));
 
+  m_output_resampling_combo->SetTitle(tr("Output Resampling"));
+  m_output_resampling_combo->SetDescription(tr(TR_OUTPUT_RESAMPLING_DESCRIPTION));
+
   m_configure_color_correction->SetTitle(tr("Color Correction"));
   m_configure_color_correction->SetDescription(tr(TR_COLOR_CORRECTION_DESCRIPTION));
 
diff --git a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h
index 4d2cdcd5b5..34879d17b7 100644
--- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h
+++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.h
@@ -39,6 +39,7 @@ private:
   ConfigChoice* m_ir_combo;
   ToolTipComboBox* m_aa_combo;
   ToolTipComboBox* m_texture_filtering_combo;
+  ToolTipComboBox* m_output_resampling_combo;
   ToolTipComboBox* m_pp_effect;
   ToolTipPushButton* m_configure_color_correction;
   QPushButton* m_configure_pp_effect;
diff --git a/Source/Core/VideoCommon/PostProcessing.cpp b/Source/Core/VideoCommon/PostProcessing.cpp
index c3a245629f..1de4c311c2 100644
--- a/Source/Core/VideoCommon/PostProcessing.cpp
+++ b/Source/Core/VideoCommon/PostProcessing.cpp
@@ -486,23 +486,29 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
 
   MathUtil::Rectangle<int> src_rect = src;
   g_gfx->SetSamplerState(0, RenderState::GetLinearSamplerState());
+  g_gfx->SetSamplerState(1, RenderState::GetPointSamplerState());
   g_gfx->SetTexture(0, src_tex);
+  g_gfx->SetTexture(1, src_tex);
 
-  const bool is_color_correction_active = IsColorCorrectionActive();
+  const bool needs_color_correction = IsColorCorrectionActive();
+  // Rely on the default (bi)linear sampler with the default mode
+  // (it might not be gamma corrected).
+  const bool needs_resampling =
+      g_ActiveConfig.output_resampling_mode > OutputResamplingMode::Default;
   const bool needs_intermediary_buffer = NeedsIntermediaryBuffer();
+  const bool needs_default_pipeline = needs_color_correction || needs_resampling;
   const AbstractPipeline* final_pipeline = m_pipeline.get();
   std::vector<u8>* uniform_staging_buffer = &m_default_uniform_staging_buffer;
   bool default_uniform_staging_buffer = true;
+  const MathUtil::Rectangle<int> present_rect = g_presenter->GetTargetRectangle();
 
   // Intermediary pass.
-  // We draw to a high quality intermediary texture for two reasons:
+  // We draw to a high quality intermediary texture for a few reasons:
+  // -Consistently do high quality gamma corrected resampling (upscaling/downscaling)
   // -Keep quality for gamma and gamut conversions, and HDR output
   //  (low bit depths lose too much quality with gamma conversions)
-  // -We make a texture of the exact same res as the source one,
-  //  because all the post process shaders we already had assume that
-  //  the source texture size (EFB) is different from the swap chain
-  //  texture size (which matches the window size).
-  if (m_default_pipeline && is_color_correction_active && needs_intermediary_buffer)
+  // -Keep the post process phase in linear space, to better operate with colors
+  if (m_default_pipeline && needs_default_pipeline && needs_intermediary_buffer)
   {
     AbstractFramebuffer* const previous_framebuffer = g_gfx->GetCurrentFramebuffer();
 
@@ -512,13 +518,18 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
     // so it would be a waste to allocate two layers (see "bUsesExplictQuadBuffering").
     const u32 target_layers = copy_all_layers ? src_tex->GetLayers() : 1;
 
+    const u32 target_width =
+        needs_resampling ? present_rect.GetWidth() : static_cast<u32>(src_rect.GetWidth());
+    const u32 target_height =
+        needs_resampling ? present_rect.GetHeight() : static_cast<u32>(src_rect.GetHeight());
+
     if (!m_intermediary_frame_buffer || !m_intermediary_color_texture ||
-        m_intermediary_color_texture.get()->GetWidth() != static_cast<u32>(src_rect.GetWidth()) ||
-        m_intermediary_color_texture.get()->GetHeight() != static_cast<u32>(src_rect.GetHeight()) ||
+        m_intermediary_color_texture.get()->GetWidth() != target_width ||
+        m_intermediary_color_texture.get()->GetHeight() != target_height ||
         m_intermediary_color_texture.get()->GetLayers() != target_layers)
     {
       const TextureConfig intermediary_color_texture_config(
-          src_rect.GetWidth(), src_rect.GetHeight(), 1, target_layers, src_tex->GetSamples(),
+          target_width, target_height, 1, target_layers, src_tex->GetSamples(),
           s_intermediary_buffer_format, AbstractTextureFlag_RenderTarget);
       m_intermediary_color_texture = g_gfx->CreateTexture(intermediary_color_texture_config,
                                                           "Intermediary post process texture");
@@ -530,7 +541,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
     g_gfx->SetFramebuffer(m_intermediary_frame_buffer.get());
 
     FillUniformBuffer(src_rect, src_tex, src_layer, g_gfx->GetCurrentFramebuffer()->GetRect(),
-                      g_presenter->GetTargetRectangle(), uniform_staging_buffer->data(),
+                      present_rect, uniform_staging_buffer->data(),
                       !default_uniform_staging_buffer);
     g_vertex_manager->UploadUtilityUniforms(uniform_staging_buffer->data(),
                                             static_cast<u32>(uniform_staging_buffer->size()));
@@ -544,6 +555,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
     src_rect = m_intermediary_color_texture->GetRect();
     src_tex = m_intermediary_color_texture.get();
     g_gfx->SetTexture(0, src_tex);
+    g_gfx->SetTexture(1, src_tex);
     // The "m_intermediary_color_texture" has already copied
     // from the specified source layer onto its first one.
     // If we query for a layer that the source texture doesn't have,
@@ -557,7 +569,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
     // If we have no custom user shader selected, and color correction
     // is active, directly run the fixed pipeline shader instead of
     // doing two passes, with the second one doing nothing useful.
-    if (m_default_pipeline && is_color_correction_active)
+    if (m_default_pipeline && needs_default_pipeline)
     {
       final_pipeline = m_default_pipeline.get();
     }
@@ -580,7 +592,7 @@ void PostProcessing::BlitFromTexture(const MathUtil::Rectangle<int>& dst,
   if (final_pipeline)
   {
     FillUniformBuffer(src_rect, src_tex, src_layer, g_gfx->GetCurrentFramebuffer()->GetRect(),
-                      g_presenter->GetTargetRectangle(), uniform_staging_buffer->data(),
+                      present_rect, uniform_staging_buffer->data(),
                       !default_uniform_staging_buffer);
     g_vertex_manager->UploadUtilityUniforms(uniform_staging_buffer->data(),
                                             static_cast<u32>(uniform_staging_buffer->size()));
@@ -610,6 +622,7 @@ std::string PostProcessing::GetUniformBufferHeader(bool user_post_process) const
   ss << "  int src_layer;\n";
   ss << "  uint time;\n";
 
+  ss << "  int resampling_method;\n";
   ss << "  int correct_color_space;\n";
   ss << "  int game_color_space;\n";
   ss << "  int correct_gamma;\n";
@@ -816,6 +829,7 @@ struct BuiltinUniforms
   std::array<float, 4> src_rect;
   s32 src_layer;
   u32 time;
+  s32 resampling_method;
   s32 correct_color_space;
   s32 game_color_space;
   s32 correct_gamma;
@@ -861,6 +875,7 @@ void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle<int>& src,
   builtin_uniforms.src_layer = static_cast<s32>(src_layer);
   builtin_uniforms.time = static_cast<u32>(m_timer.ElapsedMs());
 
+  builtin_uniforms.resampling_method = static_cast<s32>(g_ActiveConfig.output_resampling_mode);
   // Color correction related uniforms.
   // These are mainly used by the "m_default_pixel_shader",
   // but should also be accessible to all other shaders.
diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp
index 42bf178047..83dc5eb53e 100644
--- a/Source/Core/VideoCommon/VideoConfig.cpp
+++ b/Source/Core/VideoCommon/VideoConfig.cpp
@@ -133,6 +133,7 @@ void VideoConfig::Refresh()
 
   texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING);
   iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
+  output_resampling_mode = Config::Get(Config::GFX_ENHANCE_OUTPUT_RESAMPLING);
   sPostProcessingShader = Config::Get(Config::GFX_ENHANCE_POST_SHADER);
   bForceTrueColor = Config::Get(Config::GFX_ENHANCE_FORCE_TRUE_COLOR);
   bDisableCopyFilter = Config::Get(Config::GFX_ENHANCE_DISABLE_COPY_FILTER);
diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h
index 20f4aa8a28..affbf78dec 100644
--- a/Source/Core/VideoCommon/VideoConfig.h
+++ b/Source/Core/VideoCommon/VideoConfig.h
@@ -52,6 +52,17 @@ enum class TextureFilteringMode : int
   Linear,
 };
 
+enum class OutputResamplingMode : int  // Uploaded as the "resampling_method" shader uniform
+{
+  Default,          // 0: the shader compares against these numeric values, keep them in sync
+  Bilinear,         // 1
+  Bicubic,          // 2
+  Hermite,          // 3
+  CatmullRom,       // 4
+  NearestNeighbor,  // 5
+  SharpBilinear,    // 6
+};
+
 enum class ColorCorrectionRegion : int
 {
   SMPTE_NTSCM,
@@ -103,6 +114,7 @@ struct VideoConfig final
   bool bSSAA = false;
   int iEFBScale = 0;
   TextureFilteringMode texture_filtering_mode = TextureFilteringMode::Default;
+  OutputResamplingMode output_resampling_mode = OutputResamplingMode::Default;
   int iMaxAnisotropy = 0;
   std::string sPostProcessingShader;
   bool bForceTrueColor = false;