From 555a93057c07f045cc71c4652c54eebb990021b7 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Mon, 26 Jul 2021 11:20:04 -0700 Subject: [PATCH 01/15] VideoCommon: Allow BitfieldExtract in specialized shaders --- Source/Core/VideoCommon/PixelShaderGen.cpp | 1 + Source/Core/VideoCommon/ShaderGenCommon.cpp | 18 ++++++++++++++++++ Source/Core/VideoCommon/ShaderGenCommon.h | 12 ++++++++++++ Source/Core/VideoCommon/UberShaderCommon.cpp | 18 ------------------ Source/Core/VideoCommon/UberShaderCommon.h | 19 ------------------- Source/Core/VideoCommon/UberShaderPixel.cpp | 2 +- Source/Core/VideoCommon/UberShaderVertex.cpp | 2 +- 7 files changed, 33 insertions(+), 39 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index a380937ae2..97adb46a97 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -565,6 +565,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos uid_data->genMode_numtexgens, uid_data->genMode_numindstages); // Stuff that is shared between ubershaders and pixelgen. 
+ WriteBitfieldExtractHeader(out, api_type, host_config); WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box); if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ) diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index c5cd3ef30d..cbc17f0772 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -105,6 +105,24 @@ void WriteIsNanHeader(ShaderCode& out, APIType api_type) } } +void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type, + const ShaderHostConfig& host_config) +{ + // ============================================== + // BitfieldExtract for APIs which don't have it + // ============================================== + if (!host_config.backend_bitfield) + { + out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n" + " // This built-in function is only supported in OpenGL 4.0+ and ES 3.1+\n" + " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract " + "instruction.\n" + " uint mask = uint((1 << size) - 1);\n" + " return uint(val >> off) & mask;\n" + "}}\n\n"); + } +} + static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string_view qualifier, std::string_view type, std::string_view name, int var_index, std::string_view semantic = {}, int semantic_index = -1) diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 3c1e7f38f8..f7f27b3073 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -14,6 +14,7 @@ #include "Common/BitField.h" #include "Common/CommonTypes.h" #include "Common/StringUtil.h" +#include "Common/TypeUtils.h" enum class APIType; @@ -177,6 +178,8 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool bool include_host_config, bool include_api = true); void WriteIsNanHeader(ShaderCode& out, 
APIType api_type); +void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type, + const ShaderHostConfig& host_config); void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens, const ShaderHostConfig& host_config, std::string_view qualifier); @@ -195,6 +198,15 @@ void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_v const char* GetInterpolationQualifier(bool msaa, bool ssaa, bool in_glsl_interface_block = false, bool in = false); +// bitfieldExtract generator for BitField types +template +std::string BitfieldExtract(std::string_view source) +{ + using BitFieldT = Common::MemberType; + return fmt::format("bitfieldExtract({}, {}, {})", source, static_cast(BitFieldT::StartBit()), + static_cast(BitFieldT::NumBits())); +} + // Constant variable names #define I_COLORS "color" #define I_KCOLORS "k" diff --git a/Source/Core/VideoCommon/UberShaderCommon.cpp b/Source/Core/VideoCommon/UberShaderCommon.cpp index 8b013fc91b..283d11ffdd 100644 --- a/Source/Core/VideoCommon/UberShaderCommon.cpp +++ b/Source/Core/VideoCommon/UberShaderCommon.cpp @@ -9,24 +9,6 @@ namespace UberShader { -void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type, - const ShaderHostConfig& host_config) -{ - // ============================================== - // BitfieldExtract for APIs which don't have it - // ============================================== - if (!host_config.backend_bitfield) - { - out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n" - " // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n" - " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract " - "instruction.\n" - " uint mask = uint((1 << size) - 1);\n" - " return uint(val >> off) & mask;\n" - "}}\n\n"); - } -} - void WriteLightingFunction(ShaderCode& out) { // ============================================== diff --git a/Source/Core/VideoCommon/UberShaderCommon.h b/Source/Core/VideoCommon/UberShaderCommon.h 
index d1a736ae47..4e3b0ff2a9 100644 --- a/Source/Core/VideoCommon/UberShaderCommon.h +++ b/Source/Core/VideoCommon/UberShaderCommon.h @@ -3,37 +3,18 @@ #pragma once -#include #include -#include - -#include "Common/CommonTypes.h" -#include "Common/TypeUtils.h" - class ShaderCode; enum class APIType; union ShaderHostConfig; namespace UberShader { -// Common functions across all ubershaders -void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type, - const ShaderHostConfig& host_config); - // Vertex lighting void WriteLightingFunction(ShaderCode& out); void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view world_pos_var, std::string_view normal_var, std::string_view in_color_0_var, std::string_view in_color_1_var, std::string_view out_color_0_var, std::string_view out_color_1_var); - -// bitfieldExtract generator for BitField types -template -std::string BitfieldExtract(std::string_view source) -{ - using BitFieldT = Common::MemberType; - return fmt::format("bitfieldExtract({}, {}, {})", source, static_cast(BitFieldT::StartBit()), - static_cast(BitFieldT::NumBits())); -} } // namespace UberShader diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 4541f15b10..c124a0b5db 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -63,8 +63,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write("// Pixel UberShader for {} texgens{}{}\n", numTexgen, early_depth ? ", early-depth" : "", per_pixel_depth ? 
", per-pixel depth" : ""); + WriteBitfieldExtractHeader(out, api_type, host_config); WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box); - WriteUberShaderCommonHeader(out, api_type, host_config); if (per_pixel_lighting) WriteLightingFunction(out); diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 8d3b128688..68915351d1 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -49,8 +49,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config GenerateVSOutputMembers(out, api_type, num_texgen, host_config, ""); out.Write("}};\n\n"); - WriteUberShaderCommonHeader(out, api_type, host_config); WriteIsNanHeader(out, api_type); + WriteBitfieldExtractHeader(out, api_type, host_config); WriteLightingFunction(out); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) From d2041b4c2a8562073ab905e2aea3b05c678aa962 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 26 Aug 2021 13:58:45 -0700 Subject: [PATCH 02/15] VideoCommon: Add signed version of BitfieldExtract --- Source/Core/Common/BitField.h | 2 ++ Source/Core/VideoCommon/ShaderGenCommon.cpp | 6 ++++++ Source/Core/VideoCommon/ShaderGenCommon.h | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Source/Core/Common/BitField.h b/Source/Core/Common/BitField.h index 26b3c5e0a4..2f5eba092f 100644 --- a/Source/Core/Common/BitField.h +++ b/Source/Core/Common/BitField.h @@ -149,6 +149,7 @@ public: constexpr T Value() const { return Value(std::is_signed()); } constexpr operator T() const { return Value(); } + static constexpr bool IsSigned() { return std::is_signed(); } static constexpr std::size_t StartBit() { return position; } static constexpr std::size_t NumBits() { return bits; } @@ -244,6 +245,7 @@ public: BitFieldArray& operator=(const BitFieldArray&) = delete; public: + constexpr bool IsSigned() const { return std::is_signed(); } 
constexpr std::size_t StartBit() const { return position; } constexpr std::size_t NumBits() const { return bits; } constexpr std::size_t Size() const { return size; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index cbc17f0772..72a7e0d14f 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -120,6 +120,12 @@ void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type, " uint mask = uint((1 << size) - 1);\n" " return uint(val >> off) & mask;\n" "}}\n\n"); + out.Write("int bitfieldExtract(int val, int off, int size) {{\n" + " // This built-in function is only supported in OpenGL 4.0+ and ES 3.1+\n" + " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract " + "instruction.\n" + " return ((val << (32 - size - off)) >> (32 - size));\n" + "}}\n\n"); } } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index f7f27b3073..ebdcda262b 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -203,7 +203,8 @@ template std::string BitfieldExtract(std::string_view source) { using BitFieldT = Common::MemberType; - return fmt::format("bitfieldExtract({}, {}, {})", source, static_cast(BitFieldT::StartBit()), + return fmt::format("bitfieldExtract({}({}), {}, {})", BitFieldT::IsSigned() ? "int" : "uint", + source, static_cast(BitFieldT::StartBit()), static_cast(BitFieldT::NumBits())); } From 3096f77ba09fe3510a785e073602a8a829a02d9e Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 8 Aug 2021 11:05:32 -0700 Subject: [PATCH 03/15] Eliminate SamplerCommon::AreBpTexMode0MipmapsEnabled This was added in 0b9a72a62d38481ff08f742b2318d07f29ff5dff but became irrelevant in 70f9fc4e7526fc9cfc008a43cd33229f62be99b6 as the check is now self-explanatory due to a rejiggering of the bitfields. 
--- Source/Core/VideoBackends/Software/TextureSampler.cpp | 3 +-- Source/Core/VideoCommon/RenderState.cpp | 7 ++++--- Source/Core/VideoCommon/SamplerCommon.h | 7 ------- Source/Core/VideoCommon/TextureCacheBase.cpp | 9 ++++----- Source/Core/VideoCommon/TextureInfo.cpp | 3 +-- 5 files changed, 10 insertions(+), 19 deletions(-) diff --git a/Source/Core/VideoBackends/Software/TextureSampler.cpp b/Source/Core/VideoBackends/Software/TextureSampler.cpp index 064e9df41c..0ba340c33a 100644 --- a/Source/Core/VideoBackends/Software/TextureSampler.cpp +++ b/Source/Core/VideoBackends/Software/TextureSampler.cpp @@ -11,7 +11,6 @@ #include "Core/HW/Memmap.h" #include "VideoCommon/BPMemory.h" -#include "VideoCommon/SamplerCommon.h" #include "VideoCommon/TextureDecoder.h" #define ALLOW_MIPMAP 1 @@ -79,7 +78,7 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample) const s32 lodFract = lod & 0xf; - if (lod > 0 && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) + if (lod > 0 && tm0.mipmap_filter != MipMode::None) { // use mipmap baseMip = lod >> 4; diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp index 358c99974e..85b3dc68c1 100644 --- a/Source/Core/VideoCommon/RenderState.cpp +++ b/Source/Core/VideoCommon/RenderState.cpp @@ -2,9 +2,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "VideoCommon/RenderState.h" + #include #include -#include "VideoCommon/SamplerCommon.h" + #include "VideoCommon/TextureConfig.h" void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_type) @@ -235,9 +236,9 @@ void SamplerState::Generate(const BPMemory& bp, u32 index) mag_filter = tm0.mag_filter == FilterMode::Linear ? Filter::Linear : Filter::Point; // If mipmaps are disabled, clamp min/max lod - max_lod = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm1.max_lod.Value() : 0; + max_lod = tm0.mipmap_filter != MipMode::None ? 
tm1.max_lod.Value() : 0; min_lod = std::min(max_lod.Value(), static_cast(tm1.min_lod)); - lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias * (256 / 32) : 0; + lod_bias = tm0.mipmap_filter != MipMode::None ? tm0.lod_bias * (256 / 32) : 0; // Address modes // Hardware testing indicates that wrap_mode set to 3 behaves the same as clamp. diff --git a/Source/Core/VideoCommon/SamplerCommon.h b/Source/Core/VideoCommon/SamplerCommon.h index 2b09393169..ec15f07026 100644 --- a/Source/Core/VideoCommon/SamplerCommon.h +++ b/Source/Core/VideoCommon/SamplerCommon.h @@ -17,11 +17,4 @@ constexpr bool IsBpTexMode0PointFiltering(const T& tm0) { return tm0.min_filter == FilterMode::Near && tm0.mag_filter == FilterMode::Near; } - -// Check if the minification filter has mipmap based filtering modes enabled. -template -constexpr bool AreBpTexMode0MipmapsEnabled(const T& tm0) -{ - return tm0.mipmap_filter != MipMode::None; -} } // namespace SamplerCommon diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 0f66284180..ff15a755ba 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -979,9 +979,8 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, { state.min_filter = SamplerState::Filter::Linear; state.mag_filter = SamplerState::Filter::Linear; - state.mipmap_filter = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? - SamplerState::Filter::Linear : - SamplerState::Filter::Point; + state.mipmap_filter = tm0.mipmap_filter != MipMode::None ? SamplerState::Filter::Linear : + SamplerState::Filter::Point; } // Custom textures may have a greater number of mips @@ -1000,7 +999,7 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, // disabling anisotropy, or changing the anisotropic algorithm employed. 
state.min_filter = SamplerState::Filter::Linear; state.mag_filter = SamplerState::Filter::Linear; - if (SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) + if (tm0.mipmap_filter != MipMode::None) state.mipmap_filter = SamplerState::Filter::Linear; state.anisotropic_filtering = 1; } @@ -1009,7 +1008,7 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, state.anisotropic_filtering = 0; } - if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) + if (has_arbitrary_mips && tm0.mipmap_filter != MipMode::None) { // Apply a secondary bias calculated from the IR scale to pull inwards mipmaps // that have arbitrary contents, eg. are used for fog effects where the diff --git a/Source/Core/VideoCommon/TextureInfo.cpp b/Source/Core/VideoCommon/TextureInfo.cpp index 6959b95534..696f5b904c 100644 --- a/Source/Core/VideoCommon/TextureInfo.cpp +++ b/Source/Core/VideoCommon/TextureInfo.cpp @@ -9,7 +9,6 @@ #include "Common/Align.h" #include "Core/HW/Memmap.h" #include "VideoCommon/BPMemory.h" -#include "VideoCommon/SamplerCommon.h" #include "VideoCommon/TextureDecoder.h" TextureInfo TextureInfo::FromStage(u32 stage) @@ -28,7 +27,7 @@ TextureInfo TextureInfo::FromStage(u32 stage) const u8* tlut_ptr = &texMem[tlutaddr]; std::optional mip_count; - const bool has_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0); + const bool has_mipmaps = tex.texMode0.mipmap_filter != MipMode::None; if (has_mipmaps) { mip_count = (tex.texMode1.max_lod + 0xf) / 0x10; From 6236a0d494378430767c20c7930af0c9cfe1ff5b Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 8 Aug 2021 11:13:00 -0700 Subject: [PATCH 04/15] Eliminate SamplerCommon --- Source/Core/DolphinLib.props | 1 - .../Core/VideoBackends/OGL/SamplerCache.cpp | 1 - Source/Core/VideoCommon/CMakeLists.txt | 1 - Source/Core/VideoCommon/SamplerCommon.h | 20 ------------------- Source/Core/VideoCommon/TextureCacheBase.cpp | 15 ++++++++++++-- 
Source/Core/VideoCommon/VertexManagerBase.cpp | 1 - 6 files changed, 13 insertions(+), 26 deletions(-) delete mode 100644 Source/Core/VideoCommon/SamplerCommon.h diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 96b489f555..b547482af1 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -644,7 +644,6 @@ - diff --git a/Source/Core/VideoBackends/OGL/SamplerCache.cpp b/Source/Core/VideoBackends/OGL/SamplerCache.cpp index 85af44447b..6e594a1e7e 100644 --- a/Source/Core/VideoBackends/OGL/SamplerCache.cpp +++ b/Source/Core/VideoBackends/OGL/SamplerCache.cpp @@ -7,7 +7,6 @@ #include #include "Common/CommonTypes.h" -#include "VideoCommon/SamplerCommon.h" #include "VideoCommon/VideoConfig.h" namespace OGL diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index 17bee4ab7a..3504bc63d1 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -70,7 +70,6 @@ add_library(videocommon RenderBase.h RenderState.cpp RenderState.h - SamplerCommon.h ShaderCache.cpp ShaderCache.h ShaderGenCommon.cpp diff --git a/Source/Core/VideoCommon/SamplerCommon.h b/Source/Core/VideoCommon/SamplerCommon.h deleted file mode 100644 index ec15f07026..0000000000 --- a/Source/Core/VideoCommon/SamplerCommon.h +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -namespace SamplerCommon -{ -// Helper for checking if a BPMemory TexMode0 register is set to Point -// Filtering modes. This is used to decide whether Anisotropic enhancements -// are (mostly) safe in the VideoBackends. -// If both the minification and magnification filters are set to POINT modes -// then applying anisotropic filtering is equivalent to forced filtering. Point -// mode textures are usually some sort of 2D UI billboard which will end up -// misaligned from the correct pixels when filtered anisotropically. 
-template -constexpr bool IsBpTexMode0PointFiltering(const T& tm0) -{ - return tm0.min_filter == FilterMode::Near && tm0.mag_filter == FilterMode::Near; -} -} // namespace SamplerCommon diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index ff15a755ba..0127137c8d 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -40,7 +40,6 @@ #include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" -#include "VideoCommon/SamplerCommon.h" #include "VideoCommon/ShaderCache.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TMEM.h" @@ -966,6 +965,18 @@ void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, uns entry->texture->Save(filename, level); } +// Helper for checking if a BPMemory TexMode0 register is set to Point +// Filtering modes. This is used to decide whether Anisotropic enhancements +// are (mostly) safe in the VideoBackends. +// If both the minification and magnification filters are set to POINT modes +// then applying anisotropic filtering is equivalent to forced filtering. Point +// mode textures are usually some sort of 2D UI billboard which will end up +// misaligned from the correct pixels when filtered anisotropically. +static bool IsAnisostropicEnhancementSafe(const TexMode0& tm0) +{ + return !(tm0.min_filter == FilterMode::Near && tm0.mag_filter == FilterMode::Near); +} + static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, bool has_arbitrary_mips) { @@ -988,7 +999,7 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, state.max_lod = 255; // Anisotropic filtering option. 
- if (g_ActiveConfig.iMaxAnisotropy != 0 && !SamplerCommon::IsBpTexMode0PointFiltering(tm0)) + if (g_ActiveConfig.iMaxAnisotropy != 0 && IsAnisostropicEnhancementSafe(tm0)) { // https://www.opengl.org/registry/specs/EXT/texture_filter_anisotropic.txt // For predictable results on all hardware/drivers, only use one of: diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 60bfa1b1d1..63213edd3a 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -27,7 +27,6 @@ #include "VideoCommon/PerfQueryBase.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" -#include "VideoCommon/SamplerCommon.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VertexLoaderManager.h" From a273b655661bfc09e5c6a81a681aab3f130699cc Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Wed, 6 Oct 2021 17:36:15 -0700 Subject: [PATCH 05/15] RenderState: Use operator== for operator!= and adjust constructors --- Source/Core/VideoCommon/RenderState.cpp | 30 --------- Source/Core/VideoCommon/RenderState.h | 89 +++++++++++++++++++++---- 2 files changed, 77 insertions(+), 42 deletions(-) diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp index 85b3dc68c1..d84a17257e 100644 --- a/Source/Core/VideoCommon/RenderState.cpp +++ b/Source/Core/VideoCommon/RenderState.cpp @@ -18,18 +18,6 @@ void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_ty cullmode = CullMode::None; } -RasterizationState& RasterizationState::operator=(const RasterizationState& rhs) -{ - hex = rhs.hex; - return *this; -} - -FramebufferState& FramebufferState::operator=(const FramebufferState& rhs) -{ - hex = rhs.hex; - return *this; -} - void DepthState::Generate(const BPMemory& bp) { testenable = bp.zmode.testenable.Value(); @@ -37,12 +25,6 @@ void DepthState::Generate(const 
BPMemory& bp) func = bp.zmode.func.Value(); } -DepthState& DepthState::operator=(const DepthState& rhs) -{ - hex = rhs.hex; - return *this; -} - // If the framebuffer format has no alpha channel, it is assumed to // ONE on blending. As the backends may emulate this framebuffer // configuration with an alpha channel, we just drop all references @@ -217,12 +199,6 @@ void BlendingState::ApproximateLogicOpWithBlending() dstfactor = approximations[u32(logicmode.Value())].dstfactor; } -BlendingState& BlendingState::operator=(const BlendingState& rhs) -{ - hex = rhs.hex; - return *this; -} - void SamplerState::Generate(const BPMemory& bp, u32 index) { auto tex = bp.tex.GetUnit(index); @@ -249,12 +225,6 @@ void SamplerState::Generate(const BPMemory& bp, u32 index) anisotropic_filtering = 0; } -SamplerState& SamplerState::operator=(const SamplerState& rhs) -{ - hex = rhs.hex; - return *this; -} - namespace RenderState { RasterizationState GetInvalidRasterizationState() diff --git a/Source/Core/VideoCommon/RenderState.h b/Source/Core/VideoCommon/RenderState.h index 192b0c8027..76738e2744 100644 --- a/Source/Core/VideoCommon/RenderState.h +++ b/Source/Core/VideoCommon/RenderState.h @@ -22,11 +22,24 @@ union RasterizationState { void Generate(const BPMemory& bp, PrimitiveType primitive_type); - RasterizationState& operator=(const RasterizationState& rhs); + RasterizationState() = default; + RasterizationState(const RasterizationState&) = default; + RasterizationState& operator=(const RasterizationState& rhs) + { + hex = rhs.hex; + return *this; + } + RasterizationState(RasterizationState&&) = default; + RasterizationState& operator=(RasterizationState&& rhs) + { + hex = rhs.hex; + return *this; + } bool operator==(const RasterizationState& rhs) const { return hex == rhs.hex; } - bool operator!=(const RasterizationState& rhs) const { return hex != rhs.hex; } + bool operator!=(const RasterizationState& rhs) const { return !operator==(rhs); } bool operator<(const 
RasterizationState& rhs) const { return hex < rhs.hex; } + BitField<0, 2, CullMode> cullmode; BitField<3, 2, PrimitiveType> primitive; @@ -35,15 +48,28 @@ union RasterizationState union FramebufferState { + FramebufferState() = default; + FramebufferState(const FramebufferState&) = default; + FramebufferState& operator=(const FramebufferState& rhs) + { + hex = rhs.hex; + return *this; + } + FramebufferState(FramebufferState&&) = default; + FramebufferState& operator=(FramebufferState&& rhs) + { + hex = rhs.hex; + return *this; + } + + bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; } + bool operator!=(const FramebufferState& rhs) const { return !operator==(rhs); } + BitField<0, 8, AbstractTextureFormat> color_texture_format; BitField<8, 8, AbstractTextureFormat> depth_texture_format; BitField<16, 8, u32> samples; BitField<24, 1, u32> per_sample_shading; - bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; } - bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; } - FramebufferState& operator=(const FramebufferState& rhs); - u32 hex; }; @@ -51,11 +77,24 @@ union DepthState { void Generate(const BPMemory& bp); - DepthState& operator=(const DepthState& rhs); + DepthState() = default; + DepthState(const DepthState&) = default; + DepthState& operator=(const DepthState& rhs) + { + hex = rhs.hex; + return *this; + } + DepthState(DepthState&&) = default; + DepthState& operator=(DepthState&& rhs) + { + hex = rhs.hex; + return *this; + } bool operator==(const DepthState& rhs) const { return hex == rhs.hex; } - bool operator!=(const DepthState& rhs) const { return hex != rhs.hex; } + bool operator!=(const DepthState& rhs) const { return !operator==(rhs); } bool operator<(const DepthState& rhs) const { return hex < rhs.hex; } + BitField<0, 1, u32> testenable; BitField<1, 1, u32> updateenable; BitField<2, 3, CompareMode> func; @@ -71,11 +110,24 @@ union BlendingState // Will not be bit-correct, and in 
some cases not even remotely in the same ballpark. void ApproximateLogicOpWithBlending(); - BlendingState& operator=(const BlendingState& rhs); + BlendingState() = default; + BlendingState(const BlendingState&) = default; + BlendingState& operator=(const BlendingState& rhs) + { + hex = rhs.hex; + return *this; + } + BlendingState(BlendingState&&) = default; + BlendingState& operator=(BlendingState&& rhs) + { + hex = rhs.hex; + return *this; + } bool operator==(const BlendingState& rhs) const { return hex == rhs.hex; } - bool operator!=(const BlendingState& rhs) const { return hex != rhs.hex; } + bool operator!=(const BlendingState& rhs) const { return !operator==(rhs); } bool operator<(const BlendingState& rhs) const { return hex < rhs.hex; } + BitField<0, 1, u32> blendenable; BitField<1, 1, u32> logicopenable; BitField<2, 1, u32> dstalpha; @@ -112,10 +164,23 @@ union SamplerState void Generate(const BPMemory& bp, u32 index); - SamplerState& operator=(const SamplerState& rhs); + SamplerState() = default; + SamplerState(const SamplerState&) = default; + SamplerState& operator=(const SamplerState& rhs) + { + hex = rhs.hex; + return *this; + } + SamplerState(SamplerState&&) = default; + SamplerState& operator=(SamplerState&& rhs) + { + tm0.hex = rhs.tm0.hex; + tm1.hex = rhs.tm1.hex; + return *this; + } bool operator==(const SamplerState& rhs) const { return hex == rhs.hex; } - bool operator!=(const SamplerState& rhs) const { return hex != rhs.hex; } + bool operator!=(const SamplerState& rhs) const { return !operator==(rhs); } bool operator<(const SamplerState& rhs) const { return hex < rhs.hex; } BitField<0, 1, Filter> min_filter; BitField<1, 1, Filter> mag_filter; From 9ef228503abe24820985e431ef942c6ebb312442 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 24 Jul 2021 15:58:55 -0700 Subject: [PATCH 06/15] VideoCommon: Provide raw texdims to shaders --- Source/Core/VideoCommon/ConstantManager.h | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 11 ++++++----- 
Source/Core/VideoCommon/PixelShaderManager.cpp | 13 +++++-------- Source/Core/VideoCommon/UberShaderPixel.cpp | 9 +++++---- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index fe80767112..6046450dec 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -21,7 +21,7 @@ struct PixelShaderConstants std::array colors; std::array kcolors; int4 alpha; - std::array texdims; + std::array texdims; std::array zbias; std::array indtexscale; std::array indtexmtx; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 97adb46a97..6f11c605fd 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -393,7 +393,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, out.Write("\tint4 " I_COLORS "[4];\n" "\tint4 " I_KCOLORS "[4];\n" "\tint4 " I_ALPHA ";\n" - "\tfloat4 " I_TEXDIMS "[8];\n" + "\tint4 " I_TEXDIMS "[8];\n" "\tint4 " I_ZBIAS "[2];\n" "\tint4 " I_INDTEXSCALE "[2];\n" "\tint4 " I_INDTEXMTX "[6];\n" @@ -812,7 +812,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos { out.Write("\tint2 fixpoint_uv{} = int2(", i); out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i); - out.Write(" * " I_TEXDIMS "[{}].zw);\n", i); + out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i); // TODO: S24 overflows here? } } @@ -1436,13 +1436,14 @@ static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::stri if (api_type == APIType::D3D) { - out.Write("iround(255.0 * Tex[{}].Sample(samp[{}], float3({}.xy * " I_TEXDIMS - "[{}].xy, {}))).{};\n", + out.Write("iround(255.0 * Tex[{}].Sample(samp[{}], float3({}.xy / float2(" I_TEXDIMS + "[{}].xy * 128), {}))).{};\n", texmap, texmap, texcoords, texmap, stereo ? 
"layer" : "0.0", texswap); } else { - out.Write("iround(255.0 * texture(samp[{}], float3({}.xy * " I_TEXDIMS "[{}].xy, {}))).{};\n", + out.Write("iround(255.0 * texture(samp[{}], float3({}.xy / float2(" I_TEXDIMS + "[{}].xy * 128), {}))).{};\n", texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); } } diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 436db9dd50..3a232903af 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -273,16 +273,13 @@ void PixelShaderManager::SetDestAlphaChanged() void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height) { - float rwidth = 1.0f / (width * 128.0f); - float rheight = 1.0f / (height * 128.0f); - // TODO: move this check out to callee. There we could just call this function on texture changes // or better, use textureSize() in glsl - if (constants.texdims[texmapid][0] != rwidth || constants.texdims[texmapid][1] != rheight) + if (constants.texdims[texmapid][0] != width || constants.texdims[texmapid][1] != height) dirty = true; - constants.texdims[texmapid][0] = rwidth; - constants.texdims[texmapid][1] = rheight; + constants.texdims[texmapid][0] = width; + constants.texdims[texmapid][1] = height; } void PixelShaderManager::SetZTextureBias() @@ -382,8 +379,8 @@ void PixelShaderManager::SetZTextureOpChanged() void PixelShaderManager::SetTexCoordChanged(u8 texmapid) { TCoordInfo& tc = bpmem.texcoords[texmapid]; - constants.texdims[texmapid][2] = (float)(tc.s.scale_minus_1 + 1) * 128.0f; - constants.texdims[texmapid][3] = (float)(tc.t.scale_minus_1 + 1) * 128.0f; + constants.texdims[texmapid][2] = tc.s.scale_minus_1 + 1; + constants.texdims[texmapid][3] = tc.t.scale_minus_1 + 1; dirty = true; } diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index c124a0b5db..833bbe1d0f 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp 
+++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -301,8 +301,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " else\n" " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].zw;\n" "\n" - " {} = sampleTexture(texmap, float3(float2(fixedPoint_uv) * " I_TEXDIMS - "[texmap].xy, {})).abg;\n" + " {} = sampleTexture(texmap, float3(float2(fixedPoint_uv) / float2(" I_TEXDIMS + "[texmap].xy * 128), {})).abg;\n" "}}", in_index_name, in_index_name, in_index_name, in_index_name, out_var_name, stereo ? "float(layer)" : "0.0"); @@ -786,7 +786,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, { out.Write(" int2 fixpoint_uv{} = int2(", i); out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i); - out.Write(" * " I_TEXDIMS "[{}].zw);\n", i); + out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i); // TODO: S24 overflows here? } @@ -910,7 +910,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " uint sampler_num = {};\n", BitfieldExtract<&TwoTevStageOrders::texmap0>("ss.order")); out.Write("\n" - " float2 uv = (float2(tevcoord.xy)) * " I_TEXDIMS "[sampler_num].xy;\n"); + " float2 uv = (float2(tevcoord.xy)) / float2(" I_TEXDIMS + "[sampler_num].xy * 128);\n"); out.Write(" int4 color = sampleTexture(sampler_num, float3(uv, {}));\n", stereo ? "float(layer)" : "0.0"); out.Write(" uint swap = {};\n", From 4a9b26de86aa7e99af51016e07a755968b8777e0 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 8 Aug 2021 21:11:50 -0700 Subject: [PATCH 07/15] VideoCommon: Expose SamplerState to shaders The benefit to exposing this over the raw BP state is that adjustments Dolphin makes, such as LOD biases from arbitrary mipmap detection, will work properly. 
--- Source/Core/VideoBackends/D3D/D3DState.cpp | 30 +++---- Source/Core/VideoBackends/D3D/D3DState.h | 2 +- .../D3D12/DescriptorHeapManager.cpp | 30 +++---- .../D3D12/DescriptorHeapManager.h | 2 +- .../Core/VideoBackends/OGL/SamplerCache.cpp | 24 +++--- .../Core/VideoBackends/Vulkan/ObjectCache.cpp | 26 +++--- .../Core/VideoBackends/Vulkan/VKRenderer.cpp | 8 +- Source/Core/VideoCommon/ConstantManager.h | 4 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 2 +- .../Core/VideoCommon/PixelShaderManager.cpp | 9 ++ Source/Core/VideoCommon/PixelShaderManager.h | 1 + Source/Core/VideoCommon/RenderState.cpp | 86 ++++++++++++------- Source/Core/VideoCommon/RenderState.h | 76 +++++++++------- Source/Core/VideoCommon/TextureCacheBase.cpp | 27 +++--- 14 files changed, 188 insertions(+), 139 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/D3DState.cpp b/Source/Core/VideoBackends/D3D/D3DState.cpp index 7cb123de15..63d5a61cca 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D/D3DState.cpp @@ -303,43 +303,43 @@ StateCache::~StateCache() = default; ID3D11SamplerState* StateCache::Get(SamplerState state) { std::lock_guard guard(m_lock); - auto it = m_sampler.find(state.hex); + auto it = m_sampler.find(state); if (it != m_sampler.end()) return it->second.Get(); D3D11_SAMPLER_DESC sampdc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - if (state.mipmap_filter == SamplerState::Filter::Linear) + if (state.tm0.mipmap_filter == FilterMode::Linear) { - if (state.min_filter == SamplerState::Filter::Linear) - sampdc.Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + if (state.tm0.min_filter == FilterMode::Linear) + sampdc.Filter = (state.tm0.mag_filter == FilterMode::Linear) ? D3D11_FILTER_MIN_MAG_MIP_LINEAR : D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR; else - sampdc.Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + sampdc.Filter = (state.tm0.mag_filter == FilterMode::Linear) ? 
D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR : D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR; } else { - if (state.min_filter == SamplerState::Filter::Linear) - sampdc.Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + if (state.tm0.min_filter == FilterMode::Linear) + sampdc.Filter = (state.tm0.mag_filter == FilterMode::Linear) ? D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT : D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT; else - sampdc.Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + sampdc.Filter = (state.tm0.mag_filter == FilterMode::Linear) ? D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT : D3D11_FILTER_MIN_MAG_MIP_POINT; } static constexpr std::array address_modes = { {D3D11_TEXTURE_ADDRESS_CLAMP, D3D11_TEXTURE_ADDRESS_WRAP, D3D11_TEXTURE_ADDRESS_MIRROR}}; - sampdc.AddressU = address_modes[static_cast(state.wrap_u.Value())]; - sampdc.AddressV = address_modes[static_cast(state.wrap_v.Value())]; - sampdc.MaxLOD = state.max_lod / 16.f; - sampdc.MinLOD = state.min_lod / 16.f; - sampdc.MipLODBias = (s32)state.lod_bias / 256.f; + sampdc.AddressU = address_modes[static_cast(state.tm0.wrap_u.Value())]; + sampdc.AddressV = address_modes[static_cast(state.tm0.wrap_v.Value())]; + sampdc.MaxLOD = state.tm1.max_lod / 16.f; + sampdc.MinLOD = state.tm1.min_lod / 16.f; + sampdc.MipLODBias = state.tm0.lod_bias / 256.f; - if (state.anisotropic_filtering) + if (state.tm0.anisotropic_filtering) { sampdc.Filter = D3D11_FILTER_ANISOTROPIC; sampdc.MaxAnisotropy = 1u << g_ActiveConfig.iMaxAnisotropy; @@ -348,7 +348,7 @@ ID3D11SamplerState* StateCache::Get(SamplerState state) ComPtr res; HRESULT hr = D3D::device->CreateSamplerState(&sampdc, res.GetAddressOf()); CHECK(SUCCEEDED(hr), "Creating D3D sampler state failed"); - return m_sampler.emplace(state.hex, std::move(res)).first->second.Get(); + return m_sampler.emplace(state, std::move(res)).first->second.Get(); } ID3D11BlendState* StateCache::Get(BlendingState state) diff --git a/Source/Core/VideoBackends/D3D/D3DState.h 
b/Source/Core/VideoBackends/D3D/D3DState.h index b76b85fb44..795e7a0432 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.h +++ b/Source/Core/VideoBackends/D3D/D3DState.h @@ -37,7 +37,7 @@ private: std::unordered_map> m_depth; std::unordered_map> m_raster; std::unordered_map> m_blend; - std::unordered_map> m_sampler; + std::unordered_map> m_sampler; std::mutex m_lock; }; diff --git a/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.cpp b/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.cpp index 412e3c9640..537561559c 100644 --- a/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.cpp +++ b/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.cpp @@ -85,32 +85,32 @@ SamplerHeapManager::~SamplerHeapManager() = default; static void GetD3DSamplerDesc(D3D12_SAMPLER_DESC* desc, const SamplerState& state) { - if (state.mipmap_filter == SamplerState::Filter::Linear) + if (state.tm0.mipmap_filter == FilterMode::Linear) { - if (state.min_filter == SamplerState::Filter::Linear) + if (state.tm0.min_filter == FilterMode::Linear) { - desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + desc->Filter = (state.tm0.mag_filter == FilterMode::Linear) ? D3D12_FILTER_MIN_MAG_MIP_LINEAR : D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR; } else { - desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + desc->Filter = (state.tm0.mag_filter == FilterMode::Linear) ? D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR : D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR; } } else { - if (state.min_filter == SamplerState::Filter::Linear) + if (state.tm0.min_filter == FilterMode::Linear) { - desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + desc->Filter = (state.tm0.mag_filter == FilterMode::Linear) ? D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT : D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT; } else { - desc->Filter = (state.mag_filter == SamplerState::Filter::Linear) ? + desc->Filter = (state.tm0.mag_filter == FilterMode::Linear) ? 
D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT : D3D12_FILTER_MIN_MAG_MIP_POINT; } @@ -119,15 +119,15 @@ static void GetD3DSamplerDesc(D3D12_SAMPLER_DESC* desc, const SamplerState& stat static constexpr std::array address_modes = { {D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_WRAP, D3D12_TEXTURE_ADDRESS_MODE_MIRROR}}; - desc->AddressU = address_modes[static_cast(state.wrap_u.Value())]; - desc->AddressV = address_modes[static_cast(state.wrap_v.Value())]; + desc->AddressU = address_modes[static_cast(state.tm0.wrap_u.Value())]; + desc->AddressV = address_modes[static_cast(state.tm0.wrap_v.Value())]; desc->AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - desc->MaxLOD = state.max_lod / 16.f; - desc->MinLOD = state.min_lod / 16.f; - desc->MipLODBias = static_cast(state.lod_bias) / 256.f; + desc->MaxLOD = state.tm1.max_lod / 16.f; + desc->MinLOD = state.tm1.min_lod / 16.f; + desc->MipLODBias = static_cast(state.tm0.lod_bias) / 256.f; desc->ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; - if (state.anisotropic_filtering) + if (state.tm0.anisotropic_filtering) { desc->Filter = D3D12_FILTER_ANISOTROPIC; desc->MaxAnisotropy = 1u << g_ActiveConfig.iMaxAnisotropy; @@ -136,7 +136,7 @@ static void GetD3DSamplerDesc(D3D12_SAMPLER_DESC* desc, const SamplerState& stat bool SamplerHeapManager::Lookup(const SamplerState& ss, D3D12_CPU_DESCRIPTOR_HANDLE* handle) { - const auto it = m_sampler_map.find(ss.hex); + const auto it = m_sampler_map.find(ss); if (it != m_sampler_map.end()) { *handle = it->second; @@ -158,7 +158,7 @@ bool SamplerHeapManager::Lookup(const SamplerState& ss, D3D12_CPU_DESCRIPTOR_HAN m_current_offset * m_descriptor_increment_size}; g_dx_context->GetDevice()->CreateSampler(&desc, new_handle); - m_sampler_map.emplace(ss.hex, new_handle); + m_sampler_map.emplace(ss, new_handle); m_current_offset++; *handle = new_handle; return true; diff --git a/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.h b/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.h 
index ce4d23b533..7ff42c8e5b 100644 --- a/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.h +++ b/Source/Core/VideoBackends/D3D12/DescriptorHeapManager.h @@ -68,6 +68,6 @@ private: D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu{}; - std::unordered_map m_sampler_map; + std::unordered_map m_sampler_map; }; } // namespace DX12 diff --git a/Source/Core/VideoBackends/OGL/SamplerCache.cpp b/Source/Core/VideoBackends/OGL/SamplerCache.cpp index 6e594a1e7e..286f654de2 100644 --- a/Source/Core/VideoBackends/OGL/SamplerCache.cpp +++ b/Source/Core/VideoBackends/OGL/SamplerCache.cpp @@ -71,16 +71,16 @@ void SamplerCache::InvalidateBinding(u32 stage) void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params) { GLenum min_filter; - GLenum mag_filter = (params.mag_filter == SamplerState::Filter::Point) ? GL_NEAREST : GL_LINEAR; - if (params.mipmap_filter == SamplerState::Filter::Linear) + GLenum mag_filter = (params.tm0.mag_filter == FilterMode::Near) ? GL_NEAREST : GL_LINEAR; + if (params.tm0.mipmap_filter == FilterMode::Linear) { - min_filter = (params.min_filter == SamplerState::Filter::Point) ? GL_NEAREST_MIPMAP_LINEAR : - GL_LINEAR_MIPMAP_LINEAR; + min_filter = (params.tm0.min_filter == FilterMode::Near) ? GL_NEAREST_MIPMAP_LINEAR : + GL_LINEAR_MIPMAP_LINEAR; } else { - min_filter = (params.min_filter == SamplerState::Filter::Point) ? GL_NEAREST_MIPMAP_NEAREST : - GL_LINEAR_MIPMAP_NEAREST; + min_filter = (params.tm0.min_filter == FilterMode::Near) ? 
GL_NEAREST_MIPMAP_NEAREST : + GL_LINEAR_MIPMAP_NEAREST; } glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, min_filter); @@ -90,17 +90,17 @@ void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params) {GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT}}; glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, - address_modes[static_cast(params.wrap_u.Value())]); + address_modes[static_cast(params.tm0.wrap_u.Value())]); glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, - address_modes[static_cast(params.wrap_v.Value())]); + address_modes[static_cast(params.tm0.wrap_v.Value())]); - glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, params.min_lod / 16.f); - glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.max_lod / 16.f); + glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, params.tm1.min_lod / 16.f); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.tm1.max_lod / 16.f); if (!static_cast(g_renderer.get())->IsGLES()) - glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 256.f); + glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.tm0.lod_bias / 256.f); - if (params.anisotropic_filtering && g_ogl_config.bSupportsAniso) + if (params.tm0.anisotropic_filtering && g_ogl_config.bSupportsAniso) { glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, static_cast(1 << g_ActiveConfig.iMaxAnisotropy)); diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp index a1ccdd6f30..fd296457e4 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp @@ -315,28 +315,28 @@ VkSampler ObjectCache::GetSampler(const SamplerState& info) VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT}}; VkSamplerCreateInfo create_info = { - VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkSamplerCreateFlags flags - filters[static_cast(info.mag_filter.Value())], // 
VkFilter magFilter - filters[static_cast(info.min_filter.Value())], // VkFilter minFilter - mipmap_modes[static_cast(info.mipmap_filter.Value())], // VkSamplerMipmapMode mipmapMode - address_modes[static_cast(info.wrap_u.Value())], // VkSamplerAddressMode addressModeU - address_modes[static_cast(info.wrap_v.Value())], // VkSamplerAddressMode addressModeV - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW - info.lod_bias / 256.0f, // float mipLodBias + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkSamplerCreateFlags flags + filters[u32(info.tm0.mag_filter.Value())], // VkFilter magFilter + filters[u32(info.tm0.min_filter.Value())], // VkFilter minFilter + mipmap_modes[u32(info.tm0.mipmap_filter.Value())], // VkSamplerMipmapMode mipmapMode + address_modes[u32(info.tm0.wrap_u.Value())], // VkSamplerAddressMode addressModeU + address_modes[u32(info.tm0.wrap_v.Value())], // VkSamplerAddressMode addressModeV + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW + info.tm0.lod_bias / 256.0f, // float mipLodBias VK_FALSE, // VkBool32 anisotropyEnable 0.0f, // float maxAnisotropy VK_FALSE, // VkBool32 compareEnable VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp - info.min_lod / 16.0f, // float minLod - info.max_lod / 16.0f, // float maxLod + info.tm1.min_lod / 16.0f, // float minLod + info.tm1.max_lod / 16.0f, // float maxLod VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor VK_FALSE // VkBool32 unnormalizedCoordinates }; // Can we use anisotropic filtering with this sampler? - if (info.anisotropic_filtering && g_vulkan_context->SupportsAnisotropicFiltering()) + if (info.tm0.anisotropic_filtering && g_vulkan_context->SupportsAnisotropicFiltering()) { // Cap anisotropy to device limits. 
create_info.anisotropyEnable = VK_TRUE; diff --git a/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp b/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp index 06a3f09518..35c092c147 100644 --- a/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp @@ -49,7 +49,7 @@ Renderer::Renderer(std::unique_ptr swap_chain, float backbuffer_scale { UpdateActiveConfig(); for (SamplerState& m_sampler_state : m_sampler_states) - m_sampler_state.hex = RenderState::GetPointSamplerState().hex; + m_sampler_state = RenderState::GetPointSamplerState(); } Renderer::~Renderer() = default; @@ -545,7 +545,7 @@ void Renderer::SetTexture(u32 index, const AbstractTexture* texture) void Renderer::SetSamplerState(u32 index, const SamplerState& state) { // Skip lookup if the state hasn't changed. - if (m_sampler_states[index].hex == state.hex) + if (m_sampler_states[index] == state) return; // Look up new state and replace in state tracker. @@ -557,7 +557,7 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state) } StateTracker::GetInstance()->SetSampler(index, sampler); - m_sampler_states[index].hex = state.hex; + m_sampler_states[index] = state; } void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) @@ -588,7 +588,7 @@ void Renderer::ResetSamplerStates() // Invalidate all sampler states, next draw will re-initialize them. 
for (u32 i = 0; i < m_sampler_states.size(); i++) { - m_sampler_states[i].hex = RenderState::GetPointSamplerState().hex; + m_sampler_states[i] = RenderState::GetPointSamplerState(); StateTracker::GetInstance()->SetSampler(i, g_object_cache->GetPointSampler()); } diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 6046450dec..7144342503 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -32,7 +32,7 @@ struct PixelShaderConstants float4 zslope; std::array efbscale; // .xy - // Constants from here onwards are only used in ubershaders. + // Constants from here onwards are only used in ubershaders, other than pack2. u32 genmode; // .z u32 alphaTest; // .w u32 fogParam3; // .x @@ -44,7 +44,7 @@ struct PixelShaderConstants u32 dither; // .z (bool) u32 bounding_box; // .w (bool) std::array pack1; // .xy - combiners, .z - tevind, .w - iref - std::array pack2; // .x - tevorder, .y - tevksel + std::array pack2; // .x - tevorder, .y - tevksel, .z/.w - SamplerState tm0/tm1 std::array konst; // .rgba // The following are used in ubershaders when using shader_framebuffer_fetch blending u32 blend_enable; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6f11c605fd..ffc9044916 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -414,7 +414,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, "\tbool bpmem_dither;\n" "\tbool bpmem_bounding_box;\n" "\tuint4 bpmem_pack1[16];\n" // .xy - combiners, .z - tevind - "\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel + "\tuint4 bpmem_pack2[8];\n" // .x - tevorder, .y - tevksel, .zw - SamplerState tm0/tm1 "\tint4 konstLookup[32];\n" "\tbool blend_enable;\n" "\tuint blend_src_factor;\n" diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp 
index 3a232903af..f63722c9c1 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -282,6 +282,15 @@ void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height) constants.texdims[texmapid][1] = height; } +void PixelShaderManager::SetSamplerState(int texmapid, u32 tm0, u32 tm1) +{ + if (constants.pack2[texmapid][2] != tm0 || constants.pack2[texmapid][3] != tm1) + dirty = true; + + constants.pack2[texmapid][2] = tm0; + constants.pack2[texmapid][3] = tm1; +} + void PixelShaderManager::SetZTextureBias() { constants.zbias[1][3] = bpmem.ztex1.bias; diff --git a/Source/Core/VideoCommon/PixelShaderManager.h b/Source/Core/VideoCommon/PixelShaderManager.h index 4d130178ac..614504998e 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.h +++ b/Source/Core/VideoCommon/PixelShaderManager.h @@ -30,6 +30,7 @@ public: static void SetAlphaTestChanged(); static void SetDestAlphaChanged(); static void SetTexDims(int texmapid, u32 width, u32 height); + static void SetSamplerState(int texmapid, u32 tm0, u32 tm1); static void SetZTextureBias(); static void SetViewportChanged(); static void SetEfbScaleChanged(float scalex, float scaley); diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp index d84a17257e..15559ae67f 100644 --- a/Source/Core/VideoCommon/RenderState.cpp +++ b/Source/Core/VideoCommon/RenderState.cpp @@ -202,27 +202,42 @@ void BlendingState::ApproximateLogicOpWithBlending() void SamplerState::Generate(const BPMemory& bp, u32 index) { auto tex = bp.tex.GetUnit(index); - const TexMode0& tm0 = tex.texMode0; - const TexMode1& tm1 = tex.texMode1; + const TexMode0& bp_tm0 = tex.texMode0; + const TexMode1& bp_tm1 = tex.texMode1; // GX can configure the mip filter to none. However, D3D and Vulkan can't express this in their // sampler states. Therefore, we set the min/max LOD to zero if this option is used. - min_filter = tm0.min_filter == FilterMode::Linear ? 
Filter::Linear : Filter::Point; - mipmap_filter = tm0.mipmap_filter == MipMode::Linear ? Filter::Linear : Filter::Point; - mag_filter = tm0.mag_filter == FilterMode::Linear ? Filter::Linear : Filter::Point; + tm0.min_filter = bp_tm0.min_filter; + tm0.mipmap_filter = + bp_tm0.mipmap_filter == MipMode::Linear ? FilterMode::Linear : FilterMode::Near; + tm0.mag_filter = bp_tm0.mag_filter; // If mipmaps are disabled, clamp min/max lod - max_lod = tm0.mipmap_filter != MipMode::None ? tm1.max_lod.Value() : 0; - min_lod = std::min(max_lod.Value(), static_cast(tm1.min_lod)); - lod_bias = tm0.mipmap_filter != MipMode::None ? tm0.lod_bias * (256 / 32) : 0; + if (bp_tm0.mipmap_filter == MipMode::None) + { + tm1.max_lod = 0; + tm1.min_lod = 0; + tm0.lod_bias = 0; + } + else + { + // NOTE: When comparing, max is checked first, then min; if max is less than min, max wins + tm1.max_lod = bp_tm1.max_lod.Value(); + tm1.min_lod = std::min(tm1.max_lod.Value(), bp_tm1.min_lod.Value()); + tm0.lod_bias = bp_tm0.lod_bias * (256 / 32); + } - // Address modes + // Wrap modes // Hardware testing indicates that wrap_mode set to 3 behaves the same as clamp. - static constexpr std::array address_modes = { - {AddressMode::Clamp, AddressMode::Repeat, AddressMode::MirroredRepeat, AddressMode::Clamp}}; - wrap_u = address_modes[u32(tm0.wrap_s.Value())]; - wrap_v = address_modes[u32(tm0.wrap_t.Value())]; - anisotropic_filtering = 0; + auto filter_invalid_wrap = [](WrapMode mode) { + return (mode <= WrapMode::Mirror) ? mode : WrapMode::Clamp; + }; + tm0.wrap_u = filter_invalid_wrap(bp_tm0.wrap_s); + tm0.wrap_v = filter_invalid_wrap(bp_tm0.wrap_t); + + tm0.diag_lod = bp_tm0.diag_lod; + tm0.anisotropic_filtering = false; // TODO: Respect BP anisotropic filtering mode + tm0.lod_clamp = bp_tm0.lod_clamp; // TODO: What does this do? 
} namespace RenderState @@ -315,37 +330,42 @@ BlendingState GetNoColorWriteBlendState() SamplerState GetInvalidSamplerState() { SamplerState state; - state.hex = UINT64_C(0xFFFFFFFFFFFFFFFF); + state.tm0.hex = 0xFFFFFFFF; + state.tm1.hex = 0xFFFFFFFF; return state; } SamplerState GetPointSamplerState() { SamplerState state = {}; - state.min_filter = SamplerState::Filter::Point; - state.mag_filter = SamplerState::Filter::Point; - state.mipmap_filter = SamplerState::Filter::Point; - state.wrap_u = SamplerState::AddressMode::Clamp; - state.wrap_v = SamplerState::AddressMode::Clamp; - state.min_lod = 0; - state.max_lod = 255; - state.lod_bias = 0; - state.anisotropic_filtering = false; + state.tm0.min_filter = FilterMode::Near; + state.tm0.mag_filter = FilterMode::Near; + state.tm0.mipmap_filter = FilterMode::Near; + state.tm0.wrap_u = WrapMode::Clamp; + state.tm0.wrap_v = WrapMode::Clamp; + state.tm1.min_lod = 0; + state.tm1.max_lod = 255; + state.tm0.lod_bias = 0; + state.tm0.anisotropic_filtering = false; + state.tm0.diag_lod = LODType::Edge; + state.tm0.lod_clamp = false; return state; } SamplerState GetLinearSamplerState() { SamplerState state = {}; - state.min_filter = SamplerState::Filter::Linear; - state.mag_filter = SamplerState::Filter::Linear; - state.mipmap_filter = SamplerState::Filter::Linear; - state.wrap_u = SamplerState::AddressMode::Clamp; - state.wrap_v = SamplerState::AddressMode::Clamp; - state.min_lod = 0; - state.max_lod = 255; - state.lod_bias = 0; - state.anisotropic_filtering = false; + state.tm0.min_filter = FilterMode::Linear; + state.tm0.mag_filter = FilterMode::Linear; + state.tm0.mipmap_filter = FilterMode::Linear; + state.tm0.wrap_u = WrapMode::Clamp; + state.tm0.wrap_v = WrapMode::Clamp; + state.tm1.min_lod = 0; + state.tm1.max_lod = 255; + state.tm0.lod_bias = 0; + state.tm0.anisotropic_filtering = false; + state.tm0.diag_lod = LODType::Edge; + state.tm0.lod_clamp = false; return state; } diff --git 
a/Source/Core/VideoCommon/RenderState.h b/Source/Core/VideoCommon/RenderState.h index 76738e2744..6fca2eff15 100644 --- a/Source/Core/VideoCommon/RenderState.h +++ b/Source/Core/VideoCommon/RenderState.h @@ -145,30 +145,16 @@ union BlendingState u32 hex; }; -union SamplerState +struct SamplerState { - using StorageType = u64; - - enum class Filter : StorageType - { - Point, - Linear - }; - - enum class AddressMode : StorageType - { - Clamp, - Repeat, - MirroredRepeat - }; - void Generate(const BPMemory& bp, u32 index); SamplerState() = default; SamplerState(const SamplerState&) = default; SamplerState& operator=(const SamplerState& rhs) { - hex = rhs.hex; + tm0.hex = rhs.tm0.hex; + tm1.hex = rhs.tm1.hex; return *this; } SamplerState(SamplerState&&) = default; @@ -179,22 +165,54 @@ union SamplerState return *this; } - bool operator==(const SamplerState& rhs) const { return hex == rhs.hex; } + bool operator==(const SamplerState& rhs) const { return Hex() == rhs.Hex(); } bool operator!=(const SamplerState& rhs) const { return !operator==(rhs); } - bool operator<(const SamplerState& rhs) const { return hex < rhs.hex; } - BitField<0, 1, Filter> min_filter; - BitField<1, 1, Filter> mag_filter; - BitField<2, 1, Filter> mipmap_filter; - BitField<3, 2, AddressMode> wrap_u; - BitField<5, 2, AddressMode> wrap_v; - BitField<7, 16, s64> lod_bias; // multiplied by 256 - BitField<23, 8, u64> min_lod; // multiplied by 16 - BitField<31, 8, u64> max_lod; // multiplied by 16 - BitField<39, 1, u64> anisotropic_filtering; + bool operator<(const SamplerState& rhs) const { return Hex() < rhs.Hex(); } - StorageType hex; + constexpr u64 Hex() const { return tm0.hex | (static_cast(tm1.hex) << 32); } + + // Based on BPMemory TexMode0/TexMode1, but with slightly higher precision and some + // simplifications + union TM0 + { + // BP's mipmap_filter can be None, but that is represented here by setting min_lod and max_lod + // to 0 + BitField<0, 1, FilterMode> min_filter; + BitField<1, 1, 
FilterMode> mag_filter; + BitField<2, 1, FilterMode> mipmap_filter; + // Guaranteed to be valid values (i.e. not 3) + BitField<3, 2, WrapMode> wrap_u; + BitField<5, 2, WrapMode> wrap_v; + BitField<7, 1, LODType> diag_lod; + BitField<8, 16, s32> lod_bias; // multiplied by 256, higher precision than normal + BitField<24, 1, bool, u32> lod_clamp; // TODO: This isn't currently implemented + BitField<25, 1, bool, u32> anisotropic_filtering; // TODO: This doesn't use the BP one yet + u32 hex; + }; + union TM1 + { + // Min is guaranteed to be less than or equal to max + BitField<0, 8, u32> min_lod; // multiplied by 16 + BitField<8, 8, u32> max_lod; // multiplied by 16 + u32 hex; + }; + + TM0 tm0; + TM1 tm1; }; +namespace std +{ +template <> +struct hash +{ + std::size_t operator()(SamplerState const& state) const noexcept + { + return std::hash{}(state.Hex()); + } +}; +} // namespace std + namespace RenderState { RasterizationState GetInvalidRasterizationState(); diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 0127137c8d..10b088283d 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -988,15 +988,15 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, // Force texture filtering config option. if (g_ActiveConfig.bForceFiltering) { - state.min_filter = SamplerState::Filter::Linear; - state.mag_filter = SamplerState::Filter::Linear; - state.mipmap_filter = tm0.mipmap_filter != MipMode::None ? SamplerState::Filter::Linear : - SamplerState::Filter::Point; + state.tm0.min_filter = FilterMode::Linear; + state.tm0.mag_filter = FilterMode::Linear; + state.tm0.mipmap_filter = + tm0.mipmap_filter != MipMode::None ? FilterMode::Linear : FilterMode::Near; } // Custom textures may have a greater number of mips if (custom_tex) - state.max_lod = 255; + state.tm1.max_lod = 255; // Anisotropic filtering option. 
if (g_ActiveConfig.iMaxAnisotropy != 0 && IsAnisostropicEnhancementSafe(tm0)) @@ -1008,15 +1008,15 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, // Letting the game set other combinations will have varying arbitrary results; // possibly being interpreted as equal to bilinear/trilinear, implicitly // disabling anisotropy, or changing the anisotropic algorithm employed. - state.min_filter = SamplerState::Filter::Linear; - state.mag_filter = SamplerState::Filter::Linear; + state.tm0.min_filter = FilterMode::Linear; + state.tm0.mag_filter = FilterMode::Linear; if (tm0.mipmap_filter != MipMode::None) - state.mipmap_filter = SamplerState::Filter::Linear; - state.anisotropic_filtering = 1; + state.tm0.mipmap_filter = FilterMode::Linear; + state.tm0.anisotropic_filtering = true; } else { - state.anisotropic_filtering = 0; + state.tm0.anisotropic_filtering = false; } if (has_arbitrary_mips && tm0.mipmap_filter != MipMode::None) @@ -1025,14 +1025,15 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, // that have arbitrary contents, eg. are used for fog effects where the // distance they kick in at is important to preserve at any resolution. // Correct this with the upscaling factor of custom textures. 
- s64 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f; - state.lod_bias = std::clamp(state.lod_bias + lod_offset, -32768, 32767); + s32 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f; + state.tm0.lod_bias = std::clamp(state.tm0.lod_bias + lod_offset, -32768, 32767); // Anisotropic also pushes mips farther away so it cannot be used either - state.anisotropic_filtering = 0; + state.tm0.anisotropic_filtering = false; } g_renderer->SetSamplerState(index, state); + PixelShaderManager::SetSamplerState(index, state.tm0.hex, state.tm1.hex); } void TextureCacheBase::BindTextures(BitSet32 used_textures) From ddf26913955cdfad9ca502aa444cdf94bc0964e8 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 22 Aug 2021 15:34:50 -0700 Subject: [PATCH 08/15] VideoCommon: Manually handle texture wrapping and sampling --- Source/Core/VideoCommon/PixelShaderGen.cpp | 219 +++++++++++++++++--- Source/Core/VideoCommon/UberShaderPixel.cpp | 42 ++-- 2 files changed, 214 insertions(+), 47 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index ffc9044916..b2a1166fd6 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -381,7 +381,7 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, // Declare samplers out.Write("SamplerState samp[8] : register(s0);\n" "\n" - "Texture2DArray Tex[8] : register(t0);\n"); + "Texture2DArray tex[8] : register(t0);\n"); } out.Write("\n"); @@ -428,7 +428,9 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, "#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n" "#define bpmem_iref(i) (bpmem_pack1[(i)].w)\n" "#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n" - "#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n\n"); + "#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n" + "#define samp_texmode0(i) (bpmem_pack2[(i)].z)\n" + "#define samp_texmode1(i) 
(bpmem_pack2[(i)].w)\n\n"); if (host_config.per_pixel_lighting) { @@ -534,14 +536,183 @@ void UpdateBoundingBox(float2 rawpos) {{ )", fmt::arg("efb_height", EFB_HEIGHT), fmt::arg("efb_scale", I_EFBSCALE)); } + + { + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { + out.Write(R"( +int4 readTexture(in sampler2DArray tex, uint u, uint v, int layer, int lod) {{ + return iround(texelFetch(tex, int3(u, v, layer), lod) * 255.0); +}} + +int4 readTextureLinear(in sampler2DArray tex, uint2 uv1, uint2 uv2, int layer, int lod, int2 frac_uv) {{)"); + } + else if (api_type == APIType::D3D) + { + out.Write(R"( +int4 readTexture(in Texture2DArray tex, uint u, uint v, int layer, int lod) {{ + return iround(tex.Load(int4(u, v, layer, lod)) * 255.0); +}} + +int4 readTextureLinear(in Texture2DArray tex, uint2 uv1, uint2 uv2, int layer, int lod, int2 frac_uv) {{)"); + } + + out.Write(R"( + int4 result = + readTexture(tex, uv1.x, uv1.y, layer, lod) * (128 - frac_uv.x) * (128 - frac_uv.y) + + readTexture(tex, uv2.x, uv1.y, layer, lod) * ( frac_uv.x) * (128 - frac_uv.y) + + readTexture(tex, uv1.x, uv2.y, layer, lod) * (128 - frac_uv.x) * ( frac_uv.y) + + readTexture(tex, uv2.x, uv2.y, layer, lod) * ( frac_uv.x) * ( frac_uv.y); + return result >> 14; +}} +)"); + + out.Write(R"( +uint WrapCoord(int coord, uint wrap, int size) {{ + switch (wrap) {{ + case {:s}: + default: // confirmed that clamp is used for invalid (3) via hardware test + return uint(clamp(coord, 0, size - 1)); + case {:s}: + return uint(coord & (size - 1)); + case {:s}: + if ((coord & size) != 0) {{ + coord = ~coord; + }} + return uint(coord & (size - 1)); + }} +}} +)", + WrapMode::Clamp, WrapMode::Repeat, WrapMode::Mirror); + } + + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { + out.Write("\nint4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {{\n"); + } + else if (api_type == APIType::D3D) + { + out.Write("\nint4 sampleTexture(uint texmap, in Texture2DArray 
tex, in SamplerState tex_samp, " + "int2 uv, int layer) {{\n"); + } + + { + out.Write(R"( + uint texmode0 = samp_texmode0(texmap); + uint texmode1 = samp_texmode1(texmap); + int size_s = )" I_TEXDIMS R"([texmap].x; + int size_t = )" I_TEXDIMS R"([texmap].y; + + uint wrap_s = {}; + uint wrap_t = {}; + bool mag_linear = {} != 0u; + bool mipmap_linear = {} != 0u; + bool min_linear = {} != 0u; + bool diag_lod = {} != 0u; + int lod_bias = {}; + // uint max_aniso = TODO; + bool lod_clamp = {} != 0u; + int min_lod = int({}); + int max_lod = int({}); +)", + BitfieldExtract<&SamplerState::TM0::wrap_u>("texmode0"), + BitfieldExtract<&SamplerState::TM0::wrap_v>("texmode0"), + BitfieldExtract<&SamplerState::TM0::mag_filter>("texmode0"), + BitfieldExtract<&SamplerState::TM0::mipmap_filter>("texmode0"), + BitfieldExtract<&SamplerState::TM0::min_filter>("texmode0"), + BitfieldExtract<&SamplerState::TM0::diag_lod>("texmode0"), + BitfieldExtract<&SamplerState::TM0::lod_bias>("texmode0"), + // BitfieldExtract<&SamplerState::TM0::max_aniso>("texmode0"), + BitfieldExtract<&SamplerState::TM0::lod_clamp>("texmode0"), + BitfieldExtract<&SamplerState::TM1::min_lod>("texmode1"), + BitfieldExtract<&SamplerState::TM1::max_lod>("texmode1")); + + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + out.Write(R"( + float2 uv_delta_x = abs(dFdx(float2(uv))); + float2 uv_delta_y = abs(dFdy(float2(uv))); +)"); + else if (api_type == APIType::D3D) + out.Write(R"( + float2 uv_delta_x = abs(ddx(float2(uv))); + float2 uv_delta_y = abs(ddy(float2(uv))); +)"); + + // TODO: LOD bias is normally S2.5 (Dolphin uses S7.8 for arbitrary mipmap detection and higher + // IRs), but (at least per the software renderer) actual LOD is S28.4. How does this work? + // Also, note that we can make some assumptions due to use of a SamplerState version of the BP + // configuration, which tidies things compared to whatever nonsense games can put in. 
+ // TODO: This doesn't support diagonal LOD + out.Write(R"( + float2 uv_delta = max(uv_delta_x, uv_delta_y); + float max_delta = max(uv_delta.x / 128.0, uv_delta.y / 128.0); + // log2(x) is undefined if x <= 0, but in practice it seems log2(0) is -infinity, which becomes INT_MIN. + // If lod_bias is negative, adding it to INT_MIN causes an underflow, resulting in a large positive value. + // Hardware testing indicates that min_lod should be used when the derivative is 0. + int lod = max_delta == 0.0 ? min_lod : int(floor(log2(max_delta) * 16.0)) + (lod_bias >> 4); + + bool is_linear = (lod > 0) ? min_linear : mag_linear; + lod = clamp(lod, min_lod, max_lod); + int base_lod = lod >> 4; + int frac_lod = lod & 15; + if (!mipmap_linear && frac_lod >= 8) {{ + // Round to nearest LOD in point mode + base_lod++; + }} + + if (is_linear) {{ + uint2 texuv1 = uint2( + WrapCoord(((uv.x >> base_lod) - 64) >> 7, wrap_s, size_s >> base_lod), + WrapCoord(((uv.y >> base_lod) - 64) >> 7, wrap_t, size_t >> base_lod)); + uint2 texuv2 = uint2( + WrapCoord(((uv.x >> base_lod) + 64) >> 7, wrap_s, size_s >> base_lod), + WrapCoord(((uv.y >> base_lod) + 64) >> 7, wrap_t, size_t >> base_lod)); + int2 frac_uv = int2(((uv.x >> base_lod) - 64) & 0x7f, ((uv.y >> base_lod) - 64) & 0x7f); + + int4 result = readTextureLinear(tex, texuv1, texuv2, layer, base_lod, frac_uv); + + if (frac_lod != 0 && mipmap_linear) {{ + texuv1 = uint2( + WrapCoord(((uv.x >> (base_lod + 1)) - 64) >> 7, wrap_s, size_s >> (base_lod + 1)), + WrapCoord(((uv.y >> (base_lod + 1)) - 64) >> 7, wrap_t, size_t >> (base_lod + 1))); + texuv2 = uint2( + WrapCoord(((uv.x >> (base_lod + 1)) + 64) >> 7, wrap_s, size_s >> (base_lod + 1)), + WrapCoord(((uv.y >> (base_lod + 1)) + 64) >> 7, wrap_t, size_t >> (base_lod + 1))); + frac_uv = int2(((uv.x >> (base_lod + 1)) - 64) & 0x7f, ((uv.y >> (base_lod + 1)) - 64) & 0x7f); + + result *= 16 - frac_lod; + result += readTextureLinear(tex, texuv1, texuv2, layer, base_lod + 1, frac_uv) * 
frac_lod; + result >>= 4; + }} + + return result; + }} else {{ + uint2 texuv = uint2( + WrapCoord(uv.x >> (7 + base_lod), wrap_s, size_s >> base_lod), + WrapCoord(uv.y >> (7 + base_lod), wrap_t, size_t >> base_lod)); + + int4 result = readTexture(tex, texuv.x, texuv.y, layer, base_lod); + + if (frac_lod != 0 && mipmap_linear) {{ + texuv = uint2( + WrapCoord(uv.x >> (7 + base_lod + 1), wrap_s, size_s >> (base_lod + 1)), + WrapCoord(uv.y >> (7 + base_lod + 1), wrap_t, size_t >> (base_lod + 1))); + + result *= 16 - frac_lod; + result += readTexture(tex, texuv.x, texuv.y, layer, base_lod + 1) * frac_lod; + result >>= 4; + }} + return result; + }} +}} +)"); + } } static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, APIType api_type, bool stereo); static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBias bias, TevOp op, bool clamp, TevScale scale, bool alpha); -static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::string_view texswap, - int texmap, bool stereo, APIType api_type); static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_data, APIType api_type, bool per_pixel_depth, bool use_dual_source); static void WriteFog(ShaderCode& out, const pixel_shader_uid_data* uid_data); @@ -568,6 +739,17 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos WriteBitfieldExtractHeader(out, api_type, host_config); WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box); + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { + out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " + "sampleTexture(texmap, samp[texmap], uv, layer)\n"); + } + else if (api_type == APIType::D3D) + { + out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) " + "sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer)\n"); + } + if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ) { // Zcomploc 
(aka early_ztest) is a way to control whether depth test is done before @@ -755,6 +937,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n"); out.Write(" ) {{\n"); } + if (!stereo) + out.Write("\tint layer = 0;\n"); out.Write("\tint4 c0 = " I_COLORS "[1], c1 = " I_COLORS "[2], c2 = " I_COLORS "[3], prev = " I_COLORS "[0];\n" @@ -835,8 +1019,8 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); - out.Write("\tint3 iindtex{} = ", i); - SampleTexture(out, "float2(tempcoord)", "abg", texmap, stereo, api_type); + out.Write("\tint3 iindtex{0} = sampleTextureWrapper({1}u, tempcoord, layer).abg;\n", i, + texmap); } } @@ -1244,8 +1428,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i '\0', }; - out.Write("\ttextemp = "); - SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, stereo, api_type); + out.Write("\ttextemp = sampleTextureWrapper({0}u, tevcoord.xy, layer).{1};\n", + stage.tevorders_texmap, texswap); } else if (uid_data->genMode_numtexgens == 0) { @@ -1429,25 +1613,6 @@ static void WriteTevRegular(ShaderCode& out, std::string_view components, TevBia out.Write("){}", tev_scale_table_right[u32(scale)]); } -static void SampleTexture(ShaderCode& out, std::string_view texcoords, std::string_view texswap, - int texmap, bool stereo, APIType api_type) -{ - out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap); - - if (api_type == APIType::D3D) - { - out.Write("iround(255.0 * Tex[{}].Sample(samp[{}], float3({}.xy / float2(" I_TEXDIMS - "[{}].xy * 128), {}))).{};\n", - texmap, texmap, texcoords, texmap, stereo ? 
"layer" : "0.0", texswap); - } - else - { - out.Write("iround(255.0 * texture(samp[{}], float3({}.xy / float2(" I_TEXDIMS - "[{}].xy * 128), {}))).{};\n", - texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); - } -} - constexpr std::array tev_alpha_funcs_table{ "(false)", // CompareMode::Never "(prev.a < {})", // CompareMode::Less diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 833bbe1d0f..36fc6addc1 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -226,17 +226,17 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, { // Doesn't look like DirectX supports this. Oh well the code path is here just in case it // supports this in the future. - out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n"); + out.Write("int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {{\n"); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.Write(" return iround(texture(samp[sampler_num], uv) * 255.0);\n"); + out.Write(" return sampleTexture(texmap, samp[texmap], uv, layer);\n"); else if (api_type == APIType::D3D) - out.Write(" return iround(Tex[sampler_num].Sample(samp[sampler_num], uv) * 255.0);\n"); + out.Write(" return sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer);\n"); out.Write("}}\n\n"); } else { - out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n" - " // This is messy, but DirectX, OpenGL 3.3 and OpenGL ES 3.0 doesn't support " + out.Write("int4 sampleTextureWrapper(uint sampler_num, int2 uv, int layer) {{\n" + " // This is messy, but DirectX, OpenGL 3.3, and OpenGL ES 3.0 don't support " "dynamic indexing of the sampler array\n" " // With any luck the shader compiler will optimise this if the hardware supports " "dynamic indexing.\n" @@ -244,9 +244,14 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, for (int i = 0; i < 8; 
i++) { if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.Write(" case {}u: return iround(texture(samp[{}], uv) * 255.0);\n", i, i); + { + out.Write(" case {0}u: return sampleTexture({0}u, samp[{0}u], uv, layer);\n", i); + } else if (api_type == APIType::D3D) - out.Write(" case {}u: return iround(Tex[{}].Sample(samp[{}], uv) * 255.0);\n", i, i, i); + { + out.Write(" case {0}u: return sampleTexture({0}u, tex[{0}u], samp[{0}u], uv, layer);\n", + i); + } } out.Write(" }}\n" "}}\n\n"); @@ -284,8 +289,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, // ====================== // Indirect Lookup // ====================== - const auto LookupIndirectTexture = [&out, stereo](std::string_view out_var_name, - std::string_view in_index_name) { + const auto LookupIndirectTexture = [&out](std::string_view out_var_name, + std::string_view in_index_name) { // in_index_name is the indirect stage, not the tev stage // bpmem_iref is packed differently from RAS1_IREF // This function assumes bpmem_iref is nonzero (i.e. matrix is not off, and the @@ -301,11 +306,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " else\n" " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].zw;\n" "\n" - " {} = sampleTexture(texmap, float3(float2(fixedPoint_uv) / float2(" I_TEXDIMS - "[texmap].xy * 128), {})).abg;\n" - "}}", - in_index_name, in_index_name, in_index_name, in_index_name, out_var_name, - stereo ? 
"float(layer)" : "0.0"); + " {} = sampleTextureWrapper(texmap, fixedPoint_uv, layer).abg;\n" + "}}\n", + in_index_name, in_index_name, in_index_name, in_index_name, out_var_name); }; // ====================== @@ -729,6 +732,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n"); out.Write("\n ) {{\n"); } + if (!stereo) + out.Write(" int layer = 0;\n"); out.Write(" int3 tevcoord = int3(0, 0, 0);\n" " State s;\n" @@ -820,7 +825,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, // For the undefined case, we just skip applying the indirect operation, which is close enough. // Viewtiful Joe hits the undefined case (bug 12525). // Wrapping and add to previous still apply in this case (and when the stage is disabled). - out.Write(" if (bpmem_iref(bt) != 0u) {{"); + out.Write(" if (bpmem_iref(bt) != 0u) {{\n"); out.Write(" int3 indcoord;\n"); LookupIndirectTexture("indcoord", "bt"); out.Write(" if (bs != 0u)\n" @@ -910,11 +915,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, " uint sampler_num = {};\n", BitfieldExtract<&TwoTevStageOrders::texmap0>("ss.order")); out.Write("\n" - " float2 uv = (float2(tevcoord.xy)) / float2(" I_TEXDIMS - "[sampler_num].xy * 128);\n"); - out.Write(" int4 color = sampleTexture(sampler_num, float3(uv, {}));\n", - stereo ? 
"float(layer)" : "0.0"); - out.Write(" uint swap = {};\n", + " int4 color = sampleTextureWrapper(sampler_num, tevcoord.xy, layer);\n" + " uint swap = {};\n", BitfieldExtract<&TevStageCombiner::AlphaCombiner::tswap>("ss.ac")); out.Write(" s.TexColor = Swizzle(swap, color);\n"); out.Write(" }} else {{\n" From 51e3334526ec3d86655c49307577f9bcffd05aa6 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 13 Nov 2021 20:10:20 -0800 Subject: [PATCH 09/15] VideoCommon: Use coarse derivatives for Manual Texture Sampling if possible --- Source/Core/VideoBackends/D3D/D3DMain.cpp | 1 + .../Core/VideoBackends/D3D12/VideoBackend.cpp | 1 + .../Core/VideoBackends/Null/NullBackend.cpp | 1 + Source/Core/VideoBackends/OGL/OGLMain.cpp | 3 ++- Source/Core/VideoBackends/OGL/OGLRender.cpp | 2 ++ .../VideoBackends/OGL/ProgramShaderCache.cpp | 4 +++ Source/Core/VideoBackends/Software/SWmain.cpp | 1 + .../VideoBackends/Vulkan/VulkanContext.cpp | 1 + Source/Core/VideoCommon/PixelShaderGen.cpp | 26 ++++++++++++++++--- Source/Core/VideoCommon/VideoConfig.h | 1 + 10 files changed, 37 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/D3DMain.cpp b/Source/Core/VideoBackends/D3D/D3DMain.cpp index 9f75712ccd..b217218986 100644 --- a/Source/Core/VideoBackends/D3D/D3DMain.cpp +++ b/Source/Core/VideoBackends/D3D/D3DMain.cpp @@ -106,6 +106,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsSSAA = true; g_Config.backend_info.bSupportsShaderBinaries = true; g_Config.backend_info.bSupportsPipelineCacheData = false; + g_Config.backend_info.bSupportsCoarseDerivatives = true; g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter); g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames(); diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp index 68496c9ed6..3a33832dc1 100644 --- a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp +++ 
b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp @@ -82,6 +82,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.AAModes = DXContext::GetAAModes(g_Config.iAdapter); g_Config.backend_info.bSupportsShaderBinaries = true; g_Config.backend_info.bSupportsPipelineCacheData = true; + g_Config.backend_info.bSupportsCoarseDerivatives = true; // We can only check texture support once we have a device. if (g_dx_context) diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index 5887d37391..17e31c4418 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -55,6 +55,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsPartialDepthCopies = false; g_Config.backend_info.bSupportsShaderBinaries = false; g_Config.backend_info.bSupportsPipelineCacheData = false; + g_Config.backend_info.bSupportsCoarseDerivatives = false; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/OGLMain.cpp b/Source/Core/VideoBackends/OGL/OGLMain.cpp index 3ca79d5c85..375b1d12c7 100644 --- a/Source/Core/VideoBackends/OGL/OGLMain.cpp +++ b/Source/Core/VideoBackends/OGL/OGLMain.cpp @@ -99,7 +99,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsGPUTextureDecoding = true; g_Config.backend_info.bSupportsBBox = true; - // Overwritten in Render.cpp later + // Overwritten in OGLRender.cpp later g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsPaletteConversion = true; @@ -107,6 +107,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsDepthClamp = true; g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false; + g_Config.backend_info.bSupportsCoarseDerivatives = false; 
g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp index 782d5e1747..76064920f2 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.cpp +++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp @@ -483,6 +483,8 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ GLExtensions::Supports("GL_EXT_texture_compression_s3tc"); g_Config.backend_info.bSupportsBPTCTextures = GLExtensions::Supports("GL_ARB_texture_compression_bptc"); + g_Config.backend_info.bSupportsCoarseDerivatives = + GLExtensions::Supports("GL_ARB_derivative_control") || GLExtensions::Version() >= 450; if (m_main_gl_context->IsGLES()) { diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 021391e47c..8aba81b7ed 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -747,6 +747,7 @@ void ProgramShaderCache::CreateHeader() "%s\n" // shader image load store "%s\n" // shader framebuffer fetch "%s\n" // shader thread shuffle + "%s\n" // derivative control // Precision defines for GLSL ES "%s\n" @@ -826,6 +827,9 @@ void ProgramShaderCache::CreateHeader() "#extension GL_ARB_shader_image_load_store : enable" : "", framebuffer_fetch_string.c_str(), shader_shuffle_string.c_str(), + g_ActiveConfig.backend_info.bSupportsCoarseDerivatives ? + "#extension GL_ARB_derivative_control : enable" : + "", is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "", (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? 
diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index a0aa8e03c1..ca1f4304dd 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -84,6 +84,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsShaderBinaries = false; g_Config.backend_info.bSupportsPipelineCacheData = false; g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsCoarseDerivatives = false; // aamodes g_Config.backend_info.AAModes = {1}; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 6c58369204..16aa7872e7 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -286,6 +286,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsLogicOp = false; // Dependent on features. config->backend_info.bSupportsLargePoints = false; // Dependent on features. config->backend_info.bSupportsFramebufferFetch = false; // No support. + config->backend_info.bSupportsCoarseDerivatives = true; // Assumed support. } void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index b2a1166fd6..dd2d9f41ad 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -628,15 +628,35 @@ uint WrapCoord(int coord, uint wrap, int size) {{ BitfieldExtract<&SamplerState::TM1::max_lod>("texmode1")); if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - out.Write(R"( + { + if (g_ActiveConfig.backend_info.bSupportsCoarseDerivatives) + { + // The software renderer uses the equivalent of coarse derivatives, so use them here for + // consistency. This hasn't been hardware tested. 
+ // Note that bSupportsCoarseDerivatives being false only means dFdxCoarse and dFdxFine don't + // exist. The GPU may still implement dFdx using coarse derivatives; we just don't have the + // ability to specifically require it. + out.Write(R"( + float2 uv_delta_x = abs(dFdxCoarse(float2(uv))); + float2 uv_delta_y = abs(dFdyCoarse(float2(uv))); +)"); + } + else + { + out.Write(R"( float2 uv_delta_x = abs(dFdx(float2(uv))); float2 uv_delta_y = abs(dFdy(float2(uv))); )"); + } + } else if (api_type == APIType::D3D) + { + ASSERT(g_ActiveConfig.backend_info.bSupportsCoarseDerivatives); out.Write(R"( - float2 uv_delta_x = abs(ddx(float2(uv))); - float2 uv_delta_y = abs(ddy(float2(uv))); + float2 uv_delta_x = abs(ddx_coarse(float2(uv))); + float2 uv_delta_y = abs(ddy_coarse(float2(uv))); )"); + } // TODO: LOD bias is normally S2.5 (Dolphin uses S7.8 for arbitrary mipmap detection and higher // IRs), but (at least per the software renderer) actual LOD is S28.4. How does this work? diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 495eef6c28..d4307c479a 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -230,6 +230,7 @@ struct VideoConfig final bool bSupportsDepthReadback = false; bool bSupportsShaderBinaries = false; bool bSupportsPipelineCacheData = false; + bool bSupportsCoarseDerivatives = false; } backend_info; // Utility From b9288212a018bfac1bf2239c4dd778ad74bff186 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Tue, 27 Jul 2021 20:20:39 -0700 Subject: [PATCH 10/15] Software: Adjust diagonal LOD implementation This produces behavior matching the behavior on hardware (see Wario's Gold Mine in Mario Kart Wii). 
--- .../VideoBackends/Software/Rasterizer.cpp | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index df04ab5a31..f886858124 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -171,22 +171,25 @@ static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoor const TexMode1& tm1 = texUnit.texMode1; float sDelta, tDelta; + + float* uv00 = rasterBlock.Pixel[0][0].Uv[texcoord]; + float* uv10 = rasterBlock.Pixel[1][0].Uv[texcoord]; + float* uv01 = rasterBlock.Pixel[0][1].Uv[texcoord]; + + float dudx = fabsf(uv00[0] - uv10[0]); + float dvdx = fabsf(uv00[1] - uv10[1]); + float dudy = fabsf(uv00[0] - uv01[0]); + float dvdy = fabsf(uv00[1] - uv01[1]); + if (tm0.diag_lod == LODType::Diagonal) { - float* uv0 = rasterBlock.Pixel[0][0].Uv[texcoord]; - float* uv1 = rasterBlock.Pixel[1][1].Uv[texcoord]; - - sDelta = fabsf(uv0[0] - uv1[0]); - tDelta = fabsf(uv0[1] - uv1[1]); + sDelta = dudx + dudy; + tDelta = dvdx + dvdy; } else { - float* uv0 = rasterBlock.Pixel[0][0].Uv[texcoord]; - float* uv1 = rasterBlock.Pixel[1][0].Uv[texcoord]; - float* uv2 = rasterBlock.Pixel[0][1].Uv[texcoord]; - - sDelta = std::max(fabsf(uv0[0] - uv1[0]), fabsf(uv0[0] - uv2[0])); - tDelta = std::max(fabsf(uv0[1] - uv1[1]), fabsf(uv0[1] - uv2[1])); + sDelta = std::max(dudx, dudy); + tDelta = std::max(dvdx, dvdy); } // get LOD in s28.4 From ee80298ca4b4fb810b02334508e11c5405aa77d2 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 16 Oct 2021 12:41:46 -0700 Subject: [PATCH 11/15] VideoCommon: Implement diagonal LOD Note that both GLSL and HLSL provide a fwidth (fragment width) function defined as `fwidth(p) = abs(dFdx(p)) + abs(dFdy(p))`. However, it's easy enough to implement this ourselves (and it makes the code a bit more obvious). 
--- Source/Core/VideoCommon/PixelShaderGen.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index dd2d9f41ad..bff76d4246 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -662,9 +662,8 @@ uint WrapCoord(int coord, uint wrap, int size) {{ // IRs), but (at least per the software renderer) actual LOD is S28.4. How does this work? // Also, note that we can make some assumptions due to use of a SamplerState version of the BP // configuration, which tidies things compared to whatever nonsense games can put in. - // TODO: This doesn't support diagonal LOD out.Write(R"( - float2 uv_delta = max(uv_delta_x, uv_delta_y); + float2 uv_delta = diag_lod ? uv_delta_x + uv_delta_y : max(uv_delta_x, uv_delta_y); float max_delta = max(uv_delta.x / 128.0, uv_delta.y / 128.0); // log2(x) is undefined if x <= 0, but in practice it seems log2(0) is -infinity, which becomes INT_MIN. // If lod_bias is negative, adding it to INT_MIN causes an underflow, resulting in a large positive value. 
From 93eea7cb1382cee1a265f330b824dcf1ca65294c Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 29 Jul 2021 17:43:35 -0700 Subject: [PATCH 12/15] VideoCommon: Add option to use old behavior (Fast Texture Sampling) Co-authored-by: JosJuice --- .../features/settings/model/BooleanSetting.java | 2 ++ .../settings/ui/SettingsFragmentPresenter.java | 2 ++ .../Android/app/src/main/res/values/strings.xml | 2 ++ Source/Core/Core/Config/GraphicsSettings.cpp | 2 ++ Source/Core/Core/Config/GraphicsSettings.h | 1 + .../DolphinQt/Config/Graphics/HacksWidget.cpp | 15 +++++++++++++++ .../Core/DolphinQt/Config/Graphics/HacksWidget.h | 3 ++- Source/Core/VideoCommon/PixelShaderGen.cpp | 16 ++++++++++++++++ Source/Core/VideoCommon/ShaderGenCommon.cpp | 1 + Source/Core/VideoCommon/ShaderGenCommon.h | 1 + Source/Core/VideoCommon/VideoConfig.cpp | 1 + Source/Core/VideoCommon/VideoConfig.h | 1 + 12 files changed, 46 insertions(+), 1 deletion(-) diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/model/BooleanSetting.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/model/BooleanSetting.java index ac3f86eb8a..7b488719a6 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/model/BooleanSetting.java +++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/model/BooleanSetting.java @@ -198,6 +198,8 @@ public enum BooleanSetting implements AbstractBooleanSetting GFX_HACK_EFB_EMULATE_FORMAT_CHANGES(Settings.FILE_GFX, Settings.SECTION_GFX_HACKS, "EFBEmulateFormatChanges", false), GFX_HACK_VERTEX_ROUDING(Settings.FILE_GFX, Settings.SECTION_GFX_HACKS, "VertexRounding", false), + GFX_HACK_FAST_TEXTURE_SAMPLING(Settings.FILE_GFX, Settings.SECTION_GFX_HACKS, + "FastTextureSampling", false), LOGGER_WRITE_TO_FILE(Settings.FILE_LOGGER, Settings.SECTION_LOGGER_OPTIONS, "WriteToFile", false), diff --git 
a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/ui/SettingsFragmentPresenter.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/ui/SettingsFragmentPresenter.java index bc23b2042a..3eb95efa68 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/ui/SettingsFragmentPresenter.java +++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/ui/SettingsFragmentPresenter.java @@ -719,6 +719,8 @@ public final class SettingsFragmentPresenter R.string.vertex_rounding, R.string.vertex_rounding_description)); sl.add(new CheckBoxSetting(mContext, BooleanSetting.GFX_SAVE_TEXTURE_CACHE_TO_STATE, R.string.texture_cache_to_state, R.string.texture_cache_to_state_description)); + sl.add(new CheckBoxSetting(mContext, BooleanSetting.GFX_HACK_FAST_TEXTURE_SAMPLING, + R.string.fast_texture_sampling, R.string.fast_texture_sampling_description)); } private void addAdvancedGraphicsSettings(ArrayList sl) diff --git a/Source/Android/app/src/main/res/values/strings.xml b/Source/Android/app/src/main/res/values/strings.xml index efcdf82bd4..664e9e8343 100644 --- a/Source/Android/app/src/main/res/values/strings.xml +++ b/Source/Android/app/src/main/res/values/strings.xml @@ -277,6 +277,8 @@ Rounds 2D vertices to whole pixels. Fixes graphical problems in some games at higher internal resolutions. This setting has no effect when native internal resolution is used. If unsure, leave this unchecked. Save Texture Cache to State Includes the contents of the embedded frame buffer (EFB) and upscaled EFB copies in save states. Fixes missing and/or non-upscaled textures/objects when loading states at the cost of additional save/load time. + Fast Texture Sampling + Use the video backend\'s built-in texture sampling functionality instead of a manual implementation. 
Aspect Ratio Select what aspect ratio to use when rendering Shader Compilation Mode diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index d1c7b21cf9..d859cab150 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -150,6 +150,8 @@ const Info GFX_HACK_EFB_EMULATE_FORMAT_CHANGES{ const Info GFX_HACK_VERTEX_ROUDING{{System::GFX, "Hacks", "VertexRounding"}, false}; const Info GFX_HACK_MISSING_COLOR_VALUE{{System::GFX, "Hacks", "MissingColorValue"}, 0xFFFFFFFF}; +const Info GFX_HACK_FAST_TEXTURE_SAMPLING{{System::GFX, "Hacks", "FastTextureSampling"}, + false}; // Graphics.GameSpecific diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index cc112722ba..3936b65cd9 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -123,6 +123,7 @@ extern const Info GFX_HACK_COPY_EFB_SCALED; extern const Info GFX_HACK_EFB_EMULATE_FORMAT_CHANGES; extern const Info GFX_HACK_VERTEX_ROUDING; extern const Info GFX_HACK_MISSING_COLOR_VALUE; +extern const Info GFX_HACK_FAST_TEXTURE_SAMPLING; // Graphics.GameSpecific diff --git a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp index 2765e2e2fd..4dc302ce1e 100644 --- a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp @@ -106,11 +106,14 @@ void HacksWidget::CreateWidgets() m_vertex_rounding = new GraphicsBool(tr("Vertex Rounding"), Config::GFX_HACK_VERTEX_ROUDING); m_save_texture_cache_state = new GraphicsBool(tr("Save Texture Cache to State"), Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE); + m_fast_texture_sampling = + new GraphicsBool(tr("Fast Texture Sampling"), Config::GFX_HACK_FAST_TEXTURE_SAMPLING); other_layout->addWidget(m_fast_depth_calculation, 0, 0); other_layout->addWidget(m_disable_bounding_box, 
0, 1); other_layout->addWidget(m_vertex_rounding, 1, 0); other_layout->addWidget(m_save_texture_cache_state, 1, 1); + other_layout->addWidget(m_fast_texture_sampling, 2, 0); main_layout->addWidget(efb_box); main_layout->addWidget(texture_cache_box); @@ -276,6 +279,17 @@ void HacksWidget::AddDescriptions() "higher internal resolutions. This setting has no effect when native internal " "resolution is used.

If unsure, leave this " "unchecked."); + static const char TR_FAST_TEXTURE_SAMPLING_DESCRIPTION[] = QT_TR_NOOP( + "Use the video backend's built-in texture sampling functionality instead of a manual " + "implementation.

" + "This setting can potentially improve performance, especially at higher internal " + "resolutions; additionally, Anisotropic Filtering currently only works with Fast Texture " + "Sampling.

" + "This comes at the cost of graphical issues in some games on certain GPUs, most commonly " + "vertical lines on FMVs, as well as lack of emulation of texture wrapping special cases " + "(though this also only works at 1x IR or when scaled EFB is disabled, and with custom " + "textures disabled) and worse emulation of Level of Detail calculation.

" + "If unsure, leave this unchecked."); m_skip_efb_cpu->SetDescription(tr(TR_SKIP_EFB_CPU_ACCESS_DESCRIPTION)); m_ignore_format_changes->SetDescription(tr(TR_IGNORE_FORMAT_CHANGE_DESCRIPTION)); @@ -291,6 +305,7 @@ void HacksWidget::AddDescriptions() m_disable_bounding_box->SetDescription(tr(TR_DISABLE_BOUNDINGBOX_DESCRIPTION)); m_save_texture_cache_state->SetDescription(tr(TR_SAVE_TEXTURE_CACHE_TO_STATE_DESCRIPTION)); m_vertex_rounding->SetDescription(tr(TR_VERTEX_ROUNDING_DESCRIPTION)); + m_fast_texture_sampling->SetDescription(tr(TR_FAST_TEXTURE_SAMPLING_DESCRIPTION)); } void HacksWidget::UpdateDeferEFBCopiesEnabled() diff --git a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h index 2af4a12fea..c34cd27ff8 100644 --- a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h @@ -26,6 +26,7 @@ private: GraphicsBool* m_skip_efb_cpu; GraphicsBool* m_ignore_format_changes; GraphicsBool* m_store_efb_copies; + GraphicsBool* m_defer_efb_copies; // Texture Cache QLabel* m_accuracy_label; @@ -42,7 +43,7 @@ private: GraphicsBool* m_disable_bounding_box; GraphicsBool* m_vertex_rounding; GraphicsBool* m_save_texture_cache_state; - GraphicsBool* m_defer_efb_copies; + GraphicsBool* m_fast_texture_sampling; void CreateWidgets(); void ConnectWidgets(); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index bff76d4246..fd9933c2ae 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -537,6 +537,7 @@ void UpdateBoundingBox(float2 rawpos) {{ fmt::arg("efb_height", EFB_HEIGHT), fmt::arg("efb_scale", I_EFBSCALE)); } + if (host_config.manual_texture_sampling) { if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { @@ -596,6 +597,21 @@ uint WrapCoord(int coord, uint wrap, int size) {{ "int2 uv, int layer) {{\n"); } + if (!host_config.manual_texture_sampling) + 
{ + out.Write(" float size_s = float(" I_TEXDIMS "[texmap].x * 128);\n" + " float size_t = float(" I_TEXDIMS "[texmap].y * 128);\n" + " float3 coords = float3(float(uv.x) / size_s, float(uv.y) / size_t, layer);\n"); + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { + out.Write(" return iround(255.0 * texture(tex, coords));\n}}\n"); + } + else if (api_type == APIType::D3D) + { + out.Write(" return iround(255.0 * tex.Sample(tex_samp, coords));\n}}\n"); + } + } + else { out.Write(R"( uint texmode0 = samp_texmode0(texmap); diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 72a7e0d14f..49fd1c9783 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -39,6 +39,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() bits.backend_logic_op = g_ActiveConfig.backend_info.bSupportsLogicOp; bits.backend_palette_conversion = g_ActiveConfig.backend_info.bSupportsPaletteConversion; bits.enable_validation_layer = g_ActiveConfig.bEnableValidationLayer; + bits.manual_texture_sampling = !g_ActiveConfig.bFastTextureSampling; return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index ebdcda262b..46d34e18c6 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -169,6 +169,7 @@ union ShaderHostConfig BitField<21, 1, bool, u32> backend_logic_op; BitField<22, 1, bool, u32> backend_palette_conversion; BitField<23, 1, bool, u32> enable_validation_layer; + BitField<24, 1, bool, u32> manual_texture_sampling; static ShaderHostConfig GetCurrent(); }; diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 255e1722b9..88ec126c7c 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -135,6 +135,7 @@ void VideoConfig::Refresh() bVertexRounding = 
Config::Get(Config::GFX_HACK_VERTEX_ROUDING); iEFBAccessTileSize = Config::Get(Config::GFX_HACK_EFB_ACCESS_TILE_SIZE); iMissingColorValue = Config::Get(Config::GFX_HACK_MISSING_COLOR_VALUE); + bFastTextureSampling = Config::Get(Config::GFX_HACK_FAST_TEXTURE_SAMPLING); bPerfQueriesEnable = Config::Get(Config::GFX_PERF_QUERIES_ENABLE); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index d4307c479a..40922b3ecf 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -135,6 +135,7 @@ struct VideoConfig final int iLog = 0; // CONF_ bits int iSaveTargetId = 0; // TODO: Should be dropped u32 iMissingColorValue = 0; + bool bFastTextureSampling = false; // Stereoscopy StereoMode stereo_mode{}; From bdcfb311872481280b947703fedb04e7d18f2231 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 1 Aug 2021 17:31:40 -0700 Subject: [PATCH 13/15] VideoCommon: Handle custom texture sizes correctly Specifically, when using Manual Texture Sampling, if texture sizes don't match the size the game specifies, things previously broke. That can happen with custom textures, and also with scaled EFB copies at non-native IRs. It breaks most obviously by not scaling the texture coordinates (so only part of the texture shows up), but the hardware wrapping functionality also assumes texture sizes are a power of 2 (or else it will behave weirdly in a way that matches how hardware behaves weirdly). The fix is to provide alternative texture wrapping logic when custom texture sizes are possible. 
--- Source/Core/VideoCommon/PixelShaderGen.cpp | 84 ++++++++++++++++++++- Source/Core/VideoCommon/ShaderGenCommon.cpp | 2 + Source/Core/VideoCommon/ShaderGenCommon.h | 1 + Source/Core/VideoCommon/VideoConfig.h | 10 +++ 4 files changed, 93 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index fd9933c2ae..7ad7f54366 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -568,7 +568,44 @@ int4 readTextureLinear(in Texture2DArray tex, uint2 uv1, uint2 uv2, int layer, i }} )"); - out.Write(R"( + if (host_config.manual_texture_sampling_custom_texture_sizes) + { + // This is slower, and doesn't result in the same odd behavior that happens on console when + // wrapping with non-power-of-2 sizes, but it's fine for custom textures to have non-console + // behavior. + out.Write(R"( +// Both GLSL and HLSL produce undefined values when the modulo operator (%) is used with a negative +// dividend and a positive divisor. We want a positive value such that SafeModulo(-1, 3) is 2. +int SafeModulo(int dividend, int divisor) {{ + if (dividend >= 0) {{ + return dividend % divisor; + }} else {{ + // This works because ~x is the same as -x - 1. + // `~x % 5` over -5 to -1 gives 4, 3, 2, 1, 0. `4 - (~x % 5)` gives 0, 1, 2, 3, 4. 
+ return (divisor - 1) - (~dividend % divisor); + }} +}} + +uint WrapCoord(int coord, uint wrap, int size) {{ + switch (wrap) {{ + case {:s}: + default: // confirmed that clamp is used for invalid (3) via hardware test + return uint(clamp(coord, 0, size - 1)); + case {:s}: + return uint(SafeModulo(coord, size)); // coord % size + case {:s}: + if (SafeModulo(coord, 2 * size) >= size) {{ // coord % (2 * size) + coord = ~coord; + }} + return uint(SafeModulo(coord, size)); // coord % size + }} +}} +)", + WrapMode::Clamp, WrapMode::Repeat, WrapMode::Mirror); + } + else + { + out.Write(R"( uint WrapCoord(int coord, uint wrap, int size) {{ switch (wrap) {{ case {:s}: @@ -584,7 +621,8 @@ uint WrapCoord(int coord, uint wrap, int size) {{ }} }} )", - WrapMode::Clamp, WrapMode::Repeat, WrapMode::Mirror); + WrapMode::Clamp, WrapMode::Repeat, WrapMode::Mirror); + } } if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) @@ -616,8 +654,6 @@ uint WrapCoord(int coord, uint wrap, int size) {{ out.Write(R"( uint texmode0 = samp_texmode0(texmap); uint texmode1 = samp_texmode1(texmap); - int size_s = )" I_TEXDIMS R"([texmap].x; - int size_t = )" I_TEXDIMS R"([texmap].y; uint wrap_s = {}; uint wrap_t = {}; @@ -643,6 +679,46 @@ uint WrapCoord(int coord, uint wrap, int size) {{ BitfieldExtract<&SamplerState::TM1::min_lod>("texmode1"), BitfieldExtract<&SamplerState::TM1::max_lod>("texmode1")); + if (host_config.manual_texture_sampling_custom_texture_sizes) + { + out.Write(R"( + int native_size_s = )" I_TEXDIMS R"([texmap].x; + int native_size_t = )" I_TEXDIMS R"([texmap].y; +)"); + + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { + out.Write(R"( + int3 size = textureSize(tex, 0); + int size_s = size.x; + int size_t = size.y; + int number_of_levels = textureQueryLevels(tex); +)"); + } + else if (api_type == APIType::D3D) + { + out.Write(R"( + int size_s, size_t, layers, number_of_levels; + tex.GetDimensions(0, size_s, size_t, layers, number_of_levels); +)"); 
+ } + + out.Write(R"( + // Prevent out-of-bounds LOD values when using custom textures + max_lod = min(max_lod, (number_of_levels - 1) << 4); + // Rescale uv to account for the new texture size + uv.x = (uv.x * size_s) / native_size_s; + uv.y = (uv.y * size_t) / native_size_t; +)"); + } + else + { + out.Write(R"( + int size_s = )" I_TEXDIMS R"([texmap].x; + int size_t = )" I_TEXDIMS R"([texmap].y; +)"); + } + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { if (g_ActiveConfig.backend_info.bSupportsCoarseDerivatives) diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 49fd1c9783..34921f9116 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -40,6 +40,8 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() bits.backend_palette_conversion = g_ActiveConfig.backend_info.bSupportsPaletteConversion; bits.enable_validation_layer = g_ActiveConfig.bEnableValidationLayer; bits.manual_texture_sampling = !g_ActiveConfig.bFastTextureSampling; + bits.manual_texture_sampling_custom_texture_sizes = + g_ActiveConfig.ManualTextureSamplingWithHiResTextures(); return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 46d34e18c6..367a472294 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -170,6 +170,7 @@ union ShaderHostConfig BitField<22, 1, bool, u32> backend_palette_conversion; BitField<23, 1, bool, u32> enable_validation_layer; BitField<24, 1, bool, u32> manual_texture_sampling; + BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes; static ShaderHostConfig GetCurrent(); }; diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 40922b3ecf..c4e2a7ed85 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -245,6 +245,16 @@ struct 
VideoConfig final return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding; } bool UseVertexRounding() const { return bVertexRounding && iEFBScale != 1; } + bool ManualTextureSamplingWithHiResTextures() const + { + // Hi-res textures (including hi-res EFB copies, but not native-resolution EFB copies at higher + // internal resolutions) breaks the wrapping logic used by manual texture sampling. + if (bFastTextureSampling) + return false; + if (iEFBScale != 1 && bCopyEFBScaled) + return true; + return bHiresTextures; + } bool UsingUberShaders() const; u32 GetShaderCompilerThreads() const; u32 GetShaderPrecompilerThreads() const; From 1adff1c46757f91c343c3f238511e86156314388 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 13 Nov 2021 20:10:55 -0800 Subject: [PATCH 14/15] VideoCommon: Skip textureQueryLevels if it doesn't exist --- Source/Core/VideoBackends/D3D/D3DMain.cpp | 1 + Source/Core/VideoBackends/D3D12/VideoBackend.cpp | 1 + Source/Core/VideoBackends/Null/NullBackend.cpp | 1 + Source/Core/VideoBackends/OGL/OGLMain.cpp | 1 + Source/Core/VideoBackends/OGL/OGLRender.cpp | 2 ++ Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp | 4 ++++ Source/Core/VideoBackends/Software/SWmain.cpp | 1 + Source/Core/VideoBackends/Vulkan/VulkanContext.cpp | 1 + Source/Core/VideoCommon/PixelShaderGen.cpp | 12 +++++++++++- Source/Core/VideoCommon/VideoConfig.h | 1 + 10 files changed, 24 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoBackends/D3D/D3DMain.cpp b/Source/Core/VideoBackends/D3D/D3DMain.cpp index b217218986..f101f60321 100644 --- a/Source/Core/VideoBackends/D3D/D3DMain.cpp +++ b/Source/Core/VideoBackends/D3D/D3DMain.cpp @@ -107,6 +107,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsShaderBinaries = true; g_Config.backend_info.bSupportsPipelineCacheData = false; g_Config.backend_info.bSupportsCoarseDerivatives = true; + g_Config.backend_info.bSupportsTextureQueryLevels = true; 
g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter); g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames(); diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp index 3a33832dc1..a82dd428a5 100644 --- a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp +++ b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp @@ -83,6 +83,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsShaderBinaries = true; g_Config.backend_info.bSupportsPipelineCacheData = true; g_Config.backend_info.bSupportsCoarseDerivatives = true; + g_Config.backend_info.bSupportsTextureQueryLevels = true; // We can only check texture support once we have a device. if (g_dx_context) diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index 17e31c4418..78ce7f1afe 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -56,6 +56,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsShaderBinaries = false; g_Config.backend_info.bSupportsPipelineCacheData = false; g_Config.backend_info.bSupportsCoarseDerivatives = false; + g_Config.backend_info.bSupportsTextureQueryLevels = false; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/OGLMain.cpp b/Source/Core/VideoBackends/OGL/OGLMain.cpp index 375b1d12c7..537058a565 100644 --- a/Source/Core/VideoBackends/OGL/OGLMain.cpp +++ b/Source/Core/VideoBackends/OGL/OGLMain.cpp @@ -108,6 +108,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsCoarseDerivatives = false; + g_Config.backend_info.bSupportsTextureQueryLevels = false; g_Config.backend_info.Adapters.clear(); diff --git 
a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp index 76064920f2..2fc307cb9c 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.cpp +++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp @@ -485,6 +485,8 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ GLExtensions::Supports("GL_ARB_texture_compression_bptc"); g_Config.backend_info.bSupportsCoarseDerivatives = GLExtensions::Supports("GL_ARB_derivative_control") || GLExtensions::Version() >= 450; + g_Config.backend_info.bSupportsTextureQueryLevels = + GLExtensions::Supports("GL_ARB_texture_query_levels") || GLExtensions::Version() >= 430; if (m_main_gl_context->IsGLES()) { diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 8aba81b7ed..492d4b956c 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -748,6 +748,7 @@ void ProgramShaderCache::CreateHeader() "%s\n" // shader framebuffer fetch "%s\n" // shader thread shuffle "%s\n" // derivative control + "%s\n" // query levels // Precision defines for GLSL ES "%s\n" @@ -830,6 +831,9 @@ void ProgramShaderCache::CreateHeader() g_ActiveConfig.backend_info.bSupportsCoarseDerivatives ? "#extension GL_ARB_derivative_control : enable" : "", + g_ActiveConfig.backend_info.bSupportsTextureQueryLevels ? + "#extension GL_ARB_texture_query_levels : enable" : + "", is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "", (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? 
diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index ca1f4304dd..fa6f89ba2d 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -85,6 +85,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsPipelineCacheData = false; g_Config.backend_info.bSupportsBBox = true; g_Config.backend_info.bSupportsCoarseDerivatives = false; + g_Config.backend_info.bSupportsTextureQueryLevels = false; // aamodes g_Config.backend_info.AAModes = {1}; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 16aa7872e7..a571c6c38d 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -287,6 +287,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsLargePoints = false; // Dependent on features. config->backend_info.bSupportsFramebufferFetch = false; // No support. config->backend_info.bSupportsCoarseDerivatives = true; // Assumed support. + config->backend_info.bSupportsTextureQueryLevels = true; // Assumed support. 
} void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 7ad7f54366..6261f57812 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -692,11 +692,21 @@ uint WrapCoord(int coord, uint wrap, int size) {{ int3 size = textureSize(tex, 0); int size_s = size.x; int size_t = size.y; - int number_of_levels = textureQueryLevels(tex); )"); + if (g_ActiveConfig.backend_info.bSupportsTextureQueryLevels) + { + out.Write(" int number_of_levels = textureQueryLevels(tex);\n"); + } + else + { + out.Write(" int number_of_levels = 256; // textureQueryLevels is not supported\n"); + ERROR_LOG_FMT(VIDEO, "textureQueryLevels is not supported! Odd graphical results may " + "occur if custom textures are in use!"); + } } else if (api_type == APIType::D3D) { + ASSERT(g_ActiveConfig.backend_info.bSupportsTextureQueryLevels); out.Write(R"( int size_s, size_t, layers, number_of_levels; tex.GetDimensions(0, size_s, size_t, layers, number_of_levels); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index c4e2a7ed85..395b0f6a94 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -232,6 +232,7 @@ struct VideoConfig final bool bSupportsShaderBinaries = false; bool bSupportsPipelineCacheData = false; bool bSupportsCoarseDerivatives = false; + bool bSupportsTextureQueryLevels = false; } backend_info; // Utility From 95b99410443e7ae47df1f3e9436e275c80dd3678 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Fri, 15 Oct 2021 11:57:53 -0700 Subject: [PATCH 15/15] Use Fast Texture Sampling by default This commit changes the default value of Fast Texture Sampling to true, and also moves the setting that controls it to the experimental section of the advanced tab. 
This is its own commit so that it can be easily reverted when we want to default to Manual Texture Sampling. Co-authored-by: JosJuice --- .../features/settings/model/BooleanSetting.java | 2 +- .../settings/ui/SettingsFragmentPresenter.java | 4 ++-- .../Android/app/src/main/res/values/strings.xml | 4 ++-- Source/Core/Core/Config/GraphicsSettings.cpp | 2 +- .../DolphinQt/Config/Graphics/AdvancedWidget.cpp | 15 +++++++++++++++ .../DolphinQt/Config/Graphics/AdvancedWidget.h | 1 + .../DolphinQt/Config/Graphics/HacksWidget.cpp | 15 --------------- .../Core/DolphinQt/Config/Graphics/HacksWidget.h | 1 - 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/model/BooleanSetting.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/model/BooleanSetting.java index 7b488719a6..7423bf3db6 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/model/BooleanSetting.java +++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/model/BooleanSetting.java @@ -199,7 +199,7 @@ public enum BooleanSetting implements AbstractBooleanSetting "EFBEmulateFormatChanges", false), GFX_HACK_VERTEX_ROUDING(Settings.FILE_GFX, Settings.SECTION_GFX_HACKS, "VertexRounding", false), GFX_HACK_FAST_TEXTURE_SAMPLING(Settings.FILE_GFX, Settings.SECTION_GFX_HACKS, - "FastTextureSampling", false), + "FastTextureSampling", true), LOGGER_WRITE_TO_FILE(Settings.FILE_LOGGER, Settings.SECTION_LOGGER_OPTIONS, "WriteToFile", false), diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/ui/SettingsFragmentPresenter.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/ui/SettingsFragmentPresenter.java index 3eb95efa68..c9f4a31712 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/ui/SettingsFragmentPresenter.java +++ 
b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/features/settings/ui/SettingsFragmentPresenter.java @@ -719,8 +719,6 @@ public final class SettingsFragmentPresenter R.string.vertex_rounding, R.string.vertex_rounding_description)); sl.add(new CheckBoxSetting(mContext, BooleanSetting.GFX_SAVE_TEXTURE_CACHE_TO_STATE, R.string.texture_cache_to_state, R.string.texture_cache_to_state_description)); - sl.add(new CheckBoxSetting(mContext, BooleanSetting.GFX_HACK_FAST_TEXTURE_SAMPLING, - R.string.fast_texture_sampling, R.string.fast_texture_sampling_description)); } private void addAdvancedGraphicsSettings(ArrayList sl) @@ -746,6 +744,8 @@ public final class SettingsFragmentPresenter R.string.backend_multithreading, R.string.backend_multithreading_description)); sl.add(new CheckBoxSetting(mContext, BooleanSetting.GFX_HACK_EFB_DEFER_INVALIDATION, R.string.defer_efb_invalidation, R.string.defer_efb_invalidation_description)); + sl.add(new InvertedCheckBoxSetting(mContext, BooleanSetting.GFX_HACK_FAST_TEXTURE_SAMPLING, + R.string.manual_texture_sampling, R.string.manual_texture_sampling_description)); sl.add(new CheckBoxSetting(mContext, BooleanSetting.GFX_INTERNAL_RESOLUTION_FRAME_DUMPS, R.string.internal_resolution_dumps, R.string.internal_resolution_dumps_description)); diff --git a/Source/Android/app/src/main/res/values/strings.xml b/Source/Android/app/src/main/res/values/strings.xml index 664e9e8343..c424629dde 100644 --- a/Source/Android/app/src/main/res/values/strings.xml +++ b/Source/Android/app/src/main/res/values/strings.xml @@ -277,8 +277,6 @@ Rounds 2D vertices to whole pixels. Fixes graphical problems in some games at higher internal resolutions. This setting has no effect when native internal resolution is used. If unsure, leave this unchecked. Save Texture Cache to State Includes the contents of the embedded frame buffer (EFB) and upscaled EFB copies in save states. 
Fixes missing and/or non-upscaled textures/objects when loading states at the cost of additional save/load time. - Fast Texture Sampling - Use the video backend\'s built-in texture sampling functionality instead of a manual implementation. Aspect Ratio Select what aspect ratio to use when rendering Shader Compilation Mode @@ -305,6 +303,8 @@ Enables graphics backend multithreading (Vulkan only). May affect performance. If unsure, leave this unchecked. Defer EFB Cache Invalidation Defers invalidation of the EFB access cache until a GPU synchronization command is executed. May improve performance in some games at the cost of stability. If unsure, leave this unchecked. + Manual Texture Sampling + Use a manual implementation of texture sampling instead of the graphics backend\'s built-in functionality. Dump Frames at Internal Resolution Creates frame dumps and screenshots at the internal resolution of the renderer, rather than the size of the window it is displayed within. If the aspect ratio is widescreen, the output image will be scaled horizontally to preserve the vertical resolution. 
Debugging diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index d859cab150..4ce7c1b3a8 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -151,7 +151,7 @@ const Info GFX_HACK_VERTEX_ROUDING{{System::GFX, "Hacks", "VertexRounding" const Info GFX_HACK_MISSING_COLOR_VALUE{{System::GFX, "Hacks", "MissingColorValue"}, 0xFFFFFFFF}; const Info GFX_HACK_FAST_TEXTURE_SAMPLING{{System::GFX, "Hacks", "FastTextureSampling"}, - false}; + true}; // Graphics.GameSpecific diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp index 3a62620f1c..332927ea10 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp @@ -138,8 +138,11 @@ void AdvancedWidget::CreateWidgets() m_defer_efb_access_invalidation = new GraphicsBool(tr("Defer EFB Cache Invalidation"), Config::GFX_HACK_EFB_DEFER_INVALIDATION); + m_manual_texture_sampling = + new GraphicsBool(tr("Manual Texture Sampling"), Config::GFX_HACK_FAST_TEXTURE_SAMPLING, true); experimental_layout->addWidget(m_defer_efb_access_invalidation, 0, 0); + experimental_layout->addWidget(m_manual_texture_sampling, 0, 1); main_layout->addWidget(debugging_box); main_layout->addWidget(utility_box); @@ -266,6 +269,17 @@ void AdvancedWidget::AddDescriptions() "

May improve performance in some games which rely on CPU EFB Access at the cost " "of stability.

If unsure, leave this " "unchecked."); + static const char TR_MANUAL_TEXTURE_SAMPLING_DESCRIPTION[] = QT_TR_NOOP( + "Use a manual implementation of texture sampling instead of the graphics backend's built-in " + "functionality.

" + "This setting can fix graphical issues in some games on certain GPUs, most commonly vertical " + "lines on FMVs. In addition to this, enabling Manual Texture Sampling will allow for correct " + "emulation of texture wrapping special cases (at 1x IR or when scaled EFB is disabled, and " + "with custom textures disabled) and better emulates Level of Detail calculation.

" + "This comes at the cost of potentially worse performance, especially at higher internal " + "resolutions; additionally, Anisotropic Filtering is currently incompatible with Manual " + "Texture Sampling.

" + "If unsure, leave this unchecked."); #ifdef _WIN32 static const char TR_BORDERLESS_FULLSCREEN_DESCRIPTION[] = QT_TR_NOOP( @@ -299,4 +313,5 @@ void AdvancedWidget::AddDescriptions() m_borderless_fullscreen->SetDescription(tr(TR_BORDERLESS_FULLSCREEN_DESCRIPTION)); #endif m_defer_efb_access_invalidation->SetDescription(tr(TR_DEFER_EFB_ACCESS_INVALIDATION_DESCRIPTION)); + m_manual_texture_sampling->SetDescription(tr(TR_MANUAL_TEXTURE_SAMPLING_DESCRIPTION)); } diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h index abda8395f7..805f1e54b3 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h @@ -61,4 +61,5 @@ private: // Experimental GraphicsBool* m_defer_efb_access_invalidation; + GraphicsBool* m_manual_texture_sampling; }; diff --git a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp index 4dc302ce1e..2765e2e2fd 100644 --- a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.cpp @@ -106,14 +106,11 @@ void HacksWidget::CreateWidgets() m_vertex_rounding = new GraphicsBool(tr("Vertex Rounding"), Config::GFX_HACK_VERTEX_ROUDING); m_save_texture_cache_state = new GraphicsBool(tr("Save Texture Cache to State"), Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE); - m_fast_texture_sampling = - new GraphicsBool(tr("Fast Texture Sampling"), Config::GFX_HACK_FAST_TEXTURE_SAMPLING); other_layout->addWidget(m_fast_depth_calculation, 0, 0); other_layout->addWidget(m_disable_bounding_box, 0, 1); other_layout->addWidget(m_vertex_rounding, 1, 0); other_layout->addWidget(m_save_texture_cache_state, 1, 1); - other_layout->addWidget(m_fast_texture_sampling, 2, 0); main_layout->addWidget(efb_box); main_layout->addWidget(texture_cache_box); @@ -279,17 +276,6 @@ void HacksWidget::AddDescriptions() "higher internal resolutions. 
This setting has no effect when native internal " "resolution is used.

If unsure, leave this " "unchecked."); - static const char TR_FAST_TEXTURE_SAMPLING_DESCRIPTION[] = QT_TR_NOOP( - "Use the video backend's built-in texture sampling functionality instead of a manual " - "implementation.

" - "This setting can cause potentially improve performance, especially at higher internal " - "resolutions; additionally, Anisotropic Filtering currently only works with Fast Texture " - "Sampling.

" - "This comes at the cost of graphical issues in some games on certain GPUs, most commonly " - "vertical lines on FMVs, as well as lack of emulation of texture wrapping special cases " - "(though this also only works at 1x IR or when scaled EFB is disabled, and with custom " - "textures disabled) and worse emulation of Level of Detail calculation.

" - "If unsure, leave this unchecked."); m_skip_efb_cpu->SetDescription(tr(TR_SKIP_EFB_CPU_ACCESS_DESCRIPTION)); m_ignore_format_changes->SetDescription(tr(TR_IGNORE_FORMAT_CHANGE_DESCRIPTION)); @@ -305,7 +291,6 @@ void HacksWidget::AddDescriptions() m_disable_bounding_box->SetDescription(tr(TR_DISABLE_BOUNDINGBOX_DESCRIPTION)); m_save_texture_cache_state->SetDescription(tr(TR_SAVE_TEXTURE_CACHE_TO_STATE_DESCRIPTION)); m_vertex_rounding->SetDescription(tr(TR_VERTEX_ROUNDING_DESCRIPTION)); - m_fast_texture_sampling->SetDescription(tr(TR_FAST_TEXTURE_SAMPLING_DESCRIPTION)); } void HacksWidget::UpdateDeferEFBCopiesEnabled() diff --git a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h index c34cd27ff8..490a5b4f7e 100644 --- a/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/HacksWidget.h @@ -43,7 +43,6 @@ private: GraphicsBool* m_disable_bounding_box; GraphicsBool* m_vertex_rounding; GraphicsBool* m_save_texture_cache_state; - GraphicsBool* m_fast_texture_sampling; void CreateWidgets(); void ConnectWidgets();