mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-10 16:19:28 +01:00
VideoCommon: Better driver bug handling
Adds a pass that processes driver deficiencies between UID caching and use. Running at that point gives the pass a full view of the whole pipeline, which is needed because some bugs/workarounds involve interactions between blend modes and the pixel shader.
This commit is contained in:
parent
99eef44765
commit
6ab24e6c17
@ -1168,19 +1168,8 @@ void Renderer::ApplyBlendingState(const BlendingState state)
|
|||||||
if (m_current_blend_state == state)
|
if (m_current_blend_state == state)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
bool useDualSource =
|
bool useDualSource = state.usedualsrc;
|
||||||
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
|
|
||||||
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
|
|
||||||
// Only use shader blend if we need to and we don't support dual-source blending directly
|
|
||||||
bool useShaderBlend = !useDualSource && state.usedualsrc && state.dstalpha &&
|
|
||||||
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
|
|
||||||
|
|
||||||
if (useShaderBlend)
|
|
||||||
{
|
|
||||||
glDisable(GL_BLEND);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const GLenum src_factors[8] = {GL_ZERO,
|
const GLenum src_factors[8] = {GL_ZERO,
|
||||||
GL_ONE,
|
GL_ONE,
|
||||||
GL_DST_COLOR,
|
GL_DST_COLOR,
|
||||||
@ -1216,7 +1205,6 @@ void Renderer::ApplyBlendingState(const BlendingState state)
|
|||||||
dst_factors[u32(state.dstfactor.Value())],
|
dst_factors[u32(state.dstfactor.Value())],
|
||||||
src_factors[u32(state.srcfactoralpha.Value())],
|
src_factors[u32(state.srcfactoralpha.Value())],
|
||||||
dst_factors[u32(state.dstfactoralpha.Value())]);
|
dst_factors[u32(state.dstfactoralpha.Value())]);
|
||||||
}
|
|
||||||
|
|
||||||
const GLenum logic_op_codes[16] = {
|
const GLenum logic_op_codes[16] = {
|
||||||
GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP,
|
GL_CLEAR, GL_AND, GL_AND_REVERSE, GL_COPY, GL_AND_INVERTED, GL_NOOP,
|
||||||
|
@ -153,7 +153,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
|
|||||||
|
|
||||||
s32 z = (s32)std::clamp<float>(ZSlope.GetValue(x, y), 0.0f, 16777215.0f);
|
s32 z = (s32)std::clamp<float>(ZSlope.GetValue(x, y), 0.0f, 16777215.0f);
|
||||||
|
|
||||||
if (bpmem.UseEarlyDepthTest())
|
if (bpmem.GetEmulatedZ() == EmulatedZ::Early)
|
||||||
{
|
{
|
||||||
// TODO: Test if perf regs are incremented even if test is disabled
|
// TODO: Test if perf regs are incremented even if test is disabled
|
||||||
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC);
|
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC);
|
||||||
|
@ -840,7 +840,7 @@ void Tev::Draw()
|
|||||||
output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8;
|
output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bpmem.UseLateDepthTest())
|
if (bpmem.GetEmulatedZ() == EmulatedZ::Late)
|
||||||
{
|
{
|
||||||
// TODO: Check against hw if these values get incremented even if depth testing is disabled
|
// TODO: Check against hw if these values get incremented even if depth testing is disabled
|
||||||
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT);
|
EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT);
|
||||||
|
@ -137,19 +137,8 @@ GetVulkanAttachmentBlendState(const BlendingState& state, AbstractPipelineUsage
|
|||||||
{
|
{
|
||||||
VkPipelineColorBlendAttachmentState vk_state = {};
|
VkPipelineColorBlendAttachmentState vk_state = {};
|
||||||
|
|
||||||
bool use_dual_source =
|
bool use_dual_source = state.usedualsrc;
|
||||||
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
|
|
||||||
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
|
|
||||||
bool use_shader_blend = !use_dual_source && state.usedualsrc && state.dstalpha &&
|
|
||||||
g_ActiveConfig.backend_info.bSupportsFramebufferFetch;
|
|
||||||
|
|
||||||
if (use_shader_blend || (usage == AbstractPipelineUsage::GX &&
|
|
||||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)))
|
|
||||||
{
|
|
||||||
vk_state.blendEnable = VK_FALSE;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
vk_state.blendEnable = static_cast<VkBool32>(state.blendenable);
|
vk_state.blendEnable = static_cast<VkBool32>(state.blendenable);
|
||||||
vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
|
vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
|
||||||
vk_state.alphaBlendOp = state.subtractAlpha ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
|
vk_state.alphaBlendOp = state.subtractAlpha ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD;
|
||||||
@ -191,7 +180,6 @@ GetVulkanAttachmentBlendState(const BlendingState& state, AbstractPipelineUsage
|
|||||||
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
|
vk_state.dstColorBlendFactor = dst_factors[u32(state.dstfactor.Value())];
|
||||||
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
|
vk_state.dstAlphaBlendFactor = dst_factors[u32(state.dstfactoralpha.Value())];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (state.colorupdate)
|
if (state.colorupdate)
|
||||||
{
|
{
|
||||||
|
@ -371,13 +371,6 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD
|
|||||||
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_REVERSED_DEPTH_RANGE))
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_REVERSED_DEPTH_RANGE))
|
||||||
config->backend_info.bSupportsReversedDepthRange = false;
|
config->backend_info.bSupportsReversedDepthRange = false;
|
||||||
|
|
||||||
// Calling discard when early depth test is enabled can break on some Apple Silicon GPU drivers.
|
|
||||||
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))
|
|
||||||
{
|
|
||||||
// We will use shader blending, so disable hardware dual source blending.
|
|
||||||
config->backend_info.bSupportsDualSourceBlend = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dynamic sampler indexing locks up Intel GPUs on MoltenVK/Metal
|
// Dynamic sampler indexing locks up Intel GPUs on MoltenVK/Metal
|
||||||
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING))
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING))
|
||||||
config->backend_info.bSupportsDynamicSamplerIndexing = false;
|
config->backend_info.bSupportsDynamicSamplerIndexing = false;
|
||||||
|
@ -2336,6 +2336,16 @@ struct BPCmd
|
|||||||
int newvalue;
|
int newvalue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Describes how the emulated depth (Z) test is performed for a draw.
// BPMemory::GetEmulatedZ() only ever returns Disabled, Early or Late; the
// remaining values are assigned later (ForcedEarly in GetPixelShaderUid(),
// EarlyWithFBFetch in ApplyDriverBugs()).
// The value is stored in pixel_shader_uid_data::ztest, a 3-bit field.
enum class EmulatedZ : u32
|
||||||
|
{
|
||||||
|
Disabled = 0,  // zmode.testenable is off
|
||||||
|
Early = 1,  // depth test enabled and zcontrol.early_ztest is set
|
||||||
|
Late = 2,  // depth test enabled and zcontrol.early_ztest is clear
|
||||||
|
ForcedEarly = 3,  // Early, upgraded by GetPixelShaderUid() (fast depth calc or undetermined alpha test, no zfreeze)
|
||||||
|
EarlyWithFBFetch = 4,  // replaces ForcedEarly in ApplyDriverBugs() when framebuffer fetch is used instead
|
||||||
|
EarlyWithZComplocHack = 5,  // ZCOMPLOC hack; WriteAlphaTest() skips the block guarded by this value. NOTE(review): assignment site not visible here - confirm
|
||||||
|
};
|
||||||
|
|
||||||
struct BPMemory
|
struct BPMemory
|
||||||
{
|
{
|
||||||
GenMode genMode;
|
GenMode genMode;
|
||||||
@ -2403,8 +2413,15 @@ struct BPMemory
|
|||||||
u32 bpMask; // 0xFE
|
u32 bpMask; // 0xFE
|
||||||
u32 unknown18; // ff
|
u32 unknown18; // ff
|
||||||
|
|
||||||
bool UseEarlyDepthTest() const { return zcontrol.early_ztest && zmode.testenable; }
|
EmulatedZ GetEmulatedZ() const
|
||||||
bool UseLateDepthTest() const { return !zcontrol.early_ztest && zmode.testenable; }
|
{
|
||||||
|
if (!zmode.testenable)
|
||||||
|
return EmulatedZ::Disabled;
|
||||||
|
if (zcontrol.early_ztest)
|
||||||
|
return EmulatedZ::Early;
|
||||||
|
else
|
||||||
|
return EmulatedZ::Late;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#pragma pack()
|
#pragma pack()
|
||||||
|
@ -237,7 +237,8 @@ enum Bug
|
|||||||
// crash. Sometimes this happens in the kernel mode part of the driver, resulting in a BSOD.
|
// crash. Sometimes this happens in the kernel mode part of the driver, resulting in a BSOD.
|
||||||
// These shaders are also particularly problematic on macOS's Intel drivers. On OpenGL, they can
|
// These shaders are also particularly problematic on macOS's Intel drivers. On OpenGL, they can
|
||||||
// cause depth issues. On Metal, they can cause the driver to not write a primitive to the depth
|
// cause depth issues. On Metal, they can cause the driver to not write a primitive to the depth
|
||||||
// buffer whenever a fragment is discarded. Disable dual-source blending support on these drivers.
|
// buffer if dual source blending is output in the shader but not subsequently used in blending.
|
||||||
|
// Compile separate shaders for DSB on vs off for these drivers.
|
||||||
BUG_BROKEN_DUAL_SOURCE_BLENDING,
|
BUG_BROKEN_DUAL_SOURCE_BLENDING,
|
||||||
|
|
||||||
// BUG: ImgTec GLSL shader compiler fails when negating the input to a bitwise operation
|
// BUG: ImgTec GLSL shader compiler fails when negating the input to a bitwise operation
|
||||||
|
@ -19,7 +19,7 @@ namespace VideoCommon
|
|||||||
// As pipelines encompass both shader UIDs and render states, changes to either of these should
|
// As pipelines encompass both shader UIDs and render states, changes to either of these should
|
||||||
// also increment the pipeline UID version. Incrementing the UID version will cause all UID
|
// also increment the pipeline UID version. Incrementing the UID version will cause all UID
|
||||||
// caches to be invalidated.
|
// caches to be invalidated.
|
||||||
constexpr u32 GX_PIPELINE_UID_VERSION = 4; // Last changed in PR 10215
|
constexpr u32 GX_PIPELINE_UID_VERSION = 5; // Last changed in PR 10747
|
||||||
|
|
||||||
struct GXPipelineUid
|
struct GXPipelineUid
|
||||||
{
|
{
|
||||||
|
@ -167,9 +167,6 @@ constexpr Common::EnumMap<const char*, TevOutput::Color2> tev_a_output_table{
|
|||||||
"c2.a",
|
"c2.a",
|
||||||
};
|
};
|
||||||
|
|
||||||
// FIXME: Some of the video card's capabilities (BBox support, EarlyZ support, dstAlpha support)
|
|
||||||
// leak into this UID; This is really unhelpful if these UIDs ever move from one machine to
|
|
||||||
// another.
|
|
||||||
PixelShaderUid GetPixelShaderUid()
|
PixelShaderUid GetPixelShaderUid()
|
||||||
{
|
{
|
||||||
PixelShaderUid out;
|
PixelShaderUid out;
|
||||||
@ -189,20 +186,25 @@ PixelShaderUid GetPixelShaderUid()
|
|||||||
|
|
||||||
u32 numStages = uid_data->genMode_numtevstages + 1;
|
u32 numStages = uid_data->genMode_numtevstages + 1;
|
||||||
|
|
||||||
const bool forced_early_z =
|
uid_data->Pretest = bpmem.alpha_test.TestResult();
|
||||||
bpmem.UseEarlyDepthTest() &&
|
uid_data->ztest = bpmem.GetEmulatedZ();
|
||||||
|
if (uid_data->ztest == EmulatedZ::Early &&
|
||||||
(g_ActiveConfig.bFastDepthCalc ||
|
(g_ActiveConfig.bFastDepthCalc ||
|
||||||
bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined)
|
bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined)
|
||||||
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
|
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
|
||||||
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
|
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
|
||||||
&& !(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
&& !bpmem.genMode.zfreeze)
|
||||||
|
{
|
||||||
|
uid_data->ztest = EmulatedZ::ForcedEarly;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool forced_early_z = uid_data->ztest == EmulatedZ::ForcedEarly;
|
||||||
const bool per_pixel_depth =
|
const bool per_pixel_depth =
|
||||||
(bpmem.ztex2.op != ZTexOp::Disabled && bpmem.UseLateDepthTest()) ||
|
(bpmem.ztex2.op != ZTexOp::Disabled && uid_data->ztest == EmulatedZ::Late) ||
|
||||||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) ||
|
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) ||
|
||||||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
||||||
|
|
||||||
uid_data->per_pixel_depth = per_pixel_depth;
|
uid_data->per_pixel_depth = per_pixel_depth;
|
||||||
uid_data->forced_early_z = forced_early_z;
|
|
||||||
|
|
||||||
if (g_ActiveConfig.bEnablePixelLighting)
|
if (g_ActiveConfig.bEnablePixelLighting)
|
||||||
{
|
{
|
||||||
@ -285,59 +287,24 @@ PixelShaderUid GetPixelShaderUid()
|
|||||||
sizeof(*uid_data) :
|
sizeof(*uid_data) :
|
||||||
MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]);
|
MY_STRUCT_OFFSET(*uid_data, stagehash[numStages]);
|
||||||
|
|
||||||
uid_data->Pretest = bpmem.alpha_test.TestResult();
|
|
||||||
uid_data->late_ztest = bpmem.UseLateDepthTest();
|
|
||||||
|
|
||||||
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
|
// NOTE: Fragment may not be discarded if alpha test always fails and early depth test is enabled
|
||||||
// (in this case we need to write a depth value if depth test passes regardless of the alpha
|
// (in this case we need to write a depth value if depth test passes regardless of the alpha
|
||||||
// testing result)
|
// testing result)
|
||||||
if (uid_data->Pretest == AlphaTestResult::Undetermined ||
|
if (uid_data->Pretest == AlphaTestResult::Undetermined ||
|
||||||
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->late_ztest))
|
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late))
|
||||||
{
|
{
|
||||||
uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0;
|
uid_data->alpha_test_comp0 = bpmem.alpha_test.comp0;
|
||||||
uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1;
|
uid_data->alpha_test_comp1 = bpmem.alpha_test.comp1;
|
||||||
uid_data->alpha_test_logic = bpmem.alpha_test.logic;
|
uid_data->alpha_test_logic = bpmem.alpha_test.logic;
|
||||||
|
|
||||||
// ZCOMPLOC HACK:
|
|
||||||
// The only way to emulate alpha test + early-z is to force early-z in the shader.
|
|
||||||
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
|
|
||||||
// we are only able to choose which one we want to respect more.
|
|
||||||
// Tests seem to have proven that writing depth even when the alpha test fails is more
|
|
||||||
// important that a reliable alpha test, so we just force the alpha test to always succeed.
|
|
||||||
// At least this seems to be less buggy.
|
|
||||||
uid_data->alpha_test_use_zcomploc_hack =
|
|
||||||
bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable &&
|
|
||||||
!g_ActiveConfig.backend_info.bSupportsEarlyZ && !bpmem.genMode.zfreeze;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uid_data->zfreeze = bpmem.genMode.zfreeze;
|
uid_data->zfreeze = bpmem.genMode.zfreeze;
|
||||||
uid_data->ztex_op = bpmem.ztex2.op;
|
uid_data->ztex_op = bpmem.ztex2.op;
|
||||||
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
|
|
||||||
|
|
||||||
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
|
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
|
||||||
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
|
uid_data->fog_proj = bpmem.fog.c_proj_fsel.proj;
|
||||||
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
|
uid_data->fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled;
|
||||||
|
|
||||||
BlendingState state = {};
|
|
||||||
state.Generate(bpmem);
|
|
||||||
|
|
||||||
if (((state.usedualsrc && state.dstalpha) ||
|
|
||||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) &&
|
|
||||||
g_ActiveConfig.backend_info.bSupportsFramebufferFetch &&
|
|
||||||
!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
|
|
||||||
{
|
|
||||||
uid_data->blend_enable = state.blendenable;
|
|
||||||
uid_data->blend_src_factor = state.srcfactor;
|
|
||||||
uid_data->blend_src_factor_alpha = state.srcfactoralpha;
|
|
||||||
uid_data->blend_dst_factor = state.dstfactor;
|
|
||||||
uid_data->blend_dst_factor_alpha = state.dstfactoralpha;
|
|
||||||
uid_data->blend_subtract = state.subtract;
|
|
||||||
uid_data->blend_subtract_alpha = state.subtractAlpha;
|
|
||||||
}
|
|
||||||
|
|
||||||
uid_data->logic_op_enable = state.logicopenable;
|
|
||||||
uid_data->logic_op_mode = u32(state.logicmode.Value());
|
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -798,7 +765,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||||||
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
|
out.Write("\n#define sampleTextureWrapper(texmap, uv, layer) "
|
||||||
"sampleTexture(texmap, samp[texmap], uv, layer)\n");
|
"sampleTexture(texmap, samp[texmap], uv, layer)\n");
|
||||||
|
|
||||||
if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ)
|
if (uid_data->ztest == EmulatedZ::ForcedEarly)
|
||||||
{
|
{
|
||||||
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
|
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
|
||||||
// or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
|
// or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
|
||||||
@ -837,28 +804,15 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||||||
out.Write("FORCE_EARLY_Z; \n");
|
out.Write("FORCE_EARLY_Z; \n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only use dual-source blending when required on drivers that don't support it very well.
|
const bool use_framebuffer_fetch = uid_data->blend_enable || uid_data->logic_op_enable ||
|
||||||
const bool use_dual_source =
|
uid_data->ztest == EmulatedZ::EarlyWithFBFetch;
|
||||||
host_config.backend_dual_source_blend &&
|
|
||||||
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) ||
|
|
||||||
uid_data->useDstAlpha);
|
|
||||||
const bool use_shader_blend =
|
|
||||||
!use_dual_source &&
|
|
||||||
(uid_data->useDstAlpha ||
|
|
||||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z)) &&
|
|
||||||
host_config.backend_shader_framebuffer_fetch;
|
|
||||||
const bool use_shader_logic_op = !host_config.backend_logic_op && uid_data->logic_op_enable &&
|
|
||||||
host_config.backend_shader_framebuffer_fetch;
|
|
||||||
const bool use_framebuffer_fetch =
|
|
||||||
use_shader_blend || use_shader_logic_op ||
|
|
||||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z);
|
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
|
// Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK)
|
||||||
// if we want to use it.
|
// if we want to use it.
|
||||||
if (api_type == APIType::Vulkan)
|
if (api_type == APIType::Vulkan)
|
||||||
{
|
{
|
||||||
if (use_dual_source)
|
if (!uid_data->no_dual_src)
|
||||||
{
|
{
|
||||||
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
|
out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 {};\n"
|
||||||
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
|
"FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n",
|
||||||
@ -891,7 +845,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||||||
uid_data->uint_output ? "uvec4" : "vec4",
|
uid_data->uint_output ? "uvec4" : "vec4",
|
||||||
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
|
use_framebuffer_fetch ? "real_ocol0" : "ocol0");
|
||||||
|
|
||||||
if (use_dual_source)
|
if (!uid_data->no_dual_src)
|
||||||
{
|
{
|
||||||
out.Write("{} out {} ocol1;\n",
|
out.Write("{} out {} ocol1;\n",
|
||||||
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(1)" :
|
has_broken_decoration ? "FRAGMENT_OUTPUT_LOCATION(1)" :
|
||||||
@ -960,7 +914,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||||||
out.Write("\tfloat4 ocol0;\n");
|
out.Write("\tfloat4 ocol0;\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_shader_blend)
|
if (uid_data->blend_enable)
|
||||||
{
|
{
|
||||||
out.Write("\tfloat4 ocol1;\n");
|
out.Write("\tfloat4 ocol1;\n");
|
||||||
}
|
}
|
||||||
@ -1086,10 +1040,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||||||
// (in this case we need to write a depth value if depth test passes regardless of the alpha
|
// (in this case we need to write a depth value if depth test passes regardless of the alpha
|
||||||
// testing result)
|
// testing result)
|
||||||
if (uid_data->Pretest == AlphaTestResult::Undetermined ||
|
if (uid_data->Pretest == AlphaTestResult::Undetermined ||
|
||||||
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->late_ztest))
|
(uid_data->Pretest == AlphaTestResult::Fail && uid_data->ztest == EmulatedZ::Late))
|
||||||
{
|
{
|
||||||
WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth,
|
WriteAlphaTest(out, uid_data, api_type, uid_data->per_pixel_depth,
|
||||||
use_dual_source || use_shader_blend);
|
!uid_data->no_dual_src || uid_data->blend_enable);
|
||||||
}
|
}
|
||||||
|
|
||||||
// This situation is important for Mario Kart Wii's menus (they will render incorrectly if the
|
// This situation is important for Mario Kart Wii's menus (they will render incorrectly if the
|
||||||
@ -1144,7 +1098,10 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||||||
const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off;
|
const bool skip_ztexture = !uid_data->per_pixel_depth && uid_data->fog_fsel == FogType::Off;
|
||||||
|
|
||||||
// Note: z-textures are not written to depth buffer if early depth test is used
|
// Note: z-textures are not written to depth buffer if early depth test is used
|
||||||
if (uid_data->per_pixel_depth && uid_data->early_ztest)
|
const bool early_ztest = uid_data->ztest == EmulatedZ::Early ||
|
||||||
|
uid_data->ztest == EmulatedZ::EarlyWithFBFetch ||
|
||||||
|
uid_data->ztest == EmulatedZ::EarlyWithZComplocHack;
|
||||||
|
if (uid_data->per_pixel_depth && early_ztest)
|
||||||
{
|
{
|
||||||
if (!host_config.backend_reversed_depth_range)
|
if (!host_config.backend_reversed_depth_range)
|
||||||
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
|
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
|
||||||
@ -1165,7 +1122,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||||||
out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
|
out.Write("\tzCoord = zCoord & 0xFFFFFF;\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uid_data->per_pixel_depth && uid_data->late_ztest)
|
if (uid_data->per_pixel_depth && uid_data->ztest == EmulatedZ::Late)
|
||||||
{
|
{
|
||||||
if (!host_config.backend_reversed_depth_range)
|
if (!host_config.backend_reversed_depth_range)
|
||||||
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
|
out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n");
|
||||||
@ -1184,14 +1141,14 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
|
|||||||
|
|
||||||
WriteFog(out, uid_data);
|
WriteFog(out, uid_data);
|
||||||
|
|
||||||
if (use_shader_logic_op)
|
if (uid_data->logic_op_enable)
|
||||||
WriteLogicOp(out, uid_data);
|
WriteLogicOp(out, uid_data);
|
||||||
|
|
||||||
// Write the color and alpha values to the framebuffer
|
// Write the color and alpha values to the framebuffer
|
||||||
// If using shader blend, we still use the separate alpha
|
// If using shader blend, we still use the separate alpha
|
||||||
WriteColor(out, api_type, uid_data, use_dual_source || use_shader_blend);
|
WriteColor(out, api_type, uid_data, !uid_data->no_dual_src || uid_data->blend_enable);
|
||||||
|
|
||||||
if (use_shader_blend)
|
if (uid_data->blend_enable)
|
||||||
WriteBlend(out, uid_data);
|
WriteBlend(out, uid_data);
|
||||||
else if (use_framebuffer_fetch)
|
else if (use_framebuffer_fetch)
|
||||||
out.Write("\treal_ocol0 = ocol0;\n");
|
out.Write("\treal_ocol0 = ocol0;\n");
|
||||||
@ -1728,11 +1685,10 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ZCOMPLOC HACK:
|
// ZCOMPLOC HACK:
|
||||||
if (!uid_data->alpha_test_use_zcomploc_hack)
|
if (uid_data->ztest != EmulatedZ::EarlyWithZComplocHack)
|
||||||
{
|
{
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
if (uid_data->forced_early_z &&
|
if (uid_data->ztest == EmulatedZ::EarlyWithFBFetch)
|
||||||
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z))
|
|
||||||
{
|
{
|
||||||
// Instead of using discard, fetch the framebuffer's color value and use it as the output
|
// Instead of using discard, fetch the framebuffer's color value and use it as the output
|
||||||
// for this fragment.
|
// for this fragment.
|
||||||
|
@ -12,6 +12,7 @@ enum class AlphaTestOp : u32;
|
|||||||
enum class AlphaTestResult;
|
enum class AlphaTestResult;
|
||||||
enum class CompareMode : u32;
|
enum class CompareMode : u32;
|
||||||
enum class DstBlendFactor : u32;
|
enum class DstBlendFactor : u32;
|
||||||
|
enum class EmulatedZ : u32;
|
||||||
enum class FogProjection : u32;
|
enum class FogProjection : u32;
|
||||||
enum class FogType : u32;
|
enum class FogType : u32;
|
||||||
enum class KonstSel : u32;
|
enum class KonstSel : u32;
|
||||||
@ -28,6 +29,7 @@ struct pixel_shader_uid_data
|
|||||||
u32 NumValues() const { return num_values; }
|
u32 NumValues() const { return num_values; }
|
||||||
u32 pad0 : 4;
|
u32 pad0 : 4;
|
||||||
u32 useDstAlpha : 1;
|
u32 useDstAlpha : 1;
|
||||||
|
u32 no_dual_src : 1;
|
||||||
AlphaTestResult Pretest : 2;
|
AlphaTestResult Pretest : 2;
|
||||||
u32 nIndirectStagesUsed : 4;
|
u32 nIndirectStagesUsed : 4;
|
||||||
u32 genMode_numtexgens : 4;
|
u32 genMode_numtexgens : 4;
|
||||||
@ -36,16 +38,13 @@ struct pixel_shader_uid_data
|
|||||||
CompareMode alpha_test_comp0 : 3;
|
CompareMode alpha_test_comp0 : 3;
|
||||||
CompareMode alpha_test_comp1 : 3;
|
CompareMode alpha_test_comp1 : 3;
|
||||||
AlphaTestOp alpha_test_logic : 2;
|
AlphaTestOp alpha_test_logic : 2;
|
||||||
u32 alpha_test_use_zcomploc_hack : 1;
|
|
||||||
FogProjection fog_proj : 1;
|
FogProjection fog_proj : 1;
|
||||||
|
|
||||||
FogType fog_fsel : 3;
|
FogType fog_fsel : 3;
|
||||||
u32 fog_RangeBaseEnabled : 1;
|
u32 fog_RangeBaseEnabled : 1;
|
||||||
ZTexOp ztex_op : 2;
|
ZTexOp ztex_op : 2;
|
||||||
u32 per_pixel_depth : 1;
|
u32 per_pixel_depth : 1;
|
||||||
u32 forced_early_z : 1;
|
EmulatedZ ztest : 3;
|
||||||
u32 early_ztest : 1;
|
|
||||||
u32 late_ztest : 1;
|
|
||||||
u32 bounding_box : 1;
|
u32 bounding_box : 1;
|
||||||
u32 zfreeze : 1;
|
u32 zfreeze : 1;
|
||||||
u32 numColorChans : 2;
|
u32 numColorChans : 2;
|
||||||
|
@ -448,7 +448,7 @@ void PixelShaderManager::SetGenModeChanged()
|
|||||||
|
|
||||||
void PixelShaderManager::SetZModeControl()
|
void PixelShaderManager::SetZModeControl()
|
||||||
{
|
{
|
||||||
u32 late_ztest = bpmem.UseLateDepthTest();
|
u32 late_ztest = bpmem.GetEmulatedZ() == EmulatedZ::Late;
|
||||||
u32 rgba6_format =
|
u32 rgba6_format =
|
||||||
(bpmem.zcontrol.pixel_format == PixelFormat::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor) ?
|
(bpmem.zcontrol.pixel_format == PixelFormat::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor) ?
|
||||||
1 :
|
1 :
|
||||||
|
@ -25,6 +25,34 @@ void DepthState::Generate(const BPMemory& bp)
|
|||||||
func = bp.zmode.func.Value();
|
func = bp.zmode.func.Value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when the source blend factor is SrcAlpha or InvSrcAlpha.
// Used by BlendingState::RequiresDualSrc(); presumably these are the source
// factors that read the second (dual-source) shader output - confirm.
static bool IsDualSrc(SrcBlendFactor factor)
|
||||||
|
{
|
||||||
|
return factor == SrcBlendFactor::SrcAlpha || factor == SrcBlendFactor::InvSrcAlpha;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Destination-factor overload: returns true for SrcClr, SrcAlpha, InvSrcClr
// and InvSrcAlpha, and false for every other DstBlendFactor value.
// Used by BlendingState::RequiresDualSrc().
static bool IsDualSrc(DstBlendFactor factor)
|
||||||
|
{
|
||||||
|
switch (factor)
|
||||||
|
{
|
||||||
|
case DstBlendFactor::SrcClr:
|
||||||
|
case DstBlendFactor::SrcAlpha:
|
||||||
|
case DstBlendFactor::InvSrcClr:
|
||||||
|
case DstBlendFactor::InvSrcAlpha:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports whether this blend state actually depends on dual-source blending.
// Returns true only when both blendenable and usedualsrc are set AND at least
// one of the four blend factors (color/alpha, source/destination) satisfies
// IsDualSrc(). ApplyDriverBugs() uses this to turn dual-source output off when
// it is not required.
bool BlendingState::RequiresDualSrc() const
|
||||||
|
{
|
||||||
|
bool requires_dual_src = false;
|
||||||
|
requires_dual_src |= IsDualSrc(srcfactor) || IsDualSrc(srcfactoralpha);
|
||||||
|
requires_dual_src |= IsDualSrc(dstfactor) || IsDualSrc(dstfactoralpha);
|
||||||
|
// A dual-source factor only matters if blending and dual-source are both enabled.
requires_dual_src &= blendenable && usedualsrc;
|
||||||
|
return requires_dual_src;
|
||||||
|
}
|
||||||
|
|
||||||
// If the framebuffer format has no alpha channel, it is assumed to
|
// If the framebuffer format has no alpha channel, it is assumed to
|
||||||
// ONE on blending. As the backends may emulate this framebuffer
|
// ONE on blending. As the backends may emulate this framebuffer
|
||||||
// configuration with an alpha channel, we just drop all references
|
// configuration with an alpha channel, we just drop all references
|
||||||
|
@ -142,6 +142,8 @@ union BlendingState
|
|||||||
BitField<17, 3, SrcBlendFactor> srcfactoralpha;
|
BitField<17, 3, SrcBlendFactor> srcfactoralpha;
|
||||||
BitField<20, 4, LogicOp> logicmode;
|
BitField<20, 4, LogicOp> logicmode;
|
||||||
|
|
||||||
|
bool RequiresDualSrc() const;
|
||||||
|
|
||||||
u32 hex;
|
u32 hex;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "Common/MsgHandler.h"
|
#include "Common/MsgHandler.h"
|
||||||
#include "Core/ConfigManager.h"
|
#include "Core/ConfigManager.h"
|
||||||
|
|
||||||
|
#include "VideoCommon/DriverDetails.h"
|
||||||
#include "VideoCommon/FramebufferManager.h"
|
#include "VideoCommon/FramebufferManager.h"
|
||||||
#include "VideoCommon/FramebufferShaderGen.h"
|
#include "VideoCommon/FramebufferShaderGen.h"
|
||||||
#include "VideoCommon/RenderBase.h"
|
#include "VideoCommon/RenderBase.h"
|
||||||
@ -612,8 +613,95 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
|
|||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<AbstractPipelineConfig> ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config)
|
/// Edits the UID based on driver bugs and other special configurations
|
||||||
|
static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in)
|
||||||
{
|
{
|
||||||
|
GXPipelineUid out;
|
||||||
|
memcpy(&out, &in, sizeof(out)); // copy padding
|
||||||
|
pixel_shader_uid_data* ps = out.ps_uid.GetUidData();
|
||||||
|
BlendingState& blend = out.blending_state;
|
||||||
|
|
||||||
|
if (ps->ztest == EmulatedZ::ForcedEarly && !out.depth_state.updateenable)
|
||||||
|
{
|
||||||
|
// No need to force early depth test if you're not writing z
|
||||||
|
ps->ztest = EmulatedZ::Early;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool benefits_from_ps_dual_source_off =
|
||||||
|
(!g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
|
||||||
|
g_ActiveConfig.backend_info.bSupportsFramebufferFetch) ||
|
||||||
|
DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING);
|
||||||
|
if (benefits_from_ps_dual_source_off && !blend.RequiresDualSrc())
|
||||||
|
{
|
||||||
|
// Only use dual-source blending when required on drivers that don't support it very well.
|
||||||
|
ps->no_dual_src = true;
|
||||||
|
blend.usedualsrc = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
|
||||||
|
{
|
||||||
|
bool fbfetch_blend = false;
|
||||||
|
if ((DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DISCARD_WITH_EARLY_Z) ||
|
||||||
|
!g_ActiveConfig.backend_info.bSupportsEarlyZ) &&
|
||||||
|
ps->ztest == EmulatedZ::ForcedEarly)
|
||||||
|
{
|
||||||
|
ps->ztest = EmulatedZ::EarlyWithFBFetch;
|
||||||
|
fbfetch_blend |= static_cast<bool>(out.blending_state.blendenable);
|
||||||
|
ps->no_dual_src = true;
|
||||||
|
}
|
||||||
|
fbfetch_blend |= blend.logicopenable && !g_ActiveConfig.backend_info.bSupportsLogicOp;
|
||||||
|
fbfetch_blend |= blend.usedualsrc && !g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
|
||||||
|
if (fbfetch_blend)
|
||||||
|
{
|
||||||
|
ps->no_dual_src = true;
|
||||||
|
if (blend.logicopenable)
|
||||||
|
{
|
||||||
|
ps->logic_op_enable = true;
|
||||||
|
ps->logic_op_mode = static_cast<u32>(blend.logicmode.Value());
|
||||||
|
blend.logicopenable = false;
|
||||||
|
}
|
||||||
|
if (blend.blendenable)
|
||||||
|
{
|
||||||
|
ps->blend_enable = true;
|
||||||
|
ps->blend_src_factor = blend.srcfactor;
|
||||||
|
ps->blend_src_factor_alpha = blend.srcfactoralpha;
|
||||||
|
ps->blend_dst_factor = blend.dstfactor;
|
||||||
|
ps->blend_dst_factor_alpha = blend.dstfactoralpha;
|
||||||
|
ps->blend_subtract = blend.subtract;
|
||||||
|
ps->blend_subtract_alpha = blend.subtractAlpha;
|
||||||
|
blend.blendenable = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// force dual src off if we can't support it
|
||||||
|
if (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
|
||||||
|
{
|
||||||
|
ps->no_dual_src = true;
|
||||||
|
blend.usedualsrc = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ps->ztest == EmulatedZ::ForcedEarly && !g_ActiveConfig.backend_info.bSupportsEarlyZ)
|
||||||
|
{
|
||||||
|
// zfreeze should be false here
|
||||||
|
ASSERT(!ps->zfreeze);
|
||||||
|
// ZCOMPLOC HACK:
|
||||||
|
// The only way to emulate alpha test + early-z is to force early-z in the shader.
|
||||||
|
// As this isn't available on all drivers and as we can't emulate this feature otherwise,
|
||||||
|
// we are only able to choose which one we want to respect more.
|
||||||
|
// Tests seem to have proven that writing depth even when the alpha test fails is more
|
||||||
|
// important than a reliable alpha test, so we just force the alpha test to always succeed.
|
||||||
|
// At least this seems to be less buggy.
|
||||||
|
ps->ztest = EmulatedZ::EarlyWithZComplocHack;
|
||||||
|
}
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<AbstractPipelineConfig>
|
||||||
|
ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config_in)
|
||||||
|
{
|
||||||
|
GXPipelineUid config = ApplyDriverBugs(config_in);
|
||||||
const AbstractShader* vs;
|
const AbstractShader* vs;
|
||||||
auto vs_iter = m_vs_cache.shader_map.find(config.vs_uid);
|
auto vs_iter = m_vs_cache.shader_map.find(config.vs_uid);
|
||||||
if (vs_iter != m_vs_cache.shader_map.end() && !vs_iter->second.pending)
|
if (vs_iter != m_vs_cache.shader_map.end() && !vs_iter->second.pending)
|
||||||
@ -650,9 +738,25 @@ std::optional<AbstractPipelineConfig> ShaderCache::GetGXPipelineConfig(const GXP
|
|||||||
config.depth_state, config.blending_state);
|
config.depth_state, config.blending_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<AbstractPipelineConfig>
|
/// Edits the UID based on driver bugs and other special configurations
|
||||||
ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config)
|
static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in)
|
||||||
{
|
{
|
||||||
|
GXUberPipelineUid out;
|
||||||
|
memcpy(&out, &in, sizeof(out)); // Copy padding
|
||||||
|
if (!g_ActiveConfig.backend_info.bSupportsDualSourceBlend ||
|
||||||
|
(DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) &&
|
||||||
|
!out.blending_state.RequiresDualSrc()))
|
||||||
|
{
|
||||||
|
out.blending_state.usedualsrc = false;
|
||||||
|
out.ps_uid.GetUidData()->no_dual_src = true;
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<AbstractPipelineConfig>
|
||||||
|
ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config_in)
|
||||||
|
{
|
||||||
|
GXUberPipelineUid config = ApplyDriverBugs(config_in);
|
||||||
const AbstractShader* vs;
|
const AbstractShader* vs;
|
||||||
auto vs_iter = m_uber_vs_cache.shader_map.find(config.vs_uid);
|
auto vs_iter = m_uber_vs_cache.shader_map.find(config.vs_uid);
|
||||||
if (vs_iter != m_uber_vs_cache.shader_map.end() && !vs_iter->second.pending)
|
if (vs_iter != m_uber_vs_cache.shader_map.end() && !vs_iter->second.pending)
|
||||||
@ -981,12 +1085,14 @@ void ShaderCache::QueuePipelineCompile(const GXPipelineUid& uid, u32 priority)
|
|||||||
{
|
{
|
||||||
stages_ready = true;
|
stages_ready = true;
|
||||||
|
|
||||||
auto vs_it = shader_cache->m_vs_cache.shader_map.find(uid.vs_uid);
|
GXPipelineUid actual_uid = ApplyDriverBugs(uid);
|
||||||
|
|
||||||
|
auto vs_it = shader_cache->m_vs_cache.shader_map.find(actual_uid.vs_uid);
|
||||||
stages_ready &= vs_it != shader_cache->m_vs_cache.shader_map.end() && !vs_it->second.pending;
|
stages_ready &= vs_it != shader_cache->m_vs_cache.shader_map.end() && !vs_it->second.pending;
|
||||||
if (vs_it == shader_cache->m_vs_cache.shader_map.end())
|
if (vs_it == shader_cache->m_vs_cache.shader_map.end())
|
||||||
shader_cache->QueueVertexShaderCompile(uid.vs_uid, priority);
|
shader_cache->QueueVertexShaderCompile(actual_uid.vs_uid, priority);
|
||||||
|
|
||||||
PixelShaderUid ps_uid = uid.ps_uid;
|
PixelShaderUid ps_uid = actual_uid.ps_uid;
|
||||||
ClearUnusedPixelShaderUidBits(shader_cache->m_api_type, shader_cache->m_host_config, &ps_uid);
|
ClearUnusedPixelShaderUidBits(shader_cache->m_api_type, shader_cache->m_host_config, &ps_uid);
|
||||||
|
|
||||||
auto ps_it = shader_cache->m_ps_cache.shader_map.find(ps_uid);
|
auto ps_it = shader_cache->m_ps_cache.shader_map.find(ps_uid);
|
||||||
@ -1051,13 +1157,15 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid, u32 pri
|
|||||||
{
|
{
|
||||||
stages_ready = true;
|
stages_ready = true;
|
||||||
|
|
||||||
auto vs_it = shader_cache->m_uber_vs_cache.shader_map.find(uid.vs_uid);
|
GXUberPipelineUid actual_uid = ApplyDriverBugs(uid);
|
||||||
|
|
||||||
|
auto vs_it = shader_cache->m_uber_vs_cache.shader_map.find(actual_uid.vs_uid);
|
||||||
stages_ready &=
|
stages_ready &=
|
||||||
vs_it != shader_cache->m_uber_vs_cache.shader_map.end() && !vs_it->second.pending;
|
vs_it != shader_cache->m_uber_vs_cache.shader_map.end() && !vs_it->second.pending;
|
||||||
if (vs_it == shader_cache->m_uber_vs_cache.shader_map.end())
|
if (vs_it == shader_cache->m_uber_vs_cache.shader_map.end())
|
||||||
shader_cache->QueueVertexUberShaderCompile(uid.vs_uid, priority);
|
shader_cache->QueueVertexUberShaderCompile(actual_uid.vs_uid, priority);
|
||||||
|
|
||||||
UberShader::PixelShaderUid ps_uid = uid.ps_uid;
|
UberShader::PixelShaderUid ps_uid = actual_uid.ps_uid;
|
||||||
UberShader::ClearUnusedPixelShaderUidBits(shader_cache->m_api_type,
|
UberShader::ClearUnusedPixelShaderUidBits(shader_cache->m_api_type,
|
||||||
shader_cache->m_host_config, &ps_uid);
|
shader_cache->m_host_config, &ps_uid);
|
||||||
|
|
||||||
|
@ -21,12 +21,12 @@ PixelShaderUid GetPixelShaderUid()
|
|||||||
|
|
||||||
pixel_ubershader_uid_data* const uid_data = out.GetUidData();
|
pixel_ubershader_uid_data* const uid_data = out.GetUidData();
|
||||||
uid_data->num_texgens = xfmem.numTexGen.numTexGens;
|
uid_data->num_texgens = xfmem.numTexGen.numTexGens;
|
||||||
uid_data->early_depth = bpmem.UseEarlyDepthTest() &&
|
uid_data->early_depth = bpmem.GetEmulatedZ() == EmulatedZ::Early &&
|
||||||
(g_ActiveConfig.bFastDepthCalc ||
|
(g_ActiveConfig.bFastDepthCalc ||
|
||||||
bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) &&
|
bpmem.alpha_test.TestResult() == AlphaTestResult::Undetermined) &&
|
||||||
!(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
!(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
||||||
uid_data->per_pixel_depth =
|
uid_data->per_pixel_depth =
|
||||||
(bpmem.ztex2.op != ZTexOp::Disabled && bpmem.UseLateDepthTest()) ||
|
(bpmem.ztex2.op != ZTexOp::Disabled && bpmem.GetEmulatedZ() == EmulatedZ::Late) ||
|
||||||
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !uid_data->early_depth) ||
|
(!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !uid_data->early_depth) ||
|
||||||
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
(bpmem.zmode.testenable && bpmem.genMode.zfreeze);
|
||||||
uid_data->uint_output = bpmem.blendmode.UseLogicOp();
|
uid_data->uint_output = bpmem.blendmode.UseLogicOp();
|
||||||
@ -39,6 +39,10 @@ void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& hos
|
|||||||
{
|
{
|
||||||
pixel_ubershader_uid_data* const uid_data = uid->GetUidData();
|
pixel_ubershader_uid_data* const uid_data = uid->GetUidData();
|
||||||
|
|
||||||
|
// Dual source is always enabled in the shader if this bug is not present
|
||||||
|
if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING))
|
||||||
|
uid_data->no_dual_src = 0;
|
||||||
|
|
||||||
// OpenGL and Vulkan convert implicitly normalized color outputs to their uint representation.
|
// OpenGL and Vulkan convert implicitly normalized color outputs to their uint representation.
|
||||||
// Therefore, it is not necessary to use a uint output on these backends. We also disable the
|
// Therefore, it is not necessary to use a uint output on these backends. We also disable the
|
||||||
// uint output when logic op is not supported (i.e. driver/device does not support D3D11.1).
|
// uint output when logic op is not supported (i.e. driver/device does not support D3D11.1).
|
||||||
@ -53,8 +57,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
|
|||||||
const bool msaa = host_config.msaa;
|
const bool msaa = host_config.msaa;
|
||||||
const bool ssaa = host_config.ssaa;
|
const bool ssaa = host_config.ssaa;
|
||||||
const bool stereo = host_config.stereo;
|
const bool stereo = host_config.stereo;
|
||||||
const bool use_dual_source = host_config.backend_dual_source_blend;
|
const bool use_dual_source = host_config.backend_dual_source_blend && !uid_data->no_dual_src;
|
||||||
const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch;
|
const bool use_shader_blend = !host_config.backend_dual_source_blend &&
|
||||||
|
host_config.backend_shader_framebuffer_fetch;
|
||||||
const bool use_shader_logic_op =
|
const bool use_shader_logic_op =
|
||||||
!host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch;
|
!host_config.backend_logic_op && host_config.backend_shader_framebuffer_fetch;
|
||||||
const bool use_framebuffer_fetch =
|
const bool use_framebuffer_fetch =
|
||||||
@ -1273,10 +1278,14 @@ void EnumeratePixelShaderUids(const std::function<void(const PixelShaderUid&)>&
|
|||||||
for (u32 uint_output = 0; uint_output < 2; uint_output++)
|
for (u32 uint_output = 0; uint_output < 2; uint_output++)
|
||||||
{
|
{
|
||||||
puid->uint_output = uint_output;
|
puid->uint_output = uint_output;
|
||||||
|
for (u32 no_dual_src = 0; no_dual_src < 2; no_dual_src++)
|
||||||
|
{
|
||||||
|
puid->no_dual_src = no_dual_src;
|
||||||
callback(uid);
|
callback(uid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // namespace UberShader
|
} // namespace UberShader
|
||||||
|
@ -18,6 +18,7 @@ struct pixel_ubershader_uid_data
|
|||||||
u32 early_depth : 1;
|
u32 early_depth : 1;
|
||||||
u32 per_pixel_depth : 1;
|
u32 per_pixel_depth : 1;
|
||||||
u32 uint_output : 1;
|
u32 uint_output : 1;
|
||||||
|
u32 no_dual_src : 1;
|
||||||
|
|
||||||
u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); }
|
u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); }
|
||||||
};
|
};
|
||||||
@ -42,9 +43,9 @@ struct fmt::formatter<UberShader::pixel_ubershader_uid_data>
|
|||||||
template <typename FormatContext>
|
template <typename FormatContext>
|
||||||
auto format(const UberShader::pixel_ubershader_uid_data& uid, FormatContext& ctx) const
|
auto format(const UberShader::pixel_ubershader_uid_data& uid, FormatContext& ctx) const
|
||||||
{
|
{
|
||||||
return fmt::format_to(ctx.out(), "Pixel UberShader for {} texgens{}{}{}", uid.num_texgens,
|
return fmt::format_to(
|
||||||
uid.early_depth ? ", early-depth" : "",
|
ctx.out(), "Pixel UberShader for {} texgens{}{}{}{}", uid.num_texgens,
|
||||||
uid.per_pixel_depth ? ", per-pixel depth" : "",
|
uid.early_depth ? ", early-depth" : "", uid.per_pixel_depth ? ", per-pixel depth" : "",
|
||||||
uid.uint_output ? ", uint output" : "");
|
uid.uint_output ? ", uint output" : "", uid.no_dual_src ? ", no dual-source blending" : "");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user