From 68f49df0f8a845eecd1e65c4d75dfbee3bdc989e Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 23 Jul 2022 00:47:04 -0500 Subject: [PATCH 1/9] VideoCommon: Add vertex shader point and line expansion --- Source/Core/Core/State.cpp | 2 +- .../Core/VideoBackends/D3D12/DX12Context.cpp | 2 +- Source/Core/VideoCommon/ConstantManager.h | 9 + Source/Core/VideoCommon/GXPipelineTypes.h | 2 +- Source/Core/VideoCommon/GeometryShaderGen.cpp | 6 +- .../VideoCommon/GeometryShaderManager.cpp | 25 +- .../Core/VideoCommon/GeometryShaderManager.h | 3 +- Source/Core/VideoCommon/IndexGenerator.cpp | 98 +++++++- Source/Core/VideoCommon/IndexGenerator.h | 2 +- Source/Core/VideoCommon/ShaderCache.cpp | 41 +++ Source/Core/VideoCommon/ShaderGenCommon.cpp | 2 + Source/Core/VideoCommon/ShaderGenCommon.h | 6 + Source/Core/VideoCommon/UberShaderVertex.cpp | 120 ++++++++- Source/Core/VideoCommon/VertexManagerBase.cpp | 87 +++++-- Source/Core/VideoCommon/VertexShaderGen.cpp | 236 ++++++++++++++++-- Source/Core/VideoCommon/VertexShaderGen.h | 9 +- Source/Core/VideoCommon/VideoConfig.h | 7 + 17 files changed, 584 insertions(+), 73 deletions(-) diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index 97ceb37c01..5c175c8ae6 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -94,7 +94,7 @@ static size_t s_state_writes_in_queue; static std::condition_variable s_state_write_queue_is_empty; // Don't forget to increase this after doing changes on the savestate system -constexpr u32 STATE_VERSION = 154; // Last changed in PR 11177 +constexpr u32 STATE_VERSION = 155; // Last changed in PR 10890 // Maps savestate versions to Dolphin versions. 
// Versions after 42 don't need to be added to this list, diff --git a/Source/Core/VideoBackends/D3D12/DX12Context.cpp b/Source/Core/VideoBackends/D3D12/DX12Context.cpp index 5cf0e414c4..955f26413e 100644 --- a/Source/Core/VideoBackends/D3D12/DX12Context.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12Context.cpp @@ -358,7 +358,7 @@ bool DXContext::CreateGXRootSignature() SetRootParamTable(&params[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3, 1, D3D12_SHADER_VISIBILITY_VERTEX); param_count++; - SetRootParamConstant(&params[param_count], 2, 1, D3D12_SHADER_VISIBILITY_VERTEX); + SetRootParamConstant(&params[param_count], 3, 1, D3D12_SHADER_VISIBILITY_VERTEX); param_count++; // Since these must be contiguous, pixel lighting goes to bbox if not enabled. diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index c3427ba37f..6e60929056 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -102,9 +102,18 @@ struct VertexShaderConstants std::array vertex_offset_texcoords; }; +enum class VSExpand : u32 +{ + None = 0, + Point, + Line, +}; + struct GeometryShaderConstants { float4 stereoparams; float4 lineptparams; int4 texoffset; + VSExpand vs_expand; // Used by VS point/line expansion in ubershaders + u32 pad[3]; }; diff --git a/Source/Core/VideoCommon/GXPipelineTypes.h b/Source/Core/VideoCommon/GXPipelineTypes.h index c226739b7d..fdb76a6646 100644 --- a/Source/Core/VideoCommon/GXPipelineTypes.h +++ b/Source/Core/VideoCommon/GXPipelineTypes.h @@ -19,7 +19,7 @@ namespace VideoCommon // As pipelines encompass both shader UIDs and render states, changes to either of these should // also increment the pipeline UID version. Incrementing the UID version will cause all UID // caches to be invalidated. 
-constexpr u32 GX_PIPELINE_UID_VERSION = 5; // Last changed in PR 10747 +constexpr u32 GX_PIPELINE_UID_VERSION = 6; // Last changed in PR 10890 struct GXPipelineUid { diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 818a22bfa4..04d0062982 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -97,10 +97,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& else out.Write("cbuffer GSBlock {{\n"); - out.Write("\tfloat4 " I_STEREOPARAMS ";\n" - "\tfloat4 " I_LINEPTPARAMS ";\n" - "\tint4 " I_TEXOFFSET ";\n" - "}};\n"); + out.Write("{}", s_geometry_shader_uniforms); + out.Write("}};\n"); out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "", diff --git a/Source/Core/VideoCommon/GeometryShaderManager.cpp b/Source/Core/VideoCommon/GeometryShaderManager.cpp index f71a687ce8..459df67868 100644 --- a/Source/Core/VideoCommon/GeometryShaderManager.cpp +++ b/Source/Core/VideoCommon/GeometryShaderManager.cpp @@ -8,6 +8,7 @@ #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/RenderState.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" @@ -36,10 +37,22 @@ void GeometryShaderManager::Dirty() // Any constants that can changed based on settings should be re-calculated s_projection_changed = true; + // Uses EFB scale config + SetLinePtWidthChanged(); + dirty = true; } -void GeometryShaderManager::SetConstants() +static void SetVSExpand(VSExpand expand) +{ + if (GeometryShaderManager::constants.vs_expand != expand) + { + GeometryShaderManager::constants.vs_expand = expand; + GeometryShaderManager::dirty = true; + } +} + +void GeometryShaderManager::SetConstants(PrimitiveType prim) { if (s_projection_changed && g_ActiveConfig.stereo_mode != StereoMode::Off) { @@ -63,6 +76,16 @@ void 
GeometryShaderManager::SetConstants() dirty = true; } + if (g_ActiveConfig.UseVSForLinePointExpand()) + { + if (prim == PrimitiveType::Points) + SetVSExpand(VSExpand::Point); + else if (prim == PrimitiveType::Lines) + SetVSExpand(VSExpand::Line); + else + SetVSExpand(VSExpand::None); + } + if (s_viewport_changed) { s_viewport_changed = false; diff --git a/Source/Core/VideoCommon/GeometryShaderManager.h b/Source/Core/VideoCommon/GeometryShaderManager.h index cbca02616c..2b449b9089 100644 --- a/Source/Core/VideoCommon/GeometryShaderManager.h +++ b/Source/Core/VideoCommon/GeometryShaderManager.h @@ -7,6 +7,7 @@ #include "VideoCommon/ConstantManager.h" class PointerWrap; +enum class PrimitiveType : u32; // The non-API dependent parts. class GeometryShaderManager @@ -16,7 +17,7 @@ public: static void Dirty(); static void DoState(PointerWrap& p); - static void SetConstants(); + static void SetConstants(PrimitiveType prim); static void SetViewportChanged(); static void SetProjectionChanged(); static void SetLinePtWidthChanged(); diff --git a/Source/Core/VideoCommon/IndexGenerator.cpp b/Source/Core/VideoCommon/IndexGenerator.cpp index be2dc99e3a..5d86561ffd 100644 --- a/Source/Core/VideoCommon/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/IndexGenerator.cpp @@ -190,6 +190,39 @@ u16* AddLineStrip(u16* index_ptr, u32 num_verts, u32 index) return index_ptr; } +template +u16* AddLines_VSExpand(u16* index_ptr, u32 num_verts, u32 index) +{ + // VS Expand uses (index >> 2) as the base vertex + // Bit 0 indicates which side of the line (left/right for a vertical line) + // Bit 1 indicates which point of the line (top/bottom for a vertical line) + // VS Expand assumes the two points will be adjacent vertices + constexpr u32 advance = linestrip ? 
1 : 2; + for (u32 i = 1; i < num_verts; i += advance) + { + u32 p0 = (index + i - 1) << 2; + u32 p1 = (index + i - 0) << 2; + if constexpr (pr) + { + *index_ptr++ = p0 + 0; + *index_ptr++ = p0 + 1; + *index_ptr++ = p1 + 2; + *index_ptr++ = p1 + 3; + *index_ptr++ = s_primitive_restart; + } + else + { + *index_ptr++ = p0 + 0; + *index_ptr++ = p0 + 1; + *index_ptr++ = p1 + 2; + *index_ptr++ = p0 + 1; + *index_ptr++ = p1 + 2; + *index_ptr++ = p1 + 3; + } + } + return index_ptr; +} + u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index) { for (u32 i = 0; i != num_verts; ++i) @@ -198,6 +231,35 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index) } return index_ptr; } + +template +u16* AddPoints_VSExpand(u16* index_ptr, u32 num_verts, u32 index) +{ + // VS Expand uses (index >> 2) as the base vertex + // Bottom two bits indicate which of (TL, TR, BL, BR) this is + for (u32 i = 0; i < num_verts; ++i) + { + u32 base = (index + i) << 2; + if constexpr (pr) + { + *index_ptr++ = base + 0; + *index_ptr++ = base + 1; + *index_ptr++ = base + 2; + *index_ptr++ = base + 3; + *index_ptr++ = s_primitive_restart; + } + else + { + *index_ptr++ = base + 0; + *index_ptr++ = base + 1; + *index_ptr++ = base + 2; + *index_ptr++ = base + 1; + *index_ptr++ = base + 2; + *index_ptr++ = base + 3; + } + } + return index_ptr; +} } // Anonymous namespace void IndexGenerator::Init() @@ -220,9 +282,27 @@ void IndexGenerator::Init() m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip; m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan; } - m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList; - m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip; - m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints; + if (g_Config.UseVSForLinePointExpand()) + { + if (g_Config.backend_info.bSupportsPrimitiveRestart) + { + m_primitive_table[Primitive::GX_DRAW_LINES] = AddLines_VSExpand; + m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLines_VSExpand; + 
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints_VSExpand; + } + else + { + m_primitive_table[Primitive::GX_DRAW_LINES] = AddLines_VSExpand; + m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLines_VSExpand; + m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints_VSExpand; + } + } + else + { + m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList; + m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip; + m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints; + } } void IndexGenerator::Start(u16* index_ptr) @@ -246,10 +326,14 @@ void IndexGenerator::AddExternalIndices(const u16* indices, u32 num_indices, u32 m_base_index += num_vertices; } -u32 IndexGenerator::GetRemainingIndices() const +u32 IndexGenerator::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const { - // -1 is reserved for primitive restart (OGL + DX11) - constexpr u32 max_index = 65534; + u32 max_index = USHRT_MAX; - return max_index - m_base_index; + if (g_Config.UseVSForLinePointExpand() && primitive >= OpcodeDecoder::Primitive::GX_DRAW_LINES) + max_index >>= 2; + + // -1 is reserved for primitive restart + + return max_index - m_base_index - 1; } diff --git a/Source/Core/VideoCommon/IndexGenerator.h b/Source/Core/VideoCommon/IndexGenerator.h index 32cf21e207..3c57ea7803 100644 --- a/Source/Core/VideoCommon/IndexGenerator.h +++ b/Source/Core/VideoCommon/IndexGenerator.h @@ -23,7 +23,7 @@ public: // returns numprimitives u32 GetNumVerts() const { return m_base_index; } u32 GetIndexLen() const { return static_cast(m_index_buffer_current - m_base_index_ptr); } - u32 GetRemainingIndices() const; + u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const; private: u16* m_index_buffer_current = nullptr; diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index f281024f22..59ae917686 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -10,6 +10,7 @@ #include 
"Common/MsgHandler.h" #include "Core/ConfigManager.h" +#include "VideoCommon/ConstantManager.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/FramebufferManager.h" #include "VideoCommon/FramebufferShaderGen.h" @@ -695,6 +696,35 @@ static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in) ps->ztest = EmulatedZ::EarlyWithZComplocHack; } + if (g_ActiveConfig.UseVSForLinePointExpand() && + (out.rasterization_state.primitive == PrimitiveType::Points || + out.rasterization_state.primitive == PrimitiveType::Lines)) + { + // All primitives are expanded to triangles in the vertex shader + vertex_shader_uid_data* vs = out.vs_uid.GetUidData(); + const PortableVertexDeclaration& decl = out.vertex_format->GetVertexDeclaration(); + vs->position_has_3_elems = decl.position.components >= 3; + vs->texcoord_elem_count = 0; + for (int i = 0; i < 8; i++) + { + if (decl.texcoords[i].enable) + { + ASSERT(decl.texcoords[i].components <= 3); + vs->texcoord_elem_count |= decl.texcoords[i].components << (i * 2); + } + } + out.vertex_format = nullptr; + if (out.rasterization_state.primitive == PrimitiveType::Points) + vs->vs_expand = VSExpand::Point; + else + vs->vs_expand = VSExpand::Line; + PrimitiveType prim = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? + PrimitiveType::TriangleStrip : + PrimitiveType::Triangles; + out.rasterization_state.primitive = prim; + out.gs_uid.GetUidData()->primitive_type = static_cast(prim); + } + return out; } @@ -760,6 +790,17 @@ static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in) out.blending_state.usedualsrc = false; out.ps_uid.GetUidData()->no_dual_src = true; } + + if (g_ActiveConfig.UseVSForLinePointExpand()) + { + // All primitives are expanded to triangles in the vertex shader + PrimitiveType prim = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? 
+ PrimitiveType::TriangleStrip : + PrimitiveType::Triangles; + out.rasterization_state.primitive = prim; + out.gs_uid.GetUidData()->primitive_type = static_cast(prim); + } + return out; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 7b407f9aaf..85757b64c5 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -5,6 +5,7 @@ #include +#include "Common/Assert.h" #include "Common/FileUtil.h" #include "Core/ConfigManager.h" #include "VideoCommon/VideoCommon.h" @@ -44,6 +45,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() g_ActiveConfig.ManualTextureSamplingWithHiResTextures(); bits.backend_sampler_lod_bias = g_ActiveConfig.backend_info.bSupportsLodBiasInSampler; bits.backend_dynamic_vertex_loader = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader; + bits.backend_vs_point_line_expand = g_ActiveConfig.UseVSForLinePointExpand(); return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 73fa68af03..22bad1b220 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -178,6 +178,7 @@ union ShaderHostConfig BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes; BitField<26, 1, bool, u32> backend_sampler_lod_bias; BitField<27, 1, bool, u32> backend_dynamic_vertex_loader; + BitField<28, 1, bool, u32> backend_vs_point_line_expand; static ShaderHostConfig GetCurrent(); }; @@ -316,3 +317,8 @@ static const char s_shader_uniforms[] = "\tuint components;\n" "\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n" "\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n" "\t#define xfmem_alpha(i) (xfmem_pack1[(i)].w)\n"; + +static const char s_geometry_shader_uniforms[] = "\tfloat4 " I_STEREOPARAMS ";\n" + "\tfloat4 " I_LINEPTPARAMS ";\n" + "\tint4 " I_TEXOFFSET ";\n" + "\tuint vs_expand;\n"; diff --git 
a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 04f2b27a14..0bfacf88ee 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -3,6 +3,7 @@ #include "VideoCommon/UberShaderVertex.h" +#include "VideoCommon/ConstantManager.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/UberShaderCommon.h" @@ -35,6 +36,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config const bool ssaa = host_config.ssaa; const bool per_pixel_lighting = host_config.per_pixel_lighting; const bool vertex_rounding = host_config.vertex_rounding; + const bool vertex_loader = + host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand; const u32 num_texgen = uid_data->num_texgens; ShaderCode out; @@ -46,6 +49,13 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config out.Write("{}", s_shader_uniforms); out.Write("}};\n"); + if (vertex_loader) + { + out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n"); + out.Write("{}", s_geometry_shader_uniforms); + out.Write("}};\n"); + } + out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "", ShaderStage::Vertex); out.Write("}};\n\n"); @@ -54,7 +64,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config WriteBitfieldExtractHeader(out, api_type, host_config); WriteLightingFunction(out); - if (host_config.backend_dynamic_vertex_loader) + if (vertex_loader) { out.Write(R"( SSBO_BINDING(1) readonly restrict buffer Vertices {{ @@ -73,17 +83,17 @@ SSBO_BINDING(1) readonly restrict buffer Vertices {{ // D3D12 uses a root constant for this uniform, since it changes with every draw. // D3D11 doesn't currently support dynamic vertex loader, and we'll have to figure something // out for it if we want to support it in the future. 
- out.Write("UBO_BINDING(std140, 3) uniform DX_Constants {{\n" + out.Write("UBO_BINDING(std140, 4) uniform DX_Constants {{\n" " uint base_vertex;\n" "}};\n\n" - "uint GetVertexBaseOffset() {{\n" - " return (gl_VertexID + base_vertex) * vertex_stride;\n" + "uint GetVertexBaseOffset(uint vertex_id) {{\n" + " return (vertex_id + base_vertex) * vertex_stride;\n" "}}\n"); } else { - out.Write("uint GetVertexBaseOffset() {{\n" - " return gl_VertexID * vertex_stride;\n" + out.Write("uint GetVertexBaseOffset(uint vertex_id) {{\n" + " return vertex_id * vertex_stride;\n" "}}\n"); } @@ -187,9 +197,17 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ out.Write("VS_OUTPUT o;\n" "\n"); - if (host_config.backend_dynamic_vertex_loader) + if (host_config.backend_vs_point_line_expand) { - out.Write("uint vertex_base_offset = GetVertexBaseOffset();\n"); + out.Write("uint vertex_id = gl_VertexID;\n" + "if (vs_expand != 0u) {{\n" + " vertex_id = vertex_id >> 2;\n" + "}}\n" + "uint vertex_base_offset = GetVertexBaseOffset(vertex_id);\n"); + } + else if (host_config.backend_dynamic_vertex_loader) + { + out.Write("uint vertex_base_offset = GetVertexBaseOffset(gl_VertexID);\n"); } // rawpos is always needed LoadVertexAttribute(out, host_config, 0, "rawpos", "float4", "rawpos"); @@ -320,6 +338,88 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ if (num_texgen > 0) GenVertexShaderTexGens(api_type, host_config, num_texgen, out); + if (host_config.backend_vs_point_line_expand) + { + out.Write("if (vs_expand == {}u) {{ // Line\n", static_cast(VSExpand::Line)); + out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n" + " bool is_right = (gl_VertexID & 1) != 0;\n" + " uint other_base_offset = vertex_base_offset;\n" + " if (is_bottom) {{\n" + " other_base_offset -= vertex_stride;\n" + " }} else {{\n" + " other_base_offset += vertex_stride;\n" + " }}\n" + " float4 other_rawpos = load_input_float4_rawpos(other_base_offset, " + 
"vertex_offset_rawpos);\n" + " float4 other_p0 = P0;\n" + " float4 other_p1 = P1;\n" + " float4 other_p2 = P2;\n" + " if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n", + VB_HAS_POSMTXIDX); + out.Write(" uint other_posidx = int(load_input_uint4_ubyte4(other_base_offset, " + "vertex_offset_posmtx).r);\n" + " other_p0 = " I_TRANSFORMMATRICES "[other_posidx];\n" + " other_p1 = " I_TRANSFORMMATRICES "[other_posidx+1];\n" + " other_p2 = " I_TRANSFORMMATRICES "[other_posidx+2];\n" + " }}\n" + " float4 other_pos = float4(dot(other_p0, other_rawpos), " + "dot(other_p1, other_rawpos), dot(other_p2, other_rawpos), 1.0);\n" + " other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION + "[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION + "[3], other_pos));\n" + "\n" + " float sign = is_right ? 1.0f : -1.0f;\n" + // GameCube/Wii's line drawing algorithm is a little quirky. It does not + // use the correct line caps. Instead, the line caps are vertical or + // horizontal depending the slope of the line. + " float2 offset;\n" + " float2 to = abs(o.pos.xy / o.pos.w - other_pos.xy / other_pos.w);\n" + // FIXME: What does real hardware do when line is at a 45-degree angle? + // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. + " if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" + // Line is more tall. Extend geometry left and right. + // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] + " offset = float2(sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" + " }} else {{\n" + // Line is more wide. Extend geometry up and down. 
+ // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] + " offset = float2(0, sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" + " }}\n" + "\n" + " o.pos.xy += offset * o.pos.w;\n"); + if (num_texgen > 0) + { + out.Write(" if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n" + " float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); + for (u32 i = 0; i < num_texgen; i++) + { + out.Write(" if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i); + out.Write(" o.tex{}.x += texOffset;\n", i); + } + out.Write(" }}\n"); + } + out.Write("}} else if (vs_expand == {}u) {{ // Point\n", static_cast(VSExpand::Point)); + out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n" + " bool is_right = (gl_VertexID & 1) != 0;\n" + " float2 sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 1.0f : -1.0f);\n" + " float2 offset = sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n" + " o.pos.xy += offset * o.pos.w;\n"); + if (num_texgen > 0) + { + out.Write(" if (" I_TEXOFFSET "[3] != 0) {{\n" + " float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n" + " float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, " + "is_bottom ? 
texOffsetMagnitude : 0.0f);"); + for (u32 i = 0; i < num_texgen; i++) + { + out.Write(" if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", i); + out.Write(" o.tex{}.xy += texOffset;\n", i); + } + out.Write(" }}\n"); + } + out.Write("}}\n"); + } + if (per_pixel_lighting) { out.Write("// When per-pixel lighting is enabled, the vertex colors are passed through\n" @@ -574,7 +674,7 @@ static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& hos " {{\n"); out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n", VB_HAS_TEXMTXIDX0); - if (host_config.backend_dynamic_vertex_loader) + if (host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand) { out.Write(" int tmp = int(load_input_float3_rawtex(vertex_base_offset, " "vertex_offset_rawtex[texgen / 4][texgen % 4]).z);\n" @@ -655,7 +755,7 @@ static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_c std::string_view name, std::string_view shader_type, std::string_view stored_type, std::string_view offset_name) { - if (host_config.backend_dynamic_vertex_loader) + if (host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand) { code.Write("{:{}}{} {} = load_input_{}_{}(vertex_base_offset, vertex_offset_{});\n", "", indent, shader_type, name, shader_type, stored_type, diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 41b3968b2b..f8e4970a51 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -140,12 +140,12 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive // Check for size in buffer, if the buffer gets full, call Flush() if (!m_is_flushed && - (count > m_index_generator.GetRemainingIndices() || count > GetRemainingIndices(primitive) || - needed_vertex_bytes > GetRemainingSize())) + (count > m_index_generator.GetRemainingIndices(primitive) || + count 
> GetRemainingIndices(primitive) || needed_vertex_bytes > GetRemainingSize())) { Flush(); - if (count > m_index_generator.GetRemainingIndices()) + if (count > m_index_generator.GetRemainingIndices(primitive)) { ERROR_LOG_FMT(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush."); } @@ -193,7 +193,55 @@ u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c { const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen(); - if (g_Config.backend_info.bSupportsPrimitiveRestart) + if (primitive >= Primitive::GX_DRAW_LINES) + { + if (g_Config.UseVSForLinePointExpand()) + { + if (g_Config.backend_info.bSupportsPrimitiveRestart) + { + switch (primitive) + { + case Primitive::GX_DRAW_LINES: + return index_len / 5 * 2; + case Primitive::GX_DRAW_LINE_STRIP: + return index_len / 5 + 1; + case Primitive::GX_DRAW_POINTS: + return index_len / 5; + default: + return 0; + } + } + else + { + switch (primitive) + { + case Primitive::GX_DRAW_LINES: + return index_len / 6 * 2; + case Primitive::GX_DRAW_LINE_STRIP: + return index_len / 6 + 1; + case Primitive::GX_DRAW_POINTS: + return index_len / 6; + default: + return 0; + } + } + } + else + { + switch (primitive) + { + case Primitive::GX_DRAW_LINES: + return index_len; + case Primitive::GX_DRAW_LINE_STRIP: + return index_len / 2 + 1; + case Primitive::GX_DRAW_POINTS: + return index_len; + default: + return 0; + } + } + } + else if (g_Config.backend_info.bSupportsPrimitiveRestart) { switch (primitive) { @@ -206,15 +254,6 @@ u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c return index_len / 1 - 1; case Primitive::GX_DRAW_TRIANGLE_FAN: return index_len / 6 * 4 + 1; - - case Primitive::GX_DRAW_LINES: - return index_len; - case Primitive::GX_DRAW_LINE_STRIP: - return index_len / 2 + 1; - - case Primitive::GX_DRAW_POINTS: - return index_len; - default: return 0; } @@ -232,15 +271,6 @@ u32 
VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c return index_len / 3 + 2; case Primitive::GX_DRAW_TRIANGLE_FAN: return index_len / 3 + 2; - - case Primitive::GX_DRAW_LINES: - return index_len; - case Primitive::GX_DRAW_LINE_STRIP: - return index_len / 2 + 1; - - case Primitive::GX_DRAW_POINTS: - return index_len; - default: return 0; } @@ -511,13 +541,24 @@ void VertexManagerBase::Flush() VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices, &base_vertex, &base_index); + if (g_ActiveConfig.backend_info.api_type != APIType::D3D && + g_ActiveConfig.UseVSForLinePointExpand() && + (m_current_primitive_type == PrimitiveType::Points || + m_current_primitive_type == PrimitiveType::Lines)) + { + // VS point/line expansion puts the vertex id at gl_VertexID << 2 + // That means the base vertex has to be adjusted to match + // (The shader adds this after shifting right on D3D, so no need to do this) + base_vertex <<= 2; + } + // Texture loading can cause palettes to be applied (-> uniforms -> draws). // Palette application does not use vertices, only a full-screen quad, so this is okay. // Same with GPU texture decoding, which uses compute shaders. g_texture_cache->BindTextures(used_textures); // Now we can upload uniforms, as nothing else will override them. 
- GeometryShaderManager::SetConstants(); + GeometryShaderManager::SetConstants(m_current_primitive_type); PixelShaderManager::SetConstants(); UploadUniforms(); diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 2eb5a53a97..948dc3ad34 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -6,6 +6,7 @@ #include "Common/Assert.h" #include "Common/CommonTypes.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/ConstantManager.h" #include "VideoCommon/LightingShaderGen.h" #include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/VertexLoaderManager.h" @@ -83,6 +84,8 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho const bool ssaa = host_config.ssaa; const bool vertex_rounding = host_config.vertex_rounding; + ShaderCode input_extract; + out.Write("{}", s_lighting_struct); // uniforms @@ -91,6 +94,21 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("{}", s_shader_uniforms); out.Write("}};\n"); + if (uid_data->vs_expand != VSExpand::None) + { + out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n"); + out.Write("{}", s_geometry_shader_uniforms); + out.Write("}};\n"); + + if (api_type == APIType::D3D) + { + // D3D doesn't include the base vertex in SV_VertexID + out.Write("UBO_BINDING(std140, 4) uniform DX_Constants {{\n" + " uint base_vertex;\n" + "}};\n\n"); + } + } + out.Write("struct VS_OUTPUT {{\n"); GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "", ShaderStage::Vertex); @@ -98,31 +116,114 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho WriteIsNanHeader(out, api_type); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); - if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - if 
((uid_data->components & VB_HAS_NORMAL) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); - if ((uid_data->components & VB_HAS_TANGENT) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); - if ((uid_data->components & VB_HAS_BINORMAL) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); - - if ((uid_data->components & VB_HAS_COL0) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); - if ((uid_data->components & VB_HAS_COL1) != 0) - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); - - for (u32 i = 0; i < 8; ++i) + if (uid_data->vs_expand == VSExpand::None) { - const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + if ((uid_data->components & VB_HAS_POSMTXIDX) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + if ((uid_data->components & VB_HAS_NORMAL) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); + if ((uid_data->components & VB_HAS_TANGENT) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); + if ((uid_data->components & VB_HAS_BINORMAL) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); - if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) + if ((uid_data->components & VB_HAS_COL0) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + if ((uid_data->components & VB_HAS_COL1) != 0) + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + + for (u32 i = 0; i < 8; ++i) { - out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, - has_texmtx != 0 ? 
3 : 2, i); + const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); + + if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0) + { + out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, + has_texmtx != 0 ? 3 : 2, i); + } } } + else + { + // Can't use float3, etc because we want 4-byte alignment + out.Write( + "uint4 unpack_ubyte4(uint value) {{\n" + " return uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);\n" + "}}\n\n" + "struct InputData {{\n"); + if (uid_data->components & VB_HAS_POSMTXIDX) + { + out.Write(" uint posmtx;\n"); + input_extract.Write("uint4 posmtx = unpack_ubyte4(i.posmtx);\n"); + } + if (uid_data->position_has_3_elems) + { + out.Write(" float pos0;\n" + " float pos1;\n" + " float pos2;\n"); + input_extract.Write("float4 rawpos = float4(i.pos0, i.pos1, i.pos2, 1.0f);\n"); + } + else + { + out.Write(" float pos0;\n" + " float pos1;\n"); + input_extract.Write("float4 rawpos = float4(i.pos0, i.pos1, 0.0f, 1.0f);\n"); + } + std::array names = {"normal", "binormal", "tangent"}; + for (int i = 0; i < 3; i++) + { + if (uid_data->components & (VB_HAS_NORMAL << i)) + { + out.Write(" float {0}0;\n" + " float {0}1;\n" + " float {0}2;\n", + names[i]); + input_extract.Write("float3 raw{0} = float3(i.{0}0, i.{0}1, i.{0}2);\n", names[i]); + } + } + for (int i = 0; i < 2; i++) + { + if (uid_data->components & (VB_HAS_COL0 << i)) + { + out.Write(" uint color{};\n", i); + input_extract.Write("float4 rawcolor{0} = float4(unpack_ubyte4(i.color{0})) / 255.0f;\n", + i); + } + } + for (int i = 0; i < 8; i++) + { + if (uid_data->components & (VB_HAS_UV0 << i)) + { + u32 ncomponents = (uid_data->texcoord_elem_count >> (2 * i)) & 3; + if (ncomponents < 2) + { + out.Write(" float tex{};\n", i); + input_extract.Write("float3 rawtex{0} = float3(i.tex{0}, 0.0f, 0.0f);\n", i); + } + else if (ncomponents == 2) + { + out.Write(" float tex{0}_0;\n" + " float tex{0}_1;\n", + i); + 
input_extract.Write("float3 rawtex{0} = float3(i.tex{0}_0, i.tex{0}_1, 0.0f);\n", i); + } + else + { + out.Write(" float tex{0}_0;\n" + " float tex{0}_1;\n" + " float tex{0}_2;\n", + i); + input_extract.Write("float3 rawtex{0} = float3(i.tex{0}_0, i.tex{0}_1, i.tex{0}_2);\n", + i); + } + } + } + out.Write("}};\n\n" + "SSBO_BINDING(1) readonly restrict buffer InputBuffer {{\n" + " InputData input_buffer[];\n" + "}};\n\n"); + } if (host_config.backend_geometry_shaders) { @@ -161,6 +262,21 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("void main()\n{{\n"); + if (uid_data->vs_expand != VSExpand::None) + { + out.Write("bool is_bottom = (gl_VertexID & 2) != 0;\n" + "bool is_right = (gl_VertexID & 1) != 0;\n"); + // D3D doesn't include the base vertex in SV_VertexID + // See comment in UberShaderVertex for details + if (api_type == APIType::D3D) + out.Write("uint vertex_id = (gl_VertexID >> 2) + base_vertex;\n"); + else + out.Write("uint vertex_id = gl_VertexID >> 2;\n"); + out.Write("InputData i = input_buffer[vertex_id];\n" + "{}", + input_extract.GetBuffer()); + } + out.Write("VS_OUTPUT o;\n"); // xfmem.numColorChans controls the number of color channels available to TEV, but we still need @@ -403,6 +519,86 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("}}\n"); } + if (uid_data->vs_expand == VSExpand::Line) + { + out.Write("// Line expansion\n" + "uint other_id = vertex_id;\n" + "if (is_bottom) {{\n" + " other_id -= 1;\n" + "}} else {{\n" + " other_id += 1;\n" + "}}\n" + "InputData other = input_buffer[other_id];\n"); + if (uid_data->position_has_3_elems) + out.Write("float4 other_pos = float4(other.pos0, other.pos1, other.pos2, 1.0f);\n"); + else + out.Write("float4 other_pos = float4(other.pos0, other.pos1, 0.0f, 1.0f);\n"); + if (uid_data->components & VB_HAS_POSMTXIDX) + { + out.Write("uint other_posidx = other.posmtx & 0xff;\n" + "float4 other_p0 = " I_TRANSFORMMATRICES 
"[other_posidx];\n" + "float4 other_p1 = " I_TRANSFORMMATRICES "[other_posidx + 1];\n" + "float4 other_p2 = " I_TRANSFORMMATRICES "[other_posidx + 2];\n" + "other_pos = float4(dot(other_p0, other_pos), dot(other_p1, other_pos), " + "dot(other_p2, other_pos), 1.0f);\n"); + } + else + { + out.Write("other_pos = float4(dot(P0, other_pos), dot(P1, other_pos), dot(P2, other_pos), " + "1.0f);\n"); + } + out.Write("other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION + "[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION + "[3], other_pos));\n" + "float expand_sign = is_right ? 1.0f : -1.0f;\n" + "float2 offset;\n" + "float2 to = abs(o.pos.xy / o.pos.w - other_pos.xy / other_pos.w);\n" + // FIXME: What does real hardware do when line is at a 45-degree angle? + // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. + "if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" + // Line is more tall. Extend geometry left and right. + // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] + " offset = float2(expand_sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" + "}} else {{\n" + // Line is more wide. Extend geometry up and down. + // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] + " offset = float2(0, expand_sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" + "}}\n" + "\n" + "o.pos.xy += offset * o.pos.w;\n"); + if (uid_data->numTexGens > 0) + { + out.Write("if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n" + " float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); + for (u32 i = 0; i < uid_data->numTexGens; i++) + { + out.Write(" if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i); + out.Write(" o.tex{}.x += texOffset;\n", i); + } + out.Write("}}\n"); + } + } + else if (uid_data->vs_expand == VSExpand::Point) + { + out.Write("// Point expansion\n" + "float2 expand_sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 
1.0f : -1.0f);\n" + "float2 offset = expand_sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n" + "o.pos.xy += offset * o.pos.w;\n"); + if (uid_data->numTexGens > 0) + { + out.Write("if (" I_TEXOFFSET "[3] != 0) {{\n" + " float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n" + " float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, " + "is_bottom ? texOffsetMagnitude : 0.0f);"); + for (u32 i = 0; i < uid_data->numTexGens; i++) + { + out.Write(" if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", i); + out.Write(" o.tex{}.xy += texOffset;\n", i); + } + out.Write("}}\n"); + } + } + if (per_pixel_lighting) { // When per-pixel lighting is enabled, the vertex colors are passed through diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index 028404c6e8..0c30223b9e 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ b/Source/Core/VideoCommon/VertexShaderGen.h @@ -11,6 +11,7 @@ enum class APIType; enum class TexInputForm : u32; enum class TexGenType : u32; enum class SourceRow : u32; +enum class VSExpand : u32; // TODO should be reordered enum : int @@ -42,10 +43,12 @@ struct vertex_shader_uid_data u32 numTexGens : 4; u32 numColorChans : 2; u32 dualTexTrans_enabled : 1; + VSExpand vs_expand : 2; + u32 position_has_3_elems : 1; - u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is - // 8 bits wide - u32 pad : 18; + u16 texcoord_elem_count; // 2 bits per texcoord input + u16 texMtxInfo_n_projection; // Stored separately to guarantee that the texMtxInfo struct is + // 8 bits wide struct { diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 579cf5e30a..dfdd0ffe79 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -222,9 +222,16 @@ struct VideoConfig final bool bSupportsSettingObjectNames = false; bool bSupportsPartialMultisampleResolve = false; bool 
bSupportsDynamicVertexLoader = false; + bool bSupportsVSLinePointExpand = false; } backend_info; // Utility + bool UseVSForLinePointExpand() const + { + if (!backend_info.bSupportsVSLinePointExpand) + return false; + return !backend_info.bSupportsGeometryShaders; + } bool MultisamplingEnabled() const { return iMultisamples > 1; } bool ExclusiveFullscreenEnabled() const { From 678ee48bfcc169e1d1833a19635153f544613ad5 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 23 Jul 2022 01:22:02 -0500 Subject: [PATCH 2/9] VideoBackends:Metal: Add support for vertex shader point and line expansion --- Source/Core/VideoBackends/Metal/MTLStateTracker.h | 3 ++- Source/Core/VideoBackends/Metal/MTLStateTracker.mm | 12 +++++++++++- Source/Core/VideoBackends/Metal/MTLUtil.mm | 2 ++ Source/Core/VideoBackends/Metal/MTLVertexManager.mm | 5 ++++- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h index 3e7bac832d..2ec2e2ae27 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.h +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h @@ -83,7 +83,7 @@ public: void SetTexture(u32 idx, id texture); void SetSampler(u32 idx, const SamplerState& sampler); void SetComputeTexture(const Texture* texture); - void InvalidateUniforms(bool vertex, bool fragment); + void InvalidateUniforms(bool vertex, bool geometry, bool fragment); void SetUtilityUniform(const void* buffer, size_t size); void SetTexelBuffer(id buffer, u32 offset0, u32 offset1); void SetVerticesAndIndices(id vertices, id indices); @@ -180,6 +180,7 @@ private: { // clang-format off bool has_gx_vs_uniform : 1; + bool has_gx_gs_uniform : 1; bool has_gx_ps_uniform : 1; bool has_utility_vs_uniform : 1; bool has_utility_ps_uniform : 1; diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm index 0004c81089..e25e33fe02 100644 --- 
a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -15,6 +15,7 @@ #include "VideoBackends/Metal/MTLTexture.h" #include "VideoBackends/Metal/MTLUtil.h" +#include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexShaderManager.h" @@ -462,9 +463,10 @@ void Metal::StateTracker::UnbindTexture(id texture) } } -void Metal::StateTracker::InvalidateUniforms(bool vertex, bool fragment) +void Metal::StateTracker::InvalidateUniforms(bool vertex, bool geometry, bool fragment) { m_flags.has_gx_vs_uniform &= !vertex; + m_flags.has_gx_gs_uniform &= !geometry; m_flags.has_gx_ps_uniform &= !fragment; } @@ -722,6 +724,14 @@ void Metal::StateTracker::PrepareRender() ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, Align(sizeof(VertexShaderConstants), AlignMask::Uniform)); } + if (!m_flags.has_gx_gs_uniform && pipe->UsesVertexBuffer(2)) + { + m_flags.has_gx_gs_uniform = true; + [m_current_render_encoder setVertexBytes:&GeometryShaderManager::constants + length:sizeof(GeometryShaderConstants) + atIndex:2]; + ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, sizeof(GeometryShaderConstants)); + } if (!m_flags.has_gx_ps_uniform) { m_flags.has_gx_ps_uniform = true; diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index 30eb305de3..aa379c0851 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -75,6 +75,7 @@ void Metal::Util::PopulateBackendInfo(VideoConfig* config) // Metal requires multisample resolve to be done on a render pass config->backend_info.bSupportsPartialMultisampleResolve = false; config->backend_info.bSupportsDynamicVertexLoader = true; + config->backend_info.bSupportsVSLinePointExpand = true; } void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, @@ -427,6 +428,7 @@ std::optional 
Metal::Util::TranslateShaderToMSL(ShaderStage stage, static const spirv_cross::MSLResourceBinding resource_bindings[] = { MakeResourceBinding(spv::ExecutionModelVertex, 0, 0, 1, 0, 0), // vs/ubo MakeResourceBinding(spv::ExecutionModelVertex, 0, 1, 1, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelVertex, 0, 2, 2, 0, 0), // vs/ubo MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo diff --git a/Source/Core/VideoBackends/Metal/MTLVertexManager.mm b/Source/Core/VideoBackends/Metal/MTLVertexManager.mm index 8fb3e25de4..6cfad2d2f8 100644 --- a/Source/Core/VideoBackends/Metal/MTLVertexManager.mm +++ b/Source/Core/VideoBackends/Metal/MTLVertexManager.mm @@ -5,6 +5,7 @@ #include "VideoBackends/Metal/MTLStateTracker.h" +#include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexShaderManager.h" @@ -88,7 +89,9 @@ void Metal::VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 void Metal::VertexManager::UploadUniforms() { - g_state_tracker->InvalidateUniforms(VertexShaderManager::dirty, PixelShaderManager::dirty); + g_state_tracker->InvalidateUniforms(VertexShaderManager::dirty, GeometryShaderManager::dirty, + PixelShaderManager::dirty); VertexShaderManager::dirty = false; + GeometryShaderManager::dirty = false; PixelShaderManager::dirty = false; } From 3a5901d12e1257001f983acdb55448db29befed2 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 24 Jul 2022 03:32:59 -0500 Subject: [PATCH 3/9] VideoBackends:Vulkan: Add support for vertex shader point and line expansion --- .../Core/VideoBackends/Vulkan/ObjectCache.cpp | 23 +++++++++++--- .../VideoBackends/Vulkan/StateTracker.cpp | 31 ++++++++++--------- .../VideoBackends/Vulkan/VulkanContext.cpp | 1 + 3 files changed, 35 
insertions(+), 20 deletions(-) diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp index 353cb6da3f..57dbe6f5e3 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp @@ -156,9 +156,11 @@ bool ObjectCache::CreateDescriptorSetLayouts() {5, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT}, }}; + std::array ubo_bindings = standard_ubo_bindings; + std::array create_infos{{ {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(standard_ubo_bindings.size()), standard_ubo_bindings.data()}, + static_cast(ubo_bindings.size()), ubo_bindings.data()}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(standard_sampler_bindings.size()), standard_sampler_bindings.data()}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, @@ -172,8 +174,17 @@ bool ObjectCache::CreateDescriptorSetLayouts() }}; // Don't set the GS bit if geometry shaders aren't available. 
- if (!g_ActiveConfig.backend_info.bSupportsGeometryShaders) + if (g_ActiveConfig.UseVSForLinePointExpand()) + { + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) + ubo_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].stageFlags |= VK_SHADER_STAGE_VERTEX_BIT; + else + ubo_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + } + else if (!g_ActiveConfig.backend_info.bSupportsGeometryShaders) + { create_infos[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS].bindingCount--; + } // Remove the dynamic vertex loader's buffer if it'll never be needed if (!g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) @@ -244,12 +255,14 @@ bool ObjectCache::CreatePipelineLayouts() static_cast(compute_sets.size()), compute_sets.data(), 0, nullptr}, }}; + const bool ssbos_in_standard = + g_ActiveConfig.backend_info.bSupportsBBox || g_ActiveConfig.UseVSForLinePointExpand(); + // If bounding box is unsupported, don't bother with the SSBO descriptor set. - if (!g_ActiveConfig.backend_info.bSupportsBBox) + if (!ssbos_in_standard) pipeline_layout_info[PIPELINE_LAYOUT_STANDARD].setLayoutCount--; // If neither SSBO-using feature is supported, skip in ubershaders too - if (!g_ActiveConfig.backend_info.bSupportsBBox && - !g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + if (!ssbos_in_standard && !g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) pipeline_layout_info[PIPELINE_LAYOUT_UBER].setLayoutCount--; for (size_t i = 0; i < pipeline_layout_info.size(); i++) diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp index d9b03940ce..8b01c66ca8 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp @@ -458,6 +458,9 @@ void StateTracker::UpdateGXDescriptorSet() std::array writes; u32 num_writes = 0; + const bool needs_gs_ubo = g_ActiveConfig.backend_info.bSupportsGeometryShaders || + 
g_ActiveConfig.UseVSForLinePointExpand(); + if (m_dirty_flags & DIRTY_FLAG_GX_UBOS || m_gx_descriptor_sets[0] == VK_NULL_HANDLE) { m_gx_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( @@ -465,8 +468,7 @@ void StateTracker::UpdateGXDescriptorSet() for (size_t i = 0; i < NUM_UBO_DESCRIPTOR_SET_BINDINGS; i++) { - if (i == UBO_DESCRIPTOR_SET_BINDING_GS && - !g_ActiveConfig.backend_info.bSupportsGeometryShaders) + if (i == UBO_DESCRIPTOR_SET_BINDING_GS && !needs_gs_ubo) { continue; } @@ -505,8 +507,9 @@ void StateTracker::UpdateGXDescriptorSet() } const bool needs_bbox_ssbo = g_ActiveConfig.backend_info.bSupportsBBox; - const bool needs_vertex_ssbo = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader && - m_pipeline->GetUsage() == AbstractPipelineUsage::GXUber; + const bool needs_vertex_ssbo = (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader && + m_pipeline->GetUsage() == AbstractPipelineUsage::GXUber) || + g_ActiveConfig.UseVSForLinePointExpand(); const bool needs_ssbo = needs_bbox_ssbo || needs_vertex_ssbo; if (needs_ssbo && @@ -520,7 +523,8 @@ void StateTracker::UpdateGXDescriptorSet() VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, m_gx_descriptor_sets[2], 0, 0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, nullptr, &m_bindings.ssbo, nullptr}; - if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader || + g_ActiveConfig.UseVSForLinePointExpand()) { writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, @@ -546,21 +550,18 @@ void StateTracker::UpdateGXDescriptorSet() VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, needs_ssbo ? NUM_GX_DESCRIPTOR_SETS : (NUM_GX_DESCRIPTOR_SETS - 1), m_gx_descriptor_sets.data(), - g_ActiveConfig.backend_info.bSupportsGeometryShaders ? - NUM_UBO_DESCRIPTOR_SET_BINDINGS : - (NUM_UBO_DESCRIPTOR_SET_BINDINGS - 1), + needs_gs_ubo ? 
NUM_UBO_DESCRIPTOR_SET_BINDINGS : + (NUM_UBO_DESCRIPTOR_SET_BINDINGS - 1), m_bindings.gx_ubo_offsets.data()); m_dirty_flags &= ~(DIRTY_FLAG_DESCRIPTOR_SETS | DIRTY_FLAG_GX_UBO_OFFSETS); } else if (m_dirty_flags & DIRTY_FLAG_GX_UBO_OFFSETS) { - vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, - 1, m_gx_descriptor_sets.data(), - g_ActiveConfig.backend_info.bSupportsGeometryShaders ? - NUM_UBO_DESCRIPTOR_SET_BINDINGS : - (NUM_UBO_DESCRIPTOR_SET_BINDINGS - 1), - m_bindings.gx_ubo_offsets.data()); + vkCmdBindDescriptorSets( + g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, + m_pipeline->GetVkPipelineLayout(), 0, 1, m_gx_descriptor_sets.data(), + needs_gs_ubo ? NUM_UBO_DESCRIPTOR_SET_BINDINGS : (NUM_UBO_DESCRIPTOR_SET_BINDINGS - 1), + m_bindings.gx_ubo_offsets.data()); m_dirty_flags &= ~DIRTY_FLAG_GX_UBO_OFFSETS; } } diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index bc9cc1169b..a1119cefcb 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -299,6 +299,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsSettingObjectNames = false; // Dependent on features. config->backend_info.bSupportsPartialMultisampleResolve = true; // Assumed support. config->backend_info.bSupportsDynamicVertexLoader = true; // Assumed support. + config->backend_info.bSupportsVSLinePointExpand = true; // Assumed support. 
} void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) From 96244799330b00fe02cb8463a13873caeaab5ba5 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Fri, 30 Sep 2022 20:37:27 -0500 Subject: [PATCH 4/9] VideoBackends:OGL: Add support for vertex shader point and line expansion --- Source/Core/VideoBackends/OGL/OGLRender.cpp | 2 ++ Source/Core/VideoBackends/OGL/OGLVertexManager.cpp | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp index 78f9d8e176..49400d73ff 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.cpp +++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp @@ -423,6 +423,8 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ ((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart")); g_Config.backend_info.bSupportsFragmentStoresAndAtomics = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); + g_Config.backend_info.bSupportsVSLinePointExpand = + GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); g_Config.backend_info.bSupportsGSInstancing = GLExtensions::Supports("GL_ARB_gpu_shader5"); g_Config.backend_info.bSupportsSSAA = GLExtensions::Supports("GL_ARB_gpu_shader5") && GLExtensions::Supports("GL_ARB_sample_shading"); diff --git a/Source/Core/VideoBackends/OGL/OGLVertexManager.cpp b/Source/Core/VideoBackends/OGL/OGLVertexManager.cpp index 38024b0875..e47e425a3b 100644 --- a/Source/Core/VideoBackends/OGL/OGLVertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/OGLVertexManager.cpp @@ -58,6 +58,11 @@ bool VertexManager::Initialize() m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_STREAM_BUFFER_SIZE); m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_STREAM_BUFFER_SIZE); + if (g_ActiveConfig.UseVSForLinePointExpand() || + g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + { + 
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_vertex_buffer->GetGLBufferId()); + } if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) { From 1eeba6dccaad0dc74ec380746c0513a4b1cb35a9 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Fri, 30 Sep 2022 22:12:08 -0500 Subject: [PATCH 5/9] VideoBackends:D3D12: Add support for vertex shader point and line expansion --- .../Core/VideoBackends/D3D12/D3D12Renderer.cpp | 18 +++++++++++------- .../Core/VideoBackends/D3D12/DX12Context.cpp | 5 ++++- .../Core/VideoBackends/D3D12/VideoBackend.cpp | 1 + 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp index 80872086a9..fc76f8e6f2 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp @@ -19,6 +19,14 @@ namespace DX12 { +static bool UsesDynamicVertexLoader(const AbstractPipeline* pipeline) +{ + const AbstractPipelineUsage usage = static_cast(pipeline)->GetUsage(); + return (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader && + usage == AbstractPipelineUsage::GXUber) || + (g_ActiveConfig.UseVSForLinePointExpand() && usage != AbstractPipelineUsage::Utility); +} + Renderer::Renderer(std::unique_ptr swap_chain, float backbuffer_scale) : ::Renderer(swap_chain ? swap_chain->GetWidth() : 0, swap_chain ? 
swap_chain->GetHeight() : 0, backbuffer_scale, @@ -364,8 +372,7 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) return; // DX12 is great and doesn't include the base vertex in SV_VertexID - if (static_cast(m_current_pipeline)->GetUsage() == - AbstractPipelineUsage::GXUber) + if (UsesDynamicVertexLoader(m_current_pipeline)) g_dx_context->GetCommandList()->SetGraphicsRoot32BitConstant( ROOT_PARAMETER_BASE_VERTEX_CONSTANT, base_vertex, 0); g_dx_context->GetCommandList()->DrawIndexedInstanced(num_indices, 1, base_index, base_vertex, 0); @@ -601,8 +608,7 @@ bool Renderer::ApplyState() } } - if (dirty_bits & DirtyState_VS_SRV_Descriptor && - pipeline->GetUsage() == AbstractPipelineUsage::GXUber) + if (dirty_bits & DirtyState_VS_SRV_Descriptor && UsesDynamicVertexLoader(pipeline)) { cmdlist->SetGraphicsRootDescriptorTable(ROOT_PARAMETER_VS_SRV, m_state.vertex_srv_descriptor_base); @@ -724,9 +730,7 @@ bool Renderer::UpdateUAVDescriptorTable() bool Renderer::UpdateVSSRVDescriptorTable() { - if (!g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader || - static_cast(m_current_pipeline)->GetUsage() != - AbstractPipelineUsage::GXUber) + if (!UsesDynamicVertexLoader(m_current_pipeline)) { return true; } diff --git a/Source/Core/VideoBackends/D3D12/DX12Context.cpp b/Source/Core/VideoBackends/D3D12/DX12Context.cpp index 955f26413e..e0fd41e4f0 100644 --- a/Source/Core/VideoBackends/D3D12/DX12Context.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12Context.cpp @@ -353,7 +353,10 @@ bool DXContext::CreateGXRootSignature() param_count++; SetRootParamCBV(¶ms[param_count], 1, D3D12_SHADER_VISIBILITY_VERTEX); param_count++; - SetRootParamCBV(¶ms[param_count], 0, D3D12_SHADER_VISIBILITY_GEOMETRY); + if (g_ActiveConfig.UseVSForLinePointExpand()) + SetRootParamCBV(¶ms[param_count], 2, D3D12_SHADER_VISIBILITY_VERTEX); + else + SetRootParamCBV(¶ms[param_count], 0, D3D12_SHADER_VISIBILITY_GEOMETRY); param_count++; SetRootParamTable(¶ms[param_count], 
&ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3, 1, D3D12_SHADER_VISIBILITY_VERTEX); diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp index 8426709b39..1e7483fd6a 100644 --- a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp +++ b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp @@ -88,6 +88,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsSettingObjectNames = true; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; g_Config.backend_info.bSupportsDynamicVertexLoader = true; + g_Config.backend_info.bSupportsVSLinePointExpand = true; // We can only check texture support once we have a device. if (g_dx_context) From 0a42c534c397b6f99013c25c03799c1c673885b4 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 24 Jul 2022 03:51:22 -0500 Subject: [PATCH 6/9] VideoCommon: Add configuration to prefer VS for line/point expansion --- Source/Core/Core/Config/GraphicsSettings.cpp | 2 ++ Source/Core/Core/Config/GraphicsSettings.h | 1 + .../Config/Graphics/AdvancedWidget.cpp | 20 ++++++++++++++++++- .../Config/Graphics/AdvancedWidget.h | 1 + Source/Core/VideoCommon/VideoConfig.cpp | 1 + Source/Core/VideoCommon/VideoConfig.h | 5 ++++- 6 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 6f0ee5940c..c9091db2e2 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -84,6 +84,8 @@ const Info GFX_SHADER_PRECOMPILER_THREADS{ {System::GFX, "Settings", "ShaderPrecompilerThreads"}, -1}; const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE{ {System::GFX, "Settings", "SaveTextureCacheToState"}, true}; +const Info GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{ + {System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false}; const Info GFX_SW_DUMP_OBJECTS{{System::GFX, "Settings", "SWDumpObjects"}, false}; const Info 
GFX_SW_DUMP_TEV_STAGES{{System::GFX, "Settings", "SWDumpTevStages"}, false}; diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 398a3dd5a0..3497aa6281 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -73,6 +73,7 @@ extern const Info GFX_SHADER_COMPILATION_MODE; extern const Info GFX_SHADER_COMPILER_THREADS; extern const Info GFX_SHADER_PRECOMPILER_THREADS; extern const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE; +extern const Info GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION; extern const Info GFX_SW_DUMP_OBJECTS; extern const Info GFX_SW_DUMP_TEV_STAGES; diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp index 47fa7d3eae..dfd809f956 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp @@ -128,15 +128,18 @@ void AdvancedWidget::CreateWidgets() m_enable_prog_scan = new ToolTipCheckBox(tr("Enable Progressive Scan")); m_backend_multithreading = new GraphicsBool(tr("Backend Multithreading"), Config::GFX_BACKEND_MULTITHREADING); + m_prefer_vs_for_point_line_expansion = new GraphicsBool( + tr("Prefer VS for Point/Line Expansion"), Config::GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION); misc_layout->addWidget(m_enable_cropping, 0, 0); misc_layout->addWidget(m_enable_prog_scan, 0, 1); misc_layout->addWidget(m_backend_multithreading, 1, 0); + misc_layout->addWidget(m_prefer_vs_for_point_line_expansion, 1, 1); #ifdef _WIN32 m_borderless_fullscreen = new GraphicsBool(tr("Borderless Fullscreen"), Config::GFX_BORDERLESS_FULLSCREEN); - misc_layout->addWidget(m_borderless_fullscreen, 1, 1); + misc_layout->addWidget(m_borderless_fullscreen, 2, 0); #endif // Experimental. 
@@ -198,11 +201,19 @@ void AdvancedWidget::SaveSettings() void AdvancedWidget::OnBackendChanged() { m_backend_multithreading->setEnabled(g_Config.backend_info.bSupportsMultithreading); + m_prefer_vs_for_point_line_expansion->setEnabled( + Core::GetState() == Core::State::Uninitialized && + g_Config.backend_info.bSupportsGeometryShaders && + g_Config.backend_info.bSupportsVSLinePointExpand); } void AdvancedWidget::OnEmulationStateChanged(bool running) { m_enable_prog_scan->setEnabled(!running); + m_prefer_vs_for_point_line_expansion->setEnabled( + !running && + g_Config.backend_info.bSupportsGeometryShaders && + g_Config.backend_info.bSupportsVSLinePointExpand); } void AdvancedWidget::AddDescriptions() @@ -289,6 +300,11 @@ void AdvancedWidget::AddDescriptions() "this option may result in a performance improvement on systems with more than " "two CPU cores. Currently, this is limited to the Vulkan backend.

" "If unsure, leave this checked."); + static const char TR_PREFER_VS_FOR_POINT_LINE_EXPANSION_DESCRIPTION[] = + QT_TR_NOOP("On backends that support both using the geometry shader and the vertex shader " + "for expanding points and lines, selects the vertex shader for the job. May " + "affect performance." + "

If unsure, leave this unchecked."); static const char TR_DEFER_EFB_ACCESS_INVALIDATION_DESCRIPTION[] = QT_TR_NOOP( "Defers invalidation of the EFB access cache until a GPU synchronization command " "is executed. If disabled, the cache will be invalidated with every draw call. " @@ -337,6 +353,8 @@ void AdvancedWidget::AddDescriptions() m_enable_cropping->SetDescription(tr(TR_CROPPING_DESCRIPTION)); m_enable_prog_scan->SetDescription(tr(TR_PROGRESSIVE_SCAN_DESCRIPTION)); m_backend_multithreading->SetDescription(tr(TR_BACKEND_MULTITHREADING_DESCRIPTION)); + m_prefer_vs_for_point_line_expansion->SetDescription( + tr(TR_PREFER_VS_FOR_POINT_LINE_EXPANSION_DESCRIPTION)); #ifdef _WIN32 m_borderless_fullscreen->SetDescription(tr(TR_BORDERLESS_FULLSCREEN_DESCRIPTION)); #endif diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h index 4530db5fcf..8188fcb010 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h @@ -59,6 +59,7 @@ private: GraphicsBool* m_enable_cropping; ToolTipCheckBox* m_enable_prog_scan; GraphicsBool* m_backend_multithreading; + GraphicsBool* m_prefer_vs_for_point_line_expansion; GraphicsBool* m_borderless_fullscreen; // Experimental diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 91e0550715..91df848c94 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -85,6 +85,7 @@ void VideoConfig::Refresh() iBitrateKbps = Config::Get(Config::GFX_BITRATE_KBPS); bInternalResolutionFrameDumps = Config::Get(Config::GFX_INTERNAL_RESOLUTION_FRAME_DUMPS); bEnableGPUTextureDecoding = Config::Get(Config::GFX_ENABLE_GPU_TEXTURE_DECODING); + bPreferVSForLinePointExpansion = Config::Get(Config::GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION); bEnablePixelLighting = Config::Get(Config::GFX_ENABLE_PIXEL_LIGHTING); bFastDepthCalc = 
Config::Get(Config::GFX_FAST_DEPTH_CALC); iMultisamples = Config::Get(Config::GFX_MSAA); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index dfdd0ffe79..471ff5a18f 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -106,6 +106,7 @@ struct VideoConfig final bool bInternalResolutionFrameDumps = false; bool bBorderlessFullscreen = false; bool bEnableGPUTextureDecoding = false; + bool bPreferVSForLinePointExpansion = false; int iBitrateKbps = 0; bool bGraphicMods = false; std::optional graphics_mod_config; @@ -230,7 +231,9 @@ struct VideoConfig final { if (!backend_info.bSupportsVSLinePointExpand) return false; - return !backend_info.bSupportsGeometryShaders; + if (!backend_info.bSupportsGeometryShaders) + return true; + return bPreferVSForLinePointExpansion; } bool MultisamplingEnabled() const { return iMultisamples > 1; } bool ExclusiveFullscreenEnabled() const From b567f3afcf79ab9333e9f5b17bad9aa17880b74e Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Tue, 4 Oct 2022 22:54:30 -0500 Subject: [PATCH 7/9] VideoCommon: Move repeated point/line expansion code to ShaderGenCommon --- Source/Core/VideoCommon/GeometryShaderGen.cpp | 17 +---- Source/Core/VideoCommon/ShaderGenCommon.cpp | 72 +++++++++++++++++++ Source/Core/VideoCommon/ShaderGenCommon.h | 7 ++ Source/Core/VideoCommon/UberShaderVertex.cpp | 60 ++-------------- Source/Core/VideoCommon/VertexShaderGen.cpp | 50 +------------ 5 files changed, 89 insertions(+), 117 deletions(-) diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 04d0062982..2db201c636 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -169,22 +169,7 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig& "\tVS_OUTPUT end = o[1];\n"); } - // GameCube/Wii's line drawing algorithm is a little quirky. 
It does not - // use the correct line caps. Instead, the line caps are vertical or - // horizontal depending the slope of the line. - out.Write("\tfloat2 offset;\n" - "\tfloat2 to = abs(end.pos.xy / end.pos.w - start.pos.xy / start.pos.w);\n" - // FIXME: What does real hardware do when line is at a 45-degree angle? - // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. - "\tif (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" - // Line is more tall. Extend geometry left and right. - // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] - "\t\toffset = float2(" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" - "\t}} else {{\n" - // Line is more wide. Extend geometry up and down. - // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] - "\t\toffset = float2(0, -" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" - "\t}}\n"); + GenerateLineOffset(out, "\t", "\t\t", "end.pos", "start.pos", ""); } else if (primitive_type == PrimitiveType::Points) { diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 85757b64c5..8d880d5479 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -254,6 +254,78 @@ void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_v } } +void GenerateLineOffset(ShaderCode& object, std::string_view indent0, std::string_view indent1, + std::string_view pos_a, std::string_view pos_b, std::string_view sign) +{ + // GameCube/Wii's line drawing algorithm is a little quirky. It does not + // use the correct line caps. Instead, the line caps are vertical or + // horizontal depending the slope of the line. + object.Write("{indent0}float2 offset;\n" + "{indent0}float2 to = abs({pos_a}.xy / {pos_a}.w - {pos_b}.xy / {pos_b}.w);\n" + // FIXME: What does real hardware do when line is at a 45-degree angle? + // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. 
+ "{indent0}if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" + // Line is more tall. Extend geometry left and right. + // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] + "{indent1}offset = float2({sign}" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" + "{indent0}}} else {{\n" + // Line is more wide. Extend geometry up and down. + // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] + "{indent1}offset = float2(0, {sign}-" I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" + "{indent0}}}\n", + fmt::arg("indent0", indent0), fmt::arg("indent1", indent1), // + fmt::arg("pos_a", pos_a), fmt::arg("pos_b", pos_b), fmt::arg("sign", sign)); +} + +void GenerateVSLineExpansion(ShaderCode& object, std::string_view indent, u32 texgens) +{ + std::string indent1 = std::string(indent) + " "; + object.Write("{0}other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION + "[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION + "[3], other_pos));\n" + "\n" + "{0}float expand_sign = is_right ? 1.0f : -1.0f;\n", + indent); + GenerateLineOffset(object, indent, indent1, "o.pos", "other_pos", "expand_sign * "); + object.Write("\n" + "{}o.pos.xy += offset * o.pos.w;\n", + indent); + if (texgens > 0) + { + object.Write("{}if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n", indent); + object.Write("{} float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n", indent); + for (u32 i = 0; i < texgens; i++) + { + object.Write("{} if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", indent, i); + object.Write("{} o.tex{}.x += texOffset;\n", indent, i); + } + object.Write("{}}}\n", indent); + } +} + +void GenerateVSPointExpansion(ShaderCode& object, std::string_view indent, u32 texgens) +{ + object.Write( + "{0}float2 expand_sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 
-1.0f : 1.0f);\n" + "{0}float2 offset = expand_sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n" + "{0}o.pos.xy += offset * o.pos.w;\n", + indent); + if (texgens > 0) + { + object.Write("{0}if (" I_TEXOFFSET "[3] != 0) {{\n" + "{0} float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n" + "{0} float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, " + "is_bottom ? texOffsetMagnitude : 0.0f);", + indent); + for (u32 i = 0; i < texgens; i++) + { + object.Write("{} if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", indent, i); + object.Write("{} o.tex{}.xy += texOffset;\n", indent, i); + } + object.Write("{}}}\n", indent); + } +} + const char* GetInterpolationQualifier(bool msaa, bool ssaa, bool in_glsl_interface_block, bool in) { if (!msaa) diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 22bad1b220..25b7e32eb6 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -198,6 +198,13 @@ void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens, void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_view b, u32 texgens, const ShaderHostConfig& host_config); +void GenerateLineOffset(ShaderCode& object, std::string_view indent0, std::string_view indent1, + std::string_view pos_a, std::string_view pos_b, std::string_view sign); + +void GenerateVSLineExpansion(ShaderCode& object, std::string_view indent, u32 texgens); + +void GenerateVSPointExpansion(ShaderCode& object, std::string_view indent, u32 texgens); + // We use the flag "centroid" to fix some MSAA rendering bugs. With MSAA, the // pixel shader will be executed for each pixel which has at least one passed sample. // So there may be rendered pixels where the center of the pixel isn't in the primitive. 
diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 0bfacf88ee..4000f858a9 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -356,67 +356,19 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ " float4 other_p2 = P2;\n" " if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n", VB_HAS_POSMTXIDX); - out.Write(" uint other_posidx = int(load_input_uint4_ubyte4(other_base_offset, " - "vertex_offset_posmtx).r);\n" + out.Write(" uint other_posidx = load_input_uint4_ubyte4(other_base_offset, " + "vertex_offset_posmtx).r;\n" " other_p0 = " I_TRANSFORMMATRICES "[other_posidx];\n" " other_p1 = " I_TRANSFORMMATRICES "[other_posidx+1];\n" " other_p2 = " I_TRANSFORMMATRICES "[other_posidx+2];\n" " }}\n" " float4 other_pos = float4(dot(other_p0, other_rawpos), " - "dot(other_p1, other_rawpos), dot(other_p2, other_rawpos), 1.0);\n" - " other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION - "[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION - "[3], other_pos));\n" - "\n" - " float sign = is_right ? 1.0f : -1.0f;\n" - // GameCube/Wii's line drawing algorithm is a little quirky. It does not - // use the correct line caps. Instead, the line caps are vertical or - // horizontal depending the slope of the line. - " float2 offset;\n" - " float2 to = abs(o.pos.xy / o.pos.w - other_pos.xy / other_pos.w);\n" - // FIXME: What does real hardware do when line is at a 45-degree angle? - // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. - " if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" - // Line is more tall. Extend geometry left and right. - // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] - " offset = float2(sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" - " }} else {{\n" - // Line is more wide. Extend geometry up and down. 
- // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] - " offset = float2(0, sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" - " }}\n" - "\n" - " o.pos.xy += offset * o.pos.w;\n"); - if (num_texgen > 0) - { - out.Write(" if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n" - " float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); - for (u32 i = 0; i < num_texgen; i++) - { - out.Write(" if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i); - out.Write(" o.tex{}.x += texOffset;\n", i); - } - out.Write(" }}\n"); - } + "dot(other_p1, other_rawpos), dot(other_p2, other_rawpos), 1.0);\n"); + GenerateVSLineExpansion(out, " ", num_texgen); out.Write("}} else if (vs_expand == {}u) {{ // Point\n", static_cast(VSExpand::Point)); out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n" - " bool is_right = (gl_VertexID & 1) != 0;\n" - " float2 sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 1.0f : -1.0f);\n" - " float2 offset = sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n" - " o.pos.xy += offset * o.pos.w;\n"); - if (num_texgen > 0) - { - out.Write(" if (" I_TEXOFFSET "[3] != 0) {{\n" - " float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n" - " float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, " - "is_bottom ? 
texOffsetMagnitude : 0.0f);"); - for (u32 i = 0; i < num_texgen; i++) - { - out.Write(" if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", i); - out.Write(" o.tex{}.xy += texOffset;\n", i); - } - out.Write(" }}\n"); - } + " bool is_right = (gl_VertexID & 1) != 0;\n"); + GenerateVSPointExpansion(out, " ", num_texgen); out.Write("}}\n"); } diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 948dc3ad34..5570e52cc0 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -547,56 +547,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho out.Write("other_pos = float4(dot(P0, other_pos), dot(P1, other_pos), dot(P2, other_pos), " "1.0f);\n"); } - out.Write("other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION - "[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION - "[3], other_pos));\n" - "float expand_sign = is_right ? 1.0f : -1.0f;\n" - "float2 offset;\n" - "float2 to = abs(o.pos.xy / o.pos.w - other_pos.xy / other_pos.w);\n" - // FIXME: What does real hardware do when line is at a 45-degree angle? - // FIXME: Lines aren't drawn at the correct width. See Twilight Princess map. - "if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n" - // Line is more tall. Extend geometry left and right. - // Lerp LineWidth/2 from [0..VpWidth] to [-1..1] - " offset = float2(expand_sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n" - "}} else {{\n" - // Line is more wide. Extend geometry up and down. 
- // Lerp LineWidth/2 from [0..VpHeight] to [1..-1] - " offset = float2(0, expand_sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n" - "}}\n" - "\n" - "o.pos.xy += offset * o.pos.w;\n"); - if (uid_data->numTexGens > 0) - { - out.Write("if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n" - " float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n"); - for (u32 i = 0; i < uid_data->numTexGens; i++) - { - out.Write(" if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i); - out.Write(" o.tex{}.x += texOffset;\n", i); - } - out.Write("}}\n"); - } + GenerateVSLineExpansion(out, "", uid_data->numTexGens); } else if (uid_data->vs_expand == VSExpand::Point) { - out.Write("// Point expansion\n" - "float2 expand_sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 1.0f : -1.0f);\n" - "float2 offset = expand_sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n" - "o.pos.xy += offset * o.pos.w;\n"); - if (uid_data->numTexGens > 0) - { - out.Write("if (" I_TEXOFFSET "[3] != 0) {{\n" - " float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n" - " float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, " - "is_bottom ? 
texOffsetMagnitude : 0.0f);"); - for (u32 i = 0; i < uid_data->numTexGens; i++) - { - out.Write(" if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", i); - out.Write(" o.tex{}.xy += texOffset;\n", i); - } - out.Write("}}\n"); - } + out.Write("// Point expansion\n"); + GenerateVSPointExpansion(out, "", uid_data->numTexGens); } if (per_pixel_lighting) From 3912fa7a2ed8ce80d01edb86b5a421bf53ff69ee Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 22 Oct 2022 19:48:50 -0500 Subject: [PATCH 8/9] VideoCommon: Add reasons for disabled VS expand --- .../Config/Graphics/AdvancedWidget.cpp | 17 +++++++++++++++-- Source/Core/VideoCommon/VideoBackendBase.cpp | 1 + Source/Core/VideoCommon/VideoConfig.h | 1 + 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp index dfd809f956..9c392e02ce 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp @@ -205,6 +205,7 @@ void AdvancedWidget::OnBackendChanged() Core::GetState() == Core::State::Uninitialized && g_Config.backend_info.bSupportsGeometryShaders && g_Config.backend_info.bSupportsVSLinePointExpand); + AddDescriptions(); } void AdvancedWidget::OnEmulationStateChanged(bool running) @@ -304,7 +305,7 @@ void AdvancedWidget::AddDescriptions() QT_TR_NOOP("On backends that support both using the geometry shader and the vertex shader " "for expanding points and lines, selects the vertex shader for the job. May " "affect performance." - "

If unsure, leave this unchecked."); + "

%1"); static const char TR_DEFER_EFB_ACCESS_INVALIDATION_DESCRIPTION[] = QT_TR_NOOP( "Defers invalidation of the EFB access cache until a GPU synchronization command " "is executed. If disabled, the cache will be invalidated with every draw call. " @@ -332,6 +333,9 @@ void AdvancedWidget::AddDescriptions() "unchecked."); #endif + static const char IF_UNSURE_UNCHECKED[] = + QT_TR_NOOP("If unsure, leave this unchecked."); + m_enable_wireframe->SetDescription(tr(TR_WIREFRAME_DESCRIPTION)); m_show_statistics->SetDescription(tr(TR_SHOW_STATS_DESCRIPTION)); m_enable_format_overlay->SetDescription(tr(TR_TEXTURE_FORMAT_DESCRIPTION)); @@ -353,8 +357,17 @@ void AdvancedWidget::AddDescriptions() m_enable_cropping->SetDescription(tr(TR_CROPPING_DESCRIPTION)); m_enable_prog_scan->SetDescription(tr(TR_PROGRESSIVE_SCAN_DESCRIPTION)); m_backend_multithreading->SetDescription(tr(TR_BACKEND_MULTITHREADING_DESCRIPTION)); + QString vsexpand_extra; + if (!g_Config.backend_info.bSupportsGeometryShaders) + vsexpand_extra = tr("Forced on because %1 doesn't support geometry shaders.") + .arg(tr(g_Config.backend_info.DisplayName.c_str())); + else if (!g_Config.backend_info.bSupportsVSLinePointExpand) + vsexpand_extra = tr("Forced off because %1 doesn't support VS expansion.") + .arg(tr(g_Config.backend_info.DisplayName.c_str())); + else + vsexpand_extra = tr(IF_UNSURE_UNCHECKED); m_prefer_vs_for_point_line_expansion->SetDescription( - tr(TR_PREFER_VS_FOR_POINT_LINE_EXPANSION_DESCRIPTION)); + tr(TR_PREFER_VS_FOR_POINT_LINE_EXPANSION_DESCRIPTION).arg(vsexpand_extra)); #ifdef _WIN32 m_borderless_fullscreen->SetDescription(tr(TR_BORDERLESS_FULLSCREEN_DESCRIPTION)); #endif diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index b385883093..2b4883d22b 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -274,6 +274,7 @@ void VideoBackendBase::PopulateBackendInfo() // a value from the 
previously used renderer g_Config.backend_info = {}; ActivateBackend(Config::Get(Config::MAIN_GFX_BACKEND)); + g_Config.backend_info.DisplayName = g_video_backend->GetDisplayName(); g_video_backend->InitBackendInfo(); // We validate the config after initializing the backend info, as system-specific settings // such as anti-aliasing, or the selected adapter may be invalid, and should be checked. diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 471ff5a18f..d0863a9d84 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -174,6 +174,7 @@ struct VideoConfig final struct { APIType api_type = APIType::Nothing; + std::string DisplayName; std::vector Adapters; // for D3D std::vector AAModes; From 1e9b6f88e4c13722a2a47aeb0d54df95c3a07c69 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 22 Oct 2022 20:11:52 -0500 Subject: [PATCH 9/9] VideoCommon: Support hot reloading of VS expand --- Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp | 5 ----- Source/Core/VideoCommon/RenderBase.cpp | 1 + Source/Core/VideoCommon/VertexManagerBase.cpp | 6 ++++++ Source/Core/VideoCommon/VertexManagerBase.h | 3 +++ 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp index 9c392e02ce..4a25976a1c 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp @@ -202,7 +202,6 @@ void AdvancedWidget::OnBackendChanged() { m_backend_multithreading->setEnabled(g_Config.backend_info.bSupportsMultithreading); m_prefer_vs_for_point_line_expansion->setEnabled( - Core::GetState() == Core::State::Uninitialized && g_Config.backend_info.bSupportsGeometryShaders && g_Config.backend_info.bSupportsVSLinePointExpand); AddDescriptions(); @@ -211,10 +210,6 @@ void AdvancedWidget::OnBackendChanged() void 
AdvancedWidget::OnEmulationStateChanged(bool running) { m_enable_prog_scan->setEnabled(!running); - m_prefer_vs_for_point_line_expansion->setEnabled( - !running && - g_Config.backend_info.bSupportsGeometryShaders && - g_Config.backend_info.bSupportsVSLinePointExpand); } void AdvancedWidget::AddDescriptions() diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 6f727c50ed..f787ab7dab 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -488,6 +488,7 @@ void Renderer::CheckForConfigChanges() UpdateActiveConfig(); FreeLook::UpdateActiveConfig(); + g_vertex_manager->OnConfigChange(); g_freelook_camera.SetControlType(FreeLook::GetActiveConfig().camera_config.control_type); diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index f8e4970a51..273f67c746 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -826,6 +826,12 @@ void VertexManagerBase::UpdatePipelineObject() } } +void VertexManagerBase::OnConfigChange() +{ + // Reload index generator function tables in case VS expand config changed + m_index_generator.Init(); +} + void VertexManagerBase::OnDraw() { m_draw_counter++; diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index dc35ab96e2..ba3777a7fe 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -140,6 +140,9 @@ public: u32* out_offset, const void* palette_data, u32 palette_size, TexelBufferFormat palette_format, u32* out_palette_offset); + // Call if active config changes + void OnConfigChange(); + // CPU access tracking - call after a draw call is made. void OnDraw();