diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index a6467f2b2c..f97e166f19 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -74,7 +74,7 @@ static std::recursive_mutex g_save_thread_mutex; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -constexpr u32 STATE_VERSION = 148; // Last changed in PR 10768 +constexpr u32 STATE_VERSION = 149; // Last changed in PR 10781 // Maps savestate versions to Dolphin versions. // Versions after 42 don't need to be added to this list, diff --git a/Source/Core/VideoBackends/D3D/D3DMain.cpp b/Source/Core/VideoBackends/D3D/D3DMain.cpp index b8d7fb5871..755f0cf590 100644 --- a/Source/Core/VideoBackends/D3D/D3DMain.cpp +++ b/Source/Core/VideoBackends/D3D/D3DMain.cpp @@ -112,6 +112,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter); g_Config.backend_info.bSupportsSettingObjectNames = true; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + g_Config.backend_info.bSupportsDynamicVertexLoader = false; g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames(); g_Config.backend_info.AAModes = D3D::GetAAModes(g_Config.iAdapter); diff --git a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp index b1cb3378ed..80872086a9 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp @@ -180,7 +180,8 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline) m_state.root_signature = dx_pipeline->GetRootSignature(); m_dirty_bits |= DirtyState_RootSignature | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV | DirtyState_SRV_Descriptor | - DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor; + DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor | + DirtyState_VS_SRV_Descriptor; } if (dx_pipeline->UseIntegerRTV() != 
m_state.using_integer_rtv) { @@ -362,6 +363,11 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) if (!ApplyState()) return; + // DX12 is great and doesn't include the base vertex in SV_VertexID + if (static_cast(m_current_pipeline)->GetUsage() == + AbstractPipelineUsage::GXUber) + g_dx_context->GetCommandList()->SetGraphicsRoot32BitConstant( + ROOT_PARAMETER_BASE_VERTEX_CONSTANT, base_vertex, 0); g_dx_context->GetCommandList()->DrawIndexedInstanced(num_indices, 1, base_index, base_vertex, 0); } @@ -494,18 +500,22 @@ void Renderer::SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) m_dirty_bits |= DirtyState_PS_UAV; } -void Renderer::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size) +void Renderer::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, D3D12_CPU_DESCRIPTOR_HANDLE srv, + u32 stride, u32 size) { - if (m_state.vertex_buffer.BufferLocation == address && - m_state.vertex_buffer.StrideInBytes == stride && m_state.vertex_buffer.SizeInBytes == size) + if (m_state.vertex_buffer.BufferLocation != address || + m_state.vertex_buffer.StrideInBytes != stride || m_state.vertex_buffer.SizeInBytes != size) { - return; + m_state.vertex_buffer.BufferLocation = address; + m_state.vertex_buffer.StrideInBytes = stride; + m_state.vertex_buffer.SizeInBytes = size; + m_dirty_bits |= DirtyState_VertexBuffer; + } + if (m_state.vs_srv.ptr != srv.ptr) + { + m_state.vs_srv = srv; + m_dirty_bits |= DirtyState_VS_SRV; } - - m_state.vertex_buffer.BufferLocation = address; - m_state.vertex_buffer.StrideInBytes = stride; - m_state.vertex_buffer.SizeInBytes = size; - m_dirty_bits |= DirtyState_VertexBuffer; } void Renderer::SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format) @@ -535,15 +545,17 @@ bool Renderer::ApplyState() // Clear bits before actually changing state. Some state (e.g. cbuffers) can't be set // if utility pipelines are bound. 
const u32 dirty_bits = m_dirty_bits; - m_dirty_bits &= ~( - DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Viewport | DirtyState_ScissorRect | - DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV | - DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor | - DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology); + m_dirty_bits &= + ~(DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Viewport | + DirtyState_ScissorRect | DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | + DirtyState_GS_CBV | DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | + DirtyState_UAV_Descriptor | DirtyState_VertexBuffer | DirtyState_IndexBuffer | + DirtyState_PrimitiveTopology | DirtyState_VS_SRV_Descriptor); auto* const cmdlist = g_dx_context->GetCommandList(); + auto* const pipeline = static_cast(m_current_pipeline); if (dirty_bits & DirtyState_Pipeline) - cmdlist->SetPipelineState(static_cast(m_current_pipeline)->GetPipeline()); + cmdlist->SetPipelineState(pipeline->GetPipeline()); if (dirty_bits & DirtyState_Framebuffer) BindFramebuffer(static_cast(m_current_framebuffer)); @@ -572,7 +584,7 @@ bool Renderer::ApplyState() m_state.sampler_descriptor_base); } - if (static_cast(m_current_pipeline)->GetUsage() == AbstractPipelineUsage::GX) + if (pipeline->GetUsage() != AbstractPipelineUsage::Utility) { if (dirty_bits & DirtyState_VS_CBV) { @@ -589,6 +601,13 @@ bool Renderer::ApplyState() } } + if (dirty_bits & DirtyState_VS_SRV_Descriptor && + pipeline->GetUsage() == AbstractPipelineUsage::GXUber) + { + cmdlist->SetGraphicsRootDescriptorTable(ROOT_PARAMETER_VS_SRV, + m_state.vertex_srv_descriptor_base); + } + if (dirty_bits & DirtyState_GS_CBV) { cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_GS_CBV, @@ -641,7 +660,9 @@ void Renderer::UpdateDescriptorTables() const bool sampler_update_failed = (m_dirty_bits & DirtyState_Samplers) && 
!UpdateSamplerDescriptorTable(); const bool uav_update_failed = (m_dirty_bits & DirtyState_PS_UAV) && !UpdateUAVDescriptorTable(); - if (texture_update_failed || sampler_update_failed || uav_update_failed) + const bool srv_update_failed = + (m_dirty_bits & DirtyState_VS_SRV) && !UpdateVSSRVDescriptorTable(); + if (texture_update_failed || sampler_update_failed || uav_update_failed || srv_update_failed) { WARN_LOG_FMT(VIDEO, "Executing command list while waiting for temporary {}", texture_update_failed ? "descriptors" : "samplers"); @@ -651,6 +672,7 @@ void Renderer::UpdateDescriptorTables() UpdateSRVDescriptorTable(); UpdateSamplerDescriptorTable(); UpdateUAVDescriptorTable(); + UpdateVSSRVDescriptorTable(); } } @@ -700,6 +722,26 @@ bool Renderer::UpdateUAVDescriptorTable() return true; } +bool Renderer::UpdateVSSRVDescriptorTable() +{ + if (!g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader || + static_cast(m_current_pipeline)->GetUsage() != + AbstractPipelineUsage::GXUber) + { + return true; + } + + DescriptorHandle handle; + if (!g_dx_context->GetDescriptorAllocator()->Allocate(1, &handle)) + return false; + + g_dx_context->GetDevice()->CopyDescriptorsSimple(1, handle.cpu_handle, m_state.vs_srv, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_state.vertex_srv_descriptor_base = handle.gpu_handle; + m_dirty_bits = (m_dirty_bits & ~DirtyState_VS_SRV) | DirtyState_VS_SRV_Descriptor; + return true; +} + bool Renderer::UpdateComputeUAVDescriptorTable() { DescriptorHandle handle; diff --git a/Source/Core/VideoBackends/D3D12/D3D12Renderer.h b/Source/Core/VideoBackends/D3D12/D3D12Renderer.h index 9624199aa4..19188a5acf 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12Renderer.h +++ b/Source/Core/VideoBackends/D3D12/D3D12Renderer.h @@ -88,7 +88,8 @@ public: void SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle); // Graphics vertex/index buffer binding. 
- void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size); + void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, D3D12_CPU_DESCRIPTOR_HANDLE srv, + u32 stride, u32 size); void SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format); // Binds all dirty state @@ -126,6 +127,8 @@ private: DirtyState_RootSignature = (1 << 17), DirtyState_ComputeRootSignature = (1 << 18), DirtyState_DescriptorHeaps = (1 << 19), + DirtyState_VS_SRV = (1 << 20), + DirtyState_VS_SRV_Descriptor = (1 << 21), DirtyState_All = DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Textures | DirtyState_Samplers | @@ -133,7 +136,8 @@ private: DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV | DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor | DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology | - DirtyState_RootSignature | DirtyState_ComputeRootSignature | DirtyState_DescriptorHeaps + DirtyState_RootSignature | DirtyState_ComputeRootSignature | DirtyState_DescriptorHeaps | + DirtyState_VS_SRV | DirtyState_VS_SRV_Descriptor }; void CheckForSwapChainChanges(); @@ -144,6 +148,7 @@ private: void UpdateDescriptorTables(); bool UpdateSRVDescriptorTable(); bool UpdateUAVDescriptorTable(); + bool UpdateVSSRVDescriptorTable(); bool UpdateComputeUAVDescriptorTable(); bool UpdateSamplerDescriptorTable(); @@ -157,11 +162,13 @@ private: DXShader* compute_shader = nullptr; std::array constant_buffers = {}; std::array textures = {}; + D3D12_CPU_DESCRIPTOR_HANDLE vs_srv = {}; D3D12_CPU_DESCRIPTOR_HANDLE ps_uav = {}; SamplerStateSet samplers = {}; const DXTexture* compute_image_texture = nullptr; D3D12_VIEWPORT viewport = {}; D3D12_RECT scissor = {}; + D3D12_GPU_DESCRIPTOR_HANDLE vertex_srv_descriptor_base = {}; D3D12_GPU_DESCRIPTOR_HANDLE srv_descriptor_base = {}; D3D12_GPU_DESCRIPTOR_HANDLE sampler_descriptor_base = {}; D3D12_GPU_DESCRIPTOR_HANDLE 
uav_descriptor_base = {}; diff --git a/Source/Core/VideoBackends/D3D12/D3D12VertexManager.cpp b/Source/Core/VideoBackends/D3D12/D3D12VertexManager.cpp index b85e4e459c..5a9efc0d6c 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12VertexManager.cpp +++ b/Source/Core/VideoBackends/D3D12/D3D12VertexManager.cpp @@ -64,6 +64,18 @@ bool VertexManager::Initialize() &srv_desc, dh.cpu_handle); } + if (!g_dx_context->GetDescriptorHeapManager().Allocate(&m_vertex_srv)) + { + PanicAlertFmt("Failed to allocate descriptor for vertex srv"); + return false; + } + + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {DXGI_FORMAT_R32_UINT, D3D12_SRV_DIMENSION_BUFFER, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; + srv_desc.Buffer.NumElements = m_vertex_stream_buffer.GetSize() / sizeof(u32); + g_dx_context->GetDevice()->CreateShaderResourceView(m_vertex_stream_buffer.GetBuffer(), &srv_desc, + m_vertex_srv.cpu_handle); + UploadAllConstants(); return true; } @@ -115,7 +127,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, static_cast(vertex_data_size)); ADDSTAT(g_stats.this_frame.bytes_index_streamed, static_cast(index_data_size)); - Renderer::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), vertex_stride, + Renderer::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), + m_vertex_srv.cpu_handle, vertex_stride, m_vertex_stream_buffer.GetSize()); Renderer::GetInstance()->SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT); diff --git a/Source/Core/VideoBackends/D3D12/D3D12VertexManager.h b/Source/Core/VideoBackends/D3D12/D3D12VertexManager.h index 6bcff92bb7..2dd2cdfee9 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12VertexManager.h +++ b/Source/Core/VideoBackends/D3D12/D3D12VertexManager.h @@ -46,6 +46,7 @@ protected: StreamBuffer m_uniform_stream_buffer; StreamBuffer m_texel_stream_buffer; std::array 
m_texel_buffer_views = {}; + DescriptorHandle m_vertex_srv = {}; }; } // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/DX12Context.cpp b/Source/Core/VideoBackends/D3D12/DX12Context.cpp index e3ff8a7417..5cf0e414c4 100644 --- a/Source/Core/VideoBackends/D3D12/DX12Context.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12Context.cpp @@ -261,6 +261,16 @@ bool DXContext::CreateDescriptorHeaps() return true; } +static void SetRootParamConstant(D3D12_ROOT_PARAMETER* rp, u32 shader_reg, u32 num_values, + D3D12_SHADER_VISIBILITY visibility) +{ + rp->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rp->Constants.Num32BitValues = num_values; + rp->Constants.ShaderRegister = shader_reg; + rp->Constants.RegisterSpace = 0; + rp->ShaderVisibility = visibility; +} + static void SetRootParamCBV(D3D12_ROOT_PARAMETER* rp, u32 shader_reg, D3D12_SHADER_VISIBILITY visibility) { @@ -345,6 +355,11 @@ bool DXContext::CreateGXRootSignature() param_count++; SetRootParamCBV(¶ms[param_count], 0, D3D12_SHADER_VISIBILITY_GEOMETRY); param_count++; + SetRootParamTable(¶ms[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3, + 1, D3D12_SHADER_VISIBILITY_VERTEX); + param_count++; + SetRootParamConstant(¶ms[param_count], 2, 1, D3D12_SHADER_VISIBILITY_VERTEX); + param_count++; // Since these must be contiguous, pixel lighting goes to bbox if not enabled. 
if (g_ActiveConfig.bBBoxEnable) diff --git a/Source/Core/VideoBackends/D3D12/DX12Context.h b/Source/Core/VideoBackends/D3D12/DX12Context.h index d1c5b38798..ad9ef9fe74 100644 --- a/Source/Core/VideoBackends/D3D12/DX12Context.h +++ b/Source/Core/VideoBackends/D3D12/DX12Context.h @@ -27,6 +27,8 @@ enum ROOT_PARAMETER ROOT_PARAMETER_VS_CBV, ROOT_PARAMETER_VS_CBV2, ROOT_PARAMETER_GS_CBV, + ROOT_PARAMETER_VS_SRV, + ROOT_PARAMETER_BASE_VERTEX_CONSTANT, ROOT_PARAMETER_PS_UAV_OR_CBV2, ROOT_PARAMETER_PS_CBV2, // ROOT_PARAMETER_PS_UAV_OR_CBV2 if bbox is not enabled NUM_ROOT_PARAMETERS diff --git a/Source/Core/VideoBackends/D3D12/DX12Pipeline.cpp b/Source/Core/VideoBackends/D3D12/DX12Pipeline.cpp index 52b73e6911..bca820a2c3 100644 --- a/Source/Core/VideoBackends/D3D12/DX12Pipeline.cpp +++ b/Source/Core/VideoBackends/D3D12/DX12Pipeline.cpp @@ -165,6 +165,7 @@ std::unique_ptr DXPipeline::Create(const AbstractPipelineConfig& con switch (config.usage) { case AbstractPipelineUsage::GX: + case AbstractPipelineUsage::GXUber: desc.pRootSignature = g_dx_context->GetGXRootSignature(); break; case AbstractPipelineUsage::Utility: diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp index b0611a8690..b901b994e0 100644 --- a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp +++ b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp @@ -87,6 +87,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsLodBiasInSampler = true; g_Config.backend_info.bSupportsSettingObjectNames = true; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + g_Config.backend_info.bSupportsDynamicVertexLoader = true; // We can only check texture support once we have a device. 
if (g_dx_context) diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm index 3181e1fdee..48a8ad7dae 100644 --- a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm @@ -220,7 +220,7 @@ static MTLCullMode Convert(CullMode cull) switch (cull) { case CullMode::None: - case CullMode::All: // Handled by disabling rasterization + case CullMode::All: // Handled by VertexLoaderManager::RunVertices return MTLCullModeNone; case CullMode::Front: return MTLCullModeFront; @@ -289,7 +289,8 @@ public: } }; template - static void CopyAll(std::array& output, const AttributeFormat (&input)[N]) + static void CopyAll(std::array& output, + const std::array& input) { for (size_t i = 0; i < N; ++i) output[i] = VertexAttribute(input[i]); @@ -327,13 +328,14 @@ public: blend.subtractAlpha = cfg.blending_state.subtractAlpha.Value(); // clang-format on } - // Throw extras in bits we don't otherwise use - if (cfg.rasterization_state.cullmode == CullMode::All) - blend.hex |= 1 << 29; - if (cfg.rasterization_state.primitive == PrimitiveType::Points) - blend.hex |= 1 << 30; - else if (cfg.rasterization_state.primitive == PrimitiveType::Lines) - blend.hex |= 1 << 31; + + if (cfg.usage != AbstractPipelineUsage::GXUber) + { + if (cfg.rasterization_state.primitive == PrimitiveType::Points) + is_points = true; + else if (cfg.rasterization_state.primitive == PrimitiveType::Lines) + is_lines = true; + } } PipelineID() { memset(this, 0, sizeof(*this)); } PipelineID(const PipelineID& other) { memcpy(this, &other, sizeof(*this)); } @@ -359,7 +361,13 @@ public: VertexAttribute v_posmtx; const Shader* vertex_shader; const Shader* fragment_shader; - BlendingState blend; + union + { + BlendingState blend; + // Throw extras in bits we don't otherwise use + BitField<30, 1, bool, u32> is_points; + BitField<31, 1, bool, u32> is_lines; + }; FramebufferState framebuffer; }; @@ -377,24 +385,17 @@ 
public: auto desc = MRCTransfer([MTLRenderPipelineDescriptor new]); [desc setVertexFunction:static_cast(config.vertex_shader)->GetShader()]; [desc setFragmentFunction:static_cast(config.pixel_shader)->GetShader()]; - if (config.usage == AbstractPipelineUsage::GX) - { - if ([[[desc vertexFunction] label] containsString:@"Uber"]) - [desc - setLabel:[NSString stringWithFormat:@"GX Uber Pipeline %d", m_pipeline_counter[0]++]]; - else - [desc setLabel:[NSString stringWithFormat:@"GX Pipeline %d", m_pipeline_counter[1]++]]; - } + if (config.usage == AbstractPipelineUsage::GXUber) + [desc setLabel:[NSString stringWithFormat:@"GX Uber Pipeline %d", m_pipeline_counter[0]++]]; + else if (config.usage == AbstractPipelineUsage::GX) + [desc setLabel:[NSString stringWithFormat:@"GX Pipeline %d", m_pipeline_counter[1]++]]; else - { [desc setLabel:[NSString stringWithFormat:@"Utility Pipeline %d", m_pipeline_counter[2]++]]; - } if (config.vertex_format) [desc setVertexDescriptor:static_cast(config.vertex_format)->Get()]; RasterizationState rs = config.rasterization_state; - [desc setInputPrimitiveTopology:GetClass(rs.primitive)]; - if (rs.cullmode == CullMode::All) - [desc setRasterizationEnabled:NO]; + if (config.usage != AbstractPipelineUsage::GXUber) + [desc setInputPrimitiveTopology:GetClass(rs.primitive)]; MTLRenderPipelineColorAttachmentDescriptor* color0 = [[desc colorAttachments] objectAtIndexedSubscript:0]; BlendingState bs = config.blending_state; diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm index 3ab6224f83..0004c81089 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -608,7 +608,7 @@ void Metal::StateTracker::PrepareRender() BeginRenderPass(MTLLoadActionLoad); id enc = m_current_render_encoder; const Pipeline* pipe = m_state.render_pipeline; - bool is_gx = pipe->Usage() == AbstractPipelineUsage::GX; + bool is_gx = 
pipe->Usage() != AbstractPipelineUsage::Utility; NSString* label = is_gx ? LABEL_GX : LABEL_UTIL; if (m_flags.should_apply_label && m_current.label != label) { diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index 14c83b5c17..30eb305de3 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -74,6 +74,7 @@ void Metal::Util::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsSettingObjectNames = true; // Metal requires multisample resolve to be done on a render pass config->backend_info.bSupportsPartialMultisampleResolve = false; + config->backend_info.bSupportsDynamicVertexLoader = true; } void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, @@ -426,6 +427,7 @@ std::optional Metal::Util::TranslateShaderToMSL(ShaderStage stage, static const spirv_cross::MSLResourceBinding resource_bindings[] = { MakeResourceBinding(spv::ExecutionModelVertex, 0, 0, 1, 0, 0), // vs/ubo MakeResourceBinding(spv::ExecutionModelVertex, 0, 1, 1, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0 diff --git a/Source/Core/VideoBackends/Metal/MTLVertexFormat.mm b/Source/Core/VideoBackends/Metal/MTLVertexFormat.mm index 6453c5189a..d9407402e2 100644 --- a/Source/Core/VideoBackends/Metal/MTLVertexFormat.mm +++ b/Source/Core/VideoBackends/Metal/MTLVertexFormat.mm @@ -125,7 +125,7 @@ static void SetAttribute(MTLVertexDescriptor* desc, u32 attribute, const Attribu template static void SetAttributes(MTLVertexDescriptor* desc, u32 attribute, - const AttributeFormat (&format)[N]) + const std::array& format) { for (size_t i = 0; i < N; ++i) SetAttribute(desc, attribute + i, format[i]); diff --git 
a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index b68c9cfd94..7cc8919ad0 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -60,6 +60,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsLodBiasInSampler = false; g_Config.backend_info.bSupportsSettingObjectNames = false; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + g_Config.backend_info.bSupportsDynamicVertexLoader = false; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/OGLMain.cpp b/Source/Core/VideoBackends/OGL/OGLMain.cpp index 24a65f746d..f6a84240e0 100644 --- a/Source/Core/VideoBackends/OGL/OGLMain.cpp +++ b/Source/Core/VideoBackends/OGL/OGLMain.cpp @@ -94,6 +94,8 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsPipelineCacheData = false; g_Config.backend_info.bSupportsLodBiasInSampler = true; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + // Unnecessary since OGL doesn't use pipelines + g_Config.backend_info.bSupportsDynamicVertexLoader = false; // TODO: There is a bug here, if texel buffers or SSBOs/atomics are not supported the graphics // options will show the option when it is not supported. 
The only way around this would be diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index 80dc4603f2..5076106089 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -89,6 +89,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsLodBiasInSampler = false; g_Config.backend_info.bSupportsSettingObjectNames = false; g_Config.backend_info.bSupportsPartialMultisampleResolve = true; + g_Config.backend_info.bSupportsDynamicVertexLoader = false; // aamodes g_Config.backend_info.AAModes = {1}; diff --git a/Source/Core/VideoBackends/Vulkan/Constants.h b/Source/Core/VideoBackends/Vulkan/Constants.h index c1218f9a67..52d59b4f45 100644 --- a/Source/Core/VideoBackends/Vulkan/Constants.h +++ b/Source/Core/VideoBackends/Vulkan/Constants.h @@ -39,6 +39,8 @@ enum DESCRIPTOR_SET_LAYOUT // - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) [set=0, binding=0-2] // - 8 combined image samplers (accessible from PS) [set=1, binding=0-7] // - 1 SSBO accessible from PS if supported [set=2, binding=0] +// - Uber +// - Like standard, plus 1 SSBO accessible from VS if supported [set=2, binding=1] // - Utility // - 1 combined UBO, accessible from VS/GS/PS [set=0, binding=0] // - 8 combined image samplers (accessible from PS) [set=1, binding=0-7] @@ -55,6 +57,7 @@ enum DESCRIPTOR_SET_LAYOUT enum PIPELINE_LAYOUT { PIPELINE_LAYOUT_STANDARD, + PIPELINE_LAYOUT_UBER, PIPELINE_LAYOUT_UTILITY, PIPELINE_LAYOUT_COMPUTE, NUM_PIPELINE_LAYOUTS diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp index fd296457e4..353cb6da3f 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp @@ -123,8 +123,10 @@ bool ObjectCache::CreateDescriptorSetLayouts() VK_SHADER_STAGE_FRAGMENT_BIT}, }}; - static const std::array standard_ssbo_bindings{{ + // The 
dynamic vertex loader's vertex buffer must be last here, for similar reasons + static const std::array standard_ssbo_bindings{{ {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT}, }}; static const std::array utility_ubo_bindings{{ @@ -173,6 +175,10 @@ bool ObjectCache::CreateDescriptorSetLayouts() if (!g_ActiveConfig.backend_info.bSupportsGeometryShaders) create_infos[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS].bindingCount--; + // Remove the dynamic vertex loader's buffer if it'll never be needed + if (!g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + create_infos[DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS].bindingCount--; + for (size_t i = 0; i < create_infos.size(); i++) { VkResult res = vkCreateDescriptorSetLayout(g_vulkan_context->GetDevice(), &create_infos[i], @@ -206,6 +212,11 @@ bool ObjectCache::CreatePipelineLayouts() m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS], m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS], }; + const std::array uber_sets{ + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS], + }; const std::array utility_sets{ m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER], m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS], @@ -220,6 +231,10 @@ bool ObjectCache::CreatePipelineLayouts() {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(standard_sets.size()), standard_sets.data(), 0, nullptr}, + // Uber + {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, + static_cast(uber_sets.size()), uber_sets.data(), 0, nullptr}, + // Utility {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(utility_sets.size()), 
utility_sets.data(), 0, nullptr}, @@ -232,6 +247,10 @@ bool ObjectCache::CreatePipelineLayouts() // If bounding box is unsupported, don't bother with the SSBO descriptor set. if (!g_ActiveConfig.backend_info.bSupportsBBox) pipeline_layout_info[PIPELINE_LAYOUT_STANDARD].setLayoutCount--; + // If neither SSBO-using feature is supported, skip in ubershaders too + if (!g_ActiveConfig.backend_info.bSupportsBBox && + !g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + pipeline_layout_info[PIPELINE_LAYOUT_UBER].setLayoutCount--; for (size_t i = 0; i < pipeline_layout_info.size(); i++) { diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp index db59b16398..686bbce973 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp @@ -77,14 +77,23 @@ bool StateTracker::Initialize() return true; } -void StateTracker::SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset) +void StateTracker::SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset, u32 size) { - if (m_vertex_buffer == buffer && m_vertex_buffer_offset == offset) - return; - - m_vertex_buffer = buffer; - m_vertex_buffer_offset = offset; - m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER; + if (m_vertex_buffer != buffer || m_vertex_buffer_offset != offset) + { + m_vertex_buffer = buffer; + m_vertex_buffer_offset = offset; + m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER; + } + if (m_bindings.gx_uber_vertex_ssbo.buffer != buffer || + m_bindings.gx_uber_vertex_ssbo.offset != offset || + m_bindings.gx_uber_vertex_ssbo.range != size) + { + m_bindings.gx_uber_vertex_ssbo.buffer = buffer; + m_bindings.gx_uber_vertex_ssbo.offset = offset; + m_bindings.gx_uber_vertex_ssbo.range = size; + m_dirty_flags |= DIRTY_FLAG_GX_SSBO; + } } void StateTracker::SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) @@ -366,8 +375,13 @@ bool StateTracker::Bind() // Re-bind parts of the pipeline const 
VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - if (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER) + const bool needs_vertex_buffer = !g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader || + m_pipeline->GetUsage() != AbstractPipelineUsage::GXUber; + if (needs_vertex_buffer && (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER)) + { vkCmdBindVertexBuffers(command_buffer, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset); + m_dirty_flags &= ~DIRTY_FLAG_VERTEX_BUFFER; + } if (m_dirty_flags & DIRTY_FLAG_INDEX_BUFFER) vkCmdBindIndexBuffer(command_buffer, m_index_buffer, m_index_buffer_offset, m_index_type); @@ -381,8 +395,8 @@ bool StateTracker::Bind() if (m_dirty_flags & DIRTY_FLAG_SCISSOR) vkCmdSetScissor(command_buffer, 0, 1, &m_scissor); - m_dirty_flags &= ~(DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | - DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR); + m_dirty_flags &= + ~(DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR); return true; } @@ -452,7 +466,7 @@ void StateTracker::EndClearRenderPass() bool StateTracker::UpdateDescriptorSet() { - if (m_pipeline->GetUsage() == AbstractPipelineUsage::GX) + if (m_pipeline->GetUsage() != AbstractPipelineUsage::Utility) return UpdateGXDescriptorSet(); else return UpdateUtilityDescriptorSet(); @@ -462,7 +476,7 @@ bool StateTracker::UpdateGXDescriptorSet() { const size_t MAX_DESCRIPTOR_WRITES = NUM_UBO_DESCRIPTOR_SET_BINDINGS + // UBO 1 + // Samplers - 1; // SSBO + 2; // SSBO std::array writes; u32 num_writes = 0; @@ -516,7 +530,12 @@ bool StateTracker::UpdateGXDescriptorSet() m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_SAMPLERS) | DIRTY_FLAG_DESCRIPTOR_SETS; } - if (g_ActiveConfig.backend_info.bSupportsBBox && + const bool needs_bbox_ssbo = g_ActiveConfig.backend_info.bSupportsBBox; + const bool needs_vertex_ssbo = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader && + m_pipeline->GetUsage() == AbstractPipelineUsage::GXUber; 
+ const bool needs_ssbo = needs_bbox_ssbo || needs_vertex_ssbo; + + if (needs_ssbo && (m_dirty_flags & DIRTY_FLAG_GX_SSBO || m_gx_descriptor_sets[2] == VK_NULL_HANDLE)) { m_gx_descriptor_sets[2] = @@ -528,6 +547,21 @@ bool StateTracker::UpdateGXDescriptorSet() writes[num_writes++] = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, m_gx_descriptor_sets[2], 0, 0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, nullptr, &m_bindings.ssbo, nullptr}; + + if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + { + writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_gx_descriptor_sets[2], + 1, + 0, + 1, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + nullptr, + &m_bindings.gx_uber_vertex_ssbo, + nullptr}; + } + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_SSBO) | DIRTY_FLAG_DESCRIPTOR_SETS; } @@ -538,9 +572,7 @@ bool StateTracker::UpdateGXDescriptorSet() { vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, - g_ActiveConfig.backend_info.bSupportsBBox ? - NUM_GX_DESCRIPTOR_SETS : - (NUM_GX_DESCRIPTOR_SETS - 1), + needs_ssbo ? NUM_GX_DESCRIPTOR_SETS : (NUM_GX_DESCRIPTOR_SETS - 1), m_gx_descriptor_sets.data(), g_ActiveConfig.backend_info.bSupportsGeometryShaders ? 
NUM_UBO_DESCRIPTOR_SET_BINDINGS : diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.h b/Source/Core/VideoBackends/Vulkan/StateTracker.h index 7184a2ae4c..bb7311d932 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.h +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.h @@ -32,7 +32,7 @@ public: VKFramebuffer* GetFramebuffer() const { return m_framebuffer; } const VKPipeline* GetPipeline() const { return m_pipeline; } - void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset); + void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset, u32 size); void SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type); void SetFramebuffer(VKFramebuffer* framebuffer); void SetPipeline(const VKPipeline* pipeline); @@ -145,6 +145,7 @@ private: std::array samplers; std::array texel_buffers; VkDescriptorBufferInfo ssbo; + VkDescriptorBufferInfo gx_uber_vertex_ssbo; VkDescriptorImageInfo image_texture; } m_bindings = {}; std::array m_gx_descriptor_sets = {}; diff --git a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp index f21fb9bf5e..564f93d802 100644 --- a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp @@ -251,6 +251,9 @@ std::unique_ptr VKPipeline::Create(const AbstractPipelineConfig& con case AbstractPipelineUsage::GX: pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD); break; + case AbstractPipelineUsage::GXUber: + pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_UBER); + break; case AbstractPipelineUsage::Utility: pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_UTILITY); break; diff --git a/Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp b/Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp index 1c47ea0b8e..146eb2921e 100644 --- a/Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp @@ -62,7 +62,8 @@ bool 
VertexManager::Initialize() return false; m_vertex_stream_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE); + StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VERTEX_STREAM_BUFFER_SIZE); m_index_stream_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE); m_uniform_stream_buffer = @@ -186,7 +187,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, static_cast(vertex_data_size)); ADDSTAT(g_stats.this_frame.bytes_index_streamed, static_cast(index_data_size)); - StateTracker::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer->GetBuffer(), 0); + StateTracker::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer->GetBuffer(), 0, + VERTEX_STREAM_BUFFER_SIZE); StateTracker::GetInstance()->SetIndexBuffer(m_index_stream_buffer->GetBuffer(), 0, VK_INDEX_TYPE_UINT16); } diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index cad326c83a..5b970cc9b1 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -295,6 +295,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsLodBiasInSampler = false; // Dependent on OS. config->backend_info.bSupportsSettingObjectNames = false; // Dependent on features. config->backend_info.bSupportsPartialMultisampleResolve = true; // Assumed support. + config->backend_info.bSupportsDynamicVertexLoader = true; // Assumed support. 
} void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) diff --git a/Source/Core/VideoCommon/AbstractPipeline.h b/Source/Core/VideoCommon/AbstractPipeline.h index 0be46eaf5a..60381d0cd9 100644 --- a/Source/Core/VideoCommon/AbstractPipeline.h +++ b/Source/Core/VideoCommon/AbstractPipeline.h @@ -20,6 +20,8 @@ class NativeVertexFormat; // - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) // - 8 combined image samplers (accessible from PS) // - 1 SSBO, accessible from PS if bounding box is enabled +// - GX Uber +// - Same as GX, plus one VS SSBO for vertices if dynamic vertex loading is enabled // - Utility // - Single UBO, accessible from all stages [set=0, binding=1] // - 8 combined image samplers (accessible from PS) [set=1, binding=0-7] @@ -32,6 +34,7 @@ class NativeVertexFormat; enum class AbstractPipelineUsage { GX, + GXUber, Utility }; diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 9fd4c060cd..c3427ba37f 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -93,6 +93,13 @@ struct VertexShaderConstants float4 cached_tangent; float4 cached_binormal; + // For UberShader vertex loader + u32 vertex_stride; + std::array<u32, 3> vertex_offset_normals; + u32 vertex_offset_position; + u32 vertex_offset_posmtx; + std::array<u32, 2> vertex_offset_colors; + std::array<u32, 8> vertex_offset_texcoords; }; struct GeometryShaderConstants diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index 55e1a40178..89f3a06db5 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -58,9 +58,9 @@ struct PortableVertexDeclaration int stride; AttributeFormat position; - AttributeFormat normals[3]; - AttributeFormat colors[2]; - AttributeFormat texcoords[8]; + std::array<AttributeFormat, 3> normals; + std::array<AttributeFormat, 2> colors; + std::array<AttributeFormat, 8> texcoords; AttributeFormat posmtx;
inline bool operator<(const PortableVertexDeclaration& b) const diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 6049fd29a7..f281024f22 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -588,10 +588,10 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig( const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader, const AbstractShader* geometry_shader, const AbstractShader* pixel_shader, const RasterizationState& rasterization_state, const DepthState& depth_state, - const BlendingState& blending_state) + const BlendingState& blending_state, AbstractPipelineUsage usage) { AbstractPipelineConfig config = {}; - config.usage = AbstractPipelineUsage::GX; + config.usage = usage; config.vertex_format = vertex_format; config.vertex_shader = vertex_shader; config.geometry_shader = geometry_shader; @@ -735,7 +735,7 @@ ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config_in) } return GetGXPipelineConfig(config.vertex_format, vs, gs, ps, config.rasterization_state, - config.depth_state, config.blending_state); + config.depth_state, config.blending_state, AbstractPipelineUsage::GX); } /// Edits the UID based on driver bugs and other special configurations @@ -743,6 +743,8 @@ static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in) { GXUberPipelineUid out; memcpy(&out, &in, sizeof(out)); // Copy padding + if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + out.vertex_format = nullptr; if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch) { // Always blend in shader @@ -798,7 +800,8 @@ ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config_in) } return GetGXPipelineConfig(config.vertex_format, vs, gs, ps, config.rasterization_state, - config.depth_state, config.blending_state); + config.depth_state, config.blending_state, + AbstractPipelineUsage::GXUber); } const AbstractPipeline* 
ShaderCache::InsertGXPipeline(const GXPipelineUid& config, @@ -1233,32 +1236,32 @@ void ShaderCache::QueueUberShaderPipelines() dummy_vertex_decl.stride = sizeof(float) * 4; NativeVertexFormat* dummy_vertex_format = VertexLoaderManager::GetUberVertexFormat(dummy_vertex_decl); - auto QueueDummyPipeline = [&](const UberShader::VertexShaderUid& vs_uid, - const GeometryShaderUid& gs_uid, - const UberShader::PixelShaderUid& ps_uid) { - GXUberPipelineUid config; - config.vertex_format = dummy_vertex_format; - config.vs_uid = vs_uid; - config.gs_uid = gs_uid; - config.ps_uid = ps_uid; - config.rasterization_state = RenderState::GetCullBackFaceRasterizationState( - static_cast(gs_uid.GetUidData()->primitive_type)); - config.depth_state = RenderState::GetNoDepthTestingDepthState(); - config.blending_state = RenderState::GetNoBlendingBlendState(); - if (ps_uid.GetUidData()->uint_output) - { - // uint_output is only ever enabled when logic ops are enabled. - config.blending_state.logicopenable = true; - config.blending_state.logicmode = LogicOp::And; - } + auto QueueDummyPipeline = + [&](const UberShader::VertexShaderUid& vs_uid, const GeometryShaderUid& gs_uid, + const UberShader::PixelShaderUid& ps_uid, const BlendingState& blend) { + GXUberPipelineUid config; + config.vertex_format = dummy_vertex_format; + config.vs_uid = vs_uid; + config.gs_uid = gs_uid; + config.ps_uid = ps_uid; + config.rasterization_state = RenderState::GetCullBackFaceRasterizationState( + static_cast(gs_uid.GetUidData()->primitive_type)); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = blend; + if (ps_uid.GetUidData()->uint_output) + { + // uint_output is only ever enabled when logic ops are enabled. 
+ config.blending_state.logicopenable = true; + config.blending_state.logicmode = LogicOp::And; + } - auto iter = m_gx_uber_pipeline_cache.find(config); - if (iter != m_gx_uber_pipeline_cache.end()) - return; + auto iter = m_gx_uber_pipeline_cache.find(config); + if (iter != m_gx_uber_pipeline_cache.end()) + return; - auto& entry = m_gx_uber_pipeline_cache[config]; - entry.second = false; - }; + auto& entry = m_gx_uber_pipeline_cache[config]; + entry.second = false; + }; // Populate the pipeline configs with empty entries, these will be compiled afterwards. UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& vuid) { @@ -1275,7 +1278,45 @@ void ShaderCache::QueueUberShaderPipelines() { return; } - QueueDummyPipeline(vuid, guid, cleared_puid); + BlendingState blend = RenderState::GetNoBlendingBlendState(); + QueueDummyPipeline(vuid, guid, cleared_puid, blend); + if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader) + { + // Not all GPUs need all the pipeline state compiled into shaders, so they tend to key + // compiled shaders based on some subset of the pipeline state. + // Some test results: + // (GPUs tested: AMD Radeon Pro 5600M, Nvidia GT 750M, Intel UHD 630, + // Intel Iris Pro 5200, Apple M1) + // MacOS Metal: + // - AMD, Nvidia, Intel GPUs: Shaders are keyed on vertex layout and whether or not + // dual source blend is enabled. That's it. + // - Apple GPUs: Shaders are keyed on vertex layout and all blending settings. We use + // framebuffer fetch here, so the only blending settings used by ubershaders are the + // alphaupdate and colorupdate ones. Also keyed on primitive type, but Metal supports + // setting it to "unknown" and we do for ubershaders (but MoltenVK won't). + // Windows Vulkan: + // - AMD, Nvidia: Definitely keyed on dual source blend, but the others seem more random + // Changing a setting on one shader will require a recompile, but changing the same + // setting on another won't. 
Compiling a copy with alphaupdate off, colorupdate off, + // and one with DSB on seems to get pretty good coverage though. + // Windows D3D12: + // - AMD: Keyed on dual source blend and vertex layout + // - Nvidia Kepler: No recompiles for changes to vertex layout or blend + blend.alphaupdate = false; + QueueDummyPipeline(vuid, guid, cleared_puid, blend); + blend.alphaupdate = true; + blend.colorupdate = false; + QueueDummyPipeline(vuid, guid, cleared_puid, blend); + blend.colorupdate = true; + if (!cleared_puid.GetUidData()->no_dual_src && !cleared_puid.GetUidData()->uint_output) + { + blend.blendenable = true; + blend.usedualsrc = true; + blend.srcfactor = SrcBlendFactor::SrcAlpha; + blend.dstfactor = DstBlendFactor::InvSrcAlpha; + QueueDummyPipeline(vuid, guid, cleared_puid, blend); + } + } }); }); }); diff --git a/Source/Core/VideoCommon/ShaderCache.h b/Source/Core/VideoCommon/ShaderCache.h index ee59fe9ae7..2721cd7319 100644 --- a/Source/Core/VideoCommon/ShaderCache.h +++ b/Source/Core/VideoCommon/ShaderCache.h @@ -151,7 +151,7 @@ private: GetGXPipelineConfig(const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader, const AbstractShader* geometry_shader, const AbstractShader* pixel_shader, const RasterizationState& rasterization_state, const DepthState& depth_state, - const BlendingState& blending_state); + const BlendingState& blending_state, AbstractPipelineUsage usage); std::optional GetGXPipelineConfig(const GXPipelineUid& uid); std::optional GetGXPipelineConfig(const GXUberPipelineUid& uid); const AbstractPipeline* InsertGXPipeline(const GXPipelineUid& config, diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index b9965421cc..7b407f9aaf 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -43,6 +43,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() bits.manual_texture_sampling_custom_texture_sizes = 
g_ActiveConfig.ManualTextureSamplingWithHiResTextures(); bits.backend_sampler_lod_bias = g_ActiveConfig.backend_info.bSupportsLodBiasInSampler; + bits.backend_dynamic_vertex_loader = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader; return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 1cbff2bfcf..73fa68af03 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -177,6 +177,7 @@ union ShaderHostConfig BitField<24, 1, bool, u32> manual_texture_sampling; BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes; BitField<26, 1, bool, u32> backend_sampler_lod_bias; + BitField<27, 1, bool, u32> backend_dynamic_vertex_loader; static ShaderHostConfig GetCurrent(); }; @@ -302,6 +303,15 @@ static const char s_shader_uniforms[] = "\tuint components;\n" "\tuint4 xfmem_pack1[8];\n" "\tfloat4 " I_CACHED_TANGENT ";\n" "\tfloat4 " I_CACHED_BINORMAL ";\n" + "\tuint vertex_stride;\n" + "\tuint vertex_offset_rawnormal;\n" + "\tuint vertex_offset_rawtangent;\n" + "\tuint vertex_offset_rawbinormal;\n" + "\tuint vertex_offset_rawpos;\n" + "\tuint vertex_offset_posmtx;\n" + "\tuint vertex_offset_rawcolor0;\n" + "\tuint vertex_offset_rawcolor1;\n" + "\tuint4 vertex_offset_rawtex[2];\n" // std140 is pain "\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n" "\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n" "\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n" diff --git a/Source/Core/VideoCommon/UberShaderVertex.cpp b/Source/Core/VideoCommon/UberShaderVertex.cpp index 67dd84bb7f..04f2b27a14 100644 --- a/Source/Core/VideoCommon/UberShaderVertex.cpp +++ b/Source/Core/VideoCommon/UberShaderVertex.cpp @@ -22,7 +22,11 @@ VertexShaderUid GetVertexShaderUid() return out; } -static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out); +static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& host_config, + u32 
num_texgen, ShaderCode& out); +static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_config, u32 indent, + std::string_view name, std::string_view shader_type, + std::string_view stored_type, std::string_view offset_name = {}); ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config, const vertex_ubershader_uid_data* uid_data) @@ -50,15 +54,99 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config WriteBitfieldExtractHeader(out, api_type, host_config); WriteLightingFunction(out); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); - out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); - for (int i = 0; i < 8; ++i) - out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); + if (host_config.backend_dynamic_vertex_loader) + { + out.Write(R"( +SSBO_BINDING(1) readonly restrict buffer Vertices {{ + uint vertex_buffer[]; +}}; +)"); + if (api_type == APIType::D3D) + { + // Write a function to get an offset into vertex_buffer corresponding to this vertex. + // This must be done differently for D3D compared to OpenGL/Vulkan/Metal, as on OpenGL, etc., + // gl_VertexID starts counting at the base vertex specified in glDrawElementsBaseVertex, + // while on D3D, SV_VertexID (which spirv-cross translates gl_VertexID into) starts counting + // at 0 regardless of the BaseVertexLocation value passed to DrawIndexed. 
In both cases, + // offset 0 of vertex_buffer corresponds to index 0 with basevertex set to 0, so we have to + // manually apply the basevertex offset for D3D + // D3D12 uses a root constant for this uniform, since it changes with every draw. + // D3D11 doesn't currently support dynamic vertex loader, and we'll have to figure something + // out for it if we want to support it in the future. + out.Write("UBO_BINDING(std140, 3) uniform DX_Constants {{\n" + " uint base_vertex;\n" + "}};\n\n" + "uint GetVertexBaseOffset() {{\n" + " return (gl_VertexID + base_vertex) * vertex_stride;\n" + "}}\n"); + } + else + { + out.Write("uint GetVertexBaseOffset() {{\n" + " return gl_VertexID * vertex_stride;\n" + "}}\n"); + } + + out.Write(R"( +uint4 load_input_uint4_ubyte4(uint vtx_offset, uint attr_offset) {{ + uint value = vertex_buffer[vtx_offset + attr_offset]; + return uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); +}} + +float4 load_input_float4_ubyte4(uint vtx_offset, uint attr_offset) {{ + return float4(load_input_uint4_ubyte4(vtx_offset, attr_offset)) / 255.0f; +}} + +float3 load_input_float3_float3(uint vtx_offset, uint attr_offset) {{ + uint offset = vtx_offset + attr_offset; + return float3(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + uintBitsToFloat(vertex_buffer[offset + 2])); +}} + +float4 load_input_float4_rawpos(uint vtx_offset, uint attr_offset) {{ + uint components = attr_offset >> 16; + uint offset = vtx_offset + (attr_offset & 0xffff); + if (components < 3) + return float4(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + 0.0f, 1.0f); + else + return float4(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + uintBitsToFloat(vertex_buffer[offset + 2]), + 1.0f); +}} + +float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{ + uint components = attr_offset >> 16; + uint offset = vtx_offset + 
(attr_offset & 0xffff); + if (components < 2) + return float3(uintBitsToFloat(vertex_buffer[offset + 0]), 0.0f, 0.0f); + else if (components < 3) + return float3(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + 0.0f); + else + return float3(uintBitsToFloat(vertex_buffer[offset + 0]), + uintBitsToFloat(vertex_buffer[offset + 1]), + uintBitsToFloat(vertex_buffer[offset + 2])); +}} + +)"); + } + else + { + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB); + for (int i = 0; i < 8; ++i) + out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i, i); + } if (host_config.backend_geometry_shaders) { @@ -99,7 +187,12 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config out.Write("VS_OUTPUT o;\n" "\n"); - + if (host_config.backend_dynamic_vertex_loader) + { + out.Write("uint vertex_base_offset = GetVertexBaseOffset();\n"); + } + // rawpos is always needed + LoadVertexAttribute(out, host_config, 0, "rawpos", "float4", "rawpos"); // Transforms out.Write("// Position matrix\n" "float4 P0;\n" @@ -113,6 +206,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "\n" "if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n", VB_HAS_POSMTXIDX); + LoadVertexAttribute(out, host_config, 2, "posmtx", "uint4", "ubyte4"); out.Write(" // Vertex format has a per-vertex matrix\n" " int posidx = int(posmtx.r);\n" " P0 = " 
I_TRANSFORMMATRICES "[posidx];\n" @@ -144,27 +238,40 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "// by lighting calculations and needs to be unit length), the same transform matrix\n" "// can do double duty, scaling for emboss mapping, and not scaling for lighting.\n" "float3 _normal = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_NORMAL\n", + "if ((components & {}u) != 0u) // VB_HAS_NORMAL\n" + "{{\n", VB_HAS_NORMAL); + LoadVertexAttribute(out, host_config, 2, "rawnormal", "float3", "float3"); out.Write(" _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, " "rawnormal)));\n" + "}}\n" "\n" "float3 _tangent = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_TANGENT\n", + "if ((components & {}u) != 0u) // VB_HAS_TANGENT\n" + "{{\n", VB_HAS_TANGENT); + LoadVertexAttribute(out, host_config, 2, "rawtangent", "float3", "float3"); out.Write(" _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));\n" + "}}\n" "else\n" + "{{\n" " _tangent = float3(dot(N0, " I_CACHED_TANGENT ".xyz), dot(N1, " I_CACHED_TANGENT ".xyz), dot(N2, " I_CACHED_TANGENT ".xyz));\n" + "}}\n" "\n" "float3 _binormal = float3(0.0, 0.0, 0.0);\n" - "if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n", + "if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n" + "{{\n", VB_HAS_BINORMAL); + LoadVertexAttribute(out, host_config, 2, "rawbinormal", "float3", "float3"); out.Write(" _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, " "rawbinormal));\n" + "}}\n" "else\n" + "{{\n" " _binormal = float3(dot(N0, " I_CACHED_BINORMAL ".xyz), dot(N1, " I_CACHED_BINORMAL ".xyz), dot(N2, " I_CACHED_BINORMAL ".xyz));\n" + "}}\n" "\n"); // Hardware Lighting @@ -178,34 +285,40 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config "bool use_color_1 = ((components & {0}u) == {0}u); // VB_HAS_COL0 | VB_HAS_COL1\n", VB_HAS_COL0 | VB_HAS_COL1); - 
out.Write("for (uint color = 0u; color < {}u; color++) {{\n", NUM_XF_COLOR_CHANNELS); - out.Write(" if ((color == 0u || use_color_1) && (components & ({}u << color)) != 0u) {{\n", - VB_HAS_COL0); - out.Write(" // Use color0 for channel 0, and color1 for channel 1 if both colors 0 and 1 are " - "present.\n" - " if (color == 0u)\n" - " vertex_color_0 = rawcolor0;\n" - " else\n" - " vertex_color_1 = rawcolor1;\n" - " }} else if (color == 0u && (components & {}u) != 0u) {{\n", - VB_HAS_COL1); - out.Write(" // Use color1 for channel 0 if color0 is not present.\n" - " vertex_color_0 = rawcolor1;\n" - " }} else {{\n" - " if (color == 0u)\n" - " vertex_color_0 = missing_color_value;\n" - " else\n" - " vertex_color_1 = missing_color_value;\n" - " }}\n" + out.Write("if ((components & {0}u) == {0}u) // VB_HAS_COL0 | VB_HAS_COL1\n" + "{{\n", + VB_HAS_COL0 | VB_HAS_COL1); + LoadVertexAttribute(out, host_config, 2, "rawcolor0", "float4", "ubyte4"); + LoadVertexAttribute(out, host_config, 2, "rawcolor1", "float4", "ubyte4"); + out.Write(" vertex_color_0 = rawcolor0;\n" + " vertex_color_1 = rawcolor1;\n" "}}\n" - "\n"); + "else if ((components & {}u) != 0u) // VB_HAS_COL0\n" + "{{\n", + VB_HAS_COL0); + LoadVertexAttribute(out, host_config, 2, "rawcolor0", "float4", "ubyte4"); + out.Write(" vertex_color_0 = rawcolor0;\n" + " vertex_color_1 = rawcolor0;\n" + "}}\n" + "else if ((components & {}u) != 0u) // VB_HAS_COL1\n" + "{{\n", + VB_HAS_COL1); + LoadVertexAttribute(out, host_config, 2, "rawcolor1", "float4", "ubyte4"); + out.Write(" vertex_color_0 = rawcolor1;\n" + " vertex_color_1 = rawcolor1;\n" + "}}\n" + "else\n" + "{{\n" + " vertex_color_0 = missing_color_value;\n" + " vertex_color_1 = missing_color_value;\n" + "}}\n"); WriteVertexLighting(out, api_type, "pos.xyz", "_normal", "vertex_color_0", "vertex_color_1", "o.colors_0", "o.colors_1"); // Texture Coordinates if (num_texgen > 0) - GenVertexShaderTexGens(api_type, num_texgen, out); + GenVertexShaderTexGens(api_type, 
host_config, num_texgen, out); if (per_pixel_lighting) { @@ -352,7 +465,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config return out; } -static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out) +static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& host_config, + u32 num_texgen, ShaderCode& out) { // The HLSL compiler complains that the output texture coordinates are uninitialized when trying // to dynamically index them. @@ -377,27 +491,40 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& out.Write(" coord.xyz = rawpos.xyz;\n"); out.Write(" break;\n\n"); out.Write(" case {:s}:\n", SourceRow::Normal); - out.Write(" coord.xyz = ((components & {}u /* VB_HAS_NORMAL */) != 0u) ? rawnormal.xyz : " - "coord.xyz;", + out.Write(" if ((components & {}u) != 0u) // VB_HAS_NORMAL\n" + " {{\n", VB_HAS_NORMAL); - out.Write(" break;\n\n"); + LoadVertexAttribute(out, host_config, 6, "rawnormal", "float3", "float3"); + out.Write(" coord.xyz = rawnormal.xyz;\n" + " }}\n" + " break;\n\n"); out.Write(" case {:s}:\n", SourceRow::BinormalT); - out.Write(" coord.xyz = ((components & {}u /* VB_HAS_TANGENT */) != 0u) ? rawtangent.xyz : " - "coord.xyz;", + out.Write(" if ((components & {}u) != 0u) // VB_HAS_TANGENT\n" + " {{\n", VB_HAS_TANGENT); - out.Write(" break;\n\n"); + LoadVertexAttribute(out, host_config, 6, "rawtangent", "float3", "float3"); + out.Write(" coord.xyz = rawtangent.xyz;\n" + " }}\n" + " break;\n\n"); out.Write(" case {:s}:\n", SourceRow::BinormalB); - out.Write(" coord.xyz = ((components & {}u /* VB_HAS_BINORMAL */) != 0u) ? 
rawbinormal.xyz : " - "coord.xyz;", + out.Write(" if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n" + " {{\n", VB_HAS_BINORMAL); - out.Write(" break;\n\n"); + LoadVertexAttribute(out, host_config, 6, "rawbinormal", "float3", "float3"); + out.Write(" coord.xyz = rawbinormal.xyz;\n" + " }}\n" + " break;\n\n"); for (u32 i = 0; i < 8; i++) { out.Write(" case {:s}:\n", static_cast(static_cast(SourceRow::Tex0) + i)); - out.Write( - " coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, " - "1.0, 1.0) : coord;\n", - VB_HAS_UV0 << i, i, i, i); + out.Write(" if ((components & {}u) != 0u) // VB_HAS_UV{}\n" + " {{\n", + VB_HAS_UV0 << i, i); + LoadVertexAttribute(out, host_config, 6, fmt::format("rawtex{}", i), "float3", "rawtex", + fmt::format("rawtex[{}][{}]", i / 4, i % 4)); + out.Write(" coord = float4(rawtex{}.x, rawtex{}.y, 1.0f, 1.0f);\n" + " }}\n", + i, i); out.Write(" break;\n\n"); } out.Write(" }}\n" @@ -447,14 +574,24 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& " {{\n"); out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n", VB_HAS_TEXMTXIDX0); - out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n" - " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" - " int tmp = 0;\n" - " switch (texgen) {{\n"); - for (u32 i = 0; i < num_texgen; i++) - out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i); - out.Write(" }}\n" - "\n"); + if (host_config.backend_dynamic_vertex_loader) + { + out.Write(" int tmp = int(load_input_float3_rawtex(vertex_base_offset, " + "vertex_offset_rawtex[texgen / 4][texgen % 4]).z);\n" + "\n"); + } + else + { + out.Write( + " // This is messy, due to dynamic indexing of the input texture coordinates.\n" + " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n" + " int tmp = 0;\n" + " switch (texgen) {{\n"); + for (u32 i = 0; i < num_texgen; i++) + 
out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i); + out.Write(" }}\n" + "\n"); + } out.Write(" if ({} == {:s}) {{\n", BitfieldExtract<&TexMtxInfo::projection>("texMtxInfo"), TexSize::STQ); out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n" @@ -514,6 +651,19 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode& "}}\n"); } +static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_config, u32 indent, + std::string_view name, std::string_view shader_type, + std::string_view stored_type, std::string_view offset_name) +{ + if (host_config.backend_dynamic_vertex_loader) + { + code.Write("{:{}}{} {} = load_input_{}_{}(vertex_base_offset, vertex_offset_{});\n", "", indent, + shader_type, name, shader_type, stored_type, + offset_name.empty() ? name : offset_name); + } + // else inputs are always available +} + void EnumerateVertexShaderUids(const std::function& callback) { VertexShaderUid uid; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 607de66bee..bb843a6120 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -353,7 +353,8 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun } s_current_vtx_fmt = loader->m_native_vertex_format; g_current_components = loader->m_native_components; - VertexShaderManager::SetVertexFormat(loader->m_native_components); + VertexShaderManager::SetVertexFormat(loader->m_native_components, + loader->m_native_vertex_format->GetVertexDeclaration()); // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. 
// They still need to go through vertex loading, because we need to calculate a zfreeze refrence diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index ce43235977..b74c820271 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -606,13 +606,42 @@ void VertexShaderManager::SetMaterialColorChanged(int index) nMaterialsChanged[index] = true; } -void VertexShaderManager::SetVertexFormat(u32 components) +static void UpdateValue(bool* dirty, u32* old_value, u32 new_value) { - if (components != constants.components) - { - constants.components = components; - dirty = true; - } + if (*old_value == new_value) + return; + *old_value = new_value; + *dirty = true; +} + +static void UpdateOffset(bool* dirty, bool include_components, u32* old_value, + const AttributeFormat& attribute) +{ + if (!attribute.enable) + return; + u32 new_value = attribute.offset / 4; // GPU uses uint offsets + if (include_components) + new_value |= attribute.components << 16; + UpdateValue(dirty, old_value, new_value); +} + +template <size_t N> +static void UpdateOffsets(bool* dirty, bool include_components, std::array<u32, N>* old_value, + const std::array<AttributeFormat, N>& attribute) +{ + for (size_t i = 0; i < N; i++) + UpdateOffset(dirty, include_components, &(*old_value)[i], attribute[i]); +} + +void VertexShaderManager::SetVertexFormat(u32 components, const PortableVertexDeclaration& format) +{ + UpdateValue(&dirty, &constants.components, components); + UpdateValue(&dirty, &constants.vertex_stride, format.stride / 4); + UpdateOffset(&dirty, true, &constants.vertex_offset_position, format.position); + UpdateOffset(&dirty, false, &constants.vertex_offset_posmtx, format.posmtx); + UpdateOffsets(&dirty, true, &constants.vertex_offset_texcoords, format.texcoords); + UpdateOffsets(&dirty, false, &constants.vertex_offset_colors, format.colors); + UpdateOffsets(&dirty, false, &constants.vertex_offset_normals,
format.normals); } void VertexShaderManager::SetTexMatrixInfoChanged(int index) diff --git a/Source/Core/VideoCommon/VertexShaderManager.h b/Source/Core/VideoCommon/VertexShaderManager.h index 3bddf28fb0..2a8aa7b596 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.h +++ b/Source/Core/VideoCommon/VertexShaderManager.h @@ -10,6 +10,7 @@ #include "VideoCommon/ConstantManager.h" class PointerWrap; +struct PortableVertexDeclaration; // The non-API dependent parts. class VertexShaderManager @@ -29,7 +30,7 @@ public: static void SetProjectionChanged(); static void SetMaterialColorChanged(int index); - static void SetVertexFormat(u32 components); + static void SetVertexFormat(u32 components, const PortableVertexDeclaration& format); static void SetTexMatrixInfoChanged(int index); static void SetLightingConfigChanged(); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 22817d1f61..105840bafa 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -232,6 +232,7 @@ struct VideoConfig final bool bSupportsLodBiasInSampler = false; bool bSupportsSettingObjectNames = false; bool bSupportsPartialMultisampleResolve = false; + bool bSupportsDynamicVertexLoader = false; } backend_info; // Utility