From 81b4ed2a81d7bada71a115efc7fb2347378fb975 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 20 Jul 2017 15:25:35 +1000 Subject: [PATCH] Vulkan: Uber shader support --- .../Core/VideoBackends/Vulkan/ObjectCache.cpp | 26 +- .../Core/VideoBackends/Vulkan/ObjectCache.h | 7 + .../VideoBackends/Vulkan/PostProcessing.cpp | 4 +- .../Core/VideoBackends/Vulkan/RasterFont.cpp | 6 +- Source/Core/VideoBackends/Vulkan/Renderer.cpp | 11 + .../Core/VideoBackends/Vulkan/ShaderCache.cpp | 388 +++++++++++++++++- .../Core/VideoBackends/Vulkan/ShaderCache.h | 72 +++- .../VideoBackends/Vulkan/StateTracker.cpp | 273 +++++++++--- .../Core/VideoBackends/Vulkan/StateTracker.h | 19 +- Source/Core/VideoBackends/Vulkan/Util.cpp | 40 +- .../VideoBackends/Vulkan/VertexFormat.cpp | 12 +- .../Core/VideoBackends/Vulkan/VertexFormat.h | 10 +- Source/Core/VideoBackends/Vulkan/main.cpp | 7 + Source/Core/VideoCommon/PixelShaderGen.cpp | 18 +- .../Core/VideoCommon/VertexLoaderManager.cpp | 7 - Source/Core/VideoCommon/VertexLoaderManager.h | 2 - 16 files changed, 738 insertions(+), 164 deletions(-) diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp index 4e856cb0d5..9eb7c0efd9 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp @@ -16,6 +16,7 @@ #include "Core/ConfigManager.h" +#include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/ShaderCompiler.h" #include "VideoBackends/Vulkan/StreamBuffer.h" #include "VideoBackends/Vulkan/Util.h" @@ -59,6 +60,19 @@ bool ObjectCache::Initialize() if (!m_utility_shader_vertex_buffer || !m_utility_shader_uniform_buffer) return false; + m_dummy_texture = Texture2D::Create(1, 1, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_LINEAR, + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + VkClearColorValue clear_color = {}; + VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + m_dummy_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + &clear_color, 1, &clear_range); + m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + return true; } @@ -99,17 +113,9 @@ bool ObjectCache::CreateDescriptorSetLayouts() {UBO_DESCRIPTOR_SET_BINDING_GS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_GEOMETRY_BIT}}; - // Annoying these have to be split, apparently we can't partially update an array without the - // validation layers throwing a warning. static const VkDescriptorSetLayoutBinding sampler_set_bindings[] = { - {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, - {1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, - {2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, - {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, - {4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, - {5, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, - {6, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, - {7, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}}; + {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, static_cast(NUM_PIXEL_SHADER_SAMPLERS), + VK_SHADER_STAGE_FRAGMENT_BIT}}; static const VkDescriptorSetLayoutBinding ssbo_set_bindings[] = { {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}}; diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.h b/Source/Core/VideoBackends/Vulkan/ObjectCache.h index dd7b1ed739..bfcec09166 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.h +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.h @@ -15,6 +15,7 @@ #include "Common/LinearDiskCache.h" #include "VideoBackends/Vulkan/Constants.h" +#include "VideoBackends/Vulkan/Texture2D.h" #include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/PixelShaderGen.h" @@ -62,6 +63,9 @@ public: VkSampler GetLinearSampler() const { return m_linear_sampler; } VkSampler GetSampler(const SamplerState& info); + // Dummy image for samplers that are unbound + Texture2D* GetDummyImage() const { return m_dummy_texture.get(); } + VkImageView GetDummyImageView() const { return m_dummy_texture->GetView(); } // Perform at startup, create descriptor layouts, compiles all static shaders. bool Initialize(); @@ -89,6 +93,9 @@ private: VkSampler m_linear_sampler = VK_NULL_HANDLE; std::map m_sampler_cache; + + // Dummy image for samplers that are unbound + std::unique_ptr m_dummy_texture; }; extern std::unique_ptr g_object_cache; diff --git a/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp b/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp index 991117567c..f2a8b3e0d0 100644 --- a/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp +++ b/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp @@ -149,7 +149,7 @@ static const std::string DEFAULT_FRAGMENT_SHADER_SOURCE = R"( static const std::string POSTPROCESSING_SHADER_HEADER = R"( SAMPLER_BINDING(0) uniform sampler2DArray samp0; - SAMPLER_BINDING(1) uniform sampler2D samp1; + SAMPLER_BINDING(1) uniform sampler2DArray samp1; layout(location = 0) in float3 uv0; layout(location = 1) in float4 col0; @@ -176,7 +176,7 @@ static const std::string POSTPROCESSING_SHADER_HEADER = R"( float4 SampleFontLocation(float2 location) { - return texture(samp1, location); + return texture(samp1, float3(location, 0.0)); } float2 GetResolution() diff --git a/Source/Core/VideoBackends/Vulkan/RasterFont.cpp b/Source/Core/VideoBackends/Vulkan/RasterFont.cpp index def32e5050..97ab4dd6d8 100644 --- a/Source/Core/VideoBackends/Vulkan/RasterFont.cpp +++ b/Source/Core/VideoBackends/Vulkan/RasterFont.cpp @@ -150,7 +150,7 @@ layout(std140, push_constant) uniform PCBlock { vec4 color; } PC; -layout(set = 1, binding = 0) uniform sampler2D samp0; +layout(set = 1, binding = 0) uniform sampler2DArray samp0; layout(location = 0) in vec2 uv0; @@ -158,7 +158,7 @@ layout(location = 0) out vec4 ocol0; void main() { - ocol0 = texture(samp0, uv0) * PC.color; + ocol0 = texture(samp0, float3(uv0, 0.0)) * PC.color; } )"; @@ -209,7 +209,7 @@ bool RasterFont::CreateTexture() // create the actual texture object m_texture = Texture2D::Create(CHARACTER_WIDTH * CHARACTER_COUNT, CHARACTER_HEIGHT, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); if (!m_texture) return false; diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index 4e72d94a61..de281e7e38 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -589,6 +589,9 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height // Clean up stale textures. TextureCache::GetInstance()->Cleanup(frameCount); + + // Pull in now-ready async shaders. + g_shader_cache->RetrieveAsyncShaders(); } void Renderer::TransitionBuffersForSwap(const TargetRectangle& scaled_rect, @@ -1132,6 +1135,8 @@ void Renderer::CheckForConfigChanges() bool old_force_filtering = g_ActiveConfig.bForceFiltering; bool old_use_xfb = g_ActiveConfig.bUseXFB; bool old_use_realxfb = g_ActiveConfig.bUseRealXFB; + bool old_vertex_ubershaders = g_ActiveConfig.bForceVertexUberShaders; + bool old_pixel_ubershaders = g_ActiveConfig.bForcePixelUberShaders; // Copy g_Config to g_ActiveConfig. // NOTE: This can potentially race with the UI thread, however if it does, the changes will be @@ -1145,6 +1150,8 @@ void Renderer::CheckForConfigChanges() bool aspect_changed = old_aspect_ratio != g_ActiveConfig.iAspectRatio; bool use_xfb_changed = old_use_xfb != g_ActiveConfig.bUseXFB; bool use_realxfb_changed = old_use_realxfb != g_ActiveConfig.bUseRealXFB; + bool ubershaders_changed = old_vertex_ubershaders != g_ActiveConfig.bForceVertexUberShaders || + old_pixel_ubershaders != g_ActiveConfig.bForcePixelUberShaders; // Update texture cache settings with any changed options. TextureCache::GetInstance()->OnConfigChanged(g_ActiveConfig); @@ -1190,6 +1197,10 @@ void Renderer::CheckForConfigChanges() if (anisotropy_changed || force_texture_filtering_changed) ResetSamplerStates(); + // Clear UID state if ubershaders are toggled. + if (ubershaders_changed) + StateTracker::GetInstance()->ClearShaders(); + // Check for a changed post-processing shader and recompile if needed. static_cast(m_post_processor.get())->UpdateConfig(); } diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp index 79a77458b6..07b71a6202 100644 --- a/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp @@ -15,13 +15,20 @@ #include "Common/MsgHandler.h" #include "Core/ConfigManager.h" +#include "Core/Host.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" #include "VideoBackends/Vulkan/ShaderCompiler.h" #include "VideoBackends/Vulkan/StreamBuffer.h" #include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" +#include "VideoCommon/AsyncShaderCompiler.h" +#include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/UberShaderPixel.h" +#include "VideoCommon/UberShaderVertex.h" +#include "VideoCommon/VertexLoaderManager.h" namespace Vulkan { @@ -55,9 +62,22 @@ bool ShaderCache::Initialize() if (!CompileSharedShaders()) return false; + m_async_shader_compiler = std::make_unique(); + if (g_ActiveConfig.GetShaderCompilerThreads() > 0) + m_async_shader_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); + return true; } +void ShaderCache::Shutdown() +{ + if (m_async_shader_compiler) + { + m_async_shader_compiler->StopWorkerThreads(); + m_async_shader_compiler->RetrieveWorkItems(); + } +} + static bool IsStripPrimitiveTopology(VkPrimitiveTopology topology) { return topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP || @@ -365,13 +385,34 @@ std::pair ShaderCache::GetPipelineWithCacheResult(const Pipeli { auto iter = m_pipeline_objects.find(info); if (iter != m_pipeline_objects.end()) - return {iter->second, true}; + { + // If it's background compiling, ignore it, and recompile it synchronously. + if (!iter->second.second) + return std::make_pair(iter->second.first, true); + else + m_pipeline_objects.erase(iter); + } VkPipeline pipeline = CreatePipeline(info); - m_pipeline_objects.emplace(info, pipeline); + m_pipeline_objects.emplace(info, std::make_pair(pipeline, false)); + _assert_(pipeline != VK_NULL_HANDLE); return {pipeline, false}; } +std::pair, bool> +ShaderCache::GetPipelineWithCacheResultAsync(const PipelineInfo& info) +{ + auto iter = m_pipeline_objects.find(info); + if (iter != m_pipeline_objects.end()) + return std::make_pair(iter->second, true); + + // Kick a job off. + m_async_shader_compiler->QueueWorkItem( + m_async_shader_compiler->CreateWorkItem(info)); + m_pipeline_objects.emplace(info, std::make_pair(static_cast(VK_NULL_HANDLE), true)); + return std::make_pair(std::make_pair(static_cast(VK_NULL_HANDLE), true), false); +} + VkPipeline ShaderCache::CreateComputePipeline(const ComputePipelineInfo& info) { VkComputePipelineCreateInfo pipeline_info = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, @@ -409,10 +450,11 @@ VkPipeline ShaderCache::GetComputePipeline(const ComputePipelineInfo& info) void ShaderCache::ClearPipelineCache() { + // TODO: Stop any async compiling happening. for (const auto& it : m_pipeline_objects) { - if (it.second != VK_NULL_HANDLE) - vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr); + if (it.second.first != VK_NULL_HANDLE) + vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second.first, nullptr); } m_pipeline_objects.clear(); @@ -620,7 +662,10 @@ void ShaderCache::SavePipelineCache() template struct ShaderCacheReader : public LinearDiskCacheReader { - ShaderCacheReader(std::map& shader_map) : m_shader_map(shader_map) {} + ShaderCacheReader(std::map>& shader_map) + : m_shader_map(shader_map) + { + } void Read(const Uid& key, const u32* value, u32 value_size) override { // We don't insert null modules into the shader map since creation could succeed later on. @@ -630,10 +675,10 @@ struct ShaderCacheReader : public LinearDiskCacheReader if (module == VK_NULL_HANDLE) return; - m_shader_map.emplace(key, module); + m_shader_map.emplace(key, std::make_pair(module, false)); } - std::map& m_shader_map; + std::map>& m_shader_map; }; void ShaderCache::LoadShaderCaches() @@ -653,6 +698,13 @@ void ShaderCache::LoadShaderCaches() gs_reader); } + ShaderCacheReader uber_vs_reader(m_uber_vs_cache.shader_map); + m_uber_vs_cache.disk_cache.OpenAndRead( + GetDiskShaderCacheFileName(APIType::Vulkan, "UberVS", false, true), uber_vs_reader); + ShaderCacheReader uber_ps_reader(m_uber_ps_cache.shader_map); + m_uber_ps_cache.disk_cache.OpenAndRead( + GetDiskShaderCacheFileName(APIType::Vulkan, "UberPS", false, true), uber_ps_reader); + SETSTAT(stats.numPixelShadersCreated, static_cast(m_ps_cache.shader_map.size())); SETSTAT(stats.numPixelShadersAlive, static_cast(m_ps_cache.shader_map.size())); SETSTAT(stats.numVertexShadersCreated, static_cast(m_vs_cache.shader_map.size())); @@ -666,8 +718,8 @@ static void DestroyShaderCache(T& cache) cache.disk_cache.Close(); for (const auto& it : cache.shader_map) { - if (it.second != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second, nullptr); + if (it.second.first != VK_NULL_HANDLE) + vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.first, nullptr); } cache.shader_map.clear(); } @@ -680,6 +732,9 @@ void ShaderCache::DestroyShaderCaches() if (g_vulkan_context->SupportsGeometryShaders()) DestroyShaderCache(m_gs_cache); + DestroyShaderCache(m_uber_vs_cache); + DestroyShaderCache(m_uber_ps_cache); + SETSTAT(stats.numPixelShadersCreated, 0); SETSTAT(stats.numPixelShadersAlive, 0); SETSTAT(stats.numVertexShadersCreated, 0); @@ -690,7 +745,13 @@ VkShaderModule ShaderCache::GetVertexShaderForUid(const VertexShaderUid& uid) { auto it = m_vs_cache.shader_map.find(uid); if (it != m_vs_cache.shader_map.end()) - return it->second; + { + // If it's pending, compile it synchronously. + if (!it->second.second) + return it->second.first; + else + m_vs_cache.shader_map.erase(it); + } // Not in the cache, so compile the shader. ShaderCompiler::SPIRVCodeVector spv; @@ -712,7 +773,7 @@ VkShaderModule ShaderCache::GetVertexShaderForUid(const VertexShaderUid& uid) } // We still insert null entries to prevent further compilation attempts. - m_vs_cache.shader_map.emplace(uid, module); + m_vs_cache.shader_map.emplace(uid, std::make_pair(module, false)); return module; } @@ -721,7 +782,13 @@ VkShaderModule ShaderCache::GetGeometryShaderForUid(const GeometryShaderUid& uid _assert_(g_vulkan_context->SupportsGeometryShaders()); auto it = m_gs_cache.shader_map.find(uid); if (it != m_gs_cache.shader_map.end()) - return it->second; + { + // If it's pending, compile it synchronously. + if (!it->second.second) + return it->second.first; + else + m_gs_cache.shader_map.erase(it); + } // Not in the cache, so compile the shader. ShaderCompiler::SPIRVCodeVector spv; @@ -739,7 +806,7 @@ VkShaderModule ShaderCache::GetGeometryShaderForUid(const GeometryShaderUid& uid } // We still insert null entries to prevent further compilation attempts. - m_gs_cache.shader_map.emplace(uid, module); + m_gs_cache.shader_map.emplace(uid, std::make_pair(module, false)); return module; } @@ -747,7 +814,13 @@ VkShaderModule ShaderCache::GetPixelShaderForUid(const PixelShaderUid& uid) { auto it = m_ps_cache.shader_map.find(uid); if (it != m_ps_cache.shader_map.end()) - return it->second; + { + // If it's pending, compile it synchronously. + if (!it->second.second) + return it->second.first; + else + m_ps_cache.shader_map.erase(it); + } // Not in the cache, so compile the shader. ShaderCompiler::SPIRVCodeVector spv; @@ -769,7 +842,79 @@ VkShaderModule ShaderCache::GetPixelShaderForUid(const PixelShaderUid& uid) } // We still insert null entries to prevent further compilation attempts. - m_ps_cache.shader_map.emplace(uid, module); + m_ps_cache.shader_map.emplace(uid, std::make_pair(module, false)); + return module; +} + +VkShaderModule ShaderCache::GetVertexUberShaderForUid(const UberShader::VertexShaderUid& uid) +{ + auto it = m_uber_vs_cache.shader_map.find(uid); + if (it != m_uber_vs_cache.shader_map.end()) + { + // If it's pending, compile it synchronously. + if (!it->second.second) + return it->second.first; + else + m_uber_vs_cache.shader_map.erase(it); + } + + // Not in the cache, so compile the shader. + ShaderCompiler::SPIRVCodeVector spv; + VkShaderModule module = VK_NULL_HANDLE; + ShaderCode source_code = UberShader::GenVertexShader( + APIType::Vulkan, ShaderHostConfig::GetCurrent(), uid.GetUidData()); + if (ShaderCompiler::CompileVertexShader(&spv, source_code.GetBuffer().c_str(), + source_code.GetBuffer().length())) + { + module = Util::CreateShaderModule(spv.data(), spv.size()); + + // Append to shader cache if it created successfully. + if (module != VK_NULL_HANDLE) + { + m_uber_vs_cache.disk_cache.Append(uid, spv.data(), static_cast(spv.size())); + INCSTAT(stats.numVertexShadersCreated); + INCSTAT(stats.numVertexShadersAlive); + } + } + + // We still insert null entries to prevent further compilation attempts. + m_uber_vs_cache.shader_map.emplace(uid, std::make_pair(module, false)); + return module; +} + +VkShaderModule ShaderCache::GetPixelUberShaderForUid(const UberShader::PixelShaderUid& uid) +{ + auto it = m_uber_ps_cache.shader_map.find(uid); + if (it != m_uber_ps_cache.shader_map.end()) + { + // If it's pending, compile it synchronously. + if (!it->second.second) + return it->second.first; + else + m_uber_ps_cache.shader_map.erase(it); + } + + // Not in the cache, so compile the shader. + ShaderCompiler::SPIRVCodeVector spv; + VkShaderModule module = VK_NULL_HANDLE; + ShaderCode source_code = + UberShader::GenPixelShader(APIType::Vulkan, ShaderHostConfig::GetCurrent(), uid.GetUidData()); + if (ShaderCompiler::CompileFragmentShader(&spv, source_code.GetBuffer().c_str(), + source_code.GetBuffer().length())) + { + module = Util::CreateShaderModule(spv.data(), spv.size()); + + // Append to shader cache if it created successfully. + if (module != VK_NULL_HANDLE) + { + m_uber_ps_cache.disk_cache.Append(uid, spv.data(), static_cast(spv.size())); + INCSTAT(stats.numPixelShadersCreated); + INCSTAT(stats.numPixelShadersAlive); + } + } + + // We still insert null entries to prevent further compilation attempts. + m_uber_ps_cache.shader_map.emplace(uid, std::make_pair(module, false)); return module; } @@ -782,6 +927,9 @@ void ShaderCache::RecompileSharedShaders() void ShaderCache::ReloadShaderAndPipelineCaches() { + m_async_shader_compiler->WaitUntilCompletion(); + m_async_shader_compiler->RetrieveWorkItems(); + SavePipelineCache(); DestroyShaderCaches(); DestroyPipelineCache(); @@ -795,6 +943,9 @@ void ShaderCache::ReloadShaderAndPipelineCaches() { CreatePipelineCache(); } + + if (g_ActiveConfig.CanPrecompileUberShaders()) + PrecompileUberShaders(); } std::string ShaderCache::GetUtilityShaderHeader() const @@ -1026,4 +1177,211 @@ void ShaderCache::DestroySharedShaders() DestroyShader(m_screen_quad_geometry_shader); DestroyShader(m_passthrough_geometry_shader); } + +void ShaderCache::CreateDummyPipeline(const UberShader::VertexShaderUid& vuid, + const GeometryShaderUid& guid, + const UberShader::PixelShaderUid& puid) +{ + PortableVertexDeclaration vertex_decl; + std::memset(&vertex_decl, 0, sizeof(vertex_decl)); + + PipelineInfo pinfo; + pinfo.vertex_format = + static_cast(VertexLoaderManager::GetUberVertexFormat(vertex_decl)); + pinfo.pipeline_layout = g_object_cache->GetPipelineLayout( + g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation() ? + PIPELINE_LAYOUT_BBOX : + PIPELINE_LAYOUT_STANDARD); + pinfo.vs = GetVertexUberShaderForUid(vuid); + pinfo.gs = (!guid.GetUidData()->IsPassthrough() && g_vulkan_context->SupportsGeometryShaders()) ? + GetGeometryShaderForUid(guid) : + VK_NULL_HANDLE; + pinfo.ps = GetPixelUberShaderForUid(puid); + pinfo.render_pass = FramebufferManager::GetInstance()->GetEFBLoadRenderPass(); + pinfo.rasterization_state.bits = Util::GetNoCullRasterizationState().bits; + pinfo.depth_stencil_state.bits = Util::GetNoDepthTestingDepthStencilState().bits; + pinfo.blend_state.hex = Util::GetNoBlendingBlendState().hex; + switch (guid.GetUidData()->primitive_type) + { + case PRIMITIVE_POINTS: + pinfo.primitive_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + break; + case PRIMITIVE_LINES: + pinfo.primitive_topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + break; + case PRIMITIVE_TRIANGLES: + pinfo.primitive_topology = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP : + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + break; + } + GetPipelineWithCacheResultAsync(pinfo); +} + +void ShaderCache::PrecompileUberShaders() +{ + UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& vuid) { + UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& puid) { + // UIDs must have compatible texgens, a mismatching combination will never be queried. + if (vuid.GetUidData()->num_texgens != puid.GetUidData()->num_texgens) + return; + + EnumerateGeometryShaderUids([&](const GeometryShaderUid& guid) { + if (guid.GetUidData()->numTexGens != vuid.GetUidData()->num_texgens) + return; + + CreateDummyPipeline(vuid, guid, puid); + }); + }); + }); + + WaitForBackgroundCompilesToComplete(); +} + +void ShaderCache::WaitForBackgroundCompilesToComplete() +{ + m_async_shader_compiler->WaitUntilCompletion([](size_t completed, size_t total) { + Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(), + static_cast(completed), static_cast(total)); + }); + m_async_shader_compiler->RetrieveWorkItems(); + Host_UpdateProgressDialog("", -1, -1); +} + +void ShaderCache::RetrieveAsyncShaders() +{ + m_async_shader_compiler->RetrieveWorkItems(); +} + +std::pair ShaderCache::GetVertexShaderForUidAsync(const VertexShaderUid& uid) +{ + auto it = m_vs_cache.shader_map.find(uid); + if (it != m_vs_cache.shader_map.end()) + return it->second; + + // Kick a compile job off. + m_async_shader_compiler->QueueWorkItem( + m_async_shader_compiler->CreateWorkItem(uid)); + m_vs_cache.shader_map.emplace(uid, + std::make_pair(static_cast(VK_NULL_HANDLE), true)); + return std::make_pair(VK_NULL_HANDLE, true); +} + +std::pair ShaderCache::GetPixelShaderForUidAsync(const PixelShaderUid& uid) +{ + auto it = m_ps_cache.shader_map.find(uid); + if (it != m_ps_cache.shader_map.end()) + return it->second; + + // Kick a compile job off. + m_async_shader_compiler->QueueWorkItem( + m_async_shader_compiler->CreateWorkItem(uid)); + m_ps_cache.shader_map.emplace(uid, + std::make_pair(static_cast(VK_NULL_HANDLE), true)); + return std::make_pair(VK_NULL_HANDLE, true); +} + +bool ShaderCache::VertexShaderCompilerWorkItem::Compile() +{ + ShaderCode code = + GenerateVertexShaderCode(APIType::Vulkan, ShaderHostConfig::GetCurrent(), m_uid.GetUidData()); + if (!ShaderCompiler::CompileVertexShader(&m_spirv, code.GetBuffer().c_str(), + code.GetBuffer().length())) + return true; + + m_module = Util::CreateShaderModule(m_spirv.data(), m_spirv.size()); + return true; +} + +void ShaderCache::VertexShaderCompilerWorkItem::Retrieve() +{ + auto it = g_shader_cache->m_vs_cache.shader_map.find(m_uid); + if (it == g_shader_cache->m_vs_cache.shader_map.end()) + { + g_shader_cache->m_vs_cache.shader_map.emplace(m_uid, std::make_pair(m_module, false)); + g_shader_cache->m_vs_cache.disk_cache.Append(m_uid, m_spirv.data(), + static_cast(m_spirv.size())); + return; + } + + // The main thread may have also compiled this shader. + if (!it->second.second) + { + if (m_module != VK_NULL_HANDLE) + vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_module, nullptr); + return; + } + + // No longer pending. + it->second.first = m_module; + it->second.second = false; + g_shader_cache->m_vs_cache.disk_cache.Append(m_uid, m_spirv.data(), + static_cast(m_spirv.size())); +} + +bool ShaderCache::PixelShaderCompilerWorkItem::Compile() +{ + ShaderCode code = + GeneratePixelShaderCode(APIType::Vulkan, ShaderHostConfig::GetCurrent(), m_uid.GetUidData()); + if (!ShaderCompiler::CompileFragmentShader(&m_spirv, code.GetBuffer().c_str(), + code.GetBuffer().length())) + return true; + + m_module = Util::CreateShaderModule(m_spirv.data(), m_spirv.size()); + return true; +} + +void ShaderCache::PixelShaderCompilerWorkItem::Retrieve() +{ + auto it = g_shader_cache->m_ps_cache.shader_map.find(m_uid); + if (it == g_shader_cache->m_ps_cache.shader_map.end()) + { + g_shader_cache->m_ps_cache.shader_map.emplace(m_uid, std::make_pair(m_module, false)); + g_shader_cache->m_ps_cache.disk_cache.Append(m_uid, m_spirv.data(), + static_cast(m_spirv.size())); + return; + } + + // The main thread may have also compiled this shader. + if (!it->second.second) + { + if (m_module != VK_NULL_HANDLE) + vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_module, nullptr); + return; + } + + // No longer pending. + it->second.first = m_module; + it->second.second = false; + g_shader_cache->m_ps_cache.disk_cache.Append(m_uid, m_spirv.data(), + static_cast(m_spirv.size())); +} + +bool ShaderCache::PipelineCompilerWorkItem::Compile() +{ + m_pipeline = g_shader_cache->CreatePipeline(m_info); + return true; +} + +void ShaderCache::PipelineCompilerWorkItem::Retrieve() +{ + auto it = g_shader_cache->m_pipeline_objects.find(m_info); + if (it == g_shader_cache->m_pipeline_objects.end()) + { + g_shader_cache->m_pipeline_objects.emplace(m_info, std::make_pair(m_pipeline, false)); + return; + } + + // The main thread may have also compiled this shader. + if (!it->second.second) + { + if (m_pipeline != VK_NULL_HANDLE) + vkDestroyPipeline(g_vulkan_context->GetDevice(), m_pipeline, nullptr); + return; + } + + // No longer pending. + it->second.first = m_pipeline; + it->second.second = false; +} } diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCache.h b/Source/Core/VideoBackends/Vulkan/ShaderCache.h index 7a83472e38..c31ad08616 100644 --- a/Source/Core/VideoBackends/Vulkan/ShaderCache.h +++ b/Source/Core/VideoBackends/Vulkan/ShaderCache.h @@ -10,16 +10,21 @@ #include #include #include +#include #include "Common/CommonTypes.h" #include "Common/LinearDiskCache.h" #include "VideoBackends/Vulkan/Constants.h" #include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/ShaderCompiler.h" +#include "VideoCommon/AsyncShaderCompiler.h" #include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/PixelShaderGen.h" #include "VideoCommon/RenderState.h" +#include "VideoCommon/UberShaderPixel.h" +#include "VideoCommon/UberShaderVertex.h" #include "VideoCommon/VertexShaderGen.h" namespace Vulkan @@ -92,8 +97,17 @@ public: VkShaderModule GetGeometryShaderForUid(const GeometryShaderUid& uid); VkShaderModule GetPixelShaderForUid(const PixelShaderUid& uid); + // Ubershader caches + VkShaderModule GetVertexUberShaderForUid(const UberShader::VertexShaderUid& uid); + VkShaderModule GetPixelUberShaderForUid(const UberShader::PixelShaderUid& uid); + + // Accesses ShaderGen shader caches asynchronously + std::pair GetVertexShaderForUidAsync(const VertexShaderUid& uid); + std::pair GetPixelShaderForUidAsync(const PixelShaderUid& uid); + // Perform at startup, create descriptor layouts, compiles all static shaders. bool Initialize(); + void Shutdown(); // Creates a pipeline for the specified description. The resulting pipeline, if successful // is not stored anywhere, this is left up to the caller. @@ -106,6 +120,8 @@ public: // resulted in a pipeline being created, the second field of the return value will be false, // otherwise for a cache hit it will be true. std::pair GetPipelineWithCacheResult(const PipelineInfo& info); + std::pair, bool> + GetPipelineWithCacheResultAsync(const PipelineInfo& info); // Creates a compute pipeline, and does not track the handle. VkPipeline CreateComputePipeline(const ComputePipelineInfo& info); @@ -134,6 +150,10 @@ public: VkShaderModule GetPassthroughVertexShader() const { return m_passthrough_vertex_shader; } VkShaderModule GetScreenQuadGeometryShader() const { return m_screen_quad_geometry_shader; } VkShaderModule GetPassthroughGeometryShader() const { return m_passthrough_geometry_shader; } + void PrecompileUberShaders(); + void WaitForBackgroundCompilesToComplete(); + void RetrieveAsyncShaders(); + private: bool CreatePipelineCache(); bool LoadPipelineCache(); @@ -144,17 +164,26 @@ private: bool CompileSharedShaders(); void DestroySharedShaders(); + // We generate a dummy pipeline with some defaults in the blend/depth states, + // that way the driver is forced to compile something (looking at you, NVIDIA). + // It can then hopefully re-use part of this pipeline for others in the future. + void CreateDummyPipeline(const UberShader::VertexShaderUid& vuid, const GeometryShaderUid& guid, + const UberShader::PixelShaderUid& puid); + template struct ShaderModuleCache { - std::map shader_map; + std::map> shader_map; LinearDiskCache disk_cache; }; ShaderModuleCache m_vs_cache; ShaderModuleCache m_gs_cache; ShaderModuleCache m_ps_cache; + ShaderModuleCache m_uber_vs_cache; + ShaderModuleCache m_uber_ps_cache; - std::unordered_map m_pipeline_objects; + std::unordered_map, PipelineInfoHash> + m_pipeline_objects; std::unordered_map m_compute_pipeline_objects; VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; @@ -165,6 +194,45 @@ private: VkShaderModule m_passthrough_vertex_shader = VK_NULL_HANDLE; VkShaderModule m_screen_quad_geometry_shader = VK_NULL_HANDLE; VkShaderModule m_passthrough_geometry_shader = VK_NULL_HANDLE; + + std::unique_ptr m_async_shader_compiler; + + // TODO: Use templates to reduce the number of these classes. + class VertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem + { + public: + VertexShaderCompilerWorkItem(const VertexShaderUid& uid) : m_uid(uid) {} + bool Compile() override; + void Retrieve() override; + + private: + VertexShaderUid m_uid; + ShaderCompiler::SPIRVCodeVector m_spirv; + VkShaderModule m_module = VK_NULL_HANDLE; + }; + class PixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem + { + public: + PixelShaderCompilerWorkItem(const PixelShaderUid& uid) : m_uid(uid) {} + bool Compile() override; + void Retrieve() override; + + private: + PixelShaderUid m_uid; + ShaderCompiler::SPIRVCodeVector m_spirv; + VkShaderModule m_module = VK_NULL_HANDLE; + }; + class PipelineCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem + { + public: + PipelineCompilerWorkItem(const PipelineInfo& info) : m_info(info) {} + bool Compile() override; + void Retrieve() override; + + private: + PipelineInfo m_info; + VkPipeline m_pipeline; + }; }; extern std::unique_ptr g_shader_cache; diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp index a03437a697..c0661422f1 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp @@ -22,6 +22,7 @@ #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" @@ -77,12 +78,13 @@ bool StateTracker::Initialize() m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD); m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS; m_bbox_enabled = false; + ClearShaders(); // Initialize all samplers to point by default for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) { m_bindings.ps_samplers[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - m_bindings.ps_samplers[i].imageView = VK_NULL_HANDLE; + m_bindings.ps_samplers[i].imageView = g_object_cache->GetDummyImageView(); m_bindings.ps_samplers[i].sampler = g_object_cache->GetPointSampler(); } @@ -178,7 +180,8 @@ bool StateTracker::PrecachePipelineUID(const SerializedPipelineUID& uid) // Need to create the vertex declaration first, rather than deferring to when a game creates a // vertex loader that uses this format, since we need it to create a pipeline. - pinfo.vertex_format = VertexFormat::GetOrCreateMatchingFormat(uid.vertex_decl); + pinfo.vertex_format = + static_cast(VertexLoaderManager::GetOrCreateMatchingFormat(uid.vertex_decl)); pinfo.pipeline_layout = uid.ps_uid.GetUidData()->bounding_box ? g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX) : g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD); @@ -267,11 +270,11 @@ void StateTracker::SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& ren void StateTracker::SetVertexFormat(const VertexFormat* vertex_format) { - if (m_pipeline_state.vertex_format == vertex_format) + if (m_vertex_format == vertex_format) return; - m_pipeline_state.vertex_format = vertex_format; - m_dirty_flags |= DIRTY_FLAG_PIPELINE; + m_vertex_format = vertex_format; + UpdatePipelineVertexFormat(); } void StateTracker::SetPrimitiveTopology(VkPrimitiveTopology primitive_topology) @@ -323,14 +326,94 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type) { VertexShaderUid vs_uid = GetVertexShaderUid(); PixelShaderUid ps_uid = GetPixelShaderUid(); - bool changed = false; - if (vs_uid != m_vs_uid) + bool use_ubershaders = g_ActiveConfig.bDisableSpecializedShaders; + if (g_ActiveConfig.CanBackgroundCompileShaders() && !g_ActiveConfig.bDisableSpecializedShaders) { - m_pipeline_state.vs = g_shader_cache->GetVertexShaderForUid(vs_uid); - m_vs_uid = vs_uid; - changed = true; + // Look up both VS and PS, and check if we can compile it asynchronously. + auto vs = g_shader_cache->GetVertexShaderForUidAsync(vs_uid); + auto ps = g_shader_cache->GetPixelShaderForUidAsync(ps_uid); + if (vs.second || ps.second) + { + // One of the shaders is still pending. Use the ubershader for both. + use_ubershaders = true; + } + else + { + // Use the standard shaders for both. + if (m_pipeline_state.vs != vs.first) + { + m_pipeline_state.vs = vs.first; + m_vs_uid = vs_uid; + changed = true; + } + if (m_pipeline_state.ps != ps.first) + { + m_pipeline_state.ps = ps.first; + m_ps_uid = ps_uid; + changed = true; + } + } + } + else + { + // Normal shader path. No ubershaders. + if (vs_uid != m_vs_uid) + { + m_vs_uid = vs_uid; + m_pipeline_state.vs = g_shader_cache->GetVertexShaderForUid(vs_uid); + changed = true; + } + if (ps_uid != m_ps_uid) + { + m_ps_uid = ps_uid; + m_pipeline_state.ps = g_shader_cache->GetPixelShaderForUid(ps_uid); + changed = true; + } + } + + // Ubershader fallback? + bool uber_vertex_shader = use_ubershaders || g_ActiveConfig.bForceVertexUberShaders; + bool uber_pixel_shader = use_ubershaders || g_ActiveConfig.bForcePixelUberShaders; + bool using_ubershaders = uber_vertex_shader || uber_pixel_shader; + if (!g_ActiveConfig.CanUseUberShaders()) + { + // Per-pixel lighting disables ubershaders. + uber_vertex_shader = false; + uber_pixel_shader = false; + using_ubershaders = false; + } + + // Switching to/from ubershaders? Have to adjust the vertex format and pipeline layout. + if (using_ubershaders != m_using_ubershaders) + { + m_using_ubershaders = using_ubershaders; + UpdatePipelineLayout(); + UpdatePipelineVertexFormat(); + } + + if (uber_vertex_shader) + { + UberShader::VertexShaderUid uber_vs_uid = UberShader::GetVertexShaderUid(); + VkShaderModule vs = g_shader_cache->GetVertexUberShaderForUid(uber_vs_uid); + if (vs != m_pipeline_state.vs) + { + m_uber_vs_uid = uber_vs_uid; + m_pipeline_state.vs = vs; + changed = true; + } + } + if (uber_pixel_shader) + { + UberShader::PixelShaderUid uber_ps_uid = UberShader::GetPixelShaderUid(); + VkShaderModule ps = g_shader_cache->GetPixelUberShaderForUid(uber_ps_uid); + if (ps != m_pipeline_state.ps) + { + m_uber_ps_uid = uber_ps_uid; + m_pipeline_state.ps = ps; + changed = true; + } } if (g_vulkan_context->SupportsGeometryShaders()) @@ -338,29 +421,39 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type) GeometryShaderUid gs_uid = GetGeometryShaderUid(gx_primitive_type); if (gs_uid != m_gs_uid) { + m_gs_uid = gs_uid; if (gs_uid.GetUidData()->IsPassthrough()) m_pipeline_state.gs = VK_NULL_HANDLE; else m_pipeline_state.gs = g_shader_cache->GetGeometryShaderForUid(gs_uid); - m_gs_uid = gs_uid; changed = true; } } - if (ps_uid != m_ps_uid) - { - m_pipeline_state.ps = g_shader_cache->GetPixelShaderForUid(ps_uid); - m_ps_uid = ps_uid; - changed = true; - } - if (changed) m_dirty_flags |= DIRTY_FLAG_PIPELINE; return changed; } +void StateTracker::ClearShaders() +{ + // Set the UIDs to something that will never match, so on the first access they are checked. + std::memset(&m_vs_uid, 0xFF, sizeof(m_vs_uid)); + std::memset(&m_gs_uid, 0xFF, sizeof(m_gs_uid)); + std::memset(&m_ps_uid, 0xFF, sizeof(m_ps_uid)); + std::memset(&m_uber_vs_uid, 0xFF, sizeof(m_uber_vs_uid)); + std::memset(&m_uber_ps_uid, 0xFF, sizeof(m_uber_ps_uid)); + + m_pipeline_state.vs = VK_NULL_HANDLE; + m_pipeline_state.gs = VK_NULL_HANDLE; + m_pipeline_state.ps = VK_NULL_HANDLE; + m_pipeline_state.vertex_format = nullptr; + + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + void StateTracker::UpdateVertexShaderConstants() { if (!VertexShaderManager::dirty || !ReserveConstantStorage()) @@ -557,24 +650,8 @@ void StateTracker::SetBBoxEnable(bool enable) if (m_bbox_enabled == enable) return; - // Change the number of active descriptor sets, as well as the pipeline layout - if (enable) - { - m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX); - m_num_active_descriptor_sets = NUM_GX_DRAW_WITH_BBOX_DESCRIPTOR_SETS; - - // The bbox buffer never changes, so we defer descriptor updates until it is enabled. - if (m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE) - m_dirty_flags |= DIRTY_FLAG_PS_SSBO; - } - else - { - m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD); - m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS; - } - - m_dirty_flags |= DIRTY_FLAG_PIPELINE | DIRTY_FLAG_DESCRIPTOR_SET_BINDING; m_bbox_enabled = enable; + UpdatePipelineLayout(); } void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range) @@ -590,7 +667,7 @@ void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceS m_bindings.ps_ssbo.range = range; // Defer descriptor update until bbox is actually enabled. - if (m_bbox_enabled) + if (IsSSBODescriptorRequired()) m_dirty_flags |= DIRTY_FLAG_PS_SSBO; } @@ -599,7 +676,7 @@ void StateTracker::UnbindTexture(VkImageView view) for (VkDescriptorImageInfo& it : m_bindings.ps_samplers) { if (it.imageView == view) - it.imageView = VK_NULL_HANDLE; + it.imageView = g_object_cache->GetDummyImageView(); } } @@ -609,7 +686,7 @@ void StateTracker::InvalidateDescriptorSets() m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS; // Defer SSBO descriptor update until bbox is actually enabled. - if (!m_bbox_enabled) + if (!IsSSBODescriptorRequired()) m_dirty_flags &= ~DIRTY_FLAG_PS_SSBO; } @@ -886,15 +963,49 @@ void StateTracker::EndClearRenderPass() EndRenderPass(); } -VkPipeline StateTracker::GetPipelineAndCacheUID(const PipelineInfo& info) +VkPipeline StateTracker::GetPipelineAndCacheUID() { - auto result = g_shader_cache->GetPipelineWithCacheResult(info); + // We can't cache ubershader uids, only normal shader uids. + if (g_ActiveConfig.CanBackgroundCompileShaders() && !m_using_ubershaders) + { + // Append to UID cache if it is a new pipeline. + auto result = g_shader_cache->GetPipelineWithCacheResultAsync(m_pipeline_state); + if (!result.second && g_ActiveConfig.bShaderCache) + AppendToPipelineUIDCache(m_pipeline_state); - // Add to the UID cache if it is a new pipeline. - if (!result.second && g_ActiveConfig.bShaderCache) - AppendToPipelineUIDCache(info); + // Still waiting for the pipeline to compile? + if (!result.first.second) + return result.first.first; - return result.first; + // Use ubershader instead. + m_using_ubershaders = true; + UpdatePipelineLayout(); + UpdatePipelineVertexFormat(); + + PipelineInfo uber_info = m_pipeline_state; + UberShader::VertexShaderUid uber_vuid = UberShader::GetVertexShaderUid(); + UberShader::PixelShaderUid uber_puid = UberShader::GetPixelShaderUid(); + uber_info.vs = g_shader_cache->GetVertexUberShaderForUid(uber_vuid); + uber_info.ps = g_shader_cache->GetPixelUberShaderForUid(uber_puid); + + auto uber_result = g_shader_cache->GetPipelineWithCacheResult(uber_info); + return uber_result.first; + } + else + { + // Add to the UID cache if it is a new pipeline. + auto result = g_shader_cache->GetPipelineWithCacheResult(m_pipeline_state); + if (!result.second && !m_using_ubershaders && g_ActiveConfig.bShaderCache) + AppendToPipelineUIDCache(m_pipeline_state); + + return result.first; + } +} + +bool StateTracker::IsSSBODescriptorRequired() const +{ + return m_bbox_enabled || (m_using_ubershaders && g_ActiveConfig.bBBoxEnable && + g_ActiveConfig.BBoxUseFragmentShaderImplementation()); } bool StateTracker::UpdatePipeline() @@ -904,16 +1015,56 @@ bool StateTracker::UpdatePipeline() return false; // Grab a new pipeline object, this can fail. - m_pipeline_object = GetPipelineAndCacheUID(m_pipeline_state); + m_pipeline_object = GetPipelineAndCacheUID(); m_dirty_flags |= DIRTY_FLAG_PIPELINE_BINDING; return m_pipeline_object != VK_NULL_HANDLE; } +void StateTracker::UpdatePipelineLayout() +{ + const bool use_bbox_pipeline_layout = IsSSBODescriptorRequired(); + VkPipelineLayout pipeline_layout = + use_bbox_pipeline_layout ? g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX) : + g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD); + if (m_pipeline_state.pipeline_layout == pipeline_layout) + return; + + // Change the number of active descriptor sets, as well as the pipeline layout + m_pipeline_state.pipeline_layout = pipeline_layout; + if (use_bbox_pipeline_layout) + { + m_num_active_descriptor_sets = NUM_GX_DRAW_WITH_BBOX_DESCRIPTOR_SETS; + + // The bbox buffer never changes, so we defer descriptor updates until it is enabled. + if (m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE) + m_dirty_flags |= DIRTY_FLAG_PS_SSBO; + } + else + { + m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS; + } + + m_dirty_flags |= DIRTY_FLAG_PIPELINE | DIRTY_FLAG_DESCRIPTOR_SET_BINDING; +} + +void StateTracker::UpdatePipelineVertexFormat() +{ + const NativeVertexFormat* vertex_format = + m_using_ubershaders ? + VertexLoaderManager::GetUberVertexFormat(m_vertex_format->GetVertexDeclaration()) : + m_vertex_format; + if (m_pipeline_state.vertex_format == vertex_format) + return; + + m_pipeline_state.vertex_format = static_cast(vertex_format); + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + bool StateTracker::UpdateDescriptorSet() { const size_t MAX_DESCRIPTOR_WRITES = NUM_UBO_DESCRIPTOR_SET_BINDINGS + // UBO - NUM_PIXEL_SHADER_SAMPLERS + // Samplers + 1 + // Samplers 1; // SSBO std::array writes; u32 num_writes = 0; @@ -954,30 +1105,22 @@ bool StateTracker::UpdateDescriptorSet() if (set == VK_NULL_HANDLE) return false; - for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) - { - const VkDescriptorImageInfo& info = m_bindings.ps_samplers[i]; - if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE) - { - writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - static_cast(i), - 0, - 1, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - &info, - nullptr, - nullptr}; - } - } + writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + set, + 0, + 0, + static_cast(NUM_PIXEL_SHADER_SAMPLERS), + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + m_bindings.ps_samplers.data(), + nullptr, + nullptr}; m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set; m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; } - if (m_bbox_enabled && - (m_dirty_flags & DIRTY_FLAG_PS_SSBO || + if ((m_dirty_flags & DIRTY_FLAG_PS_SSBO || m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE)) { VkDescriptorSetLayout layout = diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.h b/Source/Core/VideoBackends/Vulkan/StateTracker.h index 03d7464cee..73da3ad646 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.h +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.h @@ -16,6 +16,8 @@ #include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/PixelShaderGen.h" #include "VideoCommon/RenderBase.h" +#include "VideoCommon/UberShaderPixel.h" +#include "VideoCommon/UberShaderVertex.h" #include "VideoCommon/VertexShaderGen.h" namespace Vulkan @@ -60,6 +62,7 @@ public: void SetBlendState(const BlendingState& state); bool CheckForShaderChanges(u32 gx_primitive_type); + void ClearShaders(); void UpdateVertexShaderConstants(); void UpdateGeometryShaderConstants(); @@ -159,8 +162,8 @@ private: DIRTY_FLAG_DESCRIPTOR_SET_BINDING = (1 << 11), DIRTY_FLAG_PIPELINE_BINDING = (1 << 12), - DIRTY_FLAG_ALL_DESCRIPTOR_SETS = - DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO + DIRTY_FLAG_ALL_DESCRIPTOR_SETS = DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO | + DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO }; bool Initialize(); @@ -178,9 +181,15 @@ private: // Obtains a Vulkan pipeline object for the specified pipeline configuration. // Also adds this pipeline configuration to the UID cache if it is not present already. - VkPipeline GetPipelineAndCacheUID(const PipelineInfo& info); + VkPipeline GetPipelineAndCacheUID(); + + // Are bounding box ubershaders enabled? If so, we need to ensure the SSBO is set up, + // since the bbox writes are determined by a uniform. + bool IsSSBODescriptorRequired() const; bool UpdatePipeline(); + void UpdatePipelineLayout(); + void UpdatePipelineVertexFormat(); bool UpdateDescriptorSet(); // Allocates storage in the uniform buffer of the specified size. If this storage cannot be @@ -203,10 +212,14 @@ private: VertexShaderUid m_vs_uid = {}; GeometryShaderUid m_gs_uid = {}; PixelShaderUid m_ps_uid = {}; + UberShader::VertexShaderUid m_uber_vs_uid = {}; + UberShader::PixelShaderUid m_uber_ps_uid = {}; + bool m_using_ubershaders = false; // pipeline state PipelineInfo m_pipeline_state = {}; VkPipeline m_pipeline_object = VK_NULL_HANDLE; + const VertexFormat* m_vertex_format = nullptr; // shader bindings std::array m_descriptor_sets = {}; diff --git a/Source/Core/VideoBackends/Vulkan/Util.cpp b/Source/Core/VideoBackends/Vulkan/Util.cpp index f1f4f42b1b..fc5b2ec8dc 100644 --- a/Source/Core/VideoBackends/Vulkan/Util.cpp +++ b/Source/Core/VideoBackends/Vulkan/Util.cpp @@ -575,8 +575,7 @@ void UtilityShaderDraw::BindDescriptors() { // TODO: This method is a mess, clean it up std::array bind_descriptor_sets = {}; - std::array - set_writes = {}; + std::array set_writes = {}; uint32_t num_set_writes = 0; VkDescriptorBufferInfo dummy_uniform_buffer = { @@ -633,29 +632,32 @@ void UtilityShaderDraw::BindDescriptors() // Check if we have any at all, skip the binding process entirely if we don't if (first_active_sampler != NUM_PIXEL_SHADER_SAMPLERS) { + // We need to fill it with non-empty images. + for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) + { + if (m_ps_samplers[i].imageView == VK_NULL_HANDLE) + { + m_ps_samplers[i].imageView = g_object_cache->GetDummyImageView(); + m_ps_samplers[i].sampler = g_object_cache->GetPointSampler(); + } + } + // Allocate a new descriptor set VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS)); if (set == VK_NULL_HANDLE) PanicAlert("Failed to allocate descriptor set for utility draw"); - for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) - { - const VkDescriptorImageInfo& info = m_ps_samplers[i]; - if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - static_cast(i), - 0, - 1, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - &info, - nullptr, - nullptr}; - } - } + set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + set, + 0, + 0, + static_cast(NUM_PIXEL_SHADER_SAMPLERS), + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + m_ps_samplers.data(), + nullptr, + nullptr}; bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set; } diff --git a/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp b/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp index 1d35177252..08daba3fe5 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp +++ b/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp @@ -53,17 +53,9 @@ VertexFormat::VertexFormat(const PortableVertexDeclaration& in_vtx_decl) SetupInputState(); } -VertexFormat* VertexFormat::GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl) +const VkPipelineVertexInputStateCreateInfo& VertexFormat::GetVertexInputStateInfo() const { - auto vertex_format_map = VertexLoaderManager::GetNativeVertexFormatMap(); - auto iter = vertex_format_map->find(decl); - if (iter == vertex_format_map->end()) - { - auto ipair = vertex_format_map->emplace(decl, std::make_unique(decl)); - iter = ipair.first; - } - - return static_cast(iter->second.get()); + return m_input_state_info; } void VertexFormat::MapAttributes() diff --git a/Source/Core/VideoBackends/Vulkan/VertexFormat.h b/Source/Core/VideoBackends/Vulkan/VertexFormat.h index ef2d31d748..b2fe21a06e 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexFormat.h +++ b/Source/Core/VideoBackends/Vulkan/VertexFormat.h @@ -16,16 +16,8 @@ class VertexFormat : public ::NativeVertexFormat public: VertexFormat(const PortableVertexDeclaration& in_vtx_decl); - // Creates or obtains a pointer to a VertexFormat representing decl. - // If this results in a VertexFormat being created, if the game later uses a matching vertex - // declaration, the one that was previously created will be used. - static VertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl); - // Passed to pipeline state creation - const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const - { - return m_input_state_info; - } + const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const; // Converting PortableVertexDeclaration -> Vulkan types void MapAttributes(); diff --git a/Source/Core/VideoBackends/Vulkan/main.cpp b/Source/Core/VideoBackends/Vulkan/main.cpp index 4354a7a58b..3fecc914c1 100644 --- a/Source/Core/VideoBackends/Vulkan/main.cpp +++ b/Source/Core/VideoBackends/Vulkan/main.cpp @@ -253,6 +253,7 @@ bool VideoBackend::Initialize(void* window_handle) g_renderer.reset(); StateTracker::DestroyInstance(); g_framebuffer_manager.reset(); + g_shader_cache->Shutdown(); g_shader_cache.reset(); g_object_cache.reset(); g_command_buffer_mgr.reset(); @@ -262,6 +263,11 @@ bool VideoBackend::Initialize(void* window_handle) return false; } + // Lastly, precompile ubershaders, if requested. + // This has to be done after the texture cache and shader cache are initialized. + if (g_ActiveConfig.CanPrecompileUberShaders()) + g_shader_cache->PrecompileUberShaders(); + return true; } @@ -293,6 +299,7 @@ void VideoBackend::Shutdown() void VideoBackend::Video_Cleanup() { g_command_buffer_mgr->WaitForGPUIdle(); + g_shader_cache->Shutdown(); // Save all cached pipelines out to disk for next time. if (g_ActiveConfig.bShaderCache) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 036709aa1a..61e8f92531 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -354,21 +354,10 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, bool boundin "int3 iround(float3 x) { return int3(round(x)); }\n" "int4 iround(float4 x) { return int4(round(x)); }\n\n"); - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n"); } - else if (ApiType == APIType::Vulkan) - { - out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - out.Write("SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n"); - out.Write("SAMPLER_BINDING(2) uniform sampler2DArray samp2;\n"); - out.Write("SAMPLER_BINDING(3) uniform sampler2DArray samp3;\n"); - out.Write("SAMPLER_BINDING(4) uniform sampler2DArray samp4;\n"); - out.Write("SAMPLER_BINDING(5) uniform sampler2DArray samp5;\n"); - out.Write("SAMPLER_BINDING(6) uniform sampler2DArray samp6;\n"); - out.Write("SAMPLER_BINDING(7) uniform sampler2DArray samp7;\n"); - } else // D3D { // Declare samplers @@ -1191,11 +1180,6 @@ static void SampleTexture(ShaderCode& out, const char* texcoords, const char* te "[%d].xy, %s))).%s;\n", texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); } - else if (ApiType == APIType::Vulkan) - { - out.Write("iround(255.0 * texture(samp%d, float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n", - texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); - } else { out.Write("iround(255.0 * texture(samp[%d], float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n", diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 01d9a00fd6..bb52197585 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -44,13 +44,6 @@ static VertexLoaderMap s_vertex_loader_map; u8* cached_arraybases[12]; -// Used in the Vulkan backend - -NativeVertexFormatMap* GetNativeVertexFormatMap() -{ - return &s_native_vertex_map; -} - void Init() { MarkAllDirty(); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index bd280d88db..b46d354e7d 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -24,8 +24,6 @@ void Clear(); void MarkAllDirty(); -NativeVertexFormatMap* GetNativeVertexFormatMap(); - // Creates or obtains a pointer to a VertexFormat representing decl. // If this results in a VertexFormat being created, if the game later uses a matching vertex // declaration, the one that was previously created will be used.