From 222ca7de211108f340241a93425ba463a90ca4d3 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 9 Feb 2024 01:56:20 +0200 Subject: [PATCH] renderer_vulkan: Rewrite descriptor management * Switch to batched vkUpdateDescriptorSets from cached descriptor sets with templates --- src/video_core/CMakeLists.txt | 4 +- .../custom_textures/custom_tex_manager.cpp | 2 +- src/video_core/pica/regs_texturing.h | 8 +- .../rasterizer_cache/rasterizer_cache.h | 63 +++--- .../renderer_opengl/gl_texture_runtime.cpp | 13 +- .../renderer_opengl/gl_texture_runtime.h | 7 +- .../renderer_vulkan/renderer_vulkan.cpp | 42 ++-- .../renderer_vulkan/renderer_vulkan.h | 6 +- .../renderer_vulkan/vk_blit_helper.cpp | 79 +++----- .../renderer_vulkan/vk_blit_helper.h | 14 +- src/video_core/renderer_vulkan/vk_common.h | 1 - .../renderer_vulkan/vk_descriptor_pool.cpp | 141 -------------- .../renderer_vulkan/vk_descriptor_pool.h | 92 --------- .../vk_descriptor_update_queue.cpp | 109 +++++++++++ .../vk_descriptor_update_queue.h | 53 +++++ .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 155 +++------------ .../renderer_vulkan/vk_pipeline_cache.h | 55 +++--- .../renderer_vulkan/vk_platform.cpp | 3 +- .../renderer_vulkan/vk_present_window.cpp | 10 +- .../renderer_vulkan/vk_rasterizer.cpp | 147 +++++++------- .../renderer_vulkan/vk_rasterizer.h | 32 +-- .../renderer_vulkan/vk_render_manager.cpp | 11 +- .../renderer_vulkan/vk_render_manager.h | 7 +- .../renderer_vulkan/vk_resource_pool.cpp | 157 ++++++++++----- .../renderer_vulkan/vk_resource_pool.h | 34 +++- .../renderer_vulkan/vk_scheduler.cpp | 6 +- src/video_core/renderer_vulkan/vk_scheduler.h | 2 +- .../renderer_vulkan/vk_shader_util.cpp | 1 + .../renderer_vulkan/vk_stream_buffer.cpp | 13 +- .../renderer_vulkan/vk_stream_buffer.h | 8 +- .../renderer_vulkan/vk_texture_runtime.cpp | 42 ++-- .../renderer_vulkan/vk_texture_runtime.h | 18 +- .../shader/generator/glsl_fs_shader_gen.cpp | 182 +++++++++++++++--- .../shader/generator/glsl_fs_shader_gen.h | 3 +- 35 files changed, 784 insertions(+), 738 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/vk_descriptor_pool.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_descriptor_pool.h create mode 100644 src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp create mode 100644 src/video_core/renderer_vulkan/vk_descriptor_update_queue.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index df3e5411a..7a61f99c3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -160,8 +160,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_blit_helper.h renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.h - renderer_vulkan/vk_descriptor_pool.cpp - renderer_vulkan/vk_descriptor_pool.h + renderer_vulkan/vk_descriptor_update_queue.cpp + renderer_vulkan/vk_descriptor_update_queue.h renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp diff --git a/src/video_core/custom_textures/custom_tex_manager.cpp b/src/video_core/custom_textures/custom_tex_manager.cpp index df9720941..2ec3c946b 100644 --- a/src/video_core/custom_textures/custom_tex_manager.cpp +++ b/src/video_core/custom_textures/custom_tex_manager.cpp @@ -385,7 +385,7 @@ std::vector CustomTexManager::GetTextures(u64 title_id) { } void CustomTexManager::CreateWorkers() { - const std::size_t num_workers = std::max(std::thread::hardware_concurrency(), 2U) - 1; + const std::size_t num_workers = std::max(std::thread::hardware_concurrency(), 2U) >> 1; workers = std::make_unique(num_workers, "Custom textures"); } diff --git a/src/video_core/pica/regs_texturing.h b/src/video_core/pica/regs_texturing.h index a92d118a5..a9c58f71e 100644 --- a/src/video_core/pica/regs_texturing.h +++ b/src/video_core/pica/regs_texturing.h @@ -176,15 +176,15 @@ struct TexturingRegs { INSERT_PADDING_WORDS(0x9); struct FullTextureConfig { - const bool enabled; + const u32 enabled; const TextureConfig config; const TextureFormat format; }; const std::array GetTextures() const { return {{ - {static_cast(main_config.texture0_enable), texture0, texture0_format}, - {static_cast(main_config.texture1_enable), texture1, texture1_format}, - {static_cast(main_config.texture2_enable), texture2, texture2_format}, + {main_config.texture0_enable, texture0, texture0_format}, + {main_config.texture1_enable, texture1, texture1_format}, + {main_config.texture2_enable, texture2, texture2_format}, }}; } diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index b1871b3c8..cd9c080ef 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -600,14 +600,43 @@ typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& auto [it, new_surface] = texture_cube_cache.try_emplace(config); TextureCube& cube = it->second; + const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz}; + if (new_surface) { + Pica::Texture::TextureInfo info = { + .width = config.width, + .height = config.width, + .format = config.format, + }; + info.SetDefaultStride(); + + u32 res_scale = 1; + for (u32 i = 0; i < addresses.size(); i++) { + if (!addresses[i]) { + continue; + } + + SurfaceId& face_id = cube.face_ids[i]; + if (!face_id) { + info.physical_address = addresses[i]; + face_id = GetTextureSurface(info, config.levels - 1); + Surface& surface = slot_surfaces[face_id]; + ASSERT_MSG( + surface.levels >= config.levels, + "Texture cube face levels are not enough to validate the levels requested"); + surface.flags |= SurfaceFlagBits::Tracked; + } + Surface& surface = slot_surfaces[face_id]; + res_scale = std::max(surface.res_scale, res_scale); + } + SurfaceParams cube_params = { .addr = config.px, .width = config.width, .height = config.width, .stride = config.width, .levels = config.levels, - .res_scale = filter != Settings::TextureFilter::None ? resolution_scale_factor : 1, + .res_scale = res_scale, .texture_type = TextureType::CubeMap, .pixel_format = PixelFormatFromTextureFormat(config.format), .type = SurfaceType::Texture, @@ -616,38 +645,20 @@ typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& cube.surface_id = CreateSurface(cube_params); } - const u32 scaled_size = slot_surfaces[cube.surface_id].GetScaledWidth(); - const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz}; - - Pica::Texture::TextureInfo info = { - .width = config.width, - .height = config.width, - .format = config.format, - }; - info.SetDefaultStride(); - + Surface& cube_surface = slot_surfaces[cube.surface_id]; for (u32 i = 0; i < addresses.size(); i++) { if (!addresses[i]) { continue; } - - SurfaceId& face_id = cube.face_ids[i]; - if (!face_id) { - info.physical_address = addresses[i]; - face_id = GetTextureSurface(info, config.levels - 1); - ASSERT_MSG(slot_surfaces[face_id].levels >= config.levels, - "Texture cube face levels are not enough to validate the levels requested"); - } - Surface& surface = slot_surfaces[face_id]; - surface.flags |= SurfaceFlagBits::Tracked; + Surface& surface = slot_surfaces[cube.face_ids[i]]; if (cube.ticks[i] == surface.modification_tick) { continue; } cube.ticks[i] = surface.modification_tick; - Surface& cube_surface = slot_surfaces[cube.surface_id]; + boost::container::small_vector upload_copies; for (u32 level = 0; level < config.levels; level++) { - const u32 width_lod = scaled_size >> level; - const TextureCopy texture_copy = { + const u32 width_lod = surface.GetScaledWidth() >> level; + upload_copies.push_back({ .src_level = level, .dst_level = level, .src_layer = 0, @@ -655,9 +666,9 @@ typename T::Surface& RasterizerCache::GetTextureCube(const TextureCubeConfig& .src_offset = {0, 0}, .dst_offset = {0, 0}, .extent = {width_lod, width_lod}, - }; - runtime.CopyTextures(surface, cube_surface, texture_copy); + }); } + runtime.CopyTextures(surface, cube_surface, upload_copies); } return slot_surfaces[cube.surface_id]; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index f24fe10ac..2d3669e10 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -260,16 +260,19 @@ void TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea } bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, - const VideoCore::TextureCopy& copy) { + std::span copies) { const GLenum src_textarget = source.texture_type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; const GLenum dest_textarget = dest.texture_type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; - glCopyImageSubData(source.Handle(), src_textarget, copy.src_level, copy.src_offset.x, - copy.src_offset.y, copy.src_layer, dest.Handle(), dest_textarget, - copy.dst_level, copy.dst_offset.x, copy.dst_offset.y, copy.dst_layer, - copy.extent.width, copy.extent.height, 1); + + for (const auto& copy : copies) { + glCopyImageSubData(source.Handle(), src_textarget, copy.src_level, copy.src_offset.x, + copy.src_offset.y, copy.src_layer, dest.Handle(), dest_textarget, + copy.dst_level, copy.dst_offset.x, copy.dst_offset.y, copy.dst_layer, + copy.extent.width, copy.extent.height, 1); + } return true; } diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 9fdc77bc5..5fe7300a7 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -65,7 +65,12 @@ public: void ClearTexture(Surface& surface, const VideoCore::TextureClear& clear); /// Copies a rectangle of source to another rectange of dest - bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); + bool CopyTextures(Surface& source, Surface& dest, + std::span copies); + + bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { + return CopyTextures(source, dest, std::array{copy}); + } /// Blits a rectangle of source to another rectange of dest bool BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2241c45fe..f477e88f4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -54,20 +54,20 @@ RendererVulkan::RendererVulkan(Core::System& system, Pica::PicaCore& pica_, Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window) : RendererBase{system, window, secondary_window}, memory{system.Memory()}, pica{pica_}, instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance}, - render_manager{instance, scheduler}, pool{instance}, main_window{window, instance, scheduler}, + render_manager{instance, scheduler}, main_window{window, instance, scheduler}, vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer, VERTEX_BUFFER_SIZE}, - rasterizer{memory, - pica, - system.CustomTexManager(), - *this, - render_window, - instance, - scheduler, - pool, - render_manager, - main_window.ImageCount()}, - present_set_provider{instance, pool, PRESENT_BINDINGS} { + update_queue{instance}, rasterizer{memory, + pica, + system.CustomTexManager(), + *this, + render_window, + instance, + scheduler, + render_manager, + update_queue, + main_window.ImageCount()}, + present_heap{instance, scheduler.GetMasterSemaphore(), PRESENT_BINDINGS, 32} { CompileShaders(); BuildLayouts(); BuildPipelines(); @@ -126,16 +126,14 @@ void RendererVulkan::PrepareRendertarget() { void RendererVulkan::PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout) { const auto sampler = present_samplers[!Settings::values.filter_mode.GetValue()]; - std::transform(screen_infos.begin(), screen_infos.end(), present_textures.begin(), - [&](auto& info) { - return DescriptorData{vk::DescriptorImageInfo{sampler, info.image_view, - vk::ImageLayout::eGeneral}}; - }); - - const auto descriptor_set = present_set_provider.Acquire(present_textures); + const auto present_set = present_heap.Commit(); + for (u32 index = 0; index < screen_infos.size(); index++) { + update_queue.AddImageSampler(present_set, 0, index, screen_infos[index].image_view, + sampler); + } render_manager.EndRendering(); - scheduler.Record([this, layout, frame, descriptor_set, renderpass = main_window.Renderpass(), + scheduler.Record([this, layout, frame, present_set, renderpass = main_window.Renderpass(), index = current_pipeline](vk::CommandBuffer cmdbuf) { const vk::Viewport viewport = { .x = 0.0f, @@ -170,7 +168,7 @@ void RendererVulkan::PrepareDraw(Frame* frame, const Layout::FramebufferLayout& cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[index]); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, descriptor_set, {}); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, present_set, {}); }); } @@ -261,7 +259,7 @@ void RendererVulkan::BuildLayouts() { .size = sizeof(PresentUniformData), }; - const auto descriptor_set_layout = present_set_provider.Layout(); + const auto descriptor_set_layout = present_heap.Layout(); const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 1, .pSetLayouts = &descriptor_set_layout, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index cb3bd2227..af80b0c82 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -7,7 +7,6 @@ #include "common/common_types.h" #include "common/math_util.h" #include "video_core/renderer_base.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_present_window.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -119,14 +118,14 @@ private: Instance instance; Scheduler scheduler; RenderManager render_manager; - DescriptorPool pool; PresentWindow main_window; StreamBuffer vertex_buffer; + DescriptorUpdateQueue update_queue; RasterizerVulkan rasterizer; std::unique_ptr second_window; + DescriptorHeap present_heap; vk::UniquePipelineLayout present_pipeline_layout; - DescriptorSetProvider present_set_provider; std::array present_pipelines; std::array present_shaders; std::array present_samplers; @@ -134,7 +133,6 @@ private: u32 current_pipeline = 0; std::array screen_infos{}; - std::array present_textures{}; PresentUniformData draw_info{}; vk::ClearColorValue clear_color{}; }; diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp index 651d38437..18e72979e 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -4,6 +4,7 @@ #include "common/vector_math.h" #include "video_core/renderer_vulkan/vk_blit_helper.h" +#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -177,12 +178,13 @@ constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_ } // Anonymous namespace -BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, DescriptorPool& pool, - RenderManager& render_manager_) +BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, + RenderManager& render_manager_, DescriptorUpdateQueue& update_queue_) : instance{instance_}, scheduler{scheduler_}, render_manager{render_manager_}, - device{instance.GetDevice()}, compute_provider{instance, pool, COMPUTE_BINDINGS}, - compute_buffer_provider{instance, pool, COMPUTE_BUFFER_BINDINGS}, - two_textures_provider{instance, pool, TWO_TEXTURES_BINDINGS}, + update_queue{update_queue_}, device{instance.GetDevice()}, + compute_provider{instance, scheduler.GetMasterSemaphore(), COMPUTE_BINDINGS}, + compute_buffer_provider{instance, scheduler.GetMasterSemaphore(), COMPUTE_BUFFER_BINDINGS}, + two_textures_provider{instance, scheduler.GetMasterSemaphore(), TWO_TEXTURES_BINDINGS, 16}, compute_pipeline_layout{ device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_provider.Layout(), true))}, compute_buffer_pipeline_layout{device.createPipelineLayout( @@ -286,24 +288,13 @@ bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest, .extent = {dest.GetScaledWidth(), dest.GetScaledHeight()}, }; - std::array textures{}; - textures[0].image_info = vk::DescriptorImageInfo{ - .sampler = nearest_sampler, - .imageView = source.DepthView(), - .imageLayout = vk::ImageLayout::eGeneral, - }; - textures[1].image_info = vk::DescriptorImageInfo{ - .sampler = nearest_sampler, - .imageView = source.StencilView(), - .imageLayout = vk::ImageLayout::eGeneral, - }; - - const auto descriptor_set = two_textures_provider.Acquire(textures); + const auto descriptor_set = two_textures_provider.Commit(); + update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), nearest_sampler); + update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), nearest_sampler); const RenderPass depth_pass = { .framebuffer = dest.Framebuffer(), - .render_pass = - render_manager.GetRenderpass(PixelFormat::Invalid, dest.pixel_format, false), + .render_pass = render_manager.GetRenderpass(PixelFormat::Invalid, dest.pixel_format, false), .render_area = dst_render_area, }; render_manager.BeginRendering(depth_pass); @@ -322,21 +313,12 @@ bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest, bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { - std::array textures{}; - textures[0].image_info = vk::DescriptorImageInfo{ - .imageView = source.DepthView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - }; - textures[1].image_info = vk::DescriptorImageInfo{ - .imageView = source.StencilView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - }; - textures[2].image_info = vk::DescriptorImageInfo{ - .imageView = dest.ImageView(), - .imageLayout = vk::ImageLayout::eGeneral, - }; - - const auto descriptor_set = compute_provider.Acquire(textures); + const auto descriptor_set = compute_provider.Commit(); + update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), VK_NULL_HANDLE, + vk::ImageLayout::eDepthStencilReadOnlyOptimal); + update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), VK_NULL_HANDLE, + vk::ImageLayout::eDepthStencilReadOnlyOptimal); + update_queue.AddStorageImage(descriptor_set, 2, dest.ImageView()); render_manager.EndRendering(); scheduler.Record([this, descriptor_set, copy, src_image = source.Image(), @@ -442,24 +424,13 @@ bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, bool BlitHelper::DepthToBuffer(Surface& source, vk::Buffer buffer, const VideoCore::BufferTextureCopy& copy) { - std::array textures{}; - textures[0].image_info = vk::DescriptorImageInfo{ - .sampler = nearest_sampler, - .imageView = source.DepthView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - }; - textures[1].image_info = vk::DescriptorImageInfo{ - .sampler = nearest_sampler, - .imageView = source.StencilView(), - .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - }; - textures[2].buffer_info = vk::DescriptorBufferInfo{ - .buffer = buffer, - .offset = copy.buffer_offset, - .range = copy.buffer_size, - }; - - const auto descriptor_set = compute_buffer_provider.Acquire(textures); + const auto descriptor_set = compute_buffer_provider.Commit(); + update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), nearest_sampler, + vk::ImageLayout::eDepthStencilReadOnlyOptimal); + update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), nearest_sampler, + vk::ImageLayout::eDepthStencilReadOnlyOptimal); + update_queue.AddBuffer(descriptor_set, 2, buffer, copy.buffer_offset, copy.buffer_size, + vk::DescriptorType::eStorageBuffer); render_manager.EndRendering(); scheduler.Record([this, descriptor_set, copy, src_image = source.Image(), @@ -548,7 +519,7 @@ vk::Pipeline BlitHelper::MakeDepthStencilBlitPipeline() { const std::array stages = MakeStages(full_screen_vert, blit_depth_stencil_frag); const auto renderpass = render_manager.GetRenderpass(VideoCore::PixelFormat::Invalid, - VideoCore::PixelFormat::D24S8, false); + VideoCore::PixelFormat::D24S8, false); vk::GraphicsPipelineCreateInfo depth_stencil_info = { .stageCount = static_cast(stages.size()), .pStages = stages.data(), diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.h b/src/video_core/renderer_vulkan/vk_blit_helper.h index c8d2751ed..3937fc29b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.h +++ b/src/video_core/renderer_vulkan/vk_blit_helper.h @@ -4,7 +4,7 @@ #pragma once -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" namespace VideoCore { struct TextureBlit; @@ -18,13 +18,14 @@ class Instance; class RenderManager; class Scheduler; class Surface; +class DescriptorUpdateQueue; class BlitHelper { friend class TextureRuntime; public: - BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, - RenderManager& render_manager); + explicit BlitHelper(const Instance& instance, Scheduler& scheduler, + RenderManager& render_manager, DescriptorUpdateQueue& update_queue); ~BlitHelper(); bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); @@ -42,13 +43,14 @@ private: const Instance& instance; Scheduler& scheduler; RenderManager& render_manager; + DescriptorUpdateQueue& update_queue; vk::Device device; vk::RenderPass r32_renderpass; - DescriptorSetProvider compute_provider; - DescriptorSetProvider compute_buffer_provider; - DescriptorSetProvider two_textures_provider; + DescriptorHeap compute_provider; + DescriptorHeap compute_buffer_provider; + DescriptorHeap two_textures_provider; vk::PipelineLayout compute_pipeline_layout; vk::PipelineLayout compute_buffer_pipeline_layout; vk::PipelineLayout two_textures_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index 3fd6bc45c..a8147acbe 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -9,7 +9,6 @@ #define VK_NO_PROTOTYPES #define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 #define VULKAN_HPP_NO_CONSTRUCTORS -#define VULKAN_HPP_NO_UNION_CONSTRUCTORS #define VULKAN_HPP_NO_STRUCT_SETTERS #include diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp deleted file mode 100644 index 3909da237..000000000 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2023 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_instance.h" - -namespace Vulkan { - -MICROPROFILE_DEFINE(Vulkan_DescriptorSetAcquire, "Vulkan", "Descriptor Set Acquire", - MP_RGB(64, 128, 256)); - -constexpr u32 MAX_BATCH_SIZE = 8; - -DescriptorPool::DescriptorPool(const Instance& instance_) : instance{instance_} { - auto& pool = pools.emplace_back(); - pool = CreatePool(); -} - -DescriptorPool::~DescriptorPool() = default; - -std::vector DescriptorPool::Allocate(vk::DescriptorSetLayout layout, - u32 num_sets) { - std::array layouts; - layouts.fill(layout); - - u32 current_pool = 0; - vk::DescriptorSetAllocateInfo alloc_info = { - .descriptorPool = *pools[current_pool], - .descriptorSetCount = num_sets, - .pSetLayouts = layouts.data(), - }; - - while (true) { - try { - return instance.GetDevice().allocateDescriptorSets(alloc_info); - } catch (const vk::OutOfPoolMemoryError&) { - current_pool++; - if (current_pool == pools.size()) { - LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!"); - auto& pool = pools.emplace_back(); - pool = CreatePool(); - } - alloc_info.descriptorPool = *pools[current_pool]; - } - } -} - -vk::DescriptorSet DescriptorPool::Allocate(vk::DescriptorSetLayout layout) { - const auto sets = Allocate(layout, 1); - return sets[0]; -} - -vk::UniqueDescriptorPool DescriptorPool::CreatePool() { - // Choose a sane pool size good for most games - static constexpr std::array pool_sizes = {{ - {vk::DescriptorType::eUniformBufferDynamic, 64}, - {vk::DescriptorType::eUniformTexelBuffer, 64}, - {vk::DescriptorType::eCombinedImageSampler, 4096}, - {vk::DescriptorType::eSampledImage, 256}, - {vk::DescriptorType::eStorageImage, 256}, - {vk::DescriptorType::eStorageBuffer, 32}, - }}; - - const vk::DescriptorPoolCreateInfo descriptor_pool_info = { - .maxSets = 4098, - .poolSizeCount = static_cast(pool_sizes.size()), - .pPoolSizes = pool_sizes.data(), - }; - - return instance.GetDevice().createDescriptorPoolUnique(descriptor_pool_info); -} - -DescriptorSetProvider::DescriptorSetProvider( - const Instance& instance, DescriptorPool& pool_, - std::span bindings) - : pool{pool_}, device{instance.GetDevice()} { - std::array update_entries; - - for (u32 i = 0; i < bindings.size(); i++) { - update_entries[i] = vk::DescriptorUpdateTemplateEntry{ - .dstBinding = bindings[i].binding, - .dstArrayElement = 0, - .descriptorCount = bindings[i].descriptorCount, - .descriptorType = bindings[i].descriptorType, - .offset = i * sizeof(DescriptorData), - .stride = sizeof(DescriptorData), - }; - } - - const vk::DescriptorSetLayoutCreateInfo layout_info = { - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }; - layout = device.createDescriptorSetLayoutUnique(layout_info); - - const vk::DescriptorUpdateTemplateCreateInfo template_info = { - .descriptorUpdateEntryCount = static_cast(bindings.size()), - .pDescriptorUpdateEntries = update_entries.data(), - .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, - .descriptorSetLayout = *layout, - }; - update_template = device.createDescriptorUpdateTemplateUnique(template_info); -} - -DescriptorSetProvider::~DescriptorSetProvider() = default; - -vk::DescriptorSet DescriptorSetProvider::Acquire(std::span data) { - MICROPROFILE_SCOPE(Vulkan_DescriptorSetAcquire); - DescriptorSetData key{}; - std::memcpy(key.data(), data.data(), data.size_bytes()); - const auto [it, new_set] = descriptor_set_map.try_emplace(key); - if (!new_set) { - return it->second; - } - if (free_sets.empty()) { - free_sets = pool.Allocate(*layout, MAX_BATCH_SIZE); - } - it.value() = free_sets.back(); - free_sets.pop_back(); - device.updateDescriptorSetWithTemplate(it->second, *update_template, data[0]); - return it->second; -} - -void DescriptorSetProvider::FreeWithImage(vk::ImageView image_view) { - for (auto it = descriptor_set_map.begin(); it != descriptor_set_map.end();) { - const auto& [data, set] = *it; - const bool has_image = std::any_of(data.begin(), data.end(), [image_view](auto& info) { - return info.image_info.imageView == image_view; - }); - if (has_image) { - free_sets.push_back(set); - it = descriptor_set_map.erase(it); - } else { - it++; - } - } -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h deleted file mode 100644 index 2990cd294..000000000 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2023 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/hash.h" -#include "video_core/renderer_vulkan/vk_common.h" - -namespace Vulkan { - -class Instance; - -constexpr u32 MAX_DESCRIPTORS = 7; - -union DescriptorData { - vk::DescriptorImageInfo image_info; - vk::DescriptorBufferInfo buffer_info; - vk::BufferView buffer_view; - - bool operator==(const DescriptorData& other) const noexcept { - return std::memcmp(this, &other, sizeof(DescriptorData)) == 0; - } -}; - -using DescriptorSetData = std::array; - -struct DataHasher { - u64 operator()(const DescriptorSetData& data) const noexcept { - return Common::ComputeHash64(data.data(), sizeof(data)); - } -}; - -/** - * An interface for allocating descriptor sets that manages a collection of descriptor pools. - */ -class DescriptorPool { -public: - explicit DescriptorPool(const Instance& instance); - ~DescriptorPool(); - - std::vector Allocate(vk::DescriptorSetLayout layout, u32 num_sets); - - vk::DescriptorSet Allocate(vk::DescriptorSetLayout layout); - -private: - vk::UniqueDescriptorPool CreatePool(); - -private: - const Instance& instance; - std::vector pools; -}; - -/** - * Allocates and caches descriptor sets of a specific layout. - */ -class DescriptorSetProvider { -public: - explicit DescriptorSetProvider(const Instance& instance, DescriptorPool& pool, - std::span bindings); - ~DescriptorSetProvider(); - - vk::DescriptorSet Acquire(std::span data); - - void FreeWithImage(vk::ImageView image_view); - - [[nodiscard]] vk::DescriptorSetLayout Layout() const noexcept { - return *layout; - } - - [[nodiscard]] vk::DescriptorSetLayout& Layout() noexcept { - return layout.get(); - } - - [[nodiscard]] vk::DescriptorUpdateTemplate UpdateTemplate() const noexcept { - return *update_template; - } - -private: - DescriptorPool& pool; - vk::Device device; - vk::UniqueDescriptorSetLayout layout; - vk::UniqueDescriptorUpdateTemplate update_template; - std::vector free_sets; - tsl::robin_map descriptor_set_map; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp b/src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp new file mode 100644 index 000000000..f7c54f39e --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_update_queue.cpp @@ -0,0 +1,109 @@ +// Copyright 2024 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +DescriptorUpdateQueue::DescriptorUpdateQueue(const Instance& instance, u32 descriptor_write_max_) + : device{instance.GetDevice()}, descriptor_write_max{descriptor_write_max_} { + descriptor_infos = std::make_unique(descriptor_write_max); + descriptor_writes = std::make_unique(descriptor_write_max); +} + +void DescriptorUpdateQueue::Flush() { + if (descriptor_write_end == 0) { + return; + } + device.updateDescriptorSets({std::span(descriptor_writes.get(), descriptor_write_end)}, {}); + descriptor_write_end = 0; +} + +void DescriptorUpdateQueue::AddStorageImage(vk::DescriptorSet target, u8 binding, + vk::ImageView image_view, + vk::ImageLayout image_layout) { + if (descriptor_write_end >= descriptor_write_max) [[unlikely]] { + Flush(); + } + + auto& image_info = descriptor_infos[descriptor_write_end].image_info; + image_info.sampler = VK_NULL_HANDLE; + image_info.imageView = image_view; + image_info.imageLayout = image_layout; + + descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{ + .dstSet = target, + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageImage, + .pImageInfo = &image_info, + }; +} + +void DescriptorUpdateQueue::AddImageSampler(vk::DescriptorSet target, u8 binding, u8 array_index, + vk::ImageView image_view, vk::Sampler sampler, + vk::ImageLayout image_layout) { + if (descriptor_write_end >= descriptor_write_max) [[unlikely]] { + Flush(); + } + + auto& image_info = descriptor_infos[descriptor_write_end].image_info; + image_info.sampler = sampler; + image_info.imageView = image_view; + image_info.imageLayout = image_layout; + + descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{ + .dstSet = target, + .dstBinding = binding, + .dstArrayElement = array_index, + .descriptorCount = 1, + .descriptorType = + sampler ? vk::DescriptorType::eCombinedImageSampler : vk::DescriptorType::eSampledImage, + .pImageInfo = &image_info, + }; +} + +void DescriptorUpdateQueue::AddBuffer(vk::DescriptorSet target, u8 binding, vk::Buffer buffer, + vk::DeviceSize offset, vk::DeviceSize size, + vk::DescriptorType type) { + if (descriptor_write_end >= descriptor_write_max) [[unlikely]] { + Flush(); + } + + auto& buffer_info = descriptor_infos[descriptor_write_end].buffer_info; + buffer_info.buffer = buffer; + buffer_info.offset = offset; + buffer_info.range = size; + + descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{ + .dstSet = target, + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .pBufferInfo = &buffer_info, + }; +} + +void DescriptorUpdateQueue::AddTexelBuffer(vk::DescriptorSet target, u8 binding, + vk::BufferView buffer_view) { + if (descriptor_write_end >= descriptor_write_max) [[unlikely]] { + Flush(); + } + + auto& buffer_info = descriptor_infos[descriptor_write_end].buffer_view; + buffer_info = buffer_view; + descriptor_writes[descriptor_write_end++] = vk::WriteDescriptorSet{ + .dstSet = target, + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eUniformTexelBuffer, + .pTexelBufferView = &buffer_info, + }; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_update_queue.h b/src/video_core/renderer_vulkan/vk_descriptor_update_queue.h new file mode 100644 index 000000000..2f7fb42cd --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_update_queue.h @@ -0,0 +1,53 @@ +// Copyright 2024 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; + +struct DescriptorInfoUnion { + DescriptorInfoUnion() {} + + union { + vk::DescriptorImageInfo image_info; + vk::DescriptorBufferInfo buffer_info; + vk::BufferView buffer_view; + }; +}; + +class DescriptorUpdateQueue { +public: + explicit DescriptorUpdateQueue(const Instance& instance, u32 descriptor_write_max = 2048); + ~DescriptorUpdateQueue() = default; + + void Flush(); + + void AddStorageImage(vk::DescriptorSet target, u8 binding, vk::ImageView image_view, + vk::ImageLayout image_layout = vk::ImageLayout::eGeneral); + + void AddImageSampler(vk::DescriptorSet target, u8 binding, u8 array_index, + vk::ImageView image_view, vk::Sampler sampler, + vk::ImageLayout imageLayout = vk::ImageLayout::eGeneral); + + void AddBuffer(vk::DescriptorSet target, u8 binding, vk::Buffer buffer, vk::DeviceSize offset, + vk::DeviceSize size = VK_WHOLE_SIZE, + vk::DescriptorType type = vk::DescriptorType::eUniformBufferDynamic); + + void AddTexelBuffer(vk::DescriptorSet target, u8 binding, vk::BufferView buffer_view); + +private: + const vk::Device device; + const u32 descriptor_write_max; + std::unique_ptr descriptor_infos; + std::unique_ptr descriptor_writes; + u32 descriptor_write_end = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index c0df5d681..363b2b567 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -126,7 +126,7 @@ struct AttachmentInfo { }; /** - * Information about a graphics/compute pipeline + * Information about a graphics pipeline */ struct PipelineInfo { BlendingState blending; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c10e74949..69e0cb7ba 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -11,6 +11,7 @@ #include "common/scope_exit.h" #include "common/settings.h" #include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_render_manager.h" @@ -62,34 +63,34 @@ constexpr std::array BUFFER_BINDINGS = {{ {5, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, }}; +template constexpr std::array TEXTURE_BINDINGS = {{ - {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, - {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, - {2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + {0, vk::DescriptorType::eCombinedImageSampler, NumTex0, + vk::ShaderStageFlagBits::eFragment}, // tex0 + {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex1 + {2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex2 }}; -// TODO: Use descriptor array for shadow cube -constexpr std::array SHADOW_BINDINGS = {{ - {0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {1, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {3, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {4, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {5, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, - {6, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, +constexpr std::array UTILITY_BINDINGS = {{ + {0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, // shadow_buffer + {1, vk::DescriptorType::eCombinedImageSampler, 1, + vk::ShaderStageFlagBits::eFragment}, // tex_normal }}; PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, - RenderManager& render_manager_, DescriptorPool& pool_) - : instance{instance_}, scheduler{scheduler_}, render_manager{render_manager_}, pool{pool_}, - num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U)}, + RenderManager& render_manager_, DescriptorUpdateQueue& update_queue_) + : instance{instance_}, scheduler{scheduler_}, render_manager{render_manager_}, + update_queue{update_queue_}, + num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U) >> 1}, workers{num_worker_threads, "Pipeline workers"}, - descriptor_set_providers{DescriptorSetProvider{instance, pool, BUFFER_BINDINGS}, - DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS}, - DescriptorSetProvider{instance, pool, SHADOW_BINDINGS}}, + descriptor_heaps{ + DescriptorHeap{instance, scheduler.GetMasterSemaphore(), BUFFER_BINDINGS, 32}, + DescriptorHeap{instance, scheduler.GetMasterSemaphore(), TEXTURE_BINDINGS<1>}, + DescriptorHeap{instance, scheduler.GetMasterSemaphore(), UTILITY_BINDINGS, 32}}, trivial_vertex_shader{ instance, vk::ShaderStageFlagBits::eVertex, GLSL::GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported(), true)} { + scheduler.RegisterOnDispatch([this] { update_queue.Flush(); }); profile = Pica::Shader::Profile{ .has_separable_shaders = true, .has_clip_planes = instance.IsShaderClipDistanceSupported(), @@ -106,13 +107,13 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, } void PipelineCache::BuildLayout() { - std::array descriptor_set_layouts; - std::transform(descriptor_set_providers.begin(), descriptor_set_providers.end(), - descriptor_set_layouts.begin(), - [](const auto& provider) { return provider.Layout(); }); + std::array descriptor_set_layouts; + descriptor_set_layouts[0] = descriptor_heaps[0].Layout(); + descriptor_set_layouts[1] = descriptor_heaps[1].Layout(); + descriptor_set_layouts[2] = descriptor_heaps[2].Layout(); const vk::PipelineLayoutCreateInfo layout_info = { - .setLayoutCount = NUM_RASTERIZER_SETS, + .setLayoutCount = NumRasterizerSets, .pSetLayouts = descriptor_set_layouts.data(), .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, @@ -214,55 +215,11 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { return false; } - u32 new_descriptors_start = 0; - std::span new_descriptors_span{}; - std::span new_offsets_span{}; - - // Ensure all the descriptor sets are set at least once at the beginning. - if (scheduler.IsStateDirty(StateFlags::DescriptorSets)) { - set_dirty.set(); - } - - if (set_dirty.any()) { - for (u32 i = 0; i < NUM_RASTERIZER_SETS; i++) { - if (!set_dirty.test(i)) { - continue; - } - bound_descriptor_sets[i] = descriptor_set_providers[i].Acquire(update_data[i]); - } - new_descriptors_span = bound_descriptor_sets; - - // Only send new offsets if the buffer descriptor-set changed. - if (set_dirty.test(0)) { - new_offsets_span = offsets; - } - - // Try to compact the number of updated descriptor-set slots to the ones that have actually - // changed - if (!set_dirty.all()) { - const u64 dirty_mask = set_dirty.to_ulong(); - new_descriptors_start = static_cast(std::countr_zero(dirty_mask)); - const u32 new_descriptors_end = 64u - static_cast(std::countl_zero(dirty_mask)); - const u32 new_descriptors_size = new_descriptors_end - new_descriptors_start; - - new_descriptors_span = - new_descriptors_span.subspan(new_descriptors_start, new_descriptors_size); - } - - set_dirty.reset(); - } - - boost::container::static_vector new_descriptors( - new_descriptors_span.begin(), new_descriptors_span.end()); - boost::container::static_vector new_offsets(new_offsets_span.begin(), - new_offsets_span.end()); - const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline); const bool pipeline_dirty = (current_pipeline != pipeline) || is_dirty; scheduler.Record([this, is_dirty, pipeline_dirty, pipeline, current_dynamic = current_info.dynamic, dynamic = info.dynamic, - new_descriptors_start, descriptor_sets = std::move(new_descriptors), - offsets = std::move(new_offsets), + descriptor_sets = bound_descriptor_sets, offsets = offsets, current_rasterization = current_info.rasterization, current_depth_stencil = current_info.depth_stencil, rasterization = info.rasterization, @@ -364,10 +321,8 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); } - if (descriptor_sets.size()) { - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, - new_descriptors_start, descriptor_sets, offsets); - } + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, + descriptor_sets, offsets); }); current_info = info; @@ -385,7 +340,6 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::RegsInternal& regs, // We also don't need the geometry shader if we have the barycentric extension. const bool use_geometry_shader = instance.UseGeometryShaders() && !regs.lighting.disable && !instance.IsFragmentShaderBarycentricSupported(); - PicaVSConfig config{regs, setup, instance.IsShaderClipDistanceSupported(), use_geometry_shader}; for (u32 i = 0; i < layout.attribute_count; i++) { @@ -402,7 +356,7 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::RegsInternal& regs, } } - auto [it, new_config] = programmable_vertex_map.try_emplace(config); + const auto [it, new_config] = programmable_vertex_map.try_emplace(config); if (new_config) { auto program = GLSL::GenerateVertexShader(setup, config, true); if (program.empty()) { @@ -497,59 +451,6 @@ void PipelineCache::UseFragmentShader(const Pica::RegsInternal& regs, shader_hashes[ProgramType::FS] = fs_config.Hash(); } -void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) { - auto& info = update_data[1][binding].image_info; - if (info.imageView == image_view && info.sampler == sampler) { - return; - } - set_dirty[1] = true; - info = vk::DescriptorImageInfo{ - .sampler = sampler, - .imageView = image_view, - .imageLayout = vk::ImageLayout::eGeneral, - }; -} - -void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) { - auto& info = update_data[2][binding].image_info; - if (info.imageView == image_view) { - return; - } - set_dirty[2] = true; - info = vk::DescriptorImageInfo{ - .imageView = image_view, - .imageLayout = vk::ImageLayout::eGeneral, - }; -} - -void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) { - auto& info = update_data[0][binding].buffer_info; - if (info.buffer == buffer && info.offset == offset && info.range == size) { - return; - } - set_dirty[0] = true; - info = vk::DescriptorBufferInfo{ - .buffer = buffer, - .offset = offset, - .range = size, - }; -} - -void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) { - auto& view = update_data[0][binding].buffer_view; - if (view != buffer_view) { - set_dirty[0] = true; - view = buffer_view; - } -} - -void PipelineCache::SetBufferOffset(u32 binding, std::size_t offset) { - if (offsets[binding] != static_cast(offset)) { - offsets[binding] = static_cast(offset); - set_dirty[0] = true; - } -} - bool PipelineCache::IsCacheValid(std::span data) const { if (data.size() < sizeof(vk::PipelineCacheHeaderVersionOne)) { LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header"); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index f4f6e163d..ac8e949d9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -7,8 +7,8 @@ #include #include -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/shader/generator/pica_fs_config.h" #include "video_core/shader/generator/profile.h" #include "video_core/shader/generator/shader_gen.h" @@ -23,22 +23,38 @@ namespace Vulkan { class Instance; class Scheduler; class RenderManager; -class DescriptorPool; +class DescriptorUpdateQueue; -constexpr u32 NUM_RASTERIZER_SETS = 3; -constexpr u32 NUM_DYNAMIC_OFFSETS = 3; +enum class DescriptorHeapType : u32 { + Buffer, + Texture, + Utility, +}; /** * Stores a collection of rasterizer pipelines used during rendering. */ class PipelineCache { + static constexpr u32 NumRasterizerSets = 3; + static constexpr u32 NumDescriptorHeaps = 3; + static constexpr u32 NumDynamicOffsets = 3; + public: explicit PipelineCache(const Instance& instance, Scheduler& scheduler, - RenderManager& render_manager, DescriptorPool& pool); + RenderManager& render_manager, DescriptorUpdateQueue& update_queue); ~PipelineCache(); - [[nodiscard]] DescriptorSetProvider& TextureProvider() noexcept { - return descriptor_set_providers[1]; + /// Acquires and binds a free descriptor set from the appropriate heap. + vk::DescriptorSet Acquire(DescriptorHeapType type) { + const u32 index = static_cast(type); + const auto descriptor_set = descriptor_heaps[index].Commit(); + bound_descriptor_sets[index] = descriptor_set; + return descriptor_set; + } + + /// Sets the dynamic offset for the uniform buffer at binding + void UpdateRange(u8 binding, u32 offset) { + offsets[binding] = offset; } /// Loads the pipeline cache stored to disk @@ -66,21 +82,6 @@ public: /// Binds a fragment shader generated from PICA state void UseFragmentShader(const Pica::RegsInternal& regs, const Pica::Shader::UserConfig& user); - /// Binds a texture to the specified binding - void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler); - - /// Binds a storage image to the specified binding - void BindStorageImage(u32 binding, vk::ImageView image_view); - - /// Binds a buffer to the specified binding - void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size); - - /// Binds a buffer to the specified binding - void BindTexelBuffer(u32 binding, vk::BufferView buffer_view); - - /// Sets the dynamic offset for the uniform buffer at binding - void SetBufferOffset(u32 binding, std::size_t offset); - private: /// Builds the rasterizer pipeline layout void BuildLayout(); @@ -98,7 +99,7 @@ private: const Instance& instance; Scheduler& scheduler; RenderManager& render_manager; - DescriptorPool& pool; + DescriptorUpdateQueue& update_queue; Pica::Shader::Profile profile{}; vk::UniquePipelineCache pipeline_cache; @@ -110,11 +111,9 @@ private: tsl::robin_map, Common::IdentityHash> graphics_pipelines; - std::array descriptor_set_providers; - std::array update_data{}; - std::array bound_descriptor_sets{}; - std::array offsets{}; - std::bitset set_dirty{}; + std::array descriptor_heaps; + std::array bound_descriptor_sets{}; + std::array offsets{}; std::array shader_hashes; std::array current_shaders; diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 94ce93ec9..fa8f76199 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -32,8 +32,9 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* callback_data, void* user_data) { - switch (callback_data->messageIdNumber) { + switch (static_cast(callback_data->messageIdNumber)) { case 0x609a13b: // Vertex attribute at location not consumed by shader + case 0xc81ad50e: return VK_FALSE; default: break; diff --git a/src/video_core/renderer_vulkan/vk_present_window.cpp b/src/video_core/renderer_vulkan/vk_present_window.cpp index 5f096ad4b..62178c690 100644 --- a/src/video_core/renderer_vulkan/vk_present_window.cpp +++ b/src/video_core/renderer_vulkan/vk_present_window.cpp @@ -138,11 +138,11 @@ PresentWindow::PresentWindow(Frontend::EmuWindow& emu_window_, const Instance& i if (instance.HasDebuggingToolAttached()) { for (u32 i = 0; i < num_images; ++i) { - Vulkan::SetObjectName(device, swap_chain[i].cmdbuf, "Swapchain Command Buffer {}", i); - Vulkan::SetObjectName(device, swap_chain[i].render_ready, - "Swapchain Semaphore: render_ready {}", i); - Vulkan::SetObjectName(device, swap_chain[i].present_done, - "Swapchain Fence: present_done {}", i); + SetObjectName(device, swap_chain[i].cmdbuf, "Swapchain Command Buffer {}", i); + SetObjectName(device, swap_chain[i].render_ready, + "Swapchain Semaphore: render_ready {}", i); + SetObjectName(device, swap_chain[i].present_done, "Swapchain Fence: present_done {}", + i); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 95d33e222..3c5557658 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,13 +58,15 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& VideoCore::CustomTexManager& custom_tex_manager, VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window, const Instance& instance, - Scheduler& scheduler, DescriptorPool& pool, - RenderManager& render_manager, u32 image_count) + Scheduler& scheduler, RenderManager& render_manager, + DescriptorUpdateQueue& update_queue_, u32 image_count) : RasterizerAccelerated{memory, pica}, instance{instance}, scheduler{scheduler}, - render_manager{render_manager}, pipeline_cache{instance, scheduler, render_manager, - pool}, - runtime{instance, scheduler, render_manager, pool, pipeline_cache.TextureProvider(), - image_count}, + render_manager{render_manager}, update_queue{update_queue_}, + pipeline_cache{instance, scheduler, render_manager, update_queue}, runtime{instance, + scheduler, + render_manager, + update_queue, + image_count}, res_cache{memory, custom_tex_manager, runtime, regs, renderer}, stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE}, uniform_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformBuffer, @@ -80,9 +82,9 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& // Query uniform buffer alignment. uniform_buffer_alignment = instance.UniformMinAlignment(); uniform_size_aligned_vs_pica = - Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment); - uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); - uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment); + Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment); + uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); + uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment); // Define vertex layout for software shaders MakeSoftwareVertexLayout(); @@ -108,28 +110,32 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& .range = VK_WHOLE_SIZE, }); - scheduler.RegisterOnSubmit([&render_manager] { - render_manager.EndRendering(); - }); + scheduler.RegisterOnSubmit([&render_manager] { render_manager.EndRendering(); }); - // Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize - // all descriptor sets even the ones we don't use. - pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData)); - pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(VSUniformData)); - pipeline_cache.BindBuffer(2, uniform_buffer.Handle(), 0, sizeof(FSUniformData)); - pipeline_cache.BindTexelBuffer(3, *texture_lf_view); - pipeline_cache.BindTexelBuffer(4, *texture_rg_view); - pipeline_cache.BindTexelBuffer(5, *texture_rgba_view); + // Prepare the static buffer descriptor set. + const auto buffer_set = pipeline_cache.Acquire(DescriptorHeapType::Buffer); + update_queue.AddBuffer(buffer_set, 0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData)); + update_queue.AddBuffer(buffer_set, 1, uniform_buffer.Handle(), 0, sizeof(VSUniformData)); + update_queue.AddBuffer(buffer_set, 2, uniform_buffer.Handle(), 0, sizeof(FSUniformData)); + update_queue.AddTexelBuffer(buffer_set, 3, *texture_lf_view); + update_queue.AddTexelBuffer(buffer_set, 4, *texture_rg_view); + update_queue.AddTexelBuffer(buffer_set, 5, *texture_rgba_view); + const auto texture_set = pipeline_cache.Acquire(DescriptorHeapType::Texture); Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); + + // Prepare texture and utility descriptor sets. for (u32 i = 0; i < 3; i++) { - pipeline_cache.BindTexture(i, null_surface.ImageView(), null_sampler.Handle()); + update_queue.AddImageSampler(texture_set, i, 0, null_surface.ImageView(), + null_sampler.Handle()); } - for (u32 i = 0; i < 7; i++) { - pipeline_cache.BindStorageImage(i, null_surface.StorageView()); - } + const auto utility_set = pipeline_cache.Acquire(DescriptorHeapType::Utility); + update_queue.AddStorageImage(utility_set, 0, null_surface.StorageView()); + update_queue.AddImageSampler(utility_set, 1, 0, null_surface.ImageView(), + null_sampler.Handle()); + update_queue.Flush(); SyncEntireState(); } @@ -482,13 +488,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { pipeline_info.attachments.color = framebuffer->Format(SurfaceType::Color); pipeline_info.attachments.depth = framebuffer->Format(SurfaceType::Depth); - if (shadow_rendering) { - pipeline_cache.BindStorageImage(6, framebuffer->ImageView(SurfaceType::Color)); - } else { - Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); - pipeline_cache.BindStorageImage(6, null_surface.StorageView()); - } - // Update scissor uniforms const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 || @@ -505,6 +504,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // Sync and bind the texture surfaces SyncTextureUnits(framebuffer); + SyncUtilityTextures(framebuffer); // Sync and bind the shader if (shader_dirty) { @@ -538,8 +538,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { } else { pipeline_cache.BindPipeline(pipeline_info, true); - const u64 vertex_size = vertex_batch.size() * sizeof(HardwareVertex); const u32 vertex_count = static_cast(vertex_batch.size()); + const u32 vertex_size = vertex_count * sizeof(HardwareVertex); const auto [buffer, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex)); std::memcpy(buffer, vertex_batch.data(), vertex_size); @@ -559,6 +559,11 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; const auto pica_textures = regs.texturing.GetTextures(); + const bool use_cube_heap = + pica_textures[0].enabled && pica_textures[0].config.type == TextureType::ShadowCube; + const auto texture_set = pipeline_cache.Acquire(use_cube_heap ? DescriptorHeapType::Texture + : DescriptorHeapType::Texture); + for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { const auto& texture = pica_textures[texture_index]; @@ -566,8 +571,8 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { if (!texture.enabled) { const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); - pipeline_cache.BindTexture(texture_index, null_surface.ImageView(), - null_sampler.Handle()); + update_queue.AddImageSampler(texture_set, texture_index, 0, null_surface.ImageView(), + null_sampler.Handle()); continue; } @@ -576,20 +581,21 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { switch (texture.config.type.Value()) { case TextureType::Shadow2D: { Surface& surface = res_cache.GetTextureSurface(texture); + Sampler& sampler = res_cache.GetSampler(texture.config); surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; - pipeline_cache.BindStorageImage(0, surface.StorageView()); + update_queue.AddImageSampler(texture_set, texture_index, 0, surface.StorageView(), + sampler.Handle()); continue; } case TextureType::ShadowCube: { - BindShadowCube(texture); + BindShadowCube(texture, texture_set); continue; } case TextureType::TextureCube: { - BindTextureCube(texture); + BindTextureCube(texture, texture_set); continue; } default: - UnbindSpecial(); break; } } @@ -597,13 +603,26 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { // Bind the texture provided by the rasterizer cache Surface& surface = res_cache.GetTextureSurface(texture); Sampler& sampler = res_cache.GetSampler(texture.config); - if (!IsFeedbackLoop(texture_index, framebuffer, surface, sampler)) { - pipeline_cache.BindTexture(texture_index, surface.ImageView(), sampler.Handle()); - } + const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color); + const bool is_feedback_loop = color_view == surface.ImageView(); + const vk::ImageView texture_view = + is_feedback_loop ? surface.CopyImageView() : surface.ImageView(); + update_queue.AddImageSampler(texture_set, texture_index, 0, texture_view, sampler.Handle()); } } -void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) { +void RasterizerVulkan::SyncUtilityTextures(const Framebuffer* framebuffer) { + const bool shadow_rendering = regs.framebuffer.IsShadowRendering(); + if (!shadow_rendering) { + return; + } + + const auto utility_set = pipeline_cache.Acquire(DescriptorHeapType::Utility); + update_queue.AddStorageImage(utility_set, 0, framebuffer->ImageView(SurfaceType::Color)); +} + +void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture, + vk::DescriptorSet texture_set) { using CubeFace = Pica::TexturingRegs::CubeFace; auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format); constexpr std::array faces = { @@ -611,6 +630,8 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf CubeFace::NegativeY, CubeFace::PositiveZ, CubeFace::NegativeZ, }; + Sampler& sampler = res_cache.GetSampler(texture.config); + for (CubeFace face : faces) { const u32 binding = static_cast(face); info.physical_address = regs.texturing.GetCubePhysicalAddress(face); @@ -618,11 +639,13 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf const VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info); Surface& surface = res_cache.GetSurface(surface_id); surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; - pipeline_cache.BindStorageImage(binding, surface.StorageView()); + update_queue.AddImageSampler(texture_set, 0, binding, surface.StorageView(), + sampler.Handle()); } } -void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) { +void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture, + vk::DescriptorSet texture_set) { using CubeFace = Pica::TexturingRegs::CubeFace; const VideoCore::TextureCubeConfig config = { .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), @@ -638,27 +661,7 @@ void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureCon Surface& surface = res_cache.GetTextureCube(config); Sampler& sampler = res_cache.GetSampler(texture.config); - pipeline_cache.BindTexture(0, surface.ImageView(), sampler.Handle()); -} - -bool RasterizerVulkan::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, - Surface& surface, Sampler& sampler) { - const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color); - const bool is_feedback_loop = color_view == surface.ImageView(); - if (!is_feedback_loop) { - return false; - } - - // Make a temporary copy of the framebuffer to sample from - pipeline_cache.BindTexture(texture_index, surface.CopyImageView(), sampler.Handle()); - return true; -} - -void RasterizerVulkan::UnbindSpecial() { - Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); - for (u32 i = 0; i < 6; i++) { - pipeline_cache.BindStorageImage(i, null_surface.StorageView()); - } + update_queue.AddImageSampler(texture_set, 0, 0, surface.ImageView(), sampler.Handle()); } void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) { @@ -1096,7 +1099,7 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { return; } - const u64 uniform_size = + const u32 uniform_size = uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs; auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size, uniform_buffer_alignment); @@ -1107,18 +1110,18 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data, sizeof(vs_uniform_block_data.data)); - pipeline_cache.SetBufferOffset(1, offset + used_bytes); + pipeline_cache.UpdateRange(1, offset + used_bytes); vs_uniform_block_data.dirty = false; - used_bytes += static_cast(uniform_size_aligned_vs); + used_bytes += uniform_size_aligned_vs; } if (sync_fs || invalidate) { std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data, sizeof(fs_uniform_block_data.data)); - pipeline_cache.SetBufferOffset(2, offset + used_bytes); + pipeline_cache.UpdateRange(2, offset + used_bytes); fs_uniform_block_data.dirty = false; - used_bytes += static_cast(uniform_size_aligned_fs); + used_bytes += uniform_size_aligned_fs; } if (sync_vs_pica) { @@ -1126,8 +1129,8 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup); std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); - pipeline_cache.SetBufferOffset(0, offset + used_bytes); - used_bytes += static_cast(uniform_size_aligned_vs_pica); + pipeline_cache.UpdateRange(0, offset + used_bytes); + used_bytes += uniform_size_aligned_vs_pica; } uniform_buffer.Commit(used_bytes); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7fb4780ec..0e65f9647 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -5,6 +5,7 @@ #pragma once #include "video_core/rasterizer_accelerated.h" +#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" @@ -32,15 +33,15 @@ struct ScreenInfo; class Instance; class Scheduler; class RenderManager; -class DescriptorPool; class RasterizerVulkan : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& pica, VideoCore::CustomTexManager& custom_tex_manager, VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window, - const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, - RenderManager& render_manager, u32 image_count); + const Instance& instance, Scheduler& scheduler, + RenderManager& render_manager, DescriptorUpdateQueue& update_queue, + u32 image_count); ~RasterizerVulkan() override; void TickFrame(); @@ -102,18 +103,16 @@ private: /// Syncs all enabled PICA texture units void SyncTextureUnits(const Framebuffer* framebuffer); + /// Syncs all utility textures in the fragment shader. + void SyncUtilityTextures(const Framebuffer* framebuffer); + /// Binds the PICA shadow cube required for shadow mapping - void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture); + void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture, + vk::DescriptorSet texture_set); /// Binds a texture cube to texture unit 0 - void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture); - - /// Makes a temporary copy of the framebuffer if a feedback loop is detected - bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface, - Sampler& sampler); - - /// Unbinds all special texture unit 0 texture configurations - void UnbindSpecial(); + void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture, + vk::DescriptorSet texture_set); /// Upload the uniform blocks to the uniform buffer object void UploadUniforms(bool accelerate_draw); @@ -146,6 +145,7 @@ private: const Instance& instance; Scheduler& scheduler; RenderManager& render_manager; + DescriptorUpdateQueue& update_queue; PipelineCache pipeline_cache; TextureRuntime runtime; RasterizerCache res_cache; @@ -164,10 +164,10 @@ private: vk::UniqueBufferView texture_lf_view; vk::UniqueBufferView texture_rg_view; vk::UniqueBufferView texture_rgba_view; - u64 uniform_buffer_alignment; - u64 uniform_size_aligned_vs_pica; - u64 uniform_size_aligned_vs; - u64 uniform_size_aligned_fs; + vk::DeviceSize uniform_buffer_alignment; + u32 uniform_size_aligned_vs_pica; + u32 uniform_size_aligned_vs; + u32 uniform_size_aligned_fs; bool async_shaders{false}; }; diff --git a/src/video_core/renderer_vulkan/vk_render_manager.cpp b/src/video_core/renderer_vulkan/vk_render_manager.cpp index 238bf0dc1..d934bcc5c 100644 --- a/src/video_core/renderer_vulkan/vk_render_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_render_manager.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright 2024 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -23,7 +23,7 @@ RenderManager::RenderManager(const Instance& instance, Scheduler& scheduler) RenderManager::~RenderManager() = default; void RenderManager::BeginRendering(const Framebuffer* framebuffer, - Common::Rectangle draw_rect) { + Common::Rectangle draw_rect) { const vk::Rect2D render_area = { .offset{ .x = static_cast(draw_rect.left), @@ -107,6 +107,9 @@ void RenderManager::EndRendering() { }; } cmdbuf.endRenderPass(); + if (num_barriers == 0) { + return; + } cmdbuf.pipelineBarrier(pipeline_flags, vk::PipelineStageFlagBits::eFragmentShader | vk::PipelineStageFlagBits::eTransfer, @@ -128,7 +131,7 @@ void RenderManager::EndRendering() { } vk::RenderPass RenderManager::GetRenderpass(VideoCore::PixelFormat color, - VideoCore::PixelFormat depth, bool is_clear) { + VideoCore::PixelFormat depth, bool is_clear) { std::scoped_lock lock{cache_mutex}; const u32 color_index = @@ -153,7 +156,7 @@ vk::RenderPass RenderManager::GetRenderpass(VideoCore::PixelFormat color, } vk::UniqueRenderPass RenderManager::CreateRenderPass(vk::Format color, vk::Format depth, - vk::AttachmentLoadOp load_op) const { + vk::AttachmentLoadOp load_op) const { u32 attachment_count = 0; std::array attachments; diff --git a/src/video_core/renderer_vulkan/vk_render_manager.h b/src/video_core/renderer_vulkan/vk_render_manager.h index 64e71b580..b8d83595e 100644 --- a/src/video_core/renderer_vulkan/vk_render_manager.h +++ b/src/video_core/renderer_vulkan/vk_render_manager.h @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright 2024 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -27,7 +27,10 @@ struct RenderPass { u32 do_clear; bool operator==(const RenderPass& other) const noexcept { - return std::memcmp(this, &other, sizeof(RenderPass)) == 0; + return std::tie(framebuffer, render_pass, render_area, do_clear) == + std::tie(other.framebuffer, other.render_pass, other.render_area, + other.do_clear) && + std::memcmp(&clear, &other.clear, sizeof(vk::ClearValue)) == 0; } }; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index 84df8afe7..0021167e4 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -4,6 +4,7 @@ #include #include +#include #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" @@ -14,9 +15,7 @@ ResourcePool::ResourcePool(MasterSemaphore* master_semaphore_, std::size_t grow_ : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} std::size_t ResourcePool::CommitResource() { - // Refresh semaphore to query updated results - master_semaphore->Refresh(); - const u64 gpu_tick = master_semaphore->KnownGpuTick(); + u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](std::size_t begin, std::size_t end) -> std::optional { for (std::size_t iterator = begin; iterator < end; ++iterator) { @@ -29,7 +28,13 @@ std::size_t ResourcePool::CommitResource() { }; // Try to find a free resource from the hinted position to the end. - std::optional found = search(hint_iterator, ticks.size()); + auto found = search(hint_iterator, ticks.size()); + if (!found) { + // Refresh semaphore to query updated results + master_semaphore->Refresh(); + gpu_tick = master_semaphore->KnownGpuTick(); + found = search(hint_iterator, ticks.size()); + } if (!found) { // Search from beginning to the hinted position. found = search(0, hint_iterator); @@ -48,75 +53,137 @@ std::size_t ResourcePool::CommitResource() { } std::size_t ResourcePool::ManageOverflow() { - const std::size_t old_capacity = ticks.size(); - Grow(); - - // The last entry is guaranted to be free, since it's the first element of the freshly - // allocated resources. - return old_capacity; -} - -void ResourcePool::Grow() { const std::size_t old_capacity = ticks.size(); ticks.resize(old_capacity + grow_step); Allocate(old_capacity, old_capacity + grow_step); + return old_capacity; } constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 4; -struct CommandPool::Pool { - vk::CommandPool handle; - std::array cmdbufs; -}; - CommandPool::CommandPool(const Instance& instance, MasterSemaphore* master_semaphore) - : ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {} - -CommandPool::~CommandPool() { - vk::Device device = instance.GetDevice(); - for (Pool& pool : pools) { - device.destroyCommandPool(pool.handle); - } -} - -void CommandPool::Allocate(std::size_t begin, std::size_t end) { - // Command buffers are going to be commited, recorded, executed every single usage cycle. - // They are also going to be reseted when commited. - Pool& pool = pools.emplace_back(); - + : ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} { const vk::CommandPoolCreateInfo pool_create_info = { .flags = vk::CommandPoolCreateFlagBits::eTransient | vk::CommandPoolCreateFlagBits::eResetCommandBuffer, .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), }; + const vk::Device device = instance.GetDevice(); + cmd_pool = device.createCommandPoolUnique(pool_create_info); + if (instance.HasDebuggingToolAttached()) { + SetObjectName(device, *cmd_pool, "CommandPool"); + } +} - vk::Device device = instance.GetDevice(); - pool.handle = device.createCommandPool(pool_create_info); +CommandPool::~CommandPool() = default; + +void CommandPool::Allocate(std::size_t begin, std::size_t end) { + cmd_buffers.resize(end); const vk::CommandBufferAllocateInfo buffer_alloc_info = { - .commandPool = pool.handle, + .commandPool = *cmd_pool, .level = vk::CommandBufferLevel::ePrimary, .commandBufferCount = COMMAND_BUFFER_POOL_SIZE, }; - auto buffers = device.allocateCommandBuffers(buffer_alloc_info); - std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin()); + const vk::Device device = instance.GetDevice(); + const auto result = + device.allocateCommandBuffers(&buffer_alloc_info, cmd_buffers.data() + begin); + ASSERT(result == vk::Result::eSuccess); if (instance.HasDebuggingToolAttached()) { - Vulkan::SetObjectName(device, pool.handle, "CommandPool: Pool({})", - COMMAND_BUFFER_POOL_SIZE); - - for (u32 i = 0; i < pool.cmdbufs.size(); ++i) { - Vulkan::SetObjectName(device, pool.cmdbufs[i], "CommandPool: Command Buffer {}", i); + for (std::size_t i = begin; i < end; ++i) { + SetObjectName(device, cmd_buffers[i], "CommandPool: Command Buffer {}", i); } } } vk::CommandBuffer CommandPool::Commit() { const std::size_t index = CommitResource(); - const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; - const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; - return pools[pool_index].cmdbufs[sub_index]; + return cmd_buffers[index]; +} + +constexpr u32 DESCRIPTOR_SET_BATCH = 32; + +DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore, + std::span bindings, + u32 descriptor_heap_count_) + : ResourcePool{master_semaphore, DESCRIPTOR_SET_BATCH}, device{instance.GetDevice()}, + descriptor_heap_count{descriptor_heap_count_} { + // Create descriptor set layout. + const vk::DescriptorSetLayoutCreateInfo layout_ci = { + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + descriptor_set_layout = device.createDescriptorSetLayoutUnique(layout_ci); + if (instance.HasDebuggingToolAttached()) { + SetObjectName(device, *descriptor_set_layout, "DescriptorSetLayout"); + } + + // Build descriptor set pool counts. + std::unordered_map descriptor_type_counts; + for (const auto& binding : bindings) { + descriptor_type_counts[binding.descriptorType] += binding.descriptorCount; + } + for (const auto& [type, count] : descriptor_type_counts) { + auto& pool_size = pool_sizes.emplace_back(); + pool_size.descriptorCount = count * descriptor_heap_count; + pool_size.type = type; + } + + // Create descriptor pool + AppendDescriptorPool(); +} + +DescriptorHeap::~DescriptorHeap() = default; + +void DescriptorHeap::Allocate(std::size_t begin, std::size_t end) { + ASSERT(end - begin == DESCRIPTOR_SET_BATCH); + descriptor_sets.resize(end); + + std::array layouts; + layouts.fill(*descriptor_set_layout); + + u32 current_pool = 0; + vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = *pools[current_pool], + .descriptorSetCount = DESCRIPTOR_SET_BATCH, + .pSetLayouts = layouts.data(), + }; + + // Attempt to allocate the descriptor set batch. If the pool has run out of space, use a new + // one. + while (true) { + const auto result = + device.allocateDescriptorSets(&alloc_info, descriptor_sets.data() + begin); + if (result == vk::Result::eSuccess) { + break; + } + if (result == vk::Result::eErrorOutOfPoolMemory) { + current_pool++; + if (current_pool == pools.size()) { + LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!"); + AppendDescriptorPool(); + } + alloc_info.descriptorPool = *pools[current_pool]; + } + } +} + +vk::DescriptorSet DescriptorHeap::Commit() { + const std::size_t index = CommitResource(); + return descriptor_sets[index]; +} + +void DescriptorHeap::AppendDescriptorPool() { + const vk::DescriptorPoolCreateInfo pool_info = { + .flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, + .maxSets = descriptor_heap_count, + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), + }; + auto& pool = pools.emplace_back(); + pool = device.createDescriptorPoolUnique(pool_info); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 81fc549e7..568b08d7a 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -39,9 +39,6 @@ private: /// Manages pool overflow allocating new resources. std::size_t ManageOverflow(); - /// Allocates a new page of resources. - void Grow(); - protected: MasterSemaphore* master_semaphore{nullptr}; std::size_t grow_step = 0; ///< Number of new resources created after an overflow @@ -59,9 +56,36 @@ public: vk::CommandBuffer Commit(); private: - struct Pool; const Instance& instance; - std::vector pools; + vk::UniqueCommandPool cmd_pool; + std::vector cmd_buffers; +}; + +class DescriptorHeap final : public ResourcePool { +public: + explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore, + std::span bindings, + u32 descriptor_heap_count = 1024); + ~DescriptorHeap() override; + + const vk::DescriptorSetLayout& Layout() const { + return *descriptor_set_layout; + } + + void Allocate(std::size_t begin, std::size_t end) override; + + vk::DescriptorSet Commit(); + +private: + void AppendDescriptorPool(); + +private: + vk::Device device; + vk::UniqueDescriptorSetLayout descriptor_set_layout; + u32 descriptor_heap_count; + std::vector pool_sizes; + std::vector pools; + std::vector descriptor_sets; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index e6285d6cd..0099b0ca3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -5,10 +5,8 @@ #include #include #include "common/microprofile.h" -#include "common/settings.h" #include "common/thread.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); @@ -98,7 +96,7 @@ void Scheduler::DispatchWork() { return; } - //on_dispatch(); + on_dispatch(); { std::scoped_lock ql{queue_mutex}; @@ -183,6 +181,8 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa master_semaphore->SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value); }); + master_semaphore->Refresh(); + if (!use_worker_thread) { AllocateWorkerCommandBuffers(); } else { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 20d55a4b9..96fd3bb32 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -4,9 +4,9 @@ #pragma once +#include #include #include -#include #include "common/alignment.h" #include "common/common_funcs.h" #include "common/polyfill_thread.h" diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index ba5c5f867..608ed3788 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -182,6 +182,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v includer)) [[unlikely]] { LOG_INFO(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog()); + LOG_INFO(Render_Vulkan, "Shader Source:\n{}", code); return {}; } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 3ca77b1f8..2a5bf7b0a 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -82,7 +82,7 @@ StreamBuffer::~StreamBuffer() { device.freeMemory(memory); } -std::tuple StreamBuffer::Map(u64 size, u64 alignment) { +std::tuple StreamBuffer::Map(u32 size, u64 alignment) { if (!is_coherent && type == BufferType::Stream) { size = Common::AlignUp(size, instance.NonCoherentAtomSize()); } @@ -114,7 +114,7 @@ std::tuple StreamBuffer::Map(u64 size, u64 alignment) { return std::make_tuple(mapped + offset, offset, invalidate); } -void StreamBuffer::Commit(u64 size) { +void StreamBuffer::Commit(u32 size) { if (!is_coherent && type == BufferType::Stream) { size = Common::AlignUp(size, instance.NonCoherentAtomSize()); } @@ -200,11 +200,10 @@ void StreamBuffer::CreateBuffers(u64 prefered_size) { mapped = reinterpret_cast(device.mapMemory(memory, 0, VK_WHOLE_SIZE)); if (instance.HasDebuggingToolAttached()) { - Vulkan::SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type), - stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags)); - Vulkan::SetObjectName(device, memory, "StreamBufferMemory({}): {} Kib {}", - BufferTypeName(type), stream_buffer_size / 1024, - vk::to_string(mem_type.propertyFlags)); + SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type), + stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags)); + SetObjectName(device, memory, "StreamBufferMemory({}): {} Kib {}", BufferTypeName(type), + stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags)); } } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 2b14c78a7..01747a391 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,10 +35,10 @@ public: * @param size Size to reserve. * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple Map(u64 size, u64 alignment); + std::tuple Map(u32 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. - void Commit(u64 size); + void Commit(u32 size); vk::Buffer Handle() const noexcept { return buffer; @@ -70,8 +70,8 @@ private: vk::BufferUsageFlags usage{}; BufferType type; - u64 offset{}; ///< Buffer iterator. - u64 mapped_size{}; ///< Size reserved for the current copy. + u32 offset{}; ///< Buffer iterator. + u32 mapped_size{}; ///< Size reserved for the current copy. bool is_coherent{}; ///< True if the buffer is coherent std::vector current_watches; ///< Watches recorded in the current iteration. diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index 4cc819e02..2d8597733 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -12,7 +12,6 @@ #include "video_core/rasterizer_cache/texture_codec.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_vulkan/pica_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_render_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -249,10 +248,10 @@ constexpr u64 DOWNLOAD_BUFFER_SIZE = 16_MiB; } // Anonymous namespace TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, - RenderManager& render_manager, DescriptorPool& pool, - DescriptorSetProvider& texture_provider_, u32 num_swapchain_images_) + RenderManager& render_manager, DescriptorUpdateQueue& update_queue, + u32 num_swapchain_images_) : instance{instance}, scheduler{scheduler}, render_manager{render_manager}, - texture_provider{texture_provider_}, blit_helper{instance, scheduler, pool, render_manager}, + blit_helper{instance, scheduler, render_manager, update_queue}, upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE, BufferType::Upload}, download_buffer{instance, scheduler, @@ -268,7 +267,7 @@ VideoCore::StagingData TextureRuntime::FindStaging(u32 size, bool upload) { const auto [data, offset, invalidate] = buffer.Map(size, 16); return VideoCore::StagingData{ .size = size, - .offset = static_cast(offset), + .offset = offset, .mapped = std::span{data, size}, }; } @@ -453,7 +452,7 @@ void TextureRuntime::ClearTextureWithRenderpass(Surface& surface, } bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, - const VideoCore::TextureCopy& copy) { + std::span copies) { render_manager.EndRendering(); const RecordParams params = { @@ -466,8 +465,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .dst_image = dest.Image(), }; - scheduler.Record([params, copy](vk::CommandBuffer cmdbuf) { - const vk::ImageCopy image_copy = { + boost::container::small_vector vk_copies; + std::ranges::transform(copies, std::back_inserter(vk_copies), [&](const auto& copy) { + return vk::ImageCopy{ .srcSubresource{ .aspectMask = params.aspect, .mipLevel = copy.src_level, @@ -486,7 +486,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, 0}, .extent = {copy.extent.width, copy.extent.height, 1}, }; + }); + scheduler.Record([params, copies = std::move(vk_copies)](vk::CommandBuffer cmdbuf) { const bool self_copy = params.src_image == params.dst_image; const vk::ImageLayout new_src_layout = self_copy ? vk::ImageLayout::eGeneral : vk::ImageLayout::eTransferSrcOptimal; @@ -502,7 +504,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = params.src_image, - .subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level), + .subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS), }, vk::ImageMemoryBarrier{ .srcAccessMask = params.dst_access, @@ -512,7 +514,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = params.dst_image, - .subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level), + .subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS), }, }; const std::array post_barriers = { @@ -524,7 +526,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = params.src_image, - .subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level), + .subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS), }, vk::ImageMemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eTransferWrite, @@ -534,7 +536,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = params.dst_image, - .subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level), + .subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS), }, }; @@ -542,7 +544,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); cmdbuf.copyImage(params.src_image, new_src_layout, params.dst_image, new_dst_layout, - image_copy); + copies); cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); @@ -694,13 +696,6 @@ bool TextureRuntime::NeedsConversion(VideoCore::PixelFormat format) const { traits.aspect != (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil); } -void TextureRuntime::FreeDescriptorSetsWithImage(vk::ImageView image_view) { - texture_provider.FreeWithImage(image_view); - blit_helper.compute_provider.FreeWithImage(image_view); - blit_helper.compute_buffer_provider.FreeWithImage(image_view); - blit_helper.two_textures_provider.FreeWithImage(image_view); -} - Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params) : SurfaceBase{params}, runtime{&runtime_}, instance{&runtime_.GetInstance()}, scheduler{&runtime_.GetScheduler()}, traits{instance->GetTraits(pixel_format)} { @@ -798,9 +793,6 @@ Surface::~Surface() { return; } for (const auto& [alloc, image, image_view] : handles) { - if (image_view) { - runtime->FreeDescriptorSetsWithImage(*image_view); - } if (image) { vmaDestroyImage(instance->GetAllocator(), image, alloc); } @@ -902,7 +894,7 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) { const Common::Rectangle rect{0U, height, width, 0U}; const auto upload = [&](u32 index, VideoCore::CustomTexture* texture) { - const u64 custom_size = texture->data.size(); + const u32 custom_size = static_cast(texture->data.size()); const RecordParams params = { .aspect = vk::ImageAspectFlagBits::eColor, .pipeline_flags = PipelineStageFlags(), @@ -1515,7 +1507,7 @@ Sampler::Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params instance.IsCustomBorderColorSupported() && (params.wrap_s == TextureConfig::ClampToBorder || params.wrap_t == TextureConfig::ClampToBorder); - const Common::Vec4f color = PicaToVK::ColorRGBA8(params.border_color); + const auto color = PicaToVK::ColorRGBA8(params.border_color); const vk::SamplerCustomBorderColorCreateInfoEXT border_color_info = { .customBorderColor = MakeClearColorValue(color), .format = vk::Format::eUndefined, diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index fa7c188ab..f7a6bcdbd 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -23,9 +23,8 @@ namespace Vulkan { class Instance; class RenderManager; -class DescriptorPool; -class DescriptorSetProvider; class Surface; +class DescriptorUpdateQueue; struct Handle { VmaAllocation alloc; @@ -42,8 +41,8 @@ class TextureRuntime { public: explicit TextureRuntime(const Instance& instance, Scheduler& scheduler, - RenderManager& render_manager, DescriptorPool& pool, - DescriptorSetProvider& texture_provider, u32 num_swapchain_images); + RenderManager& render_manager, DescriptorUpdateQueue& update_queue, + u32 num_swapchain_images); ~TextureRuntime(); const Instance& GetInstance() const { @@ -74,7 +73,12 @@ public: bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear); /// Copies a rectangle of src_tex to another rectange of dst_rect - bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); + bool CopyTextures(Surface& source, Surface& dest, + std::span copies); + + bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { + return CopyTextures(source, dest, std::array{copy}); + } /// Blits a rectangle of src_tex to another rectange of dst_rect bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit); @@ -85,9 +89,6 @@ public: /// Returns true if the provided pixel format needs convertion bool NeedsConversion(VideoCore::PixelFormat format) const; - /// Removes any descriptor sets that contain the provided image view. - void FreeDescriptorSetsWithImage(vk::ImageView image_view); - private: /// Clears a partial texture rect using a clear rectangle void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear); @@ -96,7 +97,6 @@ private: const Instance& instance; Scheduler& scheduler; RenderManager& render_manager; - DescriptorSetProvider& texture_provider; BlitHelper blit_helper; StreamBuffer upload_buffer; StreamBuffer download_buffer; diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp index 852f6eed8..96be02d05 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp @@ -106,7 +106,11 @@ FragmentModule::FragmentModule(const FSConfig& config_, const Profile& profile_) out.reserve(RESERVE_SIZE); DefineExtensions(); DefineInterface(); - DefineBindings(); + if (profile.is_vulkan) { + DefineBindingsVK(); + } else { + DefineBindingsGL(); + } DefineHelpers(); DefineShadowHelpers(); DefineLightingHelpers(); @@ -1272,7 +1276,43 @@ void FragmentModule::DefineInterface() { out += "layout (location = 0) out vec4 color;\n\n"; } -void FragmentModule::DefineBindings() { +void FragmentModule::DefineBindingsVK() { + // Uniform and texture buffers + out += FSUniformBlockDef; + out += "layout(set = 0, binding = 3) uniform samplerBuffer texture_buffer_lut_lf;\n"; + out += "layout(set = 0, binding = 4) uniform samplerBuffer texture_buffer_lut_rg;\n"; + out += "layout(set = 0, binding = 5) uniform samplerBuffer texture_buffer_lut_rgba;\n\n"; + + // Texture samplers + const auto texture_type = config.texture.texture0_type.Value(); + const auto sampler_tex0 = [&] { + switch (texture_type) { + case TextureType::Shadow2D: + case TextureType::ShadowCube: + return "usampler2D"; + case TextureType::TextureCube: + return "samplerCube"; + default: + return "sampler2D"; + } + }(); + for (u32 i = 0; i < 3; i++) { + const auto sampler = i == 0 ? sampler_tex0 : "sampler2D"; + const auto num_descriptors = i == 0 && texture_type == TextureType::ShadowCube ? "[6]" : ""; + out += fmt::format("layout(set = 1, binding = {0}) uniform {1} tex{0}{2};\n", i, sampler, + num_descriptors); + } + + // Utility textures + if (config.framebuffer.shadow_rendering) { + out += "layout(set = 2, binding = 0, r32ui) uniform uimage2D shadow_buffer;\n\n"; + } + if (config.user.use_custom_normal) { + out += "layout(set = 2, binding = 1) uniform sampler2D tex_normal;\n"; + } +} + +void FragmentModule::DefineBindingsGL() { // Uniform and texture buffers out += FSUniformBlockDef; out += "layout(binding = 3) uniform samplerBuffer texture_buffer_lut_lf;\n"; @@ -1280,33 +1320,32 @@ void FragmentModule::DefineBindings() { out += "layout(binding = 5) uniform samplerBuffer texture_buffer_lut_rgba;\n\n"; // Texture samplers - const auto texunit_set = profile.is_vulkan ? "set = 1, " : ""; const auto texture_type = config.texture.texture0_type.Value(); for (u32 i = 0; i < 3; i++) { const auto sampler = i == 0 && texture_type == TextureType::TextureCube ? "samplerCube" : "sampler2D"; - out += - fmt::format("layout({0}binding = {1}) uniform {2} tex{1};\n", texunit_set, i, sampler); + out += fmt::format("layout(binding = {0}) uniform {1} tex{0};\n", i, sampler); } - if (config.user.use_custom_normal && !profile.is_vulkan) { + // Utility textures + if (config.user.use_custom_normal) { out += "layout(binding = 6) uniform sampler2D tex_normal;\n"; } - if (use_blend_fallback && !profile.is_vulkan) { + if (use_blend_fallback) { out += "layout(location = 7) uniform sampler2D tex_color;\n"; } - // Storage images - static constexpr std::array postfixes = {"px", "nx", "py", "ny", "pz", "nz"}; - const auto shadow_set = profile.is_vulkan ? "set = 2, " : ""; - for (u32 i = 0; i < postfixes.size(); i++) { - out += fmt::format( - "layout({}binding = {}, r32ui) uniform readonly uimage2D shadow_texture_{};\n", - shadow_set, i, postfixes[i]); + // Shadow textures + if (texture_type == TextureType::Shadow2D || texture_type == TextureType::ShadowCube) { + static constexpr std::array postfixes = {"px", "nx", "py", "ny", "pz", "nz"}; + for (u32 i = 0; i < postfixes.size(); i++) { + out += fmt::format( + "layout(binding = {}, r32ui) uniform readonly uimage2D shadow_texture_{};\n", i, + postfixes[i]); + } } if (config.framebuffer.shadow_rendering) { - out += fmt::format("layout({}binding = 6, r32ui) uniform uimage2D shadow_buffer;\n\n", - shadow_set); + out += "layout(binding = 6, r32ui) uniform uimage2D shadow_buffer;\n\n"; } } @@ -1414,19 +1453,48 @@ float mix2(vec4 s, vec2 a) { )"; if (config.texture.texture0_type == TexturingRegs::TextureConfig::Shadow2D) { - out += R"( + if (profile.is_vulkan) { + out += R"( float SampleShadow2D(ivec2 uv, uint z) { - if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) + if (any(bvec4(lessThan(uv, ivec2(0)), greaterThanEqual(uv, textureSize(tex0, 0))))) + return 1.0; + return CompareShadow(texelFetch(tex0, uv, 0).x, z); +} + +vec4 shadowTexture(vec2 uv, float w) { +)"; + if (!config.texture.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += R"( + uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); + vec2 coord = vec2(textureSize(tex0, 0)) * uv - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i = ivec2(coord_floor); + vec4 s = vec4( + SampleShadow2D(i , z), + SampleShadow2D(i + ivec2(1, 0), z), + SampleShadow2D(i + ivec2(0, 1), z), + SampleShadow2D(i + ivec2(1, 1), z)); + return vec4(mix2(s, f)); +} +)"; + + } else { + out += R"( +float SampleShadow2D(ivec2 uv, uint z) { + if (any(bvec4(lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px))))) return 1.0; return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); } vec4 shadowTexture(vec2 uv, float w) { )"; - if (!config.texture.shadow_texture_orthographic) { - out += "uv /= w;"; - } - out += R"( + if (!config.texture.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += R"( uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); vec2 coord_floor = floor(coord); @@ -1440,8 +1508,75 @@ vec4 shadowTexture(vec2 uv, float w) { return vec4(mix2(s, f)); } )"; + } } else if (config.texture.texture0_type == TexturingRegs::TextureConfig::ShadowCube) { - out += R"( + if (profile.is_vulkan) { + out += R"( +uvec4 SampleShadowCube(int face, ivec2 i00, ivec2 i10, ivec2 i01, ivec2 i11) { + return uvec4( + texelFetch(tex0[face], i00, 0).r, + texelFetch(tex0[face], i10, 0).r, + texelFetch(tex0[face], i01, 0).r, + texelFetch(tex0[face], i11, 0).r); +} + +vec4 shadowTextureCube(vec2 uv, float w) { + ivec2 size = textureSize(tex0[0], 0); + vec3 c = vec3(uv, w); + vec3 a = abs(c); + if (a.x > a.y && a.x > a.z) { + w = a.x; + uv = -c.zy; + if (c.x < 0.0) uv.x = -uv.x; + } else if (a.y > a.z) { + w = a.y; + uv = c.xz; + if (c.y < 0.0) uv.y = -uv.y; + } else { + w = a.z; + uv = -c.xy; + if (c.z > 0.0) uv.x = -uv.x; + } + uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); + vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i00 = ivec2(coord_floor); + ivec2 i10 = i00 + ivec2(1, 0); + ivec2 i01 = i00 + ivec2(0, 1); + ivec2 i11 = i00 + ivec2(1, 1); + ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); + i00 = clamp(i00, cmin, cmax); + i10 = clamp(i10, cmin, cmax); + i01 = clamp(i01, cmin, cmax); + i11 = clamp(i11, cmin, cmax); + uvec4 pixels; + if (a.x > a.y && a.x > a.z) { + if (c.x > 0.0) + pixels = SampleShadowCube(0, i00, i10, i01, i11); + else + pixels = SampleShadowCube(1, i00, i10, i01, i11); + } else if (a.y > a.z) { + if (c.y > 0.0) + pixels = SampleShadowCube(2, i00, i10, i01, i11); + else + pixels = SampleShadowCube(3, i00, i10, i01, i11); + } else { + if (c.z > 0.0) + pixels = SampleShadowCube(4, i00, i10, i01, i11); + else + pixels = SampleShadowCube(5, i00, i10, i01, i11); + } + vec4 s = vec4( + CompareShadow(pixels.x, z), + CompareShadow(pixels.y, z), + CompareShadow(pixels.z, z), + CompareShadow(pixels.w, z)); + return vec4(mix2(s, f)); +} + )"; + } else { + out += R"( vec4 shadowTextureCube(vec2 uv, float w) { ivec2 size = imageSize(shadow_texture_px); vec3 c = vec3(uv, w); @@ -1523,6 +1658,7 @@ vec4 shadowTextureCube(vec2 uv, float w) { return vec4(mix2(s, f)); } )"; + } } } } diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.h b/src/video_core/shader/generator/glsl_fs_shader_gen.h index a7eff4c44..10dab5b26 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.h +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.h @@ -74,7 +74,8 @@ private: void DefineExtensions(); void DefineInterface(); - void DefineBindings(); + void DefineBindingsVK(); + void DefineBindingsGL(); void DefineHelpers(); void DefineLightingHelpers(); void DefineShadowHelpers();