From 427dd45151eddc920d681c4711a3cdbd7e363f82 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 17 Mar 2019 15:59:22 +1000 Subject: [PATCH] Vulkan: Simplify command buffer fence tracking --- .../Vulkan/CommandBufferManager.cpp | 122 ++++++++---------- .../Vulkan/CommandBufferManager.h | 34 ++--- .../Core/VideoBackends/Vulkan/PerfQuery.cpp | 39 +++--- Source/Core/VideoBackends/Vulkan/PerfQuery.h | 6 +- Source/Core/VideoBackends/Vulkan/Renderer.cpp | 12 +- .../VideoBackends/Vulkan/StreamBuffer.cpp | 58 +++++---- .../Core/VideoBackends/Vulkan/StreamBuffer.h | 4 +- .../Core/VideoBackends/Vulkan/VKTexture.cpp | 59 ++------- Source/Core/VideoBackends/Vulkan/VKTexture.h | 2 +- .../VideoBackends/Vulkan/VertexManager.cpp | 6 +- Source/Core/VideoBackends/Vulkan/main.cpp | 4 +- 11 files changed, 138 insertions(+), 208 deletions(-) diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp index 1f2fcd01c7..f00f6001cb 100644 --- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp @@ -54,7 +54,6 @@ bool CommandBufferManager::CreateCommandBuffers() { resources.init_command_buffer_used = false; resources.semaphore_used = false; - resources.needs_fence_wait = false; VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, g_vulkan_context->GetGraphicsQueueFamilyIndex()}; @@ -211,43 +210,61 @@ void CommandBufferManager::WaitForWorkerThreadIdle() m_submit_semaphore.Post(); } -void CommandBufferManager::WaitForGPUIdle() +void CommandBufferManager::WaitForFenceCounter(u64 fence_counter) { - WaitForWorkerThreadIdle(); - vkDeviceWaitIdle(g_vulkan_context->GetDevice()); -} - -void CommandBufferManager::WaitForFence(VkFence fence) -{ - // Find the command buffer that this fence corresponds to. 
- u32 command_buffer_index = 0; - for (; command_buffer_index < static_cast(m_frame_resources.size()); command_buffer_index++) - { - if (m_frame_resources[command_buffer_index].fence == fence) - break; - } - ASSERT(command_buffer_index < m_frame_resources.size()); - - // Has this command buffer already been waited for? - if (!m_frame_resources[command_buffer_index].needs_fence_wait) + if (m_completed_fence_counter >= fence_counter) return; + // Find the first command buffer which covers this counter value. + u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; + while (index != m_current_frame) + { + if (m_frame_resources[index].fence_counter >= fence_counter) + break; + + index = (index + 1) % NUM_COMMAND_BUFFERS; + } + + ASSERT(index != m_current_frame); + WaitForCommandBufferCompletion(index); +} + +void CommandBufferManager::WaitForCommandBufferCompletion(u32 index) +{ // Ensure this command buffer has been submitted. WaitForWorkerThreadIdle(); // Wait for this command buffer to be completed. - VkResult res = - vkWaitForFences(g_vulkan_context->GetDevice(), 1, - &m_frame_resources[command_buffer_index].fence, VK_TRUE, UINT64_MAX); + VkResult res = vkWaitForFences(g_vulkan_context->GetDevice(), 1, &m_frame_resources[index].fence, + VK_TRUE, UINT64_MAX); if (res != VK_SUCCESS) LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); - // Immediately fire callbacks and cleanups, since the commands has been completed. - m_frame_resources[command_buffer_index].needs_fence_wait = false; - OnCommandBufferExecuted(command_buffer_index); + // Clean up any resources for command buffers between the last known completed buffer and this + // now-completed command buffer. If we use >2 buffers, this may be more than one buffer. 
+ const u64 now_completed_counter = m_frame_resources[index].fence_counter; + u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; + while (cleanup_index != m_current_frame) + { + FrameResources& resources = m_frame_resources[cleanup_index]; + if (resources.fence_counter > now_completed_counter) + break; + + if (resources.fence_counter > m_completed_fence_counter) + { + for (auto& it : resources.cleanup_resources) + it(); + resources.cleanup_resources.clear(); + } + + cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS; + } + + m_completed_fence_counter = now_completed_counter; } void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, + bool wait_for_completion, VkSwapchainKHR present_swap_chain, uint32_t present_image_index) { @@ -263,16 +280,13 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, } } - // This command buffer now has commands, so can't be re-used without waiting. - resources.needs_fence_wait = true; - // Grab the semaphore before submitting command buffer either on-thread or off-thread. // This prevents a race from occurring where a second command buffer is executed // before the worker thread has woken and executed the first one yet. m_submit_semaphore.Wait(); // Submitting off-thread? - if (m_use_threaded_submission && submit_on_worker_thread) + if (m_use_threaded_submission && submit_on_worker_thread && !wait_for_completion) { // Push to the pending submit queue. { @@ -287,6 +301,8 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, { // Pass through to normal submission path. SubmitCommandBuffer(m_current_frame, present_swap_chain, present_image_index); + if (wait_for_completion) + WaitForCommandBufferCompletion(m_current_frame); } // Switch to next cmdbuffer. 
@@ -365,39 +381,15 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index, m_submit_semaphore.Post(); } -void CommandBufferManager::OnCommandBufferExecuted(u32 index) -{ - FrameResources& resources = m_frame_resources[index]; - - // Fire fence tracking callbacks. - for (auto iter = m_fence_callbacks.begin(); iter != m_fence_callbacks.end();) - { - auto backup_iter = iter++; - backup_iter->second(resources.fence); - } - - // Clean up all objects pending destruction on this command buffer - for (auto& it : resources.cleanup_resources) - it(); - resources.cleanup_resources.clear(); -} - void CommandBufferManager::BeginCommandBuffer() { // Move to the next command buffer. - m_current_frame = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; - FrameResources& resources = m_frame_resources[m_current_frame]; + const u32 next_buffer_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; + FrameResources& resources = m_frame_resources[next_buffer_index]; // Wait for the GPU to finish with all resources for this command buffer. - if (resources.needs_fence_wait) - { - VkResult res = - vkWaitForFences(g_vulkan_context->GetDevice(), 1, &resources.fence, true, UINT64_MAX); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); - - OnCommandBufferExecuted(m_current_frame); - } + if (resources.fence_counter > m_completed_fence_counter) + WaitForCommandBufferCompletion(next_buffer_index); // Reset fence to unsignaled before starting. 
VkResult res = vkResetFences(g_vulkan_context->GetDevice(), 1, &resources.fence); @@ -427,6 +419,8 @@ void CommandBufferManager::BeginCommandBuffer() // Reset upload command buffer state resources.init_command_buffer_used = false; resources.semaphore_used = false; + resources.fence_counter = m_next_fence_counter++; + m_current_frame = next_buffer_index; } void CommandBufferManager::DeferBufferDestruction(VkBuffer object) @@ -471,19 +465,5 @@ void CommandBufferManager::DeferImageViewDestruction(VkImageView object) [object]() { vkDestroyImageView(g_vulkan_context->GetDevice(), object, nullptr); }); } -void CommandBufferManager::AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback) -{ - // Shouldn't be adding twice. - ASSERT(m_fence_callbacks.find(key) == m_fence_callbacks.end()); - m_fence_callbacks.emplace(key, std::move(callback)); -} - -void CommandBufferManager::RemoveFenceSignaledCallback(const void* key) -{ - auto iter = m_fence_callbacks.find(key); - ASSERT(iter != m_fence_callbacks.end()); - m_fence_callbacks.erase(iter); -} - std::unique_ptr g_command_buffer_mgr; } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h index 9cfc50e287..abc49c0622 100644 --- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h @@ -51,9 +51,15 @@ public: // Allocates a descriptors set from the pool reserved for the current frame. VkDescriptorSet AllocateDescriptorSet(VkDescriptorSetLayout set_layout); + // Fence "counters" are used to track which commands have been completed by the GPU. + // If the last completed fence counter is greater or equal to N, it means that the work + // associated with counter N has been completed by the GPU. The value of N to associate with + // commands can be retrieved by calling GetCurrentFenceCounter(). 
+ u64 GetCompletedFenceCounter() const { return m_completed_fence_counter; } + // Gets the fence that will be signaled when the currently executing command buffer is // queued and executed. Do not wait for this fence before the buffer is executed. - VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; } + u64 GetCurrentFenceCounter() const { return m_frame_resources[m_current_frame].fence_counter; } // Returns the semaphore for the current command buffer, which can be used to ensure the // swap chain image is ready before the command buffer executes. @@ -66,15 +72,11 @@ public: // Ensure that the worker thread has submitted any previous command buffers and is idle. void WaitForWorkerThreadIdle(); - // Ensure that the worker thread has both submitted all commands, and the GPU has caught up. - // Use with caution, huge performance penalty. - void WaitForGPUIdle(); - // Wait for a fence to be completed. // Also invokes callbacks for completion. - void WaitForFence(VkFence fence); + void WaitForFenceCounter(u64 fence_counter); - void SubmitCommandBuffer(bool submit_on_worker_thread, + void SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion, VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE, uint32_t present_image_index = 0xFFFFFFFF); @@ -90,25 +92,17 @@ public: void DeferImageDestruction(VkImage object); void DeferImageViewDestruction(VkImageView object); - // Instruct the manager to fire the specified callback when a fence is flagged to be signaled. - // This happens when command buffers are executed, and can be tested if signaled, which means - // that all commands up to the point when the callback was fired have completed. 
- using FenceSignaledCallback = std::function; - void AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback); - void RemoveFenceSignaledCallback(const void* key); - private: bool CreateCommandBuffers(); void DestroyCommandBuffers(); bool CreateSubmitThread(); + void WaitForCommandBufferCompletion(u32 command_buffer_index); void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain, u32 present_image_index); void BeginCommandBuffer(); - void OnCommandBufferExecuted(u32 index); - struct FrameResources { // [0] - Init (upload) command buffer, [1] - draw command buffer @@ -117,19 +111,19 @@ private: VkDescriptorPool descriptor_pool = VK_NULL_HANDLE; VkFence fence = VK_NULL_HANDLE; VkSemaphore semaphore = VK_NULL_HANDLE; + u64 fence_counter = 0; bool init_command_buffer_used = false; bool semaphore_used = false; - bool needs_fence_wait = false; std::vector> cleanup_resources; }; + u64 m_next_fence_counter = 1; + u64 m_completed_fence_counter = 0; + std::array m_frame_resources; u32 m_current_frame; - // callbacks when a fence point is set - std::map m_fence_callbacks; - // Threaded command buffer execution // Semaphore determines when a command buffer can be queued Common::Semaphore m_submit_semaphore; diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp index 3a84f2e571..1934c78410 100644 --- a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp @@ -24,8 +24,6 @@ PerfQuery::PerfQuery() = default; PerfQuery::~PerfQuery() { - g_command_buffer_mgr->RemoveFenceSignaledCallback(this); - if (m_query_pool != VK_NULL_HANDLE) vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr); } @@ -49,9 +47,6 @@ bool PerfQuery::Initialize() return false; } - g_command_buffer_mgr->AddFenceSignaledCallback( - this, std::bind(&PerfQuery::OnFenceSignaled, this, std::placeholders::_1)); - return true; } @@ -113,7 +108,7 @@ void 
PerfQuery::ResetQuery() for (auto& entry : m_query_buffer) { - entry.pending_fence = VK_NULL_HANDLE; + entry.fence_counter = 0; entry.available = false; entry.active = false; } @@ -217,7 +212,7 @@ { u32 index = start_index + i; ActiveQuery& entry = m_query_buffer[index]; - entry.pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + entry.fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); entry.available = true; entry.active = false; } @@ -261,8 +256,10 @@ void PerfQuery::FlushQueries() QueueCopyQueryResults(copy_start_index, copy_count); } -void PerfQuery::OnFenceSignaled(VkFence fence) +void PerfQuery::ProcessPendingResults() { + const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter(); + // Need to save these since ProcessResults will modify them. u32 query_read_pos = m_query_read_pos; u32 query_count = m_query_count; @@ -273,7 +270,7 @@ for (u32 i = 0; i < query_count; i++) { u32 index = (query_read_pos + i) % PERF_QUERY_BUFFER_SIZE; - if (m_query_buffer[index].pending_fence != fence) + if (m_query_buffer[index].fence_counter > completed_fence_counter) { // These should be grouped together, at the end. break; } @@ -314,8 +311,8 @@ ActiveQuery& entry = m_query_buffer[index]; // Should have a fence associated with it (waiting for a result). - ASSERT(entry.pending_fence != VK_NULL_HANDLE); - entry.pending_fence = VK_NULL_HANDLE; + ASSERT(entry.fence_counter != 0); + entry.fence_counter = 0; entry.available = false; entry.active = false; @@ -340,9 +337,11 @@ return; // Submit a command buffer in the background if the front query is not bound to one. - // Ideally this will complete before the buffer fills. 
- if (m_query_buffer[m_query_read_pos].pending_fence == VK_NULL_HANDLE) + ActiveQuery& entry = m_query_buffer[m_query_read_pos]; + if (entry.fence_counter == g_command_buffer_mgr->GetCurrentFenceCounter()) Renderer::GetInstance()->ExecuteCommandBuffer(true, false); + + ProcessPendingResults(); } void PerfQuery::BlockingPartialFlush() @@ -352,17 +351,9 @@ void PerfQuery::BlockingPartialFlush() // If the first pending query is needing command buffer execution, do that. ActiveQuery& entry = m_query_buffer[m_query_read_pos]; - if (entry.pending_fence == VK_NULL_HANDLE) - { - // This will callback OnCommandBufferQueued which will set the fence on the entry. - // We wait for completion, which will also call OnCommandBufferExecuted, and clear the fence. + if (entry.fence_counter == g_command_buffer_mgr->GetCurrentFenceCounter()) Renderer::GetInstance()->ExecuteCommandBuffer(false, true); - } - else - { - // The command buffer has been submitted, but is awaiting completion. - // Wait for the fence to complete, which will call OnCommandBufferExecuted. 
- g_command_buffer_mgr->WaitForFence(entry.pending_fence); - } + + ProcessPendingResults(); } } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.h b/Source/Core/VideoBackends/Vulkan/PerfQuery.h index 47ccf22a66..facbe0dc0f 100644 --- a/Source/Core/VideoBackends/Vulkan/PerfQuery.h +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.h @@ -36,8 +36,8 @@ public: private: struct ActiveQuery { + u64 fence_counter; PerfQueryType query_type; - VkFence pending_fence; bool available; bool active; }; @@ -45,11 +45,9 @@ private: bool CreateQueryPool(); bool CreateReadbackBuffer(); void QueueCopyQueryResults(u32 start_index, u32 query_count); + void ProcessPendingResults(); void ProcessResults(u32 start_index, u32 query_count); - void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence); - void OnFenceSignaled(VkFence fence); - void NonBlockingPartialFlush(); void BlockingPartialFlush(); diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index 8b560be15d..a4db679ecb 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -315,7 +315,7 @@ void Renderer::PresentBackbuffer() // Because this final command buffer is rendering to the swap chain, we need to wait for // the available semaphore to be signaled before executing the buffer. This final submission // can happen off-thread in the background while we're preparing the next frame. - g_command_buffer_mgr->SubmitCommandBuffer(true, m_swap_chain->GetSwapChain(), + g_command_buffer_mgr->SubmitCommandBuffer(true, false, m_swap_chain->GetSwapChain(), m_swap_chain->GetCurrentImageIndex()); // New cmdbuffer, so invalidate state. 
@@ -327,11 +327,7 @@ void Renderer::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_comple StateTracker::GetInstance()->EndRenderPass(); PerfQuery::GetInstance()->FlushQueries(); - // If we're waiting for completion, don't bother waking the worker thread. - const VkFence pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); - g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread && wait_for_completion); - if (wait_for_completion) - g_command_buffer_mgr->WaitForFence(pending_fence); + g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread, wait_for_completion); StateTracker::GetInstance()->InvalidateCachedState(); } @@ -550,10 +546,6 @@ void Renderer::UnbindTexture(const AbstractTexture* texture) void Renderer::ResetSamplerStates() { - // Ensure none of the sampler objects are in use. - // This assumes that none of the samplers are in use on the command list currently being recorded. - g_command_buffer_mgr->WaitForGPUIdle(); - // Invalidate all sampler states, next draw will re-initialize them. for (u32 i = 0; i < m_sampler_states.size(); i++) { diff --git a/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp index ea610f09cf..aa635e4d41 100644 --- a/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp +++ b/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp @@ -19,14 +19,10 @@ namespace Vulkan { StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, u32 size) : m_usage(usage), m_size(size) { - g_command_buffer_mgr->AddFenceSignaledCallback( - this, std::bind(&StreamBuffer::OnFenceSignaled, this, std::placeholders::_1)); } StreamBuffer::~StreamBuffer() { - g_command_buffer_mgr->RemoveFenceSignaledCallback(this); - if (m_host_pointer) vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory); @@ -189,8 +185,6 @@ bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) // Can we find a fence to wait on that will give us enough memory? 
if (WaitForClearSpace(required_bytes)) { - ASSERT(m_current_offset == m_current_gpu_position || - (m_current_offset + required_bytes) < m_current_gpu_position); m_current_offset = Common::AlignUp(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; @@ -225,36 +219,40 @@ void StreamBuffer::UpdateCurrentFencePosition() return; // Has the offset changed since the last fence? - const VkFence fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); - if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence) + const u64 counter = g_command_buffer_mgr->GetCurrentFenceCounter(); + if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) { // Still haven't executed a command buffer, so just update the offset. m_tracked_fences.back().second = m_current_offset; return; } - m_tracked_fences.emplace_back(fence, m_current_offset); + // New buffer, so update the GPU position while we're at it. + UpdateGPUPosition(); + m_tracked_fences.emplace_back(counter, m_current_offset); } -void StreamBuffer::OnFenceSignaled(VkFence fence) +void StreamBuffer::UpdateGPUPosition() { - // Locate the entry for this fence (if any, we may have been forced to wait already) - auto iter = std::find_if(m_tracked_fences.begin(), m_tracked_fences.end(), - [fence](const auto& it) { return it.first == fence; }); + auto start = m_tracked_fences.begin(); + auto end = start; - if (iter != m_tracked_fences.end()) + const u64 completed_counter = g_command_buffer_mgr->GetCompletedFenceCounter(); + while (end != m_tracked_fences.end() && completed_counter >= end->first) { - // Update the GPU position, and remove any fences before this fence (since - // it is implied that they have been signaled as well, though the callback - // should have removed them already). 
- m_current_gpu_position = iter->second; - m_tracked_fences.erase(m_tracked_fences.begin(), ++iter); + m_current_gpu_position = end->second; + ++end; } + + if (start != end) + m_tracked_fences.erase(start, end); } bool StreamBuffer::WaitForClearSpace(u32 num_bytes) { u32 new_offset = 0; + u32 new_gpu_position = 0; + auto iter = m_tracked_fences.begin(); for (; iter != m_tracked_fences.end(); iter++) { @@ -265,20 +263,32 @@ u32 gpu_position = iter->second; if (m_current_offset == gpu_position) { - // Start at the start of the buffer again. new_offset = 0; + new_gpu_position = 0; break; } // Assuming that we wait for this fence, are we allocating in front of the GPU? if (m_current_offset > gpu_position) { + // This would suggest the GPU has now followed us and wrapped around, so we have from + // m_current_offset..m_size free, as well as 0..gpu_position. + const u32 remaining_space_after_offset = m_size - m_current_offset; + if (remaining_space_after_offset >= num_bytes) + { + // Switch to allocating in front of the GPU, using the remainder of the buffer. + new_offset = m_current_offset; + new_gpu_position = gpu_position; + break; + } + // We can wrap around to the start, behind the GPU, if there is enough space. + // We use > here because otherwise we'd end up lining up with the GPU, and then the + // allocator would assume that the GPU has consumed what we just wrote. if (gpu_position > num_bytes) { new_offset = 0; + new_gpu_position = gpu_position; break; } } @@ -292,6 +302,7 @@ { // Leave the offset as-is, but update the GPU position. new_offset = m_current_offset; + new_gpu_position = gpu_position; break; } } @@ -300,14 +311,17 @@ // Did any fences satisfy this condition? // Has the command buffer been executed yet? If not, the caller should execute it. 
if (iter == m_tracked_fences.end() || - iter->first == g_command_buffer_mgr->GetCurrentCommandBufferFence()) + iter->first == g_command_buffer_mgr->GetCurrentFenceCounter()) { return false; } // Wait until this fence is signaled. This will fire the callback, updating the GPU position. - g_command_buffer_mgr->WaitForFence(iter->first); + g_command_buffer_mgr->WaitForFenceCounter(iter->first); + m_tracked_fences.erase(m_tracked_fences.begin(), + m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); m_current_offset = new_offset; + m_current_gpu_position = new_gpu_position; return true; } diff --git a/Source/Core/VideoBackends/Vulkan/StreamBuffer.h b/Source/Core/VideoBackends/Vulkan/StreamBuffer.h index b52ce6cd35..677313939a 100644 --- a/Source/Core/VideoBackends/Vulkan/StreamBuffer.h +++ b/Source/Core/VideoBackends/Vulkan/StreamBuffer.h @@ -34,7 +34,7 @@ public: private: bool AllocateBuffer(); void UpdateCurrentFencePosition(); - void OnFenceSignaled(VkFence fence); + void UpdateGPUPosition(); // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. 
bool WaitForClearSpace(u32 num_bytes); @@ -50,7 +50,7 @@ private: u8* m_host_pointer = nullptr; // List of fences and the corresponding positions in the buffer - std::deque> m_tracked_fences; + std::deque> m_tracked_fences; bool m_coherent_mapping = false; }; diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp index 202bbaad0f..27b079a779 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp @@ -674,11 +674,7 @@ VKStagingTexture::VKStagingTexture(StagingTextureType type, const TextureConfig& { } -VKStagingTexture::~VKStagingTexture() -{ - if (m_needs_flush) - VKStagingTexture::Flush(); -} +VKStagingTexture::~VKStagingTexture() = default; std::unique_ptr VKStagingTexture::Create(StagingTextureType type, const TextureConfig& config) @@ -739,14 +735,6 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src, ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); - if (m_needs_flush) - { - // Drop copy before reusing it. 
- g_command_buffer_mgr->RemoveFenceSignaledCallback(this); - m_flush_fence = VK_NULL_HANDLE; - m_needs_flush = false; - } - StateTracker::GetInstance()->EndRenderPass(); VkImageLayout old_layout = src_tex->GetLayout(); @@ -773,16 +761,7 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src, src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); m_needs_flush = true; - m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); - g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) { - if (m_flush_fence != fence) - return; - - m_flush_fence = VK_NULL_HANDLE; - m_needs_flush = false; - g_command_buffer_mgr->RemoveFenceSignaledCallback(this); - m_staging_buffer->InvalidateCPUCache(); - }); + m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); } void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, @@ -798,14 +777,6 @@ void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, A ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst_tex->GetWidth() && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst_tex->GetHeight()); - if (m_needs_flush) - { - // Drop copy before reusing it. - g_command_buffer_mgr->RemoveFenceSignaledCallback(this); - m_flush_fence = VK_NULL_HANDLE; - m_needs_flush = false; - } - // Flush caches before copying. 
m_staging_buffer->FlushCPUCache(); StateTracker::GetInstance()->EndRenderPass(); @@ -833,15 +804,7 @@ void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, A dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); m_needs_flush = true; - m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); - g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) { - if (m_flush_fence != fence) - return; - - m_flush_fence = VK_NULL_HANDLE; - m_needs_flush = false; - g_command_buffer_mgr->RemoveFenceSignaledCallback(this); - }); + m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); } bool VKStagingTexture::Map() @@ -860,25 +823,23 @@ void VKStagingTexture::Flush() if (!m_needs_flush) return; - // Either of the below two calls will cause the callback to fire. - g_command_buffer_mgr->RemoveFenceSignaledCallback(this); - if (m_flush_fence == g_command_buffer_mgr->GetCurrentCommandBufferFence()) + // Is this copy in the current command buffer? + if (g_command_buffer_mgr->GetCurrentFenceCounter() == m_flush_fence_counter) { - // The readback is in the current command buffer, and we must execute it. + // Execute the command buffer and wait for it to finish. Renderer::GetInstance()->ExecuteCommandBuffer(false, true); } else { - // WaitForFence should fire the callback. - g_command_buffer_mgr->WaitForFence(m_flush_fence); + // Wait for the GPU to finish with it. + g_command_buffer_mgr->WaitForFenceCounter(m_flush_fence_counter); } - DEBUG_ASSERT(m_flush_fence == VK_NULL_HANDLE); - m_needs_flush = false; - // For readback textures, invalidate the CPU cache as there is new data there. 
if (m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable) m_staging_buffer->InvalidateCPUCache(); + + m_needs_flush = false; } VKFramebuffer::VKFramebuffer(VKTexture* color_attachment, VKTexture* depth_attachment, u32 width, diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.h b/Source/Core/VideoBackends/Vulkan/VKTexture.h index bab11ec108..2a194acc4d 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.h +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.h @@ -104,7 +104,7 @@ private: std::unique_ptr buffer); std::unique_ptr m_staging_buffer; - VkFence m_flush_fence = VK_NULL_HANDLE; + u64 m_flush_fence_counter = 0; }; class VKFramebuffer final : public AbstractFramebuffer diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp index 336d3ad480..144d3b3ab8 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp @@ -60,11 +60,11 @@ VertexManager::~VertexManager() bool VertexManager::Initialize() { m_vertex_stream_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE * 4); + StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE); m_index_stream_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE * 4); + StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE); m_uniform_stream_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE * 4); + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE); if (!m_vertex_stream_buffer || !m_index_stream_buffer || !m_uniform_stream_buffer) { PanicAlert("Failed to allocate streaming buffers"); diff --git a/Source/Core/VideoBackends/Vulkan/main.cpp b/Source/Core/VideoBackends/Vulkan/main.cpp index 911f8d1991..07c24c9354 100644 --- a/Source/Core/VideoBackends/Vulkan/main.cpp +++ 
b/Source/Core/VideoBackends/Vulkan/main.cpp @@ -251,8 +251,8 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) void VideoBackend::Shutdown() { - if (g_command_buffer_mgr) - g_command_buffer_mgr->WaitForGPUIdle(); + if (g_vulkan_context) + vkDeviceWaitIdle(g_vulkan_context->GetDevice()); if (g_shader_cache) g_shader_cache->Shutdown();