Vulkan: Simplify command buffer fence tracking

This commit is contained in:
Stenzek 2019-03-17 15:59:22 +10:00
parent f3fadd7302
commit 427dd45151
11 changed files with 138 additions and 208 deletions

View File

@ -54,7 +54,6 @@ bool CommandBufferManager::CreateCommandBuffers()
{
resources.init_command_buffer_used = false;
resources.semaphore_used = false;
resources.needs_fence_wait = false;
VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0,
g_vulkan_context->GetGraphicsQueueFamilyIndex()};
@ -211,43 +210,61 @@ void CommandBufferManager::WaitForWorkerThreadIdle()
m_submit_semaphore.Post();
}
void CommandBufferManager::WaitForGPUIdle()
void CommandBufferManager::WaitForFenceCounter(u64 fence_counter)
{
WaitForWorkerThreadIdle();
vkDeviceWaitIdle(g_vulkan_context->GetDevice());
}
void CommandBufferManager::WaitForFence(VkFence fence)
{
// Find the command buffer that this fence corresponds to.
u32 command_buffer_index = 0;
for (; command_buffer_index < static_cast<u32>(m_frame_resources.size()); command_buffer_index++)
{
if (m_frame_resources[command_buffer_index].fence == fence)
break;
}
ASSERT(command_buffer_index < m_frame_resources.size());
// Has this command buffer already been waited for?
if (!m_frame_resources[command_buffer_index].needs_fence_wait)
if (m_completed_fence_counter >= fence_counter)
return;
// Find the first command buffer which covers this counter value.
u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
while (index != m_current_frame)
{
if (m_frame_resources[index].fence_counter >= fence_counter)
break;
index = (index + 1) % NUM_COMMAND_BUFFERS;
}
ASSERT(index != m_current_frame);
WaitForCommandBufferCompletion(index);
}
void CommandBufferManager::WaitForCommandBufferCompletion(u32 index)
{
// Ensure this command buffer has been submitted.
WaitForWorkerThreadIdle();
// Wait for this command buffer to be completed.
VkResult res =
vkWaitForFences(g_vulkan_context->GetDevice(), 1,
&m_frame_resources[command_buffer_index].fence, VK_TRUE, UINT64_MAX);
VkResult res = vkWaitForFences(g_vulkan_context->GetDevice(), 1, &m_frame_resources[index].fence,
VK_TRUE, UINT64_MAX);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
// Immediately fire callbacks and cleanups, since the commands have been completed.
m_frame_resources[command_buffer_index].needs_fence_wait = false;
OnCommandBufferExecuted(command_buffer_index);
// Clean up any resources for command buffers between the last known completed buffer and this
// now-completed command buffer. If we use >2 buffers, this may be more than one buffer.
const u64 now_completed_counter = m_frame_resources[index].fence_counter;
u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
while (cleanup_index != m_current_frame)
{
FrameResources& resources = m_frame_resources[cleanup_index];
if (resources.fence_counter > now_completed_counter)
break;
if (resources.fence_counter > m_completed_fence_counter)
{
for (auto& it : resources.cleanup_resources)
it();
resources.cleanup_resources.clear();
}
cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS;
}
m_completed_fence_counter = now_completed_counter;
}
void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
bool wait_for_completion,
VkSwapchainKHR present_swap_chain,
uint32_t present_image_index)
{
@ -263,16 +280,13 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
}
}
// This command buffer now has commands, so can't be re-used without waiting.
resources.needs_fence_wait = true;
// Grab the semaphore before submitting command buffer either on-thread or off-thread.
// This prevents a race from occurring where a second command buffer is executed
// before the worker thread has woken and executed the first one yet.
m_submit_semaphore.Wait();
// Submitting off-thread?
if (m_use_threaded_submission && submit_on_worker_thread)
if (m_use_threaded_submission && submit_on_worker_thread && !wait_for_completion)
{
// Push to the pending submit queue.
{
@ -287,6 +301,8 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread,
{
// Pass through to normal submission path.
SubmitCommandBuffer(m_current_frame, present_swap_chain, present_image_index);
if (wait_for_completion)
WaitForCommandBufferCompletion(m_current_frame);
}
// Switch to next cmdbuffer.
@ -365,39 +381,15 @@ void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index,
m_submit_semaphore.Post();
}
void CommandBufferManager::OnCommandBufferExecuted(u32 index)
{
FrameResources& resources = m_frame_resources[index];
// Fire fence tracking callbacks.
for (auto iter = m_fence_callbacks.begin(); iter != m_fence_callbacks.end();)
{
auto backup_iter = iter++;
backup_iter->second(resources.fence);
}
// Clean up all objects pending destruction on this command buffer
for (auto& it : resources.cleanup_resources)
it();
resources.cleanup_resources.clear();
}
void CommandBufferManager::BeginCommandBuffer()
{
// Move to the next command buffer.
m_current_frame = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
FrameResources& resources = m_frame_resources[m_current_frame];
const u32 next_buffer_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
FrameResources& resources = m_frame_resources[next_buffer_index];
// Wait for the GPU to finish with all resources for this command buffer.
if (resources.needs_fence_wait)
{
VkResult res =
vkWaitForFences(g_vulkan_context->GetDevice(), 1, &resources.fence, true, UINT64_MAX);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
OnCommandBufferExecuted(m_current_frame);
}
if (resources.fence_counter > m_completed_fence_counter)
WaitForCommandBufferCompletion(next_buffer_index);
// Reset fence to unsignaled before starting.
VkResult res = vkResetFences(g_vulkan_context->GetDevice(), 1, &resources.fence);
@ -427,6 +419,8 @@ void CommandBufferManager::BeginCommandBuffer()
// Reset upload command buffer state
resources.init_command_buffer_used = false;
resources.semaphore_used = false;
resources.fence_counter = m_next_fence_counter++;
m_current_frame = next_buffer_index;
}
void CommandBufferManager::DeferBufferDestruction(VkBuffer object)
@ -471,19 +465,5 @@ void CommandBufferManager::DeferImageViewDestruction(VkImageView object)
[object]() { vkDestroyImageView(g_vulkan_context->GetDevice(), object, nullptr); });
}
void CommandBufferManager::AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback)
{
// Shouldn't be adding twice.
ASSERT(m_fence_callbacks.find(key) == m_fence_callbacks.end());
m_fence_callbacks.emplace(key, std::move(callback));
}
void CommandBufferManager::RemoveFenceSignaledCallback(const void* key)
{
auto iter = m_fence_callbacks.find(key);
ASSERT(iter != m_fence_callbacks.end());
m_fence_callbacks.erase(iter);
}
std::unique_ptr<CommandBufferManager> g_command_buffer_mgr;
} // namespace Vulkan

View File

@ -51,9 +51,15 @@ public:
// Allocates a descriptors set from the pool reserved for the current frame.
VkDescriptorSet AllocateDescriptorSet(VkDescriptorSetLayout set_layout);
// Fence "counters" are used to track which commands have been completed by the GPU.
// If the last completed fence counter is greater or equal to N, it means that the work
// associated with counter N has been completed by the GPU. The value of N to associate
// with commands can be retrieved by calling GetCurrentFenceCounter().
u64 GetCompletedFenceCounter() const { return m_completed_fence_counter; }
// Gets the fence that will be signaled when the currently executing command buffer is
// queued and executed. Do not wait for this fence before the buffer is executed.
VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; }
u64 GetCurrentFenceCounter() const { return m_frame_resources[m_current_frame].fence_counter; }
// Returns the semaphore for the current command buffer, which can be used to ensure the
// swap chain image is ready before the command buffer executes.
@ -66,15 +72,11 @@ public:
// Ensure that the worker thread has submitted any previous command buffers and is idle.
void WaitForWorkerThreadIdle();
// Ensure that the worker thread has both submitted all commands, and the GPU has caught up.
// Use with caution, huge performance penalty.
void WaitForGPUIdle();
// Wait for a fence to be completed.
// Also invokes callbacks for completion.
void WaitForFence(VkFence fence);
void WaitForFenceCounter(u64 fence_counter);
void SubmitCommandBuffer(bool submit_on_worker_thread,
void SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion,
VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE,
uint32_t present_image_index = 0xFFFFFFFF);
@ -90,25 +92,17 @@ public:
void DeferImageDestruction(VkImage object);
void DeferImageViewDestruction(VkImageView object);
// Instruct the manager to fire the specified callback when a fence is flagged to be signaled.
// This happens when command buffers are executed, and can be tested if signaled, which means
// that all commands up to the point when the callback was fired have completed.
using FenceSignaledCallback = std::function<void(VkFence)>;
void AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback);
void RemoveFenceSignaledCallback(const void* key);
private:
bool CreateCommandBuffers();
void DestroyCommandBuffers();
bool CreateSubmitThread();
void WaitForCommandBufferCompletion(u32 command_buffer_index);
void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain,
u32 present_image_index);
void BeginCommandBuffer();
void OnCommandBufferExecuted(u32 index);
struct FrameResources
{
// [0] - Init (upload) command buffer, [1] - draw command buffer
@ -117,19 +111,19 @@ private:
VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
VkFence fence = VK_NULL_HANDLE;
VkSemaphore semaphore = VK_NULL_HANDLE;
u64 fence_counter = 0;
bool init_command_buffer_used = false;
bool semaphore_used = false;
bool needs_fence_wait = false;
std::vector<std::function<void()>> cleanup_resources;
};
u64 m_next_fence_counter = 1;
u64 m_completed_fence_counter = 0;
std::array<FrameResources, NUM_COMMAND_BUFFERS> m_frame_resources;
u32 m_current_frame;
// callbacks when a fence point is set
std::map<const void*, FenceSignaledCallback> m_fence_callbacks;
// Threaded command buffer execution
// Semaphore determines when a command buffer can be queued
Common::Semaphore m_submit_semaphore;

View File

@ -24,8 +24,6 @@ PerfQuery::PerfQuery() = default;
PerfQuery::~PerfQuery()
{
g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
if (m_query_pool != VK_NULL_HANDLE)
vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr);
}
@ -49,9 +47,6 @@ bool PerfQuery::Initialize()
return false;
}
g_command_buffer_mgr->AddFenceSignaledCallback(
this, std::bind(&PerfQuery::OnFenceSignaled, this, std::placeholders::_1));
return true;
}
@ -113,7 +108,7 @@ void PerfQuery::ResetQuery()
for (auto& entry : m_query_buffer)
{
entry.pending_fence = VK_NULL_HANDLE;
entry.fence_counter = 0;
entry.available = false;
entry.active = false;
}
@ -217,7 +212,7 @@ void PerfQuery::QueueCopyQueryResults(u32 start_index, u32 query_count)
{
u32 index = start_index + i;
ActiveQuery& entry = m_query_buffer[index];
entry.pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
entry.fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
entry.available = true;
entry.active = false;
}
@ -261,8 +256,10 @@ void PerfQuery::FlushQueries()
QueueCopyQueryResults(copy_start_index, copy_count);
}
void PerfQuery::OnFenceSignaled(VkFence fence)
void PerfQuery::ProcessPendingResults()
{
const u64 completed_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
// Need to save these since ProcessResults will modify them.
u32 query_read_pos = m_query_read_pos;
u32 query_count = m_query_count;
@ -273,7 +270,7 @@ void PerfQuery::OnFenceSignaled(VkFence fence)
for (u32 i = 0; i < query_count; i++)
{
u32 index = (query_read_pos + i) % PERF_QUERY_BUFFER_SIZE;
if (m_query_buffer[index].pending_fence != fence)
if (m_query_buffer[index].fence_counter > completed_fence_counter)
{
// These should be grouped together, at the end.
break;
@ -314,8 +311,8 @@ void PerfQuery::ProcessResults(u32 start_index, u32 query_count)
ActiveQuery& entry = m_query_buffer[index];
// Should have a fence associated with it (waiting for a result).
ASSERT(entry.pending_fence != VK_NULL_HANDLE);
entry.pending_fence = VK_NULL_HANDLE;
ASSERT(entry.fence_counter != 0);
entry.fence_counter = 0;
entry.available = false;
entry.active = false;
@ -340,9 +337,11 @@ void PerfQuery::NonBlockingPartialFlush()
return;
// Submit a command buffer in the background if the front query is not bound to one.
// Ideally this will complete before the buffer fills.
if (m_query_buffer[m_query_read_pos].pending_fence == VK_NULL_HANDLE)
ActiveQuery& entry = m_query_buffer[m_query_read_pos];
if (entry.fence_counter == g_command_buffer_mgr->GetCurrentFenceCounter())
Renderer::GetInstance()->ExecuteCommandBuffer(true, false);
ProcessPendingResults();
}
void PerfQuery::BlockingPartialFlush()
@ -352,17 +351,9 @@ void PerfQuery::BlockingPartialFlush()
// If the first pending query is needing command buffer execution, do that.
ActiveQuery& entry = m_query_buffer[m_query_read_pos];
if (entry.pending_fence == VK_NULL_HANDLE)
{
// This will callback OnCommandBufferQueued which will set the fence on the entry.
// We wait for completion, which will also call OnCommandBufferExecuted, and clear the fence.
if (entry.fence_counter == g_command_buffer_mgr->GetCurrentFenceCounter())
Renderer::GetInstance()->ExecuteCommandBuffer(false, true);
}
else
{
// The command buffer has been submitted, but is awaiting completion.
// Wait for the fence to complete, which will call OnCommandBufferExecuted.
g_command_buffer_mgr->WaitForFence(entry.pending_fence);
}
ProcessPendingResults();
}
} // namespace Vulkan

View File

@ -36,8 +36,8 @@ public:
private:
struct ActiveQuery
{
u64 fence_counter;
PerfQueryType query_type;
VkFence pending_fence;
bool available;
bool active;
};
@ -45,11 +45,9 @@ private:
bool CreateQueryPool();
bool CreateReadbackBuffer();
void QueueCopyQueryResults(u32 start_index, u32 query_count);
void ProcessPendingResults();
void ProcessResults(u32 start_index, u32 query_count);
void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence);
void OnFenceSignaled(VkFence fence);
void NonBlockingPartialFlush();
void BlockingPartialFlush();

View File

@ -315,7 +315,7 @@ void Renderer::PresentBackbuffer()
// Because this final command buffer is rendering to the swap chain, we need to wait for
// the available semaphore to be signaled before executing the buffer. This final submission
// can happen off-thread in the background while we're preparing the next frame.
g_command_buffer_mgr->SubmitCommandBuffer(true, m_swap_chain->GetSwapChain(),
g_command_buffer_mgr->SubmitCommandBuffer(true, false, m_swap_chain->GetSwapChain(),
m_swap_chain->GetCurrentImageIndex());
// New cmdbuffer, so invalidate state.
@ -327,11 +327,7 @@ void Renderer::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_comple
StateTracker::GetInstance()->EndRenderPass();
PerfQuery::GetInstance()->FlushQueries();
// If we're waiting for completion, don't bother waking the worker thread.
const VkFence pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread && wait_for_completion);
if (wait_for_completion)
g_command_buffer_mgr->WaitForFence(pending_fence);
g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread, wait_for_completion);
StateTracker::GetInstance()->InvalidateCachedState();
}
@ -550,10 +546,6 @@ void Renderer::UnbindTexture(const AbstractTexture* texture)
void Renderer::ResetSamplerStates()
{
// Ensure none of the sampler objects are in use.
// This assumes that none of the samplers are in use on the command list currently being recorded.
g_command_buffer_mgr->WaitForGPUIdle();
// Invalidate all sampler states, next draw will re-initialize them.
for (u32 i = 0; i < m_sampler_states.size(); i++)
{

View File

@ -19,14 +19,10 @@ namespace Vulkan
{
StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, u32 size) : m_usage(usage), m_size(size)
{
g_command_buffer_mgr->AddFenceSignaledCallback(
this, std::bind(&StreamBuffer::OnFenceSignaled, this, std::placeholders::_1));
}
StreamBuffer::~StreamBuffer()
{
g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
if (m_host_pointer)
vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
@ -189,8 +185,6 @@ bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
// Can we find a fence to wait on that will give us enough memory?
if (WaitForClearSpace(required_bytes))
{
ASSERT(m_current_offset == m_current_gpu_position ||
(m_current_offset + required_bytes) < m_current_gpu_position);
m_current_offset = Common::AlignUp(m_current_offset, alignment);
m_last_allocation_size = num_bytes;
return true;
@ -225,36 +219,40 @@ void StreamBuffer::UpdateCurrentFencePosition()
return;
// Has the offset changed since the last fence?
const VkFence fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence)
const u64 counter = g_command_buffer_mgr->GetCurrentFenceCounter();
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
{
// Still haven't executed a command buffer, so just update the offset.
m_tracked_fences.back().second = m_current_offset;
return;
}
m_tracked_fences.emplace_back(fence, m_current_offset);
// New buffer, so update the GPU position while we're at it.
UpdateGPUPosition();
m_tracked_fences.emplace_back(counter, m_current_offset);
}
void StreamBuffer::OnFenceSignaled(VkFence fence)
void StreamBuffer::UpdateGPUPosition()
{
// Locate the entry for this fence (if any, we may have been forced to wait already)
auto iter = std::find_if(m_tracked_fences.begin(), m_tracked_fences.end(),
[fence](const auto& it) { return it.first == fence; });
auto start = m_tracked_fences.begin();
auto end = start;
if (iter != m_tracked_fences.end())
const u64 completed_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
while (end != m_tracked_fences.end() && completed_counter >= end->first)
{
// Update the GPU position, and remove any fences before this fence (since
// it is implied that they have been signaled as well, though the callback
// should have removed them already).
m_current_gpu_position = iter->second;
m_tracked_fences.erase(m_tracked_fences.begin(), ++iter);
m_current_gpu_position = end->second;
++end;
}
if (start != end)
m_tracked_fences.erase(start, end);
}
bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
{
u32 new_offset = 0;
u32 new_gpu_position = 0;
auto iter = m_tracked_fences.begin();
for (; iter != m_tracked_fences.end(); iter++)
{
@ -265,20 +263,32 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
u32 gpu_position = iter->second;
if (m_current_offset == gpu_position)
{
// Start at the start of the buffer again.
new_offset = 0;
new_gpu_position = 0;
break;
}
// Assuming that we wait for this fence, are we allocating in front of the GPU?
if (m_current_offset > gpu_position)
{
// This would suggest the GPU has now followed us and wrapped around, so we have from
// m_current_offset..m_size free, as well as 0..gpu_position.
const u32 remaining_space_after_offset = m_size - m_current_offset;
if (remaining_space_after_offset >= num_bytes)
{
// Switch to allocating in front of the GPU, using the remainder of the buffer.
new_offset = m_current_offset;
new_gpu_position = gpu_position;
break;
}
// We can wrap around to the start, behind the GPU, if there is enough space.
// We use > here because otherwise we'd end up lining up with the GPU, and then the
// allocator would assume that the GPU has consumed what we just wrote.
if (gpu_position > num_bytes)
{
new_offset = 0;
new_gpu_position = gpu_position;
break;
}
}
@ -292,6 +302,7 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
{
// Leave the offset as-is, but update the GPU position.
new_offset = m_current_offset;
new_gpu_position = gpu_position;
break;
}
}
@ -300,14 +311,17 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
// Did any fences satisfy this condition?
// Has the command buffer been executed yet? If not, the caller should execute it.
if (iter == m_tracked_fences.end() ||
iter->first == g_command_buffer_mgr->GetCurrentCommandBufferFence())
iter->first == g_command_buffer_mgr->GetCurrentFenceCounter())
{
return false;
}
// Wait until this fence is signaled. This will fire the callback, updating the GPU position.
g_command_buffer_mgr->WaitForFence(iter->first);
g_command_buffer_mgr->WaitForFenceCounter(iter->first);
m_tracked_fences.erase(m_tracked_fences.begin(),
m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
m_current_offset = new_offset;
m_current_gpu_position = new_gpu_position;
return true;
}

View File

@ -34,7 +34,7 @@ public:
private:
bool AllocateBuffer();
void UpdateCurrentFencePosition();
void OnFenceSignaled(VkFence fence);
void UpdateGPUPosition();
// Waits for as many fences as needed to allocate num_bytes bytes from the buffer.
bool WaitForClearSpace(u32 num_bytes);
@ -50,7 +50,7 @@ private:
u8* m_host_pointer = nullptr;
// List of fences and the corresponding positions in the buffer
std::deque<std::pair<VkFence, u32>> m_tracked_fences;
std::deque<std::pair<u64, u32>> m_tracked_fences;
bool m_coherent_mapping = false;
};

View File

@ -674,11 +674,7 @@ VKStagingTexture::VKStagingTexture(StagingTextureType type, const TextureConfig&
{
}
VKStagingTexture::~VKStagingTexture()
{
if (m_needs_flush)
VKStagingTexture::Flush();
}
VKStagingTexture::~VKStagingTexture() = default;
std::unique_ptr<VKStagingTexture> VKStagingTexture::Create(StagingTextureType type,
const TextureConfig& config)
@ -739,14 +735,6 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src,
ASSERT(dst_rect.left >= 0 && static_cast<u32>(dst_rect.right) <= m_config.width &&
dst_rect.top >= 0 && static_cast<u32>(dst_rect.bottom) <= m_config.height);
if (m_needs_flush)
{
// Drop copy before reusing it.
g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
m_flush_fence = VK_NULL_HANDLE;
m_needs_flush = false;
}
StateTracker::GetInstance()->EndRenderPass();
VkImageLayout old_layout = src_tex->GetLayout();
@ -773,16 +761,7 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src,
src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout);
m_needs_flush = true;
m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) {
if (m_flush_fence != fence)
return;
m_flush_fence = VK_NULL_HANDLE;
m_needs_flush = false;
g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
m_staging_buffer->InvalidateCPUCache();
});
m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
}
void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect, AbstractTexture* dst,
@ -798,14 +777,6 @@ void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect, A
ASSERT(dst_rect.left >= 0 && static_cast<u32>(dst_rect.right) <= dst_tex->GetWidth() &&
dst_rect.top >= 0 && static_cast<u32>(dst_rect.bottom) <= dst_tex->GetHeight());
if (m_needs_flush)
{
// Drop copy before reusing it.
g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
m_flush_fence = VK_NULL_HANDLE;
m_needs_flush = false;
}
// Flush caches before copying.
m_staging_buffer->FlushCPUCache();
StateTracker::GetInstance()->EndRenderPass();
@ -833,15 +804,7 @@ void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect, A
dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout);
m_needs_flush = true;
m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence();
g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) {
if (m_flush_fence != fence)
return;
m_flush_fence = VK_NULL_HANDLE;
m_needs_flush = false;
g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
});
m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
}
bool VKStagingTexture::Map()
@ -860,25 +823,23 @@ void VKStagingTexture::Flush()
if (!m_needs_flush)
return;
// Either of the below two calls will cause the callback to fire.
g_command_buffer_mgr->RemoveFenceSignaledCallback(this);
if (m_flush_fence == g_command_buffer_mgr->GetCurrentCommandBufferFence())
// Is this copy in the current command buffer?
if (g_command_buffer_mgr->GetCurrentFenceCounter() == m_flush_fence_counter)
{
// The readback is in the current command buffer, and we must execute it.
// Execute the command buffer and wait for it to finish.
Renderer::GetInstance()->ExecuteCommandBuffer(false, true);
}
else
{
// WaitForFence should fire the callback.
g_command_buffer_mgr->WaitForFence(m_flush_fence);
// Wait for the GPU to finish with it.
g_command_buffer_mgr->WaitForFenceCounter(m_flush_fence_counter);
}
DEBUG_ASSERT(m_flush_fence == VK_NULL_HANDLE);
m_needs_flush = false;
// For readback textures, invalidate the CPU cache as there is new data there.
if (m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable)
m_staging_buffer->InvalidateCPUCache();
m_needs_flush = false;
}
VKFramebuffer::VKFramebuffer(VKTexture* color_attachment, VKTexture* depth_attachment, u32 width,

View File

@ -104,7 +104,7 @@ private:
std::unique_ptr<StagingBuffer> buffer);
std::unique_ptr<StagingBuffer> m_staging_buffer;
VkFence m_flush_fence = VK_NULL_HANDLE;
u64 m_flush_fence_counter = 0;
};
class VKFramebuffer final : public AbstractFramebuffer

View File

@ -60,11 +60,11 @@ VertexManager::~VertexManager()
bool VertexManager::Initialize()
{
m_vertex_stream_buffer =
StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE * 4);
StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE);
m_index_stream_buffer =
StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE * 4);
StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE);
m_uniform_stream_buffer =
StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE * 4);
StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE);
if (!m_vertex_stream_buffer || !m_index_stream_buffer || !m_uniform_stream_buffer)
{
PanicAlert("Failed to allocate streaming buffers");

View File

@ -251,8 +251,8 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi)
void VideoBackend::Shutdown()
{
if (g_command_buffer_mgr)
g_command_buffer_mgr->WaitForGPUIdle();
if (g_vulkan_context)
vkDeviceWaitIdle(g_vulkan_context->GetDevice());
if (g_shader_cache)
g_shader_cache->Shutdown();