From 31bc0cf2c3d8995763fac0d1b04bfd83196b3d2a Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 17 Feb 2016 02:43:56 +1000 Subject: [PATCH 01/16] D3D12: Don't clear texture SRV on destruction, as it may still be in use --- Source/Core/VideoBackends/D3D12/D3DTexture.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DTexture.cpp b/Source/Core/VideoBackends/D3D12/D3DTexture.cpp index 6ad5da8d59..f913715e75 100644 --- a/Source/Core/VideoBackends/D3D12/D3DTexture.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DTexture.cpp @@ -261,17 +261,6 @@ void D3DTexture2D::TransitionToResourceState(ID3D12GraphicsCommandList* command_ D3DTexture2D::~D3DTexture2D() { DX12::D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_tex12); - - if (m_srv12_cpu.ptr) - { - D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {}; - null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - - null_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - - DX12::D3D::device12->CreateShaderResourceView(NULL, &null_srv_desc, m_srv12_cpu); - } } } // namespace DX12 From 2f7870b04608e82f5995cd75e53a7eade5cedc73 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 17 Feb 2016 02:44:17 +1000 Subject: [PATCH 02/16] D3D12: Don't add fence tracking entries without buffer offset changes --- .../Core/VideoBackends/D3D12/D3DStreamBuffer.cpp | 15 +++++++++++++-- Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h | 2 ++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp index fa91352b99..aa868b0004 100644 --- a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp @@ -293,7 +293,7 @@ void D3DStreamBuffer::UpdateGPUProgress() m_queued_fences.pop(); // Has fence gone past this point? 
- if (fence_value > tracking_information.fence_value) + if (fence_value >= tracking_information.fence_value) { m_buffer_gpu_completion_offset = tracking_information.buffer_offset; } @@ -307,7 +307,18 @@ void D3DStreamBuffer::UpdateGPUProgress() void D3DStreamBuffer::QueueFenceCallback(void* owning_object, UINT64 fence_value) { - reinterpret_cast<D3DStreamBuffer*>(owning_object)->QueueFence(fence_value); + D3DStreamBuffer* owning_stream_buffer = reinterpret_cast<D3DStreamBuffer*>(owning_object); + if (owning_stream_buffer->HasBufferOffsetChangedSinceLastFence()) + owning_stream_buffer->QueueFence(fence_value); +} + +bool D3DStreamBuffer::HasBufferOffsetChangedSinceLastFence() const +{ + if (m_queued_fences.empty()) + return true; + + // Don't add a new fence tracking entry when our offset hasn't changed. + return (m_queued_fences.back().buffer_offset != m_buffer_offset); } void D3DStreamBuffer::QueueFence(UINT64 fence_value) diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h index 897e30a66d..e931904fbd 100644 --- a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h @@ -39,6 +39,8 @@ private: bool AttemptToFindExistingFenceToStallOn(size_t allocation_size); void UpdateGPUProgress(); + + bool HasBufferOffsetChangedSinceLastFence() const; void QueueFence(UINT64 fence_value); struct FenceTrackingInformation From efbb85da4322fe523db6d73b262f5531d5ac1e0e Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 17 Feb 2016 02:44:34 +1000 Subject: [PATCH 03/16] D3D12: Improve robustness of command allocator and fence tracking --- .../D3D12/D3DCommandListManager.cpp | 157 ++++++++++-------- .../D3D12/D3DCommandListManager.h | 9 +- 2 files changed, 96 insertions(+), 70 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp index fc4a98ae84..4fe6875f13 100644 --- 
a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include #include #include @@ -69,6 +70,9 @@ D3DCommandListManager::D3DCommandListManager( } m_current_deferred_destruction_list = 0; + + std::fill(m_command_allocator_list_fences.begin(), m_command_allocator_list_fences.end(), 0); + std::fill(m_deferred_destruction_list_fences.begin(), m_deferred_destruction_list_fences.end(), 0); } void D3DCommandListManager::SetInitialCommandListState() @@ -114,37 +118,26 @@ void D3DCommandListManager::ExecuteQueuedWork(bool wait_for_gpu_completion) m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); - ResetCommandListWithIdleCommandAllocator(); - - m_queued_command_list->ProcessQueuedItems(); + m_queued_command_list->ProcessQueuedItems(wait_for_gpu_completion); #else CheckHR(m_backing_command_list->Close()); - ID3D12CommandList* const commandListsToExecute[1] = { m_backing_command_list }; - m_command_queue->ExecuteCommandLists(1, commandListsToExecute); + ID3D12CommandList* const execute_list[1] = { m_backing_command_list }; + m_command_queue->ExecuteCommandLists(1, execute_list); - if (wait_for_gpu_completion) - { - CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); - } - - if (m_current_command_allocator == 0) - { - PerformGpuRolloverChecks(); - } - - ResetCommandListWithIdleCommandAllocator(); + CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); #endif + // Notify observers of the fence value for the current work to finish. for (auto it : m_queue_fence_callbacks) it.second(it.first, m_queue_fence_value); - SetInitialCommandListState(); - if (wait_for_gpu_completion) - { - WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); - } + WaitForGPUCompletion(); + + // Re-open the command list, using the current allocator. 
+ ResetCommandList(); + SetInitialCommandListState(); } void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags) @@ -154,60 +147,67 @@ void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_cha #ifdef USE_D3D12_QUEUED_COMMAND_LISTS CheckHR(m_queued_command_list->Close()); m_queued_command_list->QueueExecute(); - m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); m_queued_command_list->QueuePresent(swap_chain, sync_interval, flags); - m_queued_command_list->ProcessQueuedItems(true); - - if (m_current_command_allocator == 0) - { - PerformGpuRolloverChecks(); - } - - m_current_command_allocator = (m_current_command_allocator + 1) % m_command_allocator_lists[m_current_command_allocator_list].size(); - - ResetCommandListWithIdleCommandAllocator(); - - SetInitialCommandListState(); -#else - ExecuteQueuedWork(); - m_command_queue->Signal(m_queue_fence, m_queue_fence_value); - CheckHR(swap_chain->Present(sync_interval, flags)); -#endif - - for (auto it : m_queue_fence_callbacks) - it.second(it.first, m_queue_fence_value); -} - -void D3DCommandListManager::WaitForQueuedWorkToBeExecutedOnGPU() -{ - // Wait for GPU to finish all outstanding work. - m_queue_fence_value++; - -#ifdef USE_D3D12_QUEUED_COMMAND_LISTS - m_queued_command_list->QueueExecute(); m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); - m_queued_command_list->ProcessQueuedItems(true); #else + CheckHR(m_backing_command_list->Close()); + + ID3D12CommandList* const execute_list[1] = { m_backing_command_list }; + m_command_queue->ExecuteCommandLists(1, execute_list); + + CheckHR(swap_chain->Present(sync_interval, flags)); CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); #endif - WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); + // Notify observers of the fence value for the current work to finish. 
+ for (auto it : m_queue_fence_callbacks) + it.second(it.first, m_queue_fence_value); + + // Move to the next command allocator, this may mean switching allocator lists. + MoveToNextCommandAllocator(); + ResetCommandList(); + SetInitialCommandListState(); +} + +void D3DCommandListManager::WaitForGPUCompletion() +{ + // Wait for GPU to finish all outstanding work. + // This method assumes that no command lists are open. + m_queue_frame_fence_value++; + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value); + m_queued_command_list->ProcessQueuedItems(true); +#else + CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value)); +#endif + + WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value); + + // GPU is up to date with us. Therefore, it has finished with any pending resources. + ImmediatelyDestroyAllResourcesScheduledForDestruction(); + + // Command allocators are also up-to-date, so reset these. + ResetAllCommandAllocators(); } void D3DCommandListManager::PerformGpuRolloverChecks() { - // Insert fence to measure GPU progress, ensure we aren't using in-use command allocators. - if (m_queue_frame_fence->GetCompletedValue() < m_queue_frame_fence_value) - { - WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value); - } + m_queue_frame_fence_value++; + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value); +#else + CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value)); +#endif // We now know that the previous 'set' of command lists has completed on GPU, and it is safe to // release resources / start back at beginning of command allocator list. 
// Begin Deferred Resource Destruction UINT safe_to_delete_deferred_destruction_list = (m_current_deferred_destruction_list - 1) % m_deferred_destruction_lists.size(); + WaitOnCPUForFence(m_queue_frame_fence, m_deferred_destruction_list_fences[safe_to_delete_deferred_destruction_list]); for (UINT i = 0; i < m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].size(); i++) { @@ -216,30 +216,37 @@ void D3DCommandListManager::PerformGpuRolloverChecks() m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].clear(); + m_deferred_destruction_list_fences[m_current_deferred_destruction_list] = m_queue_frame_fence_value; m_current_deferred_destruction_list = (m_current_deferred_destruction_list + 1) % m_deferred_destruction_lists.size(); // End Deferred Resource Destruction // Begin Command Allocator Resets UINT safe_to_reset_command_allocator_list = (m_current_command_allocator_list - 1) % m_command_allocator_lists.size(); + WaitOnCPUForFence(m_queue_frame_fence, m_command_allocator_list_fences[safe_to_reset_command_allocator_list]); for (UINT i = 0; i < m_command_allocator_lists[safe_to_reset_command_allocator_list].size(); i++) { CheckHR(m_command_allocator_lists[safe_to_reset_command_allocator_list][i]->Reset()); } + m_command_allocator_list_fences[m_current_command_allocator_list] = m_queue_frame_fence_value; m_current_command_allocator_list = (m_current_command_allocator_list + 1) % m_command_allocator_lists.size(); + m_current_command_allocator = 0; // End Command Allocator Resets - - m_queue_frame_fence_value++; -#ifdef USE_D3D12_QUEUED_COMMAND_LISTS - m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value); -#else - CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value)); -#endif } -void D3DCommandListManager::ResetCommandListWithIdleCommandAllocator() +void D3DCommandListManager::MoveToNextCommandAllocator() +{ + // Move to the next allocator in the current allocator 
list. + m_current_command_allocator = (m_current_command_allocator + 1) % m_command_allocator_lists[m_current_command_allocator_list].size(); + + // Did we wrap around? Move to the next set of allocators. + if (m_current_command_allocator == 0) + PerformGpuRolloverChecks(); +} + +void D3DCommandListManager::ResetCommandList() { #ifdef USE_D3D12_QUEUED_COMMAND_LISTS ID3D12QueuedCommandList* command_list = m_queued_command_list; @@ -268,6 +275,20 @@ void D3DCommandListManager::ImmediatelyDestroyAllResourcesScheduledForDestructio } } +void D3DCommandListManager::ResetAllCommandAllocators() +{ + for (auto& allocator_list : m_command_allocator_lists) + { + for (auto& allocator : allocator_list) + allocator->Reset(); + } + + // Move back to the start, using the first allocator of first list. + m_current_command_allocator = 0; + m_current_command_allocator_list = 0; + m_current_deferred_destruction_list = 0; +} + void D3DCommandListManager::ClearQueueAndWaitForCompletionOfInflightWork() { // Wait for GPU to finish all outstanding work. 
@@ -305,8 +326,10 @@ D3DCommandListManager::~D3DCommandListManager() void D3DCommandListManager::WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value) { - CheckHR(fence->SetEventOnCompletion(fence_value, m_wait_on_cpu_fence_event)); + if (fence->GetCompletedValue() >= fence_value) + return; + CheckHR(fence->SetEventOnCompletion(fence_value, m_wait_on_cpu_fence_event)); WaitForSingleObject(m_wait_on_cpu_fence_event, INFINITE); } diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h index b9622df5fc..3f932dfc0e 100644 --- a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h @@ -38,11 +38,10 @@ public: void ExecuteQueuedWork(bool wait_for_gpu_completion = false); void ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags); - void WaitForQueuedWorkToBeExecutedOnGPU(); - void ClearQueueAndWaitForCompletionOfInflightWork(); void DestroyResourceAfterCurrentCommandListExecuted(ID3D12Resource* resource); void ImmediatelyDestroyAllResourcesScheduledForDestruction(); + void ResetAllCommandAllocators(); void SetCommandListDirtyState(unsigned int command_list_state, bool dirty); bool GetCommandListDirtyState(COMMAND_LIST_STATE command_list_state) const; @@ -65,8 +64,10 @@ public: private: + void WaitForGPUCompletion(); void PerformGpuRolloverChecks(); - void ResetCommandListWithIdleCommandAllocator(); + void MoveToNextCommandAllocator(); + void ResetCommandList(); unsigned int m_command_list_dirty_state = UINT_MAX; D3D_PRIMITIVE_TOPOLOGY m_command_list_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; @@ -85,6 +86,7 @@ private: UINT m_current_command_allocator; UINT m_current_command_allocator_list; std::array, 2> m_command_allocator_lists; + std::array m_command_allocator_list_fences; ID3D12GraphicsCommandList* m_backing_command_list; ID3D12QueuedCommandList* m_queued_command_list; @@ -93,6 +95,7 @@ 
private: UINT m_current_deferred_destruction_list; std::array, 2> m_deferred_destruction_lists; + std::array m_deferred_destruction_list_fences; }; } // namespace \ No newline at end of file From 5c1a708977673cde4bc8a0f7be291f864275b0e6 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 17 Feb 2016 13:39:12 +1000 Subject: [PATCH 04/16] D3D12: Clean up debug device creation path --- Source/Core/VideoBackends/D3D12/D3DBase.cpp | 53 ++++++--------------- 1 file changed, 15 insertions(+), 38 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.cpp b/Source/Core/VideoBackends/D3D12/D3DBase.cpp index c95142e133..eebbdcbba8 100644 --- a/Source/Core/VideoBackends/D3D12/D3DBase.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DBase.cpp @@ -423,52 +423,29 @@ HRESULT Create(HWND wnd) swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; swap_chain_desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; -#if defined(_DEBUG) || defined(DEBUGFAST) - // Creating debug devices can sometimes fail if the user doesn't have the correct - // version of the DirectX SDK. If it does, simply fallback to a non-debug device. +#if defined(_DEBUG) || defined(DEBUGFAST) || defined(USE_D3D12_DEBUG_LAYER) + // Enabling the debug layer will fail if the Graphics Tools feature is not installed. 
+ if (SUCCEEDED(hr)) { + ID3D12Debug* debug_controller; + hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); if (SUCCEEDED(hr)) { - ID3D12Debug* debug_controller; - hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); - if (SUCCEEDED(hr)) - { - debug_controller->EnableDebugLayer(); - debug_controller->Release(); - } - else - { - MessageBox(wnd, _T("Failed to initialize Direct3D debug layer, please make sure it is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - } - - hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); - - s_feat_level = D3D_FEATURE_LEVEL_11_0; + debug_controller->EnableDebugLayer(); + debug_controller->Release(); + } + else + { + MessageBox(wnd, _T("WARNING: Failed to enable D3D12 debug layer, please ensure the Graphics Tools feature is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); } } - if (FAILED(hr)) #endif - { - if (SUCCEEDED(hr)) - { -#ifdef USE_D3D12_DEBUG_LAYER - ID3D12Debug* debug_controller; - hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); - if (SUCCEEDED(hr)) - { - debug_controller->EnableDebugLayer(); - debug_controller->Release(); - } - else - { - MessageBox(wnd, _T("Failed to initialize Direct3D debug layer."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - } -#endif - hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); - s_feat_level = D3D_FEATURE_LEVEL_11_0; - } + if (SUCCEEDED(hr)) + { + hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); + s_feat_level = D3D_FEATURE_LEVEL_11_0; } if (SUCCEEDED(hr)) From ffe0e326d2713f0ec0748cacec73f9b298298211 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 17 Feb 2016 14:20:15 +1000 Subject: [PATCH 05/16] D3D12: Use stream buffer for CD3DFont --- Source/Core/VideoBackends/D3D12/D3DUtil.cpp | 46 ++++++--------------- Source/Core/VideoBackends/D3D12/D3DUtil.h | 7 ++-- 2 files changed, 15 
insertions(+), 38 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp index 606c4a466f..74aafe20f2 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp @@ -359,24 +359,7 @@ int CD3DFont::Init() const unsigned int text_vb_size = s_max_num_vertices * sizeof(FONT2DVERTEX); - CheckHR( - device12->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), - D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(text_vb_size), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_vb12) - ) - ); - - SetDebugObjectName12(m_vb12, "vertex buffer of a CD3DFont object"); - - m_vb12_view.BufferLocation = m_vb12->GetGPUVirtualAddress(); - m_vb12_view.SizeInBytes = text_vb_size; - m_vb12_view.StrideInBytes = sizeof(FONT2DVERTEX); - - CheckHR(m_vb12->Map(0, nullptr, &m_vb12_data)); + m_vertex_buffer = std::make_unique(text_vb_size * 2, text_vb_size * 16, nullptr); D3D12_GRAPHICS_PIPELINE_STATE_DESC text_pso_desc = { default_root_signature, // ID3D12RootSignature *pRootSignature; @@ -409,7 +392,7 @@ int CD3DFont::Init() int CD3DFont::Shutdown() { - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_vb12); + m_vertex_buffer.reset(); D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_texture12); return S_OK; @@ -417,7 +400,7 @@ int CD3DFont::Shutdown() int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dwColor, const std::string& text) { - if (!m_vb12) + if (!m_vertex_buffer) return 0; float scale_x = 1 / static_cast(D3D::GetBackBufferWidth()) * 2.f; @@ -428,10 +411,6 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw float sx = x * scale_x - 1.f; float sy = 1.f - y * scale_y; - // Fill vertex buffer - FONT2DVERTEX* vertices12 = static_cast(m_vb12_data) + m_vb12_offset / sizeof(FONT2DVERTEX); - int num_triangles = 0L; - // set general 
pipeline state D3D::current_command_list->SetPipelineState(m_pso); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); @@ -441,13 +420,11 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, m_texture12_gpu); - // If we are close to running off edge of vertex buffer, jump back to beginning. - if (m_vb12_offset + text.length() * 6 * sizeof(FONT2DVERTEX) >= s_max_num_vertices * sizeof(FONT2DVERTEX)) - { - m_vb12_offset = 0; - vertices12 = static_cast(m_vb12_data); - } + // upper bound is nchars * 6, assuming no spaces + m_vertex_buffer->AllocateSpaceInBuffer(static_cast(text.length()) * 6 * sizeof(FONT2DVERTEX), sizeof(FONT2DVERTEX)); + FONT2DVERTEX* vertices12 = reinterpret_cast(m_vertex_buffer->GetCPUAddressOfCurrentAllocation()); + int num_triangles = 0; float start_x = sx; for (char c : text) { @@ -487,13 +464,14 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw // Render the vertex buffer if (num_triangles > 0) { - D3D::current_command_list->IASetVertexBuffers(0, 1, &m_vb12_view); + u32 written_size = num_triangles * 3 * sizeof(FONT2DVERTEX); + m_vertex_buffer->OverrideSizeOfPreviousAllocation(written_size); - D3D::current_command_list->DrawInstanced(3 * num_triangles, 1, m_vb12_offset / sizeof(FONT2DVERTEX), 0); + D3D12_VERTEX_BUFFER_VIEW vb_view = { m_vertex_buffer->GetGPUAddressOfCurrentAllocation(), written_size, sizeof(FONT2DVERTEX) }; + D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view); + D3D::current_command_list->DrawInstanced(3 * num_triangles, 1, 0, 0); } - m_vb12_offset += 3 * num_triangles * sizeof(FONT2DVERTEX); - return S_OK; } diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.h b/Source/Core/VideoBackends/D3D12/D3DUtil.h index c40784425c..1e72979b33 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.h +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.h @@ -5,10 
+5,12 @@ #pragma once #include +#include #include #include "Common/MathUtil.h" #include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DStreamBuffer.h" #include "VideoCommon/RenderBase.h" @@ -48,10 +50,7 @@ private: D3D12_CPU_DESCRIPTOR_HANDLE m_texture12_cpu = {}; D3D12_GPU_DESCRIPTOR_HANDLE m_texture12_gpu = {}; - ID3D12Resource* m_vb12 = nullptr; - D3D12_VERTEX_BUFFER_VIEW m_vb12_view = {}; - void* m_vb12_data = nullptr; - unsigned int m_vb12_offset = 0; + std::unique_ptr m_vertex_buffer; D3D12_INPUT_LAYOUT_DESC m_input_layout12 = {}; D3D12_SHADER_BYTECODE m_pshader12 = {}; From 759b77474da123d36946a742da929561eac63baf Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 17 Feb 2016 14:25:42 +1000 Subject: [PATCH 06/16] D3D12: Use std::thread for worker thread Using CreateThread can create issues if any CRT calls are made, as thread-specific data may not be initialized. Additionally, TerminateThread is not a good idea for similar reasons, and may not free CRT resources. 
--- .../VideoBackends/D3D12/D3DQueuedCommandList.cpp | 12 +++++++----- .../Core/VideoBackends/D3D12/D3DQueuedCommandList.h | 7 ++++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp index b3f672a8ed..16be35d021 100644 --- a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp @@ -14,9 +14,8 @@ constexpr size_t BufferOffsetForQueueItemType() return sizeof(T) + sizeof(D3DQueueItemType) * 2; } -DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param) +void ID3D12QueuedCommandList::BackgroundThreadFunction(ID3D12QueuedCommandList* parent_queued_command_list) { - ID3D12QueuedCommandList* parent_queued_command_list = static_cast(param); ID3D12GraphicsCommandList* command_list = parent_queued_command_list->m_command_list; byte* queue_array = parent_queued_command_list->m_queue_array; @@ -26,6 +25,8 @@ DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param) while (true) { WaitForSingleObject(parent_queued_command_list->m_begin_execution_event, INFINITE); + if (parent_queued_command_list->m_background_thread_exit.load()) + break; byte* item = &queue_array[queue_array_front]; @@ -374,13 +375,14 @@ ID3D12QueuedCommandList::ID3D12QueuedCommandList(ID3D12GraphicsCommandList* back m_begin_execution_event = CreateSemaphore(nullptr, 0, 256, nullptr); m_stop_execution_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); - m_background_thread = CreateThread(nullptr, 0, BackgroundThreadFunction, this, 0, &m_background_thread_id); + m_background_thread = std::thread(BackgroundThreadFunction, this); } ID3D12QueuedCommandList::~ID3D12QueuedCommandList() { - TerminateThread(m_background_thread, 0); - CloseHandle(m_background_thread); + m_background_thread_exit.store(true); + ReleaseSemaphore(m_begin_execution_event, 1, nullptr); + 
m_background_thread.join(); CloseHandle(m_begin_execution_event); CloseHandle(m_stop_execution_event); diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h index 068b66e723..6b1ca4306c 100644 --- a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h @@ -6,6 +6,7 @@ #include #include +#include namespace DX12 { @@ -612,15 +613,15 @@ private: void ResetQueueOverflowTracking(); void CheckForOverflow(); - static DWORD WINAPI BackgroundThreadFunction(LPVOID param); + static void BackgroundThreadFunction(ID3D12QueuedCommandList* parent_queued_command_list); byte m_queue_array[QUEUE_ARRAY_SIZE]; byte* m_queue_array_back = m_queue_array; byte* m_queue_array_back_at_start_of_frame = m_queue_array_back; - DWORD m_background_thread_id; - HANDLE m_background_thread; + std::thread m_background_thread; + std::atomic_bool m_background_thread_exit; HANDLE m_begin_execution_event; HANDLE m_stop_execution_event; From 649b94338efc6190490d542c0b63e298216f0a55 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 17 Feb 2016 14:57:57 +1000 Subject: [PATCH 07/16] D3D12: Cleanup/refactoring of teardown process --- Source/Core/VideoBackends/D3D12/D3DBase.cpp | 10 +-- .../D3D12/D3DCommandListManager.cpp | 87 ++++++++----------- .../D3D12/D3DCommandListManager.h | 9 +- .../D3D12/D3DQueuedCommandList.cpp | 23 ++--- .../D3D12/D3DQueuedCommandList.h | 5 +- 5 files changed, 57 insertions(+), 77 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.cpp b/Source/Core/VideoBackends/D3D12/D3DBase.cpp index eebbdcbba8..01d5d9a989 100644 --- a/Source/Core/VideoBackends/D3D12/D3DBase.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DBase.cpp @@ -715,7 +715,7 @@ void CreateRootSignatures() void WaitForOutstandingRenderingToComplete() { - command_list_mgr->ClearQueueAndWaitForCompletionOfInflightWork(); + command_list_mgr->ExecuteQueuedWork(true); } void Close() @@ 
-731,8 +731,6 @@ void Close() D3D::CleanupPersistentD3DTextureResources(); - command_list_mgr->ImmediatelyDestroyAllResourcesScheduledForDestruction(); - SAFE_RELEASE(s_swap_chain); command_list_mgr.reset(); @@ -816,15 +814,15 @@ unsigned int GetMaxTextureSize() void Reset() { - command_list_mgr->ExecuteQueuedWork(true); - // release all back buffer references for (UINT i = 0; i < ARRAYSIZE(s_backbuf); i++) { SAFE_RELEASE(s_backbuf[i]); } - D3D::command_list_mgr->ImmediatelyDestroyAllResourcesScheduledForDestruction(); + // Block until all commands have finished. + // This will also final-release all pending resources (including the backbuffer above) + command_list_mgr->ExecuteQueuedWork(true); // resize swapchain buffers RECT client; diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp index 4fe6875f13..07c9b65eab 100644 --- a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp @@ -113,12 +113,10 @@ void D3DCommandListManager::ExecuteQueuedWork(bool wait_for_gpu_completion) m_queue_fence_value++; #ifdef USE_D3D12_QUEUED_COMMAND_LISTS - CheckHR(m_queued_command_list->Close()); + m_queued_command_list->Close(); m_queued_command_list->QueueExecute(); - m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); - - m_queued_command_list->ProcessQueuedItems(wait_for_gpu_completion); + m_queued_command_list->ProcessQueuedItems(wait_for_gpu_completion, wait_for_gpu_completion); #else CheckHR(m_backing_command_list->Close()); @@ -145,7 +143,7 @@ void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_cha m_queue_fence_value++; #ifdef USE_D3D12_QUEUED_COMMAND_LISTS - CheckHR(m_queued_command_list->Close()); + m_queued_command_list->Close(); m_queued_command_list->QueueExecute(); m_queued_command_list->QueuePresent(swap_chain, sync_interval, flags); 
m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); @@ -170,6 +168,31 @@ void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_cha SetInitialCommandListState(); } +void D3DCommandListManager::DestroyAllPendingResources() +{ + for (auto& destruction_list : m_deferred_destruction_lists) + { + for (auto& resource : destruction_list) + resource->Release(); + + destruction_list.clear(); + } +} + +void D3DCommandListManager::ResetAllCommandAllocators() +{ + for (auto& allocator_list : m_command_allocator_lists) + { + for (auto& allocator : allocator_list) + allocator->Reset(); + } + + // Move back to the start, using the first allocator of first list. + m_current_command_allocator = 0; + m_current_command_allocator_list = 0; + m_current_deferred_destruction_list = 0; +} + void D3DCommandListManager::WaitForGPUCompletion() { // Wait for GPU to finish all outstanding work. @@ -186,13 +209,13 @@ void D3DCommandListManager::WaitForGPUCompletion() WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value); // GPU is up to date with us. Therefore, it has finished with any pending resources. - ImmediatelyDestroyAllResourcesScheduledForDestruction(); + DestroyAllPendingResources(); // Command allocators are also up-to-date, so reset these. ResetAllCommandAllocators(); } -void D3DCommandListManager::PerformGpuRolloverChecks() +void D3DCommandListManager::PerformGPURolloverChecks() { m_queue_frame_fence_value++; @@ -243,7 +266,7 @@ void D3DCommandListManager::MoveToNextCommandAllocator() // Did we wrap around? Move to the next set of allocators. 
if (m_current_command_allocator == 0) - PerformGpuRolloverChecks(); + PerformGPURolloverChecks(); } void D3DCommandListManager::ResetCommandList() @@ -264,52 +287,18 @@ void D3DCommandListManager::DestroyResourceAfterCurrentCommandListExecuted(ID3D1 m_deferred_destruction_lists[m_current_deferred_destruction_list].push_back(resource); } -void D3DCommandListManager::ImmediatelyDestroyAllResourcesScheduledForDestruction() -{ - for (auto& destruction_list : m_deferred_destruction_lists) - { - for (auto& resource : destruction_list) - resource->Release(); - - destruction_list.clear(); - } -} - -void D3DCommandListManager::ResetAllCommandAllocators() -{ - for (auto& allocator_list : m_command_allocator_lists) - { - for (auto& allocator : allocator_list) - allocator->Reset(); - } - - // Move back to the start, using the first allocator of first list. - m_current_command_allocator = 0; - m_current_command_allocator_list = 0; - m_current_deferred_destruction_list = 0; -} - -void D3DCommandListManager::ClearQueueAndWaitForCompletionOfInflightWork() -{ - // Wait for GPU to finish all outstanding work. - m_queue_fence_value++; -#ifdef USE_D3D12_QUEUED_COMMAND_LISTS - m_queued_command_list->ClearQueue(); // Waits for currently-processing work to finish, then clears queue. - m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); - m_queued_command_list->ProcessQueuedItems(true); -#else - CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); -#endif - WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); -} - D3DCommandListManager::~D3DCommandListManager() { - ImmediatelyDestroyAllResourcesScheduledForDestruction(); - #ifdef USE_D3D12_QUEUED_COMMAND_LISTS + // Wait for background thread to exit. m_queued_command_list->Release(); #endif + + // The command list will still be open, close it before destroying. 
+ m_backing_command_list->Close(); + + DestroyAllPendingResources(); + m_backing_command_list->Release(); for (auto& allocator_list : m_command_allocator_lists) diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h index 3f932dfc0e..f404a8a0ac 100644 --- a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h @@ -38,10 +38,7 @@ public: void ExecuteQueuedWork(bool wait_for_gpu_completion = false); void ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags); - void ClearQueueAndWaitForCompletionOfInflightWork(); void DestroyResourceAfterCurrentCommandListExecuted(ID3D12Resource* resource); - void ImmediatelyDestroyAllResourcesScheduledForDestruction(); - void ResetAllCommandAllocators(); void SetCommandListDirtyState(unsigned int command_list_state, bool dirty); bool GetCommandListDirtyState(COMMAND_LIST_STATE command_list_state) const; @@ -63,9 +60,11 @@ public: void WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value); private: - + void DestroyAllPendingResources(); + void ResetAllCommandAllocators(); void WaitForGPUCompletion(); - void PerformGpuRolloverChecks(); + + void PerformGPURolloverChecks(); void MoveToNextCommandAllocator(); void ResetCommandList(); diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp index 16be35d021..4bfb58735a 100644 --- a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp @@ -25,8 +25,6 @@ void ID3D12QueuedCommandList::BackgroundThreadFunction(ID3D12QueuedCommandList* while (true) { WaitForSingleObject(parent_queued_command_list->m_begin_execution_event, INFINITE); - if (parent_queued_command_list->m_background_thread_exit.load()) - break; byte* item = &queue_array[queue_array_front]; @@ -341,6 +339,7 @@ void 
ID3D12QueuedCommandList::BackgroundThreadFunction(ID3D12QueuedCommandList* bool eligible_to_move_to_front_of_queue = reinterpret_cast(item)->Stop.eligible_to_move_to_front_of_queue; bool signal_stop_event = reinterpret_cast(item)->Stop.signal_stop_event; + bool terminate_worker_thread = reinterpret_cast(item)->Stop.terminate_worker_thread; item += BufferOffsetForQueueItemType(); @@ -354,6 +353,9 @@ void ID3D12QueuedCommandList::BackgroundThreadFunction(ID3D12QueuedCommandList* SetEvent(parent_queued_command_list->m_stop_execution_event); } + if (terminate_worker_thread) + return; + goto exitLoop; } } @@ -380,8 +382,8 @@ ID3D12QueuedCommandList::ID3D12QueuedCommandList(ID3D12GraphicsCommandList* back ID3D12QueuedCommandList::~ID3D12QueuedCommandList() { - m_background_thread_exit.store(true); - ReleaseSemaphore(m_begin_execution_event, 1, nullptr); + // Kick worker thread, and tell it to exit. + ProcessQueuedItems(true, true, true); m_background_thread.join(); CloseHandle(m_begin_execution_event); @@ -463,22 +465,14 @@ void ID3D12QueuedCommandList::QueuePresent(IDXGISwapChain* swap_chain, UINT sync CheckForOverflow(); } -void ID3D12QueuedCommandList::ClearQueue() -{ - // Drain semaphore to ensure no new previously queued work executes (though inflight work may continue). - while (WaitForSingleObject(m_begin_execution_event, 0) != WAIT_TIMEOUT) { } - - // Assume that any inflight queued work will complete within 100ms. This is a safe assumption. 
- Sleep(100); -} - -void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_of_queue, bool wait_for_stop) +void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_of_queue, bool wait_for_stop, bool terminate_worker_thread) { D3DQueueItem item = {}; item.Type = D3DQueueItemType::Stop; item.Stop.eligible_to_move_to_front_of_queue = eligible_to_move_to_front_of_queue; item.Stop.signal_stop_event = wait_for_stop; + item.Stop.terminate_worker_thread = terminate_worker_thread; *reinterpret_cast(m_queue_array_back) = item; @@ -502,6 +496,7 @@ void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_ if (wait_for_stop) { WaitForSingleObject(m_stop_execution_event, INFINITE); + ResetEvent(m_stop_execution_event); } } diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h index 6b1ca4306c..1ca0334dba 100644 --- a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h @@ -211,6 +211,7 @@ struct StopArguments { bool eligible_to_move_to_front_of_queue; bool signal_stop_event; + bool terminate_worker_thread; }; struct D3DQueueItem @@ -255,13 +256,12 @@ public: ID3D12QueuedCommandList(ID3D12GraphicsCommandList* backing_command_list, ID3D12CommandQueue* backing_command_queue); - void ProcessQueuedItems(bool eligible_to_move_to_front_of_queue = false, bool wait_for_stop = false); + void ProcessQueuedItems(bool eligible_to_move_to_front_of_queue = false, bool wait_for_stop = false, bool terminate_worker_thread = false); void QueueExecute(); void QueueFenceGpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value); void QueueFenceCpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value); void QueuePresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags); - void ClearQueue(); // IUnknown methods @@ -621,7 +621,6 @@ private: byte* m_queue_array_back_at_start_of_frame = 
m_queue_array_back; std::thread m_background_thread; - std::atomic_bool m_background_thread_exit; HANDLE m_begin_execution_event; HANDLE m_stop_execution_event; From 6bbf836ea997b50b3a26eea716b64aa2f6ce87d0 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 21 Feb 2016 16:55:43 +1000 Subject: [PATCH 08/16] D3D12: Simplify and fix MSAA EFB depth copy path --- Source/Core/VideoBackends/D3D12/D3DUtil.cpp | 7 +--- Source/Core/VideoBackends/D3D12/D3DUtil.h | 3 +- .../D3D12/FramebufferManager.cpp | 19 ++------- .../VideoBackends/D3D12/FramebufferManager.h | 2 - .../VideoBackends/D3D12/StaticShaderCache.cpp | 40 ++++++------------- .../VideoBackends/D3D12/StaticShaderCache.h | 2 +- 6 files changed, 21 insertions(+), 52 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp index 74aafe20f2..abf1088ac4 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp @@ -599,8 +599,7 @@ void DrawShadedTexQuad(D3DTexture2D* texture, u32 slice, DXGI_FORMAT rt_format, bool inherit_srv_binding, - bool rt_multisampled, - D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc_override + bool rt_multisampled ) { float sw = 1.0f / static_cast(source_width); @@ -663,9 +662,7 @@ void DrawShadedTexQuad(D3DTexture2D* texture, Renderer::GetResetBlendDesc(), // D3D12_BLEND_DESC BlendState; UINT_MAX, // UINT SampleMask; Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState - depth_stencil_desc_override ? 
- *depth_stencil_desc_override : - Renderer::GetResetDepthStencilDesc(), // D3D12_DEPTH_STENCIL_DESC DepthStencilState + Renderer::GetResetDepthStencilDesc(), // D3D12_DEPTH_STENCIL_DESC DepthStencilState layout12, // D3D12_INPUT_LAYOUT_DESC InputLayout D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES IndexBufferProperties D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.h b/Source/Core/VideoBackends/D3D12/D3DUtil.h index 1e72979b33..b000c4ac99 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.h +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.h @@ -86,8 +86,7 @@ void DrawShadedTexQuad(D3DTexture2D* texture, u32 slice = 0, DXGI_FORMAT rt_format = DXGI_FORMAT_R8G8B8A8_UNORM, bool inherit_srv_binding = false, - bool rt_multisampled = false, - D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc_override = nullptr + bool rt_multisampled = false ); void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled); diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp index f0118a20bf..936266f609 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp @@ -21,8 +21,6 @@ FramebufferManager::Efb FramebufferManager::m_efb; unsigned int FramebufferManager::m_target_width; unsigned int FramebufferManager::m_target_height; -D3D12_DEPTH_STENCIL_DESC FramebufferManager::m_depth_resolve_depth_stencil_desc; - D3DTexture2D*& FramebufferManager::GetEFBColorTexture() { return m_efb.color_tex; } ID3D12Resource*& FramebufferManager::GetEFBColorStagingBuffer() { return m_efb.color_staging_buf; } @@ -148,18 +146,12 @@ FramebufferManager::FramebufferManager() SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.resolved_color_tex->GetTex12(), "EFB color resolve 
texture shader resource view"); - texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, m_target_width, m_target_height, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12)); CHECK(hr == S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); - m_efb.resolved_depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); + m_efb.resolved_depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.resolved_depth_tex->GetTex12(), "EFB depth resolve texture shader resource view"); - - m_depth_resolve_depth_stencil_desc = {}; - m_depth_resolve_depth_stencil_desc.StencilEnable = FALSE; - m_depth_resolve_depth_stencil_desc.DepthEnable = TRUE; - m_depth_resolve_depth_stencil_desc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; - m_depth_resolve_depth_stencil_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; } else { @@ -215,21 +207,18 @@ void FramebufferManager::ResolveDepthTexture() const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast(m_target_width), static_cast(m_target_height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; D3D::current_command_list->RSSetViewports(1, &vp12); - m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + 
m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(0, nullptr, FALSE, &m_efb.resolved_depth_tex->GetDSV12()); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - D3D::SetLinearCopySampler(); - - // Render a quad covering the entire target, writing SV_Depth. const D3D12_RECT source_rect = CD3DX12_RECT(0, 0, m_target_width, m_target_height); D3D::DrawShadedTexQuad( FramebufferManager::GetEFBDepthTexture(), &source_rect, m_target_width, m_target_height, - StaticShaderCache::GetDepthCopyPixelShader(true), + StaticShaderCache::GetDepthResolveToColorPixelShader(), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), StaticShaderCache::GetCopyGeometryShader(), diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.h b/Source/Core/VideoBackends/D3D12/FramebufferManager.h index 8f0376a9bc..78242197b2 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.h +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.h @@ -100,8 +100,6 @@ private: static unsigned int m_target_width; static unsigned int m_target_height; - - static D3D12_DEPTH_STENCIL_DESC m_depth_resolve_depth_stencil_desc; }; } // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp index adbe83346b..e2bd65aac9 100644 --- a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp @@ -15,7 +15,7 @@ namespace DX12 static ID3DBlob* s_color_matrix_program_blob[2] = {}; static ID3DBlob* s_color_copy_program_blob[2] = {}; static ID3DBlob* s_depth_matrix_program_blob[2] = {}; -static ID3DBlob* s_depth_copy_program_blob[2] = {}; +static ID3DBlob* s_depth_resolve_to_color_program_blob = {}; static ID3DBlob* 
s_clear_program_blob = {}; static ID3DBlob* s_anaglyph_program_blob = {}; static ID3DBlob* s_rgba6_to_rgb8_program_blob[2] = {}; @@ -73,17 +73,6 @@ static constexpr const char s_color_copy_program_hlsl[] = { "}\n" }; -static constexpr const char s_depth_copy_program_hlsl[] = { - "sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - "out float odepth : SV_Depth,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "odepth = Tex0.Sample(samp0,uv0);\n" - "}\n" -}; - // Anaglyph Red-Cyan shader based on Dubois algorithm // Constants taken from the paper: // "Conversion of a Stereo Pair to Anaglyph with @@ -126,19 +115,19 @@ static constexpr const char s_color_copy_program_msaa_hlsl[] = { "}\n" }; -static constexpr const char s_depth_copy_program_msaa_hlsl[] = { +static constexpr const char s_depth_resolve_to_color_program_hlsl[] = { "#define SAMPLES %d\n" "Texture2DMSArray Tex0 : register(t0);\n" "void main(\n" - " out float depth : SV_Depth,\n" + " out float ocol0 : SV_Target,\n" " in float4 pos : SV_Position,\n" " in float3 uv0 : TEXCOORD0)\n" "{\n" " int width, height, slices, samples;\n" " Tex0.GetDimensions(width, height, slices, samples);\n" - " depth = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n" + " ocol0 = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n" " for(int i = 1; i < SAMPLES; ++i)\n" - " depth = min(depth, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n" + " ocol0 = min(ocol0, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n" "}\n" }; @@ -497,25 +486,21 @@ D3D12_SHADER_BYTECODE StaticShaderCache::GetColorCopyPixelShader(bool multisampl return bytecode; } -D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthCopyPixelShader(bool multisampled) +D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthResolveToColorPixelShader() { D3D12_SHADER_BYTECODE bytecode = {}; - if (!multisampled || g_ActiveConfig.iMultisamples == 1) + if 
(s_depth_resolve_to_color_program_blob) { - bytecode = { s_depth_copy_program_blob[0]->GetBufferPointer(), s_depth_copy_program_blob[0]->GetBufferSize() }; - } - else if (s_depth_copy_program_blob[1]) - { - bytecode = { s_depth_copy_program_blob[1]->GetBufferPointer(), s_depth_copy_program_blob[1]->GetBufferSize() }; + bytecode = { s_depth_resolve_to_color_program_blob->GetBufferPointer(), s_depth_resolve_to_color_program_blob->GetBufferSize() }; } else { // create MSAA shader for current AA mode - std::string buf = StringFromFormat(s_depth_copy_program_msaa_hlsl, g_ActiveConfig.iMultisamples); + std::string buf = StringFromFormat(s_depth_resolve_to_color_program_hlsl, g_ActiveConfig.iMultisamples); - D3D::CompilePixelShader(buf, &s_depth_copy_program_blob[1]); - bytecode = { s_depth_copy_program_blob[1]->GetBufferPointer(), s_depth_copy_program_blob[1]->GetBufferSize() }; + D3D::CompilePixelShader(buf, &s_depth_resolve_to_color_program_blob); + bytecode = { s_depth_resolve_to_color_program_blob->GetBufferPointer(), s_depth_resolve_to_color_program_blob->GetBufferSize() }; } return bytecode; @@ -646,7 +631,6 @@ void StaticShaderCache::Init() D3D::CompilePixelShader(s_clear_program_hlsl, &s_clear_program_blob); D3D::CompilePixelShader(s_anaglyph_program_hlsl, &s_anaglyph_program_blob); D3D::CompilePixelShader(s_color_copy_program_hlsl, &s_color_copy_program_blob[0]); - D3D::CompilePixelShader(s_depth_copy_program_hlsl, &s_depth_copy_program_blob[0]); D3D::CompilePixelShader(s_color_matrix_program_hlsl, &s_color_matrix_program_blob[0]); D3D::CompilePixelShader(s_depth_matrix_program_hlsl, &s_depth_matrix_program_blob[0]); @@ -667,6 +651,7 @@ void StaticShaderCache::InvalidateMSAAShaders() SAFE_RELEASE(s_depth_matrix_program_blob[1]); SAFE_RELEASE(s_rgb8_to_rgba6_program_blob[1]); SAFE_RELEASE(s_rgba6_to_rgb8_program_blob[1]); + SAFE_RELEASE(s_depth_resolve_to_color_program_blob); } void StaticShaderCache::Shutdown() @@ -675,6 +660,7 @@ void 
StaticShaderCache::Shutdown() SAFE_RELEASE(s_clear_program_blob); SAFE_RELEASE(s_anaglyph_program_blob); + SAFE_RELEASE(s_depth_resolve_to_color_program_blob); for (unsigned int i = 0; i < 2; ++i) { diff --git a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h index bbdb37cb9e..4b9f6959a8 100644 --- a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h @@ -18,7 +18,7 @@ public: static D3D12_SHADER_BYTECODE GetColorMatrixPixelShader(bool multisampled); static D3D12_SHADER_BYTECODE GetColorCopyPixelShader(bool multisampled); static D3D12_SHADER_BYTECODE GetDepthMatrixPixelShader(bool multisampled); - static D3D12_SHADER_BYTECODE GetDepthCopyPixelShader(bool multisampled); + static D3D12_SHADER_BYTECODE GetDepthResolveToColorPixelShader(); static D3D12_SHADER_BYTECODE GetClearPixelShader(); static D3D12_SHADER_BYTECODE GetAnaglyphPixelShader(); static D3D12_SHADER_BYTECODE GetReinterpRGBA6ToRGB8PixelShader(bool multisampled); From 1d909ec7a422b261588e860486b01db85e94fa6e Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 21 Feb 2016 17:18:02 +1000 Subject: [PATCH 09/16] D3D12: Implement non-blocking EFB access when EFB has not been modified --- Source/Core/VideoBackends/D3D12/D3DUtil.cpp | 6 + .../D3D12/FramebufferManager.cpp | 253 +++++++++++++++--- .../VideoBackends/D3D12/FramebufferManager.h | 29 +- Source/Core/VideoBackends/D3D12/Render.cpp | 218 +++------------ 4 files changed, 279 insertions(+), 227 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp index abf1088ac4..a1a7c512da 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp @@ -16,6 +16,7 @@ #include "VideoBackends/D3D12/D3DTexture.h" #include "VideoBackends/D3D12/D3DUtil.h" +#include "VideoBackends/D3D12/FramebufferManager.h" #include "VideoBackends/D3D12/Render.h" #include 
"VideoBackends/D3D12/StaticShaderCache.h" @@ -964,6 +965,11 @@ void DrawEFBPokeQuads(EFBAccessType type, InitColVertex(&vertex[3], x1, y2, z, col); InitColVertex(&vertex[4], x2, y1, z, col); InitColVertex(&vertex[5], x2, y2, z, col); + + if (type == POKE_COLOR) + FramebufferManager::UpdateEFBColorAccessCopy(point->x, point->y, col); + else if (type == POKE_Z) + FramebufferManager::UpdateEFBDepthAccessCopy(point->x, point->y, z); } // Issue the draw diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp index 936266f609..9b7bebf608 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp @@ -22,11 +22,7 @@ unsigned int FramebufferManager::m_target_width; unsigned int FramebufferManager::m_target_height; D3DTexture2D*& FramebufferManager::GetEFBColorTexture() { return m_efb.color_tex; } -ID3D12Resource*& FramebufferManager::GetEFBColorStagingBuffer() { return m_efb.color_staging_buf; } - D3DTexture2D*& FramebufferManager::GetEFBDepthTexture() { return m_efb.depth_tex; } -D3DTexture2D*& FramebufferManager::GetEFBDepthReadTexture() { return m_efb.depth_read_texture; } -ID3D12Resource*& FramebufferManager::GetEFBDepthStagingBuffer() { return m_efb.depth_staging_buf; } D3DTexture2D*& FramebufferManager::GetEFBColorTempTexture() { return m_efb.color_temp_tex; } @@ -105,11 +101,6 @@ FramebufferManager::FramebufferManager() SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.color_temp_tex->GetTex12(), "EFB color temp texture"); - // AccessEFB - Sysmem buffer used to retrieve the pixel data from color_tex - texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(64 * 1024); - CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.color_staging_buf))); - CHECK(hr == S_OK, "create EFB color staging buffer 
(hr=%#x)", hr); - // EFB depth buffer - primary depth buffer texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueDSV, IID_PPV_ARGS(&buf12))); @@ -118,24 +109,6 @@ FramebufferManager::FramebufferManager() SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.depth_tex->GetTex12(), "EFB depth texture"); - // Render buffer for AccessEFB (depth data) - texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, 1, 1, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); - optimized_clear_valueRTV.Format = DXGI_FORMAT_R32_FLOAT; - hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12)); - CHECK(hr == S_OK, "create EFB depth read texture (hr=%#x)", hr); - - m_efb.depth_read_texture = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); - - SAFE_RELEASE(buf12); - D3D::SetDebugObjectName12(m_efb.depth_read_texture->GetTex12(), "EFB depth read texture (used in Renderer::AccessEFB)"); - - // AccessEFB - Sysmem buffer used to retrieve the pixel data from depth_read_texture - texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(64 * 1024); - hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.depth_staging_buf)); - CHECK(hr == S_OK, "create EFB depth staging buffer (hr=%#x)", hr); - - D3D::SetDebugObjectName12(m_efb.depth_staging_buf, "EFB depth staging texture (used for 
Renderer::AccessEFB)"); - if (g_ActiveConfig.iMultisamples > 1) { // Framebuffer resolve textures (color+depth) @@ -159,6 +132,8 @@ FramebufferManager::FramebufferManager() m_efb.resolved_depth_tex = nullptr; } + InitializeEFBAccessCopies(); + s_xfbEncoder.Init(); } @@ -166,17 +141,12 @@ FramebufferManager::~FramebufferManager() { s_xfbEncoder.Shutdown(); + DestroyEFBAccessCopies(); + SAFE_RELEASE(m_efb.color_tex); - SAFE_RELEASE(m_efb.color_temp_tex); - - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.color_staging_buf); - - SAFE_RELEASE(m_efb.resolved_color_tex); SAFE_RELEASE(m_efb.depth_tex); - - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.depth_staging_buf); - - SAFE_RELEASE(m_efb.depth_read_texture); + SAFE_RELEASE(m_efb.color_temp_tex); + SAFE_RELEASE(m_efb.resolved_color_tex); SAFE_RELEASE(m_efb.resolved_depth_tex); } @@ -235,6 +205,217 @@ void FramebufferManager::ResolveDepthTexture() g_renderer->RestoreAPIState(); } +u32 FramebufferManager::ReadEFBColorAccessCopy(u32 x, u32 y) +{ + if (!m_efb.color_access_readback_map) + MapEFBColorAccessCopy(); + + u32 color; + size_t buffer_offset = y * m_efb.color_access_readback_pitch + x * sizeof(u32); + memcpy(&color, &m_efb.color_access_readback_map[buffer_offset], sizeof(color)); + return color; +} + +float FramebufferManager::ReadEFBDepthAccessCopy(u32 x, u32 y) +{ + if (!m_efb.depth_access_readback_map) + MapEFBDepthAccessCopy(); + + float depth; + size_t buffer_offset = y * m_efb.depth_access_readback_pitch + x * sizeof(float); + memcpy(&depth, &m_efb.depth_access_readback_map[buffer_offset], sizeof(depth)); + return depth; +} + +void FramebufferManager::UpdateEFBColorAccessCopy(u32 x, u32 y, u32 color) +{ + if (!m_efb.color_access_readback_map) + return; + + size_t buffer_offset = y * m_efb.color_access_readback_pitch + x * sizeof(u32); + memcpy(&m_efb.color_access_readback_map[buffer_offset], &color, sizeof(color)); +} + +void 
FramebufferManager::UpdateEFBDepthAccessCopy(u32 x, u32 y, float depth) +{ + if (!m_efb.depth_access_readback_map) + return; + + size_t buffer_offset = y * m_efb.depth_access_readback_pitch + x * sizeof(float); + memcpy(&m_efb.depth_access_readback_map[buffer_offset], &depth, sizeof(depth)); +} + +void FramebufferManager::InitializeEFBAccessCopies() +{ + D3D12_CLEAR_VALUE optimized_color_clear_value = { DXGI_FORMAT_R8G8B8A8_UNORM, { 0.0f, 0.0f, 0.0f, 1.0f } }; + D3D12_CLEAR_VALUE optimized_depth_clear_value = { DXGI_FORMAT_R32_FLOAT, { 1.0f } }; + CD3DX12_RESOURCE_DESC texdesc12; + ID3D12Resource* buf12; + HRESULT hr; + + // EFB access - color resize buffer + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_TEXTURE_LAYOUT_UNKNOWN, 0); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_color_clear_value, IID_PPV_ARGS(&buf12)); + CHECK(hr == S_OK, "create EFB access color resize buffer (hr=%#x)", hr); + m_efb.color_access_resize_tex = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_R8G8B8A8_UNORM); + D3D::SetDebugObjectName12(m_efb.color_access_resize_tex->GetTex12(), "EFB access color resize buffer"); + buf12->Release(); + + // EFB access - color staging/readback buffer + m_efb.color_access_readback_pitch = D3D::AlignValue(EFB_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(m_efb.color_access_readback_pitch * EFB_HEIGHT); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.color_access_readback_buffer)); + D3D::SetDebugObjectName12(m_efb.color_access_readback_buffer, "EFB access color readback buffer"); + + // EFB access - depth resize buffer 
+ texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_TEXTURE_LAYOUT_UNKNOWN, 0); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_depth_clear_value, IID_PPV_ARGS(&buf12)); + CHECK(hr == S_OK, "create EFB access depth resize buffer (hr=%#x)", hr); + m_efb.depth_access_resize_tex = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_R32_FLOAT); + D3D::SetDebugObjectName12(m_efb.depth_access_resize_tex->GetTex12(), "EFB access depth resize buffer"); + buf12->Release(); + + // EFB access - depth staging/readback buffer + m_efb.depth_access_readback_pitch = D3D::AlignValue(EFB_WIDTH * sizeof(float), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(m_efb.depth_access_readback_pitch * EFB_HEIGHT); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.depth_access_readback_buffer)); + D3D::SetDebugObjectName12(m_efb.depth_access_readback_buffer, "EFB access depth readback buffer"); +} + +void FramebufferManager::MapEFBColorAccessCopy() +{ + D3D::command_list_mgr->CPUAccessNotify(); + + ID3D12Resource* src_resource; + if (m_target_width != EFB_WIDTH || m_target_height != EFB_HEIGHT || g_ActiveConfig.iMultisamples > 1) + { + // for non-1xIR or multisampled cases, we need to copy to an intermediate texture first + m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + + D3D12_VIEWPORT vp12 = { 0, 0, EFB_WIDTH, EFB_HEIGHT, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::current_command_list->RSSetViewports(1, &vp12); + D3D::current_command_list->OMSetRenderTargets(1, &m_efb.color_access_resize_tex->GetRTV12(), FALSE,
nullptr); + D3D::SetPointCopySampler(); + + CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height); + D3D::DrawShadedTexQuad(m_efb.color_tex, &src_rect, m_target_width, m_target_height, + StaticShaderCache::GetColorCopyPixelShader(true), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + {}, 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); + + m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + src_resource = m_efb.color_access_resize_tex->GetTex12(); + } + else + { + // Can source the EFB buffer + m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + src_resource = m_efb.color_tex->GetTex12(); + } + + // Copy to staging resource + D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = { 0, { DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, m_efb.color_access_readback_pitch } }; + CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_efb.color_access_readback_buffer, dst_footprint); + CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr); + + // Block until completion + D3D::command_list_mgr->ExecuteQueuedWork(true); + + // Restore EFB resource state if it was sourced from here + if (src_resource == m_efb.color_tex->GetTex12()) + m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + + // Restore state after resetting command list + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + g_renderer->RestoreAPIState(); + + // Resource copy has finished, so safe to map now + m_efb.color_access_readback_buffer->Map(0, nullptr, reinterpret_cast(&m_efb.color_access_readback_map)); +} + +void 
FramebufferManager::MapEFBDepthAccessCopy() +{ + D3D::command_list_mgr->CPUAccessNotify(); + + ID3D12Resource* src_resource; + if (m_target_width != EFB_WIDTH || m_target_height != EFB_HEIGHT || g_ActiveConfig.iMultisamples > 1) + { + // for non-1xIR or multisampled cases, we need to copy to an intermediate texture first + m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + + D3D12_VIEWPORT vp12 = { 0, 0, EFB_WIDTH, EFB_HEIGHT, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::current_command_list->RSSetViewports(1, &vp12); + D3D::current_command_list->OMSetRenderTargets(1, &m_efb.depth_access_resize_tex->GetRTV12(), FALSE, nullptr); + D3D::SetPointCopySampler(); + + CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height); + D3D::DrawShadedTexQuad(m_efb.depth_tex, &src_rect, m_target_width, m_target_height, + (g_ActiveConfig.iMultisamples > 1) ? StaticShaderCache::GetDepthResolveToColorPixelShader() : StaticShaderCache::GetColorCopyPixelShader(false), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + {}, 1.0f, 0, DXGI_FORMAT_R32_FLOAT, false, false); + + m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + src_resource = m_efb.depth_access_resize_tex->GetTex12(); + } + else + { + // Can source the EFB buffer + m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + src_resource = m_efb.depth_tex->GetTex12(); + } + + // Copy to staging resource + D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = { 0,{ DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, m_efb.depth_access_readback_pitch } }; + CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_efb.depth_access_readback_buffer, dst_footprint); + CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, 
&src_location, nullptr); + + // Block until completion + D3D::command_list_mgr->ExecuteQueuedWork(true); + + // Restore EFB resource state if it was sourced from here + if (src_resource == m_efb.depth_tex->GetTex12()) + m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + + // Restore state after resetting command list + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + g_renderer->RestoreAPIState(); + + // Resource copy has finished, so safe to map now + m_efb.depth_access_readback_buffer->Map(0, nullptr, reinterpret_cast(&m_efb.depth_access_readback_map)); +} + +void FramebufferManager::InvalidateEFBAccessCopies() +{ + if (m_efb.color_access_readback_map) + { + m_efb.color_access_readback_buffer->Unmap(0, nullptr); + m_efb.color_access_readback_map = nullptr; + } + + if (m_efb.depth_access_readback_map) + { + m_efb.depth_access_readback_buffer->Unmap(0, nullptr); + m_efb.depth_access_readback_map = nullptr; + } +} + +void FramebufferManager::DestroyEFBAccessCopies() +{ + InvalidateEFBAccessCopies(); + + SAFE_RELEASE(m_efb.color_access_resize_tex); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.color_access_readback_buffer); + m_efb.color_access_readback_buffer = nullptr; + + SAFE_RELEASE(m_efb.depth_access_resize_tex); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.depth_access_readback_buffer); + m_efb.depth_access_readback_buffer = nullptr; +} + void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) { // DX12's XFB decoder does not use this function. 
diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.h b/Source/Core/VideoBackends/D3D12/FramebufferManager.h index 78242197b2..13e9ae205c 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.h +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.h @@ -61,11 +61,7 @@ public: ~FramebufferManager(); static D3DTexture2D*& GetEFBColorTexture(); - static ID3D12Resource*& GetEFBColorStagingBuffer(); - static D3DTexture2D*& GetEFBDepthTexture(); - static D3DTexture2D*& GetEFBDepthReadTexture(); - static ID3D12Resource*& GetEFBDepthStagingBuffer(); static D3DTexture2D*& GetResolvedEFBColorTexture(); static D3DTexture2D*& GetResolvedEFBDepthTexture(); @@ -74,6 +70,17 @@ public: static void ResolveDepthTexture(); + // Access EFB from CPU + static u32 ReadEFBColorAccessCopy(u32 x, u32 y); + static float ReadEFBDepthAccessCopy(u32 x, u32 y); + static void UpdateEFBColorAccessCopy(u32 x, u32 y, u32 color); + static void UpdateEFBDepthAccessCopy(u32 x, u32 y, float depth); + static void InitializeEFBAccessCopies(); + static void MapEFBColorAccessCopy(); + static void MapEFBDepthAccessCopy(); + static void InvalidateEFBAccessCopies(); + static void DestroyEFBAccessCopies(); + private: std::unique_ptr CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override; void GetTargetSize(unsigned int* width, unsigned int* height) override; @@ -83,18 +90,24 @@ private: static struct Efb { D3DTexture2D* color_tex; - ID3D12Resource* color_staging_buf; D3DTexture2D* depth_tex; - ID3D12Resource* depth_staging_buf; - - D3DTexture2D* depth_read_texture; D3DTexture2D* color_temp_tex; D3DTexture2D* resolved_color_tex; D3DTexture2D* resolved_depth_tex; + D3DTexture2D* color_access_resize_tex; + ID3D12Resource* color_access_readback_buffer; + u8* color_access_readback_map; + u32 color_access_readback_pitch; + + D3DTexture2D* depth_access_resize_tex; + ID3D12Resource* depth_access_readback_buffer; + u8* depth_access_readback_map; + 
u32 depth_access_readback_pitch; + int slices; } m_efb; diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp index e3fc160c9d..4f763a68cb 100644 --- a/Source/Core/VideoBackends/D3D12/Render.cpp +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -51,8 +51,6 @@ static bool s_last_xfb_mode = false; static Television s_television; -static ID3D12Resource* s_access_efb_constant_buffer = nullptr; - enum CLEAR_BLEND_DESC { CLEAR_BLEND_DESC_ALL_CHANNELS_ENABLED = 0, @@ -110,25 +108,6 @@ static void SetupDeviceObjects() g_framebuffer_manager = std::make_unique(); - float colmat[20] = { 0.0f }; - colmat[0] = colmat[5] = colmat[10] = 1.0f; - - CheckHR( - D3D::device12->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), - D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(sizeof(colmat)), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&s_access_efb_constant_buffer) - ) - ); - - // Copy inital data to access_efb_cbuf12. 
- void* access_efb_constant_buffer_data = nullptr; - CheckHR(s_access_efb_constant_buffer->Map(0, nullptr, &access_efb_constant_buffer_data)); - memcpy(access_efb_constant_buffer_data, colmat, sizeof(colmat)); - D3D12_DEPTH_STENCIL_DESC depth_desc; depth_desc.DepthEnable = FALSE; depth_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; @@ -197,9 +176,6 @@ static void TeardownDeviceObjects() s_screenshot_texture = nullptr; } - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_access_efb_constant_buffer); - s_access_efb_constant_buffer = nullptr; - s_television.Shutdown(); gx_state_cache.Clear(); @@ -394,192 +370,60 @@ void Renderer::SetColorMask() // - GX_PokeZMode (TODO) u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) { - // EXISTINGD3D11TODO: This function currently is broken if anti-aliasing is enabled - - // Convert EFB dimensions to the ones of our render target - EFBRectangle efb_pixel_rc; - efb_pixel_rc.left = x; - efb_pixel_rc.top = y; - efb_pixel_rc.right = x + 1; - efb_pixel_rc.bottom = y + 1; - TargetRectangle target_pixel_rc = Renderer::ConvertEFBRectangle(efb_pixel_rc); - - // Take the mean of the resulting dimensions; TODO: Don't use the center pixel, compute the average color instead - D3D12_RECT rect_to_lock; - if (type == PEEK_COLOR || type == PEEK_Z) + if (type == PEEK_COLOR) { - rect_to_lock.left = (target_pixel_rc.left + target_pixel_rc.right) / 2; - rect_to_lock.top = (target_pixel_rc.top + target_pixel_rc.bottom) / 2; - rect_to_lock.right = rect_to_lock.left + 1; - rect_to_lock.bottom = rect_to_lock.top + 1; - } - else - { - rect_to_lock.left = target_pixel_rc.left; - rect_to_lock.right = target_pixel_rc.right; - rect_to_lock.top = target_pixel_rc.top; - rect_to_lock.bottom = target_pixel_rc.bottom; - } + u32 color = FramebufferManager::ReadEFBColorAccessCopy(x, y); - if (type == PEEK_Z) - { - D3D::command_list_mgr->CPUAccessNotify(); - - // depth buffers can only be completely 
CopySubresourceRegion'ed, so we're using DrawShadedTexQuad instead - // D3D12TODO: Is above statement true on D3D12? - D3D12_VIEWPORT vp12 = { 0.f, 0.f, 1.f, 1.f, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp12); - - D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_access_efb_constant_buffer->GetGPUVirtualAddress()); - D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); - - FramebufferManager::GetEFBDepthReadTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBDepthReadTexture()->GetRTV12(), FALSE, nullptr); - - D3D::SetPointCopySampler(); - - D3D::DrawShadedTexQuad( - FramebufferManager::GetEFBDepthTexture(), - &rect_to_lock, - Renderer::GetTargetWidth(), - Renderer::GetTargetHeight(), - StaticShaderCache::GetColorCopyPixelShader(true), - StaticShaderCache::GetSimpleVertexShader(), - StaticShaderCache::GetSimpleVertexShaderInputLayout(), - D3D12_SHADER_BYTECODE(), - 1.0f, - 0, - DXGI_FORMAT_R32_FLOAT, - false, - FramebufferManager::GetEFBDepthReadTexture()->GetMultisampled() - ); - - // copy to system memory - D3D12_BOX src_box = CD3DX12_BOX(0, 0, 0, 1, 1, 1); - ID3D12Resource* readback_buffer = FramebufferManager::GetEFBDepthStagingBuffer(); - - D3D12_TEXTURE_COPY_LOCATION dst_location = {}; - dst_location.pResource = readback_buffer; - dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst_location.PlacedFootprint.Offset = 0; - dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; - dst_location.PlacedFootprint.Footprint.Width = 1; - dst_location.PlacedFootprint.Footprint.Height = 1; - dst_location.PlacedFootprint.Footprint.Depth = 1; - dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - 
D3D12_TEXTURE_COPY_LOCATION src_location = {}; - src_location.pResource = FramebufferManager::GetEFBDepthReadTexture()->GetTex12(); - src_location.SubresourceIndex = 0; - src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - - FramebufferManager::GetEFBDepthReadTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); - D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); - - // Need to wait for the CPU to complete the copy (and all prior operations) before we can read it on the CPU. - D3D::command_list_mgr->ExecuteQueuedWork(true); - - FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); - - // Restores proper viewport/scissor settings. 
- g_renderer->RestoreAPIState(); - - // read the data from system memory - void* readback_buffer_data = nullptr; - CheckHR(readback_buffer->Map(0, nullptr, &readback_buffer_data)); - - // depth buffer is inverted in the d3d backend - float val = 1.0f - reinterpret_cast(readback_buffer_data)[0]; - u32 ret = 0; - - if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - // if Z is in 16 bit format you must return a 16 bit integer - ret = MathUtil::Clamp(static_cast(val * 65536.0f), 0, 0xFFFF); - } - else - { - ret = MathUtil::Clamp(static_cast(val * 16777216.0f), 0, 0xFFFFFF); - } - - // EXISTINGD3D11TODO: in RE0 this value is often off by one in Video_DX9 (where this code is derived from), which causes lighting to disappear - return ret; - } - else if (type == PEEK_COLOR) - { - D3D::command_list_mgr->CPUAccessNotify(); - - ID3D12Resource* readback_buffer = FramebufferManager::GetEFBColorStagingBuffer(); - - D3D12_BOX src_box = CD3DX12_BOX(rect_to_lock.left, rect_to_lock.top, 0, rect_to_lock.right, rect_to_lock.bottom, 1); - - D3D12_TEXTURE_COPY_LOCATION dst_location = {}; - dst_location.pResource = readback_buffer; - dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst_location.PlacedFootprint.Offset = 0; - dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - dst_location.PlacedFootprint.Footprint.Width = 1; - dst_location.PlacedFootprint.Footprint.Height = 1; - dst_location.PlacedFootprint.Footprint.Depth = 1; - dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - D3D12_TEXTURE_COPY_LOCATION src_location = {}; - src_location.pResource = FramebufferManager::GetResolvedEFBColorTexture()->GetTex12(); - src_location.SubresourceIndex = 0; - src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - - FramebufferManager::GetResolvedEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, 
D3D12_RESOURCE_STATE_COPY_SOURCE); - D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); - - // Need to wait for the CPU to complete the copy (and all prior operations) before we can read it on the CPU. - D3D::command_list_mgr->ExecuteQueuedWork(true); - - FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); - - // Restores proper viewport/scissor settings. - g_renderer->RestoreAPIState(); - - // read the data from system memory - void* readback_buffer_data = nullptr; - CheckHR(readback_buffer->Map(0, nullptr, &readback_buffer_data)); - - u32 ret = reinterpret_cast(readback_buffer_data)[0]; + // a little-endian value is expected to be returned + color = ((color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000)); // check what to do with the alpha channel (GX_PokeAlphaRead) PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) { - ret = RGBA8ToRGBA6ToRGBA8(ret); + color = RGBA8ToRGBA6ToRGBA8(color); } else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) { - ret = RGBA8ToRGB565ToRGBA8(ret); + color = RGBA8ToRGB565ToRGBA8(color); } if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) { - ret |= 0xFF000000; + color |= 0xFF000000; } if (alpha_read_mode.ReadMode == 2) { - return ret; // GX_READ_NONE + return color; // GX_READ_NONE } else if (alpha_read_mode.ReadMode == 1) { - return (ret | 0xFF000000); // GX_READ_FF + return (color | 0xFF000000); // GX_READ_FF } else /*if(alpha_read_mode.ReadMode == 0)*/ { - return (ret & 0x00FFFFFF); // 
GX_READ_00 + return (color & 0x00FFFFFF); // GX_READ_00 } } + else // if (type == PEEK_Z) + { + // depth buffer is inverted in the d3d backend + float depth = 1.0f - FramebufferManager::ReadEFBDepthAccessCopy(x, y); + u32 ret = 0; - return 0; + if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) + { + // if Z is in 16 bit format you must return a 16 bit integer + ret = MathUtil::Clamp(static_cast(depth * 65536.0f), 0, 0xFFFF); + } + else + { + ret = MathUtil::Clamp(static_cast(depth * 16777216.0f), 0, 0xFFFFFF); + } + + return ret; + } } void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) @@ -706,6 +550,8 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha // Restores proper viewport/scissor settings. g_renderer->RestoreAPIState(); + + FramebufferManager::InvalidateEFBAccessCopies(); } void Renderer::ReinterpretPixelData(unsigned int convtype) @@ -906,6 +752,9 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height return; } + // Invalidate EFB access copies. Not strictly necessary, but this avoids having the buffers mapped when calling Present(). + FramebufferManager::InvalidateEFBAccessCopies(); + // Prepare to copy the XFBs to our backbuffer UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); TargetRectangle target_rc = GetTargetRectangle(); @@ -1272,6 +1121,9 @@ void Renderer::ApplyState(bool use_dst_alpha) D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, false); } + + // Always called prior to drawing, so we can invalidate the CPU EFB copies here. 
+ FramebufferManager::InvalidateEFBAccessCopies(); } void Renderer::RestoreState() From 9efe66509d9923c8dedb5948808a4d2c0c7d802e Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 21 Feb 2016 18:55:55 +1000 Subject: [PATCH 10/16] D3D12: Fix crash/errors when switching MSAA modes while running --- Source/Core/VideoBackends/D3D12/D3DState.cpp | 12 ++++++++++++ Source/Core/VideoBackends/D3D12/D3DState.h | 9 +++++++-- Source/Core/VideoBackends/D3D12/Render.cpp | 11 +++++++++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DState.cpp b/Source/Core/VideoBackends/D3D12/D3DState.cpp index d0df14371f..5810a74536 100644 --- a/Source/Core/VideoBackends/D3D12/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DState.cpp @@ -461,6 +461,18 @@ HRESULT StateCache::GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D return S_OK; } +void StateCache::OnMSAASettingsChanged() +{ + for (auto& it : m_small_pso_map) + { + SAFE_RELEASE(it.second); + } + m_small_pso_map.clear(); + + // Update sample count for new PSOs being created + gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples; +} + void StateCache::Clear() { for (auto& it : m_pso_map) diff --git a/Source/Core/VideoBackends/D3D12/D3DState.h b/Source/Core/VideoBackends/D3D12/D3DState.h index 85e83592d0..49d4fab157 100644 --- a/Source/Core/VideoBackends/D3D12/D3DState.h +++ b/Source/Core/VideoBackends/D3D12/D3DState.h @@ -95,6 +95,9 @@ public: HRESULT GetPipelineStateObjectFromCache(D3D12_GRAPHICS_PIPELINE_STATE_DESC* pso_desc, ID3D12PipelineState** pso); HRESULT GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D12PipelineState** pso, D3D12_PRIMITIVE_TOPOLOGY_TYPE topology, const GeometryShaderUid* gs_uid, const PixelShaderUid* ps_uid, const VertexShaderUid* vs_uid); + // Called when the MSAA count/quality changes. Invalidates all small PSOs. + void OnMSAASettingsChanged(); + // Release all cached states and clear hash tables. 
void Clear(); @@ -126,7 +129,8 @@ private: lhs.BlendState.RenderTarget[0].DestBlend, lhs.BlendState.RenderTarget[0].SrcBlend, lhs.BlendState.RenderTarget[0].RenderTargetWriteMask, - lhs.RTVFormats[0]) == + lhs.RTVFormats[0], + lhs.SampleDesc.Count) == std::tie(rhs.PS.pShaderBytecode, rhs.VS.pShaderBytecode, rhs.GS.pShaderBytecode, rhs.RasterizerState.CullMode, rhs.DepthStencilState.DepthEnable, @@ -137,7 +141,8 @@ private: rhs.BlendState.RenderTarget[0].DestBlend, rhs.BlendState.RenderTarget[0].SrcBlend, rhs.BlendState.RenderTarget[0].RenderTargetWriteMask, - rhs.RTVFormats[0]); + rhs.RTVFormats[0], + rhs.SampleDesc.Count); } }; diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp index 4f763a68cb..bf80081ff5 100644 --- a/Source/Core/VideoBackends/D3D12/Render.cpp +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -984,9 +984,16 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0)) { s_last_xfb_mode = g_ActiveConfig.bUseRealXFB; - s_last_multisamples = g_ActiveConfig.iMultisamples; - StaticShaderCache::InvalidateMSAAShaders(); + // Block on any changes until the GPU catches up, so we can free resources safely. + D3D::command_list_mgr->ExecuteQueuedWork(true); + + if (s_last_multisamples != g_ActiveConfig.iMultisamples) + { + s_last_multisamples = g_ActiveConfig.iMultisamples; + StaticShaderCache::InvalidateMSAAShaders(); + gx_state_cache.OnMSAASettingsChanged(); + } if (window_resized) { From 04257029e06915772432db8782daa98b7287aa5e Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 21 Feb 2016 21:51:15 +1000 Subject: [PATCH 11/16] D3D12: Don't enumerate outputs, it's not used anywhere The D3D12 backend does not support exclusive fullscreen. 
--- Source/Core/VideoBackends/D3D12/D3DBase.cpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.cpp b/Source/Core/VideoBackends/D3D12/D3DBase.cpp index 01d5d9a989..22abfef574 100644 --- a/Source/Core/VideoBackends/D3D12/D3DBase.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DBase.cpp @@ -362,7 +362,6 @@ HRESULT Create(HWND wnd) IDXGIFactory* factory; IDXGIAdapter* adapter; - IDXGIOutput* output; hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); if (FAILED(hr)) MessageBox(wnd, _T("Failed to create IDXGIFactory object"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); @@ -376,25 +375,6 @@ HRESULT Create(HWND wnd) MessageBox(wnd, _T("Failed to enumerate adapters"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); } - // TODO: Make this configurable - hr = adapter->EnumOutputs(0, &output); - if (FAILED(hr)) - { - // try using the first one - IDXGIAdapter* firstadapter; - hr = factory->EnumAdapters(0, &firstadapter); - if (!FAILED(hr)) - hr = firstadapter->EnumOutputs(0, &output); - if (FAILED(hr)) - MessageBox(wnd, - _T("Failed to enumerate outputs!\n") - _T("This usually happens when you've set your video adapter to the Nvidia GPU in an Optimus-equipped system.\n") - _T("Set Dolphin to use the high-performance graphics in Nvidia's drivers instead and leave Dolphin's video adapter set to the Intel GPU."), - _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - - SAFE_RELEASE(firstadapter); - } - // get supported AA modes s_aa_modes = EnumAAModes(adapter); @@ -530,7 +510,6 @@ HRESULT Create(HWND wnd) MessageBox(wnd, _T("Failed to associate the window"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); SAFE_RELEASE(factory); - SAFE_RELEASE(output); SAFE_RELEASE(adapter) CreateDescriptorHeaps(); From 13e143de3861956b2f9a7c2a4d5a5d6fba62ff4d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 23 Feb 2016 22:12:03 +1000 Subject: [PATCH 12/16] D3D12: Optionally 
prevent StreamBuffer from executing command list This applies to callers that do not have full knowledge of the command list state, and thus, cannot restore it should allocations cause command list execution. Instead we reallocate a new buffer. Should not happen often enough for this to be a concern, as it's mainly for the utility classes. --- .../VideoBackends/D3D12/D3DStreamBuffer.cpp | 43 +++++++++++++------ .../VideoBackends/D3D12/D3DStreamBuffer.h | 5 ++- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp index aa868b0004..22c83a3654 100644 --- a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp @@ -39,7 +39,7 @@ D3DStreamBuffer::~D3DStreamBuffer() // Obviously this is non-performant, so the buffer max_size should be large enough to // ensure this never happens. -bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t alignment) +bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t alignment, bool allow_execute) { CHECK(allocation_size <= m_buffer_max_size, "Error: Requested allocation size in D3DStreamBuffer is greater than max allowed size of backing buffer."); @@ -75,7 +75,7 @@ bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t align // Slow path. No room at front, or back, due to the GPU still (possibly) accessing parts of the buffer. // Resize if possible, else stall. 
- bool command_list_executed = AttemptBufferResizeOrElseStall(allocation_size); + bool command_list_executed = AttemptBufferResizeOrElseStall(allocation_size, allow_execute); return command_list_executed; } @@ -113,14 +113,25 @@ void D3DStreamBuffer::AllocateBuffer(size_t size) CheckHR(m_buffer->Map(0, nullptr, &m_buffer_cpu_address)); m_buffer_gpu_address = m_buffer->GetGPUVirtualAddress(); - m_buffer_size = size; + + // Start at the beginning of the new buffer. + m_buffer_gpu_completion_offset = 0; + m_buffer_current_allocation_offset = 0; + m_buffer_offset = 0; + + // Notify observers. + if (m_buffer_reallocation_notification != nullptr) + *m_buffer_reallocation_notification = true; + + // If we had any fences queued, they are no longer relevant. + ClearFences(); } // Function returns true if current command list executed as a result of current command list // referencing all of buffer's contents, AND we are already at max_size. No alternative but to // flush. See comments above AllocateSpaceInBuffer for more details. -bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size) +bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size, bool allow_execute) { // This function will attempt to increase the size of the buffer, in response // to running out of room. If the buffer is already at its maximum size specified @@ -155,14 +166,7 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size) if (new_size > m_buffer_size) { AllocateBuffer(new_size); - m_buffer_current_allocation_offset = 0; m_buffer_offset = allocation_size; - - if (m_buffer_reallocation_notification != nullptr) - { - *m_buffer_reallocation_notification = true; - } - return false; } @@ -177,6 +181,14 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size) return false; } + // If allow_execute is false, the caller cannot handle command list execution (and the associated reset), so re-allocate the same-sized buffer. 
+ if (!allow_execute) + { + AllocateBuffer(new_size); + m_buffer_offset = allocation_size; + return false; + } + // 4) If we get to this point, that means there is no outstanding queued GPU work, and we're still out of room. // This is bad - and performance will suffer due to the CPU/GPU serialization, but the show must go on. @@ -188,6 +200,7 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size) m_buffer_offset = allocation_size; m_buffer_current_allocation_offset = 0; m_buffer_gpu_completion_offset = 0; + ClearFences(); return true; } @@ -299,7 +312,7 @@ void D3DStreamBuffer::UpdateGPUProgress() } else { - // Fences are stored in assending order, so once we hit a fence we haven't yet crossed on GPU, abort search. + // Fences are stored in ascending order, so once we hit a fence we haven't yet crossed on GPU, abort search. break; } } @@ -312,6 +325,12 @@ void D3DStreamBuffer::QueueFenceCallback(void* owning_object, UINT64 fence_value owning_stream_buffer->QueueFence(fence_value); } +void D3DStreamBuffer::ClearFences() +{ + while (!m_queued_fences.empty()) + m_queued_fences.pop(); +} + bool D3DStreamBuffer::HasBufferOffsetChangedSinceLastFence() const { if (m_queued_fences.empty()) diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h index e931904fbd..9aeb18468c 100644 --- a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h @@ -17,7 +17,7 @@ public: D3DStreamBuffer(size_t initial_size, size_t max_size, bool* buffer_reallocation_notification); ~D3DStreamBuffer(); - bool AllocateSpaceInBuffer(size_t allocation_size, size_t alignment); + bool AllocateSpaceInBuffer(size_t allocation_size, size_t alignment, bool allow_execute = true); void OverrideSizeOfPreviousAllocation(size_t override_allocation_size); void* GetBaseCPUAddress() const; @@ -32,7 +32,7 @@ public: private: void AllocateBuffer(size_t size); - bool 
AttemptBufferResizeOrElseStall(size_t new_size); + bool AttemptBufferResizeOrElseStall(size_t allocation_size, bool allow_execute); bool AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(size_t allocation_size); @@ -40,6 +40,7 @@ private: void UpdateGPUProgress(); + void ClearFences(); bool HasBufferOffsetChangedSinceLastFence() const; void QueueFence(UINT64 fence_value); From c4d79d6db3192ec3b163c31a611a4f20c03ed8b0 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 23 Feb 2016 22:18:41 +1000 Subject: [PATCH 13/16] D3D12: Add helper method for setting both viewport and scissor rect Simplifies making changes, as well as keeping the two in sync. --- Source/Core/VideoBackends/D3D12/D3DUtil.cpp | 44 ++++++----- Source/Core/VideoBackends/D3D12/D3DUtil.h | 3 +- .../D3D12/FramebufferManager.cpp | 17 ++-- .../VideoBackends/D3D12/PSTextureEncoder.cpp | 3 +- Source/Core/VideoBackends/D3D12/Render.cpp | 79 ++----------------- .../Core/VideoBackends/D3D12/TextureCache.cpp | 32 ++------ 6 files changed, 49 insertions(+), 129 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp index a1a7c512da..360cd04df9 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp @@ -72,7 +72,7 @@ public: // returns vertex offset to the new data size_t AppendData(const void* data, size_t size, size_t vertex_size) { - m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size); + m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size, false); memcpy(static_cast(m_stream_buffer->GetCPUAddressOfCurrentAllocation()), data, size); @@ -81,7 +81,7 @@ public: size_t BeginAppendData(void** write_ptr, size_t size, size_t vertex_size) { - m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size); + m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size, false); *write_ptr = m_stream_buffer->GetCPUAddressOfCurrentAllocation(); @@ -422,7 +422,7 @@ int CD3DFont::DrawTextScaled(float x, float y, 
float size, float spacing, u32 dw D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, m_texture12_gpu); // upper bound is nchars * 6, assuming no spaces - m_vertex_buffer->AllocateSpaceInBuffer(static_cast(text.length()) * 6 * sizeof(FONT2DVERTEX), sizeof(FONT2DVERTEX)); + m_vertex_buffer->AllocateSpaceInBuffer(static_cast(text.length()) * 6 * sizeof(FONT2DVERTEX), sizeof(FONT2DVERTEX), false); FONT2DVERTEX* vertices12 = reinterpret_cast(m_vertex_buffer->GetCPUAddressOfCurrentAllocation()); int num_triangles = 0; @@ -588,6 +588,28 @@ void SetLinearCopySampler() D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true); } +void SetViewportAndScissor(u32 top_left_x, u32 top_left_y, u32 width, u32 height, float min_depth, float max_depth) +{ + D3D12_VIEWPORT viewport = { + static_cast(top_left_x), + static_cast(top_left_y), + static_cast(width), + static_cast(height), + min_depth, + max_depth + }; + + D3D12_RECT scissor = { + static_cast(top_left_x), + static_cast(top_left_y), + static_cast(top_left_x + width), + static_cast(top_left_y + height) + }; + + D3D::current_command_list->RSSetViewports(1, &viewport); + D3D::current_command_list->RSSetScissorRects(1, &scissor); +}; + void DrawShadedTexQuad(D3DTexture2D* texture, const D3D12_RECT* rSource, int source_width, @@ -684,13 +706,6 @@ void DrawShadedTexQuad(D3DTexture2D* texture, D3D::current_command_list->SetPipelineState(pso); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); - // In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled. - // Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid - // dirtying state. 
- - // 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072 - D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); - D3D::current_command_list->DrawInstanced(4, 1, static_cast(stq_offset), 0); g_renderer->RestoreAPIState(); @@ -840,13 +855,6 @@ void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH D3D::current_command_list->SetPipelineState(pso); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); - // In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled. - // Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid - // dirtying state. - - // 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072 - D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); - D3D::current_command_list->DrawInstanced(4, 1, static_cast(clearq_offset), 0); g_renderer->RestoreAPIState(); @@ -865,7 +873,6 @@ void DrawEFBPokeQuads(EFBAccessType type, size_t num_points, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, - D3D12_VIEWPORT* viewport, D3D12_CPU_DESCRIPTOR_HANDLE* render_target, D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer, bool rt_multisampled @@ -925,7 +932,6 @@ void DrawEFBPokeQuads(EFBAccessType type, // Corresponding dirty flags set outside loop. 
D3D::current_command_list->OMSetRenderTargets(1, render_target, FALSE, depth_buffer); - D3D::current_command_list->RSSetViewports(1, viewport); D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); D3D12_VERTEX_BUFFER_VIEW vb_view = { diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.h b/Source/Core/VideoBackends/D3D12/D3DUtil.h index b000c4ac99..add8516be1 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.h +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.h @@ -74,6 +74,8 @@ void ShutdownUtils(); void SetPointCopySampler(); void SetLinearCopySampler(); +void SetViewportAndScissor(u32 top_left_x, u32 top_left_y, u32 width, u32 height, float min_depth = D3D12_MIN_DEPTH, float max_depth = D3D12_MAX_DEPTH); + void DrawShadedTexQuad(D3DTexture2D* texture, const D3D12_RECT* source, int source_width, @@ -97,7 +99,6 @@ void DrawEFBPokeQuads(EFBAccessType type, size_t num_points, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, - D3D12_VIEWPORT* viewport, D3D12_CPU_DESCRIPTOR_HANDLE* render_target, D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer, bool rt_multisampled); diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp index 9b7bebf608..a3c8064318 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp @@ -173,9 +173,7 @@ void FramebufferManager::ResolveDepthTexture() { // ResolveSubresource does not work with depth textures. // Instead, we use a shader that selects the minimum depth from all samples. 
- - const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast(m_target_width), static_cast(m_target_height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp12); + D3D::SetViewportAndScissor(0, 0, m_target_width, m_target_height); m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(0, nullptr, FALSE, &m_efb.resolved_depth_tex->GetDSV12()); @@ -292,10 +290,9 @@ void FramebufferManager::MapEFBColorAccessCopy() // for non-1xIR or multisampled cases, we need to copy to an intermediate texture first m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - D3D12_VIEWPORT vp12 = { 0, 0, EFB_WIDTH, EFB_HEIGHT, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp12); - D3D::current_command_list->OMSetRenderTargets(1, &m_efb.color_access_resize_tex->GetRTV12(), FALSE, nullptr); + D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); D3D::SetPointCopySampler(); + D3D::current_command_list->OMSetRenderTargets(1, &m_efb.color_access_resize_tex->GetRTV12(), FALSE, nullptr); CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height); D3D::DrawShadedTexQuad(m_efb.color_tex, &src_rect, m_target_width, m_target_height, @@ -345,10 +342,9 @@ void FramebufferManager::MapEFBDepthAccessCopy() // for non-1xIR or multisampled cases, we need to copy to an intermediate texture first m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - D3D12_VIEWPORT vp12 = { 0, 0, EFB_WIDTH, EFB_HEIGHT, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp12); - D3D::current_command_list->OMSetRenderTargets(1, &m_efb.depth_access_resize_tex->GetRTV12(), FALSE, nullptr); + D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); D3D::SetPointCopySampler(); + 
D3D::current_command_list->OMSetRenderTargets(1, &m_efb.color_access_resize_tex->GetRTV12(), FALSE, nullptr); CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height); D3D::DrawShadedTexQuad(m_efb.depth_tex, &src_rect, m_target_width, m_target_height, @@ -425,8 +421,7 @@ void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) void XFBSource::CopyEFB(float gamma) { // Copy EFB data to XFB and restore render target again - const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast(texWidth), static_cast(texHeight), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp12); + D3D::SetViewportAndScissor(0, 0, texWidth, texHeight); const D3D12_RECT rect = CD3DX12_RECT(0, 0, texWidth, texHeight); diff --git a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp index 6af30765ba..bd1610adb3 100644 --- a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp @@ -152,8 +152,7 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p const u32 words_per_row = bytes_per_row / sizeof(u32); - D3D12_VIEWPORT vp = { 0.f, 0.f, FLOAT(words_per_row), FLOAT(num_blocks_y), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(0, 0, words_per_row, num_blocks_y); constexpr EFBRectangle full_src_rect(0, 0, EFB_WIDTH, EFB_HEIGHT); diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp index bf80081ff5..de63d406a0 100644 --- a/Source/Core/VideoBackends/D3D12/Render.cpp +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -428,7 +428,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) { - D3D12_VIEWPORT vp = { 0.0f, 0.0f, static_cast(GetTargetWidth()), static_cast(GetTargetHeight()), 
D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::SetViewportAndScissor(0, 0, GetTargetWidth(), GetTargetHeight()); if (type == POKE_COLOR) { @@ -439,7 +439,6 @@ void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num num_points, &g_reset_blend_desc, &g_reset_depth_desc, - &vp, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), nullptr, FramebufferManager::GetEFBColorTexture()->GetMultisampled() @@ -453,7 +452,6 @@ void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num num_points, &s_clear_blend_descs[CLEAR_BLEND_DESC_ALL_CHANNELS_DISABLED], &s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_ENABLED], - &vp, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), &FramebufferManager::GetEFBDepthTexture()->GetDSV12(), FramebufferManager::GetEFBColorTexture()->GetMultisampled() @@ -533,19 +531,9 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha // Update the view port for clearing the picture TargetRectangle target_rc = Renderer::ConvertEFBRectangle(rc); - D3D12_VIEWPORT vp = { - static_cast(target_rc.left), - static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), - static_cast(target_rc.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp); - // Color is passed in bgra mode so we need to convert it to rgba u32 rgba_color = (color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000); + D3D::SetViewportAndScissor(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight()); D3D::DrawClearQuad(rgba_color, 1.0f - (z & 0xFFFFFF) / 16777216.0f, blend_desc, depth_stencil_desc, FramebufferManager::GetEFBColorTexture()->GetMultisampled()); // Restores proper viewport/scissor settings. 
@@ -575,16 +563,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) return; } - D3D12_VIEWPORT vp = { - 0.f, - 0.f, - static_cast(g_renderer->GetTargetWidth()), - static_cast(g_renderer->GetTargetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(0, 0, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight()); FramebufferManager::GetEFBColorTempTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTempTexture()->GetRTV12(), FALSE, nullptr); @@ -765,27 +744,13 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height float clear_color[4] = { 0.f, 0.f, 0.f, 1.f }; D3D::current_command_list->ClearRenderTargetView(D3D::GetBackBuffer()->GetRTV12(), clear_color, 0, nullptr); - // D3D12: Because scissor-testing is always enabled, change scissor rect to backbuffer in case EFB is smaller - // than swap chain back buffer. - D3D12_RECT back_buffer_rect = { 0L, 0L, GetBackbufferWidth(), GetBackbufferHeight() }; - D3D::current_command_list->RSSetScissorRects(1, &back_buffer_rect); - // activate linear filtering for the buffer copies D3D::SetLinearCopySampler(); if (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB) { // EXISTINGD3D11TODO: Television should be used to render Virtual XFB mode as well. 
- D3D12_VIEWPORT vp12 = { - static_cast(target_rc.left), - static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), - static_cast(target_rc.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp12); + D3D::SetViewportAndScissor(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight()); s_television.Submit(xfb_addr, fb_stride, fb_width, fb_height); s_television.Render(); @@ -933,16 +898,7 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height } // Reset viewport for drawing text - D3D12_VIEWPORT vp = { - 0.0f, - 0.0f, - static_cast(GetBackbufferWidth()), - static_cast(GetBackbufferHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(0, 0, GetBackbufferWidth(), GetBackbufferHeight()); Renderer::DrawDebugText(); @@ -1368,30 +1324,12 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D TargetRectangle left_rc, right_rc; ConvertStereoRectangle(dst, left_rc, right_rc); - D3D12_VIEWPORT left_vp = { - static_cast(left_rc.left), - static_cast(left_rc.top), - static_cast(left_rc.GetWidth()), - static_cast(left_rc.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D12_VIEWPORT right_vp = { - static_cast(right_rc.left), - static_cast(right_rc.top), - static_cast(right_rc.GetWidth()), - static_cast(right_rc.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - // Swap chain backbuffer is never multisampled.. 
- D3D::current_command_list->RSSetViewports(1, &left_vp); + D3D::SetViewportAndScissor(left_rc.left, left_rc.top, left_rc.GetWidth(), left_rc.GetHeight()); D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); - D3D::current_command_list->RSSetViewports(1, &right_vp); + D3D::SetViewportAndScissor(right_rc.left, right_rc.top, right_rc.GetWidth(), right_rc.GetHeight()); D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 1, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); } else if (g_ActiveConfig.iStereoMode == STEREO_3DVISION) @@ -1433,8 +1371,7 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D } else { - D3D12_VIEWPORT vp = { static_cast(dst.left), static_cast(dst.top), static_cast(dst.GetWidth()), static_cast(dst.GetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(dst.left, dst.top, dst.GetWidth(), dst.GetHeight()); D3D::DrawShadedTexQuad( src_texture, diff --git a/Source/Core/VideoBackends/D3D12/TextureCache.cpp b/Source/Core/VideoBackends/D3D12/TextureCache.cpp index d88fce86c7..37660b9136 100644 --- a/Source/Core/VideoBackends/D3D12/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D12/TextureCache.cpp @@ -164,15 +164,7 @@ void TextureCache::TCacheEntry::CopyRectangleFromTexture( return; } - const D3D12_VIEWPORT vp = { - float(dst_rect.left), - float(dst_rect.top), - float(dst_rect.GetWidth()), - float(dst_rect.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - D3D::current_command_list->RSSetViewports(1, 
&vp); + D3D::SetViewportAndScissor(dst_rect.left, dst_rect.top, dst_rect.GetWidth(), dst_rect.GetHeight()); m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &m_texture->GetRTV12(), FALSE, nullptr); @@ -289,18 +281,6 @@ void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat FramebufferManager::GetResolvedEFBColorTexture(); } - // stretch picture with increased internal resolution - const D3D12_VIEWPORT vp = { - 0.f, - 0.f, - static_cast(config.width), - static_cast(config.height), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp); - // set transformation if (cbuf_id != old_cbuf_id) { @@ -311,6 +291,9 @@ void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat old_cbuf_id = cbuf_id; } + // stretch picture with increased internal resolution + D3D::SetViewportAndScissor(0, 0, config.width, config.height); + D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_efb_copy_stream_buffer->GetGPUAddressOfCurrentAllocation()); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); @@ -441,14 +424,13 @@ void main( void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) { - // stretch picture with increased internal resolution - const D3D12_VIEWPORT vp = { 0.f, 0.f, static_cast(unconverted->config.width), static_cast(unconverted->config.height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp); - const unsigned int palette_buffer_allocation_size = 512; m_palette_stream_buffer->AllocateSpaceInBuffer(palette_buffer_allocation_size, 256); memcpy(m_palette_stream_buffer->GetCPUAddressOfCurrentAllocation(), palette, palette_buffer_allocation_size); + // stretch picture with increased internal resolution + 
D3D::SetViewportAndScissor(0, 0, unconverted->config.width, unconverted->config.height); + // D3D12: Because the second SRV slot is occupied by this buffer, and an arbitrary texture occupies the first SRV slot, // we need to allocate temporary space out of our descriptor heap, place the palette SRV in the second slot, then copy the // existing texture's descriptor into the first slot. From 74275bdfe3e46008969e13e6bdee968c573132b5 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 23 Feb 2016 22:21:09 +1000 Subject: [PATCH 14/16] D3D12: Don't keep screenshot/encoder buffers mapped Readback heaps do not support persistent mapping. See D3D12 docs. --- Source/Core/VideoBackends/D3D12/D3DBase.cpp | 4 ---- .../VideoBackends/D3D12/PSTextureEncoder.cpp | 8 +++++--- .../VideoBackends/D3D12/PSTextureEncoder.h | 1 - Source/Core/VideoBackends/D3D12/Render.cpp | 18 ++++++++++++------ 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.cpp b/Source/Core/VideoBackends/D3D12/D3DBase.cpp index 22abfef574..637b11b059 100644 --- a/Source/Core/VideoBackends/D3D12/D3DBase.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DBase.cpp @@ -486,11 +486,7 @@ HRESULT Create(HWND wnd) D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_DEPTHSTENCILVIEW_NOT_SET, // Benign. D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, // Benign. D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, // Benign. - D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, // Benign. Probably. - D3D12_MESSAGE_ID_INVALID_SUBRESOURCE_STATE, D3D12_MESSAGE_ID_MAP_INVALID_NULLRANGE, // Benign. - D3D12_MESSAGE_ID_EXECUTECOMMANDLISTS_GPU_WRITTEN_READBACK_RESOURCE_MAPPED, // Benign. - D3D12_MESSAGE_ID_RESOURCE_BARRIER_BEFORE_AFTER_MISMATCH // Benign. Probably. 
}; filter.DenyList.NumIDs = ARRAYSIZE(id_list); filter.DenyList.pIDList = id_list; diff --git a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp index bd1610adb3..5c27244bdd 100644 --- a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp @@ -89,8 +89,6 @@ void PSTextureEncoder::Init() D3D::SetDebugObjectName12(m_out_readback_buffer, "efb encoder output staging buffer"); - CheckHR(m_out_readback_buffer->Map(0, nullptr, &m_out_readback_buffer_data)); - // Create constant buffer for uploading data to shaders. Need to align to 256 bytes. unsigned int encode_params_buffer_size = (sizeof(EFBEncodeParams) + 0xff) & ~0xff; @@ -220,8 +218,10 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p D3D::command_list_mgr->ExecuteQueuedWork(true); // Transfer staging buffer to GameCube/Wii RAM + void* readback_data_map; + CheckHR(m_out_readback_buffer->Map(0, nullptr, &readback_data_map)); - u8* src = static_cast(m_out_readback_buffer_data); + u8* src = static_cast(readback_data_map); u32 read_stride = std::min(bytes_per_row, dst_location.PlacedFootprint.Footprint.RowPitch); for (unsigned int y = 0; y < num_blocks_y; ++y) { @@ -231,6 +231,8 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p src += dst_location.PlacedFootprint.Footprint.RowPitch; } + m_out_readback_buffer->Unmap(0, nullptr); + // Restores proper viewport/scissor settings. 
g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h index c8f05788e8..1e42a87f6a 100644 --- a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h +++ b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h @@ -29,7 +29,6 @@ private: D3D12_CPU_DESCRIPTOR_HANDLE m_out_rtv_cpu = {}; ID3D12Resource* m_out_readback_buffer = nullptr; - void* m_out_readback_buffer_data = nullptr; ID3D12Resource* m_encode_params_buffer = nullptr; void* m_encode_params_buffer_data = nullptr; diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp index de63d406a0..4881fec717 100644 --- a/Source/Core/VideoBackends/D3D12/Render.cpp +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -76,7 +76,6 @@ D3D12_DEPTH_STENCIL_DESC g_reset_depth_desc = {}; D3D12_RASTERIZER_DESC g_reset_rast_desc = {}; static ID3D12Resource* s_screenshot_texture = nullptr; -static void* s_screenshot_texture_data = nullptr; // Nvidia stereo blitting struct defined in "nvstereo.h" from the Nvidia SDK typedef struct _Nv_Stereo_Image_Header @@ -162,7 +161,6 @@ static void SetupDeviceObjects() g_reset_rast_desc = rast_desc; s_screenshot_texture = nullptr; - s_screenshot_texture_data = nullptr; } // Kill off all device objects @@ -200,8 +198,6 @@ void CreateScreenshotTexture() IID_PPV_ARGS(&s_screenshot_texture) ) ); - - CheckHR(s_screenshot_texture->Map(0, nullptr, &s_screenshot_texture_data)); } static D3D12_BOX GetScreenshotSourceBox(const TargetRectangle& target_rc) @@ -677,7 +673,12 @@ bool Renderer::SaveScreenshot(const std::string& filename, const TargetRectangle D3D::command_list_mgr->ExecuteQueuedWork(true); - saved_png = TextureToPng(static_cast(s_screenshot_texture_data), dst_location.PlacedFootprint.Footprint.RowPitch, filename, source_box.right - source_box.left, source_box.bottom - source_box.top, false); + void* screenshot_texture_map; + CheckHR(s_screenshot_texture->Map(0, 
nullptr, &screenshot_texture_map)); + + saved_png = TextureToPng(static_cast(screenshot_texture_map), dst_location.PlacedFootprint.Footprint.RowPitch, filename, source_box.right - source_box.left, source_box.bottom - source_box.top, false); + + s_screenshot_texture->Unmap(0, nullptr); if (saved_png) { @@ -877,7 +878,12 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height w = s_record_width; h = s_record_height; } - formatBufferDump(static_cast(s_screenshot_texture_data), &frame_data[0], source_width, source_height, dst_location.PlacedFootprint.Footprint.RowPitch); + + void* screenshot_texture_map; + CheckHR(s_screenshot_texture->Map(0, nullptr, &screenshot_texture_map)); + formatBufferDump(static_cast(screenshot_texture_map), &frame_data[0], source_width, source_height, dst_location.PlacedFootprint.Footprint.RowPitch); + s_screenshot_texture->Unmap(0, nullptr); + FlipImageData(&frame_data[0], w, h); AVIDump::AddFrame(&frame_data[0], source_width, source_height); } From c793459b88651513b4f2f5ed5795f1561f67cb1d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 24 Feb 2016 00:31:14 +1000 Subject: [PATCH 15/16] D3D12: Fixed issue where EFB copies could end up corrupted after reset Also prevents previously-released textures from ending up in a descriptor table. 
--- Source/Core/VideoBackends/D3D12/TextureCache.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/TextureCache.cpp b/Source/Core/VideoBackends/D3D12/TextureCache.cpp index 37660b9136..9ecc7e6123 100644 --- a/Source/Core/VideoBackends/D3D12/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D12/TextureCache.cpp @@ -25,6 +25,7 @@ namespace DX12 static std::unique_ptr s_encoder = nullptr; static std::unique_ptr s_efb_copy_stream_buffer = nullptr; +static u32 s_efb_copy_last_cbuf_id = UINT_MAX; static ID3D12Resource* s_texture_cache_entry_readback_buffer = nullptr; static void* s_texture_cache_entry_readback_buffer_data = nullptr; @@ -264,8 +265,6 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat src_format, const EFBRectangle& srcRect, bool scale_by_half, unsigned int cbuf_id, const float* colmat) { - static unsigned int old_cbuf_id = UINT_MAX; - // When copying at half size, in multisampled mode, resolve the color/depth buffer first. // This is because multisampled texture reads go through Load, not Sample, and the linear // filter is ignored. 
@@ -282,13 +281,13 @@ void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat } // set transformation - if (cbuf_id != old_cbuf_id) + if (s_efb_copy_last_cbuf_id != cbuf_id) { s_efb_copy_stream_buffer->AllocateSpaceInBuffer(28 * sizeof(float), 256); memcpy(s_efb_copy_stream_buffer->GetCPUAddressOfCurrentAllocation(), colmat, 28 * sizeof(float)); - old_cbuf_id = cbuf_id; + s_efb_copy_last_cbuf_id = cbuf_id; } // stretch picture with increased internal resolution @@ -536,6 +535,7 @@ TextureCache::TextureCache() s_encoder->Init(); s_efb_copy_stream_buffer = std::make_unique(1024 * 1024, 1024 * 1024, nullptr); + s_efb_copy_last_cbuf_id = UINT_MAX; s_texture_cache_entry_readback_buffer = nullptr; s_texture_cache_entry_readback_buffer_data = nullptr; @@ -617,7 +617,7 @@ void TextureCache::BindTextures() D3D12_GPU_DESCRIPTOR_HANDLE s_group_base_texture_gpu_handle; DX12::D3D::gpu_descriptor_heap_mgr->AllocateGroup(&s_group_base_texture_cpu_handle, 8, &s_group_base_texture_gpu_handle, nullptr, true); - for (unsigned int stage = 0; stage <= last_texture; stage++) + for (unsigned int stage = 0; stage < 8; stage++) { if (bound_textures[stage] != nullptr) { From 0a96e2f53117aaa3e094b1ea4282c5aea08e8fd8 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 24 Feb 2016 22:53:24 +1000 Subject: [PATCH 16/16] D3D12: Fix texture dumping, for both single and multi-mip textures --- .../Core/VideoBackends/D3D12/TextureCache.cpp | 81 ++++++++----------- 1 file changed, 33 insertions(+), 48 deletions(-) diff --git a/Source/Core/VideoBackends/D3D12/TextureCache.cpp b/Source/Core/VideoBackends/D3D12/TextureCache.cpp index 9ecc7e6123..309e62c6fc 100644 --- a/Source/Core/VideoBackends/D3D12/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D12/TextureCache.cpp @@ -28,8 +28,7 @@ static std::unique_ptr s_efb_copy_stream_buffer = nullptr; static u32 s_efb_copy_last_cbuf_id = UINT_MAX; static ID3D12Resource* s_texture_cache_entry_readback_buffer = nullptr; -static void*
s_texture_cache_entry_readback_buffer_data = nullptr; -static UINT s_texture_cache_entry_readback_buffer_size = 0; +static size_t s_texture_cache_entry_readback_buffer_size = 0; TextureCache::TCacheEntry::~TCacheEntry() { @@ -43,47 +42,27 @@ void TextureCache::TCacheEntry::Bind(unsigned int stage) bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level) { - // EXISTINGD3D11TODO: Somehow implement this (D3DX11 doesn't support dumping individual LODs) - static bool warn_once = true; - if (level && warn_once) + u32 level_width = std::max(config.width >> level, 1u); + u32 level_height = std::max(config.height >> level, 1u); + size_t level_pitch = D3D::AlignValue(level_width * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + size_t required_readback_buffer_size = level_pitch * level_height; + + // Check if the current readback buffer is large enough + if (required_readback_buffer_size > s_texture_cache_entry_readback_buffer_size) { - WARN_LOG(VIDEO, "Dumping individual LOD not supported by D3D12 backend!"); - warn_once = false; - return false; - } + // Reallocate the buffer with the new size. Safe to immediately release because we're the only user and we block until completion. + if (s_texture_cache_entry_readback_buffer) + s_texture_cache_entry_readback_buffer->Release(); - D3D12_RESOURCE_DESC texture_desc = m_texture->GetTex12()->GetDesc(); - - const unsigned int required_readback_buffer_size = D3D::AlignValue(static_cast(texture_desc.Width) * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - if (s_texture_cache_entry_readback_buffer_size < required_readback_buffer_size) - { s_texture_cache_entry_readback_buffer_size = required_readback_buffer_size; - - // We know the readback buffer won't be in use right now, since we wait on this thread - // for the GPU to finish execution right after copying to it. 
- - SAFE_RELEASE(s_texture_cache_entry_readback_buffer); + CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(s_texture_cache_entry_readback_buffer_size), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer))); } - if (!s_texture_cache_entry_readback_buffer_size) - { - CheckHR( - D3D::device12->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), - D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(s_texture_cache_entry_readback_buffer_size), - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer) - ) - ); - - CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &s_texture_cache_entry_readback_buffer_data)); - } - - bool saved_png = false; - m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); D3D12_TEXTURE_COPY_LOCATION dst_location = {}; @@ -91,26 +70,31 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; dst_location.PlacedFootprint.Offset = 0; dst_location.PlacedFootprint.Footprint.Depth = 1; - dst_location.PlacedFootprint.Footprint.Format = texture_desc.Format; - dst_location.PlacedFootprint.Footprint.Width = static_cast(texture_desc.Width); - dst_location.PlacedFootprint.Footprint.Height = texture_desc.Height; - dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + dst_location.PlacedFootprint.Footprint.Width = level_width; + dst_location.PlacedFootprint.Footprint.Height = level_height; + dst_location.PlacedFootprint.Footprint.RowPitch = static_cast(level_pitch); - D3D12_TEXTURE_COPY_LOCATION src_location = 
CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0); + D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), level); D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr); D3D::command_list_mgr->ExecuteQueuedWork(true); - saved_png = TextureToPng( - static_cast(s_texture_cache_entry_readback_buffer_data), + // Map readback buffer and save to file. + void* readback_texture_map; + CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &readback_texture_map)); + + bool saved = TextureToPng( + static_cast(readback_texture_map), dst_location.PlacedFootprint.Footprint.RowPitch, filename, dst_location.PlacedFootprint.Footprint.Width, dst_location.PlacedFootprint.Footprint.Height ); - return saved_png; + s_texture_cache_entry_readback_buffer->Unmap(0, nullptr); + return saved; } void TextureCache::TCacheEntry::CopyRectangleFromTexture( @@ -538,7 +522,6 @@ TextureCache::TextureCache() s_efb_copy_last_cbuf_id = UINT_MAX; s_texture_cache_entry_readback_buffer = nullptr; - s_texture_cache_entry_readback_buffer_data = nullptr; s_texture_cache_entry_readback_buffer_size = 0; m_palette_pixel_shaders[GX_TL_IA8] = GetConvertShader12(std::string("IA8")); @@ -588,8 +571,10 @@ TextureCache::~TextureCache() if (s_texture_cache_entry_readback_buffer) { - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_texture_cache_entry_readback_buffer); + // Safe to destroy the readback buffer immediately, as the only time it's used is blocked until completion. + s_texture_cache_entry_readback_buffer->Release(); s_texture_cache_entry_readback_buffer = nullptr; + s_texture_cache_entry_readback_buffer_size = 0; } D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_palette_uniform_buffer);