mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
D3D12: Implement perf query support
This commit is contained in:
parent
25d5da0ea3
commit
4269abdc3e
@ -274,6 +274,45 @@ void ID3D12QueuedCommandList::BackgroundThreadFunction(ID3D12QueuedCommandList*
|
||||
break;
|
||||
}
|
||||
|
||||
case D3DQueueItemType::BeginQuery:
|
||||
{
|
||||
command_list->BeginQuery(
|
||||
reinterpret_cast<D3DQueueItem*>(item)->BeginQuery.pQueryHeap,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->BeginQuery.Type,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->BeginQuery.Index
|
||||
);
|
||||
|
||||
item += BufferOffsetForQueueItemType<BeginQueryArguments>();
|
||||
break;
|
||||
}
|
||||
|
||||
case D3DQueueItemType::EndQuery:
|
||||
{
|
||||
command_list->EndQuery(
|
||||
reinterpret_cast<D3DQueueItem*>(item)->EndQuery.pQueryHeap,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->EndQuery.Type,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->EndQuery.Index
|
||||
);
|
||||
|
||||
item += BufferOffsetForQueueItemType<EndQueryArguments>();
|
||||
break;
|
||||
}
|
||||
|
||||
case D3DQueueItemType::ResolveQueryData:
|
||||
{
|
||||
command_list->ResolveQueryData(
|
||||
reinterpret_cast<D3DQueueItem*>(item)->ResolveQueryData.pQueryHeap,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->ResolveQueryData.Type,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->ResolveQueryData.StartElement,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->ResolveQueryData.ElementCount,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->ResolveQueryData.pDestinationBuffer,
|
||||
reinterpret_cast<D3DQueueItem*>(item)->ResolveQueryData.AlignedDestinationBufferOffset
|
||||
);
|
||||
|
||||
item += BufferOffsetForQueueItemType<ResolveQueryDataArguments>();
|
||||
break;
|
||||
}
|
||||
|
||||
case D3DQueueItemType::CloseCommandList:
|
||||
{
|
||||
CheckHR(command_list->Close());
|
||||
@ -916,8 +955,14 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::BeginQuery(
|
||||
_In_ UINT Index
|
||||
)
|
||||
{
|
||||
// Function not implemented yet.
|
||||
DEBUGCHECK(0, "Function not implemented yet.");
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->Type = D3DQueueItemType::BeginQuery;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->BeginQuery.pQueryHeap = pQueryHeap;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->BeginQuery.Type = Type;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->BeginQuery.Index = Index;
|
||||
|
||||
m_queue_array_back += BufferOffsetForQueueItemType<BeginQueryArguments>();
|
||||
|
||||
CheckForOverflow();
|
||||
}
|
||||
|
||||
void STDMETHODCALLTYPE ID3D12QueuedCommandList::EndQuery(
|
||||
@ -926,8 +971,14 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::EndQuery(
|
||||
_In_ UINT Index
|
||||
)
|
||||
{
|
||||
// Function not implemented yet.
|
||||
DEBUGCHECK(0, "Function not implemented yet.");
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->Type = D3DQueueItemType::EndQuery;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->EndQuery.pQueryHeap = pQueryHeap;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->EndQuery.Type = Type;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->EndQuery.Index = Index;
|
||||
|
||||
m_queue_array_back += BufferOffsetForQueueItemType<EndQueryArguments>();
|
||||
|
||||
CheckForOverflow();
|
||||
}
|
||||
|
||||
void STDMETHODCALLTYPE ID3D12QueuedCommandList::ResolveQueryData(
|
||||
@ -939,8 +990,17 @@ void STDMETHODCALLTYPE ID3D12QueuedCommandList::ResolveQueryData(
|
||||
_In_ UINT64 AlignedDestinationBufferOffset
|
||||
)
|
||||
{
|
||||
// Function not implemented yet.
|
||||
DEBUGCHECK(0, "Function not implemented yet.");
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->Type = D3DQueueItemType::ResolveQueryData;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->ResolveQueryData.pQueryHeap = pQueryHeap;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->ResolveQueryData.Type = Type;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->ResolveQueryData.StartElement = StartElement;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->ResolveQueryData.ElementCount = ElementCount;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->ResolveQueryData.pDestinationBuffer = pDestinationBuffer;
|
||||
reinterpret_cast<D3DQueueItem*>(m_queue_array_back)->ResolveQueryData.AlignedDestinationBufferOffset = AlignedDestinationBufferOffset;
|
||||
|
||||
m_queue_array_back += BufferOffsetForQueueItemType<ResolveQueryDataArguments>();
|
||||
|
||||
CheckForOverflow();
|
||||
}
|
||||
|
||||
void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetPredication(
|
||||
|
@ -35,6 +35,9 @@ enum D3DQueueItemType
|
||||
SetDescriptorHeaps,
|
||||
ResourceBarrier,
|
||||
ResolveSubresource,
|
||||
BeginQuery,
|
||||
EndQuery,
|
||||
ResolveQueryData,
|
||||
ExecuteCommandList,
|
||||
CloseCommandList,
|
||||
Present,
|
||||
@ -170,6 +173,30 @@ struct ResolveSubresourceArguments
|
||||
DXGI_FORMAT Format;
|
||||
};
|
||||
|
||||
struct BeginQueryArguments
|
||||
{
|
||||
ID3D12QueryHeap* pQueryHeap;
|
||||
D3D12_QUERY_TYPE Type;
|
||||
UINT Index;
|
||||
};
|
||||
|
||||
struct EndQueryArguments
|
||||
{
|
||||
ID3D12QueryHeap* pQueryHeap;
|
||||
D3D12_QUERY_TYPE Type;
|
||||
UINT Index;
|
||||
};
|
||||
|
||||
struct ResolveQueryDataArguments
|
||||
{
|
||||
ID3D12QueryHeap* pQueryHeap;
|
||||
D3D12_QUERY_TYPE Type;
|
||||
UINT StartElement;
|
||||
UINT ElementCount;
|
||||
ID3D12Resource* pDestinationBuffer;
|
||||
UINT64 AlignedDestinationBufferOffset;
|
||||
};
|
||||
|
||||
// Queued CloseCommandList command. Intentionally empty: the replayed
// command_list->Close() call takes no arguments.
struct CloseCommandListArguments
{
};
|
||||
@ -239,6 +266,9 @@ struct D3DQueueItem
|
||||
SetDescriptorHeapsArguments SetDescriptorHeaps;
|
||||
ResourceBarrierArguments ResourceBarrier;
|
||||
ResolveSubresourceArguments ResolveSubresource;
|
||||
BeginQueryArguments BeginQuery;
|
||||
EndQueryArguments EndQuery;
|
||||
ResolveQueryDataArguments ResolveQueryData;
|
||||
CloseCommandListArguments CloseCommandList;
|
||||
ExecuteCommandListArguments ExecuteCommandList;
|
||||
PresentArguments Present;
|
||||
|
@ -2,68 +2,215 @@
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Logging/Log.h"
|
||||
#include "VideoBackends/D3D12/D3DBase.h"
|
||||
#include "VideoBackends/D3D12/D3DCommandListManager.h"
|
||||
#include "VideoBackends/D3D12/PerfQuery.h"
|
||||
#include "VideoCommon/RenderBase.h"
|
||||
|
||||
//D3D12TODO: Implement PerfQuery class.
|
||||
|
||||
namespace DX12
|
||||
{
|
||||
|
||||
// Creates the occlusion query heap (PERF_QUERY_BUFFER_SIZE entries), a
// readback-heap buffer of QUERY_READBACK_BUFFER_SIZE bytes that resolved query
// results are written into, and registers QueueFenceCallback with the command
// list manager, keeping the fence it returns for later CPU waits.
PerfQuery::PerfQuery()
{
	D3D12_QUERY_HEAP_DESC desc = { D3D12_QUERY_HEAP_TYPE_OCCLUSION, PERF_QUERY_BUFFER_SIZE, 0 };
	CheckHR(D3D::device12->CreateQueryHeap(&desc, IID_PPV_ARGS(&m_query_heap)));

	// Use named locals rather than &Temporary(...): taking the address of an
	// rvalue is ill-formed in standard C++ and only compiles as an MSVC extension.
	const CD3DX12_HEAP_PROPERTIES readback_heap_properties(D3D12_HEAP_TYPE_READBACK);
	const CD3DX12_RESOURCE_DESC readback_buffer_desc = CD3DX12_RESOURCE_DESC::Buffer(QUERY_READBACK_BUFFER_SIZE);

	CheckHR(D3D::device12->CreateCommittedResource(
		&readback_heap_properties,
		D3D12_HEAP_FLAG_NONE,
		&readback_buffer_desc,
		D3D12_RESOURCE_STATE_COPY_DEST,
		nullptr,
		IID_PPV_ARGS(&m_query_readback_buffer)));

	m_tracking_fence = D3D::command_list_mgr->RegisterQueueFenceCallback(this, &PerfQuery::QueueFenceCallback);
}
|
||||
|
||||
// Unregisters the fence callback first, then releases the D3D12 objects
// created by the constructor.
PerfQuery::~PerfQuery()
{
	// Unregister this object's fence callback before its resources go away.
	D3D::command_list_mgr->RemoveQueueFenceCallback(this);

	// m_tracking_fence is not released here.
	// NOTE(review): presumably owned by the command list manager - confirm.
	SAFE_RELEASE(m_query_heap);
	SAFE_RELEASE(m_query_readback_buffer);
}
|
||||
|
||||
// Starts an occlusion query for the given group in the next free ring slot.
// Only PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP start a query; for any other group the
// function just performs the flushing below.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
	const size_t capacity = m_query_buffer.size();

	// More than half of the ring is in use: retire whatever the GPU has already finished.
	if (m_query_count > capacity / 2)
		WeakFlush();

	// Ring completely full: block on the oldest query to free its slot.
	if (capacity == m_query_count)
	{
		FlushOne();
		//WARN_LOG(VIDEO, "Flushed query buffer early!");
	}

	const bool is_zcomp_group = (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP);
	if (!is_zcomp_group)
		return;

	// The slot right after the newest pending query, wrapping around the ring.
	const size_t slot = (m_query_read_pos + m_query_count) % capacity;
	ActiveQuery& pending = m_query_buffer[slot];

	D3D::current_command_list->BeginQuery(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION, static_cast<UINT>(slot));

	pending.query_type = type;
	// -1 wraps to the largest UINT64; DisableQuery() stores the real fence value later.
	pending.fence_value = -1;

	++m_query_count;
}
|
||||
|
||||
// Ends the most recently started occlusion query, queues the copy of its
// result into the readback buffer, and records the fence value that must be
// reached before that result can be read. Groups other than
// PQG_ZCOMP_ZCOMPLOC / PQG_ZCOMP are ignored.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
	if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
		return;

	// Slot of the newest pending query; size() is added before the -1 so the
	// expression cannot go negative when the ring is at position 0.
	const size_t slot = (m_query_read_pos + m_query_count + m_query_buffer.size() - 1) % m_query_buffer.size();
	ActiveQuery& newest = m_query_buffer[slot];
	const UINT heap_index = static_cast<UINT>(slot);

	D3D::current_command_list->EndQuery(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION, heap_index);

	// Each query owns one UINT64 in the readback buffer, at the same index as in the heap.
	D3D::current_command_list->ResolveQueryData(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION, heap_index, 1, m_query_readback_buffer, slot * sizeof(UINT64));

	newest.fence_value = m_next_fence_value;
}
|
||||
|
||||
void PerfQuery::ResetQuery()
|
||||
{
|
||||
//D3D12TODO: Add implementation
|
||||
m_query_count = 0;
|
||||
std::fill_n(m_results, ArraySize(m_results), 0);
|
||||
}
|
||||
|
||||
// Returns the accumulated counter for the requested query type, divided by 4.
// Types that map to no counter yield 0.
//
// The previous stub `return 0;` at the top of the body has been removed; it
// made everything below unreachable.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
	u32 result = 0;

	switch (type)
	{
	case PQ_ZCOMP_INPUT_ZCOMPLOC:
	case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
		result = m_results[PQG_ZCOMP_ZCOMPLOC];
		break;

	case PQ_ZCOMP_INPUT:
	case PQ_ZCOMP_OUTPUT:
		result = m_results[PQG_ZCOMP];
		break;

	case PQ_BLEND_INPUT:
		// Blend input is the sum of both z-compare groups.
		result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
		break;

	case PQ_EFB_COPY_CLOCKS:
		result = m_results[PQG_EFB_COPY_CLOCKS];
		break;

	default:
		break;
	}

	// NOTE(review): the /4 scaling is kept as-is; presumably it converts pixel
	// counts to the unit the emulated hardware reports - confirm.
	return result / 4;
}
|
||||
|
||||
void PerfQuery::FlushOne()
|
||||
{
|
||||
//D3D12TODO: Add implementation
|
||||
size_t index = m_query_read_pos;
|
||||
ActiveQuery& entry = m_query_buffer[index];
|
||||
|
||||
// Has the command list been executed yet?
|
||||
if (entry.fence_value == m_next_fence_value)
|
||||
D3D::command_list_mgr->ExecuteQueuedWork(false);
|
||||
|
||||
// Block until the fence is reached
|
||||
D3D::command_list_mgr->WaitOnCPUForFence(m_tracking_fence, entry.fence_value);
|
||||
|
||||
// Copy from readback buffer to local
|
||||
void* readback_buffer_map;
|
||||
D3D12_RANGE read_range = { sizeof(UINT64) * index, sizeof(UINT64) * (index + 1) };
|
||||
CheckHR(m_query_readback_buffer->Map(0, &read_range, &readback_buffer_map));
|
||||
|
||||
UINT64 result;
|
||||
memcpy(&result, reinterpret_cast<u8*>(readback_buffer_map) + sizeof(UINT64) * index, sizeof(UINT64));
|
||||
|
||||
D3D12_RANGE empty_range = {};
|
||||
m_query_readback_buffer->Unmap(0, &empty_range);
|
||||
|
||||
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||
m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight());
|
||||
|
||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||
m_query_count--;
|
||||
}
|
||||
|
||||
// Walks every pending query, oldest first, and returns the largest fence value
// recorded among them. Returns 0 when nothing is pending.
UINT64 PerfQuery::FindLastPendingFenceValue() const
{
	UINT64 highest_fence = 0;
	u32 cursor = m_query_read_pos;

	for (u32 remaining = m_query_count; remaining > 0; remaining--)
	{
		const ActiveQuery& pending = m_query_buffer[cursor];
		highest_fence = std::max(pending.fence_value, highest_fence);

		// Step to the next slot, wrapping at the end of the ring.
		cursor = (cursor + 1) % m_query_buffer.size();
	}

	return highest_fence;
}
|
||||
|
||||
void PerfQuery::FlushResults()
|
||||
{
|
||||
//D3D12TODO: Add implementation
|
||||
if (IsFlushed())
|
||||
return;
|
||||
|
||||
// Find the fence value we have to wait for.
|
||||
UINT64 last_fence_value = FindLastPendingFenceValue();
|
||||
if (last_fence_value == m_next_fence_value)
|
||||
D3D::command_list_mgr->ExecuteQueuedWork(false);
|
||||
|
||||
// Wait for all queries to be resolved.
|
||||
D3D::command_list_mgr->WaitOnCPUForFence(m_tracking_fence, last_fence_value);
|
||||
|
||||
// Map the whole readback buffer. Shouldn't have much overhead, and saves taking the wrapped-around cases into consideration.
|
||||
void* readback_buffer_map;
|
||||
D3D12_RANGE read_range = { 0, QUERY_READBACK_BUFFER_SIZE };
|
||||
CheckHR(m_query_readback_buffer->Map(0, &read_range, &readback_buffer_map));
|
||||
|
||||
// Read all pending queries.
|
||||
while (m_query_count > 0)
|
||||
{
|
||||
ActiveQuery& entry = m_query_buffer[m_query_read_pos];
|
||||
|
||||
UINT64 result;
|
||||
memcpy(&result, reinterpret_cast<u8*>(readback_buffer_map) + sizeof(UINT64) * m_query_read_pos, sizeof(UINT64));
|
||||
|
||||
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||
m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight());
|
||||
|
||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||
m_query_count--;
|
||||
}
|
||||
|
||||
D3D12_RANGE write_range = {};
|
||||
m_query_readback_buffer->Unmap(0, &write_range);
|
||||
}
|
||||
|
||||
void PerfQuery::WeakFlush()
|
||||
{
|
||||
//D3D12TODO: Add implementation
|
||||
UINT64 completed_fence = m_tracking_fence->GetCompletedValue();
|
||||
|
||||
while (!IsFlushed())
|
||||
{
|
||||
ActiveQuery& entry = m_query_buffer[m_query_read_pos];
|
||||
if (entry.fence_value > completed_fence)
|
||||
break;
|
||||
|
||||
FlushOne();
|
||||
}
|
||||
}
|
||||
|
||||
bool PerfQuery::IsFlushed() const
|
||||
{
|
||||
//D3D12TODO: Add implementation
|
||||
return true;
|
||||
return m_query_count == 0;
|
||||
}
|
||||
|
||||
void PerfQuery::QueueFenceCallback(void* owning_object, UINT64 fence_value)
|
||||
{
|
||||
PerfQuery* owning_perf_query = static_cast<PerfQuery*>(owning_object);
|
||||
owning_perf_query->QueueFence(fence_value);
|
||||
}
|
||||
|
||||
// Remembers the value following the reported fence value. DisableQuery()
// stamps this onto queries it closes.
// NOTE(review): presumably the value the next queued fence will signal - confirm.
void PerfQuery::QueueFence(UINT64 fence_value)
{
	const UINT64 upcoming_fence_value = fence_value + 1;
	m_next_fence_value = upcoming_fence_value;
}
|
||||
|
||||
} // namespace
|
||||
|
@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <d3d12.h>
|
||||
|
||||
#include "VideoCommon/PerfQueryBase.h"
|
||||
|
||||
@ -27,20 +28,33 @@ public:
|
||||
private:
|
||||
struct ActiveQuery
|
||||
{
|
||||
//ID3D11Query* query;
|
||||
PerfQueryGroup query_type;
|
||||
UINT64 fence_value;
|
||||
};
|
||||
|
||||
void WeakFlush();
|
||||
|
||||
// Find the last fence value of all pending queries.
|
||||
UINT64 FindLastPendingFenceValue() const;
|
||||
|
||||
// Only use when non-empty
|
||||
void FlushOne();
|
||||
|
||||
// when testing in SMS: 64 was too small, 128 was ok
|
||||
static const int s_perf_query_buffer_size = 512;
|
||||
static void QueueFenceCallback(void* owning_object, UINT64 fence_value);
|
||||
void QueueFence(UINT64 fence_value);
|
||||
|
||||
std::array<ActiveQuery, s_perf_query_buffer_size> m_query_buffer;
|
||||
// when testing in SMS: 64 was too small, 128 was ok
|
||||
static constexpr size_t PERF_QUERY_BUFFER_SIZE = 512;
|
||||
static constexpr size_t QUERY_READBACK_BUFFER_SIZE = PERF_QUERY_BUFFER_SIZE * sizeof(UINT64);
|
||||
|
||||
std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer;
|
||||
int m_query_read_pos = 0;
|
||||
|
||||
ID3D12QueryHeap* m_query_heap = nullptr;
|
||||
ID3D12Resource* m_query_readback_buffer = nullptr;
|
||||
|
||||
ID3D12Fence* m_tracking_fence = nullptr;
|
||||
UINT64 m_next_fence_value = 0;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
@ -158,7 +158,11 @@ void VertexManager::vFlush(bool use_dst_alpha)
|
||||
// D3D12TODO: Decide right threshold for drawCountSinceAsyncFlush at runtime depending on
|
||||
// amount of stall measured in AccessEFB.
|
||||
|
||||
if (D3D::command_list_mgr->m_draws_since_last_execution > 100 && D3D::command_list_mgr->m_cpu_access_last_frame)
|
||||
// We can't do this with perf queries enabled since it can leave queries open.
|
||||
|
||||
if (D3D::command_list_mgr->m_cpu_access_last_frame &&
|
||||
D3D::command_list_mgr->m_draws_since_last_execution > 100 &&
|
||||
!PerfQueryBase::ShouldEmulate())
|
||||
{
|
||||
D3D::command_list_mgr->m_draws_since_last_execution = 0;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user