diff --git a/Source/Core/DolphinWX/DolphinWX.vcxproj b/Source/Core/DolphinWX/DolphinWX.vcxproj index cea95b666a..f7d158b1cd 100644 --- a/Source/Core/DolphinWX/DolphinWX.vcxproj +++ b/Source/Core/DolphinWX/DolphinWX.vcxproj @@ -233,6 +233,9 @@ {3de9ee35-3e91-4f27-a014-2866ad8c3fe3} + + {570215b7-e32f-4438-95ae-c8d955f9fca3} + diff --git a/Source/Core/DolphinWX/Main.cpp b/Source/Core/DolphinWX/Main.cpp index 30ae7ea5ac..a852f76777 100644 --- a/Source/Core/DolphinWX/Main.cpp +++ b/Source/Core/DolphinWX/Main.cpp @@ -532,7 +532,7 @@ void Host_ConnectWiimote(int wm_idx, bool connect) void Host_ShowVideoConfig(void* parent, const std::string& backend_name, const std::string& config_name) { - if (backend_name == "Direct3D" || backend_name == "OpenGL") + if (backend_name == "Direct3D" || backend_name == "Direct3D 12 (experimental)" || backend_name == "OpenGL") { VideoConfigDiag diag((wxWindow*)parent, backend_name, config_name); diag.ShowModal(); diff --git a/Source/Core/VideoBackends/D3D12/BoundingBox.cpp b/Source/Core/VideoBackends/D3D12/BoundingBox.cpp new file mode 100644 index 0000000000..d847924cf2 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/BoundingBox.cpp @@ -0,0 +1,44 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Common/CommonTypes.h" +#include "Common/MsgHandler.h" +#include "VideoBackends/D3D12/BoundingBox.h" +#include "VideoCommon/VideoConfig.h" + +// D3D12TODO: Support bounding box behavior. 
+namespace DX12 +{ + +ID3D11UnorderedAccessView* BBox::GetUAV() +{ + // D3D12TODO: Implement this; + return nullptr; +} + +void BBox::Init() +{ + if (g_ActiveConfig.backend_info.bSupportsBBox) + { + // D3D12TODO: Implement this; + } +} + +void BBox::Shutdown() +{ + // D3D12TODO: Implement this; +} + +void BBox::Set(int index, int value) +{ + // D3D12TODO: Implement this; +} + +int BBox::Get(int index) +{ + // D3D12TODO: Implement this; + return 0; +} + +}; diff --git a/Source/Core/VideoBackends/D3D12/BoundingBox.h b/Source/Core/VideoBackends/D3D12/BoundingBox.h new file mode 100644 index 0000000000..05126810db --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/BoundingBox.h @@ -0,0 +1,22 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once +#include "VideoBackends/D3D12/D3DBase.h" + +namespace DX12 +{ + +class BBox +{ +public: + static ID3D11UnorderedAccessView* GetUAV(); + static void Init(); + static void Shutdown(); + + static void Set(int index, int value); + static int Get(int index); +}; + +}; diff --git a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj new file mode 100644 index 0000000000..b44179a441 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj @@ -0,0 +1,109 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {570215B7-E32F-4438-95AE-C8D955F9FCA3} + 10.0.10240.0 + + + + StaticLibrary + v140 + Unicode + + + true + + + false + + + + + + + + + + + + + NotUsing + + + + + + NotUsing + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {3de9ee35-3e91-4f27-a014-2866ad8c3fe3} + + + + + + \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters new file mode 100644 index 0000000000..83038e5a7b --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3D12.vcxproj.filters 
@@ -0,0 +1,152 @@ + + + + + {3683d29b-19f6-4e7a-803f-4ac70b1d49fd} + + + {ae700f7e-33c8-45b5-b7ee-a0ded3630549} + + + + + D3D12 + + + D3D12 + + + D3D12 + + + D3D12 + + + D3D12 + + + Render + + + Render + + + Render + + + Render + + + Render + + + Render + + + Render + + + Render + + + Render + + + + Render + + + D3D12 + + + D3D12 + + + D3D12 + + + Render + + + Render + + + Render + + + D3D12 + + + + + D3D12 + + + D3D12 + + + D3D12 + + + D3D12 + + + D3D12 + + + Render + + + Render + + + Render + + + Render + + + Render + + + Render + + + Render + + + Render + + + Render + + + + Render + + + D3D12 + + + D3D12 + + + Render + + + D3D12 + + + Render + + + Render + + + Render + + + D3D12 + + + \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.cpp b/Source/Core/VideoBackends/D3D12/D3DBase.cpp new file mode 100644 index 0000000000..54565ec273 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DBase.cpp @@ -0,0 +1,970 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include + +#include "Common/CommonTypes.h" +#include "Common/MsgHandler.h" +#include "Common/StringUtil.h" +#include "Common/Logging/Log.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DTexture.h" +#include "VideoCommon/VideoConfig.h" + +static const unsigned int SWAP_CHAIN_BUFFER_COUNT = 4; + +namespace DX12 +{ + +// d3dcompiler_*.dll exports +static HINSTANCE s_d3d_compiler_dll = nullptr; +static int s_d3d_compiler_dll_ref = 0; +D3DREFLECT d3d_reflect = nullptr; +D3DCREATEBLOB d3d_create_blob = nullptr; +pD3DCompile d3d_compile = nullptr; + +// dxgi.dll exports +static HINSTANCE s_dxgi_dll = nullptr; +static int s_dxgi_dll_ref = 0; +CREATEDXGIFACTORY create_dxgi_factory = nullptr; + +// d3d12.dll exports +static HINSTANCE s_d3d12_dll = nullptr; +static int s_d3d12_dll_ref = 0; +D3D12CREATEDEVICE d3d12_create_device = nullptr; +D3D12SERIALIZEROOTSIGNATURE d3d12_serialize_root_signature = nullptr; +D3D12GETDEBUGINTERFACE d3d12_get_debug_interface = nullptr; + +namespace D3D +{ + +// Begin extern'd variables. +ID3D12Device* device12 = nullptr; + +ID3D12CommandQueue* command_queue = nullptr; +D3DCommandListManager* command_list_mgr = nullptr; +ID3D12GraphicsCommandList* current_command_list = nullptr; +ID3D12RootSignature* default_root_signature = nullptr; + +D3D12_CPU_DESCRIPTOR_HANDLE null_srv_cpu = {}; +D3D12_CPU_DESCRIPTOR_HANDLE null_srv_cpu_shadow = {}; + +unsigned int resource_descriptor_size = 0; +unsigned int sampler_descriptor_size = 0; +D3DDescriptorHeapManager* gpu_descriptor_heap_mgr = nullptr; +D3DDescriptorHeapManager* sampler_descriptor_heap_mgr = nullptr; +D3DDescriptorHeapManager* dsv_descriptor_heap_mgr = nullptr; +D3DDescriptorHeapManager* rtv_descriptor_heap_mgr = nullptr; +std::array gpu_descriptor_heaps; + +HWND hWnd; +// End extern'd variables. 
+ +static IDXGISwapChain* s_swap_chain = nullptr; +static unsigned int s_monitor_refresh_rate = 0; + +static LARGE_INTEGER s_qpc_frequency; + +static ID3D12DebugDevice* s_debug_device12 = nullptr; + +static D3D_FEATURE_LEVEL s_feat_level; +static D3DTexture2D* s_backbuf[SWAP_CHAIN_BUFFER_COUNT]; +static unsigned int s_current_back_buf = 0; +static unsigned int s_xres = 0; +static unsigned int s_yres = 0; +static bool s_frame_in_progress = false; + +static std::vector s_aa_modes; // supported AA modes of the current adapter +static const D3D_FEATURE_LEVEL s_supported_feature_levels[] = { + D3D_FEATURE_LEVEL_11_0 +}; + +HRESULT LoadDXGI() +{ + if (s_dxgi_dll_ref++ > 0) + return S_OK; + + if (s_dxgi_dll) + return S_OK; + + s_dxgi_dll = LoadLibraryA("dxgi.dll"); + if (!s_dxgi_dll) + { + MessageBoxA(nullptr, "Failed to load dxgi.dll", "Critical error", MB_OK | MB_ICONERROR); + --s_dxgi_dll_ref; + return E_FAIL; + } + create_dxgi_factory = (CREATEDXGIFACTORY)GetProcAddress(s_dxgi_dll, "CreateDXGIFactory"); + + if (create_dxgi_factory == nullptr) + MessageBoxA(nullptr, "GetProcAddress failed for CreateDXGIFactory!", "Critical error", MB_OK | MB_ICONERROR); + + return S_OK; +} + +HRESULT LoadD3D() +{ + if (s_d3d12_dll_ref++ > 0) + return S_OK; + + s_d3d12_dll = LoadLibraryA("d3d12.dll"); + if (!s_d3d12_dll) + { + MessageBoxA(nullptr, "Failed to load d3d12.dll", "Critical error", MB_OK | MB_ICONERROR); + --s_d3d12_dll_ref; + return E_FAIL; + } + + d3d12_create_device = (D3D12CREATEDEVICE)GetProcAddress(s_d3d12_dll, "D3D12CreateDevice"); + if (d3d12_create_device == nullptr) + { + MessageBoxA(nullptr, "GetProcAddress failed for D3D12CreateDevice!", "Critical error", MB_OK | MB_ICONERROR); + return E_FAIL; + } + + d3d12_serialize_root_signature = (D3D12SERIALIZEROOTSIGNATURE)GetProcAddress(s_d3d12_dll, "D3D12SerializeRootSignature"); + if (d3d12_serialize_root_signature == nullptr) + { + MessageBoxA(nullptr, "GetProcAddress failed for D3D12SerializeRootSignature!", "Critical 
error", MB_OK | MB_ICONERROR); + return E_FAIL; + } + + d3d12_get_debug_interface = (D3D12GETDEBUGINTERFACE)GetProcAddress(s_d3d12_dll, "D3D12GetDebugInterface"); + if (d3d12_get_debug_interface == nullptr) + { + MessageBoxA(nullptr, "GetProcAddress failed for D3D12GetDebugInterface!", "Critical error", MB_OK | MB_ICONERROR); + return E_FAIL; + } + + return S_OK; +} + +HRESULT LoadD3DCompiler() +{ + if (s_d3d_compiler_dll_ref++ > 0) + return S_OK; + + if (s_d3d_compiler_dll) + return S_OK; + + // try to load D3DCompiler first to check whether we have proper runtime support + // try to use the dll the backend was compiled against first - don't bother about debug runtimes + s_d3d_compiler_dll = LoadLibraryA(D3DCOMPILER_DLL_A); + if (!s_d3d_compiler_dll) + { + // if that fails, use the dll which should be available in every SDK which officially supports DX12. + s_d3d_compiler_dll = LoadLibraryA("D3DCompiler_42.dll"); + if (!s_d3d_compiler_dll) + { + MessageBoxA(nullptr, "Failed to load D3DCompiler_42.dll, update your DX12 runtime, please", "Critical error", MB_OK | MB_ICONERROR); + return E_FAIL; + } + else + { + NOTICE_LOG(VIDEO, "Successfully loaded D3DCompiler_42.dll. 
If you're having trouble, try updating your DX runtime first."); + } + } + + d3d_reflect = (D3DREFLECT) GetProcAddress(s_d3d_compiler_dll, "D3DReflect"); + if (d3d_reflect == nullptr) + MessageBoxA(nullptr, "GetProcAddress failed for D3DReflect!", "Critical error", MB_OK | MB_ICONERROR); + + d3d_create_blob = (D3DCREATEBLOB)GetProcAddress(s_d3d_compiler_dll, "D3DCreateBlob"); + if (d3d_create_blob == nullptr) + MessageBoxA(nullptr, "GetProcAddress failed for D3DCreateBlob!", "Critical error", MB_OK | MB_ICONERROR); + + d3d_compile = (pD3DCompile) GetProcAddress(s_d3d_compiler_dll, "D3DCompile"); + if (d3d_compile == nullptr) + MessageBoxA(nullptr, "GetProcAddress failed for D3DCompile!", "Critical error", MB_OK | MB_ICONERROR); + + return S_OK; +} + +void UnloadDXGI() +{ + if (!s_dxgi_dll_ref) + return; + + if (--s_dxgi_dll_ref != 0) + return; + + if (s_dxgi_dll) + FreeLibrary(s_dxgi_dll); + + s_dxgi_dll = nullptr; + create_dxgi_factory = nullptr; +} + +void UnloadD3D() +{ + if (!s_d3d12_dll_ref) + return; + + if (--s_d3d12_dll_ref != 0) + return; + + if (s_d3d12_dll) + FreeLibrary(s_d3d12_dll); + + s_d3d12_dll = nullptr; + d3d12_create_device = nullptr; + d3d12_serialize_root_signature = nullptr; +} + +void UnloadD3DCompiler() +{ + if (!s_d3d_compiler_dll_ref) + return; + + if (--s_d3d_compiler_dll_ref != 0) + return; + + if (s_d3d_compiler_dll) + FreeLibrary(s_d3d_compiler_dll); + + s_d3d_compiler_dll = nullptr; + d3d_compile = nullptr; + d3d_create_blob = nullptr; + d3d_reflect = nullptr; +} + +bool AlertUserIfSelectedAdapterDoesNotSupportD3D12() +{ + HRESULT hr = LoadDXGI(); + if (SUCCEEDED(hr)) + { + hr = LoadD3D(); + } + + if (FAILED(hr)) + { + // LoadDXGI / LoadD3D display a specific error message, + // no need to do that here. 
+ return false; + } + + IDXGIFactory* factory = nullptr; + IDXGIAdapter* adapter = nullptr; + ID3D12Device* device = nullptr; + + if (SUCCEEDED(hr)) + { + hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); + } + + if (SUCCEEDED(hr)) + { + hr = factory->EnumAdapters(g_ActiveConfig.iAdapter, &adapter); + } + + if (SUCCEEDED(hr)) + { + hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device)); + + SAFE_RELEASE(device); + SAFE_RELEASE(adapter); + SAFE_RELEASE(factory); + + if (FAILED(hr)) + { + UnloadD3D(); + UnloadDXGI(); + MessageBoxA(nullptr, "Failed to create a D3D12 device on the selected adapter.\n\nPlease make sure it supports Direct3D 12, and that your graphics drivers are up-to-date.", "Critical error", MB_OK | MB_ICONERROR); + return false; + } + + // If succeeded, leave DXGI and D3D libraries loaded since we'll use them in Create(). + return true; + } + + // DXGI failed to create factory/enumerate adapter. This should be very uncommon. + MessageBoxA(nullptr, "Failed to create enumerate selected adapter. 
Please select a different graphics adapter.", "Critical error", MB_OK | MB_ICONERROR); + SAFE_RELEASE(adapter); + SAFE_RELEASE(factory); + + UnloadD3D(); + UnloadDXGI(); + return false; +} + +std::vector EnumAAModes(IDXGIAdapter* adapter) +{ + std::vector aa_modes; + + bool d3d12_supported = AlertUserIfSelectedAdapterDoesNotSupportD3D12(); + + if (!d3d12_supported) + return aa_modes; + + ID3D12Device* device12 = nullptr; + d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); + + if (device12) + { + for (int samples = 0; samples < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; ++samples) + { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS multisample_quality_levels = {}; + multisample_quality_levels.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + multisample_quality_levels.SampleCount = samples; + + device12->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &multisample_quality_levels, sizeof(multisample_quality_levels)); + + DXGI_SAMPLE_DESC desc; + desc.Count = samples; + desc.Quality = 0; + + if (multisample_quality_levels.NumQualityLevels > 0) + { + aa_modes.push_back(desc); + } + } + + device12->Release(); + } + + return aa_modes; +} + +D3D_FEATURE_LEVEL GetFeatureLevel(IDXGIAdapter* adapter) +{ + return D3D_FEATURE_LEVEL_11_0; +} + +HRESULT Create(HWND wnd) +{ + hWnd = wnd; + HRESULT hr; + + RECT client; + GetClientRect(hWnd, &client); + s_xres = client.right - client.left; + s_yres = client.bottom - client.top; + + hr = LoadDXGI(); + if (SUCCEEDED(hr)) + hr = LoadD3D(); + + if (SUCCEEDED(hr)) + hr = LoadD3DCompiler(); + + if (FAILED(hr)) + { + UnloadDXGI(); + UnloadD3D(); + UnloadD3DCompiler(); + return hr; + } + + IDXGIFactory* factory; + IDXGIAdapter* adapter; + IDXGIOutput* output; + hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); + if (FAILED(hr)) + MessageBox(wnd, _T("Failed to create IDXGIFactory object"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); + + hr = 
factory->EnumAdapters(g_ActiveConfig.iAdapter, &adapter); + if (FAILED(hr)) + { + // try using the first one + hr = factory->EnumAdapters(0, &adapter); + if (FAILED(hr)) + MessageBox(wnd, _T("Failed to enumerate adapters"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); + } + + // TODO: Make this configurable + hr = adapter->EnumOutputs(0, &output); + if (FAILED(hr)) + { + // try using the first one + IDXGIAdapter* firstadapter; + hr = factory->EnumAdapters(0, &firstadapter); + if (!FAILED(hr)) + hr = firstadapter->EnumOutputs(0, &output); + if (FAILED(hr)) + MessageBox(wnd, + _T("Failed to enumerate outputs!\n") + _T("This usually happens when you've set your video adapter to the Nvidia GPU in an Optimus-equipped system.\n") + _T("Set Dolphin to use the high-performance graphics in Nvidia's drivers instead and leave Dolphin's video adapter set to the Intel GPU."), + _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); + + SAFE_RELEASE(firstadapter); + } + + // get supported AA modes + s_aa_modes = EnumAAModes(adapter); + + if (std::find_if( + s_aa_modes.begin(), + s_aa_modes.end(), + [](const DXGI_SAMPLE_DESC& desc) {return desc.Count == g_Config.iMultisamples; } + ) == s_aa_modes.end()) + { + g_Config.iMultisamples = 1; + UpdateActiveConfig(); + } + + DXGI_SWAP_CHAIN_DESC swap_chain_desc = {}; + swap_chain_desc.BufferCount = SWAP_CHAIN_BUFFER_COUNT; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.OutputWindow = wnd; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.SampleDesc.Quality = 0; + swap_chain_desc.Windowed = true; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + swap_chain_desc.Flags = 0; + + swap_chain_desc.BufferDesc.Width = s_xres; + swap_chain_desc.BufferDesc.Height = s_yres; + swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swap_chain_desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; + +#if defined(_DEBUG) || defined(DEBUGFAST) + // Creating debug 
devices can sometimes fail if the user doesn't have the correct + // version of the DirectX SDK. If it does, simply fallback to a non-debug device. + { + if (SUCCEEDED(hr)) + { + ID3D12Debug* debug_controller; + hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); + if (SUCCEEDED(hr)) + { + debug_controller->EnableDebugLayer(); + debug_controller->Release(); + } + else + { + MessageBox(wnd, _T("Failed to initialize Direct3D debug layer, please make sure it is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); + } + + hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); + + s_feat_level = D3D_FEATURE_LEVEL_11_0; + } + } + + if (FAILED(hr)) +#endif + { + if (SUCCEEDED(hr)) + { +#ifdef USE_D3D12_DEBUG_LAYER + ID3D12Debug* debug_controller; + hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); + if (SUCCEEDED(hr)) + { + debug_controller->EnableDebugLayer(); + debug_controller->Release(); + } + else + { + MessageBox(wnd, _T("Failed to initialize Direct3D debug layer."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); + } +#endif + hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); + + s_feat_level = D3D_FEATURE_LEVEL_11_0; + } + } + + if (SUCCEEDED(hr)) + { + D3D12_COMMAND_QUEUE_DESC command_queue_desc = { + D3D12_COMMAND_LIST_TYPE_DIRECT, // D3D12_COMMAND_LIST_TYPE Type; + 0, // INT Priority; + D3D12_COMMAND_QUEUE_FLAG_NONE, // D3D12_COMMAND_QUEUE_FLAG Flags; + 0 // UINT NodeMask; + }; + + CheckHR(device12->CreateCommandQueue(&command_queue_desc, IID_PPV_ARGS(&command_queue))); + + IDXGIFactory* factory = nullptr; + adapter->GetParent(IID_PPV_ARGS(&factory)); + + CheckHR(factory->CreateSwapChain(command_queue, &swap_chain_desc, &s_swap_chain)); + + s_current_back_buf = 0; + + factory->Release(); + } + + if (SUCCEEDED(hr)) + { + // Query the monitor refresh rate, to ensure proper Present throttling behavior. 
+ DEVMODE dev_mode; + memset(&dev_mode, 0, sizeof(DEVMODE)); + dev_mode.dmSize = sizeof(DEVMODE); + dev_mode.dmDriverExtra = 0; + + if (EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode) == 0) + { + // If EnumDisplaySettings fails, assume monitor refresh rate of 60 Hz. + s_monitor_refresh_rate = 60; + } + else + { + s_monitor_refresh_rate = dev_mode.dmDisplayFrequency; + } + } + + if (FAILED(hr)) + { + MessageBox(wnd, _T("Failed to initialize Direct3D.\nMake sure your video card supports Direct3D 12 and your drivers are up-to-date."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); + SAFE_RELEASE(s_swap_chain); + return E_FAIL; + } + + ID3D12InfoQueue* info_queue = nullptr; + if (SUCCEEDED(device12->QueryInterface(&info_queue))) + { + CheckHR(info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE)); + CheckHR(info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE)); + + D3D12_INFO_QUEUE_FILTER filter = {}; + D3D12_MESSAGE_ID id_list[] = { + D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_DEPTHSTENCILVIEW_NOT_SET, // Benign. + D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, // Benign. + D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, // Benign. + D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, // Benign. Probably. + D3D12_MESSAGE_ID_INVALID_SUBRESOURCE_STATE, + D3D12_MESSAGE_ID_MAP_INVALID_NULLRANGE, // Benign. + D3D12_MESSAGE_ID_EXECUTECOMMANDLISTS_GPU_WRITTEN_READBACK_RESOURCE_MAPPED, // Benign. + D3D12_MESSAGE_ID_RESOURCE_BARRIER_BEFORE_AFTER_MISMATCH // Benign. Probably. + }; + filter.DenyList.NumIDs = ARRAYSIZE(id_list); + filter.DenyList.pIDList = id_list; + info_queue->PushStorageFilter(&filter); + + info_queue->Release(); + + // Used at Close time to report live objects. + CheckHR(device12->QueryInterface(&s_debug_device12)); + } + + // prevent DXGI from responding to Alt+Enter, unfortunately DXGI_MWA_NO_ALT_ENTER + // does not work so we disable all monitoring of window messages. 
However this + // may make it more difficult for DXGI to handle display mode changes. + hr = factory->MakeWindowAssociation(wnd, DXGI_MWA_NO_WINDOW_CHANGES); + if (FAILED(hr)) + MessageBox(wnd, _T("Failed to associate the window"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); + + SAFE_RELEASE(factory); + SAFE_RELEASE(output); + SAFE_RELEASE(adapter) + + CreateDescriptorHeaps(); + CreateRootSignatures(); + + command_list_mgr = new D3DCommandListManager( + D3D12_COMMAND_LIST_TYPE_DIRECT, + device12, + command_queue + ); + + command_list_mgr->GetCommandList(¤t_command_list); + command_list_mgr->SetInitialCommandListState(); + + for (UINT i = 0; i < SWAP_CHAIN_BUFFER_COUNT; i++) + { + ID3D12Resource* buf12 = nullptr; + hr = s_swap_chain->GetBuffer(i, IID_PPV_ARGS(&buf12)); + + CHECK(SUCCEEDED(hr), "Retrieve back buffer texture"); + + s_backbuf[i] = new D3DTexture2D(buf12, + D3D11_BIND_RENDER_TARGET, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + false, + D3D12_RESOURCE_STATE_PRESENT // Swap Chain back buffers start out in D3D12_RESOURCE_STATE_PRESENT. + ); + + SAFE_RELEASE(buf12); + SetDebugObjectName12(s_backbuf[i]->GetTex12(), "backbuffer texture"); + } + + s_backbuf[s_current_back_buf]->TransitionToResourceState(current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + current_command_list->OMSetRenderTargets(1, &s_backbuf[s_current_back_buf]->GetRTV12(), FALSE, nullptr); + + QueryPerformanceFrequency(&s_qpc_frequency); + + return S_OK; +} + +void CreateDescriptorHeaps() +{ + // Create D3D12 GPU and CPU descriptor heaps. 
+ + { + D3D12_DESCRIPTOR_HEAP_DESC gpu_descriptor_heap_desc = {}; + gpu_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + gpu_descriptor_heap_desc.NumDescriptors = 500000; + gpu_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + + gpu_descriptor_heap_mgr = new D3DDescriptorHeapManager(&gpu_descriptor_heap_desc, device12, 50000); + + gpu_descriptor_heaps[0] = gpu_descriptor_heap_mgr->GetDescriptorHeap(); + + D3D12_CPU_DESCRIPTOR_HANDLE descriptor_heap_cpu_base = gpu_descriptor_heap_mgr->GetDescriptorHeap()->GetCPUDescriptorHandleForHeapStart(); + + resource_descriptor_size = device12->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + sampler_descriptor_size = device12->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + + D3D12_GPU_DESCRIPTOR_HANDLE null_srv_gpu = {}; + gpu_descriptor_heap_mgr->Allocate(&null_srv_cpu, &null_srv_gpu, &null_srv_cpu_shadow); + + D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {}; + null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + null_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + device12->CreateShaderResourceView(NULL, &null_srv_desc, null_srv_cpu); + + for (UINT i = 0; i < 500000; i++) + { + // D3D12TODO: Make paving of descriptor heap optional. 
+ + D3D12_CPU_DESCRIPTOR_HANDLE destination_descriptor = {}; + destination_descriptor.ptr = descriptor_heap_cpu_base.ptr + i * resource_descriptor_size; + + device12->CreateShaderResourceView(NULL, &null_srv_desc, destination_descriptor); + } + } + + { + D3D12_DESCRIPTOR_HEAP_DESC sampler_descriptor_heap_desc = {}; + sampler_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + sampler_descriptor_heap_desc.NumDescriptors = 2000; + sampler_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + + sampler_descriptor_heap_mgr = new D3DDescriptorHeapManager(&sampler_descriptor_heap_desc, device12); + + gpu_descriptor_heaps[1] = sampler_descriptor_heap_mgr->GetDescriptorHeap(); + } + + { + D3D12_DESCRIPTOR_HEAP_DESC dsv_descriptor_heap_desc = {}; + dsv_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + dsv_descriptor_heap_desc.NumDescriptors = 2000; + dsv_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + + dsv_descriptor_heap_mgr = new D3DDescriptorHeapManager(&dsv_descriptor_heap_desc, device12); + } + + { + // D3D12TODO: Temporary workaround.. really need to properly suballocate out of render target heap. 
+ D3D12_DESCRIPTOR_HEAP_DESC rtv_descriptor_heap_desc = {}; + rtv_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + rtv_descriptor_heap_desc.NumDescriptors = 1000000; + rtv_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + + rtv_descriptor_heap_mgr = new D3DDescriptorHeapManager(&rtv_descriptor_heap_desc, device12); + } +} + +void CreateRootSignatures() +{ + D3D12_DESCRIPTOR_RANGE desc_range_srv = { + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, // D3D12_DESCRIPTOR_RANGE_TYPE RangeType; + 8, // UINT NumDescriptors; + 0, // UINT BaseShaderRegister; + 0, // UINT RegisterSpace; + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND // UINT OffsetInDescriptorsFromTableStart; + }; + + D3D12_DESCRIPTOR_RANGE desc_range_sampler = { + D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, // D3D12_DESCRIPTOR_RANGE_TYPE RangeType; + 8, // UINT NumDescriptors; + 0, // UINT BaseShaderRegister; + 0, // UINT RegisterSpace; + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND // UINT OffsetInDescriptorsFromTableStart; + }; + + D3D12_ROOT_PARAMETER root_parameters[6]; + + root_parameters[DESCRIPTOR_TABLE_PS_SRV].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_parameters[DESCRIPTOR_TABLE_PS_SRV].DescriptorTable.NumDescriptorRanges = 1; + root_parameters[DESCRIPTOR_TABLE_PS_SRV].DescriptorTable.pDescriptorRanges = &desc_range_srv; + root_parameters[DESCRIPTOR_TABLE_PS_SRV].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + root_parameters[DESCRIPTOR_TABLE_PS_SAMPLER].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_parameters[DESCRIPTOR_TABLE_PS_SAMPLER].DescriptorTable.NumDescriptorRanges = 1; + root_parameters[DESCRIPTOR_TABLE_PS_SAMPLER].DescriptorTable.pDescriptorRanges = &desc_range_sampler; + root_parameters[DESCRIPTOR_TABLE_PS_SAMPLER].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + root_parameters[DESCRIPTOR_TABLE_GS_CBV].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + root_parameters[DESCRIPTOR_TABLE_GS_CBV].Descriptor.RegisterSpace = 0; + 
root_parameters[DESCRIPTOR_TABLE_GS_CBV].Descriptor.ShaderRegister = 0; + root_parameters[DESCRIPTOR_TABLE_GS_CBV].ShaderVisibility = D3D12_SHADER_VISIBILITY_GEOMETRY; + + root_parameters[DESCRIPTOR_TABLE_VS_CBV].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + root_parameters[DESCRIPTOR_TABLE_VS_CBV].Descriptor.RegisterSpace = 0; + root_parameters[DESCRIPTOR_TABLE_VS_CBV].Descriptor.ShaderRegister = 0; + root_parameters[DESCRIPTOR_TABLE_VS_CBV].ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; + + root_parameters[DESCRIPTOR_TABLE_PS_CBVONE].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + root_parameters[DESCRIPTOR_TABLE_PS_CBVONE].Descriptor.RegisterSpace = 0; + root_parameters[DESCRIPTOR_TABLE_PS_CBVONE].Descriptor.ShaderRegister = 0; + root_parameters[DESCRIPTOR_TABLE_PS_CBVONE].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].Descriptor.RegisterSpace = 0; + root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].Descriptor.ShaderRegister = 1; + root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + + // D3D12TODO: Add bounding box UAV to root signature. 
+ + D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {}; + root_signature_desc.pParameters = root_parameters; + root_signature_desc.Flags = + D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS; + + root_signature_desc.NumParameters = ARRAYSIZE(root_parameters); + + ID3DBlob* text_root_signature_blob; + ID3DBlob* text_root_signature_error_blob; + + CheckHR(d3d12_serialize_root_signature(&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1, &text_root_signature_blob, &text_root_signature_error_blob)); + + CheckHR(D3D::device12->CreateRootSignature(0, text_root_signature_blob->GetBufferPointer(), text_root_signature_blob->GetBufferSize(), IID_PPV_ARGS(&default_root_signature))); +} + +void WaitForOutstandingRenderingToComplete() +{ + command_list_mgr->ClearQueueAndWaitForCompletionOfInflightWork(); +} + +void Close() +{ + // we can't release the swapchain while in fullscreen. 
+ s_swap_chain->SetFullscreenState(false, nullptr); + + // Release all back buffer references + for (UINT i = 0; i < ARRAYSIZE(s_backbuf); i++) + { + SAFE_RELEASE(s_backbuf[i]); + } + + D3D::CleanupPersistentD3DTextureResources(); + + command_list_mgr->ImmediatelyDestroyAllResourcesScheduledForDestruction(); + + SAFE_RELEASE(s_swap_chain); + + SAFE_DELETE(command_list_mgr); + command_queue->Release(); + + default_root_signature->Release(); + + SAFE_DELETE(gpu_descriptor_heap_mgr); + SAFE_DELETE(sampler_descriptor_heap_mgr); + SAFE_DELETE(rtv_descriptor_heap_mgr); + SAFE_DELETE(dsv_descriptor_heap_mgr); + + ULONG remaining_references = device12->Release(); + if ((!s_debug_device12 && remaining_references) || (s_debug_device12 && remaining_references > 1)) + { + ERROR_LOG(VIDEO, "Unreleased D3D12 references: %i.", remaining_references); + } + else + { + NOTICE_LOG(VIDEO, "Successfully released all D3D12 device references!"); + } + +#if defined(_DEBUG) || defined(DEBUGFAST) + if (s_debug_device12) + { + --remaining_references; // the debug interface increases the refcount of the device, subtract that. 
+ if (remaining_references) + { + // print out alive objects, but only if we actually have pending references + // note this will also print out internal live objects to the debug console + s_debug_device12->ReportLiveDeviceObjects(D3D12_RLDO_DETAIL); + } + SAFE_RELEASE(s_debug_device12); + } +#endif + + device12 = nullptr; + current_command_list = nullptr; + + // unload DLLs + UnloadDXGI(); + UnloadD3DCompiler(); + UnloadD3D(); +} + +const std::string VertexShaderVersionString() +{ + return "vs_5_0"; +} + +const std::string GeometryShaderVersionString() +{ + return "gs_5_0"; +} + +const std::string PixelShaderVersionString() +{ + return "ps_5_0"; +} + +D3DTexture2D* &GetBackBuffer() +{ + return s_backbuf[s_current_back_buf]; +} + +unsigned int GetBackBufferWidth() +{ + return s_xres; +} + +unsigned int GetBackBufferHeight() +{ + return s_yres; +} + +// Returns the maximum width/height of a texture. +unsigned int GetMaxTextureSize() +{ + return D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; +} + +void Reset() +{ + command_list_mgr->ExecuteQueuedWork(true); + + // release all back buffer references + for (UINT i = 0; i < ARRAYSIZE(s_backbuf); i++) + { + SAFE_RELEASE(s_backbuf[i]); + } + + D3D::command_list_mgr->ImmediatelyDestroyAllResourcesScheduledForDestruction(); + + // resize swapchain buffers + RECT client; + GetClientRect(hWnd, &client); + s_xres = client.right - client.left; + s_yres = client.bottom - client.top; + + CheckHR(s_swap_chain->ResizeBuffers(SWAP_CHAIN_BUFFER_COUNT, s_xres, s_yres, DXGI_FORMAT_R8G8B8A8_UNORM, 0)); + + // recreate back buffer textures + + HRESULT hr = S_OK; + + for (UINT i = 0; i < SWAP_CHAIN_BUFFER_COUNT; i++) + { + ID3D12Resource* buf12 = nullptr; + hr = s_swap_chain->GetBuffer(i, IID_PPV_ARGS(&buf12)); + + CHECK(SUCCEEDED(hr), "Retrieve back buffer texture"); + + s_backbuf[i] = new D3DTexture2D(buf12, + D3D11_BIND_RENDER_TARGET, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + false, + D3D12_RESOURCE_STATE_PRESENT + 
); + + SAFE_RELEASE(buf12); + SetDebugObjectName12(s_backbuf[i]->GetTex12(), "backbuffer texture"); + } + + // The 'about-to-be-presented' back buffer index is always set back to '0' upon ResizeBuffers, just like + // creating a new swap chain. + s_current_back_buf = 0; + + s_backbuf[s_current_back_buf]->TransitionToResourceState(current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); +} + +bool BeginFrame() +{ + if (s_frame_in_progress) + { + PanicAlert("BeginFrame called although a frame is already in progress"); + return false; + } + s_frame_in_progress = true; + return (device12 != nullptr); +} + +void EndFrame() +{ + if (!s_frame_in_progress) + { + PanicAlert("EndFrame called although no frame is in progress"); + return; + } + s_frame_in_progress = false; +} + +void Present() +{ + // The Present function contains logic to ensure we never Present faster than Windows can + // send to the monitor. If we Present too fast, the Present call will start to block, and we'll be + // throttled - obviously not desired if vsync is disabled and the emulated CPU speed is > 100%. + + // The throttling logic ensures that we don't Present more than twice in a given monitor vsync. + // This is accomplished through timing data - there is a programmatic way to determine if a + // Present call will block, however after investigation that is not feasible here (without invasive + // workarounds), due to the fact this method does not actually call Present - we just queue a Present + // command for the background thread to dispatch. + + // The monitor refresh rate is determined in Create(). 
+ + static LARGE_INTEGER s_last_present_qpc; + + LARGE_INTEGER current_qpc; + QueryPerformanceCounter(¤t_qpc); + + const double time_elapsed_since_last_present = static_cast(current_qpc.QuadPart - s_last_present_qpc.QuadPart) / s_qpc_frequency.QuadPart; + + unsigned int present_flags = 0; + + if (g_ActiveConfig.IsVSync() == false && + time_elapsed_since_last_present < (1.0 / static_cast(s_monitor_refresh_rate)) / 2.0 + ) + { + present_flags = DXGI_PRESENT_TEST; // Causes Present to be a no-op. + } + else + { + s_last_present_qpc = current_qpc; + + s_backbuf[s_current_back_buf]->TransitionToResourceState(current_command_list, D3D12_RESOURCE_STATE_PRESENT); + s_current_back_buf = (s_current_back_buf + 1) % SWAP_CHAIN_BUFFER_COUNT; + } + + command_list_mgr->ExecuteQueuedWorkAndPresent(s_swap_chain, g_ActiveConfig.IsVSync() ? 1 : 0, present_flags); + + command_list_mgr->m_cpu_access_last_frame = command_list_mgr->m_cpu_access_this_frame; + command_list_mgr->m_cpu_access_this_frame = false; + command_list_mgr->m_draws_since_last_execution = 0; +} + +HRESULT SetFullscreenState(bool enable_fullscreen) +{ + return S_OK; +} + +HRESULT GetFullscreenState(bool* fullscreen_state) +{ + // Fullscreen exclusive intentionally not supported in DX12 backend. No performance + // difference between it and windowed full-screen due to usage of a FLIP swap chain. + *fullscreen_state = false; + return S_OK; +} + +} // namespace D3D + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.h b/Source/Core/VideoBackends/D3D12/D3DBase.h new file mode 100644 index 0000000000..b3ae9555a1 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DBase.h @@ -0,0 +1,158 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#define USE_D3D12_QUEUED_COMMAND_LISTS + +// D3D12TODO: Support this from Graphics Settings, not require a recompile to enable. 
+//#define USE_D3D12_DEBUG_LAYER
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include "../../Externals/d3dx12/d3dx12.h"
+
+#include "Common/Common.h"
+#include "Common/CommonTypes.h"
+#include "Common/MsgHandler.h"
+
+namespace DX12
+{
+
+// SAFE_RELEASE: calls Release() on x when x is non-null, then sets x to nullptr.
+// SAFE_DELETE / SAFE_DELETE_ARRAY: delete (or delete[]) x, then set x to nullptr.
+// CHECK: when cond is false, breaks into the debugger (__debugbreak) and raises a PanicAlert
+// that names the enclosing function, file and line followed by the formatted Message.
+// Execution continues after the alert; the macro neither returns nor throws.
+// NOTE(review): these macros expand to a bare block / bare `if` statement rather than a
+// do { } while (0) wrapper, so they are not safe as the body of an unbraced if/else.
+#define SAFE_RELEASE(x) { if (x) (x)->Release(); (x) = nullptr; }
+#define SAFE_DELETE(x) { delete (x); (x) = nullptr; }
+#define SAFE_DELETE_ARRAY(x) { delete[] (x); (x) = nullptr; }
+#define CHECK(cond, Message, ...) if (!(cond)) { __debugbreak(); PanicAlert(__FUNCTION__ " failed in %s at line %d: " Message, __FILE__, __LINE__, __VA_ARGS__); }
+
+// DEBUGCHECK is for high-frequency functions that we only want to check on debug builds.
+// Unlike CHECK it does not call __debugbreak(). When neither _DEBUG nor DEBUGFAST is defined
+// it expands to nothing, so cond is not evaluated at all in those builds.
+#if defined(_DEBUG) || defined(DEBUGFAST)
+#define DEBUGCHECK(cond, Message, ...) if (!(cond)) { PanicAlert(__FUNCTION__ " failed in %s at line %d: " Message, __FILE__, __LINE__, __VA_ARGS__); }
+#else
+#define DEBUGCHECK(cond, Message, ...)
+#endif
+
+// Reports a failed HRESULT through CHECK (debug break + PanicAlert).
+// It does not propagate the error: callers continue running after a failure.
+inline void CheckHR(HRESULT hr)
+{
+	CHECK(SUCCEEDED(hr), "Failed HRESULT.");
+}
+
+class D3DCommandListManager;
+class D3DDescriptorHeapManager;
+class D3DTexture2D;
+
+namespace D3D
+{
+
+// NOTE(review): presumably the root-parameter slots of the default root signature;
+// confirm against CreateRootSignatures(). Slot 4 was once reserved for a PS UAV table
+// (see the commented-out define) and is now used by DESCRIPTOR_TABLE_PS_CBVONE.
+#define DESCRIPTOR_TABLE_PS_SRV 0
+#define DESCRIPTOR_TABLE_PS_SAMPLER 1
+#define DESCRIPTOR_TABLE_GS_CBV 2
+#define DESCRIPTOR_TABLE_VS_CBV 3
+// #define DESCRIPTOR_TABLE_PS_UAV 4
+#define DESCRIPTOR_TABLE_PS_CBVONE 4
+#define DESCRIPTOR_TABLE_PS_CBVTWO 5
+
+// Load/unload pairs for the DXGI, D3D and D3D compiler libraries.
+HRESULT LoadDXGI();
+HRESULT LoadD3D();
+HRESULT LoadD3DCompiler();
+void UnloadDXGI();
+void UnloadD3D();
+void UnloadD3DCompiler();
+
+D3D_FEATURE_LEVEL GetFeatureLevel(IDXGIAdapter* adapter);
+std::vector EnumAAModes(IDXGIAdapter* adapter);
+
+bool AlertUserIfSelectedAdapterDoesNotSupportD3D12();
+
+HRESULT Create(HWND wnd);
+
+void CreateDescriptorHeaps();
+void CreateRootSignatures();
+
+void WaitForOutstandingRenderingToComplete();
+void Close();
+
+extern ID3D12Device* device12;
+
+extern unsigned int resource_descriptor_size;
+extern unsigned int sampler_descriptor_size;
+extern D3DDescriptorHeapManager* gpu_descriptor_heap_mgr; +extern D3DDescriptorHeapManager* sampler_descriptor_heap_mgr; +extern D3DDescriptorHeapManager* dsv_descriptor_heap_mgr; +extern D3DDescriptorHeapManager* rtv_descriptor_heap_mgr; +extern std::array gpu_descriptor_heaps; + + +extern D3D12_CPU_DESCRIPTOR_HANDLE null_srv_cpu; +extern D3D12_CPU_DESCRIPTOR_HANDLE null_srv_cpu_shadow; + +extern D3DCommandListManager* command_list_mgr; +extern ID3D12GraphicsCommandList* current_command_list; + +extern ID3D12RootSignature* default_root_signature; + +extern HWND hWnd; + +void Reset(); +bool BeginFrame(); +void EndFrame(); +void Present(); + +unsigned int GetBackBufferWidth(); +unsigned int GetBackBufferHeight(); +D3DTexture2D*& GetBackBuffer(); +const std::string PixelShaderVersionString(); +const std::string GeometryShaderVersionString(); +const std::string VertexShaderVersionString(); + +unsigned int GetMaxTextureSize(); + +HRESULT SetFullscreenState(bool enable_fullscreen); +HRESULT GetFullscreenState(bool* fullscreen_state); + +// This function will assign a name to the given resource. +// The DirectX debug layer will make it easier to identify resources that way, +// e.g. when listing up all resources who have unreleased references. +static void SetDebugObjectName12(ID3D12Resource* resource, LPCSTR name) +{ + HRESULT hr = resource->SetPrivateData(WKPDID_D3DDebugObjectName, (UINT)(name ? 
strlen(name) : 0), name); + if (FAILED(hr)) + { + throw std::exception("Failure setting name for D3D12 object"); + } +} + +static std::string GetDebugObjectName12(ID3D12Resource* resource) +{ + std::string name; + if (resource) + { + UINT size = 0; + resource->GetPrivateData(WKPDID_D3DDebugObjectName, &size, nullptr); //get required size + name.resize(size); + resource->GetPrivateData(WKPDID_D3DDebugObjectName, &size, const_cast(name.data())); + } +} + +} // namespace D3D + +using CREATEDXGIFACTORY = HRESULT(WINAPI*)(REFIID, void**); +extern CREATEDXGIFACTORY create_dxgi_factory; + +using D3D12CREATEDEVICE = HRESULT(WINAPI*)(IUnknown*, D3D_FEATURE_LEVEL, REFIID, void**); +using D3D12SERIALIZEROOTSIGNATURE = HRESULT(WINAPI*)(const D3D12_ROOT_SIGNATURE_DESC* pRootSignature, D3D_ROOT_SIGNATURE_VERSION Version, ID3DBlob** ppBlob, ID3DBlob** ppErrorBlob); +using D3D12GETDEBUGINTERFACE = HRESULT(WINAPI*)(REFIID riid, void** ppvDebug); + +using D3DREFLECT = HRESULT(WINAPI*)(LPCVOID, SIZE_T, REFIID, void**); +extern D3DREFLECT d3d_reflect; + +using D3DCREATEBLOB = HRESULT(WINAPI*)(SIZE_T, ID3DBlob**); +extern D3DCREATEBLOB d3d_create_blob; + +extern pD3DCompile d3d_compile; + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp new file mode 100644 index 0000000000..fc4a98ae84 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp @@ -0,0 +1,355 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include +#include + +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" +#include "VideoBackends/D3D12/D3DQueuedCommandList.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DTexture.h" + +#include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/ShaderConstantsManager.h" +#include "VideoBackends/D3D12/VertexManager.h" + +static constexpr unsigned int COMMAND_ALLOCATORS_PER_LIST = 2; + +namespace DX12 +{ +extern StateCache gx_state_cache; + +D3DCommandListManager::D3DCommandListManager( + D3D12_COMMAND_LIST_TYPE command_list_type, + ID3D12Device* device, + ID3D12CommandQueue* command_queue + ) : + m_device(device), + m_command_queue(command_queue) +{ + // Create two lists, with two command allocators each. This corresponds to up to two frames in flight at once. + m_current_command_allocator = 0; + m_current_command_allocator_list = 0; + for (UINT i = 0; i < COMMAND_ALLOCATORS_PER_LIST; i++) + { + for (UINT j = 0; j < m_command_allocator_lists.size(); j++) + { + ID3D12CommandAllocator* command_allocator = nullptr; + + CheckHR(m_device->CreateCommandAllocator(command_list_type, IID_PPV_ARGS(&command_allocator))); + m_command_allocator_lists[j].push_back(command_allocator); + } + } + + // Create backing command list. + CheckHR(m_device->CreateCommandList(0, command_list_type, m_command_allocator_lists[m_current_command_allocator_list][0], nullptr, IID_PPV_ARGS(&m_backing_command_list))); + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list = new ID3D12QueuedCommandList(m_backing_command_list, m_command_queue); +#endif + + // Create fence that will be used to measure GPU progress of app rendering requests (e.g. CPU readback of GPU data). 
+ m_queue_fence_value = 0; + CheckHR(m_device->CreateFence(m_queue_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_queue_fence))); + + // Create fence that will be used internally by D3DCommandListManager for frame-level resource tracking. + m_queue_frame_fence_value = 0; + CheckHR(m_device->CreateFence(m_queue_frame_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_queue_frame_fence))); + + // Create event that will be used for waiting on CPU until a fence is signaled by GPU. + m_wait_on_cpu_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); + + // Pre-size the deferred destruction lists. + for (UINT i = 0; i < m_deferred_destruction_lists.size(); i++) + { + m_deferred_destruction_lists[i].reserve(200); + } + + m_current_deferred_destruction_list = 0; +} + +void D3DCommandListManager::SetInitialCommandListState() +{ + ID3D12GraphicsCommandList* command_list = nullptr; + GetCommandList(&command_list); + + command_list->SetDescriptorHeaps(static_cast(D3D::gpu_descriptor_heaps.size()), D3D::gpu_descriptor_heaps.data()); + command_list->SetGraphicsRootSignature(D3D::default_root_signature); + + if (g_renderer) + { + // It is possible that we change command lists in the middle of the frame. In that case, restore + // the viewport/scissor to the current console GPU state. 
+ g_renderer->RestoreAPIState(); + } + + m_command_list_dirty_state = UINT_MAX; + + command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + m_command_list_current_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + + if (g_vertex_manager) + reinterpret_cast(g_vertex_manager.get())->SetIndexBuffer(); +} + +void D3DCommandListManager::GetCommandList(ID3D12GraphicsCommandList** command_list) const +{ +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + *command_list = this->m_queued_command_list; +#else + *command_list = this->m_backing_command_list; +#endif +} + +void D3DCommandListManager::ExecuteQueuedWork(bool wait_for_gpu_completion) +{ + m_queue_fence_value++; + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + CheckHR(m_queued_command_list->Close()); + m_queued_command_list->QueueExecute(); + + m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); + + ResetCommandListWithIdleCommandAllocator(); + + m_queued_command_list->ProcessQueuedItems(); +#else + CheckHR(m_backing_command_list->Close()); + + ID3D12CommandList* const commandListsToExecute[1] = { m_backing_command_list }; + m_command_queue->ExecuteCommandLists(1, commandListsToExecute); + + if (wait_for_gpu_completion) + { + CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); + } + + if (m_current_command_allocator == 0) + { + PerformGpuRolloverChecks(); + } + + ResetCommandListWithIdleCommandAllocator(); +#endif + + for (auto it : m_queue_fence_callbacks) + it.second(it.first, m_queue_fence_value); + + SetInitialCommandListState(); + + if (wait_for_gpu_completion) + { + WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); + } +} + +void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags) +{ + m_queue_fence_value++; + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + CheckHR(m_queued_command_list->Close()); + m_queued_command_list->QueueExecute(); + m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, 
m_queue_fence_value); + m_queued_command_list->QueuePresent(swap_chain, sync_interval, flags); + m_queued_command_list->ProcessQueuedItems(true); + + if (m_current_command_allocator == 0) + { + PerformGpuRolloverChecks(); + } + + m_current_command_allocator = (m_current_command_allocator + 1) % m_command_allocator_lists[m_current_command_allocator_list].size(); + + ResetCommandListWithIdleCommandAllocator(); + + SetInitialCommandListState(); +#else + ExecuteQueuedWork(); + m_command_queue->Signal(m_queue_fence, m_queue_fence_value); + CheckHR(swap_chain->Present(sync_interval, flags)); +#endif + + for (auto it : m_queue_fence_callbacks) + it.second(it.first, m_queue_fence_value); +} + +void D3DCommandListManager::WaitForQueuedWorkToBeExecutedOnGPU() +{ + // Wait for GPU to finish all outstanding work. + m_queue_fence_value++; + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list->QueueExecute(); + m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); + + m_queued_command_list->ProcessQueuedItems(true); +#else + CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); +#endif + + WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); +} + +void D3DCommandListManager::PerformGpuRolloverChecks() +{ + // Insert fence to measure GPU progress, ensure we aren't using in-use command allocators. + if (m_queue_frame_fence->GetCompletedValue() < m_queue_frame_fence_value) + { + WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value); + } + + // We now know that the previous 'set' of command lists has completed on GPU, and it is safe to + // release resources / start back at beginning of command allocator list. 
+ + // Begin Deferred Resource Destruction + UINT safe_to_delete_deferred_destruction_list = (m_current_deferred_destruction_list - 1) % m_deferred_destruction_lists.size(); + + for (UINT i = 0; i < m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].size(); i++) + { + CHECK(m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list][i]->Release() == 0, "Resource leak."); + } + + m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].clear(); + + m_current_deferred_destruction_list = (m_current_deferred_destruction_list + 1) % m_deferred_destruction_lists.size(); + // End Deferred Resource Destruction + + + // Begin Command Allocator Resets + UINT safe_to_reset_command_allocator_list = (m_current_command_allocator_list - 1) % m_command_allocator_lists.size(); + + for (UINT i = 0; i < m_command_allocator_lists[safe_to_reset_command_allocator_list].size(); i++) + { + CheckHR(m_command_allocator_lists[safe_to_reset_command_allocator_list][i]->Reset()); + } + + m_current_command_allocator_list = (m_current_command_allocator_list + 1) % m_command_allocator_lists.size(); + // End Command Allocator Resets + + m_queue_frame_fence_value++; +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value); +#else + CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value)); +#endif +} + +void D3DCommandListManager::ResetCommandListWithIdleCommandAllocator() +{ +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + ID3D12QueuedCommandList* command_list = m_queued_command_list; +#else + ID3D12GraphicsCommandList* command_list = m_backing_command_list; +#endif + + CheckHR(command_list->Reset(m_command_allocator_lists[m_current_command_allocator_list][m_current_command_allocator], nullptr)); +} + +void D3DCommandListManager::DestroyResourceAfterCurrentCommandListExecuted(ID3D12Resource* resource) +{ + CHECK(resource, "Null resource being inserted!"); + + 
m_deferred_destruction_lists[m_current_deferred_destruction_list].push_back(resource); +} + +void D3DCommandListManager::ImmediatelyDestroyAllResourcesScheduledForDestruction() +{ + for (auto& destruction_list : m_deferred_destruction_lists) + { + for (auto& resource : destruction_list) + resource->Release(); + + destruction_list.clear(); + } +} + +void D3DCommandListManager::ClearQueueAndWaitForCompletionOfInflightWork() +{ + // Wait for GPU to finish all outstanding work. + m_queue_fence_value++; +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list->ClearQueue(); // Waits for currently-processing work to finish, then clears queue. + m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); + m_queued_command_list->ProcessQueuedItems(true); +#else + CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); +#endif + WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); +} + +D3DCommandListManager::~D3DCommandListManager() +{ + ImmediatelyDestroyAllResourcesScheduledForDestruction(); + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list->Release(); +#endif + m_backing_command_list->Release(); + + for (auto& allocator_list : m_command_allocator_lists) + { + for (auto& resource : allocator_list) + resource->Release(); + } + + m_queue_fence->Release(); + m_queue_frame_fence->Release(); + + CloseHandle(m_wait_on_cpu_fence_event); +} + +void D3DCommandListManager::WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value) +{ + CheckHR(fence->SetEventOnCompletion(fence_value, m_wait_on_cpu_fence_event)); + + WaitForSingleObject(m_wait_on_cpu_fence_event, INFINITE); +} + +void D3DCommandListManager::SetCommandListDirtyState(unsigned int command_list_state, bool dirty) +{ + if (dirty) + m_command_list_dirty_state |= command_list_state; + else + m_command_list_dirty_state &= ~command_list_state; +} + +bool D3DCommandListManager::GetCommandListDirtyState(COMMAND_LIST_STATE command_list_state) const +{ + return 
((m_command_list_dirty_state & command_list_state) != 0); +} + +void D3DCommandListManager::SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY primitive_topology) +{ + m_command_list_current_topology = primitive_topology; +} + +D3D_PRIMITIVE_TOPOLOGY D3DCommandListManager::GetCommandListPrimitiveTopology() const +{ + return m_command_list_current_topology; +} + +void D3DCommandListManager::CPUAccessNotify() +{ + m_cpu_access_last_frame = true; + m_cpu_access_this_frame = true; + m_draws_since_last_execution = 0; +}; + +ID3D12Fence* D3DCommandListManager::RegisterQueueFenceCallback(void* owning_object, PFN_QUEUE_FENCE_CALLBACK* callback_function) +{ + m_queue_fence_callbacks[owning_object] = callback_function; + + return m_queue_fence; +} + +void D3DCommandListManager::RemoveQueueFenceCallback(void* owning_object) +{ + m_queue_fence_callbacks.erase(owning_object); +} + +} // namespace DX12 \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h new file mode 100644 index 0000000000..b9622df5fc --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h @@ -0,0 +1,98 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "D3DQueuedCommandList.h" + +namespace DX12 +{ + +enum COMMAND_LIST_STATE +{ + COMMAND_LIST_STATE_GS_CBV = 1, + COMMAND_LIST_STATE_PS_CBV = 2, + COMMAND_LIST_STATE_VS_CBV = 4, + COMMAND_LIST_STATE_PSO = 8, + COMMAND_LIST_STATE_SAMPLERS = 16, + COMMAND_LIST_STATE_VERTEX_BUFFER = 32 +}; + +// This class provides an abstraction for D3D12 descriptor heaps. 
+class D3DCommandListManager +{ +public: + + D3DCommandListManager(D3D12_COMMAND_LIST_TYPE command_list_type, ID3D12Device* device, ID3D12CommandQueue* command_queue); + ~D3DCommandListManager(); + + void SetInitialCommandListState(); + + void GetCommandList(ID3D12GraphicsCommandList** command_list) const; + + void ExecuteQueuedWork(bool wait_for_gpu_completion = false); + void ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags); + + void WaitForQueuedWorkToBeExecutedOnGPU(); + + void ClearQueueAndWaitForCompletionOfInflightWork(); + void DestroyResourceAfterCurrentCommandListExecuted(ID3D12Resource* resource); + void ImmediatelyDestroyAllResourcesScheduledForDestruction(); + + void SetCommandListDirtyState(unsigned int command_list_state, bool dirty); + bool GetCommandListDirtyState(COMMAND_LIST_STATE command_list_state) const; + + void SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY primitive_topology); + D3D_PRIMITIVE_TOPOLOGY GetCommandListPrimitiveTopology() const; + + unsigned int m_draws_since_last_execution = 0; + bool m_cpu_access_last_frame = false; + bool m_cpu_access_this_frame = false; + + void CPUAccessNotify(); + + // Allow other components to register for a callback each time a fence is queued. 
+ using PFN_QUEUE_FENCE_CALLBACK = void(void* owning_object, UINT64 fence_value); + ID3D12Fence* RegisterQueueFenceCallback(void* owning_object, PFN_QUEUE_FENCE_CALLBACK* callback_function); + void RemoveQueueFenceCallback(void* owning_object); + + void WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value); + +private: + + void PerformGpuRolloverChecks(); + void ResetCommandListWithIdleCommandAllocator(); + + unsigned int m_command_list_dirty_state = UINT_MAX; + D3D_PRIMITIVE_TOPOLOGY m_command_list_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + + HANDLE m_wait_on_cpu_fence_event; + + ID3D12Device* m_device; + ID3D12CommandQueue* m_command_queue; + UINT64 m_queue_fence_value; + ID3D12Fence* m_queue_fence; + UINT64 m_queue_frame_fence_value; + ID3D12Fence* m_queue_frame_fence; + + std::map m_queue_fence_callbacks; + + UINT m_current_command_allocator; + UINT m_current_command_allocator_list; + std::array, 2> m_command_allocator_lists; + + ID3D12GraphicsCommandList* m_backing_command_list; + ID3D12QueuedCommandList* m_queued_command_list; + + ID3D12RootSignature* m_default_root_signature; + + UINT m_current_deferred_destruction_list; + std::array, 2> m_deferred_destruction_lists; +}; + +} // namespace \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3DDescriptorHeapManager.cpp b/Source/Core/VideoBackends/D3D12/D3DDescriptorHeapManager.cpp new file mode 100644 index 0000000000..25c1e7c79c --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DDescriptorHeapManager.cpp @@ -0,0 +1,169 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" +#include "VideoBackends/D3D12/D3DState.h" + +namespace DX12 +{ + +bool operator==(const D3DDescriptorHeapManager::SamplerStateSet& lhs, const D3DDescriptorHeapManager::SamplerStateSet& rhs) +{ + // D3D12TODO: Do something more efficient than this. + return (!memcmp(&lhs, &rhs, sizeof(D3DDescriptorHeapManager::SamplerStateSet))); +} + +D3DDescriptorHeapManager::D3DDescriptorHeapManager(D3D12_DESCRIPTOR_HEAP_DESC* desc, ID3D12Device* device, unsigned int temporarySlots) : + m_device(device) +{ + CheckHR(device->CreateDescriptorHeap(desc, IID_PPV_ARGS(&m_descriptor_heap))); + + m_descriptor_heap_size = desc->NumDescriptors; + m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(desc->Type); + m_gpu_visible = (desc->Flags == D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); + + if (m_gpu_visible) + { + D3D12_DESCRIPTOR_HEAP_DESC cpu_shadow_heap_desc = *desc; + cpu_shadow_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + + CheckHR(device->CreateDescriptorHeap(&cpu_shadow_heap_desc, IID_PPV_ARGS(&m_descriptor_heap_cpu_shadow))); + + m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart(); + m_heap_base_gpu_cpu_shadow = m_descriptor_heap_cpu_shadow->GetCPUDescriptorHandleForHeapStart(); + } + + m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart(); + + m_first_temporary_slot_in_heap = m_descriptor_heap_size - temporarySlots; + m_current_temporary_offset_in_heap = m_first_temporary_slot_in_heap; +} + +bool D3DDescriptorHeapManager::Allocate(D3D12_CPU_DESCRIPTOR_HANDLE* cpu_handle, D3D12_GPU_DESCRIPTOR_HANDLE* gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE* gpu_handle_cpu_shadow, bool temporary) +{ + bool allocated_from_current_heap = true; + + if (m_current_permanent_offset_in_heap + 1 >= m_first_temporary_slot_in_heap) + { + // If out of room in the heap, start back at beginning. 
+ allocated_from_current_heap = false; + m_current_permanent_offset_in_heap = 0; + } + + CHECK(!gpu_handle || (gpu_handle && m_gpu_visible), "D3D12_GPU_DESCRIPTOR_HANDLE used on non-GPU-visible heap."); + + if (temporary && m_current_temporary_offset_in_heap + 1 >= m_descriptor_heap_size) + { + m_current_temporary_offset_in_heap = m_first_temporary_slot_in_heap; + } + + unsigned int heapOffsetToUse = temporary ? m_current_temporary_offset_in_heap : m_current_permanent_offset_in_heap; + + if (m_gpu_visible) + { + gpu_handle->ptr = m_heap_base_gpu.ptr + heapOffsetToUse * m_descriptor_increment_size; + + if (gpu_handle_cpu_shadow) + gpu_handle_cpu_shadow->ptr = m_heap_base_gpu_cpu_shadow.ptr + heapOffsetToUse * m_descriptor_increment_size; + } + + cpu_handle->ptr = m_heap_base_cpu.ptr + heapOffsetToUse * m_descriptor_increment_size; + + if (!temporary) + { + m_current_permanent_offset_in_heap++; + } + + return allocated_from_current_heap; +} + +bool D3DDescriptorHeapManager::AllocateGroup(D3D12_CPU_DESCRIPTOR_HANDLE* base_cpu_handle, unsigned int num_handles, D3D12_GPU_DESCRIPTOR_HANDLE* base_gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE* base_gpu_handle_cpu_shadow, bool temporary) +{ + bool allocated_from_current_heap = true; + + if (m_current_permanent_offset_in_heap + num_handles >= m_first_temporary_slot_in_heap) + { + // If out of room in the heap, start back at beginning. + allocated_from_current_heap = false; + m_current_permanent_offset_in_heap = 0; + } + + CHECK(!base_gpu_handle || (base_gpu_handle && m_gpu_visible), "D3D12_GPU_DESCRIPTOR_HANDLE used on non-GPU-visible heap."); + + if (temporary && m_current_temporary_offset_in_heap + num_handles >= m_descriptor_heap_size) + { + m_current_temporary_offset_in_heap = m_first_temporary_slot_in_heap; + } + + unsigned int heapOffsetToUse = temporary ? 
m_current_temporary_offset_in_heap : m_current_permanent_offset_in_heap; + + if (m_gpu_visible) + { + base_gpu_handle->ptr = m_heap_base_gpu.ptr + heapOffsetToUse * m_descriptor_increment_size; + + if (base_gpu_handle_cpu_shadow) + base_gpu_handle_cpu_shadow->ptr = m_heap_base_gpu_cpu_shadow.ptr + heapOffsetToUse * m_descriptor_increment_size; + } + + base_cpu_handle->ptr = m_heap_base_cpu.ptr + heapOffsetToUse * m_descriptor_increment_size; + + if (temporary) + { + m_current_temporary_offset_in_heap += num_handles; + } + else + { + m_current_permanent_offset_in_heap += num_handles; + } + + return allocated_from_current_heap; +} + +D3D12_GPU_DESCRIPTOR_HANDLE D3DDescriptorHeapManager::GetHandleForSamplerGroup(SamplerState* sampler_state, unsigned int num_sampler_samples) +{ + auto it = m_sampler_map.find(*reinterpret_cast(sampler_state)); + + if (it == m_sampler_map.end()) + { + D3D12_CPU_DESCRIPTOR_HANDLE base_sampler_cpu_handle; + D3D12_GPU_DESCRIPTOR_HANDLE base_sampler_gpu_handle; + + bool allocatedFromExistingHeap = AllocateGroup(&base_sampler_cpu_handle, num_sampler_samples, &base_sampler_gpu_handle); + + if (!allocatedFromExistingHeap) + { + m_sampler_map.clear(); + } + + for (unsigned int i = 0; i < num_sampler_samples; i++) + { + D3D12_CPU_DESCRIPTOR_HANDLE destinationDescriptor; + destinationDescriptor.ptr = base_sampler_cpu_handle.ptr + i * D3D::sampler_descriptor_size; + + D3D::device12->CreateSampler(&StateCache::GetDesc12(sampler_state[i]), destinationDescriptor); + } + + m_sampler_map[*reinterpret_cast(sampler_state)] = base_sampler_gpu_handle; + + return base_sampler_gpu_handle; + } + else + { + return it->second; + } +} + +ID3D12DescriptorHeap* D3DDescriptorHeapManager::GetDescriptorHeap() const +{ + return m_descriptor_heap; +} + +D3DDescriptorHeapManager::~D3DDescriptorHeapManager() +{ + SAFE_RELEASE(m_descriptor_heap); + SAFE_RELEASE(m_descriptor_heap_cpu_shadow); +} + +} // namespace DX12 \ No newline at end of file diff --git 
a/Source/Core/VideoBackends/D3D12/D3DDescriptorHeapManager.h b/Source/Core/VideoBackends/D3D12/D3DDescriptorHeapManager.h new file mode 100644 index 0000000000..4ff1fa789f --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DDescriptorHeapManager.h @@ -0,0 +1,72 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "VideoBackends/D3D12/D3DState.h" + +namespace DX12 +{ + +// This class provides an abstraction for D3D12 descriptor heaps. +class D3DDescriptorHeapManager +{ +public: + + D3DDescriptorHeapManager(D3D12_DESCRIPTOR_HEAP_DESC* desc, ID3D12Device* device, unsigned int temporarySlots = 0); + ~D3DDescriptorHeapManager(); + + bool Allocate(D3D12_CPU_DESCRIPTOR_HANDLE* cpu_handle, D3D12_GPU_DESCRIPTOR_HANDLE* gpu_handle = nullptr, D3D12_CPU_DESCRIPTOR_HANDLE* gpu_handle_cpu_shadow = nullptr, bool temporary = false); + bool AllocateGroup(D3D12_CPU_DESCRIPTOR_HANDLE* cpu_handles, unsigned int num_handles, D3D12_GPU_DESCRIPTOR_HANDLE* gpu_handles = nullptr, D3D12_CPU_DESCRIPTOR_HANDLE* gpu_handle_cpu_shadows = nullptr, bool temporary = false); + + D3D12_GPU_DESCRIPTOR_HANDLE GetHandleForSamplerGroup(SamplerState* sampler_state, unsigned int num_sampler_samples); + + ID3D12DescriptorHeap* GetDescriptorHeap() const; + + struct SamplerStateSet + { + SamplerState desc0; + SamplerState desc1; + SamplerState desc2; + SamplerState desc3; + SamplerState desc4; + SamplerState desc5; + SamplerState desc6; + SamplerState desc7; + }; + +private: + + ID3D12Device* m_device = nullptr; + ID3D12DescriptorHeap* m_descriptor_heap = nullptr; + ID3D12DescriptorHeap* m_descriptor_heap_cpu_shadow = nullptr; + + D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu; + D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu; + D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_gpu_cpu_shadow; + + struct hash_sampler_desc + { + size_t operator()(const SamplerStateSet sampler_state_set) const + { + return 
sampler_state_set.desc0.hex; + } + }; + + std::unordered_map m_sampler_map; + + unsigned int m_current_temporary_offset_in_heap = 0; + unsigned int m_current_permanent_offset_in_heap = 0; + + unsigned int m_descriptor_increment_size; + unsigned int m_descriptor_heap_size; + bool m_gpu_visible; + + unsigned int m_first_temporary_slot_in_heap; +}; + +} // namespace \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp new file mode 100644 index 0000000000..b3f672a8ed --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp @@ -0,0 +1,1315 @@ +// Copyright hdcmeta +// Dual-Licensed under MIT and GPLv2+ +// Refer to the license.txt file included. + +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DQueuedCommandList.h" + +namespace DX12 +{ + +template +constexpr size_t BufferOffsetForQueueItemType() +{ + return sizeof(T) + sizeof(D3DQueueItemType) * 2; +} + +DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param) +{ + ID3D12QueuedCommandList* parent_queued_command_list = static_cast(param); + ID3D12GraphicsCommandList* command_list = parent_queued_command_list->m_command_list; + + byte* queue_array = parent_queued_command_list->m_queue_array; + + unsigned int queue_array_front = 0; + + while (true) + { + WaitForSingleObject(parent_queued_command_list->m_begin_execution_event, INFINITE); + + byte* item = &queue_array[queue_array_front]; + + while (true) + { + switch (reinterpret_cast(item)->Type) + { + case D3DQueueItemType::ClearDepthStencilView: + { + command_list->ClearDepthStencilView(reinterpret_cast(item)->ClearDepthStencilView.DepthStencilView, D3D12_CLEAR_FLAG_DEPTH, 0.f, 0, 0, nullptr); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::ClearRenderTargetView: + { + float clearColor[4] = { 0.f, 0.f, 0.f, 1.f }; + 
command_list->ClearRenderTargetView(reinterpret_cast(item)->ClearRenderTargetView.RenderTargetView, clearColor, 0, nullptr); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::CopyBufferRegion: + { + command_list->CopyBufferRegion( + reinterpret_cast(item)->CopyBufferRegion.pDstBuffer, + reinterpret_cast(item)->CopyBufferRegion.DstOffset, + reinterpret_cast(item)->CopyBufferRegion.pSrcBuffer, + reinterpret_cast(item)->CopyBufferRegion.SrcOffset, + reinterpret_cast(item)->CopyBufferRegion.NumBytes + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::CopyTextureRegion: + { + // If box is completely empty, assume that the original API call has a NULL box (which means + // copy from the entire resource. + + D3D12_BOX* src_box = &reinterpret_cast(item)->CopyTextureRegion.srcBox; + + // Front/Back never used, so don't need to check. + bool empty_box = + src_box->bottom == 0 && + src_box->left == 0 && + src_box->right == 0 && + src_box->top == 0; + + command_list->CopyTextureRegion( + &reinterpret_cast(item)->CopyTextureRegion.dst, + reinterpret_cast(item)->CopyTextureRegion.DstX, + reinterpret_cast(item)->CopyTextureRegion.DstY, + reinterpret_cast(item)->CopyTextureRegion.DstZ, + &reinterpret_cast(item)->CopyTextureRegion.src, + empty_box ? 
+ nullptr : src_box + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::DrawIndexedInstanced: + { + command_list->DrawIndexedInstanced( + reinterpret_cast(item)->DrawIndexedInstanced.IndexCount, + 1, + reinterpret_cast(item)->DrawIndexedInstanced.StartIndexLocation, + reinterpret_cast(item)->DrawIndexedInstanced.BaseVertexLocation, + 0 + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::DrawInstanced: + { + command_list->DrawInstanced( + reinterpret_cast(item)->DrawInstanced.VertexCount, + 1, + reinterpret_cast(item)->DrawInstanced.StartVertexLocation, + 0 + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::IASetPrimitiveTopology: + { + command_list->IASetPrimitiveTopology(reinterpret_cast(item)->IASetPrimitiveTopology.PrimitiveTopology); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::ResourceBarrier: + { + command_list->ResourceBarrier(1, &reinterpret_cast(item)->ResourceBarrier.barrier); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::RSSetScissorRects: + { + D3D12_RECT rect = { + reinterpret_cast(item)->RSSetScissorRects.left, + reinterpret_cast(item)->RSSetScissorRects.top, + reinterpret_cast(item)->RSSetScissorRects.right, + reinterpret_cast(item)->RSSetScissorRects.bottom + }; + + command_list->RSSetScissorRects(1, &rect); + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::RSSetViewports: + { + D3D12_VIEWPORT viewport = { + reinterpret_cast(item)->RSSetViewports.TopLeftX, + reinterpret_cast(item)->RSSetViewports.TopLeftY, + reinterpret_cast(item)->RSSetViewports.Width, + reinterpret_cast(item)->RSSetViewports.Height, + reinterpret_cast(item)->RSSetViewports.MinDepth, + reinterpret_cast(item)->RSSetViewports.MaxDepth + }; + + command_list->RSSetViewports(1, &viewport); + item += BufferOffsetForQueueItemType(); + break; + } + + case 
D3DQueueItemType::SetDescriptorHeaps: + { + command_list->SetDescriptorHeaps( + reinterpret_cast(item)->SetDescriptorHeaps.NumDescriptorHeaps, + reinterpret_cast(item)->SetDescriptorHeaps.ppDescriptorHeap + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::SetGraphicsRootConstantBufferView: + { + command_list->SetGraphicsRootConstantBufferView( + reinterpret_cast(item)->SetGraphicsRootConstantBufferView.RootParameterIndex, + reinterpret_cast(item)->SetGraphicsRootConstantBufferView.BufferLocation + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::SetGraphicsRootDescriptorTable: + { + command_list->SetGraphicsRootDescriptorTable( + reinterpret_cast(item)->SetGraphicsRootDescriptorTable.RootParameterIndex, + reinterpret_cast(item)->SetGraphicsRootDescriptorTable.BaseDescriptor + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::SetGraphicsRootSignature: + { + command_list->SetGraphicsRootSignature( + reinterpret_cast(item)->SetGraphicsRootSignature.pRootSignature + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::SetIndexBuffer: + { + command_list->IASetIndexBuffer( + &reinterpret_cast(item)->SetIndexBuffer.desc + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::SetVertexBuffers: + { + command_list->IASetVertexBuffers( + 0, + 1, + &reinterpret_cast(item)->SetVertexBuffers.desc + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::SetPipelineState: + { + command_list->SetPipelineState(reinterpret_cast(item)->SetPipelineState.pPipelineStateObject); + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::SetRenderTargets: + { + unsigned int render_target_count = 0; + + if (reinterpret_cast(item)->SetRenderTargets.RenderTargetDescriptor.ptr) + { + render_target_count = 1; + } + + command_list->OMSetRenderTargets( + 
render_target_count, + reinterpret_cast(item)->SetRenderTargets.RenderTargetDescriptor.ptr == NULL ? + nullptr : + &reinterpret_cast(item)->SetRenderTargets.RenderTargetDescriptor, + FALSE, + reinterpret_cast(item)->SetRenderTargets.DepthStencilDescriptor.ptr == NULL ? + nullptr : + &reinterpret_cast(item)->SetRenderTargets.DepthStencilDescriptor + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::ResolveSubresource: + { + command_list->ResolveSubresource( + reinterpret_cast(item)->ResolveSubresource.pDstResource, + reinterpret_cast(item)->ResolveSubresource.DstSubresource, + reinterpret_cast(item)->ResolveSubresource.pSrcResource, + reinterpret_cast(item)->ResolveSubresource.SrcSubresource, + reinterpret_cast(item)->ResolveSubresource.Format + ); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::CloseCommandList: + { + CheckHR(command_list->Close()); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::ExecuteCommandList: + { + parent_queued_command_list->m_command_queue->ExecuteCommandLists(1, reinterpret_cast(&command_list)); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::Present: + { + CheckHR(reinterpret_cast(item)->Present.swapChain->Present(reinterpret_cast(item)->Present.syncInterval, reinterpret_cast(item)->Present.flags)); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::ResetCommandList: + { + CheckHR(command_list->Reset(reinterpret_cast(item)->ResetCommandList.allocator, nullptr)); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::ResetCommandAllocator: + { + CheckHR(reinterpret_cast(item)->ResetCommandAllocator.allocator->Reset()); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::FenceGpuSignal: + { + CheckHR(parent_queued_command_list->m_command_queue->Signal(reinterpret_cast(item)->FenceGpuSignal.fence, 
reinterpret_cast(item)->FenceGpuSignal.fence_value)); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::FenceCpuSignal: + { + CheckHR(reinterpret_cast(item)->FenceCpuSignal.fence->Signal(reinterpret_cast(item)->FenceCpuSignal.fence_value)); + + item += BufferOffsetForQueueItemType(); + break; + } + + case D3DQueueItemType::Stop: + + // Use a goto to break out of the loop, since we can't exit the loop from + // within a switch statement. We could use a separate 'if' after the switch, + // but that was the highest source of overhead in the function after profiling. + // http://stackoverflow.com/questions/1420029/how-to-break-out-of-a-loop-from-inside-a-switch + + bool eligible_to_move_to_front_of_queue = reinterpret_cast(item)->Stop.eligible_to_move_to_front_of_queue; + bool signal_stop_event = reinterpret_cast(item)->Stop.signal_stop_event; + + item += BufferOffsetForQueueItemType(); + + if (eligible_to_move_to_front_of_queue && item - queue_array > QUEUE_ARRAY_SIZE * 2 / 3) + { + item = queue_array; + } + + if (signal_stop_event) + { + SetEvent(parent_queued_command_list->m_stop_execution_event); + } + + goto exitLoop; + } + } + + exitLoop: + + queue_array_front = static_cast(item - queue_array); + } +} + +ID3D12QueuedCommandList::ID3D12QueuedCommandList(ID3D12GraphicsCommandList* backing_command_list, ID3D12CommandQueue* backing_command_queue) : + m_command_list(backing_command_list), + m_command_queue(backing_command_queue) +{ + memset(m_queue_array, 0, sizeof(m_queue_array)); + + m_queue_array_back = m_queue_array; + + m_begin_execution_event = CreateSemaphore(nullptr, 0, 256, nullptr); + m_stop_execution_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); + + m_background_thread = CreateThread(nullptr, 0, BackgroundThreadFunction, this, 0, &m_background_thread_id); +} + +ID3D12QueuedCommandList::~ID3D12QueuedCommandList() +{ + TerminateThread(m_background_thread, 0); + CloseHandle(m_background_thread); + + 
CloseHandle(m_begin_execution_event); + CloseHandle(m_stop_execution_event); +} + +void ID3D12QueuedCommandList::CheckForOverflow() +{ + constexpr const unsigned int queue_space_allowed_per_frame = QUEUE_ARRAY_SIZE / 3; + + if (m_queue_array_back - m_queue_array_back_at_start_of_frame > queue_space_allowed_per_frame) + { + // Game is (possibly) using too much space, kick off queue processing and + // wait on this thread till it chews through queue. + + // This means the game is submitting more than 28,000 draws a frame. + + ProcessQueuedItems(true, true); + } +} + +void ID3D12QueuedCommandList::ResetQueueOverflowTracking() +{ + m_queue_array_back_at_start_of_frame = m_queue_array_back; +} + +void ID3D12QueuedCommandList::QueueExecute() +{ + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::ExecuteCommandList; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void ID3D12QueuedCommandList::QueueFenceGpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value) +{ + D3DQueueItem item = {}; + + item.Type = D3DQueueItemType::FenceGpuSignal; + item.FenceGpuSignal.fence = fence_to_signal; + item.FenceGpuSignal.fence_value = fence_value; + + *reinterpret_cast(m_queue_array_back) = item; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void ID3D12QueuedCommandList::QueueFenceCpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value) +{ + D3DQueueItem item = {}; + + item.Type = D3DQueueItemType::FenceCpuSignal; + item.FenceCpuSignal.fence = fence_to_signal; + item.FenceCpuSignal.fence_value = fence_value; + + *reinterpret_cast(m_queue_array_back) = item; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void ID3D12QueuedCommandList::QueuePresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags) +{ + D3DQueueItem item = {}; + + item.Type = D3DQueueItemType::Present; + item.Present.swapChain = swap_chain; + item.Present.flags = flags; + 
item.Present.syncInterval = sync_interval; + + *reinterpret_cast(m_queue_array_back) = item; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void ID3D12QueuedCommandList::ClearQueue() +{ + // Drain semaphore to ensure no new previously queued work executes (though inflight work may continue). + while (WaitForSingleObject(m_begin_execution_event, 0) != WAIT_TIMEOUT) { } + + // Assume that any inflight queued work will complete within 100ms. This is a safe assumption. + Sleep(100); +} + +void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_of_queue, bool wait_for_stop) +{ + D3DQueueItem item = {}; + + item.Type = D3DQueueItemType::Stop; + item.Stop.eligible_to_move_to_front_of_queue = eligible_to_move_to_front_of_queue; + item.Stop.signal_stop_event = wait_for_stop; + + *reinterpret_cast(m_queue_array_back) = item; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + // Only (possibly) move to front of queue when finishing a frame, or when draining GPU queue. + // Logic in ID3D12QueuedCommandList::CheckForOverflow + // ensures that not more than one third of queue is used per frame. + if (eligible_to_move_to_front_of_queue && (m_queue_array_back - m_queue_array > QUEUE_ARRAY_SIZE * 2 / 3)) + { + m_queue_array_back = m_queue_array; + } + + if (eligible_to_move_to_front_of_queue) + { + ResetQueueOverflowTracking(); + } + + ReleaseSemaphore(m_begin_execution_event, 1, nullptr); + + if (wait_for_stop) + { + WaitForSingleObject(m_stop_execution_event, INFINITE); + } +} + +ULONG ID3D12QueuedCommandList::AddRef() +{ + // fetch_add returns the previous count; adding one gives the count produced by this call, + // without a second load that another thread's AddRef/Release could race with. + return m_ref.fetch_add(1) + 1; +} + +ULONG ID3D12QueuedCommandList::Release() +{ + // fetch_sub returns the value held before the subtraction, so the new count is one less. 
+ const ULONG new_ref = m_ref.fetch_sub(1) - 1; + if (new_ref == 0) + { + delete this; + } + + // IUnknown::Release reports the new reference count. new_ref is a local copy, so reading it after 'delete this' is safe. + return new_ref; +} + +HRESULT STDMETHODCALLTYPE ID3D12QueuedCommandList::QueryInterface( + _In_ REFIID riid, + _COM_Outptr_ void** ppvObject + ) +{ + *ppvObject = nullptr; + HRESULT hr = S_OK; + + if (riid == __uuidof(ID3D12GraphicsCommandList)) + { + *ppvObject = reinterpret_cast(this); + } + else if (riid == __uuidof(ID3D12CommandList)) + { + *ppvObject = reinterpret_cast(this); + } + else if (riid == __uuidof(ID3D12DeviceChild)) + { + *ppvObject = reinterpret_cast(this); + } + else if (riid == __uuidof(ID3D12Object)) + { + *ppvObject = reinterpret_cast(this); + } + else + { + hr = E_NOINTERFACE; + } + + if (*ppvObject != nullptr) + { + AddRef(); + } + + return hr; +} + +// ID3D12Object + +HRESULT STDMETHODCALLTYPE ID3D12QueuedCommandList::GetPrivateData( + _In_ REFGUID guid, + _Inout_ UINT* pDataSize, + _Out_writes_bytes_opt_(*pDataSize) void* pData + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); + return E_FAIL; +} + +HRESULT STDMETHODCALLTYPE ID3D12QueuedCommandList::SetPrivateData( + _In_ REFGUID guid, + _In_ UINT DataSize, + _In_reads_bytes_opt_(DataSize) const void* pData + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); + return E_FAIL; +} + +HRESULT STDMETHODCALLTYPE ID3D12QueuedCommandList::SetPrivateDataInterface( + _In_ REFGUID guid, + _In_opt_ const IUnknown* pData + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); + return E_FAIL; +} + +HRESULT STDMETHODCALLTYPE ID3D12QueuedCommandList::SetName( + _In_z_ LPCWSTR pName + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); + return E_FAIL; +} + +// ID3D12DeviceChild + +D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE ID3D12QueuedCommandList::GetType() +{ + return D3D12_COMMAND_LIST_TYPE_DIRECT; +} + +// ID3D12CommandList + +HRESULT STDMETHODCALLTYPE ID3D12QueuedCommandList::GetDevice( + REFIID riid, + _Out_ void** ppDevice + ) +{ + return m_command_list->GetDevice(riid, ppDevice); +} + +HRESULT STDMETHODCALLTYPE ID3D12QueuedCommandList::Close() { + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::CloseCommandList; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); + + return S_OK; +} + +HRESULT STDMETHODCALLTYPE ID3D12QueuedCommandList::Reset( + _In_ ID3D12CommandAllocator* pAllocator, + _In_opt_ ID3D12PipelineState* pInitialState + ) +{ + DEBUGCHECK(pInitialState == nullptr, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::ResetCommandList; + reinterpret_cast(m_queue_array_back)->ResetCommandList.allocator = pAllocator; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); + + return S_OK; +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ClearState( + _In_ ID3D12PipelineState* pPipelineState + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::DrawInstanced( + _In_ UINT VertexCountPerInstance, + _In_ UINT InstanceCount, + _In_ UINT StartVertexLocation, + _In_ UINT StartInstanceLocation + ) +{ + DEBUGCHECK(InstanceCount == 1, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(StartInstanceLocation == 0, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::DrawInstanced; + reinterpret_cast(m_queue_array_back)->DrawInstanced.StartVertexLocation = StartVertexLocation; + reinterpret_cast(m_queue_array_back)->DrawInstanced.VertexCount = VertexCountPerInstance; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::DrawIndexedInstanced( + _In_ UINT IndexCountPerInstance, + _In_ UINT InstanceCount, + _In_ UINT StartIndexLocation, + _In_ INT BaseVertexLocation, + _In_ UINT StartInstanceLocation + ) +{ + DEBUGCHECK(InstanceCount == 1, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(StartInstanceLocation == 0, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + D3DQueueItem* item = reinterpret_cast(m_queue_array_back); + + item->Type = D3DQueueItemType::DrawIndexedInstanced; + item->DrawIndexedInstanced.BaseVertexLocation = BaseVertexLocation; + item->DrawIndexedInstanced.IndexCount = IndexCountPerInstance; + item->DrawIndexedInstanced.StartIndexLocation = StartIndexLocation; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::Dispatch( + _In_ UINT ThreadGroupCountX, + _In_ UINT ThreadGroupCountY, + _In_ UINT ThreadGroupCountZ + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::DispatchIndirect( + _In_ ID3D12Resource* pBufferForArgs, + _In_ UINT AlignedByteOffsetForArgs + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::CopyBufferRegion( + _In_ ID3D12Resource* pDstBuffer, + UINT64 DstOffset, + _In_ ID3D12Resource* pSrcBuffer, + UINT64 SrcOffset, + UINT64 NumBytes + ) +{ + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::CopyBufferRegion; + reinterpret_cast(m_queue_array_back)->CopyBufferRegion.pDstBuffer = pDstBuffer; + reinterpret_cast(m_queue_array_back)->CopyBufferRegion.DstOffset = static_cast(DstOffset); + reinterpret_cast(m_queue_array_back)->CopyBufferRegion.pSrcBuffer = pSrcBuffer; + reinterpret_cast(m_queue_array_back)->CopyBufferRegion.SrcOffset = static_cast(SrcOffset); + reinterpret_cast(m_queue_array_back)->CopyBufferRegion.NumBytes = static_cast(NumBytes); + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::CopyTextureRegion( + _In_ const D3D12_TEXTURE_COPY_LOCATION* pDst, + UINT DstX, + UINT DstY, + UINT DstZ, + _In_ const D3D12_TEXTURE_COPY_LOCATION* pSrc, + _In_opt_ const D3D12_BOX* pSrcBox + ) +{ + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::CopyTextureRegion; + reinterpret_cast(m_queue_array_back)->CopyTextureRegion.dst =* pDst; + reinterpret_cast(m_queue_array_back)->CopyTextureRegion.src =* pSrc; + + reinterpret_cast(m_queue_array_back)->CopyTextureRegion.DstX = DstX; + reinterpret_cast(m_queue_array_back)->CopyTextureRegion.DstY = DstY; + reinterpret_cast(m_queue_array_back)->CopyTextureRegion.DstZ = DstZ; + + if (pSrcBox) + reinterpret_cast(m_queue_array_back)->CopyTextureRegion.srcBox = *pSrcBox; + else + reinterpret_cast(m_queue_array_back)->CopyTextureRegion.srcBox = {}; + + m_queue_array_back += 
BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::CopyResource( + _In_ ID3D12Resource* pDstResource, + _In_ ID3D12Resource* pSrcResource + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::CopyTiles( + _In_ ID3D12Resource* pTiledResource, + _In_ const D3D12_TILED_RESOURCE_COORDINATE* pTileRegionStartCoordinate, + _In_ const D3D12_TILE_REGION_SIZE* pTileRegionSize, + _In_ ID3D12Resource* pBuffer, + UINT64 BufferStartOffsetInBytes, + D3D12_TILE_COPY_FLAGS Flags + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ResolveSubresource( + _In_ ID3D12Resource* pDstResource, + _In_ UINT DstSubresource, + _In_ ID3D12Resource* pSrcResource, + _In_ UINT SrcSubresource, + _In_ DXGI_FORMAT Format + ) +{ + // No ignored parameters, no assumptions to DEBUGCHECK. + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::ResolveSubresource; + reinterpret_cast(m_queue_array_back)->ResolveSubresource.pDstResource = pDstResource; + reinterpret_cast(m_queue_array_back)->ResolveSubresource.DstSubresource = DstSubresource; + reinterpret_cast(m_queue_array_back)->ResolveSubresource.pSrcResource = pSrcResource; + reinterpret_cast(m_queue_array_back)->ResolveSubresource.SrcSubresource = SrcSubresource; + reinterpret_cast(m_queue_array_back)->ResolveSubresource.Format = Format; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::IASetPrimitiveTopology( + _In_ D3D11_PRIMITIVE_TOPOLOGY PrimitiveTopology + ) +{ + // No ignored parameters, no assumptions to DEBUGCHECK. 
+ + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::IASetPrimitiveTopology; + reinterpret_cast(m_queue_array_back)->IASetPrimitiveTopology.PrimitiveTopology = PrimitiveTopology; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::RSSetViewports( + _In_range_(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count, + _In_reads_(Count) const D3D12_VIEWPORT* pViewports + ) +{ + DEBUGCHECK(Count == 1, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::RSSetViewports; + reinterpret_cast(m_queue_array_back)->RSSetViewports.Height = pViewports->Height; + reinterpret_cast(m_queue_array_back)->RSSetViewports.Width = pViewports->Width; + reinterpret_cast(m_queue_array_back)->RSSetViewports.TopLeftX = pViewports->TopLeftX; + reinterpret_cast(m_queue_array_back)->RSSetViewports.TopLeftY = pViewports->TopLeftY; + reinterpret_cast(m_queue_array_back)->RSSetViewports.MinDepth = pViewports->MinDepth; + reinterpret_cast(m_queue_array_back)->RSSetViewports.MaxDepth = pViewports->MaxDepth; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::RSSetScissorRects( + _In_range_(0, D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count, + _In_reads_(Count) const D3D12_RECT* pRects + ) +{ + DEBUGCHECK(Count == 1, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::RSSetScissorRects; + reinterpret_cast(m_queue_array_back)->RSSetScissorRects.bottom = pRects->bottom; + reinterpret_cast(m_queue_array_back)->RSSetScissorRects.left = pRects->left; + reinterpret_cast(m_queue_array_back)->RSSetScissorRects.right = pRects->right; + reinterpret_cast(m_queue_array_back)->RSSetScissorRects.top = pRects->top; + + m_queue_array_back += 
BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::OMSetBlendFactor( + _In_opt_ const FLOAT BlendFactor[4] + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::OMSetStencilRef( + _In_ UINT StencilRef + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetPipelineState( + _In_ ID3D12PipelineState* pPipelineState + ) +{ + // No ignored parameters, no assumptions to DEBUGCHECK. + + D3DQueueItem* item = reinterpret_cast(m_queue_array_back); + + item->Type = D3DQueueItemType::SetPipelineState; + item->SetPipelineState.pPipelineStateObject = pPipelineState; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ResourceBarrier( + _In_ UINT NumBarriers, + _In_reads_(NumBarriers) const D3D12_RESOURCE_BARRIER* pBarriers + ) +{ + DEBUGCHECK(NumBarriers == 1, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::ResourceBarrier; + reinterpret_cast(m_queue_array_back)->ResourceBarrier.barrier = *pBarriers; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ExecuteBundle( + _In_ ID3D12GraphicsCommandList *pCommandList + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::BeginQuery( + _In_ ID3D12QueryHeap* pQueryHeap, + _In_ D3D12_QUERY_TYPE Type, + _In_ UINT Index + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::EndQuery( + _In_ ID3D12QueryHeap* pQueryHeap, + _In_ D3D12_QUERY_TYPE Type, + _In_ UINT Index + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ResolveQueryData( + _In_ ID3D12QueryHeap* pQueryHeap, + _In_ D3D12_QUERY_TYPE Type, + _In_ UINT StartElement, + _In_ UINT ElementCount, + _In_ ID3D12Resource* pDestinationBuffer, + _In_ UINT64 AlignedDestinationBufferOffset + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetPredication( + _In_opt_ ID3D12Resource* pBuffer, + _In_ UINT64 AlignedBufferOffset, + _In_ D3D12_PREDICATION_OP Operation + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetDescriptorHeaps( + _In_ UINT NumDescriptorHeaps, + _In_reads_(NumDescriptorHeaps) ID3D12DescriptorHeap** pDescriptorHeaps + ) +{ + // No ignored parameters, no assumptions to DEBUGCHECK. + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::SetDescriptorHeaps; + reinterpret_cast(m_queue_array_back)->SetDescriptorHeaps.ppDescriptorHeap = pDescriptorHeaps; + reinterpret_cast(m_queue_array_back)->SetDescriptorHeaps.NumDescriptorHeaps = NumDescriptorHeaps; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetComputeRootSignature( + _In_ ID3D12RootSignature* pRootSignature + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetGraphicsRootSignature( + _In_ ID3D12RootSignature* pRootSignature + ) +{ + // No ignored parameters, no assumptions to DEBUGCHECK. 
+ + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::SetGraphicsRootSignature; + reinterpret_cast(m_queue_array_back)->SetGraphicsRootSignature.pRootSignature = pRootSignature; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetComputeRootDescriptorTable( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetGraphicsRootDescriptorTable( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor + ) +{ + // No ignored parameters, no assumptions to DEBUGCHECK. + + D3DQueueItem* item = reinterpret_cast(m_queue_array_back); + + item->Type = D3DQueueItemType::SetGraphicsRootDescriptorTable; + item->SetGraphicsRootDescriptorTable.RootParameterIndex = RootParameterIndex; + item->SetGraphicsRootDescriptorTable.BaseDescriptor = BaseDescriptor; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetComputeRoot32BitConstant( + _In_ UINT RootParameterIndex, + _In_ UINT SrcData, + _In_ UINT DestOffsetIn32BitValues + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetGraphicsRoot32BitConstant( + _In_ UINT RootParameterIndex, + _In_ UINT SrcData, + _In_ UINT DestOffsetIn32BitValues + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetComputeRoot32BitConstants( + _In_ UINT RootParameterIndex, + _In_ UINT Num32BitValuesToSet, + _In_reads_(Num32BitValuesToSet*sizeof(UINT)) const void* pSrcData, + _In_ UINT DestOffsetIn32BitValues + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetGraphicsRoot32BitConstants( + _In_ UINT RootParameterIndex, + _In_ UINT Num32BitValuesToSet, + _In_reads_(Num32BitValuesToSet*sizeof(UINT)) const void* pSrcData, + _In_ UINT DestOffsetIn32BitValues + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetGraphicsRootConstantBufferView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS BufferLocation + ) +{ + // No ignored parameters, no assumptions to DEBUGCHECK. + + D3DQueueItem* item = reinterpret_cast(m_queue_array_back); + + item->Type = D3DQueueItemType::SetGraphicsRootConstantBufferView; + item->SetGraphicsRootConstantBufferView.RootParameterIndex = RootParameterIndex; + item->SetGraphicsRootConstantBufferView.BufferLocation = BufferLocation; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetComputeRootConstantBufferView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS BufferLocation + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetComputeRootShaderResourceView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetGraphicsRootShaderResourceView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetComputeRootUnorderedAccessView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetGraphicsRootUnorderedAccessView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::IASetIndexBuffer( + _In_opt_ const D3D12_INDEX_BUFFER_VIEW* pDesc + ) +{ + // No ignored parameters, no assumptions to DEBUGCHECK. + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::SetIndexBuffer; + reinterpret_cast(m_queue_array_back)->SetIndexBuffer.desc = *pDesc; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::IASetVertexBuffers( + _In_ UINT StartSlot, + _In_ UINT NumBuffers, + _In_ const D3D12_VERTEX_BUFFER_VIEW* pDesc + ) +{ + DEBUGCHECK(StartSlot == 0, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(NumBuffers == 1, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::SetVertexBuffers; + reinterpret_cast(m_queue_array_back)->SetVertexBuffers.desc = *pDesc; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SOSetTargets( + _In_ UINT StartSlot, + _In_ UINT NumViews, + _In_ const D3D12_STREAM_OUTPUT_BUFFER_VIEW* pViews + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::OMSetRenderTargets( + _In_ UINT NumRenderTargetDescriptors, + _In_ const D3D12_CPU_DESCRIPTOR_HANDLE* pRenderTargetDescriptors, + _In_ BOOL RTsSingleHandleToDescriptorRange, + _In_opt_ const D3D12_CPU_DESCRIPTOR_HANDLE *pDepthStencilDescriptor + ) +{ + DEBUGCHECK(RTsSingleHandleToDescriptorRange == FALSE, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::SetRenderTargets; + + if (pRenderTargetDescriptors) + reinterpret_cast(m_queue_array_back)->SetRenderTargets.RenderTargetDescriptor = *pRenderTargetDescriptors; + else + reinterpret_cast(m_queue_array_back)->SetRenderTargets.RenderTargetDescriptor = {}; + + if (pDepthStencilDescriptor) + reinterpret_cast(m_queue_array_back)->SetRenderTargets.DepthStencilDescriptor = *pDepthStencilDescriptor; + else + reinterpret_cast(m_queue_array_back)->SetRenderTargets.DepthStencilDescriptor = {}; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ClearDepthStencilView( + _In_ D3D12_CPU_DESCRIPTOR_HANDLE DepthStencilView, + _In_ D3D12_CLEAR_FLAGS ClearFlags, + _In_ FLOAT Depth, + _In_ UINT8 Stencil, + _In_ UINT NumRects, + _In_reads_opt_(NumRects) const D3D12_RECT* pRect + ) +{ + DEBUGCHECK(ClearFlags == D3D11_CLEAR_DEPTH, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(Depth == 0.0f, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(Stencil == 0, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(pRect == nullptr, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(NumRects == 0, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::ClearDepthStencilView; + 
reinterpret_cast(m_queue_array_back)->ClearDepthStencilView.DepthStencilView = DepthStencilView; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ClearRenderTargetView( + _In_ D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetView, + _In_ const FLOAT ColorRGBA[4], + _In_ UINT NumRects, + _In_reads_opt_(NumRects) const D3D12_RECT* pRects + ) +{ + DEBUGCHECK(ColorRGBA[0] == 0.0f, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(ColorRGBA[1] == 0.0f, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(ColorRGBA[2] == 0.0f, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(ColorRGBA[3] == 1.0f, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(pRects == nullptr, "Error: Invalid assumption in ID3D12QueuedCommandList."); + DEBUGCHECK(NumRects == 0, "Error: Invalid assumption in ID3D12QueuedCommandList."); + + reinterpret_cast(m_queue_array_back)->Type = D3DQueueItemType::ClearRenderTargetView; + reinterpret_cast(m_queue_array_back)->ClearRenderTargetView.RenderTargetView = RenderTargetView; + + m_queue_array_back += BufferOffsetForQueueItemType(); + + CheckForOverflow(); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ClearUnorderedAccessViewUint( + _In_ D3D12_GPU_DESCRIPTOR_HANDLE ViewGPUHandleInCurrentHeap, + _In_ D3D12_CPU_DESCRIPTOR_HANDLE ViewCPUHandle, + _In_ ID3D12Resource* pResource, + _In_ const UINT Values[4], + _In_ UINT NumRects, + _In_reads_opt_(NumRects) const D3D12_RECT* pRects + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ClearUnorderedAccessViewFloat( + _In_ D3D12_GPU_DESCRIPTOR_HANDLE ViewGPUHandleInCurrentHeap, + _In_ D3D12_CPU_DESCRIPTOR_HANDLE ViewCPUHandle, + _In_ ID3D12Resource* pResource, + _In_ const FLOAT Values[4], + _In_ UINT NumRects, + _In_reads_opt_(NumRects) const D3D12_RECT* pRects + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::DiscardResource( + _In_ ID3D12Resource* pResource, + _In_opt_ const D3D12_DISCARD_REGION* pDesc + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::SetMarker( + UINT Metadata, + _In_reads_bytes_opt_(Size) const void* pData, + UINT Size + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::BeginEvent( + UINT Metadata, + _In_reads_bytes_opt_(Size) const void* pData, + UINT Size + ) +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::EndEvent() +{ + // Function not implemented yet. + DEBUGCHECK(0, "Function not implemented yet."); +} + +void STDMETHODCALLTYPE ID3D12QueuedCommandList::ExecuteIndirect( + _In_ ID3D12CommandSignature* pCommandSignature, + _In_ UINT MaxCommandCount, + _In_ ID3D12Resource* pArgumentBuffer, + _In_ UINT64 ArgumentBufferOffset, + _In_opt_ ID3D12Resource* pCountBuffer, + _In_ UINT64 CountBufferOffset + ) +{ + // Function not implemented yet. 
+ DEBUGCHECK(0, "Function not implemented yet."); +} + +} // namespace DX12 \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h new file mode 100644 index 0000000000..068b66e723 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h @@ -0,0 +1,634 @@ +// Copyright hdcmeta +// Dual-Licensed under MIT and GPLv2+ +// Refer to the license.txt/license_mit.txt files included. + +#pragma once + +#include +#include + +namespace DX12 +{ + +static const unsigned int QUEUE_ARRAY_SIZE = 24 * 1024 * 1024; + +enum D3DQueueItemType +{ + AbortProcessing = 0, + SetPipelineState, + SetRenderTargets, + SetVertexBuffers, + SetIndexBuffer, + RSSetViewports, + RSSetScissorRects, + SetGraphicsRootDescriptorTable, + SetGraphicsRootConstantBufferView, + SetGraphicsRootSignature, + ClearRenderTargetView, + ClearDepthStencilView, + DrawInstanced, + DrawIndexedInstanced, + IASetPrimitiveTopology, + CopyBufferRegion, + CopyTextureRegion, + SetDescriptorHeaps, + ResourceBarrier, + ResolveSubresource, + ExecuteCommandList, + CloseCommandList, + Present, + ResetCommandList, + ResetCommandAllocator, + FenceGpuSignal, + FenceCpuSignal, + Stop +}; + +struct SetPipelineStateArguments +{ + ID3D12PipelineState* pPipelineStateObject; +}; + +struct SetRenderTargetsArguments +{ + D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetDescriptor; + D3D12_CPU_DESCRIPTOR_HANDLE DepthStencilDescriptor; +}; + +struct SetVertexBuffersArguments +{ + // UINT startSlot; - Dolphin only uses the 0th slot. + D3D12_VERTEX_BUFFER_VIEW desc; + // UINT numBuffers; - Only supporting single vertex buffer set since that's all Dolphin uses. 
+}; + +struct SetIndexBufferArguments +{ + D3D12_INDEX_BUFFER_VIEW desc; +}; + +struct RSSetViewportsArguments +{ + FLOAT TopLeftX; + FLOAT TopLeftY; + FLOAT Width; + FLOAT Height; + FLOAT MinDepth; + FLOAT MaxDepth; +}; + +struct RSSetScissorRectsArguments +{ + LONG left; + LONG top; + LONG right; + LONG bottom; +}; + +struct SetGraphicsRootDescriptorTableArguments +{ + UINT RootParameterIndex; + D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor; +}; + +struct SetGraphicsRootConstantBufferViewArguments +{ + UINT RootParameterIndex; + D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; +}; + +struct SetGraphicsRootSignatureArguments +{ + ID3D12RootSignature* pRootSignature; +}; + +struct ClearRenderTargetViewArguments +{ + D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetView; +}; + +struct ClearDepthStencilViewArguments +{ + D3D12_CPU_DESCRIPTOR_HANDLE DepthStencilView; +}; + +struct DrawInstancedArguments +{ + UINT VertexCount; + UINT StartVertexLocation; +}; + +struct DrawIndexedInstancedArguments +{ + UINT IndexCount; + UINT StartIndexLocation; + INT BaseVertexLocation; +}; + +struct IASetPrimitiveTopologyArguments +{ + D3D12_PRIMITIVE_TOPOLOGY PrimitiveTopology; +}; + +struct CopyBufferRegionArguments +{ + ID3D12Resource* pDstBuffer; + UINT DstOffset; + ID3D12Resource* pSrcBuffer; + UINT SrcOffset; + UINT NumBytes; +}; + +struct CopyTextureRegionArguments +{ + D3D12_TEXTURE_COPY_LOCATION dst; + UINT DstX; + UINT DstY; + UINT DstZ; + D3D12_TEXTURE_COPY_LOCATION src; + D3D12_BOX srcBox; +}; + +struct SetDescriptorHeapsArguments +{ + ID3D12DescriptorHeap** ppDescriptorHeap; + UINT NumDescriptorHeaps; +}; + +struct ResourceBarrierArguments +{ + D3D12_RESOURCE_BARRIER barrier; +}; + +struct ResolveSubresourceArguments +{ + ID3D12Resource* pDstResource; + UINT DstSubresource; + ID3D12Resource* pSrcResource; + UINT SrcSubresource; + DXGI_FORMAT Format; +}; + +struct CloseCommandListArguments +{ +}; + +struct ExecuteCommandListArguments +{ +}; + +struct PresentArguments +{ + IDXGISwapChain* 
swapChain; + UINT syncInterval; + UINT flags; +}; + +struct ResetCommandListArguments +{ + ID3D12CommandAllocator* allocator; +}; + +struct ResetCommandAllocatorArguments +{ + ID3D12CommandAllocator* allocator; +}; + +struct FenceGpuSignalArguments +{ + ID3D12Fence* fence; + UINT64 fence_value; +}; + +struct FenceCpuSignalArguments +{ + ID3D12Fence* fence; + UINT64 fence_value; +}; + +struct StopArguments +{ + bool eligible_to_move_to_front_of_queue; + bool signal_stop_event; +}; + +struct D3DQueueItem +{ + D3DQueueItemType Type; + + union + { + SetPipelineStateArguments SetPipelineState; + SetRenderTargetsArguments SetRenderTargets; + SetVertexBuffersArguments SetVertexBuffers; + SetIndexBufferArguments SetIndexBuffer; + RSSetViewportsArguments RSSetViewports; + RSSetScissorRectsArguments RSSetScissorRects; + SetGraphicsRootDescriptorTableArguments SetGraphicsRootDescriptorTable; + SetGraphicsRootConstantBufferViewArguments SetGraphicsRootConstantBufferView; + SetGraphicsRootSignatureArguments SetGraphicsRootSignature; + ClearRenderTargetViewArguments ClearRenderTargetView; + ClearDepthStencilViewArguments ClearDepthStencilView; + DrawInstancedArguments DrawInstanced; + DrawIndexedInstancedArguments DrawIndexedInstanced; + IASetPrimitiveTopologyArguments IASetPrimitiveTopology; + CopyBufferRegionArguments CopyBufferRegion; + CopyTextureRegionArguments CopyTextureRegion; + SetDescriptorHeapsArguments SetDescriptorHeaps; + ResourceBarrierArguments ResourceBarrier; + ResolveSubresourceArguments ResolveSubresource; + CloseCommandListArguments CloseCommandList; + ExecuteCommandListArguments ExecuteCommandList; + PresentArguments Present; + ResetCommandListArguments ResetCommandList; + ResetCommandAllocatorArguments ResetCommandAllocator; + FenceGpuSignalArguments FenceGpuSignal; + FenceCpuSignalArguments FenceCpuSignal; + StopArguments Stop; + }; +}; + +class ID3D12QueuedCommandList : public ID3D12GraphicsCommandList +{ +public: + + 
ID3D12QueuedCommandList(ID3D12GraphicsCommandList* backing_command_list, ID3D12CommandQueue* backing_command_queue); + + void ProcessQueuedItems(bool eligible_to_move_to_front_of_queue = false, bool wait_for_stop = false); + + void QueueExecute(); + void QueueFenceGpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value); + void QueueFenceCpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value); + void QueuePresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags); + void ClearQueue(); + + // IUnknown methods + + ULONG STDMETHODCALLTYPE AddRef(); + ULONG STDMETHODCALLTYPE Release(); + HRESULT STDMETHODCALLTYPE QueryInterface( + _In_ REFIID riid, + _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject + ); + + // ID3D12Object methods + + HRESULT STDMETHODCALLTYPE GetPrivateData( + _In_ REFGUID guid, + _Inout_ UINT* pDataSize, + _Out_writes_bytes_opt_(*pDataSize) void* pData + ); + + HRESULT STDMETHODCALLTYPE SetPrivateData( + _In_ REFGUID guid, + _In_ UINT DataSize, + _In_reads_bytes_opt_(DataSize) const void* pData + ); + + HRESULT STDMETHODCALLTYPE SetPrivateDataInterface( + _In_ REFGUID guid, + _In_opt_ const IUnknown* pData + ); + + HRESULT STDMETHODCALLTYPE SetName( + _In_z_ LPCWSTR pName + ); + + // ID3D12DeviceChild methods + + D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE GetType( + ); + + // ID3D12CommandList methods + + HRESULT STDMETHODCALLTYPE GetDevice( + REFIID riid, + void** ppvDevice + ); + + HRESULT STDMETHODCALLTYPE Close(void); + + HRESULT STDMETHODCALLTYPE Reset( + _In_ ID3D12CommandAllocator* pAllocator, + _In_opt_ ID3D12PipelineState* pInitialState + ); + + void STDMETHODCALLTYPE ClearState( + _In_ ID3D12PipelineState* pPipelineState + ); + + void STDMETHODCALLTYPE DrawInstanced( + _In_ UINT VertexCountPerInstance, + _In_ UINT InstanceCount, + _In_ UINT StartVertexLocation, + _In_ UINT StartInstanceLocation + ); + + void STDMETHODCALLTYPE DrawIndexedInstanced( + _In_ UINT IndexCountPerInstance, + _In_ UINT InstanceCount, + _In_ UINT 
StartIndexLocation, + _In_ INT BaseVertexLocation, + _In_ UINT StartInstanceLocation + ); + + void STDMETHODCALLTYPE Dispatch( + _In_ UINT ThreadGroupCountX, + _In_ UINT ThreadGroupCountY, + _In_ UINT ThreadGroupCountZ + ); + + void STDMETHODCALLTYPE DispatchIndirect( + _In_ ID3D12Resource* pBufferForArgs, + _In_ UINT AlignedByteOffsetForArgs + ); + + void STDMETHODCALLTYPE CopyBufferRegion( + _In_ ID3D12Resource* pDstBuffer, + UINT64 DstOffset, + _In_ ID3D12Resource* pSrcBuffer, + UINT64 SrcOffset, + UINT64 NumBytes + ); + + void STDMETHODCALLTYPE CopyTextureRegion( + _In_ const D3D12_TEXTURE_COPY_LOCATION* pDst, + UINT DstX, + UINT DstY, + UINT DstZ, + _In_ const D3D12_TEXTURE_COPY_LOCATION* pSrc, + _In_opt_ const D3D12_BOX* pSrcBox + ); + + void STDMETHODCALLTYPE CopyResource( + _In_ ID3D12Resource* pDstResource, + _In_ ID3D12Resource* pSrcResource + ); + + void STDMETHODCALLTYPE CopyTiles( + _In_ ID3D12Resource* pTiledResource, + _In_ const D3D12_TILED_RESOURCE_COORDINATE* pTileRegionStartCoordinate, + _In_ const D3D12_TILE_REGION_SIZE* pTileRegionSize, + _In_ ID3D12Resource* pBuffer, + UINT64 BufferStartOffsetInBytes, + D3D12_TILE_COPY_FLAGS Flags + ); + + void STDMETHODCALLTYPE ResolveSubresource( + _In_ ID3D12Resource* pDstResource, + _In_ UINT DstSubresource, + _In_ ID3D12Resource* pSrcResource, + _In_ UINT SrcSubresource, + _In_ DXGI_FORMAT Format + ); + + void STDMETHODCALLTYPE IASetPrimitiveTopology( + _In_ D3D12_PRIMITIVE_TOPOLOGY PrimitiveTopology + ); + + void STDMETHODCALLTYPE RSSetViewports( + _In_range_(0, D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count, + _In_reads_(Count) const D3D12_VIEWPORT* pViewports + ); + + void STDMETHODCALLTYPE RSSetScissorRects( + _In_range_(0, D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count, + _In_reads_(Count) const D3D12_RECT* pRects + ); + + void STDMETHODCALLTYPE OMSetBlendFactor( + _In_opt_ const FLOAT BlendFactor[4] + ); + + void STDMETHODCALLTYPE OMSetStencilRef( + _In_ 
UINT StencilRef + ); + + void STDMETHODCALLTYPE SetPipelineState( + _In_ ID3D12PipelineState* pPipelineState + ); + + void STDMETHODCALLTYPE ResourceBarrier( + _In_ UINT NumBarriers, + _In_reads_(NumBarriers) const D3D12_RESOURCE_BARRIER* pBarriers + ); + + void STDMETHODCALLTYPE ExecuteBundle( + _In_ ID3D12GraphicsCommandList* command_list + ); + + void STDMETHODCALLTYPE BeginQuery( + _In_ ID3D12QueryHeap* pQueryHeap, + _In_ D3D12_QUERY_TYPE Type, + _In_ UINT Index + ); + + void STDMETHODCALLTYPE EndQuery( + _In_ ID3D12QueryHeap* pQueryHeap, + _In_ D3D12_QUERY_TYPE Type, + _In_ UINT Index + ); + + void STDMETHODCALLTYPE ResolveQueryData( + _In_ ID3D12QueryHeap* pQueryHeap, + _In_ D3D12_QUERY_TYPE Type, + _In_ UINT StartElement, + _In_ UINT ElementCount, + _In_ ID3D12Resource* pDestinationBuffer, + _In_ UINT64 AlignedDestinationBufferOffset + ); + + void STDMETHODCALLTYPE SetPredication( + _In_opt_ ID3D12Resource* pBuffer, + _In_ UINT64 AlignedBufferOffset, + _In_ D3D12_PREDICATION_OP Operation + ); + + void STDMETHODCALLTYPE SetDescriptorHeaps( + _In_ UINT NumDescriptorHeaps, + _In_reads_(NumDescriptorHeaps) ID3D12DescriptorHeap** pDescriptorHeaps + ); + + void STDMETHODCALLTYPE SetComputeRootSignature( + _In_ ID3D12RootSignature* pRootSignature + ); + + void STDMETHODCALLTYPE SetGraphicsRootSignature( + _In_ ID3D12RootSignature* pRootSignature + ); + + void STDMETHODCALLTYPE SetComputeRootDescriptorTable( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor + ); + + void STDMETHODCALLTYPE SetGraphicsRootDescriptorTable( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor + ); + + void STDMETHODCALLTYPE SetComputeRoot32BitConstant( + _In_ UINT RootParameterIndex, + _In_ UINT SrcData, + _In_ UINT DestOffsetIn32BitValues + ); + + void STDMETHODCALLTYPE SetGraphicsRoot32BitConstant( + _In_ UINT RootParameterIndex, + _In_ UINT SrcData, + _In_ UINT DestOffsetIn32BitValues + ); + + void STDMETHODCALLTYPE 
SetComputeRoot32BitConstants( + _In_ UINT RootParameterIndex, + _In_ UINT Num32BitValuesToSet, + _In_reads_(Num32BitValuesToSet*sizeof(UINT)) const void* pSrcData, + _In_ UINT DestOffsetIn32BitValues + ); + + void STDMETHODCALLTYPE SetGraphicsRoot32BitConstants( + _In_ UINT RootParameterIndex, + _In_ UINT Num32BitValuesToSet, + _In_reads_(Num32BitValuesToSet*sizeof(UINT)) const void* pSrcData, + _In_ UINT DestOffsetIn32BitValues + ); + + void STDMETHODCALLTYPE SetGraphicsRootConstantBufferView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS BufferLocation + ); + + void STDMETHODCALLTYPE SetComputeRootConstantBufferView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS BufferLocation + ); + + void STDMETHODCALLTYPE SetComputeRootShaderResourceView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle + ); + + void STDMETHODCALLTYPE SetGraphicsRootShaderResourceView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle + ); + + void STDMETHODCALLTYPE SetComputeRootUnorderedAccessView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle + ); + + void STDMETHODCALLTYPE SetGraphicsRootUnorderedAccessView( + _In_ UINT RootParameterIndex, + _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle + ); + + void STDMETHODCALLTYPE IASetIndexBuffer( + _In_opt_ const D3D12_INDEX_BUFFER_VIEW* pDesc + ); + + void STDMETHODCALLTYPE IASetVertexBuffers( + _In_ UINT StartSlot, + _In_ UINT NumBuffers, + _In_ const D3D12_VERTEX_BUFFER_VIEW* pDesc + ); + + void STDMETHODCALLTYPE SOSetTargets( + _In_ UINT StartSlot, + _In_ UINT NumViews, + _In_ const D3D12_STREAM_OUTPUT_BUFFER_VIEW* pViews + ); + + void STDMETHODCALLTYPE OMSetRenderTargets( + _In_ UINT NumRenderTargetDescriptors, + _In_ const D3D12_CPU_DESCRIPTOR_HANDLE* pRenderTargetDescriptors, + _In_ BOOL RTsSingleHandleToDescriptorRange, + _In_opt_ const D3D12_CPU_DESCRIPTOR_HANDLE* pDepthStencilDescriptor + ); + + void 
STDMETHODCALLTYPE ClearDepthStencilView( + _In_ D3D12_CPU_DESCRIPTOR_HANDLE DepthStencilView, + _In_ D3D12_CLEAR_FLAGS ClearFlags, + _In_ FLOAT Depth, + _In_ UINT8 Stencil, + _In_ UINT NumRects, + _In_reads_opt_(NumRects) const D3D12_RECT* pRect + ); + + void STDMETHODCALLTYPE ClearRenderTargetView( + _In_ D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetView, + _In_ const FLOAT ColorRGBA[4], + _In_ UINT NumRects, + _In_reads_opt_(NumRects) const D3D12_RECT* pRects + ); + + void STDMETHODCALLTYPE ClearUnorderedAccessViewUint( + _In_ D3D12_GPU_DESCRIPTOR_HANDLE ViewGPUHandleInCurrentHeap, + _In_ D3D12_CPU_DESCRIPTOR_HANDLE ViewCPUHandle, + _In_ ID3D12Resource* pResource, + _In_ const UINT Values[4], + _In_ UINT NumRects, + _In_reads_opt_(NumRects) const D3D12_RECT* pRects + ); + + void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat( + _In_ D3D12_GPU_DESCRIPTOR_HANDLE ViewGPUHandleInCurrentHeap, + _In_ D3D12_CPU_DESCRIPTOR_HANDLE ViewCPUHandle, + _In_ ID3D12Resource* pResource, + _In_ const FLOAT Values[4], + _In_ UINT NumRects, + _In_reads_opt_(NumRects) const D3D12_RECT* pRects + ); + + void STDMETHODCALLTYPE DiscardResource( + _In_ ID3D12Resource* pResource, + _In_opt_ const D3D12_DISCARD_REGION* pRegion + ); + + void STDMETHODCALLTYPE SetMarker( + UINT Metadata, + _In_reads_bytes_opt_(Size) const void* pData, + UINT Size); + + void STDMETHODCALLTYPE BeginEvent( + UINT Metadata, + _In_reads_bytes_opt_(Size) const void* pData, + UINT Size); + + void STDMETHODCALLTYPE EndEvent(void); + + void STDMETHODCALLTYPE ExecuteIndirect( + _In_ ID3D12CommandSignature* pCommandSignature, + _In_ UINT MaxCommandCount, + _In_ ID3D12Resource* pArgumentBuffer, + _In_ UINT64 ArgumentBufferOffset, + _In_opt_ ID3D12Resource* pCountBuffer, + _In_ UINT64 CountBufferOffset + ); + +private: + ~ID3D12QueuedCommandList(); + + void ResetQueueOverflowTracking(); + void CheckForOverflow(); + + static DWORD WINAPI BackgroundThreadFunction(LPVOID param); + + byte m_queue_array[QUEUE_ARRAY_SIZE]; + byte* 
m_queue_array_back = m_queue_array; + + byte* m_queue_array_back_at_start_of_frame = m_queue_array_back; + + DWORD m_background_thread_id; + HANDLE m_background_thread; + + HANDLE m_begin_execution_event; + HANDLE m_stop_execution_event; + + ID3D12GraphicsCommandList* m_command_list; + ID3D12CommandQueue* m_command_queue; + + std::atomic m_ref = 1; +}; + +} // namespace \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3DShader.cpp b/Source/Core/VideoBackends/D3D12/D3DShader.cpp new file mode 100644 index 0000000000..1b0eae1681 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DShader.cpp @@ -0,0 +1,84 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/FileUtil.h" +#include "Common/MsgHandler.h" +#include "Common/StringUtil.h" +#include "Common/Logging/Log.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DShader.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +namespace D3D +{ + +bool CompileShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines, std::string shader_version_string) +{ + ID3D10Blob* shader_buffer = nullptr; + ID3D10Blob* error_buffer = nullptr; + +#if defined(_DEBUG) || defined(DEBUGFAST) + UINT flags = D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_DEBUG; +#else + UINT flags = D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_OPTIMIZATION_LEVEL3 | D3DCOMPILE_SKIP_VALIDATION; +#endif + HRESULT hr = d3d_compile(code.c_str(), code.length(), nullptr, defines, nullptr, "main", shader_version_string.data(), + flags, 0, &shader_buffer, &error_buffer); + + if (error_buffer) + { + INFO_LOG(VIDEO, "Shader compiler messages:\n%s\n", + static_cast(error_buffer->GetBufferPointer())); + } + + if (FAILED(hr)) + { + static int num_failures = 0; + std::string filename = StringFromFormat("%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), 
shader_version_string, num_failures++); + std::ofstream file; + OpenFStream(file, filename, std::ios_base::out); + file << code; + file.close(); + + PanicAlert("Failed to compile shader: %s\nDebug info (%s):\n%s", + filename.c_str(), shader_version_string, static_cast(error_buffer->GetBufferPointer())); + + *blob = nullptr; + error_buffer->Release(); + } + else + { + *blob = shader_buffer; + } + + return SUCCEEDED(hr); +} + +// code->bytecode +bool CompileVertexShader(const std::string& code, ID3DBlob** blob) +{ + return CompileShader(code, blob, nullptr, D3D::VertexShaderVersionString()); +} + +// code->bytecode +bool CompileGeometryShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines) +{ + return CompileShader(code, blob, defines, D3D::GeometryShaderVersionString()); +} + +// code->bytecode +bool CompilePixelShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines) +{ + return CompileShader(code, blob, defines, D3D::PixelShaderVersionString()); +} + +} // namespace + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DShader.h b/Source/Core/VideoBackends/D3D12/D3DShader.h new file mode 100644 index 0000000000..e43522302d --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DShader.h @@ -0,0 +1,26 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "VideoBackends/D3D12/D3DBase.h" + +class D3DBlob; + +namespace DX12 +{ + +namespace D3D +{ + +// The returned bytecode buffers should be Release()d. 
+bool CompileVertexShader(const std::string& code, ID3DBlob** blob); +bool CompileGeometryShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines = nullptr); +bool CompilePixelShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines = nullptr); + +} + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DState.cpp b/Source/Core/VideoBackends/D3D12/D3DState.cpp new file mode 100644 index 0000000000..d0df14371f --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DState.cpp @@ -0,0 +1,482 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Common/BitSet.h" +#include "Common/CommonTypes.h" +#include "Common/FileUtil.h" +#include "Common/LinearDiskCache.h" +#include "Common/MsgHandler.h" +#include "Common/StringUtil.h" +#include "Common/Logging/Log.h" + +#include "Core/ConfigManager.h" + +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DUtil.h" + +#include "VideoBackends/D3D12/NativeVertexFormat.h" +#include "VideoBackends/D3D12/ShaderCache.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" + +#include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +static bool s_cache_is_corrupted = false; +static LinearDiskCache s_pso_disk_cache; + +class PipelineStateCacheInserter : public LinearDiskCacheReader +{ +public: + void Read(const SmallPsoDiskDesc& key, const u8* value, u32 value_size) + { + if (s_cache_is_corrupted) + return; + + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {}; + desc.pRootSignature = D3D::default_root_signature; + desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; // This state changes in PSTextureEncoder::Encode. + desc.DSVFormat = DXGI_FORMAT_D32_FLOAT; // This state changes in PSTextureEncoder::Encode. 
+ desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF; + desc.NumRenderTargets = 1; + desc.SampleMask = UINT_MAX; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + + desc.GS = ShaderCache::GetGeometryShaderFromUid(&key.gs_uid); + desc.PS = ShaderCache::GetPixelShaderFromUid(&key.ps_uid); + desc.VS = ShaderCache::GetVertexShaderFromUid(&key.vs_uid); + + if (!desc.PS.pShaderBytecode || !desc.VS.pShaderBytecode) + { + s_cache_is_corrupted = true; + return; + } + + BlendState blend_state = {}; + blend_state.hex = key.blend_state_hex; + desc.BlendState = StateCache::GetDesc12(blend_state); + + ZMode depth_stencil_state = {}; + depth_stencil_state.hex = key.depth_stencil_state_hex; + desc.DepthStencilState = StateCache::GetDesc12(depth_stencil_state); + + RasterizerState rasterizer_state = {}; + rasterizer_state.hex = key.rasterizer_state_hex; + desc.RasterizerState = StateCache::GetDesc12(rasterizer_state); + + desc.PrimitiveTopologyType = key.topology; + + // search for a cached native vertex format + const PortableVertexDeclaration& native_vtx_decl = key.vertex_declaration; + std::unique_ptr& native = (*VertexLoaderManager::GetNativeVertexFormatMap())[native_vtx_decl]; + + if (!native) + { + native.reset(g_vertex_manager->CreateNativeVertexFormat(native_vtx_decl)); + } + + desc.InputLayout = reinterpret_cast(native.get())->GetActiveInputLayout12(); + + desc.CachedPSO.CachedBlobSizeInBytes = value_size; + desc.CachedPSO.pCachedBlob = value; + + ID3D12PipelineState* pso = nullptr; + HRESULT hr = D3D::device12->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso)); + + if (FAILED(hr)) + { + // Failure can occur if disk cache is corrupted, or a driver upgrade invalidates the existing blobs. + // In this case, we need to clear the disk cache. 
+ + s_cache_is_corrupted = true; + return; + } + + SmallPsoDesc small_desc = {}; + small_desc.blend_state.hex = key.blend_state_hex; + small_desc.depth_stencil_state.hex = key.depth_stencil_state_hex; + small_desc.rasterizer_state.hex = key.rasterizer_state_hex; + small_desc.gs_bytecode = desc.GS; + small_desc.ps_bytecode = desc.PS; + small_desc.vs_bytecode = desc.VS; + small_desc.input_layout = reinterpret_cast(native.get()); + + gx_state_cache.m_small_pso_map[small_desc] = pso; + } +}; + +StateCache::StateCache() +{ + m_current_pso_desc = {}; + + m_current_pso_desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; // This state changes in PSTextureEncoder::Encode. + m_current_pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT; // This state changes in PSTextureEncoder::Encode. + m_current_pso_desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF; + m_current_pso_desc.NumRenderTargets = 1; + m_current_pso_desc.SampleMask = UINT_MAX; +} + +void StateCache::Init() +{ + // Root signature isn't available at time of StateCache construction, so fill it in now. + gx_state_cache.m_current_pso_desc.pRootSignature = D3D::default_root_signature; + + // Multi-sample configuration isn't available at time of StateCache construction, so fille it in now. + gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples; + gx_state_cache.m_current_pso_desc.SampleDesc.Quality = 0; + + if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX))) + File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX)); + + std::string cache_filename = StringFromFormat("%sdx12-%s-pso.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(), + SConfig::GetInstance().m_strUniqueID.c_str()); + + PipelineStateCacheInserter inserter; + s_pso_disk_cache.OpenAndRead(cache_filename, inserter); + + if (s_cache_is_corrupted) + { + // If a PSO fails to create, that means either: + // - The file itself is corrupt. + // - A driver/HW change has occured, causing the existing cache blobs to be invalid. 
+ // + // In either case, we want to re-create the disk cache. This should not be a frequent occurence. + + s_pso_disk_cache.Close(); + + for (auto it : gx_state_cache.m_small_pso_map) + { + SAFE_RELEASE(it.second); + } + gx_state_cache.m_small_pso_map.clear(); + + File::Delete(cache_filename); + + s_pso_disk_cache.OpenAndRead(cache_filename, inserter); + + s_cache_is_corrupted = false; + } +} + +D3D12_SAMPLER_DESC StateCache::GetDesc12(SamplerState state) +{ + const unsigned int d3d_mip_filters[4] = + { + TexMode0::TEXF_NONE, + TexMode0::TEXF_POINT, + TexMode0::TEXF_LINEAR, + TexMode0::TEXF_NONE, //reserved + }; + const D3D12_TEXTURE_ADDRESS_MODE d3d_clamps[4] = + { + D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + D3D12_TEXTURE_ADDRESS_MODE_WRAP //reserved + }; + + D3D12_SAMPLER_DESC sampdc; + + unsigned int mip = d3d_mip_filters[state.min_filter & 3]; + + sampdc.MaxAnisotropy = 1; + if (g_ActiveConfig.iMaxAnisotropy > 1) + { + sampdc.Filter = D3D12_FILTER_ANISOTROPIC; + sampdc.MaxAnisotropy = 1 << g_ActiveConfig.iMaxAnisotropy; + } + else if (state.min_filter & 4) // linear min filter + { + if (state.mag_filter) // linear mag filter + { + if (mip == TexMode0::TEXF_NONE) + sampdc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + else if (mip == TexMode0::TEXF_POINT) + sampdc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + else if (mip == TexMode0::TEXF_LINEAR) + sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + } + else // point mag filter + { + if (mip == TexMode0::TEXF_NONE) + sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT; + else if (mip == TexMode0::TEXF_POINT) + sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT; + else if (mip == TexMode0::TEXF_LINEAR) + sampdc.Filter = D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR; + } + } + else // point min filter + { + if (state.mag_filter) // linear mag filter + { + if (mip == TexMode0::TEXF_NONE) + sampdc.Filter = 
D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT; + else if (mip == TexMode0::TEXF_POINT) + sampdc.Filter = D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT; + else if (mip == TexMode0::TEXF_LINEAR) + sampdc.Filter = D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR; + } + else // point mag filter + { + if (mip == TexMode0::TEXF_NONE) + sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + else if (mip == TexMode0::TEXF_POINT) + sampdc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + else if (mip == TexMode0::TEXF_LINEAR) + sampdc.Filter = D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR; + } + } + + sampdc.AddressU = d3d_clamps[state.wrap_s]; + sampdc.AddressV = d3d_clamps[state.wrap_t]; + sampdc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + + sampdc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + + sampdc.BorderColor[0] = sampdc.BorderColor[1] = sampdc.BorderColor[2] = sampdc.BorderColor[3] = 1.0f; + + sampdc.MaxLOD = (mip == TexMode0::TEXF_NONE) ? 0.0f : static_cast(state.max_lod) / 16.f; + sampdc.MinLOD = static_cast(state.min_lod) / 16.f; + sampdc.MipLODBias = static_cast(state.lod_bias) / 32.0f; + + return sampdc; +} + +D3D12_BLEND GetBlendingAlpha(D3D12_BLEND blend) +{ + switch (blend) + { + case D3D12_BLEND_SRC_COLOR: + return D3D12_BLEND_SRC_ALPHA; + case D3D12_BLEND_INV_SRC_COLOR: + return D3D12_BLEND_INV_SRC_ALPHA; + case D3D12_BLEND_DEST_COLOR: + return D3D12_BLEND_DEST_ALPHA; + case D3D12_BLEND_INV_DEST_COLOR: + return D3D12_BLEND_INV_DEST_ALPHA; + + default: + return blend; + } +} + +D3D12_BLEND_DESC StateCache::GetDesc12(BlendState state) +{ + if (!state.blend_enable) + { + state.src_blend = D3D12_BLEND_ONE; + state.dst_blend = D3D12_BLEND_ZERO; + state.blend_op = D3D12_BLEND_OP_ADD; + state.use_dst_alpha = false; + } + + D3D12_BLEND_DESC blenddc = { + FALSE, // BOOL AlphaToCoverageEnable; + FALSE, // BOOL IndependentBlendEnable; + { + state.blend_enable, // BOOL BlendEnable; + FALSE, // BOOL LogicOpEnable; + state.src_blend, // D3D12_BLEND SrcBlend; + state.dst_blend, // D3D12_BLEND 
DestBlend; + state.blend_op, // D3D12_BLEND_OP BlendOp; + state.src_blend, // D3D12_BLEND SrcBlendAlpha; + state.dst_blend, // D3D12_BLEND DestBlendAlpha; + state.blend_op, // D3D12_BLEND_OP BlendOpAlpha; + D3D12_LOGIC_OP_NOOP, // D3D12_LOGIC_OP LogicOp + state.write_mask // UINT8 RenderTargetWriteMask; + } + }; + + blenddc.RenderTarget[0].SrcBlendAlpha = GetBlendingAlpha(blenddc.RenderTarget[0].SrcBlend); + blenddc.RenderTarget[0].DestBlendAlpha = GetBlendingAlpha(blenddc.RenderTarget[0].DestBlend); + + if (state.use_dst_alpha) + { + // Colors should blend against SRC1_ALPHA + if (blenddc.RenderTarget[0].SrcBlend == D3D12_BLEND_SRC_ALPHA) + blenddc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC1_ALPHA; + else if (blenddc.RenderTarget[0].SrcBlend == D3D12_BLEND_INV_SRC_ALPHA) + blenddc.RenderTarget[0].SrcBlend = D3D12_BLEND_INV_SRC1_ALPHA; + + // Colors should blend against SRC1_ALPHA + if (blenddc.RenderTarget[0].DestBlend == D3D12_BLEND_SRC_ALPHA) + blenddc.RenderTarget[0].DestBlend = D3D12_BLEND_SRC1_ALPHA; + else if (blenddc.RenderTarget[0].DestBlend == D3D12_BLEND_INV_SRC_ALPHA) + blenddc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC1_ALPHA; + + blenddc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; + blenddc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ZERO; + blenddc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + } + + return blenddc; +} + +D3D12_RASTERIZER_DESC StateCache::GetDesc12(RasterizerState state) +{ + return { + D3D12_FILL_MODE_SOLID, + state.cull_mode, + false, + 0, + 0.f, + 0, + true, + true, + false, + 0, + D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }; +} + +inline D3D12_DEPTH_STENCIL_DESC StateCache::GetDesc12(ZMode state) +{ + D3D12_DEPTH_STENCIL_DESC depthdc; + + depthdc.StencilEnable = FALSE; + depthdc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK; + depthdc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; + + D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, 
D3D12_COMPARISON_FUNC_ALWAYS }; + depthdc.FrontFace = defaultStencilOp; + depthdc.BackFace = defaultStencilOp; + + const D3D12_COMPARISON_FUNC d3dCmpFuncs[8] = + { + D3D12_COMPARISON_FUNC_NEVER, + D3D12_COMPARISON_FUNC_GREATER, + D3D12_COMPARISON_FUNC_EQUAL, + D3D12_COMPARISON_FUNC_GREATER_EQUAL, + D3D12_COMPARISON_FUNC_LESS, + D3D12_COMPARISON_FUNC_NOT_EQUAL, + D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_COMPARISON_FUNC_ALWAYS + }; + + if (state.testenable) + { + depthdc.DepthEnable = TRUE; + depthdc.DepthWriteMask = state.updateenable ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + depthdc.DepthFunc = d3dCmpFuncs[state.func]; + } + else + { + // if the test is disabled write is disabled too + depthdc.DepthEnable = FALSE; + depthdc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + } + + return depthdc; +} + +HRESULT StateCache::GetPipelineStateObjectFromCache(D3D12_GRAPHICS_PIPELINE_STATE_DESC* pso_desc, ID3D12PipelineState** pso) +{ + auto it = m_pso_map.find(*pso_desc); + + if (it == m_pso_map.end()) + { + // Not found, create new PSO. + + ID3D12PipelineState* new_pso = nullptr; + HRESULT hr = D3D::device12->CreateGraphicsPipelineState(pso_desc, IID_PPV_ARGS(&new_pso)); + + if (FAILED(hr)) + { + return hr; + } + + m_pso_map[*pso_desc] = new_pso; + *pso = new_pso; + } + else + { + *pso = it->second; + } + + return S_OK; +} + +HRESULT StateCache::GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D12PipelineState** pso, D3D12_PRIMITIVE_TOPOLOGY_TYPE topology, const GeometryShaderUid* gs_uid, const PixelShaderUid* ps_uid, const VertexShaderUid* vs_uid) +{ + auto it = m_small_pso_map.find(*pso_desc); + + if (it == m_small_pso_map.end()) + { + // Not found, create new PSO. + + // RootSignature, SampleMask, SampleDesc, NumRenderTargets, RTVFormats, DSVFormat + // never change so they are set in constructor and forgotten. 
+ m_current_pso_desc.GS = pso_desc->gs_bytecode; + m_current_pso_desc.PS = pso_desc->ps_bytecode; + m_current_pso_desc.VS = pso_desc->vs_bytecode; + m_current_pso_desc.BlendState = GetDesc12(pso_desc->blend_state); + m_current_pso_desc.DepthStencilState = GetDesc12(pso_desc->depth_stencil_state); + m_current_pso_desc.RasterizerState = GetDesc12(pso_desc->rasterizer_state); + m_current_pso_desc.PrimitiveTopologyType = topology; + m_current_pso_desc.InputLayout = pso_desc->input_layout->GetActiveInputLayout12(); + + ID3D12PipelineState* new_pso = nullptr; + HRESULT hr = D3D::device12->CreateGraphicsPipelineState(&m_current_pso_desc, IID_PPV_ARGS(&new_pso)); + + if (FAILED(hr)) + { + return hr; + } + + m_small_pso_map[*pso_desc] = new_pso; + *pso = new_pso; + + // This contains all of the information needed to reconstruct a PSO at startup. + SmallPsoDiskDesc disk_desc = {}; + disk_desc.blend_state_hex = pso_desc->blend_state.hex; + disk_desc.depth_stencil_state_hex = pso_desc->depth_stencil_state.hex; + disk_desc.rasterizer_state_hex = pso_desc->rasterizer_state.hex; + disk_desc.ps_uid = *ps_uid; + disk_desc.vs_uid = *vs_uid; + disk_desc.gs_uid = *gs_uid; + disk_desc.vertex_declaration = pso_desc->input_layout->GetVertexDeclaration(); + disk_desc.topology = topology; + + // This shouldn't fail.. but if it does, don't cache to disk. 
+ ID3DBlob* psoBlob = nullptr; + hr = new_pso->GetCachedBlob(&psoBlob); + + if (SUCCEEDED(hr)) + { + s_pso_disk_cache.Append(disk_desc, reinterpret_cast(psoBlob->GetBufferPointer()), static_cast(psoBlob->GetBufferSize())); + psoBlob->Release(); + } + } + else + { + *pso = it->second; + } + + return S_OK; +} + +void StateCache::Clear() +{ + for (auto& it : m_pso_map) + { + SAFE_RELEASE(it.second); + } + m_pso_map.clear(); + + for (auto& it : m_small_pso_map) + { + SAFE_RELEASE(it.second); + } + m_small_pso_map.clear(); + + s_pso_disk_cache.Sync(); + s_pso_disk_cache.Close(); +} + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DState.h b/Source/Core/VideoBackends/D3D12/D3DState.h new file mode 100644 index 0000000000..85e83592d0 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DState.h @@ -0,0 +1,187 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "Common/BitField.h" +#include "Common/CommonTypes.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/NativeVertexFormat.h" +#include "VideoBackends/D3D12/ShaderCache.h" + +#include "VideoCommon/BPMemory.h" + +namespace DX12 +{ + +class PipelineStateCacheInserter; + +union RasterizerState +{ + BitField<0, 2, D3D12_CULL_MODE> cull_mode; + + u32 hex; +}; + +union BlendState +{ + BitField<0, 1, u32> blend_enable; + BitField<1, 3, D3D12_BLEND_OP> blend_op; + BitField<4, 4, u8> write_mask; + BitField<8, 5, D3D12_BLEND> src_blend; + BitField<13, 5, D3D12_BLEND> dst_blend; + BitField<18, 1, u32> use_dst_alpha; + + u32 hex; +}; + +union SamplerState +{ + BitField<0, 3, u32> min_filter; + BitField<3, 1, u32> mag_filter; + BitField<4, 8, u32> min_lod; + BitField<12, 8, u32> max_lod; + BitField<20, 8, s32> lod_bias; + BitField<28, 2, u32> wrap_s; + BitField<30, 2, u32> wrap_t; + + u32 hex; +}; + +struct SmallPsoDesc +{ + D3D12_SHADER_BYTECODE gs_bytecode; + 
// Key type for the on-disk PSO cache: everything needed to rebuild a SmallPsoDesc
// (and from it a pipeline state object) at startup.
//
// The BitField members in BlendState, RasterizerState and ZMode make those unions fail
// LinearDiskCache's "K must be a trivially copyable type" static_assert, so only their
// packed u32 `hex` values are stored here.
struct SmallPsoDiskDesc
{
    u32 blend_state_hex;          // BlendState::hex
    u32 rasterizer_state_hex;     // RasterizerState::hex
    u32 depth_stencil_state_hex;  // ZMode::hex
    PixelShaderUid ps_uid;        // Identifies the pixel shader used by the PSO.
    VertexShaderUid vs_uid;       // Identifies the vertex shader used by the PSO.
    GeometryShaderUid gs_uid;     // Identifies the geometry shader used by the PSO.
    D3D12_PRIMITIVE_TOPOLOGY_TYPE topology;  // Topology type the PSO was created with.
    PortableVertexDeclaration vertex_declaration; // Used to construct the input layout.
};
+ void Clear(); + +private: + + friend DX12::PipelineStateCacheInserter; + + D3D12_GRAPHICS_PIPELINE_STATE_DESC m_current_pso_desc; + + struct hash_pso_desc + { + size_t operator()(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& pso_desc) const + { + return ((uintptr_t)pso_desc.PS.pShaderBytecode * 1000000) ^ ((uintptr_t)pso_desc.VS.pShaderBytecode * 1000) ^ ((uintptr_t)pso_desc.InputLayout.pInputElementDescs); + } + }; + + struct equality_pipeline_state_desc + { + bool operator()(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& lhs, const D3D12_GRAPHICS_PIPELINE_STATE_DESC& rhs) const + { + return std::tie(lhs.PS.pShaderBytecode, lhs.VS.pShaderBytecode, lhs.GS.pShaderBytecode, + lhs.RasterizerState.CullMode, + lhs.DepthStencilState.DepthEnable, + lhs.DepthStencilState.DepthFunc, + lhs.DepthStencilState.DepthWriteMask, + lhs.BlendState.RenderTarget[0].BlendEnable, + lhs.BlendState.RenderTarget[0].BlendOp, + lhs.BlendState.RenderTarget[0].DestBlend, + lhs.BlendState.RenderTarget[0].SrcBlend, + lhs.BlendState.RenderTarget[0].RenderTargetWriteMask, + lhs.RTVFormats[0]) == + std::tie(rhs.PS.pShaderBytecode, rhs.VS.pShaderBytecode, rhs.GS.pShaderBytecode, + rhs.RasterizerState.CullMode, + rhs.DepthStencilState.DepthEnable, + rhs.DepthStencilState.DepthFunc, + rhs.DepthStencilState.DepthWriteMask, + rhs.BlendState.RenderTarget[0].BlendEnable, + rhs.BlendState.RenderTarget[0].BlendOp, + rhs.BlendState.RenderTarget[0].DestBlend, + rhs.BlendState.RenderTarget[0].SrcBlend, + rhs.BlendState.RenderTarget[0].RenderTargetWriteMask, + rhs.RTVFormats[0]); + } + }; + + std::unordered_map m_pso_map; + + struct hash_small_pso_desc + { + size_t operator()(const SmallPsoDesc& pso_desc) const + { + return ((uintptr_t)pso_desc.vs_bytecode.pShaderBytecode << 10) ^ + ((uintptr_t)pso_desc.ps_bytecode.pShaderBytecode) + + pso_desc.blend_state.hex + + pso_desc.depth_stencil_state.hex; + } + }; + + struct equality_small_pipeline_state_desc + { + bool operator()(const SmallPsoDesc& lhs, const SmallPsoDesc& 
rhs) const + { + return std::tie(lhs.ps_bytecode.pShaderBytecode, lhs.vs_bytecode.pShaderBytecode, lhs.gs_bytecode.pShaderBytecode, + lhs.input_layout, lhs.blend_state.hex, lhs.depth_stencil_state.hex, lhs.rasterizer_state.hex) == + std::tie(rhs.ps_bytecode.pShaderBytecode, rhs.vs_bytecode.pShaderBytecode, rhs.gs_bytecode.pShaderBytecode, + rhs.input_layout, rhs.blend_state.hex, rhs.depth_stencil_state.hex, rhs.rasterizer_state.hex); + } + }; + + struct hash_shader_bytecode + { + size_t operator()(const D3D12_SHADER_BYTECODE& shader) const + { + return (uintptr_t)shader.pShaderBytecode; + } + }; + + struct equality_shader_bytecode + { + bool operator()(const D3D12_SHADER_BYTECODE& lhs, const D3D12_SHADER_BYTECODE& rhs) const + { + return lhs.pShaderBytecode == rhs.pShaderBytecode; + } + }; + + std::unordered_map m_small_pso_map; +}; + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp new file mode 100644 index 0000000000..adf5a9dcc5 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp @@ -0,0 +1,358 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DStreamBuffer.h" +#include "VideoBackends/D3D12/D3DUtil.h" + +namespace DX12 +{ + +D3DStreamBuffer::D3DStreamBuffer(unsigned int initial_size, unsigned int max_size, bool* buffer_reallocation_notification) : + m_buffer_size(initial_size), + m_buffer_max_size(max_size), + m_buffer_reallocation_notification(buffer_reallocation_notification) +{ + CHECK(initial_size <= max_size, "Error: Initial size for D3DStreamBuffer is greater than max_size."); + + AllocateBuffer(initial_size); + + // Register for callback from D3DCommandListManager each time a fence is queued to be signaled. 
+ m_buffer_tracking_fence = D3D::command_list_mgr->RegisterQueueFenceCallback(this, &D3DStreamBuffer::QueueFenceCallback); +} + +D3DStreamBuffer::~D3DStreamBuffer() +{ + D3D::command_list_mgr->RemoveQueueFenceCallback(this); + + m_buffer->Unmap(0, nullptr); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_buffer); +} + +// Function returns true if (worst case), needed to flush existing command list in order to +// ensure the GPU finished with current use of buffer. The calling function will need to take +// care to reset GPU state to what it was previously. + +// Obviously this is non-performant, so the buffer max_size should be large enough to +// ensure this never happens. +bool D3DStreamBuffer::AllocateSpaceInBuffer(unsigned int allocation_size, unsigned int alignment) +{ + CHECK(allocation_size <= m_buffer_max_size, "Error: Requested allocation size in D3DStreamBuffer is greater than max allowed size of backing buffer."); + + if (alignment) + { + unsigned int padding = m_buffer_offset % alignment; + + // Check for case when adding alignment causes CPU offset to equal GPU offset, + // which would imply entire buffer is available (if not corrected). + if (m_buffer_offset < m_buffer_gpu_completion_offset && + m_buffer_offset + alignment - padding >= m_buffer_gpu_completion_offset) + { + m_buffer_gpu_completion_offset++; + } + + m_buffer_offset += alignment - padding; + + if (m_buffer_offset > m_buffer_size) + { + m_buffer_offset = 0; + + // Correct for case where CPU was about to run into GPU. + if (m_buffer_gpu_completion_offset == 0) + m_buffer_gpu_completion_offset = 1; + } + } + + // First, check if there is available (not-in-use-by-GPU) space in existing buffer. + if (AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(allocation_size)) + { + return false; + } + + // Slow path. No room at front, or back, due to the GPU still (possibly) accessing parts of the buffer. + // Resize if possible, else stall. 
+ bool command_list_executed = AttemptBufferResizeOrElseStall(allocation_size); + + return command_list_executed; +} + +// In VertexManager, we don't know the 'real' size of the allocation at the time +// we call AllocateSpaceInBuffer. We have to conservatively allocate 16MB (!). +// After the vertex data is written, we can choose to specify the 'real' allocation +// size to avoid wasting space. +void D3DStreamBuffer::OverrideSizeOfPreviousAllocation(unsigned int override_allocation_size) +{ + m_buffer_offset = m_buffer_current_allocation_offset + override_allocation_size; +} + +void D3DStreamBuffer::AllocateBuffer(unsigned int size) +{ + // First, put existing buffer (if it exists) in deferred destruction list. + if (m_buffer) + { + m_buffer->Unmap(0, nullptr); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_buffer); + m_buffer = nullptr; + } + + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(size), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_buffer) + ) + ); + + CheckHR(m_buffer->Map(0, nullptr, &m_buffer_cpu_address)); + + m_buffer_gpu_address = m_buffer->GetGPUVirtualAddress(); + + m_buffer_size = size; +} + +// Function returns true if current command list executed as a result of current command list +// referencing all of buffer's contents, AND we are already at max_size. No alternative but to +// flush. See comments above AllocateSpaceInBuffer for more details. +bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(unsigned int allocation_size) +{ + // This function will attempt to increase the size of the buffer, in response + // to running out of room. If the buffer is already at its maximum size specified + // at creation time, then stall waiting for the GPU to finish with the currently + // requested memory. + + // Four possibilities, in order of desirability. 
+ // 1) Best - Update GPU tracking progress - maybe the GPU has made enough + // progress such that there is now room. + // 2) Enlarge GPU buffer, up to our max allowed size. + // 3) Stall until GPU finishes existing queued work/advances offset + // in buffer enough to free room. + // 4) Worst - flush current GPU commands and wait, which will free all room + // in buffer. + + // 1) First, let's check if GPU has already continued farther along buffer. If it has freed up + // enough of the buffer, we won't have to stall/allocate new memory. + + UpdateGPUProgress(); + + // Now that GPU progress is updated, do we have room in the queue? + if (AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(allocation_size)) + { + return false; + } + + // 2) Next, prefer increasing buffer size instead of stalling. + unsigned int new_size = std::min(static_cast(m_buffer_size * 1.5f), m_buffer_max_size); + new_size = std::max(new_size, allocation_size); + + // Can we grow buffer further? + if (new_size > m_buffer_size) + { + AllocateBuffer(new_size); + m_buffer_current_allocation_offset = 0; + m_buffer_offset = allocation_size; + + if (m_buffer_reallocation_notification != nullptr) + { + *m_buffer_reallocation_notification = true; + } + + return false; + } + + // 3) Bad case - we need to stall. + // This might be ok if we have > 2 frames queued up or something, but + // we don't want to be stalling as we generate the front-of-queue frame. + + const bool found_fence_to_wait_on = AttemptToFindExistingFenceToStallOn(allocation_size); + + if (found_fence_to_wait_on) + { + return false; + } + + // 4) If we get to this point, that means there is no outstanding queued GPU work, and we're still out of room. + // This is bad - and performance will suffer due to the CPU/GPU serialization, but the show must go on. + + // This is guaranteed to succeed, since we've already CHECK'd that the allocation_size <= max_buffer_size, and flushing now and waiting will + // free all space in buffer. 
+ + D3D::command_list_mgr->ExecuteQueuedWork(true); + + m_buffer_offset = allocation_size; + m_buffer_current_allocation_offset = 0; + m_buffer_gpu_completion_offset = 0; + + return true; +} + +// Return true if space is found. +bool D3DStreamBuffer::AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(unsigned int allocation_size) +{ + // First, check if there is room at end of buffer. Fast path. + if (m_buffer_offset >= m_buffer_gpu_completion_offset) + { + if (m_buffer_offset + allocation_size <= m_buffer_size) + { + m_buffer_current_allocation_offset = m_buffer_offset; + m_buffer_offset += allocation_size; + return true; + } + + if (0 + allocation_size < m_buffer_gpu_completion_offset) + { + m_buffer_current_allocation_offset = 0; + m_buffer_offset = allocation_size; + return true; + } + } + + // Next, check if there is room at front of buffer. Fast path. + if (m_buffer_offset < m_buffer_gpu_completion_offset && m_buffer_offset + allocation_size < m_buffer_gpu_completion_offset) + { + m_buffer_current_allocation_offset = m_buffer_offset; + m_buffer_offset += allocation_size; + return true; + } + + return false; +} + +// Returns true if fence was found and waited on. +bool D3DStreamBuffer::AttemptToFindExistingFenceToStallOn(unsigned int allocation_size) +{ + // Let's find the first fence that will free up enough space in our buffer. + + UINT64 fence_value_required = 0; + unsigned int new_buffer_offset = 0; + + while (m_queued_fences.size() > 0) + { + FenceTrackingInformation tracking_information = m_queued_fences.front(); + m_queued_fences.pop(); + + if (m_buffer_offset >= m_buffer_gpu_completion_offset) + { + // At this point, we need to wrap around, so req'd gpu offset is allocation_size. 
+ if (tracking_information.buffer_offset >= allocation_size) + { + fence_value_required = tracking_information.fence_value; + m_buffer_current_allocation_offset = 0; + m_buffer_offset = allocation_size; + break; + } + } + else + { + if (m_buffer_offset + allocation_size <= m_buffer_size) + { + if (tracking_information.buffer_offset >= m_buffer_offset + allocation_size) + { + fence_value_required = tracking_information.fence_value; + m_buffer_current_allocation_offset = m_buffer_offset; + m_buffer_offset = m_buffer_offset + allocation_size; + break; + } + } + else + { + if (tracking_information.buffer_offset >= allocation_size) + { + fence_value_required = tracking_information.fence_value; + m_buffer_current_allocation_offset = 0; + m_buffer_offset = allocation_size; + break; + } + } + } + } + + // Check if we found a fence we can wait on, for GPU to make sufficient progress. + // If so, wait on it. + if (fence_value_required > 0) + { + D3D::command_list_mgr->WaitOnCPUForFence(m_buffer_tracking_fence, fence_value_required); + return true; + } + + return false; +} + +void D3DStreamBuffer::UpdateGPUProgress() +{ + const UINT64 fence_value = m_buffer_tracking_fence->GetCompletedValue(); + + while (m_queued_fences.size() > 0) + { + FenceTrackingInformation tracking_information = m_queued_fences.front(); + m_queued_fences.pop(); + + // Has fence gone past this point? + if (fence_value > tracking_information.fence_value) + { + m_buffer_gpu_completion_offset = tracking_information.buffer_offset; + } + else + { + // Fences are stored in assending order, so once we hit a fence we haven't yet crossed on GPU, abort search. 
+ break; + } + } +} + +void D3DStreamBuffer::QueueFenceCallback(void* owning_object, UINT64 fence_value) +{ + reinterpret_cast(owning_object)->QueueFence(fence_value); +} + +void D3DStreamBuffer::QueueFence(UINT64 fence_value) +{ + FenceTrackingInformation tracking_information = {}; + tracking_information.fence_value = fence_value; + tracking_information.buffer_offset = m_buffer_offset; + + m_queued_fences.push(tracking_information); +} + +ID3D12Resource* D3DStreamBuffer::GetBuffer() const +{ + return m_buffer; +} + +D3D12_GPU_VIRTUAL_ADDRESS D3DStreamBuffer::GetGPUAddressOfCurrentAllocation() const +{ + return m_buffer_gpu_address + m_buffer_current_allocation_offset; +} + +void* D3DStreamBuffer::GetCPUAddressOfCurrentAllocation() const +{ + return static_cast(m_buffer_cpu_address) + m_buffer_current_allocation_offset; +} + +unsigned int D3DStreamBuffer::GetOffsetOfCurrentAllocation() const +{ + return m_buffer_current_allocation_offset; +} + +unsigned int D3DStreamBuffer::GetSize() const +{ + return m_buffer_size; +} + +void* D3DStreamBuffer::GetBaseCPUAddress() const +{ + return m_buffer_cpu_address; +} + +D3D12_GPU_VIRTUAL_ADDRESS D3DStreamBuffer::GetBaseGPUAddress() const +{ + return m_buffer_gpu_address; +} + +} \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h new file mode 100644 index 0000000000..3eb04b7e36 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h @@ -0,0 +1,70 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
// Upload buffer that hands out consecutive sub-allocations for data the GPU will read
// later, tracking GPU progress through fences so space is not reused too early.
// The buffer starts at initial_size and may be re-allocated larger, up to max_size.
class D3DStreamBuffer
{
public:
    // initial_size must not exceed max_size. If buffer_reallocation_notification is
    // non-null, the pointed-to flag is set to true whenever the buffer is re-allocated.
    D3DStreamBuffer(unsigned int initial_size, unsigned int max_size, bool* buffer_reallocation_notification);
    ~D3DStreamBuffer();

    // Reserves allocation_size bytes (the write offset is first advanced to a multiple of
    // `alignment` when alignment is non-zero). Returns true only if the current command
    // list had to be executed to make room; the caller must then restore GPU state.
    bool AllocateSpaceInBuffer(unsigned int allocation_size, unsigned int alignment);

    // Shrinks/sets the size of the most recent allocation once the real size is known.
    void OverrideSizeOfPreviousAllocation(unsigned int override_allocation_size);

    void* GetBaseCPUAddress() const;
    D3D12_GPU_VIRTUAL_ADDRESS GetBaseGPUAddress() const;
    ID3D12Resource* GetBuffer() const;
    void* GetCPUAddressOfCurrentAllocation() const;
    D3D12_GPU_VIRTUAL_ADDRESS GetGPUAddressOfCurrentAllocation() const;
    unsigned int GetOffsetOfCurrentAllocation() const;
    unsigned int GetSize() const;

    // Registered with the command list manager; owning_object is the D3DStreamBuffer
    // whose QueueFence should record fence_value.
    static void QueueFenceCallback(void* owning_object, UINT64 fence_value);

private:
    void AllocateBuffer(unsigned int size);

    // Note: the definition names this parameter allocation_size (the size being requested).
    bool AttemptBufferResizeOrElseStall(unsigned int new_size);

    bool AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(unsigned int allocation_size);

    bool AttemptToFindExistingFenceToStallOn(unsigned int allocation_size);

    void UpdateGPUProgress();
    void QueueFence(UINT64 fence_value);

    // Pairs a fence value with the buffer write offset at the time the fence was queued.
    struct FenceTrackingInformation
    {
        UINT64 fence_value;
        unsigned int buffer_offset;
    };

    // NOTE(review): the template argument appears to have been lost in this copy of the
    // file - presumably std::queue<FenceTrackingInformation>; confirm against the original.
    std::queue m_queued_fences;

    ID3D12Fence* m_buffer_tracking_fence = nullptr;

    ID3D12Resource* m_buffer = nullptr;

    void* m_buffer_cpu_address = nullptr;
    D3D12_GPU_VIRTUAL_ADDRESS m_buffer_gpu_address = {};

    unsigned int m_buffer_current_allocation_offset = 0;  // Start of the most recent allocation.
    unsigned int m_buffer_offset = 0;                     // Next free byte (CPU write position).
    unsigned int m_buffer_size = 0;                       // Current size of m_buffer in bytes.

    const unsigned int m_buffer_max_size = 0;

    unsigned int m_buffer_gpu_completion_offset = 0;      // Offset recorded with the last fence known to be passed.

    bool* m_buffer_reallocation_notification = nullptr;
};
// Tears down the file-static stream buffer that ReplaceRGBATexture2D creates lazily for
// texture uploads. SAFE_DELETE is defined elsewhere - presumably it deletes the object
// and resets the pointer so a later upload can re-create it; confirm against the macro.
void CleanupPersistentD3DTextureResources()
{
    SAFE_DELETE(s_texture_upload_stream_buffer);
}
D3D::device12->GetCopyableFootprints(&texture12->GetDesc(), level, 1, s_texture_upload_stream_buffer->GetOffsetOfCurrentAllocation(), &upload_footprint, &upload_rows, &upload_row_size_in_bytes, &upload_total_bytes); + + u8* dest_data = reinterpret_cast(s_texture_upload_stream_buffer->GetCPUAddressOfCurrentAllocation()); + const u8* src_data = reinterpret_cast(buffer); + for (u32 y = 0; y < upload_rows; ++y) + { + memcpy( + dest_data + upload_footprint.Footprint.RowPitch * y, + src_data + src_pitch * y, + upload_row_size_in_bytes + ); + } + + D3D::current_command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(texture12, level), 0, 0, 0, &CD3DX12_TEXTURE_COPY_LOCATION(s_texture_upload_stream_buffer->GetBuffer(), upload_footprint), nullptr); + + ResourceBarrier(D3D::current_command_list, texture12, D3D12_RESOURCE_STATE_COPY_DEST, current_resource_state, level); +} + +} // namespace + +D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, D3D11_USAGE usage, DXGI_FORMAT fmt, unsigned int levels, unsigned int slices, D3D12_SUBRESOURCE_DATA* data) +{ + ID3D12Resource* texture12 = nullptr; + + D3D12_RESOURCE_DESC texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D( + fmt, + width, + height, + slices, + levels + ); + + D3D12_CLEAR_VALUE optimized_clear_value = {}; + optimized_clear_value.Format = fmt; + + if (bind & D3D11_BIND_RENDER_TARGET) + { + texdesc12.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + optimized_clear_value.Color[0] = 0.0f; + optimized_clear_value.Color[1] = 0.0f; + optimized_clear_value.Color[2] = 0.0f; + optimized_clear_value.Color[3] = 1.0f; + } + + if (bind & D3D11_BIND_DEPTH_STENCIL) + { + texdesc12.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + optimized_clear_value.DepthStencil.Depth = 0.0f; + optimized_clear_value.DepthStencil.Stencil = 0; + } + + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + 
&CD3DX12_RESOURCE_DESC(texdesc12), + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + &optimized_clear_value, + IID_PPV_ARGS(&texture12) + ) + ); + + D3D::SetDebugObjectName12(texture12, "Texture created via D3DTexture2D::Create"); + D3DTexture2D* ret = new D3DTexture2D(texture12, bind, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + if (data) + { + DX12::D3D::ReplaceRGBATexture2D(texture12, reinterpret_cast(data->pData), width, height, static_cast(data->RowPitch), 0, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + + SAFE_RELEASE(texture12); + return ret; +} + +void D3DTexture2D::AddRef() +{ + m_ref.fetch_add(1); +} + +UINT D3DTexture2D::Release() +{ + // fetch_sub returns the value held before the subtraction. + if (m_ref.fetch_sub(1) == 1) + { + delete this; + return 0; + } + return m_ref.load(); +} + +D3D12_RESOURCE_STATES D3DTexture2D::GetResourceUsageState() const +{ + return m_resource_state; +} + +bool D3DTexture2D::GetMultisampled() const +{ + return m_multisampled; +} + +ID3D12Resource* D3DTexture2D::GetTex12() const +{ + return m_tex12; +} + +D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetSRV12CPU() const +{ + return m_srv12_cpu; +} + +D3D12_GPU_DESCRIPTOR_HANDLE D3DTexture2D::GetSRV12GPU() const +{ + return m_srv12_gpu; +} + +D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetSRV12GPUCPUShadow() const +{ + return m_srv12_gpu_cpu_shadow; +} + +D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetDSV12() const +{ + return m_dsv12; +} + +D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetRTV12() const +{ + return m_rtv12; +} + +D3DTexture2D::D3DTexture2D(ID3D12Resource* texptr, D3D11_BIND_FLAG bind, + DXGI_FORMAT srv_format, DXGI_FORMAT dsv_format, DXGI_FORMAT rtv_format, bool multisampled, D3D12_RESOURCE_STATES resource_state) + : m_tex12(texptr), m_resource_state(resource_state), m_multisampled(multisampled) +{ + D3D12_SRV_DIMENSION srv_dim12 = multisampled ? 
D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY : D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + D3D12_DSV_DIMENSION dsv_dim12 = multisampled ? D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY : D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + D3D12_RTV_DIMENSION rtv_dim12 = multisampled ? D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY : D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + + if (bind & D3D11_BIND_SHADER_RESOURCE) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { + srv_format, // DXGI_FORMAT Format + srv_dim12 // D3D12_SRV_DIMENSION ViewDimension + }; + + if (srv_dim12 == D3D12_SRV_DIMENSION_TEXTURE2DARRAY) + { + srv_desc.Texture2DArray.MipLevels = -1; + srv_desc.Texture2DArray.MostDetailedMip = 0; + srv_desc.Texture2DArray.ResourceMinLODClamp = 0; + srv_desc.Texture2DArray.ArraySize = -1; + } + else + { + srv_desc.Texture2DMSArray.ArraySize = -1; + } + + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + CHECK(D3D::gpu_descriptor_heap_mgr->Allocate(&m_srv12_cpu, &m_srv12_gpu, &m_srv12_gpu_cpu_shadow), "Error: Ran out of permenant slots in GPU descriptor heap, but don't support rolling over heap."); + + D3D::device12->CreateShaderResourceView(m_tex12, &srv_desc, m_srv12_cpu); + D3D::device12->CreateShaderResourceView(m_tex12, &srv_desc, m_srv12_gpu_cpu_shadow); + } + + if (bind & D3D11_BIND_DEPTH_STENCIL) + { + D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = { + dsv_format, // DXGI_FORMAT Format + dsv_dim12, // D3D12_DSV_DIMENSION + D3D12_DSV_FLAG_NONE // D3D12_DSV_FLAG Flags + }; + + if (dsv_dim12 == D3D12_DSV_DIMENSION_TEXTURE2DARRAY) + dsv_desc.Texture2DArray.ArraySize = -1; + else + dsv_desc.Texture2DMSArray.ArraySize = -1; + + D3D::dsv_descriptor_heap_mgr->Allocate(&m_dsv12); + D3D::device12->CreateDepthStencilView(m_tex12, &dsv_desc, m_dsv12); + } + + if (bind & D3D11_BIND_RENDER_TARGET) + { + D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = { + rtv_format, // DXGI_FORMAT Format + rtv_dim12 // D3D12_RTV_DIMENSION ViewDimension + }; + + if (rtv_dim12 == D3D12_RTV_DIMENSION_TEXTURE2DARRAY) + 
rtv_desc.Texture2DArray.ArraySize = -1; + else + rtv_desc.Texture2DMSArray.ArraySize = -1; + + D3D::rtv_descriptor_heap_mgr->Allocate(&m_rtv12); + D3D::device12->CreateRenderTargetView(m_tex12, &rtv_desc, m_rtv12); + } + + m_tex12->AddRef(); +} + +void D3DTexture2D::TransitionToResourceState(ID3D12GraphicsCommandList* command_list, D3D12_RESOURCE_STATES state_after) +{ + DX12::D3D::ResourceBarrier(command_list, m_tex12, m_resource_state, state_after, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + m_resource_state = state_after; +} + +D3DTexture2D::~D3DTexture2D() +{ + DX12::D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_tex12); + + if (m_srv12_cpu.ptr) + { + D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {}; + null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + + null_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + DX12::D3D::device12->CreateShaderResourceView(NULL, &null_srv_desc, m_srv12_cpu); + } +} + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DTexture.h b/Source/Core/VideoBackends/D3D12/D3DTexture.h new file mode 100644 index 0000000000..08c1f38a1d --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DTexture.h @@ -0,0 +1,66 @@ +// Copyright 2008 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +namespace DX12 +{ + +namespace D3D +{ + void ReplaceRGBATexture2D(ID3D12Resource* pTexture, const u8* buffer, unsigned int width, unsigned int height, unsigned int src_pitch, unsigned int level, D3D12_RESOURCE_STATES current_resource_state = D3D12_RESOURCE_STATE_COMMON); + void CleanupPersistentD3DTextureResources(); +} + +class D3DTexture2D +{ + +public: + // there are two ways to create a D3DTexture2D object: + // either create an ID3D12Resource object, pass it to the constructor and specify what views to create + // or let the texture automatically be created by D3DTexture2D::Create + + D3DTexture2D(ID3D12Resource* texptr, D3D11_BIND_FLAG bind, DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN, DXGI_FORMAT dsv_format = DXGI_FORMAT_UNKNOWN, DXGI_FORMAT rtv_format = DXGI_FORMAT_UNKNOWN, bool multisampled = false, D3D12_RESOURCE_STATES resource_state = D3D12_RESOURCE_STATE_COMMON); + static D3DTexture2D* Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, D3D11_USAGE usage, DXGI_FORMAT, unsigned int levels = 1, unsigned int slices = 1, D3D12_SUBRESOURCE_DATA* data = nullptr); + void TransitionToResourceState(ID3D12GraphicsCommandList* command_list, D3D12_RESOURCE_STATES state_after); + + // reference counting, use AddRef() when creating a new reference and Release() it when you don't need it anymore + void AddRef(); + UINT Release(); + + ID3D12Resource* GetTex12() const; + + D3D12_CPU_DESCRIPTOR_HANDLE GetSRV12CPU() const; + D3D12_GPU_DESCRIPTOR_HANDLE GetSRV12GPU() const; + D3D12_CPU_DESCRIPTOR_HANDLE GetSRV12GPUCPUShadow() const; + D3D12_CPU_DESCRIPTOR_HANDLE GetDSV12() const; + D3D12_CPU_DESCRIPTOR_HANDLE GetRTV12() const; + + D3D12_RESOURCE_STATES GetResourceUsageState() const; + + bool GetMultisampled() const; + +private: + ~D3DTexture2D(); + + ID3D12Resource* m_tex12 = nullptr; + + D3D12_CPU_DESCRIPTOR_HANDLE m_srv12_cpu = {}; + D3D12_GPU_DESCRIPTOR_HANDLE m_srv12_gpu = {}; + D3D12_CPU_DESCRIPTOR_HANDLE 
m_srv12_gpu_cpu_shadow = {}; + + D3D12_CPU_DESCRIPTOR_HANDLE m_dsv12 = {}; + D3D12_CPU_DESCRIPTOR_HANDLE m_rtv12 = {}; + + D3D12_RESOURCE_STATES m_resource_state = D3D12_RESOURCE_STATE_COMMON; + + bool m_multisampled = false; + + std::atomic m_ref = 1; +}; + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp new file mode 100644 index 0000000000..df009e8cd5 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp @@ -0,0 +1,1001 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include +#include + +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" +#include "VideoBackends/D3D12/D3DShader.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DStreamBuffer.h" +#include "VideoBackends/D3D12/D3DTexture.h" +#include "VideoBackends/D3D12/D3DUtil.h" + +#include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" + +namespace DX12 +{ + +namespace D3D +{ + +unsigned int AlignValue(unsigned int value, unsigned int alignment) +{ + return (value + (alignment - 1)) & ~(alignment - 1); +} + +void ResourceBarrier(ID3D12GraphicsCommandList* command_list, ID3D12Resource* resource, D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after, UINT subresource) +{ + if (state_before == state_after) + return; + + CHECK(resource, "NULL resource passed to ResourceBarrier."); + + D3D12_RESOURCE_BARRIER resourceBarrierDesc = { + D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, // D3D12_RESOURCE_TRANSITION_BARRIER_DESC Transition + D3D12_RESOURCE_BARRIER_FLAG_NONE, // D3D12_RESOURCE_BARRIER_FLAGS Flags + + // D3D12_RESOURCE_TRANSITION_BARRIER_DESC Transition + { + resource, // ID3D12Resource *pResource; + subresource, // UINT Subresource; + state_before, // UINT 
StateBefore; + state_after // UINT StateAfter; + } + }; + + command_list->ResourceBarrier(1, &resourceBarrierDesc); +} + +// Ring buffer class, shared between the draw* functions +class UtilVertexBuffer +{ +public: + explicit UtilVertexBuffer(int size) + { + m_stream_buffer = new D3DStreamBuffer(size, size * 4, nullptr); + } + ~UtilVertexBuffer() + { + SAFE_DELETE(m_stream_buffer); + } + + unsigned int GetSize() const { return m_stream_buffer->GetSize(); } + + // returns vertex offset to the new data + int AppendData(void* data, int size, int vertex_size) + { + m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size); + + memcpy(static_cast(m_stream_buffer->GetCPUAddressOfCurrentAllocation()), data, size); + + return m_stream_buffer->GetOffsetOfCurrentAllocation() / vertex_size; + } + + int BeginAppendData(void** write_ptr, int size, int vertex_size) + { + m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size); + + *write_ptr = m_stream_buffer->GetCPUAddressOfCurrentAllocation(); + + return m_stream_buffer->GetOffsetOfCurrentAllocation() / vertex_size; + } + + void EndAppendData() + { + // No-op on DX12. 
+ } + + ID3D12Resource* GetBuffer12() + { + return m_stream_buffer->GetBuffer(); + } + +private: + D3DStreamBuffer* m_stream_buffer = nullptr; +}; + +CD3DFont font; +UtilVertexBuffer* util_vbuf_stq = nullptr; +UtilVertexBuffer* util_vbuf_cq = nullptr; +UtilVertexBuffer* util_vbuf_clearq = nullptr; +UtilVertexBuffer* util_vbuf_efbpokequads = nullptr; + +static const unsigned int s_max_num_vertices = 8000 * 6; + +struct FONT2DVERTEX +{ + float x, y, z; + float col[4]; + float tu, tv; +}; + +FONT2DVERTEX InitFont2DVertex(float x, float y, u32 color, float tu, float tv) +{ + FONT2DVERTEX v; v.x=x; v.y=y; v.z=0; v.tu = tu; v.tv = tv; + v.col[0] = (static_cast((color >> 16) & 0xFF)) / 255.f; + v.col[1] = (static_cast((color >> 8) & 0xFF)) / 255.f; + v.col[2] = (static_cast((color >> 0) & 0xFF)) / 255.f; + v.col[3] = (static_cast((color >> 24) & 0xFF)) / 255.f; + return v; +} + +CD3DFont::CD3DFont() +{ +} + +constexpr const char fontpixshader[] = { + "Texture2D tex2D;\n" + "SamplerState linearSampler\n" + "{\n" + " Filter = MIN_MAG_MIP_LINEAR;\n" + " AddressU = D3D11_TEXTURE_ADDRESS_BORDER;\n" + " AddressV = D3D11_TEXTURE_ADDRESS_BORDER;\n" + " BorderColor = float4(0.f, 0.f, 0.f, 0.f);\n" + "};\n" + "struct PS_INPUT\n" + "{\n" + " float4 pos : SV_POSITION;\n" + " float4 col : COLOR;\n" + " float2 tex : TEXCOORD;\n" + "};\n" + "float4 main( PS_INPUT input ) : SV_Target\n" + "{\n" + " return tex2D.Sample( linearSampler, input.tex ) * input.col;\n" + "};\n" +}; + +constexpr const char fontvertshader[] = { + "struct VS_INPUT\n" + "{\n" + " float4 pos : POSITION;\n" + " float4 col : COLOR;\n" + " float2 tex : TEXCOORD;\n" + "};\n" + "struct PS_INPUT\n" + "{\n" + " float4 pos : SV_POSITION;\n" + " float4 col : COLOR;\n" + " float2 tex : TEXCOORD;\n" + "};\n" + "PS_INPUT main( VS_INPUT input )\n" + "{\n" + " PS_INPUT output;\n" + " output.pos = input.pos;\n" + " output.col = input.col;\n" + " output.tex = input.tex;\n" + " return output;\n" + "};\n" +}; + +int CD3DFont::Init() 
+{ + // Create vertex buffer for the letters + + // Prepare to create a bitmap + unsigned int* bitmap_bits = nullptr; + BITMAPINFO bmi; + ZeroMemory(&bmi.bmiHeader, sizeof(BITMAPINFOHEADER)); + bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + bmi.bmiHeader.biWidth = static_cast(m_tex_width); + bmi.bmiHeader.biHeight = -static_cast(m_tex_height); + bmi.bmiHeader.biPlanes = 1; + bmi.bmiHeader.biCompression = BI_RGB; + bmi.bmiHeader.biBitCount = 32; + + // Create a DC and a bitmap for the font + HDC hDC = CreateCompatibleDC(nullptr); + HBITMAP hbmBitmap = CreateDIBSection(hDC, &bmi, DIB_RGB_COLORS, reinterpret_cast(&bitmap_bits), nullptr, 0); + SetMapMode(hDC, MM_TEXT); + + // create a GDI font + HFONT hFont = CreateFont(24, 0, 0, 0, FW_NORMAL, FALSE, + FALSE, FALSE, DEFAULT_CHARSET, OUT_DEFAULT_PRECIS, + CLIP_DEFAULT_PRECIS, PROOF_QUALITY, + VARIABLE_PITCH, _T("Tahoma")); + + if (nullptr == hDC || nullptr == hbmBitmap || nullptr == hFont) + { + // Release whatever was created so a failed Init() does not leak GDI handles, + // and never continue with an unset bitmap_bits pointer. + if (hFont) + DeleteObject(hFont); + if (hbmBitmap) + DeleteObject(hbmBitmap); + if (hDC) + DeleteDC(hDC); + return E_FAIL; + } + + HGDIOBJ hOldbmBitmap = SelectObject(hDC, hbmBitmap); + HGDIOBJ hOldFont = SelectObject(hDC, hFont); + + // Set text properties + SetTextColor(hDC, 0xFFFFFF); + SetBkColor (hDC, 0); + SetTextAlign(hDC, TA_TOP); + + TEXTMETRICW tm; + GetTextMetricsW(hDC, &tm); + m_line_height = tm.tmHeight; + + // Loop through all printable characters and output them to the bitmap + // Meanwhile, keep track of the corresponding tex coords for each character.
+ int x = 0, y = 0; + char str[2] = "\0"; + for (int c = 0; c < 127 - 32; c++) + { + str[0] = c + 32; + SIZE size; + GetTextExtentPoint32A(hDC, str, 1, &size); + if (static_cast(x + size.cx + 1) > m_tex_width) + { + x = 0; + y += m_line_height; + } + + ExtTextOutA(hDC, x+1, y+0, ETO_OPAQUE | ETO_CLIPPED, nullptr, str, 1, nullptr); + m_tex_coords[c][0] = (static_cast(x + 0))/m_tex_width; + m_tex_coords[c][1] = (static_cast(y + 0))/m_tex_height; + m_tex_coords[c][2] = (static_cast(x + 0 + size.cx))/m_tex_width; + m_tex_coords[c][3] = (static_cast(y + 0 + size.cy))/m_tex_height; + + x += size.cx + 3; // 3 to work around annoying ij conflict (part of the j ends up with the i) + } + + // Create a new texture for the font + // possible optimization: store the converted data in a buffer and fill the texture on creation. + // That way, we can use a static texture + std::unique_ptr tex_initial_data(new byte[m_tex_width * m_tex_height * 4]); + + for (y = 0; y < m_tex_height; y++) + { + u32* pDst32 = reinterpret_cast(static_cast(tex_initial_data.get()) + y * m_tex_width * 4); + for (x = 0; x < m_tex_width; x++) + { + // Keep only the top nibble of the low byte: the expression below replicates a + // 4-bit value into 8 bits. Using u32 keeps the shift by 24 out of signed overflow. + const u32 bAlpha = (bitmap_bits[m_tex_width * y + x] & 0xff) >> 4; + + // Alpha carries the glyph coverage, RGB stays white. + *pDst32++ = (((bAlpha << 4) | bAlpha) << 24) | 0xFFFFFF; + } + } + + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_tex_width, m_tex_height, 1, 1), + D3D12_RESOURCE_STATE_COMMON, + nullptr, + IID_PPV_ARGS(&m_texture12) + ) + ); + + D3D::SetDebugObjectName12(m_texture12, "texture of a CD3DFont object"); + + ID3D12Resource* temporaryFontTextureUploadBuffer; + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(AlignValue(m_tex_width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * m_tex_height), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, +
IID_PPV_ARGS(&temporaryFontTextureUploadBuffer) + ) + ); + + D3D12_SUBRESOURCE_DATA subresource_data_dest = { + tex_initial_data.get(), // const void *pData; + m_tex_width * 4, // LONG_PTR RowPitch; + 0 // LONG_PTR SlicePitch; + }; + + D3D::ResourceBarrier(D3D::current_command_list, m_texture12, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + + CHECK(0 != UpdateSubresources(D3D::current_command_list, m_texture12, temporaryFontTextureUploadBuffer, 0, 0, 1, &subresource_data_dest), "UpdateSubresources call failed."); + + command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(temporaryFontTextureUploadBuffer); + + // The pixel data has been handed to UpdateSubresources, so free the CPU staging copy. + // reset() deletes the array; release() would only drop ownership and leak it. + tex_initial_data.reset(); + + D3D::gpu_descriptor_heap_mgr->Allocate(&m_texture12_cpu, &m_texture12_gpu); + + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; + srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv_desc.Texture2D.MipLevels = -1; + + D3D::device12->CreateShaderResourceView(m_texture12, &srv_desc, m_texture12_cpu); + + D3D::ResourceBarrier(D3D::current_command_list, m_texture12, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + + SelectObject(hDC, hOldbmBitmap); + DeleteObject(hbmBitmap); + + SelectObject(hDC, hOldFont); + DeleteObject(hFont); + + // The memory DC came from CreateCompatibleDC and has to be destroyed with DeleteDC. + DeleteDC(hDC); + + // setup device objects for drawing + ID3DBlob* psbytecode = nullptr; + D3D::CompilePixelShader(fontpixshader, &psbytecode); + if (psbytecode == nullptr) + PanicAlert("Failed to compile pixel shader, %s %d\n", __FILE__, __LINE__); + + m_pshader12.pShaderBytecode = psbytecode->GetBufferPointer(); + m_pshader12.BytecodeLength = psbytecode->GetBufferSize(); + + ID3DBlob* vsbytecode = nullptr; + D3D::CompileVertexShader(fontvertshader, &vsbytecode); + if (vsbytecode == nullptr) + PanicAlert("Failed to compile vertex shader, %s %d\n",
__FILE__, __LINE__); + + m_vshader12.pShaderBytecode = vsbytecode->GetBufferPointer(); + m_vshader12.BytecodeLength = vsbytecode->GetBufferSize(); + + const D3D12_INPUT_ELEMENT_DESC desc[] = + { + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 28, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + }; + + m_input_layout12.NumElements = ARRAYSIZE(desc); + m_input_layout12.pInputElementDescs = desc; + + D3D12_BLEND_DESC blenddesc = {}; + blenddesc.AlphaToCoverageEnable = FALSE; + blenddesc.IndependentBlendEnable = FALSE; + blenddesc.RenderTarget[0].BlendEnable = TRUE; + blenddesc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + blenddesc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; + blenddesc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; + blenddesc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + blenddesc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA; + blenddesc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; + blenddesc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + blenddesc.RenderTarget[0].LogicOp = D3D12_LOGIC_OP_NOOP; + blenddesc.RenderTarget[0].LogicOpEnable = FALSE; + m_blendstate12 = blenddesc; + + D3D12_RASTERIZER_DESC rastdesc = { D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false, 0, 0.f, 0.f, false, false, false, false }; + m_raststate12 = rastdesc; + + const unsigned int text_vb_size = s_max_num_vertices * sizeof(FONT2DVERTEX); + + CheckHR( + device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(text_vb_size), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_vb12) + ) + ); + + SetDebugObjectName12(m_vb12, "vertex buffer of a CD3DFont object"); + + m_vb12_view.BufferLocation = 
m_vb12->GetGPUVirtualAddress(); + m_vb12_view.SizeInBytes = text_vb_size; + m_vb12_view.StrideInBytes = sizeof(FONT2DVERTEX); + + CheckHR(m_vb12->Map(0, nullptr, &m_vb12_data)); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC text_pso_desc = { + default_root_signature, // ID3D12RootSignature *pRootSignature; + { vsbytecode->GetBufferPointer(), vsbytecode->GetBufferSize() }, // D3D12_SHADER_BYTECODE VS; + { psbytecode->GetBufferPointer(), psbytecode->GetBufferSize() }, // D3D12_SHADER_BYTECODE PS; + {}, // D3D12_SHADER_BYTECODE DS; + {}, // D3D12_SHADER_BYTECODE HS; + {}, // D3D12_SHADER_BYTECODE GS; + {}, // D3D12_STREAM_OUTPUT_DESC StreamOutput + blenddesc, // D3D12_BLEND_DESC BlendState; + UINT_MAX, // UINT SampleMask; + rastdesc, // D3D12_RASTERIZER_DESC RasterizerState + CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT), // D3D12_DEPTH_STENCIL_DESC DepthStencilState + m_input_layout12, // D3D12_INPUT_LAYOUT_DESC InputLayout + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_STRIP_CUT_VALUE IndexBufferProperties + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType + 1, // UINT NumRenderTargets + { DXGI_FORMAT_R8G8B8A8_UNORM }, // DXGI_FORMAT RTVFormats[8] + DXGI_FORMAT_UNKNOWN, // DXGI_FORMAT DSVFormat + { 1 /* UINT Count */, 0 /* UINT Quality */ } // DXGI_SAMPLE_DESC SampleDesc + }; + + CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&text_pso_desc, &m_pso)); + + SAFE_RELEASE(psbytecode); + SAFE_RELEASE(vsbytecode); + + return S_OK; +} + +int CD3DFont::Shutdown() +{ + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_vb12); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_texture12); + + return S_OK; +} + +int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dwColor, const std::string& text) +{ + if (!m_vb12) + return 0; + + float scale_x = 1 / static_cast(D3D::GetBackBufferWidth()) * 2.f; + float scale_y = 1 / 
static_cast(D3D::GetBackBufferHeight()) * 2.f; + float sizeratio = size / static_cast(m_line_height); + + // translate starting positions + float sx = x * scale_x - 1.f; + float sy = 1.f - y * scale_y; + + // Fill vertex buffer + FONT2DVERTEX* vertices12 = static_cast(m_vb12_data) + m_vb12_offset / sizeof(FONT2DVERTEX); + int num_triangles = 0L; + + // set general pipeline state + D3D::current_command_list->SetPipelineState(m_pso); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); + + D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, m_texture12_gpu); + + // If we are close to running off edge of vertex buffer, jump back to beginning. + if (m_vb12_offset + text.length() * 6 * sizeof(FONT2DVERTEX) >= s_max_num_vertices * sizeof(FONT2DVERTEX)) + { + m_vb12_offset = 0; + vertices12 = static_cast(m_vb12_data); + } + + float start_x = sx; + for (char c : text) + { + if (c == '\n') + { + sx = start_x; + sy -= scale_y * size; + } + if (!std::isprint(c)) + continue; + + c -= 32; + float tx1 = m_tex_coords[c][0]; + float ty1 = m_tex_coords[c][1]; + float tx2 = m_tex_coords[c][2]; + float ty2 = m_tex_coords[c][3]; + + float w = static_cast(tx2 - tx1) * m_tex_width * scale_x * sizeratio; + float h = static_cast(ty1 - ty2) * m_tex_height * scale_y * sizeratio; + + FONT2DVERTEX v[6]; + v[0] = InitFont2DVertex(sx, sy + h, dwColor, tx1, ty2); + v[1] = InitFont2DVertex(sx, sy, dwColor, tx1, ty1); + v[2] = InitFont2DVertex(sx + w, sy + h, dwColor, tx2, ty2); + v[3] = InitFont2DVertex(sx + w, sy, dwColor, tx2, ty1); + v[4] = v[2]; + v[5] = v[1]; + + memcpy(vertices12, v, 6 * sizeof(FONT2DVERTEX)); + vertices12 += 6; + + num_triangles += 2; + + sx += w + spacing * scale_x * size; + } + + // Render the vertex buffer + if (num_triangles > 
0) + { + D3D::current_command_list->IASetVertexBuffers(0, 1, &m_vb12_view); + + D3D::current_command_list->DrawInstanced(3 * num_triangles, 1, m_vb12_offset / sizeof(FONT2DVERTEX), 0); + } + + m_vb12_offset += 3 * num_triangles * sizeof(FONT2DVERTEX); + + return S_OK; +} + +D3D12_CPU_DESCRIPTOR_HANDLE linear_copy_sampler12CPU; +D3D12_GPU_DESCRIPTOR_HANDLE linear_copy_sampler12GPU; +D3D12_CPU_DESCRIPTOR_HANDLE point_copy_sampler12CPU; +D3D12_GPU_DESCRIPTOR_HANDLE point_copy_sampler12GPU; + +struct STQVertex +{ + float x, y, z, u, v, w, g; +}; +struct ClearVertex +{ + float x, y, z; + u32 col; +}; + +struct ColVertex +{ + float x, y, z; + u32 col; +}; + +struct +{ + float u1, v1, u2, v2, S, G; +} tex_quad_data; + +struct +{ + float x1, y1, x2, y2, z; + u32 col; +} draw_quad_data; + +struct +{ + u32 col; + float z; +} clear_quad_data; + +// ring buffer offsets +int stq_offset; +int cq_offset; +int clearq_offset; + +void InitUtils() +{ + util_vbuf_stq = new UtilVertexBuffer(0x10000); + util_vbuf_cq = new UtilVertexBuffer(0x10000); + util_vbuf_clearq = new UtilVertexBuffer(0x10000); + util_vbuf_efbpokequads = new UtilVertexBuffer(0x100000); + + D3D12_SAMPLER_DESC point_sampler_desc = { + D3D12_FILTER_MIN_MAG_MIP_POINT, + D3D12_TEXTURE_ADDRESS_MODE_BORDER, + D3D12_TEXTURE_ADDRESS_MODE_BORDER, + D3D12_TEXTURE_ADDRESS_MODE_BORDER, + 0.f, + 1, + D3D12_COMPARISON_FUNC_ALWAYS, + { 0.f, 0.f, 0.f, 0.f }, + 0.f, + 0.f + }; + + D3D::sampler_descriptor_heap_mgr->Allocate(&point_copy_sampler12CPU, &point_copy_sampler12GPU); + D3D::device12->CreateSampler(&point_sampler_desc, point_copy_sampler12CPU); + + D3D12_SAMPLER_DESC linear_sampler_desc = { + D3D12_FILTER_MIN_MAG_MIP_LINEAR, + D3D12_TEXTURE_ADDRESS_MODE_BORDER, + D3D12_TEXTURE_ADDRESS_MODE_BORDER, + D3D12_TEXTURE_ADDRESS_MODE_BORDER, + 0.f, + 1, + D3D12_COMPARISON_FUNC_ALWAYS, + { 0.f, 0.f, 0.f, 0.f }, + 0.f, + 0.f + }; + + D3D::sampler_descriptor_heap_mgr->Allocate(&linear_copy_sampler12CPU, &linear_copy_sampler12GPU); + 
D3D::device12->CreateSampler(&linear_sampler_desc, linear_copy_sampler12CPU); + + // cached data used to avoid unnecessarily reloading the vertex buffers + memset(&tex_quad_data, 0, sizeof(tex_quad_data)); + memset(&draw_quad_data, 0, sizeof(draw_quad_data)); + memset(&clear_quad_data, 0, sizeof(clear_quad_data)); + + font.Init(); +} + +void ShutdownUtils() +{ + font.Shutdown(); + + SAFE_DELETE(util_vbuf_stq); + SAFE_DELETE(util_vbuf_cq); + SAFE_DELETE(util_vbuf_clearq); + SAFE_DELETE(util_vbuf_efbpokequads); +} + +void SetPointCopySampler() +{ + D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SAMPLER, point_copy_sampler12GPU); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true); +} + +void SetLinearCopySampler() +{ + D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SAMPLER, linear_copy_sampler12GPU); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true); +} + +void DrawShadedTexQuad(D3DTexture2D* texture, + const D3D12_RECT* rSource, + int source_width, + int source_height, + D3D12_SHADER_BYTECODE pshader12, + D3D12_SHADER_BYTECODE vshader12, + D3D12_INPUT_LAYOUT_DESC layout12, + D3D12_SHADER_BYTECODE gshader12, + float gamma, + u32 slice, + DXGI_FORMAT rt_format, + bool inherit_srv_binding, + bool rt_multisampled, + D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc_override + ) +{ + float sw = 1.0f / static_cast(source_width); + float sh = 1.0f / static_cast(source_height); + float u1 = static_cast(rSource->left) * sw; + float u2 = static_cast(rSource->right) * sw; + float v1 = static_cast(rSource->top) * sh; + float v2 = static_cast(rSource->bottom) * sh; + float S = static_cast(slice); + float G = 1.0f / gamma; + + STQVertex coords[4] = { + { -1.0f, 1.0f, 0.0f, u1, v1, S, G }, + { 1.0f, 1.0f, 0.0f, u2, v1, S, G }, + { -1.0f, -1.0f, 0.0f, u1, v2, S, G }, + { 1.0f, -1.0f, 0.0f, u2, v2, S, G }, + }; + + // only upload the data to VRAM if it changed 
+ if (tex_quad_data.u1 != u1 || tex_quad_data.v1 != v1 || + tex_quad_data.u2 != u2 || tex_quad_data.v2 != v2 || + tex_quad_data.S != S || tex_quad_data.G != G) + { + stq_offset = util_vbuf_stq->AppendData(coords, sizeof(coords), sizeof(STQVertex)); + + tex_quad_data.u1 = u1; + tex_quad_data.v1 = v1; + tex_quad_data.u2 = u2; + tex_quad_data.v2 = v2; + tex_quad_data.S = S; + tex_quad_data.G = G; + } + + D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + D3D12_VERTEX_BUFFER_VIEW vb_view = { + util_vbuf_stq->GetBuffer12()->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; + util_vbuf_stq->GetSize(), // UINT SizeInBytes; This is the size of the entire buffer, not just the size of the vertex data for one draw call, since the offsetting is done in the draw call itself. + sizeof(STQVertex) // UINT StrideInBytes; + }; + + D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true); + + if (!inherit_srv_binding) + { + texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, texture->GetSRV12GPU()); + } + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { + default_root_signature, // ID3D12RootSignature *pRootSignature; + vshader12, // D3D12_SHADER_BYTECODE VS; + pshader12, // D3D12_SHADER_BYTECODE PS; + {}, // D3D12_SHADER_BYTECODE DS; + {}, // D3D12_SHADER_BYTECODE HS; + gshader12, // D3D12_SHADER_BYTECODE GS; + {}, // D3D12_STREAM_OUTPUT_DESC StreamOutput + Renderer::GetResetBlendDesc(), // D3D12_BLEND_DESC BlendState; + UINT_MAX, // UINT SampleMask; + Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState + depth_stencil_desc_override ? 
+ *depth_stencil_desc_override : + Renderer::GetResetDepthStencilDesc(), // D3D12_DEPTH_STENCIL_DESC DepthStencilState + layout12, // D3D12_INPUT_LAYOUT_DESC InputLayout + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES IndexBufferProperties + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType + 1, // UINT NumRenderTargets + { rt_format }, // DXGI_FORMAT RTVFormats[8] + DXGI_FORMAT_D32_FLOAT, // DXGI_FORMAT DSVFormat + { 1 /* UINT Count */, 0 /* UINT Quality */ } // DXGI_SAMPLE_DESC SampleDesc + }; + + if (rt_multisampled) + { + pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples; + } + + ID3D12PipelineState* pso = nullptr; + CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&pso_desc, &pso)); + + D3D::current_command_list->SetPipelineState(pso); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); + + // In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled. + // Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid + // dirtying state. 
+ + // 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072 + D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); + + D3D::current_command_list->DrawInstanced(4, 1, stq_offset, 0); + + g_renderer->RestoreAPIState(); +} + +// Fills a certain area of the current render target with the specified color +// destination coordinates normalized to (-1;1) +void DrawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled) +{ + ColVertex coords[4] = { + { x1, y2, z, Color }, + { x2, y2, z, Color }, + { x1, y1, z, Color }, + { x2, y1, z, Color }, + }; + + if (draw_quad_data.x1 != x1 || draw_quad_data.y1 != y1 || + draw_quad_data.x2 != x2 || draw_quad_data.y2 != y2 || + draw_quad_data.col != Color || draw_quad_data.z != z) + { + cq_offset = util_vbuf_cq->AppendData(coords, sizeof(coords), sizeof(ColVertex)); + + draw_quad_data.x1 = x1; + draw_quad_data.y1 = y1; + draw_quad_data.x2 = x2; + draw_quad_data.y2 = y2; + draw_quad_data.col = Color; + draw_quad_data.z = z; + } + + D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + D3D12_VERTEX_BUFFER_VIEW vb_view = { + util_vbuf_cq->GetBuffer12()->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; + util_vbuf_cq->GetSize(), // UINT SizeInBytes; This is the size of the entire buffer, not just the size of the vertex data for one draw call, since the offsetting is done in the draw call itself. 
+ sizeof(ColVertex) // UINT StrideInBytes; + }; + + D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { + default_root_signature, // ID3D12RootSignature *pRootSignature; + StaticShaderCache::GetClearVertexShader(), // D3D12_SHADER_BYTECODE VS; + StaticShaderCache::GetClearPixelShader(), // D3D12_SHADER_BYTECODE PS; + {}, // D3D12_SHADER_BYTECODE DS; + {}, // D3D12_SHADER_BYTECODE HS; + StaticShaderCache::GetClearGeometryShader(), // D3D12_SHADER_BYTECODE GS; + {}, // D3D12_STREAM_OUTPUT_DESC StreamOutput + *blend_desc, // D3D12_BLEND_DESC BlendState; + UINT_MAX, // UINT SampleMask; + Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState + *depth_stencil_desc, // D3D12_DEPTH_STENCIL_DESC DepthStencilState + StaticShaderCache::GetClearVertexShaderInputLayout(), // D3D12_INPUT_LAYOUT_DESC InputLayout + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES IndexBufferProperties + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType + 1, // UINT NumRenderTargets + { DXGI_FORMAT_R8G8B8A8_UNORM }, // DXGI_FORMAT RTVFormats[8] + DXGI_FORMAT_D32_FLOAT, // DXGI_FORMAT DSVFormat + { 1 /* UINT Count */, 0 /* UINT Quality */ } // DXGI_SAMPLE_DESC SampleDesc + }; + + if (rt_multisampled) + { + pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples; + } + + ID3D12PipelineState* pso = nullptr; + CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&pso_desc, &pso)); + + D3D::current_command_list->SetPipelineState(pso); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); + + // In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled. + // Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid + // dirtying state. 
+ + // 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072 + D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); + + D3D::current_command_list->DrawInstanced(4, 1, cq_offset, 0); + + g_renderer->RestoreAPIState(); +} + +void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled) +{ + ClearVertex coords[4] = { + {-1.0f, 1.0f, z, Color}, + { 1.0f, 1.0f, z, Color}, + {-1.0f, -1.0f, z, Color}, + { 1.0f, -1.0f, z, Color}, + }; + + if (clear_quad_data.col != Color || clear_quad_data.z != z) + { + clearq_offset = util_vbuf_clearq->AppendData(coords, sizeof(coords), sizeof(ClearVertex)); + + clear_quad_data.col = Color; + clear_quad_data.z = z; + } + + D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + D3D12_VERTEX_BUFFER_VIEW vb_view = { + util_vbuf_clearq->GetBuffer12()->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; + util_vbuf_clearq->GetSize(), // UINT SizeInBytes; This is the size of the entire buffer, not just the size of the vertex data for one draw call, since the offsetting is done in the draw call itself. + sizeof(ClearVertex) // UINT StrideInBytes; + }; + + D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { + default_root_signature, // ID3D12RootSignature *pRootSignature; + StaticShaderCache::GetClearVertexShader(), // D3D12_SHADER_BYTECODE VS; + StaticShaderCache::GetClearPixelShader(), // D3D12_SHADER_BYTECODE PS; + {}, // D3D12_SHADER_BYTECODE DS; + {}, // D3D12_SHADER_BYTECODE HS; + g_ActiveConfig.iStereoMode > 0 ? 
+ StaticShaderCache::GetClearGeometryShader() : + D3D12_SHADER_BYTECODE(), // D3D12_SHADER_BYTECODE GS; + {}, // D3D12_STREAM_OUTPUT_DESC StreamOutput + *blend_desc, // D3D12_BLEND_DESC BlendState; + UINT_MAX, // UINT SampleMask; + Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState + *depth_stencil_desc, // D3D12_DEPTH_STENCIL_DESC DepthStencilState + StaticShaderCache::GetClearVertexShaderInputLayout(), // D3D12_INPUT_LAYOUT_DESC InputLayout + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES IndexBufferProperties + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType + 1, // UINT NumRenderTargets + { DXGI_FORMAT_R8G8B8A8_UNORM }, // DXGI_FORMAT RTVFormats[8] + DXGI_FORMAT_D32_FLOAT, // DXGI_FORMAT DSVFormat + { 1 /* UINT Count */, 0 /* UINT Quality */ } // DXGI_SAMPLE_DESC SampleDesc + }; + + if (rt_multisampled) + { + pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples; + } + + ID3D12PipelineState* pso = nullptr; + CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&pso_desc, &pso)); + + D3D::current_command_list->SetPipelineState(pso); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); + + // In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled. + // Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid + // dirtying state. 
+ + // 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072 + D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); + + D3D::current_command_list->DrawInstanced(4, 1, clearq_offset, 0); + + g_renderer->RestoreAPIState(); +} + +static void InitColVertex(ColVertex* vert, float x, float y, float z, u32 col) +{ + vert->x = x; + vert->y = y; + vert->z = z; + vert->col = col; +} + +void DrawEFBPokeQuads(EFBAccessType type, + const EfbPokeData* points, + size_t num_points, + D3D12_BLEND_DESC* blend_desc, + D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, + D3D12_VIEWPORT* viewport, + D3D12_CPU_DESCRIPTOR_HANDLE* render_target, + D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer, + bool rt_multisampled + ) +{ + // The viewport and RT/DB are passed in so we can reconstruct the state if we need to execute in the middle of building the vertex buffer. + + D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { + default_root_signature, // ID3D12RootSignature *pRootSignature; + StaticShaderCache::GetClearVertexShader(), // D3D12_SHADER_BYTECODE VS; + StaticShaderCache::GetClearPixelShader(), // D3D12_SHADER_BYTECODE PS; + {}, // D3D12_SHADER_BYTECODE DS; + {}, // D3D12_SHADER_BYTECODE HS; + g_ActiveConfig.iStereoMode > 0 ? 
+ StaticShaderCache::GetClearGeometryShader() : + D3D12_SHADER_BYTECODE(), // D3D12_SHADER_BYTECODE GS; + {}, // D3D12_STREAM_OUTPUT_DESC StreamOutput + *blend_desc, // D3D12_BLEND_DESC BlendState; + UINT_MAX, // UINT SampleMask; + Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState + *depth_stencil_desc, // D3D12_DEPTH_STENCIL_DESC DepthStencilState + StaticShaderCache::GetClearVertexShaderInputLayout(), // D3D12_INPUT_LAYOUT_DESC InputLayout + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES IndexBufferProperties + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType + 1, // UINT NumRenderTargets + { DXGI_FORMAT_R8G8B8A8_UNORM }, // DXGI_FORMAT RTVFormats[8] + DXGI_FORMAT_D32_FLOAT, // DXGI_FORMAT DSVFormat + { 1 /* UINT Count */, 0 /* UINT Quality */ } // DXGI_SAMPLE_DESC SampleDesc + }; + + if (rt_multisampled) + { + pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples; + } + + ID3D12PipelineState* pso = nullptr; + CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&pso_desc, &pso)); + + // If drawing a large number of points at once, this will have to be split into multiple passes. + const size_t COL_QUAD_SIZE = sizeof(ColVertex) * 6; + size_t points_per_draw = util_vbuf_efbpokequads->GetSize() / COL_QUAD_SIZE; + + size_t current_point_index = 0; + + while (current_point_index < num_points) + { + // Map and reserve enough buffer space for this draw + size_t points_to_draw = std::min(num_points - current_point_index, points_per_draw); + size_t required_bytes = COL_QUAD_SIZE * points_to_draw; + + void* buffer_ptr = nullptr; + int base_vertex_index = util_vbuf_efbpokequads->BeginAppendData(&buffer_ptr, static_cast(required_bytes), sizeof(ColVertex)); + + CHECK(base_vertex_index * sizeof(ColVertex) + required_bytes <= util_vbuf_efbpokequads->GetSize(), "Uh oh"); // The index counts ColVertex-sized elements (it is later used as the draw's start vertex), so scale by the real stride. + + // Corresponding dirty flags set outside loop. 
+ D3D::current_command_list->OMSetRenderTargets(1, render_target, FALSE, depth_buffer); + D3D::current_command_list->RSSetViewports(1, viewport); + D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + D3D12_VERTEX_BUFFER_VIEW vb_view = { + util_vbuf_efbpokequads->GetBuffer12()->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; + util_vbuf_efbpokequads->GetSize(), // UINT SizeInBytes; This is the size of the entire buffer, not just the size of the vertex data for one draw call, since the offsetting is done in the draw call itself. + sizeof(ColVertex) // UINT StrideInBytes; + }; + + D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true); + + D3D::current_command_list->SetPipelineState(pso); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); + + // Disable scissor testing. + D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); + + // generate quads for each efb point + ColVertex* base_vertex_ptr = reinterpret_cast(buffer_ptr); + for (size_t i = 0; i < points_to_draw; i++) + { + // generate quad from the single point (clip-space coordinates) + const EfbPokeData* point = &points[current_point_index]; + float x1 = float(point->x) * 2.0f / EFB_WIDTH - 1.0f; + float y1 = -float(point->y) * 2.0f / EFB_HEIGHT + 1.0f; + float x2 = float(point->x + 1) * 2.0f / EFB_WIDTH - 1.0f; + float y2 = -float(point->y + 1) * 2.0f / EFB_HEIGHT + 1.0f; + float z = (type == POKE_Z) ? (1.0f - float(point->data & 0xFFFFFF) / 16777216.0f) : 0.0f; + u32 col = (type == POKE_Z) ? 
0 : ((point->data & 0xFF00FF00) | ((point->data >> 16) & 0xFF) | ((point->data << 16) & 0xFF0000)); + current_point_index++; + + // quad -> triangles + ColVertex* vertex = &base_vertex_ptr[i * 6]; + InitColVertex(&vertex[0], x1, y1, z, col); + InitColVertex(&vertex[1], x2, y1, z, col); + InitColVertex(&vertex[2], x1, y2, z, col); + InitColVertex(&vertex[3], x1, y2, z, col); + InitColVertex(&vertex[4], x2, y1, z, col); + InitColVertex(&vertex[5], x2, y2, z, col); + } + + // Issue the draw + D3D::current_command_list->DrawInstanced(6 * static_cast(points_to_draw), 1, base_vertex_index, 0); + } + +} + +} // namespace D3D + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.h b/Source/Core/VideoBackends/D3D12/D3DUtil.h new file mode 100644 index 0000000000..c40784425c --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.h @@ -0,0 +1,108 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "Common/MathUtil.h" +#include "VideoBackends/D3D12/D3DState.h" + +#include "VideoCommon/RenderBase.h" + +namespace DX12 +{ + +extern StateCache gx_state_cache; + +namespace D3D +{ + +unsigned int AlignValue(unsigned int value, unsigned int alignment); +void ResourceBarrier(ID3D12GraphicsCommandList* command_list, ID3D12Resource* resource, D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after, UINT subresource); + +// Font creation flags +static const unsigned int D3DFONT_BOLD = 0x0001; +static const unsigned int D3DFONT_ITALIC = 0x0002; + +// Font rendering flags +static const unsigned int D3DFONT_CENTERED = 0x0001; + +class CD3DFont +{ +public: + CD3DFont(); + // 2D text drawing function + // Initializing and destroying device-dependent objects + int Init(); + int Shutdown(); + int DrawTextScaled(float x, float y, + float size, + float spacing, u32 dwColor, + const std::string& text); + +private: + ID3D12Resource* m_texture12 
= nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE m_texture12_cpu = {}; + D3D12_GPU_DESCRIPTOR_HANDLE m_texture12_gpu = {}; + + ID3D12Resource* m_vb12 = nullptr; + D3D12_VERTEX_BUFFER_VIEW m_vb12_view = {}; + void* m_vb12_data = nullptr; + unsigned int m_vb12_offset = 0; + + D3D12_INPUT_LAYOUT_DESC m_input_layout12 = {}; + D3D12_SHADER_BYTECODE m_pshader12 = {}; + D3D12_SHADER_BYTECODE m_vshader12 = {}; + D3D12_BLEND_DESC m_blendstate12 = {}; + D3D12_RASTERIZER_DESC m_raststate12 = {}; + ID3D12PipelineState* m_pso = nullptr; + + unsigned int m_line_height = 0; + float m_tex_coords[128 - 32][4] = {}; + + const int m_tex_width = 512; + const int m_tex_height = 512; +}; + +extern CD3DFont font; + +void InitUtils(); +void ShutdownUtils(); + +void SetPointCopySampler(); +void SetLinearCopySampler(); + +void DrawShadedTexQuad(D3DTexture2D* texture, + const D3D12_RECT* source, + int source_width, + int source_height, + D3D12_SHADER_BYTECODE pshader12 = {}, + D3D12_SHADER_BYTECODE vshader12 = {}, + D3D12_INPUT_LAYOUT_DESC layout12 = {}, + D3D12_SHADER_BYTECODE gshader12 = {}, + float gamma = 1.0f, + u32 slice = 0, + DXGI_FORMAT rt_format = DXGI_FORMAT_R8G8B8A8_UNORM, + bool inherit_srv_binding = false, + bool rt_multisampled = false, + D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc_override = nullptr + ); + +void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled); +void DrawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled); + +void DrawEFBPokeQuads(EFBAccessType type, + const EfbPokeData* points, + size_t num_points, + D3D12_BLEND_DESC* blend_desc, + D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, + D3D12_VIEWPORT* viewport, + D3D12_CPU_DESCRIPTOR_HANDLE* render_target, + D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer, + bool rt_multisampled); +} + +} diff --git 
a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp new file mode 100644 index 0000000000..f0118a20bf --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp @@ -0,0 +1,292 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Core/HW/Memmap.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DUtil.h" +#include "VideoBackends/D3D12/FramebufferManager.h" +#include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" +#include "VideoBackends/D3D12/XFBEncoder.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +static XFBEncoder s_xfbEncoder; + +FramebufferManager::Efb FramebufferManager::m_efb; +unsigned int FramebufferManager::m_target_width; +unsigned int FramebufferManager::m_target_height; + +D3D12_DEPTH_STENCIL_DESC FramebufferManager::m_depth_resolve_depth_stencil_desc; + +D3DTexture2D*& FramebufferManager::GetEFBColorTexture() { return m_efb.color_tex; } +ID3D12Resource*& FramebufferManager::GetEFBColorStagingBuffer() { return m_efb.color_staging_buf; } + +D3DTexture2D*& FramebufferManager::GetEFBDepthTexture() { return m_efb.depth_tex; } +D3DTexture2D*& FramebufferManager::GetEFBDepthReadTexture() { return m_efb.depth_read_texture; } +ID3D12Resource*& FramebufferManager::GetEFBDepthStagingBuffer() { return m_efb.depth_staging_buf; } + +D3DTexture2D*& FramebufferManager::GetEFBColorTempTexture() { return m_efb.color_temp_tex; } + +void FramebufferManager::SwapReinterpretTexture() +{ + D3DTexture2D* swaptex = GetEFBColorTempTexture(); + m_efb.color_temp_tex = GetEFBColorTexture(); + m_efb.color_tex = swaptex; +} + +D3DTexture2D*& FramebufferManager::GetResolvedEFBColorTexture() +{ + if (g_ActiveConfig.iMultisamples > 1) + { + 
m_efb.resolved_color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RESOLVE_DEST); + m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + + for (int i = 0; i < m_efb.slices; i++) + { + D3D::current_command_list->ResolveSubresource(m_efb.resolved_color_tex->GetTex12(), D3D11CalcSubresource(0, i, 1), m_efb.color_tex->GetTex12(), D3D11CalcSubresource(0, i, 1), DXGI_FORMAT_R8G8B8A8_UNORM); + } + + m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + + return m_efb.resolved_color_tex; + } + else + { + return m_efb.color_tex; + } +} + +D3DTexture2D*& FramebufferManager::GetResolvedEFBDepthTexture() +{ + if (g_ActiveConfig.iMultisamples > 1) + { + ResolveDepthTexture(); + + return m_efb.resolved_depth_tex; + } + else + { + return m_efb.depth_tex; + } +} + +FramebufferManager::FramebufferManager() +{ + m_target_width = std::max(Renderer::GetTargetWidth(), 1); + m_target_height = std::max(Renderer::GetTargetHeight(), 1); + + DXGI_SAMPLE_DESC sample_desc; + sample_desc.Count = g_ActiveConfig.iMultisamples; + sample_desc.Quality = 0; + + ID3D12Resource* buf12; + D3D12_RESOURCE_DESC texdesc12; + D3D12_CLEAR_VALUE optimized_clear_valueRTV = { DXGI_FORMAT_R8G8B8A8_UNORM, { 0.0f, 0.0f, 0.0f, 1.0f } }; + D3D12_CLEAR_VALUE optimized_clear_valueDSV = CD3DX12_CLEAR_VALUE(DXGI_FORMAT_D32_FLOAT, 0.0f, 0); + + HRESULT hr; + + m_EFBLayers = m_efb.slices = (g_ActiveConfig.iStereoMode > 0) ? 
2 : 1; + + // EFB color texture - primary render target + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); + CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12))); // Checked immediately: buf12 is used on the next statement. + + m_efb.color_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); + SAFE_RELEASE(buf12); + + // Temporary EFB color texture - used in ReinterpretPixelData + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); + CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12))); + m_efb.color_temp_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); + SAFE_RELEASE(buf12); + D3D::SetDebugObjectName12(m_efb.color_temp_tex->GetTex12(), "EFB color temp texture"); + + // AccessEFB - Sysmem buffer used to retrieve the pixel data from color_tex + texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(64 * 1024); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.color_staging_buf)); // Stored in hr so the CHECK that follows reports this call's result, not a stale one. + CHECK(hr == S_OK, "create EFB color staging 
buffer (hr=%#x)", hr); + + // EFB depth buffer - primary depth buffer + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueDSV, IID_PPV_ARGS(&buf12))); + + m_efb.depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON); + SAFE_RELEASE(buf12); + D3D::SetDebugObjectName12(m_efb.depth_tex->GetTex12(), "EFB depth texture"); + + // Render buffer for AccessEFB (depth data) + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, 1, 1, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); + optimized_clear_valueRTV.Format = DXGI_FORMAT_R32_FLOAT; + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12)); + CHECK(hr == S_OK, "create EFB depth read texture (hr=%#x)", hr); + + m_efb.depth_read_texture = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); + + SAFE_RELEASE(buf12); + D3D::SetDebugObjectName12(m_efb.depth_read_texture->GetTex12(), "EFB depth read texture (used in Renderer::AccessEFB)"); + + // AccessEFB - Sysmem buffer used to retrieve the pixel data from depth_read_texture + texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(64 * 1024); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, 
IID_PPV_ARGS(&m_efb.depth_staging_buf)); + CHECK(hr == S_OK, "create EFB depth staging buffer (hr=%#x)", hr); + + D3D::SetDebugObjectName12(m_efb.depth_staging_buf, "EFB depth staging texture (used for Renderer::AccessEFB)"); + + if (g_ActiveConfig.iMultisamples > 1) + { + // Framebuffer resolve textures (color+depth) + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12)); + CHECK(hr == S_OK, "create EFB color resolve texture (size: %dx%d)", m_target_width, m_target_height); + m_efb.resolved_color_tex = new D3DTexture2D(buf12, D3D11_BIND_SHADER_RESOURCE, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); + SAFE_RELEASE(buf12); + D3D::SetDebugObjectName12(m_efb.resolved_color_tex->GetTex12(), "EFB color resolve texture shader resource view"); + + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12)); + CHECK(hr == S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); + m_efb.resolved_depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); + SAFE_RELEASE(buf12); + D3D::SetDebugObjectName12(m_efb.resolved_depth_tex->GetTex12(), "EFB depth resolve texture shader resource view"); + + m_depth_resolve_depth_stencil_desc = {}; + 
m_depth_resolve_depth_stencil_desc.StencilEnable = FALSE; + m_depth_resolve_depth_stencil_desc.DepthEnable = TRUE; + m_depth_resolve_depth_stencil_desc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + m_depth_resolve_depth_stencil_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + } + else + { + m_efb.resolved_color_tex = nullptr; + m_efb.resolved_depth_tex = nullptr; + } + + s_xfbEncoder.Init(); +} + +FramebufferManager::~FramebufferManager() +{ + s_xfbEncoder.Shutdown(); + + SAFE_RELEASE(m_efb.color_tex); + SAFE_RELEASE(m_efb.color_temp_tex); + + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.color_staging_buf); + + SAFE_RELEASE(m_efb.resolved_color_tex); + SAFE_RELEASE(m_efb.depth_tex); + + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.depth_staging_buf); + + SAFE_RELEASE(m_efb.depth_read_texture); + SAFE_RELEASE(m_efb.resolved_depth_tex); +} + +void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float gamma) +{ + u8* dst = Memory::GetPointer(xfbAddr); + s_xfbEncoder.Encode(dst, fbStride/2, fbHeight, sourceRc, gamma); +} + +std::unique_ptr FramebufferManager::CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) +{ + return std::make_unique(D3DTexture2D::Create(target_width, target_height, + (D3D11_BIND_FLAG)(D3D11_BIND_RENDER_TARGET|D3D11_BIND_SHADER_RESOURCE), + D3D11_USAGE_DEFAULT, DXGI_FORMAT_R8G8B8A8_UNORM, 1, layers), layers); +} + +void FramebufferManager::GetTargetSize(unsigned int* width, unsigned int* height) +{ + *width = m_target_width; + *height = m_target_height; +} + +void FramebufferManager::ResolveDepthTexture() +{ + // ResolveSubresource does not work with depth textures. + // Instead, we use a shader that selects the minimum depth from all samples. 
+ + const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast(m_target_width), static_cast(m_target_height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::current_command_list->RSSetViewports(1, &vp12); + + m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + D3D::current_command_list->OMSetRenderTargets(0, nullptr, FALSE, &m_efb.resolved_depth_tex->GetDSV12()); + + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + D3D::SetLinearCopySampler(); + + // Render a quad covering the entire target, writing SV_Depth. + const D3D12_RECT source_rect = CD3DX12_RECT(0, 0, m_target_width, m_target_height); + D3D::DrawShadedTexQuad( + FramebufferManager::GetEFBDepthTexture(), + &source_rect, + m_target_width, + m_target_height, + StaticShaderCache::GetDepthCopyPixelShader(true), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + StaticShaderCache::GetCopyGeometryShader(), + 1.0, + 0, + DXGI_FORMAT_D32_FLOAT + ); + + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + + // Restores proper viewport/scissor settings. + g_renderer->RestoreAPIState(); +} + +void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) +{ + // DX12's XFB decoder does not use this function. + // YUYV data is decoded in Render::Swap. 
+} + +void XFBSource::CopyEFB(float gamma) +{ + // Copy EFB data to XFB and restore render target again + const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast(texWidth), static_cast(texHeight), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::current_command_list->RSSetViewports(1, &vp12); + + const D3D12_RECT rect = CD3DX12_RECT(0, 0, texWidth, texHeight); + + m_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &m_tex->GetRTV12(), FALSE, nullptr); + + D3D::SetPointCopySampler(); + + D3D::DrawShadedTexQuad( + FramebufferManager::GetEFBColorTexture(), + &rect, + Renderer::GetTargetWidth(), + Renderer::GetTargetHeight(), + StaticShaderCache::GetColorCopyPixelShader(true), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + StaticShaderCache::GetCopyGeometryShader(), + gamma, + 0, + DXGI_FORMAT_R8G8B8A8_UNORM, + false, + m_tex->GetMultisampled() + ); + + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + + // Restores proper viewport/scissor settings. + g_renderer->RestoreAPIState(); +} + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.h b/Source/Core/VideoBackends/D3D12/FramebufferManager.h new file mode 100644 index 0000000000..8f0376a9bc --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.h @@ -0,0 +1,107 @@ +// Copyright 2009 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "VideoBackends/D3D12/D3DTexture.h" +#include "VideoCommon/FramebufferManagerBase.h" + +namespace DX12 +{ + +// On the GameCube, the game sends a request for the graphics processor to +// transfer its internal EFB (Embedded Framebuffer) to an area in GameCube RAM +// called the XFB (External Framebuffer). The size and location of the XFB is +// decided at the time of the copy, and the format is always YUYV. The video +// interface is given a pointer to the XFB, which will be decoded and +// displayed on the TV. +// +// There are two ways for Dolphin to emulate this: +// +// Real XFB mode: +// +// Dolphin will behave like the GameCube and encode the EFB to +// a portion of GameCube RAM. The emulated video interface will decode the data +// for output to the screen. +// +// Advantages: Behaves exactly like the GameCube. +// Disadvantages: Resolution will be limited. +// +// Virtual XFB mode: +// +// When a request is made to copy the EFB to an XFB, Dolphin +// will remember the RAM location and size of the XFB in a Virtual XFB list. +// The video interface will look up the XFB in the list and use the enhanced +// data stored there, if available. +// +// Advantages: Enables high resolution graphics, better than real hardware. +// Disadvantages: If the GameCube CPU writes directly to the XFB (which is +// possible but uncommon), the Virtual XFB will not capture this information. + +// There may be multiple XFBs in GameCube RAM. This is the maximum number to +// virtualize. 
+ +struct XFBSource final : public XFBSourceBase +{ + XFBSource(D3DTexture2D* tex, int slices) : m_tex(tex), m_slices(slices) {} + ~XFBSource() { m_tex->Release(); } + + void DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) override; + void CopyEFB(float gamma) override; + + D3DTexture2D* m_tex; + const int m_slices; +}; + +class FramebufferManager final : public FramebufferManagerBase +{ +public: + FramebufferManager(); + ~FramebufferManager(); + + static D3DTexture2D*& GetEFBColorTexture(); + static ID3D12Resource*& GetEFBColorStagingBuffer(); + + static D3DTexture2D*& GetEFBDepthTexture(); + static D3DTexture2D*& GetEFBDepthReadTexture(); + static ID3D12Resource*& GetEFBDepthStagingBuffer(); + static D3DTexture2D*& GetResolvedEFBColorTexture(); + static D3DTexture2D*& GetResolvedEFBDepthTexture(); + + static D3DTexture2D*& GetEFBColorTempTexture(); + static void SwapReinterpretTexture(); + + static void ResolveDepthTexture(); + +private: + std::unique_ptr CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override; + void GetTargetSize(unsigned int* width, unsigned int* height) override; + + void CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc, float gamma) override; + + static struct Efb + { + D3DTexture2D* color_tex; + ID3D12Resource* color_staging_buf; + + D3DTexture2D* depth_tex; + ID3D12Resource* depth_staging_buf; + + D3DTexture2D* depth_read_texture; + + D3DTexture2D* color_temp_tex; + + D3DTexture2D* resolved_color_tex; + D3DTexture2D* resolved_depth_tex; + + int slices; + } m_efb; + + static unsigned int m_target_width; + static unsigned int m_target_height; + + static D3D12_DEPTH_STENCIL_DESC m_depth_resolve_depth_stencil_desc; +}; + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/NativeVertexFormat.cpp b/Source/Core/VideoBackends/D3D12/NativeVertexFormat.cpp new file mode 100644 index 0000000000..fda4846367 --- /dev/null +++ 
b/Source/Core/VideoBackends/D3D12/NativeVertexFormat.cpp @@ -0,0 +1,110 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DUtil.h" + +#include "VideoBackends/D3D12/NativeVertexFormat.h" +#include "VideoBackends/D3D12/VertexManager.h" + +namespace DX12 +{ + +NativeVertexFormat* VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +{ + return new D3DVertexFormat(vtx_decl); +} + +static const constexpr DXGI_FORMAT d3d_format_lookup[5*4*2] = +{ + // float formats + DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R32G32_FLOAT, + DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R32G32B32A32_FLOAT, + + // integer formats + DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SINT, DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_UNKNOWN, +}; + +DXGI_FORMAT VarToD3D(VarType t, int size, bool integer) +{ + DXGI_FORMAT retval = d3d_format_lookup[static_cast(t) + 5 * (size-1) + 5 * 4 * static_cast(integer)]; + if (retval == DXGI_FORMAT_UNKNOWN) + { + PanicAlert("VarToD3D: Invalid type/size combo %i , %i, %i", static_cast(t), 
size, static_cast(integer)); + } + return retval; +} + +D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration &vtx_decl) + : m_num_elems(0), m_layout12({}), m_elems() +{ + this->vtx_decl = vtx_decl; + + AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.position, "POSITION", 0); + + for (int i = 0; i < 3; i++) + { + AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.normals[i], "NORMAL", i); + } + + for (int i = 0; i < 2; i++) + { + AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.colors[i], "COLOR", i); + } + + for (int i = 0; i < 8; i++) + { + AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.texcoords[i], "TEXCOORD", i); + } + + AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.posmtx, "BLENDINDICES", 0); + + m_layout12.NumElements = m_num_elems; + m_layout12.pInputElementDescs = m_elems.data(); +} + +D3DVertexFormat::~D3DVertexFormat() +{ +} + +void D3DVertexFormat::AddInputElementDescFromAttributeFormatIfValid(const AttributeFormat* format, const char* semantic_name, unsigned int semantic_index) +{ + if (!format->enable) + { + return; + } + + D3D12_INPUT_ELEMENT_DESC desc = {}; + + desc.AlignedByteOffset = format->offset; + desc.Format = VarToD3D(format->type, format->components, format->integer); + desc.InputSlot = 0; + desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + desc.SemanticName = semantic_name; + desc.SemanticIndex = semantic_index; + + m_elems[m_num_elems] = desc; + ++m_num_elems; +} + +void D3DVertexFormat::SetupVertexPointers() +{ + // No-op on DX12. 
+} + +D3D12_INPUT_LAYOUT_DESC D3DVertexFormat::GetActiveInputLayout12() const +{ + return m_layout12; +} + + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/NativeVertexFormat.h b/Source/Core/VideoBackends/D3D12/NativeVertexFormat.h new file mode 100644 index 0000000000..58a9fbb1e4 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/NativeVertexFormat.h @@ -0,0 +1,31 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +#pragma once + +#include +#include + +#include "VideoCommon/NativeVertexFormat.h" + +namespace DX12 +{ + +class D3DVertexFormat final : public NativeVertexFormat +{ + std::array m_elems; + UINT m_num_elems; + + D3D12_INPUT_LAYOUT_DESC m_layout12; + +public: + D3DVertexFormat(const PortableVertexDeclaration& vtx_decl); + ~D3DVertexFormat(); + + void SetupVertexPointers(); + + D3D12_INPUT_LAYOUT_DESC GetActiveInputLayout12() const; + +private: + void AddInputElementDescFromAttributeFormatIfValid(const AttributeFormat* format, const char* semantic_name, unsigned int semantic_index); +}; +} diff --git a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp new file mode 100644 index 0000000000..6af30765ba --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp @@ -0,0 +1,299 @@ +// Copyright 2011 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "Core/HW/Memmap.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" +#include "VideoBackends/D3D12/D3DShader.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DUtil.h" +#include "VideoBackends/D3D12/FramebufferManager.h" +#include "VideoBackends/D3D12/PSTextureEncoder.h" +#include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" +#include "VideoBackends/D3D12/TextureCache.h" + +#include "VideoCommon/TextureConversionShader.h" + +namespace DX12 +{ + +struct EFBEncodeParams +{ + DWORD SrcLeft; + DWORD SrcTop; + DWORD DestWidth; + DWORD ScaleFactor; +}; + +PSTextureEncoder::PSTextureEncoder() +{ +} + +void PSTextureEncoder::Init() +{ + // Create output texture RGBA format + D3D12_RESOURCE_DESC out_tex_desc = CD3DX12_RESOURCE_DESC::Tex2D( + DXGI_FORMAT_B8G8R8A8_UNORM, + EFB_WIDTH * 4, + EFB_HEIGHT / 4, + 1, + 0, + 1, + 0, + D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET + ); + + D3D12_CLEAR_VALUE optimized_clear_value = { DXGI_FORMAT_B8G8R8A8_UNORM, { 0.0f, 0.0f, 0.0f, 1.0f } }; + + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &out_tex_desc, + D3D12_RESOURCE_STATE_COPY_SOURCE, + &optimized_clear_value, + IID_PPV_ARGS(&m_out) + ) + ); + + D3D::SetDebugObjectName12(m_out, "efb encoder output texture"); + + // Create output render target view + D3D12_RENDER_TARGET_VIEW_DESC tex_rtv_desc = { + DXGI_FORMAT_B8G8R8A8_UNORM, // DXGI_FORMAT Format; + D3D12_RTV_DIMENSION_TEXTURE2D // D3D12_RTV_DIMENSION ViewDimension; + }; + + tex_rtv_desc.Texture2D.MipSlice = 0; + + D3D::rtv_descriptor_heap_mgr->Allocate(&m_out_rtv_cpu); + D3D::device12->CreateRenderTargetView(m_out, &tex_rtv_desc, m_out_rtv_cpu); + + // Create output staging buffer + CheckHR( + D3D::device12->CreateCommittedResource( + 
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer( + D3D::AlignValue(static_cast(out_tex_desc.Width) * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * + out_tex_desc.Height + ), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_out_readback_buffer) + ) + ); + + D3D::SetDebugObjectName12(m_out_readback_buffer, "efb encoder output staging buffer"); + + CheckHR(m_out_readback_buffer->Map(0, nullptr, &m_out_readback_buffer_data)); + + // Create constant buffer for uploading data to shaders. Need to align to 256 bytes. + unsigned int encode_params_buffer_size = (sizeof(EFBEncodeParams) + 0xff) & ~0xff; + + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(encode_params_buffer_size), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_encode_params_buffer) + ) + ); + + D3D::SetDebugObjectName12(m_encode_params_buffer, "efb encoder params buffer"); + + CheckHR(m_encode_params_buffer->Map(0, nullptr, &m_encode_params_buffer_data)); + + m_ready = true; +} + +void PSTextureEncoder::Shutdown() +{ + m_ready = false; + + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_out); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_out_readback_buffer); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_encode_params_buffer); + + for (auto& it : m_static_shaders_blobs) + { + SAFE_RELEASE(it); + } + + m_static_shaders_blobs.clear(); + m_static_shaders_map.clear(); +} + +void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + PEControl::PixelFormat src_format, const EFBRectangle& src_rect, + bool is_intensity, bool scale_by_half) +{ + if (!m_ready) // Make sure we initialized OK + return; + + D3D::command_list_mgr->CPUAccessNotify(); + + // Resolve MSAA targets 
before copying. + D3DTexture2D* efb_source = (src_format == PEControl::Z24) ? + FramebufferManager::GetResolvedEFBDepthTexture() : + // EXISTINGD3D11TODO: Instead of resolving EFB, it would be better to pick out a + // single sample from each pixel. The game may break if it isn't + // expecting the blurred edges around multisampled shapes. + FramebufferManager::GetResolvedEFBColorTexture(); + + // GetResolvedEFBDepthTexture will set the render targets, when MSAA is enabled + // (since it needs to do a manual depth resolve). So make sure to set the RTs + // afterwards. + + const u32 words_per_row = bytes_per_row / sizeof(u32); + + D3D12_VIEWPORT vp = { 0.f, 0.f, FLOAT(words_per_row), FLOAT(num_blocks_y), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::current_command_list->RSSetViewports(1, &vp); + + constexpr EFBRectangle full_src_rect(0, 0, EFB_WIDTH, EFB_HEIGHT); + + TargetRectangle target_rect = g_renderer->ConvertEFBRectangle(full_src_rect); + + D3D::ResourceBarrier(D3D::current_command_list, m_out, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET, 0); + D3D::current_command_list->OMSetRenderTargets(1, &m_out_rtv_cpu, FALSE, nullptr); + + EFBEncodeParams params; + params.SrcLeft = src_rect.left; + params.SrcTop = src_rect.top; + params.DestWidth = native_width; + params.ScaleFactor = scale_by_half ? 
2 : 1; + + memcpy(m_encode_params_buffer_data, &params, sizeof(params)); + D3D::current_command_list->SetGraphicsRootConstantBufferView( + DESCRIPTOR_TABLE_PS_CBVONE, + m_encode_params_buffer->GetGPUVirtualAddress() + ); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); + + // Use linear filtering if (bScaleByHalf), use point filtering otherwise + if (scale_by_half) + D3D::SetLinearCopySampler(); + else + D3D::SetPointCopySampler(); + + D3D::DrawShadedTexQuad(efb_source, + target_rect.AsRECT(), + Renderer::GetTargetWidth(), + Renderer::GetTargetHeight(), + SetStaticShader(format, src_format, is_intensity, scale_by_half), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + D3D12_SHADER_BYTECODE(), + 1.0f, + 0, + DXGI_FORMAT_B8G8R8A8_UNORM, + false, + false /* Render target is not multisampled */ + ); + + // Copy to staging buffer + D3D12_BOX src_box = CD3DX12_BOX(0, 0, 0, words_per_row, num_blocks_y, 1); + + D3D12_TEXTURE_COPY_LOCATION dst_location = {}; + dst_location.pResource = m_out_readback_buffer; + dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst_location.PlacedFootprint.Offset = 0; + dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + dst_location.PlacedFootprint.Footprint.Width = EFB_WIDTH * 4; + dst_location.PlacedFootprint.Footprint.Height = EFB_HEIGHT / 4; + dst_location.PlacedFootprint.Footprint.Depth = 1; + dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + + D3D12_TEXTURE_COPY_LOCATION src_location = {}; + src_location.pResource = m_out; + src_location.SubresourceIndex = 0; + src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + + D3D::ResourceBarrier(D3D::current_command_list, m_out, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE, 0); +
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); + + D3D::command_list_mgr->ExecuteQueuedWork(true); + + // Transfer staging buffer to GameCube/Wii RAM + + u8* src = static_cast(m_out_readback_buffer_data); + u32 read_stride = std::min(bytes_per_row, dst_location.PlacedFootprint.Footprint.RowPitch); + for (unsigned int y = 0; y < num_blocks_y; ++y) + { + memcpy(dst, src, read_stride); + + dst += memory_stride; + src += dst_location.PlacedFootprint.Footprint.RowPitch; + } + + // Restores proper viewport/scissor settings. + g_renderer->RestoreAPIState(); + + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); +} + +D3D12_SHADER_BYTECODE PSTextureEncoder::SetStaticShader(unsigned int dst_format, PEControl::PixelFormat src_format, + bool is_intensity, bool scale_by_half) +{ + size_t fetch_num = static_cast(src_format); + size_t scaled_fetch_num = scale_by_half ? 1 : 0; + size_t intensity_num = is_intensity ? 1 : 0; + size_t generator_num = dst_format; + + ComboKey key = MakeComboKey(dst_format, src_format, is_intensity, scale_by_half); + + ComboMap::iterator it = m_static_shaders_map.find(key); + if (it == m_static_shaders_map.end()) + { + INFO_LOG(VIDEO, "Compiling efb encoding shader for dst_format 0x%X, src_format %d, is_intensity %d, scale_by_half %d", + dst_format, static_cast(src_format), is_intensity ? 1 : 0, scale_by_half ? 
1 : 0); + + u32 format = dst_format; + + if (src_format == PEControl::Z24) + { + format |= _GX_TF_ZTF; + if (dst_format == 11) + format = GX_TF_Z16; + else if (format < GX_TF_Z8 || format > GX_TF_Z24X8) + format |= _GX_TF_CTF; + } + else + { + if (dst_format > GX_TF_RGBA8 || (dst_format < GX_TF_RGB565 && !is_intensity)) + format |= _GX_TF_CTF; + } + + ID3DBlob* bytecode = nullptr; + const char* shader = TextureConversionShader::GenerateEncodingShader(format, API_D3D); + if (!D3D::CompilePixelShader(shader, &bytecode)) + { + WARN_LOG(VIDEO, "EFB encoder shader for dst_format 0x%X, src_format %d, is_intensity %d, scale_by_half %d failed to compile", + dst_format, static_cast<int>(src_format), is_intensity ? 1 : 0, scale_by_half ? 1 : 0); + m_static_shaders_map[key] = {}; // Record the failure in the shader map (the container the lookup above consults); indexing the blob vector by combo key wrote out of bounds. + return {}; + } + + D3D12_SHADER_BYTECODE new_shader = { + bytecode->GetBufferPointer(), + bytecode->GetBufferSize() + }; + + it = m_static_shaders_map.emplace(key, new_shader).first; + + // Keep track of the ID3DBlobs, so we can free them upon shutdown. + m_static_shaders_blobs.push_back(bytecode); + } + + return it->second; +} + +} diff --git a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h new file mode 100644 index 0000000000..c8f05788e8 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h @@ -0,0 +1,53 @@ +// Copyright 2011 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included.
+ +#pragma once + +#include "VideoBackends/D3D12/TextureEncoder.h" + +#include "VideoCommon/TextureCacheBase.h" + +namespace DX12 +{ + +class PSTextureEncoder final : public TextureEncoder +{ +public: + PSTextureEncoder(); + + void Init(); + void Shutdown(); + void Encode(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + PEControl::PixelFormat src_format, const EFBRectangle& src_rect, + bool is_intensity, bool scale_by_half); + +private: + bool m_ready = false; + + ID3D12Resource* m_out = nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE m_out_rtv_cpu = {}; + + ID3D12Resource* m_out_readback_buffer = nullptr; + void* m_out_readback_buffer_data = nullptr; + + ID3D12Resource* m_encode_params_buffer = nullptr; + void* m_encode_params_buffer_data = nullptr; + + D3D12_SHADER_BYTECODE SetStaticShader(unsigned int dst_format, + PEControl::PixelFormat src_format, bool is_intensity, bool scale_by_half); + + using ComboKey = unsigned int; // Key for a shader combination + static ComboKey MakeComboKey(unsigned int dst_format, + PEControl::PixelFormat src_format, bool is_intensity, bool scale_by_half) + { + return (dst_format << 4) | (static_cast(src_format) << 2) | (is_intensity ? (1 << 1) : 0) + | (scale_by_half ? (1 << 0) : 0); + } + + using ComboMap = std::map; + ComboMap m_static_shaders_map; + std::vector m_static_shaders_blobs; +}; + +} diff --git a/Source/Core/VideoBackends/D3D12/PerfQuery.cpp b/Source/Core/VideoBackends/D3D12/PerfQuery.cpp new file mode 100644 index 0000000000..f5821ca1b7 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/PerfQuery.cpp @@ -0,0 +1,69 @@ +// Copyright 2012 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "Common/CommonFuncs.h" +#include "Common/CommonTypes.h" +#include "Common/Logging/Log.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/PerfQuery.h" +#include "VideoCommon/RenderBase.h" + +//D3D12TODO: Implement PerfQuery class. + +namespace DX12 +{ + +PerfQuery::PerfQuery() +{ + //D3D12TODO: Add implementation +} + +PerfQuery::~PerfQuery() +{ + //D3D12TODO: Add implementation +} + +void PerfQuery::EnableQuery(PerfQueryGroup type) +{ + //D3D12TODO: Add implementation +} + +void PerfQuery::DisableQuery(PerfQueryGroup type) +{ + //D3D12TODO: Add implementation +} + +void PerfQuery::ResetQuery() +{ + //D3D12TODO: Add implementation +} + +u32 PerfQuery::GetQueryResult(PerfQueryType type) +{ + //D3D12TODO: Add implementation + return 0; +} + +void PerfQuery::FlushOne() +{ + //D3D12TODO: Add implementation +} + +void PerfQuery::FlushResults() +{ + //D3D12TODO: Add implementation +} + +void PerfQuery::WeakFlush() +{ + //D3D12TODO: Add implementation +} + +bool PerfQuery::IsFlushed() const +{ + //D3D12TODO: Add implementation + return true; +} + +} // namespace diff --git a/Source/Core/VideoBackends/D3D12/PerfQuery.h b/Source/Core/VideoBackends/D3D12/PerfQuery.h new file mode 100644 index 0000000000..6e197bf53a --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/PerfQuery.h @@ -0,0 +1,46 @@ +// Copyright 2012 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "VideoCommon/PerfQueryBase.h" + +namespace DX12 +{ + +class PerfQuery final : public PerfQueryBase +{ +public: + PerfQuery(); + ~PerfQuery(); + + void EnableQuery(PerfQueryGroup type) override; + void DisableQuery(PerfQueryGroup type) override; + void ResetQuery() override; + u32 GetQueryResult(PerfQueryType type) override; + void FlushResults() override; + bool IsFlushed() const override; + +private: + struct ActiveQuery + { + ID3D11Query* query; + PerfQueryGroup query_type; + }; + + void WeakFlush(); + + // Only use when non-empty + void FlushOne(); + + // when testing in SMS: 64 was too small, 128 was ok + static const int s_perf_query_buffer_size = 512; + + std::array m_query_buffer; + int m_query_read_pos = 0; +}; + +} // namespace diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp new file mode 100644 index 0000000000..e3fc160c9d --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -0,0 +1,1613 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include +#include + +#include "Common/CommonTypes.h" +#include "Common/FileUtil.h" +#include "Common/MathUtil.h" + +#include "Core/ConfigManager.h" +#include "Core/Core.h" +#include "Core/Host.h" + +#include "VideoBackends/D3D12/BoundingBox.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DUtil.h" +#include "VideoBackends/D3D12/FramebufferManager.h" +#include "VideoBackends/D3D12/NativeVertexFormat.h" +#include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/ShaderCache.h" +#include "VideoBackends/D3D12/ShaderConstantsManager.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" +#include "VideoBackends/D3D12/Television.h" +#include "VideoBackends/D3D12/TextureCache.h" + +#include "VideoCommon/AVIDump.h" +#include "VideoCommon/BPFunctions.h" +#include "VideoCommon/Fifo.h" +#include "VideoCommon/ImageWrite.h" +#include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/PixelEngine.h" +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +static u32 s_last_multisamples = 1; +static bool s_last_stereo_mode = false; +static bool s_last_xfb_mode = false; + +static Television s_television; + +static ID3D12Resource* s_access_efb_constant_buffer = nullptr; + +enum CLEAR_BLEND_DESC +{ + CLEAR_BLEND_DESC_ALL_CHANNELS_ENABLED = 0, + CLEAR_BLEND_DESC_RGB_CHANNELS_ENABLED = 1, + CLEAR_BLEND_DESC_ALPHA_CHANNEL_ENABLED = 2, + CLEAR_BLEND_DESC_ALL_CHANNELS_DISABLED = 3 +}; + +static D3D12_BLEND_DESC s_clear_blend_descs[4] = {}; + +enum CLEAR_DEPTH_DESC +{ + CLEAR_DEPTH_DESC_DEPTH_DISABLED = 0, + CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_ENABLED = 1, + CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_DISABLED = 2, +}; + +static D3D12_DEPTH_STENCIL_DESC 
s_clear_depth_descs[3] = {}; + +// These are accessed in D3DUtil. +D3D12_BLEND_DESC g_reset_blend_desc = {}; +D3D12_DEPTH_STENCIL_DESC g_reset_depth_desc = {}; +D3D12_RASTERIZER_DESC g_reset_rast_desc = {}; + +static ID3D12Resource* s_screenshot_texture = nullptr; +static void* s_screenshot_texture_data = nullptr; + +// Nvidia stereo blitting struct defined in "nvstereo.h" from the Nvidia SDK +typedef struct _Nv_Stereo_Image_Header +{ + unsigned int dwSignature; + unsigned int dwWidth; + unsigned int dwHeight; + unsigned int dwBPP; + unsigned int dwFlags; +} NVSTEREOIMAGEHEADER, *LPNVSTEREOIMAGEHEADER; + +#define NVSTEREO_IMAGE_SIGNATURE 0x4433564e + +// GX pipeline state +struct +{ + SamplerState sampler[8]; + BlendState blend; + ZMode zmode; + RasterizerState raster; + +} gx_state; + +StateCache gx_state_cache; + +static void SetupDeviceObjects() +{ + s_television.Init(); + + g_framebuffer_manager = std::make_unique(); + + float colmat[20] = { 0.0f }; + colmat[0] = colmat[5] = colmat[10] = 1.0f; + + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(sizeof(colmat)), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&s_access_efb_constant_buffer) + ) + ); + + // Copy inital data to access_efb_cbuf12. 
+ void* access_efb_constant_buffer_data = nullptr; + CheckHR(s_access_efb_constant_buffer->Map(0, nullptr, &access_efb_constant_buffer_data)); + memcpy(access_efb_constant_buffer_data, colmat, sizeof(colmat)); + + D3D12_DEPTH_STENCIL_DESC depth_desc = {}; // Value-initialize: the stencil face-op members are never assigned below and would otherwise be copied uninitialized. + depth_desc.DepthEnable = FALSE; + depth_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + depth_desc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + depth_desc.StencilEnable = FALSE; + depth_desc.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + depth_desc.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_DISABLED] = depth_desc; + + depth_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + depth_desc.DepthEnable = TRUE; + s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_ENABLED] = depth_desc; + + depth_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_DISABLED] = depth_desc; + + D3D12_BLEND_DESC blend_desc = {}; // Value-initialize: only RenderTarget[0] is assigned below. + blend_desc.AlphaToCoverageEnable = FALSE; + blend_desc.IndependentBlendEnable = FALSE; + blend_desc.RenderTarget[0].LogicOpEnable = FALSE; + blend_desc.RenderTarget[0].LogicOp = D3D12_LOGIC_OP_NOOP; + blend_desc.RenderTarget[0].BlendEnable = FALSE; + blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_ONE; + blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_ZERO; + blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; + blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; + blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_ZERO; + blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; + g_reset_blend_desc = blend_desc; + s_clear_blend_descs[CLEAR_BLEND_DESC_ALL_CHANNELS_ENABLED] = g_reset_blend_desc; + + blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED|D3D12_COLOR_WRITE_ENABLE_GREEN|D3D12_COLOR_WRITE_ENABLE_BLUE; +
s_clear_blend_descs[CLEAR_BLEND_DESC_RGB_CHANNELS_ENABLED] = blend_desc; + + blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALPHA; + s_clear_blend_descs[CLEAR_BLEND_DESC_ALPHA_CHANNEL_ENABLED] = blend_desc; + + blend_desc.RenderTarget[0].RenderTargetWriteMask = 0; + s_clear_blend_descs[CLEAR_BLEND_DESC_ALL_CHANNELS_DISABLED] = blend_desc; + + depth_desc.DepthEnable = FALSE; + depth_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + depth_desc.DepthFunc = D3D12_COMPARISON_FUNC_LESS; + depth_desc.StencilEnable = FALSE; + depth_desc.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + depth_desc.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + + g_reset_depth_desc = depth_desc; + + D3D12_RASTERIZER_DESC rast_desc = CD3DX12_RASTERIZER_DESC(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false, 0, 0.f, 0.f, false, false, false, 0, D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF); + g_reset_rast_desc = rast_desc; + + s_screenshot_texture = nullptr; + s_screenshot_texture_data = nullptr; +} + +// Kill off all device objects +static void TeardownDeviceObjects() +{ + g_framebuffer_manager.reset(); + + if (s_screenshot_texture) + { + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_screenshot_texture); + s_screenshot_texture = nullptr; + } + + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_access_efb_constant_buffer); + s_access_efb_constant_buffer = nullptr; + + s_television.Shutdown(); + + gx_state_cache.Clear(); +} + +void CreateScreenshotTexture() +{ + // We can't render anything outside of the backbuffer anyway, so use the backbuffer size as the screenshot buffer size. + // This texture is released to be recreated when the window is resized in Renderer::SwapImpl. 
+ + const unsigned int screenshot_buffer_size = + D3D::AlignValue(D3D::GetBackBufferWidth() * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * + D3D::GetBackBufferHeight(); + + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(screenshot_buffer_size), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&s_screenshot_texture) + ) + ); + + CheckHR(s_screenshot_texture->Map(0, nullptr, &s_screenshot_texture_data)); +} + +static D3D12_BOX GetScreenshotSourceBox(const TargetRectangle& target_rc) +{ + // Since the screenshot buffer is copied back to the CPU, we can't access pixels that + // fall outside the backbuffer bounds. Therefore, when crop is enabled and the target rect is + // off-screen to the top/left, we clamp the origin at zero, as well as the bottom/right + // coordinates at the backbuffer dimensions. This will result in a rectangle that can be + // smaller than the backbuffer, but never larger. + + return CD3DX12_BOX( + std::max(target_rc.left, 0), + std::max(target_rc.top, 0), + 0, + std::min(D3D::GetBackBufferWidth(), static_cast(target_rc.right)), + std::min(D3D::GetBackBufferHeight(), static_cast(target_rc.bottom)), + 1); +} + +static void Create3DVisionTexture(int width, int height) +{ + // D3D12TODO: 3D Vision not implemented on D3D12 backend. 
+} + +Renderer::Renderer(void*& window_handle) +{ + if (g_ActiveConfig.iStereoMode == STEREO_3DVISION) + { + PanicAlert("3DVision not implemented on D3D12 backend."); + return; + } + + D3D::Create((HWND)window_handle); + + s_backbuffer_width = D3D::GetBackBufferWidth(); + s_backbuffer_height = D3D::GetBackBufferHeight(); + + FramebufferManagerBase::SetLastXfbWidth(MAX_XFB_WIDTH); + FramebufferManagerBase::SetLastXfbHeight(MAX_XFB_HEIGHT); + + UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); + + s_last_multisamples = g_ActiveConfig.iMultisamples; + s_last_efb_scale = g_ActiveConfig.iEFBScale; + s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0; + s_last_xfb_mode = g_ActiveConfig.bUseRealXFB; + CalculateTargetSize(s_backbuffer_width, s_backbuffer_height); + PixelShaderManager::SetEfbScaleChanged(); + + SetupDeviceObjects(); + + // Setup GX pipeline state + gx_state.blend.blend_enable = false; + gx_state.blend.write_mask = D3D12_COLOR_WRITE_ENABLE_ALL; // D3D12 enumerator, matching SetColorMask(); avoids depending on a D3D11 header constant in this backend. + gx_state.blend.src_blend = D3D12_BLEND_ONE; + gx_state.blend.dst_blend = D3D12_BLEND_ZERO; + gx_state.blend.blend_op = D3D12_BLEND_OP_ADD; + gx_state.blend.use_dst_alpha = false; + + for (unsigned int k = 0; k < 8; k++) + { + gx_state.sampler[k].hex = 0; + } + + gx_state.zmode.testenable = false; + gx_state.zmode.updateenable = false; + gx_state.zmode.func = ZMode::NEVER; + + gx_state.raster.cull_mode = D3D12_CULL_MODE_NONE; + + // Clear EFB textures + float clear_color[4] = { 0.f, 0.f, 0.f, 1.f }; + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); + D3D::current_command_list->ClearRenderTargetView(FramebufferManager::GetEFBColorTexture()->GetRTV12(), clear_color, 0, nullptr); + D3D::current_command_list->ClearDepthStencilView(FramebufferManager::GetEFBDepthTexture()->GetDSV12(),
D3D12_CLEAR_FLAG_DEPTH, 0.f, 0, 0, nullptr); + + D3D12_VIEWPORT vp = { 0.f, 0.f, static_cast(s_target_width), static_cast(s_target_height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::current_command_list->RSSetViewports(1, &vp); + + // Already transitioned to appropriate states a few lines up for the clears. + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + + D3D::BeginFrame(); +} + +Renderer::~Renderer() +{ + D3D::EndFrame(); + D3D::WaitForOutstandingRenderingToComplete(); + TeardownDeviceObjects(); + D3D::Close(); +} + +void Renderer::RenderText(const std::string& text, int left, int top, u32 color) +{ + D3D::font.DrawTextScaled(static_cast(left + 1), static_cast(top + 1), 20.f, 0.0f, color & 0xFF000000, text); + D3D::font.DrawTextScaled(static_cast(left), static_cast(top), 20.f, 0.0f, color, text); +} + +TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) +{ + TargetRectangle result; + result.left = EFBToScaledX(rc.left); + result.top = EFBToScaledY(rc.top); + result.right = EFBToScaledX(rc.right); + result.bottom = EFBToScaledY(rc.bottom); + return result; +} + +// With D3D, we have to resize the backbuffer if the window changed +// size. 
+__declspec(noinline) bool Renderer::CheckForResize() +{ + RECT rc_window; + GetClientRect(D3D::hWnd, &rc_window); + int client_width = rc_window.right - rc_window.left; + int client_height = rc_window.bottom - rc_window.top; + + // Sanity check + if ((client_width != Renderer::GetBackbufferWidth() || + client_height != Renderer::GetBackbufferHeight()) && + client_width >= 4 && client_height >= 4) + { + return true; + } + + return false; +} + +void Renderer::SetScissorRect(const EFBRectangle& rc) +{ + TargetRectangle trc = ConvertEFBRectangle(rc); + D3D::current_command_list->RSSetScissorRects(1, trc.AsRECT()); +} + +void Renderer::SetColorMask() +{ + // Only enable alpha channel if it's supported by the current EFB format + UINT8 color_mask = 0; + if (bpmem.alpha_test.TestResult() != AlphaTest::FAIL) + { + if (bpmem.blendmode.alphaupdate && (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24)) + color_mask = D3D12_COLOR_WRITE_ENABLE_ALPHA; + if (bpmem.blendmode.colorupdate) + color_mask |= D3D12_COLOR_WRITE_ENABLE_RED | D3D12_COLOR_WRITE_ENABLE_GREEN | D3D12_COLOR_WRITE_ENABLE_BLUE; + } + gx_state.blend.write_mask = color_mask; + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); +} + +// This function allows the CPU to directly access the EFB. +// There are EFB peeks (which will read the color or depth of a pixel) +// and EFB pokes (which will change the color or depth of a pixel). 
+//
+// The behavior of EFB peeks can only be modified by:
+// - GX_PokeAlphaRead
+// The behavior of EFB pokes can be modified by:
+// - GX_PokeAlphaMode (TODO)
+// - GX_PokeAlphaUpdate (TODO)
+// - GX_PokeBlendMode (TODO)
+// - GX_PokeColorUpdate (TODO)
+// - GX_PokeDither (TODO)
+// - GX_PokeDstAlpha (TODO)
+// - GX_PokeZMode (TODO)
+//
+// Parameters:
+//   type      - PEEK_Z and PEEK_COLOR are handled here; any other type returns 0.
+//   x, y      - EFB pixel coordinates; converted to render-target coordinates below.
+//   poke_data - not read by this function.
+// Returns the depth value (16 or 24 bit, depending on the EFB pixel format) for PEEK_Z,
+// or the color with the alpha-read mode applied for PEEK_COLOR.
+//
+// NOTE(review): the template arguments of the casts in this text (static_cast / reinterpret_cast /
+// MathUtil::Clamp) appear to have been lost; restore them before compiling.
+u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
+{
+	// EXISTINGD3D11TODO: This function currently is broken if anti-aliasing is enabled
+
+	// Convert EFB dimensions to the ones of our render target
+	EFBRectangle efb_pixel_rc;
+	efb_pixel_rc.left = x;
+	efb_pixel_rc.top = y;
+	efb_pixel_rc.right = x + 1;
+	efb_pixel_rc.bottom = y + 1;
+	TargetRectangle target_pixel_rc = Renderer::ConvertEFBRectangle(efb_pixel_rc);
+
+	// Take the mean of the resulting dimensions; TODO: Don't use the center pixel, compute the average color instead
+	D3D12_RECT rect_to_lock;
+	if (type == PEEK_COLOR || type == PEEK_Z)
+	{
+		// Peeks read a single 1x1 rectangle at the center of the scaled pixel.
+		rect_to_lock.left = (target_pixel_rc.left + target_pixel_rc.right) / 2;
+		rect_to_lock.top = (target_pixel_rc.top + target_pixel_rc.bottom) / 2;
+		rect_to_lock.right = rect_to_lock.left + 1;
+		rect_to_lock.bottom = rect_to_lock.top + 1;
+	}
+	else
+	{
+		// Other access types use the whole scaled pixel rectangle.
+		rect_to_lock.left = target_pixel_rc.left;
+		rect_to_lock.right = target_pixel_rc.right;
+		rect_to_lock.top = target_pixel_rc.top;
+		rect_to_lock.bottom = target_pixel_rc.bottom;
+	}
+
+	if (type == PEEK_Z)
+	{
+		D3D::command_list_mgr->CPUAccessNotify();
+
+		// depth buffers can only be completely CopySubresourceRegion'ed, so we're using DrawShadedTexQuad instead
+		// D3D12TODO: Is above statement true on D3D12?
+		// The quad is drawn into a 1x1 viewport of the depth-read texture.
+		D3D12_VIEWPORT vp12 = { 0.f, 0.f, 1.f, 1.f, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
+		D3D::current_command_list->RSSetViewports(1, &vp12);
+
+		D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_access_efb_constant_buffer->GetGPUVirtualAddress());
+		D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true);
+
+		FramebufferManager::GetEFBDepthReadTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
+		D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBDepthReadTexture()->GetRTV12(), FALSE, nullptr);
+
+		D3D::SetPointCopySampler();
+
+		D3D::DrawShadedTexQuad(
+			FramebufferManager::GetEFBDepthTexture(),
+			&rect_to_lock,
+			Renderer::GetTargetWidth(),
+			Renderer::GetTargetHeight(),
+			StaticShaderCache::GetColorCopyPixelShader(true),
+			StaticShaderCache::GetSimpleVertexShader(),
+			StaticShaderCache::GetSimpleVertexShaderInputLayout(),
+			D3D12_SHADER_BYTECODE(),
+			1.0f,
+			0,
+			DXGI_FORMAT_R32_FLOAT,
+			false,
+			FramebufferManager::GetEFBDepthReadTexture()->GetMultisampled()
+			);
+
+		// copy to system memory
+		// Source is the single texel at (0,0) of the depth-read texture written by the quad above.
+		D3D12_BOX src_box = CD3DX12_BOX(0, 0, 0, 1, 1, 1);
+		ID3D12Resource* readback_buffer = FramebufferManager::GetEFBDepthStagingBuffer();
+
+		// Destination: a 1x1 R32_FLOAT footprint at offset 0 of the staging buffer, with the
+		// row pitch rounded up to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT.
+		D3D12_TEXTURE_COPY_LOCATION dst_location = {};
+		dst_location.pResource = readback_buffer;
+		dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+		dst_location.PlacedFootprint.Offset = 0;
+		dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT;
+		dst_location.PlacedFootprint.Footprint.Width = 1;
+		dst_location.PlacedFootprint.Footprint.Height = 1;
+		dst_location.PlacedFootprint.Footprint.Depth = 1;
+		dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+
+		D3D12_TEXTURE_COPY_LOCATION src_location = {};
+		src_location.pResource = FramebufferManager::GetEFBDepthReadTexture()->GetTex12();
+		src_location.SubresourceIndex = 0;
+		src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+
+		FramebufferManager::GetEFBDepthReadTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
+		D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box);
+
+		// Need to wait for the GPU to complete the copy (and all prior operations) before we can read it on the CPU.
+		D3D::command_list_mgr->ExecuteQueuedWork(true);
+
+		// Rebind the EFB color/depth targets that were replaced by the depth-read texture above.
+		FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
+		FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE );
+		D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12());
+
+		// Restores proper viewport/scissor settings.
+		g_renderer->RestoreAPIState();
+
+		// read the data from system memory
+		// NOTE(review): no matching Unmap() is visible in this function - confirm the buffer is
+		// meant to stay mapped.
+		void* readback_buffer_data = nullptr;
+		CheckHR(readback_buffer->Map(0, nullptr, &readback_buffer_data));
+
+		// depth buffer is inverted in the d3d backend
+		float val = 1.0f - reinterpret_cast(readback_buffer_data)[0];
+		u32 ret = 0;
+
+		if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16)
+		{
+			// if Z is in 16 bit format you must return a 16 bit integer
+			ret = MathUtil::Clamp(static_cast(val * 65536.0f), 0, 0xFFFF);
+		}
+		else
+		{
+			// Otherwise the result is clamped to 24 bits.
+			ret = MathUtil::Clamp(static_cast(val * 16777216.0f), 0, 0xFFFFFF);
+		}
+
+		// EXISTINGD3D11TODO: in RE0 this value is often off by one in Video_DX9 (where this code is derived from), which causes lighting to disappear
+		return ret;
+	}
+	else if (type == PEEK_COLOR)
+	{
+		D3D::command_list_mgr->CPUAccessNotify();
+
+		ID3D12Resource* readback_buffer = FramebufferManager::GetEFBColorStagingBuffer();
+
+		// Unlike the depth path, the color texel is copied directly from the resolved EFB color texture.
+		D3D12_BOX src_box = CD3DX12_BOX(rect_to_lock.left, rect_to_lock.top, 0, rect_to_lock.right, rect_to_lock.bottom, 1);
+
+		// Destination: a 1x1 R8G8B8A8_UNORM footprint at offset 0 of the staging buffer.
+		D3D12_TEXTURE_COPY_LOCATION dst_location = {};
+		dst_location.pResource = readback_buffer;
+		dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+		dst_location.PlacedFootprint.Offset = 0;
+		dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+		dst_location.PlacedFootprint.Footprint.Width = 1;
+		dst_location.PlacedFootprint.Footprint.Height = 1;
+		dst_location.PlacedFootprint.Footprint.Depth = 1;
+		dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+
+		D3D12_TEXTURE_COPY_LOCATION src_location = {};
+		src_location.pResource = FramebufferManager::GetResolvedEFBColorTexture()->GetTex12();
+		src_location.SubresourceIndex = 0;
+		src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+
+		FramebufferManager::GetResolvedEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
+		D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box);
+
+		// Need to wait for the GPU to complete the copy (and all prior operations) before we can read it on the CPU.
+		D3D::command_list_mgr->ExecuteQueuedWork(true);
+
+		// Put the EFB color/depth textures back into their rendering states and rebind them.
+		FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
+		FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);
+		D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12());
+
+		// Restores proper viewport/scissor settings.
+		g_renderer->RestoreAPIState();
+
+		// read the data from system memory
+		void* readback_buffer_data = nullptr;
+		CheckHR(readback_buffer->Map(0, nullptr, &readback_buffer_data));
+
+		u32 ret = reinterpret_cast(readback_buffer_data)[0];
+
+		// check what to do with the alpha channel (GX_PokeAlphaRead)
+		PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode();
+
+		// Round-trip the value through the current EFB pixel format.
+		if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24)
+		{
+			ret = RGBA8ToRGBA6ToRGBA8(ret);
+		}
+		else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16)
+		{
+			ret = RGBA8ToRGB565ToRGBA8(ret);
+		}
+		// Every format other than RGBA6_Z24 gets its alpha byte forced to 0xFF.
+		if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24)
+		{
+			ret |= 0xFF000000;
+		}
+
+		if (alpha_read_mode.ReadMode == 2)
+		{
+			return ret; // GX_READ_NONE
+		}
+		else if (alpha_read_mode.ReadMode == 1)
+		{
+			return (ret | 0xFF000000); // GX_READ_FF
+		}
+		else /*if(alpha_read_mode.ReadMode == 0)*/
+		{
+			return (ret & 0x00FFFFFF); // GX_READ_00
+		}
+	}
+
+	// Access types other than PEEK_Z / PEEK_COLOR are not handled here.
+	return 0;
+}
+
+// Applies a batch of EFB pokes by drawing them with D3D::DrawEFBPokeQuads.
+//   type       - POKE_COLOR draws to the EFB color target only; anything else is treated as POKE_Z
+//                and additionally binds the EFB depth target.
+//   points     - array of pokes, forwarded unchanged to DrawEFBPokeQuads.
+//   num_points - number of entries in points.
+// Viewport/scissor state is restored via RestoreAPIState() afterwards.
+void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points)
+{
+	// Viewport covering the whole render target.
+	D3D12_VIEWPORT vp = { 0.0f, 0.0f, static_cast(GetTargetWidth()), static_cast(GetTargetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
+
+	if (type == POKE_COLOR)
+	{
+		// In the D3D12 backend, the rt/db/viewport is passed into DrawEFBPokeQuads, and set there.
+		D3D::DrawEFBPokeQuads(
+			type,
+			points,
+			num_points,
+			&g_reset_blend_desc,
+			&g_reset_depth_desc,
+			&vp,
+			&FramebufferManager::GetEFBColorTexture()->GetRTV12(),
+			nullptr,
+			FramebufferManager::GetEFBColorTexture()->GetMultisampled()
+			);
+	}
+	else // if (type == POKE_Z)
+	{
+		// Depth pokes: all color channels disabled, depth test and depth writes enabled.
+		D3D::DrawEFBPokeQuads(
+			type,
+			points,
+			num_points,
+			&s_clear_blend_descs[CLEAR_BLEND_DESC_ALL_CHANNELS_DISABLED],
+			&s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_ENABLED],
+			&vp,
+			&FramebufferManager::GetEFBColorTexture()->GetRTV12(),
+			&FramebufferManager::GetEFBDepthTexture()->GetDSV12(),
+			FramebufferManager::GetEFBColorTexture()->GetMultisampled()
+			);
+	}
+
+	RestoreAPIState();
+}
+
+// Computes the D3D12 viewport from xfmem.viewport and bpmem.scissorOffset and sets it on the
+// current command list. Does nothing when the emulated viewport width or height is zero.
+void Renderer::SetViewport()
+{
+	// reversed gxsetviewport(xorig, yorig, width, height, nearz, farz)
+	// [0] = width/2
+	// [1] = height/2
+	// [2] = 16777215 * (farz - nearz)
+	// [3] = xorig + width/2 + 342
+	// [4] = yorig + height/2 + 342
+	// [5] = 16777215 * farz
+
+	// D3D crashes for zero viewports
+	if (xfmem.viewport.wd == 0 || xfmem.viewport.ht == 0)
+		return;
+
+	int scissor_x_offset = bpmem.scissorOffset.x * 2;
+	int scissor_y_offset = bpmem.scissorOffset.y * 2;
+
+	float x = Renderer::EFBToScaledXf(xfmem.viewport.xOrig - xfmem.viewport.wd - scissor_x_offset);
+	float y = Renderer::EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissor_y_offset);
+	float width = Renderer::EFBToScaledXf(2.0f * xfmem.viewport.wd);
+	float height = Renderer::EFBToScaledYf(-2.0f * xfmem.viewport.ht);
+	// Normalize negative extents so that (x, y) is the top-left corner and the size is positive.
+	if (width < 0.0f)
+	{
+		x += width;
+		width = -width;
+	}
+	if (height < 0.0f)
+	{
+		y += height;
+		height = -height;
+	}
+
+	// In D3D, the viewport rectangle must fit within the render target.
+	x = (x >= 0.f) ? x : 0.f;
+	y = (y >= 0.f) ? y : 0.f;
+	width = (x + width <= GetTargetWidth()) ? width : (GetTargetWidth() - x);
+	height = (y + height <= GetTargetHeight()) ? height : (GetTargetHeight() - y);
+
+	// MinDepth/MaxDepth are derived from farZ and zRange, scaled by 1/16777216 and inverted (1.0 - z).
+	D3D12_VIEWPORT vp = { x, y, width, height,
+		1.0f - MathUtil::Clamp(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f,
+		1.0f - MathUtil::Clamp(xfmem.viewport.farZ - MathUtil::Clamp(xfmem.viewport.zRange, 0.0f, 16777216.0f), 0.0f, 16777215.0f) / 16777216.0f };
+
+	D3D::current_command_list->RSSetViewports(1, &vp);
+}
+
+// Clears the given EFB rectangle by drawing a clear quad.
+//   rc           - EFB-space rectangle to clear (converted to target coordinates for the viewport).
+//   color_enable - selects a blend desc with the RGB channels enabled.
+//   alpha_enable - selects a blend desc with the alpha channel enabled.
+//   z_enable     - selects a depth desc with depth writes enabled.
+//   color        - clear color, passed in BGRA order and swizzled to RGBA below.
+//   z            - clear depth; only the low 24 bits are used.
+void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, u32 color, u32 z)
+{
+	D3D12_BLEND_DESC* blend_desc = nullptr;
+
+	if (color_enable && alpha_enable)
+		blend_desc = &s_clear_blend_descs[CLEAR_BLEND_DESC_ALL_CHANNELS_ENABLED];
+	else if (color_enable)
+		blend_desc = &s_clear_blend_descs[CLEAR_BLEND_DESC_RGB_CHANNELS_ENABLED];
+	else if (alpha_enable)
+		blend_desc = &s_clear_blend_descs[CLEAR_BLEND_DESC_ALPHA_CHANNEL_ENABLED];
+	else
+		blend_desc = &s_clear_blend_descs[CLEAR_BLEND_DESC_ALL_CHANNELS_DISABLED];
+
+	D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc = nullptr;
+
+	// EXISTINGD3D11TODO: Should we enable Z testing here?
+	/*if (!bpmem.zmode.testenable) depth_stencil_desc = &s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_DISABLED];
+	else */if (z_enable)
+		depth_stencil_desc = &s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_ENABLED];
+	else /*if (!z_enable)*/
+		depth_stencil_desc = &s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_DISABLED];
+
+	// Update the view port for clearing the picture
+	TargetRectangle target_rc = Renderer::ConvertEFBRectangle(rc);
+
+	D3D12_VIEWPORT vp = {
+		static_cast(target_rc.left),
+		static_cast(target_rc.top),
+		static_cast(target_rc.GetWidth()),
+		static_cast(target_rc.GetHeight()),
+		D3D12_MIN_DEPTH,
+		D3D12_MAX_DEPTH
+	};
+
+	D3D::current_command_list->RSSetViewports(1, &vp);
+
+	// Color is passed in bgra mode so we need to convert it to rgba
+	u32 rgba_color = (color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000);
+	// The 24-bit depth is normalized and inverted (1.0 - z) before being passed on.
+	D3D::DrawClearQuad(rgba_color, 1.0f - (z & 0xFFFFFF) / 16777216.0f, blend_desc, depth_stencil_desc, FramebufferManager::GetEFBColorTexture()->GetMultisampled());
+
+	// Restores proper viewport/scissor settings.
+	g_renderer->RestoreAPIState();
+}
+
+// Reinterprets the EFB color data by drawing the EFB color texture into the temporary EFB color
+// texture with a conversion pixel shader, then swapping the two textures.
+//   convtype - 0 selects the RGB8->RGBA6 shader, 2 selects the RGBA6->RGB8 shader; any other value
+//              is logged as an error and ignored.
+void Renderer::ReinterpretPixelData(unsigned int convtype)
+{
+	// EXISTINGD3D11TODO: MSAA support..
+	D3D12_RECT source = CD3DX12_RECT(0, 0, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight());
+
+	D3D12_SHADER_BYTECODE pixel_shader = {};
+
+	if (convtype == 0)
+	{
+		pixel_shader = StaticShaderCache::GetReinterpRGB8ToRGBA6PixelShader(true);
+	}
+	else if (convtype == 2)
+	{
+		pixel_shader = StaticShaderCache::GetReinterpRGBA6ToRGB8PixelShader(true);
+	}
+	else
+	{
+		ERROR_LOG(VIDEO, "Trying to reinterpret pixel data with unsupported conversion type %d", convtype);
+		return;
+	}
+
+	// Full render-target viewport for the conversion pass.
+	D3D12_VIEWPORT vp = {
+		0.f,
+		0.f,
+		static_cast(g_renderer->GetTargetWidth()),
+		static_cast(g_renderer->GetTargetHeight()),
+		D3D12_MIN_DEPTH,
+		D3D12_MAX_DEPTH
+	};
+
+	D3D::current_command_list->RSSetViewports(1, &vp);
+
+	// Render into the temporary color texture (no depth target bound).
+	FramebufferManager::GetEFBColorTempTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
+	D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTempTexture()->GetRTV12(), FALSE, nullptr);
+
+	D3D::SetPointCopySampler();
+	D3D::DrawShadedTexQuad(
+		FramebufferManager::GetEFBColorTexture(),
+		&source,
+		g_renderer->GetTargetWidth(),
+		g_renderer->GetTargetHeight(),
+		pixel_shader,
+		StaticShaderCache::GetSimpleVertexShader(),
+		StaticShaderCache::GetSimpleVertexShaderInputLayout(),
+		StaticShaderCache::GetCopyGeometryShader(),
+		1.0f,
+		0,
+		DXGI_FORMAT_R8G8B8A8_UNORM,
+		false,
+		FramebufferManager::GetEFBColorTempTexture()->GetMultisampled()
+		);
+
+	// Restores proper viewport/scissor settings.
+	g_renderer->RestoreAPIState();
+
+	FramebufferManager::SwapReinterpretTexture();
+
+	// Rebind the EFB color and depth targets after the swap.
+	FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
+	FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE );
+	D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12());
+}
+
+// Translates bpmem.blendmode into gx_state.blend and marks the PSO dirty.
+//   force_update - when false, the function returns early (leaving gx_state.blend untouched) if
+//                  logic ops are enabled and blending is disabled.
+void Renderer::SetBlendMode(bool force_update)
+{
+	// Our render target always uses an alpha channel, so we need to override the blend functions to assume a destination alpha of 1 if the render target isn't supposed to have an alpha channel
+	// Example: D3DBLEND_DESTALPHA needs to be D3DBLEND_ONE since the result without an alpha channel is assumed to always be 1.
+	bool target_has_alpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
+	// Lookup tables indexed by bpmem.blendmode.srcfactor / dstfactor.
+	const D3D12_BLEND d3d_src_factors[8] =
+	{
+		D3D12_BLEND_ZERO,
+		D3D12_BLEND_ONE,
+		D3D12_BLEND_DEST_COLOR,
+		D3D12_BLEND_INV_DEST_COLOR,
+		D3D12_BLEND_SRC_ALPHA,
+		D3D12_BLEND_INV_SRC_ALPHA, // NOTE: Use SRC1_ALPHA if dst alpha is enabled!
+		(target_has_alpha) ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_ONE,
+		(target_has_alpha) ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_ZERO
+	};
+	const D3D12_BLEND d3d_dst_factors[8] =
+	{
+		D3D12_BLEND_ZERO,
+		D3D12_BLEND_ONE,
+		D3D12_BLEND_SRC_COLOR,
+		D3D12_BLEND_INV_SRC_COLOR,
+		D3D12_BLEND_SRC_ALPHA,
+		D3D12_BLEND_INV_SRC_ALPHA, // NOTE: Use SRC1_ALPHA if dst alpha is enabled!
+		(target_has_alpha) ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_ONE,
+		(target_has_alpha) ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_ZERO
+	};
+
+	if (bpmem.blendmode.logicopenable && !bpmem.blendmode.blendenable && !force_update)
+		return;
+
+	if (bpmem.blendmode.subtract)
+	{
+		// Subtract mode: REV_SUBTRACT with ONE/ONE factors.
+		gx_state.blend.blend_enable = true;
+		gx_state.blend.blend_op = D3D12_BLEND_OP_REV_SUBTRACT;
+		gx_state.blend.src_blend = D3D12_BLEND_ONE;
+		gx_state.blend.dst_blend = D3D12_BLEND_ONE;
+	}
+	else
+	{
+		gx_state.blend.blend_enable = static_cast(bpmem.blendmode.blendenable);
+		// Op and factors are only updated while blending is enabled; otherwise the previous
+		// values are left in place.
+		if (bpmem.blendmode.blendenable)
+		{
+			gx_state.blend.blend_op = D3D12_BLEND_OP_ADD;
+			gx_state.blend.src_blend = d3d_src_factors[bpmem.blendmode.srcfactor];
+			gx_state.blend.dst_blend = d3d_dst_factors[bpmem.blendmode.dstfactor];
+		}
+	}
+
+	D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
+}
+
+// Copies the region of the back buffer described by rc into the screenshot readback resource and
+// writes it to filename via TextureToPng. An OSD message reports success or failure.
+// Returns the result of TextureToPng.
+bool Renderer::SaveScreenshot(const std::string& filename, const TargetRectangle& rc)
+{
+	// The readback resource is created lazily on first use.
+	if (!s_screenshot_texture)
+		CreateScreenshotTexture();
+
+	// copy back buffer to system memory
+	bool saved_png = false;
+
+	D3D12_BOX source_box = GetScreenshotSourceBox(rc);
+
+	// Destination footprint is sized to the full back buffer, with an aligned row pitch.
+	D3D12_TEXTURE_COPY_LOCATION dst_location = {};
+	dst_location.pResource = s_screenshot_texture;
+	dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+	dst_location.PlacedFootprint.Offset = 0;
+	dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+	dst_location.PlacedFootprint.Footprint.Width = D3D::GetBackBufferWidth();
+	dst_location.PlacedFootprint.Footprint.Height = D3D::GetBackBufferHeight();
+	dst_location.PlacedFootprint.Footprint.Depth = 1;
+	dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+
+	D3D12_TEXTURE_COPY_LOCATION src_location = {};
+	src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+	src_location.SubresourceIndex = 0;
+	src_location.pResource = D3D::GetBackBuffer()->GetTex12();
+
+	D3D::GetBackBuffer()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
+	D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &source_box);
+
+	// NOTE(review): presumably blocks until the copy has finished, as in AccessEFB - confirm.
+	D3D::command_list_mgr->ExecuteQueuedWork(true);
+
+	// NOTE(review): s_screenshot_texture_data is assumed to point at the CPU-visible contents of
+	// s_screenshot_texture - verify against CreateScreenshotTexture.
+	saved_png = TextureToPng(static_cast(s_screenshot_texture_data), dst_location.PlacedFootprint.Footprint.RowPitch, filename, source_box.right - source_box.left, source_box.bottom - source_box.top, false);
+
+	if (saved_png)
+	{
+		OSD::AddMessage(StringFromFormat("Saved %i x %i %s", rc.GetWidth(),
+			rc.GetHeight(), filename.c_str()));
+	}
+	else
+	{
+		OSD::AddMessage(StringFromFormat("Error saving %s", filename.c_str()));
+	}
+
+	return saved_png;
+}
+
+// Converts a 4-bytes-per-pixel image into a tightly packed 3-bytes-per-pixel image.
+//   in  - source pixels; rows are p bytes apart.
+//   out - destination buffer; receives w * h * 3 bytes.
+//   w,h - image size in pixels.
+//   p   - source row pitch in bytes.
+// Source rows are read bottom-to-top (vertical flip), bytes 0 and 2 of each pixel are swapped,
+// and byte 3 is dropped.
+void formatBufferDump(const u8* in, u8* out, int w, int h, int p)
+{
+	for (int y = 0; y < h; ++y)
+	{
+		// Start of the source row mirrored around the vertical center.
+		auto line = (in + (h - y - 1) * p);
+		for (int x = 0; x < w; ++x)
+		{
+			out[0] = line[2];
+			out[1] = line[1];
+			out[2] = line[0];
+			out += 3;
+			line += 4;
+		}
+	}
+}
+
+// This function has the final picture. We adjust the aspect ratio here.
+void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, const EFBRectangle& rc, float gamma) +{ + if (Fifo::WillSkipCurrentFrame() || (!XFBWrited && !g_ActiveConfig.RealXFBEnabled()) || !fb_width || !fb_height) + { + if (SConfig::GetInstance().m_DumpFrames && !frame_data.empty()) + AVIDump::AddFrame(&frame_data[0], fb_width, fb_height); + + Core::Callback_VideoCopiedToXFB(false); + return; + } + + u32 xfb_count = 0; + const XFBSourceBase* const* xfb_source_list = FramebufferManager::GetXFBSource(xfb_addr, fb_stride, fb_height, &xfb_count); + if ((!xfb_source_list || xfb_count == 0) && g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) + { + if (SConfig::GetInstance().m_DumpFrames && !frame_data.empty()) + AVIDump::AddFrame(&frame_data[0], fb_width, fb_height); + + Core::Callback_VideoCopiedToXFB(false); + return; + } + + // Prepare to copy the XFBs to our backbuffer + UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); + TargetRectangle target_rc = GetTargetRectangle(); + + D3D::GetBackBuffer()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV12(), FALSE, nullptr); + + float clear_color[4] = { 0.f, 0.f, 0.f, 1.f }; + D3D::current_command_list->ClearRenderTargetView(D3D::GetBackBuffer()->GetRTV12(), clear_color, 0, nullptr); + + // D3D12: Because scissor-testing is always enabled, change scissor rect to backbuffer in case EFB is smaller + // than swap chain back buffer. + D3D12_RECT back_buffer_rect = { 0L, 0L, GetBackbufferWidth(), GetBackbufferHeight() }; + D3D::current_command_list->RSSetScissorRects(1, &back_buffer_rect); + + // activate linear filtering for the buffer copies + D3D::SetLinearCopySampler(); + + if (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB) + { + // EXISTINGD3D11TODO: Television should be used to render Virtual XFB mode as well. 
+ D3D12_VIEWPORT vp12 = { + static_cast(target_rc.left), + static_cast(target_rc.top), + static_cast(target_rc.GetWidth()), + static_cast(target_rc.GetHeight()), + D3D12_MIN_DEPTH, + D3D12_MAX_DEPTH + }; + + D3D::current_command_list->RSSetViewports(1, &vp12); + + s_television.Submit(xfb_addr, fb_stride, fb_width, fb_height); + s_television.Render(); + } + else if (g_ActiveConfig.bUseXFB) + { + const XFBSource* xfb_source; + + // draw each xfb source + for (u32 i = 0; i < xfb_count; ++i) + { + xfb_source = static_cast(xfb_source_list[i]); + + TargetRectangle drawRc; + + // use virtual xfb with offset + int xfb_height = xfb_source->srcHeight; + int xfb_width = xfb_source->srcWidth; + int hOffset = (static_cast(xfb_source->srcAddr) - static_cast(xfb_addr)) / (static_cast(fb_stride) * 2); + + drawRc.top = target_rc.top + hOffset * target_rc.GetHeight() / static_cast(fb_height); + drawRc.bottom = target_rc.top + (hOffset + xfb_height) * target_rc.GetHeight() / static_cast(fb_height); + drawRc.left = target_rc.left + (target_rc.GetWidth() - xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; + drawRc.right = target_rc.left + (target_rc.GetWidth() + xfb_width * target_rc.GetWidth() / static_cast(fb_stride)) / 2; + + // The following code disables auto stretch. Kept for reference. 
+ // scale draw area for a 1 to 1 pixel mapping with the draw target + //float vScale = static_cast(fbHeight) / static_cast(s_backbuffer_height); + //float hScale = static_cast(fbWidth) / static_cast(s_backbuffer_width); + //drawRc.top *= vScale; + //drawRc.bottom *= vScale; + //drawRc.left *= hScale; + //drawRc.right *= hScale; + + TargetRectangle source_rc; + source_rc.left = xfb_source->sourceRc.left; + source_rc.top = xfb_source->sourceRc.top; + source_rc.right = xfb_source->sourceRc.right; + source_rc.bottom = xfb_source->sourceRc.bottom; + + source_rc.right -= Renderer::EFBToScaledX(fb_stride - fb_width); + + BlitScreen(source_rc, drawRc, xfb_source->m_tex, xfb_source->texWidth, xfb_source->texHeight, gamma); + } + } + else + { + TargetRectangle source_rc = Renderer::ConvertEFBRectangle(rc); + + // EXISTINGD3D11TODO: Improve sampling algorithm for the pixel shader so that we can use the multisampled EFB texture as source + D3DTexture2D* read_texture = FramebufferManager::GetResolvedEFBColorTexture(); + + BlitScreen(source_rc, target_rc, read_texture, GetTargetWidth(), GetTargetHeight(), gamma); + } + + // done with drawing the game stuff, good moment to save a screenshot + if (s_bScreenshot) + { + std::lock_guard guard(s_criticalScreenshot); + + SaveScreenshot(s_sScreenshotName, GetTargetRectangle()); + s_sScreenshotName.clear(); + s_bScreenshot = false; + s_screenshotCompleted.Set(); + } + + // Dump frames + static int w = 0, h = 0; + if (SConfig::GetInstance().m_DumpFrames) + { + static unsigned int s_record_width; + static unsigned int s_record_height; + + if (!s_screenshot_texture) + CreateScreenshotTexture(); + + D3D12_BOX source_box = GetScreenshotSourceBox(target_rc); + + unsigned int source_width = source_box.right - source_box.left; + unsigned int source_height = source_box.bottom - source_box.top; + + D3D12_TEXTURE_COPY_LOCATION dst_location = {}; + dst_location.pResource = s_screenshot_texture; + dst_location.Type = 
D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dst_location.PlacedFootprint.Offset = 0; + dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + dst_location.PlacedFootprint.Footprint.Width = GetTargetRectangle().GetWidth(); + dst_location.PlacedFootprint.Footprint.Height = GetTargetRectangle().GetHeight(); + dst_location.PlacedFootprint.Footprint.Depth = 1; + dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + + D3D12_TEXTURE_COPY_LOCATION src_location = {}; + src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src_location.SubresourceIndex = 0; + src_location.pResource = D3D::GetBackBuffer()->GetTex12(); + + D3D::GetBackBuffer()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &source_box); + + D3D::command_list_mgr->ExecuteQueuedWork(true); + + if (!bLastFrameDumped) + { + s_record_width = source_width; + s_record_height = source_height; + bAVIDumping = AVIDump::Start(s_record_width, s_record_height, AVIDump::DumpFormat::FORMAT_BGR); + if (!bAVIDumping) + { + PanicAlert("Error dumping frames to AVI."); + } + else + { + std::string msg = StringFromFormat("Dumping Frames to \"%sframedump0.avi\" (%dx%d RGB24)", + File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), s_record_width, s_record_height); + + OSD::AddMessage(msg, 2000); + } + } + if (bAVIDumping) + { + if (frame_data.empty() || w != s_record_width || h != s_record_height) + { + frame_data.resize(3 * s_record_width * s_record_height); + w = s_record_width; + h = s_record_height; + } + formatBufferDump(static_cast(s_screenshot_texture_data), &frame_data[0], source_width, source_height, dst_location.PlacedFootprint.Footprint.RowPitch); + FlipImageData(&frame_data[0], w, h); + AVIDump::AddFrame(&frame_data[0], source_width, source_height); + } + 
bLastFrameDumped = true; + } + else + { + if (bLastFrameDumped && bAVIDumping) + { + std::vector().swap(frame_data); + w = h = 0; + + AVIDump::Stop(); + bAVIDumping = false; + OSD::AddMessage("Stop dumping frames to AVI", 2000); + } + bLastFrameDumped = false; + } + + // Reset viewport for drawing text + D3D12_VIEWPORT vp = { + 0.0f, + 0.0f, + static_cast(GetBackbufferWidth()), + static_cast(GetBackbufferHeight()), + D3D12_MIN_DEPTH, + D3D12_MAX_DEPTH + }; + + D3D::current_command_list->RSSetViewports(1, &vp); + + Renderer::DrawDebugText(); + + OSD::DrawMessages(); + D3D::EndFrame(); + + TextureCacheBase::Cleanup(frameCount); + + // Enable configuration changes + UpdateActiveConfig(); + TextureCacheBase::OnConfigChanged(g_ActiveConfig); + + SetWindowSize(fb_stride, fb_height); + + const bool window_resized = CheckForResize(); + const bool fullscreen = g_ActiveConfig.bFullscreen && !g_ActiveConfig.bBorderlessFullscreen && + !SConfig::GetInstance().bRenderToMain; + + bool xfb_changed = s_last_xfb_mode != g_ActiveConfig.bUseRealXFB; + + if (FramebufferManagerBase::LastXfbWidth() != fb_stride || FramebufferManagerBase::LastXfbHeight() != fb_height) + { + xfb_changed = true; + unsigned int xfb_w = (fb_stride < 1 || fb_stride > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_stride; + unsigned int xfb_h = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? 
MAX_XFB_HEIGHT : fb_height; + FramebufferManagerBase::SetLastXfbWidth(xfb_w); + FramebufferManagerBase::SetLastXfbHeight(xfb_h); + } + + // Flip/present backbuffer to frontbuffer here + D3D::Present(); + + // Resize the back buffers NOW to avoid flickering + if (CalculateTargetSize(s_backbuffer_width, s_backbuffer_height) || + xfb_changed || + window_resized || + s_last_efb_scale != g_ActiveConfig.iEFBScale || + s_last_multisamples != g_ActiveConfig.iMultisamples || + s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0)) + { + s_last_xfb_mode = g_ActiveConfig.bUseRealXFB; + s_last_multisamples = g_ActiveConfig.iMultisamples; + + StaticShaderCache::InvalidateMSAAShaders(); + + if (window_resized) + { + // TODO: Aren't we still holding a reference to the back buffer right now? + D3D::Reset(); + + if (s_screenshot_texture) + { + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_screenshot_texture); + s_screenshot_texture = nullptr; + } + + s_backbuffer_width = D3D::GetBackBufferWidth(); + s_backbuffer_height = D3D::GetBackBufferHeight(); + } + + UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); + + s_last_efb_scale = g_ActiveConfig.iEFBScale; + s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0; + + PixelShaderManager::SetEfbScaleChanged(); + + D3D::GetBackBuffer()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV12(), FALSE, nullptr); + + g_framebuffer_manager.reset(); + g_framebuffer_manager = std::make_unique(); + const float clear_color[4] = { 0.f, 0.f, 0.f, 1.f }; + + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->ClearRenderTargetView(FramebufferManager::GetEFBColorTexture()->GetRTV12(), clear_color, 0, nullptr); + + 
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); + D3D::current_command_list->ClearDepthStencilView(FramebufferManager::GetEFBDepthTexture()->GetDSV12(), D3D12_CLEAR_FLAG_DEPTH, 0.f, 0, 0, nullptr); + } + + // begin next frame + RestoreAPIState(); + D3D::BeginFrame(); + + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + + SetViewport(); +} + +void Renderer::ResetAPIState() +{ + CHECK(0, "This should never be called.. just required for inheritance."); +} + +void Renderer::RestoreAPIState() +{ + // Restores viewport/scissor rects, which might have been + // overwritten elsewhere (particularly the viewport). 
+ SetViewport(); + BPFunctions::SetScissor(); +} + +static bool s_previous_use_dst_alpha = false; +static D3DVertexFormat* s_previous_vertex_format = nullptr; + +void Renderer::ApplyState(bool use_dst_alpha) +{ + if (use_dst_alpha != s_previous_use_dst_alpha) + { + s_previous_use_dst_alpha = use_dst_alpha; + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); + } + + gx_state.blend.use_dst_alpha = use_dst_alpha; + + if (D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS)) + { + D3D12_GPU_DESCRIPTOR_HANDLE sample_group_gpu_handle; + sample_group_gpu_handle = D3D::sampler_descriptor_heap_mgr->GetHandleForSamplerGroup(gx_state.sampler, 8); + + D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SAMPLER, sample_group_gpu_handle); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, false); + } + + // Uploads and binds required constant buffer data for all stages. + ShaderConstantsManager::LoadAndSetGeometryShaderConstants(); + ShaderConstantsManager::LoadAndSetPixelShaderConstants(); + ShaderConstantsManager::LoadAndSetVertexShaderConstants(); + + if (D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_PSO) || s_previous_vertex_format != reinterpret_cast(VertexLoaderManager::GetCurrentVertexFormat())) + { + s_previous_vertex_format = reinterpret_cast(VertexLoaderManager::GetCurrentVertexFormat()); + + D3D12_PRIMITIVE_TOPOLOGY_TYPE topologyType = ShaderCache::GetCurrentPrimitiveTopology(); + RasterizerState modifiableRastState = gx_state.raster; + + if (topologyType != D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE) + { + modifiableRastState.cull_mode = D3D12_CULL_MODE_NONE; + } + + SmallPsoDesc pso_desc = { + ShaderCache::GetActiveGeometryShaderBytecode(), // D3D12_SHADER_BYTECODE GS; + ShaderCache::GetActivePixelShaderBytecode(), // D3D12_SHADER_BYTECODE PS; + ShaderCache::GetActiveVertexShaderBytecode(), // D3D12_SHADER_BYTECODE VS; + 
s_previous_vertex_format, // D3DVertexFormat* InputLayout; + gx_state.blend, // BlendState BlendState; + modifiableRastState, // RasterizerState RasterizerState; + gx_state.zmode, // ZMode DepthStencilState; + }; + + if (use_dst_alpha) + { + // restore actual state + SetBlendMode(false); + SetLogicOpMode(); + } + + ID3D12PipelineState* pso = nullptr; + CheckHR( + gx_state_cache.GetPipelineStateObjectFromCache( + &pso_desc, + &pso, + topologyType, + ShaderCache::GetActiveGeometryShaderUid(), + ShaderCache::GetActivePixelShaderUid(), + ShaderCache::GetActiveVertexShaderUid() + ) + ); + + D3D::current_command_list->SetPipelineState(pso); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, false); + } +} + +void Renderer::RestoreState() +{ +} + +void Renderer::ApplyCullDisable() +{ + // This functionality is handled directly in ApplyState. +} + +void Renderer::RestoreCull() +{ + // This functionality is handled directly in ApplyState. +} + +void Renderer::SetGenerationMode() +{ + const D3D12_CULL_MODE d3d_cull_modes[4] = + { + D3D12_CULL_MODE_NONE, + D3D12_CULL_MODE_BACK, + D3D12_CULL_MODE_FRONT, + D3D12_CULL_MODE_BACK + }; + + // rastdc.FrontCounterClockwise must be false for this to work + // EXISTINGD3D11TODO: GX_CULL_ALL not supported, yet! + gx_state.raster.cull_mode = d3d_cull_modes[bpmem.genMode.cullmode]; + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); +} + +void Renderer::SetDepthMode() +{ + gx_state.zmode.hex = bpmem.zmode.hex; + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); +} + +void Renderer::SetLogicOpMode() +{ + // D3D11 doesn't support logic blending, so this is a huge hack + // EXISTINGD3D11TODO: Make use of D3D11.1's logic blending support + // D3D12TODO: Obviously these are always available in D3D12.. 
+ + // 0 0x00 + // 1 Source & destination + // 2 Source & ~destination + // 3 Source + // 4 ~Source & destination + // 5 Destination + // 6 Source ^ destination = Source & ~destination | ~Source & destination + // 7 Source | destination + // 8 ~(Source | destination) + // 9 ~(Source ^ destination) = ~Source & ~destination | Source & destination + // 10 ~Destination + // 11 Source | ~destination + // 12 ~Source + // 13 ~Source | destination + // 14 ~(Source & destination) + // 15 0xff + const D3D12_BLEND_OP d3d_logic_ops[16] = + { + D3D12_BLEND_OP_ADD,//0 + D3D12_BLEND_OP_ADD,//1 + D3D12_BLEND_OP_SUBTRACT,//2 + D3D12_BLEND_OP_ADD,//3 + D3D12_BLEND_OP_REV_SUBTRACT,//4 + D3D12_BLEND_OP_ADD,//5 + D3D12_BLEND_OP_MAX,//6 + D3D12_BLEND_OP_ADD,//7 + D3D12_BLEND_OP_MAX,//8 + D3D12_BLEND_OP_MAX,//9 + D3D12_BLEND_OP_ADD,//10 + D3D12_BLEND_OP_ADD,//11 + D3D12_BLEND_OP_ADD,//12 + D3D12_BLEND_OP_ADD,//13 + D3D12_BLEND_OP_ADD,//14 + D3D12_BLEND_OP_ADD//15 + }; + const D3D12_BLEND d3d_logic_op_src_factors[16] = + { + D3D12_BLEND_ZERO,//0 + D3D12_BLEND_DEST_COLOR,//1 + D3D12_BLEND_ONE,//2 + D3D12_BLEND_ONE,//3 + D3D12_BLEND_DEST_COLOR,//4 + D3D12_BLEND_ZERO,//5 + D3D12_BLEND_INV_DEST_COLOR,//6 + D3D12_BLEND_INV_DEST_COLOR,//7 + D3D12_BLEND_INV_SRC_COLOR,//8 + D3D12_BLEND_INV_SRC_COLOR,//9 + D3D12_BLEND_INV_DEST_COLOR,//10 + D3D12_BLEND_ONE,//11 + D3D12_BLEND_INV_SRC_COLOR,//12 + D3D12_BLEND_INV_SRC_COLOR,//13 + D3D12_BLEND_INV_DEST_COLOR,//14 + D3D12_BLEND_ONE//15 + }; + const D3D12_BLEND d3d_logic_op_dest_factors[16] = + { + D3D12_BLEND_ZERO,//0 + D3D12_BLEND_ZERO,//1 + D3D12_BLEND_INV_SRC_COLOR,//2 + D3D12_BLEND_ZERO,//3 + D3D12_BLEND_ONE,//4 + D3D12_BLEND_ONE,//5 + D3D12_BLEND_INV_SRC_COLOR,//6 + D3D12_BLEND_ONE,//7 + D3D12_BLEND_INV_DEST_COLOR,//8 + D3D12_BLEND_SRC_COLOR,//9 + D3D12_BLEND_INV_DEST_COLOR,//10 + D3D12_BLEND_INV_DEST_COLOR,//11 + D3D12_BLEND_INV_SRC_COLOR,//12 + D3D12_BLEND_ONE,//13 + D3D12_BLEND_INV_SRC_COLOR,//14 + D3D12_BLEND_ONE//15 + }; + + if 
(bpmem.blendmode.logicopenable && !bpmem.blendmode.blendenable) + { + gx_state.blend.blend_enable = true; + gx_state.blend.blend_op = d3d_logic_ops[bpmem.blendmode.logicmode]; + gx_state.blend.src_blend = d3d_logic_op_src_factors[bpmem.blendmode.logicmode]; + gx_state.blend.dst_blend = d3d_logic_op_dest_factors[bpmem.blendmode.logicmode]; + } + else + { + SetBlendMode(true); + } + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); +} + +void Renderer::SetDitherMode() +{ + // EXISTINGD3D11TODO: Set dither mode to bpmem.blendmode.dither +} + +void Renderer::SetSamplerState(int stage, int tex_index, bool custom_tex) +{ + SamplerState s_previous_sampler_state[8]; + + const FourTexUnits& tex = bpmem.tex[tex_index]; + const TexMode0& tm0 = tex.texMode0[stage]; + const TexMode1& tm1 = tex.texMode1[stage]; + + if (tex_index) + stage += 4; + + if (g_ActiveConfig.bForceFiltering) + { + gx_state.sampler[stage].min_filter = 6; // 4 (linear mip) | 2 (linear min) + gx_state.sampler[stage].mag_filter = 1; // linear mag + } + else + { + gx_state.sampler[stage].min_filter = static_cast(tm0.min_filter); + gx_state.sampler[stage].mag_filter = static_cast(tm0.mag_filter); + } + + gx_state.sampler[stage].wrap_s = static_cast(tm0.wrap_s); + gx_state.sampler[stage].wrap_t = static_cast(tm0.wrap_t); + gx_state.sampler[stage].max_lod = static_cast(tm1.max_lod); + gx_state.sampler[stage].min_lod = static_cast(tm1.min_lod); + gx_state.sampler[stage].lod_bias = static_cast(tm0.lod_bias); + + // custom textures may have higher resolution, so disable the max_lod + if (custom_tex) + { + gx_state.sampler[stage].max_lod = 255; + } + + if (gx_state.sampler[stage].hex != s_previous_sampler_state[stage].hex) + { + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true); + s_previous_sampler_state[stage].hex = gx_state.sampler[stage].hex; + } +} + +void Renderer::SetInterlacingMode() +{ + // EXISTINGD3D11TODO +} + +int 
Renderer::GetMaxTextureSize() +{ + return DX12::D3D::GetMaxTextureSize(); +} + +u16 Renderer::BBoxRead(int index) +{ + // Here we get the min/max value of the truncated position of the upscaled framebuffer. + // So we have to correct them to the unscaled EFB sizes. + int value = BBox::Get(index); + + if (index < 2) + { + // left/right + value = value * EFB_WIDTH / s_target_width; + } + else + { + // up/down + value = value * EFB_HEIGHT / s_target_height; + } + if (index & 1) + value++; // fix max values to describe the outer border + + return value; +} + +void Renderer::BBoxWrite(int index, u16 value) +{ + int local_value = value; // u16 isn't enough to multiply by the efb width + if (index & 1) + local_value--; + if (index < 2) + { + local_value = local_value * s_target_width / EFB_WIDTH; + } + else + { + local_value = local_value * s_target_height / EFB_HEIGHT; + } + + BBox::Set(index, local_value); +} + +void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture, u32 src_width, u32 src_height, float gamma) +{ + if (g_ActiveConfig.iStereoMode == STEREO_SBS || g_ActiveConfig.iStereoMode == STEREO_TAB) + { + TargetRectangle left_rc, right_rc; + ConvertStereoRectangle(dst, left_rc, right_rc); + + D3D12_VIEWPORT left_vp = { + static_cast(left_rc.left), + static_cast(left_rc.top), + static_cast(left_rc.GetWidth()), + static_cast(left_rc.GetHeight()), + D3D12_MIN_DEPTH, + D3D12_MAX_DEPTH + }; + + D3D12_VIEWPORT right_vp = { + static_cast(right_rc.left), + static_cast(right_rc.top), + static_cast(right_rc.GetWidth()), + static_cast(right_rc.GetHeight()), + D3D12_MIN_DEPTH, + D3D12_MAX_DEPTH + }; + + // Swap chain backbuffer is never multisampled.. 
+ + D3D::current_command_list->RSSetViewports(1, &left_vp); + D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); + + D3D::current_command_list->RSSetViewports(1, &right_vp); + D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 1, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); + } + else if (g_ActiveConfig.iStereoMode == STEREO_3DVISION) + { + // D3D12TODO + // Not currently supported on D3D12 backend. Implemented (but untested) code kept for reference. + + //if (!s_3d_vision_texture) + // Create3DVisionTexture(s_backbuffer_width, s_backbuffer_height); + + //D3D12_VIEWPORT leftVp12 = { static_cast(dst.left), static_cast(dst.top), static_cast(dst.GetWidth()), static_cast(dst.GetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + //D3D12_VIEWPORT rightVp12 = { static_cast(dst.left + s_backbuffer_width), static_cast(dst.top), static_cast(dst.GetWidth()), static_cast(dst.GetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + + //// Render to staging texture which is double the width of the backbuffer + //s_3d_vision_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + //D3D::current_command_list->OMSetRenderTargets(1, &s_3d_vision_texture->GetRTV12(), FALSE, nullptr); + + //D3D::current_command_list->RSSetViewports(1, &leftVp12); + //D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 0, 
DXGI_FORMAT_R8G8B8A8_UNORM, false, s_3d_vision_texture->GetMultisampled()); + + //D3D::current_command_list->RSSetViewports(1, &rightVp12); + //D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 1, DXGI_FORMAT_R8G8B8A8_UNORM, false, s_3d_vision_texture->GetMultisampled()); + + //// Copy the left eye to the backbuffer, if Nvidia 3D Vision is enabled it should + //// recognize the signature and automatically include the right eye frame. + //// D3D12TODO: Does this work on D3D12? + + //D3D12_BOX box = CD3DX12_BOX(0, 0, 0, s_backbuffer_width, s_backbuffer_height, 1); + //D3D12_TEXTURE_COPY_LOCATION dst = CD3DX12_TEXTURE_COPY_LOCATION(D3D::GetBackBuffer()->GetTex12(), 0); + //D3D12_TEXTURE_COPY_LOCATION src = CD3DX12_TEXTURE_COPY_LOCATION(s_3d_vision_texture->GetTex12(), 0); + + //D3D::GetBackBuffer()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_DEST); + //s_3d_vision_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + //D3D::current_command_list->CopyTextureRegion(&dst, 0, 0, 0, &src, &box); + + //// Restore render target to backbuffer + //D3D::GetBackBuffer()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + //D3D::current_command_list->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV12(), FALSE, nullptr); + } + else + { + D3D12_VIEWPORT vp = { static_cast(dst.left), static_cast(dst.top), static_cast(dst.GetWidth()), static_cast(dst.GetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::current_command_list->RSSetViewports(1, &vp); + + D3D::DrawShadedTexQuad( + src_texture, + src.AsRECT(), + src_width, + src_height, + (g_Config.iStereoMode == STEREO_ANAGLYPH) ? 
StaticShaderCache::GetAnaglyphPixelShader() : StaticShaderCache::GetColorCopyPixelShader(false), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + D3D12_SHADER_BYTECODE(), + gamma, + 0, + DXGI_FORMAT_R8G8B8A8_UNORM, + false, + false // Backbuffer never multisampled. + ); + } +} + +D3D12_BLEND_DESC Renderer::GetResetBlendDesc() +{ + return g_reset_blend_desc; +} + +D3D12_DEPTH_STENCIL_DESC Renderer::GetResetDepthStencilDesc() +{ + return g_reset_depth_desc; +} + +D3D12_RASTERIZER_DESC Renderer::GetResetRasterizerDesc() +{ + return g_reset_rast_desc; +} + +} // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/Render.h b/Source/Core/VideoBackends/D3D12/Render.h new file mode 100644 index 0000000000..bf0c73dad9 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/Render.h @@ -0,0 +1,70 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include "VideoCommon/RenderBase.h" + +namespace DX12 +{ + +class Renderer final : public ::Renderer +{ +public: + Renderer(void*& window_handle); + ~Renderer(); + + void SetColorMask() override; + void SetBlendMode(bool force_update) override; + void SetScissorRect(const EFBRectangle& rc) override; + void SetGenerationMode() override; + void SetDepthMode() override; + void SetLogicOpMode() override; + void SetDitherMode() override; + void SetSamplerState(int stage, int tex_index, bool custom_tex) override; + void SetInterlacingMode() override; + void SetViewport() override; + + // TODO: Fix confusing names (see ResetAPIState and RestoreAPIState) + void ApplyState(bool use_dst_alpha) override; + void RestoreState() override; + + void ApplyCullDisable(); + void RestoreCull(); + + void RenderText(const std::string& text, int left, int top, u32 color) override; + + u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; + void PokeEFB(EFBAccessType type, const 
EfbPokeData* points, size_t num_points) override; + + u16 BBoxRead(int index) override; + void BBoxWrite(int index, u16 value) override; + + void ResetAPIState() override; + void RestoreAPIState() override; + + TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; + + void SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, const EFBRectangle& rc, float gamma) override; + + void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, u32 color, u32 z) override; + + void ReinterpretPixelData(unsigned int conv_type) override; + + bool SaveScreenshot(const std::string& filename, const TargetRectangle& rc) override; + + static bool CheckForResize(); + + int GetMaxTextureSize() override; + + static D3D12_BLEND_DESC GetResetBlendDesc(); + static D3D12_DEPTH_STENCIL_DESC GetResetDepthStencilDesc(); + static D3D12_RASTERIZER_DESC GetResetRasterizerDesc(); + +private: + void BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture, u32 src_width, u32 src_height, float gamma); +}; + +} diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.cpp b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp new file mode 100644 index 0000000000..299834434b --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.cpp @@ -0,0 +1,388 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Common/LinearDiskCache.h" + +#include "Core/ConfigManager.h" + +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DShader.h" +#include "VideoBackends/D3D12/ShaderCache.h" + +#include "VideoCommon/Debugger.h" +#include "VideoCommon/Statistics.h" + +namespace DX12 +{ + +// Primitive topology type is always triangle, unless the GS stage is used. This is consumed +// by the PSO created in Renderer::ApplyState. 
+static D3D12_PRIMITIVE_TOPOLOGY_TYPE s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + +using GsBytecodeCache = std::map; +using PsBytecodeCache = std::map; +using VsBytecodeCache = std::map; +GsBytecodeCache s_gs_bytecode_cache; +PsBytecodeCache s_ps_bytecode_cache; +VsBytecodeCache s_vs_bytecode_cache; + +// Used to keep track of blobs to release at Shutdown time. +static std::vector s_shader_blob_list; + +// Only used for shader debugging.. +using GsHlslCache = std::map; +using PsHlslCache = std::map; +using VsHlslCache = std::map; +static GsHlslCache s_gs_hlsl_cache; +static PsHlslCache s_ps_hlsl_cache; +static VsHlslCache s_vs_hlsl_cache; + +static LinearDiskCache s_gs_disk_cache; +static LinearDiskCache s_ps_disk_cache; +static LinearDiskCache s_vs_disk_cache; + +static UidChecker s_geometry_uid_checker; +static UidChecker s_pixel_uid_checker; +static UidChecker s_vertex_uid_checker; + +static D3D12_SHADER_BYTECODE s_last_geometry_shader_bytecode; +static D3D12_SHADER_BYTECODE s_last_pixel_shader_bytecode; +static D3D12_SHADER_BYTECODE s_last_vertex_shader_bytecode; +static GeometryShaderUid s_last_geometry_shader_uid; +static PixelShaderUid s_last_pixel_shader_uid; +static VertexShaderUid s_last_vertex_shader_uid; + +template +class ShaderCacheInserter final : public LinearDiskCacheReader +{ +public: + void Read(const UidType &key, const u8* value, u32 value_size) + { + ID3DBlob* blob = nullptr; + CheckHR(d3d_create_blob(value_size, &blob)); + memcpy(blob->GetBufferPointer(), value, value_size); + + ShaderCache::InsertByteCode(key, cache, blob); + } +}; + +void ShaderCache::Init() +{ + // This class intentionally shares its shader cache files with DX11, as the shaders are (right now) identical. + // Reduces unnecessary compilation when switching between APIs. 
+ + s_last_geometry_shader_bytecode = {}; + s_last_pixel_shader_bytecode = {}; + s_last_vertex_shader_bytecode = {}; + s_last_geometry_shader_uid = {}; + s_last_pixel_shader_uid = {}; + s_last_vertex_shader_uid = {}; + + // Ensure shader cache directory exists.. + std::string shader_cache_path = File::GetUserPath(D_SHADERCACHE_IDX); + + if (!File::Exists(shader_cache_path)) + File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX)); + + std::string title_unique_id = SConfig::GetInstance().m_strUniqueID.c_str(); + + std::string gs_cache_filename = StringFromFormat("%sdx11-%s-gs.cache", shader_cache_path.c_str(), title_unique_id.c_str()); + std::string ps_cache_filename = StringFromFormat("%sdx11-%s-ps.cache", shader_cache_path.c_str(), title_unique_id.c_str()); + std::string vs_cache_filename = StringFromFormat("%sdx11-%s-vs.cache", shader_cache_path.c_str(), title_unique_id.c_str()); + + ShaderCacheInserter gs_inserter; + s_gs_disk_cache.OpenAndRead(gs_cache_filename, gs_inserter); + + ShaderCacheInserter ps_inserter; + s_ps_disk_cache.OpenAndRead(ps_cache_filename, ps_inserter); + + ShaderCacheInserter vs_inserter; + s_vs_disk_cache.OpenAndRead(vs_cache_filename, vs_inserter); + + // Clear out cache when debugging shaders to ensure stale ones don't stick around.. 
+ if (g_Config.bEnableShaderDebugging) + Clear(); + + SETSTAT(stats.numPixelShadersAlive, static_cast(s_ps_bytecode_cache.size())); + SETSTAT(stats.numPixelShadersCreated, static_cast(s_ps_bytecode_cache.size())); + SETSTAT(stats.numVertexShadersAlive, static_cast(s_vs_bytecode_cache.size())); + SETSTAT(stats.numVertexShadersCreated, static_cast(s_vs_bytecode_cache.size())); +} + +void ShaderCache::Clear() +{ + for (auto& iter : s_shader_blob_list) + SAFE_RELEASE(iter); + + s_shader_blob_list.clear(); + + s_gs_bytecode_cache.clear(); + s_ps_bytecode_cache.clear(); + s_vs_bytecode_cache.clear(); + + s_last_geometry_shader_bytecode = {}; + s_last_geometry_shader_uid = {}; + + s_last_pixel_shader_bytecode = {}; + s_last_pixel_shader_uid = {}; + + s_last_vertex_shader_bytecode = {}; + s_last_vertex_shader_uid = {}; +} + +void ShaderCache::Shutdown() +{ + Clear(); + + s_gs_disk_cache.Sync(); + s_gs_disk_cache.Close(); + s_ps_disk_cache.Sync(); + s_ps_disk_cache.Close(); + s_vs_disk_cache.Sync(); + s_vs_disk_cache.Close(); + + if (g_Config.bEnableShaderDebugging) + { + s_gs_hlsl_cache.clear(); + s_ps_hlsl_cache.clear(); + s_vs_hlsl_cache.clear(); + } + + s_geometry_uid_checker.Invalidate(); + s_pixel_uid_checker.Invalidate(); + s_vertex_uid_checker.Invalidate(); +} + +void ShaderCache::LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 gs_primitive_type) +{ + SetCurrentPrimitiveTopology(gs_primitive_type); + + GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type, API_D3D); + PixelShaderUid ps_uid = GetPixelShaderUid(ps_dst_alpha_mode, API_D3D); + VertexShaderUid vs_uid = GetVertexShaderUid(API_D3D); + + bool gs_changed = gs_uid != s_last_geometry_shader_uid; + bool ps_changed = ps_uid != s_last_pixel_shader_uid; + bool vs_changed = vs_uid != s_last_vertex_shader_uid; + + if (!gs_changed && !ps_changed && !vs_changed) + { + return; + } + + if (gs_changed) + { + HandleGSUIDChange(gs_uid, gs_primitive_type); + } + + if (ps_changed) + { + 
HandlePSUIDChange(ps_uid, ps_dst_alpha_mode); + } + + if (vs_changed) + { + HandleVSUIDChange(vs_uid); + } + + // A Uid has changed, so the PSO will need to be reset at next ApplyState. + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); +} + +void ShaderCache::SetCurrentPrimitiveTopology(u32 gs_primitive_type) +{ + switch (gs_primitive_type) + { + case PRIMITIVE_TRIANGLES: + s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + break; + case PRIMITIVE_LINES: + s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + break; + case PRIMITIVE_POINTS: + s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + break; + default: + CHECK(0, "Invalid primitive type."); + break; + } +} + +void ShaderCache::HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_type) +{ + s_last_geometry_shader_uid = gs_uid; + + if (g_ActiveConfig.bEnableShaderDebugging) + { + ShaderCode code = GenerateGeometryShaderCode(gs_primitive_type, API_D3D); + s_geometry_uid_checker.AddToIndexAndCheck(code, gs_uid, "Geometry", "g"); + } + + if (gs_uid.GetUidData()->IsPassthrough()) + { + s_last_geometry_shader_bytecode = {}; + return; + } + + auto gs_iterator = s_gs_bytecode_cache.find(gs_uid); + if (gs_iterator != s_gs_bytecode_cache.end()) + { + s_last_geometry_shader_bytecode = gs_iterator->second; + } + else + { + ShaderCode gs_code = GenerateGeometryShaderCode(gs_primitive_type, API_D3D); + ID3DBlob* gs_bytecode = nullptr; + + if (!D3D::CompileGeometryShader(gs_code.GetBuffer(), &gs_bytecode)) + { + GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); + return; + } + + s_last_geometry_shader_bytecode = InsertByteCode(gs_uid, &s_gs_bytecode_cache, gs_bytecode); + s_gs_disk_cache.Append(gs_uid, reinterpret_cast(gs_bytecode->GetBufferPointer()), static_cast(gs_bytecode->GetBufferSize())); + + if (g_ActiveConfig.bEnableShaderDebugging && gs_bytecode) + { + s_gs_hlsl_cache[gs_uid] = gs_code.GetBuffer(); + } + } +} + +void 
ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_alpha_mode) +{ + s_last_pixel_shader_uid = ps_uid; + + if (g_ActiveConfig.bEnableShaderDebugging) + { + ShaderCode code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); + s_pixel_uid_checker.AddToIndexAndCheck(code, ps_uid, "Pixel", "p"); + } + + auto ps_iterator = s_ps_bytecode_cache.find(ps_uid); + if (ps_iterator != s_ps_bytecode_cache.end()) + { + s_last_pixel_shader_bytecode = ps_iterator->second; + GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); + } + else + { + ShaderCode ps_code = GeneratePixelShaderCode(ps_dst_alpha_mode, API_D3D); + ID3DBlob* ps_bytecode = nullptr; + + if (!D3D::CompilePixelShader(ps_code.GetBuffer(), &ps_bytecode)) + { + GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); + return; + } + + s_last_pixel_shader_bytecode = InsertByteCode(ps_uid, &s_ps_bytecode_cache, ps_bytecode); + s_ps_disk_cache.Append(ps_uid, reinterpret_cast(ps_bytecode->GetBufferPointer()), static_cast(ps_bytecode->GetBufferSize())); + + SETSTAT(stats.numPixelShadersAlive, static_cast(s_ps_bytecode_cache.size())); + INCSTAT(stats.numPixelShadersCreated); + + if (g_ActiveConfig.bEnableShaderDebugging && ps_bytecode) + { + s_ps_hlsl_cache[ps_uid] = ps_code.GetBuffer(); + } + } +} + +void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid) +{ + s_last_vertex_shader_uid = vs_uid; + + if (g_ActiveConfig.bEnableShaderDebugging) + { + ShaderCode code = GenerateVertexShaderCode(API_D3D); + s_vertex_uid_checker.AddToIndexAndCheck(code, vs_uid, "Vertex", "v"); + } + + auto vs_iterator = s_vs_bytecode_cache.find(vs_uid); + if (vs_iterator != s_vs_bytecode_cache.end()) + { + s_last_vertex_shader_bytecode = vs_iterator->second; + GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true); + } + else + { + ShaderCode vs_code = GenerateVertexShaderCode(API_D3D); + ID3DBlob* vs_bytecode = nullptr; + + if (!D3D::CompileVertexShader(vs_code.GetBuffer(), &vs_bytecode)) + { + GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, 
true); + return; + } + + s_last_vertex_shader_bytecode = InsertByteCode(vs_uid, &s_vs_bytecode_cache, vs_bytecode); + s_vs_disk_cache.Append(vs_uid, reinterpret_cast(vs_bytecode->GetBufferPointer()), static_cast(vs_bytecode->GetBufferSize())); + + SETSTAT(stats.numVertexShadersAlive, static_cast(s_vs_bytecode_cache.size())); + INCSTAT(stats.numVertexShadersCreated); + + if (g_ActiveConfig.bEnableShaderDebugging && vs_bytecode) + { + s_vs_hlsl_cache[vs_uid] = vs_code.GetBuffer(); + } + } +} + +template +D3D12_SHADER_BYTECODE ShaderCache::InsertByteCode(const UidType& uid, ShaderCacheType* shader_cache, ID3DBlob* bytecode_blob) +{ + // Note: Don't release the incoming bytecode, we need it to stick around, since in D3D12 + // the raw bytecode itself is bound. It is released at Shutdown() time. + + s_shader_blob_list.push_back(bytecode_blob); + + D3D12_SHADER_BYTECODE shader_bytecode; + shader_bytecode.pShaderBytecode = bytecode_blob->GetBufferPointer(); + shader_bytecode.BytecodeLength = bytecode_blob->GetBufferSize(); + + (*shader_cache)[uid] = shader_bytecode; + + return shader_bytecode; +} + +D3D12_PRIMITIVE_TOPOLOGY_TYPE ShaderCache::GetCurrentPrimitiveTopology() { return s_current_primitive_topology; } + +D3D12_SHADER_BYTECODE ShaderCache::GetActiveGeometryShaderBytecode() { return s_last_geometry_shader_bytecode; } +D3D12_SHADER_BYTECODE ShaderCache::GetActivePixelShaderBytecode() { return s_last_pixel_shader_bytecode; } +D3D12_SHADER_BYTECODE ShaderCache::GetActiveVertexShaderBytecode() { return s_last_vertex_shader_bytecode; } + +const GeometryShaderUid* ShaderCache::GetActiveGeometryShaderUid() { return &s_last_geometry_shader_uid; } +const PixelShaderUid* ShaderCache::GetActivePixelShaderUid() { return &s_last_pixel_shader_uid; } +const VertexShaderUid* ShaderCache::GetActiveVertexShaderUid() { return &s_last_vertex_shader_uid; } + +D3D12_SHADER_BYTECODE ShaderCache::GetGeometryShaderFromUid(const GeometryShaderUid* uid) +{ + auto bytecode = 
s_gs_bytecode_cache.find(*uid); + if (bytecode != s_gs_bytecode_cache.end()) + return bytecode->second; + + return D3D12_SHADER_BYTECODE(); +} + +D3D12_SHADER_BYTECODE ShaderCache::GetPixelShaderFromUid(const PixelShaderUid* uid) +{ + auto bytecode = s_ps_bytecode_cache.find(*uid); + if (bytecode != s_ps_bytecode_cache.end()) + return bytecode->second; + + return D3D12_SHADER_BYTECODE(); +} + +D3D12_SHADER_BYTECODE ShaderCache::GetVertexShaderFromUid(const VertexShaderUid* uid) +{ + auto bytecode = s_vs_bytecode_cache.find(*uid); + if (bytecode != s_vs_bytecode_cache.end()) + return bytecode->second; + + return D3D12_SHADER_BYTECODE(); +} + +} \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/ShaderCache.h b/Source/Core/VideoBackends/D3D12/ShaderCache.h new file mode 100644 index 0000000000..e9b6a081e7 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/ShaderCache.h @@ -0,0 +1,50 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "VideoCommon/GeometryShaderGen.h" +#include "VideoCommon/PixelShaderGen.h" +#include "VideoCommon/VertexShaderGen.h" + +namespace DX12 +{ + +class D3DBlob; + +class ShaderCache final +{ +public: + static void Init(); + static void Clear(); + static void Shutdown(); + + static void LoadAndSetActiveShaders(DSTALPHA_MODE ps_dst_alpha_mode, u32 gs_primitive_type); + + template + static D3D12_SHADER_BYTECODE InsertByteCode(const UidType& uid, ShaderCacheType* shader_cache, ID3DBlob* bytecode_blob); + + static D3D12_SHADER_BYTECODE GetActiveGeometryShaderBytecode(); + static D3D12_SHADER_BYTECODE GetActivePixelShaderBytecode(); + static D3D12_SHADER_BYTECODE GetActiveVertexShaderBytecode(); + + static const GeometryShaderUid* GetActiveGeometryShaderUid(); + static const PixelShaderUid* GetActivePixelShaderUid(); + static const VertexShaderUid* GetActiveVertexShaderUid(); + + static D3D12_SHADER_BYTECODE GetGeometryShaderFromUid(const GeometryShaderUid* uid); + static D3D12_SHADER_BYTECODE GetPixelShaderFromUid(const PixelShaderUid* uid); + static D3D12_SHADER_BYTECODE GetVertexShaderFromUid(const VertexShaderUid* uid); + + static D3D12_PRIMITIVE_TOPOLOGY_TYPE GetCurrentPrimitiveTopology(); + +private: + static void SetCurrentPrimitiveTopology(u32 gs_primitive_type); + + static void HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_type); + static void HandlePSUIDChange(PixelShaderUid ps_uid, DSTALPHA_MODE ps_dst_alpha_mode); + static void HandleVSUIDChange(VertexShaderUid vs_uid); +}; + +} diff --git a/Source/Core/VideoBackends/D3D12/ShaderConstantsManager.cpp b/Source/Core/VideoBackends/D3D12/ShaderConstantsManager.cpp new file mode 100644 index 0000000000..049145f352 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/ShaderConstantsManager.cpp @@ -0,0 +1,167 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "D3DBase.h" +#include "D3DCommandListManager.h" +#include "D3DStreamBuffer.h" + +#include "ShaderConstantsManager.h" + +#include "VideoCommon/GeometryShaderManager.h" +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexShaderManager.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +enum SHADER_STAGE +{ + SHADER_STAGE_GEOMETRY_SHADER = 0, + SHADER_STAGE_PIXEL_SHADER = 1, + SHADER_STAGE_VERTEX_SHADER = 2, + SHADER_STAGE_COUNT = 3 +}; + +static std::array s_shader_constant_stream_buffers = {}; + +static const unsigned int s_shader_constant_buffer_padded_sizes[SHADER_STAGE_COUNT] = { + (sizeof(GeometryShaderConstants) + 0xff) & ~0xff, + (sizeof(PixelShaderConstants) + 0xff) & ~0xff, + (sizeof(VertexShaderConstants) + 0xff) & ~0xff +}; + +void ShaderConstantsManager::Init() +{ + // Allow a large maximum size, as we want to minimize stalls here + std::generate(std::begin(s_shader_constant_stream_buffers), std::end(s_shader_constant_stream_buffers), []() { + return new D3DStreamBuffer(2 * 1024 * 1024, 64 * 1024 * 1024, nullptr); + }); +} + +void ShaderConstantsManager::Shutdown() +{ + for (auto& it : s_shader_constant_stream_buffers) + SAFE_DELETE(it); +} + +bool ShaderConstantsManager::LoadAndSetGeometryShaderConstants() +{ + bool command_list_executed = false; + + if (GeometryShaderManager::dirty) + { + command_list_executed = s_shader_constant_stream_buffers[SHADER_STAGE_GEOMETRY_SHADER]->AllocateSpaceInBuffer( + s_shader_constant_buffer_padded_sizes[SHADER_STAGE_GEOMETRY_SHADER], + 0 // The padded sizes are already aligned to 256 bytes, so don't need to worry about manually aligning offset. 
+ ); + + memcpy( + s_shader_constant_stream_buffers[SHADER_STAGE_GEOMETRY_SHADER]->GetCPUAddressOfCurrentAllocation(), + &GeometryShaderManager::constants, + sizeof(GeometryShaderConstants)); + + GeometryShaderManager::dirty = false; + + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants)); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_GS_CBV, true); + } + + if (D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_GS_CBV)) + { + D3D::current_command_list->SetGraphicsRootConstantBufferView( + DESCRIPTOR_TABLE_GS_CBV, + s_shader_constant_stream_buffers[SHADER_STAGE_GEOMETRY_SHADER]->GetGPUAddressOfCurrentAllocation() + ); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_GS_CBV, false); + } + + return command_list_executed; +} + +bool ShaderConstantsManager::LoadAndSetPixelShaderConstants() +{ + bool command_list_executed = false; + + if (PixelShaderManager::dirty) + { + command_list_executed = s_shader_constant_stream_buffers[SHADER_STAGE_PIXEL_SHADER]->AllocateSpaceInBuffer( + s_shader_constant_buffer_padded_sizes[SHADER_STAGE_PIXEL_SHADER], + 0 // The padded sizes are already aligned to 256 bytes, so don't need to worry about manually aligning offset. 
+ ); + + memcpy( + s_shader_constant_stream_buffers[SHADER_STAGE_PIXEL_SHADER]->GetCPUAddressOfCurrentAllocation(), + &PixelShaderManager::constants, + sizeof(PixelShaderConstants)); + + PixelShaderManager::dirty = false; + + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants)); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); + } + + if (D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV)) + { + D3D::current_command_list->SetGraphicsRootConstantBufferView( + DESCRIPTOR_TABLE_PS_CBVONE, + s_shader_constant_stream_buffers[SHADER_STAGE_PIXEL_SHADER]->GetGPUAddressOfCurrentAllocation() + ); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, false); + } + + return command_list_executed; +} + +bool ShaderConstantsManager::LoadAndSetVertexShaderConstants() +{ + bool command_list_executed = false; + + if (VertexShaderManager::dirty) + { + command_list_executed = s_shader_constant_stream_buffers[SHADER_STAGE_VERTEX_SHADER]->AllocateSpaceInBuffer( + s_shader_constant_buffer_padded_sizes[SHADER_STAGE_VERTEX_SHADER], + 0 // The padded sizes are already aligned to 256 bytes, so don't need to worry about manually aligning offset. 
+ ); + + memcpy( + s_shader_constant_stream_buffers[SHADER_STAGE_VERTEX_SHADER]->GetCPUAddressOfCurrentAllocation(), + &VertexShaderManager::constants, + sizeof(VertexShaderConstants)); + + VertexShaderManager::dirty = false; + + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants)); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VS_CBV, true); + } + + if (D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_VS_CBV)) + { + const D3D12_GPU_VIRTUAL_ADDRESS calculated_gpu_va = + s_shader_constant_stream_buffers[SHADER_STAGE_VERTEX_SHADER]->GetGPUAddressOfCurrentAllocation(); + + D3D::current_command_list->SetGraphicsRootConstantBufferView( + DESCRIPTOR_TABLE_VS_CBV, + calculated_gpu_va + ); + + if (g_ActiveConfig.bEnablePixelLighting) + D3D::current_command_list->SetGraphicsRootConstantBufferView( + DESCRIPTOR_TABLE_PS_CBVTWO, + calculated_gpu_va + ); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VS_CBV, false); + } + + return command_list_executed; +} + +} \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/ShaderConstantsManager.h b/Source/Core/VideoBackends/D3D12/ShaderConstantsManager.h new file mode 100644 index 0000000000..1d235a030d --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/ShaderConstantsManager.h @@ -0,0 +1,23 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "ShaderCache.h" + +namespace DX12 +{ + +class ShaderConstantsManager final +{ +public: + static void Init(); + static void Shutdown(); + + static bool LoadAndSetGeometryShaderConstants(); + static bool LoadAndSetPixelShaderConstants(); + static bool LoadAndSetVertexShaderConstants(); +}; + +} \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp new file mode 100644 index 0000000000..adbe83346b --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp @@ -0,0 +1,699 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Common/StringUtil.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DShader.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +// Pixel Shader blobs +static ID3DBlob* s_color_matrix_program_blob[2] = {}; +static ID3DBlob* s_color_copy_program_blob[2] = {}; +static ID3DBlob* s_depth_matrix_program_blob[2] = {}; +static ID3DBlob* s_depth_copy_program_blob[2] = {}; +static ID3DBlob* s_clear_program_blob = {}; +static ID3DBlob* s_anaglyph_program_blob = {}; +static ID3DBlob* s_rgba6_to_rgb8_program_blob[2] = {}; +static ID3DBlob* s_rgb8_to_rgba6_program_blob[2] = {}; + +// Vertex Shader blobs/input layouts +static ID3DBlob* s_simple_vertex_shader_blob = {}; +static ID3DBlob* s_simple_clear_vertex_shader_blob = {}; + +static const D3D12_INPUT_ELEMENT_DESC s_simple_vertex_shader_input_elements[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, +}; + +static const D3D12_INPUT_LAYOUT_DESC s_simple_vertex_shader_input_layout = { + s_simple_vertex_shader_input_elements, + 
ARRAYSIZE(s_simple_vertex_shader_input_elements) +}; + +static const D3D12_INPUT_ELEMENT_DESC s_clear_vertex_shader_input_elements[] = +{ + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, +}; + +static const D3D12_INPUT_LAYOUT_DESC s_clear_vertex_shader_input_layout = +{ + s_clear_vertex_shader_input_elements, + ARRAYSIZE(s_clear_vertex_shader_input_elements) +}; + +// Geometry Shader blobs +static ID3DBlob* s_clear_geometry_shader_blob = nullptr; +static ID3DBlob* s_copy_geometry_shader_blob = nullptr; + +// Pixel Shader HLSL +static constexpr const char s_clear_program_hlsl[] = { + "void main(\n" + "out float4 ocol0 : SV_Target,\n" + "in float4 pos : SV_Position,\n" + "in float4 incol0 : COLOR0){\n" + "ocol0 = incol0;\n" + "}\n" +}; + +// EXISTINGD3D11TODO: Find some way to avoid having separate shaders for non-MSAA and MSAA... +static constexpr const char s_color_copy_program_hlsl[] = { + "sampler samp0 : register(s0);\n" + "Texture2DArray Tex0 : register(t0);\n" + "void main(\n" + "out float4 ocol0 : SV_Target,\n" + "in float4 pos : SV_Position,\n" + "in float3 uv0 : TEXCOORD0){\n" + "ocol0 = Tex0.Sample(samp0,uv0);\n" + "}\n" +}; + +static constexpr const char s_depth_copy_program_hlsl[] = { + "sampler samp0 : register(s0);\n" + "Texture2DArray Tex0 : register(t0);\n" + "void main(\n" + "out float odepth : SV_Depth,\n" + "in float4 pos : SV_Position,\n" + "in float3 uv0 : TEXCOORD0){\n" + "odepth = Tex0.Sample(samp0,uv0);\n" + "}\n" +}; + +// Anaglyph Red-Cyan shader based on Dubois algorithm +// Constants taken from the paper: +// "Conversion of a Stereo Pair to Anaglyph with +// the Least-Squares Projection Method" +// Eric Dubois, March 2009 +static constexpr const char s_anaglyph_program_hlsl[] = { + "sampler samp0 : register(s0);\n" + "Texture2DArray Tex0 : register(t0);\n" + "void main(\n" + "out float4 
ocol0 : SV_Target,\n" + "in float4 pos : SV_Position,\n" + "in float3 uv0 : TEXCOORD0){\n" + "float4 c0 = Tex0.Sample(samp0, float3(uv0.xy, 0.0));\n" + "float4 c1 = Tex0.Sample(samp0, float3(uv0.xy, 1.0));\n" + "float3x3 l = float3x3( 0.437, 0.449, 0.164,\n" + " -0.062,-0.062,-0.024,\n" + " -0.048,-0.050,-0.017);\n" + "float3x3 r = float3x3(-0.011,-0.032,-0.007,\n" + " 0.377, 0.761, 0.009,\n" + " -0.026,-0.093, 1.234);\n" + "ocol0 = float4(mul(l, c0.rgb) + mul(r, c1.rgb), c0.a);\n" + "}\n" +}; + +// TODO: Improve sampling algorithm! +static constexpr const char s_color_copy_program_msaa_hlsl[] = { + "#define SAMPLES %d\n" + "sampler samp0 : register(s0);\n" + "Texture2DMSArray Tex0 : register(t0);\n" + "void main(\n" + "out float4 ocol0 : SV_Target,\n" + "in float4 pos : SV_Position,\n" + "in float3 uv0 : TEXCOORD0){\n" + "int width, height, slices, samples;\n" + "Tex0.GetDimensions(width, height, slices, samples);\n" + "ocol0 = 0;\n" + "for(int i = 0; i < SAMPLES; ++i)\n" + " ocol0 += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" + "ocol0 /= SAMPLES;\n" + "}\n" +}; + +static constexpr const char s_depth_copy_program_msaa_hlsl[] = { + "#define SAMPLES %d\n" + "Texture2DMSArray Tex0 : register(t0);\n" + "void main(\n" + " out float depth : SV_Depth,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0)\n" + "{\n" + " int width, height, slices, samples;\n" + " Tex0.GetDimensions(width, height, slices, samples);\n" + " depth = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n" + " for(int i = 1; i < SAMPLES; ++i)\n" + " depth = min(depth, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n" + "}\n" +}; + +static constexpr const char s_color_matrix_program_hlsl[] = { + "sampler samp0 : register(s0);\n" + "Texture2DArray Tex0 : register(t0);\n" + "uniform float4 cColMatrix[7] : register(c0);\n" + "void main(\n" + "out float4 ocol0 : SV_Target,\n" + "in float4 pos : SV_Position,\n" + "in float3 uv0 : TEXCOORD0){\n" + 
"float4 texcol = Tex0.Sample(samp0,uv0);\n" + "texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n" +}; + +static constexpr const char s_color_matrix_program_msaa_hlsl[] = { + "#define SAMPLES %d\n" + "sampler samp0 : register(s0);\n" + "Texture2DMSArray Tex0 : register(t0);\n" + "uniform float4 cColMatrix[7] : register(c0);\n" + "void main(\n" + "out float4 ocol0 : SV_Target,\n" + "in float4 pos : SV_Position,\n" + "in float3 uv0 : TEXCOORD0){\n" + "int width, height, slices, samples;\n" + "Tex0.GetDimensions(width, height, slices, samples);\n" + "float4 texcol = 0;\n" + "for(int i = 0; i < SAMPLES; ++i)\n" + " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" + "texcol /= SAMPLES;\n" + "texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n" +}; + +static constexpr const char s_depth_matrix_program_hlsl[] = { + "sampler samp0 : register(s0);\n" + "Texture2DArray Tex0 : register(t0);\n" + "uniform float4 cColMatrix[7] : register(c0);\n" + "void main(\n" + "out float4 ocol0 : SV_Target,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0){\n" + " float4 texcol = Tex0.Sample(samp0,uv0);\n" + " int depth = int((1.0 - texcol.x) * 16777216.0);\n" + + // Convert to Z24 format + " int4 workspace;\n" + " workspace.r = (depth >> 16) & 255;\n" + " workspace.g = (depth >> 8) & 255;\n" + " workspace.b = depth & 255;\n" + + // Convert to Z4 format + " workspace.a = (depth >> 16) & 0xF0;\n" + + // Normalize components to [0.0..1.0] + " texcol = float4(workspace) / 255.0;\n" + + // Apply color matrix + " ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + 
cColMatrix[4];\n" + "}\n" +}; + +static constexpr const char s_depth_matrix_program_msaa_hlsl[] = { + "#define SAMPLES %d\n" + "sampler samp0 : register(s0);\n" + "Texture2DMSArray Tex0 : register(t0);\n" + "uniform float4 cColMatrix[7] : register(c0);\n" + "void main(\n" + "out float4 ocol0 : SV_Target,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0){\n" + " int width, height, slices, samples;\n" + " Tex0.GetDimensions(width, height, slices, samples);\n" + " float4 texcol = 0;\n" + " for(int i = 0; i < SAMPLES; ++i)\n" + " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" + " texcol /= SAMPLES;\n" + " int depth = int((1.0 - texcol.x) * 16777216.0);\n" + + // Convert to Z24 format + " int4 workspace;\n" + " workspace.r = (depth >> 16) & 255;\n" + " workspace.g = (depth >> 8) & 255;\n" + " workspace.b = depth & 255;\n" + + // Convert to Z4 format + " workspace.a = (depth >> 16) & 0xF0;\n" + + // Normalize components to [0.0..1.0] + " texcol = float4(workspace) / 255.0;\n" + + // Apply color matrix + " ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n" +}; + +static constexpr const char s_reint_rgba6_to_rgb8_program_hlsl[] = { + "sampler samp0 : register(s0);\n" + "Texture2DArray Tex0 : register(t0);\n" + "void main(\n" + " out float4 ocol0 : SV_Target,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0)\n" + "{\n" + " int4 src6 = round(Tex0.Sample(samp0,uv0) * 63.f);\n" + " int4 dst8;\n" + " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" + " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" + " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n" + " dst8.a = 255;\n" + " ocol0 = (float4)dst8 / 255.f;\n" + "}" +}; + +static constexpr const char s_reint_rgba6_to_rgb8_program_msaa_hlsl[] = { + "#define SAMPLES %d\n" + "sampler samp0 : register(s0);\n" + "Texture2DMSArray Tex0 : register(t0);\n" + "void main(\n" + " out float4 
ocol0 : SV_Target,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0)\n" + "{\n" + " int width, height, slices, samples;\n" + " Tex0.GetDimensions(width, height, slices, samples);\n" + " float4 texcol = 0;\n" + " for (int i = 0; i < SAMPLES; ++i)\n" + " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" + " texcol /= SAMPLES;\n" + " int4 src6 = round(texcol * 63.f);\n" + " int4 dst8;\n" + " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" + " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" + " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n" + " dst8.a = 255;\n" + " ocol0 = (float4)dst8 / 255.f;\n" + "}" +}; + +static constexpr const char s_reint_rgb8_to_rgba6_program_hlsl[] = { + "sampler samp0 : register(s0);\n" + "Texture2DArray Tex0 : register(t0);\n" + "void main(\n" + " out float4 ocol0 : SV_Target,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0)\n" + "{\n" + " int4 src8 = round(Tex0.Sample(samp0,uv0) * 255.f);\n" + " int4 dst6;\n" + " dst6.r = src8.r >> 2;\n" + " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" + " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n" + " dst6.a = src8.b & 0x3F;\n" + " ocol0 = (float4)dst6 / 63.f;\n" + "}\n" +}; + +static constexpr const char s_reint_rgb8_to_rgba6_program_msaa_hlsl[] = { + "#define SAMPLES %d\n" + "sampler samp0 : register(s0);\n" + "Texture2DMSArray Tex0 : register(t0);\n" + "void main(\n" + " out float4 ocol0 : SV_Target,\n" + " in float4 pos : SV_Position,\n" + " in float3 uv0 : TEXCOORD0)\n" + "{\n" + " int width, height, slices, samples;\n" + " Tex0.GetDimensions(width, height, slices, samples);\n" + " float4 texcol = 0;\n" + " for (int i = 0; i < SAMPLES; ++i)\n" + " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" + " texcol /= SAMPLES;\n" + " int4 src8 = round(texcol * 255.f);\n" + " int4 dst6;\n" + " dst6.r = src8.r >> 2;\n" + " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" + " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 
6);\n" + " dst6.a = src8.b & 0x3F;\n" + " ocol0 = (float4)dst6 / 63.f;\n" + "}\n" +}; + +// Vertex Shader HLSL +static constexpr const char s_simple_vertex_shader_hlsl[] = { + "struct VSOUTPUT\n" + "{\n" + "float4 vPosition : POSITION;\n" + "float3 vTexCoord : TEXCOORD0;\n" + "float vTexCoord1 : TEXCOORD1;\n" + "};\n" + "VSOUTPUT main(float4 inPosition : POSITION,float4 inTEX0 : TEXCOORD0)\n" + "{\n" + "VSOUTPUT OUT;\n" + "OUT.vPosition = inPosition;\n" + "OUT.vTexCoord = inTEX0.xyz;\n" + "OUT.vTexCoord1 = inTEX0.w;\n" + "return OUT;\n" + "}\n" +}; + +static constexpr const char s_clear_vertex_shader_hlsl[] = { + "struct VSOUTPUT\n" + "{\n" + "float4 vPosition : POSITION;\n" + "float4 vColor0 : COLOR0;\n" + "};\n" + "VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n" + "{\n" + "VSOUTPUT OUT;\n" + "OUT.vPosition = inPosition;\n" + "OUT.vColor0 = inColor0;\n" + "return OUT;\n" + "}\n" +}; + +// Geometry Shader HLSL +static constexpr const char s_clear_geometry_shader_hlsl[] = { + "struct VSOUTPUT\n" + "{\n" + " float4 vPosition : POSITION;\n" + " float4 vColor0 : COLOR0;\n" + "};\n" + "struct GSOUTPUT\n" + "{\n" + " float4 vPosition : POSITION;\n" + " float4 vColor0 : COLOR0;\n" + " uint slice : SV_RenderTargetArrayIndex;\n" + "};\n" + "[maxvertexcount(6)]\n" + "void main(triangle VSOUTPUT o[3], inout TriangleStream Output)\n" + "{\n" + "for(int slice = 0; slice < 2; slice++)\n" + "{\n" + " for(int i = 0; i < 3; i++)\n" + " {\n" + " GSOUTPUT OUT;\n" + " OUT.vPosition = o[i].vPosition;\n" + " OUT.vColor0 = o[i].vColor0;\n" + " OUT.slice = slice;\n" + " Output.Append(OUT);\n" + " }\n" + " Output.RestartStrip();\n" + "}\n" + "}\n" +}; + +static constexpr const char s_copy_geometry_shader_hlsl[] = { + "struct VSOUTPUT\n" + "{\n" + " float4 vPosition : POSITION;\n" + " float3 vTexCoord : TEXCOORD0;\n" + " float vTexCoord1 : TEXCOORD1;\n" + "};\n" + "struct GSOUTPUT\n" + "{\n" + " float4 vPosition : POSITION;\n" + " float3 vTexCoord : TEXCOORD0;\n" + " 
float vTexCoord1 : TEXCOORD1;\n"
+	" uint slice : SV_RenderTargetArrayIndex;\n"
+	"};\n"
+	"[maxvertexcount(6)]\n"
+	"void main(triangle VSOUTPUT o[3], inout TriangleStream Output)\n"
+	"{\n"
+	"for(int slice = 0; slice < 2; slice++)\n"
+	"{\n"
+	" for(int i = 0; i < 3; i++)\n"
+	" {\n"
+	" GSOUTPUT OUT;\n"
+	" OUT.vPosition = o[i].vPosition;\n"
+	" OUT.vTexCoord = o[i].vTexCoord;\n"
+	" OUT.vTexCoord.z = slice;\n"
+	" OUT.vTexCoord1 = o[i].vTexCoord1;\n"
+	" OUT.slice = slice;\n"
+	" Output.Append(OUT);\n"
+	" }\n"
+	" Output.RestartStrip();\n"
+	"}\n"
+	"}\n"
+};
+
+// Returns the RGBA6 -> RGB8 reinterpretation pixel shader.
+// Slot 0 holds the single-sample shader and slot 1 the MSAA shader generated for
+// g_ActiveConfig.iMultisamples; whichever slot is requested is compiled on first use
+// and served from the cached blob afterwards.
+// multisampled: request the MSAA variant (ignored when iMultisamples == 1).
+// Returns empty bytecode only if the blob is still null after the compile attempt.
+D3D12_SHADER_BYTECODE StaticShaderCache::GetReinterpRGBA6ToRGB8PixelShader(bool multisampled)
+{
+	D3D12_SHADER_BYTECODE bytecode = {};
+
+	const bool use_msaa = multisampled && g_ActiveConfig.iMultisamples != 1;
+	ID3DBlob*& blob = s_rgba6_to_rgb8_program_blob[use_msaa ? 1 : 0];
+
+	if (!blob)
+	{
+		if (use_msaa)
+		{
+			// create MSAA shader for current AA mode
+			std::string buf = StringFromFormat(s_reint_rgba6_to_rgb8_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
+
+			D3D::CompilePixelShader(buf, &blob);
+		}
+		else
+		{
+			D3D::CompilePixelShader(s_reint_rgba6_to_rgb8_program_hlsl, &blob);
+		}
+	}
+
+	// The cached MSAA blob must be returned too: the old "else if (!blob[1])" shape
+	// handed back empty bytecode on every call after the first.
+	if (blob)
+	{
+		bytecode = { blob->GetBufferPointer(), blob->GetBufferSize() };
+	}
+
+	return bytecode;
+}
+
+// Returns the RGB8 -> RGBA6 reinterpretation pixel shader.
+// Same caching scheme as GetReinterpRGBA6ToRGB8PixelShader: slot 0 is the single-sample
+// shader, slot 1 the MSAA shader for g_ActiveConfig.iMultisamples, each compiled on first use.
+// multisampled: request the MSAA variant (ignored when iMultisamples == 1).
+// Returns empty bytecode only if the blob is still null after the compile attempt.
+D3D12_SHADER_BYTECODE StaticShaderCache::GetReinterpRGB8ToRGBA6PixelShader(bool multisampled)
+{
+	D3D12_SHADER_BYTECODE bytecode = {};
+
+	const bool use_msaa = multisampled && g_ActiveConfig.iMultisamples != 1;
+	ID3DBlob*& blob = s_rgb8_to_rgba6_program_blob[use_msaa ? 1 : 0];
+
+	if (!blob)
+	{
+		if (use_msaa)
+		{
+			// create MSAA shader for current AA mode
+			std::string buf = StringFromFormat(s_reint_rgb8_to_rgba6_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
+
+			D3D::CompilePixelShader(buf, &blob);
+		}
+		else
+		{
+			D3D::CompilePixelShader(s_reint_rgb8_to_rgba6_program_hlsl, &blob);
+		}
+	}
+
+	// Serve the cached blob as well as a freshly compiled one.
+	if (blob)
+	{
+		bytecode = { blob->GetBufferPointer(), blob->GetBufferSize() };
+	}
+
+	return bytecode;
+}
+
+// Returns the color copy pixel shader. Slot 0 is dereferenced without a null check here;
+// slot 1 (MSAA) is compiled on first request for g_ActiveConfig.iMultisamples.
+D3D12_SHADER_BYTECODE StaticShaderCache::GetColorCopyPixelShader(bool multisampled)
+{
+	D3D12_SHADER_BYTECODE bytecode = {};
+
+	if (!multisampled || g_ActiveConfig.iMultisamples == 1)
+	{
+		bytecode = { s_color_copy_program_blob[0]->GetBufferPointer(), s_color_copy_program_blob[0]->GetBufferSize() };
+	}
+	else if (s_color_copy_program_blob[1])
+	{
+		bytecode = { s_color_copy_program_blob[1]->GetBufferPointer(), s_color_copy_program_blob[1]->GetBufferSize() };
+	}
+	else
+	{
+		// create MSAA shader for current AA mode
+		std::string buf = StringFromFormat(s_color_copy_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
+
+		D3D::CompilePixelShader(buf, &s_color_copy_program_blob[1]);
+		bytecode = { s_color_copy_program_blob[1]->GetBufferPointer(), s_color_copy_program_blob[1]->GetBufferSize() };
+	}
+
+	return bytecode;
+}
+
+D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthCopyPixelShader(bool multisampled)
+{
+	D3D12_SHADER_BYTECODE bytecode = {};
+
+	if (!multisampled || g_ActiveConfig.iMultisamples == 1)
+	{
+		bytecode = { s_depth_copy_program_blob[0]->GetBufferPointer(), s_depth_copy_program_blob[0]->GetBufferSize() };
+	}
+	else if (s_depth_copy_program_blob[1])
+	{
+		bytecode = { s_depth_copy_program_blob[1]->GetBufferPointer(), s_depth_copy_program_blob[1]->GetBufferSize() };
+	}
+	else
+	{
+		// create MSAA shader for current AA mode
+		std::string buf = StringFromFormat(s_depth_copy_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
+
+		D3D::CompilePixelShader(buf, &s_depth_copy_program_blob[1]);
+		bytecode = {
s_depth_copy_program_blob[1]->GetBufferPointer(), s_depth_copy_program_blob[1]->GetBufferSize() };
+	}
+
+	return bytecode;
+}
+
+// Returns the color matrix pixel shader. Slot 0 is compiled by Init() and dereferenced
+// without a null check; slot 1 (MSAA) is compiled on first request for
+// g_ActiveConfig.iMultisamples and cached.
+D3D12_SHADER_BYTECODE StaticShaderCache::GetColorMatrixPixelShader(bool multisampled)
+{
+	D3D12_SHADER_BYTECODE bytecode = {};
+
+	if (!multisampled || g_ActiveConfig.iMultisamples == 1)
+	{
+		bytecode = { s_color_matrix_program_blob[0]->GetBufferPointer(), s_color_matrix_program_blob[0]->GetBufferSize() };
+	}
+	else if (s_color_matrix_program_blob[1])
+	{
+		bytecode = { s_color_matrix_program_blob[1]->GetBufferPointer(), s_color_matrix_program_blob[1]->GetBufferSize() };
+	}
+	else
+	{
+		// create MSAA shader for current AA mode
+		std::string buf = StringFromFormat(s_color_matrix_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
+
+		D3D::CompilePixelShader(buf, &s_color_matrix_program_blob[1]);
+		bytecode = { s_color_matrix_program_blob[1]->GetBufferPointer(), s_color_matrix_program_blob[1]->GetBufferSize() };
+	}
+
+	return bytecode;
+}
+
+// Returns the depth matrix pixel shader; same slot scheme as GetColorMatrixPixelShader
+// (slot 0 from Init(), slot 1 compiled on first MSAA request).
+D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthMatrixPixelShader(bool multisampled)
+{
+	D3D12_SHADER_BYTECODE bytecode = {};
+
+	if (!multisampled || g_ActiveConfig.iMultisamples == 1)
+	{
+		bytecode = { s_depth_matrix_program_blob[0]->GetBufferPointer(), s_depth_matrix_program_blob[0]->GetBufferSize() };
+	}
+	else if (s_depth_matrix_program_blob[1])
+	{
+		bytecode = { s_depth_matrix_program_blob[1]->GetBufferPointer(), s_depth_matrix_program_blob[1]->GetBufferSize() };
+	}
+	else
+	{
+		// create MSAA shader for current AA mode
+		std::string buf = StringFromFormat(s_depth_matrix_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
+
+		D3D::CompilePixelShader(buf, &s_depth_matrix_program_blob[1]);
+
+		bytecode = { s_depth_matrix_program_blob[1]->GetBufferPointer(), s_depth_matrix_program_blob[1]->GetBufferSize() };
+	}
+
+	return bytecode;
+}
+
+// Returns the clear pixel shader compiled by Init(). The blob is dereferenced
+// unconditionally, so Init() must have produced it first.
+D3D12_SHADER_BYTECODE StaticShaderCache::GetClearPixelShader()
+{
+	D3D12_SHADER_BYTECODE shader = {};
+	shader.BytecodeLength = s_clear_program_blob->GetBufferSize();
+	shader.pShaderBytecode = s_clear_program_blob->GetBufferPointer();
+
+	return shader;
+}
+
+// Returns the anaglyph pixel shader compiled by Init() (blob dereferenced unconditionally).
+D3D12_SHADER_BYTECODE StaticShaderCache::GetAnaglyphPixelShader()
+{
+	D3D12_SHADER_BYTECODE shader = {};
+	shader.BytecodeLength = s_anaglyph_program_blob->GetBufferSize();
+	shader.pShaderBytecode = s_anaglyph_program_blob->GetBufferPointer();
+
+	return shader;
+}
+
+// Returns the simple (position + texcoord) vertex shader compiled by Init().
+D3D12_SHADER_BYTECODE StaticShaderCache::GetSimpleVertexShader()
+{
+	D3D12_SHADER_BYTECODE shader = {};
+	shader.BytecodeLength = s_simple_vertex_shader_blob->GetBufferSize();
+	shader.pShaderBytecode = s_simple_vertex_shader_blob->GetBufferPointer();
+
+	return shader;
+}
+
+// Returns the clear (position + color) vertex shader compiled by Init().
+D3D12_SHADER_BYTECODE StaticShaderCache::GetClearVertexShader()
+{
+	D3D12_SHADER_BYTECODE shader = {};
+	shader.BytecodeLength = s_simple_clear_vertex_shader_blob->GetBufferSize();
+	shader.pShaderBytecode = s_simple_clear_vertex_shader_blob->GetBufferPointer();
+
+	return shader;
+}
+
+// Returns the input layout description that pairs with GetSimpleVertexShader().
+D3D12_INPUT_LAYOUT_DESC StaticShaderCache::GetSimpleVertexShaderInputLayout()
+{
+	return s_simple_vertex_shader_input_layout;
+}
+
+// Returns the input layout description that pairs with GetClearVertexShader().
+D3D12_INPUT_LAYOUT_DESC StaticShaderCache::GetClearVertexShaderInputLayout()
+{
+	return s_clear_vertex_shader_input_layout;
+}
+
+// Returns the clear geometry shader when g_ActiveConfig.iStereoMode > 0,
+// otherwise empty bytecode (no geometry shader stage).
+D3D12_SHADER_BYTECODE StaticShaderCache::GetClearGeometryShader()
+{
+	D3D12_SHADER_BYTECODE bytecode = {};
+	if (g_ActiveConfig.iStereoMode > 0)
+	{
+		bytecode.BytecodeLength = s_clear_geometry_shader_blob->GetBufferSize();
+		bytecode.pShaderBytecode = s_clear_geometry_shader_blob->GetBufferPointer();
+	}
+
+	return bytecode;
+}
+
+// Returns the copy geometry shader when g_ActiveConfig.iStereoMode > 0,
+// otherwise empty bytecode (no geometry shader stage).
+D3D12_SHADER_BYTECODE StaticShaderCache::GetCopyGeometryShader()
+{
+	D3D12_SHADER_BYTECODE bytecode = {};
+	if (g_ActiveConfig.iStereoMode > 0)
+	{
+		bytecode.BytecodeLength = s_copy_geometry_shader_blob->GetBufferSize();
+		bytecode.pShaderBytecode = s_copy_geometry_shader_blob->GetBufferPointer();
+	}
+
+	return bytecode;
+}
+
+// Compiles every shader that does not depend on the MSAA sample count.
+// The results of the Compile* calls are not inspected here, and the getters above
+// dereference these blobs directly.
+void StaticShaderCache::Init()
+{
+	// Compile static pixel shaders
+	D3D::CompilePixelShader(s_clear_program_hlsl, &s_clear_program_blob);
+	D3D::CompilePixelShader(s_anaglyph_program_hlsl, &s_anaglyph_program_blob);
+	D3D::CompilePixelShader(s_color_copy_program_hlsl, &s_color_copy_program_blob[0]);
+	D3D::CompilePixelShader(s_depth_copy_program_hlsl, &s_depth_copy_program_blob[0]);
+	D3D::CompilePixelShader(s_color_matrix_program_hlsl, &s_color_matrix_program_blob[0]);
+	D3D::CompilePixelShader(s_depth_matrix_program_hlsl, &s_depth_matrix_program_blob[0]);
+
+	// Compile static vertex shaders
+	D3D::CompileVertexShader(s_simple_vertex_shader_hlsl, &s_simple_vertex_shader_blob);
+	D3D::CompileVertexShader(s_clear_vertex_shader_hlsl, &s_simple_clear_vertex_shader_blob);
+
+	// Compile static geometry shaders
+	D3D::CompileGeometryShader(s_clear_geometry_shader_hlsl, &s_clear_geometry_shader_blob);
+	D3D::CompileGeometryShader(s_copy_geometry_shader_hlsl, &s_copy_geometry_shader_blob);
+}
+
+// Call this when multisampling mode changes, and shaders need to be regenerated.
+// Drops every cached MSAA (slot 1) pixel shader blob so the matching getter recompiles it
+// with the new g_ActiveConfig.iMultisamples on its next call. The depth copy shader is
+// included: its MSAA source also bakes in the sample count via "#define SAMPLES %d".
+void StaticShaderCache::InvalidateMSAAShaders()
+{
+	SAFE_RELEASE(s_color_copy_program_blob[1]);
+	SAFE_RELEASE(s_depth_copy_program_blob[1]);
+	SAFE_RELEASE(s_color_matrix_program_blob[1]);
+	SAFE_RELEASE(s_depth_matrix_program_blob[1]);
+	SAFE_RELEASE(s_rgb8_to_rgba6_program_blob[1]);
+	SAFE_RELEASE(s_rgba6_to_rgb8_program_blob[1]);
+}
+
+// Releases every shader blob owned by this cache (both the blobs compiled in Init()
+// and the variants compiled on demand by the getters).
+void StaticShaderCache::Shutdown()
+{
+	// Free pixel shader blobs
+
+	SAFE_RELEASE(s_clear_program_blob);
+	SAFE_RELEASE(s_anaglyph_program_blob);
+
+	for (unsigned int i = 0; i < 2; ++i)
+	{
+		SAFE_RELEASE(s_color_copy_program_blob[i]);
+		// Depth copy blobs were previously never released here.
+		SAFE_RELEASE(s_depth_copy_program_blob[i]);
+		SAFE_RELEASE(s_color_matrix_program_blob[i]);
+		SAFE_RELEASE(s_depth_matrix_program_blob[i]);
+		SAFE_RELEASE(s_rgba6_to_rgb8_program_blob[i]);
+		SAFE_RELEASE(s_rgb8_to_rgba6_program_blob[i]);
+	}
+
+	// Free vertex shader blobs
+
+	SAFE_RELEASE(s_simple_vertex_shader_blob);
+	SAFE_RELEASE(s_simple_clear_vertex_shader_blob);
+
+	// Free geometry shader blobs
+
+	SAFE_RELEASE(s_clear_geometry_shader_blob);
+	SAFE_RELEASE(s_copy_geometry_shader_blob);
+}
+
+}
\ No newline at end of file
diff --git a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h
new file mode 100644
index 0000000000..bbdb37cb9e
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h
@@ -0,0 +1,38 @@
+// Copyright 2015 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+namespace DX12
+{
+
+// Static-only cache of the fixed-function helper shaders (copy, clear, matrix,
+// reinterpret, anaglyph) used by the D3D12 backend. Getters return
+// D3D12_SHADER_BYTECODE views into blobs owned by the cache.
+// NOTE(review): this header names D3D12 types without including anything itself;
+// presumably every includer pulls in the D3D12 headers first - confirm.
+class StaticShaderCache final
+{
+public:
+	// Compiles the shaders that do not depend on the MSAA sample count.
+	static void Init();
+	// Releases cached MSAA shader variants so they are rebuilt on next request.
+	static void InvalidateMSAAShaders();
+	// Releases the blobs held by the cache.
+	static void Shutdown();
+
+	// Pixel shaders
+	// "multisampled" selects the MSAA variant; it is ignored when
+	// g_ActiveConfig.iMultisamples == 1.
+	static D3D12_SHADER_BYTECODE GetColorMatrixPixelShader(bool multisampled);
+	static D3D12_SHADER_BYTECODE GetColorCopyPixelShader(bool multisampled);
+	static D3D12_SHADER_BYTECODE GetDepthMatrixPixelShader(bool multisampled);
+	static D3D12_SHADER_BYTECODE GetDepthCopyPixelShader(bool multisampled);
+	static D3D12_SHADER_BYTECODE GetClearPixelShader();
+	static D3D12_SHADER_BYTECODE GetAnaglyphPixelShader();
+	static D3D12_SHADER_BYTECODE GetReinterpRGBA6ToRGB8PixelShader(bool multisampled);
+	static D3D12_SHADER_BYTECODE GetReinterpRGB8ToRGBA6PixelShader(bool multisampled);
+
+	// Vertex shaders
+	static D3D12_SHADER_BYTECODE GetSimpleVertexShader();
+	static D3D12_SHADER_BYTECODE GetClearVertexShader();
+	static D3D12_INPUT_LAYOUT_DESC GetSimpleVertexShaderInputLayout();
+	static D3D12_INPUT_LAYOUT_DESC GetClearVertexShaderInputLayout();
+
+	// Geometry shaders
+	// Both return empty bytecode unless g_ActiveConfig.iStereoMode > 0.
+	static D3D12_SHADER_BYTECODE GetClearGeometryShader();
+	static D3D12_SHADER_BYTECODE GetCopyGeometryShader();
+};
+
+}
\ No newline at end of file
diff --git a/Source/Core/VideoBackends/D3D12/Television.cpp b/Source/Core/VideoBackends/D3D12/Television.cpp
new file mode 100644
index 0000000000..548859a1ac
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/Television.cpp
@@ -0,0 +1,45 @@
+// Copyright 2011 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+// NOTE(review): the header name on the next line is missing in the text as received -
+// confirm against the real file before building.
+#include
+
+#include "Core/HW/Memmap.h"
+#include "VideoBackends/D3D12/D3DBase.h"
+#include "VideoBackends/D3D12/D3DShader.h"
+#include "VideoBackends/D3D12/D3DState.h"
+#include "VideoBackends/D3D12/D3DUtil.h"
+#include "VideoBackends/D3D12/Television.h"
+#include "VideoCommon/VideoConfig.h"
+
+// D3D12TODO: Add DX12 path for this file.
+
+namespace DX12
+{
+
+// Every member below is currently an empty placeholder: the D3D12 backend
+// has no Television implementation yet.
+
+Television::Television()
+{
+	// D3D12TODO: Add DX12 path for this file.
+}
+
+// No-op placeholder.
+void Television::Init()
+{
+	// D3D12TODO: Add DX12 path for this file.
+}
+
+// No-op placeholder.
+void Television::Shutdown()
+{
+	// D3D12TODO: Add DX12 path for this file.
+}
+
+// No-op placeholder; all four arguments are ignored.
+void Television::Submit(u32 xfb_address, u32 stride, u32 width, u32 height)
+{
+	// D3D12TODO: Add DX12 path for this file.
+}
+
+// No-op placeholder; nothing is drawn.
+void Television::Render()
+{
+	// D3D12TODO: Add DX12 path for this file.
+}
+
+}
diff --git a/Source/Core/VideoBackends/D3D12/Television.h b/Source/Core/VideoBackends/D3D12/Television.h
new file mode 100644
index 0000000000..1bfbbb7c7d
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/Television.h
@@ -0,0 +1,37 @@
+// Copyright 2011 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "VideoCommon/VideoCommon.h"
+
+// D3D12TODO: Add DX12 path for this file.
+
+namespace DX12
+{
+
+// Interface for presenting XFB data on the emulated TV. It has no data members yet.
+class Television final
+{
+
+public:
+
+	Television();
+
+	void Init();
+	void Shutdown();
+
+	// Submit video data to be drawn. This is meant to change the current state of
+	// the TV. xfb_address points to YUYV data stored in GameCube/Wii RAM, but the XFB
+	// may be virtualized when rendering so the RAM may not actually be read.
+	// stride, width and height describe the submitted image.
+	// NOTE(review): units of stride/width/height are not visible here - confirm.
+	void Submit(u32 xfb_address, u32 stride, u32 width, u32 height);
+
+	// Render the current state of the TV.
+	void Render();
+
+private:
+
+
+};
+
+}
diff --git a/Source/Core/VideoBackends/D3D12/TextureCache.cpp b/Source/Core/VideoBackends/D3D12/TextureCache.cpp
new file mode 100644
index 0000000000..cafe0dc651
--- /dev/null
+++ b/Source/Core/VideoBackends/D3D12/TextureCache.cpp
@@ -0,0 +1,672 @@
+// Copyright 2010 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+ +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h" +#include "VideoBackends/D3D12/D3DShader.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DStreamBuffer.h" +#include "VideoBackends/D3D12/D3DUtil.h" +#include "VideoBackends/D3D12/FramebufferManager.h" +#include "VideoBackends/D3D12/PSTextureEncoder.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" +#include "VideoBackends/D3D12/TextureCache.h" +#include "VideoBackends/D3D12/TextureEncoder.h" +#include "VideoCommon/ImageWrite.h" +#include "VideoCommon/LookUpTables.h" +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +static std::unique_ptr s_encoder = nullptr; + +static std::unique_ptr s_efb_copy_stream_buffer = nullptr; + +static ID3D12Resource* s_texture_cache_entry_readback_buffer = nullptr; +static void* s_texture_cache_entry_readback_buffer_data = nullptr; +static UINT s_texture_cache_entry_readback_buffer_size = 0; + +TextureCache::TCacheEntry::~TCacheEntry() +{ + m_texture->Release(); +} + +void TextureCache::TCacheEntry::Bind(unsigned int stage) +{ + // Textures bound as group in TextureCache::BindTextures method. 
+}
+
+// Dumps mip level 0 of this texture to a PNG file.
+// The texture is copied into a persistently mapped readback buffer (grown on demand),
+// the command list is flushed and waited on, and the mapped rows are handed to TextureToPng.
+// filename: destination path.
+// level:    mip level requested; only 0 is supported, any other level returns false.
+// Returns the result of TextureToPng, or false for an unsupported level.
+// NOTE(review): the cast target types used below (<unsigned int>, <u8*>) were absent in the
+// text under review and are filled in from context - confirm against the real file.
+bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level)
+{
+	// EXISTINGD3D11TODO: Somehow implement this (D3DX11 doesn't support dumping individual LODs)
+	if (level)
+	{
+		// Warn only once, but refuse every time: otherwise later calls would write
+		// level 0 pixels under the file name of another level.
+		static bool warn_once = true;
+		if (warn_once)
+		{
+			WARN_LOG(VIDEO, "Dumping individual LOD not supported by D3D12 backend!");
+			warn_once = false;
+		}
+		return false;
+	}
+
+	D3D12_RESOURCE_DESC texture_desc = m_texture->GetTex12()->GetDesc();
+
+	// 4 bytes per texel, each row padded to the placed-footprint pitch alignment.
+	const unsigned int texture_width = static_cast<unsigned int>(texture_desc.Width);
+	const unsigned int row_pitch = D3D::AlignValue(texture_width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+
+	// The copy writes texture_desc.Height rows, so the buffer must hold all of them
+	// (it was previously sized for a single row).
+	const unsigned int required_readback_buffer_size = row_pitch * texture_desc.Height;
+
+	if (s_texture_cache_entry_readback_buffer_size < required_readback_buffer_size)
+	{
+		s_texture_cache_entry_readback_buffer_size = required_readback_buffer_size;
+
+		// We know the readback buffer won't be in use right now, since we wait on this thread
+		// for the GPU to finish execution right after copying to it.
+
+		SAFE_RELEASE(s_texture_cache_entry_readback_buffer);
+
+		// The old mapping died with the buffer; it is re-established below.
+		s_texture_cache_entry_readback_buffer_data = nullptr;
+	}
+
+	// Test the buffer pointer, not the size: the size is always non-zero by now, so the
+	// old "!size" check never created the buffer.
+	if (!s_texture_cache_entry_readback_buffer)
+	{
+		CheckHR(
+			D3D::device12->CreateCommittedResource(
+				&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
+				D3D12_HEAP_FLAG_NONE,
+				&CD3DX12_RESOURCE_DESC::Buffer(s_texture_cache_entry_readback_buffer_size),
+				D3D12_RESOURCE_STATE_COPY_DEST,
+				nullptr,
+				IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer)
+				)
+			);
+
+		CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &s_texture_cache_entry_readback_buffer_data));
+	}
+
+	m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
+
+	D3D12_TEXTURE_COPY_LOCATION dst_location = {};
+	dst_location.pResource = s_texture_cache_entry_readback_buffer;
+	dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+	dst_location.PlacedFootprint.Offset = 0;
+	dst_location.PlacedFootprint.Footprint.Depth = 1;
+	dst_location.PlacedFootprint.Footprint.Format = texture_desc.Format;
+	dst_location.PlacedFootprint.Footprint.Width = texture_width;
+	dst_location.PlacedFootprint.Footprint.Height = texture_desc.Height;
+	dst_location.PlacedFootprint.Footprint.RowPitch = row_pitch;
+
+	D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0);
+
+	D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr);
+
+	// Submit and block until the GPU has finished, so the mapped data is complete.
+	D3D::command_list_mgr->ExecuteQueuedWork(true);
+
+	return TextureToPng(
+		static_cast<u8*>(s_texture_cache_entry_readback_buffer_data),
+		dst_location.PlacedFootprint.Footprint.RowPitch,
+		filename,
+		dst_location.PlacedFootprint.Footprint.Width,
+		dst_location.PlacedFootprint.Footprint.Height
+		);
+}
+
+void TextureCache::TCacheEntry::CopyRectangleFromTexture(
+	const TCacheEntryBase* source,
+	const MathUtil::Rectangle& src_rect,
+	const MathUtil::Rectangle& dst_rect)
+{
+	const TCacheEntry* srcentry = reinterpret_cast(source);
+	if (src_rect.GetWidth() == dst_rect.GetWidth()
+		&& src_rect.GetHeight() == dst_rect.GetHeight())
+	{
+		const D3D12_BOX* src_box_pointer = nullptr;
+		D3D12_BOX src_box;
+		if (src_rect.left != 0 || src_rect.top != 0)
+		{
+			src_box.front = 0;
+			src_box.back = 1;
+			src_box.left = src_rect.left;
+			src_box.top = src_rect.top;
+			src_box.right = src_rect.right;
+			src_box.bottom = src_rect.bottom;
+			src_box_pointer = &src_box;
+		}
+
+		if (static_cast(src_rect.GetHeight()) > config.height ||
+			static_cast(src_rect.GetWidth()) > config.width)
+		{
+			// To mimic D3D11 behavior, we're just going to drop the clear since it is invalid.
+			// This invalid copy needs to be fixed above the Backend level.
+
+			// On D3D12, instead of silently dropping this invalid clear, the runtime throws an exception
+			// so we need to filter it out ourselves.
+ + return; + } + + D3D12_TEXTURE_COPY_LOCATION dst_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0); + D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(srcentry->m_texture->GetTex12(), 0); + + m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_DEST); + srcentry->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + + D3D::current_command_list->CopyTextureRegion(&dst_location, dst_rect.left, dst_rect.top, 0, &src_location, src_box_pointer); + + m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + srcentry->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + return; + } + else if (!config.rendertarget) + { + return; + } + + const D3D12_VIEWPORT vp = { + float(dst_rect.left), + float(dst_rect.top), + float(dst_rect.GetWidth()), + float(dst_rect.GetHeight()), + D3D12_MIN_DEPTH, + D3D12_MAX_DEPTH + }; + D3D::current_command_list->RSSetViewports(1, &vp); + + m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &m_texture->GetRTV12(), FALSE, nullptr); + + D3D::SetLinearCopySampler(); + + D3D12_RECT src_rc; + src_rc.left = src_rect.left; + src_rc.right = src_rect.right; + src_rc.top = src_rect.top; + src_rc.bottom = src_rect.bottom; + + D3D::DrawShadedTexQuad(srcentry->m_texture, &src_rc, + srcentry->config.width, srcentry->config.height, + StaticShaderCache::GetColorCopyPixelShader(false), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), 1.0, 0, + DXGI_FORMAT_R8G8B8A8_UNORM, false, m_texture->GetMultisampled()); + + m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + 
FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + + g_renderer->RestoreAPIState(); +} + +void TextureCache::TCacheEntry::Load(unsigned int width, unsigned int height, + unsigned int expanded_width, unsigned int level) +{ + unsigned int src_pitch = 4 * expanded_width; + D3D::ReplaceRGBATexture2D(m_texture->GetTex12(), TextureCache::temp, width, height, src_pitch, level, m_texture->GetResourceUsageState()); +} + +TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConfig& config) +{ + if (config.rendertarget) + { + D3DTexture2D* texture = D3DTexture2D::Create(config.width, config.height, + static_cast((static_cast(D3D11_BIND_RENDER_TARGET) | static_cast(D3D11_BIND_SHADER_RESOURCE))), + D3D11_USAGE_DEFAULT, DXGI_FORMAT_R8G8B8A8_UNORM, 1, config.layers); + + TCacheEntry* entry = new TCacheEntry(config, texture); + + entry->m_texture_srv_cpu_handle = texture->GetSRV12CPU(); + entry->m_texture_srv_gpu_handle = texture->GetSRV12GPU(); + entry->m_texture_srv_gpu_handle_cpu_shadow = texture->GetSRV12GPUCPUShadow(); + + return entry; + } + else + { + ID3D12Resource* texture_resource = nullptr; + + D3D12_RESOURCE_DESC texture_resource_desc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, + config.width, config.height, 1, config.levels); + + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC(texture_resource_desc), + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + nullptr, + IID_PPV_ARGS(&texture_resource) + ) + ); + + D3DTexture2D* texture = new D3DTexture2D( + 
texture_resource, + D3D11_BIND_SHADER_RESOURCE, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + false, + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + ); + + TCacheEntry* const entry = new TCacheEntry( + config, texture + ); + + entry->m_texture_srv_cpu_handle = texture->GetSRV12CPU(); + entry->m_texture_srv_gpu_handle = texture->GetSRV12GPU(); + entry->m_texture_srv_gpu_handle_cpu_shadow = texture->GetSRV12GPUCPUShadow(); + + // EXISTINGD3D11TODO: better debug names + D3D::SetDebugObjectName12(entry->m_texture->GetTex12(), "a texture of the TextureCache"); + + SAFE_RELEASE(texture_resource); + + return entry; + } +} + +void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat src_format, const EFBRectangle& srcRect, + bool scale_by_half, unsigned int cbuf_id, const float* colmat) +{ + static unsigned int old_cbuf_id = UINT_MAX; + + // When copying at half size, in multisampled mode, resolve the color/depth buffer first. + // This is because multisampled texture reads go through Load, not Sample, and the linear + // filter is ignored. + bool multisampled = (g_ActiveConfig.iMultisamples > 1); + D3DTexture2D* efb_tex = (src_format == PEControl::Z24) ? + FramebufferManager::GetEFBDepthTexture() : + FramebufferManager::GetEFBColorTexture(); + if (multisampled && scale_by_half) + { + multisampled = false; + efb_tex = (src_format == PEControl::Z24) ? 
+ FramebufferManager::GetResolvedEFBDepthTexture() : + FramebufferManager::GetResolvedEFBColorTexture(); + } + + // stretch picture with increased internal resolution + const D3D12_VIEWPORT vp = { + 0.f, + 0.f, + static_cast(config.width), + static_cast(config.height), + D3D12_MIN_DEPTH, + D3D12_MAX_DEPTH + }; + + D3D::current_command_list->RSSetViewports(1, &vp); + + // set transformation + if (cbuf_id != old_cbuf_id) + { + s_efb_copy_stream_buffer->AllocateSpaceInBuffer(28 * sizeof(float), 256); + + memcpy(s_efb_copy_stream_buffer->GetCPUAddressOfCurrentAllocation(), colmat, 28 * sizeof(float)); + + old_cbuf_id = cbuf_id; + } + + D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_efb_copy_stream_buffer->GetGPUAddressOfCurrentAllocation()); + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); + + const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect); + // EXISTINGD3D11TODO: try targetSource.asRECT(); + const D3D12_RECT sourcerect = CD3DX12_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom); + + // Use linear filtering if (bScaleByHalf), use point filtering otherwise + if (scale_by_half) + D3D::SetLinearCopySampler(); + else + D3D::SetPointCopySampler(); + + // Make sure we don't draw with the texture set as both a source and target. + // (This can happen because we don't unbind textures when we free them.) + + m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &m_texture->GetRTV12(), FALSE, nullptr); + + // Create texture copy + D3D::DrawShadedTexQuad( + efb_tex, + &sourcerect, + Renderer::GetTargetWidth(), + Renderer::GetTargetHeight(), + (src_format == PEControl::Z24) ? 
StaticShaderCache::GetDepthMatrixPixelShader(multisampled) : StaticShaderCache::GetColorMatrixPixelShader(multisampled), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + StaticShaderCache::GetCopyGeometryShader(), + 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, m_texture->GetMultisampled() + ); + + m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + + g_renderer->RestoreAPIState(); +} + +void TextureCache::CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, + bool isIntensity, bool scaleByHalf) +{ + s_encoder->Encode(dst, format, native_width, bytes_per_row, num_blocks_y, memory_stride, srcFormat, srcRect, isIntensity, scaleByHalf); +} + +static const constexpr char s_palette_shader_hlsl[] = +R"HLSL( +sampler samp0 : register(s0); +Texture2DArray Tex0 : register(t0); +Buffer Tex1 : register(t1); +uniform float Multiply; + +uint Convert3To8(uint v) +{ + // Swizzle bits: 00000123 -> 12312312 + return (v << 5) | (v << 2) | (v >> 1); +} + +uint Convert4To8(uint v) +{ + // Swizzle bits: 00001234 -> 12341234 + return (v << 4) | v; +} + +uint Convert5To8(uint v) +{ + // Swizzle bits: 00012345 -> 12345123 + return (v << 3) | (v >> 2); +} + +uint Convert6To8(uint v) +{ + // Swizzle bits: 00123456 -> 12345612 + return (v << 2) | (v >> 4); +} + +float4 DecodePixel_RGB5A3(uint val) +{ + int r,g,b,a; + if ((val&0x8000)) + { 
+ r=Convert5To8((val>>10) & 0x1f); + g=Convert5To8((val>>5 ) & 0x1f); + b=Convert5To8((val ) & 0x1f); + a=0xFF; + } + else + { + a=Convert3To8((val>>12) & 0x7); + r=Convert4To8((val>>8 ) & 0xf); + g=Convert4To8((val>>4 ) & 0xf); + b=Convert4To8((val ) & 0xf); + } + return float4(r, g, b, a) / 255; +} + +float4 DecodePixel_RGB565(uint val) +{ + int r, g, b, a; + r = Convert5To8((val >> 11) & 0x1f); + g = Convert6To8((val >> 5) & 0x3f); + b = Convert5To8((val) & 0x1f); + a = 0xFF; + return float4(r, g, b, a) / 255; +} + +float4 DecodePixel_IA8(uint val) +{ + int i = val & 0xFF; + int a = val >> 8; + return float4(i, i, i, a) / 255; +} + +void main( + out float4 ocol0 : SV_Target, + in float4 pos : SV_Position, + in float3 uv0 : TEXCOORD0) +{ + uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r; + src = Tex1.Load(src); + src = ((src << 8) & 0xFF00) | (src >> 8); + ocol0 = DECODE(src); +} +)HLSL"; + +void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) +{ + // stretch picture with increased internal resolution + const D3D12_VIEWPORT vp = { 0.f, 0.f, static_cast(unconverted->config.width), static_cast(unconverted->config.height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::current_command_list->RSSetViewports(1, &vp); + + const unsigned int palette_buffer_allocation_size = 512; + m_palette_stream_buffer->AllocateSpaceInBuffer(palette_buffer_allocation_size, 256); + memcpy(m_palette_stream_buffer->GetCPUAddressOfCurrentAllocation(), palette, palette_buffer_allocation_size); + + // D3D12: Because the second SRV slot is occupied by this buffer, and an arbitrary texture occupies the first SRV slot, + // we need to allocate temporary space out of our descriptor heap, place the palette SRV in the second slot, then copy the + // existing texture's descriptor into the first slot. + + // First, allocate the (temporary) space in the descriptor heap. 
+ D3D12_CPU_DESCRIPTOR_HANDLE srv_group_cpu_handle[2] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE srv_group_gpu_handle[2] = {}; + D3D::gpu_descriptor_heap_mgr->AllocateGroup(srv_group_cpu_handle, 2, srv_group_gpu_handle, nullptr, true); + + srv_group_cpu_handle[1].ptr = srv_group_cpu_handle[0].ptr + D3D::resource_descriptor_size; + + // Now, create the palette SRV at the appropriate offset. + D3D12_SHADER_RESOURCE_VIEW_DESC palette_buffer_srv_desc = { + DXGI_FORMAT_R16_UINT, // DXGI_FORMAT Format; + D3D12_SRV_DIMENSION_BUFFER, // D3D12_SRV_DIMENSION ViewDimension; + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING // UINT Shader4ComponentMapping; + }; + + // Each 'element' is two bytes since format is R16. + palette_buffer_srv_desc.Buffer.FirstElement = m_palette_stream_buffer->GetOffsetOfCurrentAllocation() / sizeof(u16); + palette_buffer_srv_desc.Buffer.NumElements = 256; + + D3D::device12->CreateShaderResourceView(m_palette_stream_buffer->GetBuffer(), &palette_buffer_srv_desc, srv_group_cpu_handle[1]); + + // Now, copy the existing texture's descriptor into the new temporary location. + static_cast(unconverted)->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + D3D::device12->CopyDescriptorsSimple( + 1, + srv_group_cpu_handle[0], + static_cast(unconverted)->m_texture->GetSRV12GPUCPUShadow(), + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + ); + + // Finally, bind our temporary location. + D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, srv_group_gpu_handle[0]); + + // D3D11EXISTINGTODO: Add support for C14X2 format. (Different multiplier, more palette entries.) + + // D3D12: See TextureCache::TextureCache() - because there are only two possible buffer contents here, + // just pre-populate the data in two parts of the same upload heap. 
+ if ((unconverted->format & 0xf) == GX_TF_I4) + { + D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, m_palette_uniform_buffer->GetGPUVirtualAddress()); + } + else + { + D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, m_palette_uniform_buffer->GetGPUVirtualAddress() + 256); + } + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); + + const D3D12_RECT source_rect = CD3DX12_RECT(0, 0, unconverted->config.width, unconverted->config.height); + + D3D::SetPointCopySampler(); + + // Make sure we don't draw with the texture set as both a source and target. + // (This can happen because we don't unbind textures when we free them.) + + static_cast(entry)->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + D3D::current_command_list->OMSetRenderTargets(1, &static_cast(entry)->m_texture->GetRTV12(), FALSE, nullptr); + + // Create texture copy + D3D::DrawShadedTexQuad( + static_cast(unconverted)->m_texture, + &source_rect, unconverted->config.width, + unconverted->config.height, + m_palette_pixel_shaders[format], + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + StaticShaderCache::GetCopyGeometryShader(), + 1.0f, + 0, + DXGI_FORMAT_R8G8B8A8_UNORM, + true, + static_cast(entry)->m_texture->GetMultisampled() + ); + + static_cast(entry)->m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, 
&FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + + g_renderer->RestoreAPIState(); +} + +D3D12_SHADER_BYTECODE GetConvertShader12(std::string& Type) +{ + std::string shader = "#define DECODE DecodePixel_"; + shader.append(Type); + shader.append("\n"); + shader.append(s_palette_shader_hlsl); + + ID3DBlob* blob = nullptr; + D3D::CompilePixelShader(shader, &blob); + + return { blob->GetBufferPointer(), blob->GetBufferSize() }; +} + +TextureCache::TextureCache() +{ + s_encoder = std::make_unique(); + s_encoder->Init(); + + s_efb_copy_stream_buffer = std::make_unique(1024 * 1024, 1024 * 1024, nullptr); + + s_texture_cache_entry_readback_buffer = nullptr; + s_texture_cache_entry_readback_buffer_data = nullptr; + s_texture_cache_entry_readback_buffer_size = 0; + + m_palette_pixel_shaders[GX_TL_IA8] = GetConvertShader12(std::string("IA8")); + m_palette_pixel_shaders[GX_TL_RGB565] = GetConvertShader12(std::string("RGB565")); + m_palette_pixel_shaders[GX_TL_RGB5A3] = GetConvertShader12(std::string("RGB5A3")); + + m_palette_stream_buffer = new D3DStreamBuffer(sizeof(u16) * 256 * 1024, sizeof(u16) * 256 * 1024 * 16, nullptr); + + // Right now, there are only two variants of palette_uniform data. So, we'll just create an upload heap to permanently store both of these. + CheckHR( + D3D::device12->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(((16 + 255) & ~255) * 2), // Constant Buffers have to be 256b aligned. "* 2" to create for two sets of data. + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_palette_uniform_buffer) + ) + ); + + D3D::SetDebugObjectName12(m_palette_uniform_buffer, "a constant buffer used in TextureCache::ConvertTexture"); + + // Temporarily repurpose m_palette_stream_buffer as a copy source to populate initial data here. 
+ m_palette_stream_buffer->AllocateSpaceInBuffer(256 * 2, 256); + + u8* upload_heap_data_location = reinterpret_cast(m_palette_stream_buffer->GetCPUAddressOfCurrentAllocation()); + + memset(upload_heap_data_location, 0, 256 * 2); + + float paramsFormatZero[4] = { 15.f }; + float paramsFormatNonzero[4] = { 255.f }; + + memcpy(upload_heap_data_location, paramsFormatZero, sizeof(paramsFormatZero)); + memcpy(upload_heap_data_location + 256, paramsFormatNonzero, sizeof(paramsFormatNonzero)); + + D3D::current_command_list->CopyBufferRegion(m_palette_uniform_buffer, 0, m_palette_stream_buffer->GetBuffer(), m_palette_stream_buffer->GetOffsetOfCurrentAllocation(), 256 * 2); + + DX12::D3D::ResourceBarrier(D3D::current_command_list, m_palette_uniform_buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, 0); +} + +TextureCache::~TextureCache() +{ + s_encoder->Shutdown(); + s_encoder.reset(); + + s_efb_copy_stream_buffer.reset(); + + SAFE_DELETE(m_palette_stream_buffer); + + if (s_texture_cache_entry_readback_buffer) + { + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_texture_cache_entry_readback_buffer); + s_texture_cache_entry_readback_buffer = nullptr; + } + + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_palette_uniform_buffer); +} + +void TextureCache::BindTextures() +{ + unsigned int last_texture = 0; + for (unsigned int i = 0; i < 8; ++i) + { + if (bound_textures[i] != nullptr) + { + last_texture = i; + } + } + + if (last_texture == 0 && bound_textures[0] != nullptr) + { + DX12::D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, reinterpret_cast(bound_textures[0])->m_texture_srv_gpu_handle); + return; + } + + // If more than one texture, allocate space for group. 
+ D3D12_CPU_DESCRIPTOR_HANDLE s_group_base_texture_cpu_handle; + D3D12_GPU_DESCRIPTOR_HANDLE s_group_base_texture_gpu_handle; + DX12::D3D::gpu_descriptor_heap_mgr->AllocateGroup(&s_group_base_texture_cpu_handle, 8, &s_group_base_texture_gpu_handle, nullptr, true); + + for (unsigned int stage = 0; stage <= last_texture; stage++) + { + if (bound_textures[stage] != nullptr) + { + D3D12_CPU_DESCRIPTOR_HANDLE textureDestDescriptor; + textureDestDescriptor.ptr = s_group_base_texture_cpu_handle.ptr + stage * D3D::resource_descriptor_size; + + DX12::D3D::device12->CopyDescriptorsSimple( + 1, + textureDestDescriptor, + reinterpret_cast(bound_textures[stage])->m_texture_srv_gpu_handle_cpu_shadow, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + ); + } + else + { + D3D12_CPU_DESCRIPTOR_HANDLE nullDestDescriptor; + nullDestDescriptor.ptr = s_group_base_texture_cpu_handle.ptr + stage * D3D::resource_descriptor_size; + + DX12::D3D::device12->CopyDescriptorsSimple( + 1, + nullDestDescriptor, + DX12::D3D::null_srv_cpu_shadow, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + ); + } + } + + // Actually bind the textures. + DX12::D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, s_group_base_texture_gpu_handle); +} + +} diff --git a/Source/Core/VideoBackends/D3D12/TextureCache.h b/Source/Core/VideoBackends/D3D12/TextureCache.h new file mode 100644 index 0000000000..b9ef56fd98 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/TextureCache.h @@ -0,0 +1,68 @@ +// Copyright 2008 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "VideoBackends/D3D12/D3DTexture.h" +#include "VideoCommon/TextureCacheBase.h" + +namespace DX12 +{ + +class D3DStreamBuffer; + +class TextureCache final : public TextureCacheBase +{ +public: + TextureCache(); + ~TextureCache(); + + virtual void BindTextures(); + +private: + struct TCacheEntry : TCacheEntryBase + { + D3DTexture2D* const m_texture = nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE m_texture_srv_cpu_handle = {}; + D3D12_GPU_DESCRIPTOR_HANDLE m_texture_srv_gpu_handle = {}; + D3D12_CPU_DESCRIPTOR_HANDLE m_texture_srv_gpu_handle_cpu_shadow = {}; + + TCacheEntry(const TCacheEntryConfig& config, D3DTexture2D* tex) : TCacheEntryBase(config), m_texture(tex) {} + ~TCacheEntry(); + + void CopyRectangleFromTexture( + const TCacheEntryBase* source, + const MathUtil::Rectangle &src_rect, + const MathUtil::Rectangle &dst_rect) override; + + void Load(unsigned int width, unsigned int height, + unsigned int expanded_width, unsigned int levels) override; + + void FromRenderTarget(u8* dst, PEControl::PixelFormat src_format, const EFBRectangle& src_rect, + bool scale_by_half, unsigned int cbuf_id, const float* colmat) override; + + void Bind(unsigned int stage) override; + bool Save(const std::string& filename, unsigned int level) override; + }; + + TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) override; + + u64 EncodeToRamFromTexture(u32 address, void* source_texture, u32 source_width, u32 source_height, bool is_from_z_buffer, bool is_intensity_format, u32 copy_format, int scale_by_half, const EFBRectangle& source) {return 0;}; + + void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) override; + + void CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + PEControl::PixelFormat src_format, const EFBRectangle& src_rect, + bool is_intensity, bool scale_by_half) override; + + void CompileShaders() override { } + void 
DeleteShaders() override { } + + D3DStreamBuffer* m_palette_stream_buffer = nullptr; + + ID3D12Resource* m_palette_uniform_buffer = nullptr; + D3D12_SHADER_BYTECODE m_palette_pixel_shaders[3] = {}; +}; + +} diff --git a/Source/Core/VideoBackends/D3D12/TextureEncoder.h b/Source/Core/VideoBackends/D3D12/TextureEncoder.h new file mode 100644 index 0000000000..346f87d79f --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/TextureEncoder.h @@ -0,0 +1,32 @@ +// Copyright 2011 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "VideoCommon/BPMemory.h" +#include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/VideoCommon.h" + +namespace DX12 +{ +// Maximum number of bytes that can occur in a texture block-row generated by +// the encoder +static constexpr unsigned int MAX_BYTES_PER_BLOCK_ROW = (EFB_WIDTH / 4) * 64; +// The maximum amount of data that the texture encoder can generate in one call +static constexpr unsigned int MAX_BYTES_PER_ENCODE = MAX_BYTES_PER_BLOCK_ROW * (EFB_HEIGHT / 4); + +class TextureEncoder +{ +public: + virtual ~TextureEncoder() { } + + virtual void Init() = 0; + virtual void Shutdown() = 0; + // Returns size in bytes of encoded block of memory + virtual void Encode(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + PEControl::PixelFormat src_format, const EFBRectangle& src_rect, + bool is_intensity, bool scale_by_half) = 0; +}; + +} diff --git a/Source/Core/VideoBackends/D3D12/VertexManager.cpp b/Source/Core/VideoBackends/D3D12/VertexManager.cpp new file mode 100644 index 0000000000..a385dde95f --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/VertexManager.cpp @@ -0,0 +1,218 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "Common/CommonTypes.h" + +#include "VideoBackends/D3D12/BoundingBox.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DStreamBuffer.h" +#include "VideoBackends/D3D12/FramebufferManager.h" +#include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/ShaderCache.h" +#include "VideoBackends/D3D12/VertexManager.h" + +#include "VideoCommon/BoundingBox.h" +#include "VideoCommon/Debugger.h" +#include "VideoCommon/IndexGenerator.h" +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +static constexpr unsigned int MAX_IBUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 16; +static constexpr unsigned int MAX_VBUFFER_SIZE = VertexManager::MAXVBUFFERSIZE * 4; + +void VertexManager::SetIndexBuffer() +{ + D3D12_INDEX_BUFFER_VIEW ib_view = { + m_index_stream_buffer->GetBaseGPUAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; + m_index_stream_buffer->GetSize(), // UINT SizeInBytes; + DXGI_FORMAT_R16_UINT // DXGI_FORMAT Format; + }; + + D3D::current_command_list->IASetIndexBuffer(&ib_view); +} + +void VertexManager::CreateDeviceObjects() +{ + m_vertex_draw_offset = 0; + m_index_draw_offset = 0; + + m_vertex_stream_buffer = new D3DStreamBuffer(VertexManager::MAXVBUFFERSIZE * 2, MAX_VBUFFER_SIZE, &m_vertex_stream_buffer_reallocated); + m_index_stream_buffer = new D3DStreamBuffer(VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 2, VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 16, &m_index_stream_buffer_reallocated); + + SetIndexBuffer(); + + // Use CPU-only memory if the GPU won't be reading from the buffers, + // since reading upload heaps on the CPU is slow.. 
+ m_vertex_cpu_buffer.resize(MAXVBUFFERSIZE); + m_index_cpu_buffer.resize(MAXIBUFFERSIZE); +} + +void VertexManager::DestroyDeviceObjects() +{ + SAFE_DELETE(m_vertex_stream_buffer); + SAFE_DELETE(m_index_stream_buffer); + + m_vertex_cpu_buffer.clear(); + m_index_cpu_buffer.clear(); +} + +VertexManager::VertexManager() +{ + CreateDeviceObjects(); +} + +VertexManager::~VertexManager() +{ + DestroyDeviceObjects(); +} + +void VertexManager::PrepareDrawBuffers(u32 stride) +{ + u32 vertex_data_size = IndexGenerator::GetNumVerts() * stride; + u32 index_data_size = IndexGenerator::GetIndexLen() * sizeof(u16); + + m_vertex_stream_buffer->OverrideSizeOfPreviousAllocation(vertex_data_size); + m_index_stream_buffer->OverrideSizeOfPreviousAllocation(index_data_size); + + ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertex_data_size); + ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size); +} + +void VertexManager::Draw(u32 stride) +{ + static u32 s_previous_stride = UINT_MAX; + + u32 indices = IndexGenerator::GetIndexLen(); + + if (D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER) || s_previous_stride != stride) + { + D3D12_VERTEX_BUFFER_VIEW vb_view = { + m_vertex_stream_buffer->GetBaseGPUAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation; + m_vertex_stream_buffer->GetSize(), // UINT SizeInBytes; + stride // UINT StrideInBytes; + }; + + D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view); + + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, false); + s_previous_stride = stride; + } + + D3D_PRIMITIVE_TOPOLOGY d3d_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + + switch (current_primitive_type) + { + case PRIMITIVE_POINTS: + d3d_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + break; + case PRIMITIVE_LINES: + d3d_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; + break; + } + + if (D3D::command_list_mgr->GetCommandListPrimitiveTopology() != d3d_primitive_topology) + { 
+ D3D::current_command_list->IASetPrimitiveTopology(d3d_primitive_topology); + D3D::command_list_mgr->SetCommandListPrimitiveTopology(d3d_primitive_topology); + } + + u32 base_vertex = m_vertex_draw_offset / stride; + u32 start_index = m_index_draw_offset / sizeof(u16); + + D3D::current_command_list->DrawIndexedInstanced(indices, 1, start_index, base_vertex, 0); + + INCSTAT(stats.thisFrame.numDrawCalls); +} + +void VertexManager::vFlush(bool use_dst_alpha) +{ + ShaderCache::LoadAndSetActiveShaders(use_dst_alpha ? DSTALPHA_DUAL_SOURCE_BLEND : DSTALPHA_NONE, current_primitive_type); + + if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active) + { + // D3D12TODO: Support GPU-side bounding box. + // D3D::context->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 2, 1, &BBox::GetUAV(), nullptr); + } + + u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(); + + PrepareDrawBuffers(stride); + + g_renderer->ApplyState(use_dst_alpha); + + Draw(stride); + + D3D::command_list_mgr->m_draws_since_last_execution++; + + // Many Gamecube/Wii titles read from the EFB each frame to determine what new rendering work to submit, e.g. where sun rays are + // occluded and where they aren't. When the CPU wants to read this data (done in Renderer::AccessEFB), it requires that the GPU + // finish all oustanding work. As an optimization, when we detect that the CPU is likely to read back data this frame, we break + // up the rendering work and submit it more frequently to the GPU (via ExecuteCommandList). Thus, when the CPU finally needs the + // the GPU to finish all of its work, there is (hopefully) less work outstanding to wait on at that moment. + + // D3D12TODO: Decide right threshold for drawCountSinceAsyncFlush at runtime depending on + // amount of stall measured in AccessEFB. 
+ + if (D3D::command_list_mgr->m_draws_since_last_execution > 100 && D3D::command_list_mgr->m_cpu_access_last_frame) + { + D3D::command_list_mgr->m_draws_since_last_execution = 0; + + D3D::command_list_mgr->ExecuteQueuedWork(); + + g_renderer->SetViewport(); + + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + } +} + +void VertexManager::ResetBuffer(u32 stride) +{ + if (s_cull_all) + { + s_pCurBufferPointer = m_vertex_cpu_buffer.data(); + s_pBaseBufferPointer = m_vertex_cpu_buffer.data(); + s_pEndBufferPointer = m_vertex_cpu_buffer.data() + MAXVBUFFERSIZE; + + IndexGenerator::Start(reinterpret_cast(m_index_cpu_buffer.data())); + return; + } + + bool command_list_executed = m_vertex_stream_buffer->AllocateSpaceInBuffer(MAXVBUFFERSIZE, stride); + + if (m_vertex_stream_buffer_reallocated) + { + D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true); + m_vertex_stream_buffer_reallocated = false; + } + + s_pBaseBufferPointer = static_cast(m_vertex_stream_buffer->GetBaseCPUAddress()); + s_pEndBufferPointer = s_pBaseBufferPointer + m_vertex_stream_buffer->GetSize(); + s_pCurBufferPointer = static_cast(m_vertex_stream_buffer->GetCPUAddressOfCurrentAllocation()); + m_vertex_draw_offset = m_vertex_stream_buffer->GetOffsetOfCurrentAllocation(); + + command_list_executed |= m_index_stream_buffer->AllocateSpaceInBuffer(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16)); + if (command_list_executed) + { + g_renderer->SetViewport(); + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + } + + if (m_index_stream_buffer_reallocated) + { + SetIndexBuffer(); + m_index_stream_buffer_reallocated = false; + } + + m_index_draw_offset = m_index_stream_buffer->GetOffsetOfCurrentAllocation(); + 
IndexGenerator::Start(reinterpret_cast(m_index_stream_buffer->GetCPUAddressOfCurrentAllocation())); +} + +} // namespace diff --git a/Source/Core/VideoBackends/D3D12/VertexManager.h b/Source/Core/VideoBackends/D3D12/VertexManager.h new file mode 100644 index 0000000000..d7e7394f6b --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/VertexManager.h @@ -0,0 +1,47 @@ +// Copyright 2008 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "VideoCommon/VertexManagerBase.h" + +namespace DX12 +{ + +class D3DStreamBuffer; + +class VertexManager final : public VertexManagerBase +{ +public: + VertexManager(); + ~VertexManager(); + + NativeVertexFormat* CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + void CreateDeviceObjects() override; + void DestroyDeviceObjects() override; + + void SetIndexBuffer(); + +protected: + void ResetBuffer(u32 stride) override; + +private: + void PrepareDrawBuffers(u32 stride); + void Draw(u32 stride); + void vFlush(bool use_dst_alpha) override; + + u32 m_vertex_draw_offset; + u32 m_index_draw_offset; + + D3DStreamBuffer* m_vertex_stream_buffer = nullptr; + D3DStreamBuffer* m_index_stream_buffer = nullptr; + + bool m_vertex_stream_buffer_reallocated = false; + bool m_index_stream_buffer_reallocated = false; + + std::vector m_index_cpu_buffer; + std::vector m_vertex_cpu_buffer; +}; + +} // namespace diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.h b/Source/Core/VideoBackends/D3D12/VideoBackend.h new file mode 100644 index 0000000000..168ac93351 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/VideoBackend.h @@ -0,0 +1,33 @@ +// Copyright 2011 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "VideoCommon/VideoBackendBase.h" + +namespace DX12 +{ + +class VideoBackend : public VideoBackendBase +{ + bool Initialize(void*) override; + void Shutdown() override; + + std::string GetName() const override; + std::string GetDisplayName() const override; + + void Video_Prepare() override; + void Video_Cleanup() override; + + void ShowConfig(void* parent) override; + + unsigned int PeekMessages() override; + +private: + void* m_window_handle; +}; + +} + diff --git a/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp b/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp new file mode 100644 index 0000000000..a2933ed268 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/XFBEncoder.cpp @@ -0,0 +1,38 @@ +// Copyright 2011 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Common/CommonTypes.h" +#include "Common/MsgHandler.h" +#include "Common/Logging/Log.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DShader.h" +#include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/FramebufferManager.h" +#include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/XFBEncoder.h" + +// D3D12TODO: Convert this file.. + +namespace DX12 +{ + +XFBEncoder::XFBEncoder() +{ } + +void XFBEncoder::Init() +{ + // D3D12TODO: Convert this file.. +} + +void XFBEncoder::Shutdown() +{ + // D3D12TODO: Convert this file.. +} + +void XFBEncoder::Encode(u8* dst, u32 width, u32 height, const EFBRectangle& srcRect, float gamma) +{ + // D3D12TODO: Convert this file.. +} + +} diff --git a/Source/Core/VideoBackends/D3D12/XFBEncoder.h b/Source/Core/VideoBackends/D3D12/XFBEncoder.h new file mode 100644 index 0000000000..68d2cd3839 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/XFBEncoder.h @@ -0,0 +1,28 @@ +// Copyright 2011 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "VideoCommon/VideoCommon.h" + +namespace DX12 +{ + +class XFBEncoder +{ + +public: + XFBEncoder(); + + void Init(); + void Shutdown(); + + void Encode(u8* dst, u32 width, u32 height, const EFBRectangle& src_rect, float gamma); + +private: + // D3D12TODO: Implement this class + +}; + +} diff --git a/Source/Core/VideoBackends/D3D12/main.cpp b/Source/Core/VideoBackends/D3D12/main.cpp new file mode 100644 index 0000000000..8b045ba5b7 --- /dev/null +++ b/Source/Core/VideoBackends/D3D12/main.cpp @@ -0,0 +1,244 @@ +// Copyright 2008 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include "Common/CommonTypes.h" +#include "Common/FileUtil.h" +#include "Common/StringUtil.h" + +#include "Core/ConfigManager.h" +#include "Core/Host.h" + +#include "VideoBackends/D3D12/BoundingBox.h" +#include "VideoBackends/D3D12/D3DBase.h" +#include "VideoBackends/D3D12/D3DCommandListManager.h" +#include "VideoBackends/D3D12/D3DUtil.h" +#include "VideoBackends/D3D12/PerfQuery.h" +#include "VideoBackends/D3D12/Render.h" +#include "VideoBackends/D3D12/ShaderCache.h" +#include "VideoBackends/D3D12/ShaderConstantsManager.h" +#include "VideoBackends/D3D12/StaticShaderCache.h" +#include "VideoBackends/D3D12/TextureCache.h" +#include "VideoBackends/D3D12/VertexManager.h" +#include "VideoBackends/D3D12/VideoBackend.h" + +#include "VideoCommon/BPStructs.h" +#include "VideoCommon/CommandProcessor.h" +#include "VideoCommon/Fifo.h" +#include "VideoCommon/GeometryShaderManager.h" +#include "VideoCommon/IndexGenerator.h" +#include "VideoCommon/OpcodeDecoding.h" +#include "VideoCommon/PixelEngine.h" +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexShaderManager.h" +#include "VideoCommon/VideoConfig.h" + +namespace DX12 +{ + +unsigned int VideoBackend::PeekMessages() +{ + MSG msg; + while (PeekMessage(&msg, 0, 0, 0, PM_REMOVE)) + { + if (msg.message 
== WM_QUIT) + return FALSE; + TranslateMessage(&msg); + DispatchMessage(&msg); + } + return TRUE; +} + +std::string VideoBackend::GetName() const +{ + return "D3D12"; +} + +std::string VideoBackend::GetDisplayName() const +{ + return "Direct3D 12 (experimental)"; +} + +void InitBackendInfo() +{ + HRESULT hr = DX12::D3D::LoadDXGI(); + if (SUCCEEDED(hr)) hr = DX12::D3D::LoadD3D(); + if (FAILED(hr)) + { + DX12::D3D::UnloadDXGI(); + return; + } + + g_Config.backend_info.APIType = API_D3D; + g_Config.backend_info.bSupportsExclusiveFullscreen = false; + g_Config.backend_info.bSupportsDualSourceBlend = true; + g_Config.backend_info.bSupportsPrimitiveRestart = true; + g_Config.backend_info.bSupportsOversizedViewports = false; + g_Config.backend_info.bSupportsGeometryShaders = true; + g_Config.backend_info.bSupports3DVision = true; + g_Config.backend_info.bSupportsPostProcessing = false; + g_Config.backend_info.bSupportsPaletteConversion = true; + g_Config.backend_info.bSupportsClipControl = true; + + IDXGIFactory* factory; + IDXGIAdapter* ad; + hr = DX12::create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); + if (FAILED(hr)) + PanicAlert("Failed to create IDXGIFactory object"); + + // adapters + g_Config.backend_info.Adapters.clear(); + g_Config.backend_info.AAModes.clear(); + while (factory->EnumAdapters((UINT)g_Config.backend_info.Adapters.size(), &ad) != DXGI_ERROR_NOT_FOUND) + { + const size_t adapter_index = g_Config.backend_info.Adapters.size(); + + DXGI_ADAPTER_DESC desc; + ad->GetDesc(&desc); + + // TODO: These don't get updated on adapter change, yet + if (adapter_index == g_Config.iAdapter) + { + std::string samples; + std::vector modes = DX12::D3D::EnumAAModes(ad); + // First iteration will be 1. This equals no AA. 
+ for (unsigned int i = 0; i < modes.size(); ++i) + { + g_Config.backend_info.AAModes.push_back(modes[i].Count); + } + + bool shader_model_5_supported = (DX12::D3D::GetFeatureLevel(ad) >= D3D_FEATURE_LEVEL_11_0); + + // Requires the earlydepthstencil attribute (only available in shader model 5) + g_Config.backend_info.bSupportsEarlyZ = shader_model_5_supported; + + // Requires full UAV functionality (only available in shader model 5) + g_Config.backend_info.bSupportsBBox = false; + + // Requires the instance attribute (only available in shader model 5) + g_Config.backend_info.bSupportsGSInstancing = shader_model_5_supported; + + // Sample shading requires shader model 5 + g_Config.backend_info.bSupportsSSAA = shader_model_5_supported; + } + g_Config.backend_info.Adapters.push_back(UTF16ToUTF8(desc.Description)); + ad->Release(); + } + factory->Release(); + + // Clear ppshaders string vector + g_Config.backend_info.PPShaders.clear(); + g_Config.backend_info.AnaglyphShaders.clear(); + + DX12::D3D::UnloadDXGI(); + DX12::D3D::UnloadD3D(); +} + +void VideoBackend::ShowConfig(void *hParent) +{ + InitBackendInfo(); + Host_ShowVideoConfig(hParent, GetDisplayName(), "gfx_dx12"); +} + +bool VideoBackend::Initialize(void *window_handle) +{ + bool d3d12_supported = D3D::AlertUserIfSelectedAdapterDoesNotSupportD3D12(); + + if (!d3d12_supported) + return false; + + if (window_handle == nullptr) + return false; + + InitializeShared(); + InitBackendInfo(); + + frameCount = 0; + + if (File::Exists(File::GetUserPath(D_CONFIG_IDX) + "GFX.ini")) + g_Config.Load(File::GetUserPath(D_CONFIG_IDX) + "GFX.ini"); + else + g_Config.Load(File::GetUserPath(D_CONFIG_IDX) + "gfx_dx12.ini"); + + g_Config.GameIniLoad(); + g_Config.UpdateProjectionHack(); + g_Config.VerifyValidity(); + UpdateActiveConfig(); + + m_window_handle = window_handle; + m_initialized = true; + + return true; +} + +void VideoBackend::Video_Prepare() +{ + // internal interfaces + g_renderer = 
std::make_unique(m_window_handle); + g_texture_cache = std::make_unique(); + g_vertex_manager = std::make_unique(); + g_perf_query = std::make_unique(); + ShaderCache::Init(); + ShaderConstantsManager::Init(); + StaticShaderCache::Init(); + StateCache::Init(); // PSO cache is populated here, after constituent shaders are loaded. + D3D::InitUtils(); + + // VideoCommon + BPInit(); + Fifo::Init(); + IndexGenerator::Init(); + VertexLoaderManager::Init(); + OpcodeDecoder::Init(); + VertexShaderManager::Init(); + PixelShaderManager::Init(); + GeometryShaderManager::Init(); + CommandProcessor::Init(); + PixelEngine::Init(); + BBox::Init(); + + // Tell the host that the window is ready + Host_Message(WM_USER_CREATE); +} + +void VideoBackend::Shutdown() +{ + m_initialized = false; // Cleared on shutdown; Initialize() is what sets it to true. + + // TODO: should be in Video_Cleanup + if (g_renderer) + { + // Immediately stop app from submitting work to GPU, and wait for all submitted work to complete. D3D12TODO: Check this. + D3D::command_list_mgr->ExecuteQueuedWork(true); + + // VideoCommon + Fifo::Shutdown(); + CommandProcessor::Shutdown(); + GeometryShaderManager::Shutdown(); + PixelShaderManager::Shutdown(); + VertexShaderManager::Shutdown(); + OpcodeDecoder::Shutdown(); + VertexLoaderManager::Shutdown(); + + // internal interfaces + D3D::ShutdownUtils(); + ShaderCache::Shutdown(); + ShaderConstantsManager::Shutdown(); + StaticShaderCache::Shutdown(); + BBox::Shutdown(); + + g_perf_query.reset(); + g_vertex_manager.reset(); + g_texture_cache.reset(); + g_renderer.reset(); + } +} + +void VideoBackend::Video_Cleanup() +{ +} + +} diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index 7978f78710..00b2ddf2e2 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -8,6 +8,7 @@ // TODO: ugly #ifdef _WIN32 #include "VideoBackends/D3D/VideoBackend.h" +#include "VideoBackends/D3D12/VideoBackend.h" #endif #include
"VideoBackends/OGL/VideoBackend.h" #include "VideoBackends/Software/VideoBackend.h" @@ -33,10 +34,18 @@ void VideoBackendBase::PopulateList() { VideoBackendBase* backends[4] = { nullptr }; - // OGL > D3D11 > SW + // OGL > D3D11 > D3D12 > SW g_available_video_backends.push_back(backends[0] = new OGL::VideoBackend); #ifdef _WIN32 g_available_video_backends.push_back(backends[1] = new DX11::VideoBackend); + + // More robust way to check for D3D12 support than (unreliable) OS version checks. + HMODULE d3d12_module = LoadLibraryA("d3d12.dll"); + if (d3d12_module != NULL) + { + FreeLibrary(d3d12_module); + g_available_video_backends.push_back(backends[2] = new DX12::VideoBackend); + } #endif g_available_video_backends.push_back(backends[3] = new SW::VideoSoftware); diff --git a/Source/UnitTests/UnitTests.vcxproj b/Source/UnitTests/UnitTests.vcxproj index 9a03b7e802..fa5aa706a2 100644 --- a/Source/UnitTests/UnitTests.vcxproj +++ b/Source/UnitTests/UnitTests.vcxproj @@ -78,6 +78,9 @@ {a4c423aa-f57c-46c7-a172-d1a777017d29} + + {570215b7-e32f-4438-95ae-c8d955f9fca3} + diff --git a/Source/dolphin-emu.sln b/Source/dolphin-emu.sln index 3deb2a6ca2..2093e4757b 100644 --- a/Source/dolphin-emu.sln +++ b/Source/dolphin-emu.sln @@ -1,4 +1,3 @@ - Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 VisualStudioVersion = 14.0.23107.0 @@ -69,6 +68,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnitTests", "UnitTests\Unit EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "enet", "..\Externals\enet\enet.vcxproj", "{CBC76802-C128-4B17-BF6C-23B08C313E5E}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "D3D12", "Core\VideoBackends\D3D12\D3D12.vcxproj", "{570215B7-E32F-4438-95AE-C8D955F9FCA3}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -79,10 +80,6 @@ Global {47411FDB-1BF2-48D0-AB4E-C7C41160F898}.Debug|x64.Build.0 = Debug|x64 
{47411FDB-1BF2-48D0-AB4E-C7C41160F898}.Release|x64.ActiveCfg = Release|x64 {47411FDB-1BF2-48D0-AB4E-C7C41160F898}.Release|x64.Build.0 = Release|x64 - {69D2B16E-122A-4E5D-8C37-8EC7B0F7CEB0}.Debug|x64.ActiveCfg = Debug|x64 - {69D2B16E-122A-4E5D-8C37-8EC7B0F7CEB0}.Debug|x64.Build.0 = Debug|x64 - {69D2B16E-122A-4E5D-8C37-8EC7B0F7CEB0}.Release|x64.ActiveCfg = Release|x64 - {69D2B16E-122A-4E5D-8C37-8EC7B0F7CEB0}.Release|x64.Build.0 = Release|x64 {E54CF649-140E-4255-81A5-30A673C1FB36}.Debug|x64.ActiveCfg = Debug|x64 {E54CF649-140E-4255-81A5-30A673C1FB36}.Debug|x64.Build.0 = Debug|x64 {E54CF649-140E-4255-81A5-30A673C1FB36}.Release|x64.ActiveCfg = Release|x64 @@ -199,6 +196,10 @@ Global {CBC76802-C128-4B17-BF6C-23B08C313E5E}.Debug|x64.Build.0 = Debug|x64 {CBC76802-C128-4B17-BF6C-23B08C313E5E}.Release|x64.ActiveCfg = Release|x64 {CBC76802-C128-4B17-BF6C-23B08C313E5E}.Release|x64.Build.0 = Release|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Debug|x64.ActiveCfg = Debug|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Debug|x64.Build.0 = Debug|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Release|x64.ActiveCfg = Release|x64 + {570215B7-E32F-4438-95AE-C8D955F9FCA3}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -230,5 +231,6 @@ Global {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} = {15670B2E-CED6-4ED5-94CE-A00B1B2B5BA6} {76563A7F-1011-4EAD-B667-7BB18D09568E} = {15670B2E-CED6-4ED5-94CE-A00B1B2B5BA6} {CBC76802-C128-4B17-BF6C-23B08C313E5E} = {87ADDFF9-5768-4DA2-A33B-2477593D6677} + {570215B7-E32F-4438-95AE-C8D955F9FCA3} = {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} EndGlobalSection EndGlobal