diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsAdapter.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsAdapter.java index 182593c2c9..235e43ee63 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsAdapter.java +++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsAdapter.java @@ -14,6 +14,7 @@ import org.dolphinemu.dolphinemu.R; import org.dolphinemu.dolphinemu.model.settings.BooleanSetting; import org.dolphinemu.dolphinemu.model.settings.FloatSetting; import org.dolphinemu.dolphinemu.model.settings.IntSetting; +import org.dolphinemu.dolphinemu.model.settings.StringSetting; import org.dolphinemu.dolphinemu.model.settings.view.CheckBoxSetting; import org.dolphinemu.dolphinemu.model.settings.view.SettingsItem; import org.dolphinemu.dolphinemu.model.settings.view.SingleChoiceSetting; @@ -207,7 +208,11 @@ public final class SettingsAdapter extends RecyclerView.Adapter5 + + + OpenGL + Vulkan + Software + Null + + + 0 + 1 + 2 + 3 + + "None" diff --git a/Source/Android/app/src/main/res/values/strings.xml b/Source/Android/app/src/main/res/values/strings.xml index 5ad4e029b3..8b91ea74ac 100644 --- a/Source/Android/app/src/main/res/values/strings.xml +++ b/Source/Android/app/src/main/res/values/strings.xml @@ -263,7 +263,7 @@ OpenGL ES OpenGL Video Backend - %s + Select the API used for graphics rendering. Show FPS Show the number of frames rendered per second as a measure of emulation speed. 
diff --git a/Source/Android/jni/MainAndroid.cpp b/Source/Android/jni/MainAndroid.cpp index 7e3da54b25..5e12fffa57 100644 --- a/Source/Android/jni/MainAndroid.cpp +++ b/Source/Android/jni/MainAndroid.cpp @@ -747,35 +747,21 @@ JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_SurfaceChang if (surf == nullptr) __android_log_print(ANDROID_LOG_ERROR, DOLPHIN_TAG, "Error: Surface is null."); - // If GLInterface isn't a thing yet then we don't need to let it know that the - // surface has changed - if (GLInterface) - { - GLInterface->UpdateHandle(surf); - Renderer::s_ChangedSurface.Reset(); - Renderer::s_SurfaceNeedsChanged.Set(); - Renderer::s_ChangedSurface.Wait(); - } + if (g_renderer) + g_renderer->ChangeSurface(surf); } JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_SurfaceDestroyed(JNIEnv* env, jobject obj) { + if (g_renderer) + g_renderer->ChangeSurface(nullptr); + if (surf) { ANativeWindow_release(surf); surf = nullptr; } - - // If GLInterface isn't a thing yet then we don't need to let it know that the - // surface has changed - if (GLInterface) - { - GLInterface->UpdateHandle(nullptr); - Renderer::s_ChangedSurface.Reset(); - Renderer::s_SurfaceNeedsChanged.Set(); - Renderer::s_ChangedSurface.Wait(); - } } JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_RefreshWiimotes(JNIEnv* env, jobject obj) diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj index fd47fe52ff..b6cb26cf94 100644 --- a/Source/Core/Common/Common.vcxproj +++ b/Source/Core/Common/Common.vcxproj @@ -125,6 +125,7 @@ + diff --git a/Source/Core/Common/Common.vcxproj.filters b/Source/Core/Common/Common.vcxproj.filters index 27410191d8..8b4876f638 100644 --- a/Source/Core/Common/Common.vcxproj.filters +++ b/Source/Core/Common/Common.vcxproj.filters @@ -225,6 +225,7 @@ + diff --git a/Source/Core/Common/Semaphore.h b/Source/Core/Common/Semaphore.h new file mode 100644 index 0000000000..1d1810a5f0 --- /dev/null +++ 
b/Source/Core/Common/Semaphore.h @@ -0,0 +1,50 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#ifdef _WIN32 + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include <Windows.h> + +namespace Common +{ +class Semaphore +{ +public: + Semaphore(int initial_count, int maximum_count) + { + m_handle = CreateSemaphoreA(nullptr, initial_count, maximum_count, nullptr); + } + + ~Semaphore() { CloseHandle(m_handle); } + void Wait() { WaitForSingleObject(m_handle, INFINITE); } + void Post() { ReleaseSemaphore(m_handle, 1, nullptr); } +private: + HANDLE m_handle; +}; +} // namespace Common + +#else // _WIN32 + +#include <semaphore.h> + +namespace Common +{ +class Semaphore +{ +public: + Semaphore(int initial_count, int maximum_count) { sem_init(&m_handle, 0, initial_count); } + ~Semaphore() { sem_destroy(&m_handle); } + void Wait() { sem_wait(&m_handle); } + void Post() { sem_post(&m_handle); } +private: + sem_t m_handle; +}; +} // namespace Common + +#endif // _WIN32 diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index a2e9516f44..5989d47399 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -233,6 +233,7 @@ set(LIBS sfml-network sfml-system videonull + videovulkan videoogl videosoftware z diff --git a/Source/Core/DolphinQt2/DolphinQt2.vcxproj b/Source/Core/DolphinQt2/DolphinQt2.vcxproj index 5605bd5ce3..a754927eba 100644 --- a/Source/Core/DolphinQt2/DolphinQt2.vcxproj +++ b/Source/Core/DolphinQt2/DolphinQt2.vcxproj @@ -207,6 +207,9 @@ {570215b7-e32f-4438-95ae-c8d955f9fca3} + + {29f29a19-f141-45ad-9679-5a2923b49da3} + diff --git a/Source/Core/DolphinWX/DolphinWX.vcxproj b/Source/Core/DolphinWX/DolphinWX.vcxproj index 56b5ed630a..992ab457ea 100644 --- a/Source/Core/DolphinWX/DolphinWX.vcxproj +++ b/Source/Core/DolphinWX/DolphinWX.vcxproj @@ -238,6 +238,9 @@ {53A5391B-737E-49A8-BC8F-312ADA00736F} + + 
{29F29A19-F141-45AD-9679-5A2923B49DA3} + {3de9ee35-3e91-4f27-a014-2866ad8c3fe3} diff --git a/Source/Core/DolphinWX/FrameTools.cpp b/Source/Core/DolphinWX/FrameTools.cpp index 55e31075d6..2b690c0e5a 100644 --- a/Source/Core/DolphinWX/FrameTools.cpp +++ b/Source/Core/DolphinWX/FrameTools.cpp @@ -77,6 +77,7 @@ #include "InputCommon/ControllerInterface/ControllerInterface.h" +#include "VideoCommon/RenderBase.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoConfig.h" @@ -907,6 +908,12 @@ void CFrame::OnRenderParentResize(wxSizeEvent& event) } m_LogWindow->Refresh(); m_LogWindow->Update(); + + // We call Renderer::ChangeSurface here to indicate the size has changed, + // but pass the same window handle. This is needed for the Vulkan backend, + // otherwise it cannot tell that the window has been resized on some drivers. + if (g_renderer) + g_renderer->ChangeSurface(GetRenderHandle()); } event.Skip(); } diff --git a/Source/Core/DolphinWX/VideoConfigDiag.cpp b/Source/Core/DolphinWX/VideoConfigDiag.cpp index a6e04c583e..147ff94376 100644 --- a/Source/Core/DolphinWX/VideoConfigDiag.cpp +++ b/Source/Core/DolphinWX/VideoConfigDiag.cpp @@ -272,6 +272,12 @@ static wxString stereo_convergence_desc = static wxString stereo_swap_desc = wxTRANSLATE("Swaps the left and right eye. Mostly useful if you want to view side-by-side " "cross-eyed.\n\nIf unsure, leave this unchecked."); +static wxString validation_layer_desc = + wxTRANSLATE("Enables validation of API calls made by the video backend, which may assist in " + "debugging graphical issues.\n\nIf unsure, leave this unchecked."); +static wxString backend_multithreading_desc = + wxTRANSLATE("Enables multi-threading in the video backend, which may result in performance " + "gains in some scenarios.\n\nIf unsure, leave this unchecked."); #if !defined(__APPLE__) // Search for available resolutions - TODO: Move to Common? 
@@ -471,6 +477,13 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title) CreateCheckBox(page_general, _("Render to Main Window"), wxGetTranslation(render_to_main_win_desc), SConfig::GetInstance().bRenderToMain)); + + if (vconfig.backend_info.bSupportsMultithreading) + { + szr_other->Add(CreateCheckBox(page_general, _("Enable Multi-threading"), + wxGetTranslation(backend_multithreading_desc), + vconfig.bBackendMultithreading)); + } } wxStaticBoxSizer* const group_basic = @@ -760,6 +773,9 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title) wxGetTranslation(show_stats_desc), vconfig.bOverlayStats)); szr_debug->Add(CreateCheckBox(page_advanced, _("Texture Format Overlay"), wxGetTranslation(texfmt_desc), vconfig.bTexFmtOverlayEnable)); + szr_debug->Add(CreateCheckBox(page_advanced, _("Enable API Validation Layers"), + wxGetTranslation(validation_layer_desc), + vconfig.bEnableValidationLayer)); wxStaticBoxSizer* const group_debug = new wxStaticBoxSizer(wxVERTICAL, page_advanced, _("Debugging")); diff --git a/Source/Core/VideoBackends/CMakeLists.txt b/Source/Core/VideoBackends/CMakeLists.txt index cb41b64b82..64344e43f8 100644 --- a/Source/Core/VideoBackends/CMakeLists.txt +++ b/Source/Core/VideoBackends/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(OGL) add_subdirectory(Null) add_subdirectory(Software) +add_subdirectory(Vulkan) # TODO: Add other backends here! 
diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h index 85cc641812..9fc5d0876c 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ b/Source/Core/VideoBackends/D3D/TextureCache.h @@ -58,7 +58,7 @@ private: u32 memory_stride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) override; - void CompileShaders() override {} + bool CompileShaders() override { return true; } void DeleteShaders() override {} ID3D11Buffer* palette_buf; ID3D11ShaderResourceView* palette_buf_srv; diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 92020481d4..887e585266 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -73,6 +73,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsClipControl = true; g_Config.backend_info.bSupportsDepthClamp = true; g_Config.backend_info.bSupportsReversedDepthRange = false; + g_Config.backend_info.bSupportsMultithreading = false; IDXGIFactory* factory; IDXGIAdapter* ad; diff --git a/Source/Core/VideoBackends/D3D12/TextureCache.h b/Source/Core/VideoBackends/D3D12/TextureCache.h index 4cce8a75b0..d8b20a4911 100644 --- a/Source/Core/VideoBackends/D3D12/TextureCache.h +++ b/Source/Core/VideoBackends/D3D12/TextureCache.h @@ -65,7 +65,7 @@ private: u32 memory_stride, PEControl::PixelFormat src_format, const EFBRectangle& src_rect, bool is_intensity, bool scale_by_half) override; - void CompileShaders() override {} + bool CompileShaders() override { return true; } void DeleteShaders() override {} std::unique_ptr m_palette_stream_buffer; diff --git a/Source/Core/VideoBackends/D3D12/main.cpp b/Source/Core/VideoBackends/D3D12/main.cpp index 0036a4a2f2..4184d5ab31 100644 --- a/Source/Core/VideoBackends/D3D12/main.cpp +++ b/Source/Core/VideoBackends/D3D12/main.cpp @@ -76,6 +76,7 @@ void VideoBackend::InitBackendInfo() 
g_Config.backend_info.bSupportsClipControl = true; g_Config.backend_info.bSupportsDepthClamp = true; g_Config.backend_info.bSupportsReversedDepthRange = false; + g_Config.backend_info.bSupportsMultithreading = false; IDXGIFactory* factory; IDXGIAdapter* ad; diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index 4cab977f18..8e54a730e7 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -26,17 +26,27 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.api_type = APIType::Nothing; g_Config.backend_info.bSupportsExclusiveFullscreen = true; g_Config.backend_info.bSupportsDualSourceBlend = true; - g_Config.backend_info.bSupportsEarlyZ = true; g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsOversizedViewports = true; g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupports3DVision = false; + g_Config.backend_info.bSupportsEarlyZ = true; + g_Config.backend_info.bSupportsBindingLayout = true; + g_Config.backend_info.bSupportsBBox = true; + g_Config.backend_info.bSupportsGSInstancing = true; g_Config.backend_info.bSupportsPostProcessing = false; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsClipControl = true; + g_Config.backend_info.bSupportsSSAA = true; + g_Config.backend_info.bSupportsDepthClamp = true; + g_Config.backend_info.bSupportsReversedDepthRange = true; + g_Config.backend_info.bSupportsMultithreading = false; // aamodes: We only support 1 sample, so no MSAA + g_Config.backend_info.Adapters.clear(); g_Config.backend_info.AAModes = {1}; + g_Config.backend_info.PPShaders.clear(); + g_Config.backend_info.AnaglyphShaders.clear(); } bool VideoBackend::Initialize(void* window_handle) diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h index d6ee5b3b7c..7c51133c6a 100644 --- 
a/Source/Core/VideoBackends/Null/TextureCache.h +++ b/Source/Core/VideoBackends/Null/TextureCache.h @@ -13,7 +13,7 @@ class TextureCache : public TextureCacheBase public: TextureCache() {} ~TextureCache() {} - void CompileShaders() override {} + bool CompileShaders() override { return true; } void DeleteShaders() override {} void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) override diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index 7ec094f9f2..fdc449442c 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -23,7 +23,7 @@ void BoundingBox::Init() glGenBuffers(1, &s_bbox_buffer_id); glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, s_bbox_buffer_id); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id); } } diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index ca622f7eb0..599f783cef 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -554,7 +554,8 @@ void ProgramShaderCache::CreateHeader() "%s\n" // early-z "%s\n" // 420pack "%s\n" // msaa - "%s\n" // Sampler binding + "%s\n" // Input/output/sampler binding + "%s\n" // Varying location "%s\n" // storage buffer "%s\n" // shader5 "%s\n" // SSAA @@ -595,9 +596,23 @@ void ProgramShaderCache::CreateHeader() (g_ogl_config.bSupportsMSAA && v < GLSL_150) ? "#extension GL_ARB_texture_multisample : enable" : "", + // Attribute and fragment output bindings are still done via glBindAttribLocation and + // glBindFragDataLocation. 
In the future this could be moved to the layout qualifier + // in GLSL, but requires verification of GL_ARB_explicit_attrib_location. g_ActiveConfig.backend_info.bSupportsBindingLayout ? - "#define SAMPLER_BINDING(x) layout(binding = x)" : - "#define SAMPLER_BINDING(x)", + "#define ATTRIBUTE_LOCATION(x)\n" + "#define FRAGMENT_OUTPUT_LOCATION(x)\n" + "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n" + "#define UBO_BINDING(packing, x) layout(packing, binding = x)\n" + "#define SAMPLER_BINDING(x) layout(binding = x)\n" + "#define SSBO_BINDING(x) layout(binding = x)\n" : + "#define ATTRIBUTE_LOCATION(x)\n" + "#define FRAGMENT_OUTPUT_LOCATION(x)\n" + "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n" + "#define UBO_BINDING(packing, x) layout(packing)\n" + "#define SAMPLER_BINDING(x)\n", + // Input/output blocks are matched by name during program linking + "#define VARYING_LOCATION(x)\n", !is_glsles && g_ActiveConfig.backend_info.bSupportsBBox ? "#extension GL_ARB_shader_storage_buffer_object : enable" : "", diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 95be3867a7..c1e8165b9b 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -327,7 +327,7 @@ static void InitDriverInfo() default: break; } - DriverDetails::Init(vendor, driver, version, family); + DriverDetails::Init(DriverDetails::API_OPENGL, vendor, driver, version, family); } // Init functions @@ -1620,12 +1620,16 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, OSD::DoCallbacks(OSD::CallbackType::OnFrame); OSD::DrawMessages(); - if (s_SurfaceNeedsChanged.IsSet()) +#ifdef ANDROID + if (s_surface_needs_change.IsSet()) { + GLInterface->UpdateHandle(s_new_surface_handle); GLInterface->UpdateSurface(); - s_SurfaceNeedsChanged.Clear(); - s_ChangedSurface.Set(); + s_new_surface_handle = nullptr; + s_surface_needs_change.Clear(); + s_surface_changed.Set(); } +#endif // Copy the rendered 
frame to the real window GLInterface->Swap(); @@ -1814,4 +1818,16 @@ int Renderer::GetMaxTextureSize() glGetIntegerv(GL_MAX_TEXTURE_SIZE, &s_max_texture_size); return s_max_texture_size; } + +void Renderer::ChangeSurface(void* new_surface_handle) +{ +// Win32 polls the window size when redrawing, X11 runs an event loop in another thread. +// This is only necessary for Android at this point, although handling resizes here +// would be more efficient than polling. +#ifdef ANDROID + s_new_surface_handle = new_surface_handle; + s_surface_needs_change.Set(); + s_surface_changed.Wait(); +#endif +} } diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index c240a936d3..57611f6856 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -105,6 +105,8 @@ public: int GetMaxTextureSize() override; + void ChangeSurface(void* new_surface_handle) override; + private: void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const void* data); diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index 6643e6b9cd..dc9d290f3e 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -314,7 +314,7 @@ void TextureCache::SetStage() glActiveTexture(GL_TEXTURE0 + s_ActiveTexture); } -void TextureCache::CompileShaders() +bool TextureCache::CompileShaders() { constexpr const char* color_copy_program = "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" "in vec3 f_uv0;\n" @@ -398,15 +398,18 @@ void TextureCache::CompileShaders() const char* prefix = geo_program.empty() ? "f" : "v"; const char* depth_layer = g_ActiveConfig.bStereoEFBMonoDepth ? 
"0.0" : "f_uv0.z"; - ProgramShaderCache::CompileShader(s_ColorCopyProgram, - StringFromFormat(vertex_program, prefix, prefix), - color_copy_program, geo_program); - ProgramShaderCache::CompileShader(s_ColorMatrixProgram, - StringFromFormat(vertex_program, prefix, prefix), - color_matrix_program, geo_program); - ProgramShaderCache::CompileShader( - s_DepthMatrixProgram, StringFromFormat(vertex_program, prefix, prefix), - StringFromFormat(depth_matrix_program, depth_layer), geo_program); + if (!ProgramShaderCache::CompileShader(s_ColorCopyProgram, + StringFromFormat(vertex_program, prefix, prefix), + color_copy_program, geo_program) || + !ProgramShaderCache::CompileShader(s_ColorMatrixProgram, + StringFromFormat(vertex_program, prefix, prefix), + color_matrix_program, geo_program) || + !ProgramShaderCache::CompileShader( + s_DepthMatrixProgram, StringFromFormat(vertex_program, prefix, prefix), + StringFromFormat(depth_matrix_program, depth_layer), geo_program)) + { + return false; + } s_ColorMatrixUniform = glGetUniformLocation(s_ColorMatrixProgram.glprogid, "colmat"); s_DepthMatrixUniform = glGetUniformLocation(s_DepthMatrixProgram.glprogid, "colmat"); @@ -500,9 +503,12 @@ void TextureCache::CompileShaders() if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) { - ProgramShaderCache::CompileShader( - s_palette_pixel_shader[GX_TL_IA8], StringFromFormat(vertex_program, prefix, prefix), - "#define DECODE DecodePixel_IA8" + palette_shader, geo_program); + if (!ProgramShaderCache::CompileShader( + s_palette_pixel_shader[GX_TL_IA8], StringFromFormat(vertex_program, prefix, prefix), + "#define DECODE DecodePixel_IA8" + palette_shader, geo_program)) + { + return false; + } s_palette_buffer_offset_uniform[GX_TL_IA8] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "texture_buffer_offset"); s_palette_multiplier_uniform[GX_TL_IA8] = @@ -510,9 +516,12 @@ void TextureCache::CompileShaders() s_palette_copy_position_uniform[GX_TL_IA8] = 
glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "copy_position"); - ProgramShaderCache::CompileShader( - s_palette_pixel_shader[GX_TL_RGB565], StringFromFormat(vertex_program, prefix, prefix), - "#define DECODE DecodePixel_RGB565" + palette_shader, geo_program); + if (!ProgramShaderCache::CompileShader( + s_palette_pixel_shader[GX_TL_RGB565], StringFromFormat(vertex_program, prefix, prefix), + "#define DECODE DecodePixel_RGB565" + palette_shader, geo_program)) + { + return false; + } s_palette_buffer_offset_uniform[GX_TL_RGB565] = glGetUniformLocation( s_palette_pixel_shader[GX_TL_RGB565].glprogid, "texture_buffer_offset"); s_palette_multiplier_uniform[GX_TL_RGB565] = @@ -520,9 +529,12 @@ void TextureCache::CompileShaders() s_palette_copy_position_uniform[GX_TL_RGB565] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB565].glprogid, "copy_position"); - ProgramShaderCache::CompileShader( - s_palette_pixel_shader[GX_TL_RGB5A3], StringFromFormat(vertex_program, prefix, prefix), - "#define DECODE DecodePixel_RGB5A3" + palette_shader, geo_program); + if (!ProgramShaderCache::CompileShader( + s_palette_pixel_shader[GX_TL_RGB5A3], StringFromFormat(vertex_program, prefix, prefix), + "#define DECODE DecodePixel_RGB5A3" + palette_shader, geo_program)) + { + return false; + } s_palette_buffer_offset_uniform[GX_TL_RGB5A3] = glGetUniformLocation( s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "texture_buffer_offset"); s_palette_multiplier_uniform[GX_TL_RGB5A3] = @@ -530,6 +542,8 @@ void TextureCache::CompileShaders() s_palette_copy_position_uniform[GX_TL_RGB5A3] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "copy_position"); } + + return true; } void TextureCache::DeleteShaders() diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h index ecd9056127..0007cf1deb 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.h +++ b/Source/Core/VideoBackends/OGL/TextureCache.h @@ -57,7 +57,7 
@@ private: u32 memory_stride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) override; - void CompileShaders() override; + bool CompileShaders() override; void DeleteShaders() override; }; diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index b351ef48bb..5951959253 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -103,6 +103,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsPostProcessing = true; g_Config.backend_info.bSupportsSSAA = true; g_Config.backend_info.bSupportsReversedDepthRange = true; + g_Config.backend_info.bSupportsMultithreading = false; // Overwritten in Render.cpp later g_Config.backend_info.bSupportsDualSourceBlend = true; diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index ab1910fd1e..ebac32de3a 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -47,10 +47,12 @@ public: class TextureCache : public TextureCacheBase { public: - void CompileShaders() override{}; - void DeleteShaders() override{}; + bool CompileShaders() override { return true; } + void DeleteShaders() override {} void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, - TlutFormat format) override{}; + TlutFormat format) override + { + } void CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) override @@ -130,6 +132,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsEarlyZ = true; g_Config.backend_info.bSupportsOversizedViewports = true; g_Config.backend_info.bSupportsPrimitiveRestart = false; + g_Config.backend_info.bSupportsMultithreading = false; // aamodes g_Config.backend_info.AAModes 
= {1}; diff --git a/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp b/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp new file mode 100644 index 0000000000..eea4b7e673 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp @@ -0,0 +1,249 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include <array> + +#include "Common/Assert.h" + +#include "VideoBackends/Vulkan/BoundingBox.h" +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/StagingBuffer.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +namespace Vulkan +{ +BoundingBox::BoundingBox() +{ +} + +BoundingBox::~BoundingBox() +{ + if (m_gpu_buffer != VK_NULL_HANDLE) + { + vkDestroyBuffer(g_vulkan_context->GetDevice(), m_gpu_buffer, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), m_gpu_memory, nullptr); + } +} + +bool BoundingBox::Initialize() +{ + if (!g_vulkan_context->SupportsBoundingBox()) + { + WARN_LOG(VIDEO, "Vulkan: Bounding box is unsupported by your device."); + return true; + } + + if (!CreateGPUBuffer()) + return false; + + if (!CreateReadbackBuffer()) + return false; + + return true; +} + +void BoundingBox::Flush(StateTracker* state_tracker) +{ + if (m_gpu_buffer == VK_NULL_HANDLE) + return; + + // Combine updates together, chances are the game would have written all 4. + bool updated_buffer = false; + for (size_t start = 0; start < 4; start++) + { + if (!m_values_dirty[start]) + continue; + + size_t count = 0; + std::array<s32, 4> write_values; + for (; (start + count) < 4; count++) + { + if (!m_values_dirty[start + count]) + break; + + m_readback_buffer->Read((start + count) * sizeof(s32), &write_values[count], sizeof(s32), + false); + m_values_dirty[start + count] = false; + } + + // We can't issue vkCmdUpdateBuffer within a render pass. 
+ // However, the writes must be serialized, so we can't put it in the init buffer. + if (!updated_buffer) + { + state_tracker->EndRenderPass(); + + // Ensure GPU buffer is in a state where it can be transferred to. + Util::BufferMemoryBarrier( + g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0, + BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + updated_buffer = true; + } + + vkCmdUpdateBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, + start * sizeof(s32), count * sizeof(s32), + reinterpret_cast<const u32*>(write_values.data())); + } + + // Restore fragment shader access to the buffer. + if (updated_buffer) + { + Util::BufferMemoryBarrier( + g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + } + + // We're now up-to-date. + m_valid = true; +} + +void BoundingBox::Invalidate(StateTracker* state_tracker) +{ + if (m_gpu_buffer == VK_NULL_HANDLE) + return; + + m_valid = false; +} + +s32 BoundingBox::Get(StateTracker* state_tracker, size_t index) +{ + _assert_(index < NUM_VALUES); + + if (!m_valid) + Readback(state_tracker); + + s32 value; + m_readback_buffer->Read(index * sizeof(s32), &value, sizeof(value), false); + return value; +} + +void BoundingBox::Set(StateTracker* state_tracker, size_t index, s32 value) +{ + _assert_(index < NUM_VALUES); + + // If we're currently valid, update the stored value in both our cache and the GPU buffer. + if (m_valid) + { + // Skip when it hasn't changed. + s32 current_value; + m_readback_buffer->Read(index * sizeof(s32), &current_value, sizeof(current_value), false); + if (current_value == value) + return; + } + + // Flag as dirty, and update values. 
+ m_readback_buffer->Write(index * sizeof(s32), &value, sizeof(value), true); + m_values_dirty[index] = true; +} + +bool BoundingBox::CreateGPUBuffer() +{ + VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT; + VkBufferCreateInfo info = { + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkBufferCreateFlags flags + BUFFER_SIZE, // VkDeviceSize size + buffer_usage, // VkBufferUsageFlags usage + VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode + 0, // uint32_t queueFamilyIndexCount + nullptr // const uint32_t* pQueueFamilyIndices + }; + + VkBuffer buffer; + VkResult res = vkCreateBuffer(g_vulkan_context->GetDevice(), &info, nullptr, &buffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: "); + return false; + } + + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements); + + uint32_t memory_type_index = g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + VkMemoryAllocateInfo memory_allocate_info = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + memory_requirements.size, // VkDeviceSize allocationSize + memory_type_index // uint32_t memoryTypeIndex + }; + VkDeviceMemory memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + return false; + } + + res = vkBindBufferMemory(g_vulkan_context->GetDevice(), buffer, memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindBufferMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + 
+ vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr); + return false; + } + + m_gpu_buffer = buffer; + m_gpu_memory = memory; + return true; +} + +bool BoundingBox::CreateReadbackBuffer() +{ + m_readback_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_READBACK, BUFFER_SIZE, + VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + if (!m_readback_buffer || !m_readback_buffer->Map()) + return false; + + return true; +} + +void BoundingBox::Readback(StateTracker* state_tracker) +{ + // Can't be done within a render pass. + state_tracker->EndRenderPass(); + + // Ensure all writes are completed to the GPU buffer prior to the transfer. + Util::BufferMemoryBarrier( + g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0, + BUFFER_SIZE, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Copy from GPU -> readback buffer. + VkBufferCopy region = {0, 0, BUFFER_SIZE}; + vkCmdCopyBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, + m_readback_buffer->GetBuffer(), 1, &region); + + // Restore GPU buffer access. + Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, + VK_ACCESS_TRANSFER_READ_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Wait until these commands complete. + Util::ExecuteCurrentCommandsAndRestoreState(state_tracker, false, true); + + // Cache is now valid. 
+ m_readback_buffer->InvalidateCPUCache(); + m_valid = true; +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/BoundingBox.h b/Source/Core/VideoBackends/Vulkan/BoundingBox.h new file mode 100644 index 0000000000..fd6d0fcdd0 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/BoundingBox.h @@ -0,0 +1,52 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "Common/CommonTypes.h" + +#include "VideoBackends/Vulkan/VulkanLoader.h" + +namespace Vulkan +{ +class StagingBuffer; +class StateTracker; + +// Owns the GPU-side bounding box buffer and the staging buffer used to read it back. +class BoundingBox +{ +public: + BoundingBox(); + ~BoundingBox(); + + bool Initialize(); + + VkBuffer GetGPUBuffer() const { return m_gpu_buffer; } + VkDeviceSize GetGPUBufferOffset() const { return 0; } + VkDeviceSize GetGPUBufferSize() const { return BUFFER_SIZE; } + s32 Get(StateTracker* state_tracker, size_t index); + void Set(StateTracker* state_tracker, size_t index, s32 value); + + void Invalidate(StateTracker* state_tracker); + void Flush(StateTracker* state_tracker); + +private: + bool CreateGPUBuffer(); + bool CreateReadbackBuffer(); + void Readback(StateTracker* state_tracker); + + VkBuffer m_gpu_buffer = VK_NULL_HANDLE; + // Non-dispatchable handles are 64-bit integers on 32-bit targets, so use VK_NULL_HANDLE + // rather than nullptr. + VkDeviceMemory m_gpu_memory = VK_NULL_HANDLE; + + static const size_t NUM_VALUES = 4; + static const size_t BUFFER_SIZE = sizeof(u32) * NUM_VALUES; + + std::unique_ptr<StagingBuffer> m_readback_buffer; + std::array<bool, NUM_VALUES> m_values_dirty = {}; + bool m_valid = true; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/CMakeLists.txt b/Source/Core/VideoBackends/Vulkan/CMakeLists.txt new file mode 100644 index 0000000000..82d471ba54 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/CMakeLists.txt @@ -0,0 +1,42 @@ +set(SRCS + BoundingBox.cpp + CommandBufferManager.cpp + FramebufferManager.cpp + ObjectCache.cpp + PaletteTextureConverter.cpp + PerfQuery.cpp + RasterFont.cpp + Renderer.cpp + ShaderCompiler.cpp + 
StateTracker.cpp + StagingBuffer.cpp + StagingTexture2D.cpp + StreamBuffer.cpp + SwapChain.cpp + Texture2D.cpp + TextureCache.cpp + TextureEncoder.cpp + Util.cpp + VertexFormat.cpp + VertexManager.cpp + VulkanContext.cpp + VulkanLoader.cpp + main.cpp +) + +set(LIBS + videocommon + common +) + +# Only include the Vulkan headers when building the Vulkan backend +include_directories(${CMAKE_SOURCE_DIR}/Externals/Vulkan/Include) + +# Silence warnings on glslang by flagging it as a system include +include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/Externals/glslang/glslang/Public) +include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/Externals/glslang/SPIRV) + +# Link against glslang, the other necessary libraries are referenced by the executable. +add_dolphin_library(videovulkan "${SRCS}" "${LIBS}") +target_link_libraries(videovulkan glslang) + diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp new file mode 100644 index 0000000000..537c21c514 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp @@ -0,0 +1,466 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/Assert.h" +#include "Common/CommonFuncs.h" +#include "Common/MsgHandler.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +namespace Vulkan +{ +CommandBufferManager::CommandBufferManager(bool use_threaded_submission) + : m_submit_semaphore(1, 1), m_use_threaded_submission(use_threaded_submission) +{ +} + +CommandBufferManager::~CommandBufferManager() +{ + // If the worker thread is enabled, wait for it to exit. + if (m_use_threaded_submission) + { + // Wait for all command buffers to be consumed by the worker thread. 
+ m_submit_semaphore.Wait(); + m_submit_loop->Stop(); + m_submit_thread.join(); + } + + vkDeviceWaitIdle(g_vulkan_context->GetDevice()); + + DestroyCommandBuffers(); + DestroyCommandPool(); +} + +bool CommandBufferManager::Initialize() +{ + if (!CreateCommandPool()) + return false; + + if (!CreateCommandBuffers()) + return false; + + if (m_use_threaded_submission && !CreateSubmitThread()) + return false; + + return true; +} + +bool CommandBufferManager::CreateCommandPool() +{ + VkCommandPoolCreateInfo info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + g_vulkan_context->GetGraphicsQueueFamilyIndex()}; + + VkResult res = + vkCreateCommandPool(g_vulkan_context->GetDevice(), &info, nullptr, &m_command_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateCommandPool failed: "); + return false; + } + + return true; +} + +// Destroys the command pool if one was created and resets the handle. +void CommandBufferManager::DestroyCommandPool() +{ + // VkCommandPool is a non-dispatchable handle (an integer on 32-bit targets), so compare and + // reset it with VK_NULL_HANDLE rather than nullptr. + if (m_command_pool != VK_NULL_HANDLE) + { + vkDestroyCommandPool(g_vulkan_context->GetDevice(), m_command_pool, nullptr); + m_command_pool = VK_NULL_HANDLE; + } +} + +bool CommandBufferManager::CreateCommandBuffers() +{ + VkDevice device = g_vulkan_context->GetDevice(); + + for (FrameResources& resources : m_frame_resources) + { + resources.init_command_buffer_used = false; + resources.needs_fence_wait = false; + + VkCommandBufferAllocateInfo allocate_info = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr, m_command_pool, + VK_COMMAND_BUFFER_LEVEL_PRIMARY, static_cast(resources.command_buffers.size())}; + + VkResult res = + vkAllocateCommandBuffers(device, &allocate_info, resources.command_buffers.data()); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: "); + return false; + } + + VkFenceCreateInfo fence_info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, + VK_FENCE_CREATE_SIGNALED_BIT}; + + res = vkCreateFence(device, &fence_info, nullptr, 
&resources.fence); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFence failed: "); + return false; + } + + // TODO: A better way to choose the number of descriptors. + VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000}, + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16}, + {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024}}; + + VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + nullptr, + 0, + 100000, // tweak this + static_cast(ArraySize(pool_sizes)), + pool_sizes}; + + res = vkCreateDescriptorPool(device, &pool_create_info, nullptr, &resources.descriptor_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); + return false; + } + } + + // Activate the first command buffer. ActivateCommandBuffer moves forward, so start with the last + m_current_frame = m_frame_resources.size() - 1; + ActivateCommandBuffer(); + return true; +} + +void CommandBufferManager::DestroyCommandBuffers() +{ + VkDevice device = g_vulkan_context->GetDevice(); + + for (FrameResources& resources : m_frame_resources) + { + for (const auto& it : resources.cleanup_resources) + it.destroy_callback(device, it.object); + resources.cleanup_resources.clear(); + + if (resources.fence != VK_NULL_HANDLE) + { + vkDestroyFence(device, resources.fence, nullptr); + resources.fence = VK_NULL_HANDLE; + } + if (resources.descriptor_pool != VK_NULL_HANDLE) + { + vkDestroyDescriptorPool(device, resources.descriptor_pool, nullptr); + resources.descriptor_pool = VK_NULL_HANDLE; + } + if (resources.command_buffers[0] != VK_NULL_HANDLE) + { + vkFreeCommandBuffers(device, m_command_pool, + static_cast(resources.command_buffers.size()), + resources.command_buffers.data()); + + resources.command_buffers.fill(VK_NULL_HANDLE); + } + } +} + +VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayout set_layout) +{ + 
VkDescriptorSetAllocateInfo allocate_info = { + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, + m_frame_resources[m_current_frame].descriptor_pool, 1, &set_layout}; + + VkDescriptorSet descriptor_set; + VkResult res = + vkAllocateDescriptorSets(g_vulkan_context->GetDevice(), &allocate_info, &descriptor_set); + if (res != VK_SUCCESS) + { + // Failing to allocate a descriptor set is not a fatal error, we can + // recover by moving to the next command buffer. + return VK_NULL_HANDLE; + } + + return descriptor_set; +} + +bool CommandBufferManager::CreateSubmitThread() +{ + m_submit_loop = std::make_unique(); + m_submit_thread = std::thread([this]() { + m_submit_loop->Run([this]() { + PendingCommandBufferSubmit submit; + { + std::lock_guard guard(m_pending_submit_lock); + if (m_pending_submits.empty()) + { + m_submit_loop->AllowSleep(); + return; + } + + submit = m_pending_submits.front(); + m_pending_submits.pop_front(); + } + + SubmitCommandBuffer(submit.index, submit.wait_semaphore, submit.signal_semaphore, + submit.present_swap_chain, submit.present_image_index); + }); + }); + + return true; +} + +void CommandBufferManager::PrepareToSubmitCommandBuffer() +{ + // Grab the semaphore before submitting command buffer either on-thread or off-thread. + // This prevents a race from occurring where a second command buffer is executed + // before the worker thread has woken and executed the first one yet. + m_submit_semaphore.Wait(); +} + +void CommandBufferManager::WaitForWorkerThreadIdle() +{ + // Drain the semaphore, then allow another request in the future. + m_submit_semaphore.Wait(); + m_submit_semaphore.Post(); +} + +void CommandBufferManager::WaitForGPUIdle() +{ + WaitForWorkerThreadIdle(); + vkDeviceWaitIdle(g_vulkan_context->GetDevice()); +} + +void CommandBufferManager::WaitForFence(VkFence fence) +{ + // Find the command buffer that this fence corresponds to. 
+ size_t command_buffer_index = 0; + for (; command_buffer_index < m_frame_resources.size(); command_buffer_index++) + { + if (m_frame_resources[command_buffer_index].fence == fence) + break; + } + _assert_(command_buffer_index < m_frame_resources.size()); + + // Has this command buffer already been waited for? + if (!m_frame_resources[command_buffer_index].needs_fence_wait) + return; + + // Wait for this command buffer to be completed. + VkResult res = + vkWaitForFences(g_vulkan_context->GetDevice(), 1, + &m_frame_resources[command_buffer_index].fence, VK_TRUE, UINT64_MAX); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); + + // Immediately fire callbacks and cleanups, since the commands has been completed. + m_frame_resources[command_buffer_index].needs_fence_wait = false; + OnCommandBufferExecuted(command_buffer_index); +} + +void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, + VkSemaphore wait_semaphore, + VkSemaphore signal_semaphore, + VkSwapchainKHR present_swap_chain, + uint32_t present_image_index) +{ + FrameResources& resources = m_frame_resources[m_current_frame]; + + // Fire fence tracking callbacks. This can't happen on the worker thread. + // We invoke these before submitting so that any last-minute commands can be added. + for (const auto& iter : m_fence_point_callbacks) + iter.second.first(resources.command_buffers[1], resources.fence); + + // End the current command buffer. + for (VkCommandBuffer command_buffer : resources.command_buffers) + { + VkResult res = vkEndCommandBuffer(command_buffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: "); + PanicAlert("Failed to end command buffer"); + } + } + + // This command buffer now has commands, so can't be re-used without waiting. + resources.needs_fence_wait = true; + + // Submitting off-thread? + if (m_use_threaded_submission && submit_on_worker_thread) + { + // Push to the pending submit queue. 
+ { + std::lock_guard guard(m_pending_submit_lock); + m_pending_submits.push_back({m_current_frame, wait_semaphore, signal_semaphore, + present_swap_chain, present_image_index}); + } + + // Wake up the worker thread for a single iteration. + m_submit_loop->Wakeup(); + } + else + { + // Pass through to normal submission path. + SubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore, present_swap_chain, + present_image_index); + } +} + +void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_semaphore, + VkSemaphore signal_semaphore, + VkSwapchainKHR present_swap_chain, + uint32_t present_image_index) +{ + FrameResources& resources = m_frame_resources[index]; + + // This may be executed on the worker thread, so don't modify any state of the manager class. + uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, + nullptr, + 0, + nullptr, + &wait_bits, + static_cast(resources.command_buffers.size()), + resources.command_buffers.data(), + 0, + nullptr}; + + // If the init command buffer did not have any commands recorded, don't submit it. + if (!m_frame_resources[index].init_command_buffer_used) + { + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &m_frame_resources[index].command_buffers[1]; + } + + if (wait_semaphore != VK_NULL_HANDLE) + { + submit_info.pWaitSemaphores = &wait_semaphore; + submit_info.waitSemaphoreCount = 1; + } + + if (signal_semaphore != VK_NULL_HANDLE) + { + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &signal_semaphore; + } + + VkResult res = + vkQueueSubmit(g_vulkan_context->GetGraphicsQueue(), 1, &submit_info, resources.fence); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: "); + PanicAlert("Failed to submit command buffer."); + } + + // Do we have a swap chain to present? + if (present_swap_chain != VK_NULL_HANDLE) + { + // Should have a signal semaphore. 
+ _assert_(signal_semaphore != VK_NULL_HANDLE); + VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + nullptr, + 1, + &signal_semaphore, + 1, + &present_swap_chain, + &present_image_index, + nullptr}; + + res = vkQueuePresentKHR(g_vulkan_context->GetGraphicsQueue(), &present_info); + if (res != VK_SUCCESS && res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) + LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: "); + } + + // Command buffer has been queued, so permit the next one. + m_submit_semaphore.Post(); +} + +void CommandBufferManager::OnCommandBufferExecuted(size_t index) +{ + FrameResources& resources = m_frame_resources[index]; + + // Fire fence tracking callbacks. + for (const auto& iter : m_fence_point_callbacks) + iter.second.second(resources.fence); + + // Clean up all objects pending destruction on this command buffer + for (const auto& it : resources.cleanup_resources) + it.destroy_callback(g_vulkan_context->GetDevice(), it.object); + resources.cleanup_resources.clear(); +} + +void CommandBufferManager::ActivateCommandBuffer() +{ + // Move to the next command buffer. + m_current_frame = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; + FrameResources& resources = m_frame_resources[m_current_frame]; + + // Wait for the GPU to finish with all resources for this command buffer. + if (resources.needs_fence_wait) + { + VkResult res = + vkWaitForFences(g_vulkan_context->GetDevice(), 1, &resources.fence, true, UINT64_MAX); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); + + OnCommandBufferExecuted(m_current_frame); + } + + // Reset fence to unsignaled before starting. 
+ VkResult res = vkResetFences(g_vulkan_context->GetDevice(), 1, &resources.fence); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkResetFences failed: "); + + // Reset command buffer to beginning since we can re-use the memory now + VkCommandBufferBeginInfo begin_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr}; + resources.init_command_buffer_used = false; + for (VkCommandBuffer command_buffer : resources.command_buffers) + { + res = vkResetCommandBuffer(command_buffer, 0); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkResetCommandBuffer failed: "); + + res = vkBeginCommandBuffer(command_buffer, &begin_info); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: "); + } + + // Also can do the same for the descriptor pools + res = vkResetDescriptorPool(g_vulkan_context->GetDevice(), resources.descriptor_pool, 0); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: "); +} + +// Submits the current command buffer and activates the next one. When wait_for_completion is +// set, also waits on the fence of the command buffer that was just submitted. +void CommandBufferManager::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion) +{ + VkFence pending_fence = GetCurrentCommandBufferFence(); + + // If we're waiting for completion, don't bother waking the worker thread: only submit + // off-thread when the caller is not going to block on the fence anyway. + PrepareToSubmitCommandBuffer(); + SubmitCommandBuffer((submit_off_thread && !wait_for_completion)); + ActivateCommandBuffer(); + + if (wait_for_completion) + WaitForFence(pending_fence); +} + +void CommandBufferManager::AddFencePointCallback( + const void* key, const CommandBufferQueuedCallback& queued_callback, + const CommandBufferExecutedCallback& executed_callback) +{ + // Shouldn't be adding twice. 
+ _assert_(m_fence_point_callbacks.find(key) == m_fence_point_callbacks.end()); + m_fence_point_callbacks.emplace(key, std::make_pair(queued_callback, executed_callback)); +} + +void CommandBufferManager::RemoveFencePointCallback(const void* key) +{ + auto iter = m_fence_point_callbacks.find(key); + _assert_(iter != m_fence_point_callbacks.end()); + m_fence_point_callbacks.erase(iter); +} + +std::unique_ptr g_command_buffer_mgr; +} diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h new file mode 100644 index 0000000000..3f0148db9c --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h @@ -0,0 +1,156 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Common/BlockingLoop.h" +#include "Common/Semaphore.h" + +#include "VideoCommon/VideoCommon.h" + +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoBackends/Vulkan/Util.h" + +namespace Vulkan +{ +class CommandBufferManager +{ +public: + explicit CommandBufferManager(bool use_threaded_submission); + ~CommandBufferManager(); + + bool Initialize(); + + VkCommandPool GetCommandPool() const { return m_command_pool; } + // These command buffers are allocated per-frame. They are valid until the command buffer + // is submitted, after that you should call these functions again. 
+ VkCommandBuffer GetCurrentInitCommandBuffer() + { + m_frame_resources[m_current_frame].init_command_buffer_used = true; + return m_frame_resources[m_current_frame].command_buffers[0]; + } + VkCommandBuffer GetCurrentCommandBuffer() const + { + return m_frame_resources[m_current_frame].command_buffers[1]; + } + VkDescriptorPool GetCurrentDescriptorPool() const + { + return m_frame_resources[m_current_frame].descriptor_pool; + } + // Allocates a descriptors set from the pool reserved for the current frame. + VkDescriptorSet AllocateDescriptorSet(VkDescriptorSetLayout set_layout); + + // Gets the fence that will be signaled when the currently executing command buffer is + // queued and executed. Do not wait for this fence before the buffer is executed. + VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; } + // Ensure the worker thread has submitted the previous frame's command buffer. + void PrepareToSubmitCommandBuffer(); + + // Ensure that the worker thread has submitted any previous command buffers and is idle. + void WaitForWorkerThreadIdle(); + + // Ensure that the worker thread has both submitted all commands, and the GPU has caught up. + // Use with caution, huge performance penalty. + void WaitForGPUIdle(); + + // Wait for a fence to be completed. + // Also invokes callbacks for completion. + void WaitForFence(VkFence fence); + + void SubmitCommandBuffer(bool submit_on_worker_thread, + VkSemaphore wait_semaphore = VK_NULL_HANDLE, + VkSemaphore signal_semaphore = VK_NULL_HANDLE, + VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE, + uint32_t present_image_index = 0xFFFFFFFF); + + void ActivateCommandBuffer(); + + void ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion); + + // Schedule a vulkan resource for destruction later on. This will occur when the command buffer + // is next re-used, and the GPU has finished working with the specified resource. 
+ template + void DeferResourceDestruction(T object) + { + DeferredResourceDestruction wrapper = DeferredResourceDestruction::Wrapper(object); + m_frame_resources[m_current_frame].cleanup_resources.push_back(wrapper); + } + + // Instruct the manager to fire the specified callback when a fence is flagged to be signaled. + // This happens when command buffers are executed, and can be tested if signaled, which means + // that all commands up to the point when the callback was fired have completed. + using CommandBufferQueuedCallback = std::function; + using CommandBufferExecutedCallback = std::function; + + void AddFencePointCallback(const void* key, const CommandBufferQueuedCallback& queued_callback, + const CommandBufferExecutedCallback& executed_callback); + + void RemoveFencePointCallback(const void* key); + +private: + bool CreateCommandPool(); + void DestroyCommandPool(); + + bool CreateCommandBuffers(); + void DestroyCommandBuffers(); + + bool CreateSubmitThread(); + + void SubmitCommandBuffer(size_t index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, + VkSwapchainKHR present_swap_chain, uint32_t present_image_index); + + void OnCommandBufferExecuted(size_t index); + + VkCommandPool m_command_pool = VK_NULL_HANDLE; + + struct FrameResources + { + // [0] - Init (upload) command buffer, [1] - draw command buffer + std::array command_buffers; + VkDescriptorPool descriptor_pool; + VkFence fence; + bool init_command_buffer_used; + bool needs_fence_wait; + + std::vector cleanup_resources; + }; + + std::array m_frame_resources = {}; + size_t m_current_frame; + + // callbacks when a fence point is set + std::map> + m_fence_point_callbacks; + + // Threaded command buffer execution + // Semaphore determines when a command buffer can be queued + Common::Semaphore m_submit_semaphore; + std::thread m_submit_thread; + std::unique_ptr m_submit_loop; + struct PendingCommandBufferSubmit + { + size_t index; + VkSemaphore wait_semaphore; + VkSemaphore 
signal_semaphore; + VkSwapchainKHR present_swap_chain; + uint32_t present_image_index; + }; + std::deque m_pending_submits; + std::mutex m_pending_submit_lock; + bool m_use_threaded_submission = false; +}; + +extern std::unique_ptr g_command_buffer_mgr; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Constants.h b/Source/Core/VideoBackends/Vulkan/Constants.h new file mode 100644 index 0000000000..3562635ca0 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/Constants.h @@ -0,0 +1,136 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include "Common/BitField.h" +#include "VideoBackends/Vulkan/VulkanLoader.h" + +namespace Vulkan +{ +// Number of command buffers. Having two allows one buffer to be +// executed whilst another is being built. +constexpr size_t NUM_COMMAND_BUFFERS = 2; + +// Staging buffer usage - optimize for uploads or readbacks +enum STAGING_BUFFER_TYPE +{ + STAGING_BUFFER_TYPE_UPLOAD, + STAGING_BUFFER_TYPE_READBACK +}; + +// Descriptor sets +enum DESCRIPTOR_SET +{ + DESCRIPTOR_SET_UNIFORM_BUFFERS, + DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS, + DESCRIPTOR_SET_SHADER_STORAGE_BUFFERS, + NUM_DESCRIPTOR_SETS +}; + +// Uniform buffer bindings within the first descriptor set +enum UNIFORM_BUFFER_DESCRIPTOR_SET_BINDING +{ + UBO_DESCRIPTOR_SET_BINDING_PS, + UBO_DESCRIPTOR_SET_BINDING_VS, + UBO_DESCRIPTOR_SET_BINDING_GS, + NUM_UBO_DESCRIPTOR_SET_BINDINGS +}; + +// Maximum number of attributes per vertex (we don't have any more than this?) 
+constexpr size_t MAX_VERTEX_ATTRIBUTES = 16; + +// Number of pixel shader texture slots +constexpr size_t NUM_PIXEL_SHADER_SAMPLERS = 8; + +// Total number of binding points in the pipeline layout +constexpr size_t TOTAL_PIPELINE_BINDING_POINTS = + NUM_UBO_DESCRIPTOR_SET_BINDINGS + NUM_PIXEL_SHADER_SAMPLERS + 1; + +// Format of EFB textures +constexpr VkFormat EFB_COLOR_TEXTURE_FORMAT = VK_FORMAT_R8G8B8A8_UNORM; +constexpr VkFormat EFB_DEPTH_TEXTURE_FORMAT = VK_FORMAT_D32_SFLOAT; +constexpr VkFormat EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT = VK_FORMAT_R32_SFLOAT; + +// Format of texturecache textures +constexpr VkFormat TEXTURECACHE_TEXTURE_FORMAT = VK_FORMAT_R8G8B8A8_UNORM; + +// Textures that don't fit into this buffer will be uploaded with a separate buffer (see below). +constexpr size_t INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE = 16 * 1024 * 1024; +constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024; + +// Textures greater than 1024*1024 will be put in staging textures that are released after +// execution instead. A 2048x2048 texture is 16MB, and we'd only fit four of these in our +// streaming buffer and be blocking frequently. Games are unlikely to have textures this +// large anyway, so it's only really an issue for HD texture packs, and memory is not +// a limiting factor in these scenarios anyway. 
+constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4; + +// Streaming uniform buffer size +constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; +constexpr size_t MAXIMUM_UNIFORM_STREAM_BUFFER_SIZE = 32 * 1024 * 1024; + +// Push constant buffer size for utility shaders +constexpr u32 PUSH_CONSTANT_BUFFER_SIZE = 128; + +// Rasterization state info +union RasterizationState { + BitField<0, 2, VkCullModeFlags> cull_mode; + BitField<2, 7, VkSampleCountFlagBits> samples; + BitField<9, 1, VkBool32> per_sample_shading; + BitField<10, 1, VkBool32> depth_clamp; + + u32 bits; +}; + +// Depth state info +union DepthStencilState { + BitField<0, 1, VkBool32> test_enable; + BitField<1, 1, VkBool32> write_enable; + BitField<2, 3, VkCompareOp> compare_op; + + u32 bits; +}; + +// Blend state info +union BlendState { + struct + { + union { + BitField<0, 1, VkBool32> blend_enable; + BitField<1, 3, VkBlendOp> blend_op; + BitField<4, 5, VkBlendFactor> src_blend; + BitField<9, 5, VkBlendFactor> dst_blend; + BitField<14, 3, VkBlendOp> alpha_blend_op; + BitField<17, 5, VkBlendFactor> src_alpha_blend; + BitField<22, 5, VkBlendFactor> dst_alpha_blend; + BitField<27, 4, VkColorComponentFlags> write_mask; + u32 low_bits; + }; + union { + BitField<0, 1, VkBool32> logic_op_enable; + BitField<1, 4, VkLogicOp> logic_op; + u32 high_bits; + }; + }; + + u64 bits; +}; + +// Sampler info +union SamplerState { + BitField<0, 1, VkFilter> min_filter; + BitField<1, 1, VkFilter> mag_filter; + BitField<2, 1, VkSamplerMipmapMode> mipmap_mode; + BitField<3, 2, VkSamplerAddressMode> wrap_u; + BitField<5, 2, VkSamplerAddressMode> wrap_v; + BitField<7, 8, u32> min_lod; + BitField<15, 8, u32> max_lod; + BitField<23, 6, s32> lod_bias; // tm0.lod_bias (8 bits) / 32 gives us 0-7. + BitField<29, 3, u32> anisotropy; // max_anisotropy = 1 << anisotropy, max of 16, so range 0-4. 
+ + u32 bits; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp b/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp new file mode 100644 index 0000000000..97de14849b --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp @@ -0,0 +1,1342 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/CommonFuncs.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/StagingTexture2D.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" +#include "VideoBackends/Vulkan/Texture2D.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VertexFormat.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/VideoConfig.h" + +namespace Vulkan +{ +// Maximum number of pixels poked in one batch * 6 +constexpr size_t MAX_POKE_VERTICES = 8192; +constexpr size_t POKE_VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; + +FramebufferManager::FramebufferManager() +{ +} + +FramebufferManager::~FramebufferManager() +{ + DestroyEFBFramebuffer(); + DestroyEFBRenderPass(); + + DestroyConversionShaders(); + + DestroyReadbackFramebuffer(); + DestroyReadbackTextures(); + DestroyReadbackShaders(); + DestroyReadbackRenderPasses(); + + DestroyPokeVertexBuffer(); +} + +bool FramebufferManager::Initialize() +{ + if (!CreateEFBRenderPass()) + { + PanicAlert("Failed to create EFB render pass"); + return false; + } + if (!CreateEFBFramebuffer()) + { + PanicAlert("Failed to create EFB textures"); + return false; + } + + if (!CompileConversionShaders()) + { + PanicAlert("Failed to compile EFB shaders"); + return false; + } + + if (!CreateReadbackRenderPasses()) + { + PanicAlert("Failed to 
create readback render passes"); + return false; + } + if (!CompileReadbackShaders()) + { + PanicAlert("Failed to compile readback shaders"); + return false; + } + if (!CreateReadbackTextures()) + { + PanicAlert("Failed to create readback textures"); + return false; + } + if (!CreateReadbackFramebuffer()) + { + PanicAlert("Failed to create readback framebuffer"); + return false; + } + + CreatePokeVertexFormat(); + if (!CreatePokeVertexBuffer()) + { + PanicAlert("Failed to create poke vertex buffer"); + return false; + } + + return true; +} + +void FramebufferManager::GetTargetSize(unsigned int* width, unsigned int* height) +{ + *width = m_efb_width; + *height = m_efb_height; +} + +bool FramebufferManager::CreateEFBRenderPass() +{ + m_efb_samples = static_cast(g_ActiveConfig.iMultisamples); + + // render pass for rendering to the efb + VkAttachmentDescription attachments[] = { + {0, EFB_COLOR_TEXTURE_FORMAT, m_efb_samples, VK_ATTACHMENT_LOAD_OP_LOAD, + VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}, + {0, EFB_DEPTH_TEXTURE_FORMAT, m_efb_samples, VK_ATTACHMENT_LOAD_OP_LOAD, + VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}}; + + VkAttachmentReference color_attachment_references[] = { + {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}}; + + VkAttachmentReference depth_attachment_reference = { + 1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; + + VkSubpassDescription subpass_description = { + 0, VK_PIPELINE_BIND_POINT_GRAPHICS, 0, nullptr, 1, color_attachment_references, + nullptr, &depth_attachment_reference, 0, nullptr}; + + VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + nullptr, + 0, + static_cast(ArraySize(attachments)), + attachments, + 1, + 
&subpass_description, + 0, + nullptr}; + + VkResult res = vkCreateRenderPass(g_vulkan_context->GetDevice(), &pass_info, nullptr, + &m_efb_load_render_pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass (EFB) failed: "); + return false; + } + + // render pass for clearing color/depth on load, as opposed to loading it + attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + res = vkCreateRenderPass(g_vulkan_context->GetDevice(), &pass_info, nullptr, + &m_efb_clear_render_pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass (EFB) failed: "); + return false; + } + + // render pass for resolving depth, since we can't do it with vkCmdResolveImage + if (m_efb_samples != VK_SAMPLE_COUNT_1_BIT) + { + VkAttachmentDescription resolve_attachment = {0, + EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_STORE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + + // Ensure all reads have finished from the resolved texture before overwriting it. 
+ VkSubpassDependency dependencies[] = { + {VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_DEPENDENCY_BY_REGION_BIT}, + {0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT}}; + subpass_description.pDepthStencilAttachment = nullptr; + pass_info.pAttachments = &resolve_attachment; + pass_info.attachmentCount = 1; + pass_info.dependencyCount = static_cast(ArraySize(dependencies)); + pass_info.pDependencies = dependencies; + res = vkCreateRenderPass(g_vulkan_context->GetDevice(), &pass_info, nullptr, + &m_depth_resolve_render_pass); + + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass (EFB depth resolve) failed: "); + return false; + } + } + + return true; +} + +void FramebufferManager::DestroyEFBRenderPass() +{ + if (m_efb_load_render_pass != VK_NULL_HANDLE) + { + vkDestroyRenderPass(g_vulkan_context->GetDevice(), m_efb_load_render_pass, nullptr); + m_efb_load_render_pass = VK_NULL_HANDLE; + } + + if (m_depth_resolve_render_pass != VK_NULL_HANDLE) + { + vkDestroyRenderPass(g_vulkan_context->GetDevice(), m_depth_resolve_render_pass, nullptr); + m_depth_resolve_render_pass = VK_NULL_HANDLE; + } +} + +bool FramebufferManager::CreateEFBFramebuffer() +{ + m_efb_width = static_cast(std::max(Renderer::GetTargetWidth(), 1)); + m_efb_height = static_cast(std::max(Renderer::GetTargetHeight(), 1)); + m_efb_layers = (g_ActiveConfig.iStereoMode != STEREO_OFF) ? 2 : 1; + INFO_LOG(VIDEO, "EFB size: %ux%ux%u", m_efb_width, m_efb_height, m_efb_layers); + + // Update the static variable in the base class. Why does this even exist? 
+ FramebufferManagerBase::m_EFBLayers = m_efb_layers; + + // Allocate EFB render targets + m_efb_color_texture = + Texture2D::Create(m_efb_width, m_efb_height, 1, m_efb_layers, EFB_COLOR_TEXTURE_FORMAT, + m_efb_samples, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + + // We need a second texture to swap with for changing pixel formats + m_efb_convert_color_texture = + Texture2D::Create(m_efb_width, m_efb_height, 1, m_efb_layers, EFB_COLOR_TEXTURE_FORMAT, + m_efb_samples, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + + m_efb_depth_texture = Texture2D::Create( + m_efb_width, m_efb_height, 1, m_efb_layers, EFB_DEPTH_TEXTURE_FORMAT, m_efb_samples, + VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + + if (!m_efb_color_texture || !m_efb_convert_color_texture || !m_efb_depth_texture) + return false; + + // Create resolved textures if MSAA is on + if (m_efb_samples != VK_SAMPLE_COUNT_1_BIT) + { + m_efb_resolve_color_texture = Texture2D::Create( + m_efb_width, m_efb_height, 1, m_efb_layers, EFB_COLOR_TEXTURE_FORMAT, VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); + + m_efb_resolve_depth_texture = Texture2D::Create( + m_efb_width, m_efb_height, 1, m_efb_layers, EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); + + if 
(!m_efb_resolve_color_texture || !m_efb_resolve_depth_texture) + return false; + + VkImageView attachment = m_efb_resolve_depth_texture->GetView(); + VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + nullptr, + 0, + m_depth_resolve_render_pass, + 1, + &attachment, + m_efb_width, + m_efb_height, + m_efb_layers}; + + VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, + &m_depth_resolve_framebuffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + return false; + } + } + + VkImageView framebuffer_attachments[] = { + m_efb_color_texture->GetView(), m_efb_depth_texture->GetView(), + }; + + VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + nullptr, + 0, + m_efb_load_render_pass, + static_cast(ArraySize(framebuffer_attachments)), + framebuffer_attachments, + m_efb_width, + m_efb_height, + m_efb_layers}; + + VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, + &m_efb_framebuffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + return false; + } + + // Create second framebuffer for format conversions + framebuffer_attachments[0] = m_efb_convert_color_texture->GetView(); + res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, + &m_efb_convert_framebuffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + return false; + } + + // Transition to state that can be used to clear + m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + // Clear the contents of the buffers. 
+ static const VkClearColorValue clear_color = {{0.0f, 0.0f, 0.0f, 1.0f}}; + static const VkClearDepthStencilValue clear_depth = {0.0f, 0}; + VkImageSubresourceRange clear_color_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, m_efb_layers}; + VkImageSubresourceRange clear_depth_range = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, m_efb_layers}; + vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), + m_efb_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + &clear_color, 1, &clear_color_range); + vkCmdClearDepthStencilImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), + m_efb_depth_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + &clear_depth, 1, &clear_depth_range); + + // Transition to color attachment state ready for rendering. + m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + return true; +} + +void FramebufferManager::DestroyEFBFramebuffer() +{ + if (m_efb_framebuffer != VK_NULL_HANDLE) + { + vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_efb_framebuffer, nullptr); + m_efb_framebuffer = VK_NULL_HANDLE; + } + + m_efb_color_texture.reset(); + m_efb_convert_color_texture.reset(); + m_efb_depth_texture.reset(); + m_efb_resolve_color_texture.reset(); + m_efb_resolve_depth_texture.reset(); +} + +void FramebufferManager::ResizeEFBTextures() +{ + DestroyEFBFramebuffer(); + if (!CreateEFBFramebuffer()) + PanicAlert("Failed to create EFB textures"); +} + +void FramebufferManager::RecreateRenderPass() +{ + DestroyEFBRenderPass(); + + if (!CreateEFBRenderPass()) + PanicAlert("Failed to create EFB render pass"); +} + +void FramebufferManager::RecompileShaders() +{ + DestroyConversionShaders(); + + if (!CompileConversionShaders()) + PanicAlert("Failed to compile EFB shaders"); + + 
DestroyReadbackShaders(); + if (!CompileReadbackShaders()) + PanicAlert("Failed to compile readback shaders"); +} + +void FramebufferManager::ReinterpretPixelData(int convtype) +{ + VkShaderModule pixel_shader = VK_NULL_HANDLE; + if (convtype == 0) + { + pixel_shader = m_ps_rgb8_to_rgba6; + } + else if (convtype == 2) + { + pixel_shader = m_ps_rgba6_to_rgb8; + } + else + { + ERROR_LOG(VIDEO, "Unhandled reinterpret pixel data %d", convtype); + return; + } + + // Transition EFB color buffer to shader resource, and the convert buffer to color attachment. + m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + m_efb_convert_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), + g_object_cache->GetStandardPipelineLayout(), m_efb_load_render_pass, + g_object_cache->GetScreenQuadVertexShader(), + g_object_cache->GetScreenQuadGeometryShader(), pixel_shader); + + RasterizationState rs_state = Util::GetNoCullRasterizationState(); + rs_state.samples = m_efb_samples; + rs_state.per_sample_shading = g_ActiveConfig.bSSAA ? 
VK_TRUE : VK_FALSE; + draw.SetRasterizationState(rs_state); + + VkRect2D region = {{0, 0}, {m_efb_width, m_efb_height}}; + draw.BeginRenderPass(m_efb_convert_framebuffer, region); + draw.SetPSSampler(0, m_efb_color_texture->GetView(), g_object_cache->GetPointSampler()); + draw.SetViewportAndScissor(0, 0, m_efb_width, m_efb_height); + draw.DrawWithoutVertexBuffer(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 4); + draw.EndRenderPass(); + + // Swap EFB texture pointers + std::swap(m_efb_color_texture, m_efb_convert_color_texture); + std::swap(m_efb_framebuffer, m_efb_convert_framebuffer); +} + +Texture2D* FramebufferManager::ResolveEFBColorTexture(StateTracker* state_tracker, + const VkRect2D& region) +{ + // Return the normal EFB texture if multisampling is off. + if (m_efb_samples == VK_SAMPLE_COUNT_1_BIT) + return m_efb_color_texture.get(); + + // Can't resolve within a render pass. + state_tracker->EndRenderPass(); + + // Resolving is considered to be a transfer operation. + m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_efb_resolve_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + // Resolve to our already-created texture. 
+ VkImageResolve resolve = { + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, m_efb_layers}, // VkImageSubresourceLayers srcSubresource + {region.offset.x, region.offset.y, 0}, // VkOffset3D srcOffset + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, m_efb_layers}, // VkImageSubresourceLayers dstSubresource + {region.offset.x, region.offset.y, 0}, // VkOffset3D dstOffset + {region.extent.width, region.extent.height, m_efb_layers} // VkExtent3D extent + }; + vkCmdResolveImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), + m_efb_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + m_efb_resolve_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &resolve); + + // Restore MSAA texture ready for rendering again + m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + return m_efb_resolve_color_texture.get(); +} + +Texture2D* FramebufferManager::ResolveEFBDepthTexture(StateTracker* state_tracker, + const VkRect2D& region) +{ + // Return the normal EFB texture if multisampling is off. + if (m_efb_samples == VK_SAMPLE_COUNT_1_BIT) + return m_efb_depth_texture.get(); + + // Can't resolve within a render pass. + state_tracker->EndRenderPass(); + + m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + // Draw using resolve shader to write the minimum depth of all samples to the resolve texture. 
+ UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), + g_object_cache->GetStandardPipelineLayout(), m_depth_resolve_render_pass, + g_object_cache->GetScreenQuadVertexShader(), + g_object_cache->GetScreenQuadGeometryShader(), m_ps_depth_resolve); + draw.BeginRenderPass(m_depth_resolve_framebuffer, region); + draw.SetPSSampler(0, m_efb_depth_texture->GetView(), g_object_cache->GetPointSampler()); + draw.SetViewportAndScissor(region.offset.x, region.offset.y, region.extent.width, + region.extent.height); + draw.DrawWithoutVertexBuffer(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 4); + draw.EndRenderPass(); + + // Restore MSAA texture ready for rendering again + m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + // Render pass transitions to shader resource. + m_efb_resolve_depth_texture->OverrideImageLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + return m_efb_resolve_depth_texture.get(); +} + +bool FramebufferManager::CompileConversionShaders() +{ + static const char RGB8_TO_RGBA6_SHADER_SOURCE[] = R"( + #if MSAA_ENABLED + SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; + #else + SAMPLER_BINDING(0) uniform sampler2DArray samp0; + #endif + layout(location = 0) in vec3 uv0; + layout(location = 0) out vec4 ocol0; + + void main() + { + int layer = 0; + #if EFB_LAYERS > 1 + layer = int(uv0.z); + #endif + + ivec3 coords = ivec3(gl_FragCoord.xy, layer); + + vec4 val; + #if !MSAA_ENABLED + // No MSAA - just load the first (and only) sample + val = texelFetch(samp0, coords, 0); + #elif SSAA_ENABLED + // Sample shading, shader runs once per sample + val = texelFetch(samp0, coords, gl_SampleID); + #else + // MSAA without sample shading, average out all samples. 
+ val = vec4(0, 0, 0, 0); + for (int i = 0; i < MSAA_SAMPLES; i++) + val += texelFetch(samp0, coords, i); + val /= float(MSAA_SAMPLES); + #endif + + ivec4 src8 = ivec4(round(val * 255.f)); + ivec4 dst6; + dst6.r = src8.r >> 2; + dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4); + dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6); + dst6.a = src8.b & 0x3F; + + ocol0 = float4(dst6) / 63.f; + } + )"; + + static const char RGBA6_TO_RGB8_SHADER_SOURCE[] = R"( + #if MSAA_ENABLED + SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; + #else + SAMPLER_BINDING(0) uniform sampler2DArray samp0; + #endif + layout(location = 0) in vec3 uv0; + layout(location = 0) out vec4 ocol0; + + void main() + { + int layer = 0; + #if EFB_LAYERS > 1 + layer = int(uv0.z); + #endif + + ivec3 coords = ivec3(gl_FragCoord.xy, layer); + + vec4 val; + #if !MSAA_ENABLED + // No MSAA - just load the first (and only) sample + val = texelFetch(samp0, coords, 0); + #elif SSAA_ENABLED + // Sample shading, shader runs once per sample + val = texelFetch(samp0, coords, gl_SampleID); + #else + // MSAA without sample shading, average out all samples. + val = vec4(0, 0, 0, 0); + for (int i = 0; i < MSAA_SAMPLES; i++) + val += texelFetch(samp0, coords, i); + val /= float(MSAA_SAMPLES); + #endif + + ivec4 src6 = ivec4(round(val * 63.f)); + ivec4 dst8; + dst8.r = (src6.r << 2) | (src6.g >> 4); + dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2); + dst8.b = ((src6.b & 0x3) << 6) | src6.a; + dst8.a = 255; + + ocol0 = float4(dst8) / 255.f; + } + )"; + + static const char DEPTH_RESOLVE_SHADER_SOURCE[] = R"( + SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; + layout(location = 0) in vec3 uv0; + layout(location = 0) out float ocol0; + + void main() + { + int layer = 0; + #if EFB_LAYERS > 1 + layer = int(uv0.z); + #endif + + // gl_FragCoord is in window coordinates, and we're rendering to + // the same rectangle in the resolve texture. 
+ ivec3 coords = ivec3(gl_FragCoord.xy, layer); + + // Take the minimum of all depth samples. + ocol0 = texelFetch(samp0, coords, 0).r; + for (int i = 1; i < MSAA_SAMPLES; i++) + ocol0 = min(ocol0, texelFetch(samp0, coords, i).r); + } + )"; + + std::string header = g_object_cache->GetUtilityShaderHeader(); + DestroyConversionShaders(); + + m_ps_rgb8_to_rgba6 = Util::CompileAndCreateFragmentShader(header + RGB8_TO_RGBA6_SHADER_SOURCE); + m_ps_rgba6_to_rgb8 = Util::CompileAndCreateFragmentShader(header + RGBA6_TO_RGB8_SHADER_SOURCE); + if (m_efb_samples != VK_SAMPLE_COUNT_1_BIT) + m_ps_depth_resolve = Util::CompileAndCreateFragmentShader(header + DEPTH_RESOLVE_SHADER_SOURCE); + + return (m_ps_rgba6_to_rgb8 != VK_NULL_HANDLE && m_ps_rgb8_to_rgba6 != VK_NULL_HANDLE && + (m_efb_samples == VK_SAMPLE_COUNT_1_BIT || m_ps_depth_resolve != VK_NULL_HANDLE)); +} + +void FramebufferManager::DestroyConversionShaders() +{ + auto DestroyShader = [this](VkShaderModule& shader) { + if (shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); + shader = VK_NULL_HANDLE; + } + }; + + DestroyShader(m_ps_rgb8_to_rgba6); + DestroyShader(m_ps_rgba6_to_rgb8); + DestroyShader(m_ps_depth_resolve); +} + +u32 FramebufferManager::PeekEFBColor(StateTracker* state_tracker, u32 x, u32 y) +{ + if (!m_color_readback_texture_valid && !PopulateColorReadbackTexture(state_tracker)) + return 0; + + u32 value; + m_color_readback_texture->ReadTexel(x, y, &value, sizeof(value)); + return value; +} + +bool FramebufferManager::PopulateColorReadbackTexture(StateTracker* state_tracker) +{ + // Can't be in our normal render pass. + state_tracker->EndRenderPass(); + state_tracker->OnReadback(); + + // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. 
+ VkRect2D src_region = {{0, 0}, {m_efb_width, m_efb_height}}; + Texture2D* src_texture = m_efb_color_texture.get(); + VkImageAspectFlags src_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + if (m_efb_samples > 1) + src_texture = ResolveEFBColorTexture(state_tracker, src_region); + + if (m_efb_width != EFB_WIDTH || m_efb_height != EFB_HEIGHT) + { + UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), + g_object_cache->GetStandardPipelineLayout(), m_copy_color_render_pass, + g_object_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, + m_copy_color_shader); + + VkRect2D rect = {{0, 0}, {EFB_WIDTH, EFB_HEIGHT}}; + draw.BeginRenderPass(m_color_copy_framebuffer, rect); + + // Transition EFB to shader read before drawing. + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetPointSampler()); + draw.SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); + draw.DrawWithoutVertexBuffer(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 4); + draw.EndRenderPass(); + + // Restore EFB to color attachment, since we're done with it. + if (src_texture == m_efb_color_texture.get()) + { + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + } + + // Use this as a source texture now. + m_color_copy_texture->OverrideImageLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + src_texture = m_color_copy_texture.get(); + } + + // Copy from EFB or copy texture to staging texture. + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_color_readback_texture->CopyFromImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), + src_texture->GetImage(), src_aspect, 0, 0, EFB_WIDTH, + EFB_HEIGHT, 0, 0); + + // Restore original layout if we used the EFB as a source. 
+ if (src_texture == m_efb_color_texture.get()) + { + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + } + + // Wait until the copy is complete. + g_command_buffer_mgr->ExecuteCommandBuffer(false, true); + state_tracker->InvalidateDescriptorSets(); + state_tracker->SetPendingRebind(); + + // Map to host memory. + if (!m_color_readback_texture->IsMapped() && !m_color_readback_texture->Map()) + return false; + + m_color_readback_texture_valid = true; + return true; +} + +float FramebufferManager::PeekEFBDepth(StateTracker* state_tracker, u32 x, u32 y) +{ + if (!m_depth_readback_texture_valid && !PopulateDepthReadbackTexture(state_tracker)) + return 0.0f; + + float value; + m_depth_readback_texture->ReadTexel(x, y, &value, sizeof(value)); + return value; +} + +bool FramebufferManager::PopulateDepthReadbackTexture(StateTracker* state_tracker) +{ + // Can't be in our normal render pass. + state_tracker->EndRenderPass(); + state_tracker->OnReadback(); + + // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. + VkRect2D src_region = {{0, 0}, {m_efb_width, m_efb_height}}; + Texture2D* src_texture = m_efb_depth_texture.get(); + VkImageAspectFlags src_aspect = VK_IMAGE_ASPECT_DEPTH_BIT; + if (m_efb_samples > 1) + { + // EFB depth resolves are written out as color textures + src_texture = ResolveEFBDepthTexture(state_tracker, src_region); + src_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + } + if (m_efb_width != EFB_WIDTH || m_efb_height != EFB_HEIGHT) + { + UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), + g_object_cache->GetStandardPipelineLayout(), m_copy_depth_render_pass, + g_object_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, + m_copy_depth_shader); + + VkRect2D rect = {{0, 0}, {EFB_WIDTH, EFB_HEIGHT}}; + draw.BeginRenderPass(m_depth_copy_framebuffer, rect); + + // Transition EFB to shader read before drawing. 
+ src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetPointSampler()); + draw.SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); + draw.DrawWithoutVertexBuffer(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 4); + draw.EndRenderPass(); + + // Restore EFB to depth attachment, since we're done with it. + if (src_texture == m_efb_depth_texture.get()) + { + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + } + + // Use this as a source texture now. + m_depth_copy_texture->OverrideImageLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + src_texture = m_depth_copy_texture.get(); + src_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + } + + // Copy from EFB or copy texture to staging texture. + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_depth_readback_texture->CopyFromImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), + src_texture->GetImage(), src_aspect, 0, 0, EFB_WIDTH, + EFB_HEIGHT, 0, 0); + + // Restore original layout if we used the EFB as a source. + if (src_texture == m_efb_depth_texture.get()) + { + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + } + + // Wait until the copy is complete. + g_command_buffer_mgr->ExecuteCommandBuffer(false, true); + state_tracker->InvalidateDescriptorSets(); + state_tracker->SetPendingRebind(); + + // Map to host memory. 
+ if (!m_depth_readback_texture->IsMapped() && !m_depth_readback_texture->Map()) + return false; + + m_depth_readback_texture_valid = true; + return true; +} + +void FramebufferManager::InvalidatePeekCache() +{ + m_color_readback_texture_valid = false; + m_depth_readback_texture_valid = false; +} + +bool FramebufferManager::CreateReadbackRenderPasses() +{ + VkAttachmentDescription copy_attachment = { + 0, // VkAttachmentDescriptionFlags flags + EFB_COLOR_TEXTURE_FORMAT, // VkFormat format + VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples + VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp loadOp + VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp + VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp + VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // VkImageLayout initialLayout + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL // VkImageLayout finalLayout + }; + VkAttachmentReference copy_attachment_ref = { + 0, // uint32_t attachment + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout + }; + VkSubpassDescription copy_subpass = { + 0, // VkSubpassDescriptionFlags flags + VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint + 0, // uint32_t inputAttachmentCount + nullptr, // const VkAttachmentReference* pInputAttachments + 1, // uint32_t colorAttachmentCount + ©_attachment_ref, // const VkAttachmentReference* pColorAttachments + nullptr, // const VkAttachmentReference* pResolveAttachments + nullptr, // const VkAttachmentReference* pDepthStencilAttachment + 0, // uint32_t preserveAttachmentCount + nullptr // const uint32_t* pPreserveAttachments + }; + VkSubpassDependency copy_dependency = { + 0, + VK_SUBPASS_EXTERNAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_TRANSFER_READ_BIT, + VK_DEPENDENCY_BY_REGION_BIT}; 
+ VkRenderPassCreateInfo copy_pass = { + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkRenderPassCreateFlags flags + 1, // uint32_t attachmentCount + ©_attachment, // const VkAttachmentDescription* pAttachments + 1, // uint32_t subpassCount + ©_subpass, // const VkSubpassDescription* pSubpasses + 1, // uint32_t dependencyCount + ©_dependency // const VkSubpassDependency* pDependencies + }; + + VkResult res = vkCreateRenderPass(g_vulkan_context->GetDevice(), ©_pass, nullptr, + &m_copy_color_render_pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass failed: "); + return false; + } + + // Depth is similar to copy, just a different format. + copy_attachment.format = EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT; + res = vkCreateRenderPass(g_vulkan_context->GetDevice(), ©_pass, nullptr, + &m_copy_depth_render_pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass failed: "); + return false; + } + + // Some devices don't support point sizes >1 (e.g. Adreno). + // If we can't use a point size above our maximum IR, use triangles instead. + // This means a 6x increase in the size of the vertices, though. + if (!g_vulkan_context->GetDeviceFeatures().largePoints || + g_vulkan_context->GetDeviceLimits().pointSizeGranularity > 1 || + g_vulkan_context->GetDeviceLimits().pointSizeRange[0] > 1 || + g_vulkan_context->GetDeviceLimits().pointSizeRange[1] < 16) + { + m_poke_primitive_topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + } + else + { + // Points should be okay. 
+ m_poke_primitive_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + + return true; +} + +void FramebufferManager::DestroyReadbackRenderPasses() +{ + if (m_copy_color_render_pass != VK_NULL_HANDLE) + vkDestroyRenderPass(g_vulkan_context->GetDevice(), m_copy_color_render_pass, nullptr); + if (m_copy_depth_render_pass != VK_NULL_HANDLE) + vkDestroyRenderPass(g_vulkan_context->GetDevice(), m_copy_depth_render_pass, nullptr); +} + +bool FramebufferManager::CompileReadbackShaders() +{ + std::string source; + + // TODO: Use input attachment here instead? + // TODO: MSAA resolve in shader. + static const char COPY_COLOR_SHADER_SOURCE[] = R"( + SAMPLER_BINDING(0) uniform sampler2DArray samp0; + layout(location = 0) in vec3 uv0; + layout(location = 0) out vec4 ocol0; + void main() + { + ocol0 = texture(samp0, uv0); + } + )"; + + static const char COPY_DEPTH_SHADER_SOURCE[] = R"( + SAMPLER_BINDING(0) uniform sampler2DArray samp0; + layout(location = 0) in vec3 uv0; + layout(location = 0) out float ocol0; + void main() + { + ocol0 = texture(samp0, uv0).r; + } + )"; + + source = g_object_cache->GetUtilityShaderHeader() + COPY_COLOR_SHADER_SOURCE; + m_copy_color_shader = Util::CompileAndCreateFragmentShader(source); + + source = g_object_cache->GetUtilityShaderHeader() + COPY_DEPTH_SHADER_SOURCE; + m_copy_depth_shader = Util::CompileAndCreateFragmentShader(source); + + return m_copy_color_shader != VK_NULL_HANDLE && m_copy_depth_shader != VK_NULL_HANDLE; +} + +void FramebufferManager::DestroyReadbackShaders() +{ + if (m_copy_color_shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_color_shader, nullptr); + m_copy_color_shader = VK_NULL_HANDLE; + } + if (m_copy_depth_shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_depth_shader, nullptr); + m_copy_depth_shader = VK_NULL_HANDLE; + } +} + +bool FramebufferManager::CreateReadbackTextures() +{ + m_color_copy_texture = + Texture2D::Create(EFB_WIDTH, 
EFB_HEIGHT, 1, 1, EFB_COLOR_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + + m_color_readback_texture = StagingTexture2D::Create(STAGING_BUFFER_TYPE_READBACK, EFB_WIDTH, + EFB_HEIGHT, EFB_COLOR_TEXTURE_FORMAT); + if (!m_color_copy_texture || !m_color_readback_texture) + { + ERROR_LOG(VIDEO, "Failed to create EFB color readback texture"); + return false; + } + + m_depth_copy_texture = + Texture2D::Create(EFB_WIDTH, EFB_HEIGHT, 1, 1, EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + + // We can't copy to/from color<->depth formats, so using a linear texture is not an option here. + // TODO: Investigate if vkCmdBlitImage can be used. The documentation isn't that clear. + m_depth_readback_texture = StagingTexture2DBuffer::Create(STAGING_BUFFER_TYPE_READBACK, EFB_WIDTH, + EFB_HEIGHT, EFB_DEPTH_TEXTURE_FORMAT); + if (!m_depth_copy_texture || !m_depth_readback_texture) + { + ERROR_LOG(VIDEO, "Failed to create EFB depth readback texture"); + return false; + } + + // With Vulkan, we can leave these textures mapped and use invalidate/flush calls instead. + if (!m_color_readback_texture->Map() || !m_depth_readback_texture->Map()) + { + ERROR_LOG(VIDEO, "Failed to map EFB readback textures"); + return false; + } + + // Transition to TRANSFER_SRC, as this is expected by the render pass. 
+ m_color_copy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_depth_copy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + return true; +} + +void FramebufferManager::DestroyReadbackTextures() +{ + m_color_copy_texture.reset(); + m_color_readback_texture.reset(); + m_color_readback_texture_valid = false; + m_depth_copy_texture.reset(); + m_depth_readback_texture.reset(); + m_depth_readback_texture_valid = false; +} + +bool FramebufferManager::CreateReadbackFramebuffer() +{ + VkImageView framebuffer_attachment = m_color_copy_texture->GetView(); + VkFramebufferCreateInfo framebuffer_info = { + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkFramebufferCreateFlags flags + m_copy_color_render_pass, // VkRenderPass renderPass + 1, // uint32_t attachmentCount + &framebuffer_attachment, // const VkImageView* pAttachments + EFB_WIDTH, // uint32_t width + EFB_HEIGHT, // uint32_t height + 1 // uint32_t layers + }; + VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, + &m_color_copy_framebuffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + return false; + } + + // Swap for depth + framebuffer_info.renderPass = m_copy_depth_render_pass; + framebuffer_attachment = m_depth_copy_texture->GetView(); + res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, + &m_depth_copy_framebuffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + return false; + } + + return true; +} + +void FramebufferManager::DestroyReadbackFramebuffer() +{ + if (m_color_copy_framebuffer != VK_NULL_HANDLE) + { + vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_color_copy_framebuffer, nullptr); + m_color_copy_framebuffer = VK_NULL_HANDLE; 
+ } + if (m_depth_copy_framebuffer != VK_NULL_HANDLE) + { + vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_depth_copy_framebuffer, nullptr); + m_depth_copy_framebuffer = VK_NULL_HANDLE; + } +} + +void FramebufferManager::PokeEFBColor(StateTracker* state_tracker, u32 x, u32 y, u32 color) +{ + // Flush if we exceeded the number of vertices per batch. + if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) + FlushEFBPokes(state_tracker); + + CreatePokeVertices(&m_color_poke_vertices, x, y, 0.0f, color); + + // Update the peek cache if it's valid, since we know the color of the pixel now. + if (m_color_readback_texture_valid) + m_color_readback_texture->WriteTexel(x, y, &color, sizeof(color)); +} + +void FramebufferManager::PokeEFBDepth(StateTracker* state_tracker, u32 x, u32 y, float depth) +{ + // Flush if we exceeded the number of vertices per batch. + if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) + FlushEFBPokes(state_tracker); + + CreatePokeVertices(&m_depth_poke_vertices, x, y, depth, 0); + + // Update the peek cache if it's valid, since we know the color of the pixel now. + if (m_depth_readback_texture_valid) + m_depth_readback_texture->WriteTexel(x, y, &depth, sizeof(depth)); +} + +void FramebufferManager::CreatePokeVertices(std::vector* destination_list, u32 x, + u32 y, float z, u32 color) +{ + // Some devices don't support point sizes >1 (e.g. Adreno). 
+ if (m_poke_primitive_topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) + { + // generate quad from the single point (clip-space coordinates) + float x1 = float(x) * 2.0f / EFB_WIDTH - 1.0f; + float y1 = float(y) * 2.0f / EFB_HEIGHT - 1.0f; + float x2 = float(x + 1) * 2.0f / EFB_WIDTH - 1.0f; + float y2 = float(y + 1) * 2.0f / EFB_HEIGHT - 1.0f; + destination_list->push_back({{x1, y1, z, 1.0f}, color}); + destination_list->push_back({{x2, y1, z, 1.0f}, color}); + destination_list->push_back({{x1, y2, z, 1.0f}, color}); + destination_list->push_back({{x1, y2, z, 1.0f}, color}); + destination_list->push_back({{x2, y1, z, 1.0f}, color}); + destination_list->push_back({{x2, y2, z, 1.0f}, color}); + } + else + { + // GPU will expand the point to a quad. + float cs_x = float(x) * 2.0f / EFB_WIDTH - 1.0f; + float cs_y = float(y) * 2.0f / EFB_HEIGHT - 1.0f; + float point_size = m_efb_width / static_cast(EFB_WIDTH); + destination_list->push_back({{cs_x, cs_y, z, point_size}, color}); + } +} + +void FramebufferManager::FlushEFBPokes(StateTracker* state_tracker) +{ + if (!m_color_poke_vertices.empty()) + { + DrawPokeVertices(state_tracker, m_color_poke_vertices.data(), m_color_poke_vertices.size(), + true, false); + + m_color_poke_vertices.clear(); + } + + if (!m_depth_poke_vertices.empty()) + { + DrawPokeVertices(state_tracker, m_depth_poke_vertices.data(), m_depth_poke_vertices.size(), + false, true); + + m_depth_poke_vertices.clear(); + } +} + +void FramebufferManager::DrawPokeVertices(StateTracker* state_tracker, + const EFBPokeVertex* vertices, size_t vertex_count, + bool write_color, bool write_depth) +{ + // Relatively simple since we don't have any bindings. + VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + + // We don't use the utility shader in order to keep the vertices compact. 
+ PipelineInfo pipeline_info = {}; + pipeline_info.vertex_format = m_poke_vertex_format.get(); + pipeline_info.pipeline_layout = g_object_cache->GetStandardPipelineLayout(); + pipeline_info.vs = m_poke_vertex_shader; + pipeline_info.gs = (m_efb_layers > 1) ? m_poke_geometry_shader : VK_NULL_HANDLE; + pipeline_info.ps = m_poke_fragment_shader; + pipeline_info.render_pass = m_efb_load_render_pass; + pipeline_info.rasterization_state.bits = Util::GetNoCullRasterizationState().bits; + pipeline_info.depth_stencil_state.bits = Util::GetNoDepthTestingDepthStencilState().bits; + pipeline_info.blend_state.bits = Util::GetNoBlendingBlendState().bits; + pipeline_info.blend_state.write_mask = 0; + pipeline_info.primitive_topology = m_poke_primitive_topology; + if (write_color) + { + pipeline_info.blend_state.write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } + if (write_depth) + { + pipeline_info.depth_stencil_state.test_enable = VK_TRUE; + pipeline_info.depth_stencil_state.write_enable = VK_TRUE; + pipeline_info.depth_stencil_state.compare_op = VK_COMPARE_OP_ALWAYS; + } + + VkPipeline pipeline = g_object_cache->GetPipeline(pipeline_info); + if (pipeline == VK_NULL_HANDLE) + { + PanicAlert("Failed to get pipeline for EFB poke draw"); + return; + } + + // Populate vertex buffer. + size_t vertices_size = sizeof(EFBPokeVertex) * m_color_poke_vertices.size(); + if (!m_poke_vertex_stream_buffer->ReserveMemory(vertices_size, sizeof(EfbPokeData), true, true, + false)) + { + // Kick a command buffer first. 
+ WARN_LOG(VIDEO, "Kicking command buffer due to no EFB poke space."); + Util::ExecuteCurrentCommandsAndRestoreState(state_tracker, true); + command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + + if (!m_poke_vertex_stream_buffer->ReserveMemory(vertices_size, sizeof(EfbPokeData), true, true, + false)) + { + PanicAlert("Failed to get space for EFB poke vertices"); + return; + } + } + VkBuffer vb_buffer = m_poke_vertex_stream_buffer->GetBuffer(); + VkDeviceSize vb_offset = m_poke_vertex_stream_buffer->GetCurrentOffset(); + memcpy(m_poke_vertex_stream_buffer->GetCurrentHostPointer(), vertices, vertices_size); + m_poke_vertex_stream_buffer->CommitMemory(vertices_size); + + // Set up state. + state_tracker->EndClearRenderPass(); + state_tracker->BeginRenderPass(); + state_tracker->SetPendingRebind(); + Util::SetViewportAndScissor(command_buffer, 0, 0, m_efb_width, m_efb_height); + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + vkCmdBindVertexBuffers(command_buffer, 0, 1, &vb_buffer, &vb_offset); + vkCmdDraw(command_buffer, static_cast(vertex_count), 1, 0, 0); +} + +void FramebufferManager::CreatePokeVertexFormat() +{ + PortableVertexDeclaration vtx_decl = {}; + vtx_decl.position.enable = true; + vtx_decl.position.type = VAR_FLOAT; + vtx_decl.position.components = 4; + vtx_decl.position.integer = false; + vtx_decl.position.offset = offsetof(EFBPokeVertex, position); + vtx_decl.colors[0].enable = true; + vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; + vtx_decl.colors[0].components = 4; + vtx_decl.colors[0].integer = false; + vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color); + vtx_decl.stride = sizeof(EFBPokeVertex); + + m_poke_vertex_format = std::make_unique(vtx_decl); +} + +bool FramebufferManager::CreatePokeVertexBuffer() +{ + m_poke_vertex_stream_buffer = StreamBuffer::Create( + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, POKE_VERTEX_BUFFER_SIZE, POKE_VERTEX_BUFFER_SIZE); + if (!m_poke_vertex_stream_buffer) + { + 
ERROR_LOG(VIDEO, "Failed to create EFB poke vertex buffer"); + return false; + } + + return true; +} + +void FramebufferManager::DestroyPokeVertexBuffer() +{ +} + +bool FramebufferManager::CompilePokeShaders() +{ + static const char POKE_VERTEX_SHADER_SOURCE[] = R"( + layout(location = 0) in vec4 ipos; + layout(location = 5) in vec4 icol0; + + layout(location = 0) out vec4 col0; + + void main() + { + gl_Position = vec4(ipos.xyz, 1.0f); + #if USE_POINT_SIZE + gl_PointSize = ipos.w; + #endif + col0 = icol0; + } + + )"; + + static const char POKE_GEOMETRY_SHADER_SOURCE[] = R"( + layout(triangles) in; + layout(triangle_strip, max_vertices = EFB_LAYERS * 3) out; + + in VertexData + { + vec4 col0; + } in_data[]; + + out VertexData + { + vec4 col0; + } out_data; + + void main() + { + for (int j = 0; j < EFB_LAYERS; j++) + { + for (int i = 0; i < 3; i++) + { + gl_Layer = j; + gl_Position = gl_in[i].gl_Position; + out_data.col0 = in_data[i].col0; + EmitVertex(); + } + EndPrimitive(); + } + } + )"; + + static const char POKE_PIXEL_SHADER_SOURCE[] = R"( + layout(location = 0) in vec4 col0; + layout(location = 0) out vec4 ocol0; + void main() + { + ocol0 = col0; + } + )"; + + std::string source = g_object_cache->GetUtilityShaderHeader(); + if (m_poke_primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) + source += "#define USE_POINT_SIZE 1\n"; + source += POKE_VERTEX_SHADER_SOURCE; + m_poke_vertex_shader = Util::CompileAndCreateVertexShader(source); + if (m_poke_vertex_shader == VK_NULL_HANDLE) + return false; + + if (g_vulkan_context->SupportsGeometryShaders()) + { + source = g_object_cache->GetUtilityShaderHeader() + POKE_GEOMETRY_SHADER_SOURCE; + m_poke_geometry_shader = Util::CompileAndCreateGeometryShader(source); + if (m_poke_geometry_shader == VK_NULL_HANDLE) + return false; + } + + source = g_object_cache->GetUtilityShaderHeader() + POKE_PIXEL_SHADER_SOURCE; + m_poke_fragment_shader = Util::CompileAndCreateFragmentShader(source); + if (m_poke_fragment_shader == 
VK_NULL_HANDLE) + return false; + + return true; +} + +void FramebufferManager::DestroyPokeShaders() +{ + if (m_poke_vertex_shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_vertex_shader, nullptr); + m_poke_vertex_shader = VK_NULL_HANDLE; + } + if (m_poke_geometry_shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_geometry_shader, nullptr); + m_poke_geometry_shader = VK_NULL_HANDLE; + } + if (m_poke_fragment_shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_fragment_shader, nullptr); + m_poke_vertex_shader = VK_NULL_HANDLE; + } +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/FramebufferManager.h b/Source/Core/VideoBackends/Vulkan/FramebufferManager.h new file mode 100644 index 0000000000..3ae8b90f05 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/FramebufferManager.h @@ -0,0 +1,174 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include + +#include "VideoCommon/FramebufferManagerBase.h" + +#include "VideoBackends/Vulkan/Constants.h" + +namespace Vulkan +{ +class StagingTexture2D; +class StateTracker; +class StreamBuffer; +class Texture2D; +class VertexFormat; + +class XFBSource : public XFBSourceBase +{ + void DecodeToTexture(u32 xfb_addr, u32 fb_width, u32 fb_height) override {} + void CopyEFB(float gamma) override {} +}; + +class FramebufferManager : public FramebufferManagerBase +{ +public: + FramebufferManager(); + ~FramebufferManager(); + + bool Initialize(); + + VkRenderPass GetEFBLoadRenderPass() const { return m_efb_load_render_pass; } + VkRenderPass GetEFBClearRenderPass() const { return m_efb_clear_render_pass; } + u32 GetEFBWidth() const { return m_efb_width; } + u32 GetEFBHeight() const { return m_efb_height; } + u32 GetEFBLayers() const { return m_efb_layers; } + VkSampleCountFlagBits GetEFBSamples() const { return m_efb_samples; } + Texture2D* GetEFBColorTexture() const { return m_efb_color_texture.get(); } + Texture2D* GetEFBDepthTexture() const { return m_efb_depth_texture.get(); } + VkFramebuffer GetEFBFramebuffer() const { return m_efb_framebuffer; } + void GetTargetSize(unsigned int* width, unsigned int* height) override; + + std::unique_ptr CreateXFBSource(unsigned int target_width, + unsigned int target_height, + unsigned int layers) override + { + return std::make_unique(); + } + + void CopyToRealXFB(u32 xfb_addr, u32 fb_stride, u32 fb_height, const EFBRectangle& source_rc, + float gamma = 1.0f) override + { + } + + void ResizeEFBTextures(); + + // Recompile shaders, use when MSAA mode changes. + void RecreateRenderPass(); + void RecompileShaders(); + + // Reinterpret pixel format of EFB color texture. + // Assumes no render pass is currently in progress. + // Swaps EFB framebuffers, so re-bind afterwards. + void ReinterpretPixelData(int convtype); + + // This render pass can be used for other readback operations. 
+ VkRenderPass GetColorCopyForReadbackRenderPass() const { return m_copy_color_render_pass; } + // Resolve color/depth textures to a non-msaa texture, and return it. + Texture2D* ResolveEFBColorTexture(StateTracker* state_tracker, const VkRect2D& region); + Texture2D* ResolveEFBDepthTexture(StateTracker* state_tracker, const VkRect2D& region); + + // Reads a framebuffer value back from the GPU. This may block if the cache is not current. + u32 PeekEFBColor(StateTracker* state_tracker, u32 x, u32 y); + float PeekEFBDepth(StateTracker* state_tracker, u32 x, u32 y); + void InvalidatePeekCache(); + + // Writes a value to the framebuffer. This will never block, and writes will be batched. + void PokeEFBColor(StateTracker* state_tracker, u32 x, u32 y, u32 color); + void PokeEFBDepth(StateTracker* state_tracker, u32 x, u32 y, float depth); + void FlushEFBPokes(StateTracker* state_tracker); + +private: + struct EFBPokeVertex + { + float position[4]; + u32 color; + }; + + bool CreateEFBRenderPass(); + void DestroyEFBRenderPass(); + bool CreateEFBFramebuffer(); + void DestroyEFBFramebuffer(); + + bool CompileConversionShaders(); + void DestroyConversionShaders(); + + bool CreateReadbackRenderPasses(); + void DestroyReadbackRenderPasses(); + bool CompileReadbackShaders(); + void DestroyReadbackShaders(); + bool CreateReadbackTextures(); + void DestroyReadbackTextures(); + bool CreateReadbackFramebuffer(); + void DestroyReadbackFramebuffer(); + + void CreatePokeVertexFormat(); + bool CreatePokeVertexBuffer(); + void DestroyPokeVertexBuffer(); + bool CompilePokeShaders(); + void DestroyPokeShaders(); + + bool PopulateColorReadbackTexture(StateTracker* state_tracker); + bool PopulateDepthReadbackTexture(StateTracker* state_tracker); + + void CreatePokeVertices(std::vector* destination_list, u32 x, u32 y, float z, + u32 color); + + void DrawPokeVertices(StateTracker* state_tracker, const EFBPokeVertex* vertices, + size_t vertex_count, bool write_color, bool write_depth); + + 
VkRenderPass m_efb_load_render_pass = VK_NULL_HANDLE; + VkRenderPass m_efb_clear_render_pass = VK_NULL_HANDLE; + VkRenderPass m_depth_resolve_render_pass = VK_NULL_HANDLE; + + u32 m_efb_width = 0; + u32 m_efb_height = 0; + u32 m_efb_layers = 1; + VkSampleCountFlagBits m_efb_samples = VK_SAMPLE_COUNT_1_BIT; + + std::unique_ptr m_efb_color_texture; + std::unique_ptr m_efb_convert_color_texture; + std::unique_ptr m_efb_depth_texture; + std::unique_ptr m_efb_resolve_color_texture; + std::unique_ptr m_efb_resolve_depth_texture; + VkFramebuffer m_efb_framebuffer = VK_NULL_HANDLE; + VkFramebuffer m_efb_convert_framebuffer = VK_NULL_HANDLE; + VkFramebuffer m_depth_resolve_framebuffer = VK_NULL_HANDLE; + + // Format conversion shaders + VkShaderModule m_ps_rgb8_to_rgba6 = VK_NULL_HANDLE; + VkShaderModule m_ps_rgba6_to_rgb8 = VK_NULL_HANDLE; + VkShaderModule m_ps_depth_resolve = VK_NULL_HANDLE; + + // EFB readback texture + std::unique_ptr m_color_copy_texture; + std::unique_ptr m_depth_copy_texture; + VkFramebuffer m_color_copy_framebuffer = VK_NULL_HANDLE; + VkFramebuffer m_depth_copy_framebuffer = VK_NULL_HANDLE; + + // CPU-side EFB readback texture + std::unique_ptr m_color_readback_texture; + std::unique_ptr m_depth_readback_texture; + bool m_color_readback_texture_valid = false; + bool m_depth_readback_texture_valid = false; + + // EFB poke drawing setup + std::unique_ptr m_poke_vertex_format; + std::unique_ptr m_poke_vertex_stream_buffer; + std::vector m_color_poke_vertices; + std::vector m_depth_poke_vertices; + VkPrimitiveTopology m_poke_primitive_topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + + VkRenderPass m_copy_color_render_pass = VK_NULL_HANDLE; + VkRenderPass m_copy_depth_render_pass = VK_NULL_HANDLE; + VkShaderModule m_copy_color_shader = VK_NULL_HANDLE; + VkShaderModule m_copy_depth_shader = VK_NULL_HANDLE; + + VkShaderModule m_poke_vertex_shader = VK_NULL_HANDLE; + VkShaderModule m_poke_geometry_shader = VK_NULL_HANDLE; + VkShaderModule 
m_poke_fragment_shader = VK_NULL_HANDLE; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp new file mode 100644 index 0000000000..89af5b7c53 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp @@ -0,0 +1,1037 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include +#include + +#include "Common/CommonFuncs.h" +#include "Common/LinearDiskCache.h" +#include "Core/ConfigManager.h" + +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/ShaderCompiler.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VertexFormat.h" +#include "VideoBackends/Vulkan/VulkanContext.h" +#include "VideoCommon/Statistics.h" + +namespace Vulkan +{ +std::unique_ptr g_object_cache; + +ObjectCache::ObjectCache() +{ +} + +ObjectCache::~ObjectCache() +{ + DestroyPipelineCache(); + DestroyShaderCaches(); + DestroySharedShaders(); + DestroySamplers(); + DestroyPipelineLayouts(); + DestroyDescriptorSetLayouts(); +} + +bool ObjectCache::Initialize() +{ + if (!CreateDescriptorSetLayouts()) + return false; + + if (!CreatePipelineLayouts()) + return false; + + LoadShaderCaches(); + if (!CreatePipelineCache(true)) + return false; + + if (!CreateUtilityShaderVertexFormat()) + return false; + + if (!CreateStaticSamplers()) + return false; + + if (!CompileSharedShaders()) + return false; + + m_utility_shader_vertex_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1024 * 1024, 4 * 1024 * 1024); + m_utility_shader_uniform_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 1024, 4 * 1024 * 1024); + if (!m_utility_shader_vertex_buffer || !m_utility_shader_uniform_buffer) + return false; + + return true; +} + +static VkPipelineRasterizationStateCreateInfo +GetVulkanRasterizationState(const 
RasterizationState& state) +{ + return { + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineRasterizationStateCreateFlags flags + state.depth_clamp, // VkBool32 depthClampEnable + VK_FALSE, // VkBool32 rasterizerDiscardEnable + VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode + state.cull_mode, // VkCullModeFlags cullMode + VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace + VK_FALSE, // VkBool32 depthBiasEnable + 0.0f, // float depthBiasConstantFactor + 0.0f, // float depthBiasClamp + 0.0f, // float depthBiasSlopeFactor + 1.0f // float lineWidth + }; +} + +static VkPipelineMultisampleStateCreateInfo +GetVulkanMultisampleState(const RasterizationState& rs_state) +{ + return { + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineMultisampleStateCreateFlags flags + rs_state.samples, // VkSampleCountFlagBits rasterizationSamples + rs_state.per_sample_shading, // VkBool32 sampleShadingEnable + 1.0f, // float minSampleShading + nullptr, // const VkSampleMask* pSampleMask; + VK_FALSE, // VkBool32 alphaToCoverageEnable + VK_FALSE // VkBool32 alphaToOneEnable + }; +} + +static VkPipelineDepthStencilStateCreateInfo +GetVulkanDepthStencilState(const DepthStencilState& state) +{ + return { + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineDepthStencilStateCreateFlags flags + state.test_enable, // VkBool32 depthTestEnable + state.write_enable, // VkBool32 depthWriteEnable + state.compare_op, // VkCompareOp depthCompareOp + VK_FALSE, // VkBool32 depthBoundsTestEnable + VK_FALSE, // VkBool32 stencilTestEnable + {}, // VkStencilOpState front + {}, // VkStencilOpState back + 0.0f, // float minDepthBounds + 1.0f // float maxDepthBounds + }; +} + +static VkPipelineColorBlendAttachmentState GetVulkanAttachmentBlendState(const 
BlendState& state) +{ + VkPipelineColorBlendAttachmentState vk_state = { + state.blend_enable, // VkBool32 blendEnable + state.src_blend, // VkBlendFactor srcColorBlendFactor + state.dst_blend, // VkBlendFactor dstColorBlendFactor + state.blend_op, // VkBlendOp colorBlendOp + state.src_alpha_blend, // VkBlendFactor srcAlphaBlendFactor + state.dst_alpha_blend, // VkBlendFactor dstAlphaBlendFactor + state.alpha_blend_op, // VkBlendOp alphaBlendOp + state.write_mask // VkColorComponentFlags colorWriteMask + }; + + return vk_state; +} + +static VkPipelineColorBlendStateCreateInfo +GetVulkanColorBlendState(const BlendState& state, + const VkPipelineColorBlendAttachmentState* attachments, + uint32_t num_attachments) +{ + VkPipelineColorBlendStateCreateInfo vk_state = { + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineColorBlendStateCreateFlags flags + state.logic_op_enable, // VkBool32 logicOpEnable + state.logic_op, // VkLogicOp logicOp + num_attachments, // uint32_t attachmentCount + attachments, // const VkPipelineColorBlendAttachmentState* pAttachments + {1.0f, 1.0f, 1.0f, 1.0f} // float blendConstants[4] + }; + + return vk_state; +} + +VkPipeline ObjectCache::GetPipeline(const PipelineInfo& info) +{ + auto iter = m_pipeline_objects.find(info); + if (iter != m_pipeline_objects.end()) + return iter->second; + + // Declare descriptors for empty vertex buffers/attributes + static const VkPipelineVertexInputStateCreateInfo empty_vertex_input_state = { + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineVertexInputStateCreateFlags flags + 0, // uint32_t vertexBindingDescriptionCount + nullptr, // const VkVertexInputBindingDescription* pVertexBindingDescriptions + 0, // uint32_t vertexAttributeDescriptionCount + nullptr // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions + }; + + // 
Vertex inputs + const VkPipelineVertexInputStateCreateInfo& vertex_input_state = + info.vertex_format ? info.vertex_format->GetVertexInputStateInfo() : empty_vertex_input_state; + + // Input assembly + VkPipelineInputAssemblyStateCreateInfo input_assembly_state = { + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineInputAssemblyStateCreateFlags flags + info.primitive_topology, // VkPrimitiveTopology topology + VK_TRUE // VkBool32 primitiveRestartEnable + }; + + // Shaders to stages + VkPipelineShaderStageCreateInfo shader_stages[3]; + uint32_t num_shader_stages = 0; + if (info.vs != VK_NULL_HANDLE) + { + shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_VERTEX_BIT, + info.vs, + "main"}; + } + if (info.gs != VK_NULL_HANDLE) + { + shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_GEOMETRY_BIT, + info.gs, + "main"}; + } + if (info.ps != VK_NULL_HANDLE) + { + shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_FRAGMENT_BIT, + info.ps, + "main"}; + } + + // Fill in Vulkan descriptor structs from our state structures. + VkPipelineRasterizationStateCreateInfo rasterization_state = + GetVulkanRasterizationState(info.rasterization_state); + VkPipelineMultisampleStateCreateInfo multisample_state = + GetVulkanMultisampleState(info.rasterization_state); + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = + GetVulkanDepthStencilState(info.depth_stencil_state); + VkPipelineColorBlendAttachmentState blend_attachment_state = + GetVulkanAttachmentBlendState(info.blend_state); + VkPipelineColorBlendStateCreateInfo blend_state = + GetVulkanColorBlendState(info.blend_state, &blend_attachment_state, 1); + + // This viewport isn't used, but needs to be specified anyway. 
+ static const VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + static const VkRect2D scissor = {{0, 0}, {1, 1}}; + static const VkPipelineViewportStateCreateInfo viewport_state = { + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + nullptr, + 0, // VkPipelineViewportStateCreateFlags flags; + 1, // uint32_t viewportCount + &viewport, // const VkViewport* pViewports + 1, // uint32_t scissorCount + &scissor // const VkRect2D* pScissors + }; + + // Set viewport and scissor dynamic state so we can change it elsewhere. + static const VkDynamicState dynamic_states[] = {VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + static const VkPipelineDynamicStateCreateInfo dynamic_state = { + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, nullptr, + 0, // VkPipelineDynamicStateCreateFlags flags + static_cast(ArraySize(dynamic_states)), // uint32_t dynamicStateCount + dynamic_states // const VkDynamicState* pDynamicStates + }; + + // Combine to full pipeline info structure. 
+ VkGraphicsPipelineCreateInfo pipeline_info = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + nullptr, // VkStructureType sType + 0, // VkPipelineCreateFlags flags + num_shader_stages, // uint32_t stageCount + shader_stages, // const VkPipelineShaderStageCreateInfo* pStages + &vertex_input_state, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState + &input_assembly_state, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState + nullptr, // const VkPipelineTessellationStateCreateInfo* pTessellationState + &viewport_state, // const VkPipelineViewportStateCreateInfo* pViewportState + &rasterization_state, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState + &multisample_state, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState + &depth_stencil_state, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState + &blend_state, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState + &dynamic_state, // const VkPipelineDynamicStateCreateInfo* pDynamicState + info.pipeline_layout, // VkPipelineLayout layout + info.render_pass, // VkRenderPass renderPass + 0, // uint32_t subpass + VK_NULL_HANDLE, // VkPipeline basePipelineHandle + -1 // int32_t basePipelineIndex + }; + + VkPipeline pipeline = VK_NULL_HANDLE; + VkResult res = vkCreateGraphicsPipelines(g_vulkan_context->GetDevice(), m_pipeline_cache, 1, + &pipeline_info, nullptr, &pipeline); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkCreateGraphicsPipelines failed: "); + + m_pipeline_objects.emplace(info, pipeline); + return pipeline; +} + +std::string ObjectCache::GetDiskCacheFileName(const char* type) +{ + return StringFromFormat("%svulkan-%s-%s.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(), + SConfig::GetInstance().m_strUniqueID.c_str(), type); +} + +class PipelineCacheReadCallback : public LinearDiskCacheReader +{ +public: + PipelineCacheReadCallback(std::vector* data) : m_data(data) {} + void Read(const u32& key, const u8* 
value, u32 value_size) override + { + m_data->resize(value_size); + if (value_size > 0) + memcpy(m_data->data(), value, value_size); + } + +private: + std::vector* m_data; +}; + +class PipelineCacheReadIgnoreCallback : public LinearDiskCacheReader +{ +public: + void Read(const u32& key, const u8* value, u32 value_size) override {} +}; + +bool ObjectCache::CreatePipelineCache(bool load_from_disk) +{ + // We have to keep the pipeline cache file name around since when we save it + // we delete the old one, by which time the game's unique ID is already cleared. + m_pipeline_cache_filename = GetDiskCacheFileName("pipeline"); + + std::vector disk_data; + if (load_from_disk) + { + LinearDiskCache disk_cache; + PipelineCacheReadCallback read_callback(&disk_data); + if (disk_cache.OpenAndRead(m_pipeline_cache_filename, read_callback) != 1) + disk_data.clear(); + } + + VkPipelineCacheCreateInfo info = { + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineCacheCreateFlags flags + disk_data.size(), // size_t initialDataSize + !disk_data.empty() ? disk_data.data() : nullptr, // const void* pInitialData + }; + + VkResult res = + vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); + if (res == VK_SUCCESS) + return true; + + // Failed to create pipeline cache, try with it empty. 
+ LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed, trying empty cache: "); + info.initialDataSize = 0; + info.pInitialData = nullptr; + res = vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); + if (res == VK_SUCCESS) + return true; + + LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed: "); + return false; +} + +void ObjectCache::DestroyPipelineCache() +{ + for (const auto& it : m_pipeline_objects) + { + if (it.second != VK_NULL_HANDLE) + vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr); + } + m_pipeline_objects.clear(); + + vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr); + m_pipeline_cache = VK_NULL_HANDLE; +} + +void ObjectCache::ClearPipelineCache() +{ + // Reallocate the pipeline cache object, so it starts fresh and we don't + // save old pipelines to disk. This is for major changes, e.g. MSAA mode change. + DestroyPipelineCache(); + if (!CreatePipelineCache(false)) + PanicAlert("Failed to re-create pipeline cache"); +} + +void ObjectCache::SavePipelineCache() +{ + size_t data_size; + VkResult res = + vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); + return; + } + + std::vector data(data_size); + res = vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, + data.data()); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); + return; + } + + // Delete the old cache and re-create. + File::Delete(m_pipeline_cache_filename); + + // We write a single key of 1, with the entire pipeline cache data. + // Not ideal, but our disk cache class does not support just writing a single blob + // of data without specifying a key. 
+ LinearDiskCache disk_cache; + PipelineCacheReadIgnoreCallback callback; + disk_cache.OpenAndRead(m_pipeline_cache_filename, callback); + disk_cache.Append(1, data.data(), static_cast(data.size())); + disk_cache.Close(); +} + +// Cache inserter that is called back when reading from the file +template +struct ShaderCacheReader : public LinearDiskCacheReader +{ + ShaderCacheReader(std::map& shader_map) : m_shader_map(shader_map) {} + void Read(const Uid& key, const u32* value, u32 value_size) override + { + // We don't insert null modules into the shader map since creation could succeed later on. + // e.g. we're generating bad code, but fix this in a later version, and for some reason + // the cache is not invalidated. + VkShaderModule module = Util::CreateShaderModule(value, value_size); + if (module == VK_NULL_HANDLE) + return; + + m_shader_map.emplace(key, module); + } + + std::map& m_shader_map; +}; + +void ObjectCache::LoadShaderCaches() +{ + ShaderCacheReader vs_reader(m_vs_cache.shader_map); + m_vs_cache.disk_cache.OpenAndRead(GetDiskCacheFileName("vs"), vs_reader); + SETSTAT(stats.numVertexShadersCreated, static_cast(m_vs_cache.shader_map.size())); + SETSTAT(stats.numVertexShadersAlive, static_cast(m_vs_cache.shader_map.size())); + + ShaderCacheReader ps_reader(m_ps_cache.shader_map); + m_ps_cache.disk_cache.OpenAndRead(GetDiskCacheFileName("ps"), ps_reader); + SETSTAT(stats.numPixelShadersCreated, static_cast(m_ps_cache.shader_map.size())); + SETSTAT(stats.numPixelShadersAlive, static_cast(m_ps_cache.shader_map.size())); + + if (g_vulkan_context->SupportsGeometryShaders()) + { + ShaderCacheReader gs_reader(m_gs_cache.shader_map); + m_gs_cache.disk_cache.OpenAndRead(GetDiskCacheFileName("gs"), gs_reader); + } +} + +template +static void DestroyShaderCache(T& cache) +{ + cache.disk_cache.Close(); + for (const auto& it : cache.shader_map) + { + if (it.second != VK_NULL_HANDLE) + vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second, nullptr); + } + 
cache.shader_map.clear(); +} + +void ObjectCache::DestroyShaderCaches() +{ + DestroyShaderCache(m_vs_cache); + DestroyShaderCache(m_ps_cache); + + if (g_vulkan_context->SupportsGeometryShaders()) + DestroyShaderCache(m_gs_cache); +} + +VkShaderModule ObjectCache::GetVertexShaderForUid(const VertexShaderUid& uid) +{ + auto it = m_vs_cache.shader_map.find(uid); + if (it != m_vs_cache.shader_map.end()) + return it->second; + + // Not in the cache, so compile the shader. + ShaderCompiler::SPIRVCodeVector spv; + VkShaderModule module = VK_NULL_HANDLE; + ShaderCode source_code = GenerateVertexShaderCode(APIType::Vulkan, uid.GetUidData()); + if (ShaderCompiler::CompileVertexShader(&spv, source_code.GetBuffer().c_str(), + source_code.GetBuffer().length())) + { + module = Util::CreateShaderModule(spv.data(), spv.size()); + + // Append to shader cache if it created successfully. + if (module != VK_NULL_HANDLE) + { + m_vs_cache.disk_cache.Append(uid, spv.data(), static_cast(spv.size())); + INCSTAT(stats.numVertexShadersCreated); + INCSTAT(stats.numVertexShadersAlive); + } + } + + // We still insert null entries to prevent further compilation attempts. + m_vs_cache.shader_map.emplace(uid, module); + return module; +} + +VkShaderModule ObjectCache::GetGeometryShaderForUid(const GeometryShaderUid& uid) +{ + _assert_(g_vulkan_context->SupportsGeometryShaders()); + auto it = m_gs_cache.shader_map.find(uid); + if (it != m_gs_cache.shader_map.end()) + return it->second; + + // Not in the cache, so compile the shader. + ShaderCompiler::SPIRVCodeVector spv; + VkShaderModule module = VK_NULL_HANDLE; + ShaderCode source_code = GenerateGeometryShaderCode(APIType::Vulkan, uid.GetUidData()); + if (ShaderCompiler::CompileGeometryShader(&spv, source_code.GetBuffer().c_str(), + source_code.GetBuffer().length())) + { + module = Util::CreateShaderModule(spv.data(), spv.size()); + + // Append to shader cache if it created successfully. 
+ if (module != VK_NULL_HANDLE) + m_gs_cache.disk_cache.Append(uid, spv.data(), static_cast(spv.size())); + } + + // We still insert null entries to prevent further compilation attempts. + m_gs_cache.shader_map.emplace(uid, module); + return module; +} + +VkShaderModule ObjectCache::GetPixelShaderForUid(const PixelShaderUid& uid, + DSTALPHA_MODE dstalpha_mode) +{ + auto it = m_ps_cache.shader_map.find(uid); + if (it != m_ps_cache.shader_map.end()) + return it->second; + + // Not in the cache, so compile the shader. + ShaderCompiler::SPIRVCodeVector spv; + VkShaderModule module = VK_NULL_HANDLE; + ShaderCode source_code = + GeneratePixelShaderCode(dstalpha_mode, APIType::Vulkan, uid.GetUidData()); + if (ShaderCompiler::CompileFragmentShader(&spv, source_code.GetBuffer().c_str(), + source_code.GetBuffer().length())) + { + module = Util::CreateShaderModule(spv.data(), spv.size()); + + // Append to shader cache if it created successfully. + if (module != VK_NULL_HANDLE) + { + m_ps_cache.disk_cache.Append(uid, spv.data(), static_cast(spv.size())); + INCSTAT(stats.numPixelShadersCreated); + INCSTAT(stats.numPixelShadersAlive); + } + } + + // We still insert null entries to prevent further compilation attempts. 
+ m_ps_cache.shader_map.emplace(uid, module); + return module; +} + +void ObjectCache::ClearSamplerCache() +{ + for (const auto& it : m_sampler_cache) + { + if (it.second != VK_NULL_HANDLE) + vkDestroySampler(g_vulkan_context->GetDevice(), it.second, nullptr); + } + m_sampler_cache.clear(); +} + +void ObjectCache::DestroySamplers() +{ + ClearSamplerCache(); + + if (m_point_sampler != VK_NULL_HANDLE) + { + vkDestroySampler(g_vulkan_context->GetDevice(), m_point_sampler, nullptr); + m_point_sampler = VK_NULL_HANDLE; + } + + if (m_linear_sampler != VK_NULL_HANDLE) + { + vkDestroySampler(g_vulkan_context->GetDevice(), m_linear_sampler, nullptr); + m_linear_sampler = VK_NULL_HANDLE; + } +} + +void ObjectCache::RecompileSharedShaders() +{ + DestroySharedShaders(); + if (!CompileSharedShaders()) + PanicAlert("Failed to recompile shared shaders."); +} + +bool ObjectCache::CreateDescriptorSetLayouts() +{ + static const VkDescriptorSetLayoutBinding ubo_set_bindings[] = { + {UBO_DESCRIPTOR_SET_BINDING_PS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, + VK_SHADER_STAGE_FRAGMENT_BIT}, + {UBO_DESCRIPTOR_SET_BINDING_VS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT}, + {UBO_DESCRIPTOR_SET_BINDING_GS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, + VK_SHADER_STAGE_GEOMETRY_BIT}}; + + // Annoying these have to be split, apparently we can't partially update an array without the + // validation layers throwing a warning. 
+ static const VkDescriptorSetLayoutBinding sampler_set_bindings[] = { + {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {5, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {6, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {7, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}}; + + static const VkDescriptorSetLayoutBinding ssbo_set_bindings[] = { + {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}}; + + static const VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SETS] = { + {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, + static_cast(ArraySize(ubo_set_bindings)), ubo_set_bindings}, + {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, + static_cast(ArraySize(sampler_set_bindings)), sampler_set_bindings}, + {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, + static_cast(ArraySize(ssbo_set_bindings)), ssbo_set_bindings}}; + + for (size_t i = 0; i < NUM_DESCRIPTOR_SETS; i++) + { + VkResult res = vkCreateDescriptorSetLayout(g_vulkan_context->GetDevice(), &create_infos[i], + nullptr, &m_descriptor_set_layouts[i]); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDescriptorSetLayout failed: "); + return false; + } + } + + return true; +} + +void ObjectCache::DestroyDescriptorSetLayouts() +{ + for (VkDescriptorSetLayout layout : m_descriptor_set_layouts) + { + if (layout != VK_NULL_HANDLE) + vkDestroyDescriptorSetLayout(g_vulkan_context->GetDevice(), layout, nullptr); + } +} + +bool ObjectCache::CreatePipelineLayouts() +{ + 
VkResult res; + + // Descriptor sets for each pipeline layout (the bbox layout additionally binds the SSBO set) + VkDescriptorSetLayout standard_sets[] = { + m_descriptor_set_layouts[DESCRIPTOR_SET_UNIFORM_BUFFERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS]}; + VkDescriptorSetLayout bbox_sets[] = { + m_descriptor_set_layouts[DESCRIPTOR_SET_UNIFORM_BUFFERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_SHADER_STORAGE_BUFFERS]}; + VkPushConstantRange push_constant_range = { + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE}; + + // Info for each pipeline layout (only the push constant layout declares a push constant range) + VkPipelineLayoutCreateInfo standard_info = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + nullptr, + 0, + static_cast(ArraySize(standard_sets)), + standard_sets, + 0, + nullptr}; + VkPipelineLayoutCreateInfo bbox_info = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + nullptr, + 0, + static_cast(ArraySize(bbox_sets)), + bbox_sets, + 0, + nullptr}; + VkPipelineLayoutCreateInfo push_constant_info = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + nullptr, + 0, + static_cast(ArraySize(standard_sets)), + standard_sets, + 1, + &push_constant_range}; + + if ((res = vkCreatePipelineLayout(g_vulkan_context->GetDevice(), &standard_info, nullptr, + &m_standard_pipeline_layout)) != VK_SUCCESS || + (res = vkCreatePipelineLayout(g_vulkan_context->GetDevice(), &bbox_info, nullptr, + &m_bbox_pipeline_layout)) != VK_SUCCESS || + (res = vkCreatePipelineLayout(g_vulkan_context->GetDevice(), &push_constant_info, nullptr, + &m_push_constant_pipeline_layout)) != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreatePipelineLayout failed: "); + return false; + } + + return true; +} + +void ObjectCache::DestroyPipelineLayouts() +{ + if (m_standard_pipeline_layout != VK_NULL_HANDLE) + vkDestroyPipelineLayout(g_vulkan_context->GetDevice(), m_standard_pipeline_layout, nullptr); + if (m_bbox_pipeline_layout != VK_NULL_HANDLE) + 
vkDestroyPipelineLayout(g_vulkan_context->GetDevice(), m_bbox_pipeline_layout, nullptr); + if (m_push_constant_pipeline_layout != VK_NULL_HANDLE) + vkDestroyPipelineLayout(g_vulkan_context->GetDevice(), m_push_constant_pipeline_layout, + nullptr); +} + +bool ObjectCache::CreateUtilityShaderVertexFormat() +{ + PortableVertexDeclaration vtx_decl = {}; + vtx_decl.position.enable = true; + vtx_decl.position.type = VAR_FLOAT; + vtx_decl.position.components = 4; + vtx_decl.position.integer = false; + vtx_decl.position.offset = offsetof(UtilityShaderVertex, Position); + vtx_decl.texcoords[0].enable = true; + vtx_decl.texcoords[0].type = VAR_FLOAT; + vtx_decl.texcoords[0].components = 4; + vtx_decl.texcoords[0].integer = false; + vtx_decl.texcoords[0].offset = offsetof(UtilityShaderVertex, TexCoord); + vtx_decl.colors[0].enable = true; + vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; + vtx_decl.colors[0].components = 4; + vtx_decl.colors[0].integer = false; + vtx_decl.colors[0].offset = offsetof(UtilityShaderVertex, Color); + vtx_decl.stride = sizeof(UtilityShaderVertex); + + m_utility_shader_vertex_format = std::make_unique(vtx_decl); + return true; +} + +bool ObjectCache::CreateStaticSamplers() +{ + VkSamplerCreateInfo create_info = { + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkSamplerCreateFlags flags + VK_FILTER_NEAREST, // VkFilter magFilter + VK_FILTER_NEAREST, // VkFilter minFilter + VK_SAMPLER_MIPMAP_MODE_NEAREST, // VkSamplerMipmapMode mipmapMode + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // VkSamplerAddressMode addressModeU + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // VkSamplerAddressMode addressModeV + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW + 0.0f, // float mipLodBias + VK_FALSE, // VkBool32 anisotropyEnable + 1.0f, // float maxAnisotropy + VK_FALSE, // VkBool32 compareEnable + VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp + std::numeric_limits::min(), // float 
minLod + std::numeric_limits::max(), // float maxLod + VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor + VK_FALSE // VkBool32 unnormalizedCoordinates + }; + + VkResult res = + vkCreateSampler(g_vulkan_context->GetDevice(), &create_info, nullptr, &m_point_sampler); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSampler failed: "); + return false; + } + + // Most fields are shared across point<->linear samplers, so only change those necessary. + create_info.minFilter = VK_FILTER_LINEAR; + create_info.magFilter = VK_FILTER_LINEAR; + create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + res = vkCreateSampler(g_vulkan_context->GetDevice(), &create_info, nullptr, &m_linear_sampler); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSampler failed: "); + return false; + } + + return true; +} + +VkSampler ObjectCache::GetSampler(const SamplerState& info) +{ + auto iter = m_sampler_cache.find(info); + if (iter != m_sampler_cache.end()) + return iter->second; + + // Cap anisotropy to device limits. + VkBool32 anisotropy_enable = (info.anisotropy != 0) ? 
VK_TRUE : VK_FALSE; + float max_anisotropy = std::min(static_cast(1 << info.anisotropy), + g_vulkan_context->GetMaxSaxmplerAnisotropy()); + + VkSamplerCreateInfo create_info = { + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkSamplerCreateFlags flags + info.mag_filter, // VkFilter magFilter + info.min_filter, // VkFilter minFilter + info.mipmap_mode, // VkSamplerMipmapMode mipmapMode + info.wrap_u, // VkSamplerAddressMode addressModeU + info.wrap_v, // VkSamplerAddressMode addressModeV + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW + static_cast(info.lod_bias.Value()), // float mipLodBias + anisotropy_enable, // VkBool32 anisotropyEnable + max_anisotropy, // float maxAnisotropy + VK_FALSE, // VkBool32 compareEnable + VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp + static_cast(info.min_lod.Value()), // float minLod + static_cast(info.max_lod.Value()), // float maxLod + VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor + VK_FALSE // VkBool32 unnormalizedCoordinates + }; + + VkSampler sampler = VK_NULL_HANDLE; + VkResult res = vkCreateSampler(g_vulkan_context->GetDevice(), &create_info, nullptr, &sampler); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkCreateSampler failed: "); + + // Store it even if it failed + m_sampler_cache.emplace(info, sampler); + return sampler; +} + +std::string ObjectCache::GetUtilityShaderHeader() const +{ + std::stringstream ss; + if (g_ActiveConfig.iMultisamples > 1) + { + ss << "#define MSAA_ENABLED 1" << std::endl; + ss << "#define MSAA_SAMPLES " << g_ActiveConfig.iMultisamples << std::endl; + if (g_ActiveConfig.bSSAA) + ss << "#define SSAA_ENABLED 1" << std::endl; + } + + u32 efb_layers = (g_ActiveConfig.iStereoMode != STEREO_OFF) ? 
2 : 1; + ss << "#define EFB_LAYERS " << efb_layers << std::endl; + + return ss.str(); +} + +// Comparison operators for PipelineInfos +// Since these all boil down to POD types, we can just memcmp the entire thing for speed +// The is_trivially_copyable check fails on MSVC due to BitField. +// TODO: Can we work around this any way? +#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 5 && !defined(_MSC_VER) +static_assert(std::has_trivial_copy_constructor::value, + "PipelineInfo is trivially copyable"); +#elif !defined(_MSC_VER) +static_assert(std::is_trivially_copyable::value, + "PipelineInfo is trivially copyable"); +#endif + +std::size_t PipelineInfoHash::operator()(const PipelineInfo& key) const +{ + return static_cast(XXH64(&key, sizeof(key), 0)); +} + +bool operator==(const PipelineInfo& lhs, const PipelineInfo& rhs) +{ + return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0; +} + +bool operator!=(const PipelineInfo& lhs, const PipelineInfo& rhs) +{ + return !operator==(lhs, rhs); +} + +bool operator<(const PipelineInfo& lhs, const PipelineInfo& rhs) +{ + return std::memcmp(&lhs, &rhs, sizeof(lhs)) < 0; +} + +bool operator>(const PipelineInfo& lhs, const PipelineInfo& rhs) +{ + return std::memcmp(&lhs, &rhs, sizeof(lhs)) > 0; +} + +bool operator==(const SamplerState& lhs, const SamplerState& rhs) +{ + return lhs.bits == rhs.bits; +} + +bool operator!=(const SamplerState& lhs, const SamplerState& rhs) +{ + return !operator==(lhs, rhs); +} + +bool operator>(const SamplerState& lhs, const SamplerState& rhs) +{ + return lhs.bits > rhs.bits; +} + +bool operator<(const SamplerState& lhs, const SamplerState& rhs) +{ + return lhs.bits < rhs.bits; +} + +bool ObjectCache::CompileSharedShaders() +{ + static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"( + layout(location = 0) in float4 ipos; + layout(location = 5) in float4 icol0; + layout(location = 8) in float3 itex0; + + layout(location = 0) out float3 uv0; + layout(location = 1) out float4 col0; + + void 
main() + { + gl_Position = ipos; + uv0 = itex0; + col0 = icol0; + } + )"; + + static const char PASSTHROUGH_GEOMETRY_SHADER_SOURCE[] = R"( + layout(triangles) in; + layout(triangle_strip, max_vertices = EFB_LAYERS * 3) out; + + in VertexData + { + float3 uv0; + float4 col0; + } in_data[]; + + out VertexData + { + float3 uv0; + float4 col0; + } out_data; + + void main() + { + for (int j = 0; j < EFB_LAYERS; j++) + { + for (int i = 0; i < 3; i++) + { + gl_Layer = j; + gl_Position = gl_in[i].gl_Position; + out_data.uv0 = float3(in_data[i].uv0.xy, float(j)); + out_data.col0 = in_data[i].col0; + EmitVertex(); + } + EndPrimitive(); + } + } + )"; + + static const char SCREEN_QUAD_VERTEX_SHADER_SOURCE[] = R"( + layout(location = 0) out float3 uv0; + + void main() + { + /* + * id &1 &2 clamp(*2-1) + * 0 0,0 0,0 -1,-1 TL + * 1 1,0 1,0 1,-1 TR + * 2 0,2 0,1 -1,1 BL + * 3 1,2 1,1 1,1 BR + */ + vec2 rawpos = float2(float(gl_VertexID & 1), clamp(float(gl_VertexID & 2), 0.0f, 1.0f)); + gl_Position = float4(rawpos * 2.0f - 1.0f, 0.0f, 1.0f); + uv0 = float3(rawpos, 0.0f); + } + )"; + + static const char SCREEN_QUAD_GEOMETRY_SHADER_SOURCE[] = R"( + layout(triangles) in; + layout(triangle_strip, max_vertices = EFB_LAYERS * 3) out; + + in VertexData + { + float3 uv0; + } in_data[]; + + out VertexData + { + float3 uv0; + } out_data; + + void main() + { + for (int j = 0; j < EFB_LAYERS; j++) + { + for (int i = 0; i < 3; i++) + { + gl_Layer = j; + gl_Position = gl_in[i].gl_Position; + out_data.uv0 = float3(in_data[i].uv0.xy, float(j)); + EmitVertex(); + } + EndPrimitive(); + } + } + )"; + + std::string header = GetUtilityShaderHeader(); + + m_screen_quad_vertex_shader = + Util::CompileAndCreateVertexShader(header + SCREEN_QUAD_VERTEX_SHADER_SOURCE); + m_passthrough_vertex_shader = + Util::CompileAndCreateVertexShader(header + PASSTHROUGH_VERTEX_SHADER_SOURCE); + if (m_screen_quad_vertex_shader == VK_NULL_HANDLE || + m_passthrough_vertex_shader == VK_NULL_HANDLE) + { + return false; + } + 
+ if (g_ActiveConfig.iStereoMode != STEREO_OFF && g_vulkan_context->SupportsGeometryShaders()) + { + m_screen_quad_geometry_shader = + Util::CompileAndCreateGeometryShader(header + SCREEN_QUAD_GEOMETRY_SHADER_SOURCE); + m_passthrough_geometry_shader = + Util::CompileAndCreateGeometryShader(header + PASSTHROUGH_GEOMETRY_SHADER_SOURCE); + if (m_screen_quad_geometry_shader == VK_NULL_HANDLE || + m_passthrough_geometry_shader == VK_NULL_HANDLE) + { + return false; + } + } + + return true; +} + +void ObjectCache::DestroySharedShaders() +{ + auto DestroyShader = [this](VkShaderModule& shader) { + if (shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); + shader = VK_NULL_HANDLE; + } + }; + + DestroyShader(m_screen_quad_vertex_shader); + DestroyShader(m_passthrough_vertex_shader); + DestroyShader(m_screen_quad_geometry_shader); + DestroyShader(m_passthrough_geometry_shader); +} +} diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.h b/Source/Core/VideoBackends/Vulkan/ObjectCache.h new file mode 100644 index 0000000000..f2ae25142a --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.h @@ -0,0 +1,189 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include + +#include "Common/LinearDiskCache.h" + +#include "VideoBackends/Vulkan/Constants.h" + +#include "VideoCommon/GeometryShaderGen.h" +#include "VideoCommon/PixelShaderGen.h" +#include "VideoCommon/VertexShaderGen.h" + +namespace Vulkan +{ +class CommandBufferManager; +class VertexFormat; +class StreamBuffer; + +struct PipelineInfo +{ + // These are packed in descending order of size, to avoid any padding so that the structure + // can be copied/compared as a single block of memory. 64-bit pointer size is assumed. 
+ const VertexFormat* vertex_format; + VkPipelineLayout pipeline_layout; + VkShaderModule vs; + VkShaderModule gs; + VkShaderModule ps; + VkRenderPass render_pass; + BlendState blend_state; + RasterizationState rasterization_state; + DepthStencilState depth_stencil_state; + VkPrimitiveTopology primitive_topology; +}; + +struct PipelineInfoHash +{ + std::size_t operator()(const PipelineInfo& key) const; +}; + +bool operator==(const PipelineInfo& lhs, const PipelineInfo& rhs); +bool operator!=(const PipelineInfo& lhs, const PipelineInfo& rhs); +bool operator<(const PipelineInfo& lhs, const PipelineInfo& rhs); +bool operator>(const PipelineInfo& lhs, const PipelineInfo& rhs); +bool operator==(const SamplerState& lhs, const SamplerState& rhs); +bool operator!=(const SamplerState& lhs, const SamplerState& rhs); +bool operator>(const SamplerState& lhs, const SamplerState& rhs); +bool operator<(const SamplerState& lhs, const SamplerState& rhs); + +class ObjectCache +{ +public: + ObjectCache(); + ~ObjectCache(); + + // We have three shared pipeline layouts: + // - Standard + // - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) + // - 8 combined image samplers (accessible from PS) + // - BBox Enabled + // - Same as standard, plus a single SSBO accessible from PS + // - Push Constant + // - Same as standard, plus 128 bytes of push constants (vertex and fragment stages). + // + // All three pipeline layouts use the same descriptor set layouts, but the final descriptor set + // (SSBO) is only required when using the BBox Enabled pipeline layout. 
+ // + VkDescriptorSetLayout GetDescriptorSetLayout(DESCRIPTOR_SET set) const + { + return m_descriptor_set_layouts[set]; + } + VkPipelineLayout GetStandardPipelineLayout() const { return m_standard_pipeline_layout; } + VkPipelineLayout GetBBoxPipelineLayout() const { return m_bbox_pipeline_layout; } + VkPipelineLayout GetPushConstantPipelineLayout() const { return m_push_constant_pipeline_layout; } + // Shared utility shader resources + VertexFormat* GetUtilityShaderVertexFormat() const + { + return m_utility_shader_vertex_format.get(); + } + StreamBuffer* GetUtilityShaderVertexBuffer() const + { + return m_utility_shader_vertex_buffer.get(); + } + StreamBuffer* GetUtilityShaderUniformBuffer() const + { + return m_utility_shader_uniform_buffer.get(); + } + + // Get utility shader header based on current config. + std::string GetUtilityShaderHeader() const; + + // Accesses ShaderGen shader caches + VkShaderModule GetVertexShaderForUid(const VertexShaderUid& uid); + VkShaderModule GetGeometryShaderForUid(const GeometryShaderUid& uid); + VkShaderModule GetPixelShaderForUid(const PixelShaderUid& uid, DSTALPHA_MODE dstalpha_mode); + + // Static samplers + VkSampler GetPointSampler() const { return m_point_sampler; } + VkSampler GetLinearSampler() const { return m_linear_sampler; } + VkSampler GetSampler(const SamplerState& info); + + // Perform at startup, create descriptor layouts, compiles all static shaders. + bool Initialize(); + + // Find a pipeline by the specified description, if not found, attempts to create it + VkPipeline GetPipeline(const PipelineInfo& info); + + // Wipes out the pipeline cache, use when MSAA modes change, for example + // Also destroys the data that would be stored in the disk cache. + void ClearPipelineCache(); + + // Saves the pipeline cache to disk. Call when shutting down. 
+ void SavePipelineCache(); + + // Clear sampler cache, use when anisotropy mode changes + // WARNING: Ensure none of the objects from here are in use when calling + void ClearSamplerCache(); + + // Recompile shared shaders, call when stereo mode changes. + void RecompileSharedShaders(); + + // Shared shader accessors + VkShaderModule GetScreenQuadVertexShader() const { return m_screen_quad_vertex_shader; } + VkShaderModule GetPassthroughVertexShader() const { return m_passthrough_vertex_shader; } + VkShaderModule GetScreenQuadGeometryShader() const { return m_screen_quad_geometry_shader; } + VkShaderModule GetPassthroughGeometryShader() const { return m_passthrough_geometry_shader; } +private: + bool CreatePipelineCache(bool load_from_disk); + void DestroyPipelineCache(); + void LoadShaderCaches(); + void DestroyShaderCaches(); + bool CreateDescriptorSetLayouts(); + void DestroyDescriptorSetLayouts(); + bool CreatePipelineLayouts(); + void DestroyPipelineLayouts(); + bool CreateUtilityShaderVertexFormat(); + bool CreateStaticSamplers(); + bool CompileSharedShaders(); + void DestroySharedShaders(); + void DestroySamplers(); + + std::string GetDiskCacheFileName(const char* type); + + std::array m_descriptor_set_layouts = {}; + + VkPipelineLayout m_standard_pipeline_layout = VK_NULL_HANDLE; + VkPipelineLayout m_bbox_pipeline_layout = VK_NULL_HANDLE; + VkPipelineLayout m_push_constant_pipeline_layout = VK_NULL_HANDLE; + + std::unique_ptr m_utility_shader_vertex_format; + std::unique_ptr m_utility_shader_vertex_buffer; + std::unique_ptr m_utility_shader_uniform_buffer; + + template + struct ShaderCache + { + std::map shader_map; + LinearDiskCache disk_cache; + }; + ShaderCache m_vs_cache; + ShaderCache m_gs_cache; + ShaderCache m_ps_cache; + + std::unordered_map m_pipeline_objects; + VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; + std::string m_pipeline_cache_filename; + + VkSampler m_point_sampler = VK_NULL_HANDLE; + VkSampler m_linear_sampler = VK_NULL_HANDLE; + 
+ std::map m_sampler_cache; + + // Utility/shared shaders + VkShaderModule m_screen_quad_vertex_shader = VK_NULL_HANDLE; + VkShaderModule m_passthrough_vertex_shader = VK_NULL_HANDLE; + VkShaderModule m_screen_quad_geometry_shader = VK_NULL_HANDLE; + VkShaderModule m_passthrough_geometry_shader = VK_NULL_HANDLE; +}; + +extern std::unique_ptr g_object_cache; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/PaletteTextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/PaletteTextureConverter.cpp new file mode 100644 index 0000000000..4a0ecb9dce --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/PaletteTextureConverter.cpp @@ -0,0 +1,315 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/Assert.h" +#include "Common/CommonFuncs.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/PaletteTextureConverter.h" +#include "VideoBackends/Vulkan/Renderer.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" +#include "VideoBackends/Vulkan/Texture2D.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +namespace Vulkan +{ +PaletteTextureConverter::PaletteTextureConverter() +{ +} + +PaletteTextureConverter::~PaletteTextureConverter() +{ + for (const auto& it : m_shaders) + { + if (it != VK_NULL_HANDLE) + vkDestroyShaderModule(g_vulkan_context->GetDevice(), it, nullptr); + } + + if (m_palette_buffer_view != VK_NULL_HANDLE) + vkDestroyBufferView(g_vulkan_context->GetDevice(), m_palette_buffer_view, nullptr); + + // The pipeline layout is created in CreateDescriptorLayout(); release it here as well. + if (m_pipeline_layout != VK_NULL_HANDLE) + vkDestroyPipelineLayout(g_vulkan_context->GetDevice(), m_pipeline_layout, nullptr); + + if (m_palette_set_layout != VK_NULL_HANDLE) + vkDestroyDescriptorSetLayout(g_vulkan_context->GetDevice(), m_palette_set_layout, nullptr); +} + +bool PaletteTextureConverter::Initialize() +{ + if (!CreateBuffers()) + 
return false; + + if (!CompileShaders()) + return false; + + if (!CreateDescriptorLayout()) + return false; + + return true; +} + +void PaletteTextureConverter::ConvertTexture(StateTracker* state_tracker, VkRenderPass render_pass, + VkFramebuffer dst_framebuffer, Texture2D* src_texture, + u32 width, u32 height, void* palette, + TlutFormat format) +{ + struct PSUniformBlock + { + float multiplier; + int texel_buffer_offset; + int pad[2]; + }; + + _assert_(static_cast(format) < NUM_PALETTE_CONVERSION_SHADERS); + + size_t palette_size = ((format & 0xF) == GX_TF_I4) ? 32 : 512; + VkDescriptorSet texel_buffer_descriptor_set; + + // Allocate memory for the palette, and descriptor sets for the buffer. + // If any of these fail, execute a command buffer, and try again. + if (!m_palette_stream_buffer->ReserveMemory(palette_size, + g_vulkan_context->GetTexelBufferAlignment()) || + (texel_buffer_descriptor_set = + g_command_buffer_mgr->AllocateDescriptorSet(m_palette_set_layout)) == VK_NULL_HANDLE) + { + WARN_LOG(VIDEO, "Executing command list while waiting for space in palette buffer"); + Util::ExecuteCurrentCommandsAndRestoreState(state_tracker, false); + + if (!m_palette_stream_buffer->ReserveMemory(palette_size, + g_vulkan_context->GetTexelBufferAlignment()) || + (texel_buffer_descriptor_set = + g_command_buffer_mgr->AllocateDescriptorSet(m_palette_set_layout)) == VK_NULL_HANDLE) + { + PanicAlert("Failed to allocate space for texture conversion"); + return; + } + } + + // Fill descriptor set #2 (texel buffer) + u32 palette_offset = static_cast(m_palette_stream_buffer->GetCurrentOffset()); + VkWriteDescriptorSet texel_set_write = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + texel_buffer_descriptor_set, + 0, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + nullptr, + nullptr, + &m_palette_buffer_view}; + vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), 1, &texel_set_write, 0, nullptr); + + 
Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + m_palette_stream_buffer->GetBuffer(), VK_ACCESS_HOST_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, palette_offset, palette_size, + VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + + // Set up draw + UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), m_pipeline_layout, + render_pass, g_object_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, + m_shaders[format]); + + VkRect2D region = {{0, 0}, {width, height}}; + draw.BeginRenderPass(dst_framebuffer, region); + + // Copy in palette + memcpy(m_palette_stream_buffer->GetCurrentHostPointer(), palette, palette_size); + m_palette_stream_buffer->CommitMemory(palette_size); + + // PS Uniforms/Samplers + PSUniformBlock uniforms = {}; + uniforms.multiplier = ((format & 0xF)) == GX_TF_I4 ? 15.0f : 255.0f; + uniforms.texel_buffer_offset = static_cast(palette_offset / sizeof(u16)); + draw.SetPushConstants(&uniforms, sizeof(uniforms)); + draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetPointSampler()); + + // We have to bind the texel buffer descriptor set separately. 
+ vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout, 0, 1, + &texel_buffer_descriptor_set, 0, nullptr); + + // Draw + draw.SetViewportAndScissor(0, 0, width, height); + draw.DrawWithoutVertexBuffer(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 4); + draw.EndRenderPass(); +} + +bool PaletteTextureConverter::CreateBuffers() +{ + // TODO: Check against maximum size + static const size_t BUFFER_SIZE = 1024 * 1024; + + m_palette_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, BUFFER_SIZE, BUFFER_SIZE); + if (!m_palette_stream_buffer) + return false; + + // Create a view of the whole buffer, we'll offset our texel load into it + VkBufferViewCreateInfo view_info = { + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkBufferViewCreateFlags flags + m_palette_stream_buffer->GetBuffer(), // VkBuffer buffer + VK_FORMAT_R16_UINT, // VkFormat format + 0, // VkDeviceSize offset + BUFFER_SIZE // VkDeviceSize range + }; + + VkResult res = vkCreateBufferView(g_vulkan_context->GetDevice(), &view_info, nullptr, + &m_palette_buffer_view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBufferView failed: "); + return false; + } + + return true; +} + +bool PaletteTextureConverter::CompileShaders() +{ + static const char PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE[] = R"( + layout(std140, push_constant) uniform PCBlock + { + float multiplier; + int texture_buffer_offset; + } PC; + + layout(set = 1, binding = 0) uniform sampler2DArray samp0; + layout(set = 0, binding = 0) uniform usamplerBuffer samp1; + + layout(location = 0) in vec3 f_uv0; + layout(location = 0) out vec4 ocol0; + + int Convert3To8(int v) + { + // Swizzle bits: 00000123 -> 12312312 + return (v << 5) | (v << 2) | (v >> 1); + } + int Convert4To8(int v) + { + // Swizzle bits: 00001234 -> 12341234 + return (v << 4) | v; + } + int Convert5To8(int v) + { 
+ // Swizzle bits: 00012345 -> 12345123 + return (v << 3) | (v >> 2); + } + int Convert6To8(int v) + { + // Swizzle bits: 00123456 -> 12345612 + return (v << 2) | (v >> 4); + } + float4 DecodePixel_RGB5A3(int val) + { + int r,g,b,a; + if ((val&0x8000) > 0) + { + r=Convert5To8((val>>10) & 0x1f); + g=Convert5To8((val>>5 ) & 0x1f); + b=Convert5To8((val ) & 0x1f); + a=0xFF; + } + else + { + a=Convert3To8((val>>12) & 0x7); + r=Convert4To8((val>>8 ) & 0xf); + g=Convert4To8((val>>4 ) & 0xf); + b=Convert4To8((val ) & 0xf); + } + return float4(r, g, b, a) / 255.0; + } + float4 DecodePixel_RGB565(int val) + { + int r, g, b, a; + r = Convert5To8((val >> 11) & 0x1f); + g = Convert6To8((val >> 5) & 0x3f); + b = Convert5To8((val) & 0x1f); + a = 0xFF; + return float4(r, g, b, a) / 255.0; + } + float4 DecodePixel_IA8(int val) + { + int i = val & 0xFF; + int a = val >> 8; + return float4(i, i, i, a) / 255.0; + } + void main() + { + int src = int(round(texture(samp0, f_uv0).r * PC.multiplier)); + src = int(texelFetch(samp1, src + PC.texture_buffer_offset).r); + src = ((src << 8) & 0xFF00) | (src >> 8); + ocol0 = DECODE(src); + } + + )"; + + std::string palette_ia8_program = StringFromFormat("%s\n%s", "#define DECODE DecodePixel_IA8", + PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); + std::string palette_rgb565_program = StringFromFormat( + "%s\n%s", "#define DECODE DecodePixel_RGB565", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); + std::string palette_rgb5a3_program = StringFromFormat( + "%s\n%s", "#define DECODE DecodePixel_RGB5A3", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); + + m_shaders[GX_TL_IA8] = Util::CompileAndCreateFragmentShader(palette_ia8_program); + m_shaders[GX_TL_RGB565] = Util::CompileAndCreateFragmentShader(palette_rgb565_program); + m_shaders[GX_TL_RGB5A3] = Util::CompileAndCreateFragmentShader(palette_rgb5a3_program); + + return (m_shaders[GX_TL_IA8] != VK_NULL_HANDLE && m_shaders[GX_TL_RGB565] != VK_NULL_HANDLE && + m_shaders[GX_TL_RGB5A3] != VK_NULL_HANDLE); +} + 
+bool PaletteTextureConverter::CreateDescriptorLayout() +{ + static const VkDescriptorSetLayoutBinding set_bindings[] = { + {0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + }; + static const VkDescriptorSetLayoutCreateInfo set_info = { + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, + static_cast(ArraySize(set_bindings)), set_bindings}; + + VkResult res = vkCreateDescriptorSetLayout(g_vulkan_context->GetDevice(), &set_info, nullptr, + &m_palette_set_layout); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDescriptorSetLayout failed: "); + return false; + } + + VkDescriptorSetLayout sets[] = {m_palette_set_layout, g_object_cache->GetDescriptorSetLayout( + DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS)}; + + VkPushConstantRange push_constant_range = { + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE}; + + VkPipelineLayoutCreateInfo pipeline_layout_info = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + nullptr, + 0, + static_cast(ArraySize(sets)), + sets, + 1, + &push_constant_range}; + + res = vkCreatePipelineLayout(g_vulkan_context->GetDevice(), &pipeline_layout_info, nullptr, + &m_pipeline_layout); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreatePipelineLayout failed: "); + return false; + } + + return true; +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/PaletteTextureConverter.h b/Source/Core/VideoBackends/Vulkan/PaletteTextureConverter.h new file mode 100644 index 0000000000..655ffbf74f --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/PaletteTextureConverter.h @@ -0,0 +1,50 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "VideoBackends/Vulkan/StreamBuffer.h" + +#include "VideoCommon/TextureDecoder.h" + +namespace Vulkan +{ +class StateTracker; +class Texture2D; + +// Since this converter uses a uniform texel buffer, we can't use the general pipeline generators. + +class PaletteTextureConverter +{ +public: + PaletteTextureConverter(); + ~PaletteTextureConverter(); + + bool Initialize(); + + void ConvertTexture(StateTracker* state_tracker, VkRenderPass render_pass, + VkFramebuffer dst_framebuffer, Texture2D* src_texture, u32 width, u32 height, + void* palette, TlutFormat format); + +private: + static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3; + + bool CreateBuffers(); + bool CompileShaders(); + bool CreateDescriptorLayout(); + + VkDescriptorSetLayout m_palette_set_layout = VK_NULL_HANDLE; + VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; + + std::array m_shaders = {}; + + std::unique_ptr m_palette_stream_buffer; + VkBufferView m_palette_buffer_view = VK_NULL_HANDLE; + + std::unique_ptr m_uniform_buffer; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp new file mode 100644 index 0000000000..0ee31e3d00 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp @@ -0,0 +1,370 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include +#include +#include + +#include "Common/Assert.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/PerfQuery.h" +#include "VideoBackends/Vulkan/Renderer.h" +#include "VideoBackends/Vulkan/StagingBuffer.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +namespace Vulkan +{ +PerfQuery::PerfQuery() +{ +} + +PerfQuery::~PerfQuery() +{ + g_command_buffer_mgr->RemoveFencePointCallback(this); + + if (m_query_pool != VK_NULL_HANDLE) + vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr); +} + +bool PerfQuery::Initialize(StateTracker* state_tracker) +{ + m_state_tracker = state_tracker; + + if (!CreateQueryPool()) + { + PanicAlert("Failed to create query pool"); + return false; + } + + if (!CreateReadbackBuffer()) + { + PanicAlert("Failed to create readback buffer"); + return false; + } + + g_command_buffer_mgr->AddFencePointCallback( + this, std::bind(&PerfQuery::OnCommandBufferQueued, this, std::placeholders::_1, + std::placeholders::_2), + std::bind(&PerfQuery::OnCommandBufferExecuted, this, std::placeholders::_1)); + + return true; +} + +void PerfQuery::EnableQuery(PerfQueryGroup type) +{ + // Have we used half of the query buffer already? + if (m_query_count > m_query_buffer.size() / 2) + NonBlockingPartialFlush(); + + // Block if there are no free slots. + if (m_query_count == PERF_QUERY_BUFFER_SIZE) + { + // ERROR_LOG(VIDEO, "Flushed query buffer early!"); + BlockingPartialFlush(); + } + + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + u32 index = (m_query_read_pos + m_query_count) % PERF_QUERY_BUFFER_SIZE; + ActiveQuery& entry = m_query_buffer[index]; + _assert_(!entry.active && !entry.available); + entry.active = true; + m_query_count++; + + DEBUG_LOG(VIDEO, "start query %u", index); + + // Use precise queries if supported, otherwise boolean (which will be incorrect). 
+ VkQueryControlFlags flags = 0; + if (g_vulkan_context->SupportsPreciseOcclusionQueries()) + flags = VK_QUERY_CONTROL_PRECISE_BIT; + + // Ensure the query starts within a render pass. + // TODO: Is this needed? + m_state_tracker->BeginRenderPass(); + vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index, flags); + + // Prevent background command buffer submission while the query is active. + m_state_tracker->SetBackgroundCommandBufferExecution(false); + } +} + +void PerfQuery::DisableQuery(PerfQueryGroup type) +{ + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + // DisableQuery should be called for each EnableQuery, so subtract one to get the previous one. + u32 index = (m_query_read_pos + m_query_count - 1) % PERF_QUERY_BUFFER_SIZE; + vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index); + m_state_tracker->SetBackgroundCommandBufferExecution(true); + DEBUG_LOG(VIDEO, "end query %u", index); + } +} + +void PerfQuery::ResetQuery() +{ + m_query_count = 0; + m_query_read_pos = 0; + std::fill_n(m_results, ArraySize(m_results), 0); + + // Reset entire query pool, ensuring all queries are ready to write to. 
+ m_state_tracker->EndRenderPass(); + vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, 0, + PERF_QUERY_BUFFER_SIZE); + + for (auto& entry : m_query_buffer) + { + entry.pending_fence = VK_NULL_HANDLE; + entry.available = false; + entry.active = false; + } +} + +u32 PerfQuery::GetQueryResult(PerfQueryType type) +{ + u32 result = 0; + + if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) + { + result = m_results[PQG_ZCOMP_ZCOMPLOC]; + } + else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT) + { + result = m_results[PQG_ZCOMP]; + } + else if (type == PQ_BLEND_INPUT) + { + result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC]; + } + else if (type == PQ_EFB_COPY_CLOCKS) + { + result = m_results[PQG_EFB_COPY_CLOCKS]; + } + + return result / 4; +} + +void PerfQuery::FlushResults() +{ + while (!IsFlushed()) + BlockingPartialFlush(); +} + +bool PerfQuery::IsFlushed() const +{ + return m_query_count == 0; +} + +bool PerfQuery::CreateQueryPool() +{ + VkQueryPoolCreateInfo info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkQueryPoolCreateFlags flags + VK_QUERY_TYPE_OCCLUSION, // VkQueryType queryType + PERF_QUERY_BUFFER_SIZE, // uint32_t queryCount + 0 // VkQueryPipelineStatisticFlags pipelineStatistics; + }; + + VkResult res = vkCreateQueryPool(g_vulkan_context->GetDevice(), &info, nullptr, &m_query_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: "); + return false; + } + + return true; +} + +bool PerfQuery::CreateReadbackBuffer() +{ + m_readback_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_READBACK, + PERF_QUERY_BUFFER_SIZE * sizeof(PerfQueryDataType), + VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + // Leave the buffer persistently mapped, we invalidate it when we need to read. 
+ if (!m_readback_buffer || !m_readback_buffer->Map()) + return false; + + return true; +} + +void PerfQuery::QueueCopyQueryResults(VkCommandBuffer command_buffer, VkFence fence, + u32 start_index, u32 query_count) +{ + DEBUG_LOG(VIDEO, "queue copy of queries %u-%u", start_index, start_index + query_count - 1); + + // Transition buffer for GPU write + // TODO: Is this needed? + m_readback_buffer->PrepareForGPUWrite(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Copy from queries -> buffer + vkCmdCopyQueryPoolResults(command_buffer, m_query_pool, start_index, query_count, + m_readback_buffer->GetBuffer(), start_index * sizeof(PerfQueryDataType), + sizeof(PerfQueryDataType), VK_QUERY_RESULT_WAIT_BIT); + + // Prepare for host readback + m_readback_buffer->FlushGPUCache(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Reset queries so they're ready to use again + vkCmdResetQueryPool(command_buffer, m_query_pool, start_index, query_count); + + // Flag all queries as available, but with a fence that has to be completed first + for (u32 i = 0; i < query_count; i++) + { + u32 index = start_index + i; + ActiveQuery& entry = m_query_buffer[index]; + entry.pending_fence = fence; + entry.available = true; + entry.active = false; + } +} + +void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence) +{ + // Flag all pending queries that aren't available as available after execution. + u32 copy_start_index = 0; + u32 copy_count = 0; + for (u32 i = 0; i < m_query_count; i++) + { + u32 index = (m_query_read_pos + i) % PERF_QUERY_BUFFER_SIZE; + ActiveQuery& entry = m_query_buffer[index]; + + // Skip already-copied queries (will happen if a flush hasn't occurred and + // a command buffer hasn't finished executing). + if (entry.available) + { + // These should be grouped together, and at the start. 
+ _assert_(copy_count == 0); + continue; + } + + // If this wrapped around, we need to flush the entries before the end of the buffer. + _assert_(entry.active); + if (index < copy_start_index) + { + QueueCopyQueryResults(command_buffer, fence, copy_start_index, copy_count); + copy_start_index = index; + copy_count = 0; + } + else if (copy_count == 0) + { + copy_start_index = index; + } + copy_count++; + } + + if (copy_count > 0) + QueueCopyQueryResults(command_buffer, fence, copy_start_index, copy_count); +} + +void PerfQuery::OnCommandBufferExecuted(VkFence fence) +{ + // Need to save these since ProcessResults will modify them. + u32 query_read_pos = m_query_read_pos; + u32 query_count = m_query_count; + + // Flush as many queries as are bound to this fence. + u32 flush_start_index = 0; + u32 flush_count = 0; + for (u32 i = 0; i < query_count; i++) + { + u32 index = (query_read_pos + i) % PERF_QUERY_BUFFER_SIZE; + if (m_query_buffer[index].pending_fence != fence) + { + // These should be grouped together, at the end. + break; + } + + // If this wrapped around, we need to flush the entries before the end of the buffer. + if (index < flush_start_index) + { + ProcessResults(flush_start_index, flush_count); + flush_start_index = index; + flush_count = 0; + } + else if (flush_count == 0) + { + flush_start_index = index; + } + flush_count++; + } + + if (flush_count > 0) + ProcessResults(flush_start_index, flush_count); +} + +void PerfQuery::ProcessResults(u32 start_index, u32 query_count) +{ + // Invalidate CPU caches before reading back. + m_readback_buffer->InvalidateCPUCache(start_index * sizeof(PerfQueryDataType), + query_count * sizeof(PerfQueryDataType)); + + // Should be at maximum query_count queries pending. + _assert_(query_count <= m_query_count); + DEBUG_LOG(VIDEO, "process queries %u-%u", start_index, start_index + query_count - 1); + + // Remove pending queries. 
+ for (u32 i = 0; i < query_count; i++) + { + u32 index = (m_query_read_pos + i) % PERF_QUERY_BUFFER_SIZE; + ActiveQuery& entry = m_query_buffer[index]; + + // Should have a fence associated with it (waiting for a result). + _assert_(entry.pending_fence != VK_NULL_HANDLE); + entry.pending_fence = VK_NULL_HANDLE; + entry.available = false; + entry.active = false; + + // Grab result from readback buffer, it will already have been invalidated. + u32 result; + m_readback_buffer->Read(index * sizeof(PerfQueryDataType), &result, sizeof(result), false); + DEBUG_LOG(VIDEO, " query result %u", result); + + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += + static_cast(static_cast(result) * EFB_WIDTH / g_renderer->GetTargetWidth() * + EFB_HEIGHT / g_renderer->GetTargetHeight()); + } + + m_query_read_pos = (m_query_read_pos + query_count) % PERF_QUERY_BUFFER_SIZE; + m_query_count -= query_count; +} + +void PerfQuery::NonBlockingPartialFlush() +{ + if (IsFlushed()) + return; + + // Submit a command buffer in the background if the front query is not bound to one. + // Ideally this will complete before the buffer fills. + if (m_query_buffer[m_query_read_pos].pending_fence == VK_NULL_HANDLE) + Util::ExecuteCurrentCommandsAndRestoreState(m_state_tracker, true, false); +} + +void PerfQuery::BlockingPartialFlush() +{ + if (IsFlushed()) + return; + + // If the first pending query is needing command buffer execution, do that. + ActiveQuery& entry = m_query_buffer[m_query_read_pos]; + if (entry.pending_fence == VK_NULL_HANDLE) + { + // This will callback OnCommandBufferQueued which will set the fence on the entry. + // We wait for completion, which will also call OnCommandBufferExecuted, and clear the fence. + Util::ExecuteCurrentCommandsAndRestoreState(m_state_tracker, false, true); + } + else + { + // The command buffer has been submitted, but is awaiting completion. 
+ // Wait for the fence to complete, which will call OnCommandBufferExecuted. + g_command_buffer_mgr->WaitForFence(entry.pending_fence); + } +} +} diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.h b/Source/Core/VideoBackends/Vulkan/PerfQuery.h new file mode 100644 index 0000000000..5cffb9893b --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.h @@ -0,0 +1,70 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoCommon/PerfQueryBase.h" + +namespace Vulkan +{ +class StagingBuffer; +class StateTracker; + +class PerfQuery : public PerfQueryBase +{ +public: + PerfQuery(); + ~PerfQuery(); + + bool Initialize(StateTracker* state_tracker); + + void EnableQuery(PerfQueryGroup type) override; + void DisableQuery(PerfQueryGroup type) override; + void ResetQuery() override; + u32 GetQueryResult(PerfQueryType type) override; + void FlushResults() override; + bool IsFlushed() const override; + +private: + struct ActiveQuery + { + PerfQueryType query_type; + VkFence pending_fence; + bool available; + bool active; + }; + + bool CreateQueryPool(); + bool CreateReadbackBuffer(); + void QueueCopyQueryResults(VkCommandBuffer command_buffer, VkFence fence, u32 start_index, + u32 query_count); + void ProcessResults(u32 start_index, u32 query_count); + + void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence); + void OnCommandBufferExecuted(VkFence fence); + + void NonBlockingPartialFlush(); + void BlockingPartialFlush(); + + StateTracker* m_state_tracker = nullptr; + + // when testing in SMS: 64 was too small, 128 was ok + // TODO: This should be size_t, but the base class uses u32s + using PerfQueryDataType = u32; + static const u32 PERF_QUERY_BUFFER_SIZE = 512; + std::array m_query_buffer = {}; + u32 m_query_read_pos = 0; + + // TODO: Investigate using pipeline statistics to implement 
other query types + VkQueryPool m_query_pool = VK_NULL_HANDLE; + + // Buffer containing query results. Each query is a u32. + std::unique_ptr m_readback_buffer; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/RasterFont.cpp b/Source/Core/VideoBackends/Vulkan/RasterFont.cpp new file mode 100644 index 0000000000..4432b0d0c6 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/RasterFont.cpp @@ -0,0 +1,408 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/RasterFont.h" +#include "VideoBackends/Vulkan/Texture2D.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +// Based on OGL RasterFont +// TODO: We should move this to common. + +namespace Vulkan +{ +constexpr int CHAR_WIDTH = 8; +constexpr int CHAR_HEIGHT = 13; +constexpr int CHAR_OFFSET = 32; +constexpr int CHAR_COUNT = 95; + +static const u8 rasters[CHAR_COUNT][CHAR_HEIGHT] = { + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36}, + {0x00, 0x00, 0x00, 0x66, 0x66, 0xff, 0x66, 0x66, 0xff, 0x66, 0x66, 0x00, 0x00}, + {0x00, 0x00, 0x18, 0x7e, 0xff, 0x1b, 0x1f, 0x7e, 0xf8, 0xd8, 0xff, 0x7e, 0x18}, + {0x00, 0x00, 0x0e, 0x1b, 0xdb, 0x6e, 0x30, 0x18, 0x0c, 0x76, 0xdb, 0xd8, 0x70}, + {0x00, 0x00, 0x7f, 0xc6, 0xcf, 0xd8, 0x70, 0x70, 0xd8, 0xcc, 0xcc, 0x6c, 0x38}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x1c, 0x0c, 0x0e}, + {0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x18, 0x0c}, + {0x00, 0x00, 0x30, 0x18, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30}, + {0x00, 0x00, 0x00, 0x00, 0x99, 0x5a, 0x3c, 0xff, 0x3c, 0x5a, 0x99, 0x00, 0x00}, + {0x00, 0x00, 
0x00, 0x18, 0x18, 0x18, 0xff, 0xff, 0x18, 0x18, 0x18, 0x00, 0x00}, + {0x00, 0x00, 0x30, 0x18, 0x1c, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x60, 0x60, 0x30, 0x30, 0x18, 0x18, 0x0c, 0x0c, 0x06, 0x06, 0x03, 0x03}, + {0x00, 0x00, 0x3c, 0x66, 0xc3, 0xe3, 0xf3, 0xdb, 0xcf, 0xc7, 0xc3, 0x66, 0x3c}, + {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x78, 0x38, 0x18}, + {0x00, 0x00, 0xff, 0xc0, 0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x03, 0xe7, 0x7e}, + {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0x7e, 0x07, 0x03, 0x03, 0xe7, 0x7e}, + {0x00, 0x00, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xff, 0xcc, 0x6c, 0x3c, 0x1c, 0x0c}, + {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0, 0xff}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc7, 0xfe, 0xc0, 0xc0, 0xc0, 0xe7, 0x7e}, + {0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x18, 0x0c, 0x06, 0x03, 0x03, 0x03, 0xff}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xe7, 0x7e, 0xe7, 0xc3, 0xc3, 0xe7, 0x7e}, + {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x03, 0x7f, 0xe7, 0xc3, 0xc3, 0xe7, 0x7e}, + {0x00, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x30, 0x18, 0x1c, 0x1c, 0x00, 0x00, 0x1c, 0x1c, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06}, + {0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60}, + {0x00, 0x00, 0x18, 0x00, 0x00, 0x18, 0x18, 0x0c, 0x06, 0x03, 0xc3, 0xc3, 0x7e}, + {0x00, 0x00, 0x3f, 0x60, 0xcf, 0xdb, 0xd3, 0xdd, 0xc3, 0x7e, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xff, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18}, + {0x00, 0x00, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, 0xfe}, + {0x00, 0x00, 0x7e, 0xe7, 0xc0, 0xc0, 0xc0, 
0xc0, 0xc0, 0xc0, 0xc0, 0xe7, 0x7e}, + {0x00, 0x00, 0xfc, 0xce, 0xc7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc7, 0xce, 0xfc}, + {0x00, 0x00, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xfc, 0xc0, 0xc0, 0xc0, 0xc0, 0xff}, + {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xfc, 0xc0, 0xc0, 0xc0, 0xff}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xcf, 0xc0, 0xc0, 0xc0, 0xc0, 0xe7, 0x7e}, + {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xff, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3}, + {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e}, + {0x00, 0x00, 0x7c, 0xee, 0xc6, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06}, + {0x00, 0x00, 0xc3, 0xc6, 0xcc, 0xd8, 0xf0, 0xe0, 0xf0, 0xd8, 0xcc, 0xc6, 0xc3}, + {0x00, 0x00, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0}, + {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xff, 0xff, 0xe7, 0xc3}, + {0x00, 0x00, 0xc7, 0xc7, 0xcf, 0xcf, 0xdf, 0xdb, 0xfb, 0xf3, 0xf3, 0xe3, 0xe3}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xe7, 0x7e}, + {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, 0xfe}, + {0x00, 0x00, 0x3f, 0x6e, 0xdf, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c}, + {0x00, 0x00, 0xc3, 0xc6, 0xcc, 0xd8, 0xf0, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, 0xfe}, + {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0x7e, 0xe0, 0xc0, 0xc0, 0xe7, 0x7e}, + {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff}, + {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3}, + {0x00, 0x00, 0x18, 0x3c, 0x3c, 0x66, 0x66, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3}, + {0x00, 0x00, 0xc3, 0xe7, 0xff, 0xff, 0xdb, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3}, + {0x00, 0x00, 0xc3, 0x66, 0x66, 0x3c, 0x3c, 0x18, 0x3c, 0x3c, 0x66, 0x66, 0xc3}, + {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x66, 0x66, 0xc3}, + {0x00, 0x00, 0xff, 0xc0, 0xc0, 0x60, 0x30, 0x7e, 0x0c, 0x06, 0x03, 0x03, 0xff}, + {0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 
0x3c}, + {0x00, 0x03, 0x03, 0x06, 0x06, 0x0c, 0x0c, 0x18, 0x18, 0x30, 0x30, 0x60, 0x60}, + {0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18}, + {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x30, 0x70}, + {0x00, 0x00, 0x7f, 0xc3, 0xc3, 0x7f, 0x03, 0xc3, 0x7e, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xfe, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0}, + {0x00, 0x00, 0x7e, 0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x7f, 0xc3, 0xc3, 0xc3, 0xc3, 0x7f, 0x03, 0x03, 0x03, 0x03, 0x03}, + {0x00, 0x00, 0x7f, 0xc0, 0xc0, 0xfe, 0xc3, 0xc3, 0x7e, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, 0x33, 0x1e}, + {0x7e, 0xc3, 0x03, 0x03, 0x7f, 0xc3, 0xc3, 0xc3, 0x7e, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0}, + {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x18, 0x00}, + {0x38, 0x6c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x00, 0x00, 0x0c, 0x00}, + {0x00, 0x00, 0xc6, 0xcc, 0xf8, 0xf0, 0xd8, 0xcc, 0xc6, 0xc0, 0xc0, 0xc0, 0xc0}, + {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x78}, + {0x00, 0x00, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xfe, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xfc, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00}, + {0xc0, 0xc0, 0xc0, 0xfe, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0x00, 0x00, 0x00, 0x00}, + {0x03, 0x03, 0x03, 0x7f, 0xc3, 0xc3, 0xc3, 0xc3, 0x7f, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xe0, 0xfe, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xfe, 0x03, 0x03, 0x7e, 0xc0, 0xc0, 0x7f, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x1c, 
0x36, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, 0x00}, + {0x00, 0x00, 0x7e, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x18, 0x3c, 0x3c, 0x66, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xc3, 0xe7, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, 0x00, 0x00, 0x00, 0x00}, + {0xc0, 0x60, 0x60, 0x30, 0x18, 0x3c, 0x66, 0x66, 0xc3, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0xff, 0x60, 0x30, 0x18, 0x0c, 0x06, 0xff, 0x00, 0x00, 0x00, 0x00}, + {0x00, 0x00, 0x0f, 0x18, 0x18, 0x18, 0x38, 0xf0, 0x38, 0x18, 0x18, 0x18, 0x0f}, + {0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18}, + {0x00, 0x00, 0xf0, 0x18, 0x18, 0x18, 0x1c, 0x0f, 0x1c, 0x18, 0x18, 0x18, 0xf0}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x8f, 0xf1, 0x60, 0x00, 0x00, 0x00}}; + +static const char VERTEX_SHADER_SOURCE[] = R"( + +layout(std140, push_constant) uniform PCBlock { + vec2 char_size; + vec2 offset; + vec4 color; +} PC; + +layout(location = 0) in vec4 ipos; +layout(location = 5) in vec4 icol0; +layout(location = 8) in vec3 itex0; + +layout(location = 0) out vec2 uv0; + +void main() +{ + gl_Position = vec4(ipos.xy + PC.offset, 0.0f, 1.0f); + gl_Position.y = -gl_Position.y; + uv0 = itex0.xy * PC.char_size; +} + +)"; + +static const char FRAGMENT_SHADER_SOURCE[] = R"( + +layout(std140, push_constant) uniform PCBlock { + vec2 char_size; + vec2 offset; + vec4 color; +} PC; + +layout(set = 1, binding = 0) uniform sampler2D samp0; + +layout(location = 0) in vec2 uv0; + +layout(location = 0) out vec4 ocol0; + +void main() +{ + ocol0 = texture(samp0, uv0) * PC.color; +} + +)"; + +RasterFont::RasterFont() +{ +} + +RasterFont::~RasterFont() +{ + if (m_vertex_shader != VK_NULL_HANDLE) + g_command_buffer_mgr->DeferResourceDestruction(m_vertex_shader); + if (m_fragment_shader != VK_NULL_HANDLE) + g_command_buffer_mgr->DeferResourceDestruction(m_fragment_shader); +} + +bool 
RasterFont::Initialize() +{ + // Create shaders and texture + if (!CreateShaders() || !CreateTexture()) + return false; + + return true; +} + +bool RasterFont::CreateTexture() +{ + // generate the texture + std::vector texture_data(CHAR_WIDTH * CHAR_COUNT * CHAR_HEIGHT); + for (int y = 0; y < CHAR_HEIGHT; y++) + { + for (int c = 0; c < CHAR_COUNT; c++) + { + for (int x = 0; x < CHAR_WIDTH; x++) + { + bool pixel = (0 != (rasters[c][y] & (1 << (CHAR_WIDTH - x - 1)))); + texture_data[CHAR_WIDTH * CHAR_COUNT * y + CHAR_WIDTH * c + x] = pixel ? -1 : 0; + } + } + } + + // create the actual texture object + m_texture = + Texture2D::Create(CHAR_WIDTH * CHAR_COUNT, CHAR_HEIGHT, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, + VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); + if (!m_texture) + return false; + + // create temporary buffer for uploading texture + VkBufferCreateInfo buffer_info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + nullptr, + 0, + static_cast(texture_data.size() * sizeof(u32)), + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_SHARING_MODE_EXCLUSIVE, + 0, + nullptr}; + VkBuffer temp_buffer; + VkResult res = vkCreateBuffer(g_vulkan_context->GetDevice(), &buffer_info, nullptr, &temp_buffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: "); + return false; + } + + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), temp_buffer, &memory_requirements); + uint32_t memory_type_index = g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + VkMemoryAllocateInfo memory_allocate_info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, + memory_requirements.size, memory_type_index}; + VkDeviceMemory temp_buffer_memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, + &temp_buffer_memory); + if (res != VK_SUCCESS) + { + 
LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), temp_buffer, nullptr); + return false; + } + + // Bind buffer to memory + res = vkBindBufferMemory(g_vulkan_context->GetDevice(), temp_buffer, temp_buffer_memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindBufferMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), temp_buffer, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), temp_buffer_memory, nullptr); + return false; + } + + // Copy into buffer + void* mapped_ptr; + res = vkMapMemory(g_vulkan_context->GetDevice(), temp_buffer_memory, 0, buffer_info.size, 0, + &mapped_ptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkMapMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), temp_buffer, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), temp_buffer_memory, nullptr); + return false; + } + + // Copy texture data into staging buffer + memcpy(mapped_ptr, texture_data.data(), texture_data.size() * sizeof(u32)); + vkUnmapMemory(g_vulkan_context->GetDevice(), temp_buffer_memory); + + // Copy from staging buffer to the final texture + VkBufferImageCopy region = {0, CHAR_WIDTH * CHAR_COUNT, + CHAR_HEIGHT, {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, + {0, 0, 0}, {CHAR_WIDTH * CHAR_COUNT, CHAR_HEIGHT, 1}}; + m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), temp_buffer, + m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + + // Free temp buffers after command buffer executes + m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + g_command_buffer_mgr->DeferResourceDestruction(temp_buffer); + g_command_buffer_mgr->DeferResourceDestruction(temp_buffer_memory); + return true; +} + +bool RasterFont::CreateShaders() +{ 
+ m_vertex_shader = Util::CompileAndCreateVertexShader(VERTEX_SHADER_SOURCE); + m_fragment_shader = Util::CompileAndCreateFragmentShader(FRAGMENT_SHADER_SOURCE); + return m_vertex_shader != VK_NULL_HANDLE && m_fragment_shader != VK_NULL_HANDLE; +} + +void RasterFont::PrintMultiLineText(VkRenderPass render_pass, const std::string& text, + float start_x, float start_y, u32 bbWidth, u32 bbHeight, + u32 color) +{ + // skip empty strings + if (text.empty()) + return; + + UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), + g_object_cache->GetPushConstantPipelineLayout(), render_pass, + m_vertex_shader, VK_NULL_HANDLE, m_fragment_shader); + + UtilityShaderVertex* vertices = + draw.ReserveVertices(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, text.length() * 6); + size_t num_vertices = 0; + if (!vertices) + return; + + float delta_x = float(2 * CHAR_WIDTH) / float(bbWidth); + float delta_y = float(2 * CHAR_HEIGHT) / float(bbHeight); + float border_x = 2.0f / float(bbWidth); + float border_y = 4.0f / float(bbHeight); + + float x = float(start_x); + float y = float(start_y); + + for (const char& c : text) + { + if (c == '\n') + { + x = float(start_x); + y -= delta_y + border_y; + continue; + } + + // do not print spaces, they can be skipped easily + if (c == ' ') + { + x += delta_x + border_x; + continue; + } + + if (c < CHAR_OFFSET || c >= CHAR_COUNT + CHAR_OFFSET) + continue; + + vertices[num_vertices].SetPosition(x, y); + vertices[num_vertices].SetTextureCoordinates(static_cast(c - CHAR_OFFSET), 0.0f); + num_vertices++; + + vertices[num_vertices].SetPosition(x + delta_x, y); + vertices[num_vertices].SetTextureCoordinates(static_cast(c - CHAR_OFFSET + 1), 0.0f); + num_vertices++; + + vertices[num_vertices].SetPosition(x + delta_x, y + delta_y); + vertices[num_vertices].SetTextureCoordinates(static_cast(c - CHAR_OFFSET + 1), 1.0f); + num_vertices++; + + vertices[num_vertices].SetPosition(x, y); + vertices[num_vertices].SetTextureCoordinates(static_cast(c - 
CHAR_OFFSET), 0.0f); + num_vertices++; + + vertices[num_vertices].SetPosition(x + delta_x, y + delta_y); + vertices[num_vertices].SetTextureCoordinates(static_cast(c - CHAR_OFFSET + 1), 1.0f); + num_vertices++; + + vertices[num_vertices].SetPosition(x, y + delta_y); + vertices[num_vertices].SetTextureCoordinates(static_cast(c - CHAR_OFFSET), 1.0f); + num_vertices++; + + x += delta_x + border_x; + } + + // skip all whitespace strings + if (num_vertices == 0) + return; + + draw.CommitVertices(num_vertices); + + struct PCBlock + { + float char_size[2]; + float offset[2]; + float color[4]; + } pc_block = {}; + + pc_block.char_size[0] = 1.0f / static_cast(CHAR_COUNT); + pc_block.char_size[1] = 1.0f; + + // shadows + pc_block.offset[0] = 2.0f / bbWidth; + pc_block.offset[1] = -2.0f / bbHeight; + pc_block.color[3] = (color >> 24) / 255.0f; + + draw.SetPushConstants(&pc_block, sizeof(pc_block)); + draw.SetPSSampler(0, m_texture->GetView(), g_object_cache->GetLinearSampler()); + + // Setup alpha blending + BlendState blend_state = Util::GetNoBlendingBlendState(); + blend_state.blend_enable = VK_TRUE; + blend_state.src_blend = VK_BLEND_FACTOR_SRC_ALPHA; + blend_state.blend_op = VK_BLEND_OP_ADD; + blend_state.dst_blend = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + draw.SetBlendState(blend_state); + + draw.Draw(); + + // non-shadowed part + pc_block.offset[0] = 0.0f; + pc_block.offset[1] = 0.0f; + pc_block.color[0] = ((color >> 16) & 0xFF) / 255.0f; + pc_block.color[1] = ((color >> 8) & 0xFF) / 255.0f; + pc_block.color[2] = (color & 0xFF) / 255.0f; + draw.SetPushConstants(&pc_block, sizeof(pc_block)); + draw.Draw(); +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/RasterFont.h b/Source/Core/VideoBackends/Vulkan/RasterFont.h new file mode 100644 index 0000000000..5836da2f73 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/RasterFont.h @@ -0,0 +1,39 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file 
included. + +#pragma once + +#include +#include + +#include "Common/CommonTypes.h" + +#include "VideoBackends/Vulkan/Constants.h" + +namespace Vulkan +{ +class Texture2D; + +class RasterFont +{ +public: + RasterFont(); + ~RasterFont(); + + bool Initialize(); + + void PrintMultiLineText(VkRenderPass render_pass, const std::string& text, float start_x, + float start_y, u32 bbWidth, u32 bbHeight, u32 color); + +private: + bool CreateTexture(); + bool CreateShaders(); + + std::unique_ptr m_texture; + + VkShaderModule m_vertex_shader = VK_NULL_HANDLE; + VkShaderModule m_fragment_shader = VK_NULL_HANDLE; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp new file mode 100644 index 0000000000..a8578d0b27 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -0,0 +1,1557 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Core/ConfigManager.h" + +#include "VideoBackends/Vulkan/BoundingBox.h" +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/RasterFont.h" +#include "VideoBackends/Vulkan/Renderer.h" +#include "VideoBackends/Vulkan/StagingTexture2D.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/SwapChain.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +#if defined(HAVE_LIBAV) || defined(_WIN32) +#include "VideoCommon/AVIDump.h" +#endif + +#include "VideoCommon/BPFunctions.h" +#include "VideoCommon/BPMemory.h" +#include "VideoCommon/ImageWrite.h" +#include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/PixelEngine.h" +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/SamplerCommon.h" +#include "VideoCommon/TextureCacheBase.h" +#include 
"VideoCommon/VideoConfig.h" + +namespace Vulkan +{ +Renderer::Renderer() +{ + // Set to something invalid, forcing all states to be re-initialized. + for (size_t i = 0; i < m_sampler_states.size(); i++) + m_sampler_states[i].bits = std::numeric_limits::max(); + + // Set default size so a decent EFB is created initially. + s_backbuffer_width = MAX_XFB_WIDTH; + s_backbuffer_height = MAX_XFB_HEIGHT; + FramebufferManagerBase::SetLastXfbWidth(MAX_XFB_WIDTH); + FramebufferManagerBase::SetLastXfbHeight(MAX_XFB_HEIGHT); + PixelShaderManager::SetEfbScaleChanged(); + UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); + CalculateTargetSize(s_backbuffer_width, s_backbuffer_height); +} + +Renderer::~Renderer() +{ + g_Config.bRunning = false; + UpdateActiveConfig(); + DestroySemaphores(); +} + +bool Renderer::Initialize(FramebufferManager* framebuffer_mgr, void* window_handle, + VkSurfaceKHR surface) +{ + m_framebuffer_mgr = framebuffer_mgr; + g_Config.bRunning = true; + UpdateActiveConfig(); + + // Create state tracker, doesn't require any resources + m_state_tracker = std::make_unique(); + BindEFBToStateTracker(); + + if (!CreateSemaphores()) + { + PanicAlert("Failed to create semaphores."); + return false; + } + + if (!CompileShaders()) + { + PanicAlert("Failed to compile shaders."); + return false; + } + + m_raster_font = std::make_unique(); + if (!m_raster_font->Initialize()) + { + PanicAlert("Failed to initialize raster font."); + return false; + } + + m_bounding_box = std::make_unique(); + if (!m_bounding_box->Initialize()) + { + PanicAlert("Failed to initialize bounding box."); + return false; + } + + if (g_vulkan_context->SupportsBoundingBox()) + { + // Bind bounding box to state tracker + m_state_tracker->SetBBoxBuffer(m_bounding_box->GetGPUBuffer(), + m_bounding_box->GetGPUBufferOffset(), + m_bounding_box->GetGPUBufferSize()); + } + + // Initialize annoying statics + s_last_efb_scale = g_ActiveConfig.iEFBScale; + + // Create swap chain + if (surface) + { + 
// Update backbuffer dimensions + m_swap_chain = SwapChain::Create(window_handle, surface); + if (!m_swap_chain) + { + PanicAlert("Failed to create swap chain."); + return false; + } + + // Update render rectangle etc. + OnSwapChainResized(); + } + + // Various initialization routines will have executed commands on the command buffer. + // Execute what we have done before beginning the first frame. + g_command_buffer_mgr->PrepareToSubmitCommandBuffer(); + g_command_buffer_mgr->SubmitCommandBuffer(false); + BeginFrame(); + + return true; +} + +bool Renderer::CreateSemaphores() +{ + // Create two semaphores, one that is triggered when the swapchain buffer is ready, another after + // submit and before present + VkSemaphoreCreateInfo semaphore_info = { + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0 // VkSemaphoreCreateFlags flags + }; + + VkResult res; + if ((res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_info, nullptr, + &m_image_available_semaphore)) != VK_SUCCESS || + (res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_info, nullptr, + &m_rendering_finished_semaphore)) != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); + return false; + } + + return true; +} + +// Destroys both frame semaphores if they exist. Safe to call more than once. +void Renderer::DestroySemaphores() +{ + // VkSemaphore is a non-dispatchable handle, which is a plain 64-bit integer on 32-bit + // targets, so the handles are reset with VK_NULL_HANDLE rather than nullptr. + if (m_image_available_semaphore) + { + vkDestroySemaphore(g_vulkan_context->GetDevice(), m_image_available_semaphore, nullptr); + m_image_available_semaphore = VK_NULL_HANDLE; + } + + if (m_rendering_finished_semaphore) + { + vkDestroySemaphore(g_vulkan_context->GetDevice(), m_rendering_finished_semaphore, nullptr); + m_rendering_finished_semaphore = VK_NULL_HANDLE; + } +} + +void Renderer::RenderText(const std::string& text, int left, int top, u32 color) +{ + u32 backbuffer_width = m_swap_chain->GetWidth(); + u32 backbuffer_height = m_swap_chain->GetHeight(); + + m_raster_font->PrintMultiLineText(m_swap_chain->GetRenderPass(), text, + left * 2.0f / 
static_cast(backbuffer_width) - 1, + 1 - top * 2.0f / static_cast(backbuffer_height), + backbuffer_width, backbuffer_height, color); +} + +u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) +{ + if (type == PEEK_COLOR) + { + u32 color = m_framebuffer_mgr->PeekEFBColor(m_state_tracker.get(), x, y); + + // a little-endian value is expected to be returned + color = ((color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000)); + + // check what to do with the alpha channel (GX_PokeAlphaRead) + PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); + + if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) + { + color = RGBA8ToRGBA6ToRGBA8(color); + } + else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) + { + color = RGBA8ToRGB565ToRGBA8(color); + } + if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) + { + color |= 0xFF000000; + } + + if (alpha_read_mode.ReadMode == 2) + { + return color; // GX_READ_NONE + } + else if (alpha_read_mode.ReadMode == 1) + { + return color | 0xFF000000; // GX_READ_FF + } + else /*if(alpha_read_mode.ReadMode == 0)*/ + { + return color & 0x00FFFFFF; // GX_READ_00 + } + } + else // if (type == PEEK_Z) + { + // Depth buffer is inverted for improved precision near far plane + float depth = 1.0f - m_framebuffer_mgr->PeekEFBDepth(m_state_tracker.get(), x, y); + u32 ret = 0; + + if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) + { + // if Z is in 16 bit format you must return a 16 bit integer + ret = MathUtil::Clamp(static_cast(depth * 65536.0f), 0, 0xFFFF); + } + else + { + ret = MathUtil::Clamp(static_cast(depth * 16777216.0f), 0, 0xFFFFFF); + } + + return ret; + } +} + +void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) +{ + if (type == POKE_COLOR) + { + for (size_t i = 0; i < num_points; i++) + { + // Convert to expected format (BGRA->RGBA) + // TODO: Check alpha, depending on mode? 
+ const EfbPokeData& point = points[i]; + u32 color = ((point.data & 0xFF00FF00) | ((point.data >> 16) & 0xFF) | + ((point.data << 16) & 0xFF0000)); + m_framebuffer_mgr->PokeEFBColor(m_state_tracker.get(), point.x, point.y, color); + } + } + else // if (type == POKE_Z) + { + for (size_t i = 0; i < num_points; i++) + { + // Convert to floating-point depth. + const EfbPokeData& point = points[i]; + float depth = (1.0f - float(point.data & 0xFFFFFF) / 16777216.0f); + m_framebuffer_mgr->PokeEFBDepth(m_state_tracker.get(), point.x, point.y, depth); + } + } +} + +u16 Renderer::BBoxRead(int index) +{ + s32 value = m_bounding_box->Get(m_state_tracker.get(), static_cast(index)); + + // Here we get the min/max value of the truncated position of the upscaled framebuffer. + // So we have to correct them to the unscaled EFB sizes. + if (index < 2) + { + // left/right + value = value * EFB_WIDTH / s_target_width; + } + else + { + // up/down + value = value * EFB_HEIGHT / s_target_height; + } + + // fix max values to describe the outer border + if (index & 1) + value++; + + return static_cast(value); +} + +void Renderer::BBoxWrite(int index, u16 value) +{ + s32 scaled_value = static_cast(value); + + // fix max values to describe the outer border + if (index & 1) + scaled_value--; + + // scale to internal resolution + if (index < 2) + { + // left/right + scaled_value = scaled_value * s_target_width / EFB_WIDTH; + } + else + { + // up/down + scaled_value = scaled_value * s_target_height / EFB_HEIGHT; + } + + m_bounding_box->Set(m_state_tracker.get(), static_cast(index), scaled_value); +} + +TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) +{ + TargetRectangle result; + result.left = EFBToScaledX(rc.left); + result.top = EFBToScaledY(rc.top); + result.right = EFBToScaledX(rc.right); + result.bottom = EFBToScaledY(rc.bottom); + return result; +} + +void Renderer::BeginFrame() +{ + // Activate a new command list, and restore state ready for the next draw + 
g_command_buffer_mgr->ActivateCommandBuffer(); + + // Ensure that the state tracker rebinds everything, and allocates a new set + // of descriptors out of the next pool. + m_state_tracker->InvalidateDescriptorSets(); + m_state_tracker->SetPendingRebind(); +} + +// Clears the given EFB rectangle. color_enable/alpha_enable/z_enable select which channels are +// written; color is packed ARGB8 and z is a 24-bit depth value. +void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, + bool z_enable, u32 color, u32 z) +{ + // Native -> EFB coordinates + TargetRectangle target_rc = Renderer::ConvertEFBRectangle(rc); + VkRect2D target_vk_rc = { + {target_rc.left, target_rc.top}, + {static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight())}}; + + // Determine whether the EFB has an alpha channel. If it doesn't, we can clear the alpha + // channel to 0xFF. This hopefully allows us to use the fast path in most cases. + // This has to happen before the clear values are derived from color below, otherwise the + // forced 0xFF alpha never reaches clear_color_value. + if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16 || + bpmem.zcontrol.pixel_format == PEControl::RGB8_Z24 || + bpmem.zcontrol.pixel_format == PEControl::Z24) + { + // Force alpha writes, and set the alpha bits of the color to 0xFF. + alpha_enable = true; + color |= 0xFF000000; + } + + // Convert RGBA8 -> floating-point values. + VkClearValue clear_color_value = {}; + VkClearValue clear_depth_value = {}; + clear_color_value.color.float32[0] = static_cast((color >> 16) & 0xFF) / 255.0f; + clear_color_value.color.float32[1] = static_cast((color >> 8) & 0xFF) / 255.0f; + clear_color_value.color.float32[2] = static_cast((color >> 0) & 0xFF) / 255.0f; + clear_color_value.color.float32[3] = static_cast((color >> 24) & 0xFF) / 255.0f; + clear_depth_value.depthStencil.depth = (1.0f - (static_cast(z & 0xFFFFFF) / 16777216.0f)); + + // If we're not in a render pass (start of the frame), we can use a clear render pass + // to discard the data, rather than loading and then clearing. + bool use_clear_render_pass = (color_enable && alpha_enable && z_enable); + if (m_state_tracker->InRenderPass()) + { + // Prefer not to end a render pass just to do a clear. 
+ use_clear_render_pass = false; + } + + // Fastest path: Use a render pass to clear the buffers. + if (use_clear_render_pass) + { + VkClearValue clear_values[2] = {clear_color_value, clear_depth_value}; + m_state_tracker->BeginClearRenderPass(target_vk_rc, clear_values); + return; + } + + // Fast path: Use vkCmdClearAttachments to clear the buffers within a render path + // We can't use this when preserving alpha but clearing color. + { + VkClearAttachment clear_attachments[2]; + uint32_t num_clear_attachments = 0; + if (color_enable && alpha_enable) + { + clear_attachments[num_clear_attachments].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + clear_attachments[num_clear_attachments].colorAttachment = 0; + clear_attachments[num_clear_attachments].clearValue = clear_color_value; + num_clear_attachments++; + color_enable = false; + alpha_enable = false; + } + if (z_enable) + { + clear_attachments[num_clear_attachments].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + clear_attachments[num_clear_attachments].colorAttachment = 0; + clear_attachments[num_clear_attachments].clearValue = clear_depth_value; + num_clear_attachments++; + z_enable = false; + } + if (num_clear_attachments > 0) + { + VkClearRect clear_rect = {target_vk_rc, 0, m_framebuffer_mgr->GetEFBLayers()}; + if (!m_state_tracker->IsWithinRenderArea(target_vk_rc.offset.x, target_vk_rc.offset.y, + target_vk_rc.extent.width, + target_vk_rc.extent.height)) + { + m_state_tracker->EndClearRenderPass(); + } + m_state_tracker->BeginRenderPass(); + + vkCmdClearAttachments(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_clear_attachments, + clear_attachments, 1, &clear_rect); + } + } + + // Anything left over for the slow path? + if (!color_enable && !alpha_enable && !z_enable) + return; + + // Clearing must occur within a render pass. 
+ if (!m_state_tracker->IsWithinRenderArea(target_vk_rc.offset.x, target_vk_rc.offset.y, + target_vk_rc.extent.width, target_vk_rc.extent.height)) + { + m_state_tracker->EndClearRenderPass(); + } + m_state_tracker->BeginRenderPass(); + m_state_tracker->SetPendingRebind(); + + // Mask away the appropriate colors and use a shader + BlendState blend_state = Util::GetNoBlendingBlendState(); + u32 write_mask = 0; + if (color_enable) + write_mask |= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT; + if (alpha_enable) + write_mask |= VK_COLOR_COMPONENT_A_BIT; + blend_state.write_mask = write_mask; + + DepthStencilState depth_state = Util::GetNoDepthTestingDepthStencilState(); + depth_state.test_enable = z_enable ? VK_TRUE : VK_FALSE; + depth_state.write_enable = z_enable ? VK_TRUE : VK_FALSE; + depth_state.compare_op = VK_COMPARE_OP_ALWAYS; + + RasterizationState rs_state = Util::GetNoCullRasterizationState(); + rs_state.per_sample_shading = g_ActiveConfig.bSSAA ? 
VK_TRUE : VK_FALSE; + rs_state.samples = m_framebuffer_mgr->GetEFBSamples(); + + // No need to start a new render pass, but we do need to restore viewport state + UtilityShaderDraw draw( + g_command_buffer_mgr->GetCurrentCommandBuffer(), g_object_cache->GetStandardPipelineLayout(), + m_framebuffer_mgr->GetEFBLoadRenderPass(), g_object_cache->GetPassthroughVertexShader(), + g_object_cache->GetPassthroughGeometryShader(), m_clear_fragment_shader); + + draw.SetRasterizationState(rs_state); + draw.SetDepthStencilState(depth_state); + draw.SetBlendState(blend_state); + + draw.DrawColoredQuad(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight(), + clear_color_value.color.float32[0], clear_color_value.color.float32[1], + clear_color_value.color.float32[2], clear_color_value.color.float32[3], + clear_depth_value.depthStencil.depth); +} + +void Renderer::ReinterpretPixelData(unsigned int convtype) +{ + m_state_tracker->EndRenderPass(); + m_state_tracker->SetPendingRebind(); + m_framebuffer_mgr->ReinterpretPixelData(convtype); + + // EFB framebuffer has now changed, so update accordingly. + BindEFBToStateTracker(); +} + +void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, + const EFBRectangle& rc, float gamma) +{ + // Flush any pending EFB pokes. + m_framebuffer_mgr->FlushEFBPokes(m_state_tracker.get()); + + // End the current render pass. + m_state_tracker->EndRenderPass(); + m_state_tracker->OnEndFrame(); + + // Scale the source rectangle to the selected internal resolution. + TargetRectangle source_rc = Renderer::ConvertEFBRectangle(rc); + + // Target rectangle can change if the VI aspect ratio changes. + UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); + + // Transition the EFB render target to a shader resource. 
+ VkRect2D src_region = {{0, 0}, + {m_framebuffer_mgr->GetEFBWidth(), m_framebuffer_mgr->GetEFBHeight()}}; + Texture2D* efb_color_texture = + m_framebuffer_mgr->ResolveEFBColorTexture(m_state_tracker.get(), src_region); + efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + // Draw to the screenshot buffer if needed. + bool needs_screenshot = (s_bScreenshot || SConfig::GetInstance().m_DumpFrames); + if (needs_screenshot && DrawScreenshot(source_rc, efb_color_texture)) + { + if (s_bScreenshot) + WriteScreenshot(); + + if (SConfig::GetInstance().m_DumpFrames) + WriteFrameDump(); + } + else + { + // Stop frame dump if requested. + if (bAVIDumping) + StopFrameDump(); + } + + // Ensure the worker thread is not still submitting a previous command buffer. + // In other words, the last frame has been submitted (otherwise the next call would + // be a race, as the image may not have been consumed yet). + g_command_buffer_mgr->PrepareToSubmitCommandBuffer(); + + // Draw to the screen if we have a swap chain. + if (m_swap_chain) + { + DrawScreen(source_rc, efb_color_texture); + + // Submit the current command buffer, signaling rendering finished semaphore when it's done + // Because this final command buffer is rendering to the swap chain, we need to wait for + // the available semaphore to be signaled before executing the buffer. This final submission + // can happen off-thread in the background while we're preparing the next frame. + g_command_buffer_mgr->SubmitCommandBuffer( + true, m_image_available_semaphore, m_rendering_finished_semaphore, + m_swap_chain->GetSwapChain(), m_swap_chain->GetCurrentImageIndex()); + } + else + { + // No swap chain, just execute command buffer. + g_command_buffer_mgr->SubmitCommandBuffer(true); + } + + // Prep for the next frame (get command buffer ready) before doing anything else. 
+ BeginFrame(); + + // Restore the EFB color texture to color attachment ready for rendering. + m_framebuffer_mgr->GetEFBColorTexture()->TransitionToLayout( + g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + // Clean up stale textures + TextureCacheBase::Cleanup(frameCount); + + // Handle window resizes. + CheckForTargetResize(fb_width, fb_stride, fb_height); + CheckForSurfaceChange(); + + // Determine what has changed in the config. + CheckForConfigChanges(); +} + +void Renderer::DrawScreen(const TargetRectangle& src_rect, const Texture2D* src_tex) +{ + // Grab the next image from the swap chain in preparation for drawing the window. + VkResult res = m_swap_chain->AcquireNextImage(m_image_available_semaphore); + if (res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR) + { + // Window has been resized. Update the swap chain and try again. + ResizeSwapChain(); + res = m_swap_chain->AcquireNextImage(m_image_available_semaphore); + } + if (res != VK_SUCCESS) + PanicAlert("Failed to grab image from swap chain"); + + // Transition from undefined (or present src, but it can be substituted) to + // color attachment ready for writing. These transitions must occur outside + // a render pass, unless the render pass declares a self-dependency. 
+ Texture2D* backbuffer = m_swap_chain->GetCurrentTexture(); + backbuffer->OverrideImageLayout(VK_IMAGE_LAYOUT_UNDEFINED); + backbuffer->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + // Blit the EFB to the back buffer (Swap chain) + UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), + g_object_cache->GetStandardPipelineLayout(), m_swap_chain->GetRenderPass(), + g_object_cache->GetPassthroughVertexShader(), VK_NULL_HANDLE, + m_blit_fragment_shader); + + // Begin the present render pass + VkClearValue clear_value = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; + VkRect2D target_region = {{0, 0}, {backbuffer->GetWidth(), backbuffer->GetHeight()}}; + draw.BeginRenderPass(m_swap_chain->GetCurrentFramebuffer(), target_region, &clear_value); + + // Copy EFB -> backbuffer + const TargetRectangle& dst_rect = GetTargetRectangle(); + BlitScreen(m_swap_chain->GetRenderPass(), dst_rect, src_rect, src_tex, true); + + // OSD stuff + Util::SetViewportAndScissor(g_command_buffer_mgr->GetCurrentCommandBuffer(), 0, 0, + backbuffer->GetWidth(), backbuffer->GetHeight()); + DrawDebugText(); + + // Do our OSD callbacks + OSD::DoCallbacks(OSD::CallbackType::OnFrame); + OSD::DrawMessages(); + + // End drawing to backbuffer + draw.EndRenderPass(); + + // Transition the backbuffer to PRESENT_SRC to ensure all commands drawing + // to it have finished before present. 
+ backbuffer->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR); +} + +bool Renderer::DrawScreenshot(const TargetRectangle& src_rect, const Texture2D* src_tex) +{ + u32 width = std::max(1u, static_cast(s_backbuffer_width)); + u32 height = std::max(1u, static_cast(s_backbuffer_height)); + if (!ResizeScreenshotBuffer(width, height)) + return false; + + VkClearValue clear_value = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; + VkClearRect clear_rect = {{{0, 0}, {width, height}}, 0, 1}; + VkClearAttachment clear_attachment = {VK_IMAGE_ASPECT_COLOR_BIT, 0, clear_value}; + VkRenderPassBeginInfo info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + m_framebuffer_mgr->GetColorCopyForReadbackRenderPass(), + m_screenshot_framebuffer, + {{0, 0}, {width, height}}, + 1, + &clear_value}; + vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &info, + VK_SUBPASS_CONTENTS_INLINE); + vkCmdClearAttachments(g_command_buffer_mgr->GetCurrentCommandBuffer(), 1, &clear_attachment, 1, + &clear_rect); + BlitScreen(m_framebuffer_mgr->GetColorCopyForReadbackRenderPass(), GetTargetRectangle(), src_rect, + src_tex, true); + vkCmdEndRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer()); + + // Copy to the readback texture. + m_screenshot_readback_texture->CopyFromImage( + g_command_buffer_mgr->GetCurrentCommandBuffer(), m_screenshot_render_texture->GetImage(), + VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, width, height, 0, 0); + + // Wait for the command buffer to complete. + g_command_buffer_mgr->ExecuteCommandBuffer(false, true); + return true; +} + +void Renderer::BlitScreen(VkRenderPass render_pass, const TargetRectangle& dst_rect, + const TargetRectangle& src_rect, const Texture2D* src_tex, + bool linear_filter) +{ + // We could potentially use vkCmdBlitImage here. + VkSampler sampler = + linear_filter ? 
g_object_cache->GetLinearSampler() : g_object_cache->GetPointSampler(); + + // Set up common data + UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), + g_object_cache->GetStandardPipelineLayout(), render_pass, + g_object_cache->GetPassthroughVertexShader(), VK_NULL_HANDLE, + m_blit_fragment_shader); + + draw.SetPSSampler(0, src_tex->GetView(), sampler); + + if (g_ActiveConfig.iStereoMode == STEREO_SBS || g_ActiveConfig.iStereoMode == STEREO_TAB) + { + TargetRectangle left_rect; + TargetRectangle right_rect; + if (g_ActiveConfig.iStereoMode == STEREO_TAB) + ConvertStereoRectangle(dst_rect, right_rect, left_rect); + else + ConvertStereoRectangle(dst_rect, left_rect, right_rect); + + draw.DrawQuad(left_rect.left, left_rect.top, left_rect.GetWidth(), left_rect.GetHeight(), + src_rect.left, src_rect.top, 0, src_rect.GetWidth(), src_rect.GetHeight(), + src_tex->GetWidth(), src_tex->GetHeight()); + + draw.DrawQuad(right_rect.left, right_rect.top, right_rect.GetWidth(), right_rect.GetHeight(), + src_rect.left, src_rect.top, 1, src_rect.GetWidth(), src_rect.GetHeight(), + src_tex->GetWidth(), src_tex->GetHeight()); + } + else + { + draw.DrawQuad(dst_rect.left, dst_rect.top, dst_rect.GetWidth(), dst_rect.GetHeight(), + src_rect.left, src_rect.top, 0, src_rect.GetWidth(), src_rect.GetHeight(), + src_tex->GetWidth(), src_tex->GetHeight()); + } +} + +bool Renderer::ResizeScreenshotBuffer(u32 new_width, u32 new_height) +{ + if (m_screenshot_render_texture && m_screenshot_render_texture->GetWidth() == new_width && + m_screenshot_render_texture->GetHeight() == new_height) + { + return true; + } + + if (m_screenshot_framebuffer != VK_NULL_HANDLE) + { + vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_screenshot_framebuffer, nullptr); + m_screenshot_framebuffer = VK_NULL_HANDLE; + } + + m_screenshot_render_texture = + Texture2D::Create(new_width, new_height, 1, 1, EFB_COLOR_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, 
VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + + m_screenshot_readback_texture = StagingTexture2D::Create(STAGING_BUFFER_TYPE_READBACK, new_width, + new_height, EFB_COLOR_TEXTURE_FORMAT); + if (!m_screenshot_render_texture || !m_screenshot_readback_texture || + !m_screenshot_readback_texture->Map()) + { + WARN_LOG(VIDEO, "Failed to resize screenshot render texture"); + m_screenshot_render_texture.reset(); + m_screenshot_readback_texture.reset(); + return false; + } + + VkImageView attachment = m_screenshot_render_texture->GetView(); + VkFramebufferCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + info.renderPass = m_framebuffer_mgr->GetColorCopyForReadbackRenderPass(); + info.attachmentCount = 1; + info.pAttachments = &attachment; + info.width = new_width; + info.height = new_height; + info.layers = 1; + + VkResult res = + vkCreateFramebuffer(g_vulkan_context->GetDevice(), &info, nullptr, &m_screenshot_framebuffer); + if (res != VK_SUCCESS) + { + WARN_LOG(VIDEO, "Failed to resize screenshot framebuffer"); + m_screenshot_render_texture.reset(); + m_screenshot_readback_texture.reset(); + return false; + } + + // Render pass expects texture is in transfer src to start with. 
+ m_screenshot_render_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + return true; +} + +void Renderer::DestroyScreenshotResources() +{ + if (m_screenshot_framebuffer != VK_NULL_HANDLE) + { + vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_screenshot_framebuffer, nullptr); + m_screenshot_framebuffer = VK_NULL_HANDLE; + } + + m_screenshot_render_texture.reset(); + m_screenshot_readback_texture.reset(); +} + +void Renderer::WriteScreenshot() +{ + std::lock_guard guard(s_criticalScreenshot); + + if (!TextureToPng(reinterpret_cast(m_screenshot_readback_texture->GetMapPointer()), + static_cast(m_screenshot_readback_texture->GetRowStride()), + s_sScreenshotName, static_cast(m_screenshot_render_texture->GetWidth()), + static_cast(m_screenshot_render_texture->GetHeight()), false)) + { + WARN_LOG(VIDEO, "Failed to write screenshot to %s", s_sScreenshotName.c_str()); + } + + s_sScreenshotName.clear(); + s_bScreenshot = false; + s_screenshotCompleted.Set(); +} + +void Renderer::WriteFrameDump() +{ +#if defined(HAVE_LIBAV) || defined(_WIN32) + if (!bLastFrameDumped) + { + bLastFrameDumped = true; + bAVIDumping = AVIDump::Start(static_cast(m_screenshot_render_texture->GetWidth()), + static_cast(m_screenshot_render_texture->GetHeight()), + AVIDump::DumpFormat::FORMAT_RGBA); + + if (!bAVIDumping) + { + OSD::AddMessage("Failed to start frame dumping.", 2000); + return; + } + + OSD::AddMessage(StringFromFormat("Frame dumping started (%ux%u RGBA8).", + m_screenshot_render_texture->GetWidth(), + m_screenshot_render_texture->GetHeight()), + 2000); + } + + if (bAVIDumping) + { + AVIDump::AddFrame(reinterpret_cast(m_screenshot_readback_texture->GetMapPointer()), + static_cast(m_screenshot_render_texture->GetWidth()), + static_cast(m_screenshot_render_texture->GetHeight())); + } +#else + if (!bLastFrameDumped) + { + OSD::AddMessage("Dumping frames not supported", 2000); + bLastFrameDumped = true; + } +#endif 
+} + +void Renderer::StopFrameDump() +{ +#if defined(HAVE_LIBAV) || defined(_WIN32) + if (bAVIDumping) + { + OSD::AddMessage("Frame dumping stopped.", 2000); + bAVIDumping = false; + bLastFrameDumped = false; + AVIDump::Stop(); + } +#endif +} + +void Renderer::CheckForTargetResize(u32 fb_width, u32 fb_stride, u32 fb_height) +{ + if (FramebufferManagerBase::LastXfbWidth() != fb_stride || + FramebufferManagerBase::LastXfbHeight() != fb_height) + { + u32 last_w = (fb_stride < 1 || fb_stride > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_stride; + u32 last_h = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fb_height; + FramebufferManagerBase::SetLastXfbWidth(last_w); + FramebufferManagerBase::SetLastXfbHeight(last_h); + } + + // This call is needed for auto-resizing to work. + SetWindowSize(static_cast(fb_stride), static_cast(fb_height)); +} + +void Renderer::CheckForSurfaceChange() +{ + if (!s_surface_needs_change.IsSet()) + return; + + u32 old_width = m_swap_chain ? m_swap_chain->GetWidth() : 0; + u32 old_height = m_swap_chain ? m_swap_chain->GetHeight() : 0; + + // Wait for the GPU to catch up since we're going to destroy the swap chain. + g_command_buffer_mgr->WaitForGPUIdle(); + + // Fast path, if the surface handle is the same, the window has just been resized. + if (m_swap_chain && s_new_surface_handle == m_swap_chain->GetNativeHandle()) + { + INFO_LOG(VIDEO, "Detected window resize."); + ResizeSwapChain(); + + // Notify the main thread we are done. + s_surface_needs_change.Clear(); + s_new_surface_handle = nullptr; + s_surface_changed.Set(); + } + else + { + // Did we previously have a swap chain? + if (m_swap_chain) + { + if (!s_new_surface_handle) + { + // If there is no surface now, destroy the swap chain. + m_swap_chain.reset(); + } + else + { + // Recreate the surface. If this fails we're in trouble. + if (!m_swap_chain->RecreateSurface(s_new_surface_handle)) + PanicAlert("Failed to recreate Vulkan surface. 
Cannot continue."); + } + } + else + { + // Previously had no swap chain. So create one. + VkSurfaceKHR surface = SwapChain::CreateVulkanSurface(g_vulkan_context->GetVulkanInstance(), + s_new_surface_handle); + if (surface != VK_NULL_HANDLE) + { + m_swap_chain = SwapChain::Create(s_new_surface_handle, surface); + if (!m_swap_chain) + PanicAlert("Failed to create swap chain."); + } + else + { + PanicAlert("Failed to create surface."); + } + } + + // Notify calling thread. + s_surface_needs_change.Clear(); + s_new_surface_handle = nullptr; + s_surface_changed.Set(); + } + + if (m_swap_chain) + { + // Handle case where the dimensions are now different + if (old_width != m_swap_chain->GetWidth() || old_height != m_swap_chain->GetHeight()) + OnSwapChainResized(); + } +} + +void Renderer::CheckForConfigChanges() +{ + // Compare g_Config to g_ActiveConfig to determine what has changed before copying. + bool vsync_changed = (g_Config.bVSync != g_ActiveConfig.bVSync); + bool msaa_changed = (g_Config.iMultisamples != g_ActiveConfig.iMultisamples); + bool ssaa_changed = (g_Config.bSSAA != g_ActiveConfig.bSSAA); + bool anisotropy_changed = (g_Config.iMaxAnisotropy != g_ActiveConfig.iMaxAnisotropy); + bool force_texture_filtering_changed = + (g_Config.bForceFiltering != g_ActiveConfig.bForceFiltering); + bool stereo_changed = (g_Config.iStereoMode != g_ActiveConfig.iStereoMode); + + // Copy g_Config to g_ActiveConfig. + UpdateActiveConfig(); + + // MSAA samples changed, we need to recreate the EFB render pass, and all shaders. + if (msaa_changed) + { + m_framebuffer_mgr->RecreateRenderPass(); + m_framebuffer_mgr->ResizeEFBTextures(); + } + + // SSAA changed on/off, we can leave the buffers/render pass, but have to recompile shaders. + if (msaa_changed || ssaa_changed) + { + BindEFBToStateTracker(); + m_framebuffer_mgr->RecompileShaders(); + g_object_cache->ClearPipelineCache(); + } + + // Handle internal resolution changes. 
+ if (s_last_efb_scale != g_ActiveConfig.iEFBScale) + { + s_last_efb_scale = g_ActiveConfig.iEFBScale; + if (CalculateTargetSize(s_backbuffer_width, s_backbuffer_height)) + ResizeEFBTextures(); + } + + // Handle stereoscopy mode changes. + if (stereo_changed) + { + ResizeEFBTextures(); + BindEFBToStateTracker(); + RecompileShaders(); + } + + // For vsync, we need to change the present mode, which means recreating the swap chain. + if (vsync_changed) + ResizeSwapChain(); + + // Wipe sampler cache if force texture filtering or anisotropy changes. + if (anisotropy_changed || force_texture_filtering_changed) + ResetSamplerStates(); +} + +void Renderer::OnSwapChainResized() +{ + s_backbuffer_width = m_swap_chain->GetWidth(); + s_backbuffer_height = m_swap_chain->GetHeight(); + FramebufferManagerBase::SetLastXfbWidth(MAX_XFB_WIDTH); + FramebufferManagerBase::SetLastXfbHeight(MAX_XFB_HEIGHT); + UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); + if (CalculateTargetSize(s_backbuffer_width, s_backbuffer_height)) + ResizeEFBTextures(); + + PixelShaderManager::SetEfbScaleChanged(); +} + +void Renderer::BindEFBToStateTracker() +{ + // Update framebuffer in state tracker + VkRect2D framebuffer_size = { + {0, 0}, {m_framebuffer_mgr->GetEFBWidth(), m_framebuffer_mgr->GetEFBHeight()}}; + m_state_tracker->SetRenderPass(m_framebuffer_mgr->GetEFBLoadRenderPass(), + m_framebuffer_mgr->GetEFBClearRenderPass()); + m_state_tracker->SetFramebuffer(m_framebuffer_mgr->GetEFBFramebuffer(), framebuffer_size); + + // Update rasterization state with MSAA info + RasterizationState rs_state = {}; + rs_state.bits = m_state_tracker->GetRasterizationState().bits; + rs_state.samples = m_framebuffer_mgr->GetEFBSamples(); + rs_state.per_sample_shading = g_ActiveConfig.bSSAA ? VK_TRUE : VK_FALSE; + m_state_tracker->SetRasterizationState(rs_state); +} + +void Renderer::ResizeEFBTextures() +{ + // Ensure the GPU is finished with the current EFB textures. 
+ g_command_buffer_mgr->WaitForGPUIdle(); + + m_framebuffer_mgr->ResizeEFBTextures(); + s_last_efb_scale = g_ActiveConfig.iEFBScale; + + BindEFBToStateTracker(); + + // Viewport and scissor rect have to be reset since they will be scaled differently. + SetViewport(); + BPFunctions::SetScissor(); +} + +void Renderer::ResizeSwapChain() +{ + // The worker thread may still be submitting a present on this swap chain. + g_command_buffer_mgr->WaitForGPUIdle(); + + // It's now safe to resize the swap chain. + if (!m_swap_chain->ResizeSwapChain()) + PanicAlert("Failed to resize swap chain."); + + OnSwapChainResized(); +} + +void Renderer::ApplyState(bool bUseDstAlpha) +{ +} + +void Renderer::ResetAPIState() +{ + // End the EFB render pass if active + m_state_tracker->EndRenderPass(); +} + +void Renderer::RestoreAPIState() +{ + // Instruct the state tracker to re-bind everything before the next draw + m_state_tracker->SetPendingRebind(); +} + +void Renderer::SetGenerationMode() +{ + RasterizationState new_rs_state = {}; + new_rs_state.bits = m_state_tracker->GetRasterizationState().bits; + + switch (bpmem.genMode.cullmode) + { + case GenMode::CULL_NONE: + new_rs_state.cull_mode = VK_CULL_MODE_NONE; + break; + case GenMode::CULL_BACK: + new_rs_state.cull_mode = VK_CULL_MODE_BACK_BIT; + break; + case GenMode::CULL_FRONT: + new_rs_state.cull_mode = VK_CULL_MODE_FRONT_BIT; + break; + case GenMode::CULL_ALL: + new_rs_state.cull_mode = VK_CULL_MODE_FRONT_AND_BACK; + break; + default: + new_rs_state.cull_mode = VK_CULL_MODE_NONE; + break; + } + + m_state_tracker->SetRasterizationState(new_rs_state); +} + +void Renderer::SetDepthMode() +{ + DepthStencilState new_ds_state = {}; + new_ds_state.test_enable = bpmem.zmode.testenable ? VK_TRUE : VK_FALSE; + new_ds_state.write_enable = bpmem.zmode.updateenable ? 
VK_TRUE : VK_FALSE; + + // Inverted depth, hence these are swapped + switch (bpmem.zmode.func) + { + case ZMode::NEVER: + new_ds_state.compare_op = VK_COMPARE_OP_NEVER; + break; + case ZMode::LESS: + new_ds_state.compare_op = VK_COMPARE_OP_GREATER; + break; + case ZMode::EQUAL: + new_ds_state.compare_op = VK_COMPARE_OP_EQUAL; + break; + case ZMode::LEQUAL: + new_ds_state.compare_op = VK_COMPARE_OP_GREATER_OR_EQUAL; + break; + case ZMode::GREATER: + new_ds_state.compare_op = VK_COMPARE_OP_LESS; + break; + case ZMode::NEQUAL: + new_ds_state.compare_op = VK_COMPARE_OP_NOT_EQUAL; + break; + case ZMode::GEQUAL: + new_ds_state.compare_op = VK_COMPARE_OP_LESS_OR_EQUAL; + break; + case ZMode::ALWAYS: + new_ds_state.compare_op = VK_COMPARE_OP_ALWAYS; + break; + default: + new_ds_state.compare_op = VK_COMPARE_OP_ALWAYS; + break; + } + + m_state_tracker->SetDepthStencilState(new_ds_state); +} + +void Renderer::SetColorMask() +{ + u32 color_mask = 0; + + if (bpmem.alpha_test.TestResult() != AlphaTest::FAIL) + { + if (bpmem.blendmode.alphaupdate && bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) + color_mask |= VK_COLOR_COMPONENT_A_BIT; + if (bpmem.blendmode.colorupdate) + color_mask |= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT; + } + + BlendState new_blend_state = {}; + new_blend_state.bits = m_state_tracker->GetBlendState().bits; + new_blend_state.write_mask = color_mask; + + m_state_tracker->SetBlendState(new_blend_state); +} + +void Renderer::SetBlendMode(bool force_update) +{ + BlendState new_blend_state = {}; + new_blend_state.bits = m_state_tracker->GetBlendState().bits; + + // Fast path for blending disabled + if (!bpmem.blendmode.blendenable) + { + new_blend_state.blend_enable = VK_FALSE; + new_blend_state.blend_op = VK_BLEND_OP_ADD; + new_blend_state.src_blend = VK_BLEND_FACTOR_ONE; + new_blend_state.dst_blend = VK_BLEND_FACTOR_ZERO; + new_blend_state.alpha_blend_op = VK_BLEND_OP_ADD; + new_blend_state.src_alpha_blend = 
VK_BLEND_FACTOR_ONE; + new_blend_state.dst_alpha_blend = VK_BLEND_FACTOR_ZERO; + m_state_tracker->SetBlendState(new_blend_state); + return; + } + // Fast path for subtract blending + else if (bpmem.blendmode.subtract) + { + new_blend_state.blend_enable = VK_TRUE; + new_blend_state.blend_op = VK_BLEND_OP_REVERSE_SUBTRACT; + new_blend_state.src_blend = VK_BLEND_FACTOR_ONE; + new_blend_state.dst_blend = VK_BLEND_FACTOR_ONE; + new_blend_state.alpha_blend_op = VK_BLEND_OP_REVERSE_SUBTRACT; + new_blend_state.src_alpha_blend = VK_BLEND_FACTOR_ONE; + new_blend_state.dst_alpha_blend = VK_BLEND_FACTOR_ONE; + m_state_tracker->SetBlendState(new_blend_state); + return; + } + + // Our render target always uses an alpha channel, so we need to override the blend functions to + // assume a destination alpha of 1 if the render target isn't supposed to have an alpha channel. + bool target_has_alpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24; + bool use_dst_alpha = bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && target_has_alpha && + g_vulkan_context->SupportsDualSourceBlend(); + + new_blend_state.blend_enable = VK_TRUE; + new_blend_state.blend_op = VK_BLEND_OP_ADD; + + switch (bpmem.blendmode.srcfactor) + { + case BlendMode::ZERO: + new_blend_state.src_blend = VK_BLEND_FACTOR_ZERO; + break; + case BlendMode::ONE: + new_blend_state.src_blend = VK_BLEND_FACTOR_ONE; + break; + case BlendMode::DSTCLR: + new_blend_state.src_blend = VK_BLEND_FACTOR_DST_COLOR; + break; + case BlendMode::INVDSTCLR: + new_blend_state.src_blend = VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; + break; + case BlendMode::SRCALPHA: + new_blend_state.src_blend = + use_dst_alpha ? VK_BLEND_FACTOR_SRC1_ALPHA : VK_BLEND_FACTOR_SRC_ALPHA; + break; + case BlendMode::INVSRCALPHA: + new_blend_state.src_blend = + use_dst_alpha ? VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA : VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + break; + case BlendMode::DSTALPHA: + new_blend_state.src_blend = target_has_alpha ? 
VK_BLEND_FACTOR_DST_ALPHA : VK_BLEND_FACTOR_ONE; + break; + case BlendMode::INVDSTALPHA: + new_blend_state.src_blend = + target_has_alpha ? VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA : VK_BLEND_FACTOR_ZERO; + break; + default: + new_blend_state.src_blend = VK_BLEND_FACTOR_ONE; + break; + } + + switch (bpmem.blendmode.dstfactor) + { + case BlendMode::ZERO: + new_blend_state.dst_blend = VK_BLEND_FACTOR_ZERO; + break; + case BlendMode::ONE: + new_blend_state.dst_blend = VK_BLEND_FACTOR_ONE; + break; + case BlendMode::SRCCLR: + new_blend_state.dst_blend = VK_BLEND_FACTOR_SRC_COLOR; + break; + case BlendMode::INVSRCCLR: + new_blend_state.dst_blend = VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + break; + case BlendMode::SRCALPHA: + new_blend_state.dst_blend = + use_dst_alpha ? VK_BLEND_FACTOR_SRC1_ALPHA : VK_BLEND_FACTOR_SRC_ALPHA; + break; + case BlendMode::INVSRCALPHA: + new_blend_state.dst_blend = + use_dst_alpha ? VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA : VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + break; + case BlendMode::DSTALPHA: + new_blend_state.dst_blend = target_has_alpha ? VK_BLEND_FACTOR_DST_ALPHA : VK_BLEND_FACTOR_ONE; + break; + case BlendMode::INVDSTALPHA: + new_blend_state.dst_blend = + target_has_alpha ? 
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA : VK_BLEND_FACTOR_ZERO; + break; + default: + new_blend_state.dst_blend = VK_BLEND_FACTOR_ONE; + break; + } + + if (use_dst_alpha) + { + // Destination alpha sets 1*SRC + new_blend_state.alpha_blend_op = VK_BLEND_OP_ADD; + new_blend_state.src_alpha_blend = VK_BLEND_FACTOR_ONE; + new_blend_state.dst_alpha_blend = VK_BLEND_FACTOR_ZERO; + } + else + { + new_blend_state.alpha_blend_op = VK_BLEND_OP_ADD; + new_blend_state.src_alpha_blend = Util::GetAlphaBlendFactor(new_blend_state.src_blend); + new_blend_state.dst_alpha_blend = Util::GetAlphaBlendFactor(new_blend_state.dst_blend); + } + + m_state_tracker->SetBlendState(new_blend_state); +} + +void Renderer::SetLogicOpMode() +{ + BlendState new_blend_state = {}; + new_blend_state.bits = m_state_tracker->GetBlendState().bits; + + // Does our device support logic ops? + bool logic_op_enable = bpmem.blendmode.logicopenable && !bpmem.blendmode.blendenable; + if (g_vulkan_context->SupportsLogicOps()) + { + if (logic_op_enable) + { + static const std::array logic_ops = { + {VK_LOGIC_OP_CLEAR, VK_LOGIC_OP_AND, VK_LOGIC_OP_AND_REVERSE, VK_LOGIC_OP_COPY, + VK_LOGIC_OP_AND_INVERTED, VK_LOGIC_OP_NO_OP, VK_LOGIC_OP_XOR, VK_LOGIC_OP_OR, + VK_LOGIC_OP_NOR, VK_LOGIC_OP_EQUIVALENT, VK_LOGIC_OP_INVERT, VK_LOGIC_OP_OR_REVERSE, + VK_LOGIC_OP_COPY_INVERTED, VK_LOGIC_OP_OR_INVERTED, VK_LOGIC_OP_NAND, VK_LOGIC_OP_SET}}; + + new_blend_state.logic_op_enable = VK_TRUE; + new_blend_state.logic_op = logic_ops[bpmem.blendmode.logicmode]; + } + else + { + new_blend_state.logic_op_enable = VK_FALSE; + new_blend_state.logic_op = VK_LOGIC_OP_CLEAR; + } + + m_state_tracker->SetBlendState(new_blend_state); + } + else + { + // No logic op support, approximate with blending instead. + // This is by no means correct, but necessary for some devices. 
+ if (logic_op_enable) + { + struct LogicOpBlend + { + VkBlendFactor src_factor; + VkBlendOp op; + VkBlendFactor dst_factor; + }; + static const std::array logic_ops = { + {{VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ZERO}, + {VK_BLEND_FACTOR_DST_COLOR, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ZERO}, + {VK_BLEND_FACTOR_ONE, VK_BLEND_OP_SUBTRACT, VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR}, + {VK_BLEND_FACTOR_ONE, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ZERO}, + {VK_BLEND_FACTOR_DST_COLOR, VK_BLEND_OP_REVERSE_SUBTRACT, VK_BLEND_FACTOR_ONE}, + {VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE}, + {VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_OP_MAX, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR}, + {VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE}, + {VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_OP_MAX, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR}, + {VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_OP_MAX, VK_BLEND_FACTOR_SRC_COLOR}, + {VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_OP_ADD, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR}, + {VK_BLEND_FACTOR_ONE, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR}, + {VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_OP_ADD, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR}, + {VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE}, + {VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_OP_ADD, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR}, + {VK_BLEND_FACTOR_ONE, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE}}}; + + new_blend_state.blend_enable = VK_TRUE; + new_blend_state.blend_op = logic_ops[bpmem.blendmode.logicmode].op; + new_blend_state.src_blend = logic_ops[bpmem.blendmode.logicmode].src_factor; + new_blend_state.dst_blend = logic_ops[bpmem.blendmode.logicmode].dst_factor; + new_blend_state.alpha_blend_op = new_blend_state.blend_op; + new_blend_state.src_alpha_blend = Util::GetAlphaBlendFactor(new_blend_state.src_blend); + new_blend_state.dst_alpha_blend = Util::GetAlphaBlendFactor(new_blend_state.dst_blend); + + 
m_state_tracker->SetBlendState(new_blend_state); + } + else + { + // This is unfortunate. Since we clobber the blend state when enabling logic ops, + // we have to call SetBlendMode again to restore the current blend state. + SetBlendMode(true); + return; + } + } +} + +void Renderer::SetSamplerState(int stage, int texindex, bool custom_tex) +{ + const FourTexUnits& tex = bpmem.tex[texindex]; + const TexMode0& tm0 = tex.texMode0[stage]; + const TexMode1& tm1 = tex.texMode1[stage]; + SamplerState new_state = {}; + + if (g_ActiveConfig.bForceFiltering) + { + new_state.min_filter = VK_FILTER_LINEAR; + new_state.mag_filter = VK_FILTER_LINEAR; + new_state.mipmap_mode = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? + VK_SAMPLER_MIPMAP_MODE_LINEAR : + VK_SAMPLER_MIPMAP_MODE_NEAREST; + } + else + { + // Constants for these? + new_state.min_filter = (tm0.min_filter & 4) != 0 ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + new_state.mipmap_mode = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? + VK_SAMPLER_MIPMAP_MODE_LINEAR : + VK_SAMPLER_MIPMAP_MODE_NEAREST; + new_state.mag_filter = tm0.mag_filter != 0 ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + } + + // If mipmaps are disabled, clamp min/max lod + new_state.max_lod = (SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) ? 
+ static_cast(MathUtil::Clamp(tm1.max_lod / 16.0f, 0.0f, 255.0f)) : + 0; + new_state.min_lod = + std::min(new_state.max_lod.Value(), + static_cast(MathUtil::Clamp(tm1.min_lod / 16.0f, 0.0f, 255.0f))); + new_state.lod_bias = static_cast(tm0.lod_bias / 32.0f); + + // Custom textures may have a greater number of mips + if (custom_tex) + new_state.max_lod = 255; + + // Address modes + static const VkSamplerAddressMode address_modes[] = { + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT}; + new_state.wrap_u = address_modes[tm0.wrap_s]; + new_state.wrap_v = address_modes[tm0.wrap_t]; + + // Only use anisotropic filtering for textures that would be linearly filtered. + if (g_vulkan_context->SupportsAnisotropicFiltering() && g_ActiveConfig.iMaxAnisotropy > 0 && + !SamplerCommon::IsBpTexMode0PointFiltering(tm0)) + { + new_state.anisotropy = g_ActiveConfig.iMaxAnisotropy; + } + else + { + new_state.anisotropy = 0; + } + + // Skip lookup if the state hasn't changed. + size_t bind_index = (texindex * 4) + stage; + if (m_sampler_states[bind_index].bits == new_state.bits) + return; + + // Look up new state and replace in state tracker. + VkSampler sampler = g_object_cache->GetSampler(new_state); + if (sampler == VK_NULL_HANDLE) + { + ERROR_LOG(VIDEO, "Failed to create sampler"); + sampler = g_object_cache->GetPointSampler(); + } + + m_state_tracker->SetSampler(bind_index, sampler); + m_sampler_states[bind_index].bits = new_state.bits; +} + +void Renderer::ResetSamplerStates() +{ + // Ensure none of the sampler objects are in use. + g_command_buffer_mgr->WaitForGPUIdle(); + + // Invalidate all sampler states, next draw will re-initialize them. 
+ for (size_t i = 0; i < m_sampler_states.size(); i++) + { + m_sampler_states[i].bits = std::numeric_limits::max(); + m_state_tracker->SetSampler(i, g_object_cache->GetPointSampler()); + } + + // Invalidate all sampler objects (some will be unused now). + g_object_cache->ClearSamplerCache(); +} + +void Renderer::SetDitherMode() +{ +} + +void Renderer::SetInterlacingMode() +{ +} + +void Renderer::SetScissorRect(const EFBRectangle& rc) +{ + TargetRectangle target_rc = ConvertEFBRectangle(rc); + + VkRect2D scissor = { + {target_rc.left, target_rc.top}, + {static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight())}}; + + m_state_tracker->SetScissor(scissor); +} + +void Renderer::SetViewport() +{ + int scissor_x_offset = bpmem.scissorOffset.x * 2; + int scissor_y_offset = bpmem.scissorOffset.y * 2; + + float x = Renderer::EFBToScaledXf(xfmem.viewport.xOrig - xfmem.viewport.wd - scissor_x_offset); + float y = Renderer::EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissor_y_offset); + float width = Renderer::EFBToScaledXf(2.0f * xfmem.viewport.wd); + float height = Renderer::EFBToScaledYf(-2.0f * xfmem.viewport.ht); + if (width < 0.0f) + { + x += width; + width = -width; + } + if (height < 0.0f) + { + y += height; + height = -height; + } + + // If we do depth clipping and depth range in the vertex shader we only need to ensure + // depth values don't exceed the maximum value supported by the console GPU. If not, + // we simply clamp the near/far values themselves to the maximum value as done above. 
+ float min_depth, max_depth; + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + min_depth = 1.0f - GX_MAX_DEPTH; + max_depth = 1.0f; + } + else + { + float near_val = MathUtil::Clamp(xfmem.viewport.farZ - + MathUtil::Clamp(xfmem.viewport.zRange, + -16777216.0f, 16777216.0f), + 0.0f, 16777215.0f) / + 16777216.0f; + float far_val = MathUtil::Clamp(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f; + min_depth = 1.0f - near_val; + max_depth = 1.0f - far_val; + } + + VkViewport viewport = {x, y, width, height, min_depth, max_depth}; + m_state_tracker->SetViewport(viewport); +} + +void Renderer::ChangeSurface(void* new_surface_handle) +{ + // Called by the main thread when the window is resized. Blocks until the change is applied. + s_new_surface_handle = new_surface_handle; + s_surface_needs_change.Set(); + s_surface_changed.Wait(); +} + +void Renderer::RecompileShaders() +{ + DestroyShaders(); + if (!CompileShaders()) + PanicAlert("Failed to recompile shaders."); +} + +bool Renderer::CompileShaders() +{ + static const char CLEAR_FRAGMENT_SHADER_SOURCE[] = R"( + layout(location = 0) in float3 uv0; + layout(location = 1) in float4 col0; + layout(location = 0) out float4 ocol0; + + void main() + { + ocol0 = col0; + } + + )"; + + static const char BLIT_FRAGMENT_SHADER_SOURCE[] = R"( + layout(set = 1, binding = 0) uniform sampler2DArray samp0; + + layout(location = 0) in float3 uv0; + layout(location = 1) in float4 col0; + layout(location = 0) out float4 ocol0; + + void main() + { + ocol0 = texture(samp0, uv0); + } + )"; + + std::string header = g_object_cache->GetUtilityShaderHeader(); + std::string source; + + source = header + CLEAR_FRAGMENT_SHADER_SOURCE; + m_clear_fragment_shader = Util::CompileAndCreateFragmentShader(source); + source = header + BLIT_FRAGMENT_SHADER_SOURCE; + m_blit_fragment_shader = Util::CompileAndCreateFragmentShader(source); + + if (m_clear_fragment_shader == VK_NULL_HANDLE || m_blit_fragment_shader == VK_NULL_HANDLE) + { + return false; + } + + return true; +} + 
+void Renderer::DestroyShaders() +{ + auto DestroyShader = [this](VkShaderModule& shader) { + if (shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); + shader = VK_NULL_HANDLE; + } + }; + + DestroyShader(m_clear_fragment_shader); + DestroyShader(m_blit_fragment_shader); +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.h b/Source/Core/VideoBackends/Vulkan/Renderer.h new file mode 100644 index 0000000000..64b84656b9 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/Renderer.h @@ -0,0 +1,126 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoCommon/RenderBase.h" + +namespace Vulkan +{ +class BoundingBox; +class FramebufferManager; +class SwapChain; +class StagingTexture2D; +class StateTracker; +class Texture2D; +class RasterFont; + +class Renderer : public ::Renderer +{ +public: + Renderer(); + ~Renderer(); + + SwapChain* GetSwapChain() const { return m_swap_chain.get(); } + StateTracker* GetStateTracker() const { return m_state_tracker.get(); } + BoundingBox* GetBoundingBox() const { return m_bounding_box.get(); } + bool Initialize(FramebufferManager* framebuffer_mgr, void* window_handle, VkSurfaceKHR surface); + + void RenderText(const std::string& pstr, int left, int top, u32 color) override; + u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; + void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; + u16 BBoxRead(int index) override; + void BBoxWrite(int index, u16 value) override; + int GetMaxTextureSize() override { return 16 * 1024; } + TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; + + void SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, const EFBRectangle& rc, + float gamma) override; + + void ClearScreen(const 
EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, + u32 color, u32 z) override; + + void ReinterpretPixelData(unsigned int convtype) override; + + bool SaveScreenshot(const std::string& filename, const TargetRectangle& rc) override + { + return false; + } + + void ApplyState(bool bUseDstAlpha) override; + + void ResetAPIState() override; + void RestoreAPIState() override; + + void SetColorMask() override; + void SetBlendMode(bool force_update) override; + void SetScissorRect(const EFBRectangle& rc) override; + void SetGenerationMode() override; + void SetDepthMode() override; + void SetLogicOpMode() override; + void SetDitherMode() override; + void SetSamplerState(int stage, int texindex, bool custom_tex) override; + void SetInterlacingMode() override; + void SetViewport() override; + + void ChangeSurface(void* new_surface_handle) override; + +private: + bool CreateSemaphores(); + void DestroySemaphores(); + + void BeginFrame(); + + void CheckForTargetResize(u32 fb_width, u32 fb_stride, u32 fb_height); + void CheckForSurfaceChange(); + void CheckForConfigChanges(); + + void ResetSamplerStates(); + + void OnSwapChainResized(); + void BindEFBToStateTracker(); + void ResizeEFBTextures(); + void ResizeSwapChain(); + + void RecompileShaders(); + bool CompileShaders(); + void DestroyShaders(); + + void DrawScreen(const TargetRectangle& src_rect, const Texture2D* src_tex); + bool DrawScreenshot(const TargetRectangle& src_rect, const Texture2D* src_tex); + void BlitScreen(VkRenderPass render_pass, const TargetRectangle& dst_rect, + const TargetRectangle& src_rect, const Texture2D* src_tex, bool linear_filter); + bool ResizeScreenshotBuffer(u32 new_width, u32 new_height); + void DestroyScreenshotResources(); + void WriteScreenshot(); + void WriteFrameDump(); + void StopFrameDump(); + + FramebufferManager* m_framebuffer_mgr = nullptr; + + VkSemaphore m_image_available_semaphore = nullptr; + VkSemaphore m_rendering_finished_semaphore = nullptr; + + 
std::unique_ptr m_swap_chain; + std::unique_ptr m_state_tracker; + std::unique_ptr m_bounding_box; + std::unique_ptr m_raster_font; + + // Keep a copy of sampler states to avoid cache lookups every draw + std::array m_sampler_states = {}; + + // Shaders used for clear/blit. + VkShaderModule m_clear_fragment_shader = VK_NULL_HANDLE; + VkShaderModule m_blit_fragment_shader = VK_NULL_HANDLE; + + // Texture used for screenshot/frame dumping + std::unique_ptr m_screenshot_render_texture; + std::unique_ptr m_screenshot_readback_texture; + VkFramebuffer m_screenshot_framebuffer = VK_NULL_HANDLE; +}; +} diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp new file mode 100644 index 0000000000..1b4f134688 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp @@ -0,0 +1,334 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +// glslang includes +#include "GlslangToSpv.h" +#include "ShaderLang.h" +#include "disassemble.h" + +#include "Common/FileUtil.h" +#include "Common/Logging/Log.h" +#include "Common/MsgHandler.h" +#include "Common/StringUtil.h" + +#include "VideoBackends/Vulkan/ShaderCompiler.h" +#include "VideoCommon/VideoConfig.h" + +namespace Vulkan +{ +namespace ShaderCompiler +{ +// Registers itself for cleanup via atexit +bool InitializeGlslang(); + +// Resource limits used when compiling shaders +static const TBuiltInResource* GetCompilerResourceLimits(); + +// Compile a shader to SPIR-V via glslang +static bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, + const char* stage_filename, const char* source_code, + size_t source_code_length, bool prepend_header); + +// Regarding the UBO bind points, we subtract one from the binding index because +// the OpenGL backend requires UBO #0 for non-block uniforms (at least on NV). 
+// This allows us to share the same shaders but use bind point #0 in the Vulkan +// backend. None of the Vulkan-specific shaders use UBOs, instead they use push +// constants, so when/if the GL backend moves to uniform blocks completely this +// subtraction can be removed. +static const char SHADER_HEADER[] = R"( + // Target GLSL 4.5. + #version 450 core + #define ATTRIBUTE_LOCATION(x) layout(location = x) + #define FRAGMENT_OUTPUT_LOCATION(x) layout(location = x) + #define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y) + #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) + #define SAMPLER_BINDING(x) layout(set = 1, binding = x) + #define SSBO_BINDING(x) layout(set = 2, binding = x) + #define VARYING_LOCATION(x) layout(location = x) + #define FORCE_EARLY_Z layout(early_fragment_tests) in + + // hlsl to glsl function translation + #define float2 vec2 + #define float3 vec3 + #define float4 vec4 + #define uint2 uvec2 + #define uint3 uvec3 + #define uint4 uvec4 + #define int2 ivec2 + #define int3 ivec3 + #define int4 ivec4 + #define frac fract + #define lerp mix + + // These were changed in Vulkan + #define gl_VertexID gl_VertexIndex + #define gl_InstanceID gl_InstanceIndex +)"; + +bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename, + const char* source_code, size_t source_code_length, bool prepend_header) +{ + if (!InitializeGlslang()) + return false; + + std::unique_ptr shader = std::make_unique(stage); + std::unique_ptr program; + glslang::TShader::ForbidInclude includer; + EProfile profile = ECoreProfile; + EShMessages messages = + static_cast(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules); + int default_version = 450; + + std::string full_source_code; + const char* pass_source_code = source_code; + int pass_source_code_length = static_cast(source_code_length); + if (prepend_header) + { + full_source_code.reserve(sizeof(SHADER_HEADER) + source_code_length); + 
full_source_code.append(SHADER_HEADER, sizeof(SHADER_HEADER) - 1); + full_source_code.append(source_code, source_code_length); + pass_source_code = full_source_code.c_str(); + pass_source_code_length = static_cast(full_source_code.length()); + } + + shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1); + + auto DumpBadShader = [&](const char* msg) { + static int counter = 0; + std::string filename = StringFromFormat( + "%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), stage_filename, counter++); + + std::ofstream stream; + OpenFStream(stream, filename, std::ios_base::out); + if (stream.good()) + { + stream << full_source_code << std::endl; + stream << msg << std::endl; + stream << "Shader Info Log:" << std::endl; + stream << shader->getInfoLog() << std::endl; + stream << shader->getInfoDebugLog() << std::endl; + if (program) + { + stream << "Program Info Log:" << std::endl; + stream << program->getInfoLog() << std::endl; + stream << program->getInfoDebugLog() << std::endl; + } + } + + PanicAlert("%s (written to %s)", msg, filename.c_str()); + }; + + if (!shader->parse(GetCompilerResourceLimits(), default_version, profile, false, true, messages, + includer)) + { + DumpBadShader("Failed to parse shader"); + return false; + } + + // Even though there's only a single shader, we still need to link it to generate SPV + program = std::make_unique(); + program->addShader(shader.get()); + if (!program->link(messages)) + { + DumpBadShader("Failed to link program"); + return false; + } + + glslang::TIntermediate* intermediate = program->getIntermediate(stage); + if (!intermediate) + { + DumpBadShader("Failed to generate SPIR-V"); + return false; + } + + spv::SpvBuildLogger logger; + glslang::GlslangToSpv(*intermediate, *out_code, &logger); + + // Write out messages + // Temporary: skip if it contains "Warning, version 450 is not yet complete; most version-specific + // features are present, but some are missing." 
+ if (strlen(shader->getInfoLog()) > 108) + WARN_LOG(VIDEO, "Shader info log: %s", shader->getInfoLog()); + if (strlen(shader->getInfoDebugLog()) > 0) + WARN_LOG(VIDEO, "Shader debug info log: %s", shader->getInfoDebugLog()); + if (strlen(program->getInfoLog()) > 25) + WARN_LOG(VIDEO, "Program info log: %s", program->getInfoLog()); + if (strlen(program->getInfoDebugLog()) > 0) + WARN_LOG(VIDEO, "Program debug info log: %s", program->getInfoDebugLog()); + std::string spv_messages = logger.getAllMessages(); + if (!spv_messages.empty()) + WARN_LOG(VIDEO, "SPIR-V conversion messages: %s", spv_messages.c_str()); + + // Dump source code of shaders out to file if enabled. + if (g_ActiveConfig.iLog & CONF_SAVESHADERS) + { + static int counter = 0; + std::string filename = StringFromFormat("%s%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), + stage_filename, counter++); + + std::ofstream stream; + OpenFStream(stream, filename, std::ios_base::out); + if (stream.good()) + { + stream << full_source_code << std::endl; + stream << "Shader Info Log:" << std::endl; + stream << shader->getInfoLog() << std::endl; + stream << shader->getInfoDebugLog() << std::endl; + stream << "Program Info Log:" << std::endl; + stream << program->getInfoLog() << std::endl; + stream << program->getInfoDebugLog() << std::endl; + stream << "SPIR-V conversion messages: " << std::endl; + stream << spv_messages; + stream << "SPIR-V:" << std::endl; + spv::Disassemble(stream, *out_code); + } + } + + return true; +} + +bool InitializeGlslang() +{ + static bool glslang_initialized = false; + if (glslang_initialized) + return true; + + if (!glslang::InitializeProcess()) + { + PanicAlert("Failed to initialize glslang shader compiler"); + return false; + } + + std::atexit([]() { glslang::FinalizeProcess(); }); + + glslang_initialized = true; + return true; +} + +const TBuiltInResource* GetCompilerResourceLimits() +{ + static const TBuiltInResource limits = {/* .MaxLights = */ 32, + /* .MaxClipPlanes = */ 6, 
+ /* .MaxTextureUnits = */ 32, + /* .MaxTextureCoords = */ 32, + /* .MaxVertexAttribs = */ 64, + /* .MaxVertexUniformComponents = */ 4096, + /* .MaxVaryingFloats = */ 64, + /* .MaxVertexTextureImageUnits = */ 32, + /* .MaxCombinedTextureImageUnits = */ 80, + /* .MaxTextureImageUnits = */ 32, + /* .MaxFragmentUniformComponents = */ 4096, + /* .MaxDrawBuffers = */ 32, + /* .MaxVertexUniformVectors = */ 128, + /* .MaxVaryingVectors = */ 8, + /* .MaxFragmentUniformVectors = */ 16, + /* .MaxVertexOutputVectors = */ 16, + /* .MaxFragmentInputVectors = */ 15, + /* .MinProgramTexelOffset = */ -8, + /* .MaxProgramTexelOffset = */ 7, + /* .MaxClipDistances = */ 8, + /* .MaxComputeWorkGroupCountX = */ 65535, + /* .MaxComputeWorkGroupCountY = */ 65535, + /* .MaxComputeWorkGroupCountZ = */ 65535, + /* .MaxComputeWorkGroupSizeX = */ 1024, + /* .MaxComputeWorkGroupSizeY = */ 1024, + /* .MaxComputeWorkGroupSizeZ = */ 64, + /* .MaxComputeUniformComponents = */ 1024, + /* .MaxComputeTextureImageUnits = */ 16, + /* .MaxComputeImageUniforms = */ 8, + /* .MaxComputeAtomicCounters = */ 8, + /* .MaxComputeAtomicCounterBuffers = */ 1, + /* .MaxVaryingComponents = */ 60, + /* .MaxVertexOutputComponents = */ 64, + /* .MaxGeometryInputComponents = */ 64, + /* .MaxGeometryOutputComponents = */ 128, + /* .MaxFragmentInputComponents = */ 128, + /* .MaxImageUnits = */ 8, + /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 8, + /* .MaxCombinedShaderOutputResources = */ 8, + /* .MaxImageSamples = */ 0, + /* .MaxVertexImageUniforms = */ 0, + /* .MaxTessControlImageUniforms = */ 0, + /* .MaxTessEvaluationImageUniforms = */ 0, + /* .MaxGeometryImageUniforms = */ 0, + /* .MaxFragmentImageUniforms = */ 8, + /* .MaxCombinedImageUniforms = */ 8, + /* .MaxGeometryTextureImageUnits = */ 16, + /* .MaxGeometryOutputVertices = */ 256, + /* .MaxGeometryTotalOutputComponents = */ 1024, + /* .MaxGeometryUniformComponents = */ 1024, + /* .MaxGeometryVaryingComponents = */ 64, + /* .MaxTessControlInputComponents = 
*/ 128, + /* .MaxTessControlOutputComponents = */ 128, + /* .MaxTessControlTextureImageUnits = */ 16, + /* .MaxTessControlUniformComponents = */ 1024, + /* .MaxTessControlTotalOutputComponents = */ 4096, + /* .MaxTessEvaluationInputComponents = */ 128, + /* .MaxTessEvaluationOutputComponents = */ 128, + /* .MaxTessEvaluationTextureImageUnits = */ 16, + /* .MaxTessEvaluationUniformComponents = */ 1024, + /* .MaxTessPatchComponents = */ 120, + /* .MaxPatchVertices = */ 32, + /* .MaxTessGenLevel = */ 64, + /* .MaxViewports = */ 16, + /* .MaxVertexAtomicCounters = */ 0, + /* .MaxTessControlAtomicCounters = */ 0, + /* .MaxTessEvaluationAtomicCounters = */ 0, + /* .MaxGeometryAtomicCounters = */ 0, + /* .MaxFragmentAtomicCounters = */ 8, + /* .MaxCombinedAtomicCounters = */ 8, + /* .MaxAtomicCounterBindings = */ 1, + /* .MaxVertexAtomicCounterBuffers = */ 0, + /* .MaxTessControlAtomicCounterBuffers = */ 0, + /* .MaxTessEvaluationAtomicCounterBuffers = */ 0, + /* .MaxGeometryAtomicCounterBuffers = */ 0, + /* .MaxFragmentAtomicCounterBuffers = */ 1, + /* .MaxCombinedAtomicCounterBuffers = */ 1, + /* .MaxAtomicCounterBufferSize = */ 16384, + /* .MaxTransformFeedbackBuffers = */ 4, + /* .MaxTransformFeedbackInterleavedComponents = */ 64, + /* .MaxCullDistances = */ 8, + /* .MaxCombinedClipAndCullDistances = */ 8, + /* .MaxSamples = */ 4, + /* .limits = */ { + /* .nonInductiveForLoops = */ 1, + /* .whileLoops = */ 1, + /* .doWhileLoops = */ 1, + /* .generalUniformIndexing = */ 1, + /* .generalAttributeMatrixVectorIndexing = */ 1, + /* .generalVaryingIndexing = */ 1, + /* .generalSamplerIndexing = */ 1, + /* .generalVariableIndexing = */ 1, + /* .generalConstantMatrixVectorIndexing = */ 1, + }}; + + return &limits; +} + +bool CompileVertexShader(SPIRVCodeVector* out_code, const char* source_code, + size_t source_code_length, bool prepend_header) +{ + return CompileShaderToSPV(out_code, EShLangVertex, "vs", source_code, source_code_length, + prepend_header); +} + +bool 
CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code, + size_t source_code_length, bool prepend_header) +{ + return CompileShaderToSPV(out_code, EShLangGeometry, "gs", source_code, source_code_length, + prepend_header); +} + +bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code, + size_t source_code_length, bool prepend_header) +{ + return CompileShaderToSPV(out_code, EShLangFragment, "ps", source_code, source_code_length, + prepend_header); +} + +} // namespace ShaderCompiler +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h new file mode 100644 index 0000000000..79f8fc447c --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h @@ -0,0 +1,32 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "VideoBackends/Vulkan/Constants.h" + +namespace Vulkan +{ +namespace ShaderCompiler +{ +// SPIR-V compiled code type +using SPIRVCodeType = u32; +using SPIRVCodeVector = std::vector; + +// Compile a vertex shader to SPIR-V. +bool CompileVertexShader(SPIRVCodeVector* out_code, const char* source_code, + size_t source_code_length, bool prepend_header = true); + +// Compile a geometry shader to SPIR-V. +bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code, + size_t source_code_length, bool prepend_header = true); + +// Compile a fragment shader to SPIR-V. 
+bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code, + size_t source_code_length, bool prepend_header = true); + +} // namespace ShaderCompiler +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp new file mode 100644 index 0000000000..052897d9bc --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp @@ -0,0 +1,198 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/Assert.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/StagingBuffer.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +namespace Vulkan +{ +StagingBuffer::StagingBuffer(STAGING_BUFFER_TYPE type, VkBuffer buffer, VkDeviceMemory memory, + VkDeviceSize size, bool coherent) + : m_type(type), m_buffer(buffer), m_memory(memory), m_size(size), m_coherent(coherent) +{ +} + +StagingBuffer::~StagingBuffer() +{ + // Unmap before destroying + if (m_map_pointer) + Unmap(); + + g_command_buffer_mgr->DeferResourceDestruction(m_memory); + g_command_buffer_mgr->DeferResourceDestruction(m_buffer); +} + +bool StagingBuffer::Map(VkDeviceSize offset, VkDeviceSize size) +{ + m_map_offset = offset; + if (size == VK_WHOLE_SIZE) + m_map_size = m_size - offset; + else + m_map_size = size; + + _assert_(!m_map_pointer); + _assert_(m_map_offset + m_map_size <= m_size); + + void* map_pointer; + VkResult res = vkMapMemory(g_vulkan_context->GetDevice(), m_memory, m_map_offset, m_map_size, 0, + &map_pointer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkMapMemory failed: "); + return false; + } + + m_map_pointer = reinterpret_cast(map_pointer); + return true; +} + +void StagingBuffer::Unmap() +{ + _assert_(m_map_pointer); + + vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory); + m_map_pointer = 
nullptr; + m_map_offset = 0; + m_map_size = 0; +} + +void StagingBuffer::FlushCPUCache(VkDeviceSize offset, VkDeviceSize size) +{ + _assert_(offset >= m_map_offset); + if (m_coherent) + return; + + VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory, + offset - m_map_offset, size}; + vkFlushMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range); +} + +void StagingBuffer::InvalidateGPUCache(VkCommandBuffer command_buffer, + VkAccessFlagBits dest_access_flags, + VkPipelineStageFlagBits dest_pipeline_stage, + VkDeviceSize offset, VkDeviceSize size) +{ + _assert_((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); + Util::BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, dest_access_flags, + offset, size, VK_PIPELINE_STAGE_HOST_BIT, dest_pipeline_stage); +} + +void StagingBuffer::PrepareForGPUWrite(VkCommandBuffer command_buffer, + VkAccessFlagBits dst_access_flags, + VkPipelineStageFlagBits dst_pipeline_stage, + VkDeviceSize offset, VkDeviceSize size) +{ + _assert_((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); + Util::BufferMemoryBarrier(command_buffer, m_buffer, 0, dst_access_flags, offset, size, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dst_pipeline_stage); +} + +void StagingBuffer::FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBits src_access_flags, + VkPipelineStageFlagBits src_pipeline_stage, VkDeviceSize offset, + VkDeviceSize size) +{ + _assert_((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); + Util::BufferMemoryBarrier(command_buffer, m_buffer, src_access_flags, VK_ACCESS_HOST_READ_BIT, + offset, size, src_pipeline_stage, VK_PIPELINE_STAGE_HOST_BIT); +} + +void StagingBuffer::InvalidateCPUCache(VkDeviceSize offset, VkDeviceSize size) +{ + _assert_(offset >= m_map_offset); + if (m_coherent) + return; + + VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory, + offset - m_map_offset, 
size}; + vkInvalidateMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range); +} + +void StagingBuffer::Read(VkDeviceSize offset, void* data, size_t size, bool invalidate_caches) +{ + _assert_((offset + size) <= m_size); + _assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset))); + if (invalidate_caches) + InvalidateCPUCache(offset, size); + + memcpy(data, m_map_pointer + (offset - m_map_offset), size); +} + +void StagingBuffer::Write(VkDeviceSize offset, const void* data, size_t size, + bool invalidate_caches) +{ + _assert_((offset + size) <= m_size); + _assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset))); + + memcpy(m_map_pointer + (offset - m_map_offset), data, size); + if (invalidate_caches) + FlushCPUCache(offset, size); +} + +std::unique_ptr +StagingBuffer::Create(STAGING_BUFFER_TYPE type, VkDeviceSize size, VkBufferUsageFlags usage) +{ + VkBufferCreateInfo buffer_create_info = { + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkBufferCreateFlags flags + size, // VkDeviceSize size + usage, // VkBufferUsageFlags usage + VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode + 0, // uint32_t queueFamilyIndexCount + nullptr // const uint32_t* pQueueFamilyIndices + }; + VkBuffer buffer; + VkResult res = + vkCreateBuffer(g_vulkan_context->GetDevice(), &buffer_create_info, nullptr, &buffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: "); + return nullptr; + } + + VkMemoryRequirements requirements; + vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &requirements); + + bool is_coherent; + u32 type_index; + if (type == STAGING_BUFFER_TYPE_UPLOAD) + type_index = g_vulkan_context->GetUploadMemoryType(requirements.memoryTypeBits, &is_coherent); + else + type_index = g_vulkan_context->GetReadbackMemoryType(requirements.memoryTypeBits, &is_coherent); + + VkMemoryAllocateInfo memory_allocate_info = { + 
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + requirements.size, // VkDeviceSize allocationSize + type_index // uint32_t memoryTypeIndex + }; + VkDeviceMemory memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + return nullptr; + } + + res = vkBindBufferMemory(g_vulkan_context->GetDevice(), buffer, memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindBufferMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr); + return nullptr; + } + + return std::make_unique(type, buffer, memory, size, is_coherent); +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StagingBuffer.h b/Source/Core/VideoBackends/Vulkan/StagingBuffer.h new file mode 100644 index 0000000000..9562dda74a --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.h @@ -0,0 +1,71 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "VideoBackends/Vulkan/Constants.h" + +namespace Vulkan +{ +class StagingBuffer +{ +public: + StagingBuffer(STAGING_BUFFER_TYPE type, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize size, + bool coherent); + ~StagingBuffer(); + + STAGING_BUFFER_TYPE GetType() const { return m_type; } + VkDeviceSize GetSize() const { return m_size; } + VkBuffer GetBuffer() const { return m_buffer; } + bool IsMapped() const { return m_map_pointer != nullptr; } + const char* GetMapPointer() const { return m_map_pointer; } + char* GetMapPointer() { return m_map_pointer; } + VkDeviceSize GetMapOffset() const { return m_map_offset; } + VkDeviceSize GetMapSize() const { return m_map_size; } + bool Map(VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE); + void Unmap(); + + // Upload part 1: Prepare from device read from the CPU side + void FlushCPUCache(VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE); + + // Upload part 2: Prepare for device read from the GPU side + void InvalidateGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBits dst_access_flags, + VkPipelineStageFlagBits dst_pipeline_stage, VkDeviceSize offset = 0, + VkDeviceSize size = VK_WHOLE_SIZE); + + // Readback part 0: Prepare for GPU usage (if necessary) + void PrepareForGPUWrite(VkCommandBuffer command_buffer, VkAccessFlagBits dst_access_flags, + VkPipelineStageFlagBits dst_pipeline_stage, VkDeviceSize offset = 0, + VkDeviceSize size = VK_WHOLE_SIZE); + + // Readback part 1: Prepare for host readback from the GPU side + void FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBits src_access_flags, + VkPipelineStageFlagBits src_pipeline_stage, VkDeviceSize offset = 0, + VkDeviceSize size = VK_WHOLE_SIZE); + + // Readback part 2: Prepare for host readback from the CPU side + void InvalidateCPUCache(VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE); + + // offset is from the start of the buffer, not from the map offset + void Read(VkDeviceSize 
offset, void* data, size_t size, bool invalidate_caches = true); + void Write(VkDeviceSize offset, const void* data, size_t size, bool invalidate_caches = true); + + // Creates the optimal format of image copy. + static std::unique_ptr Create(STAGING_BUFFER_TYPE type, VkDeviceSize size, + VkBufferUsageFlags usage); + +protected: + STAGING_BUFFER_TYPE m_type; + VkBuffer m_buffer; + VkDeviceMemory m_memory; + VkDeviceSize m_size; + bool m_coherent; + + char* m_map_pointer = nullptr; + VkDeviceSize m_map_offset = 0; + VkDeviceSize m_map_size = 0; +}; +} diff --git a/Source/Core/VideoBackends/Vulkan/StagingTexture2D.cpp b/Source/Core/VideoBackends/Vulkan/StagingTexture2D.cpp new file mode 100644 index 0000000000..c82d449c49 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/StagingTexture2D.cpp @@ -0,0 +1,536 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/Assert.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/StagingTexture2D.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +namespace Vulkan +{ +StagingTexture2D::StagingTexture2D(STAGING_BUFFER_TYPE type, u32 width, u32 height, VkFormat format, + u32 stride) + : m_type(type), m_width(width), m_height(height), m_format(format), + m_texel_size(Util::GetTexelSize(format)), m_row_stride(stride) +{ +} + +StagingTexture2D::~StagingTexture2D() +{ + _assert_(!m_map_pointer); +} + +void StagingTexture2D::ReadTexel(u32 x, u32 y, void* data, size_t data_size) const +{ + _assert_(data_size >= m_texel_size); + + VkDeviceSize offset = y * m_row_stride + x * m_texel_size; + VkDeviceSize map_offset = offset - m_map_offset; + _assert_(offset >= m_map_offset && (map_offset + m_texel_size) <= (m_map_offset + m_map_size)); + + const char* ptr = m_map_pointer + map_offset; + memcpy(data, ptr, data_size); +} + +void 
StagingTexture2D::WriteTexel(u32 x, u32 y, const void* data, size_t data_size) +{ + _assert_(data_size >= m_texel_size); + + VkDeviceSize offset = y * m_row_stride + x * m_texel_size; + VkDeviceSize map_offset = offset - m_map_offset; + _assert_(offset >= m_map_offset && (map_offset + m_texel_size) <= (m_map_offset + m_map_size)); + + char* ptr = m_map_pointer + map_offset; + memcpy(ptr, data, data_size); +} + +void StagingTexture2D::ReadTexels(u32 x, u32 y, u32 width, u32 height, void* data, + u32 data_stride) const +{ + const char* src_ptr = GetRowPointer(y); + + // Optimal path: same dimensions, same stride. + _assert_((x + width) <= m_width && (y + height) <= m_height); + if (x == 0 && width == m_width && m_row_stride == data_stride) + { + memcpy(data, src_ptr, m_row_stride * height); + return; + } + + u32 copy_size = std::min(width * m_texel_size, data_stride); + char* dst_ptr = reinterpret_cast(data); + for (u32 row = 0; row < height; row++) + { + memcpy(dst_ptr, src_ptr + (x * m_texel_size), copy_size); + src_ptr += m_row_stride; + dst_ptr += data_stride; + } +} + +void StagingTexture2D::WriteTexels(u32 x, u32 y, u32 width, u32 height, const void* data, + u32 data_stride) +{ + char* dst_ptr = GetRowPointer(y); + + // Optimal path: same dimensions, same stride. + _assert_((x + width) <= m_width && (y + height) <= m_height); + if (x == 0 && width == m_width && m_row_stride == data_stride) + { + memcpy(dst_ptr, data, m_row_stride * height); + return; + } + + u32 copy_size = std::min(width * m_texel_size, data_stride); + const char* src_ptr = reinterpret_cast(data); + for (u32 row = 0; row < height; row++) + { + memcpy(dst_ptr + (x * m_texel_size), src_ptr, copy_size); + dst_ptr += m_row_stride; + src_ptr += data_stride; + } +} + +std::unique_ptr StagingTexture2D::Create(STAGING_BUFFER_TYPE type, u32 width, + u32 height, VkFormat format) +{ +// TODO: Using a buffer here as opposed to a linear texture is faster on AMD. 
+// NVIDIA also seems faster with buffers over textures. +#if 0 + // Check for support for this format as a linear texture. + // Some drivers don't support this (e.g. adreno). + VkImageFormatProperties properties; + VkResult res = vkGetPhysicalDeviceImageFormatProperties( + g_object_cache->GetPhysicalDevice(), format, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_LINEAR, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, &properties); + if (res == VK_SUCCESS && width <= properties.maxExtent.width && + height <= properties.maxExtent.height) + { + return StagingTexture2DLinear::Create(type, width, height, format); + } +#endif + + // Fall back to a buffer copy. + return StagingTexture2DBuffer::Create(type, width, height, format); +} + +StagingTexture2DLinear::StagingTexture2DLinear(STAGING_BUFFER_TYPE type, u32 width, u32 height, + VkFormat format, u32 stride, VkImage image, + VkDeviceMemory memory, VkDeviceSize size, + bool coherent) + : StagingTexture2D(type, width, height, format, stride), m_image(image), m_memory(memory), + m_size(size), m_layout(VK_IMAGE_LAYOUT_PREINITIALIZED), m_coherent(coherent) +{ +} + +StagingTexture2DLinear::~StagingTexture2DLinear() +{ + if (m_map_pointer) + Unmap(); + + g_command_buffer_mgr->DeferResourceDestruction(m_memory); + g_command_buffer_mgr->DeferResourceDestruction(m_image); +} + +void StagingTexture2DLinear::CopyFromImage(VkCommandBuffer command_buffer, VkImage image, + VkImageAspectFlags src_aspect, u32 x, u32 y, u32 width, + u32 height, u32 level, u32 layer) +{ + // Prepare the buffer for copying. + // We don't care about the existing contents, so set to UNDEFINED. 
+ VkImageMemoryBarrier before_transfer_barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAccessFlags srcAccessMask + VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask + VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_image, // VkImage image + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1} // VkImageSubresourceRange subresourceRange + }; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, + &before_transfer_barrier); + + // Issue the image copy, gpu -> host. + VkImageCopy copy_region = { + {src_aspect, level, layer, 1}, // VkImageSubresourceLayers srcSubresource + {static_cast(x), static_cast(y), 0}, // VkOffset3D srcOffset + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, // VkImageSubresourceLayers dstSubresource + {0, 0, 0}, // VkOffset3D dstOffset + {width, height, 1} // VkExtent3D extent + }; + vkCmdCopyImage(command_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_region); + + // Ensure writes are visible to the host. 
+ VkImageMemoryBarrier visible_barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags srcAccessMask + VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // VkImageLayout oldLayout + VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_image, // VkImage image + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1} // VkImageSubresourceRange subresourceRange + }; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, + 0, 0, nullptr, 0, nullptr, 1, &visible_barrier); + m_layout = VK_IMAGE_LAYOUT_GENERAL; + + // Invalidate memory range if currently mapped. + if (m_map_pointer && !m_coherent) + { + VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory, + m_map_offset, m_map_size}; + vkInvalidateMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range); + } +} + +void StagingTexture2DLinear::CopyToImage(VkCommandBuffer command_buffer, VkImage image, + VkImageAspectFlags dst_aspect, u32 x, u32 y, u32 width, + u32 height, u32 level, u32 layer) +{ + // Flush memory range if currently mapped. + if (m_map_pointer && !m_coherent) + { + VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory, + m_map_offset, m_map_size}; + vkFlushMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range); + } + + // Ensure any writes to the image are visible to the GPU. 
+ VkImageMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + VK_ACCESS_HOST_WRITE_BIT, // VkAccessFlags srcAccessMask + VK_ACCESS_TRANSFER_READ_BIT, // VkAccessFlags dstAccessMask + m_layout, // VkImageLayout oldLayout + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_image, // VkImage image + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1} // VkImageSubresourceRange subresourceRange + }; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, nullptr, 0, nullptr, 1, &barrier); + + m_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + // Issue the image copy, host -> gpu. + VkImageCopy copy_region = { + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, // VkImageSubresourceLayers srcSubresource + {0, 0, 0}, // VkOffset3D srcOffset + {dst_aspect, level, layer, 1}, // VkImageSubresourceLayers dstSubresource + {static_cast(x), static_cast(y), 0}, // VkOffset3D dstOffset + {width, height, 1} // VkExtent3D extent + }; + vkCmdCopyImage(command_buffer, m_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_region); +} + +bool StagingTexture2DLinear::Map(VkDeviceSize offset /* = 0 */, + VkDeviceSize size /* = VK_WHOLE_SIZE */) +{ + m_map_offset = offset; + if (size == VK_WHOLE_SIZE) + m_map_size = m_size - offset; + else + m_map_size = size; + + _assert_(!m_map_pointer); + _assert_(m_map_offset + m_map_size <= m_size); + + void* map_pointer; + VkResult res = vkMapMemory(g_vulkan_context->GetDevice(), m_memory, m_map_offset, m_map_size, 0, + &map_pointer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkMapMemory failed: "); + return false; + } + + m_map_pointer = reinterpret_cast(map_pointer); + return true; +} + +void StagingTexture2DLinear::Unmap() +{ + _assert_(m_map_pointer); + 
+ vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory); + m_map_pointer = nullptr; + m_map_offset = 0; + m_map_size = 0; +} + +std::unique_ptr +StagingTexture2DLinear::Create(STAGING_BUFFER_TYPE type, u32 width, u32 height, VkFormat format) +{ + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + VkImageCreateInfo create_info = { + VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkImageCreateFlags flags + VK_IMAGE_TYPE_2D, // VkImageType imageType + format, // VkFormat format + {width, height, 1}, // VkExtent3D extent + 1, // uint32_t mipLevels + 1, // uint32_t arrayLayers + VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples + VK_IMAGE_TILING_LINEAR, // VkImageTiling tiling + usage, // VkImageUsageFlags usage + VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode + 0, // uint32_t queueFamilyIndexCount + nullptr, // const uint32_t* pQueueFamilyIndices + VK_IMAGE_LAYOUT_PREINITIALIZED // VkImageLayout initialLayout + }; + + VkImage image; + VkResult res = vkCreateImage(g_vulkan_context->GetDevice(), &create_info, nullptr, &image); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImage failed: "); + return nullptr; + } + + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(g_vulkan_context->GetDevice(), image, &memory_requirements); + + bool is_coherent; + u32 memory_type_index; + if (type == STAGING_BUFFER_TYPE_READBACK) + { + memory_type_index = + g_vulkan_context->GetReadbackMemoryType(memory_requirements.memoryTypeBits, &is_coherent); + } + else + { + memory_type_index = + g_vulkan_context->GetUploadMemoryType(memory_requirements.memoryTypeBits, &is_coherent); + } + VkMemoryAllocateInfo memory_allocate_info = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + memory_requirements.size, // VkDeviceSize allocationSize + memory_type_index // uint32_t memoryTypeIndex + }; + 
VkDeviceMemory memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + return nullptr; + } + + res = vkBindImageMemory(g_vulkan_context->GetDevice(), image, memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindImageMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr); + return nullptr; + } + + // Assume tight packing. Is this correct? + u32 stride = width * Util::GetTexelSize(format); + return std::make_unique(type, width, height, format, stride, image, + memory, memory_requirements.size, is_coherent); +} + +StagingTexture2DBuffer::StagingTexture2DBuffer(STAGING_BUFFER_TYPE type, u32 width, u32 height, + VkFormat format, u32 stride, VkBuffer buffer, + VkDeviceMemory memory, VkDeviceSize size, + bool coherent) + : StagingTexture2D(type, width, height, format, stride), m_buffer(buffer), m_memory(memory), + m_size(size), m_coherent(coherent) +{ +} + +StagingTexture2DBuffer::~StagingTexture2DBuffer() +{ + if (m_map_pointer) + Unmap(); + + g_command_buffer_mgr->DeferResourceDestruction(m_memory); + g_command_buffer_mgr->DeferResourceDestruction(m_buffer); +} + +void StagingTexture2DBuffer::CopyFromImage(VkCommandBuffer command_buffer, VkImage image, + VkImageAspectFlags src_aspect, u32 x, u32 y, u32 width, + u32 height, u32 level, u32 layer) +{ + // Issue the image->buffer copy. 
+ VkBufferImageCopy image_copy = { + 0, // VkDeviceSize bufferOffset + m_width, // uint32_t bufferRowLength + 0, // uint32_t bufferImageHeight + {src_aspect, level, layer, 1}, // VkImageSubresourceLayers imageSubresource + {static_cast(x), static_cast(y), 0}, // VkOffset3D imageOffset + {width, height, 1} // VkExtent3D imageExtent + }; + vkCmdCopyImageToBuffer(command_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_buffer, 1, + &image_copy); + + // Ensure the write has completed. + VkDeviceSize copy_size = m_row_stride * height; + Util::BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_HOST_READ_BIT, 0, copy_size, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_HOST_BIT); + + // If we're still mapped, invalidate the mapped range + if (m_map_pointer && !m_coherent) + { + VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory, + m_map_offset, m_map_size}; + vkInvalidateMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range); + } +} + +void StagingTexture2DBuffer::CopyToImage(VkCommandBuffer command_buffer, VkImage image, + VkImageAspectFlags dst_aspect, u32 x, u32 y, u32 width, + u32 height, u32 level, u32 layer) +{ + // If we're still mapped, flush the mapped range + if (m_map_pointer && !m_coherent) + { + VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory, + m_map_offset, m_map_size}; + vkFlushMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range); + } + + // Ensure writes are visible to GPU. 
+ VkDeviceSize copy_size = m_row_stride * height; + Util::BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, + VK_ACCESS_TRANSFER_READ_BIT, 0, copy_size, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Issue the buffer->image copy + VkBufferImageCopy image_copy = { + 0, // VkDeviceSize bufferOffset + m_width, // uint32_t bufferRowLength + 0, // uint32_t bufferImageHeight + {dst_aspect, level, layer, 1}, // VkImageSubresourceLayers imageSubresource + {static_cast(x), static_cast(y), 0}, // VkOffset3D imageOffset + {width, height, 1} // VkExtent3D imageExtent + }; + vkCmdCopyBufferToImage(command_buffer, m_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + &image_copy); +} + +bool StagingTexture2DBuffer::Map(VkDeviceSize offset /* = 0 */, + VkDeviceSize size /* = VK_WHOLE_SIZE */) +{ + m_map_offset = offset; + if (size == VK_WHOLE_SIZE) + m_map_size = m_size - offset; + else + m_map_size = size; + + _assert_(!m_map_pointer); + _assert_(m_map_offset + m_map_size <= m_size); + + void* map_pointer; + VkResult res = vkMapMemory(g_vulkan_context->GetDevice(), m_memory, m_map_offset, m_map_size, 0, + &map_pointer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkMapMemory failed: "); + return false; + } + + m_map_pointer = reinterpret_cast(map_pointer); + return true; +} + +void StagingTexture2DBuffer::Unmap() +{ + _assert_(m_map_pointer); + + vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory); + m_map_pointer = nullptr; + m_map_offset = 0; + m_map_size = 0; +} + +std::unique_ptr +StagingTexture2DBuffer::Create(STAGING_BUFFER_TYPE type, u32 width, u32 height, VkFormat format) +{ + // Assume tight packing. 
+ u32 row_stride = Util::GetTexelSize(format) * width; + u32 buffer_size = row_stride * height; + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + VkBufferCreateInfo buffer_create_info = { + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkBufferCreateFlags flags + buffer_size, // VkDeviceSize size + usage, // VkBufferUsageFlags usage + VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode + 0, // uint32_t queueFamilyIndexCount + nullptr // const uint32_t* pQueueFamilyIndices + }; + VkBuffer buffer; + VkResult res = + vkCreateBuffer(g_vulkan_context->GetDevice(), &buffer_create_info, nullptr, &buffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: "); + return nullptr; + } + + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements); + + bool is_coherent; + u32 memory_type_index; + if (type == STAGING_BUFFER_TYPE_READBACK) + { + memory_type_index = + g_vulkan_context->GetReadbackMemoryType(memory_requirements.memoryTypeBits, &is_coherent); + } + else + { + memory_type_index = + g_vulkan_context->GetUploadMemoryType(memory_requirements.memoryTypeBits, &is_coherent); + } + + VkMemoryAllocateInfo memory_allocate_info = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + memory_requirements.size, // VkDeviceSize allocationSize + memory_type_index // uint32_t memoryTypeIndex + }; + VkDeviceMemory memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + return nullptr; + } + + res = vkBindBufferMemory(g_vulkan_context->GetDevice(), buffer, memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, 
"vkBindBufferMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr); + return nullptr; + } + + return std::make_unique(type, width, height, format, row_stride, buffer, + memory, buffer_size, is_coherent); +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StagingTexture2D.h b/Source/Core/VideoBackends/Vulkan/StagingTexture2D.h new file mode 100644 index 0000000000..709c220af7 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/StagingTexture2D.h @@ -0,0 +1,125 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "VideoBackends/Vulkan/Constants.h" + +namespace Vulkan +{ +class StagingTexture2D +{ +public: + StagingTexture2D(STAGING_BUFFER_TYPE type, u32 width, u32 height, VkFormat format, u32 stride); + virtual ~StagingTexture2D(); + + STAGING_BUFFER_TYPE GetType() const { return m_type; } + u32 GetWidth() const { return m_width; } + u32 GetHeight() const { return m_height; } + VkFormat GetFormat() const { return m_format; } + u32 GetRowStride() const { return m_row_stride; } + u32 GetTexelSize() const { return m_texel_size; } + bool IsMapped() const { return m_map_pointer != nullptr; } + const char* GetMapPointer() const { return m_map_pointer; } + char* GetMapPointer() { return m_map_pointer; } + VkDeviceSize GetMapOffset() const { return m_map_offset; } + VkDeviceSize GetMapSize() const { return m_map_size; } + const char* GetRowPointer(u32 row) const { return m_map_pointer + row * m_row_stride; } + char* GetRowPointer(u32 row) { return m_map_pointer + row * m_row_stride; } + // Requires Map() to be called first. 
+ void ReadTexel(u32 x, u32 y, void* data, size_t data_size) const; + void WriteTexel(u32 x, u32 y, const void* data, size_t data_size); + void ReadTexels(u32 x, u32 y, u32 width, u32 height, void* data, u32 data_stride) const; + void WriteTexels(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride); + + // Assumes that image is in TRANSFER_SRC layout. + // Results are not ready until command_buffer has been executed. + virtual void CopyFromImage(VkCommandBuffer command_buffer, VkImage image, + VkImageAspectFlags src_aspect, u32 x, u32 y, u32 width, u32 height, + u32 level, u32 layer) = 0; + + // Assumes that image is in TRANSFER_DST layout. + // Buffer is not safe for re-use until after command_buffer has been executed. + virtual void CopyToImage(VkCommandBuffer command_buffer, VkImage image, + VkImageAspectFlags dst_aspect, u32 x, u32 y, u32 width, u32 height, + u32 level, u32 layer) = 0; + virtual bool Map(VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE) = 0; + virtual void Unmap() = 0; + + // Creates the optimal format of image copy. 
+ static std::unique_ptr Create(STAGING_BUFFER_TYPE type, u32 width, u32 height, + VkFormat format); + +protected: + STAGING_BUFFER_TYPE m_type; + u32 m_width; + u32 m_height; + VkFormat m_format; + u32 m_texel_size; + u32 m_row_stride; + + char* m_map_pointer = nullptr; + VkDeviceSize m_map_offset = 0; + VkDeviceSize m_map_size = 0; +}; + +class StagingTexture2DLinear : public StagingTexture2D +{ +public: + StagingTexture2DLinear(STAGING_BUFFER_TYPE type, u32 width, u32 height, VkFormat format, + u32 stride, VkImage image, VkDeviceMemory memory, VkDeviceSize size, + bool coherent); + + ~StagingTexture2DLinear(); + + void CopyFromImage(VkCommandBuffer command_buffer, VkImage image, VkImageAspectFlags src_aspect, + u32 x, u32 y, u32 width, u32 height, u32 level, u32 layer) override; + + void CopyToImage(VkCommandBuffer command_buffer, VkImage image, VkImageAspectFlags dst_aspect, + u32 x, u32 y, u32 width, u32 height, u32 level, u32 layer) override; + + bool Map(VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE) override; + void Unmap() override; + + static std::unique_ptr Create(STAGING_BUFFER_TYPE type, u32 width, u32 height, + VkFormat format); + +private: + VkImage m_image; + VkDeviceMemory m_memory; + VkDeviceSize m_size; + VkImageLayout m_layout; + bool m_coherent; +}; + +class StagingTexture2DBuffer : public StagingTexture2D +{ +public: + StagingTexture2DBuffer(STAGING_BUFFER_TYPE type, u32 width, u32 height, VkFormat format, + u32 stride, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize size, + bool coherent); + + ~StagingTexture2DBuffer(); + + void CopyFromImage(VkCommandBuffer command_buffer, VkImage image, VkImageAspectFlags src_aspect, + u32 x, u32 y, u32 width, u32 height, u32 level, u32 layer) override; + + void CopyToImage(VkCommandBuffer command_buffer, VkImage image, VkImageAspectFlags dst_aspect, + u32 x, u32 y, u32 width, u32 height, u32 level, u32 layer) override; + + bool Map(VkDeviceSize offset = 0, VkDeviceSize size = 
VK_WHOLE_SIZE) override; + void Unmap() override; + + static std::unique_ptr Create(STAGING_BUFFER_TYPE type, u32 width, u32 height, + VkFormat format); + +private: + VkBuffer m_buffer; + VkDeviceMemory m_memory; + VkDeviceSize m_size; + bool m_coherent; +}; +} diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp new file mode 100644 index 0000000000..a1c4e5fb25 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp @@ -0,0 +1,904 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "Common/Assert.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +#include "VideoCommon/GeometryShaderManager.h" +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexShaderManager.h" +#include "VideoCommon/VideoConfig.h" + +namespace Vulkan +{ +StateTracker::StateTracker() +{ + // Set some sensible defaults + m_pipeline_state.pipeline_layout = g_object_cache->GetStandardPipelineLayout(); + m_pipeline_state.rasterization_state.cull_mode = VK_CULL_MODE_NONE; + m_pipeline_state.rasterization_state.per_sample_shading = VK_FALSE; + m_pipeline_state.rasterization_state.depth_clamp = VK_FALSE; + m_pipeline_state.depth_stencil_state.test_enable = VK_TRUE; + m_pipeline_state.depth_stencil_state.write_enable = VK_TRUE; + m_pipeline_state.depth_stencil_state.compare_op = VK_COMPARE_OP_LESS; + m_pipeline_state.blend_state.blend_enable = VK_FALSE; + m_pipeline_state.blend_state.blend_op = VK_BLEND_OP_ADD; + m_pipeline_state.blend_state.src_blend = 
VK_BLEND_FACTOR_ONE; + m_pipeline_state.blend_state.dst_blend = VK_BLEND_FACTOR_ZERO; + m_pipeline_state.blend_state.alpha_blend_op = VK_BLEND_OP_ADD; + m_pipeline_state.blend_state.src_alpha_blend = VK_BLEND_FACTOR_ONE; + m_pipeline_state.blend_state.dst_alpha_blend = VK_BLEND_FACTOR_ZERO; + m_pipeline_state.blend_state.logic_op_enable = VK_FALSE; + m_pipeline_state.blend_state.logic_op = VK_LOGIC_OP_CLEAR; + m_pipeline_state.blend_state.write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + // Enable depth clamping if supported by driver. + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + m_pipeline_state.rasterization_state.depth_clamp = VK_TRUE; + + // BBox is disabled by default. + m_pipeline_state.pipeline_layout = g_object_cache->GetStandardPipelineLayout(); + m_num_active_descriptor_sets = NUM_DESCRIPTOR_SETS - 1; + m_bbox_enabled = false; + + // Initialize all samplers to point by default + for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) + { + m_bindings.ps_samplers[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + m_bindings.ps_samplers[i].imageView = VK_NULL_HANDLE; + m_bindings.ps_samplers[i].sampler = g_object_cache->GetPointSampler(); + } + + // Create the streaming uniform buffer + m_uniform_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, INITIAL_UNIFORM_STREAM_BUFFER_SIZE, + MAXIMUM_UNIFORM_STREAM_BUFFER_SIZE); + if (!m_uniform_stream_buffer) + PanicAlert("Failed to create uniform stream buffer"); + + // The validation layer complains if max(offsets) + max(ubo_ranges) >= ubo_size. + // To work around this we reserve the maximum buffer size at all times, but only commit + // as many bytes as we use. 
+ m_uniform_buffer_reserve_size = sizeof(PixelShaderConstants); + m_uniform_buffer_reserve_size = Util::AlignValue(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment()) + + sizeof(VertexShaderConstants); + m_uniform_buffer_reserve_size = Util::AlignValue(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment()) + + sizeof(GeometryShaderConstants); + + // Default dirty flags include all descriptors + InvalidateDescriptorSets(); + SetPendingRebind(); + + // Set default constants + UploadAllConstants(); +} + +StateTracker::~StateTracker() +{ +} + +void StateTracker::SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset) +{ + if (m_vertex_buffer == buffer && m_vertex_buffer_offset == offset) + return; + + m_vertex_buffer = buffer; + m_vertex_buffer_offset = offset; + m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER; +} + +void StateTracker::SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) +{ + if (m_index_buffer == buffer && m_index_buffer_offset == offset && m_index_type == type) + return; + + m_index_buffer = buffer; + m_index_buffer_offset = offset; + m_index_type = type; + m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER; +} + +void StateTracker::SetRenderPass(VkRenderPass load_render_pass, VkRenderPass clear_render_pass) +{ + // Should not be changed within a render pass. + _assert_(!InRenderPass()); + + // The clear and load render passes are compatible, so we don't need to change our pipeline. + if (m_pipeline_state.render_pass != load_render_pass) + { + m_pipeline_state.render_pass = load_render_pass; + m_dirty_flags |= DIRTY_FLAG_PIPELINE; + } + + m_load_render_pass = load_render_pass; + m_clear_render_pass = clear_render_pass; +} + +void StateTracker::SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& render_area) +{ + // Should not be changed within a render pass. 
+ _assert_(!InRenderPass()); + m_framebuffer = framebuffer; + m_framebuffer_size = render_area; +} + +void StateTracker::SetVertexFormat(const VertexFormat* vertex_format) +{ + if (m_pipeline_state.vertex_format == vertex_format) + return; + + m_pipeline_state.vertex_format = vertex_format; + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + +void StateTracker::SetPrimitiveTopology(VkPrimitiveTopology primitive_topology) +{ + if (m_pipeline_state.primitive_topology == primitive_topology) + return; + + m_pipeline_state.primitive_topology = primitive_topology; + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + +void StateTracker::DisableBackFaceCulling() +{ + if (m_pipeline_state.rasterization_state.cull_mode == VK_CULL_MODE_NONE) + return; + + m_pipeline_state.rasterization_state.cull_mode = VK_CULL_MODE_NONE; + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + +void StateTracker::SetRasterizationState(const RasterizationState& state) +{ + if (m_pipeline_state.rasterization_state.bits == state.bits) + return; + + m_pipeline_state.rasterization_state.bits = state.bits; + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + +void StateTracker::SetDepthStencilState(const DepthStencilState& state) +{ + if (m_pipeline_state.depth_stencil_state.bits == state.bits) + return; + + m_pipeline_state.depth_stencil_state.bits = state.bits; + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + +void StateTracker::SetBlendState(const BlendState& state) +{ + if (m_pipeline_state.blend_state.bits == state.bits) + return; + + m_pipeline_state.blend_state.bits = state.bits; + m_dirty_flags |= DIRTY_FLAG_PIPELINE; +} + +bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type, DSTALPHA_MODE dstalpha_mode) +{ + VertexShaderUid vs_uid = GetVertexShaderUid(); + PixelShaderUid ps_uid = GetPixelShaderUid(dstalpha_mode); + + bool changed = false; + + if (vs_uid != m_vs_uid) + { + m_pipeline_state.vs = g_object_cache->GetVertexShaderForUid(vs_uid); + m_vs_uid = vs_uid; + changed = true; + } + + if 
(g_vulkan_context->SupportsGeometryShaders()) + { + GeometryShaderUid gs_uid = GetGeometryShaderUid(gx_primitive_type); + if (gs_uid != m_gs_uid) + { + if (gs_uid.GetUidData()->IsPassthrough()) + m_pipeline_state.gs = VK_NULL_HANDLE; + else + m_pipeline_state.gs = g_object_cache->GetGeometryShaderForUid(gs_uid); + + m_gs_uid = gs_uid; + changed = true; + } + } + + if (ps_uid != m_ps_uid) + { + m_pipeline_state.ps = g_object_cache->GetPixelShaderForUid(ps_uid, dstalpha_mode); + m_ps_uid = ps_uid; + changed = true; + } + + if (m_dstalpha_mode != dstalpha_mode) + { + // Switching to/from alpha pass requires a pipeline change, since the blend state + // is overridden in the destination alpha pass. + if (m_dstalpha_mode == DSTALPHA_ALPHA_PASS || dstalpha_mode == DSTALPHA_ALPHA_PASS) + changed = true; + + m_dstalpha_mode = dstalpha_mode; + } + + if (changed) + m_dirty_flags |= DIRTY_FLAG_PIPELINE; + + return changed; +} + +void StateTracker::UpdateVertexShaderConstants() +{ + if (!VertexShaderManager::dirty) + return; + + // Since the other stages uniform buffers' may be still be using the earlier data, + // we can't reuse the earlier part of the buffer without re-uploading everything. + if (!m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment(), false, + false, false)) + { + // Re-upload all constants to a new portion of the buffer. + UploadAllConstants(); + return; + } + + // Buffer allocation changed? 
+ if (m_uniform_stream_buffer->GetBuffer() != + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].buffer) + { + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].buffer = + m_uniform_stream_buffer->GetBuffer(); + m_dirty_flags |= DIRTY_FLAG_VS_UBO; + } + + m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = + static_cast(m_uniform_stream_buffer->GetCurrentOffset()); + m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; + + memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &VertexShaderManager::constants, + sizeof(VertexShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(VertexShaderConstants)); + VertexShaderManager::dirty = false; +} + +void StateTracker::UpdateGeometryShaderConstants() +{ + // Skip updating geometry shader constants if it's not in use. + if (m_pipeline_state.gs == VK_NULL_HANDLE || !GeometryShaderManager::dirty) + return; + + // Since the other stages uniform buffers' may be still be using the earlier data, + // we can't reuse the earlier part of the buffer without re-uploading everything. + if (!m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment(), false, + false, false)) + { + // Re-upload all constants to a new portion of the buffer. + UploadAllConstants(); + return; + } + + // Buffer allocation changed? 
+ if (m_uniform_stream_buffer->GetBuffer() != + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].buffer) + { + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].buffer = + m_uniform_stream_buffer->GetBuffer(); + m_dirty_flags |= DIRTY_FLAG_GS_UBO; + } + + m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_GS] = + static_cast(m_uniform_stream_buffer->GetCurrentOffset()); + m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; + + memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &GeometryShaderManager::constants, + sizeof(GeometryShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(GeometryShaderConstants)); + GeometryShaderManager::dirty = false; +} + +void StateTracker::UpdatePixelShaderConstants() +{ + if (!PixelShaderManager::dirty) + return; + + // Since the other stages uniform buffers' may be still be using the earlier data, + // we can't reuse the earlier part of the buffer without re-uploading everything. + if (!m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment(), false, + false, false)) + { + // Re-upload all constants to a new portion of the buffer. + UploadAllConstants(); + return; + } + + // Buffer allocation changed? 
+ if (m_uniform_stream_buffer->GetBuffer() != + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].buffer) + { + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].buffer = + m_uniform_stream_buffer->GetBuffer(); + m_dirty_flags |= DIRTY_FLAG_PS_UBO; + } + + m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = + static_cast(m_uniform_stream_buffer->GetCurrentOffset()); + m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; + + memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &PixelShaderManager::constants, + sizeof(PixelShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(PixelShaderConstants)); + PixelShaderManager::dirty = false; +} + +void StateTracker::UploadAllConstants() +{ + // We are free to re-use parts of the buffer now since we're uploading all constants. + size_t pixel_constants_offset = 0; + size_t vertex_constants_offset = + Util::AlignValue(pixel_constants_offset + sizeof(PixelShaderConstants), + g_vulkan_context->GetUniformBufferAlignment()); + size_t geometry_constants_offset = + Util::AlignValue(vertex_constants_offset + sizeof(VertexShaderConstants), + g_vulkan_context->GetUniformBufferAlignment()); + size_t total_allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants); + + // Allocate everything at once. + if (!m_uniform_stream_buffer->ReserveMemory( + total_allocation_size, g_vulkan_context->GetUniformBufferAlignment(), true, true, false)) + { + // If this fails, wait until the GPU has caught up. + // The only places that call constant updates are safe to have state restored. 
+ WARN_LOG(VIDEO, "Executing command list while waiting for space in uniform buffer"); + Util::ExecuteCurrentCommandsAndRestoreState(this, false); + if (!m_uniform_stream_buffer->ReserveMemory(total_allocation_size, + g_vulkan_context->GetUniformBufferAlignment(), true, + true, false)) + { + PanicAlert("Failed to allocate space for constants in streaming buffer"); + return; + } + } + + // Update bindings + for (size_t i = 0; i < NUM_UBO_DESCRIPTOR_SET_BINDINGS; i++) + { + m_bindings.uniform_buffer_bindings[i].buffer = m_uniform_stream_buffer->GetBuffer(); + m_bindings.uniform_buffer_bindings[i].offset = 0; + } + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].range = + sizeof(PixelShaderConstants); + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].range = + sizeof(VertexShaderConstants); + m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].range = + sizeof(GeometryShaderConstants); + + // Update dynamic offsets + m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = + static_cast(m_uniform_stream_buffer->GetCurrentOffset() + pixel_constants_offset); + + m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = + static_cast(m_uniform_stream_buffer->GetCurrentOffset() + vertex_constants_offset); + + m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_GS] = static_cast( + m_uniform_stream_buffer->GetCurrentOffset() + geometry_constants_offset); + + m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_VS_UBO | + DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO; + + // Copy the actual data in + memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + pixel_constants_offset, + &PixelShaderManager::constants, sizeof(PixelShaderConstants)); + memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + vertex_constants_offset, + &VertexShaderManager::constants, sizeof(VertexShaderConstants)); + memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + 
geometry_constants_offset, + &GeometryShaderManager::constants, sizeof(GeometryShaderConstants)); + + // Finally, flush buffer memory after copying + m_uniform_stream_buffer->CommitMemory(total_allocation_size); + + // Clear dirty flags + VertexShaderManager::dirty = false; + GeometryShaderManager::dirty = false; + PixelShaderManager::dirty = false; +} + +void StateTracker::SetTexture(size_t index, VkImageView view) +{ + if (m_bindings.ps_samplers[index].imageView == view) + return; + + m_bindings.ps_samplers[index].imageView = view; + m_bindings.ps_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + m_dirty_flags |= DIRTY_FLAG_PS_SAMPLERS; +} + +void StateTracker::SetSampler(size_t index, VkSampler sampler) +{ + if (m_bindings.ps_samplers[index].sampler == sampler) + return; + + m_bindings.ps_samplers[index].sampler = sampler; + m_dirty_flags |= DIRTY_FLAG_PS_SAMPLERS; +} + +void StateTracker::SetBBoxEnable(bool enable) +{ + if (m_bbox_enabled == enable) + return; + + // Change the number of active descriptor sets, as well as the pipeline layout + if (enable) + { + m_pipeline_state.pipeline_layout = g_object_cache->GetBBoxPipelineLayout(); + m_num_active_descriptor_sets = NUM_DESCRIPTOR_SETS; + + // The bbox buffer never changes, so we defer descriptor updates until it is enabled. 
+ if (m_descriptor_sets[DESCRIPTOR_SET_SHADER_STORAGE_BUFFERS] == VK_NULL_HANDLE) + m_dirty_flags |= DIRTY_FLAG_PS_SSBO; + } + else + { + m_pipeline_state.pipeline_layout = g_object_cache->GetStandardPipelineLayout(); + m_num_active_descriptor_sets = NUM_DESCRIPTOR_SETS - 1; + } + + m_dirty_flags |= DIRTY_FLAG_PIPELINE | DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + m_bbox_enabled = enable; +} + +void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range) +{ + if (m_bindings.ps_ssbo.buffer == buffer && m_bindings.ps_ssbo.offset == offset && + m_bindings.ps_ssbo.range == range) + { + return; + } + + m_bindings.ps_ssbo.buffer = buffer; + m_bindings.ps_ssbo.offset = offset; + m_bindings.ps_ssbo.range = range; + + // Defer descriptor update until bbox is actually enabled. + if (m_bbox_enabled) + m_dirty_flags |= DIRTY_FLAG_PS_SSBO; +} + +void StateTracker::UnbindTexture(VkImageView view) +{ + for (VkDescriptorImageInfo& it : m_bindings.ps_samplers) + { + if (it.imageView == view) + it.imageView = VK_NULL_HANDLE; + } +} + +void StateTracker::InvalidateDescriptorSets() +{ + m_descriptor_sets.fill(VK_NULL_HANDLE); + m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS; + + // Defer SSBO descriptor update until bbox is actually enabled. 
+ if (!m_bbox_enabled) + m_dirty_flags &= ~DIRTY_FLAG_PS_SSBO; +} + +void StateTracker::SetPendingRebind() +{ + m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_DESCRIPTOR_SET_BINDING | + DIRTY_FLAG_PIPELINE_BINDING | DIRTY_FLAG_VERTEX_BUFFER | + DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | + DIRTY_FLAG_PIPELINE; +} + +void StateTracker::BeginRenderPass() +{ + if (InRenderPass()) + return; + + m_current_render_pass = m_load_render_pass; + m_framebuffer_render_area = m_framebuffer_size; + + VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + m_current_render_pass, + m_framebuffer, + m_framebuffer_render_area, + 0, + nullptr}; + + vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, + VK_SUBPASS_CONTENTS_INLINE); +} + +void StateTracker::EndRenderPass() +{ + if (!InRenderPass()) + return; + + vkCmdEndRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer()); + m_current_render_pass = nullptr; +} + +void StateTracker::BeginClearRenderPass(const VkRect2D& area, const VkClearValue clear_values[2]) +{ + _assert_(!InRenderPass()); + + m_current_render_pass = m_clear_render_pass; + m_framebuffer_render_area = area; + + VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + m_current_render_pass, + m_framebuffer, + m_framebuffer_render_area, + 2, + clear_values}; + + vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, + VK_SUBPASS_CONTENTS_INLINE); +} + +void StateTracker::SetViewport(const VkViewport& viewport) +{ + if (memcmp(&m_viewport, &viewport, sizeof(viewport)) == 0) + return; + + m_viewport = viewport; + m_dirty_flags |= DIRTY_FLAG_VIEWPORT; +} + +void StateTracker::SetScissor(const VkRect2D& scissor) +{ + if (memcmp(&m_scissor, &scissor, sizeof(scissor)) == 0) + return; + + m_scissor = scissor; + m_dirty_flags |= DIRTY_FLAG_SCISSOR; +} + +bool StateTracker::Bind(bool rebind_all /*= false*/) +{ + 
// Check the render area if we were in a clear pass. + if (m_current_render_pass == m_clear_render_pass && !IsViewportWithinRenderArea()) + EndRenderPass(); + + // Get new pipeline object if any parts have changed + if (m_dirty_flags & DIRTY_FLAG_PIPELINE && !UpdatePipeline()) + { + ERROR_LOG(VIDEO, "Failed to get pipeline object, skipping draw"); + return false; + } + + // Get a new descriptor set if any parts have changed + if (m_dirty_flags & DIRTY_FLAG_ALL_DESCRIPTOR_SETS && !UpdateDescriptorSet()) + { + // We can fail to allocate descriptors if we exhaust the pool for this command buffer. + WARN_LOG(VIDEO, "Failed to get a descriptor set, executing buffer"); + + // Try again after executing the current buffer. + g_command_buffer_mgr->ExecuteCommandBuffer(false, false); + InvalidateDescriptorSets(); + SetPendingRebind(); + if (!UpdateDescriptorSet()) + { + // Something strange going on. + ERROR_LOG(VIDEO, "Failed to get descriptor set, skipping draw"); + return false; + } + } + + // Start render pass if not already started + if (!InRenderPass()) + BeginRenderPass(); + + // Re-bind parts of the pipeline + VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + if (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER || rebind_all) + vkCmdBindVertexBuffers(command_buffer, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset); + + if (m_dirty_flags & DIRTY_FLAG_INDEX_BUFFER || rebind_all) + vkCmdBindIndexBuffer(command_buffer, m_index_buffer, m_index_buffer_offset, m_index_type); + + if (m_dirty_flags & DIRTY_FLAG_PIPELINE_BINDING || rebind_all) + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_object); + + if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SET_BINDING || rebind_all) + { + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_pipeline_state.pipeline_layout, 0, m_num_active_descriptor_sets, + m_descriptor_sets.data(), NUM_UBO_DESCRIPTOR_SET_BINDINGS, + m_bindings.uniform_buffer_offsets.data()); 
+ } + else if (m_dirty_flags & DIRTY_FLAG_DYNAMIC_OFFSETS) + { + vkCmdBindDescriptorSets( + command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_state.pipeline_layout, + DESCRIPTOR_SET_UNIFORM_BUFFERS, 1, &m_descriptor_sets[DESCRIPTOR_SET_UNIFORM_BUFFERS], + NUM_UBO_DESCRIPTOR_SET_BINDINGS, m_bindings.uniform_buffer_offsets.data()); + } + + if (m_dirty_flags & DIRTY_FLAG_VIEWPORT || rebind_all) + vkCmdSetViewport(command_buffer, 0, 1, &m_viewport); + + if (m_dirty_flags & DIRTY_FLAG_SCISSOR || rebind_all) + vkCmdSetScissor(command_buffer, 0, 1, &m_scissor); + + m_dirty_flags = 0; + return true; +} + +void StateTracker::OnDraw() +{ + m_draw_counter++; + + // If we didn't have any CPU access last frame, do nothing. + if (m_scheduled_command_buffer_kicks.empty() || !m_allow_background_execution) + return; + + // Check if this draw is scheduled to kick a command buffer. + // The draw counters will always be sorted so a binary search is possible here. + if (std::binary_search(m_scheduled_command_buffer_kicks.begin(), + m_scheduled_command_buffer_kicks.end(), m_draw_counter)) + { + // Kick a command buffer on the background thread. + EndRenderPass(); + g_command_buffer_mgr->ExecuteCommandBuffer(true, false); + InvalidateDescriptorSets(); + SetPendingRebind(); + } +} + +void StateTracker::OnReadback() +{ + // Check this isn't another access without any draws inbetween. + if (!m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter) + return; + + // Store the current draw counter for scheduling in OnEndFrame. + m_cpu_accesses_this_frame.emplace_back(m_draw_counter); +} + +void StateTracker::OnEndFrame() +{ + m_draw_counter = 0; + m_scheduled_command_buffer_kicks.clear(); + + // If we have no CPU access at all, leave everything in the one command buffer for maximum + // parallelism between CPU/GPU, at the cost of slightly higher latency. 
+ if (m_cpu_accesses_this_frame.empty()) + return; + + // In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway + // between the draw counters that invoked the readback, or every 250 draws, whichever is smaller. + if (g_ActiveConfig.iCommandBufferExecuteInterval > 0) + { + u32 last_draw_counter = 0; + u32 interval = static_cast(g_ActiveConfig.iCommandBufferExecuteInterval); + for (u32 draw_counter : m_cpu_accesses_this_frame) + { + u32 draw_count = draw_counter - last_draw_counter; + if (draw_count <= interval) + { + u32 mid_point = draw_count / 2; + m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + mid_point); + } + else + { + u32 counter = interval; + while (counter < draw_count) + { + m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + counter); + counter += interval; + } + } + } + } + +#if 0 + { + std::stringstream ss; + std::for_each(m_cpu_accesses_this_frame.begin(), m_cpu_accesses_this_frame.end(), [&ss](u32 idx) { ss << idx << ","; }); + WARN_LOG(VIDEO, "CPU EFB accesses in last frame: %s", ss.str().c_str()); + } + { + std::stringstream ss; + std::for_each(m_scheduled_command_buffer_kicks.begin(), m_scheduled_command_buffer_kicks.end(), [&ss](u32 idx) { ss << idx << ","; }); + WARN_LOG(VIDEO, "Scheduled command buffer kicks: %s", ss.str().c_str()); + } +#endif + + m_cpu_accesses_this_frame.clear(); +} + +void StateTracker::SetBackgroundCommandBufferExecution(bool enabled) +{ + m_allow_background_execution = enabled; +} + +bool StateTracker::IsWithinRenderArea(s32 x, s32 y, u32 width, u32 height) const +{ + // Check that the viewport does not lie outside the render area. + // If it does, we need to switch to a normal load/store render pass. 
+ s32 left = m_framebuffer_render_area.offset.x; + s32 top = m_framebuffer_render_area.offset.y; + s32 right = left + static_cast(m_framebuffer_render_area.extent.width); + s32 bottom = top + static_cast(m_framebuffer_render_area.extent.height); + s32 test_left = x; + s32 test_top = y; + s32 test_right = test_left + static_cast(width); + s32 test_bottom = test_top + static_cast(height); + return test_left >= left && test_right <= right && test_top >= top && test_bottom <= bottom; +} + +bool StateTracker::IsViewportWithinRenderArea() const +{ + return IsWithinRenderArea(static_cast(m_viewport.x), static_cast(m_viewport.y), + static_cast(m_viewport.width), + static_cast(m_viewport.height)); +} + +void StateTracker::EndClearRenderPass() +{ + if (m_current_render_pass != m_clear_render_pass) + return; + + // End clear render pass. Bind() will call BeginRenderPass() which + // will switch to the load/store render pass. + EndRenderPass(); +} + +bool StateTracker::UpdatePipeline() +{ + // We need at least a vertex and fragment shader + if (m_pipeline_state.vs == VK_NULL_HANDLE || m_pipeline_state.ps == VK_NULL_HANDLE) + return false; + + // Grab a new pipeline object, this can fail + if (m_dstalpha_mode != DSTALPHA_ALPHA_PASS) + { + m_pipeline_object = g_object_cache->GetPipeline(m_pipeline_state); + if (m_pipeline_object == VK_NULL_HANDLE) + return false; + } + else + { + // We need to make a few modifications to the pipeline object, but retain + // the existing state, since we don't want to break the next draw. + PipelineInfo temp_info = m_pipeline_state; + + // Skip depth writes for this pass. The results will be the same, so no + // point in overwriting depth values with the same value. + temp_info.depth_stencil_state.write_enable = VK_FALSE; + + // Only allow alpha writes, and disable blending. 
+ temp_info.blend_state.blend_enable = VK_FALSE; + temp_info.blend_state.logic_op_enable = VK_FALSE; + temp_info.blend_state.write_mask = VK_COLOR_COMPONENT_A_BIT; + + m_pipeline_object = g_object_cache->GetPipeline(temp_info); + if (m_pipeline_object == VK_NULL_HANDLE) + return false; + } + + m_dirty_flags |= DIRTY_FLAG_PIPELINE_BINDING; + return true; +} + +bool StateTracker::UpdateDescriptorSet() +{ + const size_t MAX_DESCRIPTOR_WRITES = NUM_UBO_DESCRIPTOR_SET_BINDINGS + // UBO + NUM_PIXEL_SHADER_SAMPLERS + // Samplers + 1; // SSBO + std::array writes; + u32 num_writes = 0; + + if (m_dirty_flags & (DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO) || + m_descriptor_sets[DESCRIPTOR_SET_UNIFORM_BUFFERS] == VK_NULL_HANDLE) + { + VkDescriptorSetLayout layout = + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_UNIFORM_BUFFERS); + VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(layout); + if (set == VK_NULL_HANDLE) + return false; + + for (size_t i = 0; i < NUM_UBO_DESCRIPTOR_SET_BINDINGS; i++) + { + writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + set, + static_cast(i), + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + nullptr, + &m_bindings.uniform_buffer_bindings[i], + nullptr}; + } + + m_descriptor_sets[DESCRIPTOR_SET_UNIFORM_BUFFERS] = set; + m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + } + + if (m_dirty_flags & DIRTY_FLAG_PS_SAMPLERS || + m_descriptor_sets[DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS] == VK_NULL_HANDLE) + { + VkDescriptorSetLayout layout = + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS); + VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(layout); + if (set == VK_NULL_HANDLE) + return false; + + for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) + { + const VkDescriptorImageInfo& info = m_bindings.ps_samplers[i]; + if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE) + { + writes[num_writes++] = 
{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + set, + static_cast(i), + 0, + 1, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + &info, + nullptr, + nullptr}; + } + } + + m_descriptor_sets[DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS] = set; + m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + } + + if (m_bbox_enabled && + (m_dirty_flags & DIRTY_FLAG_PS_SSBO || + m_descriptor_sets[DESCRIPTOR_SET_SHADER_STORAGE_BUFFERS] == VK_NULL_HANDLE)) + { + VkDescriptorSetLayout layout = + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_SHADER_STORAGE_BUFFERS); + VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(layout); + if (set == VK_NULL_HANDLE) + return false; + + writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + set, + 0, + 0, + 1, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + nullptr, + &m_bindings.ps_ssbo, + nullptr}; + + m_descriptor_sets[DESCRIPTOR_SET_SHADER_STORAGE_BUFFERS] = set; + m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + } + + if (num_writes > 0) + vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_writes, writes.data(), 0, nullptr); + + return true; +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.h b/Source/Core/VideoBackends/Vulkan/StateTracker.h new file mode 100644 index 0000000000..2b807306e4 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.h @@ -0,0 +1,188 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoCommon/GeometryShaderGen.h" +#include "VideoCommon/PixelShaderGen.h" +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/VertexShaderGen.h" + +namespace Vulkan +{ +class StreamBuffer; +class VertexFormat; + +class StateTracker +{ +public: + StateTracker(); + ~StateTracker(); + + const RasterizationState& GetRasterizationState() const + { + return m_pipeline_state.rasterization_state; + } + const DepthStencilState& GetDepthStencilState() const + { + return m_pipeline_state.depth_stencil_state; + } + const BlendState& GetBlendState() const { return m_pipeline_state.blend_state; } + void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset); + void SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type); + + void SetRenderPass(VkRenderPass load_render_pass, VkRenderPass clear_render_pass); + + void SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& render_area); + + void SetVertexFormat(const VertexFormat* vertex_format); + + void SetPrimitiveTopology(VkPrimitiveTopology primitive_topology); + + void DisableBackFaceCulling(); + + void SetRasterizationState(const RasterizationState& state); + void SetDepthStencilState(const DepthStencilState& state); + void SetBlendState(const BlendState& state); + + bool CheckForShaderChanges(u32 gx_primitive_type, DSTALPHA_MODE dstalpha_mode); + + void UpdateVertexShaderConstants(); + void UpdateGeometryShaderConstants(); + void UpdatePixelShaderConstants(); + + void SetTexture(size_t index, VkImageView view); + void SetSampler(size_t index, VkSampler sampler); + + void SetBBoxEnable(bool enable); + void SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range); + + void UnbindTexture(VkImageView view); + + // When executing a command buffer, we want to recreate the descriptor set, as it will + // now be in a different pool for the new command buffer. 
+ void InvalidateDescriptorSets(); + + // Set dirty flags on everything to force re-bind at next draw time. + void SetPendingRebind(); + + // Ends a render pass if we're currently in one. + // When Bind() is next called, the pass will be restarted. + // Calling this function is allowed even if a pass has not begun. + bool InRenderPass() const { return m_current_render_pass != VK_NULL_HANDLE; } + void BeginRenderPass(); + void EndRenderPass(); + + // Ends the current render pass if it was a clear render pass. + void BeginClearRenderPass(const VkRect2D& area, const VkClearValue clear_values[2]); + void EndClearRenderPass(); + + void SetViewport(const VkViewport& viewport); + void SetScissor(const VkRect2D& scissor); + + bool Bind(bool rebind_all = false); + + // CPU Access Tracking + // Call after a draw call is made. + void OnDraw(); + + // Call after CPU access is requested. + // This can be via EFBCache or EFB2RAM. + void OnReadback(); + + // Call at the end of a frame. + void OnEndFrame(); + + // Prevent/allow background command buffer execution. + // Use when queries are active. + void SetBackgroundCommandBufferExecution(bool enabled); + + bool IsWithinRenderArea(s32 x, s32 y, u32 width, u32 height) const; + +private: + // Check that the specified viewport is within the render area. + // If not, ends the render pass if it is a clear render pass. 
+ bool IsViewportWithinRenderArea() const; + bool UpdatePipeline(); + bool UpdateDescriptorSet(); + void UploadAllConstants(); + + enum DITRY_FLAG : u32 + { + DIRTY_FLAG_VS_UBO = (1 << 0), + DIRTY_FLAG_GS_UBO = (1 << 1), + DIRTY_FLAG_PS_UBO = (1 << 2), + DIRTY_FLAG_PS_SAMPLERS = (1 << 3), + DIRTY_FLAG_PS_SSBO = (1 << 4), + DIRTY_FLAG_DYNAMIC_OFFSETS = (1 << 5), + DIRTY_FLAG_VERTEX_BUFFER = (1 << 6), + DIRTY_FLAG_INDEX_BUFFER = (1 << 7), + DIRTY_FLAG_VIEWPORT = (1 << 8), + DIRTY_FLAG_SCISSOR = (1 << 9), + DIRTY_FLAG_PIPELINE = (1 << 10), + DIRTY_FLAG_DESCRIPTOR_SET_BINDING = (1 << 11), + DIRTY_FLAG_PIPELINE_BINDING = (1 << 12), + + DIRTY_FLAG_ALL_DESCRIPTOR_SETS = + DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO + }; + u32 m_dirty_flags = 0; + + // input assembly + VkBuffer m_vertex_buffer = VK_NULL_HANDLE; + VkDeviceSize m_vertex_buffer_offset = 0; + VkBuffer m_index_buffer = VK_NULL_HANDLE; + VkDeviceSize m_index_buffer_offset = 0; + VkIndexType m_index_type = VK_INDEX_TYPE_UINT16; + + // shader state + VertexShaderUid m_vs_uid = {}; + GeometryShaderUid m_gs_uid = {}; + PixelShaderUid m_ps_uid = {}; + + // pipeline state + PipelineInfo m_pipeline_state = {}; + DSTALPHA_MODE m_dstalpha_mode = DSTALPHA_NONE; + VkPipeline m_pipeline_object = VK_NULL_HANDLE; + + // shader bindings + std::array m_descriptor_sets = {}; + struct + { + std::array uniform_buffer_bindings = + {}; + std::array uniform_buffer_offsets = {}; + + std::array ps_samplers = {}; + + VkDescriptorBufferInfo ps_ssbo = {}; + } m_bindings; + u32 m_num_active_descriptor_sets = 0; + size_t m_uniform_buffer_reserve_size = 0; + + // rasterization + VkViewport m_viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + VkRect2D m_scissor = {{0, 0}, {1, 1}}; + + // uniform buffers + std::unique_ptr m_uniform_stream_buffer; + + VkFramebuffer m_framebuffer = VK_NULL_HANDLE; + VkRenderPass m_load_render_pass = VK_NULL_HANDLE; + VkRenderPass m_clear_render_pass = VK_NULL_HANDLE; + 
VkRenderPass m_current_render_pass = VK_NULL_HANDLE; + VkRect2D m_framebuffer_size = {}; + VkRect2D m_framebuffer_render_area = {}; + bool m_bbox_enabled = false; + + // CPU access tracking + u32 m_draw_counter = 0; + std::vector m_cpu_accesses_this_frame; + std::vector m_scheduled_command_buffer_kicks; + bool m_allow_background_execution = true; +}; +} diff --git a/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp new file mode 100644 index 0000000000..cc2999d262 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp @@ -0,0 +1,345 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/Assert.h" +#include "Common/MsgHandler.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +namespace Vulkan +{ +StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, size_t max_size) + : m_usage(usage), m_maximum_size(max_size) +{ + // Add a callback that fires on fence point creation and signal + g_command_buffer_mgr->AddFencePointCallback( + this, std::bind(&StreamBuffer::OnCommandBufferQueued, this, std::placeholders::_1, + std::placeholders::_2), + std::bind(&StreamBuffer::OnCommandBufferExecuted, this, std::placeholders::_1)); +} + +StreamBuffer::~StreamBuffer() +{ + g_command_buffer_mgr->RemoveFencePointCallback(this); + + if (m_host_pointer) + vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory); + + if (m_buffer != VK_NULL_HANDLE) + g_command_buffer_mgr->DeferResourceDestruction(m_buffer); + if (m_memory != VK_NULL_HANDLE) + g_command_buffer_mgr->DeferResourceDestruction(m_memory); +} + +std::unique_ptr StreamBuffer::Create(VkBufferUsageFlags usage, size_t initial_size, + size_t max_size) +{ + std::unique_ptr buffer = std::make_unique(usage, max_size); + if 
(!buffer->ResizeBuffer(initial_size)) + return nullptr; + + return buffer; +} + +bool StreamBuffer::ResizeBuffer(size_t size) +{ + // Create the buffer descriptor + VkBufferCreateInfo buffer_create_info = { + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkBufferCreateFlags flags + static_cast(size), // VkDeviceSize size + m_usage, // VkBufferUsageFlags usage + VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode + 0, // uint32_t queueFamilyIndexCount + nullptr // const uint32_t* pQueueFamilyIndices + }; + + VkBuffer buffer = VK_NULL_HANDLE; + VkResult res = + vkCreateBuffer(g_vulkan_context->GetDevice(), &buffer_create_info, nullptr, &buffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: "); + return false; + } + + // Get memory requirements (types etc) for this buffer + VkMemoryRequirements memory_requirements; + vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements); + + // Aim for a coherent mapping if possible. 
+ u32 memory_type_index = g_vulkan_context->GetUploadMemoryType(memory_requirements.memoryTypeBits, + &m_coherent_mapping); + + // Allocate memory for backing this buffer + VkMemoryAllocateInfo memory_allocate_info = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + memory_requirements.size, // VkDeviceSize allocationSize + memory_type_index // uint32_t memoryTypeIndex + }; + VkDeviceMemory memory = VK_NULL_HANDLE; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + return false; + } + + // Bind memory to buffer + res = vkBindBufferMemory(g_vulkan_context->GetDevice(), buffer, memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindBufferMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr); + return false; + } + + // Map this buffer into user-space + void* mapped_ptr = nullptr; + res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, size, 0, &mapped_ptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkMapMemory failed: "); + vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr); + return false; + } + + // Unmap current host pointer (if there was a previous buffer) + if (m_host_pointer) + vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory); + + // Destroy the backings for the buffer after the command buffer executes + if (m_buffer != VK_NULL_HANDLE) + g_command_buffer_mgr->DeferResourceDestruction(m_buffer); + if (m_memory != VK_NULL_HANDLE) + g_command_buffer_mgr->DeferResourceDestruction(m_memory); + + // Replace with the new buffer + m_buffer = buffer; + m_memory = memory; + m_host_pointer = 
reinterpret_cast(mapped_ptr); + m_current_size = size; + m_current_offset = 0; + m_current_gpu_position = 0; + m_tracked_fences.clear(); + return true; +} + +bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_reuse /* = true */, + bool allow_growth /* = true */, + bool reallocate_if_full /* = false */) +{ + size_t required_bytes = num_bytes + alignment; + + // Check for sane allocations + if (required_bytes > m_maximum_size) + { + PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer", + static_cast(num_bytes), static_cast(m_maximum_size)); + + return false; + } + + // Is the GPU behind or up to date with our current offset? + if (m_current_offset >= m_current_gpu_position) + { + size_t remaining_bytes = m_current_size - m_current_offset; + if (required_bytes <= remaining_bytes) + { + // Place at the current position, after the GPU position. + m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_last_allocation_size = num_bytes; + return true; + } + + // Check for space at the start of the buffer + // We use < here because we don't want to have the case of m_current_offset == + // m_current_gpu_position. That would mean the code above would assume the + // GPU has caught up to us, which it hasn't. + if (allow_reuse && required_bytes < m_current_gpu_position) + { + // Reset offset to zero, since we're allocating behind the gpu now + m_current_offset = 0; + m_last_allocation_size = num_bytes; + return true; + } + } + + // Is the GPU ahead of our current offset? + if (m_current_offset < m_current_gpu_position) + { + // We have from m_current_offset..m_current_gpu_position space to use. + size_t remaining_bytes = m_current_gpu_position - m_current_offset; + if (required_bytes < remaining_bytes) + { + // Place at the current position, since this is still behind the GPU. 
+ m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_last_allocation_size = num_bytes; + return true; + } + } + + // Try to grow the buffer up to the maximum size before waiting. + // Double each time until the maximum size is reached. + if (allow_growth && m_current_size < m_maximum_size) + { + size_t new_size = std::min(std::max(num_bytes, m_current_size * 2), m_maximum_size); + if (ResizeBuffer(new_size)) + { + // Allocating from the start of the buffer. + m_last_allocation_size = new_size; + return true; + } + } + + // Can we find a fence to wait on that will give us enough memory? + if (allow_reuse && WaitForClearSpace(required_bytes)) + { + _assert_(m_current_offset == m_current_gpu_position || + (m_current_offset + required_bytes) < m_current_gpu_position); + m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_last_allocation_size = num_bytes; + return true; + } + + // If we are not allowed to execute in our current state (e.g. in the middle of a render pass), + // as a last resort, reallocate the buffer. This will incur a performance hit and is not + // encouraged. + if (reallocate_if_full && ResizeBuffer(m_current_size)) + { + m_last_allocation_size = num_bytes; + return true; + } + + // We tried everything we could, and still couldn't get anything. If we're not at a point + // where the state is known and can be resumed, this is probably a fatal error. 
+ return false; +} + +void StreamBuffer::CommitMemory(size_t final_num_bytes) +{ + _assert_((m_current_offset + final_num_bytes) <= m_current_size); + _assert_(final_num_bytes <= m_last_allocation_size); + + // For non-coherent mappings, flush the memory range + if (!m_coherent_mapping) + { + VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory, + m_current_offset, final_num_bytes}; + vkFlushMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range); + } + + m_current_offset += final_num_bytes; +} + +void StreamBuffer::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence) +{ + // Don't create a tracking entry if the GPU is caught up with the buffer. + if (m_current_offset == m_current_gpu_position) + return; + + // Has the offset changed since the last fence? + if (!m_tracked_fences.empty() && m_tracked_fences.back().second == m_current_offset) + { + // No need to track the new fence, the old one is sufficient. + return; + } + + m_tracked_fences.emplace_back(fence, m_current_offset); +} + +void StreamBuffer::OnCommandBufferExecuted(VkFence fence) +{ + // Locate the entry for this fence (if any, we may have been forced to wait already) + auto iter = std::find_if(m_tracked_fences.begin(), m_tracked_fences.end(), + [fence](const auto& it) { return it.first == fence; }); + + if (iter != m_tracked_fences.end()) + { + // Update the GPU position, and remove any fences before this fence (since + // it is implied that they have been signaled as well, though the callback + // should have removed them already). + m_current_gpu_position = iter->second; + m_tracked_fences.erase(m_tracked_fences.begin(), ++iter); + } +} + +bool StreamBuffer::WaitForClearSpace(size_t num_bytes) +{ + size_t new_offset = 0; + size_t new_gpu_position = 0; + auto iter = m_tracked_fences.begin(); + for (; iter != m_tracked_fences.end(); iter++) + { + // Would this fence bring us in line with the GPU? 
+ size_t gpu_position = iter->second; + if (gpu_position == m_current_offset) + { + // Start at the start of the buffer again. + new_offset = 0; + new_gpu_position = 0; + break; + } + + // We can wrap around to the start, behind the GPU, if there is enough space. + // We use > here because otherwise we'd end up lining up with the GPU, and then the + // allocator would assume that the GPU has consumed what we just wrote. + if (m_current_offset >= m_current_gpu_position) + { + // Wrap around to the start (behind the GPU) if there is sufficient space. + if (gpu_position > num_bytes) + { + new_offset = 0; + new_gpu_position = gpu_position; + break; + } + } + else + { + // We're currently allocating behind the GPU. Therefore, if this fence is behind us, + // and it's the last fence in the list (no data has been written after it), we can + // move back to allocating in front of the GPU. + if (gpu_position < m_current_offset) + { + if (std::none_of(iter, m_tracked_fences.end(), + [gpu_position](const auto& it) { return it.second > gpu_position; })) + { + // Wait for this fence to complete, then allocate directly after it. + new_offset = gpu_position; + new_gpu_position = gpu_position; + break; + } + } + } + } + + // Did any fences satisfy this condition? + if (iter == m_tracked_fences.end()) + return false; + + // Wait until this fence is signaled. + VkResult res = + vkWaitForFences(g_vulkan_context->GetDevice(), 1, &iter->first, VK_TRUE, UINT64_MAX); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); + + // Update GPU position, and remove all fences up to (and including) this fence. 
+ m_current_offset = new_offset; + m_current_gpu_position = new_gpu_position; + m_tracked_fences.erase(m_tracked_fences.begin(), ++iter); + return true; +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StreamBuffer.h b/Source/Core/VideoBackends/Vulkan/StreamBuffer.h new file mode 100644 index 0000000000..b819d80739 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/StreamBuffer.h @@ -0,0 +1,59 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "VideoBackends/Vulkan/Constants.h" + +namespace Vulkan +{ +class StreamBuffer +{ +public: + StreamBuffer(VkBufferUsageFlags usage, size_t max_size); + ~StreamBuffer(); + + VkBuffer GetBuffer() const { return m_buffer; } + VkDeviceMemory GetDeviceMemory() const { return m_memory; } + u8* GetHostPointer() const { return m_host_pointer; } + u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } + size_t GetCurrentSize() const { return m_current_size; } + size_t GetCurrentOffset() const { return m_current_offset; } + bool ReserveMemory(size_t num_bytes, size_t alignment, bool allow_reuse = true, + bool allow_growth = true, bool reallocate_if_full = false); + void CommitMemory(size_t final_num_bytes); + + static std::unique_ptr Create(VkBufferUsageFlags usage, size_t initial_size, + size_t max_size); + +private: + bool ResizeBuffer(size_t size); + void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence); + void OnCommandBufferExecuted(VkFence fence); + + // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. 
+ bool WaitForClearSpace(size_t num_bytes); + + VkBufferUsageFlags m_usage; + size_t m_current_size = 0; + size_t m_maximum_size; + size_t m_current_offset = 0; + size_t m_current_gpu_position = 0; + size_t m_last_allocation_size = 0; + + VkBuffer m_buffer = VK_NULL_HANDLE; + VkDeviceMemory m_memory = VK_NULL_HANDLE; + u8* m_host_pointer = nullptr; + + // List of fences and the corresponding positions in the buffer + std::deque> m_tracked_fences; + + bool m_coherent_mapping = false; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/SwapChain.cpp b/Source/Core/VideoBackends/Vulkan/SwapChain.cpp new file mode 100644 index 0000000000..aab4f6c330 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/SwapChain.cpp @@ -0,0 +1,496 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/Assert.h" +#include "Common/CommonFuncs.h" +#include "Common/Logging/Log.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/SwapChain.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +#if defined(VK_USE_PLATFORM_XLIB_KHR) +#include +#elif defined(VK_USE_PLATFORM_XCB_KHR) +#include +#include +#endif + +namespace Vulkan +{ +SwapChain::SwapChain(void* native_handle, VkSurfaceKHR surface) + : m_native_handle(native_handle), m_surface(surface) +{ +} + +SwapChain::~SwapChain() +{ + DestroySwapChainImages(); + DestroySwapChain(); + DestroyRenderPass(); + DestroySurface(); +} + +VkSurfaceKHR SwapChain::CreateVulkanSurface(VkInstance instance, void* hwnd) +{ +#if defined(VK_USE_PLATFORM_WIN32_KHR) + VkWin32SurfaceCreateInfoKHR surface_create_info = { + VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkWin32SurfaceCreateFlagsKHR flags + nullptr, // HINSTANCE hinstance + reinterpret_cast(hwnd) // HWND hwnd + }; + + VkSurfaceKHR surface; + VkResult res = 
vkCreateWin32SurfaceKHR(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateWin32SurfaceKHR failed: "); + return VK_NULL_HANDLE; + } + + return surface; + +#elif defined(VK_USE_PLATFORM_XLIB_KHR) + // Assuming the display handles are compatible, or shared. This matches what we do in the + // GL backend, but it's not ideal. + Display* display = XOpenDisplay(nullptr); + + VkXlibSurfaceCreateInfoKHR surface_create_info = { + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkXlibSurfaceCreateFlagsKHR flags + display, // Display* dpy + reinterpret_cast(hwnd) // Window window + }; + + VkSurfaceKHR surface; + VkResult res = vkCreateXlibSurfaceKHR(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateXlibSurfaceKHR failed: "); + return VK_NULL_HANDLE; + } + + return surface; + +#elif defined(VK_USE_PLATFORM_XCB_KHR) + // If we ever switch to using xcb, we should pass the display handle as well. 
+ Display* display = XOpenDisplay(nullptr); + xcb_connection_t* connection = XGetXCBConnection(display); + + VkXcbSurfaceCreateInfoKHR surface_create_info = { + VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkXcbSurfaceCreateFlagsKHR flags + connection, // xcb_connection_t* connection + static_cast(reinterpret_cast(hwnd)) // xcb_window_t window + }; + + VkSurfaceKHR surface; + VkResult res = vkCreateXcbSurfaceKHR(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateXcbSurfaceKHR failed: "); + return VK_NULL_HANDLE; + } + + return surface; + +#elif defined(VK_USE_PLATFORM_ANDROID_KHR) + VkAndroidSurfaceCreateInfoKHR surface_create_info = { + VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAndroidSurfaceCreateFlagsKHR flags + reinterpret_cast(hwnd) // ANativeWindow* window + }; + + VkSurfaceKHR surface; + VkResult res = vkCreateAndroidSurfaceKHR(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateAndroidSurfaceKHR failed: "); + return VK_NULL_HANDLE; + } + + return surface; + +#else + return VK_NULL_HANDLE; +#endif +} + +std::unique_ptr SwapChain::Create(void* native_handle, VkSurfaceKHR surface) +{ + std::unique_ptr swap_chain = std::make_unique(native_handle, surface); + + if (!swap_chain->CreateSwapChain() || !swap_chain->CreateRenderPass() || + !swap_chain->SetupSwapChainImages()) + { + return nullptr; + } + + return swap_chain; +} + +bool SwapChain::SelectSurfaceFormat() +{ + u32 format_count; + VkResult res = vkGetPhysicalDeviceSurfaceFormatsKHR(g_vulkan_context->GetPhysicalDevice(), + m_surface, &format_count, nullptr); + if (res != VK_SUCCESS || format_count == 0) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceFormatsKHR failed: "); + return false; + } + + std::vector 
surface_formats(format_count); + res = vkGetPhysicalDeviceSurfaceFormatsKHR(g_vulkan_context->GetPhysicalDevice(), m_surface, + &format_count, surface_formats.data()); + _assert_(res == VK_SUCCESS); + + // If there is a single undefined surface format, the device doesn't care, so we'll just use RGBA + if (surface_formats[0].format == VK_FORMAT_UNDEFINED) + { + m_surface_format.format = VK_FORMAT_R8G8B8A8_UNORM; + m_surface_format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; + return true; + } + + // Use the first surface format, just use what it prefers. + // Some drivers seem to return a SRGB format here (Intel Mesa). + // This results in gamma correction when presenting to the screen, which we don't want. + // Use a linear format instead, if this is the case. + m_surface_format.format = Util::GetLinearFormat(surface_formats[0].format); + m_surface_format.colorSpace = surface_formats[0].colorSpace; + return true; +} + +bool SwapChain::SelectPresentMode() +{ + VkResult res; + u32 mode_count; + res = vkGetPhysicalDeviceSurfacePresentModesKHR(g_vulkan_context->GetPhysicalDevice(), m_surface, + &mode_count, nullptr); + if (res != VK_SUCCESS || mode_count == 0) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceFormatsKHR failed: "); + return false; + } + + std::vector present_modes(mode_count); + res = vkGetPhysicalDeviceSurfacePresentModesKHR(g_vulkan_context->GetPhysicalDevice(), m_surface, + &mode_count, present_modes.data()); + _assert_(res == VK_SUCCESS); + + // Checks if a particular mode is supported, if it is, returns that mode. + auto CheckForMode = [&present_modes](VkPresentModeKHR check_mode) { + auto it = std::find_if(present_modes.begin(), present_modes.end(), + [check_mode](VkPresentModeKHR mode) { return check_mode == mode; }); + return it != present_modes.end(); + }; + + // If vsync is enabled, prefer VK_PRESENT_MODE_FIFO_KHR. 
+ if (g_ActiveConfig.IsVSync()) + { + // Try for relaxed vsync first, since it's likely the VI won't line up with + // the refresh rate of the system exactly, so tearing once is better than + // waiting for the next vblank. + if (CheckForMode(VK_PRESENT_MODE_FIFO_RELAXED_KHR)) + { + m_present_mode = VK_PRESENT_MODE_FIFO_RELAXED_KHR; + return true; + } + + // Fall back to strict vsync. + if (CheckForMode(VK_PRESENT_MODE_FIFO_KHR)) + { + WARN_LOG(VIDEO, "Vulkan: FIFO_RELAXED not available, falling back to FIFO."); + m_present_mode = VK_PRESENT_MODE_FIFO_KHR; + return true; + } + } + + // Prefer screen-tearing, if possible, for lowest latency. + if (CheckForMode(VK_PRESENT_MODE_IMMEDIATE_KHR)) + { + m_present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; + return true; + } + + // Use optimized-vsync above vsync. + if (CheckForMode(VK_PRESENT_MODE_MAILBOX_KHR)) + { + m_present_mode = VK_PRESENT_MODE_MAILBOX_KHR; + return true; + } + + // Fall back to whatever is available. + m_present_mode = present_modes[0]; + return true; +} + +bool SwapChain::CreateRenderPass() +{ + // render pass for rendering to the swap chain + VkAttachmentDescription present_render_pass_attachments[] = { + {0, m_surface_format.format, VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR, + VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}}; + + VkAttachmentReference present_render_pass_color_attachment_references[] = { + {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}}; + + VkSubpassDescription present_render_pass_subpass_descriptions[] = { + {0, VK_PIPELINE_BIND_POINT_GRAPHICS, 0, nullptr, 1, + present_render_pass_color_attachment_references, nullptr, nullptr, 0, nullptr}}; + + VkRenderPassCreateInfo present_render_pass_info = { + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + nullptr, + 0, + static_cast(ArraySize(present_render_pass_attachments)), + 
present_render_pass_attachments, + static_cast(ArraySize(present_render_pass_subpass_descriptions)), + present_render_pass_subpass_descriptions, + 0, + nullptr}; + + VkResult res = vkCreateRenderPass(g_vulkan_context->GetDevice(), &present_render_pass_info, + nullptr, &m_render_pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass (present) failed: "); + return false; + } + + return true; +} + +void SwapChain::DestroyRenderPass() +{ + if (!m_render_pass) + return; + + g_command_buffer_mgr->DeferResourceDestruction(m_render_pass); + m_render_pass = nullptr; +} + +bool SwapChain::CreateSwapChain() +{ + // Look up surface properties to determine image count and dimensions + VkSurfaceCapabilitiesKHR surface_capabilities; + VkResult res = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(g_vulkan_context->GetPhysicalDevice(), + m_surface, &surface_capabilities); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR failed: "); + return false; + } + + // Select swap chain format and present mode + if (!SelectSurfaceFormat() || !SelectPresentMode()) + return false; + + // Select number of images in swap chain, we prefer one buffer in the background to work on + uint32_t image_count = + std::min(surface_capabilities.minImageCount + 1, surface_capabilities.maxImageCount); + + // Determine the dimensions of the swap chain. Values of -1 indicate the size we specify here + // determines window size? 
+ VkExtent2D size = surface_capabilities.currentExtent; + if (size.width == UINT32_MAX) + { + size.width = std::min(std::max(surface_capabilities.minImageExtent.width, 640u), + surface_capabilities.maxImageExtent.width); + size.height = std::min(std::max(surface_capabilities.minImageExtent.height, 480u), + surface_capabilities.maxImageExtent.height); + } + + // Prefer identity transform if possible + VkSurfaceTransformFlagBitsKHR transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + if (!(surface_capabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)) + transform = surface_capabilities.currentTransform; + + // Select swap chain flags, we only need a colour attachment + VkImageUsageFlags image_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + if (!(surface_capabilities.supportedUsageFlags & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) + { + ERROR_LOG(VIDEO, "Vulkan: Swap chain does not support usage as color attachment"); + return false; + } + + // Store the old/current swap chain when recreating for resize + VkSwapchainKHR old_swap_chain = m_swap_chain; + + // Now we can actually create the swap chain + // TODO: Handle case where the present queue is not the graphics queue. + VkSwapchainCreateInfoKHR swap_chain_info = {VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + nullptr, + 0, + m_surface, + image_count, + m_surface_format.format, + m_surface_format.colorSpace, + size, + 1, + image_usage, + VK_SHARING_MODE_EXCLUSIVE, + 0, + nullptr, + transform, + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + m_present_mode, + VK_TRUE, + old_swap_chain}; + + res = + vkCreateSwapchainKHR(g_vulkan_context->GetDevice(), &swap_chain_info, nullptr, &m_swap_chain); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSwapchainKHR failed: "); + return false; + } + + // Now destroy the old swap chain, since it's been recreated. + // We can do this immediately since all work should have been completed before calling resize. 
+ if (old_swap_chain != VK_NULL_HANDLE) + vkDestroySwapchainKHR(g_vulkan_context->GetDevice(), old_swap_chain, nullptr); + + m_width = size.width; + m_height = size.height; + return true; +} + +bool SwapChain::SetupSwapChainImages() +{ + _assert_(m_swap_chain_images.empty()); + + uint32_t image_count; + VkResult res = + vkGetSwapchainImagesKHR(g_vulkan_context->GetDevice(), m_swap_chain, &image_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetSwapchainImagesKHR failed: "); + return false; + } + + std::vector images(image_count); + res = vkGetSwapchainImagesKHR(g_vulkan_context->GetDevice(), m_swap_chain, &image_count, + images.data()); + _assert_(res == VK_SUCCESS); + + m_swap_chain_images.reserve(image_count); + for (uint32_t i = 0; i < image_count; i++) + { + SwapChainImage image; + image.image = images[i]; + + // Create texture object, which creates a view of the backbuffer + image.texture = Texture2D::CreateFromExistingImage( + m_width, m_height, 1, 1, m_surface_format.format, VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D, image.image); + + VkImageView view = image.texture->GetView(); + VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + nullptr, + 0, + m_render_pass, + 1, + &view, + m_width, + m_height, + 1}; + + res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, + &image.framebuffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + return false; + } + + m_swap_chain_images.emplace_back(std::move(image)); + } + + return true; +} + +void SwapChain::DestroySwapChainImages() +{ + for (const auto& it : m_swap_chain_images) + { + // Images themselves are cleaned up by the swap chain object + vkDestroyFramebuffer(g_vulkan_context->GetDevice(), it.framebuffer, nullptr); + } + m_swap_chain_images.clear(); +} + +void SwapChain::DestroySwapChain() +{ + if (m_swap_chain == VK_NULL_HANDLE) + return; + + 
vkDestroySwapchainKHR(g_vulkan_context->GetDevice(), m_swap_chain, nullptr); + m_swap_chain = VK_NULL_HANDLE; +} + +VkResult SwapChain::AcquireNextImage(VkSemaphore available_semaphore) +{ + VkResult res = + vkAcquireNextImageKHR(g_vulkan_context->GetDevice(), m_swap_chain, UINT64_MAX, + available_semaphore, VK_NULL_HANDLE, &m_current_swap_chain_image_index); + if (res != VK_SUCCESS && res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) + LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR failed: "); + + return res; +} + +bool SwapChain::ResizeSwapChain() +{ + if (!CreateSwapChain()) + return false; + + DestroySwapChainImages(); + if (!SetupSwapChainImages()) + { + PanicAlert("Failed to re-configure swap chain images, this is fatal (for now)"); + return false; + } + + return true; +} + +bool SwapChain::RecreateSurface(void* native_handle) +{ + // Destroy the old swap chain, images, and surface. + DestroyRenderPass(); + DestroySwapChainImages(); + DestroySwapChain(); + DestroySurface(); + + // Re-create the surface with the new native handle + m_native_handle = native_handle; + m_surface = CreateVulkanSurface(g_vulkan_context->GetVulkanInstance(), native_handle); + if (m_surface == VK_NULL_HANDLE) + return false; + + // Finally re-create the swap chain + if (!CreateSwapChain() || !SetupSwapChainImages() || !CreateRenderPass()) + return false; + + return true; +} + +void SwapChain::DestroySurface() +{ + vkDestroySurfaceKHR(g_vulkan_context->GetVulkanInstance(), m_surface, nullptr); + m_surface = VK_NULL_HANDLE; +} +} diff --git a/Source/Core/VideoBackends/Vulkan/SwapChain.h b/Source/Core/VideoBackends/Vulkan/SwapChain.h new file mode 100644 index 0000000000..bb259b27c9 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/SwapChain.h @@ -0,0 +1,93 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoBackends/Vulkan/Texture2D.h" + +namespace Vulkan +{ +class CommandBufferManager; +class ObjectCache; + +class SwapChain +{ +public: + SwapChain(void* native_handle, VkSurfaceKHR surface); + ~SwapChain(); + + // Creates a vulkan-renderable surface for the specified window handle. + static VkSurfaceKHR CreateVulkanSurface(VkInstance instance, void* hwnd); + + // Create a new swap chain from a pre-existing surface. + static std::unique_ptr Create(void* native_handle, VkSurfaceKHR surface); + + void* GetNativeHandle() const { return m_native_handle; } + VkSurfaceKHR GetSurface() const { return m_surface; } + VkSurfaceFormatKHR GetSurfaceFormat() const { return m_surface_format; } + VkSwapchainKHR GetSwapChain() const { return m_swap_chain; } + VkRenderPass GetRenderPass() const { return m_render_pass; } + u32 GetWidth() const { return m_width; } + u32 GetHeight() const { return m_height; } + u32 GetCurrentImageIndex() const { return m_current_swap_chain_image_index; } + VkImage GetCurrentImage() const + { + return m_swap_chain_images[m_current_swap_chain_image_index].image; + } + Texture2D* GetCurrentTexture() const + { + return m_swap_chain_images[m_current_swap_chain_image_index].texture.get(); + } + VkFramebuffer GetCurrentFramebuffer() const + { + return m_swap_chain_images[m_current_swap_chain_image_index].framebuffer; + } + + VkResult AcquireNextImage(VkSemaphore available_semaphore); + + bool RecreateSurface(void* native_handle); + bool ResizeSwapChain(); + +private: + bool SelectSurfaceFormat(); + bool SelectPresentMode(); + + bool CreateSwapChain(); + void DestroySwapChain(); + + bool CreateRenderPass(); + void DestroyRenderPass(); + + bool SetupSwapChainImages(); + void DestroySwapChainImages(); + + void DestroySurface(); + + struct SwapChainImage + { + VkImage image; + std::unique_ptr texture; + VkFramebuffer framebuffer; + }; + + void* m_native_handle = 
nullptr; + VkSurfaceKHR m_surface = nullptr; + VkSurfaceFormatKHR m_surface_format = {}; + VkPresentModeKHR m_present_mode = VK_PRESENT_MODE_RANGE_SIZE_KHR; + + VkSwapchainKHR m_swap_chain = nullptr; + std::vector m_swap_chain_images; + u32 m_current_swap_chain_image_index = 0; + + VkRenderPass m_render_pass = nullptr; + + u32 m_width = 0; + u32 m_height = 0; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp b/Source/Core/VideoBackends/Vulkan/Texture2D.cpp new file mode 100644 index 0000000000..470328c47c --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/Texture2D.cpp @@ -0,0 +1,280 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/Texture2D.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +namespace Vulkan +{ +Texture2D::Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, + VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image, + VkDeviceMemory device_memory, VkImageView view) + : m_width(width), m_height(height), m_levels(levels), m_layers(layers), m_format(format), + m_samples(samples), m_view_type(view_type), m_image(image), m_device_memory(device_memory), + m_view(view) +{ +} + +Texture2D::~Texture2D() +{ + g_command_buffer_mgr->DeferResourceDestruction(m_view); + + // If we don't have device memory allocated, the image is not owned by us (e.g. 
swapchain) + if (m_device_memory != VK_NULL_HANDLE) + { + g_command_buffer_mgr->DeferResourceDestruction(m_image); + g_command_buffer_mgr->DeferResourceDestruction(m_device_memory); + } +} + +std::unique_ptr Texture2D::Create(u32 width, u32 height, u32 levels, u32 layers, + VkFormat format, VkSampleCountFlagBits samples, + VkImageViewType view_type, VkImageTiling tiling, + VkImageUsageFlags usage) +{ + VkImageCreateInfo image_info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + nullptr, + 0, + VK_IMAGE_TYPE_2D, + format, + {width, height, 1}, + levels, + layers, + samples, + tiling, + usage, + VK_SHARING_MODE_EXCLUSIVE, + 0, + nullptr, + VK_IMAGE_LAYOUT_UNDEFINED}; + + VkImage image = VK_NULL_HANDLE; + VkResult res = vkCreateImage(g_vulkan_context->GetDevice(), &image_info, nullptr, &image); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImage failed: "); + return nullptr; + } + + // Allocate memory to back this texture, we want device local memory in this case + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(g_vulkan_context->GetDevice(), image, &memory_requirements); + + VkMemoryAllocateInfo memory_info = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size, + g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)}; + + VkDeviceMemory device_memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + return nullptr; + } + + res = vkBindImageMemory(g_vulkan_context->GetDevice(), image, device_memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindImageMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); + return nullptr; + } + + 
VkImageViewCreateInfo view_info = { + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + nullptr, + 0, + image, + view_type, + format, + {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY}, + {Util::IsDepthFormat(format) ? static_cast(VK_IMAGE_ASPECT_DEPTH_BIT) : + static_cast(VK_IMAGE_ASPECT_COLOR_BIT), + 0, levels, 0, layers}}; + + VkImageView view = VK_NULL_HANDLE; + res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); + return nullptr; + } + + return std::make_unique(width, height, levels, layers, format, samples, view_type, + image, device_memory, view); +} + +std::unique_ptr Texture2D::CreateFromExistingImage(u32 width, u32 height, u32 levels, + u32 layers, VkFormat format, + VkSampleCountFlagBits samples, + VkImageViewType view_type, + VkImage existing_image) +{ + // Only need to create the image view, this is mainly for swap chains. + VkImageViewCreateInfo view_info = { + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + nullptr, + 0, + existing_image, + view_type, + format, + {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY}, + {Util::IsDepthFormat(format) ? 
static_cast(VK_IMAGE_ASPECT_DEPTH_BIT) : + static_cast(VK_IMAGE_ASPECT_COLOR_BIT), + 0, levels, 0, layers}}; + + VkImageView view = VK_NULL_HANDLE; + VkResult res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); + return nullptr; + } + + return std::make_unique(width, height, levels, layers, format, samples, view_type, + existing_image, nullptr, view); +} + +void Texture2D::OverrideImageLayout(VkImageLayout new_layout) +{ + m_layout = new_layout; +} + +void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) +{ + if (m_layout == new_layout) + return; + + VkImageMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAccessFlags srcAccessMask + 0, // VkAccessFlags dstAccessMask + m_layout, // VkImageLayout oldLayout + new_layout, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_image, // VkImage image + {static_cast(Util::IsDepthFormat(m_format) ? VK_IMAGE_ASPECT_DEPTH_BIT : + VK_IMAGE_ASPECT_COLOR_BIT), + 0, m_levels, 0, m_layers} // VkImageSubresourceRange subresourceRange + }; + + // srcStageMask -> Stages that must complete before the barrier + // dstStageMask -> Stages that must wait for after the barrier before beginning + VkPipelineStageFlags srcStageMask, dstStageMask; + switch (m_layout) + { + case VK_IMAGE_LAYOUT_UNDEFINED: + // Layout undefined therefore contents undefined, and we don't care what happens to it. + barrier.srcAccessMask = 0; + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_PREINITIALIZED: + // Image has been pre-initialized by the host, so ensure all writes have completed. 
+ barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; + break; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + // Image was being used as a color attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + // Image was being used as a depthstencil attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + // Image was being used as a shader resource, make sure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + // Image was being used as a copy source, ensure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + // Image was being used as a copy destination, ensure all writes have finished. 
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + default: + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + } + + switch (new_layout) + { + case VK_IMAGE_LAYOUT_UNDEFINED: + barrier.dstAccessMask = 0; + dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + // TODO: Can we use FRAGMENT_SHADER here? We don't sample textures in the earlier stages. + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + default: + dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + break; + } + + vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, + &barrier); + + m_layout = new_layout; +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.h b/Source/Core/VideoBackends/Vulkan/Texture2D.h new file mode 100644 index 0000000000..2d9dccc2ce --- 
/dev/null +++ b/Source/Core/VideoBackends/Vulkan/Texture2D.h @@ -0,0 +1,67 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "VideoBackends/Vulkan/Constants.h" + +namespace Vulkan +{ +class CommandBufferManager; +class ObjectCache; + +class Texture2D +{ +public: + Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, + VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image, + VkDeviceMemory device_memory, VkImageView view); + ~Texture2D(); + + static std::unique_ptr Create(u32 width, u32 height, u32 levels, u32 layers, + VkFormat format, VkSampleCountFlagBits samples, + VkImageViewType view_type, VkImageTiling tiling, + VkImageUsageFlags usage); + + static std::unique_ptr CreateFromExistingImage(u32 width, u32 height, u32 levels, + u32 layers, VkFormat format, + VkSampleCountFlagBits samples, + VkImageViewType view_type, + VkImage existing_image); + + u32 GetWidth() const { return m_width; } + u32 GetHeight() const { return m_height; } + u32 GetLevels() const { return m_levels; } + u32 GetLayers() const { return m_layers; } + VkFormat GetFormat() const { return m_format; } + VkSampleCountFlagBits GetSamples() const { return m_samples; } + VkImageLayout GetLayout() const { return m_layout; } + VkImageViewType GetViewType() const { return m_view_type; } + VkImage GetImage() const { return m_image; } + VkDeviceMemory GetDeviceMemory() const { return m_device_memory; } + VkImageView GetView() const { return m_view; } + // Used when the render pass is changing the image layout, or to force it to + // VK_IMAGE_LAYOUT_UNDEFINED, if the existing contents of the image is + // irrelevant and will not be loaded. 
+ void OverrideImageLayout(VkImageLayout new_layout); + + void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout); + +private: + u32 m_width; + u32 m_height; + u32 m_levels; + u32 m_layers; + VkFormat m_format; + VkSampleCountFlagBits m_samples; + VkImageViewType m_view_type; + VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImage m_image; + VkDeviceMemory m_device_memory; + VkImageView m_view; +}; +} diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp new file mode 100644 index 0000000000..65ab7d737c --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp @@ -0,0 +1,740 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include +#include + +#include "Common/Assert.h" +#include "Common/CommonFuncs.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/PaletteTextureConverter.h" +#include "VideoBackends/Vulkan/Renderer.h" +#include "VideoBackends/Vulkan/StagingTexture2D.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" +#include "VideoBackends/Vulkan/Texture2D.h" +#include "VideoBackends/Vulkan/TextureCache.h" +#include "VideoBackends/Vulkan/TextureEncoder.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +#include "VideoCommon/ImageWrite.h" + +namespace Vulkan +{ +TextureCache::TextureCache() +{ +} + +TextureCache::~TextureCache() +{ + if (m_initialize_render_pass != VK_NULL_HANDLE) + vkDestroyRenderPass(g_vulkan_context->GetDevice(), m_initialize_render_pass, nullptr); + if (m_update_render_pass != VK_NULL_HANDLE) + vkDestroyRenderPass(g_vulkan_context->GetDevice(), m_update_render_pass, nullptr); +} + +bool TextureCache::Initialize(StateTracker* 
state_tracker) +{ + m_state_tracker = state_tracker; + m_texture_upload_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE, + MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE); + if (!m_texture_upload_buffer) + { + PanicAlert("Failed to create texture upload buffer"); + return false; + } + + if (!CreateRenderPasses()) + { + PanicAlert("Failed to create copy render pass"); + return false; + } + + m_texture_encoder = std::make_unique(); + if (!m_texture_encoder->Initialize()) + { + PanicAlert("Failed to initialize texture encoder."); + return false; + } + + m_palette_texture_converter = std::make_unique(); + if (!m_palette_texture_converter->Initialize()) + { + PanicAlert("Failed to initialize palette texture converter"); + return false; + } + + if (!CompileShaders()) + { + PanicAlert("Failed to compile one or more shaders"); + return false; + } + + return true; +} + +void TextureCache::ConvertTexture(TCacheEntryBase* base_entry, TCacheEntryBase* base_unconverted, + void* palette, TlutFormat format) +{ + TCacheEntry* entry = static_cast(base_entry); + TCacheEntry* unconverted = static_cast(base_unconverted); + _assert_(entry->config.rendertarget); + + m_palette_texture_converter->ConvertTexture( + m_state_tracker, GetRenderPassForTextureUpdate(entry->GetTexture()), entry->GetFramebuffer(), + unconverted->GetTexture(), entry->config.width, entry->config.height, palette, format); + + // Render pass transitions to SHADER_READ_ONLY. + entry->GetTexture()->OverrideImageLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); +} + +void TextureCache::CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, + u32 num_blocks_y, u32 memory_stride, PEControl::PixelFormat src_format, + const EFBRectangle& src_rect, bool is_intensity, bool scale_by_half) +{ + // A better way of doing this would be nice. 
+ FramebufferManager* framebuffer_mgr = + static_cast(g_framebuffer_manager.get()); + + // Flush EFB pokes first, as they're expected to be included. + framebuffer_mgr->FlushEFBPokes(m_state_tracker); + + // MSAA case where we need to resolve first. + // TODO: Do in one pass. + TargetRectangle scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); + VkRect2D region = {{scaled_src_rect.left, scaled_src_rect.top}, + {static_cast(scaled_src_rect.GetWidth()), + static_cast(scaled_src_rect.GetHeight())}}; + Texture2D* src_texture = (src_format == PEControl::Z24) ? + framebuffer_mgr->ResolveEFBDepthTexture(m_state_tracker, region) : + framebuffer_mgr->ResolveEFBColorTexture(m_state_tracker, region); + + // End render pass before barrier (since we have no self-dependencies) + m_state_tracker->EndRenderPass(); + m_state_tracker->SetPendingRebind(); + m_state_tracker->InvalidateDescriptorSets(); + m_state_tracker->OnReadback(); + + // Transition to shader resource before reading. + VkImageLayout original_layout = src_texture->GetLayout(); + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + m_texture_encoder->EncodeTextureToRam(m_state_tracker, src_texture->GetView(), dst, format, + native_width, bytes_per_row, num_blocks_y, memory_stride, + src_format, is_intensity, scale_by_half, src_rect); + + // Transition back to original state + src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout); +} + +TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConfig& config) +{ + // Determine image usage, we need to flag as an attachment if it can be used as a rendertarget. 
+ VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + if (config.rendertarget) + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + // Allocate texture object + std::unique_ptr texture = Texture2D::Create( + config.width, config.height, config.levels, config.layers, TEXTURECACHE_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage); + + if (!texture) + return nullptr; + + // If this is a render target (for efb copies), allocate a framebuffer + VkFramebuffer framebuffer = VK_NULL_HANDLE; + if (config.rendertarget) + { + VkImageView framebuffer_attachments[] = {texture->GetView()}; + VkFramebufferCreateInfo framebuffer_info = { + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + nullptr, + 0, + m_initialize_render_pass, + static_cast(ArraySize(framebuffer_attachments)), + framebuffer_attachments, + texture->GetWidth(), + texture->GetHeight(), + texture->GetLayers()}; + + VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, + &framebuffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + return nullptr; + } + + // Clear render targets before use to prevent reading uninitialized memory. 
+ VkClearColorValue clear_value = {{0.0f, 0.0f, 0.0f, 1.0f}}; + VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, config.levels, 0, + config.layers}; + texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), texture->GetImage(), + texture->GetLayout(), &clear_value, 1, &clear_range); + } + + return new TCacheEntry(config, this, std::move(texture), framebuffer); +} + +bool TextureCache::CreateRenderPasses() +{ + static constexpr VkAttachmentDescription initialize_attachment = { + 0, + TEXTURECACHE_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_STORE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + + static constexpr VkAttachmentDescription update_attachment = { + 0, + TEXTURECACHE_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_STORE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + + static constexpr VkAttachmentReference color_attachment_reference = { + 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; + + static constexpr VkSubpassDescription subpass_description = { + 0, VK_PIPELINE_BIND_POINT_GRAPHICS, + 0, nullptr, + 1, &color_attachment_reference, + nullptr, nullptr, + 0, nullptr}; + + static constexpr VkSubpassDependency initialize_dependancies[] = { + {VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_DEPENDENCY_BY_REGION_BIT}, + {0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + 
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT}}; + + static constexpr VkSubpassDependency update_dependancies[] = { + {VK_SUBPASS_EXTERNAL, 0, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_DEPENDENCY_BY_REGION_BIT}, + {0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT}}; + + VkRenderPassCreateInfo initialize_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + nullptr, + 0, + 1, + &initialize_attachment, + 1, + &subpass_description, + static_cast(ArraySize(initialize_dependancies)), + initialize_dependancies}; + + VkRenderPassCreateInfo update_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + nullptr, + 0, + 1, + &update_attachment, + 1, + &subpass_description, + static_cast(ArraySize(update_dependancies)), + update_dependancies}; + + VkResult res = vkCreateRenderPass(g_vulkan_context->GetDevice(), &initialize_info, nullptr, + &m_initialize_render_pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass (initialize) failed: "); + return false; + } + + res = vkCreateRenderPass(g_vulkan_context->GetDevice(), &update_info, nullptr, + &m_update_render_pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass (update) failed: "); + return false; + } + + return true; +} + +VkRenderPass TextureCache::GetRenderPassForTextureUpdate(const Texture2D* texture) const +{ + // EFB copies can be re-used as part of the texture pool. 
If this is the case, we need to insert + // a pipeline barrier to ensure that all reads from the texture expecting the old data have + // completed before overwriting the texture's contents. New textures will be in TRANSFER_DST + // due to the clear after creation. + + // These two render passes are compatible, so even though the framebuffer was created with + // the initialize render pass it's still allowed. + + if (texture->GetLayout() == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + return m_initialize_render_pass; + else + return m_update_render_pass; +} + +TextureCache::TCacheEntry::TCacheEntry(const TCacheEntryConfig& config_, TextureCache* parent, + std::unique_ptr texture, + VkFramebuffer framebuffer) + : TCacheEntryBase(config_), m_parent(parent), m_texture(std::move(texture)), + m_framebuffer(framebuffer) +{ +} + +TextureCache::TCacheEntry::~TCacheEntry() +{ + // Texture is automatically cleaned up, however, we don't want to leave it bound to the state + // tracker. + m_parent->m_state_tracker->UnbindTexture(m_texture->GetView()); + + if (m_framebuffer != VK_NULL_HANDLE) + g_command_buffer_mgr->DeferResourceDestruction(m_framebuffer); +} + +void TextureCache::TCacheEntry::Load(unsigned int width, unsigned int height, + unsigned int expanded_width, unsigned int level) +{ + // Can't copy data larger than the texture extents. + width = std::max(1u, std::min(width, m_texture->GetWidth() >> level)); + height = std::max(1u, std::min(height, m_texture->GetHeight() >> level)); + + // We don't care about the existing contents of the texture, so we set the image layout to + // VK_IMAGE_LAYOUT_UNDEFINED here. However, if this texture is being re-used from the texture + // pool, it may still be in use. We assume that it's not, as non-efb-copy textures are only + // returned to the pool when the frame number is different, furthermore, we're doing this + // on the initialize command buffer, so a texture being re-used mid-frame would have undesirable + // effects regardless. 
+ VkImageMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAccessFlags srcAccessMask + VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask + VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_texture->GetImage(), // VkImage image + {VK_IMAGE_ASPECT_COLOR_BIT, level, 1, 0, 1}, // VkImageSubresourceRange subresourceRange + }; + vkCmdPipelineBarrier(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, + nullptr, 0, nullptr, 1, &barrier); + + // Does this texture data fit within the streaming buffer? + u32 upload_width = width; + u32 upload_pitch = upload_width * sizeof(u32); + u32 upload_size = upload_pitch * height; + u32 upload_alignment = static_cast(g_vulkan_context->GetBufferImageGranularity()); + u32 source_pitch = expanded_width * 4; + if ((upload_size + upload_alignment) <= STAGING_TEXTURE_UPLOAD_THRESHOLD && + (upload_size + upload_alignment) <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE) + { + // Assume tightly packed rows, with no padding as the buffer source. + StreamBuffer* upload_buffer = m_parent->m_texture_upload_buffer.get(); + + // Allocate memory from the streaming buffer for the texture data. + if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity())) + { + // Execute the command buffer first. + WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer"); + Util::ExecuteCurrentCommandsAndRestoreState(m_parent->m_state_tracker, false); + + // Try allocating again. This may cause a fence wait. 
+ if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity())) + PanicAlert("Failed to allocate space in texture upload buffer"); + } + + // Grab buffer pointers + VkBuffer image_upload_buffer = upload_buffer->GetBuffer(); + VkDeviceSize image_upload_buffer_offset = upload_buffer->GetCurrentOffset(); + u8* image_upload_buffer_pointer = upload_buffer->GetCurrentHostPointer(); + + // Copy to the buffer using the stride from the subresource layout + const u8* source_ptr = TextureCache::temp; + if (upload_pitch != source_pitch) + { + VkDeviceSize copy_pitch = std::min(source_pitch, upload_pitch); + for (unsigned int row = 0; row < height; row++) + { + memcpy(image_upload_buffer_pointer + row * upload_pitch, source_ptr + row * source_pitch, + copy_pitch); + } + } + else + { + // Can copy the whole thing in one block, the pitch matches + memcpy(image_upload_buffer_pointer, source_ptr, upload_size); + } + + // Flush buffer memory if necessary + upload_buffer->CommitMemory(upload_size); + + // Copy from the streaming buffer to the actual image. + VkBufferImageCopy image_copy = { + image_upload_buffer_offset, // VkDeviceSize bufferOffset + 0, // uint32_t bufferRowLength + 0, // uint32_t bufferImageHeight + {VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource + {0, 0, 0}, // VkOffset3D imageOffset + {width, height, 1} // VkExtent3D imageExtent + }; + vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), image_upload_buffer, + m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + &image_copy); + } + else + { + // Slow path. The data for the image is too large to fit in the streaming buffer, so we need + // to allocate a temporary texture to store the data in, then copy to the real texture. 
+ std::unique_ptr staging_texture = StagingTexture2D::Create( + STAGING_BUFFER_TYPE_UPLOAD, width, height, TEXTURECACHE_TEXTURE_FORMAT); + + if (!staging_texture || !staging_texture->Map()) + { + PanicAlert("Failed to allocate staging texture for large texture upload."); + return; + } + + // Copy data to staging texture first, then to the "real" texture. + staging_texture->WriteTexels(0, 0, width, height, TextureCache::temp, source_pitch); + staging_texture->CopyToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + m_texture->GetImage(), VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, width, + height, level, 0); + } + + // Transition to shader read only. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + vkCmdPipelineBarrier(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, + nullptr, 0, nullptr, 1, &barrier); + m_texture->OverrideImageLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); +} + +void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat src_format, + const EFBRectangle& src_rect, bool scale_by_half, + unsigned int cbufid, const float* colmat) +{ + // A better way of doing this would be nice. + FramebufferManager* framebuffer_mgr = + static_cast(g_framebuffer_manager.get()); + TargetRectangle scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); + bool is_depth_copy = (src_format == PEControl::Z24); + + // Flush EFB pokes first, as they're expected to be included. + framebuffer_mgr->FlushEFBPokes(m_parent->m_state_tracker); + + // Has to be flagged as a render target. + _assert_(m_framebuffer != VK_NULL_HANDLE); + + // Can't be done in a render pass, since we're doing our own render pass! 
+ StateTracker* state_tracker = m_parent->m_state_tracker; + VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + state_tracker->EndRenderPass(); + + // Transition EFB to shader resource before binding + VkRect2D region = {{scaled_src_rect.left, scaled_src_rect.top}, + {static_cast(scaled_src_rect.GetWidth()), + static_cast(scaled_src_rect.GetHeight())}}; + Texture2D* src_texture = is_depth_copy ? + framebuffer_mgr->ResolveEFBDepthTexture(state_tracker, region) : + framebuffer_mgr->ResolveEFBColorTexture(state_tracker, region); + VkSampler src_sampler = + scale_by_half ? g_object_cache->GetLinearSampler() : g_object_cache->GetPointSampler(); + VkImageLayout original_layout = src_texture->GetLayout(); + src_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + UtilityShaderDraw draw( + command_buffer, g_object_cache->GetPushConstantPipelineLayout(), + m_parent->GetRenderPassForTextureUpdate(m_texture.get()), + g_object_cache->GetPassthroughVertexShader(), g_object_cache->GetPassthroughGeometryShader(), + is_depth_copy ? m_parent->m_efb_depth_to_tex_shader : m_parent->m_efb_color_to_tex_shader); + + draw.SetPushConstants(colmat, (is_depth_copy ? sizeof(float) * 20 : sizeof(float) * 28)); + draw.SetPSSampler(0, src_texture->GetView(), src_sampler); + + VkRect2D dest_region = {{0, 0}, {m_texture->GetWidth(), m_texture->GetHeight()}}; + + draw.BeginRenderPass(m_framebuffer, dest_region); + + draw.DrawQuad(0, 0, config.width, config.height, scaled_src_rect.left, scaled_src_rect.top, 0, + scaled_src_rect.GetWidth(), scaled_src_rect.GetHeight(), + framebuffer_mgr->GetEFBWidth(), framebuffer_mgr->GetEFBHeight()); + + draw.EndRenderPass(); + + // We touched everything, so put it back. + state_tracker->SetPendingRebind(); + + // Transition the EFB back to its original layout. + src_texture->TransitionToLayout(command_buffer, original_layout); + + // Render pass transitions texture to SHADER_READ_ONLY. 
+ m_texture->OverrideImageLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); +} + +void TextureCache::TCacheEntry::CopyRectangleFromTexture(const TCacheEntryBase* source, + const MathUtil::Rectangle& src_rect, + const MathUtil::Rectangle& dst_rect) +{ + const TCacheEntry* source_vk = static_cast(source); + VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + + // Fast path when not scaling the image. + if (src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()) + { + // These assertions should hold true unless the base code is passing us sizes too large, in + // which case it should be fixed instead. + _assert_msg_(VIDEO, static_cast(src_rect.GetWidth()) <= source->config.width && + static_cast(src_rect.GetHeight()) <= source->config.height, + "Source rect is too large for CopyRectangleFromTexture"); + + _assert_msg_(VIDEO, static_cast(dst_rect.GetWidth()) <= config.width && + static_cast(dst_rect.GetHeight()) <= config.height, + "Dest rect is too large for CopyRectangleFromTexture"); + + VkImageCopy image_copy = { + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, + source->config.layers}, // VkImageSubresourceLayers srcSubresource + {src_rect.left, src_rect.top, 0}, // VkOffset3D srcOffset + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, + config.layers}, // VkImageSubresourceLayers dstSubresource + {dst_rect.left, dst_rect.top, 0}, // VkOffset3D dstOffset + {static_cast(src_rect.GetWidth()), static_cast(src_rect.GetHeight()), + 1} // VkExtent3D extent + }; + + // Must be called outside of a render pass. 
+ m_parent->m_state_tracker->EndRenderPass(); + + source_vk->m_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + vkCmdCopyImage(command_buffer, source_vk->m_texture->GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_texture->GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); + + m_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + source_vk->m_texture->TransitionToLayout(command_buffer, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + return; + } + + // Can't do this within a game render pass. + m_parent->m_state_tracker->EndRenderPass(); + m_parent->m_state_tracker->SetPendingRebind(); + + // Can't render to a non-rendertarget (no framebuffer). + _assert_msg_(VIDEO, config.rendertarget, + "Destination texture for partial copy is not a rendertarget"); + + UtilityShaderDraw draw( + g_command_buffer_mgr->GetCurrentCommandBuffer(), g_object_cache->GetStandardPipelineLayout(), + m_parent->GetRenderPassForTextureUpdate(m_texture.get()), + g_object_cache->GetPassthroughVertexShader(), VK_NULL_HANDLE, m_parent->m_copy_shader); + + VkRect2D region = { + {dst_rect.left, dst_rect.top}, + {static_cast(dst_rect.GetWidth()), static_cast(dst_rect.GetHeight())}}; + draw.BeginRenderPass(m_framebuffer, region); + draw.SetPSSampler(0, source_vk->GetTexture()->GetView(), g_object_cache->GetLinearSampler()); + draw.DrawQuad(dst_rect.left, dst_rect.top, dst_rect.GetWidth(), dst_rect.GetHeight(), + src_rect.left, src_rect.top, 0, src_rect.GetWidth(), src_rect.GetHeight(), + source->config.width, source->config.height); + draw.EndRenderPass(); + + // Render pass transitions texture to SHADER_READ_ONLY. 
+ m_texture->OverrideImageLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); +} + +void TextureCache::TCacheEntry::Bind(unsigned int stage) +{ + m_parent->m_state_tracker->SetTexture(stage, m_texture->GetView()); +} + +bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level) +{ + _assert_(level < config.levels); + + // Determine dimensions of image we want to save. + u32 level_width = std::max(1u, config.width >> level); + u32 level_height = std::max(1u, config.height >> level); + + // Use a temporary staging texture for the download. Certainly not optimal, + // but since we have to idle the GPU anyway it doesn't really matter. + std::unique_ptr staging_texture = StagingTexture2D::Create( + STAGING_BUFFER_TYPE_READBACK, level_width, level_height, TEXTURECACHE_TEXTURE_FORMAT); + + // Transition image to transfer source, and invalidate the current state, + // since we'll be executing the command buffer. + m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_parent->m_state_tracker->EndRenderPass(); + + // Copy to download buffer. + staging_texture->CopyFromImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), + m_texture->GetImage(), VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, + level_width, level_height, level, 0); + + // Restore original state of texture. + m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + // Block until the GPU has finished copying to the staging texture. + g_command_buffer_mgr->ExecuteCommandBuffer(false, true); + m_parent->m_state_tracker->InvalidateDescriptorSets(); + m_parent->m_state_tracker->SetPendingRebind(); + + // Map the staging texture so we can copy the contents out. + if (staging_texture->Map()) + { + PanicAlert("Failed to map staging texture"); + return false; + } + + // Write texture out to file. 
+ // It's okay to throw this texture away immediately, since we're done with it, and + // we blocked until the copy completed on the GPU anyway. + bool result = TextureToPng(reinterpret_cast(staging_texture->GetMapPointer()), + staging_texture->GetRowStride(), filename, level_width, level_height); + + staging_texture->Unmap(); + return result; +} + +bool TextureCache::CompileShaders() +{ + static const char COPY_SHADER_SOURCE[] = R"( + layout(set = 1, binding = 0) uniform sampler2DArray samp0; + + layout(location = 0) in float3 uv0; + layout(location = 1) in float4 col0; + layout(location = 0) out float4 ocol0; + + void main() + { + ocol0 = texture(samp0, uv0); + } + )"; + + static const char EFB_COLOR_TO_TEX_SOURCE[] = R"( + SAMPLER_BINDING(0) uniform sampler2DArray samp0; + + layout(std140, push_constant) uniform PSBlock + { + vec4 colmat[7]; + } C; + + layout(location = 0) in vec3 uv0; + layout(location = 1) in vec4 col0; + layout(location = 0) out vec4 ocol0; + + void main() + { + float4 texcol = texture(samp0, uv0); + texcol = round(texcol * C.colmat[5]) * C.colmat[6]; + ocol0 = texcol * mat4(C.colmat[0], C.colmat[1], C.colmat[2], C.colmat[3]) + C.colmat[4]; + } + )"; + + static const char EFB_DEPTH_TO_TEX_SOURCE[] = R"( + SAMPLER_BINDING(0) uniform sampler2DArray samp0; + + layout(std140, push_constant) uniform PSBlock + { + vec4 colmat[5]; + } C; + + layout(location = 0) in vec3 uv0; + layout(location = 1) in vec4 col0; + layout(location = 0) out vec4 ocol0; + + void main() + { + #if MONO_DEPTH + vec4 texcol = texture(samp0, vec3(uv0.xy, 0.0f)); + #else + vec4 texcol = texture(samp0, uv0); + #endif + int depth = int((1.0 - texcol.x) * 16777216.0); + + // Convert to Z24 format + ivec4 workspace; + workspace.r = (depth >> 16) & 255; + workspace.g = (depth >> 8) & 255; + workspace.b = depth & 255; + + // Convert to Z4 format + workspace.a = (depth >> 16) & 0xF0; + + // Normalize components to [0.0..1.0] + texcol = vec4(workspace) / 255.0; + + ocol0 = texcol * 
mat4(C.colmat[0], C.colmat[1], C.colmat[2], C.colmat[3]) + C.colmat[4]; + } + )"; + + std::string header = g_object_cache->GetUtilityShaderHeader(); + std::string source; + + source = header + COPY_SHADER_SOURCE; + m_copy_shader = Util::CompileAndCreateFragmentShader(source); + + source = header + EFB_COLOR_TO_TEX_SOURCE; + m_efb_color_to_tex_shader = Util::CompileAndCreateFragmentShader(source); + + if (g_ActiveConfig.bStereoEFBMonoDepth) + source = header + "#define MONO_DEPTH 1\n" + EFB_DEPTH_TO_TEX_SOURCE; + else + source = header + EFB_DEPTH_TO_TEX_SOURCE; + m_efb_depth_to_tex_shader = Util::CompileAndCreateFragmentShader(source); + + return (m_copy_shader != VK_NULL_HANDLE && m_efb_color_to_tex_shader != VK_NULL_HANDLE && + m_efb_depth_to_tex_shader != VK_NULL_HANDLE); +} + +void TextureCache::DeleteShaders() +{ + auto DestroyShader = [this](VkShaderModule& shader) { + if (shader != VK_NULL_HANDLE) + { + vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); + shader = VK_NULL_HANDLE; + } + }; + + // Since this can be called by the base class we need to wait for idle. + g_command_buffer_mgr->WaitForGPUIdle(); + + DestroyShader(m_copy_shader); + DestroyShader(m_efb_color_to_tex_shader); + DestroyShader(m_efb_depth_to_tex_shader); +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.h b/Source/Core/VideoBackends/Vulkan/TextureCache.h new file mode 100644 index 0000000000..bfef957117 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.h @@ -0,0 +1,87 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include
+
+#include "VideoBackends/Vulkan/StreamBuffer.h"
+
+#include "VideoCommon/TextureCacheBase.h"
+
+namespace Vulkan
+{
+class PaletteTextureConverter;
+class StateTracker;
+class Texture2D;
+class TextureEncoder;
+
+// Vulkan implementation of the texture cache. Derives from TextureCacheBase and overrides its
+// shader-management, texture-conversion and EFB-copy entry points.
+class TextureCache : public TextureCacheBase
+{
+public:
+  TextureCache();
+  ~TextureCache();
+
+  // Receives the state tracker that the cache entries later use to bind/unbind textures and to
+  // end render passes. NOTE(review): the definition is not part of this header; presumably it
+  // also creates the render passes, upload buffer and helper objects declared below - confirm.
+  bool Initialize(StateTracker* state_tracker);
+
+  // Compiles the copy, EFB-colour-to-texture and EFB-depth-to-texture fragment shaders.
+  // Returns true only if all three shader modules were created.
+  bool CompileShaders() override;
+
+  // Waits for the GPU to go idle, then destroys the three shader modules and resets the handles
+  // to VK_NULL_HANDLE.
+  void DeleteShaders() override;
+  void ConvertTexture(TCacheEntryBase* base_entry, TCacheEntryBase* base_unconverted, void* palette,
+                      TlutFormat format) override;
+
+  void CopyEFB(u8* dst, u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
+               u32 memory_stride, PEControl::PixelFormat src_format, const EFBRectangle& src_rect,
+               bool is_intensity, bool scale_by_half) override;
+
+private:
+  // A single cached texture. Owns the Vulkan texture and, for render-target entries, the
+  // framebuffer used to draw into it.
+  struct TCacheEntry : TCacheEntryBase
+  {
+    // Takes ownership of `texture`. `framebuffer` may be VK_NULL_HANDLE for entries that are
+    // never rendered to.
+    TCacheEntry(const TCacheEntryConfig& config_, TextureCache* parent,
+                std::unique_ptr texture, VkFramebuffer framebuffer);
+
+    // Unbinds the texture's view from the state tracker and defers destruction of the
+    // framebuffer (if any) through the command buffer manager.
+    ~TCacheEntry();
+
+    Texture2D* GetTexture() const { return m_texture.get(); }
+    VkFramebuffer GetFramebuffer() const { return m_framebuffer; }
+
+    // Uploads the texel data in TextureCache::temp to mip `level`, recording the copy on the
+    // init command buffer and leaving the image in SHADER_READ_ONLY_OPTIMAL layout.
+    void Load(unsigned int width, unsigned int height, unsigned int expanded_width,
+              unsigned int level) override;
+
+    // Draws the (resolved) EFB colour or depth texture into this entry using the EFB-to-texture
+    // shaders. Requires a framebuffer; ends the current render pass.
+    void FromRenderTarget(u8* dst, PEControl::PixelFormat src_format, const EFBRectangle& src_rect,
+                          bool scale_by_half, unsigned int cbufid, const float* colmat) override;
+
+    // Copies `src_rect` of `source` into `dst_rect` of this entry. Equal-sized rectangles use
+    // vkCmdCopyImage; otherwise the copy is drawn with the copy shader, which requires this
+    // entry to be a render target.
+    void CopyRectangleFromTexture(const TCacheEntryBase* source,
+                                  const MathUtil::Rectangle& src_rect,
+                                  const MathUtil::Rectangle& dst_rect) override;
+
+    // Sets this entry's image view on the given texture stage of the state tracker.
+    void Bind(unsigned int stage) override;
+
+    // Reads back mip `level` and writes it to `filename` as a PNG. Blocks until the GPU is done.
+    bool Save(const std::string& filename, unsigned int level) override;
+
+  private:
+    // Owning cache; used to reach the state tracker, upload buffer, render passes and shaders.
+    TextureCache* m_parent;
+    std::unique_ptr m_texture;
+
+    // If we're an EFB copy, framebuffer for drawing into.
+    VkFramebuffer m_framebuffer;
+  };
+
+  TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) override;
+
+  bool CreateRenderPasses();
+
+  // Selects the render pass for drawing into `texture`: the initialize pass when the texture is
+  // currently in TRANSFER_DST_OPTIMAL layout, otherwise the update pass.
+  VkRenderPass GetRenderPassForTextureUpdate(const Texture2D* texture) const;
+
+  // Not owned; supplied through Initialize().
+  StateTracker* m_state_tracker = nullptr;
+
+  VkRenderPass m_initialize_render_pass = VK_NULL_HANDLE;
+  VkRenderPass m_update_render_pass = VK_NULL_HANDLE;
+
+  // Streaming buffer that TCacheEntry::Load reserves space from for small texture uploads.
+  std::unique_ptr m_texture_upload_buffer;
+
+  std::unique_ptr m_texture_encoder;
+
+  std::unique_ptr m_palette_texture_converter;
+
+  // Fragment shaders created by CompileShaders() and destroyed by DeleteShaders().
+  VkShaderModule m_copy_shader = VK_NULL_HANDLE;
+  VkShaderModule m_efb_color_to_tex_shader = VK_NULL_HANDLE;
+  VkShaderModule m_efb_depth_to_tex_shader = VK_NULL_HANDLE;
+};
+
+}  // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/TextureEncoder.cpp b/Source/Core/VideoBackends/Vulkan/TextureEncoder.cpp
new file mode 100644
index 0000000000..b89baba62f
--- /dev/null
+++ b/Source/Core/VideoBackends/Vulkan/TextureEncoder.cpp
@@ -0,0 +1,238 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+ +#include +#include + +#include "Common/CommonFuncs.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/Renderer.h" +#include "VideoBackends/Vulkan/StagingTexture2D.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" +#include "VideoBackends/Vulkan/Texture2D.h" +#include "VideoBackends/Vulkan/TextureCache.h" +#include "VideoBackends/Vulkan/TextureEncoder.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +#include "VideoCommon/TextureConversionShader.h" +#include "VideoCommon/TextureDecoder.h" + +namespace Vulkan +{ +TextureEncoder::TextureEncoder() +{ +} + +TextureEncoder::~TextureEncoder() +{ + if (m_encoding_render_pass != VK_NULL_HANDLE) + vkDestroyRenderPass(g_vulkan_context->GetDevice(), m_encoding_render_pass, nullptr); + + if (m_encoding_texture_framebuffer != VK_NULL_HANDLE) + vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_encoding_texture_framebuffer, nullptr); + + for (VkShaderModule shader : m_texture_encoding_shaders) + { + if (shader != VK_NULL_HANDLE) + vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); + } +} + +bool TextureEncoder::Initialize() +{ + if (!CompileShaders()) + { + PanicAlert("Failed to compile shaders"); + return false; + } + + if (!CreateEncodingRenderPass()) + { + PanicAlert("Failed to create encode render pass"); + return false; + } + + if (!CreateEncodingTexture()) + { + PanicAlert("Failed to create encoding texture"); + return false; + } + + if (!CreateDownloadTexture()) + { + PanicAlert("Failed to create download texture"); + return false; + } + + return true; +} + +void TextureEncoder::EncodeTextureToRam(StateTracker* state_tracker, VkImageView src_texture, + u8* dest_ptr, u32 format, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + 
PEControl::PixelFormat src_format, bool is_intensity, + int scale_by_half, const EFBRectangle& src_rect) +{ + if (m_texture_encoding_shaders[format] == VK_NULL_HANDLE) + { + ERROR_LOG(VIDEO, "Missing encoding fragment shader for format %u", format); + return; + } + + // Can't do our own draw within a render pass. + state_tracker->EndRenderPass(); + + UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), + g_object_cache->GetPushConstantPipelineLayout(), m_encoding_render_pass, + g_object_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, + m_texture_encoding_shaders[format]); + + // Uniform - int4 of left,top,native_width,scale + s32 position_uniform[4] = {src_rect.left, src_rect.top, static_cast(native_width), + scale_by_half ? 2 : 1}; + draw.SetPushConstants(position_uniform, sizeof(position_uniform)); + + // Doesn't make sense to linear filter depth values + draw.SetPSSampler(0, src_texture, (scale_by_half && src_format != PEControl::Z24) ? + g_object_cache->GetLinearSampler() : + g_object_cache->GetPointSampler()); + + u32 render_width = bytes_per_row / sizeof(u32); + u32 render_height = num_blocks_y; + Util::SetViewportAndScissor(g_command_buffer_mgr->GetCurrentCommandBuffer(), 0, 0, render_width, + render_height); + + // TODO: We could use compute shaders here. + VkRect2D render_region = {{0, 0}, {render_width, render_height}}; + draw.BeginRenderPass(m_encoding_texture_framebuffer, render_region); + draw.DrawWithoutVertexBuffer(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 4); + draw.EndRenderPass(); + + // Transition the image before copying + m_encoding_texture->OverrideImageLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + m_download_texture->CopyFromImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), + m_encoding_texture->GetImage(), VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, + render_width, render_height, 0, 0); + + // Block until the GPU has finished copying to the staging texture. 
+ g_command_buffer_mgr->ExecuteCommandBuffer(false, true); + state_tracker->InvalidateDescriptorSets(); + state_tracker->SetPendingRebind(); + + // Copy from staging texture to the final destination, adjusting pitch if necessary. + m_download_texture->ReadTexels(0, 0, render_width, render_height, dest_ptr, memory_stride); +} + +bool TextureEncoder::CompileShaders() +{ + // Texture encoding shaders + static const u32 texture_encoding_shader_formats[] = { + GX_TF_I4, GX_TF_I8, GX_TF_IA4, GX_TF_IA8, GX_TF_RGB565, GX_TF_RGB5A3, GX_TF_RGBA8, + GX_CTF_R4, GX_CTF_RA4, GX_CTF_RA8, GX_CTF_A8, GX_CTF_R8, GX_CTF_G8, GX_CTF_B8, + GX_CTF_RG8, GX_CTF_GB8, GX_CTF_Z8H, GX_TF_Z8, GX_CTF_Z16R, GX_TF_Z16, GX_TF_Z24X8, + GX_CTF_Z4, GX_CTF_Z8M, GX_CTF_Z8L, GX_CTF_Z16L}; + for (u32 format : texture_encoding_shader_formats) + { + const char* shader_source = + TextureConversionShader::GenerateEncodingShader(format, APIType::Vulkan); + m_texture_encoding_shaders[format] = Util::CompileAndCreateFragmentShader(shader_source); + if (m_texture_encoding_shaders[format] == VK_NULL_HANDLE) + return false; + } + + return true; +} + +bool TextureEncoder::CreateEncodingRenderPass() +{ + VkAttachmentDescription attachments[] = { + {0, ENCODING_TEXTURE_FORMAT, VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL}}; + + VkAttachmentReference color_attachment_references[] = { + {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}}; + + VkSubpassDescription subpass_descriptions[] = {{0, VK_PIPELINE_BIND_POINT_GRAPHICS, 0, nullptr, 1, + color_attachment_references, nullptr, nullptr, 0, + nullptr}}; + + VkSubpassDependency dependancies[] = { + {0, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + 
VK_ACCESS_TRANSFER_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT}}; + + VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + nullptr, + 0, + static_cast(ArraySize(attachments)), + attachments, + static_cast(ArraySize(subpass_descriptions)), + subpass_descriptions, + static_cast(ArraySize(dependancies)), + dependancies}; + + VkResult res = vkCreateRenderPass(g_vulkan_context->GetDevice(), &pass_info, nullptr, + &m_encoding_render_pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass (Encode) failed: "); + return false; + } + + return true; +} + +bool TextureEncoder::CreateEncodingTexture() +{ + // From OGL: Why do we create a 1024 height texture? + m_encoding_texture = Texture2D::Create( + ENCODING_TEXTURE_WIDTH, ENCODING_TEXTURE_HEIGHT, 1, 1, ENCODING_TEXTURE_FORMAT, + VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); + if (!m_encoding_texture) + return false; + + VkImageView framebuffer_attachments[] = {m_encoding_texture->GetView()}; + VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + nullptr, + 0, + m_encoding_render_pass, + static_cast(ArraySize(framebuffer_attachments)), + framebuffer_attachments, + m_encoding_texture->GetWidth(), + m_encoding_texture->GetHeight(), + m_encoding_texture->GetLayers()}; + + VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, + &m_encoding_texture_framebuffer); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + return false; + } + + return true; +} + +bool TextureEncoder::CreateDownloadTexture() +{ + m_download_texture = + StagingTexture2D::Create(STAGING_BUFFER_TYPE_READBACK, ENCODING_TEXTURE_WIDTH, + ENCODING_TEXTURE_HEIGHT, ENCODING_TEXTURE_FORMAT); + + if (!m_download_texture || !m_download_texture->Map()) + return false; + + return true; +} + +} // namespace Vulkan diff 
--git a/Source/Core/VideoBackends/Vulkan/TextureEncoder.h b/Source/Core/VideoBackends/Vulkan/TextureEncoder.h
new file mode 100644
index 0000000000..30c9413a14
--- /dev/null
+++ b/Source/Core/VideoBackends/Vulkan/TextureEncoder.h
@@ -0,0 +1,57 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+
+#include "VideoBackends/Vulkan/StreamBuffer.h"
+
+#include "VideoCommon/VideoCommon.h"
+
+namespace Vulkan
+{
+class StagingTexture2D;
+class StateTracker;
+class Texture2D;
+
+// Encodes a source image into a GameCube/Wii texture format with a fragment shader and reads
+// the encoded result back into CPU memory.
+class TextureEncoder
+{
+public:
+  TextureEncoder();
+
+  // Destroys the encoding render pass, the encoding framebuffer and every compiled encoding
+  // shader module.
+  ~TextureEncoder();
+
+  // Compiles the encoding shaders, then creates the encoding render pass, the encoding texture
+  // with its framebuffer, and the (persistently mapped) download texture. Raises a PanicAlert
+  // and returns false at the first step that fails.
+  bool Initialize();
+
+  // Uses an encoding shader to copy src_texture to dest_ptr.
+  // Any render pass in progress on state_tracker is ended before drawing.
+  // Logs an error and does nothing if no shader was compiled for `format`.
+  // WARNING: Executes the current command buffer and waits for it to complete, then invalidates
+  // the state tracker's descriptor sets and flags a pending rebind.
+  void EncodeTextureToRam(StateTracker* state_tracker, VkImageView src_texture, u8* dest_ptr,
+                          u32 format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
+                          u32 memory_stride, PEControl::PixelFormat src_format, bool is_intensity,
+                          int scale_by_half, const EFBRectangle& source);
+
+private:
+  // From OGL.
+  // NOTE(review): NUM_TEXTURE_ENCODING_SHADERS is presumably the size of
+  // m_texture_encoding_shaders, which is indexed directly by the texture format - confirm.
+  static const u32 NUM_TEXTURE_ENCODING_SHADERS = 64;
+  // Dimensions and format shared by the encoding render target and the download texture.
+  static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4;
+  static const u32 ENCODING_TEXTURE_HEIGHT = 1024;
+  static const VkFormat ENCODING_TEXTURE_FORMAT = VK_FORMAT_B8G8R8A8_UNORM;
+
+  // Initialization steps, called in this order by Initialize(). Each returns false on failure.
+  bool CompileShaders();
+  bool CreateEncodingRenderPass();
+  bool CreateEncodingTexture();
+  bool CreateDownloadTexture();
+
+  // One fragment shader per supported texture format; VK_NULL_HANDLE where none was compiled.
+  std::array m_texture_encoding_shaders = {};
+
+  VkRenderPass m_encoding_render_pass = VK_NULL_HANDLE;
+
+  // Render target the encoding shader draws into, and the framebuffer wrapping it.
+  std::unique_ptr m_encoding_texture;
+  VkFramebuffer m_encoding_texture_framebuffer = VK_NULL_HANDLE;
+
+  // Readback staging texture the encoded image is copied to; mapped once at creation.
+  std::unique_ptr m_download_texture;
+};
+
+}  // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/Util.cpp b/Source/Core/VideoBackends/Vulkan/Util.cpp
new file mode 100644
index 0000000000..5048466aa2
--- /dev/null
+++ b/Source/Core/VideoBackends/Vulkan/Util.cpp
@@ -0,0 +1,755 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include "Common/Assert.h"
+#include "Common/CommonFuncs.h"
+#include "Common/MathUtil.h"
+
+#include "VideoBackends/Vulkan/CommandBufferManager.h"
+#include "VideoBackends/Vulkan/ObjectCache.h"
+#include "VideoBackends/Vulkan/Renderer.h"
+#include "VideoBackends/Vulkan/ShaderCompiler.h"
+#include "VideoBackends/Vulkan/StateTracker.h"
+#include "VideoBackends/Vulkan/StreamBuffer.h"
+#include "VideoBackends/Vulkan/Util.h"
+#include "VideoBackends/Vulkan/VulkanContext.h"
+
+namespace Vulkan
+{
+namespace Util
+{
+// Rounds `value` up to the next multiple of `alignment`; a value that is already a multiple is
+// returned unchanged. `alignment` must be non-zero, since it is used as a modulo divisor.
+size_t AlignValue(size_t value, size_t alignment)
+{
+  // Have to use mod rather than masking bits in case alignment is not a power of two.
+  size_t offset = value % alignment;
+  if (offset != 0)
+    value += (alignment - offset);
+  return value;
+}
+
+// Aligns a buffer offset to `alignment`, treating an offset of zero as already aligned.
+size_t AlignBufferOffset(size_t offset, size_t alignment)
+{
+  // Assume an offset of zero is already aligned to a value larger than alignment.
+ if (offset == 0) + return 0; + + return AlignValue(offset, alignment); +} + +u32 MakeRGBA8Color(float r, float g, float b, float a) +{ + return (static_cast(MathUtil::Clamp(static_cast(r * 255.0f), 0, 255)) << 0) | + (static_cast(MathUtil::Clamp(static_cast(g * 255.0f), 0, 255)) << 8) | + (static_cast(MathUtil::Clamp(static_cast(b * 255.0f), 0, 255)) << 16) | + (static_cast(MathUtil::Clamp(static_cast(a * 255.0f), 0, 255)) << 24); +} + +bool IsDepthFormat(VkFormat format) +{ + switch (format) + { + case VK_FORMAT_D16_UNORM: + case VK_FORMAT_D16_UNORM_S8_UINT: + case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_D32_SFLOAT: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return true; + default: + return false; + } +} + +VkFormat GetLinearFormat(VkFormat format) +{ + switch (format) + { + case VK_FORMAT_R8_SRGB: + return VK_FORMAT_R8_UNORM; + case VK_FORMAT_R8G8_SRGB: + return VK_FORMAT_R8G8_UNORM; + case VK_FORMAT_R8G8B8_SRGB: + return VK_FORMAT_R8G8B8_UNORM; + case VK_FORMAT_R8G8B8A8_SRGB: + return VK_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_B8G8R8_SRGB: + return VK_FORMAT_B8G8R8_UNORM; + case VK_FORMAT_B8G8R8A8_SRGB: + return VK_FORMAT_B8G8R8A8_UNORM; + default: + return format; + } +} + +u32 GetTexelSize(VkFormat format) +{ + // Only contains pixel formats we use. 
+ switch (format) + { + case VK_FORMAT_R32_SFLOAT: + return 4; + + case VK_FORMAT_D32_SFLOAT: + return 4; + + case VK_FORMAT_R8G8B8A8_UNORM: + return 4; + + case VK_FORMAT_B8G8R8A8_UNORM: + return 4; + + default: + PanicAlert("Unhandled pixel format"); + return 1; + } +} + +VkBlendFactor GetAlphaBlendFactor(VkBlendFactor factor) +{ + switch (factor) + { + case VK_BLEND_FACTOR_SRC_COLOR: + return VK_BLEND_FACTOR_SRC_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + case VK_BLEND_FACTOR_DST_COLOR: + return VK_BLEND_FACTOR_DST_ALPHA; + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; + default: + return factor; + } +} + +RasterizationState GetNoCullRasterizationState() +{ + RasterizationState state = {}; + state.cull_mode = VK_CULL_MODE_NONE; + state.samples = VK_SAMPLE_COUNT_1_BIT; + state.per_sample_shading = VK_FALSE; + state.depth_clamp = VK_FALSE; + return state; +} + +DepthStencilState GetNoDepthTestingDepthStencilState() +{ + DepthStencilState state = {}; + state.test_enable = VK_FALSE; + state.write_enable = VK_FALSE; + state.compare_op = VK_COMPARE_OP_ALWAYS; + return state; +} + +BlendState GetNoBlendingBlendState() +{ + BlendState state = {}; + state.blend_enable = VK_FALSE; + state.blend_op = VK_BLEND_OP_ADD; + state.src_blend = VK_BLEND_FACTOR_ONE; + state.dst_blend = VK_BLEND_FACTOR_ZERO; + state.alpha_blend_op = VK_BLEND_OP_ADD; + state.src_alpha_blend = VK_BLEND_FACTOR_ONE; + state.dst_alpha_blend = VK_BLEND_FACTOR_ZERO; + state.logic_op_enable = VK_FALSE; + state.logic_op = VK_LOGIC_OP_CLEAR; + state.write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + return state; +} + +void SetViewportAndScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, + float min_depth /*= 0.0f*/, float max_depth /*= 1.0f*/) +{ + VkViewport viewport = {static_cast(x), + static_cast(y), + 
static_cast(width), + static_cast(height), + min_depth, + max_depth}; + + VkRect2D scissor = {{x, y}, {static_cast(width), static_cast(height)}}; + + vkCmdSetViewport(command_buffer, 0, 1, &viewport); + vkCmdSetScissor(command_buffer, 0, 1, &scissor); +} + +void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask) +{ + VkBufferMemoryBarrier buffer_info = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + src_access_mask, // VkAccessFlags srcAccessMask + dst_access_mask, // VkAccessFlags dstAccessMask + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + buffer, // VkBuffer buffer + offset, // VkDeviceSize offset + size // VkDeviceSize size + }; + + vkCmdPipelineBarrier(command_buffer, src_stage_mask, dst_stage_mask, 0, 0, nullptr, 1, + &buffer_info, 0, nullptr); +} + +void ExecuteCurrentCommandsAndRestoreState(StateTracker* state_tracker, bool execute_off_thread, + bool wait_for_completion) +{ + state_tracker->EndRenderPass(); + g_command_buffer_mgr->ExecuteCommandBuffer(execute_off_thread, wait_for_completion); + state_tracker->InvalidateDescriptorSets(); + state_tracker->SetPendingRebind(); +} + +VkShaderModule CreateShaderModule(const u32* spv, size_t spv_word_count) +{ + VkShaderModuleCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + info.codeSize = spv_word_count * sizeof(u32); + info.pCode = spv; + + VkShaderModule module; + VkResult res = vkCreateShaderModule(g_vulkan_context->GetDevice(), &info, nullptr, &module); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateShaderModule failed: "); + return VK_NULL_HANDLE; + } + + return module; +} + +VkShaderModule CompileAndCreateVertexShader(const 
std::string& source_code, bool prepend_header) +{ + ShaderCompiler::SPIRVCodeVector code; + if (!ShaderCompiler::CompileVertexShader(&code, source_code.c_str(), source_code.length(), + prepend_header)) + { + return VK_NULL_HANDLE; + } + + return CreateShaderModule(code.data(), code.size()); +} + +VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code, bool prepend_header) +{ + ShaderCompiler::SPIRVCodeVector code; + if (!ShaderCompiler::CompileGeometryShader(&code, source_code.c_str(), source_code.length(), + prepend_header)) + { + return VK_NULL_HANDLE; + } + + return CreateShaderModule(code.data(), code.size()); +} + +VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code, bool prepend_header) +{ + ShaderCompiler::SPIRVCodeVector code; + if (!ShaderCompiler::CompileFragmentShader(&code, source_code.c_str(), source_code.length(), + prepend_header)) + { + return VK_NULL_HANDLE; + } + + return CreateShaderModule(code.data(), code.size()); +} + +} // namespace Util + +template <> +DeferredResourceDestruction +DeferredResourceDestruction::Wrapper(VkCommandPool object) +{ + DeferredResourceDestruction ret; + ret.object.command_pool = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyCommandPool(device, obj.command_pool, nullptr); + }; + return ret; +} + +template <> +DeferredResourceDestruction +DeferredResourceDestruction::Wrapper(VkDeviceMemory object) +{ + DeferredResourceDestruction ret; + ret.object.device_memory = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkFreeMemory(device, obj.device_memory, nullptr); + }; + return ret; +} + +template <> +DeferredResourceDestruction DeferredResourceDestruction::Wrapper(VkBuffer object) +{ + DeferredResourceDestruction ret; + ret.object.buffer = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyBuffer(device, obj.buffer, nullptr); + }; + return ret; +} + +template <> 
+DeferredResourceDestruction DeferredResourceDestruction::Wrapper(VkBufferView object) +{ + DeferredResourceDestruction ret; + ret.object.buffer_view = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyBufferView(device, obj.buffer_view, nullptr); + }; + return ret; +} + +template <> +DeferredResourceDestruction DeferredResourceDestruction::Wrapper(VkImage object) +{ + DeferredResourceDestruction ret; + ret.object.image = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyImage(device, obj.image, nullptr); + }; + return ret; +} + +template <> +DeferredResourceDestruction DeferredResourceDestruction::Wrapper(VkImageView object) +{ + DeferredResourceDestruction ret; + ret.object.image_view = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyImageView(device, obj.image_view, nullptr); + }; + return ret; +} + +template <> +DeferredResourceDestruction DeferredResourceDestruction::Wrapper(VkRenderPass object) +{ + DeferredResourceDestruction ret; + ret.object.render_pass = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyRenderPass(device, obj.render_pass, nullptr); + }; + return ret; +} + +template <> +DeferredResourceDestruction +DeferredResourceDestruction::Wrapper(VkFramebuffer object) +{ + DeferredResourceDestruction ret; + ret.object.framebuffer = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyFramebuffer(device, obj.framebuffer, nullptr); + }; + return ret; +} + +template <> +DeferredResourceDestruction +DeferredResourceDestruction::Wrapper(VkShaderModule object) +{ + DeferredResourceDestruction ret; + ret.object.shader_module = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyShaderModule(device, obj.shader_module, nullptr); + }; + return ret; +} + +template <> +DeferredResourceDestruction DeferredResourceDestruction::Wrapper(VkPipeline object) +{ + 
DeferredResourceDestruction ret; + ret.object.pipeline = object; + ret.destroy_callback = [](VkDevice device, const Object& obj) { + vkDestroyPipeline(device, obj.pipeline, nullptr); + }; + return ret; +} + +UtilityShaderDraw::UtilityShaderDraw(VkCommandBuffer command_buffer, + VkPipelineLayout pipeline_layout, VkRenderPass render_pass, + VkShaderModule vertex_shader, VkShaderModule geometry_shader, + VkShaderModule pixel_shader) + : m_command_buffer(command_buffer) +{ + // Populate minimal pipeline state + m_pipeline_info.vertex_format = g_object_cache->GetUtilityShaderVertexFormat(); + m_pipeline_info.pipeline_layout = pipeline_layout; + m_pipeline_info.render_pass = render_pass; + m_pipeline_info.vs = vertex_shader; + m_pipeline_info.gs = geometry_shader; + m_pipeline_info.ps = pixel_shader; + m_pipeline_info.rasterization_state.bits = Util::GetNoCullRasterizationState().bits; + m_pipeline_info.depth_stencil_state.bits = Util::GetNoDepthTestingDepthStencilState().bits; + m_pipeline_info.blend_state.bits = Util::GetNoBlendingBlendState().bits; + m_pipeline_info.primitive_topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; +} + +UtilityShaderVertex* UtilityShaderDraw::ReserveVertices(VkPrimitiveTopology topology, size_t count) +{ + m_pipeline_info.primitive_topology = topology; + + if (!g_object_cache->GetUtilityShaderVertexBuffer()->ReserveMemory( + sizeof(UtilityShaderVertex) * count, sizeof(UtilityShaderVertex), true, true, true)) + PanicAlert("Failed to allocate space for vertices in backend shader"); + + m_vertex_buffer = g_object_cache->GetUtilityShaderVertexBuffer()->GetBuffer(); + m_vertex_buffer_offset = g_object_cache->GetUtilityShaderVertexBuffer()->GetCurrentOffset(); + + return reinterpret_cast( + g_object_cache->GetUtilityShaderVertexBuffer()->GetCurrentHostPointer()); +} + +void UtilityShaderDraw::CommitVertices(size_t count) +{ + g_object_cache->GetUtilityShaderVertexBuffer()->CommitMemory(sizeof(UtilityShaderVertex) * count); + m_vertex_count = 
static_cast(count); +} + +void UtilityShaderDraw::UploadVertices(VkPrimitiveTopology topology, UtilityShaderVertex* vertices, + size_t count) +{ + UtilityShaderVertex* upload_vertices = ReserveVertices(topology, count); + memcpy(upload_vertices, vertices, sizeof(UtilityShaderVertex) * count); + CommitVertices(count); +} + +u8* UtilityShaderDraw::AllocateVSUniforms(size_t size) +{ + if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory( + size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true)) + PanicAlert("Failed to allocate util uniforms"); + + return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer(); +} + +void UtilityShaderDraw::CommitVSUniforms(size_t size) +{ + m_vs_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); + m_vs_uniform_buffer.offset = 0; + m_vs_uniform_buffer.range = size; + m_ubo_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = + static_cast(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset()); + + g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size); +} + +u8* UtilityShaderDraw::AllocatePSUniforms(size_t size) +{ + if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory( + size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true)) + PanicAlert("Failed to allocate util uniforms"); + + return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer(); +} + +void UtilityShaderDraw::CommitPSUniforms(size_t size) +{ + m_ps_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); + m_ps_uniform_buffer.offset = 0; + m_ps_uniform_buffer.range = size; + m_ubo_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = + static_cast(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset()); + + g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size); +} + +void UtilityShaderDraw::SetPushConstants(const void* data, size_t data_size) +{ + _assert_(static_cast(data_size) < 
PUSH_CONSTANT_BUFFER_SIZE); + + vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, + static_cast(data_size), data); +} + +void UtilityShaderDraw::SetPSSampler(size_t index, VkImageView view, VkSampler sampler) +{ + m_ps_samplers[index].sampler = sampler; + m_ps_samplers[index].imageView = view; + m_ps_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; +} + +void UtilityShaderDraw::SetRasterizationState(const RasterizationState& state) +{ + m_pipeline_info.rasterization_state.bits = state.bits; +} + +void UtilityShaderDraw::SetDepthStencilState(const DepthStencilState& state) +{ + m_pipeline_info.depth_stencil_state.bits = state.bits; +} + +void UtilityShaderDraw::SetBlendState(const BlendState& state) +{ + m_pipeline_info.blend_state.bits = state.bits; +} + +void UtilityShaderDraw::BeginRenderPass(VkFramebuffer framebuffer, const VkRect2D& region, + const VkClearValue* clear_value) +{ + VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + m_pipeline_info.render_pass, + framebuffer, + region, + clear_value ? 
1u : 0u, + clear_value}; + + vkCmdBeginRenderPass(m_command_buffer, &begin_info, VK_SUBPASS_CONTENTS_INLINE); +} + +void UtilityShaderDraw::EndRenderPass() +{ + vkCmdEndRenderPass(m_command_buffer); +} + +void UtilityShaderDraw::Draw() +{ + BindVertexBuffer(); + BindDescriptors(); + if (!BindPipeline()) + return; + + vkCmdDraw(m_command_buffer, m_vertex_count, 1, 0, 0); +} + +void UtilityShaderDraw::DrawQuad(int x, int y, int width, int height, float z) +{ + UtilityShaderVertex vertices[4]; + vertices[0].SetPosition(-1.0f, 1.0f, z); + vertices[0].SetTextureCoordinates(0.0f, 1.0f); + vertices[0].SetColor(1.0f, 1.0f, 1.0f, 1.0f); + vertices[1].SetPosition(1.0f, 1.0f, z); + vertices[1].SetTextureCoordinates(1.0f, 1.0f); + vertices[1].SetColor(1.0f, 1.0f, 1.0f, 1.0f); + vertices[2].SetPosition(-1.0f, -1.0f, z); + vertices[2].SetTextureCoordinates(0.0f, 0.0f); + vertices[2].SetColor(1.0f, 1.0f, 1.0f, 1.0f); + vertices[3].SetPosition(1.0f, -1.0f, z); + vertices[3].SetTextureCoordinates(1.0f, 0.0f); + vertices[3].SetColor(1.0f, 1.0f, 1.0f, 1.0f); + + Util::SetViewportAndScissor(m_command_buffer, x, y, width, height); + UploadVertices(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, vertices, ArraySize(vertices)); + Draw(); +} + +void UtilityShaderDraw::DrawQuad(int dst_x, int dst_y, int dst_width, int dst_height, int src_x, + int src_y, int src_layer, int src_width, int src_height, + int src_full_width, int src_full_height, float z) +{ + float u0 = float(src_x) / float(src_full_width); + float v0 = float(src_y) / float(src_full_height); + float u1 = float(src_x + src_width) / float(src_full_width); + float v1 = float(src_y + src_height) / float(src_full_height); + float w = static_cast(src_layer); + + UtilityShaderVertex vertices[4]; + vertices[0].SetPosition(-1.0f, 1.0f, z); + vertices[0].SetTextureCoordinates(u0, v1, w); + vertices[0].SetColor(1.0f, 1.0f, 1.0f, 1.0f); + vertices[1].SetPosition(1.0f, 1.0f, z); + vertices[1].SetTextureCoordinates(u1, v1, w); + 
vertices[1].SetColor(1.0f, 1.0f, 1.0f, 1.0f); + vertices[2].SetPosition(-1.0f, -1.0f, z); + vertices[2].SetTextureCoordinates(u0, v0, w); + vertices[2].SetColor(1.0f, 1.0f, 1.0f, 1.0f); + vertices[3].SetPosition(1.0f, -1.0f, z); + vertices[3].SetTextureCoordinates(u1, v0, w); + vertices[3].SetColor(1.0f, 1.0f, 1.0f, 1.0f); + + Util::SetViewportAndScissor(m_command_buffer, dst_x, dst_y, dst_width, dst_height); + UploadVertices(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, vertices, ArraySize(vertices)); + Draw(); +} + +void UtilityShaderDraw::DrawColoredQuad(int x, int y, int width, int height, float r, float g, + float b, float a, float z) +{ + return DrawColoredQuad(x, y, width, height, Util::MakeRGBA8Color(r, g, b, a), z); +} + +void UtilityShaderDraw::DrawColoredQuad(int x, int y, int width, int height, u32 color, float z) +{ + UtilityShaderVertex vertices[4]; + vertices[0].SetPosition(-1.0f, 1.0f, z); + vertices[0].SetTextureCoordinates(0.0f, 1.0f); + vertices[0].SetColor(color); + vertices[1].SetPosition(1.0f, 1.0f, z); + vertices[1].SetTextureCoordinates(1.0f, 1.0f); + vertices[1].SetColor(color); + vertices[2].SetPosition(-1.0f, -1.0f, z); + vertices[2].SetTextureCoordinates(0.0f, 0.0f); + vertices[2].SetColor(color); + vertices[3].SetPosition(1.0f, -1.0f, z); + vertices[3].SetTextureCoordinates(1.0f, 0.0f); + vertices[3].SetColor(color); + + Util::SetViewportAndScissor(m_command_buffer, x, y, width, height); + UploadVertices(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, vertices, ArraySize(vertices)); + Draw(); +} + +void UtilityShaderDraw::SetViewportAndScissor(int x, int y, int width, int height) +{ + Util::SetViewportAndScissor(m_command_buffer, x, y, width, height, 0.0f, 1.0f); +} + +void UtilityShaderDraw::DrawWithoutVertexBuffer(VkPrimitiveTopology primitive_topology, + u32 vertex_count) +{ + m_pipeline_info.vertex_format = nullptr; + m_pipeline_info.primitive_topology = primitive_topology; + + BindDescriptors(); + if (!BindPipeline()) + return; + + 
vkCmdDraw(m_command_buffer, vertex_count, 1, 0, 0); +} + +void UtilityShaderDraw::BindVertexBuffer() +{ + vkCmdBindVertexBuffers(m_command_buffer, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset); +} + +void UtilityShaderDraw::BindDescriptors() +{ + // TODO: This method is a mess, clean it up + std::array bind_descriptor_sets = {}; + std::array + set_writes = {}; + uint32_t num_set_writes = 0; + + VkDescriptorBufferInfo dummy_uniform_buffer = { + g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(), 0, 1}; + + // uniform buffers + if (m_vs_uniform_buffer.buffer != VK_NULL_HANDLE || m_ps_uniform_buffer.buffer != VK_NULL_HANDLE) + { + VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_UNIFORM_BUFFERS)); + if (set == VK_NULL_HANDLE) + PanicAlert("Failed to allocate descriptor set for utility draw"); + + set_writes[num_set_writes++] = { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, UBO_DESCRIPTOR_SET_BINDING_VS, 0, 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, nullptr, + (m_vs_uniform_buffer.buffer != VK_NULL_HANDLE) ? &m_vs_uniform_buffer : + &dummy_uniform_buffer, + nullptr}; + + set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + set, + UBO_DESCRIPTOR_SET_BINDING_GS, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + nullptr, + &dummy_uniform_buffer, + nullptr}; + + set_writes[num_set_writes++] = { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, UBO_DESCRIPTOR_SET_BINDING_PS, 0, 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, nullptr, + (m_ps_uniform_buffer.buffer != VK_NULL_HANDLE) ? 
&m_ps_uniform_buffer : + &dummy_uniform_buffer, + nullptr}; + + bind_descriptor_sets[DESCRIPTOR_SET_UNIFORM_BUFFERS] = set; + } + + // PS samplers + size_t first_active_sampler; + for (first_active_sampler = 0; first_active_sampler < NUM_PIXEL_SHADER_SAMPLERS; + first_active_sampler++) + { + if (m_ps_samplers[first_active_sampler].imageView != VK_NULL_HANDLE && + m_ps_samplers[first_active_sampler].sampler != VK_NULL_HANDLE) + { + break; + } + } + + // Check if we have any at all, skip the binding process entirely if we don't + if (first_active_sampler != NUM_PIXEL_SHADER_SAMPLERS) + { + // Allocate a new descriptor set + VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS)); + if (set == VK_NULL_HANDLE) + PanicAlert("Failed to allocate descriptor set for utility draw"); + + for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) + { + const VkDescriptorImageInfo& info = m_ps_samplers[i]; + if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE) + { + set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + set, + static_cast(i), + 0, + 1, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + &info, + nullptr, + nullptr}; + } + } + + bind_descriptor_sets[DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS] = set; + } + + vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0, + nullptr); + + // Bind only the sets we updated + if (bind_descriptor_sets[0] != VK_NULL_HANDLE && bind_descriptor_sets[1] == VK_NULL_HANDLE) + { + // UBO only + vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_pipeline_info.pipeline_layout, DESCRIPTOR_SET_UNIFORM_BUFFERS, 1, + &bind_descriptor_sets[0], NUM_UBO_DESCRIPTOR_SET_BINDINGS, + m_ubo_offsets.data()); + } + else if (bind_descriptor_sets[0] == VK_NULL_HANDLE && bind_descriptor_sets[1] != VK_NULL_HANDLE) + { + // Samplers only + 
vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_pipeline_info.pipeline_layout, DESCRIPTOR_SET_PIXEL_SHADER_SAMPLERS, + 1, &bind_descriptor_sets[1], 0, nullptr); + } + else if (bind_descriptor_sets[0] != VK_NULL_HANDLE && bind_descriptor_sets[1] != VK_NULL_HANDLE) + { + // Both + vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_pipeline_info.pipeline_layout, DESCRIPTOR_SET_UNIFORM_BUFFERS, 2, + bind_descriptor_sets.data(), NUM_UBO_DESCRIPTOR_SET_BINDINGS, + m_ubo_offsets.data()); + } +} + +bool UtilityShaderDraw::BindPipeline() +{ + VkPipeline pipeline = g_object_cache->GetPipeline(m_pipeline_info); + if (pipeline == VK_NULL_HANDLE) + { + PanicAlert("Failed to get pipeline for backend shader draw"); + return false; + } + + vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + return true; +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Util.h b/Source/Core/VideoBackends/Vulkan/Util.h new file mode 100644 index 0000000000..5f156483be --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/Util.h @@ -0,0 +1,206 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoBackends/Vulkan/ObjectCache.h" + +namespace Vulkan +{ +class CommandBufferManager; +class ObjectCache; +class StateTracker; + +namespace Util +{ +size_t AlignValue(size_t value, size_t alignment); +size_t AlignBufferOffset(size_t offset, size_t alignment); + +u32 MakeRGBA8Color(float r, float g, float b, float a); + +bool IsDepthFormat(VkFormat format); +VkFormat GetLinearFormat(VkFormat format); +u32 GetTexelSize(VkFormat format); + +// Map {SRC,DST}_COLOR to {SRC,DST}_ALPHA +VkBlendFactor GetAlphaBlendFactor(VkBlendFactor factor); + +RasterizationState GetNoCullRasterizationState(); +DepthStencilState GetNoDepthTestingDepthStencilState(); +BlendState GetNoBlendingBlendState(); + +// Combines viewport and scissor updates +void SetViewportAndScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, + float min_depth = 0.0f, float max_depth = 1.0f); + +// Wrapper for creating an barrier on a buffer +void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask); + +// Completes the current render pass, executes the command buffer, and restores state ready for next +// render. Use when you want to kick the current buffer to make room for new data. +void ExecuteCurrentCommandsAndRestoreState(StateTracker* state_tracker, bool execute_off_thread, + bool wait_for_completion = false); + +// Create a shader module from the specified SPIR-V. +VkShaderModule CreateShaderModule(const u32* spv, size_t spv_word_count); + +// Compile a vertex shader and create a shader module, discarding the intermediate SPIR-V. 
+VkShaderModule CompileAndCreateVertexShader(const std::string& source_code, + bool prepend_header = true); + +// Compile a geometry shader and create a shader module, discarding the intermediate SPIR-V. +VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code, + bool prepend_header = true); + +// Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V. +VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code, + bool prepend_header = true); +} + +// Helper methods for cleaning up device objects, used by deferred destruction +struct DeferredResourceDestruction +{ + union Object { + VkCommandPool command_pool; + VkDeviceMemory device_memory; + VkBuffer buffer; + VkBufferView buffer_view; + VkImage image; + VkImageView image_view; + VkRenderPass render_pass; + VkFramebuffer framebuffer; + VkShaderModule shader_module; + VkPipeline pipeline; + } object; + + void (*destroy_callback)(VkDevice device, const Object& object); + + template + static DeferredResourceDestruction Wrapper(T object); +}; + +// Utility shader vertex format +#pragma pack(push, 1) +struct UtilityShaderVertex +{ + float Position[4]; + float TexCoord[4]; + u32 Color; + + void SetPosition(float x, float y) + { + Position[0] = x; + Position[1] = y; + Position[2] = 0.0f; + Position[3] = 1.0f; + } + void SetPosition(float x, float y, float z) + { + Position[0] = x; + Position[1] = y; + Position[2] = z; + Position[3] = 1.0f; + } + void SetTextureCoordinates(float u, float v) + { + TexCoord[0] = u; + TexCoord[1] = v; + TexCoord[2] = 0.0f; + TexCoord[3] = 0.0f; + } + void SetTextureCoordinates(float u, float v, float w) + { + TexCoord[0] = u; + TexCoord[1] = v; + TexCoord[2] = w; + TexCoord[3] = 0.0f; + } + void SetTextureCoordinates(float u, float v, float w, float x) + { + TexCoord[0] = u; + TexCoord[1] = v; + TexCoord[2] = w; + TexCoord[3] = x; + } + void SetColor(u32 color) { Color = color; } + void SetColor(float r, float g, float 
b) { Color = Util::MakeRGBA8Color(r, g, b, 1.0f); } + void SetColor(float r, float g, float b, float a) { Color = Util::MakeRGBA8Color(r, g, b, a); } +}; +#pragma pack(pop) + +class UtilityShaderDraw +{ +public: + UtilityShaderDraw(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout, + VkRenderPass render_pass, VkShaderModule vertex_shader, + VkShaderModule geometry_shader, VkShaderModule pixel_shader); + + UtilityShaderVertex* ReserveVertices(VkPrimitiveTopology topology, size_t count); + void CommitVertices(size_t count); + + void UploadVertices(VkPrimitiveTopology topology, UtilityShaderVertex* vertices, size_t count); + + u8* AllocateVSUniforms(size_t size); + void CommitVSUniforms(size_t size); + + u8* AllocatePSUniforms(size_t size); + void CommitPSUniforms(size_t size); + + void SetPushConstants(const void* data, size_t data_size); + + void SetPSSampler(size_t index, VkImageView view, VkSampler sampler); + + void SetRasterizationState(const RasterizationState& state); + void SetDepthStencilState(const DepthStencilState& state); + void SetBlendState(const BlendState& state); + + void BeginRenderPass(VkFramebuffer framebuffer, const VkRect2D& region, + const VkClearValue* clear_value = nullptr); + void EndRenderPass(); + + void Draw(); + + // NOTE: These methods alter the viewport state of the command buffer. + + // Sets texture coordinates to 0..1 + void DrawQuad(int x, int y, int width, int height, float z = 0.0f); + + // Sets texture coordinates to the specified range + void DrawQuad(int dst_x, int dst_y, int dst_width, int dst_height, int src_x, int src_y, + int src_layer, int src_width, int src_height, int src_full_width, + int src_full_height, float z = 0.0f); + + void DrawColoredQuad(int x, int y, int width, int height, u32 color, float z = 0.0f); + + void DrawColoredQuad(int x, int y, int width, int height, float r, float g, float b, float a, + float z = 0.0f); + + // Draw without a vertex buffer. 
Assumes viewport has been initialized separately. + void SetViewportAndScissor(int x, int y, int width, int height); + void DrawWithoutVertexBuffer(VkPrimitiveTopology primitive_topology, u32 vertex_count); + +private: + void BindVertexBuffer(); + void BindDescriptors(); + bool BindPipeline(); + + VkCommandBuffer m_command_buffer = VK_NULL_HANDLE; + VkBuffer m_vertex_buffer = VK_NULL_HANDLE; + VkDeviceSize m_vertex_buffer_offset = 0; + uint32_t m_vertex_count = 0; + + VkDescriptorBufferInfo m_vs_uniform_buffer = {}; + VkDescriptorBufferInfo m_ps_uniform_buffer = {}; + std::array m_ubo_offsets = {}; + + std::array m_ps_samplers = {}; + + PipelineInfo m_pipeline_info = {}; +}; + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp b/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp new file mode 100644 index 0000000000..27c1d06199 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp @@ -0,0 +1,131 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "Common/Assert.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/VertexFormat.h" + +#include "VideoCommon/CPMemory.h" +#include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexShaderGen.h" + +namespace Vulkan +{ +static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer) +{ + static const VkFormat float_type_lookup[][4] = { + {VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM, + VK_FORMAT_R8G8B8A8_UNORM}, // VAR_UNSIGNED_BYTE + {VK_FORMAT_R8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_R8G8B8_SNORM, + VK_FORMAT_R8G8B8A8_SNORM}, // VAR_BYTE + {VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM, + VK_FORMAT_R16G16B16A16_UNORM}, // VAR_UNSIGNED_SHORT + {VK_FORMAT_R16_SNORM, VK_FORMAT_R16G16_SNORM, VK_FORMAT_R16G16B16_SNORM, + VK_FORMAT_R16G16B16A16_SNORM}, // VAR_SHORT + {VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT + }; + + static const VkFormat integer_type_lookup[][4] = { + {VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT, + VK_FORMAT_R8G8B8A8_UINT}, // VAR_UNSIGNED_BYTE + {VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT, + VK_FORMAT_R8G8B8A8_SINT}, // VAR_BYTE + {VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT, + VK_FORMAT_R16G16B16A16_UINT}, // VAR_UNSIGNED_SHORT + {VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT, + VK_FORMAT_R16G16B16A16_SINT}, // VAR_SHORT + {VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT} // VAR_FLOAT + }; + + _assert_(components > 0 && components <= 4); + return integer ? 
integer_type_lookup[t][components - 1] : float_type_lookup[t][components - 1]; +} + +VertexFormat::VertexFormat(const PortableVertexDeclaration& in_vtx_decl) +{ + vtx_decl = in_vtx_decl; + MapAttributes(); + SetupInputState(); +} + +void VertexFormat::MapAttributes() +{ + m_num_attributes = 0; + + if (vtx_decl.position.enable) + AddAttribute(SHADER_POSITION_ATTRIB, 0, + VarToVkFormat(vtx_decl.position.type, vtx_decl.position.components, + vtx_decl.position.integer), + vtx_decl.position.offset); + + for (uint32_t i = 0; i < 3; i++) + { + if (vtx_decl.normals[i].enable) + AddAttribute(SHADER_NORM0_ATTRIB + i, 0, + VarToVkFormat(vtx_decl.normals[i].type, vtx_decl.normals[i].components, + vtx_decl.normals[i].integer), + vtx_decl.normals[i].offset); + } + + for (uint32_t i = 0; i < 2; i++) + { + if (vtx_decl.colors[i].enable) + AddAttribute(SHADER_COLOR0_ATTRIB + i, 0, + VarToVkFormat(vtx_decl.colors[i].type, vtx_decl.colors[i].components, + vtx_decl.colors[i].integer), + vtx_decl.colors[i].offset); + } + + for (uint32_t i = 0; i < 8; i++) + { + if (vtx_decl.texcoords[i].enable) + AddAttribute(SHADER_TEXTURE0_ATTRIB + i, 0, + VarToVkFormat(vtx_decl.texcoords[i].type, vtx_decl.texcoords[i].components, + vtx_decl.texcoords[i].integer), + vtx_decl.texcoords[i].offset); + } + + if (vtx_decl.posmtx.enable) + AddAttribute( + SHADER_POSMTX_ATTRIB, 0, + VarToVkFormat(vtx_decl.posmtx.type, vtx_decl.posmtx.components, vtx_decl.posmtx.integer), + vtx_decl.posmtx.offset); +} + +void VertexFormat::SetupInputState() +{ + m_binding_description.binding = 0; + m_binding_description.stride = vtx_decl.stride; + m_binding_description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + + m_input_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + m_input_state_info.pNext = nullptr; + m_input_state_info.flags = 0; + m_input_state_info.vertexBindingDescriptionCount = 1; + m_input_state_info.pVertexBindingDescriptions = &m_binding_description; + 
m_input_state_info.vertexAttributeDescriptionCount = m_num_attributes; + m_input_state_info.pVertexAttributeDescriptions = m_attribute_descriptions.data(); +} + +void VertexFormat::AddAttribute(uint32_t location, uint32_t binding, VkFormat format, + uint32_t offset) +{ + _assert_(m_num_attributes < MAX_VERTEX_ATTRIBUTES); + + m_attribute_descriptions[m_num_attributes].location = location; + m_attribute_descriptions[m_num_attributes].binding = binding; + m_attribute_descriptions[m_num_attributes].format = format; + m_attribute_descriptions[m_num_attributes].offset = offset; + m_num_attributes++; +} + +void VertexFormat::SetupVertexPointers() +{ +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VertexFormat.h b/Source/Core/VideoBackends/Vulkan/VertexFormat.h new file mode 100644 index 0000000000..3614366e2f --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VertexFormat.h @@ -0,0 +1,44 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoCommon/NativeVertexFormat.h" + +namespace Vulkan +{ +class VertexFormat : public ::NativeVertexFormat +{ +public: + VertexFormat(const PortableVertexDeclaration& in_vtx_decl); + + // Passed to pipeline state creation + const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const + { + return m_input_state_info; + } + + // Converting PortableVertexDeclaration -> Vulkan types + void MapAttributes(); + void SetupInputState(); + + // Not used in the Vulkan backend. 
+ void SetupVertexPointers() override; + +private: + void AddAttribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset); + + VkVertexInputBindingDescription m_binding_description = {}; + + std::array m_attribute_descriptions = + {}; + + VkPipelineVertexInputStateCreateInfo m_input_state_info = {}; + + uint32_t m_num_attributes = 0; +}; +} diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp new file mode 100644 index 0000000000..e95c1d6ade --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp @@ -0,0 +1,218 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include "VideoBackends/Vulkan/VertexManager.h" +#include "VideoBackends/Vulkan/BoundingBox.h" +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/Renderer.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" +#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VertexFormat.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +#include "VideoCommon/BoundingBox.h" +#include "VideoCommon/IndexGenerator.h" +#include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VideoConfig.h" + +namespace Vulkan +{ +// TODO: Clean up this mess +constexpr size_t INITIAL_VERTEX_BUFFER_SIZE = VertexManager::MAXVBUFFERSIZE * 2; +constexpr size_t MAX_VERTEX_BUFFER_SIZE = VertexManager::MAXVBUFFERSIZE * 16; +constexpr size_t INITIAL_INDEX_BUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 2; +constexpr size_t MAX_INDEX_BUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 16; + +VertexManager::VertexManager() + : m_cpu_vertex_buffer(MAXVBUFFERSIZE), m_cpu_index_buffer(MAXIBUFFERSIZE) +{ +} + +VertexManager::~VertexManager() +{ +} + +bool 
VertexManager::Initialize(StateTracker* state_tracker) +{ + m_state_tracker = state_tracker; + + m_vertex_stream_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + INITIAL_VERTEX_BUFFER_SIZE, MAX_VERTEX_BUFFER_SIZE); + + m_index_stream_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + INITIAL_INDEX_BUFFER_SIZE, MAX_INDEX_BUFFER_SIZE); + + if (!m_vertex_stream_buffer || !m_index_stream_buffer) + { + PanicAlert("Failed to allocate streaming buffers"); + return false; + } + + return true; +} + +NativeVertexFormat* +VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +{ + return new VertexFormat(vtx_decl); +} + +void VertexManager::PrepareDrawBuffers(u32 stride) +{ + size_t vertex_data_size = IndexGenerator::GetNumVerts() * stride; + size_t index_data_size = IndexGenerator::GetIndexLen() * sizeof(u16); + + m_vertex_stream_buffer->CommitMemory(vertex_data_size); + m_index_stream_buffer->CommitMemory(index_data_size); + + ADDSTAT(stats.thisFrame.bytesVertexStreamed, static_cast(vertex_data_size)); + ADDSTAT(stats.thisFrame.bytesIndexStreamed, static_cast(index_data_size)); + + m_state_tracker->SetVertexBuffer(m_vertex_stream_buffer->GetBuffer(), 0); + m_state_tracker->SetIndexBuffer(m_index_stream_buffer->GetBuffer(), 0, VK_INDEX_TYPE_UINT16); +} + +void VertexManager::ResetBuffer(u32 stride) +{ + if (m_cull_all) + { + // Not drawing on the gpu, so store in a heap buffer instead + m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data(); + m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); + IndexGenerator::Start(m_cpu_index_buffer.data()); + return; + } + + // Attempt to allocate from buffers + bool has_vbuffer_allocation = m_vertex_stream_buffer->ReserveMemory(MAXVBUFFERSIZE, stride); + bool has_ibuffer_allocation = m_index_stream_buffer->ReserveMemory(MAXIBUFFERSIZE, sizeof(u16)); + if (!has_vbuffer_allocation || !has_ibuffer_allocation) + { + // Flush any pending 
commands first, so that we can wait on the fences + WARN_LOG(VIDEO, "Executing command list while waiting for space in vertex/index buffer"); + Util::ExecuteCurrentCommandsAndRestoreState(m_state_tracker, false); + + // Attempt to allocate again, this may cause a fence wait + if (!has_vbuffer_allocation) + has_vbuffer_allocation = m_vertex_stream_buffer->ReserveMemory(MAXVBUFFERSIZE, stride); + if (!has_ibuffer_allocation) + has_ibuffer_allocation = m_index_stream_buffer->ReserveMemory(MAXIBUFFERSIZE, sizeof(u16)); + + // If we still failed, that means the allocation was too large and will never succeed, so panic + if (!has_vbuffer_allocation || !has_ibuffer_allocation) + PanicAlert("Failed to allocate space in streaming buffers for pending draw"); + } + + // Update pointers + m_base_buffer_pointer = m_vertex_stream_buffer->GetHostPointer(); + m_end_buffer_pointer = m_vertex_stream_buffer->GetCurrentHostPointer() + MAXVBUFFERSIZE; + m_cur_buffer_pointer = m_vertex_stream_buffer->GetCurrentHostPointer(); + IndexGenerator::Start(reinterpret_cast(m_index_stream_buffer->GetCurrentHostPointer())); + + // Update base indices + m_current_draw_base_vertex = + static_cast(m_vertex_stream_buffer->GetCurrentOffset() / stride); + m_current_draw_base_index = + static_cast(m_index_stream_buffer->GetCurrentOffset() / sizeof(u16)); +} + +void VertexManager::vFlush(bool use_dst_alpha) +{ + const VertexFormat* vertex_format = + static_cast(VertexLoaderManager::GetCurrentVertexFormat()); + u32 vertex_stride = vertex_format->GetVertexStride(); + + // Commit memory to device + PrepareDrawBuffers(vertex_stride); + + // Figure out the number of indices to draw + u32 index_count = IndexGenerator::GetIndexLen(); + + // Update assembly state + m_state_tracker->SetVertexFormat(vertex_format); + switch (m_current_primitive_type) + { + case PRIMITIVE_POINTS: + m_state_tracker->SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST); + m_state_tracker->DisableBackFaceCulling(); + break; + + case 
PRIMITIVE_LINES: + m_state_tracker->SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_LINE_LIST); + m_state_tracker->DisableBackFaceCulling(); + break; + + case PRIMITIVE_TRIANGLES: + m_state_tracker->SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP); + g_renderer->SetGenerationMode(); + break; + } + + // Can we do single-pass dst alpha? + DSTALPHA_MODE dstalpha_mode = DSTALPHA_NONE; + if (use_dst_alpha && g_vulkan_context->SupportsDualSourceBlend()) + dstalpha_mode = DSTALPHA_DUAL_SOURCE_BLEND; + + // Check for any shader stage changes + m_state_tracker->CheckForShaderChanges(m_current_primitive_type, dstalpha_mode); + + // Update any changed constants + m_state_tracker->UpdateVertexShaderConstants(); + m_state_tracker->UpdateGeometryShaderConstants(); + m_state_tracker->UpdatePixelShaderConstants(); + + // Flush all EFB pokes and invalidate the peek cache. + // TODO: Cleaner way without the cast. + FramebufferManager* framebuffer_mgr = + static_cast(g_framebuffer_manager.get()); + framebuffer_mgr->InvalidatePeekCache(); + framebuffer_mgr->FlushEFBPokes(m_state_tracker); + + // If bounding box is enabled, we need to flush any changes first, then invalidate what we have. + if (g_vulkan_context->SupportsBoundingBox()) + { + BoundingBox* bounding_box = static_cast(g_renderer.get())->GetBoundingBox(); + bool bounding_box_enabled = (::BoundingBox::active && g_ActiveConfig.bBBoxEnable); + if (bounding_box_enabled) + { + bounding_box->Flush(m_state_tracker); + bounding_box->Invalidate(m_state_tracker); + } + + // Update which descriptor set/pipeline layout to use. 
+ m_state_tracker->SetBBoxEnable(bounding_box_enabled); + } + + // Bind all pending state to the command buffer + if (!m_state_tracker->Bind()) + { + WARN_LOG(VIDEO, "Skipped draw of %u indices", index_count); + return; + } + + // Execute the draw + vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), index_count, 1, + m_current_draw_base_index, m_current_draw_base_vertex, 0); + + // If we can't do single pass dst alpha, we now need to draw the alpha pass. + if (use_dst_alpha && !g_vulkan_context->SupportsDualSourceBlend()) + { + m_state_tracker->CheckForShaderChanges(m_current_primitive_type, DSTALPHA_ALPHA_PASS); + if (!m_state_tracker->Bind()) + { + WARN_LOG(VIDEO, "Skipped draw of %u indices (alpha pass)", index_count); + return; + } + + vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), index_count, 1, + m_current_draw_base_index, m_current_draw_base_vertex, 0); + } + + m_state_tracker->OnDraw(); +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.h b/Source/Core/VideoBackends/Vulkan/VertexManager.h new file mode 100644 index 0000000000..9729709658 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.h @@ -0,0 +1,44 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include  // NOTE(review): header name is missing in this copy, presumably lost in extraction - confirm
+
+#include "VideoCommon/VertexManagerBase.h"
+
+namespace Vulkan
+{
+class StateTracker;
+class StreamBuffer;
+
+class VertexManager : public VertexManagerBase  // Vulkan backend's vertex manager
+{
+public:
+  VertexManager();
+  ~VertexManager();
+
+  bool Initialize(StateTracker* state_tracker);  // keeps the tracker, creates both stream buffers; false on failure
+
+  NativeVertexFormat* CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;  // returns a new Vulkan::VertexFormat
+
+protected:
+  void PrepareDrawBuffers(u32 stride);  // commits the generated vertex/index bytes and binds both buffers
+  void ResetBuffer(u32 stride) override;  // reserves stream-buffer space, or uses the CPU buffers when m_cull_all is set
+
+private:
+  void vFlush(bool use_dst_alpha) override;  // binds pending state and records the indexed draw(s)
+
+  StateTracker* m_state_tracker = nullptr;  // set by Initialize()
+
+  std::vector m_cpu_vertex_buffer;  // NOTE(review): element type missing in this copy (template args stripped?) - confirm
+  std::vector m_cpu_index_buffer;  // heap-side index storage used by ResetBuffer() when m_cull_all is set
+
+  std::unique_ptr m_vertex_stream_buffer;  // created by Initialize() with VK_BUFFER_USAGE_VERTEX_BUFFER_BIT
+  std::unique_ptr m_index_stream_buffer;  // created by Initialize() with VK_BUFFER_USAGE_INDEX_BUFFER_BIT
+
+  u32 m_current_draw_base_vertex = 0;  // vertex stream offset / stride, computed in ResetBuffer()
+  u32 m_current_draw_base_index = 0;  // index stream offset / sizeof(u16), computed in ResetBuffer()
+};
+}
diff --git a/Source/Core/VideoBackends/Vulkan/VideoBackend.h b/Source/Core/VideoBackends/Vulkan/VideoBackend.h
new file mode 100644
index 0000000000..8e29326389
--- /dev/null
+++ b/Source/Core/VideoBackends/Vulkan/VideoBackend.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included.
+ +#pragma once + +#include "VideoCommon/VideoBackendBase.h" + +namespace Vulkan +{ +class VideoBackend : public VideoBackendBase +{ + bool Initialize(void* window_handle) override; + void Shutdown() override; + + std::string GetName() const override { return "Vulkan"; } + std::string GetDisplayName() const override { return "Vulkan (experimental)"; } + void Video_Prepare() override; + void Video_Cleanup() override; + + void InitBackendInfo() override; + + unsigned int PeekMessages() override { return 0; } +}; +} diff --git a/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj b/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj new file mode 100644 index 0000000000..64ac839164 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj @@ -0,0 +1,119 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {29F29A19-F141-45AD-9679-5A2923B49DA3} + + + + StaticLibrary + v140 + Unicode + + + true + + + false + + + + + + + + + + + + + $(ExternalsDir)Vulkan\include;$(ExternalsDir)glslang\glslang\Public;$(ExternalsDir)glslang\SPIRV;$(ExternalsDir)zlib;%(AdditionalIncludeDirectories) + + + + + + + $(ExternalsDir)Vulkan\include;$(ExternalsDir)glslang\glslang\Public;$(ExternalsDir)glslang\SPIRV;$(ExternalsDir)zlib;%(AdditionalIncludeDirectories) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {3de9ee35-3e91-4f27-a014-2866ad8c3fe3} + + + {d178061b-84d3-44f9-beed-efd18d9033f0} + + + + + + + + + diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp new file mode 100644 index 0000000000..ef2e6e1fb3 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -0,0 +1,719 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include + +#include "Common/Assert.h" +#include "Common/CommonFuncs.h" +#include "Common/Logging/Log.h" +#include "Common/MsgHandler.h" +#include "Common/StringUtil.h" + +#include "VideoBackends/Vulkan/VulkanContext.h" +#include "VideoCommon/DriverDetails.h" + +namespace Vulkan +{ +std::unique_ptr g_vulkan_context; + +VulkanContext::VulkanContext(VkInstance instance, VkPhysicalDevice physical_device) + : m_instance(instance), m_physical_device(physical_device) +{ + // Read device physical memory properties, we need it for allocating buffers + vkGetPhysicalDeviceProperties(physical_device, &m_device_properties); + vkGetPhysicalDeviceMemoryProperties(physical_device, &m_device_memory_properties); + + // Would any drivers be this silly? I hope not... + m_device_properties.limits.minUniformBufferOffsetAlignment = std::max( + m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast(1)); + m_device_properties.limits.minTexelBufferOffsetAlignment = std::max( + m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast(1)); + m_device_properties.limits.optimalBufferCopyOffsetAlignment = std::max( + m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast(1)); + m_device_properties.limits.optimalBufferCopyRowPitchAlignment = std::max( + m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast(1)); +} + +VulkanContext::~VulkanContext() +{ + if (m_device != VK_NULL_HANDLE) + vkDestroyDevice(m_device, nullptr); + + if (m_debug_report_callback != VK_NULL_HANDLE) + DisableDebugReports(); + + vkDestroyInstance(m_instance, nullptr); +} + +bool VulkanContext::CheckValidationLayerAvailablility() +{ + u32 extension_count = 0; + VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: "); + return false; + } + + std::vector extension_list(extension_count); + res = 
vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, extension_list.data()); + _assert_(res == VK_SUCCESS); + + u32 layer_count = 0; + res = vkEnumerateInstanceLayerProperties(&layer_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: "); + return false; + } + + std::vector layer_list(layer_count); + res = vkEnumerateInstanceLayerProperties(&layer_count, layer_list.data()); + _assert_(res == VK_SUCCESS); + + // Check for both VK_EXT_debug_report and VK_LAYER_LUNARG_standard_validation + return (std::find_if(extension_list.begin(), extension_list.end(), + [](const auto& it) { + return strcmp(it.extensionName, VK_EXT_DEBUG_REPORT_EXTENSION_NAME) == 0; + }) != extension_list.end() && + std::find_if(layer_list.begin(), layer_list.end(), [](const auto& it) { + return strcmp(it.layerName, "VK_LAYER_LUNARG_standard_validation") == 0; + }) != layer_list.end()); +} + +VkInstance VulkanContext::CreateVulkanInstance(bool enable_surface, bool enable_validation_layer) +{ + ExtensionList enabled_extensions; + if (!SelectInstanceExtensions(&enabled_extensions, enable_surface, enable_validation_layer)) + return VK_NULL_HANDLE; + + VkApplicationInfo app_info = {}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pNext = nullptr; + app_info.pApplicationName = "Dolphin Emulator"; + app_info.applicationVersion = VK_MAKE_VERSION(5, 0, 0); + app_info.pEngineName = "Dolphin Emulator"; + app_info.engineVersion = VK_MAKE_VERSION(5, 0, 0); + app_info.apiVersion = VK_MAKE_VERSION(1, 0, 0); + + VkInstanceCreateInfo instance_create_info = {}; + instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_create_info.pNext = nullptr; + instance_create_info.flags = 0; + instance_create_info.pApplicationInfo = &app_info; + instance_create_info.enabledExtensionCount = static_cast(enabled_extensions.size()); + instance_create_info.ppEnabledExtensionNames = enabled_extensions.data(); + 
instance_create_info.enabledLayerCount = 0; + instance_create_info.ppEnabledLayerNames = nullptr; + + // Enable debug layer on debug builds + if (enable_validation_layer) + { + static const char* layer_names[] = {"VK_LAYER_LUNARG_standard_validation"}; + instance_create_info.enabledLayerCount = 1; + instance_create_info.ppEnabledLayerNames = layer_names; + } + + VkInstance instance; + VkResult res = vkCreateInstance(&instance_create_info, nullptr, &instance); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateInstance failed: "); + return nullptr; + } + + return instance; +} + +bool VulkanContext::SelectInstanceExtensions(ExtensionList* extension_list, bool enable_surface, + bool enable_validation_layer) +{ + u32 extension_count = 0; + VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: "); + return false; + } + + if (extension_count == 0) + { + ERROR_LOG(VIDEO, "Vulkan: No extensions supported by instance."); + return false; + } + + std::vector available_extension_list(extension_count); + res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, + available_extension_list.data()); + _assert_(res == VK_SUCCESS); + + for (const auto& extension_properties : available_extension_list) + INFO_LOG(VIDEO, "Available extension: %s", extension_properties.extensionName); + + auto CheckForExtension = [&](const char* name, bool required) -> bool { + if (std::find_if(available_extension_list.begin(), available_extension_list.end(), + [&](const VkExtensionProperties& properties) { + return !strcmp(name, properties.extensionName); + }) != available_extension_list.end()) + { + INFO_LOG(VIDEO, "Enabling extension: %s", name); + extension_list->push_back(name); + return true; + } + + if (required) + { + ERROR_LOG(VIDEO, "Vulkan: Missing required extension %s.", name); + return false; + } + + return true; + }; + + // 
Common extensions + if (enable_surface && !CheckForExtension(VK_KHR_SURFACE_EXTENSION_NAME, true)) + { + return false; + } + +#if defined(VK_USE_PLATFORM_WIN32_KHR) + if (enable_surface && !CheckForExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, true)) + return false; +#elif defined(VK_USE_PLATFORM_XLIB_KHR) + if (enable_surface && !CheckForExtension(VK_KHR_XLIB_SURFACE_EXTENSION_NAME, true)) + return false; +#elif defined(VK_USE_PLATFORM_XCB_KHR) + if (enable_surface && !CheckForExtension(VK_KHR_XCB_SURFACE_EXTENSION_NAME, true)) + return false; +#elif defined(VK_USE_PLATFORM_ANDROID_KHR) + if (enable_surface && !CheckForExtension(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, true)) + return false; +#endif + + // VK_EXT_debug_report + if (enable_validation_layer && !CheckForExtension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, true)) + return false; + + return true; +} + +VulkanContext::GPUList VulkanContext::EnumerateGPUs(VkInstance instance) +{ + u32 gpu_count = 0; + VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: "); + return {}; + } + + GPUList gpus; + gpus.resize(gpu_count); + + res = vkEnumeratePhysicalDevices(instance, &gpu_count, gpus.data()); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: "); + return {}; + } + + return gpus; +} + +void VulkanContext::PopulateBackendInfo(VideoConfig* config) +{ + config->backend_info.api_type = APIType::Vulkan; + config->backend_info.bSupportsExclusiveFullscreen = false; // Currently WSI does not allow this. + config->backend_info.bSupports3DVision = false; // D3D-exclusive. + config->backend_info.bSupportsOversizedViewports = true; // Assumed support. + config->backend_info.bSupportsEarlyZ = true; // Assumed support. + config->backend_info.bSupportsPrimitiveRestart = true; // Assumed support. + config->backend_info.bSupportsBindingLayout = false; // Assumed support. 
+ config->backend_info.bSupportsPaletteConversion = true; // Assumed support. + config->backend_info.bSupportsClipControl = true; // Assumed support. + config->backend_info.bSupportsMultithreading = true; // Assumed support. + config->backend_info.bSupportsPostProcessing = false; // No support yet. + config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features. + config->backend_info.bSupportsGeometryShaders = false; // Dependent on features. + config->backend_info.bSupportsGSInstancing = false; // Dependent on features. + config->backend_info.bSupportsBBox = false; // Dependent on features. + config->backend_info.bSupportsSSAA = false; // Dependent on features. + config->backend_info.bSupportsDepthClamp = false; // Dependent on features. + config->backend_info.bSupportsReversedDepthRange = false; // No support yet due to driver bugs. +} + +void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) +{ + config->backend_info.Adapters.clear(); + for (VkPhysicalDevice physical_device : gpu_list) + { + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(physical_device, &properties); + config->backend_info.Adapters.push_back(properties.deviceName); + } +} + +void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalDevice gpu, + const VkPhysicalDeviceFeatures& features) +{ + config->backend_info.bSupportsDualSourceBlend = (features.dualSrcBlend == VK_TRUE); + config->backend_info.bSupportsGeometryShaders = (features.geometryShader == VK_TRUE); + config->backend_info.bSupportsGSInstancing = (features.geometryShader == VK_TRUE); + config->backend_info.bSupportsBBox = (features.fragmentStoresAndAtomics == VK_TRUE); + config->backend_info.bSupportsSSAA = (features.sampleRateShading == VK_TRUE); + + // Disable geometry shader when shaderTessellationAndGeometryPointSize is not supported. + // Seems this is needed for gl_Layer. 
+ if (!features.shaderTessellationAndGeometryPointSize) + config->backend_info.bSupportsGeometryShaders = VK_FALSE; + + // TODO: Investigate if there's a feature we can enable for GS instancing. + config->backend_info.bSupportsGSInstancing = VK_FALSE; + + // Depth clamping implies shaderClipDistance and depthClamp + config->backend_info.bSupportsDepthClamp = + (features.depthClamp == VK_TRUE && features.shaderClipDistance == VK_TRUE); +} + +void VulkanContext::PopulateBackendInfoMultisampleModes( + VideoConfig* config, VkPhysicalDevice gpu, const VkPhysicalDeviceProperties& properties) +{ + // Query image support for the EFB texture formats. + VkImageFormatProperties efb_color_properties = {}; + vkGetPhysicalDeviceImageFormatProperties( + gpu, EFB_COLOR_TEXTURE_FORMAT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, &efb_color_properties); + VkImageFormatProperties efb_depth_properties = {}; + vkGetPhysicalDeviceImageFormatProperties( + gpu, EFB_DEPTH_TEXTURE_FORMAT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0, &efb_depth_properties); + + // We can only support MSAA if it's supported on our render target formats. 
+ VkSampleCountFlags supported_sample_counts = properties.limits.framebufferColorSampleCounts & + properties.limits.framebufferDepthSampleCounts & + efb_color_properties.sampleCounts & + efb_color_properties.sampleCounts; + + // No AA + config->backend_info.AAModes.clear(); + config->backend_info.AAModes.emplace_back(1); + + // 2xMSAA/SSAA + if (supported_sample_counts & VK_SAMPLE_COUNT_2_BIT) + config->backend_info.AAModes.emplace_back(2); + + // 4xMSAA/SSAA + if (supported_sample_counts & VK_SAMPLE_COUNT_4_BIT) + config->backend_info.AAModes.emplace_back(4); + + // 8xMSAA/SSAA + if (supported_sample_counts & VK_SAMPLE_COUNT_8_BIT) + config->backend_info.AAModes.emplace_back(8); + + // 16xMSAA/SSAA + if (supported_sample_counts & VK_SAMPLE_COUNT_16_BIT) + config->backend_info.AAModes.emplace_back(16); + + // 32xMSAA/SSAA + if (supported_sample_counts & VK_SAMPLE_COUNT_32_BIT) + config->backend_info.AAModes.emplace_back(32); + + // 64xMSAA/SSAA + if (supported_sample_counts & VK_SAMPLE_COUNT_64_BIT) + config->backend_info.AAModes.emplace_back(64); +} + +std::unique_ptr VulkanContext::Create(VkInstance instance, VkPhysicalDevice gpu, + VkSurfaceKHR surface, VideoConfig* config, + bool enable_validation_layer) +{ + std::unique_ptr context = std::make_unique(instance, gpu); + + // Initialize DriverDetails so that we can check for bugs to disable features if needed. + DriverDetails::Init(DriverDetails::API_VULKAN, + DriverDetails::TranslatePCIVendorID(context->m_device_properties.vendorID), + DriverDetails::DRIVER_UNKNOWN, + static_cast(context->m_device_properties.driverVersion), + DriverDetails::Family::UNKNOWN); + + // Enable debug reports if validation layer is enabled. + if (enable_validation_layer) + context->EnableDebugReports(); + + // Attempt to create the device. + if (!context->CreateDevice(surface, enable_validation_layer)) + { + // Since we are destroying the instance, we're also responsible for destroying the surface. 
+ if (surface != VK_NULL_HANDLE) + vkDestroySurfaceKHR(instance, surface, nullptr); + + return nullptr; + } + + // Update video config with features. + PopulateBackendInfoFeatures(config, gpu, context->m_device_features); + PopulateBackendInfoMultisampleModes(config, gpu, context->m_device_properties); + return context; +} + +bool VulkanContext::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface, + bool enable_validation_layer) +{ + u32 extension_count = 0; + VkResult res = + vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumerateDeviceExtensionProperties failed: "); + return false; + } + + if (extension_count == 0) + { + ERROR_LOG(VIDEO, "Vulkan: No extensions supported by device."); + return false; + } + + std::vector available_extension_list(extension_count); + res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, + available_extension_list.data()); + _assert_(res == VK_SUCCESS); + + for (const auto& extension_properties : available_extension_list) + INFO_LOG(VIDEO, "Available extension: %s", extension_properties.extensionName); + + auto CheckForExtension = [&](const char* name, bool required) -> bool { + if (std::find_if(available_extension_list.begin(), available_extension_list.end(), + [&](const VkExtensionProperties& properties) { + return !strcmp(name, properties.extensionName); + }) != available_extension_list.end()) + { + INFO_LOG(VIDEO, "Enabling extension: %s", name); + extension_list->push_back(name); + return true; + } + + if (required) + { + ERROR_LOG(VIDEO, "Vulkan: Missing required extension %s.", name); + return false; + } + + return true; + }; + + if (enable_surface && !CheckForExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true)) + { + return false; + } + + return true; +} + +bool VulkanContext::SelectDeviceFeatures() +{ + VkPhysicalDeviceProperties properties; + 
vkGetPhysicalDeviceProperties(m_physical_device, &properties); + + VkPhysicalDeviceFeatures available_features; + vkGetPhysicalDeviceFeatures(m_physical_device, &available_features); + + // Not having geometry shaders or wide lines will cause issues with rendering. + if (!available_features.geometryShader && !available_features.wideLines) + WARN_LOG(VIDEO, "Vulkan: Missing both geometryShader and wideLines features."); + if (!available_features.largePoints) + WARN_LOG(VIDEO, "Vulkan: Missing large points feature. CPU EFB writes will be slower."); + if (!available_features.occlusionQueryPrecise) + WARN_LOG(VIDEO, "Vulkan: Missing precise occlusion queries. Perf queries will be inaccurate."); + + // Check push constant size. + if (properties.limits.maxPushConstantsSize < static_cast(PUSH_CONSTANT_BUFFER_SIZE)) + { + PanicAlert("Vulkan: Push contant buffer size %u is below minimum %u.", + properties.limits.maxPushConstantsSize, static_cast(PUSH_CONSTANT_BUFFER_SIZE)); + + return false; + } + + // Enable the features we use. 
+ m_device_features.dualSrcBlend = available_features.dualSrcBlend; + m_device_features.geometryShader = available_features.geometryShader; + m_device_features.samplerAnisotropy = available_features.samplerAnisotropy; + m_device_features.logicOp = available_features.logicOp; + m_device_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics; + m_device_features.sampleRateShading = available_features.sampleRateShading; + m_device_features.largePoints = available_features.largePoints; + m_device_features.shaderStorageImageMultisample = + available_features.shaderStorageImageMultisample; + m_device_features.shaderTessellationAndGeometryPointSize = + available_features.shaderTessellationAndGeometryPointSize; + m_device_features.occlusionQueryPrecise = available_features.occlusionQueryPrecise; + m_device_features.shaderClipDistance = available_features.shaderClipDistance; + m_device_features.depthClamp = available_features.depthClamp; + return true; +} + +bool VulkanContext::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer) +{ + u32 queue_family_count; + vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr); + if (queue_family_count == 0) + { + ERROR_LOG(VIDEO, "No queue families found on specified vulkan physical device."); + return false; + } + + std::vector queue_family_properties(queue_family_count); + vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, + queue_family_properties.data()); + INFO_LOG(VIDEO, "%u vulkan queue families", queue_family_count); + + // Find a graphics queue + // Currently we only use a single queue for both graphics and presenting. + // TODO: In the future we could do post-processing and presenting on a different queue. 
+ m_graphics_queue_family_index = queue_family_count; + for (uint32_t i = 0; i < queue_family_count; i++) + { + if (queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) + { + // Check that it can present to our surface from this queue + if (surface) + { + VkBool32 present_supported; + VkResult res = + vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: "); + return false; + } + + if (present_supported) + { + m_graphics_queue_family_index = i; + break; + } + } + else + { + // We don't need present, so any graphics queue will do. + m_graphics_queue_family_index = i; + break; + } + } + } + if (m_graphics_queue_family_index == queue_family_count) + { + ERROR_LOG(VIDEO, "Vulkan: Failed to find an acceptable graphics queue."); + return false; + } + + VkDeviceCreateInfo device_info = {}; + device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device_info.pNext = nullptr; + device_info.flags = 0; + + static constexpr float queue_priorities[] = {1.0f}; + VkDeviceQueueCreateInfo queue_info = {}; + queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queue_info.pNext = nullptr; + queue_info.flags = 0; + queue_info.queueFamilyIndex = m_graphics_queue_family_index; + queue_info.queueCount = 1; + queue_info.pQueuePriorities = queue_priorities; + device_info.queueCreateInfoCount = 1; + device_info.pQueueCreateInfos = &queue_info; + + ExtensionList enabled_extensions; + if (!SelectDeviceExtensions(&enabled_extensions, (surface != nullptr), enable_validation_layer)) + return false; + + device_info.enabledLayerCount = 0; + device_info.ppEnabledLayerNames = nullptr; + device_info.enabledExtensionCount = static_cast(enabled_extensions.size()); + device_info.ppEnabledExtensionNames = enabled_extensions.data(); + + // Check for required features before creating. 
+ if (!SelectDeviceFeatures()) + return false; + + device_info.pEnabledFeatures = &m_device_features; + + // Enable debug layer on debug builds + if (enable_validation_layer) + { + static const char* layer_names[] = {"VK_LAYER_LUNARG_standard_validation"}; + device_info.enabledLayerCount = 1; + device_info.ppEnabledLayerNames = layer_names; + } + + VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDevice failed: "); + return false; + } + + // With the device created, we can fill the remaining entry points. + if (!LoadVulkanDeviceFunctions(m_device)) + return false; + + // Grab the graphics queue (only one we're using at this point). + vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue); + return true; +} + +static VKAPI_ATTR VkBool32 VKAPI_CALL DebugReportCallback(VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objectType, + uint64_t object, size_t location, + int32_t messageCode, + const char* pLayerPrefix, + const char* pMessage, void* pUserData) +{ + std::string log_message = + StringFromFormat("Vulkan debug report: (%s) %s", pLayerPrefix ? pLayerPrefix : "", pMessage); + if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) + GENERIC_LOG(LogTypes::HOST_GPU, LogTypes::LERROR, "%s", log_message.c_str()) + else if (flags & (VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT)) + GENERIC_LOG(LogTypes::HOST_GPU, LogTypes::LWARNING, "%s", log_message.c_str()) + else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) + GENERIC_LOG(LogTypes::HOST_GPU, LogTypes::LINFO, "%s", log_message.c_str()) + else + GENERIC_LOG(LogTypes::HOST_GPU, LogTypes::LDEBUG, "%s", log_message.c_str()) + + return VK_FALSE; +} + +bool VulkanContext::EnableDebugReports() +{ + // Already enabled? 
+ if (m_debug_report_callback != VK_NULL_HANDLE) + return true; + + // Check for presence of the functions before calling + if (!vkCreateDebugReportCallbackEXT || !vkDestroyDebugReportCallbackEXT || + !vkDebugReportMessageEXT) + { + return false; + } + + VkDebugReportCallbackCreateInfoEXT callback_info = { + VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, nullptr, + VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | + VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | VK_DEBUG_REPORT_INFORMATION_BIT_EXT | + VK_DEBUG_REPORT_DEBUG_BIT_EXT, + DebugReportCallback, nullptr}; + + VkResult res = + vkCreateDebugReportCallbackEXT(m_instance, &callback_info, nullptr, &m_debug_report_callback); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDebugReportCallbackEXT failed: "); + return false; + } + + return true; +} + +// Destroys the debug report callback if one is registered and clears the stored handle. +void VulkanContext::DisableDebugReports() +{ + if (m_debug_report_callback != VK_NULL_HANDLE) + { + vkDestroyDebugReportCallbackEXT(m_instance, m_debug_report_callback, nullptr); + m_debug_report_callback = VK_NULL_HANDLE; + } +} + +// Finds the lowest-indexed memory type whose bit is set in `bits` and whose property flags +// contain every flag in `properties`. On success writes the index to *out_type_index and +// returns true; otherwise returns false and leaves *out_type_index untouched. +bool VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index) +{ + for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++) + { + // Unsigned literal so the shift stays well-defined for every bit of the u32 mask. + if ((bits & (1u << i)) != 0) + { + u32 supported = m_device_memory_properties.memoryTypes[i].propertyFlags & properties; + if (supported == properties) + { + *out_type_index = i; + return true; + } + } + } + + return false; +} + +// Convenience overload: same lookup, but raises a PanicAlert when nothing matches and then +// returns VK_MAX_MEMORY_TYPES. +u32 VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties) +{ + u32 type_index = VK_MAX_MEMORY_TYPES; + if (!GetMemoryType(bits, properties, &type_index)) + PanicAlert("Unable to find memory type for %x:%x", bits, properties); + + return type_index; +} + +// Picks a host-visible memory type for upload buffers, preferring host-coherent memory. +// If is_coherent is non-null it receives whether the flags finally used include HOST_COHERENT. +u32 VulkanContext::GetUploadMemoryType(u32 bits, bool* is_coherent) +{ + // Try for coherent memory first. 
+ VkMemoryPropertyFlags flags = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + u32 type_index; + if (!GetMemoryType(bits, flags, &type_index)) + { + WARN_LOG( + VIDEO, + "Vulkan: Failed to find a coherent memory type for uploads, this will affect performance."); + + // Try non-coherent memory. + flags &= ~VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + if (!GetMemoryType(bits, flags, &type_index)) + { + // We shouldn't have any memory types that aren't host-visible. + PanicAlert("Unable to get memory type for upload."); + type_index = 0; + } + } + + if (is_coherent) + *is_coherent = ((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0); + + return type_index; +} + +// Picks a host-visible memory type for readback buffers. Tries cached+coherent first, then +// cached only, then plain host-visible; falls back to index 0 after a PanicAlert if even that +// fails. The optional out-parameters report which of the COHERENT/CACHED flags remained in the +// flag set that was finally used. +u32 VulkanContext::GetReadbackMemoryType(u32 bits, bool* is_coherent, bool* is_cached) +{ + // Try for cached and coherent memory first. + VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + u32 type_index; + if (!GetMemoryType(bits, flags, &type_index)) + { + // For readbacks, caching is more important than coherency. + flags &= ~VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + if (!GetMemoryType(bits, flags, &type_index)) + { + WARN_LOG(VIDEO, "Vulkan: Failed to find a cached memory type for readbacks, this will affect " + "performance."); + + // Remove the cached bit as well. + flags &= ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + if (!GetMemoryType(bits, flags, &type_index)) + { + // We shouldn't have any memory types that aren't host-visible. 
+ PanicAlert("Unable to get memory type for readback."); + type_index = 0; + } + } + } + + if (is_coherent) + *is_coherent = ((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0); + if (is_cached) + *is_cached = ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0); + + return type_index; +} +} diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.h b/Source/Core/VideoBackends/Vulkan/VulkanContext.h new file mode 100644 index 0000000000..26daa656cd --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.h @@ -0,0 +1,131 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoCommon/VideoConfig.h" + +namespace Vulkan +{ +class VulkanContext +{ +public: + VulkanContext(VkInstance instance, VkPhysicalDevice physical_device); + ~VulkanContext(); + + // Determines if the Vulkan validation layer is available on the system. + static bool CheckValidationLayerAvailablility(); + + // Helper method to create a Vulkan instance. + static VkInstance CreateVulkanInstance(bool enable_surface, bool enable_validation_layer); + + // Returns a list of Vulkan-compatible GPUs. + using GPUList = std::vector; + static GPUList EnumerateGPUs(VkInstance instance); + + // Populates backend/video config. + // These are public so that the backend info can be populated without creating a context. + static void PopulateBackendInfo(VideoConfig* config); + static void PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list); + static void PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalDevice gpu, + const VkPhysicalDeviceFeatures& features); + static void PopulateBackendInfoMultisampleModes(VideoConfig* config, VkPhysicalDevice gpu, + const VkPhysicalDeviceProperties& properties); + + // Creates a Vulkan device context. 
+ // This assumes that PopulateBackendInfo and PopulateBackendInfoAdapters have already + // been called for the specified VideoConfig. + static std::unique_ptr Create(VkInstance instance, VkPhysicalDevice gpu, + VkSurfaceKHR surface, VideoConfig* config, + bool enable_validation_layer); + + // Enables/disables delivery of Vulkan debug report messages at runtime. + // In the future this could be hooked up to the Host GPU logging option. + bool EnableDebugReports(); + void DisableDebugReports(); + + // Global state accessors + VkInstance GetVulkanInstance() const { return m_instance; } + VkPhysicalDevice GetPhysicalDevice() const { return m_physical_device; } + VkDevice GetDevice() const { return m_device; } + VkQueue GetGraphicsQueue() const { return m_graphics_queue; } + u32 GetGraphicsQueueFamilyIndex() const { return m_graphics_queue_family_index; } + const VkQueueFamilyProperties& GetGraphicsQueueProperties() const + { + return m_graphics_queue_properties; + } + const VkPhysicalDeviceMemoryProperties& GetDeviceMemoryProperties() const + { + return m_device_memory_properties; + } + const VkPhysicalDeviceProperties& GetDeviceProperties() const { return m_device_properties; } + const VkPhysicalDeviceFeatures& GetDeviceFeatures() const { return m_device_features; } + const VkPhysicalDeviceLimits& GetDeviceLimits() const { return m_device_properties.limits; } + // Support bits, each read from the corresponding field of m_device_features + bool SupportsAnisotropicFiltering() const + { + return m_device_features.samplerAnisotropy == VK_TRUE; + } + bool SupportsGeometryShaders() const { return m_device_features.geometryShader == VK_TRUE; } + bool SupportsDualSourceBlend() const { return m_device_features.dualSrcBlend == VK_TRUE; } + bool SupportsLogicOps() const { return m_device_features.logicOp == VK_TRUE; } + bool SupportsBoundingBox() const { return m_device_features.fragmentStoresAndAtomics == VK_TRUE; } + bool SupportsPreciseOcclusionQueries() const + { + return m_device_features.occlusionQueryPrecise == VK_TRUE; + } + // Helpers for getting constants from m_device_properties.limits +
VkDeviceSize GetUniformBufferAlignment() const + { + return m_device_properties.limits.minUniformBufferOffsetAlignment; + } + // Minimum offset alignment for texel buffers; this is a separate device limit from the + // uniform buffer alignment above. + VkDeviceSize GetTexelBufferAlignment() const + { + return m_device_properties.limits.minTexelBufferOffsetAlignment; + } + VkDeviceSize GetBufferImageGranularity() const + { + return m_device_properties.limits.bufferImageGranularity; + } + float GetMaxSaxmplerAnisotropy() const { return m_device_properties.limits.maxSamplerAnisotropy; } + // Finds a memory type index for the specified memory properties and the bits returned by + // vkGetImageMemoryRequirements + bool GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index); + u32 GetMemoryType(u32 bits, VkMemoryPropertyFlags properties); + + // Finds a memory type for upload or readback buffers. + u32 GetUploadMemoryType(u32 bits, bool* is_coherent = nullptr); + u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr, bool* is_cached = nullptr); + +private: + using ExtensionList = std::vector; + static bool SelectInstanceExtensions(ExtensionList* extension_list, bool enable_surface, + bool enable_validation_layer); + bool SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface, + bool enable_validation_layer); + bool SelectDeviceFeatures(); + bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer); + + VkInstance m_instance = VK_NULL_HANDLE; + VkPhysicalDevice m_physical_device = VK_NULL_HANDLE; + VkDevice m_device = VK_NULL_HANDLE; + + VkQueue m_graphics_queue = VK_NULL_HANDLE; + u32 m_graphics_queue_family_index = 0; + VkQueueFamilyProperties m_graphics_queue_properties = {}; + + VkDebugReportCallbackEXT m_debug_report_callback = VK_NULL_HANDLE; + + VkPhysicalDeviceFeatures m_device_features = {}; + VkPhysicalDeviceProperties m_device_properties = {}; + VkPhysicalDeviceMemoryProperties m_device_memory_properties = {}; +}; + +extern std::unique_ptr g_vulkan_context; + +} // namespace Vulkan diff --git 
a/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl b/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl new file mode 100644 index 0000000000..a6c392d84b --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl @@ -0,0 +1,197 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +// Expands the VULKAN_*_ENTRY_POINT macros for each function when this file is included. +// Parameters: function name, whether the function is required. +// VULKAN_MODULE_ENTRY_POINT is for functions exported by the Vulkan loader library (vulkan-1.dll or libvulkan.so). +// VULKAN_INSTANCE_ENTRY_POINT is for instance-specific functions. +// VULKAN_DEVICE_ENTRY_POINT is for device-specific functions. + +#ifdef VULKAN_MODULE_ENTRY_POINT + +VULKAN_MODULE_ENTRY_POINT(vkCreateInstance, true) +VULKAN_MODULE_ENTRY_POINT(vkGetInstanceProcAddr, true) +VULKAN_MODULE_ENTRY_POINT(vkGetDeviceProcAddr, true) +VULKAN_MODULE_ENTRY_POINT(vkEnumerateInstanceExtensionProperties, true) +VULKAN_MODULE_ENTRY_POINT(vkEnumerateInstanceLayerProperties, true) + +#endif // VULKAN_MODULE_ENTRY_POINT + +#ifdef VULKAN_INSTANCE_ENTRY_POINT + +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyInstance, true) +VULKAN_INSTANCE_ENTRY_POINT(vkEnumeratePhysicalDevices, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceFeatures, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceFormatProperties, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceImageFormatProperties, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceProperties, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceQueueFamilyProperties, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceMemoryProperties, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateDevice, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyDevice, true) +VULKAN_INSTANCE_ENTRY_POINT(vkEnumerateDeviceExtensionProperties, true) +VULKAN_INSTANCE_ENTRY_POINT(vkEnumerateDeviceLayerProperties, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetDeviceQueue, true) 
+VULKAN_INSTANCE_ENTRY_POINT(vkQueueSubmit, true) +VULKAN_INSTANCE_ENTRY_POINT(vkQueueWaitIdle, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDeviceWaitIdle, true) +VULKAN_INSTANCE_ENTRY_POINT(vkAllocateMemory, true) +VULKAN_INSTANCE_ENTRY_POINT(vkFreeMemory, true) +VULKAN_INSTANCE_ENTRY_POINT(vkMapMemory, true) +VULKAN_INSTANCE_ENTRY_POINT(vkUnmapMemory, true) +VULKAN_INSTANCE_ENTRY_POINT(vkFlushMappedMemoryRanges, true) +VULKAN_INSTANCE_ENTRY_POINT(vkInvalidateMappedMemoryRanges, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetDeviceMemoryCommitment, true) +VULKAN_INSTANCE_ENTRY_POINT(vkBindBufferMemory, true) +VULKAN_INSTANCE_ENTRY_POINT(vkBindImageMemory, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetBufferMemoryRequirements, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetImageMemoryRequirements, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetImageSparseMemoryRequirements, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceSparseImageFormatProperties, true) +VULKAN_INSTANCE_ENTRY_POINT(vkQueueBindSparse, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateFence, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyFence, true) +VULKAN_INSTANCE_ENTRY_POINT(vkResetFences, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetFenceStatus, true) +VULKAN_INSTANCE_ENTRY_POINT(vkWaitForFences, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateSemaphore, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroySemaphore, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateEvent, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyEvent, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetEventStatus, true) +VULKAN_INSTANCE_ENTRY_POINT(vkSetEvent, true) +VULKAN_INSTANCE_ENTRY_POINT(vkResetEvent, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateQueryPool, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyQueryPool, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetQueryPoolResults, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateBufferView, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyBufferView, true) 
+VULKAN_INSTANCE_ENTRY_POINT(vkCreateImage, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyImage, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetImageSubresourceLayout, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateImageView, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyImageView, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateShaderModule, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyShaderModule, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreatePipelineCache, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyPipelineCache, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPipelineCacheData, true) +VULKAN_INSTANCE_ENTRY_POINT(vkMergePipelineCaches, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateGraphicsPipelines, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateComputePipelines, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyPipeline, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreatePipelineLayout, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyPipelineLayout, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateSampler, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroySampler, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateDescriptorSetLayout, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyDescriptorSetLayout, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateDescriptorPool, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyDescriptorPool, true) +VULKAN_INSTANCE_ENTRY_POINT(vkResetDescriptorPool, true) +VULKAN_INSTANCE_ENTRY_POINT(vkAllocateDescriptorSets, true) +VULKAN_INSTANCE_ENTRY_POINT(vkFreeDescriptorSets, true) +VULKAN_INSTANCE_ENTRY_POINT(vkUpdateDescriptorSets, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateFramebuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyFramebuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateRenderPass, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyRenderPass, true) +VULKAN_INSTANCE_ENTRY_POINT(vkGetRenderAreaGranularity, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateCommandPool, true) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyCommandPool, true) +VULKAN_INSTANCE_ENTRY_POINT(vkResetCommandPool, true) 
+VULKAN_INSTANCE_ENTRY_POINT(vkAllocateCommandBuffers, true) +VULKAN_INSTANCE_ENTRY_POINT(vkFreeCommandBuffers, true) +VULKAN_INSTANCE_ENTRY_POINT(vkBeginCommandBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkEndCommandBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkResetCommandBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdBindPipeline, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetViewport, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetScissor, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetLineWidth, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetDepthBias, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetBlendConstants, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetDepthBounds, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetStencilCompareMask, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetStencilWriteMask, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetStencilReference, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdBindDescriptorSets, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdBindIndexBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdBindVertexBuffers, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdDraw, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdDrawIndexed, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdDrawIndirect, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdDrawIndexedIndirect, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdDispatch, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdDispatchIndirect, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdCopyBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdCopyImage, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdBlitImage, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdCopyBufferToImage, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdCopyImageToBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdUpdateBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdFillBuffer, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdClearColorImage, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdClearDepthStencilImage, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdClearAttachments, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdResolveImage, true) 
+VULKAN_INSTANCE_ENTRY_POINT(vkCmdSetEvent, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdResetEvent, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdWaitEvents, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdPipelineBarrier, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdBeginQuery, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdEndQuery, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdResetQueryPool, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdWriteTimestamp, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdCopyQueryPoolResults, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdPushConstants, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdBeginRenderPass, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdNextSubpass, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdEndRenderPass, true) +VULKAN_INSTANCE_ENTRY_POINT(vkCmdExecuteCommands, true) + +VULKAN_INSTANCE_ENTRY_POINT(vkDestroySurfaceKHR, false) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceSurfaceSupportKHR, false) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceSurfaceCapabilitiesKHR, false) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceSurfaceFormatsKHR, false) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceSurfacePresentModesKHR, false) + +#if defined(VK_USE_PLATFORM_WIN32_KHR) + +VULKAN_INSTANCE_ENTRY_POINT(vkCreateWin32SurfaceKHR, false) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceWin32PresentationSupportKHR, false) + +#elif defined(VK_USE_PLATFORM_XLIB_KHR) + +VULKAN_INSTANCE_ENTRY_POINT(vkCreateXlibSurfaceKHR, false) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceXlibPresentationSupportKHR, false) + +#elif defined(VK_USE_PLATFORM_XCB_KHR) + +VULKAN_INSTANCE_ENTRY_POINT(vkCreateXcbSurfaceKHR, false) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceXcbPresentationSupportKHR, false) + +#elif defined(VK_USE_PLATFORM_ANDROID_KHR) + +VULKAN_INSTANCE_ENTRY_POINT(vkCreateAndroidSurfaceKHR, false) + +#endif + +VULKAN_INSTANCE_ENTRY_POINT(vkCreateDebugReportCallbackEXT, false) +VULKAN_INSTANCE_ENTRY_POINT(vkDestroyDebugReportCallbackEXT, false) 
+VULKAN_INSTANCE_ENTRY_POINT(vkDebugReportMessageEXT, false) + +#endif // VULKAN_INSTANCE_ENTRY_POINT + +#ifdef VULKAN_DEVICE_ENTRY_POINT + +VULKAN_DEVICE_ENTRY_POINT(vkCreateSwapchainKHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkDestroySwapchainKHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkGetSwapchainImagesKHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkAcquireNextImageKHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkQueuePresentKHR, false) + +#endif // VULKAN_DEVICE_ENTRY_POINT diff --git a/Source/Core/VideoBackends/Vulkan/VulkanLoader.cpp b/Source/Core/VideoBackends/Vulkan/VulkanLoader.cpp new file mode 100644 index 0000000000..f72d43d62e --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VulkanLoader.cpp @@ -0,0 +1,317 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include +#include + +#include "Common/CommonFuncs.h" +#include "Common/Logging/Log.h" +#include "Common/StringUtil.h" + +#include "VideoBackends/Vulkan/VulkanLoader.h" + +#if defined(VK_USE_PLATFORM_WIN32_KHR) +#include +#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || \ + defined(VK_USE_PLATFORM_ANDROID_KHR) +#include +#endif + +// Define one global function pointer (PFN_<name> <name>) for every entry point listed in +// VulkanEntryPoints.inl. +#define VULKAN_MODULE_ENTRY_POINT(name, required) PFN_##name name; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) PFN_##name name; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) PFN_##name name; +#include "VideoBackends/Vulkan/VulkanEntryPoints.inl" +#undef VULKAN_DEVICE_ENTRY_POINT +#undef VULKAN_INSTANCE_ENTRY_POINT +#undef VULKAN_MODULE_ENTRY_POINT + +namespace Vulkan +{ +// Sets every module, instance and device entry point pointer back to nullptr by re-expanding +// the entry point list with an assignment for each name. +static void ResetVulkanLibraryFunctionPointers() +{ +#define VULKAN_MODULE_ENTRY_POINT(name, required) name = nullptr; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) name = nullptr; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) name = nullptr; +#include "VideoBackends/Vulkan/VulkanEntryPoints.inl" +#undef VULKAN_DEVICE_ENTRY_POINT +#undef VULKAN_INSTANCE_ENTRY_POINT +#undef VULKAN_MODULE_ENTRY_POINT +} 
+ +#if defined(VK_USE_PLATFORM_WIN32_KHR) + +// Handle of the loaded vulkan-1.dll and the number of outstanding LoadVulkanLibrary() calls. +static HMODULE vulkan_module; +static std::atomic_int vulkan_module_ref_count = {0}; + +// Loads vulkan-1.dll and resolves the module-level entry points. If the library is already +// loaded only the reference count is incremented. Returns false when the DLL cannot be loaded +// or a required entry point is missing; in the latter case the library is released again. +bool LoadVulkanLibrary() +{ + // Not thread safe if a second thread calls the loader whilst the first is still in-progress. + if (vulkan_module) + { + vulkan_module_ref_count++; + return true; + } + + vulkan_module = LoadLibraryA("vulkan-1.dll"); + if (!vulkan_module) + { + ERROR_LOG(VIDEO, "Failed to load vulkan-1.dll"); + return false; + } + + bool required_functions_missing = false; + auto LoadFunction = [&](FARPROC* func_ptr, const char* name, bool is_required) { + *func_ptr = GetProcAddress(vulkan_module, name); + if (!(*func_ptr) && is_required) + { + ERROR_LOG(VIDEO, "Vulkan: Failed to load required module function %s", name); + required_functions_missing = true; + } + }; + +#define VULKAN_MODULE_ENTRY_POINT(name, required) \ + LoadFunction(reinterpret_cast(&name), #name, required); +#include "VideoBackends/Vulkan/VulkanEntryPoints.inl" +#undef VULKAN_MODULE_ENTRY_POINT + + if (required_functions_missing) + { + ResetVulkanLibraryFunctionPointers(); + FreeLibrary(vulkan_module); + vulkan_module = nullptr; + return false; + } + + vulkan_module_ref_count++; + return true; +} + +// Drops one reference; once the count is no longer positive, all entry points are reset and +// the DLL is released. +// NOTE(review): an unbalanced call would reach FreeLibrary with a null module - presumably +// callers always pair this with a successful LoadVulkanLibrary(); confirm. +void UnloadVulkanLibrary() +{ + if ((--vulkan_module_ref_count) > 0) + return; + + ResetVulkanLibraryFunctionPointers(); + FreeLibrary(vulkan_module); + vulkan_module = nullptr; +} + +#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || \ + defined(VK_USE_PLATFORM_ANDROID_KHR) + +// dlopen() handle of the Vulkan library and the number of outstanding LoadVulkanLibrary() calls. +static void* vulkan_module; +static std::atomic_int vulkan_module_ref_count = {0}; + +// Same contract as the Win32 variant, but the library is located with dlopen() using the +// candidate names below, tried in order. +bool LoadVulkanLibrary() +{ + // Not thread safe if a second thread calls the loader whilst the first is still in-progress. + if (vulkan_module) + { + vulkan_module_ref_count++; + return true; + } + + // Names of libraries to search. Desktop should use libvulkan.so.1 or libvulkan.so. 
+ static const char* search_lib_names[] = {"libvulkan.so.1", "libvulkan.so"}; + + for (size_t i = 0; i < ArraySize(search_lib_names); i++) + { + vulkan_module = dlopen(search_lib_names[i], RTLD_NOW); + if (vulkan_module) + break; + } + + if (!vulkan_module) + { + ERROR_LOG(VIDEO, "Failed to load or locate libvulkan.so"); + return false; + } + + bool required_functions_missing = false; + auto LoadFunction = [&](void** func_ptr, const char* name, bool is_required) { + *func_ptr = dlsym(vulkan_module, name); + if (!(*func_ptr) && is_required) + { + ERROR_LOG(VIDEO, "Vulkan: Failed to load required module function %s", name); + required_functions_missing = true; + } + }; + +#define VULKAN_MODULE_ENTRY_POINT(name, required) \ + LoadFunction(reinterpret_cast(&name), #name, required); +#include "VideoBackends/Vulkan/VulkanEntryPoints.inl" +#undef VULKAN_MODULE_ENTRY_POINT + + if (required_functions_missing) + { + ResetVulkanLibraryFunctionPointers(); + dlclose(vulkan_module); + vulkan_module = nullptr; + return false; + } + + vulkan_module_ref_count++; + return true; +} + +// Drops one reference; once the count is no longer positive, all entry points are reset and +// the library handle is closed. +// NOTE(review): an unbalanced call would reach dlclose with a null handle - presumably callers +// always pair this with a successful LoadVulkanLibrary(); confirm. +void UnloadVulkanLibrary() +{ + if ((--vulkan_module_ref_count) > 0) + return; + + ResetVulkanLibraryFunctionPointers(); + dlclose(vulkan_module); + vulkan_module = nullptr; +} +#else + +//#warning Unknown platform, not compiling loader. 
+ +// Fallback for platforms without a loader implementation: loading always fails. +bool LoadVulkanLibrary() +{ + return false; +} + +// Fallback unload: there is no library to release, so only the entry points are cleared. +void UnloadVulkanLibrary() +{ + ResetVulkanLibraryFunctionPointers(); +} + +#endif + +// Resolves every VULKAN_INSTANCE_ENTRY_POINT through vkGetInstanceProcAddr. All entries are +// attempted even after a failure; returns false if any entry marked as required is missing. +bool LoadVulkanInstanceFunctions(VkInstance instance) +{ + bool required_functions_missing = false; + auto LoadFunction = [&](PFN_vkVoidFunction* func_ptr, const char* name, bool is_required) { + *func_ptr = vkGetInstanceProcAddr(instance, name); + if (!(*func_ptr) && is_required) + { + ERROR_LOG(VIDEO, "Vulkan: Failed to load required instance function %s", name); + required_functions_missing = true; + } + }; + +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) \ + LoadFunction(reinterpret_cast(&name), #name, required); +#include "VideoBackends/Vulkan/VulkanEntryPoints.inl" +#undef VULKAN_INSTANCE_ENTRY_POINT + + return !required_functions_missing; +} + +// Resolves every VULKAN_DEVICE_ENTRY_POINT through vkGetDeviceProcAddr. All entries are +// attempted even after a failure; returns false if any entry marked as required is missing. +bool LoadVulkanDeviceFunctions(VkDevice device) +{ + bool required_functions_missing = false; + auto LoadFunction = [&](PFN_vkVoidFunction* func_ptr, const char* name, bool is_required) { + *func_ptr = vkGetDeviceProcAddr(device, name); + if (!(*func_ptr) && is_required) + { + ERROR_LOG(VIDEO, "Vulkan: Failed to load required device function %s", name); + required_functions_missing = true; + } + }; + +#define VULKAN_DEVICE_ENTRY_POINT(name, required) \ + LoadFunction(reinterpret_cast(&name), #name, required); +#include "VideoBackends/Vulkan/VulkanEntryPoints.inl" +#undef VULKAN_DEVICE_ENTRY_POINT + + return !required_functions_missing; +} + +// Returns the enumerator name of a VkResult as a string literal, or "UNKNOWN_VK_RESULT" for +// values that are not listed. +const char* VkResultToString(VkResult res) +{ + switch (res) + { + case VK_SUCCESS: + return "VK_SUCCESS"; + + case VK_NOT_READY: + return "VK_NOT_READY"; + + case VK_TIMEOUT: + return "VK_TIMEOUT"; + + case VK_EVENT_SET: + return "VK_EVENT_SET"; + + case VK_EVENT_RESET: + return "VK_EVENT_RESET"; + + case VK_INCOMPLETE: + return "VK_INCOMPLETE"; + + case VK_ERROR_OUT_OF_HOST_MEMORY: + return "VK_ERROR_OUT_OF_HOST_MEMORY"; + + case VK_ERROR_OUT_OF_DEVICE_MEMORY: + return "VK_ERROR_OUT_OF_DEVICE_MEMORY"; + + case 
VK_ERROR_INITIALIZATION_FAILED: + return "VK_ERROR_INITIALIZATION_FAILED"; + + case VK_ERROR_DEVICE_LOST: + return "VK_ERROR_DEVICE_LOST"; + + case VK_ERROR_MEMORY_MAP_FAILED: + return "VK_ERROR_MEMORY_MAP_FAILED"; + + case VK_ERROR_LAYER_NOT_PRESENT: + return "VK_ERROR_LAYER_NOT_PRESENT"; + + case VK_ERROR_EXTENSION_NOT_PRESENT: + return "VK_ERROR_EXTENSION_NOT_PRESENT"; + + case VK_ERROR_FEATURE_NOT_PRESENT: + return "VK_ERROR_FEATURE_NOT_PRESENT"; + + case VK_ERROR_INCOMPATIBLE_DRIVER: + return "VK_ERROR_INCOMPATIBLE_DRIVER"; + + case VK_ERROR_TOO_MANY_OBJECTS: + return "VK_ERROR_TOO_MANY_OBJECTS"; + + case VK_ERROR_FORMAT_NOT_SUPPORTED: + return "VK_ERROR_FORMAT_NOT_SUPPORTED"; + + case VK_ERROR_SURFACE_LOST_KHR: + return "VK_ERROR_SURFACE_LOST_KHR"; + + case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR: + return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"; + + case VK_SUBOPTIMAL_KHR: + return "VK_SUBOPTIMAL_KHR"; + + case VK_ERROR_OUT_OF_DATE_KHR: + return "VK_ERROR_OUT_OF_DATE_KHR"; + + case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR: + return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"; + + case VK_ERROR_VALIDATION_FAILED_EXT: + return "VK_ERROR_VALIDATION_FAILED_EXT"; + + case VK_ERROR_INVALID_SHADER_NV: + return "VK_ERROR_INVALID_SHADER_NV"; + + default: + return "UNKNOWN_VK_RESULT"; + } +} + +// Formats msg with the variadic arguments, then logs "(func_name) message (code: name)" to the +// VIDEO log at the given level, where code/name are the numeric and string forms of res. +void LogVulkanResult(int level, const char* func_name, VkResult res, const char* msg, ...) 
+{ + std::va_list ap; + va_start(ap, msg); + std::string real_msg = StringFromFormatV(msg, ap); + va_end(ap); + + real_msg = StringFromFormat("(%s) %s (%d: %s)", func_name, real_msg.c_str(), + static_cast(res), VkResultToString(res)); + + GENERIC_LOG(LogTypes::VIDEO, static_cast(level), "%s", real_msg.c_str()); +} + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VulkanLoader.h b/Source/Core/VideoBackends/Vulkan/VulkanLoader.h new file mode 100644 index 0000000000..fbac3a30f8 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/VulkanLoader.h @@ -0,0 +1,45 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#define VK_NO_PROTOTYPES + +#if defined(WIN32) +#define VK_USE_PLATFORM_WIN32_KHR +#elif defined(HAVE_X11) +// Currently we're getting xlib handles passed to the backend. +// If this ever changes to xcb, it's a simple change here. +#define VK_USE_PLATFORM_XLIB_KHR +//#define VK_USE_PLATFORM_XCB_KHR +#elif defined(ANDROID) +#define VK_USE_PLATFORM_ANDROID_KHR +#else +//#warning Unknown platform +#endif + +#include "vulkan/vulkan.h" + +// We abuse the preprocessor here to only need to specify function names once. +#define VULKAN_MODULE_ENTRY_POINT(name, required) extern PFN_##name name; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) extern PFN_##name name; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) extern PFN_##name name; +#include "VideoBackends/Vulkan/VulkanEntryPoints.inl" +#undef VULKAN_DEVICE_ENTRY_POINT +#undef VULKAN_INSTANCE_ENTRY_POINT +#undef VULKAN_MODULE_ENTRY_POINT + +namespace Vulkan +{ +bool LoadVulkanLibrary(); +bool LoadVulkanInstanceFunctions(VkInstance instance); +bool LoadVulkanDeviceFunctions(VkDevice device); +void UnloadVulkanLibrary(); + +const char* VkResultToString(VkResult res); +void LogVulkanResult(int level, const char* func_name, VkResult res, const char* msg, ...); + +// Logs a failed VkResult together with the name of the calling function. +// NOTE(review): the literal level 2 is presumably LogTypes::LERROR - confirm against Log.h. +#define LOG_VULKAN_ERROR(res, ...) 
LogVulkanResult(2, __FUNCTION__, res, __VA_ARGS__) + +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/main.cpp b/Source/Core/VideoBackends/Vulkan/main.cpp new file mode 100644 index 0000000000..a3ec2bc9f9 --- /dev/null +++ b/Source/Core/VideoBackends/Vulkan/main.cpp @@ -0,0 +1,272 @@ +// Copyright 2016 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include "Core/Host.h" + +#include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/Constants.h" +#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/PerfQuery.h" +#include "VideoBackends/Vulkan/Renderer.h" +#include "VideoBackends/Vulkan/StateTracker.h" +#include "VideoBackends/Vulkan/SwapChain.h" +#include "VideoBackends/Vulkan/TextureCache.h" +#include "VideoBackends/Vulkan/VertexManager.h" +#include "VideoBackends/Vulkan/VideoBackend.h" +#include "VideoBackends/Vulkan/VulkanContext.h" + +#include "VideoCommon/DriverDetails.h" +#include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/VideoBackendBase.h" +#include "VideoCommon/VideoConfig.h" + +namespace Vulkan +{ +// Fills g_Config with backend information without creating a device: loads the Vulkan library, +// creates a temporary instance (no surface, no validation layer), enumerates the GPUs and +// queries features/multisample modes from the selected adapter (or the first one if the +// configured index is out of range). The temporary instance and the library reference are +// released again before returning. Failures to load the library or create the instance are +// reported with PanicAlert. +// NOTE(review): vkDestroyInstance is called even when LoadVulkanInstanceFunctions() returned +// false - presumably that pointer is always resolved in practice; confirm. +void VideoBackend::InitBackendInfo() +{ + VulkanContext::PopulateBackendInfo(&g_Config); + + if (LoadVulkanLibrary()) + { + VkInstance temp_instance = VulkanContext::CreateVulkanInstance(false, false); + if (temp_instance) + { + if (LoadVulkanInstanceFunctions(temp_instance)) + { + VulkanContext::GPUList gpu_list = VulkanContext::EnumerateGPUs(temp_instance); + VulkanContext::PopulateBackendInfoAdapters(&g_Config, gpu_list); + + if (!gpu_list.empty()) + { + // Use the selected adapter, or the first to fill features. 
+ size_t device_index = static_cast(g_Config.iAdapter); + if (device_index >= gpu_list.size()) + device_index = 0; + + VkPhysicalDevice gpu = gpu_list[device_index]; + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(gpu, &properties); + VkPhysicalDeviceFeatures features; + vkGetPhysicalDeviceFeatures(gpu, &features); + VulkanContext::PopulateBackendInfoFeatures(&g_Config, gpu, features); + VulkanContext::PopulateBackendInfoMultisampleModes(&g_Config, gpu, properties); + } + } + + vkDestroyInstance(temp_instance, nullptr); + } + else + { + PanicAlert("Failed to create Vulkan instance."); + } + + UnloadVulkanLibrary(); + } + else + { + PanicAlert("Failed to load Vulkan library."); + } +} + +bool VideoBackend::Initialize(void* window_handle) +{ + if (!LoadVulkanLibrary()) + { + PanicAlert("Failed to load Vulkan library."); + return false; + } + + // HACK: Use InitBackendInfo to initially populate backend features. + // This is because things like stereo get disabled when the config is validated, + // which happens before our device is created (settings control instance behavior), + // and we don't want that to happen if the device actually supports it. + InitBackendInfo(); + InitializeShared(); + + // Check for presence of the debug layer before trying to enable it + bool enable_validation_layer = g_Config.bEnableValidationLayer; + if (enable_validation_layer && !VulkanContext::CheckValidationLayerAvailablility()) + { + WARN_LOG(VIDEO, "Validation layer requested but not available, disabling."); + enable_validation_layer = false; + } + + // Create Vulkan instance, needed before we can create a surface. 
+ bool enable_surface = (window_handle != nullptr); + VkInstance instance = + VulkanContext::CreateVulkanInstance(enable_surface, enable_validation_layer); + if (instance == VK_NULL_HANDLE) + { + PanicAlert("Failed to create Vulkan instance."); + UnloadVulkanLibrary(); + ShutdownShared(); + return false; + } + + // Load instance function pointers + if (!LoadVulkanInstanceFunctions(instance)) + { + PanicAlert("Failed to load Vulkan instance functions."); + vkDestroyInstance(instance, nullptr); + UnloadVulkanLibrary(); + ShutdownShared(); + return false; + } + + // Create Vulkan surface + VkSurfaceKHR surface = VK_NULL_HANDLE; + if (enable_surface) + { + surface = SwapChain::CreateVulkanSurface(instance, window_handle); + if (surface == VK_NULL_HANDLE) + { + PanicAlert("Failed to create Vulkan surface."); + vkDestroyInstance(instance, nullptr); + UnloadVulkanLibrary(); + ShutdownShared(); + return false; + } + } + + // Fill the adapter list, and check if the user has selected an invalid device + // For some reason nvidia's driver crashes randomly if you call vkEnumeratePhysicalDevices + // after creating a device.. + VulkanContext::GPUList gpu_list = VulkanContext::EnumerateGPUs(instance); + size_t selected_adapter_index = static_cast(g_Config.iAdapter); + if (gpu_list.empty()) + { + PanicAlert("No Vulkan physical devices available."); + if (surface != VK_NULL_HANDLE) + vkDestroySurfaceKHR(instance, surface, nullptr); + + vkDestroyInstance(instance, nullptr); + UnloadVulkanLibrary(); + ShutdownShared(); + return false; + } + else if (selected_adapter_index >= gpu_list.size()) + { + WARN_LOG(VIDEO, "Vulkan adapter index out of range, selecting first adapter."); + selected_adapter_index = 0; + } + + // Pass ownership over to VulkanContext, and let it take care of everything. 
+ g_vulkan_context = VulkanContext::Create(instance, gpu_list[selected_adapter_index], surface, + &g_Config, enable_validation_layer); + if (!g_vulkan_context) + { + PanicAlert("Failed to create Vulkan device"); + UnloadVulkanLibrary(); + ShutdownShared(); + return false; + } + + // Create command buffers. We do this separately because the other classes depend on it. + g_command_buffer_mgr = std::make_unique(g_Config.bBackendMultithreading); + if (!g_command_buffer_mgr->Initialize()) + { + PanicAlert("Failed to create Vulkan command buffers"); + g_command_buffer_mgr.reset(); + g_vulkan_context.reset(); + UnloadVulkanLibrary(); + ShutdownShared(); + return false; + } + + // Create main wrapper instances. + g_object_cache = std::make_unique(); + g_framebuffer_manager = std::make_unique(); + g_renderer = std::make_unique(); + + // Cast to our wrapper classes, so we can call the init methods. + Renderer* renderer = static_cast(g_renderer.get()); + FramebufferManager* framebuffer_mgr = + static_cast(g_framebuffer_manager.get()); + + // Invoke init methods on main wrapper classes. + // These have to be done before the others because the destructors + // for the remaining classes may call methods on these. + if (!g_object_cache->Initialize() || !framebuffer_mgr->Initialize() || + !renderer->Initialize(framebuffer_mgr, window_handle, surface)) + { + PanicAlert("Failed to initialize Vulkan classes."); + g_renderer.reset(); + g_object_cache.reset(); + g_command_buffer_mgr.reset(); + g_vulkan_context.reset(); + UnloadVulkanLibrary(); + ShutdownShared(); + return false; + } + + // Create remaining wrapper instances. 
+ g_vertex_manager = std::make_unique(); + g_texture_cache = std::make_unique(); + g_perf_query = std::make_unique(); + VertexManager* vertex_manager = static_cast(g_vertex_manager.get()); + TextureCache* texture_cache = static_cast(g_texture_cache.get()); + PerfQuery* perf_query = static_cast(g_perf_query.get()); + if (!vertex_manager->Initialize(renderer->GetStateTracker()) || + !texture_cache->Initialize(renderer->GetStateTracker()) || + !perf_query->Initialize(renderer->GetStateTracker())) + { + PanicAlert("Failed to initialize Vulkan classes."); + g_perf_query.reset(); + g_texture_cache.reset(); + g_vertex_manager.reset(); + g_renderer.reset(); + g_object_cache.reset(); + g_command_buffer_mgr.reset(); + g_vulkan_context.reset(); + UnloadVulkanLibrary(); + ShutdownShared(); + return false; + } + + return true; +} + +// This is called after Initialize() from the Core +// Run from the graphics thread +void VideoBackend::Video_Prepare() +{ + // Display the name so the user knows which device was actually created + OSD::AddMessage(StringFromFormat("Using physical adapter %s", + g_vulkan_context->GetDeviceProperties().deviceName) + .c_str(), + 5000); +} + +void VideoBackend::Shutdown() +{ + g_command_buffer_mgr->WaitForGPUIdle(); + + g_object_cache.reset(); + g_command_buffer_mgr.reset(); + g_vulkan_context.reset(); + + UnloadVulkanLibrary(); + + ShutdownShared(); +} + +void VideoBackend::Video_Cleanup() +{ + g_command_buffer_mgr->WaitForGPUIdle(); + + // Save all cached pipelines out to disk for next time. 
+ g_object_cache->SavePipelineCache(); + + g_texture_cache.reset(); + g_perf_query.reset(); + g_vertex_manager.reset(); + g_renderer.reset(); + g_framebuffer_manager.reset(); + + CleanupShared(); +} +} diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index 6da311c4e2..645360d791 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -11,6 +11,7 @@ namespace DriverDetails { struct BugInfo { + API m_api; // Which API has the issue u32 m_os; // Which OS has the issue Vendor m_vendor; // Which vendor has the error Driver m_driver; // Which driver has the error @@ -36,6 +37,7 @@ const u32 m_os = OS_ALL | OS_FREEBSD; const u32 m_os = OS_ALL | OS_OPENBSD; #endif +static API m_api = API_OPENGL; static Vendor m_vendor = VENDOR_UNKNOWN; static Driver m_driver = DRIVER_UNKNOWN; static Family m_family = Family::UNKNOWN; @@ -44,49 +46,60 @@ static double m_version = 0.0; // This is a list of all known bugs for each vendor // We use this to check if the device and driver has a issue static BugInfo m_known_bugs[] = { - {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, - true}, - {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENNEGATEDBOOLEAN, -1.0, + {API_OPENGL, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, + -1.0, -1.0, true}, + {API_OPENGL, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, + BUG_BROKENNEGATEDBOOLEAN, -1.0, -1.0, true}, + {API_OPENGL, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENEXPLICITFLUSH, + -1.0, -1.0, true}, + {API_OPENGL, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true}, - {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKENEXPLICITFLUSH, -1.0, -1.0, + {API_OPENGL, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKENVSYNC, -1.0, -1.0, true}, - {OS_ALL, 
VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true}, - {OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKENVSYNC, -1.0, -1.0, true}, - {OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, + {API_OPENGL, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN, BUG_BROKENBUFFERSTREAM, + -1.0, -1.0, true}, + {API_OPENGL, OS_ALL, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN, BUG_BROKENUBO, 900, 916, true}, - {OS_ALL, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN, BUG_BROKENUBO, 900, 916, true}, - {OS_ALL, VENDOR_MESA, DRIVER_R600, Family::UNKNOWN, BUG_BROKENUBO, 900, 913, true}, - {OS_ALL, VENDOR_MESA, DRIVER_R600, Family::UNKNOWN, BUG_BROKENGEOMETRYSHADERS, -1.0, 1112.0, - true}, - {OS_ALL, VENDOR_MESA, DRIVER_I965, Family::INTEL_SANDY, BUG_BROKENGEOMETRYSHADERS, -1.0, 1120.0, - true}, - {OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENUBO, 900, 920, true}, - {OS_ALL, VENDOR_MESA, DRIVER_ALL, Family::UNKNOWN, BUG_BROKENCOPYIMAGE, -1.0, 1064.0, true}, - {OS_LINUX, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_BROKENPINNEDMEMORY, -1.0, -1.0, true}, - {OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENBUFFERSTORAGE, -1.0, - 33138.0, true}, - {OS_OSX, VENDOR_INTEL, DRIVER_INTEL, Family::INTEL_SANDY, BUG_PRIMITIVERESTART, -1.0, -1.0, - true}, - {OS_WINDOWS, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, - true}, - {OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, - true}, - {OS_WINDOWS, VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, - 101810.3907, 101810.3960, true}, - {OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true}, - {OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENCLIPDISTANCE, -1.0, -1.0, true}, + {API_OPENGL, OS_ALL, VENDOR_MESA, DRIVER_R600, Family::UNKNOWN, BUG_BROKENUBO, 900, 913, true}, + 
{API_OPENGL, OS_ALL, VENDOR_MESA, DRIVER_R600, Family::UNKNOWN, BUG_BROKENGEOMETRYSHADERS, -1.0, + 1112.0, true}, + {API_OPENGL, OS_ALL, VENDOR_MESA, DRIVER_I965, Family::INTEL_SANDY, BUG_BROKENGEOMETRYSHADERS, + -1.0, 1120.0, true}, + {API_OPENGL, OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENUBO, 900, 920, true}, + {API_OPENGL, OS_ALL, VENDOR_MESA, DRIVER_ALL, Family::UNKNOWN, BUG_BROKENCOPYIMAGE, -1.0, + 1064.0, true}, + {API_OPENGL, OS_LINUX, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_BROKENPINNEDMEMORY, -1.0, + -1.0, true}, + {API_OPENGL, OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENBUFFERSTORAGE, + -1.0, 33138.0, true}, + {API_OPENGL, OS_OSX, VENDOR_INTEL, DRIVER_INTEL, Family::INTEL_SANDY, BUG_PRIMITIVERESTART, + -1.0, -1.0, true}, + {API_OPENGL, OS_WINDOWS, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, + -1.0, -1.0, true}, + {API_OPENGL, OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, + -1.0, -1.0, true}, + {API_OPENGL, OS_WINDOWS, VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, + BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true}, + {API_OPENGL, OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, + -1.0, true}, + {API_OPENGL, OS_ALL, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, BUG_BROKENCLIPDISTANCE, -1.0, + -1.0, true}, + {API_VULKAN, OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, + BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION, -1.0, -1.0, true}, }; static std::map m_bugs; -void Init(Vendor vendor, Driver driver, const double version, const Family family) +void Init(API api, Vendor vendor, Driver driver, const double version, const Family family) { + m_api = api; m_vendor = vendor; m_driver = driver; m_version = version; m_family = family; if (driver == DRIVER_UNKNOWN) + { switch (vendor) { case VENDOR_NVIDIA: @@ -108,10 +121,15 @@ void Init(Vendor vendor, Driver driver, const double version, const Family famil default: 
break; } + } + + // Clear bug list, as the API may have changed + m_bugs.clear(); for (auto& bug : m_known_bugs) { - if ((bug.m_os & m_os) && (bug.m_vendor == m_vendor || bug.m_vendor == VENDOR_ALL) && + if ((bug.m_api & api) && (bug.m_os & m_os) && + (bug.m_vendor == m_vendor || bug.m_vendor == VENDOR_ALL) && (bug.m_driver == m_driver || bug.m_driver == DRIVER_ALL) && (bug.m_family == m_family || bug.m_family == Family::UNKNOWN) && (bug.m_versionstart <= m_version || bug.m_versionstart == -1) && @@ -127,4 +145,31 @@ bool HasBug(Bug bug) return false; return it->second.m_hasbug; } + +Vendor TranslatePCIVendorID(u32 vendor_id) +{ + switch (vendor_id) + { + case 0x10DE: + return VENDOR_NVIDIA; + + case 0x1002: + case 0x1022: + return VENDOR_ATI; + + case 0x8086: + case 0x8087: + return VENDOR_INTEL; + + // TODO: Is this correct for Mali? + case 0x13B6: + return VENDOR_ARM; + + case 0x5143: + return VENDOR_QUALCOMM; + + default: + return VENDOR_UNKNOWN; + } +} } diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index b6dcf87963..ebf38886be 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -8,6 +8,15 @@ namespace DriverDetails { +// API types supported by driver details +// This is separate to APIType in VideoConfig.h due to the fact that a bug +// can affect multiple APIs. +enum API +{ + API_OPENGL = (1 << 0), + API_VULKAN = (1 << 1) +}; + // Enum of supported operating systems enum OS { @@ -213,12 +222,23 @@ enum Bug // the geometry shader. Current workaround is to make sure the geometry shader always consumes // the gl_ClipDistance inputs from the vertex shader. BUG_BROKENCLIPDISTANCE, + + // Bug: Dual-source outputs from fragment shaders are broken on AMD Vulkan drivers + // Started Version: -1 + // Ended Version: -1 + // Fragment shaders that specify dual-source outputs, via layout(location = 0, index = ...) cause + // the driver to fail to create graphics pipelines. 
The workaround for this is to specify the + // index as a MRT location instead, or omit the binding completely. + BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION, }; // Initializes our internal vendor, device family, and driver version -void Init(Vendor vendor, Driver driver, const double version, const Family family); +void Init(API api, Vendor vendor, Driver driver, const double version, const Family family); // Once Vendor and driver version is set, this will return if it has the applicable bug passed to // it. bool HasBug(Bug bug); + +// Attempts to map a PCI vendor ID to our Vendor enumeration +Vendor TranslatePCIVendorID(u32 vendor_id); } diff --git a/Source/Core/VideoCommon/GeometryShaderGen.cpp b/Source/Core/VideoCommon/GeometryShaderGen.cpp index 83b30943bc..90b9d01e16 100644 --- a/Source/Core/VideoCommon/GeometryShaderGen.cpp +++ b/Source/Core/VideoCommon/GeometryShaderGen.cpp @@ -55,7 +55,7 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid if (uid_data->wireframe) vertex_out++; - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { // Insert layout parameters if (g_ActiveConfig.backend_info.bSupportsGSInstancing) @@ -77,11 +77,11 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid out.Write("%s", s_lighting_struct); // uniforms - if (ApiType == APIType::OpenGL) - out.Write("layout(std140%s) uniform GSBlock {\n", - g_ActiveConfig.backend_info.bSupportsBindingLayout ? 
", binding = 3" : ""); + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) + out.Write("UBO_BINDING(std140, 3) uniform GSBlock {\n"); else out.Write("cbuffer GSBlock {\n"); + out.Write("\tfloat4 " I_STEREOPARAMS ";\n" "\tfloat4 " I_LINEPTPARAMS ";\n" "\tint4 " I_TEXOFFSET ";\n" @@ -92,18 +92,18 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid ""); out.Write("};\n"); - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { if (g_ActiveConfig.backend_info.bSupportsGSInstancing) out.Write("#define InstanceID gl_InvocationID\n"); - out.Write("in VertexData {\n"); + out.Write("VARYING_LOCATION(0) in VertexData {\n"); GenerateVSOutputMembers( out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); out.Write("} vs[%d];\n", vertex_in); - out.Write("out VertexData {\n"); + out.Write("VARYING_LOCATION(0) out VertexData {\n"); GenerateVSOutputMembers( out, ApiType, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true)); @@ -146,7 +146,7 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid if (uid_data->primitive_type == PRIMITIVE_LINES) { - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { out.Write("\tVS_OUTPUT start, end;\n"); AssignVSOutputMembers(out, "start", "vs[0]", uid_data->numTexGens, uid_data->pixel_lighting); @@ -177,7 +177,7 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid } else if (uid_data->primitive_type == PRIMITIVE_POINTS) { - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { out.Write("\tVS_OUTPUT center;\n"); AssignVSOutputMembers(out, "center", "vs[0]", uid_data->numTexGens, uid_data->pixel_lighting); @@ -208,7 +208,7 @@ ShaderCode GenerateGeometryShaderCode(APIType 
ApiType, const geometry_shader_uid out.Write("\tfor (int i = 0; i < %d; ++i) {\n", vertex_in); - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { out.Write("\tVS_OUTPUT f;\n"); AssignVSOutputMembers(out, "f", "vs[i]", uid_data->numTexGens, uid_data->pixel_lighting); @@ -231,7 +231,7 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid { // Select the output layer out.Write("\tps.layer = eye;\n"); - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) out.Write("\tgl_Layer = eye;\n"); // For stereoscopy add a small horizontal offset in Normalized Device Coordinates proportional @@ -241,7 +241,8 @@ ShaderCode GenerateGeometryShaderCode(APIType ApiType, const geometry_shader_uid // the depth value. This results in objects at a distance smaller than the convergence // distance to seemingly appear in front of the screen. // This formula is based on page 13 of the "Nvidia 3D Vision Automatic, Best Practices Guide" - out.Write("\tf.pos.x += " I_STEREOPARAMS "[eye] * (f.pos.w - " I_STEREOPARAMS "[2]);\n"); + out.Write("\tfloat hoffset = (eye == 0) ? " I_STEREOPARAMS ".x : " I_STEREOPARAMS ".y;\n"); + out.Write("\tf.pos.x += hoffset * (f.pos.w - " I_STEREOPARAMS ".z);\n"); } if (uid_data->primitive_type == PRIMITIVE_LINES) @@ -329,12 +330,19 @@ static void EmitVertex(ShaderCode& out, const geometry_shader_uid_data* uid_data } AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, uid_data->pixel_lighting); } + else if (ApiType == APIType::Vulkan) + { + // Vulkan NDC space has Y pointing down (right-handed NDC space). 
+ out.Write("\tgl_Position = %s.pos;\n", vertex); + out.Write("\tgl_Position.y = -gl_Position.y;\n"); + AssignVSOutputMembers(out, "ps", vertex, uid_data->numTexGens, uid_data->pixel_lighting); + } else { out.Write("\tps.o = %s;\n", vertex); } - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) out.Write("\tEmitVertex();\n"); else out.Write("\toutput.Append(ps);\n"); @@ -345,7 +353,7 @@ static void EndPrimitive(ShaderCode& out, const geometry_shader_uid_data* uid_da if (uid_data->wireframe) EmitVertex(out, uid_data, "first", ApiType); - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) out.Write("\tEndPrimitive();\n"); else out.Write("\toutput.RestartStrip();\n"); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 13d6bdd13e..e4227e8280 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -384,6 +384,17 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, { out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n"); } + else if (ApiType == APIType::Vulkan) + { + out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); + out.Write("SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n"); + out.Write("SAMPLER_BINDING(2) uniform sampler2DArray samp2;\n"); + out.Write("SAMPLER_BINDING(3) uniform sampler2DArray samp3;\n"); + out.Write("SAMPLER_BINDING(4) uniform sampler2DArray samp4;\n"); + out.Write("SAMPLER_BINDING(5) uniform sampler2DArray samp5;\n"); + out.Write("SAMPLER_BINDING(6) uniform sampler2DArray samp6;\n"); + out.Write("SAMPLER_BINDING(7) uniform sampler2DArray samp7;\n"); + } else // D3D { // Declare samplers @@ -393,15 +404,11 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, } out.Write("\n"); - if (ApiType == APIType::OpenGL) - { - out.Write("layout(std140%s) uniform PSBlock {\n", 
- g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 1" : ""); - } + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) + out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n"); else - { out.Write("cbuffer PSBlock : register(b0) {\n"); - } + out.Write("\tint4 " I_COLORS "[4];\n" "\tint4 " I_KCOLORS "[4];\n" "\tint4 " I_ALPHA ";\n" @@ -420,24 +427,20 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, { out.Write("%s", s_lighting_struct); - if (ApiType == APIType::OpenGL) - { - out.Write("layout(std140%s) uniform VSBlock {\n", - g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : ""); - } + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n"); else - { out.Write("cbuffer VSBlock : register(b1) {\n"); - } + out.Write(s_shader_uniforms); out.Write("};\n"); } if (uid_data->bounding_box) { - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { - out.Write("layout(std140, binding = 3) buffer BBox {\n" + out.Write("SSBO_BINDING(0) buffer BBox {\n" "\tint4 bbox_data;\n" "};\n"); } @@ -488,7 +491,7 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, // ARB_image_load_store extension yet. // D3D11 also has a way to force the driver to enable early-z, so we're fine here. - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { // This is a #define which signals whatever early-z method the driver supports. 
out.Write("FORCE_EARLY_Z; \n"); @@ -510,18 +513,33 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, warn_once = false; } - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { - out.Write("out vec4 ocol0;\n"); if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) - out.Write("out vec4 ocol1;\n"); + { + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_FRAGMENT_SHADER_INDEX_DECORATION)) + { + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION(1) out vec4 ocol1;\n"); + } + else + { + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) out vec4 ocol0;\n"); + out.Write("FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;\n"); + } + } + else + { + out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + } if (uid_data->per_pixel_depth) out.Write("#define depth gl_FragDepth\n"); - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) + // We need to always use output blocks for Vulkan, but geometry shaders are also optional. 
+ if (g_ActiveConfig.backend_info.bSupportsGeometryShaders || ApiType == APIType::Vulkan) { - out.Write("in VertexData {\n"); + out.Write("VARYING_LOCATION(0) in VertexData {\n"); GenerateVSOutputMembers( out, ApiType, uid_data->genMode_numtexgens, uid_data->per_pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, true, true)); @@ -557,7 +575,7 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, out.Write("void main()\n{\n"); - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders || ApiType == APIType::Vulkan) { for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i) out.Write("\tfloat3 uv%d = tex%d;\n", i, i); @@ -728,7 +746,7 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, } else { - if (ApiType == APIType::D3D) + if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n"); @@ -742,7 +760,7 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, // Note: z-textures are not written to depth buffer if early depth test is used if (uid_data->per_pixel_depth && uid_data->early_ztest) { - if (ApiType == APIType::D3D) + if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); @@ -763,7 +781,7 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, if (uid_data->per_pixel_depth && uid_data->late_ztest) { - if (ApiType == APIType::D3D) + if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); @@ -793,7 +811,8 @@ ShaderCode GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, APIType ApiType, if 
(uid_data->bounding_box) { - const char* atomic_op = ApiType == APIType::OpenGL ? "atomic" : "Interlocked"; + const char* atomic_op = + (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) ? "atomic" : "Interlocked"; out.Write("\tif(bbox_data[0] > int(rawpos.x)) %sMin(bbox_data[0], int(rawpos.x));\n" "\tif(bbox_data[1] < int(rawpos.x)) %sMax(bbox_data[1], int(rawpos.x));\n" "\tif(bbox_data[2] > int(rawpos.y)) %sMin(bbox_data[2], int(rawpos.y));\n" @@ -1140,12 +1159,21 @@ static void SampleTexture(ShaderCode& out, const char* texcoords, const char* te out.SetConstantsUsed(C_TEXDIMS + texmap, C_TEXDIMS + texmap); if (ApiType == APIType::D3D) + { out.Write("iround(255.0 * Tex[%d].Sample(samp[%d], float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n", texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); + } + else if (ApiType == APIType::Vulkan) + { + out.Write("iround(255.0 * texture(samp%d, float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n", + texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); + } else + { out.Write("iround(255.0 * texture(samp[%d], float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n", texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap); + } } static const char* tevAlphaFuncsTable[] = { @@ -1197,7 +1225,10 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND) out.Write("\t\tocol1 = float4(0.0, 0.0, 0.0, 0.0);\n"); if (per_pixel_depth) - out.Write("\t\tdepth = %s;\n", (ApiType == APIType::D3D) ? "0.0" : "1.0"); + { + out.Write("\t\tdepth = %s;\n", + (ApiType == APIType::D3D || ApiType == APIType::Vulkan) ? 
"0.0" : "1.0"); + } // ZCOMPLOC HACK: if (!uid_data->alpha_test_use_zcomploc_hack) diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 3b50cd2313..8fb77a341f 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -60,10 +60,6 @@ Common::Event Renderer::s_screenshotCompleted; volatile bool Renderer::s_bScreenshot; -// Final surface changing -Common::Flag Renderer::s_SurfaceNeedsChanged; -Common::Event Renderer::s_ChangedSurface; - // The framebuffer size int Renderer::s_target_width; int Renderer::s_target_height; @@ -74,6 +70,11 @@ int Renderer::s_backbuffer_height; std::unique_ptr Renderer::m_post_processor; +// Final surface changing +Common::Flag Renderer::s_surface_needs_change; +Common::Event Renderer::s_surface_changed; +void* Renderer::s_new_surface_handle; + TargetRectangle Renderer::target_rc; int Renderer::s_last_efb_scale; diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 1ae3121637..44a0b982b7 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -138,9 +138,8 @@ public: static Common::Event s_screenshotCompleted; // Final surface changing - static Common::Flag s_SurfaceNeedsChanged; - static Common::Event s_ChangedSurface; - + // This is called when the surface is resized (WX) or the window changes (Android). 
+ virtual void ChangeSurface(void* new_surface_handle) {} protected: static void CalculateTargetScale(int x, int y, int* scaledX, int* scaledY); bool CalculateTargetSize(unsigned int framebuffer_width, unsigned int framebuffer_height); @@ -178,6 +177,10 @@ protected: static const float GX_MAX_DEPTH; + static Common::Flag s_surface_needs_change; + static Common::Event s_surface_changed; + static void* s_new_surface_handle; + private: static PEControl::PixelFormat prev_efb_format; static unsigned int efb_scale_numeratorX; diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index ab260aedd7..39af7f155c 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -133,7 +133,8 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config) config.bStereoEFBMonoDepth != backup_config.s_efb_mono_depth) { g_texture_cache->DeleteShaders(); - g_texture_cache->CompileShaders(); + if (!g_texture_cache->CompileShaders()) + PanicAlert("Failed to recompile one or more texture conversion shaders."); } } diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index b3bde3bb0d..e10a0439f1 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -160,8 +160,8 @@ public: u32 memory_stride, PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf) = 0; - virtual void CompileShaders() = 0; // currently only implemented by OGL - virtual void DeleteShaders() = 0; // currently only implemented by OGL + virtual bool CompileShaders() = 0; + virtual void DeleteShaders() = 0; static TCacheEntryBase* Load(const u32 stage); static void UnbindTextures(); diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index cccb7e7334..bd4e1349df 100644 --- 
a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -81,7 +81,10 @@ static void WriteSwizzler(char*& p, u32 format, APIType ApiType) { // left, top, of source rectangle within source texture // width of the destination rectangle, scale_factor (1 or 2) - WRITE(p, "uniform int4 position;\n"); + if (ApiType == APIType::Vulkan) + WRITE(p, "layout(std140, push_constant) uniform PCBlock { int4 position; } PC;\n"); + else + WRITE(p, "uniform int4 position;\n"); int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkH = TexDecoder_GetBlockHeightInTexels(format); @@ -92,12 +95,23 @@ static void WriteSwizzler(char*& p, u32 format, APIType ApiType) WRITE(p, "#define samp0 samp9\n"); WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n"); - WRITE(p, " out vec4 ocol0;\n"); + WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); WRITE(p, "void main()\n"); WRITE(p, "{\n" " int2 sampleUv;\n" " int2 uv1 = int2(gl_FragCoord.xy);\n"); } + else if (ApiType == APIType::Vulkan) + { + WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); + WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + + WRITE(p, "void main()\n"); + WRITE(p, "{\n" + " int2 sampleUv;\n" + " int2 uv1 = int2(gl_FragCoord.xy);\n" + " int4 position = PC.position;\n"); + } else // D3D { WRITE(p, "sampler samp0 : register(s0);\n"); @@ -146,7 +160,7 @@ static void WriteSwizzler(char*& p, u32 format, APIType ApiType) static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, APIType ApiType, bool depth = false) { - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { WRITE(p, " %s = texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0)).%s;\n", dest, xoffset, colorComp); @@ -155,7 +169,10 @@ static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, { WRITE(p, " %s = Tex0.Sample(samp0, float3(uv0 + float2(%d, 0) * 
sample_offset, 0.0)).%s;\n", dest, xoffset, colorComp); + } + if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + { // Handle D3D depth inversion. if (depth) WRITE(p, " %s = 1.0 - %s;\n", dest, dest); diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 8376d6b8ea..7725331193 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -85,11 +85,11 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da out.Write("%s", s_lighting_struct); // uniforms - if (api_type == APIType::OpenGL) - out.Write("layout(std140%s) uniform VSBlock {\n", - g_ActiveConfig.backend_info.bSupportsBindingLayout ? ", binding = 2" : ""); + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n"); else out.Write("cbuffer VSBlock {\n"); + out.Write(s_shader_uniforms); out.Write("};\n"); @@ -97,34 +97,37 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, ""); out.Write("};\n"); - if (api_type == APIType::OpenGL) + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB); + out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawpos;\n", SHADER_POSITION_ATTRIB); if (uid_data->components & VB_HAS_POSMTXIDX) - out.Write("in int posmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB); + out.Write("ATTRIBUTE_LOCATION(%d) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB); if (uid_data->components & VB_HAS_NRM0) - out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB); if (uid_data->components & VB_HAS_NRM1) - out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB); + out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm1;\n", 
SHADER_NORM1_ATTRIB); if (uid_data->components & VB_HAS_NRM2) - out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB); + out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB); if (uid_data->components & VB_HAS_COL0) - out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB); + out.Write("ATTRIBUTE_LOCATION(%d) in float4 color0;\n", SHADER_COLOR0_ATTRIB); if (uid_data->components & VB_HAS_COL1) - out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB); + out.Write("ATTRIBUTE_LOCATION(%d) in float4 color1;\n", SHADER_COLOR1_ATTRIB); for (int i = 0; i < 8; ++i) { u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i)); if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx) - out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, - SHADER_TEXTURE0_ATTRIB + i); + { + out.Write("ATTRIBUTE_LOCATION(%d) in float%d tex%d;\n", SHADER_TEXTURE0_ATTRIB + i, + hastexmtx ? 3 : 2, i); + } } - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) + // We need to always use output blocks for Vulkan, but geometry shaders are also optional. + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders || api_type == APIType::Vulkan) { - out.Write("out VertexData {\n"); + out.Write("VARYING_LOCATION(0) out VertexData {\n"); GenerateVSOutputMembers( out, api_type, uid_data->numTexGens, uid_data->pixel_lighting, GetInterpolationQualifier(uid_data->msaa, uid_data->ssaa, false, true)); @@ -180,7 +183,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 
3 : 2, i, i); } if (uid_data->components & VB_HAS_POSMTXIDX) - out.Write(" int posmtx : BLENDINDICES,\n"); + out.Write(" uint4 posmtx : BLENDINDICES,\n"); out.Write(" float4 rawpos : POSITION) {\n"); } @@ -189,13 +192,14 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da // transforms if (uid_data->components & VB_HAS_POSMTXIDX) { + out.Write("int posidx = int(posmtx.r);\n"); out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES - "[posmtx], rawpos), dot(" I_TRANSFORMMATRICES - "[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posmtx+2], rawpos), 1);\n"); + "[posidx], rawpos), dot(" I_TRANSFORMMATRICES + "[posidx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posidx+2], rawpos), 1);\n"); if (uid_data->components & VB_HAS_NRMALL) { - out.Write("int normidx = posmtx & 31;\n"); + out.Write("int normidx = posidx & 31;\n"); out.Write("float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES "[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"); } @@ -461,9 +465,9 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da // get rasterized correctly. out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n"); - if (api_type == APIType::OpenGL) + if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { - if (g_ActiveConfig.backend_info.bSupportsGeometryShaders) + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders || api_type == APIType::Vulkan) { AssignVSOutputMembers(out, "vs", "o", uid_data->numTexGens, uid_data->pixel_lighting); } @@ -488,7 +492,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da out.Write("gl_ClipDistance[0] = o.clipDist0;\n"); out.Write("gl_ClipDistance[1] = o.clipDist1;\n"); } - out.Write("gl_Position = o.pos;\n"); + + // Vulkan NDC space has Y pointing down (right-handed NDC space). 
+ if (api_type == APIType::Vulkan) + out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n"); + else + out.Write("gl_Position = o.pos;\n"); } else // D3D { diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index 5eb8d50ae8..3e07f66eb8 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -15,6 +15,7 @@ #include "VideoBackends/Null/VideoBackend.h" #include "VideoBackends/OGL/VideoBackend.h" #include "VideoBackends/Software/VideoBackend.h" +#include "VideoBackends/Vulkan/VideoBackend.h" #include "VideoCommon/VideoBackendBase.h" @@ -35,7 +36,7 @@ __declspec(dllexport) DWORD NvOptimusEnablement = 1; void VideoBackendBase::PopulateList() { - // OGL > D3D11 > D3D12 > SW > Null + // OGL > D3D11 > D3D12 > Vulkan > SW > Null g_available_video_backends.push_back(std::make_unique()); #ifdef _WIN32 g_available_video_backends.push_back(std::make_unique()); @@ -48,6 +49,7 @@ void VideoBackendBase::PopulateList() g_available_video_backends.push_back(std::make_unique()); } #endif + g_available_video_backends.push_back(std::make_unique()); g_available_video_backends.push_back(std::make_unique()); g_available_video_backends.push_back(std::make_unique()); diff --git a/Source/Core/VideoCommon/VideoCommon.h b/Source/Core/VideoCommon/VideoCommon.h index de4f69c9a6..e25b2b66a5 100644 --- a/Source/Core/VideoCommon/VideoCommon.h +++ b/Source/Core/VideoCommon/VideoCommon.h @@ -73,6 +73,7 @@ enum class APIType { OpenGL, D3D, + Vulkan, Nothing }; diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 1e4a5b4973..13da3ef47a 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -41,6 +41,10 @@ VideoConfig::VideoConfig() // disable all features by default backend_info.api_type = APIType::Nothing; backend_info.bSupportsExclusiveFullscreen = false; + 
backend_info.bSupportsMultithreading = false; + + bEnableValidationLayer = false; + bBackendMultithreading = true; } void VideoConfig::Load(const std::string& ini_file) @@ -82,6 +86,9 @@ void VideoConfig::Load(const std::string& ini_file) settings->Get("WireFrame", &bWireFrame, 0); settings->Get("DisableFog", &bDisableFog, 0); settings->Get("BorderlessFullscreen", &bBorderlessFullscreen, false); + settings->Get("EnableValidationLayer", &bEnableValidationLayer, false); + settings->Get("BackendMultithreading", &bBackendMultithreading, true); + settings->Get("CommandBufferExecuteInterval", &iCommandBufferExecuteInterval, 100); settings->Get("SWZComploc", &bZComploc, true); settings->Get("SWZFreeze", &bZFreeze, true); @@ -188,6 +195,8 @@ void VideoConfig::GameIniLoad() } CHECK_SETTING("Video_Settings", "DisableFog", bDisableFog); + CHECK_SETTING("Video_Settings", "BackendMultithreading", bBackendMultithreading); + CHECK_SETTING("Video_Settings", "CommandBufferExecuteInterval", iCommandBufferExecuteInterval); CHECK_SETTING("Video_Enhancements", "ForceFiltering", bForceFiltering); CHECK_SETTING("Video_Enhancements", "MaxAnisotropy", @@ -291,6 +300,9 @@ void VideoConfig::Save(const std::string& ini_file) settings->Set("Wireframe", bWireFrame); settings->Set("DisableFog", bDisableFog); settings->Set("BorderlessFullscreen", bBorderlessFullscreen); + settings->Set("EnableValidationLayer", bEnableValidationLayer); + settings->Set("BackendMultithreading", bBackendMultithreading); + settings->Set("CommandBufferExecuteInterval", iCommandBufferExecuteInterval); settings->Set("SWZComploc", bZComploc); settings->Set("SWZFreeze", bZFreeze); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index c948077acc..a08024b849 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -143,6 +143,16 @@ struct VideoConfig final bool bDumpTevStages; bool bDumpTevTextureFetches; + // Enable API validation layers, 
currently only supported with Vulkan. + bool bEnableValidationLayer; + + // Multithreaded submission, currently only supported with Vulkan. + bool bBackendMultithreading; + + // Early command buffer execution interval in number of draws. + // Currently only supported with Vulkan. + int iCommandBufferExecuteInterval; + // Static config per API // TODO: Move this out of VideoConfig struct @@ -173,6 +183,7 @@ struct VideoConfig final bool bSupportsSSAA; bool bSupportsDepthClamp; // Needed by VertexShaderGen, so must stay in VideoCommon bool bSupportsReversedDepthRange; + bool bSupportsMultithreading; } backend_info; // Utility diff --git a/Source/UnitTests/UnitTests.vcxproj b/Source/UnitTests/UnitTests.vcxproj index 8f884cb5a2..f7772c3482 100644 --- a/Source/UnitTests/UnitTests.vcxproj +++ b/Source/UnitTests/UnitTests.vcxproj @@ -81,6 +81,9 @@ {53A5391B-737E-49A8-BC8F-312ADA00736F} + + {29F29A19-F141-45AD-9679-5A2923B49DA3} + {570215b7-e32f-4438-95ae-c8d955f9fca3} diff --git a/Source/dolphin-emu.sln b/Source/dolphin-emu.sln index 705a9f41e5..0cc524ce6c 100644 --- a/Source/dolphin-emu.sln +++ b/Source/dolphin-emu.sln @@ -62,6 +62,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Software", "Core\VideoBacke EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Null", "Core\VideoBackends\Null\Null.vcxproj", "{53A5391B-737E-49A8-BC8F-312ADA00736F}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Vulkan", "Core\VideoBackends\Vulkan\Vulkan.vcxproj", "{29F29A19-F141-45AD-9679-5A2923B49DA3}" +EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Video Backends", "Video Backends", "{AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pch", "PCH\pch.vcxproj", "{76563A7F-1011-4EAD-B667-7BB18D09568E}" @@ -196,6 +198,10 @@ Global {53A5391B-737E-49A8-BC8F-312ADA00736F}.Debug|x64.Build.0 = Debug|x64 {53A5391B-737E-49A8-BC8F-312ADA00736F}.Release|x64.ActiveCfg = Release|x64 
{53A5391B-737E-49A8-BC8F-312ADA00736F}.Release|x64.Build.0 = Release|x64 + {29F29A19-F141-45AD-9679-5A2923B49DA3}.Debug|x64.ActiveCfg = Debug|x64 + {29F29A19-F141-45AD-9679-5A2923B49DA3}.Debug|x64.Build.0 = Debug|x64 + {29F29A19-F141-45AD-9679-5A2923B49DA3}.Release|x64.ActiveCfg = Release|x64 + {29F29A19-F141-45AD-9679-5A2923B49DA3}.Release|x64.Build.0 = Release|x64 {76563A7F-1011-4EAD-B667-7BB18D09568E}.Debug|x64.ActiveCfg = Debug|x64 {76563A7F-1011-4EAD-B667-7BB18D09568E}.Debug|x64.Build.0 = Debug|x64 {76563A7F-1011-4EAD-B667-7BB18D09568E}.Release|x64.ActiveCfg = Release|x64 @@ -253,6 +259,7 @@ Global {EC1A314C-5588-4506-9C1E-2E58E5817F75} = {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} {A4C423AA-F57C-46C7-A172-D1A777017D29} = {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} {53A5391B-737E-49A8-BC8F-312ADA00736F} = {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} + {29F29A19-F141-45AD-9679-5A2923B49DA3} = {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4} = {15670B2E-CED6-4ED5-94CE-A00B1B2B5BA6} {76563A7F-1011-4EAD-B667-7BB18D09568E} = {15670B2E-CED6-4ED5-94CE-A00B1B2B5BA6} {CBC76802-C128-4B17-BF6C-23B08C313E5E} = {87ADDFF9-5768-4DA2-A33B-2477593D6677}