From c557aa4a15b577fdaf2f5de71078c41037aea01e Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 2 Oct 2019 21:27:32 +1000 Subject: [PATCH 1/3] Vulkan: Add strict flag to memory type selection --- .../Core/VideoBackends/Vulkan/BoundingBox.cpp | 6 +- .../Core/VideoBackends/Vulkan/VKTexture.cpp | 6 +- .../VideoBackends/Vulkan/VulkanContext.cpp | 125 +++++++++--------- .../Core/VideoBackends/Vulkan/VulkanContext.h | 7 +- 4 files changed, 75 insertions(+), 69 deletions(-) diff --git a/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp b/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp index 15c9403228..a1deeafd59 100644 --- a/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp +++ b/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp @@ -172,8 +172,10 @@ bool BoundingBox::CreateGPUBuffer() VkMemoryRequirements memory_requirements; vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements); - uint32_t memory_type_index = g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + uint32_t memory_type_index = g_vulkan_context + ->GetMemoryType(memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, false) + .value_or(0); VkMemoryAllocateInfo memory_allocate_info = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType nullptr, // const void* pNext diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp index 89e4a29d41..fcff0db404 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp @@ -87,8 +87,10 @@ std::unique_ptr VKTexture::Create(const TextureConfig& tex_config) VkMemoryAllocateInfo memory_info = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size, - g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)}; + g_vulkan_context + ->GetMemoryType(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + false) + .value_or(0)}; VkDeviceMemory device_memory; res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory); diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index ee6e11bba0..6a8256b683 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -710,96 +710,97 @@ void VulkanContext::DisableDebugReports() } } -bool VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index) +std::optional VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, + bool strict, bool* is_coherent) { + static constexpr u32 ALL_MEMORY_PROPERTY_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + const u32 mask = strict ? ALL_MEMORY_PROPERTY_FLAGS : properties; + for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++) { if ((bits & (1 << i)) != 0) { - u32 supported = m_device_memory_properties.memoryTypes[i].propertyFlags & properties; + const VkMemoryPropertyFlags type_flags = + m_device_memory_properties.memoryTypes[i].propertyFlags; + const VkMemoryPropertyFlags supported = type_flags & mask; if (supported == properties) { - *out_type_index = i; - return true; + if (is_coherent) + *is_coherent = (type_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; + return i; } } } - return false; -} - -u32 VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties) -{ - u32 type_index = VK_MAX_MEMORY_TYPES; - if (!GetMemoryType(bits, properties, &type_index)) - PanicAlert("Unable to find memory type for %x:%x", bits, properties); - - return type_index; + return std::nullopt; } u32 VulkanContext::GetUploadMemoryType(u32 bits, bool* is_coherent) { - // Try for coherent memory first. - VkMemoryPropertyFlags flags = + static constexpr VkMemoryPropertyFlags COHERENT_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - u32 type_index; - if (!GetMemoryType(bits, flags, &type_index)) - { - WARN_LOG( - VIDEO, - "Vulkan: Failed to find a coherent memory type for uploads, this will affect performance."); + // Try for coherent memory. Some drivers (looking at you, Adreno) have the cached type before the + // uncached type, so use a strict check first. + std::optional type_index = GetMemoryType(bits, COHERENT_FLAGS, true, is_coherent); + if (type_index) + return type_index.value(); - // Try non-coherent memory. - flags &= ~VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - if (!GetMemoryType(bits, flags, &type_index)) - { - // We shouldn't have any memory types that aren't host-visible. - PanicAlert("Unable to get memory type for upload."); - type_index = 0; - } + // Try for coherent memory, with any other bits set. + type_index = GetMemoryType(bits, COHERENT_FLAGS, false, is_coherent); + if (type_index) + { + WARN_LOG(VIDEO, + "Strict check for upload memory properties failed, this may affect performance"); + return type_index.value(); } - if (is_coherent) - *is_coherent = ((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0); + // Fall back to non-coherent memory. + WARN_LOG( + VIDEO, + "Vulkan: Failed to find a coherent memory type for uploads, this will affect performance."); + type_index = GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, false, is_coherent); + if (type_index) + return type_index.value(); - return type_index; + // Shouldn't happen, there should be at least one host-visible heap. + PanicAlert("Unable to get memory type for upload."); + return 0; } -u32 VulkanContext::GetReadbackMemoryType(u32 bits, bool* is_coherent, bool* is_cached) +u32 VulkanContext::GetReadbackMemoryType(u32 bits, bool* is_coherent) { - // Try for cached and coherent memory first. - VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + std::optional type_index; - u32 type_index; - if (!GetMemoryType(bits, flags, &type_index)) - { - // For readbacks, caching is more important than coherency. - flags &= ~VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - if (!GetMemoryType(bits, flags, &type_index)) - { - WARN_LOG(VIDEO, "Vulkan: Failed to find a cached memory type for readbacks, this will affect " - "performance."); + // Optimal config uses cached+coherent. + type_index = + GetMemoryType(bits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + true, is_coherent); + if (type_index) + return type_index.value(); - // Remove the cached bit as well. - flags &= ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - if (!GetMemoryType(bits, flags, &type_index)) - { - // We shouldn't have any memory types that aren't host-visible. - PanicAlert("Unable to get memory type for upload."); - type_index = 0; - } - } - } + // Otherwise, prefer cached over coherent if we must choose one. + type_index = + GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + false, is_coherent); + if (type_index) + return type_index.value(); - if (is_coherent) - *is_coherent = ((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0); - if (is_cached) - *is_cached = ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0); + WARN_LOG(VIDEO, "Vulkan: Failed to find a cached memory type for readbacks, this will affect " + "performance."); + type_index = GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, false, is_coherent); + *is_coherent = false; + if (type_index) + return type_index.value(); - return type_index; + // We should have at least one host visible memory type... + PanicAlert("Unable to get memory type for upload."); + return 0; } void VulkanContext::InitDriverDetails() diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.h b/Source/Core/VideoBackends/Vulkan/VulkanContext.h index dcd9584e50..e33e5b3716 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.h +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include "Common/CommonTypes.h" @@ -99,12 +100,12 @@ public: float GetMaxSamplerAnisotropy() const { return m_device_properties.limits.maxSamplerAnisotropy; } // Finds a memory type index for the specified memory properties and the bits returned by // vkGetImageMemoryRequirements - bool GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index); - u32 GetMemoryType(u32 bits, VkMemoryPropertyFlags properties); + std::optional GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, bool strict, + bool* is_coherent = nullptr); // Finds a memory type for upload or readback buffers. u32 GetUploadMemoryType(u32 bits, bool* is_coherent = nullptr); - u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr, bool* is_cached = nullptr); + u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr); private: using ExtensionList = std::vector; From 328d89db708ec9c1d3e31a760f530b64c4767b2b Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 2 Oct 2019 21:32:44 +1000 Subject: [PATCH 2/3] Vulkan: Add a DriverDetails bug for "slow cached readback memory" Using the cached memory type appears to be slower on Mali drivers, with ~10-15% CPU spent in the __pi___inval_cache_range kernel function. --- Source/Core/VideoBackends/Vulkan/VulkanContext.cpp | 10 ++++++++++ Source/Core/VideoCommon/DriverDetails.cpp | 6 +++++- Source/Core/VideoCommon/DriverDetails.h | 5 +++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 6a8256b683..72eddd56c9 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -775,6 +775,16 @@ u32 VulkanContext::GetReadbackMemoryType(u32 bits, bool* is_coherent) { std::optional type_index; + // Mali driver appears to be significantly slower for readbacks when using cached memory. + if (DriverDetails::HasBug(DriverDetails::BUG_SLOW_CACHED_READBACK_MEMORY)) + { + type_index = GetMemoryType( + bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, true, + is_coherent); + if (type_index) + return type_index.value(); + } + // Optimal config uses cached+coherent. type_index = GetMemoryType(bits, diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index 0b04309702..1670f1a028 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -113,7 +113,11 @@ constexpr BugInfo m_known_bugs[] = { {API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN, - BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}}; + BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}, + {API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_SLOW_CACHED_READBACK_MEMORY, + -1.0, -1.0, true}, + {API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, + BUG_SLOW_CACHED_READBACK_MEMORY, -1.0, -1.0, true}}; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 4c0f280856..3e58a5fb58 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -281,6 +281,11 @@ enum Bug // The Vulkan spec allows the minDepth/maxDepth fields in the viewport to be reversed, // however the implementation is broken on some drivers. BUG_BROKEN_REVERSED_DEPTH_RANGE, + + // BUG: Cached memory is significantly slower for readbacks than coherent memory in the + // Mali Vulkan driver, causing high CPU usage in the __pi___inval_cache_range kernel + // function. This flag causes readback buffers to select the coherent type. + BUG_SLOW_CACHED_READBACK_MEMORY, }; // Initializes our internal vendor, device family, and driver version From ecdf21a9889a9c6976bcfcb1a6e6b045f5c9f4d2 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 2 Oct 2019 22:15:59 +1000 Subject: [PATCH 3/3] Config: Also set CommandBufferExecuteInterval to 0 by default on Android --- Source/Core/Core/Config/GraphicsSettings.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 83baf5ae05..7068943cc2 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -75,13 +75,15 @@ const ConfigInfo GFX_ENABLE_VALIDATION_LAYER{ #if defined(ANDROID) const ConfigInfo GFX_BACKEND_MULTITHREADING{ {System::GFX, "Settings", "BackendMultithreading"}, false}; +const ConfigInfo GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{ + {System::GFX, "Settings", "CommandBufferExecuteInterval"}, 0}; #else const ConfigInfo GFX_BACKEND_MULTITHREADING{ {System::GFX, "Settings", "BackendMultithreading"}, true}; -#endif - const ConfigInfo GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{ {System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100}; +#endif + const ConfigInfo GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true}; const ConfigInfo GFX_WAIT_FOR_SHADERS_BEFORE_STARTING{ {System::GFX, "Settings", "WaitForShadersBeforeStarting"}, false};