diff --git a/src/Cafe/CafeSystem.cpp b/src/Cafe/CafeSystem.cpp index 08228b62..7ba93fc8 100644 --- a/src/Cafe/CafeSystem.cpp +++ b/src/Cafe/CafeSystem.cpp @@ -258,7 +258,7 @@ void InfoLog_PrintActiveSettings() { cemuLog_log(LogType::Force, "Async compile: {}", GetConfig().async_compile.GetValue() ? "true" : "false"); cemuLog_log(LogType::Force, "Fast math: {}", GetConfig().fast_math.GetValue() ? "true" : "false"); - cemuLog_log(LogType::Force, "Use host memory for cache: {}", g_current_game_profile->UseHostMemForCache() ? "true" : "false"); + cemuLog_log(LogType::Force, "Buffer cache type: {}", g_current_game_profile->GetBufferCacheType()); if (!GetConfig().vk_accurate_barriers.GetValue()) cemuLog_log(LogType::Force, "Accurate barriers are disabled!"); } diff --git a/src/Cafe/GameProfile/GameProfile.cpp b/src/Cafe/GameProfile/GameProfile.cpp index 337786ed..a4ce8fe8 100644 --- a/src/Cafe/GameProfile/GameProfile.cpp +++ b/src/Cafe/GameProfile/GameProfile.cpp @@ -226,7 +226,7 @@ bool GameProfile::Load(uint64_t title_id) m_graphics_api = (GraphicAPI)graphicsApi.value; gameProfile_loadEnumOption(iniParser, "accurateShaderMul", m_accurateShaderMul); - gameProfile_loadBooleanOption2(iniParser, "useHostMemForCache", m_useHostMemForCache); + gameProfile_loadEnumOption(iniParser, "bufferCacheType", m_bufferCacheType); // legacy support auto option_precompiledShaders = iniParser.FindOption("precompiledShaders"); @@ -309,7 +309,7 @@ void GameProfile::Save(uint64_t title_id) fs->writeLine("[Graphics]"); WRITE_ENTRY(accurateShaderMul); - WRITE_ENTRY(useHostMemForCache); + WRITE_ENTRY(bufferCacheType); WRITE_OPTIONAL_ENTRY(precompiledShaders); WRITE_OPTIONAL_ENTRY(graphics_api); fs->writeLine(""); @@ -339,7 +339,7 @@ void GameProfile::ResetOptional() // graphic settings m_accurateShaderMul = AccurateShaderMulOption::True; - m_useHostMemForCache = false; + m_bufferCacheType = BufferCacheType::DevicePrivate; // cpu settings m_threadQuantum = kThreadQuantumDefault; m_cpuMode.reset(); // CPUModeOption::kSingleCoreRecompiler; @@ -360,7 +360,7 @@ void GameProfile::Reset() // graphic settings m_accurateShaderMul = AccurateShaderMulOption::True; - m_useHostMemForCache = false; + m_bufferCacheType = BufferCacheType::DevicePrivate; m_precompiledShaders = PrecompiledShaderOption::Auto; // cpu settings m_threadQuantum = kThreadQuantumDefault; diff --git a/src/Cafe/GameProfile/GameProfile.h b/src/Cafe/GameProfile/GameProfile.h index e2ab29f7..5c2d28d7 100644 --- a/src/Cafe/GameProfile/GameProfile.h +++ b/src/Cafe/GameProfile/GameProfile.h @@ -31,7 +31,7 @@ public: [[nodiscard]] const std::optional& GetGraphicsAPI() const { return m_graphics_api; } [[nodiscard]] const AccurateShaderMulOption& GetAccurateShaderMul() const { return m_accurateShaderMul; } - [[nodiscard]] bool UseHostMemForCache() const { return m_useHostMemForCache; } + [[nodiscard]] BufferCacheType GetBufferCacheType() const { return m_bufferCacheType; } [[nodiscard]] const std::optional& GetPrecompiledShadersState() const { return m_precompiledShaders; } [[nodiscard]] uint32 GetThreadQuantum() const { return m_threadQuantum; } @@ -55,7 +55,7 @@ private: // graphic settings std::optional m_graphics_api{}; AccurateShaderMulOption m_accurateShaderMul = AccurateShaderMulOption::True; - bool m_useHostMemForCache = false; + BufferCacheType m_bufferCacheType = BufferCacheType::DevicePrivate; std::optional m_precompiledShaders{}; // cpu settings uint32 m_threadQuantum = kThreadQuantumDefault; // values: 20000 45000 60000 80000 100000 diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index 5f02847a..cd041c5a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -4,7 +4,6 @@ #include "Cemu/Logging/CemuLogging.h" #include "Common/precompiled.h" -#include "GameProfile/GameProfile.h" /* MetalVertexBufferCache::~MetalVertexBufferCache() @@ -118,21 +117,24 @@ void MetalMemoryManager::InitBufferCache(size_t size) { cemu_assert_debug(!m_bufferCache); + m_bufferCacheType = g_current_game_profile->GetBufferCacheType(); + // First, try to import the host memory as a buffer - if (g_current_game_profile->UseHostMemForCache() && m_mtlr->IsAppleGPU()) + if (m_bufferCacheType == BufferCacheType::Host && m_mtlr->IsAppleGPU()) { m_importedMemBaseAddress = 0x10000000; size_t hostAllocationSize = 0x40000000ull; // TODO: get size of allocation m_bufferCache = m_mtlr->GetDevice()->newBuffer(memory_getPointerFromVirtualOffset(m_importedMemBaseAddress), hostAllocationSize, MTL::ResourceStorageModeShared, nullptr); - if (m_bufferCache) - m_useHostMemoryForCache = true; - else + if (!m_bufferCache) + { cemuLog_logDebug(LogType::Force, "Failed to import host memory as a buffer"); + m_bufferCacheType = BufferCacheType::DevicePrivate; + } } - if (!m_useHostMemoryForCache) - m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate); + if (!m_bufferCache) + m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, (m_bufferCacheType == BufferCacheType::DevicePrivate ? MTL::ResourceStorageModePrivate : MTL::ResourceStorageModeShared)); #ifdef CEMU_DEBUG_ASSERT m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache)); @@ -141,24 +143,31 @@ void MetalMemoryManager::InitBufferCache(size_t size) void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size) { - cemu_assert_debug(!m_useHostMemoryForCache); + cemu_assert_debug(m_bufferCacheType != BufferCacheType::Host); cemu_assert_debug(m_bufferCache); cemu_assert_debug((offset + size) <= m_bufferCache->length()); - auto allocation = m_tempBufferAllocator.GetBufferAllocation(size); - auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex); - memcpy((uint8*)buffer->contents() + allocation.offset, data, size); + if (m_bufferCacheType == BufferCacheType::DevicePrivate) + { + auto allocation = m_tempBufferAllocator.GetBufferAllocation(size); + auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex); + memcpy((uint8*)buffer->contents() + allocation.offset, data, size); - // Lock the buffer to make sure it's not deallocated before the copy is done - m_tempBufferAllocator.LockBuffer(allocation.bufferIndex); + // Lock the buffer to make sure it's not deallocated before the copy is done + m_tempBufferAllocator.LockBuffer(allocation.bufferIndex); - m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); + m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); - // Make sure the buffer has the right command buffer - m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this + // Make sure the buffer has the right command buffer + m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this - // We can now safely unlock the buffer - m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex); + // We can now safely unlock the buffer + m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex); + } + else + { + memcpy((uint8*)m_bufferCache->contents() + offset, data, size); + } // Notify vertex buffer cache about the change //m_vertexBufferCache.MemoryRangeChanged(offset, size); @@ -166,7 +175,11 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size) { + cemu_assert_debug(m_bufferCacheType != BufferCacheType::Host); cemu_assert_debug(m_bufferCache); - m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); + if (m_bufferCacheType == BufferCacheType::DevicePrivate) + m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); + else + memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index 4e8b2594..6cc4ab1e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -2,6 +2,8 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h" +#include "GameProfile/GameProfile.h" + /* struct MetalRestridedBufferRange { @@ -118,7 +120,7 @@ public: // Getters bool UseHostMemoryForCache() const { - return m_useHostMemoryForCache; + return (m_bufferCacheType == BufferCacheType::Host); } MPTR GetImportedMemBaseAddress() const @@ -137,6 +139,6 @@ private: //MetalVertexBufferCache m_vertexBufferCache; MTL::Buffer* m_bufferCache = nullptr; - bool m_useHostMemoryForCache = false; + BufferCacheType m_bufferCacheType; MPTR m_importedMemBaseAddress; }; diff --git a/src/config/CemuConfig.h b/src/config/CemuConfig.h index 988916eb..02dc873a 100644 --- a/src/config/CemuConfig.h +++ b/src/config/CemuConfig.h @@ -124,6 +124,14 @@ enum class AccurateShaderMulOption }; ENABLE_ENUM_ITERATORS(AccurateShaderMulOption, AccurateShaderMulOption::False, AccurateShaderMulOption::True); +enum class BufferCacheType +{ + DevicePrivate, + DeviceShared, + Host, +}; +ENABLE_ENUM_ITERATORS(BufferCacheType, BufferCacheType::DevicePrivate, BufferCacheType::Host); + enum class CPUMode { SinglecoreInterpreter = 0, @@ -222,6 +230,21 @@ struct fmt::formatter : formatter { } }; template <> +struct fmt::formatter : formatter { + template + auto format(const BufferCacheType c, FormatContext &ctx) const { + string_view name; + switch (c) + { + case BufferCacheType::DevicePrivate: name = "device private"; break; + case BufferCacheType::DeviceShared: name = "device shared"; break; + case BufferCacheType::Host: name = "host"; break; + default: name = "unknown"; break; + } + return formatter::format(name, ctx); + } +}; +template <> struct fmt::formatter : formatter { template auto format(const CPUMode c, FormatContext &ctx) const { diff --git a/src/gui/GameProfileWindow.cpp b/src/gui/GameProfileWindow.cpp index c1aa63e4..f54a8fb4 100644 --- a/src/gui/GameProfileWindow.cpp +++ b/src/gui/GameProfileWindow.cpp @@ -8,6 +8,7 @@ #include #include +#include "config/CemuConfig.h" #include "gui/helpers/wxHelpers.h" #include "input/InputManager.h" @@ -127,12 +128,12 @@ GameProfileWindow::GameProfileWindow(wxWindow* parent, uint64_t title_id) m_shader_mul_accuracy->SetToolTip(_("EXPERT OPTION\nControls the accuracy of floating point multiplication in shaders.\n\nRecommended: true")); first_row->Add(m_shader_mul_accuracy, 0, wxALL, 5); - first_row->Add(new wxStaticText(panel, wxID_ANY, _("Use host memory for cache")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); + first_row->Add(new wxStaticText(panel, wxID_ANY, _("Buffer cache type")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); - wxString mem_values[] = { _("false"), _("true")}; - m_use_host_mem_for_cache = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(mem_values), mem_values); - m_use_host_mem_for_cache->SetToolTip(_("EXPERT OPTION\nAllows the GPU to access data directly without the need for an intermediate cache. May increase performance and reduce memory usage, but can also cause flickering.\n\nMetal only\n\nRecommended: false")); - first_row->Add(m_use_host_mem_for_cache, 0, wxALL, 5); + wxString cache_values[] = { _("device private"), _("device shared"), _("host")}; + m_buffer_cache_type = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(cache_values), cache_values); + m_buffer_cache_type->SetToolTip(_("EXPERT OPTION\nDecides how the buffer cache memory will be managed.\n\nMetal only\n\nRecommended: device private")); + first_row->Add(m_buffer_cache_type, 0, wxALL, 5); /*first_row->Add(new wxStaticText(panel, wxID_ANY, _("GPU buffer cache accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); wxString accuarcy_values[] = { _("high"), _("medium"), _("low") }; @@ -280,7 +281,7 @@ void GameProfileWindow::ApplyProfile() else m_graphic_api->SetSelection(1 + m_game_profile.m_graphics_api.value()); // "", OpenGL, Vulkan, Metal m_shader_mul_accuracy->SetSelection((int)m_game_profile.m_accurateShaderMul); - m_use_host_mem_for_cache->SetSelection((int)m_game_profile.m_useHostMemForCache); + m_buffer_cache_type->SetSelection((int)m_game_profile.m_bufferCacheType); //// audio //m_disable_audio->Set3StateValue(GetCheckboxState(m_game_profile.disableAudio)); @@ -340,7 +341,7 @@ void GameProfileWindow::SaveProfile() // gpu m_game_profile.m_accurateShaderMul = (AccurateShaderMulOption)m_shader_mul_accuracy->GetSelection(); - m_game_profile.m_useHostMemForCache = (bool)m_use_host_mem_for_cache->GetSelection(); + m_game_profile.m_bufferCacheType = (BufferCacheType)m_buffer_cache_type->GetSelection(); if (m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::False && m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::True) m_game_profile.m_accurateShaderMul = AccurateShaderMulOption::True; // force a legal value diff --git a/src/gui/GameProfileWindow.h b/src/gui/GameProfileWindow.h index a1fe8132..22eda48d 100644 --- a/src/gui/GameProfileWindow.h +++ b/src/gui/GameProfileWindow.h @@ -40,7 +40,7 @@ private: wxChoice* m_graphic_api; wxChoice* m_shader_mul_accuracy; - wxChoice* m_use_host_mem_for_cache; + wxChoice* m_buffer_cache_type; //wxChoice* m_cache_accuracy; // audio