Merge pull request #10 from SamoZ256/metal-host-buffer

Multiple buffer cache types
This commit is contained in:
SamoZ256 2024-11-03 16:17:39 +01:00 committed by GitHub
commit b008ace7d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 211 additions and 41 deletions

View File

@ -258,6 +258,7 @@ void InfoLog_PrintActiveSettings()
{ {
cemuLog_log(LogType::Force, "Async compile: {}", GetConfig().async_compile.GetValue() ? "true" : "false"); cemuLog_log(LogType::Force, "Async compile: {}", GetConfig().async_compile.GetValue() ? "true" : "false");
cemuLog_log(LogType::Force, "Fast math: {}", GetConfig().fast_math.GetValue() ? "true" : "false"); cemuLog_log(LogType::Force, "Fast math: {}", GetConfig().fast_math.GetValue() ? "true" : "false");
cemuLog_log(LogType::Force, "Buffer cache type: {}", g_current_game_profile->GetBufferCacheType());
if (!GetConfig().vk_accurate_barriers.GetValue()) if (!GetConfig().vk_accurate_barriers.GetValue())
cemuLog_log(LogType::Force, "Accurate barriers are disabled!"); cemuLog_log(LogType::Force, "Accurate barriers are disabled!");
} }

View File

@ -127,7 +127,7 @@ bool gameProfile_loadIntegerOption(IniParser& iniParser, const char* optionName,
{ {
cemuLog_log(LogType::Force, "Value '{}' is out of range for option '{}' in game profile", *option_value, optionName); cemuLog_log(LogType::Force, "Value '{}' is out of range for option '{}' in game profile", *option_value, optionName);
return false; return false;
} }
} }
template<typename T> template<typename T>
@ -224,8 +224,9 @@ bool GameProfile::Load(uint64_t title_id)
gameProfile_loadIntegerOption(&iniParser, "graphics_api", &graphicsApi, -1, 0, 1); gameProfile_loadIntegerOption(&iniParser, "graphics_api", &graphicsApi, -1, 0, 1);
if (graphicsApi.value != -1) if (graphicsApi.value != -1)
m_graphics_api = (GraphicAPI)graphicsApi.value; m_graphics_api = (GraphicAPI)graphicsApi.value;
gameProfile_loadEnumOption(iniParser, "accurateShaderMul", m_accurateShaderMul); gameProfile_loadEnumOption(iniParser, "accurateShaderMul", m_accurateShaderMul);
gameProfile_loadEnumOption(iniParser, "bufferCacheType", m_bufferCacheType);
// legacy support // legacy support
auto option_precompiledShaders = iniParser.FindOption("precompiledShaders"); auto option_precompiledShaders = iniParser.FindOption("precompiledShaders");
@ -277,7 +278,7 @@ bool GameProfile::Load(uint64_t title_id)
void GameProfile::Save(uint64_t title_id) void GameProfile::Save(uint64_t title_id)
{ {
auto gameProfileDir = ActiveSettings::GetConfigPath("gameProfiles"); auto gameProfileDir = ActiveSettings::GetConfigPath("gameProfiles");
if (std::error_code ex_ec; !fs::exists(gameProfileDir, ex_ec)) if (std::error_code ex_ec; !fs::exists(gameProfileDir, ex_ec))
fs::create_directories(gameProfileDir, ex_ec); fs::create_directories(gameProfileDir, ex_ec);
auto gameProfilePath = gameProfileDir / fmt::format("{:016x}.ini", title_id); auto gameProfilePath = gameProfileDir / fmt::format("{:016x}.ini", title_id);
FileStream* fs = FileStream::createFile2(gameProfilePath); FileStream* fs = FileStream::createFile2(gameProfilePath);
@ -308,6 +309,7 @@ void GameProfile::Save(uint64_t title_id)
fs->writeLine("[Graphics]"); fs->writeLine("[Graphics]");
WRITE_ENTRY(accurateShaderMul); WRITE_ENTRY(accurateShaderMul);
WRITE_ENTRY(bufferCacheType);
WRITE_OPTIONAL_ENTRY(precompiledShaders); WRITE_OPTIONAL_ENTRY(precompiledShaders);
WRITE_OPTIONAL_ENTRY(graphics_api); WRITE_OPTIONAL_ENTRY(graphics_api);
fs->writeLine(""); fs->writeLine("");
@ -337,6 +339,7 @@ void GameProfile::ResetOptional()
// graphic settings // graphic settings
m_accurateShaderMul = AccurateShaderMulOption::True; m_accurateShaderMul = AccurateShaderMulOption::True;
m_bufferCacheType = BufferCacheType::DevicePrivate;
// cpu settings // cpu settings
m_threadQuantum = kThreadQuantumDefault; m_threadQuantum = kThreadQuantumDefault;
m_cpuMode.reset(); // CPUModeOption::kSingleCoreRecompiler; m_cpuMode.reset(); // CPUModeOption::kSingleCoreRecompiler;
@ -354,9 +357,10 @@ void GameProfile::Reset()
// general settings // general settings
m_loadSharedLibraries = true; m_loadSharedLibraries = true;
m_startWithPadView = false; m_startWithPadView = false;
// graphic settings // graphic settings
m_accurateShaderMul = AccurateShaderMulOption::True; m_accurateShaderMul = AccurateShaderMulOption::True;
m_bufferCacheType = BufferCacheType::DevicePrivate;
m_precompiledShaders = PrecompiledShaderOption::Auto; m_precompiledShaders = PrecompiledShaderOption::Auto;
// cpu settings // cpu settings
m_threadQuantum = kThreadQuantumDefault; m_threadQuantum = kThreadQuantumDefault;
@ -366,4 +370,4 @@ void GameProfile::Reset()
// controller settings // controller settings
for (auto& profile : m_controllerProfile) for (auto& profile : m_controllerProfile)
profile.reset(); profile.reset();
} }

View File

@ -31,6 +31,7 @@ public:
[[nodiscard]] const std::optional<GraphicAPI>& GetGraphicsAPI() const { return m_graphics_api; } [[nodiscard]] const std::optional<GraphicAPI>& GetGraphicsAPI() const { return m_graphics_api; }
[[nodiscard]] const AccurateShaderMulOption& GetAccurateShaderMul() const { return m_accurateShaderMul; } [[nodiscard]] const AccurateShaderMulOption& GetAccurateShaderMul() const { return m_accurateShaderMul; }
// Buffer cache storage strategy stored in this profile (loaded from the "bufferCacheType" option).
[[nodiscard]] BufferCacheType GetBufferCacheType() const { return m_bufferCacheType; }
[[nodiscard]] const std::optional<PrecompiledShaderOption>& GetPrecompiledShadersState() const { return m_precompiledShaders; } [[nodiscard]] const std::optional<PrecompiledShaderOption>& GetPrecompiledShadersState() const { return m_precompiledShaders; }
[[nodiscard]] uint32 GetThreadQuantum() const { return m_threadQuantum; } [[nodiscard]] uint32 GetThreadQuantum() const { return m_threadQuantum; }
@ -54,6 +55,7 @@ private:
// graphic settings // graphic settings
std::optional<GraphicAPI> m_graphics_api{}; std::optional<GraphicAPI> m_graphics_api{};
AccurateShaderMulOption m_accurateShaderMul = AccurateShaderMulOption::True; AccurateShaderMulOption m_accurateShaderMul = AccurateShaderMulOption::True;
BufferCacheType m_bufferCacheType = BufferCacheType::DevicePrivate;
std::optional<PrecompiledShaderOption> m_precompiledShaders{}; std::optional<PrecompiledShaderOption> m_precompiledShaders{};
// cpu settings // cpu settings
uint32 m_threadQuantum = kThreadQuantumDefault; // values: 20000 45000 60000 80000 100000 uint32 m_threadQuantum = kThreadQuantumDefault; // values: 20000 45000 60000 80000 100000

View File

@ -31,7 +31,9 @@ struct MetalQueryRange
#define MAX_MTL_BUFFERS 31 #define MAX_MTL_BUFFERS 31
// Buffer indices 28-30 are reserved for the helper shaders // Buffer indices 28-30 are reserved for the helper shaders
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 4) #define MTL_RESERVED_BUFFERS 3
// Number of buffer slots usable for vertex buffers, i.e. everything below the slots
// reserved for the helper shaders.
#define MAX_MTL_VERTEX_BUFFERS (MAX_MTL_BUFFERS - MTL_RESERVED_BUFFERS)
// Maps a vertex buffer index to its Metal buffer slot, counting down from the top of the
// vertex range (index 0 -> MAX_MTL_VERTEX_BUFFERS - 1).
// The argument is parenthesized so that expression arguments (e.g. `a + b`) expand correctly.
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_VERTEX_BUFFERS - (index) - 1)
#define MAX_MTL_TEXTURES 31 #define MAX_MTL_TEXTURES 31
#define MAX_MTL_SAMPLERS 16 #define MAX_MTL_SAMPLERS 16

View File

@ -1,6 +1,8 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h"
#include "Cemu/Logging/CemuLogging.h"
#include "Common/precompiled.h" #include "Common/precompiled.h"
/* /*
@ -115,7 +117,25 @@ void MetalMemoryManager::InitBufferCache(size_t size)
{ {
cemu_assert_debug(!m_bufferCache); cemu_assert_debug(!m_bufferCache);
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate); m_bufferCacheType = g_current_game_profile->GetBufferCacheType();
// First, try to import the host memory as a buffer
if (m_bufferCacheType == BufferCacheType::Host && m_mtlr->IsAppleGPU())
{
m_importedMemBaseAddress = 0x10000000;
size_t hostAllocationSize = 0x40000000ull;
// TODO: get size of allocation
m_bufferCache = m_mtlr->GetDevice()->newBuffer(memory_getPointerFromVirtualOffset(m_importedMemBaseAddress), hostAllocationSize, MTL::ResourceStorageModeShared, nullptr);
if (!m_bufferCache)
{
cemuLog_logDebug(LogType::Force, "Failed to import host memory as a buffer");
m_bufferCacheType = BufferCacheType::DevicePrivate;
}
}
if (!m_bufferCache)
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, (m_bufferCacheType == BufferCacheType::DevicePrivate ? MTL::ResourceStorageModePrivate : MTL::ResourceStorageModeShared));
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache)); m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
#endif #endif
@ -123,23 +143,31 @@ void MetalMemoryManager::InitBufferCache(size_t size)
void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size) void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size)
{ {
cemu_assert_debug(m_bufferCacheType != BufferCacheType::Host);
cemu_assert_debug(m_bufferCache); cemu_assert_debug(m_bufferCache);
cemu_assert_debug((offset + size) <= m_bufferCache->length()); cemu_assert_debug((offset + size) <= m_bufferCache->length());
auto allocation = m_tempBufferAllocator.GetBufferAllocation(size); if (m_bufferCacheType == BufferCacheType::DevicePrivate)
auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex); {
memcpy((uint8*)buffer->contents() + allocation.offset, data, size); auto allocation = m_tempBufferAllocator.GetBufferAllocation(size);
auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex);
memcpy((uint8*)buffer->contents() + allocation.offset, data, size);
// Lock the buffer to make sure it's not deallocated before the copy is done // Lock the buffer to make sure it's not deallocated before the copy is done
m_tempBufferAllocator.LockBuffer(allocation.bufferIndex); m_tempBufferAllocator.LockBuffer(allocation.bufferIndex);
m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
// Make sure the buffer has the right command buffer // Make sure the buffer has the right command buffer
m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this
// We can now safely unlock the buffer // We can now safely unlock the buffer
m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex); m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex);
}
else
{
memcpy((uint8*)m_bufferCache->contents() + offset, data, size);
}
// Notify vertex buffer cache about the change // Notify vertex buffer cache about the change
//m_vertexBufferCache.MemoryRangeChanged(offset, size); //m_vertexBufferCache.MemoryRangeChanged(offset, size);
@ -147,7 +175,11 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size) void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)
{ {
cemu_assert_debug(m_bufferCacheType != BufferCacheType::Host);
cemu_assert_debug(m_bufferCache); cemu_assert_debug(m_bufferCache);
m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); if (m_bufferCacheType == BufferCacheType::DevicePrivate)
m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
else
memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size);
} }

View File

@ -2,6 +2,8 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
#include "GameProfile/GameProfile.h"
/* /*
struct MetalRestridedBufferRange struct MetalRestridedBufferRange
{ {
@ -115,6 +117,17 @@ public:
} }
*/ */
// Getters

// True when the buffer cache type currently in use is BufferCacheType::Host.
bool UseHostMemoryForCache() const { return m_bufferCacheType == BufferCacheType::Host; }

// Base address recorded when host memory was imported as the buffer cache.
MPTR GetImportedMemBaseAddress() const { return m_importedMemBaseAddress; }
private: private:
class MetalRenderer* m_mtlr; class MetalRenderer* m_mtlr;
@ -126,4 +139,6 @@ private:
//MetalVertexBufferCache m_vertexBufferCache; //MetalVertexBufferCache m_vertexBufferCache;
MTL::Buffer* m_bufferCache = nullptr; MTL::Buffer* m_bufferCache = nullptr;
// Cache type actually in use; InitBufferCache() overwrites this with the game profile's choice.
// Defaulted so UseHostMemoryForCache() is well-defined if queried before the cache is created.
BufferCacheType m_bufferCacheType = BufferCacheType::DevicePrivate;
// Address that maps to offset 0 of the imported buffer; only assigned on the host-import path.
MPTR m_importedMemBaseAddress = 0;
}; };

View File

@ -30,8 +30,6 @@
#include "imgui/imgui_extension.h" #include "imgui/imgui_extension.h"
#include "imgui/imgui_impl_metal.h" #include "imgui/imgui_impl_metal.h"
#define DEFAULT_COMMIT_TRESHOLD 196
extern bool hasValidFramebufferAttached; extern bool hasValidFramebufferAttached;
float supportBufferData[512 * 4]; float supportBufferData[512 * 4];
@ -90,6 +88,12 @@ MetalRenderer::MetalRenderer()
m_depthStencilCache = new MetalDepthStencilCache(this); m_depthStencilCache = new MetalDepthStencilCache(this);
m_samplerCache = new MetalSamplerCache(this); m_samplerCache = new MetalSamplerCache(this);
// Lower the commit treshold when host memory is used for cache to reduce latency
if (m_memoryManager->UseHostMemoryForCache())
m_defaultCommitTreshlod = 64;
else
m_defaultCommitTreshlod = 196;
// Occlusion queries // Occlusion queries
m_occlusionQuery.m_resultBuffer = m_device->newBuffer(OCCLUSION_QUERY_POOL_SIZE * sizeof(uint64), MTL::ResourceStorageModeShared); m_occlusionQuery.m_resultBuffer = m_device->newBuffer(OCCLUSION_QUERY_POOL_SIZE * sizeof(uint64), MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
@ -97,8 +101,11 @@ MetalRenderer::MetalRenderer()
#endif #endif
m_occlusionQuery.m_resultsPtr = (uint64*)m_occlusionQuery.m_resultBuffer->contents(); m_occlusionQuery.m_resultsPtr = (uint64*)m_occlusionQuery.m_resultBuffer->contents();
// Initialize state // Reset vertex and uniform buffers
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++) for (uint32 i = 0; i < MAX_MTL_VERTEX_BUFFERS; i++)
m_state.m_vertexBufferOffsets[i] = INVALID_OFFSET;
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
{ {
for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++) for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
m_state.m_uniformBufferOffsets[i][j] = INVALID_OFFSET; m_state.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
@ -821,23 +828,28 @@ void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32
void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size)
{ {
if (m_memoryManager->UseHostMemoryForCache())
dstOffset -= m_memoryManager->GetImportedMemBaseAddress();
CopyBufferToBuffer(GetXfbRingBuffer(), srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size, MTL::RenderStageVertex | MTL::RenderStageMesh, ALL_MTL_RENDER_STAGES); CopyBufferToBuffer(GetXfbRingBuffer(), srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size, MTL::RenderStageVertex | MTL::RenderStageMesh, ALL_MTL_RENDER_STAGES);
} }
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
{ {
cemu_assert_debug(!m_memoryManager->UseHostMemoryForCache());
cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS); cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS);
auto& buffer = m_state.m_vertexBuffers[bufferIndex];
if (buffer.offset == offset && buffer.size == size) m_state.m_vertexBufferOffsets[bufferIndex] = offset;
return; //if (buffer.offset == offset && buffer.size == size)
// return;
//if (buffer.offset != INVALID_OFFSET) //if (buffer.offset != INVALID_OFFSET)
//{ //{
// m_memoryManager->UntrackVertexBuffer(bufferIndex); // m_memoryManager->UntrackVertexBuffer(bufferIndex);
//} //}
buffer.offset = offset; //buffer.offset = offset;
buffer.size = size; //buffer.size = size;
//buffer.restrideInfo = {}; //buffer.restrideInfo = {};
//m_memoryManager->TrackVertexBuffer(bufferIndex, offset, size, &buffer.restrideInfo); //m_memoryManager->TrackVertexBuffer(bufferIndex, offset, size, &buffer.restrideInfo);
@ -845,6 +857,8 @@ void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, u
void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
{ {
cemu_assert_debug(!m_memoryManager->UseHostMemoryForCache());
m_state.m_uniformBufferOffsets[GetMtlGeneralShaderType(shaderType)][bufferIndex] = offset; m_state.m_uniformBufferOffsets[GetMtlGeneralShaderType(shaderType)][bufferIndex] = offset;
} }
@ -988,9 +1002,24 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
uint32 indexBufferIndex = 0; uint32 indexBufferIndex = 0;
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
// synchronize vertex and uniform cache and update buffer bindings // Buffer cache
// We need to call this before getting the render command encoder, since it can cause buffer copies if (m_memoryManager->UseHostMemoryForCache())
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); {
// direct memory access (Wii U memory space imported as a buffer), update buffer bindings
draw_updateVertexBuffersDirectAccess();
if (vertexShader)
draw_updateUniformBuffersDirectAccess(vertexShader, mmSQ_VTX_UNIFORM_BLOCK_START);
if (geometryShader)
draw_updateUniformBuffersDirectAccess(geometryShader, mmSQ_GS_UNIFORM_BLOCK_START);
if (pixelShader)
draw_updateUniformBuffersDirectAccess(pixelShader, mmSQ_PS_UNIFORM_BLOCK_START);
}
else
{
// synchronize vertex and uniform cache and update buffer bindings
// We need to call this before getting the render command encoder, since it can cause buffer copies
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
}
// Render pass // Render pass
auto renderCommandEncoder = GetRenderCommandEncoder(); auto renderCommandEncoder = GetRenderCommandEncoder();
@ -1190,10 +1219,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Vertex buffers // Vertex buffers
//std::vector<MTL::Resource*> barrierBuffers; //std::vector<MTL::Resource*> barrierBuffers;
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) for (uint8 i = 0; i < MAX_MTL_VERTEX_BUFFERS; i++)
{ {
auto& vertexBufferRange = m_state.m_vertexBuffers[i]; size_t offset = m_state.m_vertexBufferOffsets[i];
if (vertexBufferRange.offset != INVALID_OFFSET) if (offset != INVALID_OFFSET)
{ {
/* /*
MTL::Buffer* buffer; MTL::Buffer* buffer;
@ -1218,11 +1247,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
} }
*/ */
MTL::Buffer* buffer = m_memoryManager->GetBufferCache();
size_t offset = m_state.m_vertexBuffers[i].offset;
// Bind // Bind
SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), m_memoryManager->GetBufferCache(), offset, GET_MTL_VERTEX_BUFFER_INDEX(i));
} }
} }
@ -1301,7 +1327,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
m_occlusionQuery.m_currentIndex = (m_occlusionQuery.m_currentIndex + 1) % OCCLUSION_QUERY_POOL_SIZE; m_occlusionQuery.m_currentIndex = (m_occlusionQuery.m_currentIndex + 1) % OCCLUSION_QUERY_POOL_SIZE;
// Streamout // Streamout
LatteStreamout_FinishDrawcall(false); LatteStreamout_FinishDrawcall(m_memoryManager->UseHostMemoryForCache());
// Debug // Debug
if (fetchVertexManually) if (fetchVertexManually)
@ -1333,6 +1359,54 @@ void MetalRenderer::draw_endSequence()
} }
} }
void MetalRenderer::draw_updateVertexBuffersDirectAccess()
{
LatteFetchShader* parsedFetchShader = LatteSHRC_GetActiveFetchShader();
if (!parsedFetchShader)
return;
for (auto& bufferGroup : parsedFetchShader->bufferGroups)
{
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
MPTR bufferAddress = LatteGPUState.contextRegister[bufferBaseRegisterIndex + 0];
//uint32 bufferSize = LatteGPUState.contextRegister[bufferBaseRegisterIndex + 1] + 1;
//uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
if (bufferAddress == MPTR_NULL) [[unlikely]]
bufferAddress = 0x10000000; // TODO: really?
m_state.m_vertexBufferOffsets[bufferIndex] = bufferAddress - m_memoryManager->GetImportedMemBaseAddress();
}
}
// Direct-access path: refreshes the uniform buffer offsets of `shader` from the GPU context
// registers starting at `uniformBufferRegOffset`. Only shaders in full-cbank uniform mode are
// handled; each offset is stored relative to the base address of the imported memory buffer.
void MetalRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader* shader, const uint32 uniformBufferRegOffset)
{
	if (shader->uniformMode != LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
		return;

	for (const auto& buf : shader->list_quickBufferList)
	{
		const sint32 i = buf.index;

		// Every uniform block occupies 7 consecutive registers; word 0 holds the address.
		// Only the offset is recorded in m_state, so the size word is not read here.
		const MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0];
		if (physicalAddr == MPTR_NULL) [[unlikely]]
		{
			cemu_assert_unimplemented();
			continue;
		}

		cemu_assert_debug(physicalAddr < 0x50000000);

		const uint32 bufferIndex = i;
		cemu_assert_debug(bufferIndex < 16);

		m_state.m_uniformBufferOffsets[GetMtlGeneralShaderType(shader->shaderType)][bufferIndex] = physicalAddr - m_memoryManager->GetImportedMemBaseAddress();
	}
}
void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
{ {
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
@ -1486,7 +1560,7 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer()
m_commandBuffers.push_back({mtlCommandBuffer}); m_commandBuffers.push_back({mtlCommandBuffer});
m_recordedDrawcalls = 0; m_recordedDrawcalls = 0;
m_commitTreshold = DEFAULT_COMMIT_TRESHOLD; m_commitTreshold = m_defaultCommitTreshlod;
// Notify memory manager about the new command buffer // Notify memory manager about the new command buffer
m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer); m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer);

View File

@ -29,6 +29,7 @@ struct MetalRestrideInfo
}; };
*/ */
/*
struct MetalBoundBuffer struct MetalBoundBuffer
{ {
size_t offset = INVALID_OFFSET; size_t offset = INVALID_OFFSET;
@ -36,6 +37,7 @@ struct MetalBoundBuffer
// Memory manager will write restride info to this variable // Memory manager will write restride info to this variable
//MetalRestrideInfo restrideInfo; //MetalRestrideInfo restrideInfo;
}; };
*/
enum MetalGeneralShaderType enum MetalGeneralShaderType
{ {
@ -141,7 +143,7 @@ struct MetalState
// If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change // If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change
MetalActiveFBOState m_lastUsedFBO; MetalActiveFBOState m_lastUsedFBO;
MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}}; size_t m_vertexBufferOffsets[MAX_MTL_VERTEX_BUFFERS] = {INVALID_OFFSET};
// TODO: find out what is the max number of bound textures on the Wii U // TODO: find out what is the max number of bound textures on the Wii U
class LatteTextureViewMtl* m_textures[64] = {nullptr}; class LatteTextureViewMtl* m_textures[64] = {nullptr};
size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
@ -277,6 +279,9 @@ public:
void draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) override; void draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) override;
void draw_endSequence() override; void draw_endSequence() override;
void draw_updateVertexBuffersDirectAccess();
void draw_updateUniformBuffersDirectAccess(LatteDecompilerShader* shader, const uint32 uniformBufferRegOffset);
// index // index
void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override; void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override;
void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override; void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override;
@ -506,6 +511,7 @@ private:
MTL::CommandEncoder* m_commandEncoder = nullptr; MTL::CommandEncoder* m_commandEncoder = nullptr;
uint32 m_recordedDrawcalls; uint32 m_recordedDrawcalls;
uint32 m_defaultCommitTreshlod;
uint32 m_commitTreshold; uint32 m_commitTreshold;
// State // State

View File

@ -124,6 +124,14 @@ enum class AccurateShaderMulOption
}; };
ENABLE_ENUM_ITERATORS(AccurateShaderMulOption, AccurateShaderMulOption::False, AccurateShaderMulOption::True); ENABLE_ENUM_ITERATORS(AccurateShaderMulOption, AccurateShaderMulOption::False, AccurateShaderMulOption::True);
// Selects how the Metal renderer backs its buffer cache.
// The enumerator order matches the entries of the "Buffer cache type" choice in the game
// profile window, which converts between selection index and enum value by casting.
enum class BufferCacheType
{
// Cache buffer created with MTL::ResourceStorageModePrivate; uploads go through a temporary buffer copy.
DevicePrivate,
// Cache buffer created with MTL::ResourceStorageModeShared; uploads are written directly into its contents.
DeviceShared,
// Emulated memory is imported as the cache buffer (attempted on Apple GPUs only);
// falls back to DevicePrivate when the import fails.
Host,
};
// Iteration range covers all enumerators, from DevicePrivate through Host.
ENABLE_ENUM_ITERATORS(BufferCacheType, BufferCacheType::DevicePrivate, BufferCacheType::Host);
enum class CPUMode enum class CPUMode
{ {
SinglecoreInterpreter = 0, SinglecoreInterpreter = 0,
@ -222,6 +230,21 @@ struct fmt::formatter<AccurateShaderMulOption> : formatter<string_view> {
} }
}; };
template <> template <>
// Formats a BufferCacheType as its lowercase display name; values outside the enum print "unknown".
struct fmt::formatter<BufferCacheType> : formatter<string_view> {
	template <typename FormatContext>
	auto format(const BufferCacheType c, FormatContext &ctx) const {
		const string_view label =
			c == BufferCacheType::DevicePrivate ? "device private" :
			c == BufferCacheType::DeviceShared ? "device shared" :
			c == BufferCacheType::Host ? "host" :
			"unknown";
		return formatter<string_view>::format(label, ctx);
	}
};
template <>
struct fmt::formatter<CPUMode> : formatter<string_view> { struct fmt::formatter<CPUMode> : formatter<string_view> {
template <typename FormatContext> template <typename FormatContext>
auto format(const CPUMode c, FormatContext &ctx) const { auto format(const CPUMode c, FormatContext &ctx) const {

View File

@ -8,6 +8,7 @@
#include <wx/wupdlock.h> #include <wx/wupdlock.h>
#include <wx/slider.h> #include <wx/slider.h>
#include "config/CemuConfig.h"
#include "gui/helpers/wxHelpers.h" #include "gui/helpers/wxHelpers.h"
#include "input/InputManager.h" #include "input/InputManager.h"
@ -127,6 +128,13 @@ GameProfileWindow::GameProfileWindow(wxWindow* parent, uint64_t title_id)
m_shader_mul_accuracy->SetToolTip(_("EXPERT OPTION\nControls the accuracy of floating point multiplication in shaders.\n\nRecommended: true")); m_shader_mul_accuracy->SetToolTip(_("EXPERT OPTION\nControls the accuracy of floating point multiplication in shaders.\n\nRecommended: true"));
first_row->Add(m_shader_mul_accuracy, 0, wxALL, 5); first_row->Add(m_shader_mul_accuracy, 0, wxALL, 5);
first_row->Add(new wxStaticText(panel, wxID_ANY, _("Buffer cache type")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
wxString cache_values[] = { _("device private"), _("device shared"), _("host")};
m_buffer_cache_type = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(cache_values), cache_values);
m_buffer_cache_type->SetToolTip(_("EXPERT OPTION\nDecides how the buffer cache memory will be managed.\n\nMetal only\n\nRecommended: device private"));
first_row->Add(m_buffer_cache_type, 0, wxALL, 5);
/*first_row->Add(new wxStaticText(panel, wxID_ANY, _("GPU buffer cache accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5); /*first_row->Add(new wxStaticText(panel, wxID_ANY, _("GPU buffer cache accuracy")), 0, wxALIGN_CENTER_VERTICAL | wxALL, 5);
wxString accuarcy_values[] = { _("high"), _("medium"), _("low") }; wxString accuarcy_values[] = { _("high"), _("medium"), _("low") };
m_cache_accuracy = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(accuarcy_values), accuarcy_values); m_cache_accuracy = new wxChoice(panel, wxID_ANY, wxDefaultPosition, wxDefaultSize, (int)std::size(accuarcy_values), accuarcy_values);
@ -273,6 +281,7 @@ void GameProfileWindow::ApplyProfile()
else else
m_graphic_api->SetSelection(1 + m_game_profile.m_graphics_api.value()); // "", OpenGL, Vulkan, Metal m_graphic_api->SetSelection(1 + m_game_profile.m_graphics_api.value()); // "", OpenGL, Vulkan, Metal
m_shader_mul_accuracy->SetSelection((int)m_game_profile.m_accurateShaderMul); m_shader_mul_accuracy->SetSelection((int)m_game_profile.m_accurateShaderMul);
m_buffer_cache_type->SetSelection((int)m_game_profile.m_bufferCacheType);
//// audio //// audio
//m_disable_audio->Set3StateValue(GetCheckboxState(m_game_profile.disableAudio)); //m_disable_audio->Set3StateValue(GetCheckboxState(m_game_profile.disableAudio));
@ -332,6 +341,7 @@ void GameProfileWindow::SaveProfile()
// gpu // gpu
m_game_profile.m_accurateShaderMul = (AccurateShaderMulOption)m_shader_mul_accuracy->GetSelection(); m_game_profile.m_accurateShaderMul = (AccurateShaderMulOption)m_shader_mul_accuracy->GetSelection();
m_game_profile.m_bufferCacheType = (BufferCacheType)m_buffer_cache_type->GetSelection();
if (m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::False && m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::True) if (m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::False && m_game_profile.m_accurateShaderMul != AccurateShaderMulOption::True)
m_game_profile.m_accurateShaderMul = AccurateShaderMulOption::True; // force a legal value m_game_profile.m_accurateShaderMul = AccurateShaderMulOption::True; // force a legal value

View File

@ -40,6 +40,7 @@ private:
wxChoice* m_graphic_api; wxChoice* m_graphic_api;
wxChoice* m_shader_mul_accuracy; wxChoice* m_shader_mul_accuracy;
wxChoice* m_buffer_cache_type;
//wxChoice* m_cache_accuracy; //wxChoice* m_cache_accuracy;
// audio // audio
@ -47,4 +48,4 @@ private:
// controller // controller
wxComboBox* m_controller_profile[8]; wxComboBox* m_controller_profile[8];
}; };