Metal: only map CPU-visible allocator buffers; disable depth write without a depth attachment

MetalBufferAllocator.h
  * The constructor derives m_isCPUAccessible from the requested storage mode
    (true for MTL::ResourceStorageModeShared or MTL::ResourceStorageModeManaged)
    and stores the final resource options in m_options.
    MTL::ResourceCPUCacheModeWriteCombined is OR-ed in only when the buffer is
    CPU accessible.
  * allocation.data is set from contents() only when m_isCPUAccessible is true,
    otherwise it is nullptr, so callers must not write through it for
    non-CPU-accessible allocators.
  * New buffers are created with m_options instead of
    m_storageMode | MTL::ResourceCPUCacheModeWriteCombined.

MetalRenderer.cpp
  * texture_loadSlice reads m_isAppleGPU directly instead of calling IsAppleGPU().
  * draw_execute clears Z_WRITE_ENABLE in LatteGPUState.contextNew.DB_DEPTH_CONTROL
    while the depth-stencil state is looked up if m_state.m_lastUsedFBO has no
    depth texture, then restores the saved value, because the register is
    modified in place.

Review notes
  * Two hunks from the original patch are intentionally left out: the one that
    forced m_isAppleGPU to false (keeping the real supportsFamily() query only as
    a trailing comment), and the one that replaced GetCommandBuffer() with an
    early return at the top of surfaceCopy_copySurfaceWithFormatConversion,
    which made the rest of that function unreachable.
  * Whitespace in this patch was reconstructed from a flattened copy; apply with
    whitespace tolerance if the context does not match exactly.

diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
index 445fb823..702278ca 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
@@ -15,7 +15,13 @@ template
 class MetalBufferAllocator
 {
 public:
-	MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer}, m_storageMode{storageMode} {}
+	MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer} {
+		m_isCPUAccessible = (storageMode == MTL::ResourceStorageModeShared) || (storageMode == MTL::ResourceStorageModeManaged);
+
+		m_options = storageMode;
+		if (m_isCPUAccessible)
+			m_options |= MTL::ResourceCPUCacheModeWriteCombined;
+	}
 
 	~MetalBufferAllocator()
 	{
@@ -54,7 +60,7 @@ public:
 				allocation.bufferIndex = range.bufferIndex;
 				allocation.offset = range.offset;
 				allocation.size = size;
-				allocation.data = (uint8*)buffer.m_buffer->contents() + range.offset;
+				allocation.data = (m_isCPUAccessible ? (uint8*)buffer.m_buffer->contents() + range.offset : nullptr);
 
 				range.offset += size;
 				range.size -= size;
@@ -70,7 +76,7 @@ public:
 
 		// If no free range was found, allocate a new buffer
 		m_allocationSize = std::max(m_allocationSize, size);
-		MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, m_storageMode | MTL::ResourceCPUCacheModeWriteCombined);
+		MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, m_options);
 #ifdef CEMU_DEBUG_ASSERT
 		buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer));
 #endif
@@ -79,7 +85,7 @@ public:
 		allocation.bufferIndex = m_buffers.size();
 		allocation.offset = 0;
 		allocation.size = size;
-		allocation.data = buffer->contents();
+		allocation.data = (m_isCPUAccessible ? buffer->contents() : nullptr);
 
 		m_buffers.push_back({buffer});
 
@@ -129,7 +135,8 @@ public:
 
 protected:
 	class MetalRenderer* m_mtlr;
-	MTL::ResourceOptions m_storageMode;
+	bool m_isCPUAccessible;
+	MTL::ResourceOptions m_options;
 
 	size_t m_allocationSize = 8 * 1024 * 1024;
 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 008cac99..44090a05 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -511,7 +511,7 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s
 	size_t bytesPerRow = GetMtlTextureBytesPerRow(textureMtl->GetFormat(), textureMtl->IsDepth(), width);
 	// No need to set bytesPerImage for 3D textures, since we always load just one slice
 	//size_t bytesPerImage = GetMtlTextureBytesPerImage(textureMtl->GetFormat(), textureMtl->IsDepth(), height, bytesPerRow);
-	if (IsAppleGPU())
+	if (m_isAppleGPU)
 	{
 		textureMtl->GetTexture()->replaceRegion(MTL::Region(0, 0, offsetZ, width, height, 1), mipIndex, sliceIndex, pixelData, bytesPerRow, 0);
 	}
@@ -907,10 +907,12 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
 	bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
 
 	// Depth stencil state
-	// TODO
+
 	// Disable depth write when there is no depth attachment
-	//if (!m_state.m_lastUsedFBO->depthBuffer.texture)
-	//	depthControl.set_Z_WRITE_ENABLE(false);
+	auto& depthControl = LatteGPUState.contextNew.DB_DEPTH_CONTROL;
+	bool depthWriteEnable = depthControl.get_Z_WRITE_ENABLE();
+	if (!m_state.m_lastUsedFBO->depthBuffer.texture)
+		depthControl.set_Z_WRITE_ENABLE(false);
 
 	MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew);
 	if (depthStencilState != encoderState.m_depthStencilState)
@@ -919,6 +921,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
 		encoderState.m_depthStencilState = depthStencilState;
 	}
 
+	// Restore the original depth write state
+	depthControl.set_Z_WRITE_ENABLE(depthWriteEnable);
+
 	// Stencil reference
 	bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE();
 	if (stencilEnable)