diff --git a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp index 1c02f7b4..a9e673f6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp @@ -1,5 +1,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" +#include "HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Metal/MTLRenderPass.hpp" void CachedFBOMtl::CreateRenderPass() @@ -30,7 +31,7 @@ void CachedFBOMtl::CreateRenderPass() depthAttachment->setStoreAction(MTL::StoreActionStore); // setup stencil attachment - if (depthBuffer.hasStencil) + if (depthBuffer.hasStencil && GetMtlPixelFormatInfo(depthBuffer.texture->format, true).hasStencil) { auto stencilAttachment = m_renderPassDescriptor->stencilAttachment(); stencilAttachment->setTexture(textureView->GetRGBAView()); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index 0538650a..704cf883 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -74,11 +74,12 @@ std::map MTL_COLOR_FORMAT_TABLE = { }; std::map MTL_DEPTH_FORMAT_TABLE = { - {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4}}, // TODO: not supported on Apple sillicon, maybe find something else - {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4}}, // TODO: correct? 
- {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5}}, - {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2}}, - {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4}}, + // TODO: one of these 2 formats is not supported on Apple silicon + {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}}, + {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}}, + {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5, {1, 1}, true}}, + {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2, {1, 1}}}, + {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4, {1, 1}}}, }; const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index c3f697bb..5fcd2d22 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -29,6 +29,7 @@ struct MetalPixelFormatInfo { MetalDataType dataType; size_t bytesPerBlock; Uvec2 blockTexelSize = {1, 1}; + bool hasStencil = false; }; const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index 63ded223..fb9419d6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -3,10 +3,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" #include "Common/precompiled.h" -#include "Foundation/NSRange.hpp" -#include 
"Metal/MTLRenderCommandEncoder.hpp" - -const size_t BUFFER_ALLOCATION_SIZE = 8 * 1024 * 1024; MetalBufferAllocator::~MetalBufferAllocator() { @@ -16,10 +12,10 @@ MetalBufferAllocator::~MetalBufferAllocator() } } -MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment) +MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size) { // Align the size - size = Align(size, alignment); + size = Align(size, 16); // First, try to find a free range for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) @@ -45,7 +41,8 @@ MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, siz } // If no free range was found, allocate a new buffer - MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(std::max(size, BUFFER_ALLOCATION_SIZE), MTL::ResourceStorageModeShared); + m_allocationSize = std::max(m_allocationSize, size); + MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared); #ifdef CEMU_DEBUG_ASSERT buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer)); #endif @@ -58,16 +55,20 @@ MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, siz m_buffers.push_back(buffer); // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges - if (size < BUFFER_ALLOCATION_SIZE) + if (size < m_allocationSize) { MetalBufferRange range; range.bufferIndex = allocation.bufferIndex; range.offset = size; - range.size = BUFFER_ALLOCATION_SIZE - size; + range.size = m_allocationSize - size; m_freeBufferRanges.push_back(range); } + // Increase the allocation size for the next buffer + if (m_allocationSize < 128 * 1024 * 1024) + m_allocationSize *= 2; + return allocation; } @@ -91,10 +92,11 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu { size_t newStride = Align(stride, 4); size_t newSize = vertexBufferRange.size / stride * newStride; - restrideInfo.allocation = 
m_bufferAllocator->GetBufferAllocation(newSize, 4); + restrideInfo.allocation = m_bufferAllocator->GetBufferAllocation(newSize); + buffer = m_bufferAllocator->GetBuffer(restrideInfo.allocation.bufferIndex); //uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset; - //uint8* newPtr = (uint8*)restrideInfo.buffer->contents(); + //uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset; //for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++) //{ @@ -123,9 +125,18 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2)); m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = INVALID_OFFSET; - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), vertexBufferRange.size / stride); + // TODO: remove + uint32 vertexCount = vertexBufferRange.size / stride; + if (vertexCount * strideData.oldStride > buffers[0]->length() - offsets[0]) { + throw std::runtime_error("Source buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.oldStride) + " > " + std::to_string(buffers[0]->length()) + " - " + std::to_string(offsets[0]) + ")"); + } + if (vertexCount * strideData.newStride > buffers[1]->length() - offsets[1]) { + throw std::runtime_error("Destination buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.newStride) + " > " + std::to_string(buffers[1]->length()) + " - " + std::to_string(offsets[1]) + ")"); + } - // TODO: do the barrier in one call? + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride); + + // TODO: do the barriers in one call? 
MTL::Resource* barrierBuffers[] = {buffer}; renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index d3588fd5..9bffe9f2 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -47,11 +47,13 @@ public: return m_buffers[bufferIndex]; } - MetalBufferAllocation GetBufferAllocation(size_t size, size_t alignment); + MetalBufferAllocation GetBufferAllocation(size_t size); private: class MetalRenderer* m_mtlr; + size_t m_allocationSize = 8 * 1024 * 1024; + std::vector m_buffers; std::vector m_freeBufferRanges; }; @@ -139,9 +141,9 @@ public: return m_bufferAllocator/*s[bufferAllocatorIndex]*/.GetBuffer(bufferIndex); } - MetalBufferAllocation GetBufferAllocation(size_t size, size_t alignment) + MetalBufferAllocation GetBufferAllocation(size_t size) { - auto allocation = m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.GetBufferAllocation(size, alignment); + auto allocation = m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.GetBufferAllocation(size); //allocation.bufferIndex |= (m_bufferAllocatorIndex << bufferAllocatorIndexShift); return allocation; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 616ceeb1..94ab3721 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -243,6 +243,13 @@ uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchSh stateHash = std::rotl(stateHash, 7); } + if (activeFBO->depthBuffer.texture) + { + auto textureView = static_cast(activeFBO->depthBuffer.texture); + stateHash += textureView->GetRGBAView()->pixelFormat(); + stateHash = std::rotl(stateHash, 7); + } + for (auto& group : fetchShader->bufferGroups) { uint32 
bufferStride = group.getCurrentBufferStride(lcr.GetRawView()); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index bdd3c2ac..a9245383 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -17,10 +17,6 @@ #include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cemu/Logging/CemuDebugLogging.h" #include "Common/precompiled.h" -#include "HW/Latte/Core/Latte.h" -#include "HW/Latte/Renderer/Metal/MetalCommon.h" -#include "Metal/MTLRenderCommandEncoder.hpp" -#include "Metal/MTLRenderPass.hpp" #include "gui/guiWrapper.h" #define COMMIT_TRESHOLD 256 @@ -34,14 +30,38 @@ MetalRenderer::MetalRenderer() m_device = MTL::CreateSystemDefaultDevice(); m_commandQueue = m_device->newCommandQueue(); + // Resources MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); +#ifdef CEMU_DEBUG_ASSERT + samplerDescriptor->setLabel(GetLabel("Nearest sampler state", samplerDescriptor)); +#endif m_nearestSampler = m_device->newSamplerState(samplerDescriptor); samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); +#ifdef CEMU_DEBUG_ASSERT + samplerDescriptor->setLabel(GetLabel("Linear sampler state", samplerDescriptor)); +#endif m_linearSampler = m_device->newSamplerState(samplerDescriptor); samplerDescriptor->release(); + // Null resources + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setTextureType(MTL::TextureType1D); + textureDescriptor->setWidth(4); + m_nullTexture1D = m_device->newTexture(textureDescriptor); +#ifdef CEMU_DEBUG_ASSERT + m_nullTexture1D->setLabel(GetLabel("Null texture 1D", m_nullTexture1D)); +#endif + + textureDescriptor->setTextureType(MTL::TextureType2D); + textureDescriptor->setHeight(4); + m_nullTexture2D = m_device->newTexture(textureDescriptor); +#ifdef 
CEMU_DEBUG_ASSERT + m_nullTexture2D->setLabel(GetLabel("Null texture 2D", m_nullTexture2D)); +#endif + textureDescriptor->release(); + m_memoryManager = new MetalMemoryManager(this); m_pipelineCache = new MetalPipelineCache(this); m_depthStencilCache = new MetalDepthStencilCache(this); @@ -296,22 +316,15 @@ void MetalRenderer::AppendOverlayDebugInfo() debug_printf("MetalRenderer::AppendOverlayDebugInfo not implemented\n"); } +// TODO: halfZ void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ) { m_state.m_viewport = MTL::Viewport{x, y, width, height, nearZ, farZ}; - if (m_encoderType == MetalEncoderType::Render) - { - static_cast(m_commandEncoder)->setViewport(m_state.m_viewport); - } } void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight) { - m_state.m_scissor = MTL::ScissorRect{NS::UInteger(scissorX), NS::UInteger(scissorY), NS::UInteger(scissorWidth), NS::UInteger(scissorHeight)}; - if (m_encoderType == MetalEncoderType::Render) - { - static_cast(m_commandEncoder)->setScissorRect(m_state.m_scissor); - } + m_state.m_scissor = MTL::ScissorRect{(uint32)scissorX, (uint32)scissorY, (uint32)scissorWidth, (uint32)scissorHeight}; } LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key) @@ -396,7 +409,7 @@ void MetalRenderer::texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sl depthAttachment->setSlice(sliceIndex); depthAttachment->setLevel(mipIndex); } - if (clearStencil) + if (clearStencil && GetMtlPixelFormatInfo(hostTexture->format, true).hasStencil) { auto stencilAttachment = renderPassDescriptor->stencilAttachment(); stencilAttachment->setTexture(mtlTexture); @@ -854,6 +867,33 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 encoderState.m_frontFaceWinding = frontFaceWinding; } + // Viewport + if (m_state.m_viewport.originX != encoderState.m_viewport.originX || + 
m_state.m_viewport.originY != encoderState.m_viewport.originY || + m_state.m_viewport.width != encoderState.m_viewport.width || + m_state.m_viewport.height != encoderState.m_viewport.height || + m_state.m_viewport.znear != encoderState.m_viewport.znear || + m_state.m_viewport.zfar != encoderState.m_viewport.zfar) + { + renderCommandEncoder->setViewport(m_state.m_viewport); + + encoderState.m_viewport = m_state.m_viewport; + } + + // Scissor + if (m_state.m_scissor.x != encoderState.m_scissor.x || + m_state.m_scissor.y != encoderState.m_scissor.y || + m_state.m_scissor.width != encoderState.m_scissor.width || + m_state.m_scissor.height != encoderState.m_scissor.height) + { + encoderState.m_scissor = m_state.m_scissor; + + // TODO: clamp scissor to render target dimensions + //scissor.width = ; + //scissor.height = ; + renderCommandEncoder->setScissorRect(encoderState.m_scissor); + } + // Resources // Index buffer @@ -935,7 +975,7 @@ void MetalRenderer::draw_endSequence() void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) { - auto allocation = m_memoryManager->GetBufferAllocation(size, 4); + auto allocation = m_memoryManager->GetBufferAllocation(size); offset = allocation.bufferOffset; bufferIndex = allocation.bufferIndex; @@ -1017,7 +1057,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr if (!needsNewRenderPass) { - if (m_state.m_activeFBO->depthBuffer.texture && m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture) + if (m_state.m_activeFBO->depthBuffer.texture && (m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture || ( m_state.m_activeFBO->depthBuffer.hasStencil && !m_state.m_lastUsedFBO->depthBuffer.hasStencil))) { needsNewRenderPass = true; } @@ -1155,7 +1195,7 @@ bool MetalRenderer::AcquireNextDrawable(bool mainWindow) m_drawable = m_metalLayer->nextDrawable(); if (!m_drawable) { - printf("failed to acquire 
next drawable\n"); + debug_printf("failed to acquire next drawable\n"); return false; } @@ -1191,13 +1231,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE UNREACHABLE; } - auto textureView = m_state.m_textures[hostTextureUnit]; - if (!textureView) - { - debug_printf("invalid bound texture view %u\n", hostTextureUnit); - continue; - } - // TODO: uncomment uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit]; if (binding >= MAX_MTL_TEXTURES) @@ -1206,36 +1239,88 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE continue; } + auto textureView = m_state.m_textures[hostTextureUnit]; + if (!textureView) + { + // TODO: don't bind if already bound + if (textureDim == Latte::E_DIM::DIM_1D) + { + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + { + renderCommandEncoder->setVertexTexture(m_nullTexture1D, binding); + renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); + break; + } + case LatteConst::ShaderType::Pixel: + { + renderCommandEncoder->setFragmentTexture(m_nullTexture1D, binding); + renderCommandEncoder->setFragmentSamplerState(m_nearestSampler, binding); + break; + } + default: + UNREACHABLE; + } + } + else + { + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + { + renderCommandEncoder->setVertexTexture(m_nullTexture2D, binding); + renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); + break; + } + case LatteConst::ShaderType::Pixel: + { + renderCommandEncoder->setFragmentTexture(m_nullTexture2D, binding); + renderCommandEncoder->setFragmentSamplerState(m_nearestSampler, binding); + break; + } + default: + UNREACHABLE; + } + } + continue; + } + LatteTexture* baseTexture = textureView->baseTexture; uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit]; + MTL::SamplerState* sampler; if 
(stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE) { uint32 samplerIndex = stageSamplerIndex + LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType); - auto sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex); - - auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding]; - if (sampler != boundSampler) - { - boundSampler = sampler; - - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexSamplerState(sampler, binding); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentSamplerState(sampler, binding); - break; - } - default: - UNREACHABLE; - } - } + sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex); } + else + { + sampler = m_nearestSampler; + } + + auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding]; + if (sampler != boundSampler) + { + boundSampler = sampler; + + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + { + renderCommandEncoder->setVertexSamplerState(sampler, binding); + break; + } + case LatteConst::ShaderType::Pixel: + { + renderCommandEncoder->setFragmentSamplerState(sampler, binding); + break; + } + default: + UNREACHABLE; + } + } // get texture register word 0 uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; @@ -1347,16 +1432,22 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE } */ + // TODO: uncomment + //auto supportBuffer = m_memoryManager->GetBufferAllocation(sizeof(supportBufferData)); + //memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData)); + switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { + //renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING); renderCommandEncoder->setVertexBytes(supportBufferData, 
sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } case LatteConst::ShaderType::Pixel: { - renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); + //renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING); + renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } default: @@ -1428,12 +1519,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder) { - // Viewport - renderCommandEncoder->setViewport(m_state.m_viewport); - - // Scissor - renderCommandEncoder->setScissorRect(m_state.m_scissor); - // Vertex buffers for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index f315963e..664e8815 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -7,6 +7,7 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" +#include "Metal/MTLRenderCommandEncoder.hpp" struct MetalBoundBuffer { @@ -44,6 +45,8 @@ struct MetalEncoderState MTL::DepthStencilState* m_depthStencilState = nullptr; MTL::CullMode m_cullMode = MTL::CullModeNone; MTL::Winding m_frontFaceWinding = MTL::WindingClockwise; + MTL::Viewport m_viewport = {}; + MTL::ScissorRect m_scissor = {}; uint32 m_stencilRefFront = 0; uint32 m_stencilRefBack = 0; uint32 m_depthBias = 0; @@ -74,8 +77,8 @@ struct MetalState class LatteTextureViewMtl* m_textures[64] = {nullptr}; size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; - MTL::Viewport m_viewport = {0, 0, 0, 0, 0, 0}; - MTL::ScissorRect m_scissor = {0, 0, 0, 0}; + MTL::Viewport m_viewport = {}; + 
MTL::ScissorRect m_scissor = {}; }; struct MetalCommandBuffer @@ -290,6 +293,8 @@ public: { m_state.m_encoderState = {}; + // TODO: set viewport and scissor to render target dimensions if render commands + for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++) { for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++) @@ -350,10 +355,14 @@ private: class MetalHybridComputePipeline* m_copyTextureToTexturePipeline; class MetalHybridComputePipeline* m_restrideBufferPipeline; - // Basic + // Resources MTL::SamplerState* m_nearestSampler; MTL::SamplerState* m_linearSampler; + // Null resources + MTL::Texture* m_nullTexture1D; + MTL::Texture* m_nullTexture2D; + // Texture readback MTL::Buffer* m_readbackBuffer; uint32 m_readbackBufferWriteOffset = 0; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h index 2e94807d..c298150e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h @@ -7,45 +7,45 @@ constexpr const char* utilityShaderSource = _STRINGIFY(( constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)}; struct VertexOut { - float4 position [[position]]; - float2 texCoord; + float4 position [[position]]; + float2 texCoord; }; vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) { - VertexOut out; - out.position = float4(positions[vid], 0.0, 1.0); - out.texCoord = positions[vid] * 0.5 + 0.5; - out.texCoord.y = 1.0 - out.texCoord.y; + VertexOut out; + out.position = float4(positions[vid], 0.0, 1.0); + out.texCoord = positions[vid] * 0.5 + 0.5; + out.texCoord.y = 1.0 - out.texCoord.y; - return out; + return out; } fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { - return tex.sample(samplr, in.texCoord); + return tex.sample(samplr, in.texCoord); } struct CopyParams { - uint width; - uint srcMip; - uint srcSlice; - uint dstMip; - uint 
dstSlice; + uint width; + uint srcMip; + uint srcSlice; + uint dstMip; + uint dstSlice; }; vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array src [[texture(GET_TEXTURE_BINDING(0))]], texture2d_array dst [[texture(GET_TEXTURE_BINDING(1))]], constant CopyParams& params [[buffer(GET_BUFFER_BINDING(0))]]) { - uint2 coord = uint2(vid % params.width, vid / params.width); - return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip); + uint2 coord = uint2(vid % params.width, vid / params.width); + return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip); } struct RestrideParams { - uint oldStride; - uint newStride; + uint oldStride; + uint newStride; }; /* TODO: use uint32? Since that would require less iterations */ vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) { - for (uint32_t i = 0; i < params.oldStride; i++) { - dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; - } + for (uint32_t i = 0; i < params.oldStride; i++) { + dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; + } } ));