fix: some Metal validation errors

This commit is contained in:
Samuliak 2024-08-17 17:51:37 +02:00
parent 502d5b8b2f
commit 83a08b2247
9 changed files with 216 additions and 99 deletions

View File

@ -1,5 +1,6 @@
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Metal/MTLRenderPass.hpp" #include "Metal/MTLRenderPass.hpp"
void CachedFBOMtl::CreateRenderPass() void CachedFBOMtl::CreateRenderPass()
@ -30,7 +31,7 @@ void CachedFBOMtl::CreateRenderPass()
depthAttachment->setStoreAction(MTL::StoreActionStore); depthAttachment->setStoreAction(MTL::StoreActionStore);
// setup stencil attachment // setup stencil attachment
if (depthBuffer.hasStencil) if (depthBuffer.hasStencil && GetMtlPixelFormatInfo(depthBuffer.texture->format, true).hasStencil)
{ {
auto stencilAttachment = m_renderPassDescriptor->stencilAttachment(); auto stencilAttachment = m_renderPassDescriptor->stencilAttachment();
stencilAttachment->setTexture(textureView->GetRGBAView()); stencilAttachment->setTexture(textureView->GetRGBAView());

View File

@ -74,11 +74,12 @@ std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
}; };
std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = { std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4}}, // TODO: not supported on Apple sillicon, maybe find something else // TODO: one of these 2 formats is not supported on Apple silicon
{Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4}}, // TODO: correct? {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}},
{Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5}}, {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}},
{Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2}}, {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5, {1, 1}, true}},
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4}}, {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2, {1, 1}}},
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4, {1, 1}}},
}; };
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth) const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)

View File

@ -29,6 +29,7 @@ struct MetalPixelFormatInfo {
MetalDataType dataType; MetalDataType dataType;
size_t bytesPerBlock; size_t bytesPerBlock;
Uvec2 blockTexelSize = {1, 1}; Uvec2 blockTexelSize = {1, 1};
bool hasStencil = false;
}; };
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth); const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth);

View File

@ -3,10 +3,6 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Common/precompiled.h" #include "Common/precompiled.h"
#include "Foundation/NSRange.hpp"
#include "Metal/MTLRenderCommandEncoder.hpp"
const size_t BUFFER_ALLOCATION_SIZE = 8 * 1024 * 1024;
MetalBufferAllocator::~MetalBufferAllocator() MetalBufferAllocator::~MetalBufferAllocator()
{ {
@ -16,10 +12,10 @@ MetalBufferAllocator::~MetalBufferAllocator()
} }
} }
MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment) MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size)
{ {
// Align the size // Align the size
size = Align(size, alignment); size = Align(size, 16);
// First, try to find a free range // First, try to find a free range
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
@ -45,7 +41,8 @@ MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, siz
} }
// If no free range was found, allocate a new buffer // If no free range was found, allocate a new buffer
MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(std::max(size, BUFFER_ALLOCATION_SIZE), MTL::ResourceStorageModeShared); m_allocationSize = std::max(m_allocationSize, size);
MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer)); buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer));
#endif #endif
@ -58,16 +55,20 @@ MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, siz
m_buffers.push_back(buffer); m_buffers.push_back(buffer);
// If the buffer is larger than the requested size, add the remaining space to the free buffer ranges // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
if (size < BUFFER_ALLOCATION_SIZE) if (size < m_allocationSize)
{ {
MetalBufferRange range; MetalBufferRange range;
range.bufferIndex = allocation.bufferIndex; range.bufferIndex = allocation.bufferIndex;
range.offset = size; range.offset = size;
range.size = BUFFER_ALLOCATION_SIZE - size; range.size = m_allocationSize - size;
m_freeBufferRanges.push_back(range); m_freeBufferRanges.push_back(range);
} }
// Increase the allocation size for the next buffer
if (m_allocationSize < 128 * 1024 * 1024)
m_allocationSize *= 2;
return allocation; return allocation;
} }
@ -91,10 +92,11 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
{ {
size_t newStride = Align(stride, 4); size_t newStride = Align(stride, 4);
size_t newSize = vertexBufferRange.size / stride * newStride; size_t newSize = vertexBufferRange.size / stride * newStride;
restrideInfo.allocation = m_bufferAllocator->GetBufferAllocation(newSize, 4); restrideInfo.allocation = m_bufferAllocator->GetBufferAllocation(newSize);
buffer = m_bufferAllocator->GetBuffer(restrideInfo.allocation.bufferIndex);
//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset; //uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
//uint8* newPtr = (uint8*)restrideInfo.buffer->contents(); //uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset;
//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++) //for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
//{ //{
@ -123,9 +125,18 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2)); renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2));
m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = INVALID_OFFSET; m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = INVALID_OFFSET;
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), vertexBufferRange.size / stride); // TODO: remove
uint32 vertexCount = vertexBufferRange.size / stride;
if (vertexCount * strideData.oldStride > buffers[0]->length() - offsets[0]) {
throw std::runtime_error("Source buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.oldStride) + " > " + std::to_string(buffers[0]->length()) + " - " + std::to_string(offsets[0]) + ")");
}
if (vertexCount * strideData.newStride > buffers[1]->length() - offsets[1]) {
throw std::runtime_error("Destination buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.newStride) + " > " + std::to_string(buffers[1]->length()) + " - " + std::to_string(offsets[1]) + ")");
}
// TODO: do the barrier in one call? renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride);
// TODO: do the barriers in one call?
MTL::Resource* barrierBuffers[] = {buffer}; MTL::Resource* barrierBuffers[] = {buffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex); renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
} }

View File

@ -47,11 +47,13 @@ public:
return m_buffers[bufferIndex]; return m_buffers[bufferIndex];
} }
MetalBufferAllocation GetBufferAllocation(size_t size, size_t alignment); MetalBufferAllocation GetBufferAllocation(size_t size);
private: private:
class MetalRenderer* m_mtlr; class MetalRenderer* m_mtlr;
size_t m_allocationSize = 8 * 1024 * 1024;
std::vector<MTL::Buffer*> m_buffers; std::vector<MTL::Buffer*> m_buffers;
std::vector<MetalBufferRange> m_freeBufferRanges; std::vector<MetalBufferRange> m_freeBufferRanges;
}; };
@ -139,9 +141,9 @@ public:
return m_bufferAllocator/*s[bufferAllocatorIndex]*/.GetBuffer(bufferIndex); return m_bufferAllocator/*s[bufferAllocatorIndex]*/.GetBuffer(bufferIndex);
} }
MetalBufferAllocation GetBufferAllocation(size_t size, size_t alignment) MetalBufferAllocation GetBufferAllocation(size_t size)
{ {
auto allocation = m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.GetBufferAllocation(size, alignment); auto allocation = m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.GetBufferAllocation(size);
//allocation.bufferIndex |= (m_bufferAllocatorIndex << bufferAllocatorIndexShift); //allocation.bufferIndex |= (m_bufferAllocatorIndex << bufferAllocatorIndexShift);
return allocation; return allocation;

View File

@ -243,6 +243,13 @@ uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchSh
stateHash = std::rotl<uint64>(stateHash, 7); stateHash = std::rotl<uint64>(stateHash, 7);
} }
// Fold the depth attachment's Metal pixel format into the pipeline state hash,
// but only when the active FBO actually has a depth texture attached.
if (activeFBO->depthBuffer.texture)
{
// The FBO stores a generic texture view pointer; it is downcast to the Metal view type here.
auto textureView = static_cast<LatteTextureViewMtl*>(activeFBO->depthBuffer.texture);
// Mix in the pixel format reported by the view's RGBA view, then rotate the hash
// by 7 bits (same add-then-rotate step as the hashing just above this block).
stateHash += textureView->GetRGBAView()->pixelFormat();
stateHash = std::rotl<uint64>(stateHash, 7);
}
for (auto& group : fetchShader->bufferGroups) for (auto& group : fetchShader->bufferGroups)
{ {
uint32 bufferStride = group.getCurrentBufferStride(lcr.GetRawView()); uint32 bufferStride = group.getCurrentBufferStride(lcr.GetRawView());

View File

@ -17,10 +17,6 @@
#include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h" #include "Cemu/Logging/CemuDebugLogging.h"
#include "Common/precompiled.h" #include "Common/precompiled.h"
#include "HW/Latte/Core/Latte.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Metal/MTLRenderCommandEncoder.hpp"
#include "Metal/MTLRenderPass.hpp"
#include "gui/guiWrapper.h" #include "gui/guiWrapper.h"
#define COMMIT_TRESHOLD 256 #define COMMIT_TRESHOLD 256
@ -34,14 +30,38 @@ MetalRenderer::MetalRenderer()
m_device = MTL::CreateSystemDefaultDevice(); m_device = MTL::CreateSystemDefaultDevice();
m_commandQueue = m_device->newCommandQueue(); m_commandQueue = m_device->newCommandQueue();
// Resources
MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
#ifdef CEMU_DEBUG_ASSERT
samplerDescriptor->setLabel(GetLabel("Nearest sampler state", samplerDescriptor));
#endif
m_nearestSampler = m_device->newSamplerState(samplerDescriptor); m_nearestSampler = m_device->newSamplerState(samplerDescriptor);
samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear);
samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear);
#ifdef CEMU_DEBUG_ASSERT
samplerDescriptor->setLabel(GetLabel("Linear sampler state", samplerDescriptor));
#endif
m_linearSampler = m_device->newSamplerState(samplerDescriptor); m_linearSampler = m_device->newSamplerState(samplerDescriptor);
samplerDescriptor->release(); samplerDescriptor->release();
// Null resources
// Placeholder textures that are bound when a shader's texture unit has no view attached.
// Only the texture type and extents are set on the descriptor; everything else
// (pixel format, usage, storage mode, ...) is left at the descriptor's defaults.
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
// 1D placeholder: 4 texels wide.
textureDescriptor->setTextureType(MTL::TextureType1D);
textureDescriptor->setWidth(4);
m_nullTexture1D = m_device->newTexture(textureDescriptor);
#ifdef CEMU_DEBUG_ASSERT
m_nullTexture1D->setLabel(GetLabel("Null texture 1D", m_nullTexture1D));
#endif
// 2D placeholder: the same descriptor is reused, so the width of 4 set above carries over
// and only the type and height are changed, giving a 4x4 texture.
textureDescriptor->setTextureType(MTL::TextureType2D);
textureDescriptor->setHeight(4);
m_nullTexture2D = m_device->newTexture(textureDescriptor);
#ifdef CEMU_DEBUG_ASSERT
m_nullTexture2D->setLabel(GetLabel("Null texture 2D", m_nullTexture2D));
#endif
// The descriptor was created with alloc()/init() and is released once both textures exist.
textureDescriptor->release();
m_memoryManager = new MetalMemoryManager(this); m_memoryManager = new MetalMemoryManager(this);
m_pipelineCache = new MetalPipelineCache(this); m_pipelineCache = new MetalPipelineCache(this);
m_depthStencilCache = new MetalDepthStencilCache(this); m_depthStencilCache = new MetalDepthStencilCache(this);
@ -296,22 +316,15 @@ void MetalRenderer::AppendOverlayDebugInfo()
debug_printf("MetalRenderer::AppendOverlayDebugInfo not implemented\n"); debug_printf("MetalRenderer::AppendOverlayDebugInfo not implemented\n");
} }
// TODO: halfZ
void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ) void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ)
{ {
m_state.m_viewport = MTL::Viewport{x, y, width, height, nearZ, farZ}; m_state.m_viewport = MTL::Viewport{x, y, width, height, nearZ, farZ};
if (m_encoderType == MetalEncoderType::Render)
{
static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder)->setViewport(m_state.m_viewport);
}
} }
void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight) void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight)
{ {
m_state.m_scissor = MTL::ScissorRect{NS::UInteger(scissorX), NS::UInteger(scissorY), NS::UInteger(scissorWidth), NS::UInteger(scissorHeight)}; m_state.m_scissor = MTL::ScissorRect{(uint32)scissorX, (uint32)scissorY, (uint32)scissorWidth, (uint32)scissorHeight};
if (m_encoderType == MetalEncoderType::Render)
{
static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder)->setScissorRect(m_state.m_scissor);
}
} }
LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key) LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key)
@ -396,7 +409,7 @@ void MetalRenderer::texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sl
depthAttachment->setSlice(sliceIndex); depthAttachment->setSlice(sliceIndex);
depthAttachment->setLevel(mipIndex); depthAttachment->setLevel(mipIndex);
} }
if (clearStencil) if (clearStencil && GetMtlPixelFormatInfo(hostTexture->format, true).hasStencil)
{ {
auto stencilAttachment = renderPassDescriptor->stencilAttachment(); auto stencilAttachment = renderPassDescriptor->stencilAttachment();
stencilAttachment->setTexture(mtlTexture); stencilAttachment->setTexture(mtlTexture);
@ -854,6 +867,33 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
encoderState.m_frontFaceWinding = frontFaceWinding; encoderState.m_frontFaceWinding = frontFaceWinding;
} }
// Viewport
// Compare the viewport requested on the renderer state (m_state) field by field with the
// copy tracked in encoderState, and only call setViewport when they differ.
if (m_state.m_viewport.originX != encoderState.m_viewport.originX ||
m_state.m_viewport.originY != encoderState.m_viewport.originY ||
m_state.m_viewport.width != encoderState.m_viewport.width ||
m_state.m_viewport.height != encoderState.m_viewport.height ||
m_state.m_viewport.znear != encoderState.m_viewport.znear ||
m_state.m_viewport.zfar != encoderState.m_viewport.zfar)
{
renderCommandEncoder->setViewport(m_state.m_viewport);
// Remember what was encoded so the next draw can skip the call if nothing changed.
encoderState.m_viewport = m_state.m_viewport;
}
// Scissor
// Same change-detection scheme as the viewport, applied to the scissor rectangle.
if (m_state.m_scissor.x != encoderState.m_scissor.x ||
m_state.m_scissor.y != encoderState.m_scissor.y ||
m_state.m_scissor.width != encoderState.m_scissor.width ||
m_state.m_scissor.height != encoderState.m_scissor.height)
{
// The tracked copy is updated first and then passed to the encoder; the clamping
// described by the TODO below is not implemented, so the rectangle is used as-is.
encoderState.m_scissor = m_state.m_scissor;
// TODO: clamp scissor to render target dimensions
//scissor.width = ;
//scissor.height = ;
renderCommandEncoder->setScissorRect(encoderState.m_scissor);
}
// Resources // Resources
// Index buffer // Index buffer
@ -935,7 +975,7 @@ void MetalRenderer::draw_endSequence()
void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
{ {
auto allocation = m_memoryManager->GetBufferAllocation(size, 4); auto allocation = m_memoryManager->GetBufferAllocation(size);
offset = allocation.bufferOffset; offset = allocation.bufferOffset;
bufferIndex = allocation.bufferIndex; bufferIndex = allocation.bufferIndex;
@ -1017,7 +1057,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
if (!needsNewRenderPass) if (!needsNewRenderPass)
{ {
if (m_state.m_activeFBO->depthBuffer.texture && m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture) if (m_state.m_activeFBO->depthBuffer.texture && (m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture || ( m_state.m_activeFBO->depthBuffer.hasStencil && !m_state.m_lastUsedFBO->depthBuffer.hasStencil)))
{ {
needsNewRenderPass = true; needsNewRenderPass = true;
} }
@ -1155,7 +1195,7 @@ bool MetalRenderer::AcquireNextDrawable(bool mainWindow)
m_drawable = m_metalLayer->nextDrawable(); m_drawable = m_metalLayer->nextDrawable();
if (!m_drawable) if (!m_drawable)
{ {
printf("failed to acquire next drawable\n"); debug_printf("failed to acquire next drawable\n");
return false; return false;
} }
@ -1191,13 +1231,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
UNREACHABLE; UNREACHABLE;
} }
auto textureView = m_state.m_textures[hostTextureUnit];
if (!textureView)
{
debug_printf("invalid bound texture view %u\n", hostTextureUnit);
continue;
}
// TODO: uncomment // TODO: uncomment
uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit]; uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit];
if (binding >= MAX_MTL_TEXTURES) if (binding >= MAX_MTL_TEXTURES)
@ -1206,36 +1239,88 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
continue; continue;
} }
auto textureView = m_state.m_textures[hostTextureUnit];
if (!textureView)
{
    // No view is attached to this texture unit: bind a placeholder texture matching the
    // dimensionality the shader expects, together with the nearest sampler, and move on
    // to the next texture (presumably to keep Metal validation from flagging an unbound
    // slot -- confirm against the validation layer output).
    // TODO: don't bind if already bound
    // NOTE(review): the bound-sampler cache consulted further down for real textures is
    // not updated on this path -- confirm a later draw cannot skip a required sampler rebind.
    MTL::Texture* nullTexture = (textureDim == Latte::E_DIM::DIM_1D) ? m_nullTexture1D : m_nullTexture2D;
    switch (shader->shaderType)
    {
    case LatteConst::ShaderType::Vertex:
    {
        renderCommandEncoder->setVertexTexture(nullTexture, binding);
        renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
        break;
    }
    case LatteConst::ShaderType::Pixel:
    {
        renderCommandEncoder->setFragmentTexture(nullTexture, binding);
        // The sampler must go to the fragment stage as well; binding it with
        // setVertexSamplerState would leave the fragment sampler slot unset and
        // overwrite the vertex stage's sampler at this index.
        renderCommandEncoder->setFragmentSamplerState(m_nearestSampler, binding);
        break;
    }
    default:
        UNREACHABLE;
    }
    continue;
}
LatteTexture* baseTexture = textureView->baseTexture; LatteTexture* baseTexture = textureView->baseTexture;
uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit]; uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit];
MTL::SamplerState* sampler;
if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE) if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE)
{ {
uint32 samplerIndex = stageSamplerIndex + LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType); uint32 samplerIndex = stageSamplerIndex + LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType);
auto sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex); sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex);
auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding];
if (sampler != boundSampler)
{
boundSampler = sampler;
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
{
renderCommandEncoder->setVertexSamplerState(sampler, binding);
break;
}
case LatteConst::ShaderType::Pixel:
{
renderCommandEncoder->setFragmentSamplerState(sampler, binding);
break;
}
default:
UNREACHABLE;
}
}
} }
else
{
sampler = m_nearestSampler;
}
auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding];
if (sampler != boundSampler)
{
boundSampler = sampler;
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
{
renderCommandEncoder->setVertexSamplerState(sampler, binding);
break;
}
case LatteConst::ShaderType::Pixel:
{
renderCommandEncoder->setFragmentSamplerState(sampler, binding);
break;
}
default:
UNREACHABLE;
}
}
// get texture register word 0 // get texture register word 0
uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4];
@ -1347,16 +1432,22 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
} }
*/ */
// TODO: uncomment
//auto supportBuffer = m_memoryManager->GetBufferAllocation(sizeof(supportBufferData));
//memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData));
switch (shader->shaderType) switch (shader->shaderType)
{ {
case LatteConst::ShaderType::Vertex: case LatteConst::ShaderType::Vertex:
{ {
//renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING);
renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
break; break;
} }
case LatteConst::ShaderType::Pixel: case LatteConst::ShaderType::Pixel:
{ {
renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); //renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING);
renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
break; break;
} }
default: default:
@ -1428,12 +1519,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder) void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder)
{ {
// Viewport
renderCommandEncoder->setViewport(m_state.m_viewport);
// Scissor
renderCommandEncoder->setScissorRect(m_state.m_scissor);
// Vertex buffers // Vertex buffers
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{ {

View File

@ -7,6 +7,7 @@
#include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Metal/MTLRenderCommandEncoder.hpp"
struct MetalBoundBuffer struct MetalBoundBuffer
{ {
@ -44,6 +45,8 @@ struct MetalEncoderState
MTL::DepthStencilState* m_depthStencilState = nullptr; MTL::DepthStencilState* m_depthStencilState = nullptr;
MTL::CullMode m_cullMode = MTL::CullModeNone; MTL::CullMode m_cullMode = MTL::CullModeNone;
MTL::Winding m_frontFaceWinding = MTL::WindingClockwise; MTL::Winding m_frontFaceWinding = MTL::WindingClockwise;
MTL::Viewport m_viewport;
MTL::ScissorRect m_scissor;
uint32 m_stencilRefFront = 0; uint32 m_stencilRefFront = 0;
uint32 m_stencilRefBack = 0; uint32 m_stencilRefBack = 0;
uint32 m_depthBias = 0; uint32 m_depthBias = 0;
@ -74,8 +77,8 @@ struct MetalState
class LatteTextureViewMtl* m_textures[64] = {nullptr}; class LatteTextureViewMtl* m_textures[64] = {nullptr};
size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
MTL::Viewport m_viewport = {0, 0, 0, 0, 0, 0}; MTL::Viewport m_viewport;
MTL::ScissorRect m_scissor = {0, 0, 0, 0}; MTL::ScissorRect m_scissor;
}; };
struct MetalCommandBuffer struct MetalCommandBuffer
@ -290,6 +293,8 @@ public:
{ {
m_state.m_encoderState = {}; m_state.m_encoderState = {};
// TODO: set viewport and scissor to render target dimensions if render commands
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++) for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
{ {
for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++) for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++)
@ -350,10 +355,14 @@ private:
class MetalHybridComputePipeline* m_copyTextureToTexturePipeline; class MetalHybridComputePipeline* m_copyTextureToTexturePipeline;
class MetalHybridComputePipeline* m_restrideBufferPipeline; class MetalHybridComputePipeline* m_restrideBufferPipeline;
// Basic // Resources
MTL::SamplerState* m_nearestSampler; MTL::SamplerState* m_nearestSampler;
MTL::SamplerState* m_linearSampler; MTL::SamplerState* m_linearSampler;
// Null resources
MTL::Texture* m_nullTexture1D;
MTL::Texture* m_nullTexture2D;
// Texture readback // Texture readback
MTL::Buffer* m_readbackBuffer; MTL::Buffer* m_readbackBuffer;
uint32 m_readbackBufferWriteOffset = 0; uint32 m_readbackBufferWriteOffset = 0;

View File

@ -7,45 +7,45 @@ constexpr const char* utilityShaderSource = _STRINGIFY((
constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)}; constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};
struct VertexOut { struct VertexOut {
float4 position [[position]]; float4 position [[position]];
float2 texCoord; float2 texCoord;
}; };
vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) { vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {
VertexOut out; VertexOut out;
out.position = float4(positions[vid], 0.0, 1.0); out.position = float4(positions[vid], 0.0, 1.0);
out.texCoord = positions[vid] * 0.5 + 0.5; out.texCoord = positions[vid] * 0.5 + 0.5;
out.texCoord.y = 1.0 - out.texCoord.y; out.texCoord.y = 1.0 - out.texCoord.y;
return out; return out;
} }
fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) { fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
return tex.sample(samplr, in.texCoord); return tex.sample(samplr, in.texCoord);
} }
struct CopyParams { struct CopyParams {
uint width; uint width;
uint srcMip; uint srcMip;
uint srcSlice; uint srcSlice;
uint dstMip; uint dstMip;
uint dstSlice; uint dstSlice;
}; };
vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(GET_TEXTURE_BINDING(0))]], texture2d_array<float, access::write> dst [[texture(GET_TEXTURE_BINDING(1))]], constant CopyParams& params [[buffer(GET_BUFFER_BINDING(0))]]) { vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(GET_TEXTURE_BINDING(0))]], texture2d_array<float, access::write> dst [[texture(GET_TEXTURE_BINDING(1))]], constant CopyParams& params [[buffer(GET_BUFFER_BINDING(0))]]) {
uint2 coord = uint2(vid % params.width, vid / params.width); uint2 coord = uint2(vid % params.width, vid / params.width);
return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip); return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);
} }
struct RestrideParams { struct RestrideParams {
uint oldStride; uint oldStride;
uint newStride; uint newStride;
}; };
/* TODO: use uint32? Since that would require less iterations */ /* TODO: use uint32? Since that would require less iterations */
vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) { vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) {
for (uint32_t i = 0; i < params.oldStride; i++) { for (uint32_t i = 0; i < params.oldStride; i++) {
dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
} }
} }
)); ));