fix: some Metal validation errors

This commit is contained in:
Samuliak 2024-08-17 17:51:37 +02:00
parent 502d5b8b2f
commit 83a08b2247
9 changed files with 216 additions and 99 deletions

View File

@ -1,5 +1,6 @@
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Metal/MTLRenderPass.hpp"
void CachedFBOMtl::CreateRenderPass()
@ -30,7 +31,7 @@ void CachedFBOMtl::CreateRenderPass()
depthAttachment->setStoreAction(MTL::StoreActionStore);
// setup stencil attachment
if (depthBuffer.hasStencil)
if (depthBuffer.hasStencil && GetMtlPixelFormatInfo(depthBuffer.texture->format, true).hasStencil)
{
auto stencilAttachment = m_renderPassDescriptor->stencilAttachment();
stencilAttachment->setTexture(textureView->GetRGBAView());

View File

@ -74,11 +74,12 @@ std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
};
// Lookup table from Latte depth/stencil surface formats to the Metal pixel format
// description used for them. Each value is a MetalPixelFormatInfo initializer; the
// trailing fields seen in some rows ({1, 1} and true) line up with the struct's
// blockTexelSize and hasStencil members.
// NOTE(review): every key appears twice below, once without and once with the
// {1, 1}/hasStencil fields. This looks like two revisions of the table shown
// together - confirm which set of rows is the intended one.
std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4}}, // TODO: not supported on Apple silicon, maybe find something else
{Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5}},
{Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2}},
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4}},
// TODO: one of these 2 formats is not supported on Apple silicon
// Rows whose Metal format carries a stencil aspect set the last field (hasStencil) to true.
{Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}},
{Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4, {1, 1}, true}},
{Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5, {1, 1}, true}},
// Depth-only formats leave hasStencil unspecified in the initializer.
{Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2, {1, 1}}},
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4, {1, 1}}},
};
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)

View File

@ -29,6 +29,7 @@ struct MetalPixelFormatInfo {
MetalDataType dataType;
size_t bytesPerBlock;
Uvec2 blockTexelSize = {1, 1};
bool hasStencil = false;
};
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth);

View File

@ -3,10 +3,6 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Common/precompiled.h"
#include "Foundation/NSRange.hpp"
#include "Metal/MTLRenderCommandEncoder.hpp"
const size_t BUFFER_ALLOCATION_SIZE = 8 * 1024 * 1024;
MetalBufferAllocator::~MetalBufferAllocator()
{
@ -16,10 +12,10 @@ MetalBufferAllocator::~MetalBufferAllocator()
}
}
MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment)
MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size)
{
// Align the size
size = Align(size, alignment);
size = Align(size, 16);
// First, try to find a free range
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
@ -45,7 +41,8 @@ MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, siz
}
// If no free range was found, allocate a new buffer
MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(std::max(size, BUFFER_ALLOCATION_SIZE), MTL::ResourceStorageModeShared);
m_allocationSize = std::max(m_allocationSize, size);
MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer));
#endif
@ -58,16 +55,20 @@ MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, siz
m_buffers.push_back(buffer);
// If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
if (size < BUFFER_ALLOCATION_SIZE)
if (size < m_allocationSize)
{
MetalBufferRange range;
range.bufferIndex = allocation.bufferIndex;
range.offset = size;
range.size = BUFFER_ALLOCATION_SIZE - size;
range.size = m_allocationSize - size;
m_freeBufferRanges.push_back(range);
}
// Increase the allocation size for the next buffer
if (m_allocationSize < 128 * 1024 * 1024)
m_allocationSize *= 2;
return allocation;
}
@ -91,10 +92,11 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
{
size_t newStride = Align(stride, 4);
size_t newSize = vertexBufferRange.size / stride * newStride;
restrideInfo.allocation = m_bufferAllocator->GetBufferAllocation(newSize, 4);
restrideInfo.allocation = m_bufferAllocator->GetBufferAllocation(newSize);
buffer = m_bufferAllocator->GetBuffer(restrideInfo.allocation.bufferIndex);
//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
//uint8* newPtr = (uint8*)restrideInfo.buffer->contents();
//uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset;
//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
//{
@ -123,9 +125,18 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2));
m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = INVALID_OFFSET;
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), vertexBufferRange.size / stride);
// TODO: remove
uint32 vertexCount = vertexBufferRange.size / stride;
if (vertexCount * strideData.oldStride > buffers[0]->length() - offsets[0]) {
throw std::runtime_error("Source buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.oldStride) + " > " + std::to_string(buffers[0]->length()) + " - " + std::to_string(offsets[0]) + ")");
}
if (vertexCount * strideData.newStride > buffers[1]->length() - offsets[1]) {
throw std::runtime_error("Destination buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.newStride) + " > " + std::to_string(buffers[1]->length()) + " - " + std::to_string(offsets[1]) + ")");
}
// TODO: do the barrier in one call?
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride);
// TODO: do the barriers in one call?
MTL::Resource* barrierBuffers[] = {buffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
}

View File

@ -47,11 +47,13 @@ public:
return m_buffers[bufferIndex];
}
MetalBufferAllocation GetBufferAllocation(size_t size, size_t alignment);
MetalBufferAllocation GetBufferAllocation(size_t size);
private:
class MetalRenderer* m_mtlr;
size_t m_allocationSize = 8 * 1024 * 1024;
std::vector<MTL::Buffer*> m_buffers;
std::vector<MetalBufferRange> m_freeBufferRanges;
};
@ -139,9 +141,9 @@ public:
return m_bufferAllocator/*s[bufferAllocatorIndex]*/.GetBuffer(bufferIndex);
}
MetalBufferAllocation GetBufferAllocation(size_t size, size_t alignment)
MetalBufferAllocation GetBufferAllocation(size_t size)
{
auto allocation = m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.GetBufferAllocation(size, alignment);
auto allocation = m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.GetBufferAllocation(size);
//allocation.bufferIndex |= (m_bufferAllocatorIndex << bufferAllocatorIndexShift);
return allocation;

View File

@ -243,6 +243,13 @@ uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchSh
stateHash = std::rotl<uint64>(stateHash, 7);
}
if (activeFBO->depthBuffer.texture)
{
auto textureView = static_cast<LatteTextureViewMtl*>(activeFBO->depthBuffer.texture);
stateHash += textureView->GetRGBAView()->pixelFormat();
stateHash = std::rotl<uint64>(stateHash, 7);
}
for (auto& group : fetchShader->bufferGroups)
{
uint32 bufferStride = group.getCurrentBufferStride(lcr.GetRawView());

View File

@ -17,10 +17,6 @@
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Common/precompiled.h"
#include "HW/Latte/Core/Latte.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Metal/MTLRenderCommandEncoder.hpp"
#include "Metal/MTLRenderPass.hpp"
#include "gui/guiWrapper.h"
#define COMMIT_TRESHOLD 256
@ -34,14 +30,38 @@ MetalRenderer::MetalRenderer()
m_device = MTL::CreateSystemDefaultDevice();
m_commandQueue = m_device->newCommandQueue();
// Resources
MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
#ifdef CEMU_DEBUG_ASSERT
samplerDescriptor->setLabel(GetLabel("Nearest sampler state", samplerDescriptor));
#endif
m_nearestSampler = m_device->newSamplerState(samplerDescriptor);
samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear);
samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear);
#ifdef CEMU_DEBUG_ASSERT
samplerDescriptor->setLabel(GetLabel("Linear sampler state", samplerDescriptor));
#endif
m_linearSampler = m_device->newSamplerState(samplerDescriptor);
samplerDescriptor->release();
// Null resources
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
textureDescriptor->setTextureType(MTL::TextureType1D);
textureDescriptor->setWidth(4);
m_nullTexture1D = m_device->newTexture(textureDescriptor);
#ifdef CEMU_DEBUG_ASSERT
m_nullTexture1D->setLabel(GetLabel("Null texture 1D", m_nullTexture1D));
#endif
textureDescriptor->setTextureType(MTL::TextureType2D);
textureDescriptor->setHeight(4);
m_nullTexture2D = m_device->newTexture(textureDescriptor);
#ifdef CEMU_DEBUG_ASSERT
m_nullTexture2D->setLabel(GetLabel("Null texture 2D", m_nullTexture2D));
#endif
textureDescriptor->release();
m_memoryManager = new MetalMemoryManager(this);
m_pipelineCache = new MetalPipelineCache(this);
m_depthStencilCache = new MetalDepthStencilCache(this);
@ -296,22 +316,15 @@ void MetalRenderer::AppendOverlayDebugInfo()
debug_printf("MetalRenderer::AppendOverlayDebugInfo not implemented\n");
}
// TODO: halfZ
// Records the requested viewport in m_state.m_viewport and, when the current encoder
// type is MetalEncoderType::Render, also applies it to that encoder right away.
// The halfZ argument is not read anywhere in this body (see the TODO above).
void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ)
{
m_state.m_viewport = MTL::Viewport{x, y, width, height, nearZ, farZ};
// m_commandEncoder is only treated as a render encoder when m_encoderType says it is one.
if (m_encoderType == MetalEncoderType::Render)
{
static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder)->setViewport(m_state.m_viewport);
}
}
// Records the requested scissor rectangle in m_state.m_scissor.
// The signed inputs are converted to unsigned types without any range check, so a
// negative coordinate or size would wrap to a large unsigned value.
// NOTE(review): m_state.m_scissor is assigned twice in this body (first with
// NS::UInteger casts, then with uint32 casts) around an immediate setScissorRect call.
// This looks like two revisions of the function shown together - confirm which
// variant is intended before relying on the immediate encoder update.
void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight)
{
m_state.m_scissor = MTL::ScissorRect{NS::UInteger(scissorX), NS::UInteger(scissorY), NS::UInteger(scissorWidth), NS::UInteger(scissorHeight)};
// Only forwarded to the encoder when the active encoder is a render encoder.
if (m_encoderType == MetalEncoderType::Render)
{
static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder)->setScissorRect(m_state.m_scissor);
}
m_state.m_scissor = MTL::ScissorRect{(uint32)scissorX, (uint32)scissorY, (uint32)scissorWidth, (uint32)scissorHeight};
}
LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key)
@ -396,7 +409,7 @@ void MetalRenderer::texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sl
depthAttachment->setSlice(sliceIndex);
depthAttachment->setLevel(mipIndex);
}
if (clearStencil)
if (clearStencil && GetMtlPixelFormatInfo(hostTexture->format, true).hasStencil)
{
auto stencilAttachment = renderPassDescriptor->stencilAttachment();
stencilAttachment->setTexture(mtlTexture);
@ -854,6 +867,33 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
encoderState.m_frontFaceWinding = frontFaceWinding;
}
// Viewport
if (m_state.m_viewport.originX != encoderState.m_viewport.originX ||
m_state.m_viewport.originY != encoderState.m_viewport.originY ||
m_state.m_viewport.width != encoderState.m_viewport.width ||
m_state.m_viewport.height != encoderState.m_viewport.height ||
m_state.m_viewport.znear != encoderState.m_viewport.znear ||
m_state.m_viewport.zfar != encoderState.m_viewport.zfar)
{
renderCommandEncoder->setViewport(m_state.m_viewport);
encoderState.m_viewport = m_state.m_viewport;
}
// Scissor
if (m_state.m_scissor.x != encoderState.m_scissor.x ||
m_state.m_scissor.y != encoderState.m_scissor.y ||
m_state.m_scissor.width != encoderState.m_scissor.width ||
m_state.m_scissor.height != encoderState.m_scissor.height)
{
encoderState.m_scissor = m_state.m_scissor;
// TODO: clamp scissor to render target dimensions
//scissor.width = ;
//scissor.height = ;
renderCommandEncoder->setScissorRect(encoderState.m_scissor);
}
// Resources
// Index buffer
@ -935,7 +975,7 @@ void MetalRenderer::draw_endSequence()
void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
{
auto allocation = m_memoryManager->GetBufferAllocation(size, 4);
auto allocation = m_memoryManager->GetBufferAllocation(size);
offset = allocation.bufferOffset;
bufferIndex = allocation.bufferIndex;
@ -1017,7 +1057,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
if (!needsNewRenderPass)
{
if (m_state.m_activeFBO->depthBuffer.texture && m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture)
if (m_state.m_activeFBO->depthBuffer.texture && (m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture || ( m_state.m_activeFBO->depthBuffer.hasStencil && !m_state.m_lastUsedFBO->depthBuffer.hasStencil)))
{
needsNewRenderPass = true;
}
@ -1155,7 +1195,7 @@ bool MetalRenderer::AcquireNextDrawable(bool mainWindow)
m_drawable = m_metalLayer->nextDrawable();
if (!m_drawable)
{
printf("failed to acquire next drawable\n");
debug_printf("failed to acquire next drawable\n");
return false;
}
@ -1191,13 +1231,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
UNREACHABLE;
}
auto textureView = m_state.m_textures[hostTextureUnit];
if (!textureView)
{
debug_printf("invalid bound texture view %u\n", hostTextureUnit);
continue;
}
// TODO: uncomment
uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit];
if (binding >= MAX_MTL_TEXTURES)
@ -1206,36 +1239,88 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
continue;
}
auto textureView = m_state.m_textures[hostTextureUnit];
if (!textureView)
{
    // No texture view is bound to this texture unit. Bind a placeholder texture of
    // the matching dimensionality plus the nearest sampler at this binding
    // (presumably so Metal validation never sees an empty slot - confirm).
    // TODO: don't bind if already bound
    MTL::Texture* nullTexture = (textureDim == Latte::E_DIM::DIM_1D) ? m_nullTexture1D : m_nullTexture2D;
    switch (shader->shaderType)
    {
    case LatteConst::ShaderType::Vertex:
    {
        renderCommandEncoder->setVertexTexture(nullTexture, binding);
        renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
        break;
    }
    case LatteConst::ShaderType::Pixel:
    {
        renderCommandEncoder->setFragmentTexture(nullTexture, binding);
        // The sampler has to go into the fragment sampler table here. Using the
        // vertex-stage setter would leave the fragment slot unset and overwrite
        // whatever the vertex stage had bound at this index.
        renderCommandEncoder->setFragmentSamplerState(m_nearestSampler, binding);
        break;
    }
    default:
        UNREACHABLE;
    }
    // Keep the cached sampler binding in sync with what was just encoded, so the
    // "sampler != boundSampler" comparison used for real textures does not skip a
    // rebind based on stale state.
    m_state.m_encoderState.m_samplers[mtlShaderType][binding] = m_nearestSampler;
    continue;
}
LatteTexture* baseTexture = textureView->baseTexture;
uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit];
MTL::SamplerState* sampler;
if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE)
{
uint32 samplerIndex = stageSamplerIndex + LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType);
auto sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex);
auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding];
if (sampler != boundSampler)
{
boundSampler = sampler;
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
{
renderCommandEncoder->setVertexSamplerState(sampler, binding);
break;
}
case LatteConst::ShaderType::Pixel:
{
renderCommandEncoder->setFragmentSamplerState(sampler, binding);
break;
}
default:
UNREACHABLE;
}
}
sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex);
}
else
{
sampler = m_nearestSampler;
}
auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding];
if (sampler != boundSampler)
{
boundSampler = sampler;
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
{
renderCommandEncoder->setVertexSamplerState(sampler, binding);
break;
}
case LatteConst::ShaderType::Pixel:
{
renderCommandEncoder->setFragmentSamplerState(sampler, binding);
break;
}
default:
UNREACHABLE;
}
}
// get texture register word 0
uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4];
@ -1347,16 +1432,22 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
}
*/
// TODO: uncomment
//auto supportBuffer = m_memoryManager->GetBufferAllocation(sizeof(supportBufferData));
//memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData));
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
{
//renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING);
renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
break;
}
case LatteConst::ShaderType::Pixel:
{
renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
//renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING);
renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
break;
}
default:
@ -1428,12 +1519,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder)
{
// Viewport
renderCommandEncoder->setViewport(m_state.m_viewport);
// Scissor
renderCommandEncoder->setScissorRect(m_state.m_scissor);
// Vertex buffers
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{

View File

@ -7,6 +7,7 @@
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Metal/MTLRenderCommandEncoder.hpp"
struct MetalBoundBuffer
{
@ -44,6 +45,8 @@ struct MetalEncoderState
MTL::DepthStencilState* m_depthStencilState = nullptr;
MTL::CullMode m_cullMode = MTL::CullModeNone;
MTL::Winding m_frontFaceWinding = MTL::WindingClockwise;
MTL::Viewport m_viewport;
MTL::ScissorRect m_scissor;
uint32 m_stencilRefFront = 0;
uint32 m_stencilRefBack = 0;
uint32 m_depthBias = 0;
@ -74,8 +77,8 @@ struct MetalState
class LatteTextureViewMtl* m_textures[64] = {nullptr};
size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
MTL::Viewport m_viewport = {0, 0, 0, 0, 0, 0};
MTL::ScissorRect m_scissor = {0, 0, 0, 0};
MTL::Viewport m_viewport;
MTL::ScissorRect m_scissor;
};
struct MetalCommandBuffer
@ -290,6 +293,8 @@ public:
{
m_state.m_encoderState = {};
// TODO: set viewport and scissor to render target dimensions if render commands
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
{
for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++)
@ -350,10 +355,14 @@ private:
class MetalHybridComputePipeline* m_copyTextureToTexturePipeline;
class MetalHybridComputePipeline* m_restrideBufferPipeline;
// Basic
// Resources
MTL::SamplerState* m_nearestSampler;
MTL::SamplerState* m_linearSampler;
// Null resources
MTL::Texture* m_nullTexture1D;
MTL::Texture* m_nullTexture2D;
// Texture readback
MTL::Buffer* m_readbackBuffer;
uint32 m_readbackBufferWriteOffset = 0;

View File

@ -7,45 +7,45 @@ constexpr const char* utilityShaderSource = _STRINGIFY((
constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};
struct VertexOut {
float4 position [[position]];
float2 texCoord;
float4 position [[position]];
float2 texCoord;
};
vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {
VertexOut out;
out.position = float4(positions[vid], 0.0, 1.0);
out.texCoord = positions[vid] * 0.5 + 0.5;
out.texCoord.y = 1.0 - out.texCoord.y;
VertexOut out;
out.position = float4(positions[vid], 0.0, 1.0);
out.texCoord = positions[vid] * 0.5 + 0.5;
out.texCoord.y = 1.0 - out.texCoord.y;
return out;
return out;
}
fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
return tex.sample(samplr, in.texCoord);
return tex.sample(samplr, in.texCoord);
}
struct CopyParams {
uint width;
uint srcMip;
uint srcSlice;
uint dstMip;
uint dstSlice;
uint width;
uint srcMip;
uint srcSlice;
uint dstMip;
uint dstSlice;
};
vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(GET_TEXTURE_BINDING(0))]], texture2d_array<float, access::write> dst [[texture(GET_TEXTURE_BINDING(1))]], constant CopyParams& params [[buffer(GET_BUFFER_BINDING(0))]]) {
uint2 coord = uint2(vid % params.width, vid / params.width);
return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);
uint2 coord = uint2(vid % params.width, vid / params.width);
return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);
}
struct RestrideParams {
uint oldStride;
uint newStride;
uint oldStride;
uint newStride;
};
/* TODO: use uint32? Since that would require less iterations */
vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) {
for (uint32_t i = 0; i < params.oldStride; i++) {
dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
}
for (uint32_t i = 0; i < params.oldStride; i++) {
dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
}
}
));