don't bind textures and buffers which are already bound

This commit is contained in:
Samuliak 2024-08-15 10:15:05 +02:00
parent ed7354fa1b
commit 9a215e064f
5 changed files with 119 additions and 60 deletions

View File

@ -2,6 +2,20 @@
#include <Metal/Metal.hpp> #include <Metal/Metal.hpp>
// Number of per-stage buffer binding slots the renderer tracks
#define MAX_MTL_BUFFERS 31
// Buffer index 30 is reserved for the support buffer, buffer indices 27-29 are reserved for the helper shaders
// Vertex buffers are therefore assigned downwards starting at slot 26 (index 0 -> 26, index 1 -> 25, ...)
// The argument is parenthesized so that expressions such as (a + b) expand correctly
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - (index) - 5)
// TODO: don't hardcode the support buffer binding
#define MTL_SUPPORT_BUFFER_BINDING 30
// Number of per-stage texture / sampler binding slots the renderer tracks
#define MAX_MTL_TEXTURES 31
#define MAX_MTL_SAMPLERS 16
// Binding slots used by the helper (utility) shaders
// NOTE: the utility shader source is prefixed with GET_BUFFER_BINDING / GET_TEXTURE_BINDING / GET_SAMPLER_BINDING
// defines that use the same base values (27 / 29 / 14); keep both in sync
#define GET_HELPER_BUFFER_BINDING(index) (27 + (index))
#define GET_HELPER_TEXTURE_BINDING(index) (29 + (index))
#define GET_HELPER_SAMPLER_BINDING(index) (14 + (index))
constexpr uint32 INVALID_UINT32 = std::numeric_limits<uint32>::max();
constexpr size_t INVALID_OFFSET = std::numeric_limits<size_t>::max(); constexpr size_t INVALID_OFFSET = std::numeric_limits<size_t>::max();
inline size_t Align(size_t size, size_t alignment) inline size_t Align(size_t size, size_t alignment)

View File

@ -106,14 +106,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
renderCommandEncoder->setRenderPipelineState(m_restrideBufferPipeline->GetRenderPipelineState()); renderCommandEncoder->setRenderPipelineState(m_restrideBufferPipeline->GetRenderPipelineState());
MTL::Buffer* buffers[] = {bufferCache, buffer}; MTL::Buffer* buffers[] = {bufferCache, buffer};
size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.bufferOffset}; size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.bufferOffset};
renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(0, 2)); renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(GET_HELPER_BUFFER_BINDING(0), 2));
struct struct
{ {
uint32 oldStride; uint32 oldStride;
uint32 newStride; uint32 newStride;
} strideData = {static_cast<uint32>(stride), static_cast<uint32>(newStride)}; } strideData = {static_cast<uint32>(stride), static_cast<uint32>(newStride)};
renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), 2); renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2));
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), vertexBufferRange.size / stride); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), vertexBufferRange.size / stride);

View File

@ -16,6 +16,7 @@
#include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h" #include "Cemu/Logging/CemuDebugLogging.h"
#include "Common/precompiled.h" #include "Common/precompiled.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Metal/MTLRenderPass.hpp" #include "Metal/MTLRenderPass.hpp"
#include "gui/guiWrapper.h" #include "gui/guiWrapper.h"
@ -49,13 +50,11 @@ MetalRenderer::MetalRenderer()
m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize(), MTL::StorageModeShared); m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize(), MTL::StorageModeShared);
// Initialize state // Initialize state
for (uint32 i = 0; i < (uint32)LatteConst::ShaderType::TotalCount; i++) for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
{ {
for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++) for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
{
m_state.m_uniformBufferOffsets[i][j] = INVALID_OFFSET; m_state.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
} }
}
// Utility shader library // Utility shader library
@ -63,7 +62,7 @@ MetalRenderer::MetalRenderer()
std::string processedUtilityShaderSource = utilityShaderSource; std::string processedUtilityShaderSource = utilityShaderSource;
processedUtilityShaderSource.pop_back(); processedUtilityShaderSource.pop_back();
processedUtilityShaderSource.erase(processedUtilityShaderSource.begin()); processedUtilityShaderSource.erase(processedUtilityShaderSource.begin());
processedUtilityShaderSource = "#include <metal_stdlib>\n" + processedUtilityShaderSource; processedUtilityShaderSource = "#include <metal_stdlib>\nusing namespace metal;\n#define GET_BUFFER_BINDING(index) (27 + index)\n#define GET_TEXTURE_BINDING(index) (29 + index)\n#define GET_SAMPLER_BINDING(index) (14 + index)\n" + processedUtilityShaderSource;
// Create the library // Create the library
NS::Error* error = nullptr; NS::Error* error = nullptr;
@ -233,8 +232,8 @@ void MetalRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutput
// Draw to Metal layer // Draw to Metal layer
renderCommandEncoder->setRenderPipelineState(m_state.m_usesSRGB ? m_presentPipelineSRGB : m_presentPipelineLinear); renderCommandEncoder->setRenderPipelineState(m_state.m_usesSRGB ? m_presentPipelineSRGB : m_presentPipelineLinear);
renderCommandEncoder->setFragmentTexture(presentTexture, 0); renderCommandEncoder->setFragmentTexture(presentTexture, GET_HELPER_TEXTURE_BINDING(0));
renderCommandEncoder->setFragmentSamplerState((useLinearTexFilter ? m_linearSampler : m_nearestSampler), 0); renderCommandEncoder->setFragmentSamplerState((useLinearTexFilter ? m_linearSampler : m_nearestSampler), GET_HELPER_SAMPLER_BINDING(0));
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3));
@ -540,8 +539,8 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so
renderCommandEncoder->setViewport(MTL::Viewport{0.0, 0.0, (double)effectiveCopyWidth, (double)effectiveCopyHeight, 0.0, 1.0}); renderCommandEncoder->setViewport(MTL::Viewport{0.0, 0.0, (double)effectiveCopyWidth, (double)effectiveCopyHeight, 0.0, 1.0});
renderCommandEncoder->setScissorRect(MTL::ScissorRect{0, 0, (uint32)effectiveCopyWidth, (uint32)effectiveCopyHeight}); renderCommandEncoder->setScissorRect(MTL::ScissorRect{0, 0, (uint32)effectiveCopyWidth, (uint32)effectiveCopyHeight});
// The copy shader reads src/dst from [[texture(GET_TEXTURE_BINDING(0))]] and [[texture(GET_TEXTURE_BINDING(1))]], so the textures must go into the helper *texture* slots, not the helper buffer slots
renderCommandEncoder->setVertexTextures(textures, NS::Range(0, 2)); renderCommandEncoder->setVertexTextures(textures, NS::Range(GET_HELPER_TEXTURE_BINDING(0), 2));
renderCommandEncoder->setVertexBytes(&params, sizeof(params), 0); renderCommandEncoder->setVertexBytes(&params, sizeof(params), GET_HELPER_BUFFER_BINDING(0));
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3));
} }
@ -596,7 +595,7 @@ void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, u
// Records the offset of a uniform buffer for the given shader stage; the actual bind happens later in BindStageResources
void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
{ {
// GetMtlShaderType returns METAL_SHADER_TYPE_TOTAL for stages without a Metal slot (anything but Vertex/Pixel).
// That value equals the array extent, so it must not be used as an index.
const auto mtlShaderType = GetMtlShaderType(shaderType);
if (mtlShaderType == METAL_SHADER_TYPE_TOTAL || bufferIndex >= MAX_MTL_BUFFERS)
return;
m_state.m_uniformBufferOffsets[(uint32)shaderType][bufferIndex] = offset; m_state.m_uniformBufferOffsets[mtlShaderType][bufferIndex] = offset;
} }
RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader)
@ -962,6 +961,8 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
m_commandEncoder = renderCommandEncoder; m_commandEncoder = renderCommandEncoder;
m_encoderType = MetalEncoderType::Render; m_encoderType = MetalEncoderType::Render;
ResetEncoderState();
if (rebindStateIfNewEncoder) if (rebindStateIfNewEncoder)
{ {
// Rebind all the render state // Rebind all the render state
@ -989,6 +990,8 @@ MTL::ComputeCommandEncoder* MetalRenderer::GetComputeCommandEncoder()
m_commandEncoder = computeCommandEncoder; m_commandEncoder = computeCommandEncoder;
m_encoderType = MetalEncoderType::Compute; m_encoderType = MetalEncoderType::Compute;
ResetEncoderState();
return computeCommandEncoder; return computeCommandEncoder;
} }
@ -1010,6 +1013,8 @@ MTL::BlitCommandEncoder* MetalRenderer::GetBlitCommandEncoder()
m_commandEncoder = blitCommandEncoder; m_commandEncoder = blitCommandEncoder;
m_encoderType = MetalEncoderType::Blit; m_encoderType = MetalEncoderType::Blit;
ResetEncoderState();
return blitCommandEncoder; return blitCommandEncoder;
} }
@ -1075,8 +1080,9 @@ bool MetalRenderer::AcquireNextDrawable(bool mainWindow)
void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader) void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader)
{ {
sint32 textureCount = shader->resourceMapping.getTextureCount(); auto mtlShaderType = GetMtlShaderType(shader->shaderType);
sint32 textureCount = shader->resourceMapping.getTextureCount();
for (int i = 0; i < textureCount; ++i) for (int i = 0; i < textureCount; ++i)
{ {
const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i); const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
@ -1108,24 +1114,16 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
continue; continue;
} }
LatteTexture* baseTexture = textureView->baseTexture;
// get texture register word 0
uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4];
// TODO: wht
//auto imageViewObj = textureView->GetSamplerView(word4);
//info.imageView = imageViewObj->m_textureImageView;
// TODO: uncomment // TODO: uncomment
uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit]; uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit];
//uint32 textureBinding = binding % MAX_MTL_TEXTURES;
//uint32 samplerBinding = binding % MAX_MTL_SAMPLERS;
if (binding >= MAX_MTL_TEXTURES) if (binding >= MAX_MTL_TEXTURES)
{ {
debug_printf("invalid texture binding %u\n", binding); debug_printf("invalid texture binding %u\n", binding);
continue; continue;
} }
LatteTexture* baseTexture = textureView->baseTexture;
uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit]; uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit];
if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE) if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE)
{ {
@ -1249,6 +1247,14 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
sampler->release(); sampler->release();
} }
// get texture register word 0
uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4];
auto& boundTexture = m_state.m_encoderState.m_textures[mtlShaderType][binding];
if (textureView == boundTexture.m_textureView && word4 == boundTexture.m_word4)
continue;
boundTexture = {textureView, word4};
MTL::Texture* mtlTexture = textureView->GetSwizzledView(word4); MTL::Texture* mtlTexture = textureView->GetSwizzledView(word4);
switch (shader->shaderType) switch (shader->shaderType)
{ {
@ -1376,12 +1382,21 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
uint32 binding = shader->resourceMapping.uniformBuffersBindingPoint[i]; uint32 binding = shader->resourceMapping.uniformBuffersBindingPoint[i];
if (binding >= MAX_MTL_BUFFERS) if (binding >= MAX_MTL_BUFFERS)
{ {
debug_printf("too big buffer index (%u), skipping binding\n", binding); debug_printf("invalid buffer binding %u\n", binding);
continue; continue;
} }
size_t offset = m_state.m_uniformBufferOffsets[(uint32)shader->shaderType][i];
if (offset != INVALID_OFFSET) size_t offset = m_state.m_uniformBufferOffsets[mtlShaderType][i];
{ if (offset == INVALID_OFFSET)
continue;
auto& boundOffset = m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][binding];
if (offset == boundOffset)
continue;
boundOffset = offset;
// TODO: only set the offset if already bound
switch (shader->shaderType) switch (shader->shaderType)
{ {
case LatteConst::ShaderType::Vertex: case LatteConst::ShaderType::Vertex:
@ -1399,7 +1414,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
} }
} }
} }
}
// Storage buffer // Storage buffer
if (shader->resourceMapping.tfStorageBindingPoint >= 0) if (shader->resourceMapping.tfStorageBindingPoint >= 0)

View File

@ -7,18 +7,6 @@
#include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Common/precompiled.h"
#include "Metal/MTLCommandBuffer.hpp"
#include "Metal/MTLCommandEncoder.hpp"
#include "Metal/MTLRenderPass.hpp"
#define MAX_MTL_BUFFERS 31
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 2)
// TODO: don't harcdode the support buffer binding
#define MTL_SUPPORT_BUFFER_BINDING 30
#define MAX_MTL_TEXTURES 31
#define MAX_MTL_SAMPLERS 16
struct MetalBoundBuffer struct MetalBoundBuffer
{ {
@ -29,8 +17,40 @@ struct MetalBoundBuffer
MetalRestrideInfo restrideInfo; MetalRestrideInfo restrideInfo;
}; };
// Shader stages that own a dedicated slot in the per-stage Metal state arrays.
// The enumerators double as array indices; METAL_SHADER_TYPE_TOTAL is the number of
// stages (i.e. the array extent) and is not itself a valid index.
enum MetalShaderType
{
	METAL_SHADER_TYPE_VERTEX = 0,
	METAL_SHADER_TYPE_FRAGMENT = 1,

	METAL_SHADER_TYPE_TOTAL = 2
};
// Translates a Latte shader stage into the index used by the per-stage Metal state arrays.
// Vertex and Pixel map to their dedicated slots. Every other stage yields
// METAL_SHADER_TYPE_TOTAL, which equals the array extent and therefore is NOT a valid index.
// NOTE(review): callers should check for METAL_SHADER_TYPE_TOTAL before indexing with the result.
inline MetalShaderType GetMtlShaderType(LatteConst::ShaderType shaderType)
{
	if (shaderType == LatteConst::ShaderType::Vertex)
		return METAL_SHADER_TYPE_VERTEX;

	if (shaderType == LatteConst::ShaderType::Pixel)
		return METAL_SHADER_TYPE_FRAGMENT;

	// No Metal slot for this stage
	return METAL_SHADER_TYPE_TOTAL;
}
struct MetalEncoderState
{
struct {
class LatteTextureViewMtl* m_textureView = nullptr;
uint32 m_word4 = INVALID_UINT32;
} m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES];
size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
};
struct MetalState struct MetalState
{ {
MetalEncoderState m_encoderState{};
bool m_usesSRGB = false; bool m_usesSRGB = false;
bool m_skipDrawSequence = false; bool m_skipDrawSequence = false;
@ -42,7 +62,7 @@ struct MetalState
MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}}; MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}};
// TODO: find out what is the max number of bound textures on the Wii U // TODO: find out what is the max number of bound textures on the Wii U
class LatteTextureViewMtl* m_textures[64] = {nullptr}; class LatteTextureViewMtl* m_textures[64] = {nullptr};
size_t m_uniformBufferOffsets[(uint32)LatteConst::ShaderType::TotalCount][MAX_MTL_BUFFERS]; size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
MTL::Viewport m_viewport = {0, 0, 0, 0, 0, 0}; MTL::Viewport m_viewport = {0, 0, 0, 0, 0, 0};
MTL::ScissorRect m_scissor = {0, 0, 0, 0}; MTL::ScissorRect m_scissor = {0, 0, 0, 0};
@ -255,6 +275,19 @@ public:
return m_encoderType; return m_encoderType;
} }
// Forgets everything recorded as bound on the encoder.
// Afterwards every texture slot holds a null view and every uniform buffer slot holds INVALID_OFFSET.
void ResetEncoderState()
{
	auto& encoderState = m_state.m_encoderState;
	encoderState = {};

	// Texture slots: no view bound
	for (auto& stageTextures : encoderState.m_textures)
	{
		for (auto& boundTexture : stageTextures)
			boundTexture = {nullptr};
	}

	// Uniform buffer slots: 0 is a valid offset, so mark them invalid explicitly
	for (auto& stageOffsets : encoderState.m_uniformBufferOffsets)
	{
		for (size_t& boundOffset : stageOffsets)
			boundOffset = INVALID_OFFSET;
	}
}
MTL::CommandBuffer* GetCommandBuffer(); MTL::CommandBuffer* GetCommandBuffer();
bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer); bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer);
void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer); void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer);

View File

@ -4,8 +4,6 @@
#define _STRINGIFY(x) __STRINGIFY(x) #define _STRINGIFY(x) __STRINGIFY(x)
constexpr const char* utilityShaderSource = _STRINGIFY(( constexpr const char* utilityShaderSource = _STRINGIFY((
using namespace metal;
constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)}; constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};
struct VertexOut { struct VertexOut {
@ -22,7 +20,7 @@ vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {
return out; return out;
} }
fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) { fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(GET_TEXTURE_BINDING(0))]], sampler samplr [[sampler(GET_SAMPLER_BINDING(0))]]) {
return tex.sample(samplr, in.texCoord); return tex.sample(samplr, in.texCoord);
} }
@ -34,7 +32,7 @@ struct CopyParams {
uint dstSlice; uint dstSlice;
}; };
vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(0)]], texture2d_array<float, access::write> dst [[texture(1)]], constant CopyParams& params [[buffer(0)]]) { vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(GET_TEXTURE_BINDING(0))]], texture2d_array<float, access::write> dst [[texture(GET_TEXTURE_BINDING(1))]], constant CopyParams& params [[buffer(GET_BUFFER_BINDING(0))]]) {
uint2 coord = uint2(vid % params.width, vid / params.width); uint2 coord = uint2(vid % params.width, vid / params.width);
return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip); return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);
} }
@ -45,7 +43,7 @@ struct RestrideParams {
}; };
/* TODO: use uint32? Since that would require less iterations */ /* TODO: use uint32? Since that would require less iterations */
vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(0)]], device uint8_t* dst [[buffer(1)]], constant RestrideParams& params [[buffer(2)]]) { vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) {
for (uint32_t i = 0; i < params.oldStride; i++) { for (uint32_t i = 0; i < params.oldStride; i++) {
dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
} }