prepare for surface copy

This commit is contained in:
Samuliak 2024-08-11 20:09:48 +02:00
parent eb573fcaca
commit 8316cee59a
9 changed files with 124 additions and 48 deletions

View File

@ -560,7 +560,9 @@ if(ENABLE_METAL)
HW/Latte/Renderer/Metal/MetalPipelineCache.h
HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
HW/Latte/Renderer/Metal/ShaderSourcePresent.h
HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp
HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h
HW/Latte/Renderer/Metal/UtilityShaderSource.h
)
#target_link_libraries(CemuCafe PRIVATE

View File

@ -3732,13 +3732,6 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
"return round(x / 2.0) * 2.0;\r\n"
"}\r\n");
// unpackHalf2x16
fCStr_shaderSource->add(""
"template<typename T>\r\n"
"float2 unpackHalf2x16(T x) {\r\n"
"return float2(as_type<half2>(x));\r\n"
"}\r\n");
// Bit cast
// Scalar

View File

@ -256,7 +256,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
// seen in Giana Sisters: Twisted Dreams
_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
// TODO: uint4?
src->add("attrDecoder.xyzw = as_type<uint4>(float4(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)),unpackHalf2x16(attrDecoder.z|(attrDecoder.w<<16))));" _CRLF);
src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
}
else if (attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
{
@ -271,7 +271,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
{
// seen in Giana Sisters: Twisted Dreams
_readLittleEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
src->add("attrDecoder.xy = as_type<uint2>(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)));" _CRLF);
src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
src->add("attrDecoder.zw = uint2(0);" _CRLF);
}
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
@ -394,7 +394,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
{
_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
// TODO: uint4?
src->add("attrDecoder.xyzw = as_type<uint4>(float4(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)),unpackHalf2x16(attrDecoder.z|(attrDecoder.w<<16))));" _CRLF);
src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
}
else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
{
@ -446,7 +446,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
else if( attrib->format == FMT_16_16_FLOAT && attrib->nfa == 2 )
{
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
src->add("attrDecoder.xy = as_type<uint2>(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)));" _CRLF);
src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
src->add("attrDecoder.zw = uint2(0);" _CRLF);
}
else if( attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned == 0 )

View File

@ -0,0 +1,31 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
// Creates the pipeline state(s) for a hybrid utility pass.
//
// mtlRenderer        - renderer whose device is used to build the pipeline state
// library            - library that contains the functions named below
// vertexFunctionName - name of the vertex function for the render variant
//                      (built with rasterization disabled)
// kernelFunctionName - name of the kernel function; not used yet because the
//                      compute variant is still a TODO
//
// On failure the problem is logged and the affected pipeline state stays null.
MetalHybridComputePipeline::MetalHybridComputePipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const char* vertexFunctionName, const char* kernelFunctionName)
{
	// Start from a known state so the getters and the destructor never see garbage
	m_renderPipelineState = nullptr;
	m_computePipelineState = nullptr;

	// Render pipeline state
	MTL::Function* vertexFunction = library->newFunction(NS::String::string(vertexFunctionName, NS::ASCIIStringEncoding));
	if (vertexFunction)
	{
		MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
		renderPipelineDescriptor->setVertexFunction(vertexFunction);
		renderPipelineDescriptor->setRasterizationEnabled(false);

		NS::Error* error = nullptr;
		m_renderPipelineState = mtlRenderer->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error);
		renderPipelineDescriptor->release();
		vertexFunction->release();

		// Judge success by the returned object rather than by the error pointer
		if (!m_renderPipelineState)
		{
			// The error object is handed back by reference and is not owned here,
			// so it must not be released
			printf("error creating hybrid render pipeline state: %s\n", error ? error->localizedDescription()->utf8String() : "unknown error");
		}
	}
	else
	{
		printf("error creating hybrid render pipeline state: vertex function \"%s\" not found\n", vertexFunctionName);
	}

	// Compute pipeline state
	// TODO
}
// Releases the render pipeline state created by the constructor.
MetalHybridComputePipeline::~MetalHybridComputePipeline()
{
	// Pipeline creation can fail and leave the pointer null; do not call through it then
	if (m_renderPipelineState)
		m_renderPipelineState->release();

	// TODO: uncomment
	//m_computePipelineState->release();
}

View File

@ -0,0 +1,19 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Metal/MTLLibrary.hpp"
#include "Metal/MTLRenderPipeline.hpp"
class MetalHybridComputePipeline
{
public:
MetalHybridComputePipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const char* vertexFunctionName, const char* kernelFunctionName);
~MetalHybridComputePipeline();
MTL::RenderPipelineState* GetRenderPipelineState() const { return m_renderPipelineState; }
MTL::RenderPipelineState* GetComputePipelineState() const { return m_computePipelineState; }
private:
MTL::RenderPipelineState* m_renderPipelineState;
MTL::RenderPipelineState* m_computePipelineState;
};

View File

@ -7,13 +7,15 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h"
#include "Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Foundation/NSError.hpp"
#include "HW/Latte/Core/Latte.h"
#include "HW/Latte/ISA/LatteReg.h"
#include "Metal/MTLPixelFormat.hpp"
@ -54,10 +56,31 @@ MetalRenderer::MetalRenderer()
m_state.uniformBufferOffsets[i][j] = INVALID_OFFSET;
}
}
// Utility shader source
NS::Error* error = nullptr;
m_utilityLibrary = m_device->newLibrary(NS::String::string(utilityShaderSource, NS::ASCIIStringEncoding), nullptr, &error);
if (error)
{
debug_printf("failed to create present library (error: %s)\n", error->localizedDescription()->utf8String());
error->release();
return;
}
// Hybrid pipelines
m_copyDepthToColorPipeline = new MetalHybridComputePipeline(this, m_utilityLibrary, "vertexCopyDepthToColor", "kernelCopyDepthToColor");
m_copyColorToDepthPipeline = new MetalHybridComputePipeline(this, m_utilityLibrary, "vertexCopyColorToDepth", "kernelCopyColorToDepth");
}
MetalRenderer::~MetalRenderer()
{
delete m_copyDepthToColorPipeline;
delete m_copyColorToDepthPipeline;
m_presentPipeline->release();
m_utilityLibrary->release();
delete m_depthStencilCache;
delete m_pipelineCache;
delete m_memoryManager;
@ -82,22 +105,15 @@ void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
m_metalLayer->setPixelFormat(MTL::PixelFormatRGBA8Unorm/*_sRGB*/);
// Present pipeline
NS::Error* error = nullptr;
MTL::Library* presentLibrary = m_device->newLibrary(NS::String::string(presentLibrarySource, NS::ASCIIStringEncoding), nullptr, &error);
if (error)
{
debug_printf("failed to create present library (error: %s)\n", error->localizedDescription()->utf8String());
error->release();
return;
}
MTL::Function* presentVertexFunction = presentLibrary->newFunction(NS::String::string("presentVertex", NS::ASCIIStringEncoding));
MTL::Function* presentFragmentFunction = presentLibrary->newFunction(NS::String::string("presentFragment", NS::ASCIIStringEncoding));
presentLibrary->release();
MTL::Function* presentVertexFunction = m_utilityLibrary->newFunction(NS::String::string("vertexFullscreen", NS::ASCIIStringEncoding));
MTL::Function* presentFragmentFunction = m_utilityLibrary->newFunction(NS::String::string("fragmentPresent", NS::ASCIIStringEncoding));
MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
renderPipelineDescriptor->setVertexFunction(presentVertexFunction);
renderPipelineDescriptor->setFragmentFunction(presentFragmentFunction);
renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(m_metalLayer->pixelFormat());
NS::Error* error = nullptr;
m_presentPipeline = m_device->newRenderPipelineState(renderPipelineDescriptor, &error);
renderPipelineDescriptor->release();
presentVertexFunction->release();
@ -106,7 +122,6 @@ void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
{
debug_printf("failed to create present pipeline (error: %s)\n", error->localizedDescription()->utf8String());
error->release();
return;
}
}

View File

@ -257,8 +257,13 @@ private:
MTL::CommandQueue* m_commandQueue;
// Pipelines
MTL::Library* m_utilityLibrary;
MTL::RenderPipelineState* m_presentPipeline;
// Hybrid pipelines
class MetalHybridComputePipeline* m_copyDepthToColorPipeline;
class MetalHybridComputePipeline* m_copyColorToDepthPipeline;
// Basic
MTL::SamplerState* m_nearestSampler;

View File

@ -1,23 +0,0 @@
// Metal Shading Language source of the present pass:
//  - presentVertex:   emits one of three fixed positions per vertex id; together
//                     they form a single triangle covering the whole clip-space
//                     square, with matching (vertically flipped) texture coordinates
//  - presentFragment: samples texture 0 with sampler 0 at the interpolated coordinate
inline const char* presentLibrarySource = R"MSL(#include <metal_stdlib>
using namespace metal;

constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};

struct VertexOut {
 float4 position [[position]];
 float2 texCoord;
};

vertex VertexOut presentVertex(ushort vid [[vertex_id]]) {
 VertexOut out;
 out.position = float4(positions[vid], 0.0, 1.0);
 out.texCoord = positions[vid] * 0.5 + 0.5;
 out.texCoord.y = 1.0 - out.texCoord.y;

 return out;
}

fragment float4 presentFragment(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
 return tex.sample(samplr, in.texCoord);
}
)MSL";

View File

@ -0,0 +1,34 @@
// Metal Shading Language source of the renderer's utility library:
//  - vertexFullscreen:       emits one of three fixed positions per vertex id; together
//                            they form a single triangle covering the whole clip-space
//                            square, with matching (vertically flipped) texture coordinates
//  - fragmentPresent:        samples texture 0 with sampler 0 at the interpolated coordinate
//  - vertexCopyDepthToColor: per vertex id, reads one texel of the depth texture and
//                            writes it to the red channel of the destination texture
//  - vertexCopyColorToDepth: per vertex id, reads the red channel of the source texture
//                            and writes it to the destination texture
// The two copy functions turn the vertex id into a 2D coordinate using copyWidth.
inline const char* utilityShaderSource = R"MSL(#include <metal_stdlib>
using namespace metal;

constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};

struct VertexOut {
 float4 position [[position]];
 float2 texCoord;
};

vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {
 VertexOut out;
 out.position = float4(positions[vid], 0.0, 1.0);
 out.texCoord = positions[vid] * 0.5 + 0.5;
 out.texCoord.y = 1.0 - out.texCoord.y;

 return out;
}

fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
 return tex.sample(samplr, in.texCoord);
}

vertex void vertexCopyDepthToColor(uint vid [[vertex_id]], depth2d<float, access::read> src [[texture(0)]], texture2d<float, access::write> dst [[texture(1)]], constant uint& copyWidth) {
 uint2 coord = uint2(vid % copyWidth, vid / copyWidth);
 return dst.write(float4(src.read(coord), 0.0, 0.0, 0.0), coord);
}

vertex void vertexCopyColorToDepth(uint vid [[vertex_id]], texture2d<float, access::read> src [[texture(0)]], texture2d<float, access::write> dst [[texture(1)]], constant uint& copyWidth) {
 uint2 coord = uint2(vid % copyWidth, vid / copyWidth);
 return dst.write(float4(src.read(coord).r), coord);
}

)MSL";