Metal: store the utility shader source as a raw string literal

The shader used to be a chain of escaped C string fragments. Keep it readable
by using a raw string literal instead of stringifying it through a macro:
  * no macros with reserved names (double underscore / underscore + capital),
  * line breaks survive, so Metal compiler diagnostics report useful lines,
  * the metal_stdlib include and comments can live inside the shader text,
  * the string is a complete translation unit, so MetalRenderer.cpp keeps
    handing utilityShaderSource to newLibrary as-is and needs no change
    (no copy, pop_back/erase or include-prepending at startup).

diff --git a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
--- a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
@@ -1,49 +1,56 @@
-inline const char* utilityShaderSource = \
-"#include <metal_stdlib>\n" \
-"using namespace metal;\n" \
-"\n" \
-"constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};\n"
-"\n" \
-"struct VertexOut {\n" \
-"    float4 position [[position]];\n" \
-"    float2 texCoord;\n" \
-"};\n" \
-"\n" \
-"vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {\n" \
-"    VertexOut out;\n" \
-"    out.position = float4(positions[vid], 0.0, 1.0);\n" \
-"    out.texCoord = positions[vid] * 0.5 + 0.5;\n" \
-"    out.texCoord.y = 1.0 - out.texCoord.y;\n" \
-"\n" \
-"    return out;\n" \
-"}\n" \
-"\n" \
-"fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {\n" \
-"    return tex.sample(samplr, in.texCoord);\n" \
-"}\n" \
-"\n" \
-"struct CopyParams {\n" \
-"    uint width;\n" \
-"    uint srcMip;\n" \
-"    uint srcSlice;\n" \
-"    uint dstMip;\n" \
-"    uint dstSlice;\n" \
-"};\n" \
-"\n" \
-"vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(0)]], texture2d_array<float, access::write> dst [[texture(1)]], constant CopyParams& params [[buffer(0)]]) {\n" \
-"    uint2 coord = uint2(vid % params.width, vid / params.width);\n" \
-"    return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);\n" \
-"}\n" \
-"\n" \
-"struct RestrideParams {\n" \
-"    uint oldStride;\n" \
-"    uint newStride;\n" \
-"};\n" \
-"\n" \
-/* TODO: use uint32? Since that would require less iterations */ \
-"vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(0)]], device uint8_t* dst [[buffer(1)]], constant RestrideParams& params [[buffer(2)]]) {\n" \
-"    for (uint32_t i = 0; i < params.oldStride; i++) {\n" \
-"        dst[vid * params.newStride + i] = src[vid * params.oldStride + i];\n" \
-"    }\n" \
-"}\n" \
-"\n";
+#pragma once
+
+// Metal Shading Language source of the renderer's utility shaders.
+// MetalRenderer passes this string unchanged to MTL::Device::newLibrary, so it
+// must be a complete translation unit, including the metal_stdlib include.
+// A raw string literal keeps the line breaks (so compiler diagnostics point at
+// real lines), allows preprocessor directives and comments in the shader, and
+// needs neither stringify macros nor runtime fix-ups of the text.
+inline constexpr const char* utilityShaderSource = R"MSL(#include <metal_stdlib>
+using namespace metal;
+
+constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};
+
+struct VertexOut {
+    float4 position [[position]];
+    float2 texCoord;
+};
+
+vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {
+    VertexOut out;
+    out.position = float4(positions[vid], 0.0, 1.0);
+    out.texCoord = positions[vid] * 0.5 + 0.5;
+    out.texCoord.y = 1.0 - out.texCoord.y;
+
+    return out;
+}
+
+fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
+    return tex.sample(samplr, in.texCoord);
+}
+
+struct CopyParams {
+    uint width;
+    uint srcMip;
+    uint srcSlice;
+    uint dstMip;
+    uint dstSlice;
+};
+
+vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(0)]], texture2d_array<float, access::write> dst [[texture(1)]], constant CopyParams& params [[buffer(0)]]) {
+    uint2 coord = uint2(vid % params.width, vid / params.width);
+    return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);
+}
+
+struct RestrideParams {
+    uint oldStride;
+    uint newStride;
+};
+
+/* TODO: use uint32? Since that would require fewer iterations */
+vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(0)]], device uint8_t* dst [[buffer(1)]], constant RestrideParams& params [[buffer(2)]]) {
+    for (uint32_t i = 0; i < params.oldStride; i++) {
+        dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
+    }
+}
+)MSL";