From 13a96c5aba5281fc1ce871709773479e5fcf84b7 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 6 Nov 2022 20:32:18 +0000 Subject: [PATCH] Implement a helper shader for partial clears These are not natively supported by Vulkan, so use a helper shader and colorWriteMask for the same behaviour. --- .../interconnect/maxwell_3d/maxwell_3d.cpp | 29 +++-- .../skyline/gpu/shaders/helper_shaders.cpp | 115 +++++++++++++++--- .../cpp/skyline/gpu/shaders/helper_shaders.h | 31 ++++- app/src/main/shaders/clear.frag | 17 +++ app/src/main/shaders/clear.vert | 14 +++ 5 files changed, 173 insertions(+), 33 deletions(-) create mode 100644 app/src/main/shaders/clear.frag create mode 100644 app/src/main/shaders/clear.vert diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp index 5fe6c0a5..4f054e52 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp @@ -137,6 +137,10 @@ namespace skyline::gpu::interconnect::maxwell3d { view->range.layerCount != 1 || view->range.baseArrayLayer != 0 || clearSurface.rtArrayIndex != 0; }}; + // Always use surfaceClip for render area since it's more likely to match the renderArea of draws and avoid an RP break + const auto &surfaceClip{clearEngineRegisters.surfaceClip}; + vk::Rect2D renderArea{{surfaceClip.horizontal.x, surfaceClip.vertical.y}, {surfaceClip.horizontal.width, surfaceClip.vertical.height}}; + auto clearRects{util::MakeFilledArray(vk::ClearRect{.rect = scissor, .baseArrayLayer = clearSurface.rtArrayIndex, .layerCount = 1})}; boost::container::small_vector clearAttachments; @@ -147,14 +151,23 @@ namespace skyline::gpu::interconnect::maxwell3d { if (auto view{activeState.GetColorRenderTargetForClear(ctx, clearSurface.mrtSelect)}) { ctx.executor.AttachTexture(&*view); - if (!(clearSurface.rEnable && clearSurface.gEnable && clearSurface.bEnable 
&& clearSurface.aEnable)) - Logger::Warn("Partial clears are unimplemented! Performing full clear instead"); - - if (!(view->range.aspectMask & vk::ImageAspectFlagBits::eColor)) { + bool partialClear{!(clearSurface.rEnable && clearSurface.gEnable && clearSurface.bEnable && clearSurface.aEnable)}; + if (!(view->range.aspectMask & vk::ImageAspectFlagBits::eColor)) Logger::Warn("Colour RT used in clear lacks colour aspect"); // TODO: Drop this check after texman rework - } - if (needsAttachmentClearCmd(view)) { + + if (partialClear) { + ctx.gpu.helperShaders.clearHelperShader.Clear(ctx.gpu, view->range.aspectMask, + (clearSurface.rEnable ? vk::ColorComponentFlagBits::eR : vk::ColorComponentFlags{}) | + (clearSurface.gEnable ? vk::ColorComponentFlagBits::eG : vk::ColorComponentFlags{}) | + (clearSurface.bEnable ? vk::ColorComponentFlagBits::eB : vk::ColorComponentFlags{}) | + (clearSurface.aEnable ? vk::ColorComponentFlagBits::eA : vk::ColorComponentFlags{}), + {clearEngineRegisters.colorClearValue}, &*view, [=](auto &&executionCallback) { + auto dst{view.get()}; + ctx.executor.AddSubpass(std::move(executionCallback), renderArea, {}, {}, span{dst}, nullptr); + }); + ctx.executor.NotifyPipelineChange(); + } else if (needsAttachmentClearCmd(view)) { clearAttachments.push_back({.aspectMask = view->range.aspectMask, .clearValue = {clearEngineRegisters.colorClearValue}}); colorView = view; } else { @@ -190,10 +203,6 @@ namespace skyline::gpu::interconnect::maxwell3d { if (clearAttachments.empty()) return; - // Always use surfaceClip for render area since it's more likely to match the renderArea of draws and avoid an RP break - const auto &surfaceClip{clearEngineRegisters.surfaceClip}; - vk::Rect2D renderArea{{surfaceClip.horizontal.x, surfaceClip.vertical.y}, {surfaceClip.horizontal.width, surfaceClip.vertical.height}}; - std::array colorAttachments{colorView ? 
&*colorView : nullptr}; ctx.executor.AddSubpass([clearAttachments, clearRects](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &, vk::RenderPass, u32) { commandBuffer.clearAttachments(clearAttachments, span(clearRects).first(clearAttachments.size())); diff --git a/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp index fc8e0758..275039e4 100644 --- a/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp +++ b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp @@ -23,7 +23,7 @@ namespace skyline::gpu { ); } - SimpleColourRTShader::SimpleColourRTShader(GPU &gpu, std::shared_ptr vertexShader, std::shared_ptr fragmentShader) + SimpleSingleRtShader::SimpleSingleRtShader(GPU &gpu, std::shared_ptr vertexShader, std::shared_ptr fragmentShader) : vertexShaderModule{CreateShaderModule(gpu, *vertexShader)}, fragmentShaderModule{CreateShaderModule(gpu, *fragmentShader)}, shaderStages{{ @@ -39,8 +39,10 @@ namespace skyline::gpu { }} } {} - cache::GraphicsPipelineCache::CompiledPipeline SimpleColourRTShader::GetPipeline(GPU &gpu, - TextureView *colorAttachment, + cache::GraphicsPipelineCache::CompiledPipeline SimpleSingleRtShader::GetPipeline(GPU &gpu, + TextureView *colorAttachment, TextureView *depthStencilAttachment, + bool depthWrite, bool stencilWrite, u32 stencilValue, + vk::ColorComponentFlags colorWriteMask, span layoutBindings, span pushConstantRanges) { constexpr static vk::PipelineInputAssemblyStateCreateInfo inputAssemblyState{ .topology = vk::PrimitiveTopology::eTriangleList, @@ -73,19 +75,31 @@ namespace skyline::gpu { .alphaToOneEnable = false }; - constexpr static vk::PipelineDepthStencilStateCreateInfo depthStencilState{ - .depthTestEnable = false, - .depthWriteEnable = false, + vk::PipelineDepthStencilStateCreateInfo depthStencilState{ + .depthTestEnable = depthWrite, + .depthWriteEnable = depthWrite, + .depthCompareOp = vk::CompareOp::eAlways, .depthBoundsTestEnable = 
false, - .stencilTestEnable = false, + .stencilTestEnable = stencilWrite, }; - constexpr static vk::PipelineColorBlendAttachmentState attachmentState{ + if (stencilWrite) { + depthStencilState.front = depthStencilState.back = { + .depthFailOp = vk::StencilOp::eReplace, + .passOp = vk::StencilOp::eReplace, + .compareOp = vk::CompareOp::eAlways, + .compareMask = 0xFF, + .writeMask = 0xFF, + .reference = stencilValue + }; + } + + vk::PipelineColorBlendAttachmentState attachmentState{ .blendEnable = false, - .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA + .colorWriteMask = colorWriteMask }; - constexpr static vk::PipelineColorBlendStateCreateInfo blendState{ + vk::PipelineColorBlendStateCreateInfo blendState{ .logicOpEnable = false, .attachmentCount = 1, .pAttachments = &attachmentState @@ -100,11 +114,11 @@ namespace skyline::gpu { vertexState.unlink(); - auto colourAttachmentDimensions{colorAttachment->texture->dimensions}; + auto attachmentDimensions{colorAttachment ? 
colorAttachment->texture->dimensions : depthStencilAttachment->texture->dimensions}; vk::Viewport viewport{ - .height = static_cast(colourAttachmentDimensions.height), - .width = static_cast(colourAttachmentDimensions.width), + .height = static_cast(attachmentDimensions.height), + .width = static_cast(attachmentDimensions.width), .x = 0.0f, .y = 0.0f, .minDepth = 0.0f, @@ -112,7 +126,7 @@ namespace skyline::gpu { }; vk::Rect2D scissor{ - .extent = colourAttachmentDimensions + .extent = attachmentDimensions }; vk::PipelineViewportStateCreateInfo viewportState{ @@ -134,7 +148,7 @@ namespace skyline::gpu { .colorBlendState = blendState, .dynamicState = {}, .colorAttachments = span{colorAttachment}, - .depthStencilAttachment = nullptr, + .depthStencilAttachment = depthStencilAttachment, }, layoutBindings, pushConstantRanges, true); } @@ -142,6 +156,12 @@ namespace skyline::gpu { struct Vec2 { float x, y; }; + + struct Vec4 { + float x, y, z, w; + }; + + using Bool = u32; } namespace blit { @@ -177,7 +197,7 @@ namespace skyline::gpu { }; BlitHelperShader::BlitHelperShader(GPU &gpu, std::shared_ptr shaderFileSystem) - : SimpleColourRTShader{gpu, shaderFileSystem->OpenFile("shaders/blit.vert.spv"), shaderFileSystem->OpenFile("shaders/blit.frag.spv")}, + : SimpleSingleRtShader{gpu, shaderFileSystem->OpenFile("shaders/blit.vert.spv"), shaderFileSystem->OpenFile("shaders/blit.frag.spv")}, bilinearSampler{gpu.vkDevice.createSampler( vk::SamplerCreateInfo{ .addressModeU = vk::SamplerAddressMode::eRepeat, @@ -231,7 +251,10 @@ namespace skyline::gpu { .dstSrcScaleFactor = {dstSrcScaleFactorX * (srcRect.width / srcImageDimensions.width), dstSrcScaleFactorY * (srcRect.height / srcImageDimensions.height)}, .srcHeightRecip = 1.0f / srcImageDimensions.height }, - GetPipeline(gpu, dstImageView, {blit::SamplerLayoutBinding}, blit::PushConstantRanges)) + GetPipeline(gpu, dstImageView, + nullptr, false, false, 0, + vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | 
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, + {blit::SamplerLayoutBinding}, blit::PushConstantRanges)) }; vk::DescriptorImageInfo imageInfo{ @@ -263,7 +286,63 @@ namespace skyline::gpu { } + namespace clear { + struct FragmentPushConstantLayout { + glsl::Vec4 color; + glsl::Bool clearDepth; + float depth; + }; + + constexpr static std::array PushConstantRanges{ + vk::PushConstantRange{ + .stageFlags = vk::ShaderStageFlagBits::eFragment, + .size = sizeof(FragmentPushConstantLayout), + .offset = 0 + } + }; + } + + ClearHelperShader::ClearHelperShader(GPU &gpu, std::shared_ptr shaderFileSystem) + : SimpleSingleRtShader{gpu, shaderFileSystem->OpenFile("shaders/clear.vert.spv"), shaderFileSystem->OpenFile("shaders/clear.frag.spv")} {} + + void ClearHelperShader::Clear(GPU &gpu, vk::ImageAspectFlags mask, vk::ColorComponentFlags components, vk::ClearValue value, TextureView *dstImageView, + std::function &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb) { + struct DrawState { + clear::FragmentPushConstantLayout fragmentPushConstants; + cache::GraphicsPipelineCache::CompiledPipeline pipeline; + + DrawState(GPU &gpu, + clear::FragmentPushConstantLayout fragmentPushConstants, + cache::GraphicsPipelineCache::CompiledPipeline pipeline) + : fragmentPushConstants{fragmentPushConstants}, + pipeline{pipeline} {} + }; + + bool writeColor{mask & vk::ImageAspectFlagBits::eColor}; + bool writeDepth{mask & vk::ImageAspectFlagBits::eDepth}; + bool writeStencil{mask & vk::ImageAspectFlagBits::eStencil}; + + auto drawState{std::make_shared( + gpu, + clear::FragmentPushConstantLayout{ + .color = {value.color.float32[0], value.color.float32[1], value.color.float32[2], value.color.float32[3]}, + .clearDepth = (mask & vk::ImageAspectFlagBits::eDepth) != vk::ImageAspectFlags{}, + .depth = value.depthStencil.depth + }, + GetPipeline(gpu, writeColor ? dstImageView : nullptr, (writeDepth || writeStencil) ? 
dstImageView : nullptr, writeDepth, writeStencil, value.depthStencil.stencil, components, {}, clear::PushConstantRanges)) + }; + + recordCb([drawState = std::move(drawState)](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu, vk::RenderPass, u32) { + cycle->AttachObject(drawState); + commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, drawState->pipeline.pipeline); + commandBuffer.pushConstants(drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eFragment, 0, + vk::ArrayProxy{drawState->fragmentPushConstants}); + commandBuffer.draw(6, 1, 0, 0); + }); + } + HelperShaders::HelperShaders(GPU &gpu, std::shared_ptr shaderFileSystem) - : blitHelperShader(gpu, std::move(shaderFileSystem)) {} + : blitHelperShader(gpu, shaderFileSystem), + clearHelperShader(gpu, shaderFileSystem) {} } \ No newline at end of file diff --git a/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.h b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.h index c0bf1e5b..5c8330e4 100644 --- a/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.h +++ b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.h @@ -16,29 +16,31 @@ namespace skyline::gpu { class GPU; /** - * @brief A base class that can be inherited by helper shaders that render to a single color rendertarget to simplify pipeline creation + * @brief A base class that can be inherited by helper shaders that render to a single color/depth rendertarget to simplify pipeline creation */ - class SimpleColourRTShader { + class SimpleSingleRtShader { protected: vk::raii::ShaderModule vertexShaderModule; vk::raii::ShaderModule fragmentShaderModule; std::array shaderStages; //!< Shader stages for the vertex and fragment shader modules - SimpleColourRTShader(GPU &gpu, std::shared_ptr vertexShader, std::shared_ptr fragmentShader); + SimpleSingleRtShader(GPU &gpu, std::shared_ptr vertexShader, std::shared_ptr fragmentShader); /** * @brief Returns a potentially cached pipeline built according to the 
supplied input state */ cache::GraphicsPipelineCache::CompiledPipeline GetPipeline(GPU &gpu, - TextureView *colorAttachment, + TextureView *colorAttachment, TextureView *depthStenilAttachment, + bool depthWrite, bool stencilWrite, u32 stencilValue, + vk::ColorComponentFlags colorWriteMask, span layoutBindings, span pushConstantRanges); }; /** * @brief Simple helper shader for blitting a texture to a rendertarget with subpixel-precision */ - class BlitHelperShader : SimpleColourRTShader { + class BlitHelperShader : SimpleSingleRtShader { private: vk::raii::Sampler bilinearSampler; vk::raii::Sampler nearestSampler; @@ -73,11 +75,30 @@ namespace skyline::gpu { std::function &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb); }; + /** + * @brief Simple helper shader for clearing a texture to a given color + */ + class ClearHelperShader : SimpleSingleRtShader { + public: + ClearHelperShader(GPU &gpu, std::shared_ptr shaderFileSystem); + + /** + * @brief Records a sequenced GPU clear operation using a shader + * @param mask Mask of which aspects to clear + * @param components Mask of which components to clear + * @param value The value to clear to + * @param recordCb Callback used to record the clear commands for sequenced execution on the GPU + */ + void Clear(GPU &gpu, vk::ImageAspectFlags mask, vk::ColorComponentFlags components, vk::ClearValue value, TextureView *dstImageView, + std::function &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb); + }; + /** * @brief Holds all helper shaders to avoid redundantly recreating them on each usage */ struct HelperShaders { BlitHelperShader blitHelperShader; + ClearHelperShader clearHelperShader; HelperShaders(GPU &gpu, std::shared_ptr shaderFileSystem); }; diff --git a/app/src/main/shaders/clear.frag b/app/src/main/shaders/clear.frag new file mode 100644 index 00000000..96cb8eb6 --- /dev/null +++ b/app/src/main/shaders/clear.frag @@ -0,0 +1,17 @@ +#version 460 + +layout (location = 0) out vec4 colour; + +layout (push_constant) 
uniform constants { + vec4 colour; + bool clearDepth; + float depth; +} PC; + +void main() +{ + if (PC.clearDepth) + gl_FragDepth = PC.depth; + else + colour = PC.colour; +} diff --git a/app/src/main/shaders/clear.vert b/app/src/main/shaders/clear.vert new file mode 100644 index 00000000..284ea1ec --- /dev/null +++ b/app/src/main/shaders/clear.vert @@ -0,0 +1,14 @@ +#version 460 + +void main() { + const vec2 lut[6] = vec2[6]( + vec2(-1, -1), + vec2(-1, 1), + vec2(1, 1), + vec2(1, 1), + vec2(1, -1), + vec2(-1, -1) + ); + + gl_Position = vec4(lut[gl_VertexIndex], 0, 1); +}