Implement a helper shader for partial clears

Partial clears (clears that only write a subset of the colour components) are not natively supported by Vulkan, so a helper shader combined with colorWriteMask is used to achieve the same behaviour.
This commit is contained in:
Billy Laws 2022-11-06 20:32:18 +00:00
parent ac0e225114
commit 13a96c5aba
5 changed files with 173 additions and 33 deletions

View File

@ -137,6 +137,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
view->range.layerCount != 1 || view->range.baseArrayLayer != 0 || clearSurface.rtArrayIndex != 0;
}};
// Always use surfaceClip for render area since it's more likely to match the renderArea of draws and avoid an RP break
const auto &surfaceClip{clearEngineRegisters.surfaceClip};
vk::Rect2D renderArea{{surfaceClip.horizontal.x, surfaceClip.vertical.y}, {surfaceClip.horizontal.width, surfaceClip.vertical.height}};
auto clearRects{util::MakeFilledArray<vk::ClearRect, 2>(vk::ClearRect{.rect = scissor, .baseArrayLayer = clearSurface.rtArrayIndex, .layerCount = 1})};
boost::container::small_vector<vk::ClearAttachment, 2> clearAttachments;
@ -147,14 +151,23 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (auto view{activeState.GetColorRenderTargetForClear(ctx, clearSurface.mrtSelect)}) {
ctx.executor.AttachTexture(&*view);
if (!(clearSurface.rEnable && clearSurface.gEnable && clearSurface.bEnable && clearSurface.aEnable))
Logger::Warn("Partial clears are unimplemented! Performing full clear instead");
if (!(view->range.aspectMask & vk::ImageAspectFlagBits::eColor)) {
bool partialClear{!(clearSurface.rEnable && clearSurface.gEnable && clearSurface.bEnable && clearSurface.aEnable)};
if (!(view->range.aspectMask & vk::ImageAspectFlagBits::eColor))
Logger::Warn("Colour RT used in clear lacks colour aspect"); // TODO: Drop this check after texman rework
}
if (needsAttachmentClearCmd(view)) {
if (partialClear) {
ctx.gpu.helperShaders.clearHelperShader.Clear(ctx.gpu, view->range.aspectMask,
(clearSurface.rEnable ? vk::ColorComponentFlagBits::eR : vk::ColorComponentFlags{}) |
(clearSurface.gEnable ? vk::ColorComponentFlagBits::eG : vk::ColorComponentFlags{}) |
(clearSurface.bEnable ? vk::ColorComponentFlagBits::eB : vk::ColorComponentFlags{}) |
(clearSurface.aEnable ? vk::ColorComponentFlagBits::eA : vk::ColorComponentFlags{}),
{clearEngineRegisters.colorClearValue}, &*view, [=](auto &&executionCallback) {
auto dst{view.get()};
ctx.executor.AddSubpass(std::move(executionCallback), renderArea, {}, {}, span<TextureView *>{dst}, nullptr);
});
ctx.executor.NotifyPipelineChange();
} else if (needsAttachmentClearCmd(view)) {
clearAttachments.push_back({.aspectMask = view->range.aspectMask, .clearValue = {clearEngineRegisters.colorClearValue}});
colorView = view;
} else {
@ -190,10 +203,6 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (clearAttachments.empty())
return;
// Always use surfaceClip for render area since it's more likely to match the renderArea of draws and avoid an RP break
const auto &surfaceClip{clearEngineRegisters.surfaceClip};
vk::Rect2D renderArea{{surfaceClip.horizontal.x, surfaceClip.vertical.y}, {surfaceClip.horizontal.width, surfaceClip.vertical.height}};
std::array<TextureView *, 1> colorAttachments{colorView ? &*colorView : nullptr};
ctx.executor.AddSubpass([clearAttachments, clearRects](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
commandBuffer.clearAttachments(clearAttachments, span(clearRects).first(clearAttachments.size()));

View File

@ -23,7 +23,7 @@ namespace skyline::gpu {
);
}
SimpleColourRTShader::SimpleColourRTShader(GPU &gpu, std::shared_ptr<vfs::Backing> vertexShader, std::shared_ptr<vfs::Backing> fragmentShader)
SimpleSingleRtShader::SimpleSingleRtShader(GPU &gpu, std::shared_ptr<vfs::Backing> vertexShader, std::shared_ptr<vfs::Backing> fragmentShader)
: vertexShaderModule{CreateShaderModule(gpu, *vertexShader)},
fragmentShaderModule{CreateShaderModule(gpu, *fragmentShader)},
shaderStages{{
@ -39,8 +39,10 @@ namespace skyline::gpu {
}}
} {}
cache::GraphicsPipelineCache::CompiledPipeline SimpleColourRTShader::GetPipeline(GPU &gpu,
TextureView *colorAttachment,
cache::GraphicsPipelineCache::CompiledPipeline SimpleSingleRtShader::GetPipeline(GPU &gpu,
TextureView *colorAttachment, TextureView *depthStencilAttachment,
bool depthWrite, bool stencilWrite, u32 stencilValue,
vk::ColorComponentFlags colorWriteMask,
span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges) {
constexpr static vk::PipelineInputAssemblyStateCreateInfo inputAssemblyState{
.topology = vk::PrimitiveTopology::eTriangleList,
@ -73,19 +75,31 @@ namespace skyline::gpu {
.alphaToOneEnable = false
};
constexpr static vk::PipelineDepthStencilStateCreateInfo depthStencilState{
.depthTestEnable = false,
.depthWriteEnable = false,
vk::PipelineDepthStencilStateCreateInfo depthStencilState{
.depthTestEnable = depthWrite,
.depthWriteEnable = depthWrite,
.depthCompareOp = vk::CompareOp::eAlways,
.depthBoundsTestEnable = false,
.stencilTestEnable = false,
.stencilTestEnable = stencilWrite,
};
constexpr static vk::PipelineColorBlendAttachmentState attachmentState{
if (stencilWrite) {
depthStencilState.front = depthStencilState.back = {
.depthFailOp = vk::StencilOp::eReplace,
.passOp = vk::StencilOp::eReplace,
.compareOp = vk::CompareOp::eAlways,
.compareMask = 0xFF,
.writeMask = 0xFF,
.reference = stencilValue
};
}
vk::PipelineColorBlendAttachmentState attachmentState{
.blendEnable = false,
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
.colorWriteMask = colorWriteMask
};
constexpr static vk::PipelineColorBlendStateCreateInfo blendState{
vk::PipelineColorBlendStateCreateInfo blendState{
.logicOpEnable = false,
.attachmentCount = 1,
.pAttachments = &attachmentState
@ -100,11 +114,11 @@ namespace skyline::gpu {
vertexState.unlink<vk::PipelineVertexInputDivisorStateCreateInfoEXT>();
auto colourAttachmentDimensions{colorAttachment->texture->dimensions};
auto attachmentDimensions{colorAttachment ? colorAttachment->texture->dimensions : depthStencilAttachment->texture->dimensions};
vk::Viewport viewport{
.height = static_cast<float>(colourAttachmentDimensions.height),
.width = static_cast<float>(colourAttachmentDimensions.width),
.height = static_cast<float>(attachmentDimensions.height),
.width = static_cast<float>(attachmentDimensions.width),
.x = 0.0f,
.y = 0.0f,
.minDepth = 0.0f,
@ -112,7 +126,7 @@ namespace skyline::gpu {
};
vk::Rect2D scissor{
.extent = colourAttachmentDimensions
.extent = attachmentDimensions
};
vk::PipelineViewportStateCreateInfo viewportState{
@ -134,7 +148,7 @@ namespace skyline::gpu {
.colorBlendState = blendState,
.dynamicState = {},
.colorAttachments = span<TextureView *>{colorAttachment},
.depthStencilAttachment = nullptr,
.depthStencilAttachment = depthStencilAttachment,
}, layoutBindings, pushConstantRanges, true);
}
@ -142,6 +156,12 @@ namespace skyline::gpu {
/**
 * @brief Two-component float vector, laid out as two consecutive floats
 */
struct Vec2 {
    float x;
    float y;
};
/**
 * @brief Four-component float vector, laid out as four consecutive floats
 */
struct Vec4 {
    float x;
    float y;
    float z;
    float w;
};
// Host-side stand-in for a shader `bool` field, stored as a u32 (presumably to match the shader-side size of `bool` in a push constant block — confirm)
using Bool = u32;
}
namespace blit {
@ -177,7 +197,7 @@ namespace skyline::gpu {
};
BlitHelperShader::BlitHelperShader(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem)
: SimpleColourRTShader{gpu, shaderFileSystem->OpenFile("shaders/blit.vert.spv"), shaderFileSystem->OpenFile("shaders/blit.frag.spv")},
: SimpleSingleRtShader{gpu, shaderFileSystem->OpenFile("shaders/blit.vert.spv"), shaderFileSystem->OpenFile("shaders/blit.frag.spv")},
bilinearSampler{gpu.vkDevice.createSampler(
vk::SamplerCreateInfo{
.addressModeU = vk::SamplerAddressMode::eRepeat,
@ -231,7 +251,10 @@ namespace skyline::gpu {
.dstSrcScaleFactor = {dstSrcScaleFactorX * (srcRect.width / srcImageDimensions.width), dstSrcScaleFactorY * (srcRect.height / srcImageDimensions.height)},
.srcHeightRecip = 1.0f / srcImageDimensions.height
},
GetPipeline(gpu, dstImageView, {blit::SamplerLayoutBinding}, blit::PushConstantRanges))
GetPipeline(gpu, dstImageView,
nullptr, false, false, 0,
vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA,
{blit::SamplerLayoutBinding}, blit::PushConstantRanges))
};
vk::DescriptorImageInfo imageInfo{
@ -263,7 +286,63 @@ namespace skyline::gpu {
}
namespace clear {
struct FragmentPushConstantLayout {
glsl::Vec4 color;
glsl::Bool clearDepth;
float depth;
};
constexpr static std::array<vk::PushConstantRange, 1> PushConstantRanges{
vk::PushConstantRange{
.stageFlags = vk::ShaderStageFlagBits::eFragment,
.size = sizeof(FragmentPushConstantLayout),
.offset = 0
}
};
}
/**
 * @brief Builds the clear helper from the precompiled clear vertex/fragment shader binaries found in the supplied filesystem
 */
ClearHelperShader::ClearHelperShader(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem)
    : SimpleSingleRtShader{
          gpu,
          shaderFileSystem->OpenFile("shaders/clear.vert.spv"),
          shaderFileSystem->OpenFile("shaders/clear.frag.spv")
      } {}
/**
 * @brief Records a shader-based clear of `dstImageView`
 * @param mask Aspects of the view to clear; colour selects the colour attachment path, depth and/or stencil select the depth-stencil attachment path
 * @param components Colour components that the clear is allowed to write, forwarded as the pipeline's colour write mask
 * @param value Clear value; only the union member matching `mask` is read
 * @param dstImageView View that is bound as the colour or depth-stencil attachment of the clear pipeline
 * @param recordCb Invoked once with the function that records the clear draw into a command buffer
 */
void ClearHelperShader::Clear(GPU &gpu, vk::ImageAspectFlags mask, vk::ColorComponentFlags components, vk::ClearValue value, TextureView *dstImageView,
                              std::function<void(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb) {
    // State needed at record time; shared so that it can be attached to the fence cycle by the recorded function
    struct DrawState {
        clear::FragmentPushConstantLayout fragmentPushConstants;
        cache::GraphicsPipelineCache::CompiledPipeline pipeline;

        DrawState(clear::FragmentPushConstantLayout fragmentPushConstants,
                  cache::GraphicsPipelineCache::CompiledPipeline pipeline)
            : fragmentPushConstants{fragmentPushConstants},
              pipeline{pipeline} {}
    };

    bool writeColor{static_cast<bool>(mask & vk::ImageAspectFlagBits::eColor)};
    bool writeDepth{static_cast<bool>(mask & vk::ImageAspectFlagBits::eDepth)};
    bool writeStencil{static_cast<bool>(mask & vk::ImageAspectFlagBits::eStencil)};

    // vk::ClearValue is a union of colour and depth-stencil values, so only read the member that the mask says is in use
    // Fields that are not being cleared stay zeroed and are ignored by the shader/pipeline
    clear::FragmentPushConstantLayout pushConstants{};
    if (writeColor)
        pushConstants.color = {value.color.float32[0], value.color.float32[1], value.color.float32[2], value.color.float32[3]};
    pushConstants.clearDepth = writeDepth;
    if (writeDepth)
        pushConstants.depth = value.depthStencil.depth;
    u32 stencilValue{writeStencil ? value.depthStencil.stencil : 0u};

    TextureView *colorAttachment{writeColor ? dstImageView : nullptr};
    TextureView *depthStencilAttachment{(writeDepth || writeStencil) ? dstImageView : nullptr};

    auto drawState{std::make_shared<DrawState>(
        pushConstants,
        GetPipeline(gpu, colorAttachment, depthStencilAttachment, writeDepth, writeStencil, stencilValue, components, {}, clear::PushConstantRanges))
    };

    recordCb([drawState = std::move(drawState)](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass, u32) {
        // Hand the draw state to the cycle so that it is also referenced from there
        cycle->AttachObject(drawState);

        commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, drawState->pipeline.pipeline);
        commandBuffer.pushConstants(drawState->pipeline.pipelineLayout, vk::ShaderStageFlagBits::eFragment, 0,
                                    vk::ArrayProxy<const clear::FragmentPushConstantLayout>{drawState->fragmentPushConstants});
        // 6 vertices: the vertex shader expands these into the two triangles of a full-target quad
        commandBuffer.draw(6, 1, 0, 0);
    });
}
HelperShaders::HelperShaders(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem)
: blitHelperShader(gpu, std::move(shaderFileSystem)) {}
: blitHelperShader(gpu, shaderFileSystem),
clearHelperShader(gpu, shaderFileSystem) {}
}

View File

@ -16,29 +16,31 @@ namespace skyline::gpu {
class GPU;
/**
* @brief A base class that can be inherited by helper shaders that render to a single color rendertarget to simplify pipeline creation
* @brief A base class that can be inherited by helper shaders that render to a single color/depth rendertarget to simplify pipeline creation
*/
class SimpleColourRTShader {
class SimpleSingleRtShader {
protected:
vk::raii::ShaderModule vertexShaderModule;
vk::raii::ShaderModule fragmentShaderModule;
std::array<vk::PipelineShaderStageCreateInfo, 2> shaderStages; //!< Shader stages for the vertex and fragment shader modules
SimpleColourRTShader(GPU &gpu, std::shared_ptr<vfs::Backing> vertexShader, std::shared_ptr<vfs::Backing> fragmentShader);
SimpleSingleRtShader(GPU &gpu, std::shared_ptr<vfs::Backing> vertexShader, std::shared_ptr<vfs::Backing> fragmentShader);
/**
* @brief Returns a potentially cached pipeline built according to the supplied input state
*/
cache::GraphicsPipelineCache::CompiledPipeline GetPipeline(GPU &gpu,
TextureView *colorAttachment,
TextureView *colorAttachment, TextureView *depthStenilAttachment,
bool depthWrite, bool stencilWrite, u32 stencilValue,
vk::ColorComponentFlags colorWriteMask,
span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges);
};
/**
* @brief Simple helper shader for blitting a texture to a rendertarget with subpixel-precision
*/
class BlitHelperShader : SimpleColourRTShader {
class BlitHelperShader : SimpleSingleRtShader {
private:
vk::raii::Sampler bilinearSampler;
vk::raii::Sampler nearestSampler;
@ -73,11 +75,30 @@ namespace skyline::gpu {
std::function<void(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb);
};
/**
 * @brief Simple helper shader for clearing a texture to a given color, depth or stencil value while honouring a color component write mask
 */
class ClearHelperShader : SimpleSingleRtShader {
public:
ClearHelperShader(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem);
/**
 * @brief Records a sequenced GPU clear operation using a shader
 * @param mask Mask of which aspects to clear
 * @param components Mask of which color components to clear, used as the color write mask of the clear pipeline
 * @param value The value to clear to
 * @param dstImageView The view to clear, bound as the color or depth-stencil attachment depending on `mask`
 * @param recordCb Callback used to record the clear commands for sequenced execution on the GPU
 */
void Clear(GPU &gpu, vk::ImageAspectFlags mask, vk::ColorComponentFlags components, vk::ClearValue value, TextureView *dstImageView,
std::function<void(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb);
};
/**
 * @brief Holds all helper shaders to avoid redundantly recreating them on each usage
 */
struct HelperShaders {
// Helper used for shader-based blits
BlitHelperShader blitHelperShader;
// Helper used for shader-based clears
ClearHelperShader clearHelperShader;
// Constructs every helper shader from the supplied shader filesystem
HelperShaders(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem);
};

View File

@ -0,0 +1,17 @@
#version 460

// Colour written to the attachment at location 0 (discarded when no colour attachment is bound)
layout (location = 0) out vec4 colour;

// Clear parameters supplied by the host through push constants
layout (push_constant) uniform constants {
    vec4 colour;     // Colour to clear to
    bool clearDepth; // Whether `depth` should be written as the fragment depth
    float depth;     // Depth to clear to
} PC;

void main()
{
    // Every output is assigned on every path: gl_FragDepth is statically written by this shader, so leaving it
    // unassigned on the non-depth path would make the fragment depth undefined there
    colour = PC.colour;
    gl_FragDepth = PC.clearDepth ? PC.depth : gl_FragCoord.z;
}

View File

@ -0,0 +1,14 @@
#version 460

// Clip-space corners of the two triangles that together cover the whole render target, indexed by gl_VertexIndex
const vec2 QuadCorners[6] = vec2[6](
    vec2(-1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0), // First triangle
    vec2(1.0, 1.0), vec2(1.0, -1.0), vec2(-1.0, -1.0)  // Second triangle
);

void main() {
    vec2 corner = QuadCorners[gl_VertexIndex];
    gl_Position = vec4(corner, 0.0, 1.0);
}