Implement a system for helper shaders together with a simple blit shader

It is desirable for us to use a shader for blits, as this makes it easy to emulate out-of-bounds blits and blits between different swizzled colour formats. The helper shader infrastructure is designed to be generic so that it can be reused by any other helper shaders that we may need in the future.
This commit is contained in:
Billy Laws 2022-07-31 15:05:20 +01:00
parent 1da1698f90
commit 395f665a13
9 changed files with 405 additions and 2 deletions

View File

@ -175,9 +175,11 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp ${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp
${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp ${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp
${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp ${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp
${source_DIR}/skyline/gpu/interconnect/fermi_2d.cpp
${source_DIR}/skyline/gpu/interconnect/command_executor.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp
${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp ${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp
${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp ${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp
${source_DIR}/skyline/gpu/shaders/helper_shaders.cpp
${source_DIR}/skyline/soc/smmu.cpp ${source_DIR}/skyline/soc/smmu.cpp
${source_DIR}/skyline/soc/host1x/syncpoint.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp
${source_DIR}/skyline/soc/host1x/command_fifo.cpp ${source_DIR}/skyline/soc/host1x/command_fifo.cpp

View File

@ -375,6 +375,7 @@ namespace skyline::gpu {
megaBufferAllocator(*this), megaBufferAllocator(*this),
descriptor(*this), descriptor(*this),
shader(state, *this), shader(state, *this),
helperShaders(*this, state.os->assetFileSystem),
graphicsPipelineCache(*this), graphicsPipelineCache(*this),
renderPassCache(*this), renderPassCache(*this),
framebufferCache(*this) {} framebufferCache(*this) {}

View File

@ -12,6 +12,7 @@
#include "gpu/megabuffer.h" #include "gpu/megabuffer.h"
#include "gpu/descriptor_allocator.h" #include "gpu/descriptor_allocator.h"
#include "gpu/shader_manager.h" #include "gpu/shader_manager.h"
#include "gpu/shaders/helper_shaders.h"
#include "gpu/cache/graphics_pipeline_cache.h" #include "gpu/cache/graphics_pipeline_cache.h"
#include "gpu/cache/renderpass_cache.h" #include "gpu/cache/renderpass_cache.h"
#include "gpu/cache/framebuffer_cache.h" #include "gpu/cache/framebuffer_cache.h"
@ -50,6 +51,8 @@ namespace skyline::gpu {
DescriptorAllocator descriptor; DescriptorAllocator descriptor;
ShaderManager shader; ShaderManager shader;
HelperShaders helperShaders;
cache::GraphicsPipelineCache graphicsPipelineCache; cache::GraphicsPipelineCache graphicsPipelineCache;
cache::RenderPassCache renderPassCache; cache::RenderPassCache renderPassCache;
cache::FramebufferCache framebufferCache; cache::FramebufferCache framebufferCache;

View File

@ -0,0 +1,268 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <gpu/descriptor_allocator.h>
#include <gpu/texture/texture.h>
#include <gpu/cache/graphics_pipeline_cache.h>
#include <vfs/filesystem.h>
#include "helper_shaders.h"
namespace skyline::gpu {
/**
 * @brief Creates a Vulkan shader module from the shader binary held in the supplied backing
 * @param gpu GPU instance whose Vulkan device the module is created on
 * @param shaderBacking Backing that is read in its entirety as the shader binary
 * @throws exception If the backing is empty, is not a whole number of 32-bit words, or cannot be read in full
 */
static vk::raii::ShaderModule CreateShaderModule(GPU &gpu, vfs::Backing &shaderBacking) {
    // Vulkan requires codeSize to be a non-zero multiple of 4, so reject anything else up front rather than handing the driver an empty or truncated buffer
    if (shaderBacking.size == 0 || (shaderBacking.size % sizeof(u32)) != 0)
        throw exception("Shader binary size is invalid");

    std::vector<u32> shaderBuf(shaderBacking.size / sizeof(u32));
    if (shaderBacking.Read(span(shaderBuf)) != shaderBacking.size)
        throw exception("Failed to read shader");

    return gpu.vkDevice.createShaderModule(
        {
            .codeSize = shaderBacking.size,
            .pCode = shaderBuf.data(),
        }
    );
}
/**
 * @brief Builds the vertex and fragment shader modules from their backings and fills in the stage descriptions that refer to them
 * @note Both stages use "main" as their entry point
 */
SimpleColourRTShader::SimpleColourRTShader(GPU &gpu, std::shared_ptr<vfs::Backing> vertexShader, std::shared_ptr<vfs::Backing> fragmentShader)
    : vertexShaderModule{CreateShaderModule(gpu, *vertexShader)},
      fragmentShaderModule{CreateShaderModule(gpu, *fragmentShader)},
      shaderStages{{
          vk::PipelineShaderStageCreateInfo{
              .stage = vk::ShaderStageFlagBits::eVertex,
              .module = *vertexShaderModule,
              .pName = "main",
          },
          vk::PipelineShaderStageCreateInfo{
              .stage = vk::ShaderStageFlagBits::eFragment,
              .module = *fragmentShaderModule,
              .pName = "main",
          },
      }} {}
/**
 * @brief Requests a pipeline from the GPU's graphics pipeline cache for drawing into a single colour attachment with this shader's stages
 * @param gpu GPU whose graphicsPipelineCache the pipeline is requested from
 * @param colorAttachment The only colour attachment, its texture dimensions determine the viewport and scissor
 * @param layoutBindings Descriptor set layout bindings, forwarded unchanged to the pipeline cache
 * @param pushConstantRanges Push constant ranges, forwarded unchanged to the pipeline cache
 * @return The compiled pipeline handed back by the pipeline cache
 */
cache::GraphicsPipelineCache::CompiledPipeline SimpleColourRTShader::GetPipeline(GPU &gpu,
TextureView *colorAttachment,
span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges) {
// Fixed-function state that never varies between calls is kept in static storage
// Plain triangle list without primitive restart
constexpr static vk::PipelineInputAssemblyStateCreateInfo inputAssemblyState{
.topology = vk::PrimitiveTopology::eTriangleList,
.primitiveRestartEnable = false
};
// Tessellation is unused, so there are no patch control points
constexpr static vk::PipelineTessellationStateCreateInfo tesselationState{
.patchControlPoints = 0,
};
// Filled polygons with no culling, depth clamp or depth bias; the first vertex is the provoking one
// NOTE(review): the provoking vertex structure is always chained here — confirm the corresponding device extension is guaranteed to be enabled
const static vk::StructureChain<vk::PipelineRasterizationStateCreateInfo, vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT> rasterizationState{
{
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.polygonMode = vk::PolygonMode::eFill,
.lineWidth = 1.0f,
.cullMode = vk::CullModeFlagBits::eNone,
.frontFace = vk::FrontFace::eCounterClockwise,
.depthBiasEnable = false
}, {
.provokingVertexMode = vk::ProvokingVertexModeEXT::eFirstVertex
}
};
// Single-sampled rendering
constexpr static vk::PipelineMultisampleStateCreateInfo multisampleState{
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = false,
.minSampleShading = 1.0f,
.alphaToCoverageEnable = false,
.alphaToOneEnable = false
};
// Depth, depth-bounds and stencil tests are all disabled
constexpr static vk::PipelineDepthStencilStateCreateInfo depthStencilState{
.depthTestEnable = false,
.depthWriteEnable = false,
.depthBoundsTestEnable = false,
.stencilTestEnable = false,
};
// No blending, all four colour channels are written
// This must have static storage as blendState below holds a pointer to it
constexpr static vk::PipelineColorBlendAttachmentState attachmentState{
.blendEnable = false,
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
};
constexpr static vk::PipelineColorBlendStateCreateInfo blendState{
.logicOpEnable = false,
.attachmentCount = 1,
.pAttachments = &attachmentState
};
// No vertex bindings or attributes are declared, the divisor structure is unlinked from the chain as it is unused
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{
{
.vertexAttributeDescriptionCount = 0,
.vertexBindingDescriptionCount = 0
}, {}
};
vertexState.unlink<vk::PipelineVertexInputDivisorStateCreateInfoEXT>();
// Viewport and scissor both cover the entire colour attachment texture
auto colourAttachmentDimensions{colorAttachment->texture->dimensions};
vk::Viewport viewport{
.height = static_cast<float>(colourAttachmentDimensions.height),
.width = static_cast<float>(colourAttachmentDimensions.width),
.x = 0.0f,
.y = 0.0f,
.minDepth = 0.0f,
.maxDepth = 1.0f
};
vk::Rect2D scissor{
.extent = colourAttachmentDimensions
};
// viewportState points at the locals above, so it is only valid for the duration of this call
vk::PipelineViewportStateCreateInfo viewportState{
.pViewports = &viewport,
.viewportCount = 1,
.pScissors = &scissor,
.scissorCount = 1
};
// Hand the complete state to the pipeline cache, there is one colour attachment and no depth-stencil attachment
return gpu.graphicsPipelineCache.GetCompiledPipeline(cache::GraphicsPipelineCache::PipelineState{
.shaderStages = shaderStages,
.vertexState = vertexState,
.inputAssemblyState = inputAssemblyState,
.tessellationState = tesselationState,
.viewportState = viewportState,
.rasterizationState = rasterizationState,
.multisampleState = multisampleState,
.depthStencilState = depthStencilState,
.colorBlendState = blendState,
.colorAttachments = span<TextureView *>{colorAttachment},
.depthStencilAttachment = nullptr,
}, layoutBindings, pushConstantRanges);
}
namespace glsl {
    /**
     * @brief Two-component float vector, used as the host-side counterpart of a GLSL vec2 in push constant layouts
     */
    struct Vec2 {
        float x;
        float y;
    };
}
namespace blit {
struct VertexPushConstantLayout {
glsl::Vec2 dstOriginClipSpace;
glsl::Vec2 dstDimensionsClipSpace;
};
struct FragmentPushConstantLayout {
glsl::Vec2 srcOriginUV;
glsl::Vec2 dstSrcScaleFactor;
float srcHeightRecip;
};
constexpr static std::array<vk::PushConstantRange, 2> PushConstantRanges{
vk::PushConstantRange{
.stageFlags = vk::ShaderStageFlagBits::eVertex,
.size = sizeof(VertexPushConstantLayout),
.offset = 0
}, vk::PushConstantRange{
.stageFlags = vk::ShaderStageFlagBits::eFragment,
.size = sizeof(FragmentPushConstantLayout),
.offset = sizeof(VertexPushConstantLayout)
}
};
constexpr static vk::DescriptorSetLayoutBinding SamplerLayoutBinding{
.binding = 0,
.descriptorType = vk::DescriptorType::eCombinedImageSampler,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eFragment
};
};
BlitHelperShader::BlitHelperShader(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem)
: SimpleColourRTShader{gpu, shaderFileSystem->OpenFile("shaders/blit.vert.spv"), shaderFileSystem->OpenFile("shaders/blit.frag.spv")},
bilinearSampler{gpu.vkDevice.createSampler(
vk::SamplerCreateInfo{
.addressModeU = vk::SamplerAddressMode::eRepeat,
.addressModeV = vk::SamplerAddressMode::eRepeat,
.addressModeW = vk::SamplerAddressMode::eRepeat,
.anisotropyEnable = false,
.compareEnable = false,
.magFilter = vk::Filter::eLinear,
.minFilter = vk::Filter::eLinear
})
},
nearestSampler{gpu.vkDevice.createSampler(
vk::SamplerCreateInfo{
.addressModeU = vk::SamplerAddressMode::eRepeat,
.addressModeV = vk::SamplerAddressMode::eRepeat,
.addressModeW = vk::SamplerAddressMode::eRepeat,
.anisotropyEnable = false,
.compareEnable = false,
.magFilter = vk::Filter::eNearest,
.minFilter = vk::Filter::eNearest
})
} {}
/**
 * @brief Prepares the pipeline, descriptor set and push constants for a single blit and passes the draw recording function to recordCb
 * @note Everything the recorded commands refer to lives in a shared state object which is attached to the fence cycle when the commands are recorded
 */
void BlitHelperShader::Blit(GPU &gpu, BlitRect srcRect, BlitRect dstRect,
                            vk::Extent2D srcImageDimensions, vk::Extent2D dstImageDimensions,
                            float dstSrcScaleFactorX, float dstSrcScaleFactorY,
                            bool bilinearFilter,
                            TextureView *srcImageView, TextureView *dstImageView,
                            std::function<void(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb) {
    /**
     * @brief Bundles all state that the recorded commands refer to
     */
    struct DrawState {
        blit::VertexPushConstantLayout vertexPushConstants;
        blit::FragmentPushConstantLayout fragmentPushConstants;
        DescriptorAllocator::ActiveDescriptorSet descriptorSet;
        cache::GraphicsPipelineCache::CompiledPipeline pipeline;

        DrawState(GPU &gpu,
                  blit::VertexPushConstantLayout vertexConstants, blit::FragmentPushConstantLayout fragmentConstants,
                  cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline)
            : vertexPushConstants{vertexConstants}, fragmentPushConstants{fragmentConstants},
              descriptorSet{gpu.descriptor.AllocateSet(compiledPipeline.descriptorSetLayout)},
              pipeline{compiledPipeline} {}
    };

    // Destination rectangle expressed in clip space, where the full destination image spans [-1, 1] on both axes
    const blit::VertexPushConstantLayout vertexConstants{
        .dstOriginClipSpace = {(2.0f * dstRect.x) / dstImageDimensions.width - 1.0f, (2.0f * dstRect.y) / dstImageDimensions.height - 1.0f},
        .dstDimensionsClipSpace = {(2.0f * dstRect.width) / dstImageDimensions.width, (2.0f * dstRect.height) / dstImageDimensions.height}
    };

    // Source rectangle normalised by the source image dimensions, alongside the reciprocal of the source height
    const blit::FragmentPushConstantLayout fragmentConstants{
        .srcOriginUV = {srcRect.x / srcImageDimensions.width, srcRect.y / srcImageDimensions.height},
        .dstSrcScaleFactor = {dstSrcScaleFactorX * (srcRect.width / srcImageDimensions.width), dstSrcScaleFactorY * (srcRect.height / srcImageDimensions.height)},
        .srcHeightRecip = 1.0f / srcImageDimensions.height
    };

    // The destination view is the colour attachment that the pipeline is built for
    auto drawState{std::make_shared<DrawState>(
        gpu, vertexConstants, fragmentConstants,
        GetPipeline(gpu, dstImageView, {blit::SamplerLayoutBinding}, blit::PushConstantRanges)
    )};

    // Point binding 0 of the freshly allocated set at the source view using the sampler for the requested filter
    const vk::Sampler sampler{bilinearFilter ? *bilinearSampler : *nearestSampler};
    vk::DescriptorImageInfo imageInfo{
        .sampler = sampler,
        .imageView = srcImageView->GetView(),
        .imageLayout = vk::ImageLayout::eGeneral
    };

    std::array<vk::WriteDescriptorSet, 1> writes{vk::WriteDescriptorSet{
        .dstSet = *drawState->descriptorSet,
        .dstBinding = 0,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eCombinedImageSampler,
        .pImageInfo = &imageInfo
    }};
    gpu.vkDevice.updateDescriptorSets(writes, nullptr);

    recordCb([state = std::move(drawState)](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass, u32) {
        cycle->AttachObject(state);

        const auto &compiled{state->pipeline};
        commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, compiled.pipeline);
        commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, compiled.pipelineLayout, 0, *state->descriptorSet, nullptr);

        // Vertex constants sit at offset 0 with the fragment constants placed immediately after them
        commandBuffer.pushConstants(compiled.pipelineLayout, vk::ShaderStageFlagBits::eVertex, 0,
                                    vk::ArrayProxy<const blit::VertexPushConstantLayout>{state->vertexPushConstants});
        commandBuffer.pushConstants(compiled.pipelineLayout, vk::ShaderStageFlagBits::eFragment, sizeof(blit::VertexPushConstantLayout),
                                    vk::ArrayProxy<const blit::FragmentPushConstantLayout>{state->fragmentPushConstants});

        // Six vertices make up the two triangles of the destination quad
        commandBuffer.draw(6, 1, 0, 0);
    });
}
HelperShaders::HelperShaders(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem)
: blitHelperShader(gpu, std::move(shaderFileSystem)) {}
}

View File

@ -0,0 +1,86 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <vulkan/vulkan_raii.hpp>
#include <gpu/descriptor_allocator.h>
#include <gpu/cache/graphics_pipeline_cache.h>
namespace skyline::vfs {
class FileSystem;
}
namespace skyline::gpu {
class TextureView;
class GPU;
/**
 * @brief A base class that can be inherited by helper shaders that render to a single colour rendertarget to simplify pipeline creation
 * @note The shader modules are declared before shaderStages as the stage descriptions are built from the module handles during construction
 */
class SimpleColourRTShader {
protected:
vk::raii::ShaderModule vertexShaderModule; //!< Module created from the vertex shader backing passed to the constructor
vk::raii::ShaderModule fragmentShaderModule; //!< Module created from the fragment shader backing passed to the constructor
std::array<vk::PipelineShaderStageCreateInfo, 2> shaderStages; //!< Shader stages for the vertex and fragment shader modules
/**
 * @param vertexShader Backing containing the vertex shader binary
 * @param fragmentShader Backing containing the fragment shader binary
 */
SimpleColourRTShader(GPU &gpu, std::shared_ptr<vfs::Backing> vertexShader, std::shared_ptr<vfs::Backing> fragmentShader);
/**
 * @brief Returns a potentially cached pipeline built according to the supplied input state
 * @param colorAttachment The single colour attachment that the pipeline renders to
 * @param layoutBindings Descriptor set layout bindings used by the shaders
 * @param pushConstantRanges Push constant ranges used by the shaders
 */
cache::GraphicsPipelineCache::CompiledPipeline GetPipeline(GPU &gpu,
TextureView *colorAttachment,
span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges);
};
/**
 * @brief Simple helper shader for blitting a texture to a rendertarget with subpixel-precision
 */
class BlitHelperShader : SimpleColourRTShader {
private:
vk::raii::Sampler bilinearSampler; //!< Sampler used when a blit requests bilinear filtering
vk::raii::Sampler nearestSampler; //!< Sampler used when a blit requests nearest-neighbour filtering
public:
/**
 * @param shaderFileSystem Filesystem that the blit shader binaries are opened from
 */
BlitHelperShader(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem);
/**
 * @brief Floating point equivalent to vk::Rect2D to allow for subpixel-precision blits
 * @note Values are in the same units as the image dimensions passed alongside the rect to Blit
 */
struct BlitRect {
float width;
float height;
float x;
float y;
};
/**
 * @brief Records a sequenced GPU blit operation
 * @param srcRect A subrect of the source input texture that will be blitted from
 * @param dstRect A subrect of the destination input texture that the source subrect will be blitted into
 * @param srcImageDimensions Dimensions of the source image, used to normalise srcRect
 * @param dstImageDimensions Dimensions of the destination image, used to convert dstRect into clip space
 * @param dstSrcScaleFactorX Scale factor in the X direction from the destination image to the source image
 * @param dstSrcScaleFactorY Scale factor in the Y direction from the destination image to the source image
 * @param bilinearFilter Type of filter to use for sampling the source texture, false will use nearest-neighbour and true will use bilinear filtering
 * @param srcImageView View of the texture that is sampled from
 * @param dstImageView View of the texture that is rendered to as the colour attachment
 * @param recordCb Callback used to record the blit commands for sequenced execution on the GPU
 */
void Blit(GPU &gpu, BlitRect srcRect, BlitRect dstRect,
vk::Extent2D srcImageDimensions, vk::Extent2D dstImageDimensions,
float dstSrcScaleFactorX, float dstSrcScaleFactorY,
bool bilinearFilter,
TextureView *srcImageView, TextureView *dstImageView,
std::function<void(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&)> &&recordCb);
};
/**
 * @brief Holds all helper shaders to avoid redundantly recreating them on each usage
 */
struct HelperShaders {
BlitHelperShader blitHelperShader; //!< Shader-based blit implementation
/**
 * @param shaderFileSystem Filesystem that helper shader binaries are loaded from
 */
HelperShaders(GPU &gpu, std::shared_ptr<vfs::FileSystem> shaderFileSystem);
};
}

View File

@ -24,9 +24,9 @@ namespace skyline::kernel {
: nativeLibraryPath(std::move(nativeLibraryPath)), : nativeLibraryPath(std::move(nativeLibraryPath)),
publicAppFilesPath(std::move(publicAppFilesPath)), publicAppFilesPath(std::move(publicAppFilesPath)),
privateAppFilesPath(std::move(privateAppFilesPath)), privateAppFilesPath(std::move(privateAppFilesPath)),
state(this, jvmManager, settings),
deviceTimeZone(std::move(deviceTimeZone)), deviceTimeZone(std::move(deviceTimeZone)),
assetFileSystem(std::move(assetFileSystem)), assetFileSystem(std::move(assetFileSystem)),
state(this, jvmManager, settings),
serviceManager(state) {} serviceManager(state) {}
void OS::Execute(int romFd, loader::RomFormat romType) { void OS::Execute(int romFd, loader::RomFormat romType) {

View File

@ -17,9 +17,9 @@ namespace skyline::kernel {
std::string nativeLibraryPath; //!< The full path to the app's native library directory std::string nativeLibraryPath; //!< The full path to the app's native library directory
std::string publicAppFilesPath; //!< The full path to the app's public files directory std::string publicAppFilesPath; //!< The full path to the app's public files directory
std::string privateAppFilesPath; //!< The full path to the app's private files directory std::string privateAppFilesPath; //!< The full path to the app's private files directory
DeviceState state;
std::string deviceTimeZone; //!< The timezone name (e.g. Europe/London) std::string deviceTimeZone; //!< The timezone name (e.g. Europe/London)
std::shared_ptr<vfs::FileSystem> assetFileSystem; //!< A filesystem to be used for accessing emulator assets (like tzdata) std::shared_ptr<vfs::FileSystem> assetFileSystem; //!< A filesystem to be used for accessing emulator assets (like tzdata)
DeviceState state;
service::ServiceManager serviceManager; service::ServiceManager serviceManager;
/** /**

View File

@ -0,0 +1,21 @@
#version 460
layout (binding = 0, set = 0) uniform sampler2D src;
layout (location = 0) in vec2 dstUV;
layout (location = 0) out vec4 colour;
// Fragment stage push constants, the explicit 16 byte offset places them after the two vec2s that are pushed for the vertex stage
layout (push_constant) uniform constants {
layout (offset = 16)
vec2 srcOriginUV; // Added to the scaled destination coordinate to produce the source UV
vec2 dstSrcScaleFactor; // Per-axis factor that the destination coordinate is multiplied by
float srcHeightRecip; // Y step applied for each whole unit by which the source U coordinate overflows
} PC;
void main()
{
    // Map the interpolated destination coordinate into the source texture's UV space
    vec2 srcUV = dstUV * PC.dstSrcScaleFactor + PC.srcOriginUV;

    // Account for out of bounds blits: every whole unit that U overflows by advances V by one source line, leaving only the fractional part of U
    float overflowedLines = floor(srcUV.x);
    srcUV.y += overflowedLines * PC.srcHeightRecip;
    srcUV.x -= overflowedLines;

    colour = texture(src, srcUV);
}

View File

@ -0,0 +1,22 @@
#version 460
layout (location = 0) out vec2 dstPosition;
// Vertex stage push constants, located at the start of the push constant space
layout (push_constant) uniform constants {
vec2 dstOriginClipSpace; // Clip space position of the quad corner at (0, 0)
vec2 dstDimensionsClipSpace; // Clip space extent of the quad, scaled by the unit corner coordinate
} PC;
void main() {
    // Corners of a unit quad laid out as two triangles, indexed directly by the vertex index
    const vec2 unitQuadCorners[6] = vec2[6](
        vec2(1, 0),
        vec2(1, 1),
        vec2(0, 1),
        vec2(0, 1),
        vec2(0, 0),
        vec2(1, 0)
    );

    vec2 corner = unitQuadCorners[gl_VertexIndex];

    // The unit corner is passed on for interpolation and also scaled into the destination rectangle in clip space
    dstPosition = corner;
    gl_Position = vec4(PC.dstOriginClipSpace + PC.dstDimensionsClipSpace * corner, 0, 1);
}