Move graphics pipelines away from storing the hades shader info struct

By storing only what we need, and mirroring the descriptor structs in a much more tightly packed form (while keeping the same member names), we can reduce pipeline memory to about 1/3 of what it was before.
This commit is contained in:
Billy Laws 2023-01-18 20:04:50 +00:00
parent dd92cb1536
commit 9d3a9f63d5
5 changed files with 259 additions and 102 deletions

View File

@ -186,7 +186,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp
${source_DIR}/skyline/gpu/shader_manager.cpp ${source_DIR}/skyline/gpu/shader_manager.cpp
${source_DIR}/skyline/gpu/pipeline_cache_manager.cpp ${source_DIR}/skyline/gpu/pipeline_cache_manager.cpp
${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp ${source_DIR}/skyline/gpu/graphics_pipeline_assembler.cpp
${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp ${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp
${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp ${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp
${source_DIR}/skyline/gpu/interconnect/fermi_2d.cpp ${source_DIR}/skyline/gpu/interconnect/fermi_2d.cpp

View File

@ -17,13 +17,13 @@ namespace skyline::gpu::interconnect {
}; };
}; };
static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, const Shader::Info &info, BufferView view, size_t idx) { static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, span<const u32, Shader::Info::MAX_CBUFS> cbufSizes, BufferView view, size_t idx) {
if (!view) // Return a dummy buffer if the constant buffer isn't bound if (!view) // Return a dummy buffer if the constant buffer isn't bound
return BufferBinding{ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, PAGE_SIZE).buffer, 0, PAGE_SIZE}; return BufferBinding{ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, PAGE_SIZE).buffer, 0, PAGE_SIZE};
ctx.executor.AttachBuffer(view); ctx.executor.AttachBuffer(view);
size_t sizeOverride{std::min<size_t>(info.constant_buffer_used_sizes[idx], view.size)}; size_t sizeOverride{std::min<size_t>(cbufSizes[idx], view.size)};
if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag, sizeOverride)}) { if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag, sizeOverride)}) {
return megaBufferBinding; return megaBufferBinding;
} else { } else {
@ -32,7 +32,7 @@ namespace skyline::gpu::interconnect {
} }
} }
static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const Shader::StorageBufferDescriptor &desc, ConstantBuffer &cbuf, CachedMappedBufferView &cachedView) { static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const auto &desc, ConstantBuffer &cbuf, CachedMappedBufferView &cachedView) {
struct SsboDescriptor { struct SsboDescriptor {
u64 address; u64 address;
u32 size; u32 size;
@ -59,8 +59,7 @@ namespace skyline::gpu::interconnect {
return view; return view;
} }
template<typename CbufSetT> static BindlessHandle ReadBindlessHandle(InterconnectContext &ctx, auto &constantBuffers, const auto &desc, size_t arrayIdx) {
static BindlessHandle ReadBindlessHandle(InterconnectContext &ctx, CbufSetT &constantBuffers, const auto &desc, size_t arrayIdx) {
ConstantBuffer &primaryCbuf{constantBuffers[desc.cbuf_index]}; ConstantBuffer &primaryCbuf{constantBuffers[desc.cbuf_index]};
size_t elemOffset{arrayIdx << desc.size_shift}; size_t elemOffset{arrayIdx << desc.size_shift};
size_t primaryCbufOffset{desc.cbuf_offset + elemOffset}; size_t primaryCbufOffset{desc.cbuf_offset + elemOffset};
@ -78,7 +77,7 @@ namespace skyline::gpu::interconnect {
return {.raw = primaryVal}; return {.raw = primaryVal};
} }
static std::pair<vk::DescriptorImageInfo, TextureView *> GetTextureBinding(InterconnectContext &ctx, const Shader::TextureDescriptor &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) { static std::pair<vk::DescriptorImageInfo, TextureView *> GetTextureBinding(InterconnectContext &ctx, const auto &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) {
auto sampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)}; auto sampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)};
auto texture{textures.GetTexture(ctx, handle.textureIndex, desc.type)}; auto texture{textures.GetTexture(ctx, handle.textureIndex, desc.type)};
ctx.executor.AttachTexture(texture); ctx.executor.AttachTexture(texture);

View File

@ -175,7 +175,7 @@ namespace skyline::gpu::interconnect::kepler_compute {
writeBufferDescs(vk::DescriptorType::eUniformBuffer, shaderStage.info.constant_buffer_descriptors, writeBufferDescs(vk::DescriptorType::eUniformBuffer, shaderStage.info.constant_buffer_descriptors,
[&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) { [&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) {
size_t cbufIdx{desc.index + arrayIdx}; size_t cbufIdx{desc.index + arrayIdx};
return GetConstantBufferBinding(ctx, shaderStage.info, constantBuffers[cbufIdx].view, cbufIdx); return GetConstantBufferBinding(ctx, shaderStage.info.constant_buffer_used_sizes, constantBuffers[cbufIdx].view, cbufIdx);
}); });
writeBufferDescs(vk::DescriptorType::eStorageBuffer, shaderStage.info.storage_buffers_descriptors, writeBufferDescs(vk::DescriptorType::eStorageBuffer, shaderStage.info.storage_buffers_descriptors,

View File

@ -9,10 +9,18 @@
#include <gpu/graphics_pipeline_assembler.h> #include <gpu/graphics_pipeline_assembler.h>
#include <gpu/shader_manager.h> #include <gpu/shader_manager.h>
#include <gpu.h> #include <gpu.h>
#include <vulkan/vulkan_enums.hpp>
#include "graphics_pipeline_state_accessor.h" #include "graphics_pipeline_state_accessor.h"
#include "pipeline_manager.h" #include "pipeline_manager.h"
#include "soc/gm20b/engines/maxwell/types.h"
namespace skyline::gpu::interconnect::maxwell3d { namespace skyline::gpu::interconnect::maxwell3d {
/**
 * @brief The per-stage output of shader building, held in a local array while a Pipeline is being constructed
 * @note The array these live in is value-initialised, so a stage that was skipped keeps default (empty) members
 */
struct ShaderStage {
vk::ShaderStageFlagBits stage; //!< Compared against an empty vk::ShaderStageFlagBits{} by the Pipeline constructor to decide whether the stage is active
vk::ShaderModule module; //!< The Vulkan shader module for this stage
Shader::Info info; //!< Shader compiler info for this stage, the source of the descriptor lists used to build the pipeline's descriptor info
};
static constexpr Shader::Stage ConvertCompilerShaderStage(engine::Pipeline::Shader::Type stage) { static constexpr Shader::Stage ConvertCompilerShaderStage(engine::Pipeline::Shader::Type stage) {
switch (stage) { switch (stage) {
case engine::Pipeline::Shader::Type::VertexCullBeforeFetch: case engine::Pipeline::Shader::Type::VertexCullBeforeFetch:
@ -186,7 +194,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
return info; return info;
} }
static std::array<Pipeline::ShaderStage, engine::ShaderStageCount> MakePipelineShaders(GPU &gpu, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState) { static std::array<ShaderStage, engine::ShaderStageCount> MakePipelineShaders(GPU &gpu, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState) {
gpu.shader->ResetPools(); gpu.shader->ResetPools();
using PipelineStage = engine::Pipeline::Shader::Type; using PipelineStage = engine::Pipeline::Shader::Type;
@ -233,7 +241,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
Shader::Backend::Bindings bindings{}; Shader::Backend::Bindings bindings{};
Shader::IR::Program *lastProgram{}; Shader::IR::Program *lastProgram{};
std::array<Pipeline::ShaderStage, engine::ShaderStageCount> shaderStages{}; std::array<ShaderStage, engine::ShaderStageCount> shaderStages{};
for (u32 i{stageIdx(ignoreVertexCullBeforeFetch ? PipelineStage::Vertex : PipelineStage::VertexCullBeforeFetch)}; i < engine::PipelineCount; i++) { for (u32 i{stageIdx(ignoreVertexCullBeforeFetch ? PipelineStage::Vertex : PipelineStage::VertexCullBeforeFetch)}; i < engine::PipelineCount; i++) {
if (!packedState.shaderHashes[i] && !(i == stageIdx(PipelineStage::Geometry) && layerConversionSourceProgram)) if (!packedState.shaderHashes[i] && !(i == stageIdx(PipelineStage::Geometry) && layerConversionSourceProgram))
@ -250,9 +258,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
return shaderStages; return shaderStages;
} }
static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const std::array<Pipeline::ShaderStage, engine::ShaderStageCount> &shaderStages, bool needsIndividualTextureBindingWrites) { static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const std::array<ShaderStage, engine::ShaderStageCount> &shaderStages, bool needsIndividualTextureBindingWrites) {
Pipeline::DescriptorInfo descriptorInfo{}; Pipeline::DescriptorInfo descriptorInfo{};
u32 bindingIndex{}; u16 bindingIndex{};
for (size_t i{}; i < engine::ShaderStageCount; i++) { for (size_t i{}; i < engine::ShaderStageCount; i++) {
const auto &stage{shaderStages[i]}; const auto &stage{shaderStages[i]};
@ -261,11 +269,12 @@ namespace skyline::gpu::interconnect::maxwell3d {
auto &stageDescInfo{descriptorInfo.stages[i]}; auto &stageDescInfo{descriptorInfo.stages[i]};
auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u32 &count, auto &&descCb, bool individualDescWrites = false) { auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u16 &count, auto &outputDescs, auto &&descCb, bool individualDescWrites = false) {
descriptorInfo.totalWriteDescCount += individualDescWrites ? descs.size() : ((descs.size() > 0) ? 1 : 0); descriptorInfo.totalWriteDescCount += individualDescWrites ? descs.size() : ((descs.size() > 0) ? 1 : 0);
for (u32 descIdx{}; descIdx < descs.size(); descIdx++) { for (u16 descIdx{}; descIdx < descs.size(); descIdx++) {
const auto &desc{descs[descIdx]}; const auto &desc{descs[descIdx]};
outputDescs.emplace_back(desc);
count += desc.count; count += desc.count;
descCb(desc, descIdx); descCb(desc, descIdx);
@ -287,32 +296,42 @@ namespace skyline::gpu::interconnect::maxwell3d {
} }
}}; }};
pushBindings(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors, stageDescInfo.uniformBufferDescCount, [&](const Shader::ConstantBufferDescriptor &desc, u32 descIdx) { pushBindings(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors,
for (u32 cbufIdx{desc.index}; cbufIdx < desc.index + desc.count; cbufIdx++) { stageDescInfo.uniformBufferDescTotalCount, stageDescInfo.uniformBufferDescs,
[&](const Shader::ConstantBufferDescriptor &desc, u16 descIdx) {
for (u16 cbufIdx{static_cast<u16>(desc.index)}; cbufIdx < desc.index + desc.count; cbufIdx++) {
auto &usage{stageDescInfo.cbufUsages[cbufIdx]}; auto &usage{stageDescInfo.cbufUsages[cbufIdx]};
usage.uniformBuffers.push_back({bindingIndex, descIdx}); usage.uniformBuffers.push_back({bindingIndex, descIdx});
usage.totalBufferDescCount += desc.count; usage.totalBufferDescCount += desc.count;
usage.writeDescCount++; usage.writeDescCount++;
} }
}); });
pushBindings(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount, [&](const Shader::StorageBufferDescriptor &desc, u32 descIdx) { pushBindings(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors,
stageDescInfo.storageBufferDescTotalCount, stageDescInfo.storageBufferDescs,
[&](const Shader::StorageBufferDescriptor &desc, u16 descIdx) {
auto &usage{stageDescInfo.cbufUsages[desc.cbuf_index]}; auto &usage{stageDescInfo.cbufUsages[desc.cbuf_index]};
usage.storageBuffers.push_back({bindingIndex, descIdx, descriptorInfo.totalStorageBufferCount}); usage.storageBuffers.push_back({bindingIndex, descIdx, descriptorInfo.totalStorageBufferCount});
usage.totalBufferDescCount += desc.count; usage.totalBufferDescCount += desc.count;
usage.writeDescCount++; usage.writeDescCount++;
descriptorInfo.totalStorageBufferCount += desc.count; descriptorInfo.totalStorageBufferCount += desc.count;
}); });
descriptorInfo.totalBufferDescCount += stageDescInfo.uniformBufferDescCount + stageDescInfo.storageBufferDescCount; descriptorInfo.totalBufferDescCount += stageDescInfo.uniformBufferDescTotalCount + stageDescInfo.storageBufferDescTotalCount;
pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, stageDescInfo.uniformTexelBufferDescCount, [](const auto &, u32) { pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors,
stageDescInfo.uniformTexelBufferDescTotalCount, stageDescInfo.uniformTexelBufferDescs,
[](const auto &, u32) {
Logger::Warn("Texture buffer descriptors are not supported"); Logger::Warn("Texture buffer descriptors are not supported");
}); });
pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, stageDescInfo.storageTexelBufferDescCount, [](const auto &, u32) { pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors,
stageDescInfo.storageTexelBufferDescTotalCount, stageDescInfo.storageTexelBufferDescs,
[](const auto &, u32) {
Logger::Warn("Image buffer descriptors are not supported"); Logger::Warn("Image buffer descriptors are not supported");
}); });
descriptorInfo.totalTexelBufferDescCount += stageDescInfo.uniformTexelBufferDescCount + stageDescInfo.storageTexelBufferDescCount; descriptorInfo.totalTexelBufferDescCount += stageDescInfo.uniformTexelBufferDescTotalCount + stageDescInfo.storageTexelBufferDescTotalCount;
pushBindings(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount, [&](const Shader::TextureDescriptor &desc, u32 descIdx) { pushBindings(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors,
stageDescInfo.combinedImageSamplerDescTotalCount, stageDescInfo.combinedImageSamplerDescs,
[&](const Shader::TextureDescriptor &desc, u16 descIdx) {
auto addUsage{[&](auto idx) { auto addUsage{[&](auto idx) {
auto &usage{stageDescInfo.cbufUsages[idx]}; auto &usage{stageDescInfo.cbufUsages[idx]};
usage.combinedImageSamplers.push_back({bindingIndex, descIdx, descriptorInfo.totalCombinedImageSamplerCount}); usage.combinedImageSamplers.push_back({bindingIndex, descIdx, descriptorInfo.totalCombinedImageSamplerCount});
@ -326,10 +345,12 @@ namespace skyline::gpu::interconnect::maxwell3d {
descriptorInfo.totalCombinedImageSamplerCount += desc.count; descriptorInfo.totalCombinedImageSamplerCount += desc.count;
}, needsIndividualTextureBindingWrites); }, needsIndividualTextureBindingWrites);
pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors, stageDescInfo.storageImageDescCount, [](const auto &, u32) { pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors,
stageDescInfo.storageImageDescTotalCount, stageDescInfo.storageImageDescs,
[](const auto &, u16) {
Logger::Warn("Image descriptors are not supported"); Logger::Warn("Image descriptors are not supported");
}); });
descriptorInfo.totalImageDescCount += stageDescInfo.combinedImageSamplerDescCount + stageDescInfo.storageImageDescCount; descriptorInfo.totalImageDescCount += stageDescInfo.combinedImageSamplerDescTotalCount + stageDescInfo.storageImageDescTotalCount;
} }
return descriptorInfo; return descriptorInfo;
} }
@ -623,10 +644,15 @@ namespace skyline::gpu::interconnect::maxwell3d {
} }
Pipeline::Pipeline(GPU &gpu, PipelineStateAccessor &accessor, const PackedPipelineState &packedState) Pipeline::Pipeline(GPU &gpu, PipelineStateAccessor &accessor, const PackedPipelineState &packedState)
: sourcePackedState{packedState}, : sourcePackedState{packedState} {
shaderStages{MakePipelineShaders(gpu, accessor, sourcePackedState)}, auto shaderStages{MakePipelineShaders(gpu, accessor, sourcePackedState)};
descriptorInfo{MakePipelineDescriptorInfo(shaderStages, gpu.traits.quirks.needsIndividualTextureBindingWrites)}, descriptorInfo = MakePipelineDescriptorInfo(shaderStages, gpu.traits.quirks.needsIndividualTextureBindingWrites);
compiledPipeline{MakeCompiledPipeline(gpu, sourcePackedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings)} { compiledPipeline = MakeCompiledPipeline(gpu, sourcePackedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings);
for (u32 i{}; i < engine::ShaderStageCount; i++)
if (shaderStages[i].stage != vk::ShaderStageFlagBits{})
stageMask |= 1 << i;
storageBufferViews.resize(descriptorInfo.totalStorageBufferCount); storageBufferViews.resize(descriptorInfo.totalStorageBufferCount);
accessor.MarkComplete(); accessor.MarkComplete();
} }
@ -665,11 +691,14 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (auto it{bindingMatchCache.find(other)}; it != bindingMatchCache.end()) if (auto it{bindingMatchCache.find(other)}; it != bindingMatchCache.end())
return it->second; return it->second;
for (size_t i{}; i < shaderStages.size(); i++) { if (stageMask != other->stageMask) {
if (!shaderStages[i].BindingsEqual(other->shaderStages[i])) { bindingMatchCache[other] = false;
bindingMatchCache[other] = false; return false;
return false; }
}
if (descriptorInfo != other->descriptorInfo) {
bindingMatchCache[other] = false;
return false;
} }
bindingMatchCache[other] = true; bindingMatchCache[other] = true;
@ -747,36 +776,35 @@ namespace skyline::gpu::interconnect::maxwell3d {
} }
}}; }};
for (size_t i{}; i < shaderStages.size(); i++) { for (size_t i{}; i < engine::ShaderStageCount; i++) {
const auto &stage{shaderStages[i]}; if (!(stageMask & (1 << i)))
if (!stage.module)
continue; continue;
const auto &stageDescInfo{descriptorInfo.stages[i]}; const auto &stage{descriptorInfo.stages[i]};
writeBufferDescs(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors, stageDescInfo.uniformBufferDescCount, writeBufferDescs(vk::DescriptorType::eUniformBuffer, stage.uniformBufferDescs, stage.uniformBufferDescTotalCount,
[&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) { [&](const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) {
size_t cbufIdx{desc.index + arrayIdx}; size_t cbufIdx{desc.index + arrayIdx};
return GetConstantBufferBinding(ctx, stage.info, constantBuffers[i][cbufIdx].view, cbufIdx); return GetConstantBufferBinding(ctx, {stage.constantBufferUsedSizes}, constantBuffers[i][cbufIdx].view, cbufIdx);
}); });
writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount, writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.storageBufferDescs, stage.storageBufferDescTotalCount,
[&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) { [&](const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) {
return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx++]); return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx++]);
}); });
bindingIdx += stageDescInfo.uniformTexelBufferDescCount; bindingIdx += stage.uniformTexelBufferDescs.size();
bindingIdx += stageDescInfo.storageTexelBufferDescCount; bindingIdx += stage.storageTexelBufferDescs.size();
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount, writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.combinedImageSamplerDescs, stage.combinedImageSamplerDescTotalCount,
[&](const Shader::TextureDescriptor &desc, size_t arrayIdx) { [&](const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) {
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)}; BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)};
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)}; auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
sampledImages[combinedImageSamplerIdx++] = binding.second; sampledImages[combinedImageSamplerIdx++] = binding.second;
return binding.first; return binding.first;
}, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites); }, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites);
bindingIdx += stageDescInfo.storageImageDescCount; bindingIdx += stage.storageImageDescs.size();
} }
// Since we don't implement all descriptor types the number of writes might not match what's expected // Since we don't implement all descriptor types the number of writes might not match what's expected
@ -787,8 +815,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
.writes = writes.first(writeIdx), .writes = writes.first(writeIdx),
.bufferDescs = bufferDescs.first(bufferIdx), .bufferDescs = bufferDescs.first(bufferIdx),
.bufferDescDynamicBindings = bufferDescDynamicBindings.first(bufferIdx), .bufferDescDynamicBindings = bufferDescDynamicBindings.first(bufferIdx),
.pipelineLayout = compiledPipeline.pipelineLayout, .pipelineLayout = *compiledPipeline.pipelineLayout,
.descriptorSetLayout = compiledPipeline.descriptorSetLayout, .descriptorSetLayout = *compiledPipeline.descriptorSetLayout,
.bindPoint = vk::PipelineBindPoint::eGraphics, .bindPoint = vk::PipelineBindPoint::eGraphics,
.descriptorSetIndex = 0, .descriptorSetIndex = 0,
}); });
@ -803,7 +831,6 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (!cbufUsageInfo.writeDescCount) if (!cbufUsageInfo.writeDescCount)
return nullptr; return nullptr;
const auto &shaderInfo{shaderStages[stageIndex].info};
auto &stageConstantBuffers{constantBuffers[stageIndex]}; auto &stageConstantBuffers{constantBuffers[stageIndex]};
u32 writeIdx{}; u32 writeIdx{};
@ -846,19 +873,19 @@ namespace skyline::gpu::interconnect::maxwell3d {
} }
}}; }};
writeDescs.operator()<false, true>(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, shaderInfo.constant_buffer_descriptors, writeDescs.operator()<false, true>(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, stageDescInfo.uniformBufferDescs,
[&](auto usage, const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) -> DynamicBufferBinding { [&](auto usage, const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) -> DynamicBufferBinding {
size_t cbufIdx{desc.index + arrayIdx}; size_t cbufIdx{desc.index + arrayIdx};
return GetConstantBufferBinding(ctx, shaderInfo, stageConstantBuffers[cbufIdx].view, cbufIdx); return GetConstantBufferBinding(ctx, {stageDescInfo.constantBufferUsedSizes}, stageConstantBuffers[cbufIdx].view, cbufIdx);
}); });
writeDescs.operator()<false, true>(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, shaderInfo.storage_buffers_descriptors, writeDescs.operator()<false, true>(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, stageDescInfo.storageBufferDescs,
[&](auto usage, const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) { [&](auto usage, const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) {
return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.entirePipelineIdx + arrayIdx]); return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.entirePipelineIdx + arrayIdx]);
}); });
writeDescs.operator()<true, false>(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, shaderInfo.texture_descriptors, writeDescs.operator()<true, false>(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, stageDescInfo.combinedImageSamplerDescs,
[&](auto usage, const Shader::TextureDescriptor &desc, size_t arrayIdx) { [&](auto usage, const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) {
BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)}; BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)};
auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)}; auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
sampledImages[usage.entirePipelineIdx + arrayIdx] = binding.second; sampledImages[usage.entirePipelineIdx + arrayIdx] = binding.second;
@ -874,8 +901,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
.writes = writes.first(writeIdx), .writes = writes.first(writeIdx),
.bufferDescs = bufferDescs.first(bufferIdx), .bufferDescs = bufferDescs.first(bufferIdx),
.bufferDescDynamicBindings = bufferDescDynamicBindings.first(bufferIdx), .bufferDescDynamicBindings = bufferDescDynamicBindings.first(bufferIdx),
.pipelineLayout = compiledPipeline.pipelineLayout, .pipelineLayout = *compiledPipeline.pipelineLayout,
.descriptorSetLayout = compiledPipeline.descriptorSetLayout, .descriptorSetLayout = *compiledPipeline.descriptorSetLayout,
.bindPoint = vk::PipelineBindPoint::eGraphics, .bindPoint = vk::PipelineBindPoint::eGraphics,
.descriptorSetIndex = 0, .descriptorSetIndex = 0,
}); });

View File

@ -20,66 +20,191 @@ namespace skyline::gpu {
namespace skyline::gpu::interconnect::maxwell3d { namespace skyline::gpu::interconnect::maxwell3d {
class Pipeline { class Pipeline {
public: public:
struct ShaderStage { /**
vk::ShaderStageFlagBits stage; * @brief A monolithic struct containing all the descriptor state of the pipeline
vk::ShaderModule module; */
Shader::Info info;
/**
* @return Whether the bindings for this stage match those of the input stage
*/
bool BindingsEqual(const ShaderStage &other) const {
return info.constant_buffer_descriptors == other.info.constant_buffer_descriptors &&
info.storage_buffers_descriptors == other.info.storage_buffers_descriptors &&
info.texture_buffer_descriptors == other.info.texture_buffer_descriptors &&
info.image_buffer_descriptors == other.info.image_buffer_descriptors &&
info.texture_descriptors == other.info.texture_descriptors &&
info.image_descriptors == other.info.image_descriptors;
}
};
struct DescriptorInfo { struct DescriptorInfo {
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBindings; std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBindings;
struct StageDescriptorInfo { struct StageDescriptorInfo {
u32 uniformBufferDescCount; // Unwrapped counts (counting each array element as a separate descriptor) for the below desc structs
u32 storageBufferDescCount; u16 uniformBufferDescTotalCount;
u32 uniformTexelBufferDescCount; u16 storageBufferDescTotalCount;
u32 storageTexelBufferDescCount; u16 uniformTexelBufferDescTotalCount;
u32 combinedImageSamplerDescCount; u16 storageTexelBufferDescTotalCount;
u32 storageImageDescCount; u16 combinedImageSamplerDescTotalCount;
u16 storageImageDescTotalCount;
// Below are descriptor structs designed to be compatible with hades (hence the use of snake_case) but in a more compacted format to reduce memory usage
/**
 * @brief Compact mirror of Shader::ConstantBufferDescriptor that keeps only the index and count
 * @note Both members are narrowed to u8 with static_cast, values that don't fit are silently truncated - assumes they always fit, TODO confirm
 */
struct UniformBufferDesc {
u8 index; //!< Index of the first constant buffer covered by this descriptor
u8 count; //!< Number of consecutive constant buffers covered, starting at index
// Converting constructor from the hades descriptor, intentionally non-explicit so descriptors can be emplaced directly
UniformBufferDesc(const Shader::ConstantBufferDescriptor &desc)
: index{static_cast<u8>(desc.index)},
count{static_cast<u8>(desc.count)} {}
// Memberwise comparison, also provides the equality used by the enclosing struct's defaulted operator==
auto operator<=>(const UniformBufferDesc &) const = default;
};
boost::container::static_vector<UniformBufferDesc, engine::ShaderStageConstantBufferCount> uniformBufferDescs;
/**
 * @brief Compact mirror of Shader::StorageBufferDescriptor, members are ordered widest-first to keep the struct small
 * @note cbuf_index and count are narrowed to u8 with static_cast - assumes the source values always fit, TODO confirm
 */
struct StorageBufferDesc {
u32 cbuf_offset; //!< Offset within the constant buffer - presumably where the SSBO address/size pair is read from, verify against GetStorageBufferBinding
u8 cbuf_index; //!< Index of the stage constant buffer that this storage buffer is looked up through
bool is_written; //!< Copied verbatim from the hades descriptor
u8 count; //!< Number of array elements in this descriptor
// Converting constructor from the hades descriptor, intentionally non-explicit so descriptors can be emplaced directly
StorageBufferDesc(const Shader::StorageBufferDescriptor &desc)
: cbuf_offset{desc.cbuf_offset},
cbuf_index{static_cast<u8>(desc.cbuf_index)},
is_written{desc.is_written},
count{static_cast<u8>(desc.count)} {}
// Memberwise comparison, also provides the equality used by the enclosing struct's defaulted operator==
auto operator<=>(const StorageBufferDesc &) const = default;
};
boost::container::small_vector<StorageBufferDesc, 8> storageBufferDescs;
/**
 * @brief Compact mirror of Shader::TextureBufferDescriptor
 * @note Texture buffer descriptors are recorded but not yet bound: building the descriptor info logs a "not supported" warning for each one
 * @note All index/shift/count members are narrowed to u8 with static_cast - assumes the source values always fit, TODO confirm
 */
struct UniformTexelBufferDesc {
u32 cbuf_offset; //!< Offset of the handle within the primary constant buffer
u32 secondary_cbuf_offset; //!< Offset of the handle within the secondary constant buffer, presumably only meaningful when has_secondary is set
bool has_secondary; //!< Copied verbatim from the hades descriptor
u8 cbuf_index; //!< Index of the primary constant buffer
u8 shift_left; //!< Copied from the hades descriptor (narrowed)
u8 secondary_cbuf_index; //!< Index of the secondary constant buffer
u8 secondary_shift_left; //!< Copied from the hades descriptor (narrowed)
u8 count; //!< Number of array elements in this descriptor
u8 size_shift; //!< Copied from the hades descriptor (narrowed)
// Converting constructor from the hades descriptor, intentionally non-explicit so descriptors can be emplaced directly
UniformTexelBufferDesc(const Shader::TextureBufferDescriptor &desc)
: cbuf_offset{desc.cbuf_offset},
secondary_cbuf_offset{desc.secondary_cbuf_offset},
has_secondary{desc.has_secondary},
cbuf_index{static_cast<u8>(desc.cbuf_index)},
shift_left{static_cast<u8>(desc.shift_left)},
secondary_cbuf_index{static_cast<u8>(desc.secondary_cbuf_index)},
secondary_shift_left{static_cast<u8>(desc.secondary_shift_left)},
count{static_cast<u8>(desc.count)},
size_shift{static_cast<u8>(desc.size_shift)} {}
// Memberwise comparison, also provides the equality used by the enclosing struct's defaulted operator==
auto operator<=>(const UniformTexelBufferDesc &) const = default;
};
std::vector<UniformTexelBufferDesc> uniformTexelBufferDescs;
/**
 * @brief Compact mirror of Shader::ImageBufferDescriptor
 * @note Image buffer descriptors are recorded but not yet bound: building the descriptor info logs a "not supported" warning for each one
 * @note cbuf_index, count and size_shift are narrowed to u8 with static_cast - assumes the source values always fit, TODO confirm
 */
struct StorageTexelBufferDesc {
Shader::ImageFormat format; //!< Copied verbatim from the hades descriptor
u32 cbuf_offset; //!< Offset of the handle within the constant buffer
bool is_read; //!< Copied verbatim from the hades descriptor
bool is_written; //!< Copied verbatim from the hades descriptor
u8 cbuf_index; //!< Index of the constant buffer holding the handle
u8 count; //!< Number of array elements in this descriptor
u8 size_shift; //!< Copied from the hades descriptor (narrowed)
// Converting constructor from the hades descriptor, intentionally non-explicit so descriptors can be emplaced directly
StorageTexelBufferDesc(const Shader::ImageBufferDescriptor &desc)
: format{desc.format},
cbuf_offset{desc.cbuf_offset},
is_read{desc.is_read},
is_written{desc.is_written},
cbuf_index{static_cast<u8>(desc.cbuf_index)},
count{static_cast<u8>(desc.count)},
size_shift{static_cast<u8>(desc.size_shift)} {}
// Memberwise comparison, also provides the equality used by the enclosing struct's defaulted operator==
auto operator<=>(const StorageTexelBufferDesc &) const = default;
};
std::vector<StorageTexelBufferDesc> storageTexelBufferDescs;
/**
 * @brief Compact mirror of Shader::TextureDescriptor, passed to the generic bindless handle and texture binding helpers in place of the hades struct
 * @note Member names must stay identical to the hades descriptor since those helpers take the descriptor as `const auto &` and access members by name
 * @note All index/shift/count members are narrowed to u8 with static_cast - assumes the source values always fit, TODO confirm
 */
struct CombinedImageSamplerDesc {
Shader::TextureType type; //!< Texture type, forwarded to the texture lookup when creating the binding
u32 cbuf_offset; //!< Offset of the handle within the primary constant buffer, the array element offset is added on top of this
u32 secondary_cbuf_offset; //!< Offset of the handle within the secondary constant buffer, presumably only meaningful when has_secondary is set
bool has_secondary; //!< Copied verbatim from the hades descriptor
u8 cbuf_index; //!< Index of the primary constant buffer that the handle is read from
u8 shift_left; //!< Copied from the hades descriptor (narrowed)
u8 secondary_cbuf_index; //!< Index of the secondary constant buffer
u8 secondary_shift_left; //!< Copied from the hades descriptor (narrowed)
u8 count; //!< Number of array elements in this descriptor
u8 size_shift; //!< Array indices are shifted left by this amount to get the element's offset in the constant buffer
// Converting constructor from the hades descriptor, intentionally non-explicit so descriptors can be emplaced directly
CombinedImageSamplerDesc(const Shader::TextureDescriptor &desc)
: type{desc.type},
cbuf_offset{desc.cbuf_offset},
secondary_cbuf_offset{desc.secondary_cbuf_offset},
has_secondary{desc.has_secondary},
cbuf_index{static_cast<u8>(desc.cbuf_index)},
shift_left{static_cast<u8>(desc.shift_left)},
secondary_cbuf_index{static_cast<u8>(desc.secondary_cbuf_index)},
secondary_shift_left{static_cast<u8>(desc.secondary_shift_left)},
count{static_cast<u8>(desc.count)},
size_shift{static_cast<u8>(desc.size_shift)} {}
// Memberwise comparison, also provides the equality used by the enclosing struct's defaulted operator==
auto operator<=>(const CombinedImageSamplerDesc &) const = default;
};
boost::container::small_vector<CombinedImageSamplerDesc, 10> combinedImageSamplerDescs;
/**
 * @brief Compact mirror of Shader::ImageDescriptor
 * @note Image descriptors are recorded but not yet bound: building the descriptor info logs a "not supported" warning for each one
 * @note cbuf_index, count and size_shift are narrowed to u8 with static_cast - assumes the source values always fit, TODO confirm
 */
struct StorageImageDesc {
Shader::TextureType type; //!< Copied verbatim from the hades descriptor
Shader::ImageFormat format; //!< Copied verbatim from the hades descriptor
u32 cbuf_offset; //!< Offset of the handle within the constant buffer
// NOTE(review): this is the only mirrored member that isn't snake_case (hades names it is_read, as does StorageTexelBufferDesc), generic code accessing desc.is_read won't compile against this struct - consider renaming
bool isRead; //!< Mirrors is_read from the hades descriptor
bool is_written; //!< Copied verbatim from the hades descriptor
u8 cbuf_index; //!< Index of the constant buffer holding the handle
u8 count; //!< Number of array elements in this descriptor
u8 size_shift; //!< Copied from the hades descriptor (narrowed)
// Converting constructor from the hades descriptor, intentionally non-explicit so descriptors can be emplaced directly
StorageImageDesc(const Shader::ImageDescriptor &desc)
: type{desc.type},
format{desc.format},
cbuf_offset{desc.cbuf_offset},
isRead{desc.is_read},
is_written{desc.is_written},
cbuf_index{static_cast<u8>(desc.cbuf_index)},
count{static_cast<u8>(desc.count)},
size_shift{static_cast<u8>(desc.size_shift)} {}
// Memberwise comparison, also provides the equality used by the enclosing struct's defaulted operator==
auto operator<=>(const StorageImageDesc &) const = default;
};
boost::container::small_vector<StorageImageDesc, 1> storageImageDescs;
std::array<u32, Shader::Info::MAX_CBUFS> constantBufferUsedSizes;
/** /**
* @brief Keeps track of all bindings that are dependent on a given constant buffer index to allow for quick binding * @brief Keeps track of all bindings that are dependent on a given constant buffer index to allow for quick binding
*/ */
struct ConstantBufferDescriptorUsages { struct ConstantBufferDescriptorUsages {
struct Usage { struct Usage {
u32 binding; //!< Vulkan binding index u16 binding; //!< Vulkan binding index
u32 shaderDescIdx; //!< Index of the descriptor in the appropriate shader info member u16 shaderDescIdx; //!< Index of the descriptor in the appropriate shader info member
u32 entirePipelineIdx; //!< Index of the image/storage buffer in the entire pipeline u16 entirePipelineIdx; //!< Index of the image/storage buffer in the entire pipeline
bool operator==(const Usage&) const = default;
}; };
boost::container::small_vector<Usage, 2> uniformBuffers; boost::container::small_vector<Usage, 2> uniformBuffers;
boost::container::small_vector<Usage, 2> storageBuffers; boost::container::small_vector<Usage, 2> storageBuffers;
boost::container::small_vector<Usage, 2> combinedImageSamplers; boost::container::small_vector<Usage, 2> combinedImageSamplers;
u32 totalBufferDescCount; u16 totalBufferDescCount;
u32 totalImageDescCount; u16 totalImageDescCount;
u32 writeDescCount; u16 writeDescCount;
bool operator==(const ConstantBufferDescriptorUsages&) const = default;
}; };
std::array<ConstantBufferDescriptorUsages, engine::ShaderStageConstantBufferCount> cbufUsages; std::array<ConstantBufferDescriptorUsages, engine::ShaderStageConstantBufferCount> cbufUsages;
bool operator==(const StageDescriptorInfo&) const = default;
}; };
std::vector<vk::CopyDescriptorSet> copyDescs; std::vector<vk::CopyDescriptorSet> copyDescs; //!< Copy descriptors for all descs in the pipeline to allow for quick binding
std::array<StageDescriptorInfo, 5> stages; std::array<StageDescriptorInfo, engine::ShaderStageCount> stages;
u32 totalStorageBufferCount; u16 totalStorageBufferCount;
u32 totalCombinedImageSamplerCount; u16 totalCombinedImageSamplerCount;
u32 totalWriteDescCount; u16 totalWriteDescCount;
u32 totalBufferDescCount; u16 totalBufferDescCount;
u32 totalTexelBufferDescCount; u16 totalTexelBufferDescCount;
u32 totalImageDescCount; u16 totalImageDescCount;
bool operator==(const DescriptorInfo &) const = default;
}; };
PackedPipelineState sourcePackedState; PackedPipelineState sourcePackedState;
@ -87,24 +212,30 @@ namespace skyline::gpu::interconnect::maxwell3d {
private: private:
std::vector<CachedMappedBufferView> storageBufferViews; std::vector<CachedMappedBufferView> storageBufferViews;
ContextTag lastExecutionTag{}; //!< The last execution tag this pipeline was used at ContextTag lastExecutionTag{}; //!< The last execution tag this pipeline was used at
std::array<ShaderStage, engine::ShaderStageCount> shaderStages; DescriptorInfo descriptorInfo; //!< Info about all descriptors used in each stage of the pipeline
DescriptorInfo descriptorInfo; u8 transitionCacheNextIdx{}; //!< The next index to insert into the transition cache
u8 stageMask{}; //!< Bitmask of active shader stages
u16 sampledImageCount{};
std::array<Pipeline *, 6> transitionCache{}; std::array<Pipeline *, 6> transitionCache{};
size_t transitionCacheNextIdx{};
tsl::robin_map<Pipeline *, bool> bindingMatchCache; //!< Cache of which pipelines have bindings that match this pipeline tsl::robin_map<Pipeline *, bool> bindingMatchCache; //!< Cache of which pipelines have bindings that match this pipeline
void SyncCachedStorageBufferViews(ContextTag executionTag); void SyncCachedStorageBufferViews(ContextTag executionTag);
public: public:
cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline; GraphicsPipelineAssembler::CompiledPipeline compiledPipeline;
size_t sampledImageCount{};
Pipeline(GPU &gpu, PipelineStateAccessor &accessor, const PackedPipelineState &packedState); Pipeline(GPU &gpu, PipelineStateAccessor &accessor, const PackedPipelineState &packedState);
/**
* @brief Returns the pipeline in the transition cache (if present) that matches the given state
*/
Pipeline *LookupNext(const PackedPipelineState &packedState); Pipeline *LookupNext(const PackedPipelineState &packedState);
/**
* @brief Record a transition from this pipeline to the next pipeline in the transition cache
*/
void AddTransition(Pipeline *next); void AddTransition(Pipeline *next);
bool CheckBindingMatch(Pipeline *other); bool CheckBindingMatch(Pipeline *other);