From 9d3a9f63d5d554a3232b029f735d84f4ae0765cb Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Wed, 18 Jan 2023 20:04:50 +0000 Subject: [PATCH] Move graphics pipelines away from storing hades shader info struct By only using what we need, and mirroring the descriptor structs to allow for much tighter packing (while keeping the same member names) we can reduce pipeline memory to about 1/3 of what it was before. --- app/CMakeLists.txt | 2 +- .../gpu/interconnect/common/pipeline.inc | 11 +- .../kepler_compute/pipeline_manager.cpp | 2 +- .../maxwell_3d/pipeline_manager.cpp | 129 +++++++---- .../maxwell_3d/pipeline_manager.h | 217 ++++++++++++++---- 5 files changed, 259 insertions(+), 102 deletions(-) diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 6de7d2bd..ee279005 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -186,7 +186,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/shader_manager.cpp ${source_DIR}/skyline/gpu/pipeline_cache_manager.cpp - ${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp + ${source_DIR}/skyline/gpu/graphics_pipeline_assembler.cpp ${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp ${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp ${source_DIR}/skyline/gpu/interconnect/fermi_2d.cpp diff --git a/app/src/main/cpp/skyline/gpu/interconnect/common/pipeline.inc b/app/src/main/cpp/skyline/gpu/interconnect/common/pipeline.inc index 4e94c3f1..5db3d3f8 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/common/pipeline.inc +++ b/app/src/main/cpp/skyline/gpu/interconnect/common/pipeline.inc @@ -17,13 +17,13 @@ namespace skyline::gpu::interconnect { }; }; - static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, const Shader::Info &info, BufferView view, size_t idx) { + static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, span cbufSizes, BufferView view, size_t idx) { if (!view) // Return a dummy buffer if the 
constant buffer isn't bound return BufferBinding{ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, PAGE_SIZE).buffer, 0, PAGE_SIZE}; ctx.executor.AttachBuffer(view); - size_t sizeOverride{std::min(info.constant_buffer_used_sizes[idx], view.size)}; + size_t sizeOverride{std::min(cbufSizes[idx], view.size)}; if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag, sizeOverride)}) { return megaBufferBinding; } else { @@ -32,7 +32,7 @@ namespace skyline::gpu::interconnect { } } - static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const Shader::StorageBufferDescriptor &desc, ConstantBuffer &cbuf, CachedMappedBufferView &cachedView) { + static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const auto &desc, ConstantBuffer &cbuf, CachedMappedBufferView &cachedView) { struct SsboDescriptor { u64 address; u32 size; @@ -59,8 +59,7 @@ namespace skyline::gpu::interconnect { return view; } - template - static BindlessHandle ReadBindlessHandle(InterconnectContext &ctx, CbufSetT &constantBuffers, const auto &desc, size_t arrayIdx) { + static BindlessHandle ReadBindlessHandle(InterconnectContext &ctx, auto &constantBuffers, const auto &desc, size_t arrayIdx) { ConstantBuffer &primaryCbuf{constantBuffers[desc.cbuf_index]}; size_t elemOffset{arrayIdx << desc.size_shift}; size_t primaryCbufOffset{desc.cbuf_offset + elemOffset}; @@ -78,7 +77,7 @@ namespace skyline::gpu::interconnect { return {.raw = primaryVal}; } - static std::pair GetTextureBinding(InterconnectContext &ctx, const Shader::TextureDescriptor &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) { + static std::pair GetTextureBinding(InterconnectContext &ctx, const auto &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) { auto sampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)}; auto texture{textures.GetTexture(ctx, handle.textureIndex, 
desc.type)}; ctx.executor.AttachTexture(texture); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp b/app/src/main/cpp/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp index 899132cd..b7d9a0d4 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp @@ -175,7 +175,7 @@ namespace skyline::gpu::interconnect::kepler_compute { writeBufferDescs(vk::DescriptorType::eUniformBuffer, shaderStage.info.constant_buffer_descriptors, [&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) { size_t cbufIdx{desc.index + arrayIdx}; - return GetConstantBufferBinding(ctx, shaderStage.info, constantBuffers[cbufIdx].view, cbufIdx); + return GetConstantBufferBinding(ctx, shaderStage.info.constant_buffer_used_sizes, constantBuffers[cbufIdx].view, cbufIdx); }); writeBufferDescs(vk::DescriptorType::eStorageBuffer, shaderStage.info.storage_buffers_descriptors, diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp index a09944ed..121d5941 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp @@ -9,10 +9,18 @@ #include #include #include +#include #include "graphics_pipeline_state_accessor.h" #include "pipeline_manager.h" +#include "soc/gm20b/engines/maxwell/types.h" namespace skyline::gpu::interconnect::maxwell3d { + struct ShaderStage { + vk::ShaderStageFlagBits stage; + vk::ShaderModule module; + Shader::Info info; + }; + static constexpr Shader::Stage ConvertCompilerShaderStage(engine::Pipeline::Shader::Type stage) { switch (stage) { case engine::Pipeline::Shader::Type::VertexCullBeforeFetch: @@ -186,7 +194,7 @@ namespace skyline::gpu::interconnect::maxwell3d { return info; } - static 
std::array MakePipelineShaders(GPU &gpu, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState) { + static std::array MakePipelineShaders(GPU &gpu, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState) { gpu.shader->ResetPools(); using PipelineStage = engine::Pipeline::Shader::Type; @@ -233,7 +241,7 @@ namespace skyline::gpu::interconnect::maxwell3d { Shader::Backend::Bindings bindings{}; Shader::IR::Program *lastProgram{}; - std::array shaderStages{}; + std::array shaderStages{}; for (u32 i{stageIdx(ignoreVertexCullBeforeFetch ? PipelineStage::Vertex : PipelineStage::VertexCullBeforeFetch)}; i < engine::PipelineCount; i++) { if (!packedState.shaderHashes[i] && !(i == stageIdx(PipelineStage::Geometry) && layerConversionSourceProgram)) @@ -250,9 +258,9 @@ namespace skyline::gpu::interconnect::maxwell3d { return shaderStages; } - static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const std::array &shaderStages, bool needsIndividualTextureBindingWrites) { + static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const std::array &shaderStages, bool needsIndividualTextureBindingWrites) { Pipeline::DescriptorInfo descriptorInfo{}; - u32 bindingIndex{}; + u16 bindingIndex{}; for (size_t i{}; i < engine::ShaderStageCount; i++) { const auto &stage{shaderStages[i]}; @@ -261,11 +269,12 @@ namespace skyline::gpu::interconnect::maxwell3d { auto &stageDescInfo{descriptorInfo.stages[i]}; - auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u32 &count, auto &&descCb, bool individualDescWrites = false) { + auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u16 &count, auto &outputDescs, auto &&descCb, bool individualDescWrites = false) { descriptorInfo.totalWriteDescCount += individualDescWrites ? descs.size() : ((descs.size() > 0) ? 
1 : 0); - for (u32 descIdx{}; descIdx < descs.size(); descIdx++) { + for (u16 descIdx{}; descIdx < descs.size(); descIdx++) { const auto &desc{descs[descIdx]}; + outputDescs.emplace_back(desc); count += desc.count; descCb(desc, descIdx); @@ -287,32 +296,42 @@ namespace skyline::gpu::interconnect::maxwell3d { } }}; - pushBindings(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors, stageDescInfo.uniformBufferDescCount, [&](const Shader::ConstantBufferDescriptor &desc, u32 descIdx) { - for (u32 cbufIdx{desc.index}; cbufIdx < desc.index + desc.count; cbufIdx++) { + pushBindings(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors, + stageDescInfo.uniformBufferDescTotalCount, stageDescInfo.uniformBufferDescs, + [&](const Shader::ConstantBufferDescriptor &desc, u16 descIdx) { + for (u16 cbufIdx{static_cast(desc.index)}; cbufIdx < desc.index + desc.count; cbufIdx++) { auto &usage{stageDescInfo.cbufUsages[cbufIdx]}; usage.uniformBuffers.push_back({bindingIndex, descIdx}); usage.totalBufferDescCount += desc.count; usage.writeDescCount++; } }); - pushBindings(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount, [&](const Shader::StorageBufferDescriptor &desc, u32 descIdx) { + pushBindings(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, + stageDescInfo.storageBufferDescTotalCount, stageDescInfo.storageBufferDescs, + [&](const Shader::StorageBufferDescriptor &desc, u16 descIdx) { auto &usage{stageDescInfo.cbufUsages[desc.cbuf_index]}; usage.storageBuffers.push_back({bindingIndex, descIdx, descriptorInfo.totalStorageBufferCount}); usage.totalBufferDescCount += desc.count; usage.writeDescCount++; descriptorInfo.totalStorageBufferCount += desc.count; }); - descriptorInfo.totalBufferDescCount += stageDescInfo.uniformBufferDescCount + stageDescInfo.storageBufferDescCount; + descriptorInfo.totalBufferDescCount += 
stageDescInfo.uniformBufferDescTotalCount + stageDescInfo.storageBufferDescTotalCount; - pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, stageDescInfo.uniformTexelBufferDescCount, [](const auto &, u32) { + pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, + stageDescInfo.uniformTexelBufferDescTotalCount, stageDescInfo.uniformTexelBufferDescs, + [](const auto &, u32) { Logger::Warn("Texture buffer descriptors are not supported"); }); - pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, stageDescInfo.storageTexelBufferDescCount, [](const auto &, u32) { + pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, + stageDescInfo.storageTexelBufferDescTotalCount, stageDescInfo.storageTexelBufferDescs, + [](const auto &, u32) { Logger::Warn("Image buffer descriptors are not supported"); }); - descriptorInfo.totalTexelBufferDescCount += stageDescInfo.uniformTexelBufferDescCount + stageDescInfo.storageTexelBufferDescCount; + descriptorInfo.totalTexelBufferDescCount += stageDescInfo.uniformTexelBufferDescTotalCount + stageDescInfo.storageTexelBufferDescTotalCount; - pushBindings(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount, [&](const Shader::TextureDescriptor &desc, u32 descIdx) { + pushBindings(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, + stageDescInfo.combinedImageSamplerDescTotalCount, stageDescInfo.combinedImageSamplerDescs, + [&](const Shader::TextureDescriptor &desc, u16 descIdx) { auto addUsage{[&](auto idx) { auto &usage{stageDescInfo.cbufUsages[idx]}; usage.combinedImageSamplers.push_back({bindingIndex, descIdx, descriptorInfo.totalCombinedImageSamplerCount}); @@ -326,10 +345,12 @@ namespace skyline::gpu::interconnect::maxwell3d { descriptorInfo.totalCombinedImageSamplerCount += desc.count; }, 
needsIndividualTextureBindingWrites); - pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors, stageDescInfo.storageImageDescCount, [](const auto &, u32) { + pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors, + stageDescInfo.storageImageDescTotalCount, stageDescInfo.storageImageDescs, + [](const auto &, u16) { Logger::Warn("Image descriptors are not supported"); }); - descriptorInfo.totalImageDescCount += stageDescInfo.combinedImageSamplerDescCount + stageDescInfo.storageImageDescCount; + descriptorInfo.totalImageDescCount += stageDescInfo.combinedImageSamplerDescTotalCount + stageDescInfo.storageImageDescTotalCount; } return descriptorInfo; } @@ -623,10 +644,15 @@ namespace skyline::gpu::interconnect::maxwell3d { } Pipeline::Pipeline(GPU &gpu, PipelineStateAccessor &accessor, const PackedPipelineState &packedState) - : sourcePackedState{packedState}, - shaderStages{MakePipelineShaders(gpu, accessor, sourcePackedState)}, - descriptorInfo{MakePipelineDescriptorInfo(shaderStages, gpu.traits.quirks.needsIndividualTextureBindingWrites)}, - compiledPipeline{MakeCompiledPipeline(gpu, sourcePackedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings)} { + : sourcePackedState{packedState} { + auto shaderStages{MakePipelineShaders(gpu, accessor, sourcePackedState)}; + descriptorInfo = MakePipelineDescriptorInfo(shaderStages, gpu.traits.quirks.needsIndividualTextureBindingWrites); + compiledPipeline = MakeCompiledPipeline(gpu, sourcePackedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings); + + for (u32 i{}; i < engine::ShaderStageCount; i++) + if (shaderStages[i].stage != vk::ShaderStageFlagBits{}) + stageMask |= 1 << i; + storageBufferViews.resize(descriptorInfo.totalStorageBufferCount); accessor.MarkComplete(); } @@ -665,11 +691,14 @@ namespace skyline::gpu::interconnect::maxwell3d { if (auto it{bindingMatchCache.find(other)}; it != bindingMatchCache.end()) return it->second; - for (size_t i{}; i < 
shaderStages.size(); i++) { - if (!shaderStages[i].BindingsEqual(other->shaderStages[i])) { - bindingMatchCache[other] = false; - return false; - } + if (stageMask != other->stageMask) { + bindingMatchCache[other] = false; + return false; + } + + if (descriptorInfo != other->descriptorInfo) { + bindingMatchCache[other] = false; + return false; } bindingMatchCache[other] = true; @@ -747,36 +776,35 @@ namespace skyline::gpu::interconnect::maxwell3d { } }}; - for (size_t i{}; i < shaderStages.size(); i++) { - const auto &stage{shaderStages[i]}; - if (!stage.module) + for (size_t i{}; i < engine::ShaderStageCount; i++) { + if (!(stageMask & (1 << i))) continue; - const auto &stageDescInfo{descriptorInfo.stages[i]}; + const auto &stage{descriptorInfo.stages[i]}; - writeBufferDescs(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors, stageDescInfo.uniformBufferDescCount, - [&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) { + writeBufferDescs(vk::DescriptorType::eUniformBuffer, stage.uniformBufferDescs, stage.uniformBufferDescTotalCount, + [&](const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) { size_t cbufIdx{desc.index + arrayIdx}; - return GetConstantBufferBinding(ctx, stage.info, constantBuffers[i][cbufIdx].view, cbufIdx); + return GetConstantBufferBinding(ctx, {stage.constantBufferUsedSizes}, constantBuffers[i][cbufIdx].view, cbufIdx); }); - writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount, - [&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) { + writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.storageBufferDescs, stage.storageBufferDescTotalCount, + [&](const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) { return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx++]); }); - bindingIdx += 
stageDescInfo.uniformTexelBufferDescCount; - bindingIdx += stageDescInfo.storageTexelBufferDescCount; + bindingIdx += stage.uniformTexelBufferDescs.size(); + bindingIdx += stage.storageTexelBufferDescs.size(); - writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount, - [&](const Shader::TextureDescriptor &desc, size_t arrayIdx) { + writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.combinedImageSamplerDescs, stage.combinedImageSamplerDescTotalCount, + [&](const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) { BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)}; auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)}; sampledImages[combinedImageSamplerIdx++] = binding.second; return binding.first; }, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites); - bindingIdx += stageDescInfo.storageImageDescCount; + bindingIdx += stage.storageImageDescs.size(); } // Since we don't implement all descriptor types the number of writes might not match what's expected @@ -787,8 +815,8 @@ namespace skyline::gpu::interconnect::maxwell3d { .writes = writes.first(writeIdx), .bufferDescs = bufferDescs.first(bufferIdx), .bufferDescDynamicBindings = bufferDescDynamicBindings.first(bufferIdx), - .pipelineLayout = compiledPipeline.pipelineLayout, - .descriptorSetLayout = compiledPipeline.descriptorSetLayout, + .pipelineLayout = *compiledPipeline.pipelineLayout, + .descriptorSetLayout = *compiledPipeline.descriptorSetLayout, .bindPoint = vk::PipelineBindPoint::eGraphics, .descriptorSetIndex = 0, }); @@ -803,7 +831,6 @@ namespace skyline::gpu::interconnect::maxwell3d { if (!cbufUsageInfo.writeDescCount) return nullptr; - const auto &shaderInfo{shaderStages[stageIndex].info}; auto &stageConstantBuffers{constantBuffers[stageIndex]}; u32 writeIdx{}; @@ -846,19 +873,19 @@ namespace 
skyline::gpu::interconnect::maxwell3d { } }}; - writeDescs.operator()(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, shaderInfo.constant_buffer_descriptors, - [&](auto usage, const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) -> DynamicBufferBinding { + writeDescs.operator()(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, stageDescInfo.uniformBufferDescs, + [&](auto usage, const DescriptorInfo::StageDescriptorInfo::UniformBufferDesc &desc, size_t arrayIdx) -> DynamicBufferBinding { size_t cbufIdx{desc.index + arrayIdx}; - return GetConstantBufferBinding(ctx, shaderInfo, stageConstantBuffers[cbufIdx].view, cbufIdx); + return GetConstantBufferBinding(ctx, {stageDescInfo.constantBufferUsedSizes}, stageConstantBuffers[cbufIdx].view, cbufIdx); }); - writeDescs.operator()(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, shaderInfo.storage_buffers_descriptors, - [&](auto usage, const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) { + writeDescs.operator()(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, stageDescInfo.storageBufferDescs, + [&](auto usage, const DescriptorInfo::StageDescriptorInfo::StorageBufferDesc &desc, size_t arrayIdx) { return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.entirePipelineIdx + arrayIdx]); }); - writeDescs.operator()(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, shaderInfo.texture_descriptors, - [&](auto usage, const Shader::TextureDescriptor &desc, size_t arrayIdx) { + writeDescs.operator()(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, stageDescInfo.combinedImageSamplerDescs, + [&](auto usage, const DescriptorInfo::StageDescriptorInfo::CombinedImageSamplerDesc &desc, size_t arrayIdx) { BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)}; auto binding{GetTextureBinding(ctx, desc, samplers, 
textures, handle)}; sampledImages[usage.entirePipelineIdx + arrayIdx] = binding.second; @@ -874,8 +901,8 @@ namespace skyline::gpu::interconnect::maxwell3d { .writes = writes.first(writeIdx), .bufferDescs = bufferDescs.first(bufferIdx), .bufferDescDynamicBindings = bufferDescDynamicBindings.first(bufferIdx), - .pipelineLayout = compiledPipeline.pipelineLayout, - .descriptorSetLayout = compiledPipeline.descriptorSetLayout, + .pipelineLayout = *compiledPipeline.pipelineLayout, + .descriptorSetLayout = *compiledPipeline.descriptorSetLayout, .bindPoint = vk::PipelineBindPoint::eGraphics, .descriptorSetIndex = 0, }); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h index 03e49124..c0c1bec1 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h @@ -20,66 +20,191 @@ namespace skyline::gpu { namespace skyline::gpu::interconnect::maxwell3d { class Pipeline { public: - struct ShaderStage { - vk::ShaderStageFlagBits stage; - vk::ShaderModule module; - Shader::Info info; - - /** - * @return Whether the bindings for this stage match those of the input stage - */ - bool BindingsEqual(const ShaderStage &other) const { - return info.constant_buffer_descriptors == other.info.constant_buffer_descriptors && - info.storage_buffers_descriptors == other.info.storage_buffers_descriptors && - info.texture_buffer_descriptors == other.info.texture_buffer_descriptors && - info.image_buffer_descriptors == other.info.image_buffer_descriptors && - info.texture_descriptors == other.info.texture_descriptors && - info.image_descriptors == other.info.image_descriptors; - } - }; - + /** + * @brief A monolithic struct containing all the descriptor state of the pipeline + */ struct DescriptorInfo { std::vector descriptorSetLayoutBindings; struct StageDescriptorInfo { - u32 
uniformBufferDescCount; - u32 storageBufferDescCount; - u32 uniformTexelBufferDescCount; - u32 storageTexelBufferDescCount; - u32 combinedImageSamplerDescCount; - u32 storageImageDescCount; + // Unwrapped counts (counting each array element as a separate descriptor) for the below desc structs + u16 uniformBufferDescTotalCount; + u16 storageBufferDescTotalCount; + u16 uniformTexelBufferDescTotalCount; + u16 storageTexelBufferDescTotalCount; + u16 combinedImageSamplerDescTotalCount; + u16 storageImageDescTotalCount; + + // Below are descriptor structs designed to be compatible with hades (hence the use of snake_case) but in a more compacted format to reduce memory usage + struct UniformBufferDesc { + u8 index; + u8 count; + + UniformBufferDesc(const Shader::ConstantBufferDescriptor &desc) + : index{static_cast(desc.index)}, + count{static_cast(desc.count)} {} + + auto operator<=>(const UniformBufferDesc &) const = default; + }; + boost::container::static_vector uniformBufferDescs; + + struct StorageBufferDesc { + u32 cbuf_offset; + u8 cbuf_index; + bool is_written; + u8 count; + + StorageBufferDesc(const Shader::StorageBufferDescriptor &desc) + : cbuf_offset{desc.cbuf_offset}, + cbuf_index{static_cast(desc.cbuf_index)}, + is_written{desc.is_written}, + count{static_cast(desc.count)} {} + + auto operator<=>(const StorageBufferDesc &) const = default; + }; + boost::container::small_vector storageBufferDescs; + + struct UniformTexelBufferDesc { + u32 cbuf_offset; + u32 secondary_cbuf_offset; + bool has_secondary; + u8 cbuf_index; + u8 shift_left; + u8 secondary_cbuf_index; + u8 secondary_shift_left; + u8 count; + u8 size_shift; + + UniformTexelBufferDesc(const Shader::TextureBufferDescriptor &desc) + : cbuf_offset{desc.cbuf_offset}, + secondary_cbuf_offset{desc.secondary_cbuf_offset}, + has_secondary{desc.has_secondary}, + cbuf_index{static_cast(desc.cbuf_index)}, + shift_left{static_cast(desc.shift_left)}, + secondary_cbuf_index{static_cast(desc.secondary_cbuf_index)}, 
+ secondary_shift_left{static_cast(desc.secondary_shift_left)}, + count{static_cast(desc.count)}, + size_shift{static_cast(desc.size_shift)} {} + + auto operator<=>(const UniformTexelBufferDesc &) const = default; + }; + std::vector uniformTexelBufferDescs; + + struct StorageTexelBufferDesc { + Shader::ImageFormat format; + u32 cbuf_offset; + bool is_read; + bool is_written; + u8 cbuf_index; + u8 count; + u8 size_shift; + + StorageTexelBufferDesc(const Shader::ImageBufferDescriptor &desc) + : format{desc.format}, + cbuf_offset{desc.cbuf_offset}, + is_read{desc.is_read}, + is_written{desc.is_written}, + cbuf_index{static_cast(desc.cbuf_index)}, + count{static_cast(desc.count)}, + size_shift{static_cast(desc.size_shift)} {} + + auto operator<=>(const StorageTexelBufferDesc &) const = default; + }; + std::vector storageTexelBufferDescs; + + struct CombinedImageSamplerDesc { + Shader::TextureType type; + u32 cbuf_offset; + u32 secondary_cbuf_offset; + bool has_secondary; + u8 cbuf_index; + u8 shift_left; + u8 secondary_cbuf_index; + u8 secondary_shift_left; + u8 count; + u8 size_shift; + + CombinedImageSamplerDesc(const Shader::TextureDescriptor &desc) + : type{desc.type}, + cbuf_offset{desc.cbuf_offset}, + secondary_cbuf_offset{desc.secondary_cbuf_offset}, + has_secondary{desc.has_secondary}, + cbuf_index{static_cast(desc.cbuf_index)}, + shift_left{static_cast(desc.shift_left)}, + secondary_cbuf_index{static_cast(desc.secondary_cbuf_index)}, + secondary_shift_left{static_cast(desc.secondary_shift_left)}, + count{static_cast(desc.count)}, + size_shift{static_cast(desc.size_shift)} {} + + auto operator<=>(const CombinedImageSamplerDesc &) const = default; + }; + boost::container::small_vector combinedImageSamplerDescs; + + struct StorageImageDesc { + Shader::TextureType type; + Shader::ImageFormat format; + u32 cbuf_offset; + bool isRead; + bool is_written; + u8 cbuf_index; + u8 count; + u8 size_shift; + + StorageImageDesc(const Shader::ImageDescriptor &desc) + : 
type{desc.type}, + format{desc.format}, + cbuf_offset{desc.cbuf_offset}, + isRead{desc.is_read}, + is_written{desc.is_written}, + cbuf_index{static_cast(desc.cbuf_index)}, + count{static_cast(desc.count)}, + size_shift{static_cast(desc.size_shift)} {} + + auto operator<=>(const StorageImageDesc &) const = default; + }; + boost::container::small_vector storageImageDescs; + + std::array constantBufferUsedSizes; /** * @brief Keeps track of all bindings that are dependent on a given constant buffer index to allow for quick binding */ struct ConstantBufferDescriptorUsages { struct Usage { - u32 binding; //!< Vulkan binding index - u32 shaderDescIdx; //!< Index of the descriptor in the appropriate shader info member - u32 entirePipelineIdx; //!< Index of the image/storage buffer in the entire pipeline + u16 binding; //!< Vulkan binding index + u16 shaderDescIdx; //!< Index of the descriptor in the appropriate shader info member + u16 entirePipelineIdx; //!< Index of the image/storage buffer in the entire pipeline + + bool operator==(const Usage&) const = default; }; boost::container::small_vector uniformBuffers; boost::container::small_vector storageBuffers; boost::container::small_vector combinedImageSamplers; - u32 totalBufferDescCount; - u32 totalImageDescCount; - u32 writeDescCount; + u16 totalBufferDescCount; + u16 totalImageDescCount; + u16 writeDescCount; + + bool operator==(const ConstantBufferDescriptorUsages&) const = default; }; std::array cbufUsages; + + bool operator==(const StageDescriptorInfo&) const = default; }; - std::vector copyDescs; - std::array stages; + std::vector copyDescs; //!< Copy descriptors for all descs in the pipeline to allow for quick binding + std::array stages; - u32 totalStorageBufferCount; - u32 totalCombinedImageSamplerCount; + u16 totalStorageBufferCount; + u16 totalCombinedImageSamplerCount; - u32 totalWriteDescCount; - u32 totalBufferDescCount; - u32 totalTexelBufferDescCount; - u32 totalImageDescCount; + u16 totalWriteDescCount; 
+ u16 totalBufferDescCount; + u16 totalTexelBufferDescCount; + u16 totalImageDescCount; + + bool operator==(const DescriptorInfo &) const = default; }; PackedPipelineState sourcePackedState; @@ -87,24 +212,30 @@ namespace skyline::gpu::interconnect::maxwell3d { private: std::vector storageBufferViews; ContextTag lastExecutionTag{}; //!< The last execution tag this pipeline was used at - std::array shaderStages; - DescriptorInfo descriptorInfo; + DescriptorInfo descriptorInfo; //!< Info about all descriptors used in each stage of the pipeline + u8 transitionCacheNextIdx{}; //!< The next index to insert into the transition cache + u8 stageMask{}; //!< Bitmask of active shader stages + u16 sampledImageCount{}; std::array transitionCache{}; - size_t transitionCacheNextIdx{}; tsl::robin_map bindingMatchCache; //!< Cache of which pipelines have bindings that match this pipeline void SyncCachedStorageBufferViews(ContextTag executionTag); public: - cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline; - size_t sampledImageCount{}; + GraphicsPipelineAssembler::CompiledPipeline compiledPipeline; Pipeline(GPU &gpu, PipelineStateAccessor &accessor, const PackedPipelineState &packedState); + /** + * @brief Returns the pipeline in the transition cache (if present) that matches the given state + */ Pipeline *LookupNext(const PackedPipelineState &packedState); + /** + * @brief Record a transition from this pipeline to the next pipeline in the transition cache + */ void AddTransition(Pipeline *next); bool CheckBindingMatch(Pipeline *other);