From 8f0a6e78c5bd56fbda2cacb89a0e2ba5b9f2ed30 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Fri, 18 Nov 2022 21:30:34 +0000 Subject: [PATCH] Add Vulkan stride dynamic state and robustness support Fixes the waterfall in SMO by specifying vertex buffer bounds. --- app/src/main/cpp/skyline/gpu.cpp | 4 +++- .../interconnect/maxwell_3d/active_state.cpp | 10 ++++++---- .../gpu/interconnect/maxwell_3d/maxwell_3d.cpp | 2 +- .../maxwell_3d/packed_pipeline_state.cpp | 3 ++- .../maxwell_3d/packed_pipeline_state.h | 10 ++++++++-- .../maxwell_3d/pipeline_manager.cpp | 13 +++++++++---- .../interconnect/maxwell_3d/pipeline_state.cpp | 2 ++ app/src/main/cpp/skyline/gpu/trait_manager.cpp | 18 +++++++++++++++++- app/src/main/cpp/skyline/gpu/trait_manager.h | 6 +++++- 9 files changed, 53 insertions(+), 15 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index b541bbb7..10bb2bbc 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -233,7 +233,9 @@ namespace skyline::gpu { vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDeviceImagelessFramebufferFeatures, vk::PhysicalDeviceTransformFeedbackFeaturesEXT, - vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>()}; + vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, + vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, + vk::PhysicalDeviceRobustness2FeaturesEXT>()}; decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features #define FEAT_REQ(structName, feature) \ diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/active_state.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/active_state.cpp index aae21195..3dd9abc9 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/active_state.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/active_state.cpp @@ -31,9 +31,9 @@ namespace skyline::gpu::interconnect::maxwell3d { if 
(megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber); megaBufferBinding) - builder.SetVertexBuffer(index, megaBufferBinding); + builder.SetVertexBuffer(index, megaBufferBinding, ctx.gpu.traits.supportsExtendedDynamicState, engine->vertexStream.format.stride); else - builder.SetVertexBuffer(index, *view); + builder.SetVertexBuffer(index, *view, ctx.gpu.traits.supportsExtendedDynamicState, engine->vertexStream.format.stride); return; } else { @@ -41,9 +41,11 @@ namespace skyline::gpu::interconnect::maxwell3d { } } - // TODO: null descriptor megaBufferBinding = {}; - builder.SetVertexBuffer(index, {ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer}); + if (ctx.gpu.traits.supportsNullDescriptor) + builder.SetVertexBuffer(index, BufferBinding{}); + else + builder.SetVertexBuffer(index, {ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer}); } bool VertexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder) { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp index f16845d7..6b245ce8 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp @@ -247,7 +247,7 @@ namespace skyline::gpu::interconnect::maxwell3d { if (oldPipeline != pipeline) // If the pipeline has changed, we need to update the pipeline state - builder.SetPipeline(pipeline->compiledPipeline.pipeline); + builder.SetPipeline(pipeline->compiledPipeline.pipeline, vk::PipelineBindPoint::eGraphics); if (descUpdateInfo) { if (ctx.gpu.traits.supportsPushDescriptors) { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp index 8b13f6d1..a2587bb0 100644 --- 
a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp @@ -19,7 +19,8 @@ namespace skyline::gpu::interconnect::maxwell3d { } void PackedPipelineState::SetVertexBinding(u32 index, engine::VertexStream stream, engine::VertexStreamInstance instance) { - vertexBindings[index].stride = stream.format.stride; + if (!dynamicStateActive) + vertexStrides[index] = stream.format.stride; vertexBindings[index].inputRate = static_cast(instance.isInstanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex); vertexBindings[index].enable = stream.format.enable; vertexBindings[index].divisor = stream.frequency; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.h index b242688c..1424a629 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.h @@ -58,6 +58,7 @@ namespace skyline::gpu::interconnect::maxwell3d { u8 alphaFunc : 3; //!< Use {Set,Get}AlphaFunc bool alphaTestEnable : 1; bool depthClampEnable : 1; // Use SetDepthClampEnable + bool dynamicStateActive : 1; }; u32 patchSize; @@ -69,10 +70,8 @@ namespace skyline::gpu::interconnect::maxwell3d { std::array postVtgShaderAttributeSkipMask; struct VertexBinding { - u16 stride : 12; u8 inputRate : 1; bool enable : 1; - u8 _pad_ : 2; u32 divisor; vk::VertexInputRate GetInputRate() const { @@ -95,6 +94,8 @@ namespace skyline::gpu::interconnect::maxwell3d { std::array attachmentBlendStates; + std::array vertexStrides; //!< Use {Set, Get}VertexBinding + struct TransformFeedbackVarying { u16 stride; u8 offsetWords; @@ -149,6 +150,8 @@ namespace skyline::gpu::interconnect::maxwell3d { // Only hash transform feedback state if it's enabled if (other.transformFeedbackEnable && 
transformFeedbackEnable) return std::memcmp(this, &other, sizeof(PackedPipelineState)) == 0; + else if (dynamicStateActive) + return std::memcmp(this, &other, offsetof(PackedPipelineState, vertexStrides)) == 0; else return std::memcmp(this, &other, offsetof(PackedPipelineState, transformFeedbackVaryings)) == 0; } @@ -159,6 +162,9 @@ namespace skyline::gpu::interconnect::maxwell3d { // Only hash transform feedback state if it's enabled if (state.transformFeedbackEnable) return XXH64(&state, sizeof(PackedPipelineState), 0); + else if (state.dynamicStateActive) + return XXH64(&state, offsetof(PackedPipelineState, vertexStrides), 0); + return XXH64(&state, offsetof(PackedPipelineState, transformFeedbackVaryings), 0); } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp index de0db9e7..418e1456 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp @@ -442,7 +442,7 @@ namespace skyline::gpu::interconnect::maxwell3d { const auto &binding{packedState.vertexBindings[i]}; bindingDescs.push_back({ .binding = i, - .stride = binding.stride, + .stride = packedState.vertexStrides[i], .inputRate = binding.GetInputRate(), }); @@ -535,7 +535,7 @@ namespace skyline::gpu::interconnect::maxwell3d { .pAttachments = attachmentBlendStates.data() }; - constexpr std::array dynamicStates{ + constexpr std::array dynamicStates{ vk::DynamicState::eViewport, vk::DynamicState::eScissor, vk::DynamicState::eLineWidth, @@ -544,11 +544,16 @@ namespace skyline::gpu::interconnect::maxwell3d { vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask, - vk::DynamicState::eStencilReference + vk::DynamicState::eStencilReference, + // VK_EXT_extended_dynamic_state starts here + vk::DynamicState::eVertexInputBindingStrideEXT }; + static constexpr
u32 BaseDynamicStateCount{9}; + static constexpr u32 ExtendedDynamicStateCount{BaseDynamicStateCount + 1}; + vk::PipelineDynamicStateCreateInfo dynamicState{ - .dynamicStateCount = static_cast(dynamicStates.size()), + .dynamicStateCount = ctx.gpu.traits.supportsExtendedDynamicState ? ExtendedDynamicStateCount : BaseDynamicStateCount, .pDynamicStates = dynamicStates.data() }; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_state.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_state.cpp index 5e3a3f81..270f2faa 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_state.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_state.cpp @@ -488,6 +488,8 @@ namespace skyline::gpu::interconnect::maxwell3d { ctSelect{engine.ctSelect} {} void PipelineState::Flush(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, StateUpdateBuilder &builder) { + packedState.dynamicStateActive = ctx.gpu.traits.supportsExtendedDynamicState; + std::array shaderBinaries; for (size_t i{}; i < engine::PipelineCount; i++) { const auto &stage{pipelineStages[i].UpdateGet(ctx)}; diff --git a/app/src/main/cpp/skyline/gpu/trait_manager.cpp b/app/src/main/cpp/skyline/gpu/trait_manager.cpp index d1cb07af..0c04ccd4 100644 --- a/app/src/main/cpp/skyline/gpu/trait_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/trait_manager.cpp @@ -6,7 +6,7 @@ namespace skyline::gpu { TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice &physicalDevice) : quirks(deviceProperties2.get().properties, deviceProperties2.get()) { - bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, 
hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{}, hasTransformFeedbackExt{}, hasUint8IndicesExt{}; + bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{}, hasTransformFeedbackExt{}, hasUint8IndicesExt{}, hasExtendedDynamicStateExt{}, hasRobustness2Ext{}; bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present for (auto &extension : deviceExtensions) { @@ -57,6 +57,8 @@ namespace skyline::gpu { EXT_SET("VK_KHR_uniform_buffer_standard_layout", supportsUniformBufferStandardLayout); EXT_SET("VK_EXT_primitive_topology_list_restart", hasPrimitiveTopologyListRestartExt); EXT_SET("VK_EXT_transform_feedback", hasTransformFeedbackExt); + EXT_SET("VK_EXT_extended_dynamic_state", hasExtendedDynamicStateExt); + EXT_SET("VK_EXT_robustness2", hasRobustness2Ext); } #undef EXT_SET @@ -83,6 +85,20 @@ namespace skyline::gpu { else enabledFeatures2.unlink(); + + if (hasExtendedDynamicStateExt) + FEAT_SET(vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, extendedDynamicState, supportsExtendedDynamicState) + else + enabledFeatures2.unlink(); + + if (hasRobustness2Ext) { + FEAT_SET(vk::PhysicalDeviceRobustness2FeaturesEXT, nullDescriptor, supportsNullDescriptor) + FEAT_SET(vk::PhysicalDeviceRobustness2FeaturesEXT, robustBufferAccess2, std::ignore) + FEAT_SET(vk::PhysicalDeviceRobustness2FeaturesEXT, robustImageAccess2, std::ignore) + } else { + enabledFeatures2.unlink(); + } + if (hasCustomBorderColorExt) { bool hasCustomBorderColorFeature{}; FEAT_SET(vk::PhysicalDeviceCustomBorderColorFeaturesEXT, customBorderColors, hasCustomBorderColorFeature) diff --git a/app/src/main/cpp/skyline/gpu/trait_manager.h b/app/src/main/cpp/skyline/gpu/trait_manager.h index 5dc17646..02d09e22 100644 --- 
a/app/src/main/cpp/skyline/gpu/trait_manager.h +++ b/app/src/main/cpp/skyline/gpu/trait_manager.h @@ -46,6 +46,8 @@ namespace skyline::gpu { bool supportsSubgroupVote{}; //!< If subgroup votes are supported in shaders with SPV_KHR_subgroup_vote bool supportsWideLines{}; //!< If the device supports the 'wideLines' Vulkan feature bool supportsDepthClamp{}; //!< If the device supports the 'depthClamp' Vulkan feature + bool supportsExtendedDynamicState{}; //!< If the device supports the 'VK_EXT_extended_dynamic_state' Vulkan extension + bool supportsNullDescriptor{}; //!< If the device supports the null descriptor feature in the 'VK_EXT_robustness2' Vulkan extension u32 subgroupSize{}; //!< Size of a subgroup on the host GPU std::bitset<7> bcnSupport{}; //!< Bitmask of BCn texture formats supported, it is ordered as BC1, BC2, BC3, BC4, BC5, BC6H and BC7 @@ -98,7 +100,9 @@ namespace skyline::gpu { vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDeviceImagelessFramebufferFeatures, vk::PhysicalDeviceTransformFeedbackFeaturesEXT, - vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>; + vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, + vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, + vk::PhysicalDeviceRobustness2FeaturesEXT>; TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice& physicalDevice);