Skip zero-initializing shader bytecode backing

The backing for shader data was implicitly zero-initialized by a `resize` on every shader parse. This was entirely unnecessary, as we overwrite the entire range regardless. We avoid this by using statically allocated storage together with a span over it that contains the shader bytecode, which avoids any unnecessary clearing without resorting to more complex solutions such as a custom allocator.
2025-02-28 08:33:35 +01:00 · 2022-05-01 18:15:13 +05:30 · 2022-05-01 18:15:13 +05:30 · 25a29f9044
commit 25a29f9044
parent 42573170c6
2 changed files with 17 additions and 19 deletions
--- a/app/src/main/cpp/skyline/common/address_space.h
+++ b/app/src/main/cpp/skyline/common/address_space.h
@ -135,11 +135,12 @@ namespace skyline {
        /**
         * @brief Writes contents starting from the virtual address till the end of the span or an unmapped block has been hit or when `function` returns a non-nullopt value
         * @param function A function that is called on every block where it should return an end offset into the block when it wants to end reading or std::nullopt when it wants to continue reading
-         * @return If returning was caused by the supplied function returning a non-nullopt value or other conditions
+         * @return A span into the supplied container with the contents of the memory region
         * @note The function will **NOT** be run on any sparse block
+         * @note The function will provide no feedback on if the end has been reached or if there was an early exit
         */
        template<typename Function, typename Container>
-        bool ReadTill(Container& destination, VaType virt, Function function) {
+        span<u8> ReadTill(Container& destination, VaType virt, Function function) {
            //TRACE_EVENT("containers", "FlatMemoryManager::ReadTill");

            std::scoped_lock lock(this->blockMutex);
@ -158,18 +159,15 @@ namespace skyline {

            while (remainingSize) {
                if (predecessor->phys == nullptr) {
-                    destination.resize(destination.size() - remainingSize);
-                    return false;
+                    return {destination.data(), destination.size() - remainingSize};
                } else {
                    if (predecessor->extraInfo.sparseMapped) {
                        std::memset(pointer, 0, blockReadSize);
                    } else {
                        auto end{function(span<u8>(blockPhys, blockReadSize))};
                        std::memcpy(pointer, blockPhys, end ? *end : blockReadSize);
-                        if (end) {
-                            destination.resize((destination.size() - remainingSize) + *end);
-                            return true;
-                        }
+                        if (end)
+                            return {destination.data(), (destination.size() - remainingSize) + *end};
                    }
                }

@ -183,7 +181,7 @@ namespace skyline {
                }
            }

-            return false;
+            return {destination.data(), destination.size()};
        }

        void Write(VaType virt, u8 *source, VaType size);
--- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
@ -747,7 +747,8 @@ namespace skyline::gpu::interconnect {
            bool invalidated{true}; //!< If the shader that existed earlier has been invalidated
            bool shouldCheckSame{false}; //!< If we should do a check for the shader being the same as before
            u32 offset{}; //!< Offset of the shader from the base IOVA
-            boost::container::static_vector<u8, MaxShaderBytecodeSize> data; //!< The shader bytecode in a statically allocated vector
+            std::array<u8, MaxShaderBytecodeSize> backing; //!< The backing storage for shader bytecode in a statically allocated array
+            span<u8> bytecode{}; //!< A span of the shader bytecode inside the backing storage
            std::shared_ptr<ShaderManager::ShaderProgram> program{};

            Shader(ShaderCompiler::Stage stage) : stage(stage) {}
@ -914,10 +915,10 @@ namespace skyline::gpu::interconnect {
                        // If a shader is invalidated, we need to reparse the program (given that it has changed)

                        bool shouldParseShader{[&]() {
-                            if (!shader.data.empty() && shader.shouldCheckSame) {
+                            if (shader.bytecode.valid() && shader.shouldCheckSame) {
                                // A fast path to check if the shader is the same as before to avoid reparsing the shader
-                                auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.data.size())};
-                                auto originalShader{shader.data.data()};
+                                auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.bytecode.size())};
+                                auto originalShader{shader.bytecode.data()};

                                for (auto &range : newIovaRanges) {
                                    if (range.data() && std::memcmp(range.data(), originalShader, range.size()) == 0) {
@ -936,8 +937,7 @@ namespace skyline::gpu::interconnect {

                        if (shouldParseShader) {
                            // A pass to check if the shader has a BRA infloop opcode ending (On most commercial games)
-                            shader.data.resize(MaxShaderBytecodeSize);
-                            auto foundEnd{channelCtx.asCtx->gmmu.ReadTill(shader.data, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
+                            shader.bytecode = channelCtx.asCtx->gmmu.ReadTill(shader.backing, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
                                // We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader
                                // UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351
                                constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F};
@ -950,9 +950,9 @@ namespace skyline::gpu::interconnect {
                                        return static_cast<size_t>(std::distance(shaderInstructions.begin(), it)) * sizeof(u64);
                                }
                                return std::nullopt;
-                            })};
+                            });

-                            shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.data, shader.offset, bindlessTextureConstantBufferIndex);
+                            shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.bytecode, shader.offset, bindlessTextureConstantBufferIndex);

                            if (shader.stage != ShaderCompiler::Stage::VertexA && shader.stage != ShaderCompiler::Stage::VertexB) {
                                pipelineStage.program = shader.program;
@ -963,13 +963,13 @@ namespace skyline::gpu::interconnect {
                                    throw exception("Enabling VertexA without VertexB is not supported");
                                else if (!vertexB.invalidated)
                                    // If only VertexA is invalidated, we need to recombine here but we can defer it otherwise
-                                    pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.data);
+                                    pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.bytecode);
                            } else if (shader.stage == ShaderCompiler::Stage::VertexB) {
                                auto &vertexA{shaders[maxwell3d::ShaderStage::VertexA]};

                                if (vertexA.enabled)
                                    // We need to combine the vertex shader stages if VertexA is enabled
-                                    pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.data);
+                                    pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.bytecode);
                                else
                                    pipelineStage.program = shader.program;
                            }