From 25a29f90445082fae880224b0cc0510626142512 Mon Sep 17 00:00:00 2001
From: PixelyIon
Date: Sun, 1 May 2022 18:15:13 +0530
Subject: [PATCH] Skip zero-initializing shader bytecode backing

The backing for shader data would implicitly be zero-initialized due to
a `resize` on every shader parse; this was entirely unnecessary, as we
would overwrite the entire range regardless. We avoid this by using
statically allocated storage and a span over it containing the shader
bytecode, which eliminates any unnecessary clear semantics without
resorting to more complex solutions such as a custom allocator.
---
 .../main/cpp/skyline/common/address_space.h     | 16 +++++++---------
 .../gpu/interconnect/graphics_context.h         | 20 ++++++++++----------
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/app/src/main/cpp/skyline/common/address_space.h b/app/src/main/cpp/skyline/common/address_space.h
index d816c1e0..3a64a44f 100644
--- a/app/src/main/cpp/skyline/common/address_space.h
+++ b/app/src/main/cpp/skyline/common/address_space.h
@@ -135,11 +135,12 @@ namespace skyline {
         /**
          * @brief Writes contents starting from the virtual address till the end of the span or an unmapped block has been hit or when `function` returns a non-nullopt value
          * @param function A function that is called on every block where it should return an end offset into the block when it wants to end reading or std::nullopt when it wants to continue reading
-         * @return If returning was caused by the supplied function returning a non-nullopt value or other conditions
+         * @return A span into the supplied container with the contents of the memory region
          * @note The function will **NOT** be run on any sparse block
+         * @note The function will provide no feedback on whether the end has been reached or there was an early exit
          */
         template<typename Function, typename Container>
-        bool ReadTill(Container& destination, VaType virt, Function function) {
+        span<u8> ReadTill(Container& destination, VaType virt, Function function) {
             //TRACE_EVENT("containers", "FlatMemoryManager::ReadTill");

             std::scoped_lock lock(this->blockMutex);
@@ -158,18 +159,15 @@ namespace skyline {

             while (remainingSize) {
                 if (predecessor->phys == nullptr) {
-                    destination.resize(destination.size() - remainingSize);
-                    return false;
+                    return {destination.data(), destination.size() - remainingSize};
                 } else {
                     if (predecessor->extraInfo.sparseMapped) {
                         std::memset(pointer, 0, blockReadSize);
                     } else {
                         auto end{function(span(blockPhys, blockReadSize))};
                         std::memcpy(pointer, blockPhys, end ? *end : blockReadSize);
-                        if (end) {
-                            destination.resize((destination.size() - remainingSize) + *end);
-                            return true;
-                        }
+                        if (end)
+                            return {destination.data(), (destination.size() - remainingSize) + *end};
                     }
                 }

@@ -183,7 +181,7 @@ namespace skyline {
                 }
             }

-            return false;
+            return {destination.data(), destination.size()};
         }

         void Write(VaType virt, u8 *source, VaType size);

diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
index 3462ca83..5d91a5c4 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
@@ -747,7 +747,8 @@ namespace skyline::gpu::interconnect {
             bool invalidated{true}; //!< If the shader that existed earlier has been invalidated
             bool shouldCheckSame{false}; //!< If we should do a check for the shader being the same as before
             u32 offset{}; //!< Offset of the shader from the base IOVA
-            boost::container::static_vector<u8, MaxShaderBytecodeSize> data; //!< The shader bytecode in a statically allocated vector
+            std::array<u8, MaxShaderBytecodeSize> backing; //!< The backing storage for shader bytecode in a statically allocated array
+            span<u8> bytecode{}; //!< A span of the shader bytecode inside the backing storage
             std::shared_ptr<ShaderManager::ShaderProgram> program{};

             Shader(ShaderCompiler::Stage stage) : stage(stage) {}
@@ -914,10 +915,10 @@ namespace skyline::gpu::interconnect {

                 // If a shader is invalidated, we need to reparse the program (given that it has changed)
                 bool shouldParseShader{[&]() {
-                    if (!shader.data.empty() && shader.shouldCheckSame) {
+                    if (shader.bytecode.valid() && shader.shouldCheckSame) {
                         // A fast path to check if the shader is the same as before to avoid reparsing the shader
-                        auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.data.size())};
-                        auto originalShader{shader.data.data()};
+                        auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.bytecode.size())};
+                        auto originalShader{shader.bytecode.data()};

                         for (auto &range : newIovaRanges) {
                             if (range.data() && std::memcmp(range.data(), originalShader, range.size()) == 0) {
@@ -936,8 +937,7 @@ namespace skyline::gpu::interconnect {

                 if (shouldParseShader) {
                     // A pass to check if the shader has a BRA infloop opcode ending (On most commercial games)
-                    shader.data.resize(MaxShaderBytecodeSize);
-                    auto foundEnd{channelCtx.asCtx->gmmu.ReadTill(shader.data, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
+                    shader.bytecode = channelCtx.asCtx->gmmu.ReadTill(shader.backing, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
                         // We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader
                         // UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351
                         constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F};
@@ -950,9 +950,9 @@ namespace skyline::gpu::interconnect {
                             return static_cast<size_t>(std::distance(shaderInstructions.begin(), it)) * sizeof(u64);
                         }
                         return std::nullopt;
-                    })};
+                    });

-                    shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.data, shader.offset, bindlessTextureConstantBufferIndex);
+                    shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.bytecode, shader.offset, bindlessTextureConstantBufferIndex);

                     if (shader.stage != ShaderCompiler::Stage::VertexA && shader.stage != ShaderCompiler::Stage::VertexB) {
                        pipelineStage.program = shader.program;
@@ -963,13 +963,13 @@ namespace skyline::gpu::interconnect {
                        if (!vertexB.enabled)
                            throw exception("Enabling VertexA without VertexB is not supported");
                        else if (!vertexB.invalidated)
                            // If only VertexA is invalidated, we need to recombine here but we can defer it otherwise
-                            pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.data);
+                            pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.bytecode);
                    } else if (shader.stage == ShaderCompiler::Stage::VertexB) {
                        auto &vertexA{shaders[maxwell3d::ShaderStage::VertexA]};
                        if (vertexA.enabled)
                            // We need to combine the vertex shader stages if VertexA is enabled
-                            pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.data);
+                            pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.bytecode);
                        else
                            pipelineStage.program = shader.program;
                    }
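
As a companion to the diff above, the following is a minimal, self-contained
C++20 sketch of the pattern the patch adopts: a statically allocated backing
that is never cleared between parses, with a ReadTill-style helper returning a
span over only the bytes it actually wrote. This is an illustration, not the
emulator's implementation: the ReadTill below is a hypothetical single-buffer
stand-in (no GMMU, no block walking), it uses std::span rather than Skyline's
span type, and MaxShaderBytecodeSize, BraSelf, and the guest buffer are
made-up values.

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <optional>
#include <span>

using u8 = std::uint8_t;
using u64 = std::uint64_t;

constexpr std::size_t MaxShaderBytecodeSize{0x10000}; // Hypothetical cap, not Skyline's real constant
constexpr u64 BraSelf{0xE2400FFFFF87000F};            // One of the two "BRA $" encodings the patch matches

// Copies from `source` into the caller-owned backing until `function` reports an
// end offset, then returns a span over only the bytes actually written; the backing
// is never cleared, mirroring the patch's removal of the zero-initializing resize
template<typename Function>
std::span<u8> ReadTill(std::span<u8> backing, std::span<const u8> source, Function function) {
    std::size_t readSize{std::min(backing.size(), source.size())};
    auto end{function(source.first(readSize))}; // std::nullopt means "no terminator found"
    std::size_t copySize{end ? *end : readSize};
    std::memcpy(backing.data(), source.data(), copySize);
    return backing.first(copySize); // Only the valid prefix is ever exposed to callers
}

int main() {
    // Fake guest memory: two payload "instructions" followed by "BRA $" padding
    std::array<u64, 4> guest{0x1111111111111111, 0x2222222222222222, BraSelf, BraSelf};
    std::span<const u8> guestBytes{reinterpret_cast<const u8 *>(guest.data()), guest.size() * sizeof(u64)};

    static std::array<u8, MaxShaderBytecodeSize> backing; // Reused across parses, never zeroed per read

    auto bytecode{ReadTill(std::span<u8>{backing}, guestBytes, [](std::span<const u8> data) -> std::optional<std::size_t> {
        // End detection in the style of the patch: scan u64 instructions for the self-branch padding
        for (std::size_t offset{}; offset + sizeof(u64) <= data.size(); offset += sizeof(u64)) {
            u64 instruction;
            std::memcpy(&instruction, data.data() + offset, sizeof(u64));
            if (instruction == BraSelf)
                return offset; // The bytecode ends where the padding begins
        }
        return std::nullopt;
    })};

    std::cout << "bytecode size: " << bytecode.size() << " bytes\n"; // 16: the padding is excluded
}

The key property matches the commit message: because the callback reports where
the bytecode ends, only that prefix of the backing is ever written and read, so
the clear semantics that the per-parse resize imposed are simply unnecessary.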