Skip zero-initializing shader bytecode backing

The backing for shader data was implicitly zero-initialized by a `resize` on every shader parse; this was entirely unnecessary, as we would overwrite the entire range regardless.

We avoid this by using statically allocated storage together with a span over the portion of it that contains the shader bytecode. This avoids any unnecessary clearing without resorting to more complex solutions such as a custom allocator.
This commit is contained in:
PixelyIon 2022-05-01 18:15:13 +05:30
parent 42573170c6
commit 25a29f9044
2 changed files with 17 additions and 19 deletions

View File

@ -135,11 +135,12 @@ namespace skyline {
/** /**
* @brief Writes contents starting from the virtual address till the end of the span or an unmapped block has been hit or when `function` returns a non-nullopt value * @brief Writes contents starting from the virtual address till the end of the span or an unmapped block has been hit or when `function` returns a non-nullopt value
* @param function A function that is called on every block where it should return an end offset into the block when it wants to end reading or std::nullopt when it wants to continue reading * @param function A function that is called on every block where it should return an end offset into the block when it wants to end reading or std::nullopt when it wants to continue reading
* @return If returning was caused by the supplied function returning a non-nullopt value or other conditions * @return A span into the supplied container with the contents of the memory region
* @note The function will **NOT** be run on any sparse block * @note The function will **NOT** be run on any sparse block
* @note The function will provide no feedback on if the end has been reached or if there was an early exit
*/ */
template<typename Function, typename Container> template<typename Function, typename Container>
bool ReadTill(Container& destination, VaType virt, Function function) { span<u8> ReadTill(Container& destination, VaType virt, Function function) {
//TRACE_EVENT("containers", "FlatMemoryManager::ReadTill"); //TRACE_EVENT("containers", "FlatMemoryManager::ReadTill");
std::scoped_lock lock(this->blockMutex); std::scoped_lock lock(this->blockMutex);
@ -158,18 +159,15 @@ namespace skyline {
while (remainingSize) { while (remainingSize) {
if (predecessor->phys == nullptr) { if (predecessor->phys == nullptr) {
destination.resize(destination.size() - remainingSize); return {destination.data(), destination.size() - remainingSize};
return false;
} else { } else {
if (predecessor->extraInfo.sparseMapped) { if (predecessor->extraInfo.sparseMapped) {
std::memset(pointer, 0, blockReadSize); std::memset(pointer, 0, blockReadSize);
} else { } else {
auto end{function(span<u8>(blockPhys, blockReadSize))}; auto end{function(span<u8>(blockPhys, blockReadSize))};
std::memcpy(pointer, blockPhys, end ? *end : blockReadSize); std::memcpy(pointer, blockPhys, end ? *end : blockReadSize);
if (end) { if (end)
destination.resize((destination.size() - remainingSize) + *end); return {destination.data(), (destination.size() - remainingSize) + *end};
return true;
}
} }
} }
@ -183,7 +181,7 @@ namespace skyline {
} }
} }
return false; return {destination.data(), destination.size()};
} }
void Write(VaType virt, u8 *source, VaType size); void Write(VaType virt, u8 *source, VaType size);

View File

@ -747,7 +747,8 @@ namespace skyline::gpu::interconnect {
bool invalidated{true}; //!< If the shader that existed earlier has been invalidated bool invalidated{true}; //!< If the shader that existed earlier has been invalidated
bool shouldCheckSame{false}; //!< If we should do a check for the shader being the same as before bool shouldCheckSame{false}; //!< If we should do a check for the shader being the same as before
u32 offset{}; //!< Offset of the shader from the base IOVA u32 offset{}; //!< Offset of the shader from the base IOVA
boost::container::static_vector<u8, MaxShaderBytecodeSize> data; //!< The shader bytecode in a statically allocated vector std::array<u8, MaxShaderBytecodeSize> backing; //!< The backing storage for shader bytecode in a statically allocated array
span<u8> bytecode{}; //!< A span of the shader bytecode inside the backing storage
std::shared_ptr<ShaderManager::ShaderProgram> program{}; std::shared_ptr<ShaderManager::ShaderProgram> program{};
Shader(ShaderCompiler::Stage stage) : stage(stage) {} Shader(ShaderCompiler::Stage stage) : stage(stage) {}
@ -914,10 +915,10 @@ namespace skyline::gpu::interconnect {
// If a shader is invalidated, we need to reparse the program (given that it has changed) // If a shader is invalidated, we need to reparse the program (given that it has changed)
bool shouldParseShader{[&]() { bool shouldParseShader{[&]() {
if (!shader.data.empty() && shader.shouldCheckSame) { if (shader.bytecode.valid() && shader.shouldCheckSame) {
// A fast path to check if the shader is the same as before to avoid reparsing the shader // A fast path to check if the shader is the same as before to avoid reparsing the shader
auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.data.size())}; auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.bytecode.size())};
auto originalShader{shader.data.data()}; auto originalShader{shader.bytecode.data()};
for (auto &range : newIovaRanges) { for (auto &range : newIovaRanges) {
if (range.data() && std::memcmp(range.data(), originalShader, range.size()) == 0) { if (range.data() && std::memcmp(range.data(), originalShader, range.size()) == 0) {
@ -936,8 +937,7 @@ namespace skyline::gpu::interconnect {
if (shouldParseShader) { if (shouldParseShader) {
// A pass to check if the shader has a BRA infloop opcode ending (On most commercial games) // A pass to check if the shader has a BRA infloop opcode ending (On most commercial games)
shader.data.resize(MaxShaderBytecodeSize); shader.bytecode = channelCtx.asCtx->gmmu.ReadTill(shader.backing, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
auto foundEnd{channelCtx.asCtx->gmmu.ReadTill(shader.data, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
// We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader // We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader
// UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351 // UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351
constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F}; constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F};
@ -950,9 +950,9 @@ namespace skyline::gpu::interconnect {
return static_cast<size_t>(std::distance(shaderInstructions.begin(), it)) * sizeof(u64); return static_cast<size_t>(std::distance(shaderInstructions.begin(), it)) * sizeof(u64);
} }
return std::nullopt; return std::nullopt;
})}; });
shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.data, shader.offset, bindlessTextureConstantBufferIndex); shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.bytecode, shader.offset, bindlessTextureConstantBufferIndex);
if (shader.stage != ShaderCompiler::Stage::VertexA && shader.stage != ShaderCompiler::Stage::VertexB) { if (shader.stage != ShaderCompiler::Stage::VertexA && shader.stage != ShaderCompiler::Stage::VertexB) {
pipelineStage.program = shader.program; pipelineStage.program = shader.program;
@ -963,13 +963,13 @@ namespace skyline::gpu::interconnect {
throw exception("Enabling VertexA without VertexB is not supported"); throw exception("Enabling VertexA without VertexB is not supported");
else if (!vertexB.invalidated) else if (!vertexB.invalidated)
// If only VertexA is invalidated, we need to recombine here but we can defer it otherwise // If only VertexA is invalidated, we need to recombine here but we can defer it otherwise
pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.data); pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.bytecode);
} else if (shader.stage == ShaderCompiler::Stage::VertexB) { } else if (shader.stage == ShaderCompiler::Stage::VertexB) {
auto &vertexA{shaders[maxwell3d::ShaderStage::VertexA]}; auto &vertexA{shaders[maxwell3d::ShaderStage::VertexA]};
if (vertexA.enabled) if (vertexA.enabled)
// We need to combine the vertex shader stages if VertexA is enabled // We need to combine the vertex shader stages if VertexA is enabled
pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.data); pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.bytecode);
else else
pipelineStage.program = shader.program; pipelineStage.program = shader.program;
} }