mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-08 01:55:04 +01:00
Skip zero-initializing shader bytecode backing
The backing for shader data would implicitly be zero-initialized due to a `resize` on every shader parse; this was entirely unnecessary as we would overwrite the entire range regardless. We avoid this by using statically allocated storage and a span over it containing the shader bytecode, which avoids any unnecessary clear semantics without resorting to more complex solutions such as a custom allocator.
This commit is contained in:
parent
42573170c6
commit
25a29f9044
@ -135,11 +135,12 @@ namespace skyline {
|
||||
/**
|
||||
* @brief Reads contents starting from the virtual address till the end of the span or an unmapped block has been hit or when `function` returns a non-nullopt value
|
||||
* @param function A function that is called on every block where it should return an end offset into the block when it wants to end reading or std::nullopt when it wants to continue reading
|
||||
* @return If returning was caused by the supplied function returning a non-nullopt value or other conditions
|
||||
* @return A span into the supplied container with the contents of the memory region
|
||||
* @note The function will **NOT** be run on any sparse block
|
||||
* @note The function will provide no feedback on if the end has been reached or if there was an early exit
|
||||
*/
|
||||
template<typename Function, typename Container>
|
||||
bool ReadTill(Container& destination, VaType virt, Function function) {
|
||||
span<u8> ReadTill(Container& destination, VaType virt, Function function) {
|
||||
//TRACE_EVENT("containers", "FlatMemoryManager::ReadTill");
|
||||
|
||||
std::scoped_lock lock(this->blockMutex);
|
||||
@ -158,18 +159,15 @@ namespace skyline {
|
||||
|
||||
while (remainingSize) {
|
||||
if (predecessor->phys == nullptr) {
|
||||
destination.resize(destination.size() - remainingSize);
|
||||
return false;
|
||||
return {destination.data(), destination.size() - remainingSize};
|
||||
} else {
|
||||
if (predecessor->extraInfo.sparseMapped) {
|
||||
std::memset(pointer, 0, blockReadSize);
|
||||
} else {
|
||||
auto end{function(span<u8>(blockPhys, blockReadSize))};
|
||||
std::memcpy(pointer, blockPhys, end ? *end : blockReadSize);
|
||||
if (end) {
|
||||
destination.resize((destination.size() - remainingSize) + *end);
|
||||
return true;
|
||||
}
|
||||
if (end)
|
||||
return {destination.data(), (destination.size() - remainingSize) + *end};
|
||||
}
|
||||
}
|
||||
|
||||
@ -183,7 +181,7 @@ namespace skyline {
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return {destination.data(), destination.size()};
|
||||
}
|
||||
|
||||
void Write(VaType virt, u8 *source, VaType size);
|
||||
|
@ -747,7 +747,8 @@ namespace skyline::gpu::interconnect {
|
||||
bool invalidated{true}; //!< If the shader that existed earlier has been invalidated
|
||||
bool shouldCheckSame{false}; //!< If we should do a check for the shader being the same as before
|
||||
u32 offset{}; //!< Offset of the shader from the base IOVA
|
||||
boost::container::static_vector<u8, MaxShaderBytecodeSize> data; //!< The shader bytecode in a statically allocated vector
|
||||
std::array<u8, MaxShaderBytecodeSize> backing; //!< The backing storage for shader bytecode in a statically allocated array
|
||||
span<u8> bytecode{}; //!< A span of the shader bytecode inside the backing storage
|
||||
std::shared_ptr<ShaderManager::ShaderProgram> program{};
|
||||
|
||||
Shader(ShaderCompiler::Stage stage) : stage(stage) {}
|
||||
@ -914,10 +915,10 @@ namespace skyline::gpu::interconnect {
|
||||
// If a shader is invalidated, we need to reparse the program (given that it has changed)
|
||||
|
||||
bool shouldParseShader{[&]() {
|
||||
if (!shader.data.empty() && shader.shouldCheckSame) {
|
||||
if (shader.bytecode.valid() && shader.shouldCheckSame) {
|
||||
// A fast path to check if the shader is the same as before to avoid reparsing the shader
|
||||
auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.data.size())};
|
||||
auto originalShader{shader.data.data()};
|
||||
auto newIovaRanges{channelCtx.asCtx->gmmu.TranslateRange(shaderBaseIova + shader.offset, shader.bytecode.size())};
|
||||
auto originalShader{shader.bytecode.data()};
|
||||
|
||||
for (auto &range : newIovaRanges) {
|
||||
if (range.data() && std::memcmp(range.data(), originalShader, range.size()) == 0) {
|
||||
@ -936,8 +937,7 @@ namespace skyline::gpu::interconnect {
|
||||
|
||||
if (shouldParseShader) {
|
||||
// A pass to check if the shader has a BRA infloop opcode ending (On most commercial games)
|
||||
shader.data.resize(MaxShaderBytecodeSize);
|
||||
auto foundEnd{channelCtx.asCtx->gmmu.ReadTill(shader.data, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
|
||||
shader.bytecode = channelCtx.asCtx->gmmu.ReadTill(shader.backing, shaderBaseIova + shader.offset, [](span<u8> data) -> std::optional<size_t> {
|
||||
// We attempt to find the shader size by looking for "BRA $" (Infinite Loop) which is used as padding at the end of the shader
|
||||
// UAM Shader Compiler Reference: https://github.com/devkitPro/uam/blob/5a5afc2bae8b55409ab36ba45be63fcb73f68993/source/compiler_iface.cpp#L319-L351
|
||||
constexpr u64 BraSelf1{0xE2400FFFFF87000F}, BraSelf2{0xE2400FFFFF07000F};
|
||||
@ -950,9 +950,9 @@ namespace skyline::gpu::interconnect {
|
||||
return static_cast<size_t>(std::distance(shaderInstructions.begin(), it)) * sizeof(u64);
|
||||
}
|
||||
return std::nullopt;
|
||||
})};
|
||||
});
|
||||
|
||||
shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.data, shader.offset, bindlessTextureConstantBufferIndex);
|
||||
shader.program = gpu.shader.ParseGraphicsShader(shader.stage, shader.bytecode, shader.offset, bindlessTextureConstantBufferIndex);
|
||||
|
||||
if (shader.stage != ShaderCompiler::Stage::VertexA && shader.stage != ShaderCompiler::Stage::VertexB) {
|
||||
pipelineStage.program = shader.program;
|
||||
@ -963,13 +963,13 @@ namespace skyline::gpu::interconnect {
|
||||
throw exception("Enabling VertexA without VertexB is not supported");
|
||||
else if (!vertexB.invalidated)
|
||||
// If only VertexA is invalidated, we need to recombine here but we can defer it otherwise
|
||||
pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.data);
|
||||
pipelineStage.program = gpu.shader.CombineVertexShaders(shader.program, vertexB.program, vertexB.bytecode);
|
||||
} else if (shader.stage == ShaderCompiler::Stage::VertexB) {
|
||||
auto &vertexA{shaders[maxwell3d::ShaderStage::VertexA]};
|
||||
|
||||
if (vertexA.enabled)
|
||||
// We need to combine the vertex shader stages if VertexA is enabled
|
||||
pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.data);
|
||||
pipelineStage.program = gpu.shader.CombineVertexShaders(vertexA.program, shader.program, shader.bytecode);
|
||||
else
|
||||
pipelineStage.program = shader.program;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user