mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-30 00:04:19 +01:00
Implement rest of I2M engine copies
This commit is contained in:
parent
72c2d94cbe
commit
fd205ff0a9
@ -44,12 +44,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Inline2Memory::Upload(IOVA dst, span<u32> src) {
|
void Inline2Memory::Upload(IOVA dst, span<u8> src) {
|
||||||
auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(dst, src.size_bytes())};
|
auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(dst, src.size())};
|
||||||
|
|
||||||
size_t offset{};
|
size_t offset{};
|
||||||
for (auto mapping : dstMappings) {
|
for (auto mapping : dstMappings) {
|
||||||
UploadSingleMapping(mapping, src.cast<u8>().subspan(offset, mapping.size()));
|
UploadSingleMapping(mapping, src.subspan(offset, mapping.size()));
|
||||||
offset += mapping.size();
|
offset += mapping.size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
public:
|
public:
|
||||||
Inline2Memory(GPU &gpu, soc::gm20b::ChannelContext &channelCtx);
|
Inline2Memory(GPU &gpu, soc::gm20b::ChannelContext &channelCtx);
|
||||||
|
|
||||||
void Upload(IOVA dst, span<u32> src);
|
void Upload(IOVA dst, span<u8> src);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
// SPDX-License-Identifier: MPL-2.0
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
|
#include <gpu/texture/layout.h>
|
||||||
#include <soc/gm20b/channel.h>
|
#include <soc/gm20b/channel.h>
|
||||||
#include "inline2memory.h"
|
#include "inline2memory.h"
|
||||||
|
|
||||||
@ -19,13 +20,52 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
if (state.launchDma.completion == RegisterState::DmaCompletionType::ReleaseSemaphore)
|
if (state.launchDma.completion == RegisterState::DmaCompletionType::ReleaseSemaphore)
|
||||||
throw exception("Semaphore release on I2M completion is not supported!");
|
throw exception("Semaphore release on I2M completion is not supported!");
|
||||||
|
|
||||||
if (state.launchDma.layout == RegisterState::DmaDstMemoryLayout::Pitch && state.lineCount == 1) {
|
|
||||||
Logger::Debug("range: 0x{:X} -> 0x{:X}", u64{state.offsetOut}, u64{state.offsetOut} + buffer.size() * 0x4);
|
Logger::Debug("range: 0x{:X} -> 0x{:X}", u64{state.offsetOut}, u64{state.offsetOut} + buffer.size() * 0x4);
|
||||||
|
if (state.launchDma.layout == RegisterState::DmaDstMemoryLayout::Pitch) {
|
||||||
channelCtx.channelSequenceNumber++;
|
channelCtx.channelSequenceNumber++;
|
||||||
interconnect.Upload(u64{state.offsetOut}, span{buffer});
|
|
||||||
|
auto srcBuffer{span{buffer}.cast<u8>()};
|
||||||
|
for (u32 line{}, pitchOffset{}; line < state.lineCount; ++line, pitchOffset += state.pitchOut)
|
||||||
|
interconnect.Upload(u64{state.offsetOut + pitchOffset}, srcBuffer.subspan(state.lineLengthIn * line, state.lineLengthIn));
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
channelCtx.executor.Submit();
|
channelCtx.executor.Submit();
|
||||||
Logger::Warn("Non-linear I2M uploads are not supported!");
|
|
||||||
|
gpu::texture::Dimensions srcDimensions{state.lineLengthIn, state.lineCount, state.dstDepth};
|
||||||
|
|
||||||
|
gpu::texture::Dimensions dstDimensions{state.dstWidth, state.dstHeight, state.dstDepth};
|
||||||
|
size_t dstSize{GetBlockLinearLayerSize(dstDimensions, 1, 1, 1, 1 << (u8)state.dstBlockSize.height, 1 << (u8)state.dstBlockSize.depth)};
|
||||||
|
|
||||||
|
auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(state.offsetOut, dstSize)};
|
||||||
|
|
||||||
|
auto inlineCopy{[&](u8 *dst){
|
||||||
|
// The I2M engine only supports a formatBpb of 1
|
||||||
|
if ((srcDimensions.width != dstDimensions.width) || (srcDimensions.height != dstDimensions.height))
|
||||||
|
gpu::texture::CopyLinearToBlockLinearSubrect(srcDimensions, dstDimensions,
|
||||||
|
1, 1, 1,
|
||||||
|
1 << static_cast<u8>(state.dstBlockSize.height), 1 << static_cast<u8>(state.dstBlockSize.depth),
|
||||||
|
span{buffer}.cast<u8>().data(), dst,
|
||||||
|
state.originBytesX, state.originSamplesY
|
||||||
|
);
|
||||||
|
else
|
||||||
|
gpu::texture::CopyLinearToBlockLinear(dstDimensions,
|
||||||
|
1, 1, 1,
|
||||||
|
1 << static_cast<u8>(state.dstBlockSize.height), 1 << static_cast<u8>(state.dstBlockSize.depth),
|
||||||
|
span{buffer}.cast<u8>().data(), dst
|
||||||
|
);
|
||||||
|
}};
|
||||||
|
|
||||||
|
if (dstMappings.size() != 1) {
|
||||||
|
// We create a temporary buffer to hold the blockLinear texture if mappings are split
|
||||||
|
// NOTE: We don't reserve memory here since such copies on this engine are rarely used
|
||||||
|
std::vector<u8> tempBuffer(dstSize);
|
||||||
|
|
||||||
|
inlineCopy(tempBuffer.data());
|
||||||
|
|
||||||
|
interconnect.Upload(u64{state.offsetOut}, span{tempBuffer});
|
||||||
|
} else {
|
||||||
|
inlineCopy(dstMappings.front().data());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user