Address review

This commit is contained in:
TheASVigilante 2023-03-02 16:10:34 +01:00 committed by Billy Laws
parent caf1abbe31
commit 3e1db818cf
4 changed files with 46 additions and 67 deletions

View File

@ -43,14 +43,14 @@ namespace skyline::soc::gm20b::engine {
if ((srcDimensions.width != dstDimensions.width) || (srcDimensions.height != dstDimensions.height)) if ((srcDimensions.width != dstDimensions.width) || (srcDimensions.height != dstDimensions.height))
gpu::texture::CopyLinearToBlockLinearSubrect(srcDimensions, dstDimensions, gpu::texture::CopyLinearToBlockLinearSubrect(srcDimensions, dstDimensions,
1, 1, 1, 1, 1, 1,
1 << static_cast<u8>(state.dstBlockSize.height), 1 << static_cast<u8>(state.dstBlockSize.depth), state.dstBlockSize.Height(), state.dstBlockSize.Depth(),
span{buffer}.cast<u8>().data(), dst, span{buffer}.cast<u8>().data(), dst,
state.originBytesX, state.originSamplesY state.originBytesX, state.originSamplesY
); );
else else
gpu::texture::CopyLinearToBlockLinear(dstDimensions, gpu::texture::CopyLinearToBlockLinear(dstDimensions,
1, 1, 1, 1, 1, 1,
1 << static_cast<u8>(state.dstBlockSize.height), 1 << static_cast<u8>(state.dstBlockSize.depth), state.dstBlockSize.Height(), state.dstBlockSize.Depth(),
span{buffer}.cast<u8>().data(), dst span{buffer}.cast<u8>().data(), dst
); );
}}; }};

View File

@ -28,28 +28,6 @@ namespace skyline::soc::gm20b::engine {
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def
*/ */
struct RegisterState { struct RegisterState {
/**
 * @brief Raw encoding of a block's width; a single GOB is the only value defined here
 */
enum class BlockWidth : u8 {
    OneGob = 0x0,
};
/**
 * @brief Raw encoding of a block's height, where each enumerator's value is log2 of the GOB count it names
 */
enum class BlockHeight : u8 {
    OneGob        = 0x0, //!< 1 << 0
    TwoGobs       = 0x1, //!< 1 << 1
    FourGobs      = 0x2, //!< 1 << 2
    EightGobs     = 0x3, //!< 1 << 3
    SixteenGobs   = 0x4, //!< 1 << 4
    ThirtyTwoGobs = 0x5, //!< 1 << 5
};
/**
 * @brief Raw encoding of a block's depth, where each enumerator's value is log2 of the GOB count it names
 */
enum class BlockDepth : u8 {
    OneGob        = 0x0, //!< 1 << 0
    TwoGobs       = 0x1, //!< 1 << 1
    FourGobs      = 0x2, //!< 1 << 2
    EightGobs     = 0x3, //!< 1 << 3
    SixteenGobs   = 0x4, //!< 1 << 4
    ThirtyTwoGobs = 0x5, //!< 1 << 5
};
enum class DmaDstMemoryLayout : u8 { enum class DmaDstMemoryLayout : u8 {
BlockLinear = 0, BlockLinear = 0,
Pitch = 1 Pitch = 1
@ -92,10 +70,17 @@ namespace skyline::soc::gm20b::engine {
Address offsetOut; Address offsetOut;
u32 pitchOut; u32 pitchOut;
// Destination block size register: three 4-bit fields followed by 20 bits of padding (32 bits in total).
// The height and depth fields hold exponents rather than counts; Height()/Depth() below decode them.
struct { struct {
BlockWidth width : 4; u32 width : 4;
BlockHeight height : 4; u32 height : 4;
BlockDepth depth : 4; u32 depth : 4;
u32 _pad1_ : 20; u32 _pad1_ : 20;
// Decoded block height: 2 raised to the raw `height` field (a raw value of 1 means two GOBs, per the enumerator names this replaces)
size_t Height() const {
return 1 << height;
}
// Decoded block depth: 2 raised to the raw `depth` field, using the same encoding as the height
size_t Depth() const {
return 1 << depth;
}
} dstBlockSize; } dstBlockSize;
u32 dstWidth; u32 dstWidth;
u32 dstHeight; u32 dstHeight;

View File

@ -15,7 +15,7 @@ namespace skyline::soc::gm20b::engine {
: channelCtx{channelCtx}, : channelCtx{channelCtx},
syncpoints{state.soc->host1x.syncpoints}, syncpoints{state.soc->host1x.syncpoints},
interconnect{*state.gpu, channelCtx}, interconnect{*state.gpu, channelCtx},
copyCache(0) {} copyCache() {}
__attribute__((always_inline)) void MaxwellDma::CallMethod(u32 method, u32 argument) { __attribute__((always_inline)) void MaxwellDma::CallMethod(u32 method, u32 argument) {
Logger::Verbose("Called method in Maxwell DMA: 0x{:X} args: 0x{:X}", method, argument); Logger::Verbose("Called method in Maxwell DMA: 0x{:X} args: 0x{:X}", method, argument);
@ -48,31 +48,10 @@ namespace skyline::soc::gm20b::engine {
if (registers.launchDma->srcMemoryLayout == registers.launchDma->dstMemoryLayout) [[unlikely]] { if (registers.launchDma->srcMemoryLayout == registers.launchDma->dstMemoryLayout) [[unlikely]] {
// Pitch to Pitch copy // Pitch to Pitch copy
if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch) [[likely]] { if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch) [[likely]] {
auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, *registers.pitchIn * *registers.lineCount)}; CopyPitchToPitch();
auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, *registers.pitchOut * *registers.lineCount)};
if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]] {
HandleCopy(srcMappings, dstMappings, *registers.lineLengthIn, *registers.lineLengthIn, [&](u8 *src, u8 *dst) {
// Both Linear, copy as is.
if ((*registers.pitchIn == *registers.pitchOut) && (*registers.pitchIn == *registers.lineLengthIn))
std::memcpy(dst, src, *registers.lineLengthIn * *registers.lineCount);
else
for (u32 linesToCopy{*registers.lineCount}, srcCopyOffset{}, dstCopyOffset{}; linesToCopy; --linesToCopy, srcCopyOffset += *registers.pitchIn, dstCopyOffset += *registers.pitchOut)
std::memcpy(dst + dstCopyOffset, src + srcCopyOffset, *registers.lineLengthIn);
});
} else [[likely]] {
// Both Linear, copy as is.
if ((*registers.pitchIn == *registers.pitchOut) && (*registers.pitchIn == *registers.lineLengthIn))
interconnect.Copy(dstMappings.front(), srcMappings.front());
else
for (u32 linesToCopy{*registers.lineCount}, srcCopyOffset{}, dstCopyOffset{}; linesToCopy; --linesToCopy, srcCopyOffset += *registers.pitchIn, dstCopyOffset += *registers.pitchOut)
interconnect.Copy(dstMappings.front().subspan(dstCopyOffset, u64{*registers.lineLengthIn}), srcMappings.front().subspan(srcCopyOffset, u64{*registers.lineLengthIn}));
}
} else { } else {
Logger::Warn("BlockLinear to BlockLinear DMA copies are unimplemented!"); Logger::Warn("BlockLinear to BlockLinear DMA copies are unimplemented!");
} }
return;
} else if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear) { } else if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear) {
CopyBlockLinearToPitch(); CopyBlockLinearToPitch();
} else [[likely]] { } else [[likely]] {
@ -109,7 +88,7 @@ namespace skyline::soc::gm20b::engine {
} }
} }
// Performs a copy whose source and/or destination translate to more than one mapping, using copyCache as staging memory.
// Parts of the body fall outside the visible hunks, so only the statements shown here are described.
// - srcMappings / dstMappings: translated ranges for the source and destination; a size() other than 1 means "split"
// - srcSize: when the source is split, the destination staging area starts this many bytes into copyCache
// - dstSize: number of bytes written back to `offsetOut` when the destination is split
// - copyCallback: the copy routine; presumably invoked with (src, dst) in the elided part, confirm against the full file
void MaxwellDma::HandleCopy(TranslatedAddressRange srcMappings, TranslatedAddressRange dstMappings, size_t srcSize, size_t dstSize, auto copyCallback) { void MaxwellDma::HandleSplitCopy(TranslatedAddressRange srcMappings, TranslatedAddressRange dstMappings, size_t srcSize, size_t dstSize, auto copyCallback) {
bool isSrcSplit{}; bool isSrcSplit{};
// Default to operating directly on the first mapping of each side; replaced below when that side is split
u8 *src{srcMappings.front().data()}, *dst{dstMappings.front().data()}; u8 *src{srcMappings.front().data()}, *dst{dstMappings.front().data()};
if (srcMappings.size() != 1) { if (srcMappings.size() != 1) {
@ -122,18 +101,12 @ namespace skyline::soc::gm20b::engine {
isSrcSplit = true; isSrcSplit = true;
} }
if (dstMappings.size() != 1) { if (dstMappings.size() != 1) {
// If both the source and destination are split size_t offset{isSrcSplit ? srcSize : 0};
if (isSrcSplit) {
if (copyCache.size() < (srcSize + dstSize))
copyCache.resize(srcSize + dstSize);
dst = copyCache.data() + srcSize; if (copyCache.size() < (dstSize + offset))
} else {
if (copyCache.size() < dstSize)
// NOTE(review): in the new column the guard compares against (dstSize + offset) but the resize target is only dstSize.
// When offset is non-zero this looks too small for `dst = copyCache.data() + offset` and may even shrink the cache; should it be resize(dstSize + offset)? Confirm.
copyCache.resize(dstSize); copyCache.resize(dstSize);
dst = copyCache.data(); dst = copyCache.data() + offset;
}
// If the destination is not entirely filled by the copy we copy its current state in the cache to prevent clearing of other data. // If the destination is not entirely filled by the copy we copy its current state in the cache to prevent clearing of other data.
if (registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear) if (registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear)
@ -144,10 +117,29 @ namespace skyline::soc::gm20b::engine {
// A split destination was staged in copyCache, so write the staged bytes back through the GMMU
if (dstMappings.size() != 1) if (dstMappings.size() != 1)
channelCtx.asCtx->gmmu.Write(u64{*registers.offsetOut}, dst, dstSize); channelCtx.asCtx->gmmu.Write(u64{*registers.offsetOut}, dst, dstSize);
}
// CopyPitchToPitch: copies `lineCount` lines of `lineLengthIn` bytes from `offsetIn` to `offsetOut`, where both sides use a pitch layout.
// Both ranges are translated with a size of pitch * lineCount; if either side resolves to more than one mapping the copy goes
// through HandleSplitCopy with a memcpy-based callback, otherwise it is issued through `interconnect.Copy`.
// If the cache is over 5 MBs large then we clamp it to not waste memory void MaxwellDma::CopyPitchToPitch() {
if (copyCache.size() > 5242880) [[unlikely]] auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, *registers.pitchIn * *registers.lineCount)};
copyCache.resize(5242880); auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, *registers.pitchOut * *registers.lineCount)};
if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]] {
// NOTE(review): both size arguments are a single line length, yet the callback below reads and writes `lineCount` lines.
// Confirm that HandleSplitCopy stages enough bytes; pitchIn * lineCount / pitchOut * lineCount may be what is intended.
HandleSplitCopy(srcMappings, dstMappings, *registers.lineLengthIn, *registers.lineLengthIn, [&](u8 *src, u8 *dst) {
// Both Linear, copy as is.
// Equal pitches that also equal the line length leave no gap between lines, so one memcpy covers every line
if ((*registers.pitchIn == *registers.pitchOut) && (*registers.pitchIn == *registers.lineLengthIn))
std::memcpy(dst, src, *registers.lineLengthIn * *registers.lineCount);
else
// Otherwise copy line by line, advancing the source by pitchIn and the destination by pitchOut
for (size_t linesToCopy{*registers.lineCount}, srcCopyOffset{}, dstCopyOffset{}; linesToCopy; --linesToCopy, srcCopyOffset += *registers.pitchIn, dstCopyOffset += *registers.pitchOut)
std::memcpy(dst + dstCopyOffset, src + srcCopyOffset, *registers.lineLengthIn);
});
} else [[likely]] {
// Both Linear, copy as is.
// Same two cases as above, issued through the interconnect on the single mapping of each side
if ((*registers.pitchIn == *registers.pitchOut) && (*registers.pitchIn == *registers.lineLengthIn))
interconnect.Copy(dstMappings.front(), srcMappings.front());
else
// One interconnect copy per line, each restricted to a lineLengthIn-sized subspan
for (size_t linesToCopy{*registers.lineCount}, srcCopyOffset{}, dstCopyOffset{}; linesToCopy; --linesToCopy, srcCopyOffset += *registers.pitchIn, dstCopyOffset += *registers.pitchOut)
interconnect.Copy(dstMappings.front().subspan(dstCopyOffset, u64{*registers.lineLengthIn}), srcMappings.front().subspan(srcCopyOffset, u64{*registers.lineLengthIn}));
}
} }
void MaxwellDma::CopyBlockLinearToPitch() { void MaxwellDma::CopyBlockLinearToPitch() {
@ -192,7 +184,7 @@ namespace skyline::soc::gm20b::engine {
Logger::Debug("{}x{}x{}@0x{:X} -> {}x{}x{}@0x{:X}", srcDimensions.width, srcDimensions.height, srcDimensions.depth, srcLayerAddress, dstDimensions.width, dstDimensions.height, dstDimensions.depth, u64{*registers.offsetOut}); Logger::Debug("{}x{}x{}@0x{:X} -> {}x{}x{}@0x{:X}", srcDimensions.width, srcDimensions.height, srcDimensions.depth, srcLayerAddress, dstDimensions.width, dstDimensions.height, dstDimensions.depth, u64{*registers.offsetOut});
if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]] if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]]
HandleCopy(srcMappings, dstMappings, srcLayerStride, dstSize, copyFunc); HandleSplitCopy(srcMappings, dstMappings, srcLayerStride, dstSize, copyFunc);
else [[likely]] else [[likely]]
copyFunc(srcMappings.front().data(), dstMappings.front().data()); copyFunc(srcMappings.front().data(), dstMappings.front().data());
} }
@ -239,7 +231,7 @@ namespace skyline::soc::gm20b::engine {
}}; }};
if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]] if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]]
HandleCopy(srcMappings, dstMappings, srcSize, dstLayerStride, copyFunc); HandleSplitCopy(srcMappings, dstMappings, srcSize, dstLayerStride, copyFunc);
else [[likely]] else [[likely]]
copyFunc(srcMappings.front().data(), dstMappings.front().data()); copyFunc(srcMappings.front().data(), dstMappings.front().data());
} }

View File

@ -29,7 +29,9 @@ namespace skyline::soc::gm20b::engine {
void DmaCopy(); void DmaCopy();
void HandleCopy(TranslatedAddressRange srcMappings, TranslatedAddressRange dstMappings, size_t srcSize, size_t dstSize, auto copyCallback); void HandleSplitCopy(TranslatedAddressRange srcMappings, TranslatedAddressRange dstMappings, size_t srcSize, size_t dstSize, auto copyCallback);
void CopyPitchToPitch();
void CopyBlockLinearToPitch(); void CopyBlockLinearToPitch();