From 3e1db818cf4a19950412d0423d7f846f8d52bad8 Mon Sep 17 00:00:00 2001 From: TheASVigilante <65920585+TheASVigilante@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:10:34 +0100 Subject: [PATCH] Address review --- .../soc/gm20b/engines/inline2memory.cpp | 4 +- .../skyline/soc/gm20b/engines/inline2memory.h | 35 +++------- .../skyline/soc/gm20b/engines/maxwell_dma.cpp | 70 ++++++++----------- .../skyline/soc/gm20b/engines/maxwell_dma.h | 4 +- 4 files changed, 46 insertions(+), 67 deletions(-) diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp index 36f550c1..8b16de4d 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp @@ -43,14 +43,14 @@ namespace skyline::soc::gm20b::engine { if ((srcDimensions.width != dstDimensions.width) || (srcDimensions.height != dstDimensions.height)) gpu::texture::CopyLinearToBlockLinearSubrect(srcDimensions, dstDimensions, 1, 1, 1, - 1 << static_cast(state.dstBlockSize.height), 1 << static_cast(state.dstBlockSize.depth), + state.dstBlockSize.Height(), state.dstBlockSize.Depth(), span{buffer}.cast().data(), dst, state.originBytesX, state.originSamplesY ); else gpu::texture::CopyLinearToBlockLinear(dstDimensions, 1, 1, 1, - 1 << static_cast(state.dstBlockSize.height), 1 << static_cast(state.dstBlockSize.depth), + state.dstBlockSize.Height(), state.dstBlockSize.Depth(), span{buffer}.cast().data(), dst ); }}; diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h index 827d37a3..3c5ea68b 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h @@ -28,28 +28,6 @@ namespace skyline::soc::gm20b::engine { * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def */ struct RegisterState { - 
enum class BlockWidth : u8 { - OneGob = 0 - }; - - enum class BlockHeight : u8 { - OneGob = 0, - TwoGobs = 1, - FourGobs = 2, - EightGobs = 3, - SixteenGobs = 4, - ThirtyTwoGobs = 5 - }; - - enum class BlockDepth : u8 { - OneGob = 0, - TwoGobs = 1, - FourGobs = 2, - EightGobs = 3, - SixteenGobs = 4, - ThirtyTwoGobs = 5 - }; - enum class DmaDstMemoryLayout : u8 { BlockLinear = 0, Pitch = 1 @@ -92,10 +70,17 @@ namespace skyline::soc::gm20b::engine { Address offsetOut; u32 pitchOut; struct { - BlockWidth width : 4; - BlockHeight height : 4; - BlockDepth depth : 4; + u32 width : 4; + u32 height : 4; + u32 depth : 4; u32 _pad1_ : 20; + + size_t Height() const { + return 1 << height; + } + size_t Depth() const { + return 1 << depth; + } } dstBlockSize; u32 dstWidth; u32 dstHeight; diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp index 08452c01..974c5cee 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp @@ -15,7 +15,7 @@ namespace skyline::soc::gm20b::engine { : channelCtx{channelCtx}, syncpoints{state.soc->host1x.syncpoints}, interconnect{*state.gpu, channelCtx}, - copyCache(0) {} + copyCache() {} __attribute__((always_inline)) void MaxwellDma::CallMethod(u32 method, u32 argument) { Logger::Verbose("Called method in Maxwell DMA: 0x{:X} args: 0x{:X}", method, argument); @@ -48,31 +48,10 @@ namespace skyline::soc::gm20b::engine { if (registers.launchDma->srcMemoryLayout == registers.launchDma->dstMemoryLayout) [[unlikely]] { // Pitch to Pitch copy if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch) [[likely]] { - auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, *registers.pitchIn * *registers.lineCount)}; - auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, *registers.pitchOut * *registers.lineCount)}; - - if 
(srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]] { - HandleCopy(srcMappings, dstMappings, *registers.lineLengthIn, *registers.lineLengthIn, [&](u8 *src, u8 *dst) { - // Both Linear, copy as is. - if ((*registers.pitchIn == *registers.pitchOut) && (*registers.pitchIn == *registers.lineLengthIn)) - std::memcpy(dst, src, *registers.lineLengthIn * *registers.lineCount); - else - for (u32 linesToCopy{*registers.lineCount}, srcCopyOffset{}, dstCopyOffset{}; linesToCopy; --linesToCopy, srcCopyOffset += *registers.pitchIn, dstCopyOffset += *registers.pitchOut) - std::memcpy(dst + dstCopyOffset, src + srcCopyOffset, *registers.lineLengthIn); - }); - } else [[likely]] { - // Both Linear, copy as is. - if ((*registers.pitchIn == *registers.pitchOut) && (*registers.pitchIn == *registers.lineLengthIn)) - interconnect.Copy(dstMappings.front(), srcMappings.front()); - else - for (u32 linesToCopy{*registers.lineCount}, srcCopyOffset{}, dstCopyOffset{}; linesToCopy; --linesToCopy, srcCopyOffset += *registers.pitchIn, dstCopyOffset += *registers.pitchOut) - interconnect.Copy(dstMappings.front().subspan(dstCopyOffset, u64{*registers.lineLengthIn}), srcMappings.front().subspan(srcCopyOffset, u64{*registers.lineLengthIn})); - } + CopyPitchToPitch(); } else { Logger::Warn("BlockLinear to BlockLinear DMA copies are unimplemented!"); } - - return; } else if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear) { CopyBlockLinearToPitch(); } else [[likely]] { @@ -109,7 +88,7 @@ namespace skyline::soc::gm20b::engine { } } - void MaxwellDma::HandleCopy(TranslatedAddressRange srcMappings, TranslatedAddressRange dstMappings, size_t srcSize, size_t dstSize, auto copyCallback) { + void MaxwellDma::HandleSplitCopy(TranslatedAddressRange srcMappings, TranslatedAddressRange dstMappings, size_t srcSize, size_t dstSize, auto copyCallback) { bool isSrcSplit{}; u8 *src{srcMappings.front().data()}, *dst{dstMappings.front().data()}; if 
(srcMappings.size() != 1) { @@ -122,18 +101,12 @@ namespace skyline::soc::gm20b::engine { isSrcSplit = true; } if (dstMappings.size() != 1) { - // If both the source and destination are split - if (isSrcSplit) { - if (copyCache.size() < (srcSize + dstSize)) - copyCache.resize(srcSize + dstSize); + size_t offset{isSrcSplit ? srcSize : 0}; - dst = copyCache.data() + srcSize; - } else { - if (copyCache.size() < dstSize) - copyCache.resize(dstSize); + if (copyCache.size() < (dstSize + offset)) + copyCache.resize(dstSize + offset); - dst = copyCache.data(); - } + dst = copyCache.data() + offset; // If the destination is not entirely filled by the copy we copy it's current state in the cache to prevent clearing of other data. if (registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear) @@ -144,10 +117,29 @@ namespace skyline::soc::gm20b::engine { if (dstMappings.size() != 1) channelCtx.asCtx->gmmu.Write(u64{*registers.offsetOut}, dst, dstSize); + } - // If the cache is over 5 MBs large then we clamp it to not waste memory - if (copyCache.size() > 5242880) [[unlikely]] - copyCache.resize(5242880); + void MaxwellDma::CopyPitchToPitch() { + auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, *registers.pitchIn * *registers.lineCount)}; + auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, *registers.pitchOut * *registers.lineCount)}; + + if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]] { + HandleSplitCopy(srcMappings, dstMappings, *registers.lineLengthIn, *registers.lineLengthIn, [&](u8 *src, u8 *dst) { + // Both Linear, copy as is. 
+ if ((*registers.pitchIn == *registers.pitchOut) && (*registers.pitchIn == *registers.lineLengthIn)) + std::memcpy(dst, src, *registers.lineLengthIn * *registers.lineCount); + else + for (size_t linesToCopy{*registers.lineCount}, srcCopyOffset{}, dstCopyOffset{}; linesToCopy; --linesToCopy, srcCopyOffset += *registers.pitchIn, dstCopyOffset += *registers.pitchOut) + std::memcpy(dst + dstCopyOffset, src + srcCopyOffset, *registers.lineLengthIn); + }); + } else [[likely]] { + // Both Linear, copy as is. + if ((*registers.pitchIn == *registers.pitchOut) && (*registers.pitchIn == *registers.lineLengthIn)) + interconnect.Copy(dstMappings.front(), srcMappings.front()); + else + for (size_t linesToCopy{*registers.lineCount}, srcCopyOffset{}, dstCopyOffset{}; linesToCopy; --linesToCopy, srcCopyOffset += *registers.pitchIn, dstCopyOffset += *registers.pitchOut) + interconnect.Copy(dstMappings.front().subspan(dstCopyOffset, u64{*registers.lineLengthIn}), srcMappings.front().subspan(srcCopyOffset, u64{*registers.lineLengthIn})); + } } void MaxwellDma::CopyBlockLinearToPitch() { @@ -192,7 +184,7 @@ namespace skyline::soc::gm20b::engine { Logger::Debug("{}x{}x{}@0x{:X} -> {}x{}x{}@0x{:X}", srcDimensions.width, srcDimensions.height, srcDimensions.depth, srcLayerAddress, dstDimensions.width, dstDimensions.height, dstDimensions.depth, u64{*registers.offsetOut}); if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]] - HandleCopy(srcMappings, dstMappings, srcLayerStride, dstSize, copyFunc); + HandleSplitCopy(srcMappings, dstMappings, srcLayerStride, dstSize, copyFunc); else [[likely]] copyFunc(srcMappings.front().data(), dstMappings.front().data()); } @@ -239,7 +231,7 @@ namespace skyline::soc::gm20b::engine { }}; if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]] - HandleCopy(srcMappings, dstMappings, srcSize, dstLayerStride, copyFunc); + HandleSplitCopy(srcMappings, dstMappings, srcSize, dstLayerStride, copyFunc); else [[likely]] 
copyFunc(srcMappings.front().data(), dstMappings.front().data()); } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h index f3318148..91dfd5e6 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h @@ -29,7 +29,9 @@ namespace skyline::soc::gm20b::engine { void DmaCopy(); - void HandleCopy(TranslatedAddressRange srcMappings, TranslatedAddressRange dstMappings, size_t srcSize, size_t dstSize, auto copyCallback); + void HandleSplitCopy(TranslatedAddressRange srcMappings, TranslatedAddressRange dstMappings, size_t srcSize, size_t dstSize, auto copyCallback); + + void CopyPitchToPitch(); void CopyBlockLinearToPitch();