From 3e4e8de1d243731ff31daa6a26c0c0dbcb9232e0 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 9 Apr 2022 17:56:06 +0100 Subject: [PATCH] Implement primitive Linear->Block Linear DMA engine copies Slightly inaccurate and misses some features but good enough for most games, should be revisited later. --- app/src/main/cpp/skyline/gpu/texture/format.h | 18 +++++ app/src/main/cpp/skyline/gpu/texture/layout.h | 14 ++-- .../skyline/soc/gm20b/engines/maxwell_dma.cpp | 74 ++++++++++++++++++- .../skyline/soc/gm20b/engines/maxwell_dma.h | 33 ++++++++- 4 files changed, 126 insertions(+), 13 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/texture/format.h b/app/src/main/cpp/skyline/gpu/texture/format.h index 47fa71cf..0f727660 100644 --- a/app/src/main/cpp/skyline/gpu/texture/format.h +++ b/app/src/main/cpp/skyline/gpu/texture/format.h @@ -150,4 +150,22 @@ namespace skyline::gpu::format { #undef FORMAT_NORM_INT_FLOAT // @fmt:on + + inline const gpu::texture::FormatBase &GetFormatForBpp(u32 bytesPerPixel) { + switch (bytesPerPixel) { + case 1: + return R8Uint; + case 2: + return R8G8Uint; + case 4: + return R8G8B8A8Uint; + case 8: + return R16G16B16A16Uint; + case 16: + return R32G32B32A32Uint; + default: + Logger::Error("Couldn't convert bytes per pixel: {}", bytesPerPixel); + return R8Uint; + } + } } diff --git a/app/src/main/cpp/skyline/gpu/texture/layout.h b/app/src/main/cpp/skyline/gpu/texture/layout.h index 1d953642..7491cbca 100644 --- a/app/src/main/cpp/skyline/gpu/texture/layout.h +++ b/app/src/main/cpp/skyline/gpu/texture/layout.h @@ -12,7 +12,7 @@ namespace skyline::gpu::texture { constexpr u8 GobWidth{64}; // The width of a GOB in bytes constexpr u8 GobHeight{8}; // The height of a GOB in lines - size_t GetBlockLinearLayerSize(const GuestTexture &guest) { + inline size_t GetBlockLinearLayerSize(const GuestTexture &guest) { u32 blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs u32 robHeight{GobHeight * blockHeight}; //!< The
height of a single ROB (Row of Blocks) in lines u32 surfaceHeightLines{util::DivideCeil(guest.dimensions.height, u32{guest.format->blockHeight})}; //!< The height of the surface in lines @@ -27,7 +27,7 @@ namespace skyline::gpu::texture { /** * @brief Copies pixel data between a linear and blocklinear texture */ - template + template void CopyBlockLinearInternal(const GuestTexture &guest, u8 *blockLinear, u8 *linear, CopyFunction copyFunction) { u32 blockHeight{guest.tileConfig.blockHeight}; u32 robHeight{GobHeight * blockHeight}; @@ -99,15 +99,15 @@ namespace skyline::gpu::texture { /** * @brief Copies the contents of a blocklinear guest texture to a linear output buffer */ - void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { + inline void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { CopyBlockLinearInternal(guest, guestInput, linearOutput, std::memcpy); } /** * @brief Copies the contents of a blocklinear guest texture to a linear output buffer */ - void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { - CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8* src, u8* dst, size_t size) { + inline void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { + CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8 *src, u8 *dst, size_t size) { std::memcpy(dst, src, size); }); } @@ -115,7 +115,7 @@ namespace skyline::gpu::texture { /** * @brief Copies the contents of a pitch-linear guest texture to a linear output buffer */ - void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { + inline void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of 
pixel data @@ -132,7 +132,7 @@ namespace skyline::gpu::texture { /** * @brief Copies the contents of a linear buffer to a pitch-linear guest texture */ - void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { + inline void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp index 1693c089..ce01e70e 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp @@ -1,6 +1,9 @@ // SPDX-License-Identifier: MPL-2.0 // Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Copyright © 2022 yuzu Emulator Project (https://github.com/yuzu-emu/yuzu/) +#include +#include #include #include #include @@ -27,9 +30,18 @@ namespace skyline::soc::gm20b::engine { if (*registers.lineLengthIn == 0) return; // Nothing to copy + if (registers.launchDma->remapEnable) { + Logger::Warn("DMA remapping is unimplemented!"); + return; + } + if (registers.launchDma->multiLineEnable) { - // 2D/3D copy - Logger::Warn("2D/3D DMA engine copies are unimplemented"); + if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch && + registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear) + CopyPitchToBlockLinear(); + else + Logger::Warn("Unimplemented multi-line copy type: {} -> {}!", + static_cast(registers.launchDma->srcMemoryLayout), static_cast(registers.launchDma->dstMemoryLayout)); } else { // 1D buffer copy // TODO: implement swizzled 1D copies based on VMM 'kind' @@ -38,6 +50,64 @@ namespace skyline::soc::gm20b::engine { } } + void 
MaxwellDma::CopyPitchToBlockLinear() { + if (registers.dstSurface->blockSize.Depth() > 1 || registers.dstSurface->depth > 1) { + Logger::Warn("3D DMA engine copies are unimplemented!"); + return; + } + + if (registers.dstSurface->blockSize.Width() != 1) { + Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented!"); + return; + } + + u32 bytesPerPixel{static_cast(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())}; + if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchIn) { + Logger::Warn("Non-linear DMA source textures are not implemented!"); + return; + } + + if (registers.dstSurface->origin.x || registers.dstSurface->origin.y) { + Logger::Warn("Non-zero origin DMA copies are not implemented!"); + return; + } + + gpu::GuestTexture srcTexture{span{}, + gpu::texture::Dimensions{*registers.lineLengthIn, *registers.lineCount, 1}, + gpu::format::GetFormatForBpp(bytesPerPixel), + gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear }, + gpu::texture::TextureType::e2D}; + + if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerSize())}; mappings.size() == 1) { + srcTexture.mappings[0] = mappings[0]; + } else { + Logger::Warn("DMA for split textures is unimplemented!"); + return; + } + + if (*registers.lineLengthIn != registers.dstSurface->width) + Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width); + + // This represents a single layer view into a potentially multi-layer texture + gpu::GuestTexture dstTexture{span{}, + gpu::texture::Dimensions{*registers.lineLengthIn, registers.dstSurface->height, 1}, + gpu::format::GetFormatForBpp(bytesPerPixel), + gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 }, + gpu::texture::TextureType::e2D}; + + u64 dstLayerAddress{*registers.offsetOut + 
dstTexture.GetLayerSize() * registers.dstSurface->layer}; + if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerSize())}; mappings.size() == 1) { + dstTexture.mappings[0] = mappings[0]; + } else { + Logger::Warn("DMA for split textures is unimplemented!"); + return; + } + + Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcTexture.dimensions.width, srcTexture.dimensions.height, *registers.offsetIn, dstTexture.dimensions.width, dstTexture.dimensions.height, dstLayerAddress); + + gpu::texture::CopyLinearToBlockLinear(dstTexture, srcTexture.mappings.front().data(), dstTexture.mappings.front().data()); + } + void MaxwellDma::CallMethodBatchNonInc(u32 method, span arguments) { for (u32 argument : arguments) HandleMethod(method, argument); diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h index 0191df51..29d6f341 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h @@ -22,6 +22,8 @@ namespace skyline::soc::gm20b::engine { void LaunchDma(); + void CopyPitchToBlockLinear(); + public: /** * @url https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h @@ -187,19 +189,42 @@ namespace skyline::soc::gm20b::engine { u8 _pad5_ : 2; u8 numDstComponentsMinusOne : 2; u8 _pad6_ : 6; + + u8 ComponentSize() { + return componentSizeMinusOne + 1; + } + + u8 NumSrcComponents() { + return numSrcComponentsMinusOne + 1; + } + + u8 NumDstComponents() { + return numDstComponentsMinusOne + 1; + } }; static_assert(sizeof(RemapComponents) == 0xC); Register<0x1C2, RemapComponents> remapComponents; struct Surface { - // Nvidias docs here differ from other emus and deko3d so go with what they say struct { - u8 width : 4; - u8 height : 4; - u8 depth : 4; + u8 widthLog2 : 4; + u8 heightLog2 : 4; + u8 depthLog2 : 4; u8 gobHeight : 4; u16 _pad_; + + u8 Width() { + return static_cast(1 << 
widthLog2); + } + + u8 Height() { + return static_cast(1 << heightLog2); + } + + u8 Depth() { + return static_cast(1 << depthLog2); + } } blockSize; u32 width; u32 height;