From 3c26921d543c712c89f4272a2ce6c27c8fa8ecf8 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 9 Apr 2022 13:14:06 +0100 Subject: [PATCH] Implement the Maxwell DMA engine The DMA engine is used to perform DMA buffer/texture copies directly on the GPU. It can deswizzle arbitrary regions of input textures, perform component remapping and swizzle into output textures. This impl only supports 1D buffer copies, 2D ones will come later. --- app/CMakeLists.txt | 1 + .../main/cpp/skyline/soc/gm20b/channel.cpp | 1 + app/src/main/cpp/skyline/soc/gm20b/channel.h | 2 + .../skyline/soc/gm20b/engines/maxwell_dma.cpp | 45 ++++ .../skyline/soc/gm20b/engines/maxwell_dma.h | 228 ++++++++++++++++++ app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp | 14 +- 6 files changed, 287 insertions(+), 4 deletions(-) create mode 100644 app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp create mode 100644 app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 591188bc..deabc37c 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -185,6 +185,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp ${source_DIR}/skyline/soc/gm20b/engines/inline2memory.cpp ${source_DIR}/skyline/soc/gm20b/engines/kepler_compute.cpp + ${source_DIR}/skyline/soc/gm20b/engines/maxwell_dma.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp ${source_DIR}/skyline/input/npad.cpp ${source_DIR}/skyline/input/npad_device.cpp diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp index 29ab19c5..ca9b8301 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp @@ -9,6 +9,7 @@ namespace skyline::soc::gm20b { : asCtx(std::move(pAsCtx)), executor(state), maxwell3D(std::make_unique(state, *this, macroState, executor)), + maxwellDma(state, *this), keplerCompute(state, *this), inline2Memory(asCtx), 
gpfifo(state, *this, numEntries) {} diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.h b/app/src/main/cpp/skyline/soc/gm20b/channel.h index f0819d06..70029a16 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/channel.h +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.h @@ -6,6 +6,7 @@ #include #include "macro/macro_state.h" #include "engines/engine.h" +#include "engines/maxwell_dma.h" #include "engines/kepler_compute.h" #include "engines/inline2memory.h" #include "gpfifo.h" @@ -26,6 +27,7 @@ namespace skyline::soc::gm20b { gpu::interconnect::CommandExecutor executor; MacroState macroState; std::unique_ptr maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file + engine::MaxwellDma maxwellDma; engine::KeplerCompute keplerCompute; engine::Inline2Memory inline2Memory; ChannelGpfifo gpfifo; diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp new file mode 100644 index 00000000..1693c089 --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include +#include +#include "maxwell_dma.h" + +namespace skyline::soc::gm20b::engine { + MaxwellDma::MaxwellDma(const DeviceState &state, ChannelContext &channelCtx) + : channelCtx(channelCtx), syncpoints(state.soc->host1x.syncpoints) {} + + __attribute__((always_inline)) void MaxwellDma::CallMethod(u32 method, u32 argument) { + Logger::Verbose("Called method in Maxwell DMA: 0x{:X} args: 0x{:X}", method, argument); + + HandleMethod(method, argument); + } + + void MaxwellDma::HandleMethod(u32 method, u32 argument) { + registers.raw[method] = argument; + + if (method == ENGINE_OFFSET(launchDma)) + LaunchDma(); + } + + void MaxwellDma::LaunchDma() { + if (*registers.lineLengthIn == 0) + return; // Nothing to copy + + if 
(registers.launchDma->multiLineEnable) { + // 2D/3D copy + Logger::Warn("2D/3D DMA engine copies are unimplemented"); + } else { + // 1D buffer copy + // TODO: implement swizzled 1D copies based on VMM 'kind' + Logger::Debug("src: 0x{:X} dst: 0x{:X} size: 0x{:X}", u64{*registers.offsetIn}, u64{*registers.offsetOut}, *registers.lineLengthIn); + channelCtx.asCtx->gmmu.Copy(*registers.offsetOut, *registers.offsetIn, *registers.lineLengthIn); + } + } + + void MaxwellDma::CallMethodBatchNonInc(u32 method, span arguments) { + for (u32 argument : arguments) + HandleMethod(method, argument); + } +} diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h new file mode 100644 index 00000000..0191df51 --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include "engine.h" + +namespace skyline::soc::gm20b { + struct ChannelContext; +} + +namespace skyline::soc::gm20b::engine { + /** + * @brief The Maxwell DMA Engine is used to perform DMA buffer/texture copies directly on the GPU + */ + class MaxwellDma { + private: + host1x::SyncpointSet &syncpoints; + ChannelContext &channelCtx; + + void HandleMethod(u32 method, u32 argument); + + void LaunchDma(); + + public: + /** + * @url https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h + */ + #pragma pack(push, 1) + union Registers { + std::array raw; + + template + using Register = util::OffsetMember; + + Register<0x40, u32> nop; + Register<0x50, u32> pmTrigger; + + struct Semaphore { + Address address; + u32 payload; + }; + static_assert(sizeof(Semaphore) == 0xC); + + Register<0x90, Semaphore> semaphore; + + struct RenderEnable { + enum class Mode : u8 { + False = 0, + True = 1, + Conditional = 2, + RenderIfEqual = 3, + RenderIfNotEqual = 4 + }; + + 
Address address; + Mode mode : 3; + u32 _pad_ : 29; + }; + static_assert(sizeof(RenderEnable) == 0xC); + + Register<0x95, RenderEnable> renderEnable; + + struct PhysMode { + enum class Target : u8 { + LocalFb = 0, + CoherentSysmem = 1, + NoncoherentSysmem = 2 + }; + + Target target : 2; + u32 _pad_ : 30; + }; + + Register<0x98, PhysMode> srcPhysMode; + Register<0x99, PhysMode> dstPhysMode; + + struct LaunchDma { + enum class DataTransferType : u8 { + None = 0, + Pipelined = 1, + NonPipelined = 2 + }; + + enum class SemaphoreType : u8 { + None = 0, + ReleaseOneWordSemaphore = 1, + ReleaseFourWordSemaphore = 2 + }; + + enum class InterruptType : u8 { + None = 0, + Blocking = 1, + NonBlocking = 2 + }; + + enum class MemoryLayout : u8 { + BlockLinear = 0, + Pitch = 1 + }; + + enum class Type : u8 { + Virtual = 0, + Physical = 1 + }; + + enum class SemaphoreReduction : u8 { + IMin = 0, + IMax = 1, + IXor = 2, + IAnd = 3, + IOr = 4, + IAdd = 5, + Inc = 6, + Dec = 7, + FAdd = 10, + }; + + enum class SemaphoreReductionSign : u8 { + Signed = 0, + Unsigned = 1, + }; + + enum class BypassL2 : u8 { + UsePteSetting = 0, + ForceVolatile = 1, + }; + + DataTransferType dataTransferType : 2; + bool flushEnable : 1; + SemaphoreType semaphoreType : 2; + InterruptType interruptType : 2; + MemoryLayout srcMemoryLayout : 1; + MemoryLayout dstMemoryLayout : 1; + bool multiLineEnable : 1; + bool remapEnable : 1; + bool rmwDisable : 1; + Type srcType : 1; + Type dstType : 1; + SemaphoreReduction semaphoreReduction : 4; + SemaphoreReductionSign semaphoreReductionSign : 1; + bool reductionEnable : 1; + BypassL2 bypassL2 : 1; + u16 _pad_ : 11; + }; + static_assert(sizeof(LaunchDma) == 4); + + Register<0xC0, LaunchDma> launchDma; + + Register<0x100, Address> offsetIn; + Register<0x102, Address> offsetOut; + + Register<0x104, u32> pitchIn; + Register<0x105, u32> pitchOut; + + Register<0x106, u32> lineLengthIn; + Register<0x107, u32> lineCount; + + Register<0x1C0, u32> remapConstA; + 
Register<0x1C1, u32> remapConstB; + + struct RemapComponents { + enum class Swizzle : u8 { + SrcX = 0, + SrcY = 1, + SrcZ = 2, + SrcW = 3, + ConstA = 4, + ConstB = 5, + NoWrite = 6 + }; + + Address address; + + Swizzle dstX : 3; + u8 _pad0_ : 1; + Swizzle dstY : 3; + u8 _pad1_ : 1; + Swizzle dstZ : 3; + u8 _pad2_ : 1; + Swizzle dstW : 3; + u8 _pad3_ : 1; + + u8 componentSizeMinusOne : 2; + u8 _pad4_ : 2; + u8 numSrcComponentsMinusOne : 2; + u8 _pad5_ : 2; + u8 numDstComponentsMinusOne : 2; + u8 _pad6_ : 6; + }; + static_assert(sizeof(RemapComponents) == 0xC); + + Register<0x1C2, RemapComponents> remapComponents; + + struct Surface { + // Nvidias docs here differ from other emus and deko3d so go with what they say + struct { + u8 width : 4; + u8 height : 4; + u8 depth : 4; + u8 gobHeight : 4; + u16 _pad_; + } blockSize; + u32 width; + u32 height; + u32 depth; + u32 layer; + + struct { + u16 x; + u16 y; + } origin; + }; + static_assert(sizeof(Surface) == 0x18); + + Register<0x1C3, Surface> dstSurface; + Register<0x1CA, Surface> srcSurface; + } registers{}; + static_assert(sizeof(Registers) == (EngineMethodsEnd * 0x4)); + #pragma pack(pop) + + MaxwellDma(const DeviceState &state, ChannelContext &channelCtx); + + void CallMethod(u32 method, u32 argument); + + void CallMethodBatchNonInc(u32 method, span arguments); + }; +} diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp index ecd641d7..77695743 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp @@ -112,11 +112,14 @@ namespace skyline::soc::gm20b { case SubchannelId::ThreeD: channelCtx.maxwell3D->CallMethod(method, argument); break; + case SubchannelId::Compute: + channelCtx.keplerCompute.CallMethod(method, argument); + break; case SubchannelId::Inline2Mem: channelCtx.inline2Memory.CallMethod(method, argument); break; - case SubchannelId::Compute: - channelCtx.keplerCompute.CallMethod(method, argument); 
+ case SubchannelId::Copy: + channelCtx.maxwellDma.CallMethod(method, argument); break; default: Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument); @@ -129,11 +132,14 @@ namespace skyline::soc::gm20b { case SubchannelId::ThreeD: channelCtx.maxwell3D->CallMethodBatchNonInc(method, arguments); break; + case SubchannelId::Compute: + channelCtx.keplerCompute.CallMethodBatchNonInc(method, arguments); + break; case SubchannelId::Inline2Mem: channelCtx.inline2Memory.CallMethodBatchNonInc(method, arguments); break; - case SubchannelId::Compute: - channelCtx.keplerCompute.CallMethodBatchNonInc(method, arguments); + case SubchannelId::Copy: + channelCtx.maxwellDma.CallMethodBatchNonInc(method, arguments); break; default: Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X} with batch args", method, subChannel);