diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 591188bc..deabc37c 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -185,6 +185,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp ${source_DIR}/skyline/soc/gm20b/engines/inline2memory.cpp ${source_DIR}/skyline/soc/gm20b/engines/kepler_compute.cpp + ${source_DIR}/skyline/soc/gm20b/engines/maxwell_dma.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp ${source_DIR}/skyline/input/npad.cpp ${source_DIR}/skyline/input/npad_device.cpp diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp index 29ab19c5..ca9b8301 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp @@ -9,6 +9,7 @@ namespace skyline::soc::gm20b { : asCtx(std::move(pAsCtx)), executor(state), maxwell3D(std::make_unique(state, *this, macroState, executor)), + maxwellDma(state, *this), keplerCompute(state, *this), inline2Memory(asCtx), gpfifo(state, *this, numEntries) {} diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.h b/app/src/main/cpp/skyline/soc/gm20b/channel.h index f0819d06..70029a16 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/channel.h +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.h @@ -6,6 +6,7 @@ #include #include "macro/macro_state.h" #include "engines/engine.h" +#include "engines/maxwell_dma.h" #include "engines/kepler_compute.h" #include "engines/inline2memory.h" #include "gpfifo.h" @@ -26,6 +27,7 @@ namespace skyline::soc::gm20b { gpu::interconnect::CommandExecutor executor; MacroState macroState; std::unique_ptr maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file + engine::MaxwellDma maxwellDma; engine::KeplerCompute keplerCompute; engine::Inline2Memory inline2Memory; ChannelGpfifo gpfifo; diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp 
b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
new file mode 100644
index 00000000..1693c089
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include
+#include
+#include
+#include "maxwell_dma.h"
+
+namespace skyline::soc::gm20b::engine {
+    // Initialisers follow the member declaration order (syncpoints, channelCtx) as that is the order they run in regardless of how they are written
+    MaxwellDma::MaxwellDma(const DeviceState &state, ChannelContext &channelCtx)
+        : syncpoints(state.soc->host1x.syncpoints), channelCtx(channelCtx) {}
+
+    __attribute__((always_inline)) void MaxwellDma::CallMethod(u32 method, u32 argument) {
+        Logger::Verbose("Called method in Maxwell DMA: 0x{:X} args: 0x{:X}", method, argument);
+
+        HandleMethod(method, argument);
+    }
+
+    void MaxwellDma::HandleMethod(u32 method, u32 argument) {
+        if (method >= registers.raw.size()) {
+            // The method index is supplied by the caller, writing past the end of the register array would corrupt adjacent memory so the write is dropped
+            Logger::Warn("Called out-of-range method in Maxwell DMA: 0x{:X} args: 0x{:X}", method, argument);
+            return;
+        }
+
+        registers.raw[method] = argument;
+
+        if (method == ENGINE_OFFSET(launchDma))
+            LaunchDma();
+    }
+
+    void MaxwellDma::LaunchDma() {
+        if (*registers.lineLengthIn == 0)
+            return; // Nothing to copy
+
+        if (registers.launchDma->multiLineEnable) {
+            // 2D/3D copy
+            Logger::Warn("2D/3D DMA engine copies are unimplemented");
+        } else {
+            // 1D buffer copy
+            // TODO: implement swizzled 1D copies based on VMM 'kind'
+            Logger::Debug("src: 0x{:X} dst: 0x{:X} size: 0x{:X}", u64{*registers.offsetIn}, u64{*registers.offsetOut}, *registers.lineLengthIn);
+            channelCtx.asCtx->gmmu.Copy(*registers.offsetOut, *registers.offsetIn, *registers.lineLengthIn);
+        }
+    }
+
+    void MaxwellDma::CallMethodBatchNonInc(u32 method, span arguments) {
+        for (u32 argument : arguments)
+            HandleMethod(method, argument);
+    }
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h
new file mode 100644
index 00000000..0191df51
--- /dev/null
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include "engine.h"
+
+namespace skyline::soc::gm20b {
+    struct ChannelContext;
+}
+
+namespace skyline::soc::gm20b::engine {
+    /**
+     * @brief The Maxwell DMA Engine is used to perform DMA buffer/texture copies directly on the GPU
+     */
+    class MaxwellDma {
+      private:
+        host1x::SyncpointSet &syncpoints;
+        ChannelContext &channelCtx;
+
+        /**
+         * @brief Writes the argument to the register at index 'method' and launches a DMA if that register is launchDma, out-of-range methods are logged and dropped
+         */
+        void HandleMethod(u32 method, u32 argument);
+
+        /**
+         * @brief Performs the copy described by the current register state, only 1D (non multi-line) copies are carried out while 2D/3D copies just log a warning
+         */
+        void LaunchDma();
+
+      public:
+        /**
+         * @url https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
+         */
+        #pragma pack(push, 1)
+        union Registers {
+            std::array raw;
+
+            template
+            using Register = util::OffsetMember;
+
+            Register<0x40, u32> nop;
+            Register<0x50, u32> pmTrigger;
+
+            struct Semaphore {
+                Address address;
+                u32 payload;
+            };
+            static_assert(sizeof(Semaphore) == 0xC);
+
+            Register<0x90, Semaphore> semaphore;
+
+            struct RenderEnable {
+                enum class Mode : u8 {
+                    False = 0,
+                    True = 1,
+                    Conditional = 2,
+                    RenderIfEqual = 3,
+                    RenderIfNotEqual = 4
+                };
+
+                Address address;
+                Mode mode : 3;
+                u32 _pad_ : 29;
+            };
+            static_assert(sizeof(RenderEnable) == 0xC);
+
+            Register<0x95, RenderEnable> renderEnable;
+
+            struct PhysMode {
+                enum class Target : u8 {
+                    LocalFb = 0,
+                    CoherentSysmem = 1,
+                    NoncoherentSysmem = 2
+                };
+
+                Target target : 2;
+                u32 _pad_ : 30;
+            };
+
+            Register<0x98, PhysMode> srcPhysMode;
+            Register<0x99, PhysMode> dstPhysMode;
+
+            struct LaunchDma {
+                enum class DataTransferType : u8 {
+                    None = 0,
+                    Pipelined = 1,
+                    NonPipelined = 2
+                };
+
+                enum class SemaphoreType : u8 {
+                    None = 0,
+                    ReleaseOneWordSemaphore = 1,
+                    ReleaseFourWordSemaphore = 2
+                };
+
+                enum class InterruptType : u8 {
+                    None = 0,
+                    Blocking = 1,
+                    NonBlocking = 2
+                };
+
+                enum class MemoryLayout : u8 {
+                    BlockLinear = 0,
+                    Pitch = 1
+                };
+
+                enum class Type : u8 {
+                    Virtual = 0,
+                    Physical = 1
+                };
+
+                enum class SemaphoreReduction : u8 {
+                    IMin = 0,
+                    IMax = 1,
+                    IXor = 2,
+                    IAnd = 3,
+                    IOr = 4,
+                    IAdd = 5,
+                    Inc = 6,
+                    Dec = 7,
+                    FAdd = 10,
+                };
+
+                enum class SemaphoreReductionSign : u8 {
+                    Signed = 0,
+                    Unsigned = 1,
+                };
+
+                enum class BypassL2 : u8 {
+                    UsePteSetting = 0,
+                    ForceVolatile = 1,
+                };
+
+                DataTransferType dataTransferType : 2;
+                bool flushEnable : 1;
+                SemaphoreType semaphoreType : 2;
+                InterruptType interruptType : 2;
+                MemoryLayout srcMemoryLayout : 1;
+                MemoryLayout dstMemoryLayout : 1;
+                bool multiLineEnable : 1;
+                bool remapEnable : 1;
+                bool rmwDisable : 1;
+                Type srcType : 1;
+                Type dstType : 1;
+                SemaphoreReduction semaphoreReduction : 4;
+                SemaphoreReductionSign semaphoreReductionSign : 1;
+                bool reductionEnable : 1;
+                BypassL2 bypassL2 : 1;
+                u16 _pad_ : 11;
+            };
+            static_assert(sizeof(LaunchDma) == 4);
+
+            Register<0xC0, LaunchDma> launchDma;
+
+            Register<0x100, Address> offsetIn;
+            Register<0x102, Address> offsetOut;
+
+            Register<0x104, u32> pitchIn;
+            Register<0x105, u32> pitchOut;
+
+            Register<0x106, u32> lineLengthIn;
+            Register<0x107, u32> lineCount;
+
+            Register<0x1C0, u32> remapConstA;
+            Register<0x1C1, u32> remapConstB;
+
+            /**
+             * @note This is a single 32-bit method with no address words, dstSurface starts at the very next method (0x1C3) so the fields must fit in one word
+             */
+            struct RemapComponents {
+                enum class Swizzle : u8 {
+                    SrcX = 0,
+                    SrcY = 1,
+                    SrcZ = 2,
+                    SrcW = 3,
+                    ConstA = 4,
+                    ConstB = 5,
+                    NoWrite = 6
+                };
+
+                Swizzle dstX : 3;
+                u8 _pad0_ : 1;
+                Swizzle dstY : 3;
+                u8 _pad1_ : 1;
+                Swizzle dstZ : 3;
+                u8 _pad2_ : 1;
+                Swizzle dstW : 3;
+                u8 _pad3_ : 1;
+
+                u8 componentSizeMinusOne : 2;
+                u8 _pad4_ : 2;
+                u8 numSrcComponentsMinusOne : 2;
+                u8 _pad5_ : 2;
+                u8 numDstComponentsMinusOne : 2;
+                u8 _pad6_ : 6;
+            };
+            static_assert(sizeof(RemapComponents) == 0x4);
+
+            Register<0x1C2, RemapComponents> remapComponents;
+
+            struct Surface {
+                // Nvidias docs here differ from other emus and deko3d so go with what they say
+                struct {
+                    u8 width : 4;
+                    u8 height : 4;
+                    u8 depth : 4;
+                    u8 gobHeight : 4;
+                    u16 _pad_;
+                } blockSize;
+                u32 width;
+                u32 height;
+                u32 depth;
+                u32 layer;
+
+                struct {
+                    u16 x;
+                    u16 y;
+                } origin;
+            };
+            static_assert(sizeof(Surface) == 0x18);
+
+            Register<0x1C3, Surface> dstSurface;
+            Register<0x1CA, Surface> srcSurface;
+        } registers{};
+        static_assert(sizeof(Registers) == (EngineMethodsEnd * 0x4));
+        #pragma pack(pop)
+
+        MaxwellDma(const DeviceState &state, ChannelContext &channelCtx);
+
+        /**
+         * @brief Logs the method call and then handles it as a register write
+         */
+        void CallMethod(u32 method, u32 argument);
+
+        /**
+         * @brief Handles a write to the same method once for every supplied argument, in order
+         */
+        void CallMethodBatchNonInc(u32 method, span arguments);
+    };
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
index ecd641d7..77695743 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
@@ -112,11 +112,14 @@ namespace skyline::soc::gm20b {
             case SubchannelId::ThreeD:
                 channelCtx.maxwell3D->CallMethod(method, argument);
                 break;
+            case SubchannelId::Compute:
+                channelCtx.keplerCompute.CallMethod(method, argument);
+                break;
             case SubchannelId::Inline2Mem:
                 channelCtx.inline2Memory.CallMethod(method, argument);
                 break;
-            case SubchannelId::Compute:
-                channelCtx.keplerCompute.CallMethod(method, argument);
+            case SubchannelId::Copy:
+                channelCtx.maxwellDma.CallMethod(method, argument);
                 break;
             default:
                 Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
@@ -129,11 +132,14 @@ namespace skyline::soc::gm20b {
             case SubchannelId::ThreeD:
                 channelCtx.maxwell3D->CallMethodBatchNonInc(method, arguments);
                 break;
+            case SubchannelId::Compute:
+                channelCtx.keplerCompute.CallMethodBatchNonInc(method, arguments);
+                break;
             case SubchannelId::Inline2Mem:
                 channelCtx.inline2Memory.CallMethodBatchNonInc(method, arguments);
                 break;
-            case SubchannelId::Compute:
-                channelCtx.keplerCompute.CallMethodBatchNonInc(method, arguments);
+            case SubchannelId::Copy:
+                channelCtx.maxwellDma.CallMethodBatchNonInc(method, arguments);
                 break;
             default:
                 Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X} with batch args", method, subChannel);