From 8c73b62b2c91c28a7aa54894d1afb75ffc4e71b9 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 20 Mar 2022 18:00:34 +0000 Subject: [PATCH] Implement basic inline2memory engine support Not currently used by anything, but will be used by the compute and 3D engines as well as its own engine in the future. Block linear copies are currently unsupported. --- app/CMakeLists.txt | 1 + .../cpp/skyline/soc/gm20b/engines/engine.h | 14 ++ .../soc/gm20b/engines/inline2memory.cpp | 92 ++++++++++ .../skyline/soc/gm20b/engines/inline2memory.h | 173 ++++++++++++++++++ .../skyline/soc/gm20b/engines/maxwell/types.h | 15 +- .../skyline/soc/gm20b/engines/maxwell_3d.h | 16 +- 6 files changed, 289 insertions(+), 22 deletions(-) create mode 100644 app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp create mode 100644 app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index b6379cd9..cc1dc638 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -183,6 +183,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/soc/gm20b/engines/engine.cpp ${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp + ${source_DIR}/skyline/soc/gm20b/engines/inline2memory.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp ${source_DIR}/skyline/input/npad.cpp ${source_DIR}/skyline/input/npad_device.cpp diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h index 188bb12c..4dfa110a 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h @@ -10,6 +10,20 @@ namespace skyline::soc::gm20b { #define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32)) namespace engine { + /** + * @brief A 40-bit GMMU virtual address with register-packing + * @note The registers pack the address with big-endian ordering (but with 32 bit words) + */ + struct Address { + 
u32 high; + u32 low; + + operator u64() { + return (static_cast(high) << 32) | low; + } + }; + static_assert(sizeof(Address) == sizeof(u64)); + constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines /** diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp new file mode 100644 index 00000000..a9713dc5 --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include "inline2memory.h" + +namespace skyline::soc::gm20b::engine { + Inline2MemoryBackend::Inline2MemoryBackend(std::shared_ptr addressSpaceContext) : addressSpaceContext(std::move(addressSpaceContext)) {} + + void Inline2MemoryBackend::LaunchDma(Inline2MemoryBackend::RegisterState &state) { + writeOffset = 0; + size_t targetSizeWords{(state.lineCount * util::AlignUp(state.lineLengthIn, 4)) / 4}; + buffer.resize(targetSizeWords); + } + + void Inline2MemoryBackend::CompleteDma(Inline2MemoryBackend::RegisterState &state) { + if (state.launchDma.completion == RegisterState::DmaCompletionType::ReleaseSemaphore) + throw exception("Semaphore release on I2M completion is not supported!"); + + if (state.launchDma.layout == RegisterState::DmaDstMemoryLayout::Pitch && state.lineCount == 1) { + // TODO: we can do this with the buffer manager to avoid some overhead in the future + Logger::Debug("range: 0x{:X} -> 0x{:X}", u64{state.offsetOut}, u64{state.offsetOut} + buffer.size() * 0x4); + addressSpaceContext->gmmu.Write(state.offsetOut, buffer); + } else { + Logger::Warn("Non-linear I2M uploads are not supported!"); + } + } + + void Inline2MemoryBackend::LoadInlineData(RegisterState &state, u32 value) { + if (writeOffset >= buffer.size()) + throw exception("Inline data load overflow!"); + + buffer[writeOffset++] 
= value; + + if (writeOffset == buffer.size()) + CompleteDma(state); + } + + void Inline2MemoryBackend::LoadInlineData(Inline2MemoryBackend::RegisterState &state, span data) { + if (writeOffset + data.size() > buffer.size()) + throw exception("Inline data load overflow!"); + + span(buffer).subspan(writeOffset).copy_from(data); + writeOffset += data.size(); + + if (writeOffset == buffer.size()) + CompleteDma(state); + } + + Inline2Memory::Inline2Memory(std::shared_ptr addressSpaceContext) : backend(std::move(addressSpaceContext)) {} + + __attribute__((always_inline)) void Inline2Memory::CallMethod(u32 method, u32 argument) { + Logger::Verbose("Called method in I2M: 0x{:X} args: 0x{:X}", method, argument); + + HandleMethod(method, argument); + } + +#define INLINE2MEMORY_OFFSET(field) (sizeof(typeof(Registers::field)) - sizeof(std::remove_reference_t)) / sizeof(u32) +#define INLINE2MEMORY_STRUCT_OFFSET(field, member) INLINE2MEMORY_OFFSET(field) + U32_OFFSET(std::remove_reference_t, member) + + void Inline2Memory::HandleMethod(u32 method, u32 argument) { + registers.raw[method] = argument; + + switch (method) { + case INLINE2MEMORY_STRUCT_OFFSET(i2m, launchDma): + backend.LaunchDma(*registers.i2m); + return; + case INLINE2MEMORY_STRUCT_OFFSET(i2m, loadInlineData): + backend.LoadInlineData(*registers.i2m, argument); + return; + default: + return; + } + + } + + void Inline2Memory::CallMethodBatchNonInc(u32 method, span arguments) { + switch (method) { + case INLINE2MEMORY_STRUCT_OFFSET(i2m, loadInlineData): + backend.LoadInlineData(*registers.i2m, arguments); + return; + default: + break; + } + + for (u32 argument : arguments) + HandleMethod(method, argument); + } + +#undef INLINE2MEMORY_STRUCT_OFFSET +#undef INLINE2MEMORY_OFFSET +} diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h new file mode 100644 index 00000000..80e200e4 --- /dev/null +++ 
b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include "engine.h" + +namespace skyline::soc::gm20b { + struct AddressSpaceContext; +} + +namespace skyline::soc::gm20b::engine { + /** + * @brief Implements the actual behaviour of the I2M engine, allowing it to be shared between other engines which also contain the I2M block (3D, compute) + */ + class Inline2MemoryBackend { + private: + std::vector buffer; //!< Temporary buffer to hold data being currently uploaded + u32 writeOffset{}; //!< Current write offset in words into `buffer` + std::shared_ptr addressSpaceContext; + + public: + /** + * @brief The I2M register state that can be included as part of an engine's register state + * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def + */ + struct RegisterState { + enum class BlockWidth : u8 { + OneGob = 0 + }; + + enum class BlockHeight : u8 { + OneGob = 0, + TwoGobs = 1, + FourGobs = 2, + EightGobs = 3, + SixteenGobs = 4, + ThirtyTwoGobs = 5 + }; + + enum class BlockDepth : u8 { + OneGob = 0, + TwoGobs = 1, + FourGobs = 2, + EightGobs = 3, + SixteenGobs = 4, + ThirtyTwoGobs = 5 + }; + + enum class DmaDstMemoryLayout : u8 { + BlockLinear = 0, + Pitch = 1 + }; + + enum class DmaReductionFormat : u8 { + Unsigned32 = 0, + Signed32 = 1 + }; + + enum class DmaCompletionType : u8 { + FlushDisable = 0, + FlushOnly = 1, + ReleaseSemaphore = 2 + }; + + enum class DmaInterruptType : u8 { + None = 0, + Interrupt = 1 + }; + + enum class DmaSemaphoreStructSize : u8 { + FourWords = 0, + OneWord = 1 + }; + + enum class DmaReductionOp : u8 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7 + }; + + u32 lineLengthIn; + u32 lineCount; + Address offsetOut; + u32 pitchOut; + struct { + BlockWidth width : 4; + BlockHeight height : 4; + 
BlockDepth depth : 4; + u32 _pad1_ : 20; + } dstBlockSize; + u32 dstWidth; + u32 dstHeight; + u32 dstDepth; + u32 dstLayer; + u32 originBytesX; + u32 originSamplesY; + struct { + DmaDstMemoryLayout layout : 1; + bool reductionEnable : 1; + DmaReductionFormat format : 2; + DmaCompletionType completion : 2; + bool sysmemBarDisable : 1; + u8 _pad0_ : 1; + DmaInterruptType interrupt : 2; + u8 _pad1_ : 2; + DmaSemaphoreStructSize semaphore : 1; + DmaReductionOp reductionOp : 3; + } launchDma; + u32 loadInlineData; + }; + static_assert(sizeof(RegisterState) == (0xE * 0x4)); + + private: + /** + * @brief Runs after all the inline data has been pushed and handles writing that data into memory + */ + void CompleteDma(RegisterState &state); + + public: + Inline2MemoryBackend(std::shared_ptr addressSpaceContext); + + /** + * @brief Should be called when launchDma in `state` is written to + */ + void LaunchDma(RegisterState &state); + + /** + * @brief Should be called when loadInlineData in `state` is written to (non batch version) + */ + void LoadInlineData(RegisterState &state, u32 value); + + /** + * @brief Should be called when loadInlineData in `state` is written to (batch version) + */ + void LoadInlineData(RegisterState &state, span data); + }; + + /** + * @brief Implements the actual I2M engine block that is located on subchannel 2 and handles uploading data from a pushbuffer into GPU memory + */ + class Inline2Memory { + private: + Inline2MemoryBackend backend; + + void HandleMethod(u32 method, u32 argument); + + /** + * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def + */ + union Registers { + std::array raw; + + template + using Register = util::OffsetMember; + + Register<0x60, Inline2MemoryBackend::RegisterState> i2m; + } registers{}; + + public: + Inline2Memory(std::shared_ptr addressSpaceContext); + + void CallMethod(u32 method, u32 argument); + + void CallMethodBatchNonInc(u32 method, span arguments); + }; +} diff --git 
a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h index 93d9755a..b2745a73 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h @@ -5,24 +5,11 @@ #pragma once #include +#include namespace skyline::soc::gm20b::engine::maxwell3d::type { #pragma pack(push, 1) - /** - * @brief A 40-bit GMMU virtual address with register-packing - * @note The registers pack the address with big-endian ordering (but with 32 bit words) - */ - struct Address { - u32 high; - u32 low; - - operator u64() { - return (static_cast(high) << 32) | low; - } - }; - static_assert(sizeof(Address) == sizeof(u64)); - enum class MmeShadowRamControl : u32 { MethodTrack = 0, //!< Tracks all writes to registers in shadow RAM MethodTrackWithFilter = 1, //!< Tracks all writes to registers in shadow RAM with a filter diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h index 7f3132cd..c371b384 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h @@ -108,7 +108,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x3EB, u32> rtSeparateFragData; - Register<0x3F8, type::Address> depthTargetAddress; + Register<0x3F8, Address> depthTargetAddress; Register<0x3FA, type::DepthRtFormat> depthTargetFormat; Register<0x3FB, type::RenderTargetTileMode> depthTargetTileMode; Register<0x3FC, u32> depthTargetLayerStride; @@ -193,7 +193,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x54F, type::MultisampleControl> multisampleControl; struct SamplerPool { - type::Address address; // 0x557 + Address address; // 0x557 u32 maximumIndex; // 0x559 }; Register<0x557, SamplerPool> samplerPool; @@ -202,7 +202,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x55C, u32> lineSmoothEnable; 
struct TexturePool { - type::Address address; // 0x55D + Address address; // 0x55D u32 maximumIndex; // 0x55F }; Register<0x55D, TexturePool> texturePool; @@ -220,7 +220,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x56F, float> depthBiasUnits; Register<0x581, type::PointCoordReplace> pointCoordReplace; - Register<0x582, type::Address> setProgramRegion; + Register<0x582, Address> setProgramRegion; Register<0x585, u32> vertexEndGl; //!< Method-only register with no real value, used after calling vertexBeginGl to invoke the draw Register<0x586, type::VertexBeginGl> vertexBeginGl; //!< Similar to glVertexBegin semantically, supplies a primitive topology for draws alongside instancing data @@ -256,7 +256,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x680, std::array> colorWriteMask; struct Semaphore { - type::Address address; // 0x6C0 + Address address; // 0x6C0 u32 payload; // 0x6C2 type::SemaphoreInfo info; // 0x6C3 }; @@ -270,7 +270,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 enable : 1; }; } config; - type::Address iova; + Address iova; u32 divisor; }; static_assert(sizeof(VertexBuffer) == sizeof(u32) * 4); @@ -288,7 +288,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { }; Register<0x780, std::array> independentBlend; - Register<0x7C0, std::array> vertexBufferLimits; //!< A per-VBO IOVA denoting the end of the vertex buffer + Register<0x7C0, std::array> vertexBufferLimits; //!< A per-VBO IOVA denoting the end of the vertex buffer Register<0x800, std::array> setProgram; @@ -296,7 +296,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { struct ConstantBufferSelector { u32 size; - type::Address address; + Address address; }; Register<0x8E0, ConstantBufferSelector> constantBufferSelector;