diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index b6379cd9..cc1dc638 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -183,6 +183,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/soc/gm20b/engines/engine.cpp ${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp + ${source_DIR}/skyline/soc/gm20b/engines/inline2memory.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp ${source_DIR}/skyline/input/npad.cpp ${source_DIR}/skyline/input/npad_device.cpp diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h index 188bb12c..4dfa110a 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h @@ -10,6 +10,20 @@ namespace skyline::soc::gm20b { #define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32)) namespace engine { + /** + * @brief A 40-bit GMMU virtual address with register-packing + * @note The registers pack the address with big-endian ordering (but with 32 bit words) + */ + struct Address { + u32 high; + u32 low; + + operator u64() { + return (static_cast(high) << 32) | low; + } + }; + static_assert(sizeof(Address) == sizeof(u64)); + constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines /** diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp new file mode 100644 index 00000000..a9713dc5 --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.cpp @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include "inline2memory.h" + +namespace skyline::soc::gm20b::engine { + Inline2MemoryBackend::Inline2MemoryBackend(std::shared_ptr addressSpaceContext) : addressSpaceContext(std::move(addressSpaceContext)) {} + + void Inline2MemoryBackend::LaunchDma(Inline2MemoryBackend::RegisterState &state) { + writeOffset = 0; + size_t targetSizeWords{(state.lineCount * util::AlignUp(state.lineLengthIn, 4)) / 4}; + buffer.resize(targetSizeWords); + } + + void Inline2MemoryBackend::CompleteDma(Inline2MemoryBackend::RegisterState &state) { + if (state.launchDma.completion == RegisterState::DmaCompletionType::ReleaseSemaphore) + throw exception("Semaphore release on I2M completion is not supported!"); + + if (state.launchDma.layout == RegisterState::DmaDstMemoryLayout::Pitch && state.lineCount == 1) { + // TODO: we can do this with the buffer manager to avoid some overhead in the future + Logger::Debug("range: 0x{:X} -> 0x{:X}", u64{state.offsetOut}, u64{state.offsetOut} + buffer.size() * 0x4); + addressSpaceContext->gmmu.Write(state.offsetOut, buffer); + } else { + Logger::Warn("Non-linear I2M uploads are not supported!"); + } + } + + void Inline2MemoryBackend::LoadInlineData(RegisterState &state, u32 value) { + if (writeOffset >= buffer.size()) + throw exception("Inline data load overflow!"); + + buffer[writeOffset++] = value; + + if (writeOffset == buffer.size()) + CompleteDma(state); + } + + void Inline2MemoryBackend::LoadInlineData(Inline2MemoryBackend::RegisterState &state, span data) { + if (writeOffset + data.size() > buffer.size()) + throw exception("Inline data load overflow!"); + + span(buffer).subspan(writeOffset).copy_from(data); + writeOffset += data.size(); + + if (writeOffset == buffer.size()) + CompleteDma(state); + } + + Inline2Memory::Inline2Memory(std::shared_ptr addressSpaceContext) : backend(std::move(addressSpaceContext)) {} + + __attribute__((always_inline)) void Inline2Memory::CallMethod(u32 method, u32 argument) { + Logger::Verbose("Called method in I2M: 0x{:X} args: 0x{:X}", method, argument); + + HandleMethod(method, argument); + } + +#define INLINE2MEMORY_OFFSET(field) (sizeof(typeof(Registers::field)) - sizeof(std::remove_reference_t)) / sizeof(u32) +#define INLINE2MEMORY_STRUCT_OFFSET(field, member) INLINE2MEMORY_OFFSET(field) + U32_OFFSET(std::remove_reference_t, member) + + void Inline2Memory::HandleMethod(u32 method, u32 argument) { + registers.raw[method] = argument; + + switch (method) { + case INLINE2MEMORY_STRUCT_OFFSET(i2m, launchDma): + backend.LaunchDma(*registers.i2m); + return; + case INLINE2MEMORY_STRUCT_OFFSET(i2m, loadInlineData): + backend.LoadInlineData(*registers.i2m, argument); + return; + default: + return; + } + + } + + void Inline2Memory::CallMethodBatchNonInc(u32 method, span arguments) { + switch (method) { + case INLINE2MEMORY_STRUCT_OFFSET(i2m, loadInlineData): + backend.LoadInlineData(*registers.i2m, arguments); + return; + default: + break; + } + + for (u32 argument : arguments) + HandleMethod(method, argument); + } + +#undef INLINE2MEMORY_STRUCT_OFFSET +#undef INLINE2MEMORY_OFFSET +} diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h new file mode 100644 index 00000000..80e200e4 --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/inline2memory.h @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include "engine.h" + +namespace skyline::soc::gm20b { + struct AddressSpaceContext; +} + +namespace skyline::soc::gm20b::engine { + /** + * @brief Implements the actual behaviour of the I2M engine, allowing it to be shared between other engines which also contain the I2M block (3D, compute) + */ + class Inline2MemoryBackend { + private: + std::vector buffer; //!< Temporary buffer to hold data being currently uploaded + u32 writeOffset{}; //!< Current write offset in words into `buffer` + std::shared_ptr addressSpaceContext; + + public: + /** + * @brief The I2M register state that can be included as part of an engines register state + * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def + */ + struct RegisterState { + enum class BlockWidth : u8 { + OneGob = 0 + }; + + enum class BlockHeight : u8 { + OneGob = 0, + TwoGobs = 1, + FourGobs = 2, + EightGobs = 3, + SixteenGobs = 4, + ThirtyTwoGobs = 5 + }; + + enum class BlockDepth : u8 { + OneGob = 0, + TwoGobs = 1, + FourGobs = 2, + EightGobs = 3, + SixteenGobs = 4, + ThirtyTwoGobs = 5 + }; + + enum class DmaDstMemoryLayout : u8 { + BlockLinear = 0, + Pitch = 1 + }; + + enum class DmaReductionFormat : u8 { + Unsigned32 = 0, + Signed32 = 1 + }; + + enum class DmaCompletionType : u8 { + FlushDisable = 0, + FlushOnly = 1, + ReleaseSemaphore = 2 + }; + + enum class DmaInterruptType : u8 { + None = 0, + Interrupt = 1 + }; + + enum class DmaSemaphoreStructSize : u8 { + FourWords = 0, + OneWord = 1 + }; + + enum class DmaReductionOp : u8 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7 + }; + + u32 lineLengthIn; + u32 lineCount; + Address offsetOut; + u32 pitchOut; + struct { + BlockWidth width : 4; + BlockHeight height : 4; + BlockDepth depth : 4; + u32 _pad1_ : 20; + } dstBlockSize; + u32 dstWidth; + u32 dstHeight; + u32 dstDepth; + u32 dstLayer; + u32 originBytesX; + u32 originSamplesY; + struct { + DmaDstMemoryLayout layout : 1; + bool reductionEnable : 1; + DmaReductionFormat format : 2; + DmaCompletionType completion : 2; + bool sysmemBarDisable : 1; + u8 _pad0_ : 1; + DmaInterruptType interrupt : 2; + u8 _pad1_ : 2; + DmaSemaphoreStructSize semaphore : 1; + DmaReductionOp reductionOp : 3; + } launchDma; + u32 loadInlineData; + }; + static_assert(sizeof(RegisterState) == (0xE * 0x4)); + + private: + /** + * @brief Ran after all the inline data has been pushed and handles writing that data into memory + */ + void CompleteDma(RegisterState &state); + + public: + Inline2MemoryBackend(std::shared_ptr addressSpaceContext); + + /** + * @brief Should be called when launchDma in `state` is written to + */ + void LaunchDma(RegisterState &state); + + /** + * @brief Should be called when loadInlineData in `state` is written to (non batch version) + */ + void LoadInlineData(RegisterState &state, u32 value); + + /** + * @brief Should be called when loadInlineData in `state` is written to (batch version) + */ + void LoadInlineData(RegisterState &state, span data); + }; + + /** + * @brief Implements the actual I2M engine block that is located on subchannel 2 and handles uploading data from a pushbuffer into GPU memory + */ + class Inline2Memory { + private: + Inline2MemoryBackend backend; + + void HandleMethod(u32 method, u32 argument); + + /** + * @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def + */ + union Registers { + std::array raw; + + template + using Register = util::OffsetMember; + + Register<0x60, Inline2MemoryBackend::RegisterState> i2m; + } registers{}; + + public: + Inline2Memory(std::shared_ptr addressSpaceContext); + + void CallMethod(u32 method, u32 argument); + + void CallMethodBatchNonInc(u32 method, span arguments); + }; +} diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h index 93d9755a..b2745a73 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h @@ -5,24 +5,11 @@ #pragma once #include +#include namespace skyline::soc::gm20b::engine::maxwell3d::type { #pragma pack(push, 1) - /** - * @brief A 40-bit GMMU virtual address with register-packing - * @note The registers pack the address with big-endian ordering (but with 32 bit words) - */ - struct Address { - u32 high; - u32 low; - - operator u64() { - return (static_cast(high) << 32) | low; - } - }; - static_assert(sizeof(Address) == sizeof(u64)); - enum class MmeShadowRamControl : u32 { MethodTrack = 0, //!< Tracks all writes to registers in shadow RAM MethodTrackWithFilter = 1, //!< Tracks all writes to registers in shadow RAM with a filter diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h index 7f3132cd..c371b384 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h @@ -108,7 +108,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x3EB, u32> rtSeparateFragData; - Register<0x3F8, type::Address> depthTargetAddress; + Register<0x3F8, Address> depthTargetAddress; Register<0x3FA, type::DepthRtFormat> depthTargetFormat; Register<0x3FB, type::RenderTargetTileMode> depthTargetTileMode; Register<0x3FC, u32> depthTargetLayerStride; @@ -193,7 +193,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x54F, type::MultisampleControl> multisampleControl; struct SamplerPool { - type::Address address; // 0x557 + Address address; // 0x557 u32 maximumIndex; // 0x559 }; Register<0x557, SamplerPool> samplerPool; @@ -202,7 +202,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x55C, u32> lineSmoothEnable; struct TexturePool { - type::Address address; // 0x55D + Address address; // 0x55D u32 maximumIndex; // 0x55F }; Register<0x55D, TexturePool> texturePool; @@ -220,7 +220,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x56F, float> depthBiasUnits; Register<0x581, type::PointCoordReplace> pointCoordReplace; - Register<0x582, type::Address> setProgramRegion; + Register<0x582, Address> setProgramRegion; Register<0x585, u32> vertexEndGl; //!< Method-only register with no real value, used after calling vertexBeginGl to invoke the draw Register<0x586, type::VertexBeginGl> vertexBeginGl; //!< Similar to glVertexBegin semantically, supplies a primitive topology for draws alongside instancing data @@ -256,7 +256,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Register<0x680, std::array> colorWriteMask; struct Semaphore { - type::Address address; // 0x6C0 + Address address; // 0x6C0 u32 payload; // 0x6C2 type::SemaphoreInfo info; // 0x6C3 }; @@ -270,7 +270,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u32 enable : 1; }; } config; - type::Address iova; + Address iova; u32 divisor; }; static_assert(sizeof(VertexBuffer) == sizeof(u32) * 4); @@ -288,7 +288,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { }; Register<0x780, std::array> independentBlend; - Register<0x7C0, std::array> vertexBufferLimits; //!< A per-VBO IOVA denoting the end of the vertex buffer + Register<0x7C0, std::array> vertexBufferLimits; //!< A per-VBO IOVA denoting the end of the vertex buffer Register<0x800, std::array> setProgram; @@ -296,7 +296,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { struct ConstantBufferSelector { u32 size; - type::Address address; + Address address; }; Register<0x8E0, ConstantBufferSelector> constantBufferSelector;