Implement basic inline2memory engine support

Not currently used by anything, but it will be used by the compute and 3D engines as well as by its own engine in the future. Block linear copies are currently unsupported.
This commit is contained in:
Billy Laws 2022-03-20 18:00:34 +00:00 committed by PixelyIon
parent 5c387f5c5a
commit 8c73b62b2c
6 changed files with 289 additions and 22 deletions

View File

@ -183,6 +183,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/soc/gm20b/engines/engine.cpp
${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
${source_DIR}/skyline/soc/gm20b/engines/inline2memory.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp
${source_DIR}/skyline/input/npad.cpp
${source_DIR}/skyline/input/npad_device.cpp

View File

@ -10,6 +10,20 @@ namespace skyline::soc::gm20b {
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
namespace engine {
/**
 * @brief A 40-bit GMMU virtual address with register-packing
 * @note The registers pack the address with big-endian ordering (but with 32 bit words)
 */
struct Address {
    u32 high; //!< Most significant word, stored first
    u32 low; //!< Least significant word, stored second

    /**
     * @return The address recombined into a single 64-bit value with `high` in the upper 32 bits
     * @note This is const-qualified so that addresses reached through const references can be converted as well
     */
    constexpr operator u64() const {
        return (static_cast<u64>(high) << 32) | low;
    }
};
static_assert(sizeof(Address) == sizeof(u64));
constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
/**

View File

@ -0,0 +1,92 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <soc/gm20b/gmmu.h>
#include "inline2memory.h"
namespace skyline::soc::gm20b::engine {
Inline2MemoryBackend::Inline2MemoryBackend(std::shared_ptr<AddressSpaceContext> addressSpaceContext) : addressSpaceContext(std::move(addressSpaceContext)) {}
/**
 * @brief Begins a new upload by rewinding the write cursor and sizing the staging buffer for the incoming data
 * @param state Register state that supplies `lineLengthIn` and `lineCount`
 * @note Each line is rounded up to a whole number of 32-bit words as data is pushed a word at a time
 */
void Inline2MemoryBackend::LaunchDma(Inline2MemoryBackend::RegisterState &state) {
    writeOffset = 0;

    // Both registers are 32-bit, so the arithmetic is widened up-front to stop the product wrapping around before it is stored in a size_t
    const size_t wordsPerLine{util::AlignUp(static_cast<size_t>(state.lineLengthIn), 4) / 4};
    const size_t targetSizeWords{static_cast<size_t>(state.lineCount) * wordsPerLine};
    buffer.resize(targetSizeWords);
}
/**
 * @brief Writes the fully populated staging buffer out to GPU memory according to the `launchDma` settings in `state`
 * @param state Register state that supplies the completion type, destination layout, line count and destination address
 * @throws exception if a semaphore release was requested on completion, as that is not implemented
 * @note Only pitch-linear uploads made up of a single line are written, anything else is logged and dropped
 */
void Inline2MemoryBackend::CompleteDma(Inline2MemoryBackend::RegisterState &state) {
    if (state.launchDma.completion == RegisterState::DmaCompletionType::ReleaseSemaphore)
        throw exception("Semaphore release on I2M completion is not supported!");

    if (state.launchDma.layout == RegisterState::DmaDstMemoryLayout::Pitch && state.lineCount == 1) {
        // TODO: we can do this with the buffer manager to avoid some overhead in the future
        Logger::Debug("range: 0x{:X} -> 0x{:X}", u64{state.offsetOut}, u64{state.offsetOut} + buffer.size() * 0x4);

        // An explicit view over the words is passed so that the buffer's contents are what gets written, rather than the std::vector object itself being treated as the value to write
        // NOTE(review): relies on gmmu.Write having a span overload, confirm against gmmu.h
        addressSpaceContext->gmmu.Write(state.offsetOut, span(buffer));
    } else {
        Logger::Warn("Non-linear I2M uploads are not supported!");
    }
}
/**
 * @brief Appends a single word of upload data to the staging buffer, completing the upload once the buffer is full
 * @param state Register state forwarded to CompleteDma when the final word arrives
 * @param value The word to append
 * @throws exception if the staging buffer has no space left for another word
 */
void Inline2MemoryBackend::LoadInlineData(RegisterState &state, u32 value) {
    const bool bufferExhausted{writeOffset >= buffer.size()};
    if (bufferExhausted)
        throw exception("Inline data load overflow!");

    buffer[writeOffset] = value;
    writeOffset++;

    // Nothing more to do until the last expected word has been received
    if (writeOffset != buffer.size())
        return;

    CompleteDma(state);
}
/**
 * @brief Appends a batch of upload words to the staging buffer, completing the upload once the buffer is full
 * @param state Register state forwarded to CompleteDma when the final word arrives
 * @param data The words to append, in order
 * @throws exception if the batch would run past the end of the staging buffer
 */
void Inline2MemoryBackend::LoadInlineData(Inline2MemoryBackend::RegisterState &state, span<u32> data) {
    const auto wordCount{data.size()};
    if (writeOffset + wordCount > buffer.size())
        throw exception("Inline data load overflow!");

    // Copy the batch into the unfilled tail of the staging buffer and advance the cursor past it
    auto remaining{span(buffer).subspan(writeOffset)};
    remaining.copy_from(data);
    writeOffset += wordCount;

    if (writeOffset != buffer.size())
        return;

    CompleteDma(state);
}
Inline2Memory::Inline2Memory(std::shared_ptr<AddressSpaceContext> addressSpaceContext) : backend(std::move(addressSpaceContext)) {}
/**
 * @brief Logs the method call at verbose level and forwards it to HandleMethod
 * @param method Index of the method being called
 * @param argument The argument supplied with the method
 */
[[gnu::always_inline]] void Inline2Memory::CallMethod(u32 method, u32 argument) {
    Logger::Verbose("Called method in I2M: 0x{:X} args: 0x{:X}", method, argument);
    HandleMethod(method, argument);
}
// Word offset of a `Registers` member: the size of the offset wrapper minus the size of the value it wraps, expressed in u32 units
// The whole expansion is parenthesised so that the macro behaves as a single operand wherever it is used, and the standard `decltype` is used over the GNU-only `typeof`
#define INLINE2MEMORY_OFFSET(field) ((sizeof(decltype(Registers::field)) - sizeof(std::remove_reference_t<decltype(*Registers::field)>)) / sizeof(u32))
// Word offset of `member` inside the struct wrapped by the `Registers` member `field`, relative to the start of the register file
#define INLINE2MEMORY_STRUCT_OFFSET(field, member) (INLINE2MEMORY_OFFSET(field) + U32_OFFSET(std::remove_reference_t<decltype(*Registers::field)>, member))
/**
 * @brief Stores the argument into the register file and runs the side effects of the registers that have any
 * @param method Index of the register being written
 * @param argument The value written to the register
 * @note Writes to `launchDma` start a new upload and writes to `loadInlineData` push a word of upload data, all other registers are only stored
 */
void Inline2Memory::HandleMethod(u32 method, u32 argument) {
    // The register file is a fixed-size array while `method` is an arbitrary caller-supplied value, so anything past the end is rejected rather than being written out of bounds
    if (method >= registers.raw.size()) {
        Logger::Warn("Called method 0x{:X} is out of bounds for the I2M register file!", method);
        return;
    }

    registers.raw[method] = argument;

    switch (method) {
        case INLINE2MEMORY_STRUCT_OFFSET(i2m, launchDma):
            backend.LaunchDma(*registers.i2m);
            return;
        case INLINE2MEMORY_STRUCT_OFFSET(i2m, loadInlineData):
            backend.LoadInlineData(*registers.i2m, argument);
            return;
        default:
            return;
    }
}
/**
 * @brief Handles a batch of arguments that all target the same method
 * @param method Index of the method that every argument is written to
 * @param arguments The arguments to submit, in order
 * @note A batch aimed at `loadInlineData` is passed to the backend in one go, any other method is handled one argument at a time
 */
void Inline2Memory::CallMethodBatchNonInc(u32 method, span<u32> arguments) {
    if (method == (INLINE2MEMORY_STRUCT_OFFSET(i2m, loadInlineData))) {
        backend.LoadInlineData(*registers.i2m, arguments);
        return;
    }

    for (auto argumentIt{arguments.begin()}; argumentIt != arguments.end(); ++argumentIt)
        HandleMethod(method, *argumentIt);
}
#undef INLINE2MEMORY_STRUCT_OFFSET
#undef INLINE2MEMORY_OFFSET
}

View File

@ -0,0 +1,173 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
#include "engine.h"
namespace skyline::soc::gm20b {
struct AddressSpaceContext;
}
namespace skyline::soc::gm20b::engine {
/**
* @brief Implements the actual behaviour of the I2M engine, allowing it to be shared between other engines which also contain the I2M block (3D, compute)
* @note Uploaded words are collected in a staging buffer and written out to GPU memory in one go once the last expected word has been pushed
*/
class Inline2MemoryBackend {
private:
std::vector<u32> buffer; //!< Temporary buffer to hold data being currently uploaded
u32 writeOffset{}; //!< Current write offset in words into `buffer`
std::shared_ptr<AddressSpaceContext> addressSpaceContext; //!< The address space context whose GMMU completed uploads are written through
public:
/**
* @brief The I2M register state that can be included as part of an engine's register state
* @note Member order and sizes define the register layout (0xE words in total), so they must not be rearranged
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def
*/
struct RegisterState {
// Values for the `width` field of `dstBlockSize`
enum class BlockWidth : u8 {
OneGob = 0
};
// Values for the `height` field of `dstBlockSize`
enum class BlockHeight : u8 {
OneGob = 0,
TwoGobs = 1,
FourGobs = 2,
EightGobs = 3,
SixteenGobs = 4,
ThirtyTwoGobs = 5
};
// Values for the `depth` field of `dstBlockSize`
enum class BlockDepth : u8 {
OneGob = 0,
TwoGobs = 1,
FourGobs = 2,
EightGobs = 3,
SixteenGobs = 4,
ThirtyTwoGobs = 5
};
// Values for the `layout` field of `launchDma`, only `Pitch` uploads of a single line are currently written out on completion
enum class DmaDstMemoryLayout : u8 {
BlockLinear = 0,
Pitch = 1
};
// Values for the `format` field of `launchDma`
enum class DmaReductionFormat : u8 {
Unsigned32 = 0,
Signed32 = 1
};
// Values for the `completion` field of `launchDma`, `ReleaseSemaphore` is currently rejected with an exception on completion
enum class DmaCompletionType : u8 {
FlushDisable = 0,
FlushOnly = 1,
ReleaseSemaphore = 2
};
// Values for the `interrupt` field of `launchDma`
enum class DmaInterruptType : u8 {
None = 0,
Interrupt = 1
};
// Values for the `semaphore` field of `launchDma`
enum class DmaSemaphoreStructSize : u8 {
FourWords = 0,
OneWord = 1
};
// Values for the `reductionOp` field of `launchDma`
enum class DmaReductionOp : u8 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7
};
u32 lineLengthIn; //!< Length of a single line of the upload, rounded up to a multiple of 4 when sizing the staging buffer (presumably in bytes, TODO confirm)
u32 lineCount; //!< Number of lines in the upload, the staging buffer holds this many aligned lines
Address offsetOut; //!< Destination GPU virtual address that the completed upload is written to
u32 pitchOut;
struct {
BlockWidth width : 4;
BlockHeight height : 4;
BlockDepth depth : 4;
u32 _pad1_ : 20;
} dstBlockSize;
u32 dstWidth;
u32 dstHeight;
u32 dstDepth;
u32 dstLayer;
u32 originBytesX;
u32 originSamplesY;
// Controls how the upload is performed, a write to this register is what starts a new upload (see LaunchDma)
struct {
DmaDstMemoryLayout layout : 1;
bool reductionEnable : 1;
DmaReductionFormat format : 2;
DmaCompletionType completion : 2;
bool sysmemBarDisable : 1;
u8 _pad0_ : 1;
DmaInterruptType interrupt : 2;
u8 _pad1_ : 2;
DmaSemaphoreStructSize semaphore : 1;
DmaReductionOp reductionOp : 3;
} launchDma;
u32 loadInlineData; //!< Each write to this register pushes one word of upload data (see LoadInlineData)
};
// Guards the layout above: the register block has to span exactly 0xE 32-bit words
static_assert(sizeof(RegisterState) == (0xE * 0x4));
private:
/**
* @brief Ran after all the inline data has been pushed and handles writing that data into memory
*/
void CompleteDma(RegisterState &state);
public:
/**
* @param addressSpaceContext The address space context that uploads will be written through
*/
Inline2MemoryBackend(std::shared_ptr<AddressSpaceContext> addressSpaceContext);
/**
* @brief Should be called when launchDma in `state` is written to
* @note This resets the write offset and resizes the staging buffer according to `lineLengthIn` and `lineCount`
*/
void LaunchDma(RegisterState &state);
/**
* @brief Should be called when loadInlineData in `state` is written to (non batch version)
* @note Throws if the staging buffer is already full, and completes the upload once the last word has been pushed
*/
void LoadInlineData(RegisterState &state, u32 value);
/**
* @brief Should be called when loadInlineData in `state` is written to (batch version)
* @note Throws if `data` does not fit in the remaining space, and completes the upload once the last word has been pushed
*/
void LoadInlineData(RegisterState &state, span<u32> data);
};
/**
* @brief Implements the actual I2M engine block that is located on subchannel 2 and handles uploading data from a pushbuffer into GPU memory
* @note All upload behaviour is delegated to an Inline2MemoryBackend, this class only owns the register file and dispatches method calls
*/
class Inline2Memory {
private:
Inline2MemoryBackend backend; //!< Performs the uploads described by the `i2m` register state
/**
* @brief Stores the argument into the register file and triggers the backend for the `launchDma` and `loadInlineData` registers
*/
void HandleMethod(u32 method, u32 argument);
/**
* @brief The engine's register file, accessible either as raw words indexed by method or through typed members placed at fixed word offsets
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_inline.def
*/
union Registers {
std::array<u32, EngineMethodsEnd> raw; //!< Flat view of every register, indexed by method
template<size_t Offset, typename Type>
using Register = util::OffsetMember<Offset, Type, u32>;
Register<0x60, Inline2MemoryBackend::RegisterState> i2m; //!< The shared I2M register state, declared at offset 0x60
} registers{};
public:
/**
* @param addressSpaceContext The address space context handed to the backend for writing uploads
*/
Inline2Memory(std::shared_ptr<AddressSpaceContext> addressSpaceContext);
/**
* @brief Logs and handles a single method call
*/
void CallMethod(u32 method, u32 argument);
/**
* @brief Handles a batch of arguments that are all written to the same method
* @note Batches written to `loadInlineData` are passed to the backend as a whole rather than word by word
*/
void CallMethodBatchNonInc(u32 method, span<u32> arguments);
};
}

View File

@ -5,24 +5,11 @@
#pragma once
#include <common.h>
#include <soc/gm20b/engines/engine.h>
namespace skyline::soc::gm20b::engine::maxwell3d::type {
#pragma pack(push, 1)
/**
 * @brief A 40-bit GMMU virtual address with register-packing
 * @note The registers pack the address with big-endian ordering (but with 32 bit words)
 */
struct Address {
    u32 high; //!< Most significant word, stored first
    u32 low; //!< Least significant word, stored second

    /**
     * @return The address recombined into a single 64-bit value with `high` in the upper 32 bits
     * @note This is const-qualified so that addresses reached through const references can be converted as well
     */
    constexpr operator u64() const {
        return (static_cast<u64>(high) << 32) | low;
    }
};
static_assert(sizeof(Address) == sizeof(u64));
enum class MmeShadowRamControl : u32 {
MethodTrack = 0, //!< Tracks all writes to registers in shadow RAM
MethodTrackWithFilter = 1, //!< Tracks all writes to registers in shadow RAM with a filter

View File

@ -108,7 +108,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
Register<0x3EB, u32> rtSeparateFragData;
Register<0x3F8, type::Address> depthTargetAddress;
Register<0x3F8, Address> depthTargetAddress;
Register<0x3FA, type::DepthRtFormat> depthTargetFormat;
Register<0x3FB, type::RenderTargetTileMode> depthTargetTileMode;
Register<0x3FC, u32> depthTargetLayerStride;
@ -193,7 +193,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
Register<0x54F, type::MultisampleControl> multisampleControl;
struct SamplerPool {
type::Address address; // 0x557
Address address; // 0x557
u32 maximumIndex; // 0x559
};
Register<0x557, SamplerPool> samplerPool;
@ -202,7 +202,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
Register<0x55C, u32> lineSmoothEnable;
struct TexturePool {
type::Address address; // 0x55D
Address address; // 0x55D
u32 maximumIndex; // 0x55F
};
Register<0x55D, TexturePool> texturePool;
@ -220,7 +220,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
Register<0x56F, float> depthBiasUnits;
Register<0x581, type::PointCoordReplace> pointCoordReplace;
Register<0x582, type::Address> setProgramRegion;
Register<0x582, Address> setProgramRegion;
Register<0x585, u32> vertexEndGl; //!< Method-only register with no real value, used after calling vertexBeginGl to invoke the draw
Register<0x586, type::VertexBeginGl> vertexBeginGl; //!< Similar to glVertexBegin semantically, supplies a primitive topology for draws alongside instancing data
@ -256,7 +256,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
Register<0x680, std::array<type::ColorWriteMask, type::RenderTargetCount>> colorWriteMask;
struct Semaphore {
type::Address address; // 0x6C0
Address address; // 0x6C0
u32 payload; // 0x6C2
type::SemaphoreInfo info; // 0x6C3
};
@ -270,7 +270,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 enable : 1;
};
} config;
type::Address iova;
Address iova;
u32 divisor;
};
static_assert(sizeof(VertexBuffer) == sizeof(u32) * 4);
@ -288,7 +288,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
};
Register<0x780, std::array<IndependentBlend, type::RenderTargetCount>> independentBlend;
Register<0x7C0, std::array<type::Address, type::VertexBufferCount>> vertexBufferLimits; //!< A per-VBO IOVA denoting the end of the vertex buffer
Register<0x7C0, std::array<Address, type::VertexBufferCount>> vertexBufferLimits; //!< A per-VBO IOVA denoting the end of the vertex buffer
Register<0x800, std::array<type::SetProgramInfo, type::ShaderStageCount>> setProgram;
@ -296,7 +296,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
struct ConstantBufferSelector {
u32 size;
type::Address address;
Address address;
};
Register<0x8E0, ConstantBufferSelector> constantBufferSelector;