Rework GPFIFO method distribution and macros to support multiple engines

Fermi2D supports macros in addition to Maxwell3D, and both engines share the same macro code memory. To support this, the macro interpreter is reworked to accept a target engine, and its communication with that engine is abstracted into an interface that any applicable engine can implement.

```
GPFIFO <-> MME <-> Maxwell3D
    ^        ^---> Fermi2D
    X------------> I2M
    X------------> MaxwellComputeB
    X--Flush-----> MaxwellDMA
```
This commit is contained in:
Billy Laws 2022-01-19 20:45:51 +00:00 committed by PixelyIon
parent 8d5463ef28
commit 62db21fb78
12 changed files with 179 additions and 107 deletions

View File

@ -178,9 +178,10 @@ add_library(skyline SHARED
${source_DIR}/skyline/soc/gm20b/channel.cpp
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
${source_DIR}/skyline/soc/gm20b/gmmu.cpp
${source_DIR}/skyline/soc/gm20b/macro/macro_interpreter.cpp
${source_DIR}/skyline/soc/gm20b/engines/engine.cpp
${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp
${source_DIR}/skyline/input/npad.cpp
${source_DIR}/skyline/input/npad_device.cpp

View File

@ -6,7 +6,7 @@
namespace skyline::soc::gm20b {
ChannelContext::ChannelContext(const DeviceState &state, std::shared_ptr<AddressSpaceContext> asCtx, size_t numEntries) :
maxwell3D(std::make_unique<engine::maxwell3d::Maxwell3D>(state, *this, executor)),
maxwell3D(std::make_unique<engine::maxwell3d::Maxwell3D>(state, *this, macroState, executor)),
gpfifo(state, *this, numEntries),
executor(state),
asCtx(std::move(asCtx)){}

View File

@ -4,6 +4,7 @@
#pragma once
#include <gpu/interconnect/command_executor.h>
#include "macro/macro_state.h"
#include "engines/engine.h"
#include "gpfifo.h"
@ -21,6 +22,7 @@ namespace skyline::soc::gm20b {
struct ChannelContext {
std::shared_ptr<AddressSpaceContext> asCtx;
gpu::interconnect::CommandExecutor executor;
MacroState macroState;
std::unique_ptr<engine::maxwell3d::Maxwell3D> maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file
ChannelGpfifo gpfifo;

View File

@ -0,0 +1,30 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "engine.h"
namespace skyline::soc::gm20b::engine {
    MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}

    /**
     * @brief Collects the arguments of a macro call and runs the macro once the call is complete
     * @param macroMethodOffset Offset of the called method into the macro method space, even offsets begin a new invocation of macro 'macroMethodOffset / 2' while odd offsets only supply another argument
     * @param argument The value that is appended to the argument list of the pending invocation
     * @param lastCall If the pending macro should be executed after the argument has been appended
     */
    void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) {
        // Runs the pending macro (if there is one) with this engine as its target and then empties the invocation
        auto flushPendingMacro{[this]() {
            if (!macroInvocation.Valid())
                return;

            auto macroPosition{macroState.macroPositions[macroInvocation.index]};
            macroState.macroInterpreter.Execute(macroPosition, macroInvocation.arguments, this);
            macroInvocation.Reset();
        }};

        const bool startsNewMacro{(macroMethodOffset & 1) == 0};
        if (startsNewMacro) {
            // Another macro may still be pending, it has to be run before the invocation is retargeted
            flushPendingMacro();

            // The index wraps around so it always refers to an existing macro position entry
            macroInvocation.index = (macroMethodOffset / 2) % macroState.macroPositions.size();
        }

        macroInvocation.arguments.emplace_back(argument);

        // All of the data in the method call has been sent, so the macro can be run now
        if (lastCall)
            flushPendingMacro();
    }
}

View File

@ -4,27 +4,53 @@
#pragma once
#include <common.h>
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
#include <soc/gm20b/macro/macro_state.h>
namespace skyline::soc::gm20b {
namespace engine {
/**
* @brief The Engine class provides an interface that can be used to communicate with the GPU's internal engines
*/
class Engine {
protected:
const DeviceState &state;
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
public:
Engine(const DeviceState &state) : state(state) {}
namespace engine {
constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
/**
* @brief MacroEngineBase is the interface that engines implement in order to interface with the macro interpreter
* @note It also buffers the arguments of an in-flight macro call, see HandleMacroCall
*/
struct MacroEngineBase {
MacroState &macroState; //!< The macro state passed in at construction, it is referenced rather than owned by the engine
struct {
size_t index{std::numeric_limits<size_t>::max()}; //!< Index of the pending macro, the maximum value of size_t is used as a sentinel for 'no macro pending'
std::vector<u32> arguments; //!< The arguments collected so far for the pending macro
/**
* @return If a macro index has been set, i.e. the index differs from the 'no macro pending' sentinel
*/
bool Valid() {
return index != std::numeric_limits<size_t>::max();
}
/**
* @brief Restores the 'no macro pending' sentinel index and discards all collected arguments
*/
void Reset() {
index = std::numeric_limits<size_t>::max();
arguments.clear();
}
} macroInvocation{}; //!< Data for a macro that is pending execution
MacroEngineBase(MacroState &macroState);
virtual ~MacroEngineBase() = default;
/**
* @brief Calls an engine method with the given parameters
*/
void CallMethod(u32 method, u32 argument, bool lastCall) {
Logger::Warn("Called method in unimplemented engine: 0x{:X} args: 0x{:X}", method, argument);
};
/**
* @brief Calls an engine method on behalf of the macro interpreter, which sends a macro's method writes through this function
*/
virtual void CallMethodFromMacro(u32 method, u32 argument) = 0;
/**
* @brief Reads the current value for the supplied method
*/
virtual u32 ReadMethodFromMacro(u32 method) = 0;
/**
* @brief Handles a call to a method in the MME space
* @param macroMethodOffset The target offset from EngineMethodsEnd
* @param value The argument that is appended to the pending macro's argument list
* @param lastCall If set, the pending macro (if any) is executed after the argument has been appended
*/
void HandleMacroCall(u32 macroMethodOffset, u32 value, bool lastCall);
};
}
}

View File

@ -7,39 +7,24 @@
#include "maxwell_3d.h"
namespace skyline::soc::gm20b::engine::maxwell3d {
Maxwell3D::Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : Engine(state), macroInterpreter(*this), context(*state.gpu, channelCtx, executor), channelCtx(channelCtx) {
Maxwell3D::Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, MacroState &macroState, gpu::interconnect::CommandExecutor &executor)
: MacroEngineBase(macroState),
syncpoints(state.soc->host1x.syncpoints),
context(*state.gpu, channelCtx, executor),
channelCtx(channelCtx) {
InitializeRegisters();
}
__attribute__((always_inline)) void Maxwell3D::CallMethod(u32 method, u32 argument, bool lastCall) {
Logger::Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", method, argument);
void Maxwell3D::CallMethodFromMacro(u32 method, u32 argument) {
HandleMethod(method, argument);
}
// Methods that are greater than the register size are for macro control
if (method >= RegisterCount) [[unlikely]] {
// Starting a new macro at index 'method - RegisterCount'
if (!(method & 1)) {
if (macroInvocation.index != -1) {
// Flush the current macro as we are switching to another one
macroInterpreter.Execute(macroPositions[static_cast<size_t>(macroInvocation.index)], macroInvocation.arguments);
macroInvocation.arguments.clear();
}
u32 Maxwell3D::ReadMethodFromMacro(u32 method) {
return registers.raw[method];
}
// Setup for the new macro index
macroInvocation.index = ((method - RegisterCount) >> 1) % macroPositions.size();
}
macroInvocation.arguments.emplace_back(argument);
// Flush macro after all of the data in the method call has been sent
if (lastCall && macroInvocation.index != -1) {
macroInterpreter.Execute(macroPositions[static_cast<size_t>(macroInvocation.index)], macroInvocation.arguments);
macroInvocation.arguments.clear();
macroInvocation.index = -1;
}
// Bail out early
return;
}
__attribute__((always_inline)) void Maxwell3D::CallMethod(u32 method, u32 argument) {
Logger::Verbose("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", method, argument);
HandleMethod(method, argument);
}
@ -584,26 +569,27 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
switch (method) {
MAXWELL3D_STRUCT_CASE(mme, instructionRamLoad, {
if (registers.mme->instructionRamPointer >= macroCode.size())
if (registers.mme->instructionRamPointer >= macroState.macroCode.size())
throw exception("Macro memory is full!");
macroCode[registers.mme->instructionRamPointer++] = instructionRamLoad;
macroState.macroCode[registers.mme->instructionRamPointer++] = instructionRamLoad;
// Wraparound writes
registers.mme->instructionRamPointer %= macroCode.size();
// This works on HW but will also generate an error interrupt
registers.mme->instructionRamPointer %= macroState.macroCode.size();
})
MAXWELL3D_STRUCT_CASE(mme, startAddressRamLoad, {
if (registers.mme->startAddressRamPointer >= macroPositions.size())
if (registers.mme->startAddressRamPointer >= macroState.macroPositions.size())
throw exception("Maximum amount of macros reached!");
macroPositions[registers.mme->startAddressRamPointer++] = startAddressRamLoad;
macroState.macroPositions[registers.mme->startAddressRamPointer++] = startAddressRamLoad;
})
MAXWELL3D_CASE(syncpointAction, {
Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
channelCtx.executor.Execute();
state.soc->host1x.syncpoints.at(syncpointAction.id).Increment();
syncpoints.at(syncpointAction.id).Increment();
})
MAXWELL3D_CASE(clearBuffers, {

View File

@ -6,7 +6,6 @@
#include <gpu/interconnect/graphics_context.h>
#include "engine.h"
#include "maxwell/macro_interpreter.h"
namespace skyline::soc::gm20b {
struct ChannelContext;
@ -16,17 +15,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
/**
* @brief The Maxwell 3D engine handles processing 3D graphics
*/
class Maxwell3D : public Engine {
class Maxwell3D : public MacroEngineBase {
private:
std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
struct {
i32 index{-1};
std::vector<u32> arguments;
} macroInvocation{}; //!< Data for a macro that is pending execution
MacroInterpreter macroInterpreter;
host1x::SyncpointSet &syncpoints;
gpu::interconnect::GraphicsContext context;
/**
@ -321,15 +312,18 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
ChannelContext &channelCtx;
std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor);
Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, MacroState &macroState, gpu::interconnect::CommandExecutor &executor);
/**
* @brief Initializes Maxwell 3D registers to their default values
*/
void InitializeRegisters();
void CallMethod(u32 method, u32 argument, bool lastCall = false);
void CallMethod(u32 method, u32 argument);
void CallMethodFromMacro(u32 method, u32 argument) override;
u32 ReadMethodFromMacro(u32 method) override;
};
}

View File

@ -44,7 +44,7 @@ namespace skyline::soc::gm20b {
struct {
u16 _pad1_ : 13;
u8 methodSubChannel : 3;
SubchannelId methodSubChannel : 3;
union {
TertOp tertOp : 3;
u16 methodCount : 13;
@ -66,36 +66,32 @@ namespace skyline::soc::gm20b {
gpEntries(numEntries),
thread(std::thread(&ChannelGpfifo::Run, this)) {}
void ChannelGpfifo::Send(u32 method, u32 argument, u32 subChannel, bool lastCall) {
constexpr u32 ThreeDSubChannel{0};
constexpr u32 ComputeSubChannel{1};
constexpr u32 Inline2MemorySubChannel{2};
constexpr u32 TwoDSubChannel{3};
constexpr u32 CopySubChannel{4}; // HW forces a memory flush on a switch from this subchannel to others
void ChannelGpfifo::Send(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) {
Logger::Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", method, argument, subChannel, lastCall);
if (method < engine::GPFIFO::RegisterCount) {
gpfifoEngine.CallMethod(method, argument);
} else {
} else if (method < engine::EngineMethodsEnd) { [[likely]]
switch (subChannel) {
case ThreeDSubChannel:
channelCtx.maxwell3D->CallMethod(method, argument, lastCall);
break;
case ComputeSubChannel:
channelCtx.maxwellCompute.CallMethod(method, argument, lastCall);
break;
case Inline2MemorySubChannel:
channelCtx.keplerMemory.CallMethod(method, argument, lastCall);
break;
case TwoDSubChannel:
channelCtx.fermi2D.CallMethod(method, argument, lastCall);
break;
case CopySubChannel:
channelCtx.maxwellDma.CallMethod(method, argument, lastCall);
case SubchannelId::ThreeD:
channelCtx.maxwell3D->CallMethod(method, argument);
break;
default:
throw exception("Tried to call into a software subchannel: {}!", subChannel);
Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
break;
}
} else {
switch (subChannel) {
case SubchannelId::ThreeD:
channelCtx.maxwell3D->HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
break;
case SubchannelId::TwoD:
// TODO: Fix this when we implement the 2D Engine
Logger::Warn("Calling macros in the 2D engine is unimplemented!");
break;
default:
Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
break;
}
}
}

View File

@ -9,6 +9,20 @@
namespace skyline::soc::gm20b {
struct ChannelContext;
/**
* @brief Mapping of subchannel names to their corresponding subchannel IDs
* @note This is the type of the 3-bit methodSubChannel field in pushbuffer method headers, which is why exactly 8 IDs exist
*/
enum class SubchannelId : u8 {
ThreeD = 0, //!< Methods on this subchannel are dispatched to the Maxwell 3D engine by ChannelGpfifo::Send
Compute = 1,
Inline2Mem = 2,
TwoD = 3, //!< Macro calls on this subchannel are currently only logged as unimplemented by ChannelGpfifo::Send
Copy = 4,
Software0 = 5,
Software1 = 6,
Software2 = 7,
};
/**
* @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
@ -92,7 +106,6 @@ namespace skyline::soc::gm20b {
ChannelContext &channelCtx;
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
CircularQueue<GpEntry> gpEntries;
std::thread thread; //!< The thread that manages processing of pushbuffers
std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
/**
@ -102,7 +115,7 @@ namespace skyline::soc::gm20b {
struct MethodResumeState {
u32 remaining; //!< The number of entries left to handle until the method is finished
u32 address; //!< The method address in the GPU block specified by `subchannel` that is the target of the command
u8 subChannel;
SubchannelId subChannel;
/**
* @brief This is a simplified version of the full method type enum
@ -114,12 +127,12 @@ namespace skyline::soc::gm20b {
} state; //!< The type of method to resume
} resumeState{};
std::thread thread; //!< The thread that manages processing of pushbuffers
/**
* @brief Sends a method call to the GPU hardware
*/
void Send(u32 method, u32 argument, u32 subchannel, bool lastCall);
void Send(u32 method, u32 argument, SubchannelId subchannel, bool lastCall);
/**
* @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed

View File

@ -1,17 +1,20 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/address_space.h>
#include <soc/gm20b/engines/maxwell_3d.h>
#include "soc/gm20b/engines/engine.h"
#include "macro_interpreter.h"
namespace skyline::soc::gm20b::engine::maxwell3d {
void MacroInterpreter::Execute(size_t offset, const std::vector<u32> &args) {
namespace skyline::soc::gm20b::engine {
MacroInterpreter::MacroInterpreter(span<u32> macroCode) : macroCode(macroCode) {}
void MacroInterpreter::Execute(size_t offset, span<u32> args, MacroEngineBase *targetEngine) {
// Reset the interpreter state
engine = targetEngine;
opcode = reinterpret_cast<Opcode *>(&macroCode[offset]);
registers = {};
carryFlag = false;
methodAddress.raw = 0;
opcode = reinterpret_cast<Opcode *>(&maxwell3D.macroCode[offset]);
argument = args.data();
methodAddress.raw = 0;
carryFlag = false;
// The first argument is stored in register 1
registers[1] = *argument++;
@ -71,7 +74,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
}
case Opcode::Operation::ReadImmediate: {
u32 result{maxwell3D.registers.raw[static_cast<size_t>(static_cast<i32>(registers[opcode->srcA]) + opcode->immediate)]};
u32 result{engine->ReadMethodFromMacro(static_cast<u32>(static_cast<i32>(registers[opcode->srcA]) + opcode->immediate))};
HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
break;
}
@ -194,7 +197,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
}
__attribute__((always_inline)) void MacroInterpreter::Send(u32 pArgument) {
maxwell3D.CallMethod(methodAddress.address, pArgument, true);
engine->CallMethodFromMacro(methodAddress.address, pArgument);
methodAddress.address += methodAddress.increment;
}

View File

@ -5,8 +5,8 @@
#include <common.h>
namespace skyline::soc::gm20b::engine::maxwell3d {
class Maxwell3D; // A forward declaration of Maxwell3D as we don't want to import it here
namespace skyline::soc::gm20b::engine {
struct MacroEngineBase;
/**
* @brief The MacroInterpreter class handles interpreting macros. Macros are small programs that run on the GPU and are used for things like instanced rendering
@ -104,8 +104,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
static_assert(sizeof(MethodAddress) == sizeof(u32));
#pragma pack(pop)
Maxwell3D &maxwell3D; //!< A reference to the parent engine object
span<u32> macroCode; //!< Span pointing to the global macro code memory
MacroEngineBase *engine; //!< Pointer to the target engine
Opcode *opcode{}; //!< A pointer to the instruction that is currently being executed
std::array<u32, 8> registers{}; //!< The state of all the general-purpose registers in the macro interpreter
const u32 *argument{}; //!< A pointer to the argument buffer for the program, it is read from sequentially
@ -139,11 +140,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
void WriteRegister(u8 reg, u32 value);
public:
MacroInterpreter(Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
MacroInterpreter(span<u32> macroCode);
/**
* @brief Executes a GPU macro from macro memory with the given arguments
* @brief Executes a GPU macro from macro memory with the given arguments targeting the specified engine
*/
void Execute(size_t offset, const std::vector<u32> &args);
void Execute(size_t offset, span<u32> args, MacroEngineBase *targetEngine);
};
}

View File

@ -0,0 +1,20 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
#include "macro_interpreter.h"
namespace skyline::soc::gm20b {
    /**
     * @brief Holds per-channel macro state
     * @note The interpreter is constructed with a span over macroCode, so a copied or moved MacroState would have its interpreter still refer to the code memory of the object it was copied from
     */
    struct MacroState {
        std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
        std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro code memory, there can be a maximum of 0x80 macros at any one time
        engine::MacroInterpreter macroInterpreter; //!< The macro interpreter for handling 3D/2D macros, it must be declared after macroCode as members are initialized in declaration order and it is constructed from macroCode

        MacroState() : macroInterpreter(macroCode) {}
    };
}