Rework GPFIFO method distribution and macros to support multiple engines

Fermi2D supports macros in addition to Maxwell3D, and both engines share code memory. To support this, we rework the macro interpreter to accept a target engine and abstract the communication out into an interface that can be implemented by applicable engines.
```
GPFIFO <-> MME <-> Maxwell3D
  ^         ^---> Fermi2D
  X------------> I2M
  X------------> MaxwellComputeB
  X--Flush-----> MaxwellDMA
```
2024-06-14 12:08:43 +02:00 · 2022-01-19 20:45:51 +00:00 · 2022-01-19 20:45:51 +00:00 · 62db21fb78
commit 62db21fb78
parent 8d5463ef28
12 changed files with 179 additions and 107 deletions
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@ -178,9 +178,10 @@ add_library(skyline SHARED
        ${source_DIR}/skyline/soc/gm20b/channel.cpp
        ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
        ${source_DIR}/skyline/soc/gm20b/gmmu.cpp
+        ${source_DIR}/skyline/soc/gm20b/macro/macro_interpreter.cpp
+        ${source_DIR}/skyline/soc/gm20b/engines/engine.cpp
        ${source_DIR}/skyline/soc/gm20b/engines/gpfifo.cpp
        ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
-        ${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
        ${source_DIR}/skyline/soc/gm20b/engines/maxwell/initialization.cpp
        ${source_DIR}/skyline/input/npad.cpp
        ${source_DIR}/skyline/input/npad_device.cpp
--- a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp
@ -6,7 +6,7 @@

 namespace skyline::soc::gm20b {
    ChannelContext::ChannelContext(const DeviceState &state, std::shared_ptr<AddressSpaceContext> asCtx, size_t numEntries) :
-        maxwell3D(std::make_unique<engine::maxwell3d::Maxwell3D>(state, *this, executor)),
+        maxwell3D(std::make_unique<engine::maxwell3d::Maxwell3D>(state, *this, macroState, executor)),
        gpfifo(state, *this, numEntries),
        executor(state),
        asCtx(std::move(asCtx)){}
--- a/app/src/main/cpp/skyline/soc/gm20b/channel.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/channel.h
@ -4,6 +4,7 @@
 #pragma once

 #include <gpu/interconnect/command_executor.h>
+#include "macro/macro_state.h"
 #include "engines/engine.h"
 #include "gpfifo.h"

@ -21,6 +22,7 @@ namespace skyline::soc::gm20b {
    struct ChannelContext {
        std::shared_ptr<AddressSpaceContext> asCtx;
        gpu::interconnect::CommandExecutor executor;
+        MacroState macroState;
        std::unique_ptr<engine::maxwell3d::Maxwell3D> maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file
        ChannelGpfifo gpfifo;

--- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include "engine.h"
+
+namespace skyline::soc::gm20b::engine {
+    MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}
+
+    /**
+     * @brief Collects the arguments of a macro invocation and runs the macro through the shared interpreter once they have all been received
+     * @param macroMethodOffset The target offset from EngineMethodsEnd, an even offset selects macro 'macroMethodOffset / 2' while an odd offset only supplies a further argument
+     * @param argument The value written to the method, it is appended to the arguments of the pending macro
+     * @param lastCall If this is the final write of the method call, the pending macro is executed when this is set
+     */
+    void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) {
+        // Executes the pending macro with the arguments gathered so far, targeting this engine, and then clears the invocation state
+        auto flushPendingMacro{[this]() {
+            macroState.macroInterpreter.Execute(macroState.macroPositions[macroInvocation.index], macroInvocation.arguments, this);
+            macroInvocation.Reset();
+        }};
+
+        // Starting a new macro at index 'macroMethodOffset / 2'
+        if (!(macroMethodOffset & 1)) {
+            // Flush the current macro as we are switching to another one
+            if (macroInvocation.Valid())
+                flushPendingMacro();
+
+            // Setup for the new macro index, wrapped so that it always lands inside the start address table
+            macroInvocation.index = (macroMethodOffset / 2) % macroState.macroPositions.size();
+        } else if (!macroInvocation.Valid()) {
+            // An argument was written without any macro having been selected, it cannot be attributed to an invocation so it is dropped rather than being left behind to be prepended to the arguments of whichever macro is started next
+            Logger::Warn("Ignoring macro argument 0x{:X} written to macro method offset 0x{:X} with no macro selected", argument, macroMethodOffset);
+            return;
+        }
+
+        macroInvocation.arguments.emplace_back(argument);
+
+        // Flush macro after all of the data in the method call has been sent, the invocation is always valid by this point
+        if (lastCall)
+            flushPendingMacro();
+    }
+}
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
@ -4,27 +4,53 @@
 #pragma once

 #include <common.h>
-
-#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
+#include <soc/gm20b/macro/macro_state.h>

 namespace skyline::soc::gm20b {
-    namespace engine {
-        /**
-         * @brief The Engine class provides an interface that can be used to communicate with the GPU's internal engines
-         */
-        class Engine {
-          protected:
-            const DeviceState &state;
+    #define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))

-          public:
-            Engine(const DeviceState &state) : state(state) {}
+    namespace engine {
+        constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
+
+        /**
+         * @brief The MacroEngineBase interface can be implemented by engines to allow them to interface with the macro executor
+         */
+        struct MacroEngineBase {
+            MacroState &macroState;
+
+            struct {
+                size_t index{std::numeric_limits<size_t>::max()};
+                std::vector<u32> arguments;
+
+                bool Valid() {
+                    return index != std::numeric_limits<size_t>::max();
+                }
+
+                void Reset() {
+                    index = std::numeric_limits<size_t>::max();
+                    arguments.clear();
+                }
+            } macroInvocation{}; //!< Data for a macro that is pending execution
+
+            MacroEngineBase(MacroState &macroState);
+
+            virtual ~MacroEngineBase() = default;

            /**
             * @brief Calls an engine method with the given parameters
             */
-            void CallMethod(u32 method, u32 argument, bool lastCall) {
-                Logger::Warn("Called method in unimplemented engine: 0x{:X} args: 0x{:X}", method, argument);
-            };
+            virtual void CallMethodFromMacro(u32 method, u32 argument) = 0;
+
+            /**
+             * @brief Reads the current value for the supplied method
+             */
+            virtual u32 ReadMethodFromMacro(u32 method) = 0;
+
+            /**
+             * @brief Handles a call to a method in the MME space
+             * @param macroMethodOffset The target offset from EngineMethodsEnd
+             */
+            void HandleMacroCall(u32 macroMethodOffset, u32 value, bool lastCall);
        };
    }
 }
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
@ -7,39 +7,24 @@
 #include "maxwell_3d.h"

 namespace skyline::soc::gm20b::engine::maxwell3d {
-    Maxwell3D::Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : Engine(state), macroInterpreter(*this), context(*state.gpu, channelCtx, executor), channelCtx(channelCtx) {
+    Maxwell3D::Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, MacroState &macroState, gpu::interconnect::CommandExecutor &executor)
+        : MacroEngineBase(macroState),
+          syncpoints(state.soc->host1x.syncpoints),
+          context(*state.gpu, channelCtx, executor),
+          channelCtx(channelCtx) {
        InitializeRegisters();
    }

-    __attribute__((always_inline)) void Maxwell3D::CallMethod(u32 method, u32 argument, bool lastCall) {
-        Logger::Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", method, argument);
+    void Maxwell3D::CallMethodFromMacro(u32 method, u32 argument) {
+        HandleMethod(method, argument);
+    }

-        // Methods that are greater than the register size are for macro control
-        if (method >= RegisterCount) [[unlikely]] {
-            // Starting a new macro at index 'method - RegisterCount'
-            if (!(method & 1)) {
-                if (macroInvocation.index != -1) {
-                    // Flush the current macro as we are switching to another one
-                    macroInterpreter.Execute(macroPositions[static_cast<size_t>(macroInvocation.index)], macroInvocation.arguments);
-                    macroInvocation.arguments.clear();
-                }
+    u32 Maxwell3D::ReadMethodFromMacro(u32 method) {
+        return registers.raw[method];
+    }

-                // Setup for the new macro index
-                macroInvocation.index = ((method - RegisterCount) >> 1) % macroPositions.size();
-            }
-
-            macroInvocation.arguments.emplace_back(argument);
-
-            // Flush macro after all of the data in the method call has been sent
-            if (lastCall && macroInvocation.index != -1) {
-                macroInterpreter.Execute(macroPositions[static_cast<size_t>(macroInvocation.index)], macroInvocation.arguments);
-                macroInvocation.arguments.clear();
-                macroInvocation.index = -1;
-            }
-
-            // Bail out early
-            return;
-        }
+    __attribute__((always_inline)) void Maxwell3D::CallMethod(u32 method, u32 argument) {
+        Logger::Verbose("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", method, argument);

        HandleMethod(method, argument);
    }
@ -584,26 +569,27 @@ namespace skyline::soc::gm20b::engine::maxwell3d {

        switch (method) {
            MAXWELL3D_STRUCT_CASE(mme, instructionRamLoad, {
-                if (registers.mme->instructionRamPointer >= macroCode.size())
+                if (registers.mme->instructionRamPointer >= macroState.macroCode.size())
                    throw exception("Macro memory is full!");

-                macroCode[registers.mme->instructionRamPointer++] = instructionRamLoad;
+                macroState.macroCode[registers.mme->instructionRamPointer++] = instructionRamLoad;

                // Wraparound writes
-                registers.mme->instructionRamPointer %= macroCode.size();
+                // This works on HW but will also generate an error interrupt
+                registers.mme->instructionRamPointer %= macroState.macroCode.size();
            })

            MAXWELL3D_STRUCT_CASE(mme, startAddressRamLoad, {
-                if (registers.mme->startAddressRamPointer >= macroPositions.size())
+                if (registers.mme->startAddressRamPointer >= macroState.macroPositions.size())
                    throw exception("Maximum amount of macros reached!");

-                macroPositions[registers.mme->startAddressRamPointer++] = startAddressRamLoad;
+                macroState.macroPositions[registers.mme->startAddressRamPointer++] = startAddressRamLoad;
            })

            MAXWELL3D_CASE(syncpointAction, {
                Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
                channelCtx.executor.Execute();
-                state.soc->host1x.syncpoints.at(syncpointAction.id).Increment();
+                syncpoints.at(syncpointAction.id).Increment();
            })

            MAXWELL3D_CASE(clearBuffers, {
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
@ -6,7 +6,6 @@

 #include <gpu/interconnect/graphics_context.h>
 #include "engine.h"
-#include "maxwell/macro_interpreter.h"

 namespace skyline::soc::gm20b {
    struct ChannelContext;
@ -16,17 +15,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
    /**
     * @brief The Maxwell 3D engine handles processing 3D graphics
     */
-    class Maxwell3D : public Engine {
+    class Maxwell3D : public MacroEngineBase {
      private:
-        std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro memory, there can be a maximum of 0x80 macros at any one time
-
-        struct {
-            i32 index{-1};
-            std::vector<u32> arguments;
-        } macroInvocation{}; //!< Data for a macro that is pending execution
-
-        MacroInterpreter macroInterpreter;
-
+        host1x::SyncpointSet &syncpoints;
        gpu::interconnect::GraphicsContext context;

        /**
@ -321,15 +312,18 @@ namespace skyline::soc::gm20b::engine::maxwell3d {

        ChannelContext &channelCtx;

-        std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow

-        Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor);
+        Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, MacroState &macroState, gpu::interconnect::CommandExecutor &executor);

        /**
         * @brief Initializes Maxwell 3D registers to their default values
         */
        void InitializeRegisters();

-        void CallMethod(u32 method, u32 argument, bool lastCall = false);
+        void CallMethod(u32 method, u32 argument);
+
+        void CallMethodFromMacro(u32 method, u32 argument) override;
+
+        u32 ReadMethodFromMacro(u32 method) override;
    };
 }
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
@ -44,7 +44,7 @@ namespace skyline::soc::gm20b {

        struct {
            u16 _pad1_ : 13;
-            u8 methodSubChannel : 3;
+            SubchannelId methodSubChannel : 3;
            union {
                TertOp tertOp : 3;
                u16 methodCount : 13;
@ -66,36 +66,32 @@ namespace skyline::soc::gm20b {
        gpEntries(numEntries),
        thread(std::thread(&ChannelGpfifo::Run, this)) {}

-    void ChannelGpfifo::Send(u32 method, u32 argument, u32 subChannel, bool lastCall) {
-        constexpr u32 ThreeDSubChannel{0};
-        constexpr u32 ComputeSubChannel{1};
-        constexpr u32 Inline2MemorySubChannel{2};
-        constexpr u32 TwoDSubChannel{3};
-        constexpr u32 CopySubChannel{4}; // HW forces a memory flush on a switch from this subchannel to others
-
+    void ChannelGpfifo::Send(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) {
        Logger::Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", method, argument, subChannel, lastCall);

        if (method < engine::GPFIFO::RegisterCount) {
            gpfifoEngine.CallMethod(method, argument);
-        } else {
+        } else if (method < engine::EngineMethodsEnd) { [[likely]]
            switch (subChannel) {
-                case ThreeDSubChannel:
-                    channelCtx.maxwell3D->CallMethod(method, argument, lastCall);
-                    break;
-                case ComputeSubChannel:
-                    channelCtx.maxwellCompute.CallMethod(method, argument, lastCall);
-                    break;
-                case Inline2MemorySubChannel:
-                    channelCtx.keplerMemory.CallMethod(method, argument, lastCall);
-                    break;
-                case TwoDSubChannel:
-                    channelCtx.fermi2D.CallMethod(method, argument, lastCall);
-                    break;
-                case CopySubChannel:
-                    channelCtx.maxwellDma.CallMethod(method, argument, lastCall);
+                case SubchannelId::ThreeD:
+                    channelCtx.maxwell3D->CallMethod(method, argument);
                    break;
                default:
-                    throw exception("Tried to call into a software subchannel: {}!", subChannel);
+                    Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
+                    break;
+            }
+        } else {
+            switch (subChannel) {
+                case SubchannelId::ThreeD:
+                    channelCtx.maxwell3D->HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
+                    break;
+                case SubchannelId::TwoD:
+                    // TODO: Fix this when we implement the 2D Engine
+                    Logger::Warn("Calling macros in the 2D engine is unimplemented!");
+                    break;
+                default:
+                    Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
+                    break;
            }
        }
    }
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
@ -9,6 +9,20 @@
 namespace skyline::soc::gm20b {
    struct ChannelContext;

+    /**
+     * @brief Mapping of subchannel names to their corresponding subchannel IDs
+     * @note The ID is decoded from a 3-bit field (`methodSubChannel`), so these eight values cover every ID that can be encoded
+     */
+    enum class SubchannelId : u8 {
+        ThreeD = 0, //!< Maxwell 3D engine
+        Compute = 1, //!< Maxwell compute engine
+        Inline2Mem = 2, //!< Inline-to-memory (I2M) engine
+        TwoD = 3, //!< Fermi 2D engine
+        Copy = 4, //!< Maxwell DMA engine, HW forces a memory flush on a switch from this subchannel to others
+        Software0 = 5, //!< Software subchannel
+        Software1 = 6, //!< Software subchannel
+        Software2 = 7, //!< Software subchannel
+    };
+
    /**
     * @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
     * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
@ -92,7 +106,6 @@ namespace skyline::soc::gm20b {
        ChannelContext &channelCtx;
        engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
        CircularQueue<GpEntry> gpEntries;
-        std::thread thread; //!< The thread that manages processing of pushbuffers
        std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations

        /**
@ -102,7 +115,7 @@ namespace skyline::soc::gm20b {
        struct MethodResumeState {
            u32 remaining; //!< The number of entries left to handle until the method is finished
            u32 address; //!< The method address in the GPU block specified by `subchannel` that is the target of the command
-            u8 subChannel;
+            SubchannelId subChannel;

            /**
             * @brief This is a simplified version of the full method type enum
@ -114,12 +127,12 @@ namespace skyline::soc::gm20b {
            } state; //!< The type of method to resume
        } resumeState{};

+        std::thread thread; //!< The thread that manages processing of pushbuffers

        /**
         * @brief Sends a method call to the GPU hardware
         */
-        void Send(u32 method, u32 argument, u32 subchannel, bool lastCall);
-
+        void Send(u32 method, u32 argument, SubchannelId subchannel, bool lastCall);

        /**
         * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
@ -1,17 +1,20 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)

-#include <common/address_space.h>
-#include <soc/gm20b/engines/maxwell_3d.h>
+#include "soc/gm20b/engines/engine.h"
+#include "macro_interpreter.h"

-namespace skyline::soc::gm20b::engine::maxwell3d {
-    void MacroInterpreter::Execute(size_t offset, const std::vector<u32> &args) {
+namespace skyline::soc::gm20b::engine {
+    MacroInterpreter::MacroInterpreter(span<u32> macroCode) : macroCode(macroCode) {}
+
+    void MacroInterpreter::Execute(size_t offset, span<u32> args, MacroEngineBase *targetEngine) {
        // Reset the interpreter state
+        engine = targetEngine;
+        opcode = reinterpret_cast<Opcode *>(&macroCode[offset]);
        registers = {};
-        carryFlag = false;
-        methodAddress.raw = 0;
-        opcode = reinterpret_cast<Opcode *>(&maxwell3D.macroCode[offset]);
        argument = args.data();
+        methodAddress.raw = 0;
+        carryFlag = false;

        // The first argument is stored in register 1
        registers[1] = *argument++;
@ -71,7 +74,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
            }

            case Opcode::Operation::ReadImmediate: {
-                u32 result{maxwell3D.registers.raw[static_cast<size_t>(static_cast<i32>(registers[opcode->srcA]) + opcode->immediate)]};
+                u32 result{engine->ReadMethodFromMacro(static_cast<u32>(static_cast<i32>(registers[opcode->srcA]) + opcode->immediate))};
                HandleAssignment(opcode->assignmentOperation, opcode->dest, result);
                break;
            }
@ -194,7 +197,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
    }

    __attribute__((always_inline)) void MacroInterpreter::Send(u32 pArgument) {
-        maxwell3D.CallMethod(methodAddress.address, pArgument, true);
+        engine->CallMethodFromMacro(methodAddress.address, pArgument);
        methodAddress.address += methodAddress.increment;
    }

--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/macro_interpreter.h
@ -5,8 +5,8 @@

 #include <common.h>

-namespace skyline::soc::gm20b::engine::maxwell3d {
-    class Maxwell3D; // A forward declaration of Maxwell3D as we don't want to import it here
+namespace skyline::soc::gm20b::engine {
+    struct MacroEngineBase;

    /**
     * @brief The MacroInterpreter class handles interpreting macros. Macros are small programs that run on the GPU and are used for things like instanced rendering
@ -104,8 +104,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
        static_assert(sizeof(MethodAddress) == sizeof(u32));
        #pragma pack(pop)

-        Maxwell3D &maxwell3D; //!< A reference to the parent engine object
+        span<u32> macroCode; //!< Span pointing to the global macro code memory

+        MacroEngineBase *engine; //!< Pointer to the target engine
        Opcode *opcode{}; //!< A pointer to the instruction that is currently being executed
        std::array<u32, 8> registers{}; //!< The state of all the general-purpose registers in the macro interpreter
        const u32 *argument{}; //!< A pointer to the argument buffer for the program, it is read from sequentially
@ -139,11 +140,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
        void WriteRegister(u8 reg, u32 value);

      public:
-        MacroInterpreter(Maxwell3D &maxwell3D) : maxwell3D(maxwell3D) {}
+        MacroInterpreter(span<u32> macroCode);

        /**
-         * @brief Executes a GPU macro from macro memory with the given arguments
+         * @brief Executes a GPU macro from macro memory with the given arguments targeting the specified engine
         */
-        void Execute(size_t offset, const std::vector<u32> &args);
+        void Execute(size_t offset, span<u32> args, MacroEngineBase *targetEngine);
    };
 }
--- a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h
@ -0,0 +1,20 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include <common.h>
+#include "macro_interpreter.h"
+
+namespace skyline::soc::gm20b {
+    /**
+     * @brief Holds per-channel macro state
+     * @note The interpreter holds a span over `macroCode`, a copied or moved instance would still view the code memory of the object it was created from so both operations are disabled
+     */
+    struct MacroState {
+        std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
+        std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro code memory, there can be a maximum of 0x80 macros at any one time
+        engine::MacroInterpreter macroInterpreter; //!< The macro interpreter for handling 3D/2D macros, this is declared after `macroCode` so that the memory it views has been initialised before the span over it is created
+
+        MacroState() : macroInterpreter(macroCode) {}
+
+        MacroState(const MacroState &) = delete;
+
+        MacroState &operator=(const MacroState &) = delete;
+    };
+}