Avoid dereferencing macro argument pointers in memory where possible

Indirect draws are implemented by having the macro arguments overflow into a separate GP Entry that points directly to the indirect argument buffer. To HLE indirect draws, a buffer needs to be created from this pointer, and it cannot be dereferenced on the CPU at any point to avoid hitting traps.
2025-02-23 07:27:10 +01:00 · 2023-02-04 22:38:50 +00:00 · 2023-02-04 22:38:50 +00:00 · b313dcbdca
commit b313dcbdca
parent 2b93604da0
6 changed files with 51 additions and 24 deletions
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
@ -18,7 +18,7 @@ namespace skyline::soc::gm20b::engine {

    MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}

-    void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) {
+    void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, u32 *argumentPtr, bool lastCall) {
        // Starting a new macro at index 'macroMethodOffset / 2'
        if (!(macroMethodOffset & 1)) {
            // Flush the current macro as we are switching to another one
@ -31,7 +31,7 @@ namespace skyline::soc::gm20b::engine {
            macroInvocation.index = (macroMethodOffset / 2) % macroState.macroPositions.size();
        }

-        macroInvocation.arguments.emplace_back(argument);
+        macroInvocation.arguments.emplace_back(argument, argumentPtr);

        // Flush macro after all of the data in the method call has been sent
        if (lastCall && macroInvocation.Valid()) {
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
@ -80,7 +80,7 @@ namespace skyline::soc::gm20b::engine {

        struct {
            u32 index{std::numeric_limits<u32>::max()};
-            std::vector<u32> arguments;
+            std::vector<MacroArgument> arguments;

            bool Valid() {
                return index != std::numeric_limits<u32>::max();
@ -114,10 +114,14 @@ namespace skyline::soc::gm20b::engine {
            throw exception("DrawIndexedInstanced is not implemented for this engine");
        }

+        virtual void DrawIndexedIndirect(u32 drawTopology, span<u8> indirectBuffer, u32 count, u32 stride) { // Default body for this virtual hook: it only throws, so an engine has to override it to provide indexed indirect draws
+            throw exception("DrawIndexedIndirect is not implemented for this engine");
+        }
+
        /**
         * @brief Handles a call to a method in the MME space
         * @param macroMethodOffset The target offset from EngineMethodsEnd
         */
-        void HandleMacroCall(u32 macroMethodOffset, u32 value, bool lastCall);
+        void HandleMacroCall(u32 macroMethodOffset, u32 argument, u32 *argumentPtr, bool lastCall);
    };
 }
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
@ -88,21 +88,21 @@ namespace skyline::soc::gm20b {
        gpEntries(numEntries),
        thread(std::thread(&ChannelGpfifo::Run, this)) {}

-    void ChannelGpfifo::SendFull(u32 method, u32 argument, SubchannelId subChannel, bool lastCall) {
+    void ChannelGpfifo::SendFull(u32 method, u32 argument, u32 *argumentPtr, SubchannelId subChannel, bool lastCall) {
        if (method < engine::GPFIFO::RegisterCount) {
-            gpfifoEngine.CallMethod(method, argument);
+            gpfifoEngine.CallMethod(method, argumentPtr ? *argumentPtr : argument);
        } else if (method < engine::EngineMethodsEnd) { [[likely]]
-            SendPure(method, argument, subChannel);
+            SendPure(method, argumentPtr ? *argumentPtr : argument, subChannel);
        } else {
            switch (subChannel) {
                case SubchannelId::ThreeD:
-                    channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
+                    channelCtx.maxwell3D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, argumentPtr, lastCall);
                    break;
                case SubchannelId::TwoD:
-                    channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
+                    channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, argumentPtr, lastCall);
                    break;
                default:
-                    Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
+                    Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argumentPtr ? *argumentPtr : argument);
                    break;
            }
        }
@ -172,6 +172,7 @@ namespace skyline::soc::gm20b {
            if (channelCtx.executor.usageTracker.dirtyIntervals.Intersect(range))
                channelCtx.executor.Submit({}, true);

+        bool pushBufferCopied{}; //!< Set by the below lambda in order to track if the pushbuffer is a copy of guest memory or not
        auto pushBuffer{[&]() -> span<u32> {
            if (pushBufferMappedRanges.size() == 1) {
                return pushBufferMappedRanges.front().cast<u32>();
@ -179,6 +180,7 @@ namespace skyline::soc::gm20b {
                // Create an intermediate copy of pushbuffer data if it's split across multiple mappings
                pushBufferData.resize(gpEntry.size);
                channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
+                pushBufferCopied = true;
                return span(pushBufferData);
            }
        }()};
@ -190,19 +192,24 @@ namespace skyline::soc::gm20b {
        auto resumeSplitMethod{[&](){
            switch (resumeState.state) {
                case MethodResumeState::State::Inc:
-                    while (entry != pushBuffer.end() && resumeState.remaining)
-                        SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+                    while (entry != pushBuffer.end() && resumeState.remaining) {
+                        SendFull(resumeState.address++, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
+                        entry++;
+                    }

                    break;
                case MethodResumeState::State::OneInc:
-                    SendFull(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+                    SendFull(resumeState.address++, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
+                    entry++;

                    // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries
                    resumeState.state = MethodResumeState::State::NonInc;
                    [[fallthrough]];
                case MethodResumeState::State::NonInc:
-                    while (entry != pushBuffer.end() && resumeState.remaining)
-                        SendFull(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+                    while (entry != pushBuffer.end() && resumeState.remaining) {
+                        SendFull(resumeState.address, pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), resumeState.subChannel, --resumeState.remaining == 0);
+                        entry++;
+                    }

                    break;
            }
@ -275,7 +282,7 @@ namespace skyline::soc::gm20b {
                            // For pure oneinc methods we can send the initial method then send the rest as a span in one go
                            if (methodHeader.methodCount > (BatchCutoff + 1)) [[unlikely]] {
                                SendPure(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel);
-                                SendPureBatchNonInc(methodHeader.methodAddress + 1, span(&(*++entry) ,methodHeader.methodCount - 1), methodHeader.methodSubChannel);
+                                SendPureBatchNonInc(methodHeader.methodAddress + 1, span((++entry).base(), methodHeader.methodCount - 1), methodHeader.methodSubChannel);

                                entry += methodHeader.methodCount - 2;
                                return false;
@ -287,8 +294,10 @@ namespace skyline::soc::gm20b {
                            SendPure(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel);
                    } else {
                        // Slow path for methods that touch GPFIFO or macros
-                        for (u32 i{}; i < methodHeader.methodCount; i++)
-                            SendFull(methodHeader.methodAddress + methodOffset(i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
+                        for (u32 i{}; i < methodHeader.methodCount; i++) {
+                            entry++;
+                            SendFull(methodHeader.methodAddress + methodOffset(i), pushBufferCopied ? *entry : 0, pushBufferCopied ? nullptr : entry.base(), methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
+                        }
                    }
                } else {
                    startSplitMethod(State);
@ -311,7 +320,7 @@ namespace skyline::soc::gm20b {
                    if (methodHeader.Pure())
                        SendPure(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel);
                    else
-                        SendFull(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
+                        SendFull(methodHeader.methodAddress, methodHeader.immdData, nullptr, methodHeader.methodSubChannel, true);

                    return false;
                } else if (methodHeader.secOp == PushBufferMethodHeader::SecOp::NonIncMethod) [[unlikely]] {
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h
@ -132,7 +132,7 @@ namespace skyline::soc::gm20b {
        /**
         * @brief Sends a method call to the appropriate subchannel and handles macro and GPFIFO methods
         */
-        void SendFull(u32 method, u32 argument, SubchannelId subchannel, bool lastCall);
+        void SendFull(u32 method, u32 argument, u32 *argumentPtr, SubchannelId subchannel, bool lastCall);

        /**
         * @brief Sends a method call to the appropriate subchannel, macro and GPFIFO methods are not handled
--- a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_interpreter.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_interpreter.cpp
@ -1,11 +1,12 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)

+#include "macro_state.h"
 #include "soc/gm20b/engines/engine.h"
 #include "macro_interpreter.h"

 namespace skyline::soc::gm20b::engine {
-    MacroInterpreter::MacroInterpreter(span<u32> macroCode) : macroCode(macroCode) {}
+    MacroInterpreter::MacroInterpreter(span<u32> macroCode) : macroCode{macroCode} {}

    void MacroInterpreter::Execute(size_t offset, span<u32> args, MacroEngineBase *targetEngine) {
        // Reset the interpreter state
--- a/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/macro/macro_state.h
@ -7,8 +7,19 @@
 #include "macro_interpreter.h"

 namespace skyline::soc::gm20b {
+    struct MacroArgument { //!< A single macro argument, held as an immediate value and/or as a pointer that is only read when explicitly dereferenced
+        u32 argument; //!< Immediate value, this is what operator* returns when argumentPtr is null
+        u32 *argumentPtr; //!< Optional pointer to where the argument is stored, when non-null operator* reads through it and `argument` is ignored
+
+        MacroArgument(u32 argument, u32 *argumentPtr) : argument{argument}, argumentPtr{argumentPtr} {}
+
+        u32 operator*() const { // Reads through argumentPtr when it is non-null, so only call this where accessing the pointed-to memory is acceptable
+            return argumentPtr ? *argumentPtr : argument;
+        }
+    };
+
    namespace macro_hle {
-        using Function = void (*)(size_t offset, span<u32> args, engine::MacroEngineBase *targetEngine);
+        using Function = bool (*)(size_t offset, span<MacroArgument> args, engine::MacroEngineBase *targetEngine);
    }

    /**
@ -24,12 +35,14 @@ namespace skyline::soc::gm20b {
        std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
        std::array<size_t, 0x80> macroPositions{}; //!< The positions of each individual macro in macro code memory, there can be a maximum of 0x80 macros at any one time
        std::array<MacroHleEntry, 0x80> macroHleFunctions{}; //!< The HLE functions for each macro position, used to optionally override the interpreter
+        std::vector<u32> argumentStorage; //!< Storage for the macro arguments during execution using the interpreter
+
        bool invalidatePending{};

-        MacroState() : macroInterpreter(macroCode) {}
+        MacroState() : macroInterpreter{macroCode} {}

        void Invalidate();

-        void Execute(u32 position, span<u32> args, engine::MacroEngineBase *targetEngine);
+        void Execute(u32 position, span<MacroArgument> args, engine::MacroEngineBase *targetEngine);
    };
 }