Add support for processing GP Entries and the pushbuffers they contain

This is the backbone of the GPU; in the future, it will be expanded to support calling into engines.
2024-06-01 13:58:45 +02:00 · 2020-07-23 20:46:04 +01:00 · 2020-07-23 20:46:04 +01:00 · 9fd0dd848b
commit 9fd0dd848b
parent be70f8715d
7 changed files with 322 additions and 6 deletions
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@ -40,6 +40,7 @@ add_library(skyline SHARED
        ${source_DIR}/skyline/audio/adpcm_decoder.cpp
        ${source_DIR}/skyline/gpu.cpp
        ${source_DIR}/skyline/gpu/memory_manager.cpp
+        ${source_DIR}/skyline/gpu/gpfifo.cpp
        ${source_DIR}/skyline/gpu/texture.cpp
        ${source_DIR}/skyline/os.cpp
        ${source_DIR}/skyline/loader/loader.cpp
--- a/app/src/main/cpp/skyline/gpu.cpp
+++ b/app/src/main/cpp/skyline/gpu.cpp
@ -12,7 +12,7 @@ extern skyline::u16 fps;
 extern skyline::u32 frametime;

 namespace skyline::gpu {
-    GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared<kernel::type::KEvent>(state)), bufferEvent(std::make_shared<kernel::type::KEvent>(state)) {
+    GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared<kernel::type::KEvent>(state)), bufferEvent(std::make_shared<kernel::type::KEvent>(state)) {
        ANativeWindow_acquire(window);
        resolution.width = static_cast<u32>(ANativeWindow_getWidth(window));
        resolution.height = static_cast<u32>(ANativeWindow_getHeight(window));
@ -24,6 +24,8 @@ namespace skyline::gpu {
    }

    void GPU::Loop() {
+        gpfifo.Run();
+
        if (surfaceUpdate) {
            if (Surface == nullptr)
                return;
--- a/app/src/main/cpp/skyline/gpu.h
+++ b/app/src/main/cpp/skyline/gpu.h
@ -10,6 +10,7 @@
 #include <services/nvdrv/devices/nvmap.h>
 #include "gpu/texture.h"
 #include "gpu/memory_manager.h"
+#include "gpu/gpfifo.h"

 namespace skyline::gpu {
    /**
@ -30,6 +31,7 @@ namespace skyline::gpu {
        std::shared_ptr<kernel::type::KEvent> vsyncEvent; //!< This KEvent is triggered every time a frame is drawn
        std::shared_ptr<kernel::type::KEvent> bufferEvent; //!< This KEvent is triggered every time a buffer is freed
        vmm::MemoryManager memoryManager; //!< The GPU Virtual Memory Manager
+        gpfifo::GPFIFO gpfifo; //!< The GPFIFO which queues pushbuffers and processes them on every call to Loop

        /**
         * @param window The ANativeWindow to render to
--- a/app/src/main/cpp/skyline/gpu/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/gpu/gpfifo.cpp
@ -0,0 +1,64 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include <gpu.h>
+#include "gpfifo.h"
+
+namespace skyline::gpu::gpfifo {
+    // No engine is called into from here, every method call is only reported through the logger
+    void GPFIFO::Send(MethodParams params) {
+        const auto &[method, argument, subChannel, lastCall] = params;
+        state.logger->Warn("Called unimplemented GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", method, argument, subChannel, lastCall);
+    }
+
+    // Walks the segment one method header at a time and sends a call for every argument that belongs to the header
+    // Processing of the segment stops early if a header claims more arguments than the segment still contains
+    void GPFIFO::Process(const std::vector<u32> &segment) {
+        for (auto entry = segment.begin(); entry != segment.end(); ++entry) {
+            // The header pointer stays on the header word while 'entry' is advanced over the argument words
+            auto methodHeader = reinterpret_cast<const PushBufferMethodHeader *>(&*entry);
+
+            // Sends one call per argument word following the header, 'methodFor' maps the index of a call to the method it targets
+            // Nothing is sent and false is returned when the arguments would extend past the end of the segment
+            auto sendArguments = [&](auto methodFor) {
+                const u16 address{methodHeader->methodAddress};
+                const u16 count{methodHeader->methodCount};
+                const auto available = static_cast<u32>(segment.end() - entry - 1); // The words that are left after the header itself
+                if (count > available) {
+                    state.logger->Warn("Truncated GPU pushbuffer segment - method: 0x{:X} expects {} arguments but only {} remain", address, count, available);
+                    return false;
+                }
+
+                for (u16 i{}; i < count; i++)
+                    Send(MethodParams{methodFor(i), *++entry, methodHeader->methodSubChannel, i == count - 1});
+
+                return true;
+            };
+
+            switch (methodHeader->secOp) {
+                case PushBufferMethodHeader::SecOp::IncMethod:
+                    // Every argument goes to the method following the one the previous argument went to
+                    if (!sendArguments([&](u16 i) -> u16 { return static_cast<u16>(methodHeader->methodAddress + i); }))
+                        return;
+
+                    break;
+                case PushBufferMethodHeader::SecOp::NonIncMethod:
+                    // Every argument goes to the same method
+                    if (!sendArguments([&](u16) -> u16 { return methodHeader->methodAddress; }))
+                        return;
+
+                    break;
+                case PushBufferMethodHeader::SecOp::OneInc:
+                    // The first argument goes to the base method and all of the following ones go to the method after it
+                    if (!sendArguments([&](u16 i) -> u16 { return static_cast<u16>(methodHeader->methodAddress + bool(i)); }))
+                        return;
+
+                    break;
+                case PushBufferMethodHeader::SecOp::ImmdDataMethod:
+                    // The argument is stored in the header itself so no further words are consumed
+                    Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true});
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    // Drains the queue while holding the queue lock, pushbuffers that weren't fetched when they were pushed are fetched right before they are processed
+    void GPFIFO::Run() {
+        std::lock_guard lock(pushBufferQueueLock);
+        while (!pushBufferQueue.empty()) {
+            // This is bound by reference as a copy would duplicate the entire segment vector for every pushbuffer
+            auto &pushBuffer = pushBufferQueue.front();
+            if (pushBuffer.segment.empty())
+                pushBuffer.Fetch(state.gpu->memoryManager);
+
+            Process(pushBuffer.segment);
+            pushBufferQueue.pop();
+        }
+    }
+
+    // Queues a pushbuffer for every entry, the contents are fetched immediately up until the first entry that is marked with Sync::Wait
+    // That entry and all entries after it are queued unfetched and have their contents read by Run instead
+    void GPFIFO::Push(std::span<GpEntry> entries) {
+        std::lock_guard lock(pushBufferQueueLock);
+        bool beforeBarrier{true};
+
+        for (const auto &entry : entries) {
+            if (entry.sync == GpEntry::Sync::Wait)
+                beforeBarrier = false;
+
+            // The pushbuffer is constructed in place inside the queue rather than being moved in from a temporary
+            pushBufferQueue.emplace(entry, state.gpu->memoryManager, beforeBarrier);
+        }
+    }
+}
--- a/app/src/main/cpp/skyline/gpu/gpfifo.h
+++ b/app/src/main/cpp/skyline/gpu/gpfifo.h
@ -0,0 +1,184 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include <common.h>
+#include <span>
+#include <queue>
+#include "memory_manager.h"
+
+namespace skyline::gpu::gpfifo {
+    /**
+     * @brief This holds the parameters of a GPU method call
+     */
+    struct MethodParams {
+        u16 method; //!< The address of the method that is being called
+        u32 argument; //!< The argument word that is passed to the method
+        u32 subChannel; //!< The subchannel from the method header that the call was decoded from
+        bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specific macro
+    };
+
+    /**
+     * @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpFifo'
+     * @note All enums that are stored in single-bit fields have an unsigned underlying type, with a signed one the field could only hold 0 and -1 so a set bit would never compare equal to an enumerator with the value 1
+     * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
+     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
+     */
+    struct GpEntry {
+        enum class Fetch : u8 {
+            Unconditional = 0,
+            Conditional = 1,
+        };
+
+        union {
+            struct {
+                Fetch fetch : 1;
+                u8 _pad_ : 1;
+                u32 get : 30; //!< The low part of the pushbuffer's address, it is shifted left by 2 to form the address
+            };
+            u32 entry0; //!< The first word of the entry as a whole
+        };
+
+        enum class Opcode : u8 {
+            Nop = 0,
+            Illegal = 1,
+            Crc = 2,
+            PbCrc = 3,
+        };
+
+        enum class Priv : u8 {
+            User = 0,
+            Kernel = 1,
+        };
+
+        enum class Level : u8 {
+            Main = 0,
+            Subroutine = 1,
+        };
+
+        enum class Sync : u8 {
+            Proceed = 0,
+            Wait = 1,
+        };
+
+        union {
+            struct {
+                union {
+                    u8 getHi; //!< The high part of the pushbuffer's address, it is shifted left by 32 to form the address
+                    Opcode opcode; //!< This shares its storage with getHi
+                };
+                Priv priv : 1;
+                Level level : 1;
+                u32 size : 21; //!< The amount of 32-bit words that the pushbuffer is made up of
+                Sync sync : 1; //!< The contents of entries starting from the first one set to Wait aren't fetched when they are pushed
+            };
+            u32 entry1; //!< The second word of the entry as a whole
+        };
+    };
+    static_assert(sizeof(GpEntry) == 0x8);
+
+    /**
+     * @brief This holds a single pushbuffer method header that describes a compressed method sequence
+     * @note Every anonymous struct inside the union is a different view of the same 32-bit word, the padding members skip over the bits that belong to the fields of the other views
+     * @note The bit ranges implied by the padding assume that bit-fields are allocated starting from the least significant bit - TODO confirm for every targeted ABI
+     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
+     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
+     */
+    union PushBufferMethodHeader {
+        // Grp0IncMethod and Grp2NonIncMethod intentionally share the value 0 as they belong to different groups
+        enum class TertOp : u8 {
+            Grp0IncMethod = 0,
+            Grp0SetSubDevMask = 1,
+            Grp0StoreSubDevMask = 2,
+            Grp0UseSubDevMask = 3,
+            Grp2NonIncMethod = 0
+        };
+
+        // GPFIFO::Process only handles IncMethod, NonIncMethod, ImmdDataMethod and OneInc, headers with any other value are skipped
+        enum class SecOp : u8 {
+            Grp0UseTert = 0,
+            IncMethod = 1, //!< Every argument is sent to the method after the one the previous argument was sent to
+            Grp2UseTert = 2,
+            NonIncMethod = 3, //!< Every argument is sent to the same method
+            ImmdDataMethod = 4, //!< The only argument is the immediate data held in the header
+            OneInc = 5, //!< The first argument is sent to the method and all others are sent to the method after it
+            Reserved6 = 6,
+            EndPbSegment = 7
+        };
+
+        struct {
+            union {
+                u16 methodAddress : 12; //!< The address of the first method that is called
+                struct {
+                    u8 _pad0_ : 4;
+                    u16 subDeviceMask : 12;
+                };
+
+                struct {
+                    u16 _pad1_ : 13;
+                    u8 methodSubChannel : 3; //!< The subchannel that the method calls are sent with
+                    // All members of this union overlap, which one is meaningful depends on the operation
+                    union {
+                        TertOp tertOp : 3;
+                        u16 methodCount : 13; //!< The amount of argument words that follow the header
+                        u16 immdData : 13; //!< The argument of an ImmdDataMethod header
+                    };
+                };
+
+                struct {
+                    u32 _pad2_ : 29;
+                    SecOp secOp : 3; //!< The operation that decides how the header is processed
+                };
+            };
+        };
+        u32 entry; //!< The header as a whole word
+    };
+    static_assert(sizeof(PushBufferMethodHeader) == 0x4);
+
+    /**
+     * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
+     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
+     */
+    class GPFIFO {
+      private:
+        /**
+         * @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words
+         */
+        struct PushBuffer {
+            GpEntry gpEntry;
+            std::vector<u32> segment; //!< The words of the pushbuffer, this stays empty until the pushbuffer has been fetched
+
+            /**
+             * @param fetch If the contents should be read immediately rather than being left for a later call to Fetch
+             */
+            PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) {
+                if (fetch)
+                    Fetch(memoryManager);
+            }
+
+            /**
+             * @brief Reads the contents of the pushbuffer that is described by the GP entry into the segment
+             */
+            inline void Fetch(const vmm::MemoryManager &memoryManager) {
+                // The segment holds words so it requires an element per word, only the length of the read is in bytes
+                segment.resize(gpEntry.size);
+                // 'get' is widened prior to being shifted, the 30-bit field is otherwise promoted to a signed int which cannot hold the shifted value when one of the upper two bits is set
+                memoryManager.Read(reinterpret_cast<u8 *>(segment.data()), (static_cast<u64>(gpEntry.getHi) << 32) | (static_cast<u64>(gpEntry.get) << 2), segment.size() * sizeof(u32));
+            }
+        };
+
+        const DeviceState &state;
+        skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a separate thread
+        std::queue<PushBuffer> pushBufferQueue;
+
+        /**
+         * @brief Processes a pushbuffer segment, calling methods as needed
+         */
+        void Process(const std::vector<u32> &segment);
+
+        /**
+         * @brief This sends a method call to the GPU hardware
+         */
+        void Send(MethodParams params);
+
+      public:
+        GPFIFO(const DeviceState &state) : state(state) {}
+
+        /**
+         * @brief Executes all pending entries in the FIFO
+         */
+        void Run();
+
+        /**
+         * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Run'
+         */
+        void Push(std::span<GpEntry> entries);
+    };
+}
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp
@ -1,7 +1,11 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)

+#include <span>
+#include <os.h>
+#include <gpu/gpfifo.h>
 #include <kernel/types/KProcess.h>
+#include <services/nvdrv/INvDrvServices.h>
 #include "nvhost_channel.h"

 namespace skyline::service::nvdrv::device {
@ -15,13 +19,62 @@ namespace skyline::service::nvdrv::device {
        {0x480D, NFUNC(NvHostChannel::SetPriority)},
        {0x481A, NFUNC(NvHostChannel::AllocGpfifoEx2)},
        {0x4714, NFUNC(NvHostChannel::SetUserData)},
-    }) {}
+    }) {
+        auto &hostSyncpoint = state.os->serviceManager.GetService<nvdrv::INvDrvServices>(Service::nvdrv_INvDrvServices)->hostSyncpoint;
+
+        channelFence.id = hostSyncpoint.AllocateSyncpoint(false);
+        channelFence.UpdateValue(hostSyncpoint);
+    }

    void NvHostChannel::SetNvmapFd(IoctlData &buffer) {}

    void NvHostChannel::SetSubmitTimeout(IoctlData &buffer) {}

-    void NvHostChannel::SubmitGpFifo(IoctlData &buffer) {}
+    // Pushes the submitted GP entries to the GPFIFO and writes the channel's fence back into the arguments
+    void NvHostChannel::SubmitGpFifo(IoctlData &buffer) {
+        struct Data {
+            u64 address; // The address of the array of GP entries
+            u32 numEntries; // The amount of GP entries in the array
+            union {
+                struct {
+                    bool fenceWait : 1;
+                    bool fenceIncrement : 1;
+                    bool hwFormat : 1;
+                    u8 _pad0_ : 1;
+                    bool suppressWfi : 1;
+                    u8 _pad1_ : 3;
+                    bool incrementWithValue : 1;
+                };
+                u32 raw;
+            } flags;
+            NvFence fence; // The fence supplied by the caller, this is overwritten with the channel's fence
+        } &args = state.process->GetReference<Data>(buffer.input.at(0).address); // This has to be a reference, writes to a copy would never reach the caller
+
+        auto &hostSyncpoint = state.os->serviceManager.GetService<nvdrv::INvDrvServices>(Service::nvdrv_INvDrvServices)->hostSyncpoint;
+
+        if (args.flags.fenceWait) {
+            if (args.flags.incrementWithValue) {
+                buffer.status = NvStatus::BadValue;
+                return;
+            }
+
+            // NOTE(review): this warns when the fence is reported as expired, confirm that the condition isn't supposed to be inverted
+            if (hostSyncpoint.HasSyncpointExpired(args.fence.id, args.fence.value)) {
+                state.logger->Warn("GPU Syncpoints are not currently supported!");
+            }
+        }
+
+        state.gpu->gpfifo.Push(std::span(state.process->GetPointer<gpu::gpfifo::GpEntry>(args.address), args.numEntries));
+
+        // The amount depends on the supplied fence value so it has to be calculated prior to the fence being overwritten
+        u32 amount = (args.flags.fenceIncrement ? 2 : 0) + (args.flags.incrementWithValue ? args.fence.value : 0);
+
+        // The ID is set first so that the returned value belongs to the channel's syncpoint rather than the one of the supplied fence
+        args.fence.id = channelFence.id;
+        args.fence.value = hostSyncpoint.IncrementSyncpointMaxExt(args.fence.id, amount);
+
+        if (args.flags.fenceIncrement) {
+            state.logger->Warn("GPU Syncpoints are not currently supported!");
+        }
+
+        args.flags.raw = 0;
+    }

    void NvHostChannel::AllocObjCtx(IoctlData &buffer) {}

@ -30,7 +83,7 @@ namespace skyline::service::nvdrv::device {
    void NvHostChannel::SetErrorNotifier(IoctlData &buffer) {}

    void NvHostChannel::SetPriority(IoctlData &buffer) {
-        auto priority = state.process->GetObject<NvChannelPriority>(buffer.input[0].address);
+        auto priority = state.process->GetObject<NvChannelPriority>(buffer.input.at(0).address);

        switch (priority) {
            case NvChannelPriority::Low:
@ -45,8 +98,16 @@ namespace skyline::service::nvdrv::device {
        }
    }

-    void NvHostChannel::AllocGpfifoEx2(IoctlData &buffer) {}
+    // Writes the channel's fence into the arguments, all other arguments are currently ignored
+    void NvHostChannel::AllocGpfifoEx2(IoctlData &buffer) {
+        struct Data {
+            u32 numEntries;
+            u32 numJobs;
+            u32 flags;
+            NvFence fence; // This is set to the channel's fence
+            u32 reserved[3];
+        } &args = state.process->GetReference<Data>(buffer.input.at(0).address); // This has to be a reference, the fence would otherwise only be written to a local copy
+        args.fence = channelFence;
+    }

    void NvHostChannel::SetUserData(IoctlData &buffer) {}
-
 }
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h
@ -3,6 +3,7 @@

 #pragma once

+#include "nvfence.h"
 #include "nvdevice.h"

 namespace skyline::service::nvdrv::device {
@ -17,6 +18,7 @@ namespace skyline::service::nvdrv::device {
            High = 0x94
        };

+        NvFence channelFence{}; //!< The fence of the channel, its syncpoint is allocated in the constructor and it is handed out by AllocGpfifoEx2 and SubmitGpFifo
        u32 timeslice{};

      public: