From 9fd0dd848b13caf88658306a21be88022712da3b Mon Sep 17 00:00:00 2001
From: Billy Laws <blaws05@gmail.com>
Date: Thu, 23 Jul 2020 20:46:04 +0100
Subject: [PATCH] Add support for processing GP Entries and the pushbuffers
 they contain

This is the backbone of the GPU; in the future it will be expanded to
support calling into engines.
---
 app/CMakeLists.txt                            |   1 +
 app/src/main/cpp/skyline/gpu.cpp              |   4 +-
 app/src/main/cpp/skyline/gpu.h                |   2 +
 app/src/main/cpp/skyline/gpu/gpfifo.cpp       |  64 ++++++
 app/src/main/cpp/skyline/gpu/gpfifo.h         | 184 ++++++++++++++++++
 .../services/nvdrv/devices/nvhost_channel.cpp |  71 ++++++-
 .../services/nvdrv/devices/nvhost_channel.h   |   2 +
 7 files changed, 322 insertions(+), 6 deletions(-)
 create mode 100644 app/src/main/cpp/skyline/gpu/gpfifo.cpp
 create mode 100644 app/src/main/cpp/skyline/gpu/gpfifo.h
diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt
index ac052067..96534afd 100644
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@@ -40,6 +40,7 @@ add_library(skyline SHARED
         ${source_DIR}/skyline/audio/adpcm_decoder.cpp
         ${source_DIR}/skyline/gpu.cpp
         ${source_DIR}/skyline/gpu/memory_manager.cpp
+        ${source_DIR}/skyline/gpu/gpfifo.cpp
         ${source_DIR}/skyline/gpu/texture.cpp
         ${source_DIR}/skyline/os.cpp
         ${source_DIR}/skyline/loader/loader.cpp
diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp
index 5bdcddb2..d52358ab 100644
--- a/app/src/main/cpp/skyline/gpu.cpp
+++ b/app/src/main/cpp/skyline/gpu.cpp
@@ -12,7 +12,7 @@ extern skyline::u16 fps;
 extern skyline::u32 frametime;
 
 namespace skyline::gpu {
-    GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared<kernel::type::KEvent>(state)), bufferEvent(std::make_shared<kernel::type::KEvent>(state)) {
+    GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared<kernel::type::KEvent>(state)), bufferEvent(std::make_shared<kernel::type::KEvent>(state)) {
         ANativeWindow_acquire(window);
         resolution.width = static_cast<u32>(ANativeWindow_getWidth(window));
         resolution.height = static_cast<u32>(ANativeWindow_getHeight(window));
@@ -24,6 +24,8 @@ namespace skyline::gpu {
     }
 
     void GPU::Loop() {
+        gpfifo.Run();
+
         if (surfaceUpdate) {
             if (Surface == nullptr)
                 return;
diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h
index 1b84e552..46947ef5 100644
--- a/app/src/main/cpp/skyline/gpu.h
+++ b/app/src/main/cpp/skyline/gpu.h
@@ -10,6 +10,7 @@
 #include <services/nvdrv/devices/nvmap.h>
 #include "gpu/texture.h"
 #include "gpu/memory_manager.h"
+#include "gpu/gpfifo.h"
 
 namespace skyline::gpu {
     /**
@@ -30,6 +31,7 @@ namespace skyline::gpu {
         std::shared_ptr<kernel::type::KEvent> vsyncEvent; //!< This KEvent is triggered every time a frame is drawn
         std::shared_ptr<kernel::type::KEvent> bufferEvent; //!< This KEvent is triggered every time a buffer is freed
         vmm::MemoryManager memoryManager; //!< The GPU Virtual Memory Manager
+        gpfifo::GPFIFO gpfifo;
 
         /**
          * @param window The ANativeWindow to render to
diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.cpp b/app/src/main/cpp/skyline/gpu/gpfifo.cpp
new file mode 100644
index 00000000..8763660e
--- /dev/null
+++ b/app/src/main/cpp/skyline/gpu/gpfifo.cpp
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include <gpu.h>
+#include "gpfifo.h"
+
+namespace skyline::gpu::gpfifo {
+    void GPFIFO::Send(MethodParams params) { // Stub: nothing consumes method calls yet, so each one is only reported through the logger
+        state.logger->Warn("Called unimplemented GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall);
+    }
+
+    void GPFIFO::Process(const std::vector<u32> &segment) { // Decodes each method header in 'segment' and forwards the calls it describes to Send
+        for (auto entry = segment.begin(); entry != segment.end(); entry++) {
+            auto methodHeader = reinterpret_cast<const PushBufferMethodHeader *>(&*entry);
+            // A header can claim more arguments than the segment still holds, the count is clamped so 'entry' is never advanced past the end
+            const auto remaining = static_cast<size_t>(segment.end() - entry) - 1;
+            const u16 count = (static_cast<size_t>(methodHeader->methodCount) < remaining) ? static_cast<u16>(methodHeader->methodCount) : static_cast<u16>(remaining);
+
+            switch (methodHeader->secOp) {
+                case PushBufferMethodHeader::SecOp::IncMethod: // Each argument goes to the next consecutive method address
+                    for (u16 i{}; i < count; i++)
+                        Send(MethodParams{static_cast<u16>(methodHeader->methodAddress + i), *++entry, methodHeader->methodSubChannel, i == count - 1});
+                    break;
+                case PushBufferMethodHeader::SecOp::NonIncMethod: // Every argument goes to the same method address
+                    for (u16 i{}; i < count; i++)
+                        Send(MethodParams{methodHeader->methodAddress, *++entry, methodHeader->methodSubChannel, i == count - 1});
+                    break;
+                case PushBufferMethodHeader::SecOp::OneInc: // The first argument goes to the base address, all later ones to the address after it
+                    for (u16 i{}; i < count; i++)
+                        Send(MethodParams{static_cast<u16>(methodHeader->methodAddress + bool(i)), *++entry, methodHeader->methodSubChannel, i == count - 1});
+                    break;
+                case PushBufferMethodHeader::SecOp::ImmdDataMethod: // The argument is embedded in the header itself, no extra words are consumed
+                    Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true});
+                    break;
+                default: // All remaining operations are skipped without issuing a call
+                    break;
+            }
+        }
+    }
+
+    void GPFIFO::Run() { // Drains the queue while holding its lock, processing every pending pushbuffer in submission order
+        std::lock_guard lock(pushBufferQueueLock);
+        while (!pushBufferQueue.empty()) {
+            auto &pushBuffer = pushBufferQueue.front(); // Bound by reference so the segment isn't deep-copied for every entry, it stays valid until the pop below
+            if (pushBuffer.segment.empty()) // The contents were not read when the entry was pushed, read them now
+                pushBuffer.Fetch(state.gpu->memoryManager);
+
+            Process(pushBuffer.segment);
+            pushBufferQueue.pop();
+        }
+    }
+
+    void GPFIFO::Push(std::span<GpEntry> entries) { // Queues one pushbuffer per entry, contents are read immediately up until the first 'Wait' entry
+        std::lock_guard lock(pushBufferQueueLock);
+        bool beforeBarrier{true};
+
+        for (const auto &entry : entries) {
+            if (entry.sync == GpEntry::Sync::Wait)
+                beforeBarrier = false; // This entry and all later ones are left unfetched until Run reaches them
+
+            pushBufferQueue.emplace(entry, state.gpu->memoryManager, beforeBarrier); // Constructed in place rather than moved from a temporary
+        }
+    }
+}
diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.h b/app/src/main/cpp/skyline/gpu/gpfifo.h
new file mode 100644
index 00000000..1f122a18
--- /dev/null
+++ b/app/src/main/cpp/skyline/gpu/gpfifo.h
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include <common.h>
+#include <span>
+#include <queue>
+#include "memory_manager.h"
+
+namespace skyline::gpu::gpfifo {
+    /**
+     * @brief This holds the parameters of a single GPU method call decoded from a pushbuffer
+     */
+    struct MethodParams {
+        u16 method; //!< The address of the method being called
+        u32 argument; //!< The 32-bit argument supplied to the method
+        u32 subChannel; //!< The subchannel taken from the method header
+        bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specific macro
+    };
+
+    /**
+     * @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpFifo'
+     * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
+     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
+     */
+    struct GpEntry {
+        enum class Fetch : u8 { // Unsigned underlying type: a 1-bit field of a signed enum reads a set bit back as -1 rather than 1
+            Unconditional = 0,
+            Conditional = 1,
+        };
+
+        union {
+            struct {
+                Fetch fetch : 1;
+                u8 _pad_ : 1;
+                u32 get : 30; //!< Bits 2-31 of the pushbuffer's address, the low two bits are implicitly zero
+            };
+            u32 entry0;
+        };
+
+        enum class Opcode : u8 {
+            Nop = 0,
+            Illegal = 1,
+            Crc = 2,
+            PbCrc = 3,
+        };
+
+        enum class Priv : u8 { // Unsigned for the same reason as 'Fetch'
+            User = 0,
+            Kernel = 1,
+        };
+
+        enum class Level : u8 { // Unsigned for the same reason as 'Fetch'
+            Main = 0,
+            Subroutine = 1,
+        };
+
+        enum class Sync : u8 { // Unsigned for the same reason as 'Fetch', otherwise a comparison against 'Wait' can never succeed
+            Proceed = 0,
+            Wait = 1,
+        };
+
+        union {
+            struct {
+                union {
+                    u8 getHi; //!< Bits 32-39 of the pushbuffer's address
+                    Opcode opcode; //!< Shares its storage with 'getHi'
+                };
+                Priv priv : 1;
+                Level level : 1;
+                u32 size : 21; //!< The length of the pushbuffer in 32-bit words
+                Sync sync : 1;
+            };
+            u32 entry1;
+        };
+    };
+    static_assert(sizeof(GpEntry) == 0x8);
+
+    /**
+     * @brief This holds a single pushbuffer method header that describes a compressed method sequence
+     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
+     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
+     */
+    union PushBufferMethodHeader {
+        enum class TertOp : u8 {
+            Grp0IncMethod = 0,
+            Grp0SetSubDevMask = 1,
+            Grp0StoreSubDevMask = 2,
+            Grp0UseSubDevMask = 3,
+            Grp2NonIncMethod = 0 // Deliberately shares the value 0 with Grp0IncMethod
+        };
+
+        enum class SecOp : u8 {
+            Grp0UseTert = 0,
+            IncMethod = 1, //!< The method address increases by one for every argument
+            Grp2UseTert = 2,
+            NonIncMethod = 3, //!< Every argument is sent to the same method address
+            ImmdDataMethod = 4, //!< The single argument is stored in the header as 'immdData'
+            OneInc = 5, //!< The method address increases once, after the first argument
+            Reserved6 = 6,
+            EndPbSegment = 7
+        };
+
+        struct {
+            union { // All members below are overlapping views of the same 32-bit header word
+                u16 methodAddress : 12;
+                struct {
+                    u8 _pad0_ : 4;
+                    u16 subDeviceMask : 12;
+                };
+
+                struct {
+                    u16 _pad1_ : 13;
+                    u8 methodSubChannel : 3;
+                    union {
+                        TertOp tertOp : 3;
+                        u16 methodCount : 13; //!< The number of argument words that follow the header
+                        u16 immdData : 13; //!< The inline argument of an ImmdDataMethod header
+                    };
+                };
+
+                struct {
+                    u32 _pad2_ : 29;
+                    SecOp secOp : 3; //!< Selects how the rest of the header is interpreted
+                };
+            };
+        };
+        u32 entry; //!< The raw header word
+    };
+    static_assert(sizeof(PushBufferMethodHeader) == 0x4);
+
+    /**
+     * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
+     * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
+     */
+    class GPFIFO {
+      private:
+        /**
+         * @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words
+         */
+        struct PushBuffer {
+            GpEntry gpEntry;
+            std::vector<u32> segment; //!< The words of the pushbuffer, this stays empty until Fetch has been called
+
+            PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) {
+                if (fetch)
+                    Fetch(memoryManager);
+            }
+
+            inline void Fetch(const vmm::MemoryManager &memoryManager) {
+                segment.resize(gpEntry.size); // One element per 32-bit word, scaling the element count by sizeof(u32) would pad the segment with zeroed words
+                memoryManager.Read(reinterpret_cast<u8 *>(segment.data()), (static_cast<u64>(gpEntry.getHi) << 32) | (static_cast<u64>(gpEntry.get) << 2), segment.size() * sizeof(u32)); // 'get' is widened before the shift as the 30-bit field would otherwise promote to a signed int
+            }
+        };
+
+        const DeviceState &state;
+        skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a separate thread
+        std::queue<PushBuffer> pushBufferQueue;
+
+        /**
+         * @brief Processes a pushbuffer segment, calling methods as needed
+         */
+        void Process(const std::vector<u32> &segment);
+
+        /**
+         * @brief This sends a method call to the GPU hardware
+         */
+        void Send(MethodParams params);
+
+      public:
+        GPFIFO(const DeviceState &state) : state(state) {}
+
+        /**
+         * @brief Executes all pending entries in the FIFO
+         */
+        void Run();
+
+        /**
+         * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Run'
+         */
+        void Push(std::span<GpEntry> entries);
+    };
+}
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp
index b0a194ad..1f701cdb 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp
@@ -1,7 +1,11 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
 
+#include <span>
+#include <os.h>
+#include <gpu/gpfifo.h>
 #include <kernel/types/KProcess.h>
+#include <services/nvdrv/INvDrvServices.h>
 #include "nvhost_channel.h"
 
 namespace skyline::service::nvdrv::device {
@@ -15,13 +19,62 @@ namespace skyline::service::nvdrv::device {
         {0x480D, NFUNC(NvHostChannel::SetPriority)},
         {0x481A, NFUNC(NvHostChannel::AllocGpfifoEx2)},
         {0x4714, NFUNC(NvHostChannel::SetUserData)},
-    }) {}
+    }) {
+        auto &hostSyncpoint = state.os->serviceManager.GetService<nvdrv::INvDrvServices>(Service::nvdrv_INvDrvServices)->hostSyncpoint;
+
+        channelFence.id = hostSyncpoint.AllocateSyncpoint(false);
+        channelFence.UpdateValue(hostSyncpoint);
+    }
 
     void NvHostChannel::SetNvmapFd(IoctlData &buffer) {}
 
     void NvHostChannel::SetSubmitTimeout(IoctlData &buffer) {}
 
-    void NvHostChannel::SubmitGpFifo(IoctlData &buffer) {}
+    void NvHostChannel::SubmitGpFifo(IoctlData &buffer) { // Queues the submitted GP entries on the GPFIFO and reports the resulting fence back to the guest
+        struct Data {
+            u64 address;
+            u32 numEntries;
+            union {
+                struct {
+                    bool fenceWait : 1;
+                    bool fenceIncrement : 1;
+                    bool hwFormat : 1;
+                    u8 _pad0_ : 1;
+                    bool suppressWfi : 1;
+                    u8 _pad1_ : 3;
+                    bool incrementWithValue : 1;
+                };
+                u32 raw;
+            } flags;
+            NvFence fence;
+        } &args = state.process->GetReference<Data>(buffer.input.at(0).address); // Bound by reference so the fence and flags written below reach the guest instead of a discarded copy
+
+        auto &hostSyncpoint = state.os->serviceManager.GetService<nvdrv::INvDrvServices>(Service::nvdrv_INvDrvServices)->hostSyncpoint;
+
+        if (args.flags.fenceWait) {
+            if (args.flags.incrementWithValue) { // Waiting on the fence and incrementing by its value are rejected when requested together
+                buffer.status = NvStatus::BadValue;
+                return;
+            }
+
+            if (hostSyncpoint.HasSyncpointExpired(args.fence.id, args.fence.value)) { // NOTE(review): warning on an already expired fence looks inverted, confirm the intended condition
+                state.logger->Warn("GPU Syncpoints are not currently supported!");
+            }
+        }
+
+        state.gpu->gpfifo.Push(std::span(state.process->GetPointer<gpu::gpfifo::GpEntry>(args.address), args.numEntries));
+
+        bool increment = args.flags.fenceIncrement || args.flags.incrementWithValue;
+        u32 amount = increment ? (args.flags.fenceIncrement ? 2 : 0) + (args.flags.incrementWithValue ? args.fence.value : 0) : 0;
+        args.fence.value = hostSyncpoint.IncrementSyncpointMaxExt(args.fence.id, amount);
+        args.fence.id = channelFence.id;
+
+        if (args.flags.fenceIncrement) {
+            state.logger->Warn("GPU Syncpoints are not currently supported!");
+        }
+
+        args.flags.raw = 0;
+    }
 
     void NvHostChannel::AllocObjCtx(IoctlData &buffer) {}
 
@@ -30,7 +83,7 @@ namespace skyline::service::nvdrv::device {
     void NvHostChannel::SetErrorNotifier(IoctlData &buffer) {}
 
     void NvHostChannel::SetPriority(IoctlData &buffer) {
-        auto priority = state.process->GetObject<NvChannelPriority>(buffer.input[0].address);
+        auto priority = state.process->GetObject<NvChannelPriority>(buffer.input.at(0).address);
 
         switch (priority) {
             case NvChannelPriority::Low:
@@ -45,8 +98,16 @@ namespace skyline::service::nvdrv::device {
         }
     }
 
-    void NvHostChannel::AllocGpfifoEx2(IoctlData &buffer) {}
+    void NvHostChannel::AllocGpfifoEx2(IoctlData &buffer) { // Only reports the channel's fence back to the guest, every other field is left untouched
+        struct Data {
+            u32 numEntries;
+            u32 numJobs;
+            u32 flags;
+            NvFence fence;
+            u32 reserved[3];
+        } &args = state.process->GetReference<Data>(buffer.input.at(0).address); // Bound by reference so the fence written below reaches the guest instead of a discarded copy
+        args.fence = channelFence;
+    }
 
     void NvHostChannel::SetUserData(IoctlData &buffer) {}
-
 }
diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h
index df159d9a..1b873b7d 100644
--- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h
+++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include "nvfence.h"
 #include "nvdevice.h"
 
 namespace skyline::service::nvdrv::device {
@@ -17,6 +18,7 @@ namespace skyline::service::nvdrv::device {
             High = 0x94
         };
 
+        NvFence channelFence{};
         u32 timeslice{};
 
       public: