From cf468c20e28404603222f0611e12643e0e533e26 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 9 Aug 2020 14:17:45 +0100 Subject: [PATCH] Extend the GPFIFO implementation with support for engines and fix a few bugs An engine is effectively a HW block in the GPU, the main one is the Maxwell 3D which is used for 3D graphics. Engines can be bound to individual subchannels and then methods within them can be called through pushbuffers. The engine side of the GPFIFO is also included; it currently does nothing but will need to be extended in the future with semaphores. --- app/src/main/cpp/skyline/gpu.cpp | 3 +- app/src/main/cpp/skyline/gpu.h | 9 + app/src/main/cpp/skyline/gpu/engines/engine.h | 53 +++ app/src/main/cpp/skyline/gpu/engines/gpfifo.h | 179 +++++++++++ app/src/main/cpp/skyline/gpu/gpfifo.cpp | 43 ++- app/src/main/cpp/skyline/gpu/gpfifo.h | 304 +++++++++--------- 6 files changed, 434 insertions(+), 157 deletions(-) create mode 100644 app/src/main/cpp/skyline/gpu/engines/engine.h create mode 100644 app/src/main/cpp/skyline/gpu/engines/gpfifo.h diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index d52358ab..efa7c368 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -12,11 +12,12 @@ extern skyline::u16 fps; extern skyline::u32 frametime; namespace skyline::gpu { - GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared(state)), bufferEvent(std::make_shared(state)) { + GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), fermi2D(std::make_shared(state)), keplerMemory(std::make_shared(state)), maxwell3D(std::make_shared(state)), maxwellCompute(std::make_shared(state)), maxwellDma(std::make_shared(state)), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared(state)), 
bufferEvent(std::make_shared(state)) { ANativeWindow_acquire(window); resolution.width = static_cast(ANativeWindow_getWidth(window)); resolution.height = static_cast(ANativeWindow_getHeight(window)); format = ANativeWindow_getFormat(window); + vsyncEvent->Signal(); } GPU::~GPU() { diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index 46947ef5..40c90ec0 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -11,6 +11,9 @@ #include "gpu/texture.h" #include "gpu/memory_manager.h" #include "gpu/gpfifo.h" +#include "gpu/syncpoint.h" +#include "gpu/engines/engine.h" +#include "gpu/engines/maxwell_3d.h" namespace skyline::gpu { /** @@ -31,7 +34,13 @@ namespace skyline::gpu { std::shared_ptr vsyncEvent; //!< This KEvent is triggered every time a frame is drawn std::shared_ptr bufferEvent; //!< This KEvent is triggered every time a buffer is freed vmm::MemoryManager memoryManager; //!< The GPU Virtual Memory Manager + std::shared_ptr fermi2D; + std::shared_ptr maxwell3D; + std::shared_ptr maxwellCompute; + std::shared_ptr maxwellDma; + std::shared_ptr keplerMemory; gpfifo::GPFIFO gpfifo; + std::array syncpoints{}; /** * @param window The ANativeWindow to render to diff --git a/app/src/main/cpp/skyline/gpu/engines/engine.h b/app/src/main/cpp/skyline/gpu/engines/engine.h new file mode 100644 index 00000000..a21fcda0 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/engines/engine.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include + +#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32)) + +namespace skyline::gpu { + /** + * @brief This enumerates the identifiers used to label a specific engine + */ + enum class EngineID { + Fermi2D = 0x902D, + KeplerMemory = 0xA140, + Maxwell3D = 0xB197, + MaxwellCompute = 0xB1C0, + MaxwellDma = 0xB0B5, + }; + + /** + * @brief This holds the parameters of 
a GPU engine method call + */ + struct MethodParams { + u16 method; + u32 argument; + u32 subChannel; + bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specific macro + }; + + namespace engine { + /** + * @brief The Engine class provides an interface that can be used to communicate with the GPU's internal engines + */ + class Engine { + protected: + const DeviceState &state; + + public: + Engine(const DeviceState &state) : state(state) {} + + virtual ~Engine() = default; + + /** + * @brief Calls an engine method with the given parameters + */ + virtual void CallMethod(MethodParams params) { + state.logger->Warn("Called method in unimplemented engine: 0x{:X} args: 0x{:X}", params.method, params.argument); + }; + }; + } +} diff --git a/app/src/main/cpp/skyline/gpu/engines/gpfifo.h b/app/src/main/cpp/skyline/gpu/engines/gpfifo.h new file mode 100644 index 00000000..e0734fd3 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/engines/gpfifo.h @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include +#include "engine.h" + +namespace skyline { + namespace constant { + constexpr u32 GpfifoRegisterSize = 0x40; //!< The size of the GPFIFO's register space in units of u32 + } + + namespace gpu::engine { + /** + * @brief The GPFIFO engine handles managing macros and semaphores + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt + */ + class GPFIFO : public Engine { + private: + /** + * @brief This holds the GPFIFO engine's registers + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65 + */ + union Regs { + enum class SemaphoreOperation { + Acquire = 1, + Release = 2, + AcqGeq = 4, + AcqAnd = 8, + Reduction = 16 + }; + + enum class SemaphoreAcquireSwitch { + Disabled = 0, + Enabled = 
1 + }; + + enum class SemaphoreReleaseWfi { + En = 0, + Dis = 1 + }; + + enum class SemaphoreReleaseSize { + SixteenBytes = 0, + FourBytes = 1 + }; + + enum class SemaphoreReduction { + Min = 0, + Max = 1, + Xor = 2, + And = 3, + Or = 4, + Add = 5, + Inc = 6, + Dec = 7 + }; + + enum class SemaphoreFormat { + Signed = 0, + Unsigned = 1 + }; + + enum class MemOpTlbInvalidatePdb { + One = 0, + All = 1 + }; + + enum class SyncpointOperation { + Wait = 0, + Incr = 1 + }; + + enum class SyncpointWaitSwitch { + Dis = 0, + En = 1 + }; + + enum class WfiScope { + CurrentScgType = 0, + All = 1 + }; + + enum class YieldOp { + Nop = 0, + PbdmaTimeslice = 1, + RunlistTimeslice = 2, + Tsg = 3 + }; + + struct { + struct { + u16 nvClass : 16; + u16 engine : 5; + u32 _pad_ : 11; + } setObject; + + u32 illegal; + u32 nop; + u32 _pad0_; + + struct { + struct { + u32 offsetUpper : 8; + u32 _pad0_ : 24; + }; + + struct { + u8 _pad1_ : 2; + u32 offsetLower : 30; + }; + + u32 payload; + + struct __attribute__((__packed__)) { + SemaphoreOperation operation : 5; + u8 _pad2_ : 7; + SemaphoreAcquireSwitch acquireSwitch : 1; + u8 _pad3_ : 7; + SemaphoreReleaseWfi releaseWfi : 1; + u8 _pad4_ : 3; + SemaphoreReleaseSize releaseSize : 1; + u8 _pad5_ : 2; + SemaphoreReduction reduction : 4; + SemaphoreFormat format : 1; + }; + } semaphore; + + u32 nonStallInterrupt; + u32 fbFlush; + u32 _pad1_[2]; + u32 memOpC; + u32 memOpD; + u32 _pad2_[6]; + u32 setReference; + u32 _pad3_[7]; + + struct { + u32 payload; + + struct __attribute__((__packed__)) { + SyncpointOperation operation : 1; + u8 _pad0_ : 3; + SyncpointWaitSwitch waitSwitch : 1; + u8 _pad1_ : 3; + u16 index : 12; + u16 _pad2_ : 12; + }; + } syncpoint; + + struct { + WfiScope scope : 1; + u32 _pad_ : 31; + } wfi; + + u32 crcCheck; + + struct { + YieldOp op : 2; + u32 _pad_ : 30; + } yield; + }; + std::array raw; + } regs{}; + static_assert(sizeof(Regs) == (constant::GpfifoRegisterSize << 2)); + + public: + GPFIFO(const DeviceState &state) : 
Engine(state) {} + + void CallMethod(MethodParams params) { + state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument); + + regs.raw[params.method] = params.argument; + }; + }; + } +} \ No newline at end of file diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.cpp b/app/src/main/cpp/skyline/gpu/gpfifo.cpp index 8763660e..eca0d702 100644 --- a/app/src/main/cpp/skyline/gpu/gpfifo.cpp +++ b/app/src/main/cpp/skyline/gpu/gpfifo.cpp @@ -2,15 +2,52 @@ // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) #include +#include #include "gpfifo.h" namespace skyline::gpu::gpfifo { void GPFIFO::Send(MethodParams params) { - state.logger->Warn("Called unimplemented GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall); + state.logger->Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall); + + if (params.method == 0) { + switch (static_cast(params.argument)) { + case EngineID::Fermi2D: + subchannels.at(params.subChannel) = state.gpu->fermi2D; + break; + case EngineID::KeplerMemory: + subchannels.at(params.subChannel) = state.gpu->keplerMemory; + break; + case EngineID::Maxwell3D: + subchannels.at(params.subChannel) = state.gpu->maxwell3D; + break; + case EngineID::MaxwellCompute: + subchannels.at(params.subChannel) = state.gpu->maxwellCompute; + break; + case EngineID::MaxwellDma: + subchannels.at(params.subChannel) = state.gpu->maxwellDma; + break; + default: + throw exception("Unknown engine 0x{:X} cannot be bound to subchannel {}", params.argument, params.subChannel); + } + + state.logger->Info("Bound GPU engine 0x{:X} to subchannel {}", params.argument, params.subChannel); + return; + } else if (params.method < constant::GpfifoRegisterSize) { + gpfifoEngine.CallMethod(params); + } else { + if 
(subchannels.at(params.subChannel) == nullptr) + throw exception("Calling method on unbound channel"); + + subchannels.at(params.subChannel)->CallMethod(params); + } } void GPFIFO::Process(const std::vector &segment) { for (auto entry = segment.begin(); entry != segment.end(); entry++) { + // An entry containing all zeroes is a NOP, skip over it + if (*entry == 0) + continue; + auto methodHeader = reinterpret_cast(&*entry); switch (methodHeader->secOp) { @@ -32,6 +69,8 @@ namespace skyline::gpu::gpfifo { case PushBufferMethodHeader::SecOp::ImmdDataMethod: Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true}); break; + case PushBufferMethodHeader::SecOp::EndPbSegment: + return; default: break; } @@ -52,7 +91,7 @@ namespace skyline::gpu::gpfifo { void GPFIFO::Push(std::span entries) { std::lock_guard lock(pushBufferQueueLock); - bool beforeBarrier{true}; + bool beforeBarrier{false}; for (const auto &entry : entries) { if (entry.sync == GpEntry::Sync::Wait) diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.h b/app/src/main/cpp/skyline/gpu/gpfifo.h index 1f122a18..8fbcc225 100644 --- a/app/src/main/cpp/skyline/gpu/gpfifo.h +++ b/app/src/main/cpp/skyline/gpu/gpfifo.h @@ -6,179 +6,175 @@ #include #include #include +#include "engines/engine.h" +#include "engines/gpfifo.h" #include "memory_manager.h" -namespace skyline::gpu::gpfifo { - /** - * @brief This holds the parameters of a GPU method call - */ - struct MethodParams { - u16 method; - u32 argument; - u32 subChannel; - bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specifc macro - }; - - /** - * @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpFifo' - * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt - * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155 - */ - struct GpEntry { - enum class Fetch { - 
Unconditional = 0, - Conditional = 1, - }; - - union { - struct { - Fetch fetch : 1; - u8 _pad_ : 1; - u32 get : 30; +namespace skyline::gpu { + namespace gpfifo { + /** + * @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpfifo' + * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155 + */ + struct GpEntry { + enum class Fetch { + Unconditional = 0, + Conditional = 1, }; - u32 entry0; - }; - enum class Opcode : u8 { - Nop = 0, - Illegal = 1, - Crc = 2, - PbCrc = 3, - }; + union { + struct { + Fetch fetch : 1; + u8 _pad_ : 1; + u32 get : 30; + }; + u32 entry0; + }; - enum class Priv { - User = 0, - Kernel = 1, - }; + enum class Opcode : u8 { + Nop = 0, + Illegal = 1, + Crc = 2, + PbCrc = 3, + }; - enum class Level { - Main = 0, - Subroutine = 1, - }; + enum class Priv { + User = 0, + Kernel = 1, + }; - enum class Sync { - Proceed = 0, - Wait = 1, - }; + enum class Level { + Main = 0, + Subroutine = 1, + }; + + enum class Sync { + Proceed = 0, + Wait = 1, + }; + + union { + struct { + union { + u8 getHi; + Opcode opcode; + }; + Priv priv : 1; + Level level : 1; + u32 size : 21; + Sync sync : 1; + }; + u32 entry1; + }; + }; + static_assert(sizeof(GpEntry) == 0x8); + + /** + * @brief This holds a single pushbuffer method header that describes a compressed method sequence + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850 + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179 + */ + union PushBufferMethodHeader { + enum class TertOp : u8 { + Grp0IncMethod = 0, + Grp0SetSubDevMask = 1, + Grp0StoreSubDevMask = 2, + Grp0UseSubDevMask = 3, + Grp2NonIncMethod = 0 + }; + + enum class SecOp : u8 { + Grp0UseTert = 0, + IncMethod = 1, + Grp2UseTert = 2, + 
NonIncMethod = 3, + ImmdDataMethod = 4, + OneInc = 5, + Reserved6 = 6, + EndPbSegment = 7 + }; - union { struct { union { - u8 getHi; - Opcode opcode; - }; - Priv priv : 1; - Level level : 1; - u32 size : 21; - Sync sync : 1; - }; - u32 entry1; - }; - }; - static_assert(sizeof(GpEntry) == 0x8); + u16 methodAddress : 12; + struct { + u8 _pad0_ : 4; + u16 subDeviceMask : 12; + }; - /** - * @brief This holds a single pushbuffer method header that describes a compressed method sequence - * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850 - * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179 - */ - union PushBufferMethodHeader { - enum class TertOp : u8 { - Grp0IncMethod = 0, - Grp0SetSubDevMask = 1, - Grp0StoreSubDevMask = 2, - Grp0UseSubDevMask = 3, - Grp2NonIncMethod = 0 - }; + struct { + u16 _pad1_ : 13; + u8 methodSubChannel : 3; + union { + TertOp tertOp : 3; + u16 methodCount : 13; + u16 immdData : 13; + }; + }; - enum class SecOp : u8 { - Grp0UseTert = 0, - IncMethod = 1, - Grp2UseTert = 2, - NonIncMethod = 3, - ImmdDataMethod = 4, - OneInc = 5, - Reserved6 = 6, - EndPbSegment = 7 - }; - - struct { - union { - u16 methodAddress : 12; - struct { - u8 _pad0_ : 4; - u16 subDeviceMask : 12; - }; - - struct { - u16 _pad1_ : 13; - u8 methodSubChannel : 3; - union { - TertOp tertOp : 3; - u16 methodCount : 13; - u16 immdData : 13; + struct { + u32 _pad2_ : 29; + SecOp secOp : 3; }; }; - - struct { - u32 _pad2_ : 29; - SecOp secOp : 3; - }; }; + u32 entry; }; - u32 entry; - }; - static_assert(sizeof(PushBufferMethodHeader) == 0x4); + static_assert(sizeof(PushBufferMethodHeader) == 0x4); - /** - * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them - * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 - 
*/ - class GPFIFO { - private: /** - * @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words + * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 */ - struct PushBuffer { - GpEntry gpEntry; - std::vector segment; + class GPFIFO { + private: + /** + * @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words + */ + struct PushBuffer { + GpEntry gpEntry; + std::vector segment; - PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) { - if (fetch) - Fetch(memoryManager); - } + PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) { + if (fetch) + Fetch(memoryManager); + } - inline void Fetch(const vmm::MemoryManager &memoryManager) { - segment.resize(gpEntry.size * sizeof(u32)); - memoryManager.Read(reinterpret_cast(segment.data()), (static_cast(gpEntry.getHi) << 32) | (gpEntry.get << 2), segment.size()); - } + inline void Fetch(const vmm::MemoryManager &memoryManager) { + segment.resize(gpEntry.size); + memoryManager.Read(segment, (static_cast(gpEntry.getHi) << 32) | (static_cast(gpEntry.get) << 2)); + } + }; + + const DeviceState &state; + engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls + std::array, 8> subchannels; + std::queue pushBufferQueue; + skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a separate thread + + /** + * @brief Processes a pushbuffer segment, calling methods as needed + */ + void Process(const std::vector &segment); + + /** + * @brief This sends a method call to the GPU hardware + */ + void Send(MethodParams params); + + public: + GPFIFO(const 
DeviceState &state) : state(state), gpfifoEngine(state) {} + + /** + * @brief Executes all pending entries in the FIFO + */ + void Run(); + + /** + * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Run' + */ + void Push(std::span entries); }; - - const DeviceState &state; - skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a seperate thread - std::queue pushBufferQueue; - - /** - * @brief Processes a pushbuffer segment, calling methods as needed - */ - void Process(const std::vector &segment); - - /** - * @brief This sends a method call to the GPU hardware - */ - void Send(MethodParams params); - - public: - GPFIFO(const DeviceState &state) : state(state) {} - - /** - * @brief Executes all pending entries in the FIFO - */ - void Run(); - - /** - * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step' - */ - void Push(std::span entries); - }; -} + } +} \ No newline at end of file