diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index d52358ab..efa7c368 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -12,11 +12,12 @@ extern skyline::u16 fps; extern skyline::u32 frametime; namespace skyline::gpu { - GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared(state)), bufferEvent(std::make_shared(state)) { + GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), fermi2D(std::make_shared(state)), keplerMemory(std::make_shared(state)), maxwell3D(std::make_shared(state)), maxwellCompute(std::make_shared(state)), maxwellDma(std::make_shared(state)), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared(state)), bufferEvent(std::make_shared(state)) { ANativeWindow_acquire(window); resolution.width = static_cast(ANativeWindow_getWidth(window)); resolution.height = static_cast(ANativeWindow_getHeight(window)); format = ANativeWindow_getFormat(window); + vsyncEvent->Signal(); } GPU::~GPU() { diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index 46947ef5..40c90ec0 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -11,6 +11,9 @@ #include "gpu/texture.h" #include "gpu/memory_manager.h" #include "gpu/gpfifo.h" +#include "gpu/syncpoint.h" +#include "gpu/engines/engine.h" +#include "gpu/engines/maxwell_3d.h" namespace skyline::gpu { /** @@ -31,7 +34,13 @@ namespace skyline::gpu { std::shared_ptr vsyncEvent; //!< This KEvent is triggered every time a frame is drawn std::shared_ptr bufferEvent; //!< This KEvent is triggered every time a buffer is freed vmm::MemoryManager memoryManager; //!< The GPU Virtual Memory Manager + std::shared_ptr fermi2D; + std::shared_ptr maxwell3D; + std::shared_ptr maxwellCompute; + std::shared_ptr 
maxwellDma; + std::shared_ptr keplerMemory; gpfifo::GPFIFO gpfifo; + std::array syncpoints{}; /** * @param window The ANativeWindow to render to diff --git a/app/src/main/cpp/skyline/gpu/engines/engine.h b/app/src/main/cpp/skyline/gpu/engines/engine.h new file mode 100644 index 00000000..a21fcda0 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/engines/engine.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include + +#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32)) + +namespace skyline::gpu { + /** + * @brief This enumerates the identifiers used to label a specific engine + */ + enum class EngineID { + Fermi2D = 0x902D, + KeplerMemory = 0xA140, + Maxwell3D = 0xB197, + MaxwellCompute = 0xB1C0, + MaxwellDma = 0xB0B5, + }; + + /** + * @brief This holds the parameters of a GPU engine method call + */ + struct MethodParams { + u16 method; + u32 argument; + u32 subChannel; + bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specific macro + }; + + namespace engine { + /** + * @brief The Engine class provides an interface that can be used to communicate with the GPU's internal engines + */ + class Engine { + protected: + const DeviceState &state; + + public: + Engine(const DeviceState &state) : state(state) {} + + virtual ~Engine() = default; + + /** + * @brief Calls an engine method with the given parameters + */ + virtual void CallMethod(MethodParams params) { + state.logger->Warn("Called method in unimplemented engine: 0x{:X} args: 0x{:X}", params.method, params.argument); + }; + }; + } +} diff --git a/app/src/main/cpp/skyline/gpu/engines/gpfifo.h b/app/src/main/cpp/skyline/gpu/engines/gpfifo.h new file mode 100644 index 00000000..e0734fd3 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/engines/gpfifo.h @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and
Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include +#include "engine.h" + +namespace skyline { + namespace constant { + constexpr u32 GpfifoRegisterSize = 0x40; //!< The size of the GPFIFO's register space in units of u32 + } + + namespace gpu::engine { + /** + * @brief The GPFIFO engine handles managing macros and semaphores + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt + */ + class GPFIFO : public Engine { + private: + /** + * @brief This holds the GPFIFO engine's registers + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65 + */ + union Regs { + enum class SemaphoreOperation { + Acquire = 1, + Release = 2, + AcqGeq = 4, + AcqAnd = 8, + Reduction = 16 + }; + + enum class SemaphoreAcquireSwitch { + Disabled = 0, + Enabled = 1 + }; + + enum class SemaphoreReleaseWfi { + En = 0, + Dis = 1 + }; + + enum class SemaphoreReleaseSize { + SixteenBytes = 0, + FourBytes = 1 + }; + + enum class SemaphoreReduction { + Min = 0, + Max = 1, + Xor = 2, + And = 3, + Or = 4, + Add = 5, + Inc = 6, + Dec = 7 + }; + + enum class SemaphoreFormat { + Signed = 0, + Unsigned = 1 + }; + + enum class MemOpTlbInvalidatePdb { + One = 0, + All = 1 + }; + + enum class SyncpointOperation { + Wait = 0, + Incr = 1 + }; + + enum class SyncpointWaitSwitch { + Dis = 0, + En = 1 + }; + + enum class WfiScope { + CurrentScgType = 0, + All = 1 + }; + + enum class YieldOp { + Nop = 0, + PbdmaTimeslice = 1, + RunlistTimeslice = 2, + Tsg = 3 + }; + + struct { + struct { + u16 nvClass : 16; + u16 engine : 5; + u32 _pad_ : 11; + } setObject; + + u32 illegal; + u32 nop; + u32 _pad0_; + + struct { + struct { + u32 offsetUpper : 8; + u32 _pad0_ : 24; + }; + + struct { + u8 _pad1_ : 2; + u32 offsetLower : 30; + }; + + u32 payload; + + struct __attribute__((__packed__)) { + SemaphoreOperation operation : 5; + u8 _pad2_ : 7; + 
SemaphoreAcquireSwitch acquireSwitch : 1; + u8 _pad3_ : 7; + SemaphoreReleaseWfi releaseWfi : 1; + u8 _pad4_ : 3; + SemaphoreReleaseSize releaseSize : 1; + u8 _pad5_ : 2; + SemaphoreReduction reduction : 4; + SemaphoreFormat format : 1; + }; + } semaphore; + + u32 nonStallInterrupt; + u32 fbFlush; + u32 _pad1_[2]; + u32 memOpC; + u32 memOpD; + u32 _pad2_[6]; + u32 setReference; + u32 _pad3_[7]; + + struct { + u32 payload; + + struct __attribute__((__packed__)) { + SyncpointOperation operation : 1; + u8 _pad0_ : 3; + SyncpointWaitSwitch waitSwitch : 1; + u8 _pad1_ : 3; + u16 index : 12; + u16 _pad2_ : 12; + }; + } syncpoint; + + struct { + WfiScope scope : 1; + u32 _pad_ : 31; + } wfi; + + u32 crcCheck; + + struct { + YieldOp op : 2; + u32 _pad_ : 30; + } yield; + }; + std::array raw; + } regs{}; + static_assert(sizeof(Regs) == (constant::GpfifoRegisterSize << 2)); + + public: + GPFIFO(const DeviceState &state) : Engine(state) {} + + void CallMethod(MethodParams params) { + state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument); + + regs.raw[params.method] = params.argument; + }; + }; + } +} \ No newline at end of file diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.cpp b/app/src/main/cpp/skyline/gpu/gpfifo.cpp index 8763660e..eca0d702 100644 --- a/app/src/main/cpp/skyline/gpu/gpfifo.cpp +++ b/app/src/main/cpp/skyline/gpu/gpfifo.cpp @@ -2,15 +2,52 @@ // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) #include +#include #include "gpfifo.h" namespace skyline::gpu::gpfifo { void GPFIFO::Send(MethodParams params) { - state.logger->Warn("Called unimplemented GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall); + state.logger->Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall); + + if (params.method 
== 0) { + switch (static_cast(params.argument)) { + case EngineID::Fermi2D: + subchannels.at(params.subChannel) = state.gpu->fermi2D; + break; + case EngineID::KeplerMemory: + subchannels.at(params.subChannel) = state.gpu->keplerMemory; + break; + case EngineID::Maxwell3D: + subchannels.at(params.subChannel) = state.gpu->maxwell3D; + break; + case EngineID::MaxwellCompute: + subchannels.at(params.subChannel) = state.gpu->maxwellCompute; + break; + case EngineID::MaxwellDma: + subchannels.at(params.subChannel) = state.gpu->maxwellDma; + break; + default: + throw exception("Unknown engine 0x{:X} cannot be bound to subchannel {}", params.argument, params.subChannel); + } + + state.logger->Info("Bound GPU engine 0x{:X} to subchannel {}", params.argument, params.subChannel); + return; + } else if (params.method < constant::GpfifoRegisterSize) { + gpfifoEngine.CallMethod(params); + } else { + if (subchannels.at(params.subChannel) == nullptr) + throw exception("Calling method on unbound channel"); + + subchannels.at(params.subChannel)->CallMethod(params); + } } void GPFIFO::Process(const std::vector &segment) { for (auto entry = segment.begin(); entry != segment.end(); entry++) { + // An entry containing all zeroes is a NOP, skip over it + if (*entry == 0) + continue; + auto methodHeader = reinterpret_cast(&*entry); switch (methodHeader->secOp) { @@ -32,6 +69,8 @@ namespace skyline::gpu::gpfifo { case PushBufferMethodHeader::SecOp::ImmdDataMethod: Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true}); break; + case PushBufferMethodHeader::SecOp::EndPbSegment: + return; default: break; } @@ -52,7 +91,7 @@ namespace skyline::gpu::gpfifo { void GPFIFO::Push(std::span entries) { std::lock_guard lock(pushBufferQueueLock); - bool beforeBarrier{true}; + bool beforeBarrier{false}; for (const auto &entry : entries) { if (entry.sync == GpEntry::Sync::Wait) diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.h 
b/app/src/main/cpp/skyline/gpu/gpfifo.h index 1f122a18..8fbcc225 100644 --- a/app/src/main/cpp/skyline/gpu/gpfifo.h +++ b/app/src/main/cpp/skyline/gpu/gpfifo.h @@ -6,179 +6,175 @@ #include #include #include +#include "engines/engine.h" +#include "engines/gpfifo.h" #include "memory_manager.h" -namespace skyline::gpu::gpfifo { - /** - * @brief This holds the parameters of a GPU method call - */ - struct MethodParams { - u16 method; - u32 argument; - u32 subChannel; - bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specifc macro - }; - - /** - * @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpFifo' - * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt - * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155 - */ - struct GpEntry { - enum class Fetch { - Unconditional = 0, - Conditional = 1, - }; - - union { - struct { - Fetch fetch : 1; - u8 _pad_ : 1; - u32 get : 30; +namespace skyline::gpu { + namespace gpfifo { + /** + * @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpfifo' + * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155 + */ + struct GpEntry { + enum class Fetch { + Unconditional = 0, + Conditional = 1, }; - u32 entry0; - }; - enum class Opcode : u8 { - Nop = 0, - Illegal = 1, - Crc = 2, - PbCrc = 3, - }; + union { + struct { + Fetch fetch : 1; + u8 _pad_ : 1; + u32 get : 30; + }; + u32 entry0; + }; - enum class Priv { - User = 0, - Kernel = 1, - }; + enum class Opcode : u8 { + Nop = 0, + Illegal = 1, + Crc = 2, + PbCrc = 3, + }; - enum class Level { - Main = 0, - Subroutine = 1, - }; + enum class Priv { + User = 0, + Kernel = 1, + }; - enum class Sync { - Proceed = 0, - Wait = 1, - }; + enum class Level { + Main = 0, + 
Subroutine = 1, + }; + + enum class Sync { + Proceed = 0, + Wait = 1, + }; + + union { + struct { + union { + u8 getHi; + Opcode opcode; + }; + Priv priv : 1; + Level level : 1; + u32 size : 21; + Sync sync : 1; + }; + u32 entry1; + }; + }; + static_assert(sizeof(GpEntry) == 0x8); + + /** + * @brief This holds a single pushbuffer method header that describes a compressed method sequence + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850 + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179 + */ + union PushBufferMethodHeader { + enum class TertOp : u8 { + Grp0IncMethod = 0, + Grp0SetSubDevMask = 1, + Grp0StoreSubDevMask = 2, + Grp0UseSubDevMask = 3, + Grp2NonIncMethod = 0 + }; + + enum class SecOp : u8 { + Grp0UseTert = 0, + IncMethod = 1, + Grp2UseTert = 2, + NonIncMethod = 3, + ImmdDataMethod = 4, + OneInc = 5, + Reserved6 = 6, + EndPbSegment = 7 + }; - union { struct { union { - u8 getHi; - Opcode opcode; - }; - Priv priv : 1; - Level level : 1; - u32 size : 21; - Sync sync : 1; - }; - u32 entry1; - }; - }; - static_assert(sizeof(GpEntry) == 0x8); + u16 methodAddress : 12; + struct { + u8 _pad0_ : 4; + u16 subDeviceMask : 12; + }; - /** - * @brief This holds a single pushbuffer method header that describes a compressed method sequence - * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850 - * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179 - */ - union PushBufferMethodHeader { - enum class TertOp : u8 { - Grp0IncMethod = 0, - Grp0SetSubDevMask = 1, - Grp0StoreSubDevMask = 2, - Grp0UseSubDevMask = 3, - Grp2NonIncMethod = 0 - }; + struct { + u16 _pad1_ : 13; + u8 methodSubChannel : 3; + union { + TertOp tertOp : 3; + u16 methodCount : 13; + u16 immdData : 13; + }; + }; 
- enum class SecOp : u8 { - Grp0UseTert = 0, - IncMethod = 1, - Grp2UseTert = 2, - NonIncMethod = 3, - ImmdDataMethod = 4, - OneInc = 5, - Reserved6 = 6, - EndPbSegment = 7 - }; - - struct { - union { - u16 methodAddress : 12; - struct { - u8 _pad0_ : 4; - u16 subDeviceMask : 12; - }; - - struct { - u16 _pad1_ : 13; - u8 methodSubChannel : 3; - union { - TertOp tertOp : 3; - u16 methodCount : 13; - u16 immdData : 13; + struct { + u32 _pad2_ : 29; + SecOp secOp : 3; }; }; - - struct { - u32 _pad2_ : 29; - SecOp secOp : 3; - }; }; + u32 entry; }; - u32 entry; - }; - static_assert(sizeof(PushBufferMethodHeader) == 0x4); + static_assert(sizeof(PushBufferMethodHeader) == 0x4); - /** - * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them - * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 - */ - class GPFIFO { - private: /** - * @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words + * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 */ - struct PushBuffer { - GpEntry gpEntry; - std::vector segment; + class GPFIFO { + private: + /** + * @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words + */ + struct PushBuffer { + GpEntry gpEntry; + std::vector segment; - PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) { - if (fetch) - Fetch(memoryManager); - } + PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) { + if (fetch) + Fetch(memoryManager); + } - inline void Fetch(const vmm::MemoryManager &memoryManager) { - 
segment.resize(gpEntry.size * sizeof(u32)); - memoryManager.Read(reinterpret_cast(segment.data()), (static_cast(gpEntry.getHi) << 32) | (gpEntry.get << 2), segment.size()); - } + inline void Fetch(const vmm::MemoryManager &memoryManager) { + segment.resize(gpEntry.size); + memoryManager.Read(segment, (static_cast(gpEntry.getHi) << 32) | (static_cast(gpEntry.get) << 2)); + } + }; + + const DeviceState &state; + engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls + std::array, 8> subchannels; + std::queue pushBufferQueue; + skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a separate thread + + /** + * @brief Processes a pushbuffer segment, calling methods as needed + */ + void Process(const std::vector &segment); + + /** + * @brief This sends a method call to the GPU hardware + */ + void Send(MethodParams params); + + public: + GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {} + + /** + * @brief Executes all pending entries in the FIFO + */ + void Run(); + + /** + * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Run' + */ + void Push(std::span entries); }; - - const DeviceState &state; - skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a seperate thread - std::queue pushBufferQueue; - - /** - * @brief Processes a pushbuffer segment, calling methods as needed - */ - void Process(const std::vector &segment); - - /** - * @brief This sends a method call to the GPU hardware - */ - void Send(MethodParams params); - - public: - GPFIFO(const DeviceState &state) : state(state) {} - - /** - * @brief Executes all pending entries in the FIFO - */ - void Run(); - - /** - * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step' - */ - void Push(std::span entries); - }; -} + } +} \ No newline at end of file