From 9fd0dd848b13caf88658306a21be88022712da3b Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 23 Jul 2020 20:46:04 +0100 Subject: [PATCH] Add support for processing GP Entries and the pushbuffers they contain This is the backbone of the GPU, in the future this will be expanded to support calling into engines. --- app/CMakeLists.txt | 1 + app/src/main/cpp/skyline/gpu.cpp | 4 +- app/src/main/cpp/skyline/gpu.h | 2 + app/src/main/cpp/skyline/gpu/gpfifo.cpp | 64 ++++++ app/src/main/cpp/skyline/gpu/gpfifo.h | 184 ++++++++++++++++++ .../services/nvdrv/devices/nvhost_channel.cpp | 71 ++++++- .../services/nvdrv/devices/nvhost_channel.h | 2 + 7 files changed, 322 insertions(+), 6 deletions(-) create mode 100644 app/src/main/cpp/skyline/gpu/gpfifo.cpp create mode 100644 app/src/main/cpp/skyline/gpu/gpfifo.h diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index ac052067..96534afd 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -40,6 +40,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/audio/adpcm_decoder.cpp ${source_DIR}/skyline/gpu.cpp ${source_DIR}/skyline/gpu/memory_manager.cpp + ${source_DIR}/skyline/gpu/gpfifo.cpp ${source_DIR}/skyline/gpu/texture.cpp ${source_DIR}/skyline/os.cpp ${source_DIR}/skyline/loader/loader.cpp diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index 5bdcddb2..d52358ab 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -12,7 +12,7 @@ extern skyline::u16 fps; extern skyline::u32 frametime; namespace skyline::gpu { - GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared(state)), bufferEvent(std::make_shared(state)) { + GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared(state)), bufferEvent(std::make_shared(state)) { 
ANativeWindow_acquire(window); resolution.width = static_cast(ANativeWindow_getWidth(window)); resolution.height = static_cast(ANativeWindow_getHeight(window)); @@ -24,6 +24,8 @@ namespace skyline::gpu { } void GPU::Loop() { + gpfifo.Run(); + if (surfaceUpdate) { if (Surface == nullptr) return; diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index 1b84e552..46947ef5 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -10,6 +10,7 @@ #include #include "gpu/texture.h" #include "gpu/memory_manager.h" +#include "gpu/gpfifo.h" namespace skyline::gpu { /** @@ -30,6 +31,7 @@ namespace skyline::gpu { std::shared_ptr vsyncEvent; //!< This KEvent is triggered every time a frame is drawn std::shared_ptr bufferEvent; //!< This KEvent is triggered every time a buffer is freed vmm::MemoryManager memoryManager; //!< The GPU Virtual Memory Manager + gpfifo::GPFIFO gpfifo; //!< The GPFIFO that queues submitted GP entries and processes their pushbuffers /** * @param window The ANativeWindow to render to diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.cpp b/app/src/main/cpp/skyline/gpu/gpfifo.cpp new file mode 100644 index 00000000..8763660e --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/gpfifo.cpp @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include "gpfifo.h" + +namespace skyline::gpu::gpfifo { + void GPFIFO::Send(MethodParams params) { + state.logger->Warn("Called unimplemented GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall); + } + + void GPFIFO::Process(const std::vector &segment) { + for (auto entry = segment.begin(); entry != segment.end(); entry++) { + auto methodHeader = reinterpret_cast(&*entry); + + switch (methodHeader->secOp) { + case PushBufferMethodHeader::SecOp::IncMethod: + for (u16 i{}; i < methodHeader->methodCount && (entry + 1) != segment.end(); i++) + Send(MethodParams{static_cast(methodHeader->methodAddress +
i), *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1}); + + break; + case PushBufferMethodHeader::SecOp::NonIncMethod: + for (u16 i{}; i < methodHeader->methodCount && (entry + 1) != segment.end(); i++) + Send(MethodParams{methodHeader->methodAddress, *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1}); + + break; + case PushBufferMethodHeader::SecOp::OneInc: + for (u16 i{}; i < methodHeader->methodCount && (entry + 1) != segment.end(); i++) + Send(MethodParams{static_cast(methodHeader->methodAddress + bool(i)), *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1}); + + break; + case PushBufferMethodHeader::SecOp::ImmdDataMethod: + Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true}); + break; + default: + break; + } + } + } + + void GPFIFO::Run() { + std::lock_guard lock(pushBufferQueueLock); + while (!pushBufferQueue.empty()) { + auto &pushBuffer = pushBufferQueue.front(); // Borrow the queued entry, copying it would duplicate the whole segment + if (pushBuffer.segment.empty()) + pushBuffer.Fetch(state.gpu->memoryManager); + + Process(pushBuffer.segment); + pushBufferQueue.pop(); + } + } + + void GPFIFO::Push(std::span entries) { + std::lock_guard lock(pushBufferQueueLock); + bool beforeBarrier{true}; + + for (const auto &entry : entries) { + if (entry.sync == GpEntry::Sync::Wait) + beforeBarrier = false; + + pushBufferQueue.emplace(PushBuffer(entry, state.gpu->memoryManager, beforeBarrier)); + } + } +} diff --git a/app/src/main/cpp/skyline/gpu/gpfifo.h b/app/src/main/cpp/skyline/gpu/gpfifo.h new file mode 100644 index 00000000..1f122a18 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/gpfifo.h @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include +#include +#include "memory_manager.h" + +namespace skyline::gpu::gpfifo { + /** + * @brief This holds the parameters of a GPU method call + */ + struct MethodParams { + u16 method; + u32 argument;
+ u32 subChannel; + bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specific macro + }; + + /** + * @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpFifo' + * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155 + */ + struct GpEntry { + enum class Fetch { + Unconditional = 0, + Conditional = 1, + }; + + union { + struct { + Fetch fetch : 1; + u8 _pad_ : 1; + u32 get : 30; + }; + u32 entry0; + }; + + enum class Opcode : u8 { + Nop = 0, + Illegal = 1, + Crc = 2, + PbCrc = 3, + }; + + enum class Priv { + User = 0, + Kernel = 1, + }; + + enum class Level { + Main = 0, + Subroutine = 1, + }; + + enum class Sync { + Proceed = 0, + Wait = 1, + }; + + union { + struct { + union { + u8 getHi; + Opcode opcode; + }; + Priv priv : 1; + Level level : 1; + u32 size : 21; + Sync sync : 1; + }; + u32 entry1; + }; + }; + static_assert(sizeof(GpEntry) == 0x8); + + /** + * @brief This holds a single pushbuffer method header that describes a compressed method sequence + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850 + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179 + */ + union PushBufferMethodHeader { + enum class TertOp : u8 { + Grp0IncMethod = 0, + Grp0SetSubDevMask = 1, + Grp0StoreSubDevMask = 2, + Grp0UseSubDevMask = 3, + Grp2NonIncMethod = 0 + }; + + enum class SecOp : u8 { + Grp0UseTert = 0, + IncMethod = 1, + Grp2UseTert = 2, + NonIncMethod = 3, + ImmdDataMethod = 4, + OneInc = 5, + Reserved6 = 6, + EndPbSegment = 7 + }; + + struct { + union { + u16 methodAddress : 12; + struct { + u8 _pad0_ : 4; + u16 subDeviceMask : 12; + }; + + struct { + u16 _pad1_ : 13; + u8 methodSubChannel : 3; + union { + TertOp tertOp : 3;
+ u16 methodCount : 13; + u16 immdData : 13; + }; + }; + + struct { + u32 _pad2_ : 29; + SecOp secOp : 3; + }; + }; + }; + u32 entry; + }; + static_assert(sizeof(PushBufferMethodHeader) == 0x4); + + /** + * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them + * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 + */ + class GPFIFO { + private: + /** + * @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words + */ + struct PushBuffer { + GpEntry gpEntry; + std::vector segment; + + PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) { + if (fetch) + Fetch(memoryManager); + } + + inline void Fetch(const vmm::MemoryManager &memoryManager) { + segment.resize(gpEntry.size); // One element per 32-bit word, the byte count is only needed for the read below + memoryManager.Read(reinterpret_cast(segment.data()), (static_cast(gpEntry.getHi) << 32) | (static_cast<u64>(gpEntry.get) << 2), segment.size() * sizeof(u32)); // 'get' is widened before the shift as the 30-bit field promotes to int and a set top bit would sign-extend over getHi + } + }; + + const DeviceState &state; + skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a separate thread + std::queue pushBufferQueue; + + /** + * @brief Processes a pushbuffer segment, calling methods as needed; argument reads stop at the end of the segment even when a method header claims more + */ + void Process(const std::vector &segment); + + /** + * @brief This sends a method call to the GPU hardware + */ + void Send(MethodParams params); + + public: + GPFIFO(const DeviceState &state) : state(state) {} + + /** + * @brief Executes all pending entries in the FIFO + */ + void Run(); + + /** + * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Run' + */ + void Push(std::span entries); + }; +} diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp index b0a194ad..1f701cdb 100644 ---
a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.cpp @@ -1,7 +1,11 @@ // SPDX-License-Identifier: MPL-2.0 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) +#include +#include +#include #include +#include #include "nvhost_channel.h" namespace skyline::service::nvdrv::device { @@ -15,13 +19,62 @@ namespace skyline::service::nvdrv::device { {0x480D, NFUNC(NvHostChannel::SetPriority)}, {0x481A, NFUNC(NvHostChannel::AllocGpfifoEx2)}, {0x4714, NFUNC(NvHostChannel::SetUserData)}, - }) {} + }) { + auto &hostSyncpoint = state.os->serviceManager.GetService(Service::nvdrv_INvDrvServices)->hostSyncpoint; + + channelFence.id = hostSyncpoint.AllocateSyncpoint(false); + channelFence.UpdateValue(hostSyncpoint); + } void NvHostChannel::SetNvmapFd(IoctlData &buffer) {} void NvHostChannel::SetSubmitTimeout(IoctlData &buffer) {} - void NvHostChannel::SubmitGpFifo(IoctlData &buffer) {} + void NvHostChannel::SubmitGpFifo(IoctlData &buffer) { + struct Data { + u64 address; + u32 numEntries; + union { + struct { + bool fenceWait : 1; + bool fenceIncrement : 1; + bool hwFormat : 1; + u8 _pad0_ : 1; + bool suppressWfi : 1; + u8 _pad1_ : 3; + bool incrementWithValue : 1; + }; + u32 raw; + } flags; + NvFence fence; + } &args = state.process->GetReference(buffer.input.at(0).address); // Bound by reference so the fence and flags written below land in the ioctl buffer instead of a local copy + + auto &hostSyncpoint = state.os->serviceManager.GetService(Service::nvdrv_INvDrvServices)->hostSyncpoint; + + if (args.flags.fenceWait) { + if (args.flags.incrementWithValue) { + buffer.status = NvStatus::BadValue; + return; + } + + if (hostSyncpoint.HasSyncpointExpired(args.fence.id, args.fence.value)) { + state.logger->Warn("GPU Syncpoints are not currently supported!"); + } + } + + state.gpu->gpfifo.Push(std::span(state.process->GetPointer(args.address), args.numEntries)); + + bool increment = args.flags.fenceIncrement || args.flags.incrementWithValue; + u32 amount = increment ?
(args.flags.fenceIncrement ? 2 : 0) + (args.flags.incrementWithValue ? args.fence.value : 0) : 0; + args.fence.value = hostSyncpoint.IncrementSyncpointMaxExt(args.fence.id, amount); // NOTE(review): this increments the syncpoint named by the incoming fence while the ID returned on the next line is channelFence's, confirm this is intended + args.fence.id = channelFence.id; + + if (args.flags.fenceIncrement) { + state.logger->Warn("GPU Syncpoints are not currently supported!"); + } + + args.flags.raw = 0; + } void NvHostChannel::AllocObjCtx(IoctlData &buffer) {} @@ -30,7 +83,7 @@ namespace skyline::service::nvdrv::device { void NvHostChannel::SetErrorNotifier(IoctlData &buffer) {} void NvHostChannel::SetPriority(IoctlData &buffer) { - auto priority = state.process->GetObject(buffer.input[0].address); + auto priority = state.process->GetObject(buffer.input.at(0).address); switch (priority) { case NvChannelPriority::Low: @@ -45,8 +98,16 @@ namespace skyline::service::nvdrv::device { } } - void NvHostChannel::AllocGpfifoEx2(IoctlData &buffer) {} + void NvHostChannel::AllocGpfifoEx2(IoctlData &buffer) { + struct Data { + u32 numEntries; + u32 numJobs; + u32 flags; + NvFence fence; + u32 reserved[3]; + } &args = state.process->GetReference(buffer.input.at(0).address); // Bound by reference so the fence written below lands in the ioctl buffer instead of a local copy + args.fence = channelFence; + } void NvHostChannel::SetUserData(IoctlData &buffer) {} - } diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h index df159d9a..1b873b7d 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost_channel.h @@ -3,6 +3,7 @@ #pragma once +#include "nvfence.h" #include "nvdevice.h" namespace skyline::service::nvdrv::device { @@ -17,6 +18,7 @@ namespace skyline::service::nvdrv::device { High = 0x94 }; + NvFence channelFence{}; //!< The channel's fence, its syncpoint is allocated in the constructor and it is reported back by SubmitGpFifo and AllocGpfifoEx2 u32 timeslice{}; public: