Extend the GPFIFO implementation with support for engines and fix a few bugs

An engine is effectively a HW block in the GPU, the main one is the
Maxwell 3D which is used for 3D graphics. Engines can be bound to
individual subchannels and then methods within them can be called
through pushbuffers.

The engine side of the GPFIFO is also included; it currently does nothing
but will need to be extended in the future with semaphores.
This commit is contained in:
Billy Laws 2020-08-09 14:17:45 +01:00 committed by ◱ PixelyIon
parent 9fd0dd848b
commit cf468c20e2
6 changed files with 434 additions and 157 deletions

View File

@ -12,11 +12,12 @@ extern skyline::u16 fps;
extern skyline::u32 frametime;
namespace skyline::gpu {
GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared<kernel::type::KEvent>(state)), bufferEvent(std::make_shared<kernel::type::KEvent>(state)) {
GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), fermi2D(std::make_shared<engine::Engine>(state)), keplerMemory(std::make_shared<engine::Engine>(state)), maxwell3D(std::make_shared<engine::Maxwell3D>(state)), maxwellCompute(std::make_shared<engine::Engine>(state)), maxwellDma(std::make_shared<engine::Engine>(state)), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared<kernel::type::KEvent>(state)), bufferEvent(std::make_shared<kernel::type::KEvent>(state)) {
ANativeWindow_acquire(window);
resolution.width = static_cast<u32>(ANativeWindow_getWidth(window));
resolution.height = static_cast<u32>(ANativeWindow_getHeight(window));
format = ANativeWindow_getFormat(window);
vsyncEvent->Signal();
}
GPU::~GPU() {

View File

@ -11,6 +11,9 @@
#include "gpu/texture.h"
#include "gpu/memory_manager.h"
#include "gpu/gpfifo.h"
#include "gpu/syncpoint.h"
#include "gpu/engines/engine.h"
#include "gpu/engines/maxwell_3d.h"
namespace skyline::gpu {
/**
@ -31,7 +34,13 @@ namespace skyline::gpu {
std::shared_ptr<kernel::type::KEvent> vsyncEvent; //!< This KEvent is triggered every time a frame is drawn
std::shared_ptr<kernel::type::KEvent> bufferEvent; //!< This KEvent is triggered every time a buffer is freed
vmm::MemoryManager memoryManager; //!< The GPU Virtual Memory Manager
std::shared_ptr<engine::Engine> fermi2D;
std::shared_ptr<engine::Maxwell3D> maxwell3D;
std::shared_ptr<engine::Engine> maxwellCompute;
std::shared_ptr<engine::Engine> maxwellDma;
std::shared_ptr<engine::Engine> keplerMemory;
gpfifo::GPFIFO gpfifo;
std::array<Syncpoint, constant::MaxHwSyncpointCount> syncpoints{};
/**
* @param window The ANativeWindow to render to

View File

@ -0,0 +1,53 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
namespace skyline::gpu {
/**
* @brief This enumerates the identifiers used to label a specific engine
* @note GPFIFO::Send casts the argument of method 0 to this type to select which engine is bound to a subchannel, any value not listed here makes it throw
*/
enum class EngineID {
Fermi2D = 0x902D,
KeplerMemory = 0xA140,
Maxwell3D = 0xB197,
MaxwellCompute = 0xB1C0,
MaxwellDma = 0xB0B5,
};
/**
* @brief This holds the parameters of a GPU engine method call
*/
struct MethodParams {
u16 method; //!< The method being called, the GPFIFO engine uses this directly as an index into its register array
u32 argument; //!< The argument supplied to the method
u32 subChannel; //!< The subchannel the call was issued on, used to look up the engine bound to it
bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specific macro
};
namespace engine {
/**
 * @brief The Engine class provides an interface that can be used to communicate with the GPU's internal engines
 * @note The base class is also instantiated directly as a stand-in for engines without an implementation, in that case CallMethod only logs a warning
 */
class Engine {
  protected:
    const DeviceState &state;

  public:
    /**
     * @note This is explicit so that a DeviceState can never be implicitly converted into an Engine
     */
    explicit Engine(const DeviceState &state) : state(state) {}

    virtual ~Engine() = default;

    /**
     * @brief Calls an engine method with the given parameters
     * @param params The method, argument and subchannel describing the call
     */
    virtual void CallMethod(MethodParams params) {
        state.logger->Warn("Called method in unimplemented engine: 0x{:X} args: 0x{:X}", params.method, params.argument);
    }
};
}
}

View File

@ -0,0 +1,179 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <array>
#include <common.h>
#include "engine.h"
namespace skyline {
namespace constant {
constexpr u32 GpfifoRegisterSize = 0x40; //!< The size of the GPFIFO's register space in units of u32
}
namespace gpu::engine {
/**
* @brief The GPFIFO engine handles managing macros and semaphores
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
*/
class GPFIFO : public Engine {
private:
/**
* @brief This holds the GPFIFO engine's registers
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65
*/
union Regs {
enum class SemaphoreOperation {
Acquire = 1,
Release = 2,
AcqGeq = 4,
AcqAnd = 8,
Reduction = 16
};
enum class SemaphoreAcquireSwitch {
Disabled = 0,
Enabled = 1
};
enum class SemaphoreReleaseWfi {
En = 0,
Dis = 1
};
enum class SemaphoreReleaseSize {
SixteenBytes = 0,
FourBytes = 1
};
enum class SemaphoreReduction {
Min = 0,
Max = 1,
Xor = 2,
And = 3,
Or = 4,
Add = 5,
Inc = 6,
Dec = 7
};
enum class SemaphoreFormat {
Signed = 0,
Unsigned = 1
};
enum class MemOpTlbInvalidatePdb {
One = 0,
All = 1
};
enum class SyncpointOperation {
Wait = 0,
Incr = 1
};
enum class SyncpointWaitSwitch {
Dis = 0,
En = 1
};
enum class WfiScope {
CurrentScgType = 0,
All = 1
};
enum class YieldOp {
Nop = 0,
PbdmaTimeslice = 1,
RunlistTimeslice = 2,
Tsg = 3
};
struct {
struct {
u16 nvClass : 16;
u16 engine : 5;
u32 _pad_ : 11;
} setObject;
u32 illegal;
u32 nop;
u32 _pad0_;
struct {
struct {
u32 offsetUpper : 8;
u32 _pad0_ : 24;
};
struct {
u8 _pad1_ : 2;
u32 offsetLower : 30;
};
u32 payload;
struct __attribute__((__packed__)) {
SemaphoreOperation operation : 5;
u8 _pad2_ : 7;
SemaphoreAcquireSwitch acquireSwitch : 1;
u8 _pad3_ : 7;
SemaphoreReleaseWfi releaseWfi : 1;
u8 _pad4_ : 3;
SemaphoreReleaseSize releaseSize : 1;
u8 _pad5_ : 2;
SemaphoreReduction reduction : 4;
SemaphoreFormat format : 1;
};
} semaphore;
u32 nonStallInterrupt;
u32 fbFlush;
u32 _pad1_[2];
u32 memOpC;
u32 memOpD;
u32 _pad2_[6];
u32 setReference;
u32 _pad3_[7];
struct {
u32 payload;
struct __attribute__((__packed__)) {
SyncpointOperation operation : 1;
u8 _pad0_ : 3;
SyncpointWaitSwitch waitSwitch : 1;
u8 _pad1_ : 3;
u16 index : 12;
u16 _pad2_ : 12;
};
} syncpoint;
struct {
WfiScope scope : 1;
u32 _pad_ : 31;
} wfi;
u32 crcCheck;
struct {
YieldOp op : 2;
u32 _pad_ : 30;
} yield;
};
std::array<u32, constant::GpfifoRegisterSize> raw;
} regs{};
static_assert(sizeof(Regs) == (constant::GpfifoRegisterSize << 2));
public:
GPFIFO(const DeviceState &state) : Engine(state) {}
void CallMethod(MethodParams params) {
state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument);
regs.raw[params.method] = params.argument;
};
};
}
}

View File

@ -2,15 +2,52 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <gpu/engines/maxwell_3d.h>
#include "gpfifo.h"
namespace skyline::gpu::gpfifo {
/**
* Routes a single method call to whatever should handle it:
* - method 0 binds the engine identified by params.argument to params.subChannel
* - any other method below constant::GpfifoRegisterSize is forwarded to the GPFIFO engine
* - everything else is forwarded to the engine bound to params.subChannel
* Throws an exception when the engine ID is unknown or when the subchannel has no engine bound to it
*/
void GPFIFO::Send(MethodParams params) {
// NOTE(review): both the Warn below and the Debug after it are present in this view, they look like the pre-change and post-change versions of the same log line - confirm which one is meant to remain
state.logger->Warn("Called unimplemented GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall);
state.logger->Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall);
// Method 0 selects the engine for a subchannel, the argument is interpreted as an EngineID
if (params.method == 0) {
switch (static_cast<EngineID>(params.argument)) {
case EngineID::Fermi2D:
subchannels.at(params.subChannel) = state.gpu->fermi2D;
break;
case EngineID::KeplerMemory:
subchannels.at(params.subChannel) = state.gpu->keplerMemory;
break;
case EngineID::Maxwell3D:
subchannels.at(params.subChannel) = state.gpu->maxwell3D;
break;
case EngineID::MaxwellCompute:
subchannels.at(params.subChannel) = state.gpu->maxwellCompute;
break;
case EngineID::MaxwellDma:
subchannels.at(params.subChannel) = state.gpu->maxwellDma;
break;
default:
throw exception("Unknown engine 0x{:X} cannot be bound to subchannel {}", params.argument, params.subChannel);
}
state.logger->Info("Bound GPU engine 0x{:X} to subchannel {}", params.argument, params.subChannel);
return;
} else if (params.method < constant::GpfifoRegisterSize) {
// Methods inside of the GPFIFO register space are handled by the GPFIFO engine regardless of the subchannel
gpfifoEngine.CallMethod(params);
} else {
// All remaining methods require an engine to have been bound to the subchannel beforehand
if (subchannels.at(params.subChannel) == nullptr)
throw exception("Calling method on unbound channel");
subchannels.at(params.subChannel)->CallMethod(params);
}
}
void GPFIFO::Process(const std::vector<u32> &segment) {
for (auto entry = segment.begin(); entry != segment.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it
if (*entry == 0)
continue;
auto methodHeader = reinterpret_cast<const PushBufferMethodHeader *>(&*entry);
switch (methodHeader->secOp) {
@ -32,6 +69,8 @@ namespace skyline::gpu::gpfifo {
case PushBufferMethodHeader::SecOp::ImmdDataMethod:
Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true});
break;
case PushBufferMethodHeader::SecOp::EndPbSegment:
return;
default:
break;
}
@ -52,7 +91,7 @@ namespace skyline::gpu::gpfifo {
void GPFIFO::Push(std::span<GpEntry> entries) {
std::lock_guard lock(pushBufferQueueLock);
bool beforeBarrier{true};
bool beforeBarrier{false};
for (const auto &entry : entries) {
if (entry.sync == GpEntry::Sync::Wait)

View File

@ -6,179 +6,175 @@
#include <common.h>
#include <span>
#include <queue>
#include "engines/engine.h"
#include "engines/gpfifo.h"
#include "memory_manager.h"
namespace skyline::gpu::gpfifo {
/**
* @brief This holds the parameters of a GPU method call
*/
struct MethodParams {
u16 method;
u32 argument;
u32 subChannel;
bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specifc macro
};
/**
* @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpFifo'
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
*/
struct GpEntry {
enum class Fetch {
Unconditional = 0,
Conditional = 1,
};
union {
struct {
Fetch fetch : 1;
u8 _pad_ : 1;
u32 get : 30;
namespace skyline::gpu {
namespace gpfifo {
/**
* @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpfifo'
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
*/
struct GpEntry {
enum class Fetch {
Unconditional = 0,
Conditional = 1,
};
u32 entry0;
};
enum class Opcode : u8 {
Nop = 0,
Illegal = 1,
Crc = 2,
PbCrc = 3,
};
union {
struct {
Fetch fetch : 1;
u8 _pad_ : 1;
u32 get : 30;
};
u32 entry0;
};
enum class Priv {
User = 0,
Kernel = 1,
};
enum class Opcode : u8 {
Nop = 0,
Illegal = 1,
Crc = 2,
PbCrc = 3,
};
enum class Level {
Main = 0,
Subroutine = 1,
};
enum class Priv {
User = 0,
Kernel = 1,
};
enum class Sync {
Proceed = 0,
Wait = 1,
};
enum class Level {
Main = 0,
Subroutine = 1,
};
enum class Sync {
Proceed = 0,
Wait = 1,
};
union {
struct {
union {
u8 getHi;
Opcode opcode;
};
Priv priv : 1;
Level level : 1;
u32 size : 21;
Sync sync : 1;
};
u32 entry1;
};
};
static_assert(sizeof(GpEntry) == 0x8);
/**
* @brief This holds a single pushbuffer method header that describes a compressed method sequence
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
*/
union PushBufferMethodHeader {
enum class TertOp : u8 {
Grp0IncMethod = 0,
Grp0SetSubDevMask = 1,
Grp0StoreSubDevMask = 2,
Grp0UseSubDevMask = 3,
Grp2NonIncMethod = 0
};
enum class SecOp : u8 {
Grp0UseTert = 0,
IncMethod = 1,
Grp2UseTert = 2,
NonIncMethod = 3,
ImmdDataMethod = 4,
OneInc = 5,
Reserved6 = 6,
EndPbSegment = 7
};
union {
struct {
union {
u8 getHi;
Opcode opcode;
};
Priv priv : 1;
Level level : 1;
u32 size : 21;
Sync sync : 1;
};
u32 entry1;
};
};
static_assert(sizeof(GpEntry) == 0x8);
u16 methodAddress : 12;
struct {
u8 _pad0_ : 4;
u16 subDeviceMask : 12;
};
/**
* @brief This holds a single pushbuffer method header that describes a compressed method sequence
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_ram.ref.txt#L850
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L179
*/
union PushBufferMethodHeader {
enum class TertOp : u8 {
Grp0IncMethod = 0,
Grp0SetSubDevMask = 1,
Grp0StoreSubDevMask = 2,
Grp0UseSubDevMask = 3,
Grp2NonIncMethod = 0
};
struct {
u16 _pad1_ : 13;
u8 methodSubChannel : 3;
union {
TertOp tertOp : 3;
u16 methodCount : 13;
u16 immdData : 13;
};
};
enum class SecOp : u8 {
Grp0UseTert = 0,
IncMethod = 1,
Grp2UseTert = 2,
NonIncMethod = 3,
ImmdDataMethod = 4,
OneInc = 5,
Reserved6 = 6,
EndPbSegment = 7
};
struct {
union {
u16 methodAddress : 12;
struct {
u8 _pad0_ : 4;
u16 subDeviceMask : 12;
};
struct {
u16 _pad1_ : 13;
u8 methodSubChannel : 3;
union {
TertOp tertOp : 3;
u16 methodCount : 13;
u16 immdData : 13;
struct {
u32 _pad2_ : 29;
SecOp secOp : 3;
};
};
struct {
u32 _pad2_ : 29;
SecOp secOp : 3;
};
};
u32 entry;
};
u32 entry;
};
static_assert(sizeof(PushBufferMethodHeader) == 0x4);
static_assert(sizeof(PushBufferMethodHeader) == 0x4);
/**
* @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
*/
class GPFIFO {
private:
/**
* @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words
* @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
*/
struct PushBuffer {
GpEntry gpEntry;
std::vector<u32> segment;
class GPFIFO {
private:
/**
* @brief This is used to hold a pushbuffer's GPFIFO entry and contents, pushbuffers are made up of several 32-bit words
*/
struct PushBuffer {
GpEntry gpEntry;
std::vector<u32> segment;
PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) {
if (fetch)
Fetch(memoryManager);
}
PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) {
if (fetch)
Fetch(memoryManager);
}
inline void Fetch(const vmm::MemoryManager &memoryManager) {
segment.resize(gpEntry.size * sizeof(u32));
memoryManager.Read(reinterpret_cast<u8 *>(segment.data()), (static_cast<u64>(gpEntry.getHi) << 32) | (gpEntry.get << 2), segment.size());
}
inline void Fetch(const vmm::MemoryManager &memoryManager) {
segment.resize(gpEntry.size);
memoryManager.Read<u32>(segment, (static_cast<u64>(gpEntry.getHi) << 32) | (static_cast<u64>(gpEntry.get) << 2));
}
};
const DeviceState &state;
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
std::array<std::shared_ptr<engine::Engine>, 8> subchannels;
std::queue<PushBuffer> pushBufferQueue;
skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a seperate thread
/**
* @brief Processes a pushbuffer segment, calling methods as needed
*/
void Process(const std::vector<u32> &segment);
/**
* @brief This sends a method call to the GPU hardware
*/
void Send(MethodParams params);
public:
GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}
/**
* @brief Executes all pending entries in the FIFO
*/
void Run();
/**
* @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step'
*/
void Push(std::span<GpEntry> entries);
};
const DeviceState &state;
skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a seperate thread
std::queue<PushBuffer> pushBufferQueue;
/**
* @brief Processes a pushbuffer segment, calling methods as needed
*/
void Process(const std::vector<u32> &segment);
/**
* @brief This sends a method call to the GPU hardware
*/
void Send(MethodParams params);
public:
GPFIFO(const DeviceState &state) : state(state) {}
/**
* @brief Executes all pending entries in the FIFO
*/
void Run();
/**
* @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step'
*/
void Push(std::span<GpEntry> entries);
};
}
}
}