Extend the GPFIFO implementation with support for engines and fix a few bugs

An engine is effectively a HW block in the GPU, the main one is the
Maxwell 3D which is used for 3D graphics. Engines can be bound to
individual subchannels and then methods within them can be called
through pushbuffers.

The engine side of the GPFIFO is also included; it currently does nothing
but will need to be extended in the future with semaphores.
This commit is contained in:
Billy Laws 2020-08-09 14:17:45 +01:00 committed by ◱ PixelyIon
parent 9fd0dd848b
commit cf468c20e2
6 changed files with 434 additions and 157 deletions

View File

@ -12,11 +12,12 @@ extern skyline::u16 fps;
extern skyline::u32 frametime;
namespace skyline::gpu {
GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared<kernel::type::KEvent>(state)), bufferEvent(std::make_shared<kernel::type::KEvent>(state)) {
GPU::GPU(const DeviceState &state) : state(state), memoryManager(state), gpfifo(state), fermi2D(std::make_shared<engine::Engine>(state)), keplerMemory(std::make_shared<engine::Engine>(state)), maxwell3D(std::make_shared<engine::Maxwell3D>(state)), maxwellCompute(std::make_shared<engine::Engine>(state)), maxwellDma(std::make_shared<engine::Engine>(state)), window(ANativeWindow_fromSurface(state.jvm->GetEnv(), Surface)), vsyncEvent(std::make_shared<kernel::type::KEvent>(state)), bufferEvent(std::make_shared<kernel::type::KEvent>(state)) {
ANativeWindow_acquire(window);
resolution.width = static_cast<u32>(ANativeWindow_getWidth(window));
resolution.height = static_cast<u32>(ANativeWindow_getHeight(window));
format = ANativeWindow_getFormat(window);
vsyncEvent->Signal();
}
GPU::~GPU() {

View File

@ -11,6 +11,9 @@
#include "gpu/texture.h"
#include "gpu/memory_manager.h"
#include "gpu/gpfifo.h"
#include "gpu/syncpoint.h"
#include "gpu/engines/engine.h"
#include "gpu/engines/maxwell_3d.h"
namespace skyline::gpu {
/**
@ -31,7 +34,13 @@ namespace skyline::gpu {
std::shared_ptr<kernel::type::KEvent> vsyncEvent; //!< This KEvent is triggered every time a frame is drawn
std::shared_ptr<kernel::type::KEvent> bufferEvent; //!< This KEvent is triggered every time a buffer is freed
vmm::MemoryManager memoryManager; //!< The GPU Virtual Memory Manager
std::shared_ptr<engine::Engine> fermi2D;
std::shared_ptr<engine::Maxwell3D> maxwell3D;
std::shared_ptr<engine::Engine> maxwellCompute;
std::shared_ptr<engine::Engine> maxwellDma;
std::shared_ptr<engine::Engine> keplerMemory;
gpfifo::GPFIFO gpfifo;
std::array<Syncpoint, constant::MaxHwSyncpointCount> syncpoints{};
/**
* @param window The ANativeWindow to render to

View File

@ -0,0 +1,53 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
/**
 * @brief Expands to the offset of `field` inside the type `regs` expressed in units of u32, that is the byte offset from offsetof divided by sizeof(u32)
 */
#define U32_OFFSET(regs, field) (offsetof(regs, field) / sizeof(u32))
namespace skyline::gpu {
/**
 * @brief This enumerates the identifiers used to label a specific engine
 * @note GPFIFO::Send casts the argument of method 0 to this type to choose the engine that gets bound to a subchannel, any value without an enumerator is rejected there with an exception
 */
enum class EngineID {
Fermi2D = 0x902D,
KeplerMemory = 0xA140,
Maxwell3D = 0xB197,
MaxwellCompute = 0xB1C0,
MaxwellDma = 0xB0B5,
};
/**
 * @brief This holds the parameters of a GPU engine method call
 */
struct MethodParams {
u16 method; //!< The method being called, the GPFIFO engine uses this as an index into its u32 register array
u32 argument; //!< The value supplied to the method
u32 subChannel; //!< The subchannel the call was issued on, this selects which bound engine receives the call
bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specific macro
};
namespace engine {
/**
 * @brief The Engine class provides an interface that can be used to communicate with the GPU's internal engines
 * @note The base class doubles as a stub for engines without an implementation, its CallMethod only logs a warning
 */
class Engine {
  protected:
    const DeviceState &state;

  public:
    /**
     * @param state The device state, a reference to it is retained and used for logging
     * @note Marked explicit so a DeviceState can never be silently converted into an Engine
     */
    explicit Engine(const DeviceState &state) : state(state) {}

    virtual ~Engine() = default;

    /**
     * @brief Calls an engine method with the given parameters
     * @param params The method, its argument and the subchannel the call was issued on
     */
    virtual void CallMethod(MethodParams params) {
        state.logger->Warn("Called method in unimplemented engine: 0x{:X} args: 0x{:X}", params.method, params.argument);
    }
};
}
}

View File

@ -0,0 +1,179 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <array>
#include <common.h>
#include "engine.h"
namespace skyline {
namespace constant {
constexpr u32 GpfifoRegisterSize = 0x40; //!< The size of the GPFIFO's register space in units of u32
}
namespace gpu::engine {
/**
* @brief The GPFIFO engine handles managing macros and semaphores
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
*/
class GPFIFO : public Engine {
private:
/**
* @brief This holds the GPFIFO engine's registers
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L65
*/
union Regs {
enum class SemaphoreOperation {
Acquire = 1,
Release = 2,
AcqGeq = 4,
AcqAnd = 8,
Reduction = 16
};
enum class SemaphoreAcquireSwitch {
Disabled = 0,
Enabled = 1
};
enum class SemaphoreReleaseWfi {
En = 0,
Dis = 1
};
enum class SemaphoreReleaseSize {
SixteenBytes = 0,
FourBytes = 1
};
enum class SemaphoreReduction {
Min = 0,
Max = 1,
Xor = 2,
And = 3,
Or = 4,
Add = 5,
Inc = 6,
Dec = 7
};
enum class SemaphoreFormat {
Signed = 0,
Unsigned = 1
};
enum class MemOpTlbInvalidatePdb {
One = 0,
All = 1
};
enum class SyncpointOperation {
Wait = 0,
Incr = 1
};
enum class SyncpointWaitSwitch {
Dis = 0,
En = 1
};
enum class WfiScope {
CurrentScgType = 0,
All = 1
};
enum class YieldOp {
Nop = 0,
PbdmaTimeslice = 1,
RunlistTimeslice = 2,
Tsg = 3
};
struct {
struct {
u16 nvClass : 16;
u16 engine : 5;
u32 _pad_ : 11;
} setObject;
u32 illegal;
u32 nop;
u32 _pad0_;
struct {
struct {
u32 offsetUpper : 8;
u32 _pad0_ : 24;
};
struct {
u8 _pad1_ : 2;
u32 offsetLower : 30;
};
u32 payload;
struct __attribute__((__packed__)) {
SemaphoreOperation operation : 5;
u8 _pad2_ : 7;
SemaphoreAcquireSwitch acquireSwitch : 1;
u8 _pad3_ : 7;
SemaphoreReleaseWfi releaseWfi : 1;
u8 _pad4_ : 3;
SemaphoreReleaseSize releaseSize : 1;
u8 _pad5_ : 2;
SemaphoreReduction reduction : 4;
SemaphoreFormat format : 1;
};
} semaphore;
u32 nonStallInterrupt;
u32 fbFlush;
u32 _pad1_[2];
u32 memOpC;
u32 memOpD;
u32 _pad2_[6];
u32 setReference;
u32 _pad3_[7];
struct {
u32 payload;
struct __attribute__((__packed__)) {
SyncpointOperation operation : 1;
u8 _pad0_ : 3;
SyncpointWaitSwitch waitSwitch : 1;
u8 _pad1_ : 3;
u16 index : 12;
u16 _pad2_ : 12;
};
} syncpoint;
struct {
WfiScope scope : 1;
u32 _pad_ : 31;
} wfi;
u32 crcCheck;
struct {
YieldOp op : 2;
u32 _pad_ : 30;
} yield;
};
std::array<u32, constant::GpfifoRegisterSize> raw;
} regs{};
static_assert(sizeof(Regs) == (constant::GpfifoRegisterSize << 2));
public:
GPFIFO(const DeviceState &state) : Engine(state) {}
void CallMethod(MethodParams params) {
state.logger->Debug("Called method in GPFIFO: 0x{:X} args: 0x{:X}", params.method, params.argument);
regs.raw[params.method] = params.argument;
};
};
}
}

View File

@ -2,15 +2,52 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <gpu/engines/maxwell_3d.h>
#include "gpfifo.h"
namespace skyline::gpu::gpfifo {
/**
 * @brief Dispatches a single method call to wherever it belongs: method 0 binds an engine to the call's subchannel, any other method below constant::GpfifoRegisterSize goes to the GPFIFO engine and everything else goes to the engine bound to the call's subchannel
 * @throws exception If method 0 names an engine that isn't in EngineID or if the targeted subchannel has no engine bound to it
 */
void GPFIFO::Send(MethodParams params) {
state.logger->Warn("Called unimplemented GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall);
state.logger->Debug("Called GPU method - method: 0x{:X} argument: 0x{:X} subchannel: 0x{:X} last: {}", params.method, params.argument, params.subChannel, params.lastCall);
// Method 0 binds an engine to the subchannel, the whole argument is interpreted as the EngineID to bind
if (params.method == 0) {
switch (static_cast<EngineID>(params.argument)) {
case EngineID::Fermi2D:
subchannels.at(params.subChannel) = state.gpu->fermi2D;
break;
case EngineID::KeplerMemory:
subchannels.at(params.subChannel) = state.gpu->keplerMemory;
break;
case EngineID::Maxwell3D:
subchannels.at(params.subChannel) = state.gpu->maxwell3D;
break;
case EngineID::MaxwellCompute:
subchannels.at(params.subChannel) = state.gpu->maxwellCompute;
break;
case EngineID::MaxwellDma:
subchannels.at(params.subChannel) = state.gpu->maxwellDma;
break;
default:
throw exception("Unknown engine 0x{:X} cannot be bound to subchannel {}", params.argument, params.subChannel);
}
state.logger->Info("Bound GPU engine 0x{:X} to subchannel {}", params.argument, params.subChannel);
// A bind is handled entirely here, it isn't forwarded to any engine
return;
} else if (params.method < constant::GpfifoRegisterSize) {
// Methods inside the GPFIFO register space are handled by the GPFIFO engine irrespective of the subchannel
gpfifoEngine.CallMethod(params);
} else {
// Subchannel slots are null until an engine has been bound to them through method 0
if (subchannels.at(params.subChannel) == nullptr)
throw exception("Calling method on unbound channel");
subchannels.at(params.subChannel)->CallMethod(params);
}
}
void GPFIFO::Process(const std::vector<u32> &segment) {
for (auto entry = segment.begin(); entry != segment.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it
if (*entry == 0)
continue;
auto methodHeader = reinterpret_cast<const PushBufferMethodHeader *>(&*entry);
switch (methodHeader->secOp) {
@ -32,6 +69,8 @@ namespace skyline::gpu::gpfifo {
case PushBufferMethodHeader::SecOp::ImmdDataMethod:
Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true});
break;
case PushBufferMethodHeader::SecOp::EndPbSegment:
return;
default:
break;
}
@ -52,7 +91,7 @@ namespace skyline::gpu::gpfifo {
void GPFIFO::Push(std::span<GpEntry> entries) {
std::lock_guard lock(pushBufferQueueLock);
bool beforeBarrier{true};
bool beforeBarrier{false};
for (const auto &entry : entries) {
if (entry.sync == GpEntry::Sync::Wait)

View File

@ -6,21 +6,14 @@
#include <common.h>
#include <span>
#include <queue>
#include "engines/engine.h"
#include "engines/gpfifo.h"
#include "memory_manager.h"
namespace skyline::gpu::gpfifo {
namespace skyline::gpu {
namespace gpfifo {
/**
* @brief This holds the parameters of a GPU method call
*/
struct MethodParams {
u16 method;
u32 argument;
u32 subChannel;
bool lastCall; //!< Whether this is the last call in the pushbuffer entry to this specifc macro
};
/**
* @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpFifo'
* @brief This contains a single GPFIFO entry that is submitted through 'SubmitGpfifo'
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/clb06f.h#L155
*/
@ -149,14 +142,16 @@ namespace skyline::gpu::gpfifo {
}
/**
 * @brief Sizes the segment buffer from gpEntry.size and fills it by reading through the supplied memory manager
 * @param memoryManager The memory manager the segment is read through, the address read from has gpEntry.getHi as its upper 32 bits and gpEntry.get shifted left by 2 as its lower bits
 */
inline void Fetch(const vmm::MemoryManager &memoryManager) {
segment.resize(gpEntry.size * sizeof(u32));
memoryManager.Read(reinterpret_cast<u8 *>(segment.data()), (static_cast<u64>(gpEntry.getHi) << 32) | (gpEntry.get << 2), segment.size());
segment.resize(gpEntry.size);
memoryManager.Read<u32>(segment, (static_cast<u64>(gpEntry.getHi) << 32) | (static_cast<u64>(gpEntry.get) << 2));
}
};
const DeviceState &state;
skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a seperate thread
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
std::array<std::shared_ptr<engine::Engine>, 8> subchannels;
std::queue<PushBuffer> pushBufferQueue;
skyline::Mutex pushBufferQueueLock; //!< This is used to lock pushbuffer queue insertions as the GPU runs on a seperate thread
/**
* @brief Processes a pushbuffer segment, calling methods as needed
@ -169,7 +164,7 @@ namespace skyline::gpu::gpfifo {
void Send(MethodParams params);
public:
GPFIFO(const DeviceState &state) : state(state) {}
GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}
/**
* @brief Executes all pending entries in the FIFO
@ -182,3 +177,4 @@ namespace skyline::gpu::gpfifo {
void Push(std::span<GpEntry> entries);
};
}
}