Implement multichannel support for GPU

Allows the execution of multiple channels at the same time, with locking
being performed on the host GPU scheduler layer. Address spaces can be
bound to one or more channels.
This commit is contained in:
Billy Laws 2021-10-08 20:25:21 +01:00
parent b762d1df23
commit eb25f60033
26 changed files with 209 additions and 109 deletions

View File

@ -103,12 +103,12 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/texture/texture.cpp
${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp
${source_DIR}/skyline/gpu/interconnect/command_executor.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp
${source_DIR}/skyline/soc/gm20b.cpp
${source_DIR}/skyline/soc/host1x/syncpoint.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp
${source_DIR}/skyline/soc/gm20b/channel.cpp
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
${source_DIR}/skyline/soc/gm20b/gmmu.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp
${source_DIR}/skyline/soc/gm20b/gmmu.cpp
${source_DIR}/skyline/input/npad.cpp ${source_DIR}/skyline/input/npad.cpp
${source_DIR}/skyline/input/npad_device.cpp ${source_DIR}/skyline/input/npad_device.cpp
${source_DIR}/skyline/input/touch.cpp ${source_DIR}/skyline/input/touch.cpp

View File

@ -4,8 +4,10 @@
#pragma once #pragma once
#include <gpu/texture/format.h> #include <gpu/texture/format.h>
#include <soc/gm20b/channel.h>
#include <soc/gm20b/gmmu.h> #include <soc/gm20b/gmmu.h>
#include <soc/gm20b/engines/maxwell/types.h> #include <soc/gm20b/engines/maxwell/types.h>
#include "command_executor.h" #include "command_executor.h"
namespace skyline::gpu::interconnect { namespace skyline::gpu::interconnect {
@ -18,7 +20,7 @@ namespace skyline::gpu::interconnect {
class GraphicsContext { class GraphicsContext {
private: private:
GPU &gpu; GPU &gpu;
soc::gm20b::GMMU &gmmu; soc::gm20b::ChannelContext &channelCtx;
gpu::interconnect::CommandExecutor &executor; gpu::interconnect::CommandExecutor &executor;
struct RenderTarget { struct RenderTarget {
@ -50,7 +52,7 @@ namespace skyline::gpu::interconnect {
public: public:
GraphicsContext(GPU &gpu, soc::gm20b::GMMU &gmmu, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), gmmu(gmmu), executor(executor) { GraphicsContext(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor) {
scissors.fill(DefaultScissor); scissors.fill(DefaultScissor);
} }
@ -182,7 +184,7 @@ namespace skyline::gpu::interconnect {
if (renderTarget.guest.mappings.empty()) { if (renderTarget.guest.mappings.empty()) {
auto size{std::max<u64>(renderTarget.guest.layerStride * (renderTarget.guest.layerCount - renderTarget.guest.baseArrayLayer), renderTarget.guest.format->GetSize(renderTarget.guest.dimensions))}; auto size{std::max<u64>(renderTarget.guest.layerStride * (renderTarget.guest.layerCount - renderTarget.guest.baseArrayLayer), renderTarget.guest.format->GetSize(renderTarget.guest.dimensions))};
auto mappings{gmmu.TranslateRange(renderTarget.gpuAddress, size)}; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(renderTarget.gpuAddress, size)};
renderTarget.guest.mappings.assign(mappings.begin(), mappings.end()); renderTarget.guest.mappings.assign(mappings.begin(), mappings.end());
} }

View File

@ -11,6 +11,7 @@ namespace skyline::service {
NotPermitted = 1, // EPERM NotPermitted = 1, // EPERM
TryAgain = 11, // EAGAIN TryAgain = 11, // EAGAIN
Busy = 16, // EBUSY Busy = 16, // EBUSY
FileExists = 17, // EEXIST
InvalidArgument = 22, // EINVAL InvalidArgument = 22, // EINVAL
InappropriateIoctlForDevice = 25, // ENOTTY InappropriateIoctlForDevice = 25, // ENOTTY
FunctionNotImplemented = 38, // ENOSYS FunctionNotImplemented = 38, // ENOSYS

View File

@ -5,7 +5,11 @@
#include "nvdevice.h" #include "nvdevice.h"
namespace skyline::service::nvdrv::device { namespace skyline::service::nvdrv::device {
NvDevice::NvDevice(const DeviceState &state, Core &core, const SessionContext &ctx) : state(state), core(core), ctx(ctx) {} NvDevice::NvDevice(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) :
state(state),
driver(driver),
core(core),
ctx(ctx) {}
const std::string &NvDevice::GetName() { const std::string &NvDevice::GetName() {
if (name.empty()) { if (name.empty()) {

View File

@ -11,6 +11,10 @@
#include "deserialisation/types.h" #include "deserialisation/types.h"
namespace skyline::service::nvdrv {
class Driver;
}
namespace skyline::service::nvdrv::device { namespace skyline::service::nvdrv::device {
using namespace kernel; using namespace kernel;
using namespace deserialisation; using namespace deserialisation;
@ -24,11 +28,12 @@ namespace skyline::service::nvdrv::device {
protected: protected:
const DeviceState &state; const DeviceState &state;
Driver &driver;
Core &core; Core &core;
SessionContext ctx; SessionContext ctx;
public: public:
NvDevice(const DeviceState &state, Core &core, const SessionContext &ctx); NvDevice(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx);
virtual ~NvDevice() = default; virtual ~NvDevice() = default;

View File

@ -3,7 +3,10 @@
#include <common/address_space.inc> #include <common/address_space.inc>
#include <soc.h> #include <soc.h>
#include <soc/gm20b/gmmu.h>
#include <services/nvdrv/driver.h>
#include <services/nvdrv/devices/deserialisation/deserialisation.h> #include <services/nvdrv/devices/deserialisation/deserialisation.h>
#include "gpu_channel.h"
#include "as_gpu.h" #include "as_gpu.h"
namespace skyline { namespace skyline {
@ -14,10 +17,31 @@ namespace skyline {
namespace skyline::service::nvdrv::device::nvhost { namespace skyline::service::nvdrv::device::nvhost {
using GMMU = soc::gm20b::GMMU; using GMMU = soc::gm20b::GMMU;
AsGpu::AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} AsGpu::AsGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {}
PosixResult AsGpu::BindChannel(In<FileDescriptor> channelFd) { PosixResult AsGpu::BindChannel(In<FileDescriptor> channelFd) {
// TODO: support once multiple address spaces are supported std::scoped_lock lock(mutex);
if (!vm.initialised)
return PosixResult::InvalidArgument;
try {
std::shared_lock gpuLock(driver.deviceMutex);
auto &gpuCh{dynamic_cast<GpuChannel &>(*driver.devices.at(channelFd))};
std::scoped_lock channelLock(gpuCh.channelMutex);
if (gpuCh.asCtx) {
state.logger->Warn("Attempting to bind multiple ASes to a single GPU channel");
return PosixResult::InvalidArgument;
}
gpuCh.asCtx = asCtx;
} catch (const std::out_of_range &e) {
state.logger->Warn("Attempting to bind AS to an invalid channel: {}", channelFd);
return PosixResult::InvalidArgument;
}
return PosixResult::Success; return PosixResult::Success;
} }
@ -53,7 +77,7 @@ namespace skyline::service::nvdrv::device::nvhost {
u64 size{static_cast<u64>(pages) * pageSize}; u64 size{static_cast<u64>(pages) * pageSize};
if (flags.sparse) if (flags.sparse)
state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), size, {true}); asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), size, {true});
allocationMap[offset] = { allocationMap[offset] = {
.size = size, .size = size,
@ -77,9 +101,9 @@ namespace skyline::service::nvdrv::device::nvhost {
// Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
// Only FreeSpace can unmap them fully // Only FreeSpace can unmap them fully
if (mapping->sparseAlloc) if (mapping->sparseAlloc)
state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true});
else else
state.soc->gm20b.gmmu.Unmap(offset, mapping->size); asCtx->gmmu.Unmap(offset, mapping->size);
mappingMap.erase(offset); mappingMap.erase(offset);
} }
@ -103,7 +127,7 @@ namespace skyline::service::nvdrv::device::nvhost {
// Unset sparse flag if required // Unset sparse flag if required
if (allocation.sparse) if (allocation.sparse)
state.soc->gm20b.gmmu.Unmap(offset, allocation.size); asCtx->gmmu.Unmap(offset, allocation.size);
auto &allocator{pageSize == VM::PageSize ? vm.smallPageAllocator : vm.bigPageAllocator}; auto &allocator{pageSize == VM::PageSize ? vm.smallPageAllocator : vm.bigPageAllocator};
u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits}; u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits};
@ -138,9 +162,9 @@ namespace skyline::service::nvdrv::device::nvhost {
// Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
// Only FreeSpace can unmap them fully // Only FreeSpace can unmap them fully
if (mapping->sparseAlloc) if (mapping->sparseAlloc)
state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true});
else else
state.soc->gm20b.gmmu.Unmap(offset, mapping->size); asCtx->gmmu.Unmap(offset, mapping->size);
mappingMap.erase(offset); mappingMap.erase(offset);
} catch (const std::out_of_range &e) { } catch (const std::out_of_range &e) {
@ -172,7 +196,7 @@ namespace skyline::service::nvdrv::device::nvhost {
u64 gpuAddress{offset + bufferOffset}; u64 gpuAddress{offset + bufferOffset};
u8 *cpuPtr{mapping->ptr + bufferOffset}; u8 *cpuPtr{mapping->ptr + bufferOffset};
state.soc->gm20b.gmmu.Map(gpuAddress, cpuPtr, mappingSize); asCtx->gmmu.Map(gpuAddress, cpuPtr, mappingSize);
return PosixResult::Success; return PosixResult::Success;
} catch (const std::out_of_range &e) { } catch (const std::out_of_range &e) {
@ -194,7 +218,7 @@ namespace skyline::service::nvdrv::device::nvhost {
if (alloc-- == allocationMap.begin() || (offset - alloc->first) + size > alloc->second.size) if (alloc-- == allocationMap.begin() || (offset - alloc->first) + size > alloc->second.size)
throw exception("Cannot perform a fixed mapping into an unallocated region!"); throw exception("Cannot perform a fixed mapping into an unallocated region!");
state.soc->gm20b.gmmu.Map(offset, cpuPtr, size); asCtx->gmmu.Map(offset, cpuPtr, size);
auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, true, false, alloc->second.sparse)}; auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, true, false, alloc->second.sparse)};
alloc->second.mappings.push_back(mapping); alloc->second.mappings.push_back(mapping);
@ -214,7 +238,7 @@ namespace skyline::service::nvdrv::device::nvhost {
u32 pageSizeBits{bigPage ? vm.bigPageSizeBits : VM::PageSizeBits}; u32 pageSizeBits{bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
offset = static_cast<u64>(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits; offset = static_cast<u64>(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits;
state.soc->gm20b.gmmu.Map(offset, cpuPtr, size); asCtx->gmmu.Map(offset, cpuPtr, size);
auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, false, bigPage, false)}; auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, false, bigPage, false)};
mappingMap[offset] = mapping; mappingMap[offset] = mapping;
@ -292,6 +316,7 @@ namespace skyline::service::nvdrv::device::nvhost {
u64 endBigPages{(vm.vaRangeEnd - vm.vaRangeSplit) >> vm.bigPageSizeBits}; u64 endBigPages{(vm.vaRangeEnd - vm.vaRangeSplit) >> vm.bigPageSizeBits};
vm.bigPageAllocator = std::make_unique<VM::Allocator>(startBigPages, endBigPages); vm.bigPageAllocator = std::make_unique<VM::Allocator>(startBigPages, endBigPages);
asCtx = std::make_shared<soc::gm20b::AddressSpaceContext>();
vm.initialised = true; vm.initialised = true;
return PosixResult::Success; return PosixResult::Success;
@ -320,7 +345,7 @@ namespace skyline::service::nvdrv::device::nvhost {
} }
if (!entry.handle) { if (!entry.handle) {
state.soc->gm20b.gmmu.Map(virtAddr, soc::gm20b::GMMU::SparsePlaceholderAddress(), size, {true}); asCtx->gmmu.Map(virtAddr, GMMU::SparsePlaceholderAddress(), size, {true});
} else { } else {
auto h{core.nvMap.GetHandle(entry.handle)}; auto h{core.nvMap.GetHandle(entry.handle)};
if (!h) if (!h)
@ -328,7 +353,7 @@ namespace skyline::service::nvdrv::device::nvhost {
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << vm.bigPageSizeBits))}; u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << vm.bigPageSizeBits))};
state.soc->gm20b.gmmu.Map(virtAddr, cpuPtr, size); asCtx->gmmu.Map(virtAddr, cpuPtr, size);
} }
} }

View File

@ -4,7 +4,7 @@
#pragma once #pragma once
#include <common/address_space.h> #include <common/address_space.h>
#include <soc/gm20b/gmmu.h>
#include <services/nvdrv/devices/nvdevice.h> #include <services/nvdrv/devices/nvdevice.h>
namespace skyline::service::nvdrv::device::nvhost { namespace skyline::service::nvdrv::device::nvhost {
@ -65,6 +65,8 @@ namespace skyline::service::nvdrv::device::nvhost {
bool initialised{}; bool initialised{};
} vm; } vm;
std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx;
void FreeMappingLocked(u64 offset); void FreeMappingLocked(u64 offset);
public: public:
@ -95,7 +97,7 @@ namespace skyline::service::nvdrv::device::nvhost {
}; };
static_assert(sizeof(RemapEntry) == 0x14); static_assert(sizeof(RemapEntry) == 0x14);
AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx); AsGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx);
/** /**
* @brief Binds this address space to a channel * @brief Binds this address space to a channel

View File

@ -34,7 +34,7 @@ namespace skyline::service::nvdrv::device::nvhost {
state == SyncpointEvent::State::Signalling; state == SyncpointEvent::State::Signalling;
} }
Ctrl::Ctrl(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} Ctrl::Ctrl(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {}
u32 Ctrl::FindFreeSyncpointEvent(u32 syncpointId) { u32 Ctrl::FindFreeSyncpointEvent(u32 syncpointId) {
u32 eventSlot{SyncpointEventCount}; //!< Holds the slot of the last populated event in the event array u32 eventSlot{SyncpointEventCount}; //!< Holds the slot of the last populated event in the event array

View File

@ -96,7 +96,7 @@ namespace skyline::service::nvdrv::device::nvhost {
PosixResult SyncpointFreeEventLocked(In<u32> slot); PosixResult SyncpointFreeEventLocked(In<u32> slot);
public: public:
Ctrl(const DeviceState &state, Core &core, const SessionContext &ctx); Ctrl(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx);
/** /**
* @brief Clears a syncpoint event * @brief Clears a syncpoint event

View File

@ -5,8 +5,8 @@
#include "ctrl_gpu.h" #include "ctrl_gpu.h"
namespace skyline::service::nvdrv::device::nvhost { namespace skyline::service::nvdrv::device::nvhost {
CtrlGpu::CtrlGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : CtrlGpu::CtrlGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) :
NvDevice(state, core, ctx), NvDevice(state, driver, core, ctx),
errorNotifierEvent(std::make_shared<type::KEvent>(state, false)), errorNotifierEvent(std::make_shared<type::KEvent>(state, false)),
unknownEvent(std::make_shared<type::KEvent>(state, false)) {} unknownEvent(std::make_shared<type::KEvent>(state, false)) {}

View File

@ -73,7 +73,7 @@ namespace skyline::service::nvdrv::device::nvhost {
u32 subregionCount{0x10}; u32 subregionCount{0x10};
}; };
CtrlGpu(const DeviceState &state, Core &core, const SessionContext &ctx); CtrlGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx);
/** /**
* @brief Returns the zcull context size * @brief Returns the zcull context size

View File

@ -6,8 +6,8 @@
#include "gpu_channel.h" #include "gpu_channel.h"
namespace skyline::service::nvdrv::device::nvhost { namespace skyline::service::nvdrv::device::nvhost {
GpuChannel::GpuChannel(const DeviceState &state, Core &core, const SessionContext &ctx) : GpuChannel::GpuChannel(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) :
NvDevice(state, core, ctx), NvDevice(state, driver, core, ctx),
smExceptionBreakpointIntReportEvent(std::make_shared<type::KEvent>(state, false)), smExceptionBreakpointIntReportEvent(std::make_shared<type::KEvent>(state, false)),
smExceptionBreakpointPauseReportEvent(std::make_shared<type::KEvent>(state, false)), smExceptionBreakpointPauseReportEvent(std::make_shared<type::KEvent>(state, false)),
errorNotifierEvent(std::make_shared<type::KEvent>(state, false)) { errorNotifierEvent(std::make_shared<type::KEvent>(state, false)) {
@ -39,16 +39,20 @@ namespace skyline::service::nvdrv::device::nvhost {
if (flags.incrementWithValue) if (flags.incrementWithValue)
return PosixResult::InvalidArgument; return PosixResult::InvalidArgument;
if (core.syncpointManager.IsFenceSignalled(fence)) if (!core.syncpointManager.IsFenceSignalled(fence))
throw exception("Waiting on a fence through SubmitGpfifo is unimplemented"); throw exception("Waiting on a fence through SubmitGpfifo is unimplemented");
} }
state.soc->gm20b.gpfifo.Push(gpEntries.subspan(0, numEntries)); {
std::scoped_lock lock(channelMutex);
fence.id = channelSyncpoint; channelCtx->gpfifo.Push(gpEntries.subspan(0, numEntries));
u32 increment{(flags.fenceIncrement ? 2 : 0) + (flags.incrementWithValue ? fence.threshold : 0)}; fence.id = channelSyncpoint;
fence.threshold = core.syncpointManager.IncrementSyncpointMaxExt(channelSyncpoint, increment);
u32 increment{(flags.fenceIncrement ? 2 : 0) + (flags.incrementWithValue ? fence.threshold : 0)};
fence.threshold = core.syncpointManager.IncrementSyncpointMaxExt(channelSyncpoint, increment);
}
if (flags.fenceIncrement) if (flags.fenceIncrement)
throw exception("Incrementing a fence through SubmitGpfifo is unimplemented"); throw exception("Incrementing a fence through SubmitGpfifo is unimplemented");
@ -84,7 +88,19 @@ namespace skyline::service::nvdrv::device::nvhost {
PosixResult GpuChannel::AllocGpfifoEx2(In<u32> numEntries, In<u32> numJobs, In<u32> flags, Out<Fence> fence) { PosixResult GpuChannel::AllocGpfifoEx2(In<u32> numEntries, In<u32> numJobs, In<u32> flags, Out<Fence> fence) {
state.logger->Debug("numEntries: {}, numJobs: {}, flags: 0x{:X}", numEntries, numJobs, flags); state.logger->Debug("numEntries: {}, numJobs: {}, flags: 0x{:X}", numEntries, numJobs, flags);
state.soc->gm20b.gpfifo.Initialize(numEntries);
std::scoped_lock lock(channelMutex);
if (!asCtx) {
state.logger->Warn("Trying to allocate a channel without a bound address space");
return PosixResult::InvalidArgument;
}
if (channelCtx) {
state.logger->Warn("Trying to allocate a channel twice!");
return PosixResult::FileExists;
}
channelCtx = std::make_unique<soc::gm20b::ChannelContext>(state, asCtx, numEntries);
fence = core.syncpointManager.GetSyncpointFence(channelSyncpoint); fence = core.syncpointManager.GetSyncpointFence(channelSyncpoint);

View File

@ -3,23 +3,32 @@
#pragma once #pragma once
#include <soc/gm20b/gpfifo.h>
#include <services/common/fence.h> #include <services/common/fence.h>
#include <soc/gm20b/engines/maxwell_3d.h> // TODO: remove
#include <soc/gm20b/channel.h>
#include "services/nvdrv/devices/nvdevice.h" #include "services/nvdrv/devices/nvdevice.h"
namespace skyline::service::nvdrv::device::nvhost { namespace skyline::service::nvdrv::device::nvhost {
class AsGpu;
/** /**
* @brief nvhost::GpuChannel is used to create and submit commands to channels which are effectively GPU processes * @brief nvhost::GpuChannel is used to create and submit commands to channels which are effectively GPU processes
* @url https://switchbrew.org/wiki/NV_services#Channels * @url https://switchbrew.org/wiki/NV_services#Channels
*/ */
class GpuChannel : public NvDevice { class GpuChannel : public NvDevice {
private: private:
u32 channelSyncpoint{}; u32 channelSyncpoint{}; //!< The syncpoint for submissions allocated to this channel in `AllocGpfifo`
u32 channelUserData{}; u32 channelUserData{};
std::mutex channelMutex;
std::shared_ptr<type::KEvent> smExceptionBreakpointIntReportEvent; std::shared_ptr<type::KEvent> smExceptionBreakpointIntReportEvent;
std::shared_ptr<type::KEvent> smExceptionBreakpointPauseReportEvent; std::shared_ptr<type::KEvent> smExceptionBreakpointPauseReportEvent;
std::shared_ptr<type::KEvent> errorNotifierEvent; std::shared_ptr<type::KEvent> errorNotifierEvent;
std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx;
std::unique_ptr<soc::gm20b::ChannelContext> channelCtx;
friend AsGpu;
public: public:
/** /**
* @brief A bitfield of the flags that can be supplied for a specific GPFIFO submission * @brief A bitfield of the flags that can be supplied for a specific GPFIFO submission
@ -37,7 +46,7 @@ namespace skyline::service::nvdrv::device::nvhost {
u32 raw; u32 raw;
}; };
GpuChannel(const DeviceState &state, Core &core, const SessionContext &ctx); GpuChannel(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx);
/** /**
* @brief Sets the nvmap handle id to be used for channel submits (does nothing for GPU channels) * @brief Sets the nvmap handle id to be used for channel submits (does nothing for GPU channels)

View File

@ -6,7 +6,7 @@
#include "nvmap.h" #include "nvmap.h"
namespace skyline::service::nvdrv::device { namespace skyline::service::nvdrv::device {
NvMap::NvMap(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} NvMap::NvMap(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {}
PosixResult NvMap::Create(In<u32> size, Out<NvMapCore::Handle::Id> handle) { PosixResult NvMap::Create(In<u32> size, Out<NvMapCore::Handle::Id> handle) {
auto handleDesc{core.nvMap.CreateHandle(util::AlignUp(size, PAGE_SIZE))}; auto handleDesc{core.nvMap.CreateHandle(util::AlignUp(size, PAGE_SIZE))};

View File

@ -23,7 +23,7 @@ namespace skyline::service::nvdrv::device {
IsSharedMemMapped = 6 IsSharedMemMapped = 6
}; };
NvMap(const DeviceState &state, Core &core, const SessionContext &ctx); NvMap(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx);
/** /**
* @brief Creates an nvmap handle for the given size * @brief Creates an nvmap handle for the given size

View File

@ -23,10 +23,13 @@ namespace skyline::service::nvdrv {
break; \ break; \
} }
#define DEVICE_CASE(path, object) \ #define DEVICE_CASE(path, object) \
case util::Hash(path): \ case util::Hash(path): \
devices.emplace(fd, std::make_unique<device::object>(state, core, ctx)); \ { \
return NvResult::Success; std::unique_lock lock(deviceMutex); \
devices.emplace(fd, std::make_unique<device::object>(state, *this, core, ctx)); \
return NvResult::Success; \
}
DEVICE_SWITCH( DEVICE_SWITCH(
DEVICE_CASE("/dev/nvmap", NvMap) DEVICE_CASE("/dev/nvmap", NvMap)
@ -69,13 +72,13 @@ namespace skyline::service::nvdrv {
default: default:
throw exception("Unhandled POSIX result: {}!", static_cast<i32>(result)); throw exception("Unhandled POSIX result: {}!", static_cast<i32>(result));
} }
} }
NvResult Driver::Ioctl(u32 fd, IoctlDescriptor cmd, span<u8> buffer) { NvResult Driver::Ioctl(u32 fd, IoctlDescriptor cmd, span<u8> buffer) {
state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName()); state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName());
try { try {
std::shared_lock lock(deviceMutex);
return ConvertResult(devices.at(fd)->Ioctl(cmd, buffer)); return ConvertResult(devices.at(fd)->Ioctl(cmd, buffer));
} catch (const std::out_of_range &) { } catch (const std::out_of_range &) {
throw exception("Ioctl was called with invalid file descriptor: {}", fd); throw exception("Ioctl was called with invalid file descriptor: {}", fd);
@ -86,6 +89,7 @@ namespace skyline::service::nvdrv {
state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName()); state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName());
try { try {
std::shared_lock lock(deviceMutex);
return ConvertResult(devices.at(fd)->Ioctl2(cmd, buffer, inlineBuffer)); return ConvertResult(devices.at(fd)->Ioctl2(cmd, buffer, inlineBuffer));
} catch (const std::out_of_range &) { } catch (const std::out_of_range &) {
throw exception("Ioctl2 was called with invalid file descriptor: 0x{:X}", fd); throw exception("Ioctl2 was called with invalid file descriptor: 0x{:X}", fd);
@ -96,6 +100,7 @@ namespace skyline::service::nvdrv {
state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName()); state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName());
try { try {
std::shared_lock lock(deviceMutex);
return ConvertResult(devices.at(fd)->Ioctl3(cmd, buffer, inlineBuffer)); return ConvertResult(devices.at(fd)->Ioctl3(cmd, buffer, inlineBuffer));
} catch (const std::out_of_range &) { } catch (const std::out_of_range &) {
throw exception("Ioctl3 was called with invalid file descriptor: {}", fd); throw exception("Ioctl3 was called with invalid file descriptor: {}", fd);
@ -104,6 +109,7 @@ namespace skyline::service::nvdrv {
void Driver::CloseDevice(u32 fd) { void Driver::CloseDevice(u32 fd) {
try { try {
std::unique_lock lock(deviceMutex);
devices.erase(fd); devices.erase(fd);
} catch (const std::out_of_range &) { } catch (const std::out_of_range &) {
state.logger->Warn("Trying to close non-existent file descriptor: {}"); state.logger->Warn("Trying to close non-existent file descriptor: {}");
@ -114,6 +120,7 @@ namespace skyline::service::nvdrv {
state.logger->Debug("fd: {}, eventId: 0x{:X}, device: {}", fd, eventId, devices.at(fd)->GetName()); state.logger->Debug("fd: {}, eventId: 0x{:X}, device: {}", fd, eventId, devices.at(fd)->GetName());
try { try {
std::shared_lock lock(deviceMutex);
return devices.at(fd)->QueryEvent(eventId); return devices.at(fd)->QueryEvent(eventId);
} catch (const std::exception &) { } catch (const std::exception &) {
throw exception("QueryEvent was called with invalid file descriptor: {}", fd); throw exception("QueryEvent was called with invalid file descriptor: {}", fd);

View File

@ -4,16 +4,27 @@
#pragma once #pragma once
#include <common.h> #include <common.h>
#include <kernel/types/KEvent.h>
#include "types.h" #include "types.h"
#include "devices/nvdevice.h"
#include "core/core.h" #include "core/core.h"
#include "devices/nvdevice.h"
namespace skyline::service::nvdrv { namespace skyline::service::nvdrv {
namespace device {
namespace nvhost {
class AsGpu;
}
}
class Driver { class Driver {
private: private:
const DeviceState &state; const DeviceState &state;
std::shared_mutex deviceMutex; //!< Protects access to `devices`
std::unordered_map<FileDescriptor, std::unique_ptr<device::NvDevice>> devices; std::unordered_map<FileDescriptor, std::unique_ptr<device::NvDevice>> devices;
friend device::nvhost::AsGpu; // For channel address space binding
public: public:
Core core; //!< The core global state object of nvdrv that is accessed by devices Core core; //!< The core global state object of nvdrv that is accessed by devices

View File

@ -4,7 +4,7 @@
#pragma once #pragma once
#include "soc/host1x.h" #include "soc/host1x.h"
#include "soc/gm20b.h" #include "soc/gm20b/gpfifo.h"
namespace skyline::soc { namespace skyline::soc {
/** /**
@ -14,8 +14,7 @@ namespace skyline::soc {
class SOC { class SOC {
public: public:
host1x::Host1X host1x; host1x::Host1X host1x;
gm20b::GM20B gm20b;
SOC(const DeviceState &state) : gm20b(state) {} SOC(const DeviceState &state) {}
}; };
} }

View File

@ -1,15 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "gm20b.h"
namespace skyline::soc::gm20b {
GM20B::GM20B(const DeviceState &state) :
fermi2D(state),
keplerMemory(state),
maxwell3D(state, gmmu, executor),
maxwellCompute(state),
maxwellDma(state),
gpfifo(state),
executor(state) {}
}

View File

@ -0,0 +1,17 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "engines/maxwell_3d.h" //TODO: remove
#include "channel.h"
namespace skyline::soc::gm20b {
/**
 * @brief Constructs the state owned by a single GPU channel: its command executor, engines and GPFIFO
 * @param state The device state forwarded to the executor, every engine and the GPFIFO
 * @param asCtx The address space context this channel keeps a shared reference to
 * @param numEntries Forwarded unchanged to the channel's GPFIFO constructor
 * @note Members are always initialised in the order they are declared in the struct, not in the order written here;
 *       the list below is kept in what appears to be the declaration order in channel.h so that it reads in the
 *       order it actually runs (keep the two in sync to avoid -Wreorder warnings)
 * @note `maxwell3D` and `gpfifo` are handed `*this` while the context is still being constructed, `maxwell3D` is
 *       additionally handed `executor`, which therefore has to be declared (and thus initialised) before it
 */
ChannelContext::ChannelContext(const DeviceState &state, std::shared_ptr<AddressSpaceContext> asCtx, size_t numEntries) :
    asCtx(std::move(asCtx)),
    executor(state),
    fermi2D(state),
    maxwell3D(std::make_unique<engine::maxwell3d::Maxwell3D>(state, *this, executor)),
    maxwellCompute(state),
    maxwellDma(state),
    keplerMemory(state),
    gpfifo(state, *this, numEntries) {}
}

View File

@ -4,26 +4,30 @@
#pragma once #pragma once
#include <gpu/interconnect/command_executor.h> #include <gpu/interconnect/command_executor.h>
#include "gm20b/engines/maxwell_3d.h" #include "engines/engine.h"
#include "gm20b/gpfifo.h" #include "gpfifo.h"
#include "gm20b/gmmu.h"
namespace skyline::soc::gm20b { namespace skyline::soc::gm20b {
namespace engine::maxwell3d {
class Maxwell3D;
}
struct AddressSpaceContext;
/** /**
* @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations * @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations
* @note We omit parts of components related to external access such as the grhost, all accesses to the external components are done directly * @note We omit parts of components related to external access such as the grhost, all accesses to the external components are done directly
*/ */
class GM20B { struct ChannelContext {
public: std::shared_ptr<AddressSpaceContext> asCtx;
GMMU gmmu;
gpu::interconnect::CommandExecutor executor; gpu::interconnect::CommandExecutor executor;
engine::Engine fermi2D; engine::Engine fermi2D;
engine::maxwell3d::Maxwell3D maxwell3D; std::unique_ptr<engine::maxwell3d::Maxwell3D> maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file
engine::Engine maxwellCompute; engine::Engine maxwellCompute;
engine::Engine maxwellDma; engine::Engine maxwellDma;
engine::Engine keplerMemory; engine::Engine keplerMemory;
GPFIFO gpfifo; ChannelGpfifo gpfifo;
GM20B(const DeviceState &state); ChannelContext(const DeviceState &state, std::shared_ptr<AddressSpaceContext> asCtx, size_t numEntries);
}; };
} }

View File

@ -3,10 +3,11 @@
// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d) // Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
#include <boost/preprocessor/repeat.hpp> #include <boost/preprocessor/repeat.hpp>
#include "maxwell_3d.h"
#include <soc.h> #include <soc.h>
namespace skyline::soc::gm20b::engine::maxwell3d { namespace skyline::soc::gm20b::engine::maxwell3d {
Maxwell3D::Maxwell3D(const DeviceState &state, GMMU &gmmu, gpu::interconnect::CommandExecutor &executor) : Engine(state), macroInterpreter(*this), context(*state.gpu, gmmu, executor) { Maxwell3D::Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : Engine(state), macroInterpreter(*this), context(*state.gpu, channelCtx, executor), channelCtx(channelCtx) {
ResetRegs(); ResetRegs();
} }
@ -244,7 +245,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
MAXWELL3D_CASE(syncpointAction, { MAXWELL3D_CASE(syncpointAction, {
state.logger->Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id)); state.logger->Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
state.soc->gm20b.executor.Execute(); channelCtx.executor.Execute();
state.soc->host1x.syncpoints.at(syncpointAction.id).Increment(); state.soc->host1x.syncpoints.at(syncpointAction.id).Increment();
}) })
@ -307,7 +308,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
switch (registers.semaphore.info.structureSize) { switch (registers.semaphore.info.structureSize) {
case type::SemaphoreInfo::StructureSize::OneWord: case type::SemaphoreInfo::StructureSize::OneWord:
state.soc->gm20b.gmmu.Write<u32>(registers.semaphore.address.Pack(), static_cast<u32>(result)); channelCtx.asCtx->gmmu.Write<u32>(registers.semaphore.address.Pack(), static_cast<u32>(result));
break; break;
case type::SemaphoreInfo::StructureSize::FourWords: { case type::SemaphoreInfo::StructureSize::FourWords: {
@ -318,7 +319,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u64 nsTime{util::GetTimeNs()}; u64 nsTime{util::GetTimeNs()};
u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator}; u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
state.soc->gm20b.gmmu.Write<FourWordResult>(registers.semaphore.address.Pack(), FourWordResult{result, timestamp}); channelCtx.asCtx->gmmu.Write<FourWordResult>(registers.semaphore.address.Pack(), FourWordResult{result, timestamp});
break; break;
} }
} }

View File

@ -8,6 +8,10 @@
#include "engine.h" #include "engine.h"
#include "maxwell/macro_interpreter.h" #include "maxwell/macro_interpreter.h"
namespace skyline::soc::gm20b {
struct ChannelContext;
}
namespace skyline::soc::gm20b::engine::maxwell3d { namespace skyline::soc::gm20b::engine::maxwell3d {
/** /**
* @brief The Maxwell 3D engine handles processing 3D graphics * @brief The Maxwell 3D engine handles processing 3D graphics
@ -245,9 +249,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
Registers registers{}; Registers registers{};
Registers shadowRegisters{}; //!< A shadow-copy of the registers, their function is controlled by the 'shadowRamControl' register Registers shadowRegisters{}; //!< A shadow-copy of the registers, their function is controlled by the 'shadowRamControl' register
ChannelContext &channelCtx;
std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow std::array<u32, 0x2000> macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow
Maxwell3D(const DeviceState &state, GMMU &gmmu, gpu::interconnect::CommandExecutor &executor); Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor);
/** /**
* @brief Resets the Maxwell 3D registers to their default values * @brief Resets the Maxwell 3D registers to their default values

View File

@ -10,8 +10,12 @@ namespace skyline::soc::gm20b {
/** /**
* @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1 * @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment * @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't need to emulate this abstraction
* @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager * @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager
*/ */
using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits>; using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits>;
struct AddressSpaceContext {
GMMU gmmu;
};
} }

View File

@ -6,6 +6,7 @@
#include <kernel/types/KProcess.h> #include <kernel/types/KProcess.h>
#include <soc.h> #include <soc.h>
#include <os.h> #include <os.h>
#include "engines/maxwell_3d.h"
namespace skyline::soc::gm20b { namespace skyline::soc::gm20b {
/** /**
@ -58,7 +59,14 @@ namespace skyline::soc::gm20b {
}; };
static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32)); static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32));
void GPFIFO::Send(u32 method, u32 argument, u32 subChannel, bool lastCall) { ChannelGpfifo::ChannelGpfifo(const DeviceState &state, ChannelContext &channelCtx, size_t numEntries) :
state(state),
gpfifoEngine(state),
channelCtx(channelCtx),
gpEntries(numEntries),
thread(std::thread(&ChannelGpfifo::Run, this)) {}
void ChannelGpfifo::Send(u32 method, u32 argument, u32 subChannel, bool lastCall) {
constexpr u32 ThreeDSubChannel{0}; constexpr u32 ThreeDSubChannel{0};
constexpr u32 ComputeSubChannel{1}; constexpr u32 ComputeSubChannel{1};
constexpr u32 Inline2MemorySubChannel{2}; constexpr u32 Inline2MemorySubChannel{2};
@ -72,19 +80,19 @@ namespace skyline::soc::gm20b {
} else { } else {
switch (subChannel) { switch (subChannel) {
case ThreeDSubChannel: case ThreeDSubChannel:
state.soc->gm20b.maxwell3D.CallMethod(method, argument, lastCall); channelCtx.maxwell3D->CallMethod(method, argument, lastCall);
break; break;
case ComputeSubChannel: case ComputeSubChannel:
state.soc->gm20b.maxwellCompute.CallMethod(method, argument, lastCall); channelCtx.maxwellCompute.CallMethod(method, argument, lastCall);
break; break;
case Inline2MemorySubChannel: case Inline2MemorySubChannel:
state.soc->gm20b.keplerMemory.CallMethod(method, argument, lastCall); channelCtx.keplerMemory.CallMethod(method, argument, lastCall);
break; break;
case TwoDSubChannel: case TwoDSubChannel:
state.soc->gm20b.fermi2D.CallMethod(method, argument, lastCall); channelCtx.fermi2D.CallMethod(method, argument, lastCall);
break; break;
case CopySubChannel: case CopySubChannel:
state.soc->gm20b.maxwellDma.CallMethod(method, argument, lastCall); channelCtx.maxwellDma.CallMethod(method, argument, lastCall);
break; break;
default: default:
throw exception("Tried to call into a software subchannel: {}!", subChannel); throw exception("Tried to call into a software subchannel: {}!", subChannel);
@ -92,7 +100,7 @@ namespace skyline::soc::gm20b {
} }
} }
void GPFIFO::Process(GpEntry gpEntry) { void ChannelGpfifo::Process(GpEntry gpEntry) {
if (!gpEntry.size) { if (!gpEntry.size) {
// This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers // This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers
switch (gpEntry.opcode) { switch (gpEntry.opcode) {
@ -105,7 +113,7 @@ namespace skyline::soc::gm20b {
} }
pushBufferData.resize(gpEntry.size); pushBufferData.resize(gpEntry.size);
state.soc->gm20b.gmmu.Read<u32>(pushBufferData, gpEntry.Address()); channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) { for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it // An entry containing all zeroes is a NOP, skip over it
@ -142,18 +150,11 @@ namespace skyline::soc::gm20b {
} }
} }
void GPFIFO::Initialize(size_t numBuffers) { void ChannelGpfifo::Run() {
if (pushBuffers)
throw exception("GPFIFO Initialization cannot be done multiple times");
pushBuffers.emplace(numBuffers);
thread = std::thread(&GPFIFO::Run, this);
}
void GPFIFO::Run() {
pthread_setname_np(pthread_self(), "GPFIFO"); pthread_setname_np(pthread_self(), "GPFIFO");
try { try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
pushBuffers->Process([this](GpEntry gpEntry) { gpEntries.Process([this](GpEntry gpEntry) {
state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address()); state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address());
Process(gpEntry); Process(gpEntry);
}); });
@ -170,11 +171,11 @@ namespace skyline::soc::gm20b {
} }
} }
void GPFIFO::Push(span<GpEntry> entries) { void ChannelGpfifo::Push(span<GpEntry> entries) {
pushBuffers->Append(entries); gpEntries.Append(entries);
} }
GPFIFO::~GPFIFO() { ChannelGpfifo::~ChannelGpfifo() {
if (thread.joinable()) { if (thread.joinable()) {
pthread_kill(thread.native_handle(), SIGINT); pthread_kill(thread.native_handle(), SIGINT);
thread.join(); thread.join();

View File

@ -7,6 +7,8 @@
#include "engines/gpfifo.h" #include "engines/gpfifo.h"
namespace skyline::soc::gm20b { namespace skyline::soc::gm20b {
struct ChannelContext;
/** /**
* @brief A GPFIFO entry as submitted through 'SubmitGpfifo' * @brief A GPFIFO entry as submitted through 'SubmitGpfifo'
* @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt
@ -73,15 +75,16 @@ namespace skyline::soc::gm20b {
static_assert(sizeof(GpEntry) == sizeof(u64)); static_assert(sizeof(GpEntry) == sizeof(u64));
/** /**
* @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them * @brief The ChannelGpfifo class handles creating pushbuffers from GP entries and then processing them for a single channel
* @note A single ChannelGpfifo thread exists per channel with a single shared mutex in `GPFIFO` to enforce that only one channel can run at a time
* @note This class doesn't perfectly map to any particular hardware component on the X1, it does a mix of the GPU Host PBDMA and handling the GPFIFO entries * @note This class doesn't perfectly map to any particular hardware component on the X1, it does a mix of the GPU Host PBDMA and handling the GPFIFO entries
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
*/ */
class GPFIFO { class ChannelGpfifo {
const DeviceState &state; const DeviceState &state;
ChannelContext &channelCtx;
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
std::array<engine::Engine*, 8> subchannels; CircularQueue<GpEntry> gpEntries;
std::optional<CircularQueue<GpEntry>> pushBuffers;
std::thread thread; //!< The thread that manages processing of pushbuffers std::thread thread; //!< The thread that manages processing of pushbuffers
std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
@ -96,14 +99,12 @@ namespace skyline::soc::gm20b {
void Process(GpEntry gpEntry); void Process(GpEntry gpEntry);
public: public:
GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {}
~GPFIFO();
/** /**
* @param numBuffers The amount of push-buffers to allocate in the circular buffer * @param numEntries The number of gpEntries to allocate space for in the FIFO
*/ */
void Initialize(size_t numBuffers); ChannelGpfifo(const DeviceState &state, ChannelContext &channelCtx, size_t numEntries);
~ChannelGpfifo();
/** /**
* @brief Executes all pending entries in the FIFO * @brief Executes all pending entries in the FIFO