Mirror of https://github.com/skyline-emu/skyline.git
Implement support for pushbuffer methods split across multiple GpEntries
These methods are used heavily in OpenGL games, which now, together with the previous syncpoint changes, work perfectly. The implementation is somewhat novel: rather than using a per-class state machine for all methods, the state machine is used only for methods that are known to be split across GpEntry boundaries. As a result, only a single bounds check is added to the hot path of contiguous method execution, and the performance loss is negligible.
Commit b7d0f2fafa (parent fc017e1e95)
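The idea in the commit message can be summarised with a minimal, self-contained C++ sketch. Everything below is illustrative only: the 16-bit count / 16-bit address header layout, the global ResumeState, and the simplified Send/Process helpers are made-up stand-ins for skyline's real PushBufferMethodHeader, MethodResumeState, and ChannelGpfifo members, and subchannels as well as the NonInc/OneInc variants are omitted. It only shows how a method whose argument count exceeds the words left in the current buffer stashes its state and is drained at the start of the next buffer, while the contiguous case stays a plain loop.

#include <cstdint>
#include <cstdio>
#include <vector>

// State carried across buffer boundaries when a method's arguments are split
// over more than one pushbuffer segment (plays the role of MethodResumeState)
struct ResumeState {
    uint32_t remaining{}; // Arguments still expected by the split method
    uint32_t address{};   // Current target method address
    bool increment{};     // Whether the address advances with every argument
};

static ResumeState resume{};

static void Send(uint32_t address, uint32_t argument) {
    std::printf("method 0x%04X <- 0x%08X\n", static_cast<unsigned>(address), static_cast<unsigned>(argument));
}

// Processes one buffer of 32-bit words; a word is either a header
// (count in the upper 16 bits, address in the lower 16) or an argument
static void Process(const std::vector<uint32_t> &data) {
    auto entry{data.begin()};

    // Drain a method left over from the previous buffer first
    while (entry != data.end() && resume.remaining) {
        Send(resume.address, *entry++);
        if (resume.increment)
            resume.address++;
        resume.remaining--;
    }

    while (entry != data.end()) {
        uint32_t header{*entry++};
        uint32_t count{header >> 16}, address{header & 0xFFFFu};
        auto available{static_cast<uint32_t>(data.end() - entry)};

        if (count <= available) {
            // Hot path: every argument is contiguous within this buffer
            for (uint32_t i{}; i < count; i++)
                Send(address + i, *entry++);
        } else {
            // Split method: stash the state so the next buffer can resume it
            resume = {.remaining = count, .address = address, .increment = true};
            while (entry != data.end()) {
                Send(resume.address++, *entry++);
                resume.remaining--;
            }
            return;
        }
    }
}

int main() {
    // A 3-argument incrementing method whose last argument only arrives with the second buffer
    Process({0x0003'0100u, 0xAAu, 0xBBu});
    Process({0xCCu});
}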
@@ -62,12 +62,12 @@ namespace skyline::service::nvdrv::device::nvhost {
      using Allocator = FlatAllocator<u32, 0, 32>;

      std::unique_ptr<Allocator> bigPageAllocator;
-     std::shared_ptr<Allocator> smallPageAllocator; // Shared as this is also used by nvhost::GpuChannel
+     std::shared_ptr<Allocator> smallPageAllocator; //!< Shared as this is also used by nvhost::GpuChannel

      bool initialised{};
  } vm;

- std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx;
+ std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx; //!< The guest GPU AS context that is associated with each particular instance

  friend GpuChannel;
@@ -23,14 +23,14 @@ namespace skyline::service::nvdrv::device::nvhost {
  std::shared_ptr<type::KEvent> smExceptionBreakpointPauseReportEvent;
  std::shared_ptr<type::KEvent> errorNotifierEvent;

- std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx;
- std::shared_ptr<AsGpu::VM::Allocator> asAllocator;
- std::unique_ptr<soc::gm20b::ChannelContext> channelCtx;
+ std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx; //!< The guest GPU AS context submits from this channel are bound to
+ std::shared_ptr<AsGpu::VM::Allocator> asAllocator; //!< The small page allocator context for the AS that's bound to this channel, used to allocate space for `pushBufferMemory`
+ std::unique_ptr<soc::gm20b::ChannelContext> channelCtx; //!< The entire guest GPU context specific to this channel

- u64 pushBufferAddr{};
- size_t pushBufferMemoryOffset{};
- std::vector<u32> pushBufferMemory;
+ u64 pushBufferAddr{}; //!< The GPU address `pushBufferMemory` is mapped to
+ size_t pushBufferMemoryOffset{}; //!< The current offset for which to write new pushbuffer method data into for post-increment and pre-wait
+ std::vector<u32> pushBufferMemory; //!< Mapped into the guest GPU As and used to store method data for pre/post increment commands

  friend AsGpu;
@@ -115,28 +115,92 @@ namespace skyline::soc::gm20b {
      pushBufferData.resize(gpEntry.size);
      channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address());

-     for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
+     // There will be at least one entry here
+     auto entry{pushBufferData.begin()};
+
+     // Executes the current split method, returning once execution is finished or the current GpEntry has reached its end
+     auto resumeSplitMethod{[&](){
+         switch (resumeState.state) {
+             case MethodResumeState::State::Inc:
+                 while (entry != pushBufferData.end() && resumeState.remaining)
+                     Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+                 break;
+             case MethodResumeState::State::OneInc:
+                 Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+
+                 // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries
+                 resumeState.state = MethodResumeState::State::NonInc;
+                 [[fallthrough]];
+             case MethodResumeState::State::NonInc:
+                 while (entry != pushBufferData.end() && resumeState.remaining)
+                     Send(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
+                 break;
+         }
+     }};
+
+     // We've a method from a previous GpEntry that needs resuming
+     if (resumeState.remaining)
+         resumeSplitMethod();
+
+     // Process more methods if the entries are still not all used up after handling resuming
+     for (; entry != pushBufferData.end(); entry++) {
          // An entry containing all zeroes is a NOP, skip over it
          if (*entry == 0)
              continue;

          PushBufferMethodHeader methodHeader{.raw = *entry};

+         // Needed in order to check for methods split across multiple GpEntries
+         auto remainingEntries{std::distance(entry, pushBufferData.end()) - 1};
+
+         // Handles storing state and initial execution for methods that are split across multiple GpEntries
+         auto startSplitMethod{[&](auto methodState) {
+             resumeState = {
+                 .remaining = methodHeader.methodCount,
+                 .address = methodHeader.methodAddress,
+                 .subChannel = methodHeader.methodSubChannel,
+                 .state = methodState
+             };
+
+             // Skip over method header as `resumeSplitMethod` doesn't expect it to be there
+             entry++;
+
+             resumeSplitMethod();
+         }};

          switch (methodHeader.secOp) {
              case PushBufferMethodHeader::SecOp::IncMethod:
-                 for (u32 i{}; i < methodHeader.methodCount; i++)
-                     Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
-                 break;
+                 if (remainingEntries >= methodHeader.methodCount) {
+                     for (u32 i{}; i < methodHeader.methodCount; i++)
+                         Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
+                     break;
+                 } else {
+                     startSplitMethod(MethodResumeState::State::Inc);
+                     return;
+                 }
              case PushBufferMethodHeader::SecOp::NonIncMethod:
-                 for (u32 i{}; i < methodHeader.methodCount; i++)
-                     Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
-                 break;
+                 if (remainingEntries >= methodHeader.methodCount) {
+                     for (u32 i{}; i < methodHeader.methodCount; i++)
+                         Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
+                     break;
+                 } else {
+                     startSplitMethod(MethodResumeState::State::NonInc);
+                     return;
+                 }
              case PushBufferMethodHeader::SecOp::OneInc:
-                 for (u32 i{}; i < methodHeader.methodCount; i++)
-                     Send(methodHeader.methodAddress + !!i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
-                 break;
+                 if (remainingEntries >= methodHeader.methodCount) {
+                     for (u32 i{}; i < methodHeader.methodCount; i++)
+                         Send(methodHeader.methodAddress + (i ? 1 : 0), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
+                     break;
+                 } else {
+                     startSplitMethod(MethodResumeState::State::OneInc);
+                     return;
+                 }
              case PushBufferMethodHeader::SecOp::ImmdDataMethod:
                  Send(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
                  break;
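For a concrete sense of the bounds check added above, the following hypothetical walk-through uses made-up values (a buffer of eight words with a header at index 5 requesting four arguments) to show which branch would be taken; it is not taken from any real pushbuffer.

#include <cstddef>
#include <cstdio>
#include <iterator>
#include <vector>

int main() {
    std::vector<unsigned> pushBufferData(8);   // one GpEntry's worth of 32-bit words
    auto entry{pushBufferData.begin() + 5};    // a method header sitting at index 5
    std::ptrdiff_t methodCount{4};             // the header requests four arguments

    // Same expression as in the diff: words left in this GpEntry after the header itself
    auto remainingEntries{std::distance(entry, pushBufferData.end()) - 1}; // == 2

    if (remainingEntries >= methodCount)
        std::puts("contiguous: hot path, plain loop over the arguments");
    else
        std::puts("split: record MethodResumeState and resume in the next GpEntry");
}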
@@ -154,6 +218,7 @@ namespace skyline::soc::gm20b {
      pthread_setname_np(pthread_self(), "GPFIFO");
      try {
          signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);

          gpEntries.Process([this](GpEntry gpEntry) {
              state.logger->Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size);
              Process(gpEntry);
@@ -87,6 +87,7 @@ namespace skyline::soc::gm20b {
   * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
   */
  class ChannelGpfifo {
+   private:
      const DeviceState &state;
      ChannelContext &channelCtx;
      engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
@@ -94,11 +95,32 @@ namespace skyline::soc::gm20b {
      std::thread thread; //!< The thread that manages processing of pushbuffers
      std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations

+     /**
+      * @brief Holds the required state in order to resume a method started from one call to `Process` in another
+      * @note This is needed as games (especially OpenGL ones) can split method entries over multiple GpEntries
+      */
+     struct MethodResumeState {
+         u32 remaining; //!< The number of entries left to handle until the method is finished
+         u32 address; //!< The method address in the GPU block specified by `subchannel` that is the target of the command
+         u8 subChannel;
+
+         /**
+          * @brief This is a simplified version of the full method type enum
+          */
+         enum class State : u8 {
+             NonInc,
+             Inc,
+             OneInc //!< Will be switched to NonInc after the first call
+         } state; //!< The type of method to resume
+     } resumeState{};
+
      /**
       * @brief Sends a method call to the GPU hardware
       */
      void Send(u32 method, u32 argument, u32 subchannel, bool lastCall);

      /**
       * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
       */
@@ -118,7 +140,7 @@ namespace skyline::soc::gm20b {
      void Run();

      /**
-      * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step'
+      * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Process'
       */
      void Push(span<GpEntry> entries);