Implement support for pushbuffer methods split across multiple GpEntries

These are used heavily in OpenGL games, which now, together with the
previous syncpoint changes, work perfectly. The implementation is
somewhat novel: rather than using a per-class state machine for all
methods, we only use one for those that are known to be split across
GpEntry boundaries. As a result, only a single bounds check is added to
the hot path of contiguous method execution and the performance loss is
negligible.
This commit is contained in:
Billy Laws 2021-10-13 21:46:30 +01:00
parent fc017e1e95
commit b7d0f2fafa
4 changed files with 106 additions and 19 deletions

View File

@ -62,12 +62,12 @@ namespace skyline::service::nvdrv::device::nvhost {
using Allocator = FlatAllocator<u32, 0, 32>; using Allocator = FlatAllocator<u32, 0, 32>;
std::unique_ptr<Allocator> bigPageAllocator; std::unique_ptr<Allocator> bigPageAllocator;
std::shared_ptr<Allocator> smallPageAllocator; // Shared as this is also used by nvhost::GpuChannel std::shared_ptr<Allocator> smallPageAllocator; //! Shared as this is also used by nvhost::GpuChannel
bool initialised{}; bool initialised{};
} vm; } vm;
std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx; std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx; //!< The guest GPU AS context that is associated with each particular instance
friend GpuChannel; friend GpuChannel;

View File

@ -23,14 +23,14 @@ namespace skyline::service::nvdrv::device::nvhost {
std::shared_ptr<type::KEvent> smExceptionBreakpointPauseReportEvent; std::shared_ptr<type::KEvent> smExceptionBreakpointPauseReportEvent;
std::shared_ptr<type::KEvent> errorNotifierEvent; std::shared_ptr<type::KEvent> errorNotifierEvent;
std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx; std::shared_ptr<soc::gm20b::AddressSpaceContext> asCtx; //!< The guest GPU AS context submits from this channel are bound to
std::shared_ptr<AsGpu::VM::Allocator> asAllocator; std::shared_ptr<AsGpu::VM::Allocator> asAllocator; //!< The small page allocator context for the AS that's bound to this channel, used to allocate space for `pushBufferMemory`
std::unique_ptr<soc::gm20b::ChannelContext> channelCtx; std::unique_ptr<soc::gm20b::ChannelContext> channelCtx; //!< The entire guest GPU context specific to this channel
u64 pushBufferAddr{}; u64 pushBufferAddr{}; //!< The GPU address `pushBufferMemory` is mapped to
size_t pushBufferMemoryOffset{}; size_t pushBufferMemoryOffset{}; //!< The current offset for which to write new pushbuffer method data into for post-increment and pre-wait
std::vector<u32> pushBufferMemory; std::vector<u32> pushBufferMemory; //!< Mapped into the guest GPU As and used to store method data for pre/post increment commands
friend AsGpu; friend AsGpu;

View File

@ -115,28 +115,92 @@ namespace skyline::soc::gm20b {
pushBufferData.resize(gpEntry.size); pushBufferData.resize(gpEntry.size);
channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address()); channelCtx.asCtx->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) { // There will be at least one entry here
auto entry{pushBufferData.begin()};
// Executes the current split method, returning once execution is finished or the current GpEntry has reached its end
// Progress is tracked in `resumeState` (remaining argument count, target address and method type) so that a later call can carry on from the first word of the next GpEntry
auto resumeSplitMethod{[&]() {
    switch (resumeState.state) {
        case MethodResumeState::State::Inc:
            // Each argument is sent to the next consecutive method address
            while (entry != pushBufferData.end() && resumeState.remaining)
                Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
            break;

        case MethodResumeState::State::OneInc:
            // The method header may have been the very last word of the GpEntry, in which case there is no argument to read yet
            // Bail out without touching the state so that the single increment is performed when the next GpEntry resumes this method
            if (entry == pushBufferData.end() || !resumeState.remaining)
                break;

            Send(resumeState.address++, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);

            // After the first increment OneInc methods work the same as a NonInc method, this is needed so they can resume correctly if they are broken up by multiple GpEntries
            resumeState.state = MethodResumeState::State::NonInc;
            [[fallthrough]];

        case MethodResumeState::State::NonInc:
            // Every argument is sent to the same method address
            while (entry != pushBufferData.end() && resumeState.remaining)
                Send(resumeState.address, *(entry++), resumeState.subChannel, --resumeState.remaining == 0);
            break;
    }
}};
// We've a method from a previous GpEntry that needs resuming
if (resumeState.remaining)
resumeSplitMethod();
// Process more methods if the entries are still not all used up after handling resuming
for (; entry != pushBufferData.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it // An entry containing all zeroes is a NOP, skip over it
if (*entry == 0) if (*entry == 0)
continue; continue;
PushBufferMethodHeader methodHeader{.raw = *entry}; PushBufferMethodHeader methodHeader{.raw = *entry};
// Needed in order to check for methods split across multiple GpEntries
auto remainingEntries{std::distance(entry, pushBufferData.end()) - 1};
// Handles storing state and initial execution for methods that are split across multiple GpEntries
auto startSplitMethod{[&](auto methodState) {
resumeState = {
.remaining = methodHeader.methodCount,
.address = methodHeader.methodAddress,
.subChannel = methodHeader.methodSubChannel,
.state = methodState
};
// Skip over method header as `resumeSplitMethod` doesn't expect it to be there
entry++;
resumeSplitMethod();
}};
switch (methodHeader.secOp) { switch (methodHeader.secOp) {
case PushBufferMethodHeader::SecOp::IncMethod: case PushBufferMethodHeader::SecOp::IncMethod:
for (u32 i{}; i < methodHeader.methodCount; i++) if (remainingEntries >= methodHeader.methodCount) {
Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); for (u32 i{}; i < methodHeader.methodCount; i++)
break; Send(methodHeader.methodAddress + i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
break;
} else {
startSplitMethod(MethodResumeState::State::Inc);
return;
}
case PushBufferMethodHeader::SecOp::NonIncMethod: case PushBufferMethodHeader::SecOp::NonIncMethod:
for (u32 i{}; i < methodHeader.methodCount; i++) if (remainingEntries >= methodHeader.methodCount) {
Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); for (u32 i{}; i < methodHeader.methodCount; i++)
break; Send(methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
break;
} else {
startSplitMethod(MethodResumeState::State::NonInc);
return;
}
case PushBufferMethodHeader::SecOp::OneInc: case PushBufferMethodHeader::SecOp::OneInc:
for (u32 i{}; i < methodHeader.methodCount; i++) if (remainingEntries >= methodHeader.methodCount) {
Send(methodHeader.methodAddress + !!i, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1); for (u32 i{}; i < methodHeader.methodCount; i++)
break; Send(methodHeader.methodAddress + (i ? 1 : 0), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1);
break;
} else {
startSplitMethod(MethodResumeState::State::OneInc);
return;
}
case PushBufferMethodHeader::SecOp::ImmdDataMethod: case PushBufferMethodHeader::SecOp::ImmdDataMethod:
Send(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true); Send(methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true);
break; break;
@ -154,6 +218,7 @@ namespace skyline::soc::gm20b {
pthread_setname_np(pthread_self(), "GPFIFO"); pthread_setname_np(pthread_self(), "GPFIFO");
try { try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
gpEntries.Process([this](GpEntry gpEntry) { gpEntries.Process([this](GpEntry gpEntry) {
state.logger->Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size); state.logger->Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size);
Process(gpEntry); Process(gpEntry);

View File

@ -87,6 +87,7 @@ namespace skyline::soc::gm20b {
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
*/ */
class ChannelGpfifo { class ChannelGpfifo {
private:
const DeviceState &state; const DeviceState &state;
ChannelContext &channelCtx; ChannelContext &channelCtx;
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
@ -94,11 +95,32 @@ namespace skyline::soc::gm20b {
std::thread thread; //!< The thread that manages processing of pushbuffers std::thread thread; //!< The thread that manages processing of pushbuffers
std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
/**
* @brief Holds the required state in order to resume a method started from one call to `Process` in another
* @note This is needed as games (especially OpenGL ones) can split method entries over multiple GpEntries
*/
struct MethodResumeState {
u32 remaining; //!< The number of entries left to handle until the method is finished
u32 address; //!< The method address in the GPU block specified by `subchannel` that is the target of the command
u8 subChannel;
/**
* @brief This is a simplified version of the full method type enum
*/
enum class State : u8 {
NonInc,
Inc,
OneInc //!< Will be switched to NonInc after the first call
} state; //!< The type of method to resume
} resumeState{};
/** /**
* @brief Sends a method call to the GPU hardware * @brief Sends a method call to the GPU hardware
*/ */
void Send(u32 method, u32 argument, u32 subchannel, bool lastCall); void Send(u32 method, u32 argument, u32 subchannel, bool lastCall);
/** /**
* @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed * @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
*/ */
@ -118,7 +140,7 @@ namespace skyline::soc::gm20b {
void Run(); void Run();
/** /**
* @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Step' * @brief Pushes a list of entries to the FIFO, these commands will be executed on calls to 'Process'
*/ */
void Push(span<GpEntry> entries); void Push(span<GpEntry> entries);