Rework GPFIFO pushing to optimise performance and accuracy

* Pushbuffer data is now stored in a member buffer to avoid reallocating
  it for each pushbuffer, which previously hampered performance.
* Don't prefetch pushbuffers, as doing so puts unnecessary load on the
  guest thread; that work is better suited to the GPFIFO thread.
* Clean up some misc code to avoid pointless casts of a 4-byte object,
  and add handling for GPFIFO control opcodes.
This commit is contained in:
Billy Laws 2021-01-21 20:36:02 +00:00 committed by ◱ Mark
parent 78cdb1eeb4
commit c1aec00ed1
2 changed files with 35 additions and 46 deletions

View File

@ -45,36 +45,51 @@ namespace skyline::gpu::gpfifo {
}
}
void GPFIFO::Process(const std::vector<u32> &segment) {
for (auto entry{segment.begin()}; entry != segment.end(); entry++) {
void GPFIFO::Process(GpEntry gpEntry) {
if (!gpEntry.size) {
// This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers
switch (gpEntry.opcode) {
case GpEntry::Opcode::Nop:
return;
default:
state.logger->Warn("Unsupported GpEntry control opcode used: {}", static_cast<u8>(gpEntry.opcode));
return;
}
}
pushBufferData.resize(gpEntry.size);
state.gpu->memoryManager.Read<u32>(pushBufferData, gpEntry.Address());
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
// An entry containing all zeroes is a NOP, skip over it
if (*entry == 0)
continue;
auto methodHeader{reinterpret_cast<const PushBufferMethodHeader *>(&*entry)};
PushBufferMethodHeader methodHeader{.raw = *entry};
switch (methodHeader->secOp) {
switch (methodHeader.secOp) {
case PushBufferMethodHeader::SecOp::IncMethod:
for (u16 i{}; i < methodHeader->methodCount; i++)
Send(MethodParams{static_cast<u16>(methodHeader->methodAddress + i), *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1});
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{static_cast<u16>(methodHeader.methodAddress + i), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
break;
case PushBufferMethodHeader::SecOp::NonIncMethod:
for (u16 i{}; i < methodHeader->methodCount; i++)
Send(MethodParams{methodHeader->methodAddress, *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1});
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{methodHeader.methodAddress, *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
break;
case PushBufferMethodHeader::SecOp::OneInc:
for (u16 i{}; i < methodHeader->methodCount; i++)
Send(MethodParams{static_cast<u16>(methodHeader->methodAddress + static_cast<bool>(i)), *++entry, methodHeader->methodSubChannel, i == methodHeader->methodCount - 1});
for (u16 i{}; i < methodHeader.methodCount; i++)
Send(MethodParams{static_cast<u16>(methodHeader.methodAddress + static_cast<bool>(i)), *++entry, methodHeader.methodSubChannel, i == methodHeader.methodCount - 1});
break;
case PushBufferMethodHeader::SecOp::ImmdDataMethod:
Send(MethodParams{methodHeader->methodAddress, methodHeader->immdData, methodHeader->methodSubChannel, true});
Send(MethodParams{methodHeader.methodAddress, methodHeader.immdData, methodHeader.methodSubChannel, true});
break;
case PushBufferMethodHeader::SecOp::EndPbSegment:
return;
default:
state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
break;
}
}
@ -91,12 +106,9 @@ namespace skyline::gpu::gpfifo {
pthread_setname_np(pthread_self(), "GPFIFO");
try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
pushBuffers->Process([this](PushBuffer &pushBuffer) {
if (pushBuffer.segment.empty())
pushBuffer.Fetch(state.gpu->memoryManager);
state.logger->Debug("Processing pushbuffer: 0x{:X}", pushBuffer.gpEntry.Address());
Process(pushBuffer.segment);
pushBuffers->Process([this](GpEntry gpEntry) {
state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address());
Process(gpEntry);
});
} catch (const signal::SignalException &e) {
if (e.signal != SIGINT) {
@ -112,12 +124,7 @@ namespace skyline::gpu::gpfifo {
}
void GPFIFO::Push(span<GpEntry> entries) {
bool beforeBarrier{true};
pushBuffers->AppendTranform(entries, [&beforeBarrier, this](const GpEntry &entry) {
if (entry.sync == GpEntry::Sync::Wait)
beforeBarrier = false;
return PushBuffer(entry, state.gpu->memoryManager, beforeBarrier);
});
pushBuffers->Append(entries);
}
GPFIFO::~GPFIFO() {

View File

@ -129,35 +129,17 @@ namespace skyline::gpu {
* @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62
*/
class GPFIFO {
private:
/**
* @brief A pushbuffer is a descriptor of tasks that need to be executed for a specific client
*/
struct PushBuffer {
GpEntry gpEntry;
std::vector<u32> segment;
PushBuffer(const GpEntry &gpEntry, const vmm::MemoryManager &memoryManager, bool fetch) : gpEntry(gpEntry) {
if (fetch)
Fetch(memoryManager);
}
inline void Fetch(const vmm::MemoryManager &memoryManager) {
segment.resize(gpEntry.size);
memoryManager.Read<u32>(segment, gpEntry.Address());
}
};
const DeviceState &state;
engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls
std::array<std::shared_ptr<engine::Engine>, 8> subchannels;
std::optional<CircularQueue<PushBuffer>> pushBuffers;
std::thread thread; //!< The thread that manages processing of push-buffers
std::optional<CircularQueue<GpEntry>> pushBuffers;
std::thread thread; //!< The thread that manages processing of pushbuffers
std::vector<u32> pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations
/**
* @brief Processes a pushbuffer segment, calling methods as needed
* @brief Processes the pushbuffer contained within the given GpEntry, calling methods as needed
*/
void Process(const std::vector<u32> &segment);
void Process(GpEntry gpEntry);
/**
* @brief Sends a method call to the GPU hardware