Add a global gm20b channel lock

Allowing channels to execute in parallel never really benefitted many games, and it prevented optimisations such as keeping frequently used resources permanently locked to avoid the constant overhead of locking on the hot path.
This commit is contained in:
Billy Laws 2022-10-09 12:51:27 +01:00
parent 57a4699bd1
commit 3e8bd26978
5 changed files with 30 additions and 3 deletions

View File

@@ -57,6 +57,8 @@ namespace skyline::gpu {
         cache::RenderPassCache renderPassCache;
         cache::FramebufferCache framebufferCache;
 
+        std::mutex channelLock;
+
         GPU(const DeviceState &state);
     };
 }

View File

@@ -12,5 +12,6 @@ namespace skyline::soc::gm20b {
           maxwellDma(state, *this, executor),
           keplerCompute(state, *this),
           inline2Memory(*this),
-          gpfifo(state, *this, numEntries) {}
+          gpfifo(state, *this, numEntries),
+          globalChannelLock{state.gpu->channelLock} {}
 }

View File

@@ -30,7 +30,18 @@ namespace skyline::soc::gm20b {
         engine::KeplerCompute keplerCompute;
         engine::Inline2Memory inline2Memory;
         ChannelGpfifo gpfifo;
+        std::mutex &globalChannelLock;
 
         ChannelContext(const DeviceState &state, std::shared_ptr<AddressSpaceContext> asCtx, size_t numEntries);
+
+        void Lock() {
+            globalChannelLock.lock();
+            executor.LockPreserve();
+        }
+
+        void Unlock() {
+            executor.UnlockPreserve();
+            globalChannelLock.unlock();
+        }
     };
 }

View File

@@ -25,7 +25,10 @@ namespace skyline::soc::gm20b::engine {
             Logger::Debug("Wait syncpoint: {}, thresh: {}", +action.index, registers.syncpoint->payload);
 
             // Wait forever for another channel to increment
+            channelCtx.Unlock();
             syncpoints.at(action.index).Wait(registers.syncpoint->payload, std::chrono::steady_clock::duration::max());
+            channelCtx.Lock();
+
         }
     })

View File

@@ -340,13 +340,23 @@ namespace skyline::soc::gm20b {
             signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE}, signal::ExceptionalSignalHandler);
             signal::SetSignalHandler({SIGSEGV}, nce::NCE::HostSignalHandler); // We may access NCE trapped memory
 
-            gpEntries.Process([this](GpEntry gpEntry) {
+            bool channelLocked{};
+            gpEntries.Process([this, &channelLocked](GpEntry gpEntry) {
                 Logger::Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size);
+
+                if (!channelLocked) {
+                    channelCtx.Lock();
+                    channelLocked = true;
+                }
+
                 Process(gpEntry);
-            }, [this]() {
+            }, [this, &channelLocked]() {
                 // If we run out of GpEntries to process ensure we submit any remaining GPU work before waiting for more to arrive
                 Logger::Debug("Finished processing pushbuffer batch");
                 channelCtx.executor.Submit();
+                channelCtx.Unlock();
+                channelLocked = false;
             });
         } catch (const signal::SignalException &e) {
             if (e.signal != SIGINT) {