Add a global gm20b channel lock

Allowing channels to execute in parallel never really benefitted many games, and it prevented optimisations such as keeping frequently used resources permanently locked to avoid the constant overhead of locking on the hot path.
This commit is contained in:
Billy Laws 2022-10-09 12:51:27 +01:00
parent 57a4699bd1
commit 3e8bd26978
5 changed files with 30 additions and 3 deletions

View File

@@ -57,6 +57,8 @@ namespace skyline::gpu {
         cache::RenderPassCache renderPassCache;
         cache::FramebufferCache framebufferCache;
 
+        std::mutex channelLock;
+
         GPU(const DeviceState &state);
     };
 }

View File

@@ -12,5 +12,6 @@ namespace skyline::soc::gm20b {
           maxwellDma(state, *this, executor),
           keplerCompute(state, *this),
           inline2Memory(*this),
-          gpfifo(state, *this, numEntries) {}
+          gpfifo(state, *this, numEntries),
+          globalChannelLock{state.gpu->channelLock} {}
 }

View File

@@ -30,7 +30,18 @@ namespace skyline::soc::gm20b {
         engine::KeplerCompute keplerCompute;
         engine::Inline2Memory inline2Memory;
         ChannelGpfifo gpfifo;
+        std::mutex &globalChannelLock;
 
         ChannelContext(const DeviceState &state, std::shared_ptr<AddressSpaceContext> asCtx, size_t numEntries);
+
+        void Lock() {
+            globalChannelLock.lock();
+            executor.LockPreserve();
+        }
+
+        void Unlock() {
+            executor.UnlockPreserve();
+            globalChannelLock.unlock();
+        }
     };
 }

View File

@@ -25,7 +25,10 @@ namespace skyline::soc::gm20b::engine {
             Logger::Debug("Wait syncpoint: {}, thresh: {}", +action.index, registers.syncpoint->payload);
 
             // Wait forever for another channel to increment
+            channelCtx.Unlock();
             syncpoints.at(action.index).Wait(registers.syncpoint->payload, std::chrono::steady_clock::duration::max());
+            channelCtx.Lock();
+
         }
     })

View File

@@ -340,13 +340,23 @@ namespace skyline::soc::gm20b {
             signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE}, signal::ExceptionalSignalHandler);
             signal::SetSignalHandler({SIGSEGV}, nce::NCE::HostSignalHandler); // We may access NCE trapped memory
 
-            gpEntries.Process([this](GpEntry gpEntry) {
+            bool channelLocked{};
+            gpEntries.Process([this, &channelLocked](GpEntry gpEntry) {
                 Logger::Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size);
+
+                if (!channelLocked) {
+                    channelCtx.Lock();
+                    channelLocked = true;
+                }
+
                 Process(gpEntry);
-            }, [this]() {
+            }, [this, &channelLocked]() {
                 // If we run out of GpEntries to process ensure we submit any remaining GPU work before waiting for more to arrive
                 Logger::Debug("Finished processing pushbuffer batch");
                 channelCtx.executor.Submit();
+                channelCtx.Unlock();
+                channelLocked = false;
             });
         } catch (const signal::SignalException &e) {
             if (e.signal != SIGINT) {