From b81d5bc86507bc0e979334d62e1d916d62c4e99a Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 12 May 2022 19:40:24 +0100 Subject: [PATCH] Implement and clean up semaphore operations in all engines Most engines have the capability to release a semaphore payload (or reduce in the case of GPFIFO) when a method is called or an action is complete. Semaphores are used by games for both timing how long things take on the GPU and waiting on resources, so missing them can cause deadlocks or other related issues. --- .../cpp/skyline/soc/gm20b/engines/engine.cpp | 9 ++++ .../cpp/skyline/soc/gm20b/engines/engine.h | 5 ++ .../cpp/skyline/soc/gm20b/engines/gpfifo.cpp | 52 +++++++++++++++++++ .../cpp/skyline/soc/gm20b/engines/gpfifo.h | 13 +---- .../skyline/soc/gm20b/engines/maxwell_3d.cpp | 22 +++----- .../skyline/soc/gm20b/engines/maxwell_dma.cpp | 20 +++++++ .../skyline/soc/gm20b/engines/maxwell_dma.h | 2 + 7 files changed, 97 insertions(+), 26 deletions(-) diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp index e7181cda..86ca2dc0 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp @@ -4,6 +4,15 @@ #include "engine.h" namespace skyline::soc::gm20b::engine { + u64 GetGpuTimeTicks() { + constexpr i64 NsToTickNumerator{384}; + constexpr i64 NsToTickDenominator{625}; + + i64 nsTime{util::GetTimeNs()}; + i64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator}; + return static_cast(timestamp); + } + MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {} void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) { diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h index 5da9979f..4495c383 100644 --- 
a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h @@ -49,6 +49,11 @@ namespace skyline::soc::gm20b::engine { constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines + /** + * @brief Returns current time in GPU ticks + */ + u64 GetGpuTimeTicks(); + /** * @brief The MacroEngineBase interface provides an interface that can be used by engines to allow interfacing with the macro executer */ diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp index c02ead6e..2decb78c 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp @@ -1,7 +1,9 @@ // SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/) // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) #include +#include #include #include "gpfifo.h" @@ -26,6 +28,56 @@ namespace skyline::soc::gm20b::engine { syncpoints.at(action.index).Wait(registers.syncpoint->payload, std::chrono::steady_clock::duration::max()); } }) + + ENGINE_STRUCT_CASE(semaphore, action, { + // Write timestamp first to ensure ordering + if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) { + channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 4, 0); + channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks()); + } + + if (action.operation == Registers::Semaphore::Operation::Release) { + channelCtx.asCtx->gmmu.Write(registers.semaphore->address, registers.semaphore->payload); + } else if (action.operation == Registers::Semaphore::Operation::Reduction) { + u32 origVal{channelCtx.asCtx->gmmu.Read(registers.semaphore->address)}; + bool isSigned{action.format == Registers::Semaphore::Format::Signed}; + + // 
https://github.com/NVIDIA/open-gpu-doc/blob/b7d1bd16fe62135ebaec306b39dfdbd9e5657827/manuals/turing/tu104/dev_pbdma.ref.txt#L3549 + u32 val{[](Registers::Semaphore::Reduction reduction, u32 origVal, u32 payload, bool isSigned) { + switch (reduction) { + case Registers::Semaphore::Reduction::Min: + if (isSigned) + return static_cast(std::min(static_cast(origVal), static_cast(payload))); + else + return std::min(origVal, payload); + case Registers::Semaphore::Reduction::Max: + if (isSigned) + return static_cast(std::max(static_cast(origVal), static_cast(payload))); + else + return std::max(origVal, payload); + case Registers::Semaphore::Reduction::Xor: + return origVal ^ payload; + case Registers::Semaphore::Reduction::And: + return origVal & payload; + case Registers::Semaphore::Reduction::Or: + return origVal | payload; + case Registers::Semaphore::Reduction::Add: + if (isSigned) + return static_cast(static_cast(origVal) + static_cast(payload)); + else + return origVal + payload; + case Registers::Semaphore::Reduction::Inc: + return (origVal >= payload) ? 0 : origVal + 1; + case Registers::Semaphore::Reduction::Dec: + return (origVal == 0 || origVal > payload) ? 
payload : origVal - 1; + } + }(registers.semaphore->action.reduction, origVal, registers.semaphore->payload, isSigned)}; + + channelCtx.asCtx->gmmu.Write(registers.semaphore->address, val); + } else { + Logger::Warn("Unimplemented semaphore operation: 0x{:X}", static_cast(registers.semaphore->action.operation)); + } + }) } }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h index 4345483d..0078481b 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h @@ -81,16 +81,7 @@ namespace skyline::soc::gm20b::engine { Unsigned = 1, }; - struct { - u32 offsetUpper : 8; - u32 _pad0_ : 24; - }; // 0x4 - - struct { - u8 _pad1_ : 2; - u32 offsetLower : 30; - }; // 0x5 - + Address address; // 0x4 u32 payload; // 0x6 struct { @@ -104,7 +95,7 @@ namespace skyline::soc::gm20b::engine { u8 _pad5_ : 2; Reduction reduction : 4; Format format : 1; - }; // 0x7 + } action; // 0x7 }; static_assert(sizeof(Semaphore) == 0x10); diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index 6416390f..6462a0ff 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -688,6 +688,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d { }) ENGINE_STRUCT_CASE(semaphore, info, { + if (info.reductionEnable) + Logger::Warn("Semaphore reduction is unimplemented!"); + switch (info.op) { case type::SemaphoreInfo::Op::Release: WriteSemaphoreResult(registers.semaphore->payload); @@ -751,26 +754,15 @@ namespace skyline::soc::gm20b::engine::maxwell3d { } void Maxwell3D::WriteSemaphoreResult(u64 result) { - struct FourWordResult { - u64 value; - u64 timestamp; - }; - switch (registers.semaphore->info.structureSize) { case type::SemaphoreInfo::StructureSize::OneWord: - 
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast(result)); + channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast(result)); break; case type::SemaphoreInfo::StructureSize::FourWords: { - // Convert the current nanosecond time to GPU ticks - constexpr i64 NsToTickNumerator{384}; - constexpr i64 NsToTickDenominator{625}; - - i64 nsTime{util::GetTimeNs()}; - i64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator}; - - channelCtx.asCtx->gmmu.Write(registers.semaphore->address, - FourWordResult{result, static_cast(timestamp)}); + // Write timestamp first to ensure correct ordering + channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks()); + channelCtx.asCtx->gmmu.Write(registers.semaphore->address, result); break; } } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp index 08d99ccd..2024f1d2 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp @@ -53,6 +53,26 @@ namespace skyline::soc::gm20b::engine { Logger::Debug("src: 0x{:X} dst: 0x{:X} size: 0x{:X}", u64{*registers.offsetIn}, u64{*registers.offsetOut}, *registers.lineLengthIn); channelCtx.asCtx->gmmu.Copy(*registers.offsetOut, *registers.offsetIn, *registers.lineLengthIn); } + + ReleaseSemaphore(); + } + + void MaxwellDma::ReleaseSemaphore() { + if (registers.launchDma->reductionEnable) + Logger::Warn("Semaphore reduction is unimplemented!"); + + switch (registers.launchDma->semaphoreType) { + case Registers::LaunchDma::SemaphoreType::ReleaseOneWordSemaphore: + channelCtx.asCtx->gmmu.Write(registers.semaphore->address, registers.semaphore->payload); + break; + case Registers::LaunchDma::SemaphoreType::ReleaseFourWordSemaphore: + // Write timestamp first to ensure correct ordering + 
channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks()); + channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast(registers.semaphore->payload)); + break; + default: + break; + } } void MaxwellDma::CopyPitchToBlockLinear() { diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h index caa4a053..676cc3aa 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h @@ -27,6 +27,8 @@ namespace skyline::soc::gm20b::engine { void LaunchDma(); + void ReleaseSemaphore(); + void CopyPitchToBlockLinear(); void CopyBlockLinearToPitch();