diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
index e7181cda..86ca2dc0 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.cpp
@@ -4,6 +4,16 @@
 #include "engine.h"
 
 namespace skyline::soc::gm20b::engine {
+    u64 GetGpuTimeTicks() {
+        constexpr i64 NsToTickNumerator{384};
+        constexpr i64 NsToTickDenominator{625};
+
+        i64 nsTime{util::GetTimeNs()};
+        // Same value as nsTime * 384 / 625, but scaling the quotient and remainder separately keeps the multiplication from overflowing
+        i64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
+        return static_cast<u64>(timestamp);
+    }
+
     MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}
 
     void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) {
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
index 5da9979f..4495c383 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/engine.h
@@ -49,6 +49,12 @@ namespace skyline::soc::gm20b::engine {
 
     constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
 
+    /**
+     * @brief Returns current time in GPU ticks
+     * @return The value of util::GetTimeNs() scaled by 384/625 to convert nanoseconds into GPU ticks
+     */
+    u64 GetGpuTimeTicks();
+
     /**
      * @brief The MacroEngineBase interface provides an interface that can be used by engines to allow interfacing with the macro executer
      */
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp
index c02ead6e..2decb78c 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
 // Copyright © 2021 Skyline Team and Contributors 
(https://github.com/skyline-emu/)
 
 #include
+#include
 #include
 #include "gpfifo.h"
 
@@ -26,6 +28,57 @@ namespace skyline::soc::gm20b::engine {
                     syncpoints.at(action.index).Wait(registers.syncpoint->payload, std::chrono::steady_clock::duration::max());
                 }
             })
+
+            ENGINE_STRUCT_CASE(semaphore, action, {
+                // Write timestamp first to ensure ordering
+                if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) {
+                    channelCtx.asCtx->gmmu.Write<u32>(registers.semaphore->address + 4, 0);
+                    channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
+                }
+
+                if (action.operation == Registers::Semaphore::Operation::Release) {
+                    channelCtx.asCtx->gmmu.Write(registers.semaphore->address, registers.semaphore->payload);
+                } else if (action.operation == Registers::Semaphore::Operation::Reduction) {
+                    u32 origVal{channelCtx.asCtx->gmmu.Read<u32>(registers.semaphore->address)};
+                    bool isSigned{action.format == Registers::Semaphore::Format::Signed};
+
+                    // https://github.com/NVIDIA/open-gpu-doc/blob/b7d1bd16fe62135ebaec306b39dfdbd9e5657827/manuals/turing/tu104/dev_pbdma.ref.txt#L3549
+                    u32 val{[](Registers::Semaphore::Reduction reduction, u32 origVal, u32 payload, bool isSigned) {
+                        switch (reduction) {
+                            case Registers::Semaphore::Reduction::Min:
+                                if (isSigned)
+                                    return static_cast<u32>(std::min(static_cast<i32>(origVal), static_cast<i32>(payload)));
+                                else
+                                    return std::min(origVal, payload);
+                            case Registers::Semaphore::Reduction::Max:
+                                if (isSigned)
+                                    return static_cast<u32>(std::max(static_cast<i32>(origVal), static_cast<i32>(payload)));
+                                else
+                                    return std::max(origVal, payload);
+                            case Registers::Semaphore::Reduction::Xor:
+                                return origVal ^ payload;
+                            case Registers::Semaphore::Reduction::And:
+                                return origVal & payload;
+                            case Registers::Semaphore::Reduction::Or:
+                                return origVal | payload;
+                            case Registers::Semaphore::Reduction::Add:
+                                // Two's complement addition yields the same bits for both formats, unsigned arithmetic avoids signed overflow UB
+                                return origVal + payload;
+                            case Registers::Semaphore::Reduction::Inc:
+                                return (origVal >= payload) ? 0 : origVal + 1;
+                            case Registers::Semaphore::Reduction::Dec:
+                                return (origVal == 0 || origVal > payload) ? payload : origVal - 1;
+                            default:
+                                // The 4-bit reduction field can hold values with no case above, keep the old value rather than flowing off the end of the lambda
+                                return origVal;
+                        }
+                    }(registers.semaphore->action.reduction, origVal, registers.semaphore->payload, isSigned)};
+
+                    channelCtx.asCtx->gmmu.Write(registers.semaphore->address, val);
+                } else {
+                    Logger::Warn("Unimplemented semaphore operation: 0x{:X}", static_cast<u32>(registers.semaphore->action.operation));
+                }
+            })
         }
     };
 }
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
index 4345483d..0078481b 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.h
@@ -81,16 +81,7 @@ namespace skyline::soc::gm20b::engine {
                     Unsigned = 1,
                 };
 
-                struct {
-                    u32 offsetUpper : 8;
-                    u32 _pad0_ : 24;
-                }; // 0x4
-
-                struct {
-                    u8 _pad1_ : 2;
-                    u32 offsetLower : 30;
-                }; // 0x5
-
+                Address address; // 0x4
                 u32 payload; // 0x6
 
                 struct {
@@ -104,7 +95,7 @@ namespace skyline::soc::gm20b::engine {
                     u8 _pad5_ : 2;
                     Reduction reduction : 4;
                     Format format : 1;
-                }; // 0x7
+                } action; // 0x7
             };
             static_assert(sizeof(Semaphore) == 0x10);
 
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
index 6416390f..6462a0ff 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
@@ -688,6 +688,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
             })
 
             ENGINE_STRUCT_CASE(semaphore, info, {
+                if (info.reductionEnable)
+                    Logger::Warn("Semaphore reduction is unimplemented!");
+
                 switch (info.op) {
                     case type::SemaphoreInfo::Op::Release:
                         WriteSemaphoreResult(registers.semaphore->payload);
@@ -751,26 +754,15 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
     }
 
     void Maxwell3D::WriteSemaphoreResult(u64 result) {
-        struct FourWordResult {
-            u64 value;
-            u64 timestamp;
-        };
-
         switch (registers.semaphore->info.structureSize) {
             case type::SemaphoreInfo::StructureSize::OneWord:
-                channelCtx.asCtx->gmmu.Write<u32>(registers.semaphore->address, static_cast<u32>(result));
+                channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast<u32>(result));
                 break;
 
             case type::SemaphoreInfo::StructureSize::FourWords: {
-                // Convert the current nanosecond time to GPU ticks
-                constexpr i64 NsToTickNumerator{384};
-                constexpr i64 NsToTickDenominator{625};
-
-                i64 nsTime{util::GetTimeNs()};
-                i64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
-
-                channelCtx.asCtx->gmmu.Write<FourWordResult>(registers.semaphore->address,
-                                                             FourWordResult{result, static_cast<u64>(timestamp)});
+                // Write timestamp first to ensure correct ordering
+                channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
+                channelCtx.asCtx->gmmu.Write(registers.semaphore->address, result);
                 break;
             }
         }
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
index 08d99ccd..2024f1d2 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
@@ -53,6 +53,26 @@ namespace skyline::soc::gm20b::engine {
             Logger::Debug("src: 0x{:X} dst: 0x{:X} size: 0x{:X}", u64{*registers.offsetIn}, u64{*registers.offsetOut}, *registers.lineLengthIn);
             channelCtx.asCtx->gmmu.Copy(*registers.offsetOut, *registers.offsetIn, *registers.lineLengthIn);
         }
+
+        ReleaseSemaphore();
+    }
+
+    void MaxwellDma::ReleaseSemaphore() {
+        if (registers.launchDma->reductionEnable)
+            Logger::Warn("Semaphore reduction is unimplemented!");
+
+        switch (registers.launchDma->semaphoreType) {
+            case Registers::LaunchDma::SemaphoreType::ReleaseOneWordSemaphore:
+                channelCtx.asCtx->gmmu.Write(registers.semaphore->address, registers.semaphore->payload);
+                break;
+            case Registers::LaunchDma::SemaphoreType::ReleaseFourWordSemaphore:
+                // Write timestamp first to ensure correct ordering
+                channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
+                channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast<u64>(registers.semaphore->payload));
+                break;
+            default:
+                break;
+        }
     }
 
     void MaxwellDma::CopyPitchToBlockLinear() {
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h
index caa4a053..676cc3aa 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.h
@@ -27,6 +27,11 @@ namespace skyline::soc::gm20b::engine {
 
         void LaunchDma();
 
+        /**
+         * @brief Writes the semaphore payload (preceded by the GPU timestamp for four-word semaphores) according to the semaphore type set in LaunchDma
+         */
+        void ReleaseSemaphore();
+
         void CopyPitchToBlockLinear();
 
         void CopyBlockLinearToPitch();