Implement and clean up semaphore operations in all engines

Most engines can release a semaphore payload (or, in the case of GPFIFO, apply a reduction to it) when a method is called or an action completes. Games use semaphores both to time how long work takes on the GPU and to wait on resources, so leaving them unimplemented can cause deadlocks or other related issues.
This commit is contained in:
Billy Laws 2022-05-12 19:40:24 +01:00
parent bca88685bd
commit b81d5bc865
7 changed files with 97 additions and 26 deletions

View File

@ -4,6 +4,15 @@
#include "engine.h"
namespace skyline::soc::gm20b::engine {
u64 GetGpuTimeTicks() {
    // Nanoseconds are scaled to GPU ticks by the fraction 384/625
    constexpr i64 TickNumerator{384};
    constexpr i64 TickDenominator{625};

    const i64 nowNs{util::GetTimeNs()};

    // The quotient and remainder are scaled separately and then summed
    // NOTE(review): presumably done to keep the intermediate product from overflowing for large timestamps - confirm
    const i64 wholePeriods{nowNs / TickDenominator};
    const i64 leftoverNs{nowNs % TickDenominator};
    const i64 ticks{wholePeriods * TickNumerator + (leftoverNs * TickNumerator) / TickDenominator};

    return static_cast<u64>(ticks);
}
// Constructs the base by initializing the macroState member from the supplied MacroState; no other work is done
MacroEngineBase::MacroEngineBase(MacroState &macroState) : macroState(macroState) {}
void MacroEngineBase::HandleMacroCall(u32 macroMethodOffset, u32 argument, bool lastCall) {

View File

@ -49,6 +49,11 @@ namespace skyline::soc::gm20b::engine {
constexpr u32 EngineMethodsEnd = 0xE00; //!< All methods above this are passed to the MME on supported engines
/**
* @brief Returns current time in GPU ticks
* @return The current nanosecond time from util::GetTimeNs() scaled by 384/625 into GPU ticks
* @note This is the timestamp value that the engines write alongside the payload of larger semaphore releases
*/
u64 GetGpuTimeTicks();
/**
* @brief The MacroEngineBase interface provides an interface that can be used by engines to allow interfacing with the macro executer
*/

View File

@ -1,7 +1,9 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <soc.h>
#include <soc/gm20b/gmmu.h>
#include <soc/gm20b/channel.h>
#include "gpfifo.h"
@ -26,6 +28,56 @@ namespace skyline::soc::gm20b::engine {
syncpoints.at(action.index).Wait(registers.syncpoint->payload, std::chrono::steady_clock::duration::max());
}
})
ENGINE_STRUCT_CASE(semaphore, action, {
    // Handles a write to the semaphore action register by performing the requested operation on the
    // 32-bit value located at the semaphore address:
    //  * Release   - the payload is written to the semaphore address
    //  * Reduction - the current value is read, combined with the payload and written back
    // Any other operation is only logged as unimplemented
    // For sixteen byte releases a zero word is written at +4 and the GPU timestamp at +8

    // Write timestamp first to ensure ordering
    if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) {
        channelCtx.asCtx->gmmu.Write<u32>(registers.semaphore->address + 4, 0);
        channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
    }

    if (action.operation == Registers::Semaphore::Operation::Release) {
        channelCtx.asCtx->gmmu.Write(registers.semaphore->address, registers.semaphore->payload);
    } else if (action.operation == Registers::Semaphore::Operation::Reduction) {
        u32 origVal{channelCtx.asCtx->gmmu.Read<u32>(registers.semaphore->address)};
        bool isSigned{action.format == Registers::Semaphore::Format::Signed};

        // https://github.com/NVIDIA/open-gpu-doc/blob/b7d1bd16fe62135ebaec306b39dfdbd9e5657827/manuals/turing/tu104/dev_pbdma.ref.txt#L3549
        u32 val{[](Registers::Semaphore::Reduction reduction, u32 origVal, u32 payload, bool isSigned) -> u32 {
            switch (reduction) {
                case Registers::Semaphore::Reduction::Min:
                    if (isSigned)
                        return static_cast<u32>(std::min(static_cast<i32>(origVal), static_cast<i32>(payload)));
                    else
                        return std::min(origVal, payload);
                case Registers::Semaphore::Reduction::Max:
                    if (isSigned)
                        return static_cast<u32>(std::max(static_cast<i32>(origVal), static_cast<i32>(payload)));
                    else
                        return std::max(origVal, payload);
                case Registers::Semaphore::Reduction::Xor:
                    return origVal ^ payload;
                case Registers::Semaphore::Reduction::And:
                    return origVal & payload;
                case Registers::Semaphore::Reduction::Or:
                    return origVal | payload;
                case Registers::Semaphore::Reduction::Add:
                    // Two's complement addition produces the same bits for signed and unsigned operands
                    // Adding as unsigned keeps wrap-around well defined rather than overflowing a signed integer
                    return origVal + payload;
                case Registers::Semaphore::Reduction::Inc:
                    // Wraps to zero once the current value has reached the payload
                    return (origVal >= payload) ? 0 : origVal + 1;
                case Registers::Semaphore::Reduction::Dec:
                    // Reloads the payload when the current value is zero or already above the payload
                    return (origVal == 0 || origVal > payload) ? payload : origVal - 1;
            }

            // The reduction field is wider than the set of cases handled above so an unhandled value can be decoded
            // Leave the semaphore value untouched in that case instead of flowing off the end of the lambda
            Logger::Warn("Unimplemented semaphore reduction: 0x{:X}", static_cast<u8>(reduction));
            return origVal;
        }(registers.semaphore->action.reduction, origVal, registers.semaphore->payload, isSigned)};

        channelCtx.asCtx->gmmu.Write(registers.semaphore->address, val);
    } else {
        Logger::Warn("Unimplemented semaphore operation: 0x{:X}", static_cast<u8>(registers.semaphore->action.operation));
    }
})
}
};
}

View File

@ -81,16 +81,7 @@ namespace skyline::soc::gm20b::engine {
Unsigned = 1,
};
struct {
u32 offsetUpper : 8;
u32 _pad0_ : 24;
}; // 0x4
struct {
u8 _pad1_ : 2;
u32 offsetLower : 30;
}; // 0x5
Address address; // 0x4
u32 payload; // 0x6
struct {
@ -104,7 +95,7 @@ namespace skyline::soc::gm20b::engine {
u8 _pad5_ : 2;
Reduction reduction : 4;
Format format : 1;
}; // 0x7
} action; // 0x7
};
static_assert(sizeof(Semaphore) == 0x10);

View File

@ -688,6 +688,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
})
ENGINE_STRUCT_CASE(semaphore, info, {
if (info.reductionEnable)
Logger::Warn("Semaphore reduction is unimplemented!");
switch (info.op) {
case type::SemaphoreInfo::Op::Release:
WriteSemaphoreResult(registers.semaphore->payload);
@ -751,26 +754,15 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
}
void Maxwell3D::WriteSemaphoreResult(u64 result) {
struct FourWordResult {
u64 value;
u64 timestamp;
};
switch (registers.semaphore->info.structureSize) {
case type::SemaphoreInfo::StructureSize::OneWord:
channelCtx.asCtx->gmmu.Write<u32>(registers.semaphore->address, static_cast<u32>(result));
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast<u32>(result));
break;
case type::SemaphoreInfo::StructureSize::FourWords: {
// Convert the current nanosecond time to GPU ticks
constexpr i64 NsToTickNumerator{384};
constexpr i64 NsToTickDenominator{625};
i64 nsTime{util::GetTimeNs()};
i64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
channelCtx.asCtx->gmmu.Write<FourWordResult>(registers.semaphore->address,
FourWordResult{result, static_cast<u64>(timestamp)});
// Write timestamp first to ensure correct ordering
channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, result);
break;
}
}

View File

@ -53,6 +53,26 @@ namespace skyline::soc::gm20b::engine {
Logger::Debug("src: 0x{:X} dst: 0x{:X} size: 0x{:X}", u64{*registers.offsetIn}, u64{*registers.offsetOut}, *registers.lineLengthIn);
channelCtx.asCtx->gmmu.Copy(*registers.offsetOut, *registers.offsetIn, *registers.lineLengthIn);
}
ReleaseSemaphore();
}
void MaxwellDma::ReleaseSemaphore() {
if (registers.launchDma->reductionEnable)
Logger::Warn("Semaphore reduction is unimplemented!");
switch (registers.launchDma->semaphoreType) {
case Registers::LaunchDma::SemaphoreType::ReleaseOneWordSemaphore:
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, registers.semaphore->payload);
break;
case Registers::LaunchDma::SemaphoreType::ReleaseFourWordSemaphore:
// Write timestamp first to ensure correct ordering
channelCtx.asCtx->gmmu.Write(registers.semaphore->address + 8, GetGpuTimeTicks());
channelCtx.asCtx->gmmu.Write(registers.semaphore->address, static_cast<u64>(registers.semaphore->payload));
break;
default:
break;
}
}
void MaxwellDma::CopyPitchToBlockLinear() {

View File

@ -27,6 +27,8 @@ namespace skyline::soc::gm20b::engine {
void LaunchDma();
/**
* @brief Releases the semaphore selected by the launchDma register by writing the semaphore payload (preceded by a GPU timestamp for four word semaphores) to the semaphore address
*/
void ReleaseSemaphore();
void CopyPitchToBlockLinear();
void CopyBlockLinearToPitch();