mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-22 20:39:20 +01:00
Allow for tying guest GPU sync operations to host GPU sync
This is necessary for the upcoming direct buffer support: in order to use guest buffers directly without trapping, every guest GPU sync operation has to be recreated on the host GPU. This prevents the guest from believing that unfinished work has completed and then overwriting buffer contents that are still in use.
This commit is contained in:
parent
89c6fab1cb
commit
b3f7e990cc
@ -192,6 +192,42 @@ namespace skyline::gpu::interconnect {
|
|||||||
incoming.Push(slot);
|
incoming.Push(slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ExecutionWaiterThread::Run() {
|
||||||
|
signal::SetSignalHandler({SIGSEGV}, nce::NCE::HostSignalHandler); // We may access NCE trapped memory
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
std::pair<std::shared_ptr<FenceCycle>, std::function<void()>> item{};
|
||||||
|
{
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
idle = true;
|
||||||
|
condition.wait(lock, [this] { return !pendingSignalQueue.empty(); });
|
||||||
|
idle = false;
|
||||||
|
item = std::move(pendingSignalQueue.front());
|
||||||
|
pendingSignalQueue.pop();
|
||||||
|
}
|
||||||
|
{
|
||||||
|
TRACE_EVENT("gpu", "GPU");
|
||||||
|
if (item.first)
|
||||||
|
item.first->Wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (item.second)
|
||||||
|
item.second();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Spawns the worker thread, which immediately starts executing Run() on this instance
 */
ExecutionWaiterThread::ExecutionWaiterThread()
    : thread{[this] { Run(); }} {}
|
||||||
|
|
||||||
|
bool ExecutionWaiterThread::IsIdle() const {
|
||||||
|
return idle;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ExecutionWaiterThread::Queue(std::shared_ptr<FenceCycle> cycle, std::function<void()> &&callback) {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
pendingSignalQueue.push({std::move(cycle), std::move(callback)});
|
||||||
|
condition.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
CommandExecutor::CommandExecutor(const DeviceState &state)
|
CommandExecutor::CommandExecutor(const DeviceState &state)
|
||||||
: state{state},
|
: state{state},
|
||||||
gpu{*state.gpu},
|
gpu{*state.gpu},
|
||||||
@ -501,18 +537,31 @@ namespace skyline::gpu::interconnect {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandExecutor::Submit() {
|
void CommandExecutor::Submit(std::function<void()> &&callback) {
|
||||||
for (const auto &callback : flushCallbacks)
|
for (const auto &flushCallback : flushCallbacks)
|
||||||
callback();
|
flushCallback();
|
||||||
|
|
||||||
executionTag = AllocateTag();
|
executionTag = AllocateTag();
|
||||||
|
|
||||||
if (!slot->nodes.empty()) {
|
if (!slot->nodes.empty()) {
|
||||||
TRACE_EVENT("gpu", "CommandExecutor::Submit");
|
TRACE_EVENT("gpu", "CommandExecutor::Submit");
|
||||||
|
|
||||||
|
if (callback && *state.settings->useDirectMemoryImport)
|
||||||
|
waiterThread.Queue(cycle, std::move(callback));
|
||||||
|
else
|
||||||
|
waiterThread.Queue(cycle, {});
|
||||||
|
|
||||||
SubmitInternal();
|
SubmitInternal();
|
||||||
submissionNumber++;
|
submissionNumber++;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if (callback && *state.settings->useDirectMemoryImport)
|
||||||
|
waiterThread.Queue(nullptr, std::move(callback));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (callback && !*state.settings->useDirectMemoryImport)
|
||||||
|
callback();
|
||||||
|
|
||||||
ResetInternal();
|
ResetInternal();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,6 +92,30 @@ namespace skyline::gpu::interconnect {
|
|||||||
void ReleaseSlot(Slot *slot);
|
void ReleaseSlot(Slot *slot);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Thread responsible for notifying the guest of the completion of GPU operations
|
||||||
|
*/
|
||||||
|
class ExecutionWaiterThread {
|
||||||
|
private:
|
||||||
|
std::thread thread;
|
||||||
|
std::mutex mutex;
|
||||||
|
std::condition_variable condition;
|
||||||
|
std::queue<std::pair<std::shared_ptr<FenceCycle>, std::function<void()>>> pendingSignalQueue; //!< Queue of callbacks to be executed when their coressponding fence is signalled
|
||||||
|
std::atomic<bool> idle{};
|
||||||
|
|
||||||
|
void Run();
|
||||||
|
|
||||||
|
public:
|
||||||
|
ExecutionWaiterThread();
|
||||||
|
|
||||||
|
bool IsIdle() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Queues `callback` to be executed when `cycle` is signalled, null values are valid for either, will null cycle representing an immediate callback (dep on previously queued cycles) and null callback representing a wait with no callback
|
||||||
|
*/
|
||||||
|
void Queue(std::shared_ptr<FenceCycle> cycle, std::function<void()> &&callback);
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Assembles a Vulkan command stream with various nodes and manages execution of the produced graph
|
* @brief Assembles a Vulkan command stream with various nodes and manages execution of the produced graph
|
||||||
* @note This class is **NOT** thread-safe and should **ONLY** be utilized by a single thread
|
* @note This class is **NOT** thread-safe and should **ONLY** be utilized by a single thread
|
||||||
@ -102,6 +126,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
GPU &gpu;
|
GPU &gpu;
|
||||||
CommandRecordThread recordThread;
|
CommandRecordThread recordThread;
|
||||||
CommandRecordThread::Slot *slot{};
|
CommandRecordThread::Slot *slot{};
|
||||||
|
ExecutionWaiterThread waiterThread;
|
||||||
node::RenderPassNode *renderPass{};
|
node::RenderPassNode *renderPass{};
|
||||||
size_t subpassCount{}; //!< The number of subpasses in the current render pass
|
size_t subpassCount{}; //!< The number of subpasses in the current render pass
|
||||||
u32 renderPassIndex{};
|
u32 renderPassIndex{};
|
||||||
@ -274,8 +299,9 @@ namespace skyline::gpu::interconnect {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
|
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
|
||||||
|
* @param callback A function to call upon GPU completion of the submission
|
||||||
*/
|
*/
|
||||||
void Submit();
|
void Submit(std::function<void()> &&callback = {});
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Locks all preserve attached buffers/textures
|
* @brief Locks all preserve attached buffers/textures
|
||||||
|
@ -19,8 +19,9 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
ENGINE_STRUCT_CASE(syncpoint, action, {
|
ENGINE_STRUCT_CASE(syncpoint, action, {
|
||||||
if (action.operation == Registers::Syncpoint::Operation::Incr) {
|
if (action.operation == Registers::Syncpoint::Operation::Incr) {
|
||||||
Logger::Debug("Increment syncpoint: {}", +action.index);
|
Logger::Debug("Increment syncpoint: {}", +action.index);
|
||||||
channelCtx.executor.Submit();
|
channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = action.index]() {
|
||||||
syncpoints.at(action.index).Increment();
|
syncpoints->at(index).Increment();
|
||||||
|
});
|
||||||
} else if (action.operation == Registers::Syncpoint::Operation::Wait) {
|
} else if (action.operation == Registers::Syncpoint::Operation::Wait) {
|
||||||
Logger::Debug("Wait syncpoint: {}, thresh: {}", +action.index, registers.syncpoint->payload);
|
Logger::Debug("Wait syncpoint: {}, thresh: {}", +action.index, registers.syncpoint->payload);
|
||||||
|
|
||||||
@ -36,12 +37,6 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
ENGINE_STRUCT_CASE(semaphore, action, {
|
ENGINE_STRUCT_CASE(semaphore, action, {
|
||||||
u64 address{registers.semaphore->address};
|
u64 address{registers.semaphore->address};
|
||||||
|
|
||||||
// Write timestamp first to ensure ordering
|
|
||||||
if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) {
|
|
||||||
channelCtx.asCtx->gmmu.Write<u32>(address + 4, 0);
|
|
||||||
channelCtx.asCtx->gmmu.Write(address + 8, GetGpuTimeTicks());
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (action.operation) {
|
switch (action.operation) {
|
||||||
case Registers::Semaphore::Operation::Acquire:
|
case Registers::Semaphore::Operation::Acquire:
|
||||||
Logger::Debug("Acquire semaphore: 0x{:X} payload: {}", address, registers.semaphore->payload);
|
Logger::Debug("Acquire semaphore: 0x{:X} payload: {}", address, registers.semaphore->payload);
|
||||||
@ -54,7 +49,16 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
channelCtx.Lock();
|
channelCtx.Lock();
|
||||||
break;
|
break;
|
||||||
case Registers::Semaphore::Operation::Release:
|
case Registers::Semaphore::Operation::Release:
|
||||||
channelCtx.asCtx->gmmu.Write(address, registers.semaphore->payload);
|
channelCtx.executor.Submit([this, action, address, payload = registers.semaphore->payload] () {
|
||||||
|
// Write timestamp first to ensure ordering
|
||||||
|
if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) {
|
||||||
|
channelCtx.asCtx->gmmu.Write<u32>(address + 4, 0);
|
||||||
|
channelCtx.asCtx->gmmu.Write(address + 8, GetGpuTimeTicks());
|
||||||
|
}
|
||||||
|
|
||||||
|
channelCtx.asCtx->gmmu.Write(address, payload);
|
||||||
|
});
|
||||||
|
|
||||||
Logger::Debug("SemaphoreRelease: address: 0x{:X} payload: {}", address, registers.semaphore->payload);
|
Logger::Debug("SemaphoreRelease: address: 0x{:X} payload: {}", address, registers.semaphore->payload);
|
||||||
break;
|
break;
|
||||||
case Registers::Semaphore::Operation::AcqGeq :
|
case Registers::Semaphore::Operation::AcqGeq :
|
||||||
|
@ -218,8 +218,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
|
|
||||||
ENGINE_CASE(syncpointAction, {
|
ENGINE_CASE(syncpointAction, {
|
||||||
Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
|
Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
|
||||||
channelCtx.executor.Submit();
|
channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = syncpointAction.id]() {
|
||||||
syncpoints.at(syncpointAction.id).Increment();
|
syncpoints->at(index).Increment();
|
||||||
|
});
|
||||||
})
|
})
|
||||||
|
|
||||||
ENGINE_CASE(clearSurface, {
|
ENGINE_CASE(clearSurface, {
|
||||||
@ -338,14 +339,17 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
|
|
||||||
switch (info.op) {
|
switch (info.op) {
|
||||||
case type::SemaphoreInfo::Op::Release:
|
case type::SemaphoreInfo::Op::Release:
|
||||||
channelCtx.executor.Submit();
|
channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() {
|
||||||
WriteSemaphoreResult(registers.semaphore->payload);
|
WriteSemaphoreResult(semaphore, semaphore.payload);
|
||||||
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case type::SemaphoreInfo::Op::Counter: {
|
case type::SemaphoreInfo::Op::Counter: {
|
||||||
switch (info.counterType) {
|
switch (info.counterType) {
|
||||||
case type::SemaphoreInfo::CounterType::Zero:
|
case type::SemaphoreInfo::CounterType::Zero:
|
||||||
WriteSemaphoreResult(registers.semaphore->payload);
|
channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() {
|
||||||
|
WriteSemaphoreResult(semaphore, semaphore.payload);
|
||||||
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -390,21 +394,19 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::WriteSemaphoreResult(u64 result) {
|
void Maxwell3D::WriteSemaphoreResult(const Registers::Semaphore &semaphore, u64 result) {
|
||||||
u64 address{registers.semaphore->address};
|
switch (semaphore.info.structureSize) {
|
||||||
|
|
||||||
switch (registers.semaphore->info.structureSize) {
|
|
||||||
case type::SemaphoreInfo::StructureSize::OneWord:
|
case type::SemaphoreInfo::StructureSize::OneWord:
|
||||||
channelCtx.asCtx->gmmu.Write(address, static_cast<u32>(result));
|
channelCtx.asCtx->gmmu.Write(semaphore.address, static_cast<u32>(result));
|
||||||
Logger::Debug("address: 0x{:X} payload: {}", address, result);
|
Logger::Debug("address: 0x{:X} payload: {}", semaphore.address, result);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case type::SemaphoreInfo::StructureSize::FourWords: {
|
case type::SemaphoreInfo::StructureSize::FourWords: {
|
||||||
// Write timestamp first to ensure correct ordering
|
// Write timestamp first to ensure correct ordering
|
||||||
u64 timestamp{GetGpuTimeTicks()};
|
u64 timestamp{GetGpuTimeTicks()};
|
||||||
channelCtx.asCtx->gmmu.Write(address + 8, timestamp);
|
channelCtx.asCtx->gmmu.Write(semaphore.address + 8, timestamp);
|
||||||
channelCtx.asCtx->gmmu.Write(address, result);
|
channelCtx.asCtx->gmmu.Write(semaphore.address, result);
|
||||||
Logger::Debug("address: 0x{:X} payload: {} timestamp: {}", address, result, timestamp);
|
Logger::Debug("address: 0x{:X} payload: {} timestamp: {}", semaphore.address, result, timestamp);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -78,12 +78,6 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
*/
|
*/
|
||||||
void HandleMethod(u32 method, u32 argument);
|
void HandleMethod(u32 method, u32 argument);
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Writes back a semaphore result to the guest with an auto-generated timestamp (if required)
|
|
||||||
* @note If the semaphore is OneWord then the result will be downcasted to a 32-bit unsigned integer
|
|
||||||
*/
|
|
||||||
void WriteSemaphoreResult(u64 result);
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def
|
* @url https://github.com/devkitPro/deko3d/blob/master/source/maxwell/engine_3d.def
|
||||||
@ -421,6 +415,14 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
|||||||
static_assert(sizeof(Registers) == (EngineMethodsEnd * sizeof(u32)));
|
static_assert(sizeof(Registers) == (EngineMethodsEnd * sizeof(u32)));
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* @brief Writes back a semaphore result to the guest with an auto-generated timestamp (if required)
|
||||||
|
* @note If the semaphore is OneWord then the result will be downcasted to a 32-bit unsigned integer
|
||||||
|
*/
|
||||||
|
void WriteSemaphoreResult(const Registers::Semaphore &semaphore, u64 result);
|
||||||
|
|
||||||
|
public:
|
||||||
Registers registers{};
|
Registers registers{};
|
||||||
Registers shadowRegisters{}; //!< A shadow-copy of the registers, their function is controlled by the 'shadowRamControl' register
|
Registers shadowRegisters{}; //!< A shadow-copy of the registers, their function is controlled by the 'shadowRamControl' register
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user