mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-06-02 10:58:45 +02:00
482 lines
20 KiB
C++
482 lines
20 KiB
C++
// SPDX-License-Identifier: MPL-2.0
|
|
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
|
|
|
#include <range/v3/view.hpp>
|
|
#include <common/settings.h>
|
|
#include <loader/loader.h>
|
|
#include <gpu.h>
|
|
#include <dlfcn.h>
|
|
#include "command_executor.h"
|
|
|
|
namespace skyline::gpu::interconnect {
|
|
CommandRecordThread::CommandRecordThread(const DeviceState &state)
|
|
: state{state},
|
|
incoming{*state.settings->executorSlotCount},
|
|
outgoing{*state.settings->executorSlotCount},
|
|
thread{&CommandRecordThread::Run, this} {}
|
|
|
|
static vk::raii::CommandBuffer AllocateRaiiCommandBuffer(GPU &gpu, vk::raii::CommandPool &pool) {
|
|
return {gpu.vkDevice, (*gpu.vkDevice).allocateCommandBuffers(
|
|
{
|
|
.commandPool = *pool,
|
|
.level = vk::CommandBufferLevel::ePrimary,
|
|
.commandBufferCount = 1
|
|
}, *gpu.vkDevice.getDispatcher()).front(),
|
|
*pool};
|
|
}
|
|
|
|
CommandRecordThread::Slot::Slot(GPU &gpu)
|
|
: commandPool{gpu.vkDevice,
|
|
vk::CommandPoolCreateInfo{
|
|
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer | vk::CommandPoolCreateFlagBits::eTransient,
|
|
.queueFamilyIndex = gpu.vkQueueFamilyIndex
|
|
}
|
|
},
|
|
commandBuffer{AllocateRaiiCommandBuffer(gpu, commandPool)},
|
|
fence{gpu.vkDevice, vk::FenceCreateInfo{ .flags = vk::FenceCreateFlagBits::eSignaled }},
|
|
semaphore{gpu.vkDevice, vk::SemaphoreCreateInfo{}},
|
|
cycle{std::make_shared<FenceCycle>(gpu.vkDevice, *fence, *semaphore, true)} {}
|
|
|
|
CommandRecordThread::Slot::Slot(Slot &&other)
|
|
: commandPool{std::move(other.commandPool)},
|
|
commandBuffer{std::move(other.commandBuffer)},
|
|
fence{std::move(other.fence)},
|
|
semaphore{std::move(other.semaphore)},
|
|
cycle{std::move(other.cycle)} {}
|
|
|
|
std::shared_ptr<FenceCycle> CommandRecordThread::Slot::Reset(GPU &gpu) {
|
|
cycle->Wait();
|
|
cycle = std::make_shared<FenceCycle>(*cycle);
|
|
// Command buffer doesn't need to be reset since that's done implicitly by begin
|
|
return cycle;
|
|
}
|
|
|
|
void CommandRecordThread::ProcessSlot(Slot *slot) {
|
|
TRACE_EVENT_FMT("gpu", "ProcessSlot: 0x{:X}, execution: {}", slot, slot->executionNumber);
|
|
auto &gpu{*state.gpu};
|
|
|
|
vk::RenderPass lRenderPass;
|
|
u32 subpassIndex;
|
|
|
|
std::scoped_lock bufferLock{gpu.buffer.recreationMutex};
|
|
using namespace node;
|
|
for (NodeVariant &node : slot->nodes) {
|
|
#define NODE(name) [&](name& node) { node(slot->commandBuffer, slot->cycle, gpu); }
|
|
std::visit(VariantVisitor{
|
|
NODE(FunctionNode),
|
|
|
|
[&](RenderPassNode &node) {
|
|
lRenderPass = node(slot->commandBuffer, slot->cycle, gpu);
|
|
subpassIndex = 0;
|
|
},
|
|
|
|
[&](NextSubpassNode &node) {
|
|
node(slot->commandBuffer, slot->cycle, gpu);
|
|
++subpassIndex;
|
|
},
|
|
[&](SubpassFunctionNode &node) { node(slot->commandBuffer, slot->cycle, gpu, lRenderPass, subpassIndex); },
|
|
[&](NextSubpassFunctionNode &node) { node(slot->commandBuffer, slot->cycle, gpu, lRenderPass, ++subpassIndex); },
|
|
|
|
NODE(RenderPassEndNode),
|
|
}, node);
|
|
#undef NODE
|
|
}
|
|
|
|
slot->commandBuffer.end();
|
|
|
|
gpu.scheduler.SubmitCommandBuffer(slot->commandBuffer, slot->cycle);
|
|
|
|
slot->nodes.clear();
|
|
slot->allocator.Reset();
|
|
}
|
|
|
|
void CommandRecordThread::Run() {
|
|
auto &gpu{*state.gpu};
|
|
|
|
RENDERDOC_API_1_4_2 *renderDocApi{};
|
|
if (void *mod{dlopen("libVkLayer_GLES_RenderDoc.so", RTLD_NOW | RTLD_NOLOAD)}) {
|
|
auto *pfnGetApi{reinterpret_cast<pRENDERDOC_GetAPI>(dlsym(mod, "RENDERDOC_GetAPI"))};
|
|
if (int ret{pfnGetApi(eRENDERDOC_API_Version_1_4_2, (void **)&renderDocApi)}; ret != 1)
|
|
Logger::Warn("Failed to intialise RenderDoc API: {}", ret);
|
|
}
|
|
|
|
std::vector<Slot> slots{};
|
|
std::generate_n(std::back_inserter(slots), *state.settings->executorSlotCount, [&] () -> Slot { return gpu; });
|
|
|
|
outgoing.AppendTranform(span<Slot>(slots), [](auto &slot) { return &slot; });
|
|
|
|
if (int result{pthread_setname_np(pthread_self(), "Sky-CmdRecord")})
|
|
Logger::Warn("Failed to set the thread name: {}", strerror(result));
|
|
|
|
try {
|
|
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
|
|
|
|
incoming.Process([this, renderDocApi, &gpu](Slot *slot) {
|
|
VkInstance instance{*gpu.vkInstance};
|
|
if (renderDocApi && slot->capture)
|
|
renderDocApi->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), nullptr);
|
|
|
|
ProcessSlot(slot);
|
|
|
|
if (renderDocApi && slot->capture)
|
|
renderDocApi->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), nullptr);
|
|
slot->capture = false;
|
|
|
|
outgoing.Push(slot);
|
|
}, [] {});
|
|
} catch (const signal::SignalException &e) {
|
|
Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
|
|
if (state.process)
|
|
state.process->Kill(false);
|
|
else
|
|
std::rethrow_exception(std::current_exception());
|
|
} catch (const std::exception &e) {
|
|
Logger::Error(e.what());
|
|
if (state.process)
|
|
state.process->Kill(false);
|
|
else
|
|
std::rethrow_exception(std::current_exception());
|
|
}
|
|
}
|
|
|
|
CommandRecordThread::Slot *CommandRecordThread::AcquireSlot() {
|
|
return outgoing.Pop();
|
|
}
|
|
|
|
void CommandRecordThread::ReleaseSlot(Slot *slot) {
|
|
incoming.Push(slot);
|
|
}
|
|
|
|
CommandExecutor::CommandExecutor(const DeviceState &state)
|
|
: state{state},
|
|
gpu{*state.gpu},
|
|
recordThread{state},
|
|
tag{AllocateTag()} {
|
|
RotateRecordSlot();
|
|
}
|
|
|
|
CommandExecutor::~CommandExecutor() {
|
|
cycle->Cancel();
|
|
}
|
|
|
|
void CommandExecutor::RotateRecordSlot() {
|
|
if (slot) {
|
|
slot->capture = captureNextExecution;
|
|
recordThread.ReleaseSlot(slot);
|
|
}
|
|
|
|
captureNextExecution = false;
|
|
slot = recordThread.AcquireSlot();
|
|
cycle = slot->Reset(gpu);
|
|
slot->executionNumber = executionNumber;
|
|
allocator = &slot->allocator;
|
|
}
|
|
|
|
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
|
auto addSubpass{[&] {
|
|
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu);
|
|
|
|
lastSubpassAttachments.clear();
|
|
auto insertAttachmentRange{[this](auto &attachments) -> std::pair<size_t, size_t> {
|
|
size_t beginIndex{lastSubpassAttachments.size()};
|
|
lastSubpassAttachments.insert(lastSubpassAttachments.end(), attachments.begin(), attachments.end());
|
|
return {beginIndex, attachments.size()};
|
|
}};
|
|
|
|
auto rangeToSpan{[this](auto &range) -> span<TextureView *> {
|
|
return {lastSubpassAttachments.data() + range.first, range.second};
|
|
}};
|
|
|
|
auto inputAttachmentRange{insertAttachmentRange(inputAttachments)};
|
|
auto colorAttachmentRange{insertAttachmentRange(colorAttachments)};
|
|
|
|
lastSubpassInputAttachments = rangeToSpan(inputAttachmentRange);
|
|
lastSubpassColorAttachments = rangeToSpan(colorAttachmentRange);
|
|
lastSubpassDepthStencilAttachment = depthStencilAttachment;
|
|
}};
|
|
|
|
bool attachmentsMatch{ranges::equal(lastSubpassInputAttachments, inputAttachments) &&
|
|
ranges::equal(lastSubpassColorAttachments, colorAttachments) &&
|
|
lastSubpassDepthStencilAttachment == depthStencilAttachment};
|
|
|
|
if (renderPass == nullptr || renderPass->renderArea != renderArea ||
|
|
((noSubpassCreation || subpassCount >= gpu.traits.quirks.maxSubpassCount) && !attachmentsMatch)) {
|
|
// We need to create a render pass if one doesn't already exist or the current one isn't compatible
|
|
if (renderPass != nullptr)
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
|
renderPass = &std::get<node::RenderPassNode>(slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea));
|
|
addSubpass();
|
|
subpassCount = 1;
|
|
return false;
|
|
} else {
|
|
if (attachmentsMatch) {
|
|
// The last subpass had the same attachments, so we can reuse them
|
|
return false;
|
|
} else {
|
|
// The last subpass had different attachments, so we need to create a new one
|
|
addSubpass();
|
|
subpassCount++;
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
void CommandExecutor::FinishRenderPass() {
|
|
if (renderPass) {
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
|
|
|
renderPass = nullptr;
|
|
subpassCount = 0;
|
|
|
|
lastSubpassAttachments.clear();
|
|
lastSubpassInputAttachments = nullptr;
|
|
lastSubpassColorAttachments = nullptr;
|
|
lastSubpassDepthStencilAttachment = nullptr;
|
|
}
|
|
}
|
|
|
|
CommandExecutor::LockedTexture::LockedTexture(std::shared_ptr<Texture> texture) : texture{std::move(texture)} {}
|
|
|
|
constexpr CommandExecutor::LockedTexture::LockedTexture(CommandExecutor::LockedTexture &&other) : texture{std::exchange(other.texture, nullptr)} {}
|
|
|
|
constexpr Texture *CommandExecutor::LockedTexture::operator->() const {
|
|
return texture.get();
|
|
}
|
|
|
|
CommandExecutor::LockedTexture::~LockedTexture() {
|
|
if (texture)
|
|
texture->unlock();
|
|
}
|
|
|
|
bool CommandExecutor::AttachTexture(TextureView *view) {
|
|
bool didLock{view->LockWithTag(tag)};
|
|
if (didLock) {
|
|
if (view->texture->FrequentlyLocked())
|
|
attachedTextures.emplace_back(view->texture);
|
|
else
|
|
preserveAttachedTextures.emplace_back(view->texture);
|
|
}
|
|
|
|
return didLock;
|
|
}
|
|
|
|
CommandExecutor::LockedBuffer::LockedBuffer(std::shared_ptr<Buffer> buffer) : buffer{std::move(buffer)} {}
|
|
|
|
constexpr CommandExecutor::LockedBuffer::LockedBuffer(CommandExecutor::LockedBuffer &&other) : buffer{std::exchange(other.buffer, nullptr)} {}
|
|
|
|
constexpr Buffer *CommandExecutor::LockedBuffer::operator->() const {
|
|
return buffer.get();
|
|
}
|
|
|
|
CommandExecutor::LockedBuffer::~LockedBuffer() {
|
|
if (buffer)
|
|
buffer->unlock();
|
|
}
|
|
|
|
bool CommandExecutor::AttachBuffer(BufferView &view) {
|
|
bool didLock{view.LockWithTag(tag)};
|
|
if (didLock) {
|
|
if (view.GetBuffer()->FrequentlyLocked())
|
|
attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
|
else
|
|
preserveAttachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
|
}
|
|
return didLock;
|
|
}
|
|
|
|
void CommandExecutor::AttachLockedBufferView(BufferView &view, ContextLock<BufferView> &&lock) {
|
|
if (lock.OwnsLock()) {
|
|
// Transfer ownership to executor so that the resource will stay locked for the period it is used on the GPU
|
|
if (view.GetBuffer()->FrequentlyLocked())
|
|
attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
|
else
|
|
preserveAttachedBuffers.emplace_back(view.GetBuffer()->shared_from_this());
|
|
lock.Release(); // The executor will handle unlocking the lock so it doesn't need to be handled here
|
|
}
|
|
}
|
|
|
|
void CommandExecutor::AttachLockedBuffer(std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
|
|
if (lock.OwnsLock()) {
|
|
if (buffer->FrequentlyLocked())
|
|
attachedBuffers.emplace_back(std::move(buffer));
|
|
else
|
|
preserveAttachedBuffers.emplace_back(std::move(buffer));
|
|
lock.Release(); // See AttachLockedBufferView(...)
|
|
}
|
|
}
|
|
|
|
void CommandExecutor::AttachDependency(const std::shared_ptr<void> &dependency) {
|
|
cycle->AttachObject(dependency);
|
|
}
|
|
|
|
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) {
|
|
bool gotoNext{CreateRenderPassWithSubpass(renderArea, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr, noSubpassCreation)};
|
|
if (gotoNext)
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
|
else
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
|
}
|
|
|
|
void CommandExecutor::AddOutsideRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) {
|
|
if (renderPass)
|
|
FinishRenderPass();
|
|
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), std::forward<decltype(function)>(function));
|
|
}
|
|
|
|
void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) {
|
|
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)};
|
|
if (renderPass->ClearColorAttachment(0, value, gpu)) {
|
|
if (gotoNext)
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
|
|
} else {
|
|
auto function{[scissor = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
|
|
commandBuffer.clearAttachments(vk::ClearAttachment{
|
|
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
|
.colorAttachment = 0,
|
|
.clearValue = value,
|
|
}, vk::ClearRect{
|
|
.rect = vk::Rect2D{.extent = scissor},
|
|
.baseArrayLayer = 0,
|
|
.layerCount = 1,
|
|
});
|
|
}};
|
|
|
|
if (gotoNext)
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
|
|
else
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
|
|
}
|
|
}
|
|
|
|
void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) {
|
|
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)};
|
|
if (renderPass->ClearDepthStencilAttachment(value, gpu)) {
|
|
if (gotoNext)
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
|
|
} else {
|
|
auto function{[aspect = attachment->format->vkAspect, extent = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
|
|
commandBuffer.clearAttachments(vk::ClearAttachment{
|
|
.aspectMask = aspect,
|
|
.clearValue = value,
|
|
}, vk::ClearRect{
|
|
.rect.extent = extent,
|
|
.baseArrayLayer = 0,
|
|
.layerCount = 1,
|
|
});
|
|
}};
|
|
|
|
if (gotoNext)
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
|
|
else
|
|
slot->nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
|
|
}
|
|
}
|
|
|
|
void CommandExecutor::AddFlushCallback(std::function<void()> &&callback) {
|
|
flushCallbacks.emplace_back(std::forward<decltype(callback)>(callback));
|
|
}
|
|
|
|
void CommandExecutor::AddPipelineChangeCallback(std::function<void()> &&callback) {
|
|
pipelineChangeCallbacks.emplace_back(std::forward<decltype(callback)>(callback));
|
|
}
|
|
|
|
void CommandExecutor::NotifyPipelineChange() {
|
|
for (auto &callback : pipelineChangeCallbacks)
|
|
callback();
|
|
}
|
|
|
|
void CommandExecutor::SubmitInternal() {
|
|
if (renderPass)
|
|
FinishRenderPass();
|
|
|
|
{
|
|
slot->commandBuffer.begin(vk::CommandBufferBeginInfo{
|
|
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
|
});
|
|
|
|
// We need this barrier here to ensure that resources are in the state we expect them to be in, we shouldn't overwrite resources while prior commands might still be using them or read from them while they might be modified by prior commands
|
|
slot->commandBuffer.pipelineBarrier(
|
|
vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
|
|
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
|
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
|
}, {}, {}
|
|
);
|
|
|
|
boost::container::small_vector<FenceCycle *, 8> chainedCycles;
|
|
for (const auto &texture : ranges::views::concat(attachedTextures, preserveAttachedTextures)) {
|
|
texture->SynchronizeHostInline(slot->commandBuffer, cycle, true);
|
|
// We don't need to attach the Texture to the cycle as a TextureView will already be attached
|
|
if (ranges::find(chainedCycles, texture->cycle.get()) == chainedCycles.end()) {
|
|
cycle->ChainCycle(texture->cycle);
|
|
chainedCycles.emplace_back(texture->cycle.get());
|
|
}
|
|
|
|
texture->cycle = cycle;
|
|
}
|
|
}
|
|
|
|
for (const auto &attachedBuffer : ranges::views::concat(attachedBuffers, preserveAttachedBuffers)) {
|
|
if (attachedBuffer->RequiresCycleAttach()) {
|
|
attachedBuffer->SynchronizeHost(); // Synchronize attached buffers from the CPU without using a staging buffer
|
|
cycle->AttachObject(attachedBuffer.buffer);
|
|
attachedBuffer->UpdateCycle(cycle);
|
|
attachedBuffer->AllowAllBackingWrites();
|
|
}
|
|
}
|
|
|
|
RotateRecordSlot();
|
|
}
|
|
|
|
void CommandExecutor::ResetInternal() {
|
|
attachedTextures.clear();
|
|
attachedBuffers.clear();
|
|
allocator->Reset();
|
|
|
|
// Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever
|
|
if ((submissionNumber % (*state.settings->executorSlotCount * 2)) == 0) {
|
|
preserveAttachedBuffers.clear();
|
|
preserveAttachedTextures.clear();
|
|
}
|
|
}
|
|
|
|
void CommandExecutor::Submit() {
|
|
for (const auto &callback : flushCallbacks)
|
|
callback();
|
|
|
|
executionNumber++;
|
|
|
|
if (!slot->nodes.empty()) {
|
|
TRACE_EVENT("gpu", "CommandExecutor::Submit");
|
|
SubmitInternal();
|
|
submissionNumber++;
|
|
}
|
|
|
|
ResetInternal();
|
|
}
|
|
|
|
void CommandExecutor::LockPreserve() {
|
|
if (!preserveLocked) {
|
|
preserveLocked = true;
|
|
|
|
for (auto &buffer : preserveAttachedBuffers)
|
|
buffer->LockWithTag(tag);
|
|
|
|
for (auto &texture : preserveAttachedTextures)
|
|
texture->LockWithTag(tag);
|
|
}
|
|
}
|
|
|
|
void CommandExecutor::UnlockPreserve() {
|
|
if (preserveLocked) {
|
|
for (auto &buffer : preserveAttachedBuffers)
|
|
buffer->unlock();
|
|
|
|
for (auto &texture : preserveAttachedTextures)
|
|
texture->unlock();
|
|
|
|
preserveLocked = false;
|
|
}
|
|
}
|
|
}
|