diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 3511e801..abd1774a 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -131,7 +131,7 @@ namespace skyline::gpu::interconnect { }); for (auto texture : syncTextures) - texture->SynchronizeHostWithBuffer(commandBuffer, cycle); + texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true); for (auto buffer : syncBuffers) buffer->SynchronizeHostWithCycle(cycle); @@ -162,9 +162,6 @@ namespace skyline::gpu::interconnect { #undef NODE } - for (auto texture : syncTextures) - texture->SynchronizeGuestWithBuffer(commandBuffer, cycle); - for (auto buffer : syncBuffers) buffer->SynchronizeGuestWithCycle(cycle); diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index 5a73b782..151578e8 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -115,6 +115,17 @@ namespace skyline::gpu { alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings); mirror = alignedMirror.subspan(static_cast(frontMapping.data() - alignedData), totalSize); } + + trapHandle = gpu.state.nce->TrapRegions(mappings, true, [this] { + std::lock_guard lock(*this); + SynchronizeGuest(true); // We can skip trapping since the caller will do it + WaitOnFence(); + }, [this] { + std::lock_guard lock(*this); + SynchronizeGuest(true); + dirtyState = DirtyState::CpuDirty; // We need to assume the texture is dirty since we don't know what the guest is writing + WaitOnFence(); + }); } std::shared_ptr Texture::SynchronizeHostImpl(const std::shared_ptr &pCycle) { @@ -266,22 +277,6 @@ namespace skyline::gpu { texture->CopyToGuest(stagingBuffer ? stagingBuffer->data() : std::get(texture->backing).data()); } - Texture::Texture(GPU &gpu, BackingType &&backing, GuestTexture guest, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) - : gpu(gpu), - backing(std::move(backing)), - layout(layout), - guest(std::move(guest)), - dimensions(dimensions), - format(format), - tiling(tiling), - mipLevels(mipLevels), - layerCount(layerCount), - sampleCount(sampleCount) { - SetupGuestMappings(); - if (GetBacking()) - SynchronizeHost(); - } - Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) : gpu(gpu), backing(std::move(backing)), @@ -324,45 +319,26 @@ namespace skyline::gpu { .initialLayout = layout, }; backing = tiling != vk::ImageTiling::eLinear ? gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo); - TransitionLayout(vk::ImageLayout::eGeneral); - SetupGuestMappings(); - } - Texture::Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) - : gpu(gpu), - dimensions(dimensions), - format(format), - layout(initialLayout == vk::ImageLayout::ePreinitialized ? vk::ImageLayout::ePreinitialized : vk::ImageLayout::eUndefined), - tiling(vk::ImageTiling::eOptimal), // Same as above - mipLevels(mipLevels), - layerCount(layerCount), - sampleCount(sampleCount) { - vk::ImageCreateInfo imageCreateInfo{ - .imageType = dimensions.GetType(), - .format = *format, - .extent = dimensions, - .mipLevels = mipLevels, - .arrayLayers = layerCount, - .samples = sampleCount, - .tiling = tiling, - .usage = usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, - .sharingMode = vk::SharingMode::eExclusive, - .queueFamilyIndexCount = 1, - .pQueueFamilyIndices = &gpu.vkQueueFamilyIndex, - .initialLayout = layout, - }; - backing = tiling != vk::ImageTiling::eLinear ? gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo); - if (initialLayout != layout) - TransitionLayout(initialLayout); + SetupGuestMappings(); } Texture::~Texture() { std::lock_guard lock(*this); + if (trapHandle) + gpu.state.nce->DeleteTrap(*trapHandle); SynchronizeGuest(true); if (alignedMirror.valid()) munmap(alignedMirror.data(), alignedMirror.size()); } + void Texture::MarkGpuDirty() { + if (dirtyState == DirtyState::GpuDirty) + return; + gpu.state.nce->RetrapRegions(*trapHandle, false); + dirtyState = DirtyState::GpuDirty; + } + bool Texture::WaitOnBacking() { TRACE_EVENT("gpu", "Texture::WaitOnBacking"); @@ -420,7 +396,10 @@ namespace skyline::gpu { }); } - void Texture::SynchronizeHost() { + void Texture::SynchronizeHost(bool rwTrap) { + if (dirtyState != DirtyState::CpuDirty) + return; // If the texture has not been modified on the CPU, there is no need to synchronize it + TRACE_EVENT("gpu", "Texture::SynchronizeHost"); auto stagingBuffer{SynchronizeHostImpl(nullptr)}; @@ -431,9 +410,20 @@ namespace skyline::gpu { lCycle->AttachObjects(stagingBuffer, shared_from_this()); cycle = lCycle; } + + if (rwTrap) { + gpu.state.nce->RetrapRegions(*trapHandle, false); + dirtyState = DirtyState::GpuDirty; + } else { + gpu.state.nce->RetrapRegions(*trapHandle, true); // Trap any future CPU writes to this texture + dirtyState = DirtyState::Clean; + } } - void Texture::SynchronizeHostWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &pCycle) { + void Texture::SynchronizeHostWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &pCycle, bool rwTrap) { + if (dirtyState != DirtyState::CpuDirty) + return; + TRACE_EVENT("gpu", "Texture::SynchronizeHostWithBuffer"); auto stagingBuffer{SynchronizeHostImpl(pCycle)}; @@ -442,19 +432,28 @@ namespace skyline::gpu { pCycle->AttachObjects(stagingBuffer, shared_from_this()); cycle = pCycle; } + + if (rwTrap) { + gpu.state.nce->RetrapRegions(*trapHandle, false); + dirtyState = DirtyState::GpuDirty; + } else { + gpu.state.nce->RetrapRegions(*trapHandle, true); // Trap any future CPU writes to this texture + dirtyState = DirtyState::Clean; + } } - void Texture::SynchronizeGuest() { - if (!guest) + void Texture::SynchronizeGuest(bool skipTrap) { + if (dirtyState != DirtyState::GpuDirty || layout == vk::ImageLayout::eUndefined) { + // We can skip syncing in two cases: + // * If the texture has not been used on the GPU, there is no need to synchronize it + // * If the state of the host texture is undefined then so can the guest + return; + } else if (!guest) { throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); - else if (layout == vk::ImageLayout::eUndefined) - return; // If the state of the host texture is undefined then so can the guest + } TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); - if (layout == vk::ImageLayout::eUndefined) - return; // We don't need to synchronize the image if it is in an undefined state on the host - WaitOnBacking(); WaitOnFence(); @@ -473,9 +472,16 @@ namespace skyline::gpu { } else { throw exception("Host -> Guest synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling)); } + + if (!skipTrap) + gpu.state.nce->RetrapRegions(*trapHandle, true); + dirtyState = DirtyState::Clean; } void Texture::SynchronizeGuestWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &pCycle) { + if (dirtyState != DirtyState::GpuDirty) + return; + if (!guest) throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); else if (layout == vk::ImageLayout::eUndefined) @@ -483,9 +489,6 @@ namespace skyline::gpu { TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer"); - if (layout == vk::ImageLayout::eUndefined) - return; - WaitOnBacking(); if (cycle.lock() != pCycle) WaitOnFence(); @@ -504,6 +507,8 @@ namespace skyline::gpu { } else { throw exception("Host -> Guest synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling)); } + + dirtyState = DirtyState::Clean; } std::shared_ptr Texture::GetView(vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format pFormat, vk::ComponentMapping mapping) { diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 0155292c..7e623408 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -3,6 +3,7 @@ #pragma once +#include #include namespace skyline::gpu { @@ -321,6 +322,13 @@ namespace skyline::gpu { span mirror{}; //!< A contiguous mirror of all the guest mappings to allow linear access on the CPU span alignedMirror{}; //!< The mirror mapping aligned to page size to reflect the full mapping + std::optional trapHandle{}; //!< The handle of the traps for the guest mappings + enum class DirtyState { + Clean, //!< The CPU mappings are in sync with the GPU texture + CpuDirty, //!< The CPU mappings have been modified but the GPU texture is not up to date + GpuDirty, //!< The GPU texture has been modified but the CPU mappings have not been updated + } dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU texture + std::vector> views; //!< TextureView(s) that are backed by this Texture, used for repointing to a new Texture on deletion friend TextureManager; @@ -377,17 +385,16 @@ namespace skyline::gpu { u32 layerCount; //!< The amount of array layers in the image, utilized for efficient binding (Not to be confused with the depth or faces in a cubemap) vk::SampleCountFlagBits sampleCount; - Texture(GPU &gpu, BackingType &&backing, GuestTexture guest, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1); - + /** + * @brief Creates a texture object wrapping the supplied backing with the supplied attributes + * @param layout The initial layout of the texture, it **must** be eUndefined or ePreinitialized + */ Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1); - Texture(GPU &gpu, GuestTexture guest); - /** - * @brief Creates and allocates memory for the backing to creates a texture object wrapping it - * @param usage Usage flags that will applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory + * @brief Creates a texture object wrapping the guest texture with a backing that can represent the guest texture data */ - Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1); + Texture(GPU &gpu, GuestTexture guest); ~Texture(); @@ -426,6 +433,13 @@ namespace skyline::gpu { return mutex.try_lock(); } + /** + * @brief Marks the texture as dirty on the GPU, it will be synced on the next call to SynchronizeGuest + * @note This **must** be called after syncing the texture to the GPU not before + * @note The texture **must** be locked prior to calling this + */ + void MarkGpuDirty(); + /** * @brief Waits on the texture backing to be a valid non-null Vulkan image * @return If the mutex could be unlocked during the function @@ -458,25 +472,28 @@ namespace skyline::gpu { /** * @brief Synchronizes the host texture with the guest after it has been modified + * @param rwTrap If true, the guest buffer will be read/write trapped rather than only being write trapped which is more efficient than calling MarkGpuDirty directly after * @note The texture **must** be locked prior to calling this * @note The guest texture backing should exist prior to calling this */ - void SynchronizeHost(); + void SynchronizeHost(bool rwTrap = false); /** * @brief Same as SynchronizeHost but this records any commands into the supplied command buffer rather than creating one as necessary + * @param rwTrap If true, the guest buffer will be read/write trapped rather than only being write trapped which is more efficient than calling MarkGpuDirty directly after * @note It is more efficient to call SynchronizeHost than allocating a command buffer purely for this function as it may conditionally not record any commands * @note The texture **must** be locked prior to calling this * @note The guest texture backing should exist prior to calling this */ - void SynchronizeHostWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle); + void SynchronizeHostWithBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, bool rwTrap = false); /** * @brief Synchronizes the guest texture with the host texture after it has been modified + * @param skipTrap If true, setting up a CPU trap will be skipped and the dirty state will be Clean/CpuDirty * @note The texture **must** be locked prior to calling this * @note The guest texture should not be null prior to calling this */ - void SynchronizeGuest(); + void SynchronizeGuest(bool skipTrap = false); /** * @brief Synchronizes the guest texture with the host texture after it has been modified