From ffad246d67e443da80c8d174252bcdf40934a5d8 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Thu, 4 Aug 2022 23:46:44 +0100 Subject: [PATCH] Split NCE Trap page-out functionality from `TrapRegions` The `TrapRegions` function performed a page-out on any regions that were trapped as read-only, this wasn't optimal as it would tie them both into the same operation while Buffers/Textures require to protect then synchronize and page-out. The trap was being moved to after the synchronize to get around this limitation but that can cause a potential race due to certain writes being done after the synchronization but prior to the trap which would be lost. This commit fixes these issues by splitting paging out into `PageOutRegions` which can be called after `TrapRegions` by any API users. Co-authored-by: Billy Laws --- app/src/main/cpp/skyline/gpu/buffer.cpp | 9 ++++-- .../main/cpp/skyline/gpu/texture/texture.cpp | 12 +++++-- app/src/main/cpp/skyline/nce.cpp | 32 ++++++++++--------- app/src/main/cpp/skyline/nce.h | 8 ++++- 4 files changed, 40 insertions(+), 21 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index 2a21b38b..aafd7e35 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -100,14 +100,17 @@ namespace skyline::gpu { if (dirtyState == DirtyState::GpuDirty) return; - else if (dirtyState == DirtyState::CpuDirty) + + gpu.state.nce->TrapRegions(*trapHandle, false); // This has to occur prior to any synchronization as it'll skip trapping + + if (dirtyState == DirtyState::CpuDirty) SynchronizeHost(true); // Will transition the Buffer to Clean dirtyState = DirtyState::GpuDirty; + gpu.state.nce->PageOutRegions(*trapHandle); // All data can be paged out from the guest as the guest mirror won't be used + BlockAllCpuBackingWrites(); AdvanceSequence(); // The GPU will modify buffer contents so advance to the next sequence - - gpu.state.nce->TrapRegions(*trapHandle, false); } void Buffer::WaitOnFence() { diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index dc971bf3..f8cf7bfa 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -691,14 +691,18 @@ namespace skyline::gpu { // If a texture is Clean then we can just transition it to being GPU dirty and retrap it dirtyState = DirtyState::GpuDirty; gpu.state.nce->TrapRegions(*trapHandle, false); + gpu.state.nce->PageOutRegions(*trapHandle); return; } else if (dirtyState != DirtyState::CpuDirty) { return; // If the texture has not been modified on the CPU, there is no need to synchronize it } dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean; + gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture } + // From this point on Clean -> CPU dirty state transitions can occur, GPU dirty -> * transitions will always require the full lock to be held and thus won't occur + auto stagingBuffer{SynchronizeHostImpl()}; if (stagingBuffer) { auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { @@ -709,7 +713,8 @@ namespace skyline::gpu { cycle = lCycle; } - gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture + if (gpuDirty) + gpu.state.nce->PageOutRegions(*trapHandle); // All data can be paged out from the guest as the guest mirror won't be used } void Texture::SynchronizeHostInline(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &pCycle, bool gpuDirty) { @@ -723,12 +728,14 @@ namespace skyline::gpu { if (gpuDirty && dirtyState == DirtyState::Clean) { dirtyState = DirtyState::GpuDirty; gpu.state.nce->TrapRegions(*trapHandle, false); + gpu.state.nce->PageOutRegions(*trapHandle); return; } else if (dirtyState != DirtyState::CpuDirty) { return; } dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean; + gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture } auto stagingBuffer{SynchronizeHostImpl()}; @@ -739,7 +746,8 @@ namespace skyline::gpu { cycle = pCycle; } - gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture + if (gpuDirty) + gpu.state.nce->PageOutRegions(*trapHandle); } void Texture::SynchronizeGuest(bool cpuDirty, bool skipTrap) { diff --git a/app/src/main/cpp/skyline/nce.cpp b/app/src/main/cpp/skyline/nce.cpp index 894cc35e..851333a6 100644 --- a/app/src/main/cpp/skyline/nce.cpp +++ b/app/src/main/cpp/skyline/nce.cpp @@ -455,17 +455,6 @@ namespace skyline::nce { reprotectIntervalsWithFunction([&](auto region) { return PROT_NONE; // No checks are needed as this is already the highest level of protection }); - - // Page out regions that are no longer accessible, these should be paged back in by a callback - TRACE_EVENT("host", "NCE::ReprotectIntervals::PageOut"); - for (auto region : intervals) { - auto freeStart{util::AlignUp(region.start, PAGE_SIZE)}, freeEnd{util::AlignDown(region.end, PAGE_SIZE)}; // We want to avoid the first and last page as they may contain data that won't be paged back in by the callback - ssize_t freeSize{freeEnd - freeStart}; - - constexpr ssize_t MinimumPageoutSize{PAGE_SIZE}; //!< The minimum size to page out, we don't want to page out small intervals for performance reasons - if (freeSize > MinimumPageoutSize) - state.process->memory.FreeMemory(span{freeStart, static_cast(freeSize)}); - } } } @@ -537,29 +526,42 @@ namespace skyline::nce { NCE::TrapHandle NCE::CreateTrap(span> regions, const LockCallback &lockCallback, const TrapCallback &readCallback, const TrapCallback &writeCallback) { TRACE_EVENT("host", "NCE::CreateTrap"); - std::scoped_lock lock(trapMutex); + std::scoped_lock lock{trapMutex}; TrapHandle handle{trapMap.Insert(regions, CallbackEntry{TrapProtection::None, lockCallback, readCallback, writeCallback})}; return handle; } void NCE::TrapRegions(TrapHandle handle, bool writeOnly) { TRACE_EVENT("host", "NCE::TrapRegions"); - std::scoped_lock lock(trapMutex); + std::scoped_lock lock{trapMutex}; auto protection{writeOnly ? TrapProtection::WriteOnly : TrapProtection::ReadWrite}; handle->value.protection = protection; ReprotectIntervals(handle->intervals, protection); } + void NCE::PageOutRegions(TrapHandle handle) { + TRACE_EVENT("host", "NCE::PageOutRegions"); + std::scoped_lock lock{trapMutex}; + for (auto region : handle->intervals) { + auto freeStart{util::AlignUp(region.start, PAGE_SIZE)}, freeEnd{util::AlignDown(region.end, PAGE_SIZE)}; // We want to avoid the first and last page as they may contain unrelated data + ssize_t freeSize{freeEnd - freeStart}; + + constexpr ssize_t MinimumPageoutSize{PAGE_SIZE}; //!< The minimum size to page out, we don't want to page out small intervals for performance reasons + if (freeSize > MinimumPageoutSize) + state.process->memory.FreeMemory(span{freeStart, static_cast(freeSize)}); + } + } + void NCE::RemoveTrap(TrapHandle handle) { TRACE_EVENT("host", "NCE::RemoveTrap"); - std::scoped_lock lock(trapMutex); + std::scoped_lock lock{trapMutex}; handle->value.protection = TrapProtection::None; ReprotectIntervals(handle->intervals, TrapProtection::None); } void NCE::DeleteTrap(TrapHandle handle) { TRACE_EVENT("host", "NCE::DeleteTrap"); - std::scoped_lock lock(trapMutex); + std::scoped_lock lock{trapMutex}; handle->value.protection = TrapProtection::None; ReprotectIntervals(handle->intervals, TrapProtection::None); trapMap.Remove(handle); diff --git a/app/src/main/cpp/skyline/nce.h b/app/src/main/cpp/skyline/nce.h index b3840b20..3429b9d0 100644 --- a/app/src/main/cpp/skyline/nce.h +++ b/app/src/main/cpp/skyline/nce.h @@ -115,10 +115,16 @@ namespace skyline::nce { /** * @brief Re-traps a region of memory after protections were removed * @param writeOnly If the trap is optimally for write-only accesses, this is not guarenteed - * @note Any regions trapped without writeOnly may have their data (except border pages) paged out and it needs to be paged back in inside the callbacks */ void TrapRegions(TrapHandle handle, bool writeOnly); + /** + * @brief Pages out the supplied trap region of memory (except border pages), any future accesses will return 0s + * @note This function is intended to be used after trapping reads to a region where the callback pages back in the data + * @note If the region is determined to be too small, this function will not do anything and is not meant to deterministically page out the region + */ + void PageOutRegions(TrapHandle handle); + /** * @brief Removes protections from a region of memory */