mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-23 14:11:11 +01:00
Split NCE Trap page-out functionality from TrapRegions
The `TrapRegions` function performed a page-out on any regions that were trapped for reads as well as writes (i.e. without `writeOnly`). This wasn't optimal as it tied trapping and paging out into a single operation, while Buffers/Textures need to protect, then synchronize, and only then page out. The trap was being moved to after the synchronization to get around this limitation, but that could cause a race: any writes done after the synchronization but prior to the trap would be lost. This commit fixes these issues by splitting paging out into `PageOutRegions`, which can be called after `TrapRegions` by any API user. Co-authored-by: Billy Laws <blaws05@gmail.com>
This commit is contained in:
parent
da464d84bc
commit
ffad246d67
@ -100,14 +100,17 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
if (dirtyState == DirtyState::GpuDirty)
|
if (dirtyState == DirtyState::GpuDirty)
|
||||||
return;
|
return;
|
||||||
else if (dirtyState == DirtyState::CpuDirty)
|
|
||||||
|
gpu.state.nce->TrapRegions(*trapHandle, false); // This has to occur prior to any synchronization as it'll skip trapping
|
||||||
|
|
||||||
|
if (dirtyState == DirtyState::CpuDirty)
|
||||||
SynchronizeHost(true); // Will transition the Buffer to Clean
|
SynchronizeHost(true); // Will transition the Buffer to Clean
|
||||||
|
|
||||||
dirtyState = DirtyState::GpuDirty;
|
dirtyState = DirtyState::GpuDirty;
|
||||||
|
gpu.state.nce->PageOutRegions(*trapHandle); // All data can be paged out from the guest as the guest mirror won't be used
|
||||||
|
|
||||||
BlockAllCpuBackingWrites();
|
BlockAllCpuBackingWrites();
|
||||||
AdvanceSequence(); // The GPU will modify buffer contents so advance to the next sequence
|
AdvanceSequence(); // The GPU will modify buffer contents so advance to the next sequence
|
||||||
|
|
||||||
gpu.state.nce->TrapRegions(*trapHandle, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::WaitOnFence() {
|
void Buffer::WaitOnFence() {
|
||||||
|
@ -691,14 +691,18 @@ namespace skyline::gpu {
|
|||||||
// If a texture is Clean then we can just transition it to being GPU dirty and retrap it
|
// If a texture is Clean then we can just transition it to being GPU dirty and retrap it
|
||||||
dirtyState = DirtyState::GpuDirty;
|
dirtyState = DirtyState::GpuDirty;
|
||||||
gpu.state.nce->TrapRegions(*trapHandle, false);
|
gpu.state.nce->TrapRegions(*trapHandle, false);
|
||||||
|
gpu.state.nce->PageOutRegions(*trapHandle);
|
||||||
return;
|
return;
|
||||||
} else if (dirtyState != DirtyState::CpuDirty) {
|
} else if (dirtyState != DirtyState::CpuDirty) {
|
||||||
return; // If the texture has not been modified on the CPU, there is no need to synchronize it
|
return; // If the texture has not been modified on the CPU, there is no need to synchronize it
|
||||||
}
|
}
|
||||||
|
|
||||||
dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
|
dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
|
||||||
|
gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From this point on Clean -> CPU dirty state transitions can occur, GPU dirty -> * transitions will always require the full lock to be held and thus won't occur
|
||||||
|
|
||||||
auto stagingBuffer{SynchronizeHostImpl()};
|
auto stagingBuffer{SynchronizeHostImpl()};
|
||||||
if (stagingBuffer) {
|
if (stagingBuffer) {
|
||||||
auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
|
auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
|
||||||
@ -709,7 +713,8 @@ namespace skyline::gpu {
|
|||||||
cycle = lCycle;
|
cycle = lCycle;
|
||||||
}
|
}
|
||||||
|
|
||||||
gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
|
if (gpuDirty)
|
||||||
|
gpu.state.nce->PageOutRegions(*trapHandle); // All data can be paged out from the guest as the guest mirror won't be used
|
||||||
}
|
}
|
||||||
|
|
||||||
void Texture::SynchronizeHostInline(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &pCycle, bool gpuDirty) {
|
void Texture::SynchronizeHostInline(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &pCycle, bool gpuDirty) {
|
||||||
@ -723,12 +728,14 @@ namespace skyline::gpu {
|
|||||||
if (gpuDirty && dirtyState == DirtyState::Clean) {
|
if (gpuDirty && dirtyState == DirtyState::Clean) {
|
||||||
dirtyState = DirtyState::GpuDirty;
|
dirtyState = DirtyState::GpuDirty;
|
||||||
gpu.state.nce->TrapRegions(*trapHandle, false);
|
gpu.state.nce->TrapRegions(*trapHandle, false);
|
||||||
|
gpu.state.nce->PageOutRegions(*trapHandle);
|
||||||
return;
|
return;
|
||||||
} else if (dirtyState != DirtyState::CpuDirty) {
|
} else if (dirtyState != DirtyState::CpuDirty) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
|
dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
|
||||||
|
gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
|
||||||
}
|
}
|
||||||
|
|
||||||
auto stagingBuffer{SynchronizeHostImpl()};
|
auto stagingBuffer{SynchronizeHostImpl()};
|
||||||
@ -739,7 +746,8 @@ namespace skyline::gpu {
|
|||||||
cycle = pCycle;
|
cycle = pCycle;
|
||||||
}
|
}
|
||||||
|
|
||||||
gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
|
if (gpuDirty)
|
||||||
|
gpu.state.nce->PageOutRegions(*trapHandle);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Texture::SynchronizeGuest(bool cpuDirty, bool skipTrap) {
|
void Texture::SynchronizeGuest(bool cpuDirty, bool skipTrap) {
|
||||||
|
@ -455,17 +455,6 @@ namespace skyline::nce {
|
|||||||
reprotectIntervalsWithFunction([&](auto region) {
|
reprotectIntervalsWithFunction([&](auto region) {
|
||||||
return PROT_NONE; // No checks are needed as this is already the highest level of protection
|
return PROT_NONE; // No checks are needed as this is already the highest level of protection
|
||||||
});
|
});
|
||||||
|
|
||||||
// Page out regions that are no longer accessible, these should be paged back in by a callback
|
|
||||||
TRACE_EVENT("host", "NCE::ReprotectIntervals::PageOut");
|
|
||||||
for (auto region : intervals) {
|
|
||||||
auto freeStart{util::AlignUp(region.start, PAGE_SIZE)}, freeEnd{util::AlignDown(region.end, PAGE_SIZE)}; // We want to avoid the first and last page as they may contain data that won't be paged back in by the callback
|
|
||||||
ssize_t freeSize{freeEnd - freeStart};
|
|
||||||
|
|
||||||
constexpr ssize_t MinimumPageoutSize{PAGE_SIZE}; //!< The minimum size to page out, we don't want to page out small intervals for performance reasons
|
|
||||||
if (freeSize > MinimumPageoutSize)
|
|
||||||
state.process->memory.FreeMemory(span<u8>{freeStart, static_cast<size_t>(freeSize)});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -537,29 +526,42 @@ namespace skyline::nce {
|
|||||||
|
|
||||||
NCE::TrapHandle NCE::CreateTrap(span<span<u8>> regions, const LockCallback &lockCallback, const TrapCallback &readCallback, const TrapCallback &writeCallback) {
|
NCE::TrapHandle NCE::CreateTrap(span<span<u8>> regions, const LockCallback &lockCallback, const TrapCallback &readCallback, const TrapCallback &writeCallback) {
|
||||||
TRACE_EVENT("host", "NCE::CreateTrap");
|
TRACE_EVENT("host", "NCE::CreateTrap");
|
||||||
std::scoped_lock lock(trapMutex);
|
std::scoped_lock lock{trapMutex};
|
||||||
TrapHandle handle{trapMap.Insert(regions, CallbackEntry{TrapProtection::None, lockCallback, readCallback, writeCallback})};
|
TrapHandle handle{trapMap.Insert(regions, CallbackEntry{TrapProtection::None, lockCallback, readCallback, writeCallback})};
|
||||||
return handle;
|
return handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
void NCE::TrapRegions(TrapHandle handle, bool writeOnly) {
|
void NCE::TrapRegions(TrapHandle handle, bool writeOnly) {
|
||||||
TRACE_EVENT("host", "NCE::TrapRegions");
|
TRACE_EVENT("host", "NCE::TrapRegions");
|
||||||
std::scoped_lock lock(trapMutex);
|
std::scoped_lock lock{trapMutex};
|
||||||
auto protection{writeOnly ? TrapProtection::WriteOnly : TrapProtection::ReadWrite};
|
auto protection{writeOnly ? TrapProtection::WriteOnly : TrapProtection::ReadWrite};
|
||||||
handle->value.protection = protection;
|
handle->value.protection = protection;
|
||||||
ReprotectIntervals(handle->intervals, protection);
|
ReprotectIntervals(handle->intervals, protection);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void NCE::PageOutRegions(TrapHandle handle) {
|
||||||
|
TRACE_EVENT("host", "NCE::PageOutRegions");
|
||||||
|
std::scoped_lock lock{trapMutex};
|
||||||
|
for (auto region : handle->intervals) {
|
||||||
|
auto freeStart{util::AlignUp(region.start, PAGE_SIZE)}, freeEnd{util::AlignDown(region.end, PAGE_SIZE)}; // We want to avoid the first and last page as they may contain unrelated data
|
||||||
|
ssize_t freeSize{freeEnd - freeStart};
|
||||||
|
|
||||||
|
constexpr ssize_t MinimumPageoutSize{PAGE_SIZE}; //!< The minimum size to page out, we don't want to page out small intervals for performance reasons
|
||||||
|
if (freeSize > MinimumPageoutSize)
|
||||||
|
state.process->memory.FreeMemory(span<u8>{freeStart, static_cast<size_t>(freeSize)});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void NCE::RemoveTrap(TrapHandle handle) {
|
void NCE::RemoveTrap(TrapHandle handle) {
|
||||||
TRACE_EVENT("host", "NCE::RemoveTrap");
|
TRACE_EVENT("host", "NCE::RemoveTrap");
|
||||||
std::scoped_lock lock(trapMutex);
|
std::scoped_lock lock{trapMutex};
|
||||||
handle->value.protection = TrapProtection::None;
|
handle->value.protection = TrapProtection::None;
|
||||||
ReprotectIntervals(handle->intervals, TrapProtection::None);
|
ReprotectIntervals(handle->intervals, TrapProtection::None);
|
||||||
}
|
}
|
||||||
|
|
||||||
void NCE::DeleteTrap(TrapHandle handle) {
|
void NCE::DeleteTrap(TrapHandle handle) {
|
||||||
TRACE_EVENT("host", "NCE::DeleteTrap");
|
TRACE_EVENT("host", "NCE::DeleteTrap");
|
||||||
std::scoped_lock lock(trapMutex);
|
std::scoped_lock lock{trapMutex};
|
||||||
handle->value.protection = TrapProtection::None;
|
handle->value.protection = TrapProtection::None;
|
||||||
ReprotectIntervals(handle->intervals, TrapProtection::None);
|
ReprotectIntervals(handle->intervals, TrapProtection::None);
|
||||||
trapMap.Remove(handle);
|
trapMap.Remove(handle);
|
||||||
|
@ -115,10 +115,16 @@ namespace skyline::nce {
|
|||||||
/**
|
/**
|
||||||
* @brief Re-traps a region of memory after protections were removed
|
* @brief Re-traps a region of memory after protections were removed
|
||||||
* @param writeOnly If the trap is optimally for write-only accesses, this is not guaranteed
|
* @param writeOnly If the trap is optimally for write-only accesses, this is not guaranteed
|
||||||
* @note Any regions trapped without writeOnly may have their data (except border pages) paged out and it needs to be paged back in inside the callbacks
|
|
||||||
*/
|
*/
|
||||||
void TrapRegions(TrapHandle handle, bool writeOnly);
|
void TrapRegions(TrapHandle handle, bool writeOnly);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Pages out the supplied trap region of memory (except border pages), any future accesses will return 0s
|
||||||
|
* @note This function is intended to be used after trapping reads to a region where the callback pages back in the data
|
||||||
|
* @note If the region is determined to be too small, this function will not do anything and is not meant to deterministically page out the region
|
||||||
|
*/
|
||||||
|
void PageOutRegions(TrapHandle handle);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Removes protections from a region of memory
|
* @brief Removes protections from a region of memory
|
||||||
*/
|
*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user