Fix NCE Trapping API Deadlock

A deadlock was caused by holding `trapMutex` while waiting on the lock of a resource inside a callback while another thread holding the resource's mutex waits on `trapMutex`. This has been fixed by no longer allowing blocking locks inside the callbacks and introducing a separate callback for locking the resource which is done after unlocking the `trapMutex` which can then be locked by any contending threads.
This commit is contained in:
PixelyIon 2022-07-02 21:45:50 +05:30
parent a6599c30b4
commit 3ca56ef578
No known key found for this signature in database
GPG Key ID: 11BC6C3201BC2C05
4 changed files with 77 additions and 47 deletions

View File

@ -17,17 +17,23 @@ namespace skyline::gpu {
trapHandle = gpu.state.nce->TrapRegions(*guest, true, [this] {
std::scoped_lock lock{*this};
}, [this] {
std::unique_lock lock{*this, std::try_to_lock};
if (!lock)
return false;
SynchronizeGuest(true); // We can skip trapping since the caller will do it
WaitOnFence();
return true;
}, [this] {
DirtyState expectedState{DirtyState::Clean};
if (dirtyState.compare_exchange_strong(expectedState, DirtyState::CpuDirty, std::memory_order_relaxed) || expectedState == DirtyState::CpuDirty)
return; // If we can transition the buffer to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the buffer is GPU dirty
return true; // If we can transition the buffer to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the buffer is GPU dirty
std::scoped_lock lock{*this};
std::unique_lock lock{*this, std::try_to_lock};
if (!lock)
return false;
SynchronizeGuest(true);
dirtyState = DirtyState::CpuDirty; // We need to assume the buffer is dirty since we don't know what the guest is writing
WaitOnFence();
return true;
});
}

View File

@ -145,17 +145,23 @@ namespace skyline::gpu {
trapHandle = gpu.state.nce->TrapRegions(mappings, true, [this] {
std::scoped_lock lock{*this};
}, [this] {
std::unique_lock lock{*this, std::try_to_lock};
if (!lock)
return false;
SynchronizeGuest(true); // We can skip trapping since the caller will do it
WaitOnFence();
return true;
}, [this] {
DirtyState expectedState{DirtyState::Clean};
if (dirtyState.compare_exchange_strong(expectedState, DirtyState::CpuDirty, std::memory_order_relaxed) || expectedState == DirtyState::CpuDirty)
return; // If we can transition the texture to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the texture is GPU dirty
return true; // If we can transition the texture to CPU dirty (from Clean) or if it already is CPU dirty then we can just return, we only need to do the lock and corresponding sync if the texture is GPU dirty
std::scoped_lock lock{*this};
std::unique_lock lock{*this, std::try_to_lock};
if (!lock)
return false;
SynchronizeGuest(true);
dirtyState = DirtyState::CpuDirty; // We need to assume the texture is dirty since we don't know what the guest is writing
WaitOnFence();
return true;
});
}

View File

@ -406,7 +406,7 @@ namespace skyline::nce {
}
}
NCE::CallbackEntry::CallbackEntry(TrapProtection protection, NCE::TrapCallback readCallback, NCE::TrapCallback writeCallback) : protection(protection), readCallback(std::move(readCallback)), writeCallback(std::move(writeCallback)) {}
NCE::CallbackEntry::CallbackEntry(TrapProtection protection, LockCallback lockCallback, TrapCallback readCallback, TrapCallback writeCallback) : protection{protection}, lockCallback{std::move(lockCallback)}, readCallback{std::move(readCallback)}, writeCallback{std::move(writeCallback)} {}
void NCE::ReprotectIntervals(const std::vector<TrapMap::Interval> &intervals, TrapProtection protection) {
auto reprotectIntervalsWithFunction = [&intervals](auto getProtection) {
@ -467,55 +467,68 @@ namespace skyline::nce {
}
bool NCE::TrapHandler(u8 *address, bool write) {
std::scoped_lock lock(trapMutex);
LockCallback lockCallback{};
while (true) {
if (lockCallback)
// We want to avoid a deadlock of holding trapMutex while locking the resource inside a callback while another thread holding the resource's mutex waits on trapMutex, we solve this by quitting the loop if a callback would be blocking and attempt to lock the resource externally
lockCallback();
// Check if we have a callback for this address
auto[entries, intervals]{trapMap.GetAlignedRecursiveRange<PAGE_SIZE>(address)};
std::scoped_lock lock(trapMutex);
if (entries.empty())
return false;
// Check if we have a callback for this address
auto[entries, intervals]{trapMap.GetAlignedRecursiveRange<PAGE_SIZE>(address)};
// Do callbacks for every entry in the intervals
if (write) {
for (auto entryRef : entries) {
auto &entry{entryRef.get()};
if (entry.protection == TrapProtection::None)
// We don't need to do the callback if the entry doesn't require any protection already
continue;
if (entries.empty())
return false;
entry.writeCallback();
entry.protection = TrapProtection::None; // We don't need to protect this entry anymore
}
} else {
bool allNone{true}; // If all entries require no protection, we can protect to allow all accesses
for (auto entryRef : entries) {
auto &entry{entryRef.get()};
if (entry.protection < TrapProtection::ReadWrite) {
// We don't need to do the callback if the entry can already handle read accesses
allNone = allNone && entry.protection == TrapProtection::None;
continue;
// Do callbacks for every entry in the intervals
if (write) {
for (auto entryRef : entries) {
auto &entry{entryRef.get()};
if (entry.protection == TrapProtection::None)
// We don't need to do the callback if the entry doesn't require any protection already
continue;
if (!entry.writeCallback()) {
lockCallback = entry.lockCallback;
continue;
}
entry.protection = TrapProtection::None; // We don't need to protect this entry anymore
}
} else {
bool allNone{true}; // If all entries require no protection, we can protect to allow all accesses
for (auto entryRef : entries) {
auto &entry{entryRef.get()};
if (entry.protection < TrapProtection::ReadWrite) {
// We don't need to do the callback if the entry can already handle read accesses
allNone = allNone && entry.protection == TrapProtection::None;
continue;
}
entry.readCallback();
entry.protection = TrapProtection::WriteOnly; // We only need to trap writes to this entry
if (!entry.readCallback()) {
lockCallback = entry.lockCallback;
continue;
}
entry.protection = TrapProtection::WriteOnly; // We only need to trap writes to this entry
}
write = allNone;
}
write = allNone;
int permission{PROT_READ | (write ? PROT_WRITE : 0) | PROT_EXEC};
for (const auto &interval : intervals)
// Reprotect the interval to the lowest protection level that the callbacks performed allow
mprotect(interval.start, interval.Size(), permission);
return true;
}
int permission{PROT_READ | (write ? PROT_WRITE : 0) | PROT_EXEC};
for (const auto &interval : intervals)
// Reprotect the interval to the lowest protection level that the callbacks performed allow
mprotect(interval.start, interval.Size(), permission);
return true;
}
constexpr NCE::TrapHandle::TrapHandle(const TrapMap::GroupHandle &handle) : TrapMap::GroupHandle(handle) {}
NCE::TrapHandle NCE::TrapRegions(span<span<u8>> regions, bool writeOnly, const TrapCallback &readCallback, const TrapCallback &writeCallback) {
NCE::TrapHandle NCE::TrapRegions(span<span<u8>> regions, bool writeOnly, const LockCallback& lockCallback, const TrapCallback &readCallback, const TrapCallback &writeCallback) {
std::scoped_lock lock(trapMutex);
auto protection{writeOnly ? TrapProtection::WriteOnly : TrapProtection::ReadWrite};
TrapHandle handle{trapMap.Insert(regions, CallbackEntry{protection, readCallback, writeCallback})};
TrapHandle handle{trapMap.Insert(regions, CallbackEntry{protection, lockCallback, readCallback, writeCallback})};
ReprotectIntervals(handle->intervals, protection);
return handle;
}

View File

@ -24,13 +24,15 @@ namespace skyline::nce {
ReadWrite = 2, //!< Both read and write protection are required
};
using TrapCallback = std::function<void()>;
using TrapCallback = std::function<bool()>;
using LockCallback = std::function<void()>;
struct CallbackEntry {
TrapProtection protection; //!< The least restrictive protection that this callback needs to have
LockCallback lockCallback;
TrapCallback readCallback, writeCallback;
CallbackEntry(TrapProtection protection, NCE::TrapCallback readCallback, NCE::TrapCallback writeCallback);
CallbackEntry(TrapProtection protection, LockCallback lockCallback, TrapCallback readCallback, TrapCallback writeCallback);
};
std::mutex trapMutex; //!< Synchronizes the accesses to the trap map
@ -102,11 +104,14 @@ namespace skyline::nce {
/**
* @brief Traps a region of guest memory with a callback for when an access to it has been made
* @param writeOnly If the trap is optimally for write-only accesses initially, this is not guarenteed
* @param lockCallback A callback to lock the resource that is being trapped, it must block until the resource is locked but unlock it prior to returning
* @param readCallback A callback for read accesses to the trapped region, it must not block and return a boolean if it would block
* @param writeCallback A callback for write accesses to the trapped region, it must not block and return a boolean if it would block
* @note The handle **must** be deleted using DeleteTrap before the NCE instance is destroyed
* @note It is UB to supply a region of host memory rather than guest memory
* @note Any regions trapped without writeOnly may have their data (except border pages) paged out and it needs to be paged back in inside the callbacks
*/
TrapHandle TrapRegions(span<span<u8>> regions, bool writeOnly, const TrapCallback& readCallback, const TrapCallback& writeCallback);
TrapHandle TrapRegions(span<span<u8>> regions, bool writeOnly, const LockCallback& lockCallback, const TrapCallback& readCallback, const TrapCallback& writeCallback);
/**
* @brief Re-traps a region of memory after protections were removed