mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-29 19:44:17 +01:00
Allow external synchronization for buffers
In certain situations, such as constant buffer updates, we want to use the guest buffer as a shadow buffer, forwarding all writes directly to it while we update the host using inline buffer updates so that they happen in sequence. This requires special behavior, as we cannot let any synchronization operations take place because they would break the shadow buffer. As a result, an external synchronization flag has been added to prevent this from happening. It should be noted that this flag is not respected for buffer recreation, which will lead to UB; this can and will break updates in certain cases, and this change isn't complete without buffer manager support.
This commit is contained in:
parent
c0c4db68a8
commit
372ab8befa
@ -45,12 +45,29 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::MarkGpuDirty() {
|
void Buffer::MarkGpuDirty() {
|
||||||
if (dirtyState == DirtyState::GpuDirty)
|
if (dirtyState == DirtyState::GpuDirty || externallySynchronized) {
|
||||||
|
externallySynchronized = false; // We want to handle synchronize internally after the GPU work is done
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
gpu.state.nce->RetrapRegions(*trapHandle, false);
|
gpu.state.nce->RetrapRegions(*trapHandle, false);
|
||||||
dirtyState = DirtyState::GpuDirty;
|
dirtyState = DirtyState::GpuDirty;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Buffer::MarkExternallySynchronized() {
|
||||||
|
TRACE_EVENT("gpu", "Buffer::MarkExternallySynchronized");
|
||||||
|
if (externallySynchronized)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (dirtyState == DirtyState::GpuDirty)
|
||||||
|
std::memcpy(mirror.data(), backing.data(), mirror.size());
|
||||||
|
else if (dirtyState == DirtyState::CpuDirty)
|
||||||
|
std::memcpy(backing.data(), mirror.data(), mirror.size());
|
||||||
|
|
||||||
|
dirtyState = DirtyState::GpuDirty; // Any synchronization will take place on the GPU which in itself would make the buffer dirty
|
||||||
|
gpu.state.nce->RetrapRegions(*trapHandle, false);
|
||||||
|
externallySynchronized = true;
|
||||||
|
}
|
||||||
|
|
||||||
void Buffer::WaitOnFence() {
|
void Buffer::WaitOnFence() {
|
||||||
TRACE_EVENT("gpu", "Buffer::WaitOnFence");
|
TRACE_EVENT("gpu", "Buffer::WaitOnFence");
|
||||||
|
|
||||||
@ -67,6 +84,9 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
WaitOnFence();
|
WaitOnFence();
|
||||||
|
|
||||||
|
if (externallySynchronized)
|
||||||
|
return; // If the buffer is externally synchronized, we don't need to synchronize it
|
||||||
|
|
||||||
TRACE_EVENT("gpu", "Buffer::SynchronizeHost");
|
TRACE_EVENT("gpu", "Buffer::SynchronizeHost");
|
||||||
|
|
||||||
std::memcpy(backing.data(), mirror.data(), mirror.size());
|
std::memcpy(backing.data(), mirror.data(), mirror.size());
|
||||||
@ -81,12 +101,15 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::SynchronizeHostWithCycle(const std::shared_ptr<FenceCycle> &pCycle, bool rwTrap) {
|
void Buffer::SynchronizeHostWithCycle(const std::shared_ptr<FenceCycle> &pCycle, bool rwTrap) {
|
||||||
if (dirtyState != DirtyState::CpuDirty || !guest)
|
if (dirtyState != DirtyState::CpuDirty || !guest || externallySynchronized)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!cycle.owner_before(pCycle))
|
if (!cycle.owner_before(pCycle))
|
||||||
WaitOnFence();
|
WaitOnFence();
|
||||||
|
|
||||||
|
if (externallySynchronized)
|
||||||
|
return;
|
||||||
|
|
||||||
TRACE_EVENT("gpu", "Buffer::SynchronizeHostWithCycle");
|
TRACE_EVENT("gpu", "Buffer::SynchronizeHostWithCycle");
|
||||||
|
|
||||||
std::memcpy(backing.data(), mirror.data(), mirror.size());
|
std::memcpy(backing.data(), mirror.data(), mirror.size());
|
||||||
@ -101,12 +124,15 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::SynchronizeGuest(bool skipTrap, bool skipFence) {
|
void Buffer::SynchronizeGuest(bool skipTrap, bool skipFence) {
|
||||||
if (dirtyState != DirtyState::GpuDirty || !guest)
|
if (dirtyState != DirtyState::GpuDirty || !guest || externallySynchronized)
|
||||||
return; // If the buffer has not been used on the GPU or there's no guest buffer, there is no need to synchronize it
|
return; // If the buffer has not been used on the GPU or there's no guest buffer, there is no need to synchronize it
|
||||||
|
|
||||||
if (!skipFence)
|
if (!skipFence)
|
||||||
WaitOnFence();
|
WaitOnFence();
|
||||||
|
|
||||||
|
if (externallySynchronized)
|
||||||
|
return; // If the buffer is externally synchronized, we don't need to synchronize it
|
||||||
|
|
||||||
TRACE_EVENT("gpu", "Buffer::SynchronizeGuest");
|
TRACE_EVENT("gpu", "Buffer::SynchronizeGuest");
|
||||||
|
|
||||||
std::memcpy(mirror.data(), backing.data(), mirror.size());
|
std::memcpy(mirror.data(), backing.data(), mirror.size());
|
||||||
@ -131,6 +157,9 @@ namespace skyline::gpu {
|
|||||||
};
|
};
|
||||||
|
|
||||||
void Buffer::SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &pCycle) {
|
void Buffer::SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &pCycle) {
|
||||||
|
if (!guest)
|
||||||
|
return; // If there's no guest buffer, there is no need to synchronize it
|
||||||
|
|
||||||
if (!cycle.owner_before(pCycle))
|
if (!cycle.owner_before(pCycle))
|
||||||
WaitOnFence();
|
WaitOnFence();
|
||||||
|
|
||||||
@ -139,16 +168,16 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::Read(span<u8> data, vk::DeviceSize offset) {
|
void Buffer::Read(span<u8> data, vk::DeviceSize offset) {
|
||||||
if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
|
if (externallySynchronized || dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
|
||||||
std::memcpy(data.data(), mirror.data() + offset, data.size());
|
std::memcpy(data.data(), mirror.data() + offset, data.size());
|
||||||
else if (dirtyState == DirtyState::GpuDirty)
|
else if (dirtyState == DirtyState::GpuDirty)
|
||||||
std::memcpy(data.data(), backing.data() + offset, data.size());
|
std::memcpy(data.data(), backing.data() + offset, data.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::Write(span<u8> data, vk::DeviceSize offset, bool skipCleanHostWrite) {
|
void Buffer::Write(span<u8> data, vk::DeviceSize offset) {
|
||||||
if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
|
if (externallySynchronized || dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
|
||||||
std::memcpy(mirror.data() + offset, data.data(), data.size());
|
std::memcpy(mirror.data() + offset, data.data(), data.size());
|
||||||
if ((!skipCleanHostWrite && dirtyState == DirtyState::Clean) || dirtyState == DirtyState::GpuDirty)
|
if (!externallySynchronized && ((dirtyState == DirtyState::Clean) || dirtyState == DirtyState::GpuDirty))
|
||||||
std::memcpy(backing.data() + offset, data.data(), data.size());
|
std::memcpy(backing.data() + offset, data.data(), data.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -234,7 +263,7 @@ namespace skyline::gpu {
|
|||||||
bufferDelegate->buffer->Read(data, offset + bufferDelegate->view->offset);
|
bufferDelegate->buffer->Read(data, offset + bufferDelegate->view->offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferView::Write(span<u8> data, vk::DeviceSize offset, bool skipCleanHostWrite) const {
|
void BufferView::Write(span<u8> data, vk::DeviceSize offset) const {
|
||||||
bufferDelegate->buffer->Write(data, offset + bufferDelegate->view->offset, skipCleanHostWrite);
|
bufferDelegate->buffer->Write(data, offset + bufferDelegate->view->offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,7 @@ namespace skyline::gpu {
|
|||||||
CpuDirty, //!< The CPU mappings have been modified but the GPU buffer is not up to date
|
CpuDirty, //!< The CPU mappings have been modified but the GPU buffer is not up to date
|
||||||
GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated
|
GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated
|
||||||
} dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer
|
} dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer
|
||||||
|
bool externallySynchronized{}; //!< Whether the host buffer is externally synchronized with the guest buffer, disables the buffer synchronization and aims to retain guest/host buffer data across buffer recreation
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
@ -123,11 +124,18 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Marks the buffer as dirty on the GPU, it will be synced on the next call to SynchronizeGuest
|
* @brief Marks the buffer as dirty on the GPU, it will be synced on the next call to SynchronizeGuest
|
||||||
|
* @note This clears the externally synchronized flag automatically
|
||||||
* @note This **must** be called after syncing the buffer to the GPU not before
|
* @note This **must** be called after syncing the buffer to the GPU not before
|
||||||
* @note The buffer **must** be locked prior to calling this
|
* @note The buffer **must** be locked prior to calling this
|
||||||
*/
|
*/
|
||||||
void MarkGpuDirty();
|
void MarkGpuDirty();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Marks the buffer as externally synchronized and automatically synchronizes the host buffer and guest buffer, ensuring the buffer is GPU dirty by the end of the current cycle is the responsibility of the API user
|
||||||
|
* @note The buffer **must** be locked and have the desired fence attached prior to calling this
|
||||||
|
*/
|
||||||
|
void MarkExternallySynchronized();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Waits on a fence cycle if it exists till it's signalled and resets it after
|
* @brief Waits on a fence cycle if it exists till it's signalled and resets it after
|
||||||
* @note The buffer **must** be locked prior to calling this
|
* @note The buffer **must** be locked prior to calling this
|
||||||
@ -166,14 +174,17 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Reads data at the specified offset in the buffer
|
* @brief Reads data at the specified offset in the buffer
|
||||||
|
* @note The buffer **must** be locked prior to calling this
|
||||||
|
* @note If this buffer is externally synchronized, this will read exclusively from the guest buffer
|
||||||
*/
|
*/
|
||||||
void Read(span<u8> data, vk::DeviceSize offset);
|
void Read(span<u8> data, vk::DeviceSize offset);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Writes data at the specified offset in the buffer
|
* @brief Writes data at the specified offset in the buffer
|
||||||
* @param skipCleanHostWrite Skip writing to the host buffer if it's clean, assumes the buffer data will be synchronised externally
|
* @note The buffer **must** be locked prior to calling this
|
||||||
|
* @note If this buffer is externally synchronized, this will write to the guest buffer and not to the host buffer
|
||||||
*/
|
*/
|
||||||
void Write(span<u8> data, vk::DeviceSize offset, bool skipCleanHostWrite = false);
|
void Write(span<u8> data, vk::DeviceSize offset);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return A cached or newly created view into this buffer with the supplied attributes
|
* @return A cached or newly created view into this buffer with the supplied attributes
|
||||||
@ -245,14 +256,15 @@ namespace skyline::gpu {
|
|||||||
/**
|
/**
|
||||||
* @brief Reads data at the specified offset in the view
|
* @brief Reads data at the specified offset in the view
|
||||||
* @note The view **must** be locked prior to calling this
|
* @note The view **must** be locked prior to calling this
|
||||||
|
* @note If this buffer is externally synchronized, this will read exclusively from the guest buffer
|
||||||
*/
|
*/
|
||||||
void Read(span<u8> data, vk::DeviceSize offset) const;
|
void Read(span<u8> data, vk::DeviceSize offset) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Writes data at the specified offset in the view
|
* @brief Writes data at the specified offset in the view
|
||||||
* @note The view **must** be locked prior to calling this
|
* @note The view **must** be locked prior to calling this
|
||||||
* @param skipCleanHostWrite Skip writing to the host buffer if it's clean, assumes the buffer data will be synchronised externally
|
* @note If this buffer is externally synchronized, this will write to the guest buffer and not to the host buffer
|
||||||
*/
|
*/
|
||||||
void Write(span<u8> data, vk::DeviceSize offset, bool skipCleanHostWrite = false) const;
|
void Write(span<u8> data, vk::DeviceSize offset) const;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -623,16 +623,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
view.Read(span<T>(object).template cast<u8>(), offset);
|
view.Read(span<T>(object).template cast<u8>(), offset);
|
||||||
return object;
|
return object;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Writes an object to the supplied offset in the constant buffer
|
|
||||||
* @note This must only be called when the GuestBuffer is resolved correctly
|
|
||||||
*/
|
|
||||||
template<typename T>
|
|
||||||
void Write(T &object, size_t offset) {
|
|
||||||
std::scoped_lock lock{view};
|
|
||||||
view.Write(span<T>(object).template cast<u8>(), offset, true);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
|
ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
|
||||||
|
|
||||||
@ -700,10 +690,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
if (!view) {
|
if (!view) {
|
||||||
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};
|
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};
|
||||||
view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
|
view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
|
||||||
{
|
|
||||||
std::scoped_lock lock{*view};
|
|
||||||
view->bufferDelegate->buffer->SynchronizeHost(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova, *view);
|
constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova, *view);
|
||||||
}
|
}
|
||||||
@ -714,11 +700,17 @@ namespace skyline::gpu::interconnect {
|
|||||||
|
|
||||||
void ConstantBufferUpdate(u32 data, u32 offset) {
|
void ConstantBufferUpdate(u32 data, u32 offset) {
|
||||||
auto constantBuffer{GetConstantBufferSelector().value()};
|
auto constantBuffer{GetConstantBufferSelector().value()};
|
||||||
constantBuffer.Write(data, offset);
|
auto& constantBufferView{constantBuffer.view};
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{constantBufferView};
|
||||||
|
executor.AttachBuffer(constantBufferView);
|
||||||
|
constantBufferView->buffer->MarkExternallySynchronized(); // We want to handle synchronization of updated constant buffers ourselves
|
||||||
|
constantBufferView.Write(span<u32>(data).cast<u8>(), offset);
|
||||||
|
}
|
||||||
|
|
||||||
executor.AddOutsideRpCommand([view = constantBuffer.view, data, offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &) {
|
executor.AddOutsideRpCommand([constantBufferView, data, offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &) {
|
||||||
std::scoped_lock lock{view};
|
std::scoped_lock lock{constantBufferView};
|
||||||
commandBuffer.updateBuffer<u32>(view.bufferDelegate->buffer->GetBacking(), view->view->offset + offset, vk::ArrayProxy(1, &data));
|
commandBuffer.updateBuffer<u32>(constantBufferView->buffer->GetBacking(), constantBufferView->view->offset + offset, vk::ArrayProxy(1, &data));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2583,6 +2575,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
public:
|
public:
|
||||||
template<bool IsIndexed>
|
template<bool IsIndexed>
|
||||||
void Draw(u32 count, u32 first, i32 vertexOffset = 0) {
|
void Draw(u32 count, u32 first, i32 vertexOffset = 0) {
|
||||||
|
// Draw state validation
|
||||||
ValidatePrimitiveRestartState();
|
ValidatePrimitiveRestartState();
|
||||||
|
|
||||||
// Shader + Binding Setup
|
// Shader + Binding Setup
|
||||||
|
Loading…
Reference in New Issue
Block a user