Use mirror mappings for Textures and Buffers

This is a prerequisite to memory trapping: we need to write to the mirror to avoid a race condition with external threads writing to a texture/buffer while we do so ourselves for the sync on a read/write. It also avoids an additional `mprotect` to `-WX`/`RWX` on a read access.

An additional advantage, especially for textures, is that split-mapped textures are now supported: they are laid out in a contiguous mirror, so no costly algorithmic changes are required. Buffers should also benefit from not needing to iterate over every region when they are split into multiple mappings.
This commit is contained in:
PixelyIon 2022-03-06 20:27:13 +05:30
parent 577a67babd
commit 5c9e42e384
7 changed files with 128 additions and 34 deletions

View File

@ -238,7 +238,8 @@ namespace skyline::gpu {
GPU::GPU(const DeviceState &state) GPU::GPU(const DeviceState &state)
: vkContext(LoadVulkanDriver(state)), : state(state),
vkContext(LoadVulkanDriver(state)),
vkInstance(CreateInstance(state, vkContext)), vkInstance(CreateInstance(state, vkContext)),
vkDebugReportCallback(CreateDebugReportCallback(vkInstance)), vkDebugReportCallback(CreateDebugReportCallback(vkInstance)),
vkPhysicalDevice(CreatePhysicalDevice(vkInstance)), vkPhysicalDevice(CreatePhysicalDevice(vkInstance)),

View File

@ -19,6 +19,11 @@ namespace skyline::gpu {
* @brief An interface to host GPU structures, anything concerning host GPU/Presentation APIs is encapsulated by this * @brief An interface to host GPU structures, anything concerning host GPU/Presentation APIs is encapsulated by this
*/ */
class GPU { class GPU {
private:
const DeviceState &state; // We access the device state inside Texture (and Buffers) for setting up NCE memory tracking
friend Texture;
friend Buffer;
public: public:
vk::raii::Context vkContext; vk::raii::Context vkContext;
vk::raii::Instance vkInstance; vk::raii::Instance vkInstance;

View File

@ -2,6 +2,8 @@
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h> #include <gpu.h>
#include <kernel/memory.h>
#include <kernel/types/KProcess.h>
#include <common/trace.h> #include <common/trace.h>
#include "buffer.h" #include "buffer.h"
@ -13,10 +15,50 @@ namespace skyline::gpu {
return size; return size;
} }
Buffer::Buffer(GPU &gpu, GuestBuffer guest) : size(guest.BufferSize()), backing(gpu.memory.AllocateBuffer(size)), guest(std::move(guest)) { void Buffer::SetupGuestMappings() {
// Fills in `alignedMirror` (the mirror of the guest mappings widened to page boundaries) and `mirror` (the sub-range of it covering exactly the buffer's bytes)
// NOTE(review): with zero mappings the else-branch is taken and front()/std::prev(end()) are used on an empty container — presumably callers always supply at least one mapping, confirm
auto &mappings{guest.mappings};
if (mappings.size() == 1) {
// Single mapping: round its start down and its end up to PAGE_SIZE and mirror that one range
auto mapping{mappings.front()};
u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)};
size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)};
alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
// Skip the padding that aligning down added in front so that `mirror` starts at the first byte of the mapping
mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size());
} else {
// Multiple mappings: only the start of the first and the size of the last are adjusted to PAGE_SIZE, everything in between is passed through untouched
// NOTE(review): this presumably relies on the interior boundaries already being page-aligned — confirm
std::vector<span<u8>> alignedMappings;
const auto &frontMapping{mappings.front()};
u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
// The first region starts at the aligned-down address but keeps the original end of the first mapping
alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData);
// `totalSize` accumulates the unaligned sizes, it becomes the length of `mirror`
size_t totalSize{frontMapping.size()};
// Visits every mapping except the first and the last
for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) {
auto mappingSize{it->size()};
alignedMappings.emplace_back(it->data(), mappingSize);
totalSize += mappingSize;
}
const auto &backMapping{mappings.back()};
totalSize += backMapping.size();
// The last region keeps its start and has only its size rounded up to PAGE_SIZE
alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE));
alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings);
// Same front-padding skip as in the single mapping case, spanning the summed unaligned sizes
mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize);
}
}
// Keeps a reference to the GPU, allocates a host backing of guest.BufferSize() bytes and takes ownership of the guest descriptor
Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), size(guest.BufferSize()), backing(gpu.memory.AllocateBuffer(size)), guest(std::move(guest)) {
// The mirror mappings are created first, SynchronizeHost() is only called afterwards
SetupGuestMappings();
SynchronizeHost(); SynchronizeHost();
} }
/**
 * @brief Calls SynchronizeGuest(true) under the buffer's own lock and then unmaps the aligned mirror if one exists
 * @note The unmap covers `alignedMirror` rather than `mirror` so the entire page-aligned range is released
 */
Buffer::~Buffer() {
    std::scoped_lock guard{*this}; // The buffer itself is the lockable, see lock()/unlock()
    SynchronizeGuest(true);

    if (!alignedMirror.valid())
        return; // No mirror was ever created, nothing to unmap

    munmap(alignedMirror.data(), alignedMirror.size());
}
void Buffer::WaitOnFence() { void Buffer::WaitOnFence() {
TRACE_EVENT("gpu", "Buffer::WaitOnFence"); TRACE_EVENT("gpu", "Buffer::WaitOnFence");
@ -89,6 +131,10 @@ namespace skyline::gpu {
cycle = pCycle; cycle = pCycle;
} }
/**
 * @brief Copies the supplied bytes into the buffer's contiguous guest mirror
 * @param data The bytes to copy
 * @param offset The offset in bytes from the start of `mirror` at which the copy begins
 * @throws exception If the range [offset, offset + data.size()) does not lie within the mirror
 * @note No lock is taken in here — presumably the caller is expected to hold the buffer/view lock, confirm against call sites
 */
void Buffer::Write(span<u8> data, vk::DeviceSize offset) {
    // Written so that `offset + data.size()` is never evaluated for the check and therefore cannot wrap around
    if (offset > mirror.size() || data.size() > mirror.size() - offset)
        throw exception("Write of {} bytes at offset {} is larger than buffer size: {}", data.size(), offset, mirror.size());

    std::memcpy(mirror.data() + offset, data.data(), data.size());
}
std::shared_ptr<BufferView> Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) { std::shared_ptr<BufferView> Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) {
for (const auto &viewWeak : views) { for (const auto &viewWeak : views) {
auto view{viewWeak.lock()}; auto view{viewWeak.lock()};

View File

@ -28,16 +28,24 @@ namespace skyline::gpu {
*/ */
class Buffer : public std::enable_shared_from_this<Buffer>, public FenceCycleDependency { class Buffer : public std::enable_shared_from_this<Buffer>, public FenceCycleDependency {
private: private:
GPU &gpu;
std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing
vk::DeviceSize size; vk::DeviceSize size;
memory::Buffer backing; memory::Buffer backing;
GuestBuffer guest; GuestBuffer guest;
span<u8> mirror{}; //!< A contiguous mirror of all the guest mappings to allow linear access on the CPU
span<u8> alignedMirror{}; //!< The mirror mapping aligned to page size to reflect the full mapping
std::vector<std::weak_ptr<BufferView>> views; //!< BufferView(s) that are backed by this Buffer, used for repointing to a new Buffer on deletion std::vector<std::weak_ptr<BufferView>> views; //!< BufferView(s) that are backed by this Buffer, used for repointing to a new Buffer on deletion
friend BufferView; friend BufferView;
friend BufferManager; friend BufferManager;
/**
* @brief Sets up mirror mappings for the guest mappings
*/
void SetupGuestMappings();
public: public:
std::weak_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the buffer has completed, it must be waited on prior to any mutations to the backing std::weak_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the buffer has completed, it must be waited on prior to any mutations to the backing
@ -47,8 +55,10 @@ namespace skyline::gpu {
Buffer(GPU &gpu, GuestBuffer guest); Buffer(GPU &gpu, GuestBuffer guest);
~Buffer();
/** /**
* @brief Acquires an exclusive lock on the texture for the calling thread * @brief Acquires an exclusive lock on the buffer for the calling thread
* @note Naming is in accordance to the BasicLockable named requirement * @note Naming is in accordance to the BasicLockable named requirement
*/ */
void lock() { void lock() {
@ -56,7 +66,7 @@ namespace skyline::gpu {
} }
/** /**
* @brief Relinquishes an existing lock on the texture by the calling thread * @brief Relinquishes an existing lock on the buffer by the calling thread
* @note Naming is in accordance to the BasicLockable named requirement * @note Naming is in accordance to the BasicLockable named requirement
*/ */
void unlock() { void unlock() {
@ -103,6 +113,11 @@ namespace skyline::gpu {
*/ */
void SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &cycle); void SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &cycle);
/**
* @brief Writes data at the specified offset in the buffer
*/
void Write(span<u8> data, vk::DeviceSize offset);
/** /**
* @return A cached or newly created view into this buffer with the supplied attributes * @return A cached or newly created view into this buffer with the supplied attributes
*/ */
@ -121,7 +136,7 @@ namespace skyline::gpu {
vk::Format format; vk::Format format;
/** /**
* @note A view must **NOT** be constructed directly, it should always be retrieved using Texture::GetView * @note A view must **NOT** be constructed directly, it should always be retrieved using Buffer::GetView
*/ */
BufferView(std::shared_ptr<Buffer> backing, vk::DeviceSize offset, vk::DeviceSize range, vk::Format format); BufferView(std::shared_ptr<Buffer> backing, vk::DeviceSize offset, vk::DeviceSize range, vk::Format format);

View File

@ -582,7 +582,7 @@ namespace skyline::gpu::interconnect {
T Read(size_t offset) { T Read(size_t offset) {
T object; T object;
size_t objectOffset{}; size_t objectOffset{};
for (auto &mapping: guest.mappings) { for (auto &mapping : guest.mappings) {
if (offset < mapping.size_bytes()) { if (offset < mapping.size_bytes()) {
auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))}; auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
std::memcpy(reinterpret_cast<u8 *>(&object) + objectOffset, mapping.data() + offset, copySize); std::memcpy(reinterpret_cast<u8 *>(&object) + objectOffset, mapping.data() + offset, copySize);
@ -602,21 +602,9 @@ namespace skyline::gpu::interconnect {
* @note This must only be called when the GuestBuffer is resolved correctly * @note This must only be called when the GuestBuffer is resolved correctly
*/ */
template<typename T> template<typename T>
void Write(const T &object, size_t offset) { void Write(T &object, size_t offset) {
size_t objectOffset{}; std::lock_guard lock{*view};
for (auto &mapping: guest.mappings) { view->buffer->Write(span<T>(object).template cast<u8>(), view->offset + offset);
if (offset < mapping.size_bytes()) {
auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
std::memcpy(mapping.data() + offset, reinterpret_cast<const u8 *>(&object) + objectOffset, copySize);
objectOffset += copySize;
if (objectOffset == sizeof(T))
return;
offset = mapping.size_bytes();
} else {
offset -= mapping.size_bytes();
}
}
throw exception("Object extent ({} + {} = {}) is larger than constant buffer size: {}", size + offset, sizeof(T), size + offset + sizeof(T), size);
} }
}; };
ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it

View File

@ -2,6 +2,7 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h> #include <gpu.h>
#include <kernel/memory.h>
#include <common/trace.h> #include <common/trace.h>
#include <kernel/types/KProcess.h> #include <kernel/types/KProcess.h>
#include "texture.h" #include "texture.h"
@ -84,15 +85,45 @@ namespace skyline::gpu {
} }
} }
void Texture::SetupGuestMappings() {
auto &mappings{guest->mappings};
if (mappings.size() == 1) {
auto mapping{mappings.front()};
u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)};
size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)};
alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size());
} else {
std::vector<span<u8>> alignedMappings;
const auto &frontMapping{mappings.front()};
u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData);
size_t totalSize{frontMapping.size()};
for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) {
auto mappingSize{it->size()};
alignedMappings.emplace_back(it->data(), mappingSize);
totalSize += mappingSize;
}
const auto &backMapping{mappings.back()};
totalSize += backMapping.size();
alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE));
alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings);
mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize);
}
}
std::shared_ptr<memory::StagingBuffer> Texture::SynchronizeHostImpl(const std::shared_ptr<FenceCycle> &pCycle) { std::shared_ptr<memory::StagingBuffer> Texture::SynchronizeHostImpl(const std::shared_ptr<FenceCycle> &pCycle) {
if (!guest) if (!guest)
throw exception("Synchronization of host textures requires a valid guest texture to synchronize from"); throw exception("Synchronization of host textures requires a valid guest texture to synchronize from");
else if (guest->dimensions != dimensions) else if (guest->dimensions != dimensions)
throw exception("Guest and host dimensions being different is not supported currently"); throw exception("Guest and host dimensions being different is not supported currently");
else if (guest->mappings.size() > 1)
throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());
auto pointer{guest->mappings[0].data()}; auto pointer{mirror.data()};
auto size{format->GetSize(dimensions)}; auto size{format->GetSize(dimensions)};
WaitOnBacking(); WaitOnBacking();
@ -218,7 +249,7 @@ namespace skyline::gpu {
} }
void Texture::CopyToGuest(u8 *hostBuffer) { void Texture::CopyToGuest(u8 *hostBuffer) {
auto guestOutput{guest->mappings[0].data()}; auto guestOutput{mirror.data()};
if (guest->tileConfig.mode == texture::TileMode::Block) if (guest->tileConfig.mode == texture::TileMode::Block)
texture::CopyLinearToBlockLinear(*guest, hostBuffer, guestOutput); texture::CopyLinearToBlockLinear(*guest, hostBuffer, guestOutput);
@ -246,6 +277,7 @@ namespace skyline::gpu {
mipLevels(mipLevels), mipLevels(mipLevels),
layerCount(layerCount), layerCount(layerCount),
sampleCount(sampleCount) { sampleCount(sampleCount) {
SetupGuestMappings();
if (GetBacking()) if (GetBacking())
SynchronizeHost(); SynchronizeHost();
} }
@ -293,6 +325,7 @@ namespace skyline::gpu {
}; };
backing = tiling != vk::ImageTiling::eLinear ? gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo); backing = tiling != vk::ImageTiling::eLinear ? gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo);
TransitionLayout(vk::ImageLayout::eGeneral); TransitionLayout(vk::ImageLayout::eGeneral);
SetupGuestMappings();
} }
Texture::Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) Texture::Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount)
@ -323,6 +356,13 @@ namespace skyline::gpu {
TransitionLayout(initialLayout); TransitionLayout(initialLayout);
} }
/**
 * @brief Calls SynchronizeGuest(true) for textures that have a guest and then unmaps the aligned mirror if one exists
 * @note Destructors are implicitly noexcept, any exception escaping from here would terminate the process
 */
Texture::~Texture() {
    std::lock_guard lock(*this);

    // Textures can be constructed without a guest and guest synchronization raises "requires a valid guest texture" errors
    // NOTE(review): the exact condition for that throw is outside this view — confirm that it is `!guest`
    if (guest)
        SynchronizeGuest(true);

    // The mirror only exists when SetupGuestMappings() has run for this texture
    if (alignedMirror.valid())
        munmap(alignedMirror.data(), alignedMirror.size());
}
bool Texture::WaitOnBacking() { bool Texture::WaitOnBacking() {
TRACE_EVENT("gpu", "Texture::WaitOnBacking"); TRACE_EVENT("gpu", "Texture::WaitOnBacking");
@ -409,8 +449,6 @@ namespace skyline::gpu {
throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to");
else if (layout == vk::ImageLayout::eUndefined) else if (layout == vk::ImageLayout::eUndefined)
return; // If the state of the host texture is undefined then so can the guest return; // If the state of the host texture is undefined then so can the guest
else if (guest->mappings.size() > 1)
throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());
TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
@ -442,8 +480,6 @@ namespace skyline::gpu {
throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to");
else if (layout == vk::ImageLayout::eUndefined) else if (layout == vk::ImageLayout::eUndefined)
return; // If the state of the host texture is undefined then so can the guest return; // If the state of the host texture is undefined then so can the guest
else if (guest->mappings.size() > 1)
throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());
TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer"); TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer");
@ -570,8 +606,4 @@ namespace skyline::gpu {
lCycle->AttachObjects(std::move(source), shared_from_this()); lCycle->AttachObjects(std::move(source), shared_from_this());
cycle = lCycle; cycle = lCycle;
} }
Texture::~Texture() {
WaitOnFence();
}
} }

View File

@ -214,7 +214,7 @@ namespace skyline::gpu {
* @brief A descriptor for a texture present in guest memory, it can be used to create a corresponding Texture object for usage on the host * @brief A descriptor for a texture present in guest memory, it can be used to create a corresponding Texture object for usage on the host
*/ */
struct GuestTexture { struct GuestTexture {
using Mappings = boost::container::small_vector<span < u8>, 3>; using Mappings = boost::container::small_vector<span<u8>, 3>;
Mappings mappings; //!< Spans to CPU memory for the underlying data backing this texture Mappings mappings; //!< Spans to CPU memory for the underlying data backing this texture
texture::Dimensions dimensions{}; texture::Dimensions dimensions{};
@ -319,11 +319,18 @@ namespace skyline::gpu {
using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>; using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
span<u8> mirror{}; //!< A contiguous mirror of all the guest mappings to allow linear access on the CPU
span<u8> alignedMirror{}; //!< The mirror mapping aligned to page size to reflect the full mapping
std::vector<std::weak_ptr<TextureView>> views; //!< TextureView(s) that are backed by this Texture, used for repointing to a new Texture on deletion std::vector<std::weak_ptr<TextureView>> views; //!< TextureView(s) that are backed by this Texture, used for repointing to a new Texture on deletion
friend TextureManager; friend TextureManager;
friend TextureView; friend TextureView;
/**
* @brief Sets up mirror mappings for the guest mappings
*/
void SetupGuestMappings();
/** /**
* @brief An implementation function for guest -> host texture synchronization, it allocates and copies data into a staging buffer or directly into a linear host texture * @brief An implementation function for guest -> host texture synchronization, it allocates and copies data into a staging buffer or directly into a linear host texture
* @return If a staging buffer was required for the texture sync, it's returned filled with guest texture data and must be copied to the host texture by the callee * @return If a staging buffer was required for the texture sync, it's returned filled with guest texture data and must be copied to the host texture by the callee