Use mirror mappings for Textures and Buffers

This is a prerequisite to memory trapping, as we need to write to the mirror to avoid a race condition with external threads writing to a texture/buffer while we do so ourselves for the sync on a read/write. It also avoids an additional `mprotect` to `-WX`/`RWX` on a read access.

An additional advantage, especially for textures, is that we now support split-mapping textures by laying them out in a contiguous mirror, so they will not require costly algorithmic changes. Buffers should also benefit from not needing to iterate over every region when they are split into multiple mappings.
This commit is contained in:
PixelyIon 2022-03-06 20:27:13 +05:30
parent 577a67babd
commit 5c9e42e384
7 changed files with 128 additions and 34 deletions

View File

@ -238,7 +238,8 @@ namespace skyline::gpu {
GPU::GPU(const DeviceState &state)
: vkContext(LoadVulkanDriver(state)),
: state(state),
vkContext(LoadVulkanDriver(state)),
vkInstance(CreateInstance(state, vkContext)),
vkDebugReportCallback(CreateDebugReportCallback(vkInstance)),
vkPhysicalDevice(CreatePhysicalDevice(vkInstance)),

View File

@ -19,6 +19,11 @@ namespace skyline::gpu {
* @brief An interface to host GPU structures, anything concerning host GPU/Presentation APIs is encapsulated by this
*/
class GPU {
private:
const DeviceState &state; // We access the device state inside Texture (and Buffers) for setting up NCE memory tracking
friend Texture;
friend Buffer;
public:
vk::raii::Context vkContext;
vk::raii::Instance vkInstance;

View File

@ -2,6 +2,8 @@
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <kernel/memory.h>
#include <kernel/types/KProcess.h>
#include <common/trace.h>
#include "buffer.h"
@ -13,10 +15,50 @@ namespace skyline::gpu {
return size;
}
Buffer::Buffer(GPU &gpu, GuestBuffer guest) : size(guest.BufferSize()), backing(gpu.memory.AllocateBuffer(size)), guest(std::move(guest)) {
void Buffer::SetupGuestMappings() {
auto &mappings{guest.mappings};
if (mappings.size() == 1) {
auto mapping{mappings.front()};
u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)};
size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)};
alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size());
} else {
std::vector<span<u8>> alignedMappings;
const auto &frontMapping{mappings.front()};
u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData);
size_t totalSize{frontMapping.size()};
for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) {
auto mappingSize{it->size()};
alignedMappings.emplace_back(it->data(), mappingSize);
totalSize += mappingSize;
}
const auto &backMapping{mappings.back()};
totalSize += backMapping.size();
alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE));
alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings);
mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize);
}
}
Buffer::Buffer(GPU &gpu, GuestBuffer guest)
    : gpu(gpu),
      size(guest.BufferSize()),
      backing(gpu.memory.AllocateBuffer(size)),
      guest(std::move(guest)) {
    // The mirror of the guest mappings is set up before the initial SynchronizeHost()
    SetupGuestMappings();
    SynchronizeHost();
}
Buffer::~Buffer() {
    std::lock_guard lock{*this};
    SynchronizeGuest(true);

    // Nothing to unmap if a mirror was never set up
    if (!alignedMirror.valid())
        return;

    munmap(alignedMirror.data(), alignedMirror.size());
}
void Buffer::WaitOnFence() {
TRACE_EVENT("gpu", "Buffer::WaitOnFence");
@ -89,6 +131,10 @@ namespace skyline::gpu {
cycle = pCycle;
}
/**
 * @brief Copies the supplied bytes into the buffer's CPU mirror
 * @param data The bytes to copy into the mirror
 * @param offset The byte offset into the mirror at which the copy begins
 * @throws exception If the write would extend past the end of the mirror
 */
void Buffer::Write(span<u8> data, vk::DeviceSize offset) {
    // Phrased as a subtraction so that `offset + data.size()` cannot wrap around and slip past the check
    if (offset > mirror.size() || data.size() > mirror.size() - offset)
        throw exception("Write extent ({} + {}) is larger than buffer size: {}", offset, data.size(), mirror.size());

    std::memcpy(mirror.data() + offset, data.data(), data.size());
}
std::shared_ptr<BufferView> Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) {
for (const auto &viewWeak : views) {
auto view{viewWeak.lock()};

View File

@ -28,16 +28,24 @@ namespace skyline::gpu {
*/
class Buffer : public std::enable_shared_from_this<Buffer>, public FenceCycleDependency {
private:
GPU &gpu;
std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing
vk::DeviceSize size;
memory::Buffer backing;
GuestBuffer guest;
span<u8> mirror{}; //!< A contiguous mirror of all the guest mappings to allow linear access on the CPU
span<u8> alignedMirror{}; //!< The mirror mapping aligned to page size to reflect the full mapping
std::vector<std::weak_ptr<BufferView>> views; //!< BufferView(s) that are backed by this Buffer, used for repointing to a new Buffer on deletion
friend BufferView;
friend BufferManager;
/**
* @brief Sets up mirror mappings for the guest mappings
*/
void SetupGuestMappings();
public:
std::weak_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the buffer has completed, it must be waited on prior to any mutations to the backing
@ -47,8 +55,10 @@ namespace skyline::gpu {
Buffer(GPU &gpu, GuestBuffer guest);
~Buffer();
/**
* @brief Acquires an exclusive lock on the texture for the calling thread
* @brief Acquires an exclusive lock on the buffer for the calling thread
* @note Naming is in accordance to the BasicLockable named requirement
*/
void lock() {
@ -56,7 +66,7 @@ namespace skyline::gpu {
}
/**
* @brief Relinquishes an existing lock on the texture by the calling thread
* @brief Relinquishes an existing lock on the buffer by the calling thread
* @note Naming is in accordance to the BasicLockable named requirement
*/
void unlock() {
@ -103,6 +113,11 @@ namespace skyline::gpu {
*/
void SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &cycle);
/**
* @brief Writes data at the specified offset in the buffer
*/
void Write(span<u8> data, vk::DeviceSize offset);
/**
* @return A cached or newly created view into this buffer with the supplied attributes
*/
@ -121,7 +136,7 @@ namespace skyline::gpu {
vk::Format format;
/**
* @note A view must **NOT** be constructed directly, it should always be retrieved using Texture::GetView
* @note A view must **NOT** be constructed directly, it should always be retrieved using Buffer::GetView
*/
BufferView(std::shared_ptr<Buffer> backing, vk::DeviceSize offset, vk::DeviceSize range, vk::Format format);

View File

@ -582,7 +582,7 @@ namespace skyline::gpu::interconnect {
T Read(size_t offset) {
T object;
size_t objectOffset{};
for (auto &mapping: guest.mappings) {
for (auto &mapping : guest.mappings) {
if (offset < mapping.size_bytes()) {
auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
std::memcpy(reinterpret_cast<u8 *>(&object) + objectOffset, mapping.data() + offset, copySize);
@ -602,21 +602,9 @@ namespace skyline::gpu::interconnect {
* @note This must only be called when the GuestBuffer is resolved correctly
*/
template<typename T>
void Write(const T &object, size_t offset) {
size_t objectOffset{};
for (auto &mapping: guest.mappings) {
if (offset < mapping.size_bytes()) {
auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
std::memcpy(mapping.data() + offset, reinterpret_cast<const u8 *>(&object) + objectOffset, copySize);
objectOffset += copySize;
if (objectOffset == sizeof(T))
return;
offset = mapping.size_bytes();
} else {
offset -= mapping.size_bytes();
}
}
throw exception("Object extent ({} + {} = {}) is larger than constant buffer size: {}", size + offset, sizeof(T), size + offset + sizeof(T), size);
void Write(T &object, size_t offset) {
    std::lock_guard lock{*view};

    // View the object as raw bytes and forward the write to the backing buffer, relative to where this view starts
    auto objectBytes{span<T>(object).template cast<u8>()};
    view->buffer->Write(objectBytes, view->offset + offset);
}
};
ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it

View File

@ -2,6 +2,7 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <kernel/memory.h>
#include <common/trace.h>
#include <kernel/types/KProcess.h>
#include "texture.h"
@ -84,15 +85,45 @@ namespace skyline::gpu {
}
}
void Texture::SetupGuestMappings() {
auto &mappings{guest->mappings};
if (mappings.size() == 1) {
auto mapping{mappings.front()};
u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)};
size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)};
alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size());
} else {
std::vector<span<u8>> alignedMappings;
const auto &frontMapping{mappings.front()};
u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData);
size_t totalSize{frontMapping.size()};
for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) {
auto mappingSize{it->size()};
alignedMappings.emplace_back(it->data(), mappingSize);
totalSize += mappingSize;
}
const auto &backMapping{mappings.back()};
totalSize += backMapping.size();
alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE));
alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings);
mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize);
}
}
std::shared_ptr<memory::StagingBuffer> Texture::SynchronizeHostImpl(const std::shared_ptr<FenceCycle> &pCycle) {
if (!guest)
throw exception("Synchronization of host textures requires a valid guest texture to synchronize from");
else if (guest->dimensions != dimensions)
throw exception("Guest and host dimensions being different is not supported currently");
else if (guest->mappings.size() > 1)
throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());
auto pointer{guest->mappings[0].data()};
auto pointer{mirror.data()};
auto size{format->GetSize(dimensions)};
WaitOnBacking();
@ -218,7 +249,7 @@ namespace skyline::gpu {
}
void Texture::CopyToGuest(u8 *hostBuffer) {
auto guestOutput{guest->mappings[0].data()};
auto guestOutput{mirror.data()};
if (guest->tileConfig.mode == texture::TileMode::Block)
texture::CopyLinearToBlockLinear(*guest, hostBuffer, guestOutput);
@ -246,6 +277,7 @@ namespace skyline::gpu {
mipLevels(mipLevels),
layerCount(layerCount),
sampleCount(sampleCount) {
SetupGuestMappings();
if (GetBacking())
SynchronizeHost();
}
@ -293,6 +325,7 @@ namespace skyline::gpu {
};
backing = tiling != vk::ImageTiling::eLinear ? gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo);
TransitionLayout(vk::ImageLayout::eGeneral);
SetupGuestMappings();
}
Texture::Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount)
@ -323,6 +356,13 @@ namespace skyline::gpu {
TransitionLayout(initialLayout);
}
Texture::~Texture() {
    std::lock_guard lock(*this);

    // Textures can be constructed without a guest and the guest synchronization path throws in that case (NOTE(review): confirm against SynchronizeGuest).
    // An exception escaping a destructor terminates the process, so the sync is only attempted when a guest exists.
    if (guest)
        SynchronizeGuest(true);
    else
        WaitOnFence(); // Host-only textures retain the prior behavior of waiting for pending GPU work before destruction

    if (alignedMirror.valid())
        munmap(alignedMirror.data(), alignedMirror.size());
}
bool Texture::WaitOnBacking() {
TRACE_EVENT("gpu", "Texture::WaitOnBacking");
@ -409,8 +449,6 @@ namespace skyline::gpu {
throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to");
else if (layout == vk::ImageLayout::eUndefined)
return; // If the state of the host texture is undefined then so can the guest
else if (guest->mappings.size() > 1)
throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());
TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
@ -442,8 +480,6 @@ namespace skyline::gpu {
throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to");
else if (layout == vk::ImageLayout::eUndefined)
return; // If the state of the host texture is undefined then so can the guest
else if (guest->mappings.size() > 1)
throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());
TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer");
@ -570,8 +606,4 @@ namespace skyline::gpu {
lCycle->AttachObjects(std::move(source), shared_from_this());
cycle = lCycle;
}
Texture::~Texture() {
WaitOnFence();
}
}

View File

@ -214,7 +214,7 @@ namespace skyline::gpu {
* @brief A descriptor for a texture present in guest memory, it can be used to create a corresponding Texture object for usage on the host
*/
struct GuestTexture {
using Mappings = boost::container::small_vector<span < u8>, 3>;
using Mappings = boost::container::small_vector<span<u8>, 3>;
Mappings mappings; //!< Spans to CPU memory for the underlying data backing this texture
texture::Dimensions dimensions{};
@ -319,11 +319,18 @@ namespace skyline::gpu {
using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
span<u8> mirror{}; //!< A contiguous mirror of all the guest mappings to allow linear access on the CPU
span<u8> alignedMirror{}; //!< The mirror mapping aligned to page size to reflect the full mapping
std::vector<std::weak_ptr<TextureView>> views; //!< TextureView(s) that are backed by this Texture, used for repointing to a new Texture on deletion
friend TextureManager;
friend TextureView;
/**
* @brief Sets up mirror mappings for the guest mappings
*/
void SetupGuestMappings();
/**
* @brief An implementation function for guest -> host texture synchronization, it allocates and copies data into a staging buffer or directly into a linear host texture
* @return If a staging buffer was required for the texture sync, it's returned filled with guest texture data and must be copied to the host texture by the caller