Use mirror mappings for Textures and Buffers

This is a prerequisite to memory trapping: we need to write through the mirror to avoid a race condition with external threads writing to a texture/buffer while we write to it ourselves for the sync on a read/write. It also avoids an additional `mprotect` to `-WX`/`RWX` on a read access. An additional advantage, especially for textures, is that we now support split-mapping textures since they are laid out in a contiguous mirror, which does not require costly algorithmic changes. Buffers should also benefit from not needing to iterate over every region when they are split into multiple mappings.
2024-06-01 12:58:48 +02:00 · 2022-03-06 20:27:13 +05:30 · 2022-03-06 20:27:13 +05:30 · 5c9e42e384
commit 5c9e42e384
parent 577a67babd
7 changed files with 128 additions and 34 deletions
--- a/app/src/main/cpp/skyline/gpu.cpp
+++ b/app/src/main/cpp/skyline/gpu.cpp
@ -238,7 +238,8 @@ namespace skyline::gpu {


    GPU::GPU(const DeviceState &state)
-        : vkContext(LoadVulkanDriver(state)),
+        : state(state),
+          vkContext(LoadVulkanDriver(state)),
          vkInstance(CreateInstance(state, vkContext)),
          vkDebugReportCallback(CreateDebugReportCallback(vkInstance)),
          vkPhysicalDevice(CreatePhysicalDevice(vkInstance)),
--- a/app/src/main/cpp/skyline/gpu.h
+++ b/app/src/main/cpp/skyline/gpu.h
@ -19,6 +19,11 @@ namespace skyline::gpu {
     * @brief An interface to host GPU structures, anything concerning host GPU/Presentation APIs is encapsulated by this
     */
    class GPU {
+      private:
+        const DeviceState &state; // We access the device state inside Texture (and Buffers) for setting up NCE memory tracking
+        friend Texture;
+        friend Buffer;
+
      public:
        vk::raii::Context vkContext;
        vk::raii::Instance vkInstance;
--- a/app/src/main/cpp/skyline/gpu/buffer.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer.cpp
@ -2,6 +2,8 @@
 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)

 #include <gpu.h>
+#include <kernel/memory.h>
+#include <kernel/types/KProcess.h>
 #include <common/trace.h>
 #include "buffer.h"

@ -13,10 +15,50 @@ namespace skyline::gpu {
        return size;
    }

-    Buffer::Buffer(GPU &gpu, GuestBuffer guest) : size(guest.BufferSize()), backing(gpu.memory.AllocateBuffer(size)), guest(std::move(guest)) {
+    void Buffer::SetupGuestMappings() { // Fills in `alignedMirror` and `mirror` from `guest.mappings`
+        auto &mappings{guest.mappings};
+        if (mappings.size() == 1) { // Single mapping: one page-aligned range is mirrored
+            auto mapping{mappings.front()};
+            u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)}; // Start of the mapping rounded down to a page boundary
+            size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)}; // Distance from the aligned start to the page-aligned end of the mapping
 
+            alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize); // NOTE(review): presumably returns a span over a newly created mirror of that range — confirm against kernel/memory.h
+            mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size()); // Skips the leading alignment padding so `mirror` has the mapping's own size
+        } else { // NOTE(review): also taken when `mappings` is empty, where front()/back() would be invalid — confirm callers always supply at least one mapping
+            std::vector<span<u8>> alignedMappings;
 
+            const auto &frontMapping{mappings.front()};
+            u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
+            alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData); // Only the start of the first mapping is aligned down, its end is used as-is
 
+            size_t totalSize{frontMapping.size()}; // Running sum of the unpadded mapping sizes
+            for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) { // Visits every mapping except the first and the last
+                auto mappingSize{it->size()};
+                alignedMappings.emplace_back(it->data(), mappingSize); // Interior mappings are passed through without any alignment adjustment
+                totalSize += mappingSize;
+            }
 
+            const auto &backMapping{mappings.back()};
+            totalSize += backMapping.size();
+            alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE)); // Only the size of the last mapping is aligned up, NOTE(review): presumably its start is already page-aligned — confirm
 
+            alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings); // NOTE(review): presumably lays the given ranges out back-to-back in one mirror — confirm against kernel/memory.h
+            mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize); // Skips the first mapping's leading padding and spans the summed unpadded sizes
+        }
+    }
+
+    Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), size(guest.BufferSize()), backing(gpu.memory.AllocateBuffer(size)), guest(std::move(guest)) { // `size` is computed from the `guest` parameter before it is moved into the member
+        SetupGuestMappings(); // Runs before the initial host sync, NOTE(review): presumably so SynchronizeHost can read through the mirror — confirm
        SynchronizeHost();
    }

+    Buffer::~Buffer() { // Synchronizes to the guest and then unmaps the mirror while holding the buffer's own lock
+        std::lock_guard lock(*this);
+        SynchronizeGuest(true); // NOTE(review): the meaning of the `true` argument is not visible here — confirm against the declaration
+        if (alignedMirror.valid())
+            munmap(alignedMirror.data(), alignedMirror.size()); // Unmaps the whole page-aligned mirror rather than just the `mirror` subspan, the return value is ignored
+    }
+
    void Buffer::WaitOnFence() {
        TRACE_EVENT("gpu", "Buffer::WaitOnFence");

@ -89,6 +131,10 @@ namespace skyline::gpu {
        cycle = pCycle;
    }

+    void Buffer::Write(span<u8> data, vk::DeviceSize offset) { // Copies `data` into the guest mirror starting `offset` bytes in
+        std::memcpy(mirror.data() + offset, data.data(), data.size()); // No bounds check or locking happens here, NOTE(review): callers presumably must hold the lock and keep offset + size within `mirror` — confirm
+    }
+
    std::shared_ptr<BufferView> Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) {
        for (const auto &viewWeak : views) {
            auto view{viewWeak.lock()};
--- a/app/src/main/cpp/skyline/gpu/buffer.h
+++ b/app/src/main/cpp/skyline/gpu/buffer.h
@ -28,16 +28,24 @@ namespace skyline::gpu {
     */
    class Buffer : public std::enable_shared_from_this<Buffer>, public FenceCycleDependency {
      private:
+        GPU &gpu;
        std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing
        vk::DeviceSize size;
        memory::Buffer backing;
        GuestBuffer guest;

+        span<u8> mirror{}; //!< A contiguous mirror of all the guest mappings to allow linear access on the CPU
+        span<u8> alignedMirror{}; //!< The mirror mapping aligned to page size to reflect the full mapping
        std::vector<std::weak_ptr<BufferView>> views; //!< BufferView(s) that are backed by this Buffer, used for repointing to a new Buffer on deletion

        friend BufferView;
        friend BufferManager;

+        /**
+         * @brief Sets up mirror mappings for the guest mappings
+         */
+        void SetupGuestMappings();
+
      public:
        std::weak_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the buffer has completed, it must be waited on prior to any mutations to the backing

@ -47,8 +55,10 @@ namespace skyline::gpu {

        Buffer(GPU &gpu, GuestBuffer guest);

+        ~Buffer();
+
        /**
-         * @brief Acquires an exclusive lock on the texture for the calling thread
+         * @brief Acquires an exclusive lock on the buffer for the calling thread
         * @note Naming is in accordance to the BasicLockable named requirement
         */
        void lock() {
@ -56,7 +66,7 @@ namespace skyline::gpu {
        }

        /**
-         * @brief Relinquishes an existing lock on the texture by the calling thread
+         * @brief Relinquishes an existing lock on the buffer by the calling thread
         * @note Naming is in accordance to the BasicLockable named requirement
         */
        void unlock() {
@ -103,6 +113,11 @@ namespace skyline::gpu {
         */
        void SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &cycle);

+        /**
+         * @brief Writes data at the specified offset in the buffer
+         */
+        void Write(span<u8> data, vk::DeviceSize offset);
+
        /**
         * @return A cached or newly created view into this buffer with the supplied attributes
         */
@ -121,7 +136,7 @@ namespace skyline::gpu {
        vk::Format format;

        /**
-         * @note A view must **NOT** be constructed directly, it should always be retrieved using Texture::GetView
+         * @note A view must **NOT** be constructed directly, it should always be retrieved using Buffer::GetView
         */
        BufferView(std::shared_ptr<Buffer> backing, vk::DeviceSize offset, vk::DeviceSize range, vk::Format format);

--- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
@ -582,7 +582,7 @@ namespace skyline::gpu::interconnect {
            T Read(size_t offset) {
                T object;
                size_t objectOffset{};
-                for (auto &mapping: guest.mappings) {
+                for (auto &mapping : guest.mappings) {
                    if (offset < mapping.size_bytes()) {
                        auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
                        std::memcpy(reinterpret_cast<u8 *>(&object) + objectOffset, mapping.data() + offset, copySize);
@ -602,21 +602,9 @@ namespace skyline::gpu::interconnect {
             * @note This must only be called when the GuestBuffer is resolved correctly
             */
            template<typename T>
-            void Write(const T &object, size_t offset) {
-                size_t objectOffset{};
-                for (auto &mapping: guest.mappings) {
-                    if (offset < mapping.size_bytes()) {
-                        auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
-                        std::memcpy(mapping.data() + offset, reinterpret_cast<const u8 *>(&object) + objectOffset, copySize);
-                        objectOffset += copySize;
-                        if (objectOffset == sizeof(T))
-                            return;
-                        offset = mapping.size_bytes();
-                    } else {
-                        offset -= mapping.size_bytes();
-                    }
-                }
-                throw exception("Object extent ({} + {} = {}) is larger than constant buffer size: {}", size + offset, sizeof(T), size + offset + sizeof(T), size);
+            void Write(T &object, size_t offset) { // Forwards the bytes of `object` to Buffer::Write at `view->offset + offset`, NOTE(review): takes a non-const reference although `object` only appears to be read — confirm whether `const T &` can be kept
+                std::lock_guard lock{*view}; // Held for the duration of the write
+                view->buffer->Write(span<T>(object).template cast<u8>(), view->offset + offset); // `object` is viewed as raw bytes, `offset` is relative to the start of the view
            }
        };
        ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
--- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp
+++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp
@ -2,6 +2,7 @@
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)

 #include <gpu.h>
+#include <kernel/memory.h>
 #include <common/trace.h>
 #include <kernel/types/KProcess.h>
 #include "texture.h"
@ -84,15 +85,45 @@ namespace skyline::gpu {
        }
    }

+    void Texture::SetupGuestMappings() { // Fills in `alignedMirror` and `mirror` from `guest->mappings`
+        auto &mappings{guest->mappings}; // NOTE(review): dereferences `guest` without a check — confirm every caller has a guest texture set
+        if (mappings.size() == 1) { // Single mapping: one page-aligned range is mirrored
+            auto mapping{mappings.front()};
+            u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)}; // Start of the mapping rounded down to a page boundary
+            size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)}; // Distance from the aligned start to the page-aligned end of the mapping
 
+            alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize); // NOTE(review): presumably returns a span over a newly created mirror of that range — confirm against kernel/memory.h
+            mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size()); // Skips the leading alignment padding so `mirror` has the mapping's own size
+        } else { // NOTE(review): also taken when `mappings` is empty, where front()/back() would be invalid — confirm callers always supply at least one mapping
+            std::vector<span<u8>> alignedMappings;
 
+            const auto &frontMapping{mappings.front()};
+            u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
+            alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData); // Only the start of the first mapping is aligned down, its end is used as-is
 
+            size_t totalSize{frontMapping.size()}; // Running sum of the unpadded mapping sizes
+            for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) { // Visits every mapping except the first and the last
+                auto mappingSize{it->size()};
+                alignedMappings.emplace_back(it->data(), mappingSize); // Interior mappings are passed through without any alignment adjustment
+                totalSize += mappingSize;
+            }
 
+            const auto &backMapping{mappings.back()};
+            totalSize += backMapping.size();
+            alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE)); // Only the size of the last mapping is aligned up, NOTE(review): presumably its start is already page-aligned — confirm
 
+            alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings); // NOTE(review): presumably lays the given ranges out back-to-back in one mirror — confirm against kernel/memory.h
+            mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize); // Skips the first mapping's leading padding and spans the summed unpadded sizes
+        }
+    }
+
    std::shared_ptr<memory::StagingBuffer> Texture::SynchronizeHostImpl(const std::shared_ptr<FenceCycle> &pCycle) {
        if (!guest)
            throw exception("Synchronization of host textures requires a valid guest texture to synchronize from");
        else if (guest->dimensions != dimensions)
            throw exception("Guest and host dimensions being different is not supported currently");
-        else if (guest->mappings.size() > 1)
-            throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());

-        auto pointer{guest->mappings[0].data()};
+        auto pointer{mirror.data()};
        auto size{format->GetSize(dimensions)};

        WaitOnBacking();
@ -218,7 +249,7 @@ namespace skyline::gpu {
    }

    void Texture::CopyToGuest(u8 *hostBuffer) {
-        auto guestOutput{guest->mappings[0].data()};
+        auto guestOutput{mirror.data()};

        if (guest->tileConfig.mode == texture::TileMode::Block)
            texture::CopyLinearToBlockLinear(*guest, hostBuffer, guestOutput);
@ -246,6 +277,7 @@ namespace skyline::gpu {
          mipLevels(mipLevels),
          layerCount(layerCount),
          sampleCount(sampleCount) {
+        SetupGuestMappings();
        if (GetBacking())
            SynchronizeHost();
    }
@ -293,6 +325,7 @@ namespace skyline::gpu {
        };
        backing = tiling != vk::ImageTiling::eLinear ? gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo);
        TransitionLayout(vk::ImageLayout::eGeneral);
+        SetupGuestMappings();
    }

    Texture::Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount)
@ -323,6 +356,13 @@ namespace skyline::gpu {
            TransitionLayout(initialLayout);
    }

+    Texture::~Texture() { // Synchronizes to the guest and then unmaps the mirror while holding the texture's own lock
+        std::lock_guard lock(*this);
+        SynchronizeGuest(true); // NOTE(review): SynchronizeGuest appears to throw when there is no guest texture, which would terminate if it escapes a destructor — confirm this cannot happen for host-only textures
+        if (alignedMirror.valid()) // `alignedMirror` is default-initialized empty, so this is skipped when no mirror was ever set up
+            munmap(alignedMirror.data(), alignedMirror.size()); // Unmaps the whole page-aligned mirror rather than just the `mirror` subspan, the return value is ignored
+    }
+
    bool Texture::WaitOnBacking() {
        TRACE_EVENT("gpu", "Texture::WaitOnBacking");

@ -409,8 +449,6 @@ namespace skyline::gpu {
            throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to");
        else if (layout == vk::ImageLayout::eUndefined)
            return; // If the state of the host texture is undefined then so can the guest
-        else if (guest->mappings.size() > 1)
-            throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());

        TRACE_EVENT("gpu", "Texture::SynchronizeGuest");

@ -442,8 +480,6 @@ namespace skyline::gpu {
            throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to");
        else if (layout == vk::ImageLayout::eUndefined)
            return; // If the state of the host texture is undefined then so can the guest
-        else if (guest->mappings.size() > 1)
-            throw exception("Synchronizing textures across {} mappings is not supported", guest->mappings.size());

        TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer");

@ -570,8 +606,4 @@ namespace skyline::gpu {
        lCycle->AttachObjects(std::move(source), shared_from_this());
        cycle = lCycle;
    }
-
-    Texture::~Texture() {
-        WaitOnFence();
-    }
 }
--- a/app/src/main/cpp/skyline/gpu/texture/texture.h
+++ b/app/src/main/cpp/skyline/gpu/texture/texture.h
@ -214,7 +214,7 @@ namespace skyline::gpu {
     * @brief A descriptor for a texture present in guest memory, it can be used to create a corresponding Texture object for usage on the host
     */
    struct GuestTexture {
-        using Mappings = boost::container::small_vector<span < u8>, 3>;
+        using Mappings = boost::container::small_vector<span<u8>, 3>;

        Mappings mappings; //!< Spans to CPU memory for the underlying data backing this texture
        texture::Dimensions dimensions{};
@ -319,11 +319,18 @@ namespace skyline::gpu {
        using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
        BackingType backing; //!< The Vulkan image that backs this texture, it is nullable

+        span<u8> mirror{}; //!< A contiguous mirror of all the guest mappings to allow linear access on the CPU
+        span<u8> alignedMirror{}; //!< The mirror mapping aligned to page size to reflect the full mapping
        std::vector<std::weak_ptr<TextureView>> views; //!< TextureView(s) that are backed by this Texture, used for repointing to a new Texture on deletion

        friend TextureManager;
        friend TextureView;

+        /**
+         * @brief Sets up mirror mappings for the guest mappings
+         */
+        void SetupGuestMappings();
+
        /**
         * @brief An implementation function for guest -> host texture synchronization, it allocates and copies data into a staging buffer or directly into a linear host texture
         * @return If a staging buffer was required for the texture sync, it's returned filled with guest texture data and must be copied to the host texture by the callee