Implement Array Texture Swizzling

Textures can have more than one layer, which we previously didn't handle: all layers past the initial one would be filled with random data or 0s, leading to incorrect rendering. Support for array textures has now been implemented, which fixes any titles that utilize array textures, such as "Super Mario Odyssey" or "Hatsune Miku: Project DIVA MegaMix".
2024-11-30 03:04:17 +01:00 · 2022-05-12 18:09:41 +05:30 · 2022-05-12 18:09:41 +05:30 · f2cc25ee9f
commit f2cc25ee9f
parent 2a99e1784d
5 changed files with 53 additions and 32 deletions
--- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
@ -113,6 +113,7 @@ namespace skyline::gpu::interconnect {
            bool disabled{true}; //!< If this RT has been disabled and will be an unbound attachment instead
            IOVA iova{};
            u32 widthBytes{}; //!< The width in bytes for linear textures
+            u32 layerStride{}; //!< The stride of a single layer in bytes
            bool is3d{}; //!< If the RT is 3D, this controls if the RT is 3D or layered
            GuestTexture guest{};
            std::shared_ptr<TextureView> view{};
@ -370,7 +371,7 @@ namespace skyline::gpu::interconnect {
        }

        void SetRenderTargetLayerStride(RenderTarget &renderTarget, u32 layerStrideLsr2) {
-            renderTarget.guest.layerStride = layerStrideLsr2 << 2;
+            renderTarget.layerStride = layerStrideLsr2 << 2;
            renderTarget.view.reset();
        }

@ -397,8 +398,13 @@ namespace skyline::gpu::interconnect {
            else if (renderTarget.view)
                return &*renderTarget.view;

+            if (renderTarget.guest.baseArrayLayer > 0 || renderTarget.guest.layerCount > 1)
+                renderTarget.guest.layerStride = renderTarget.layerStride; // Games can supply a layer stride that may include intentional padding which can contain additional mip layers
+            else
+                renderTarget.guest.layerStride = 0; // We want to explicitly reset the stride to 0 for non-array textures
+
            if (renderTarget.guest.mappings.empty()) {
-                size_t layerStride{renderTarget.guest.GetLayerSize()};
+                size_t layerStride{renderTarget.guest.GetLayerStride()};
                size_t size{layerStride * (renderTarget.guest.layerCount - renderTarget.guest.baseArrayLayer)};
                auto mappings{channelCtx.asCtx->gmmu.TranslateRange(renderTarget.iova, size)};
                renderTarget.guest.mappings.assign(mappings.begin(), mappings.end());
@ -2323,7 +2329,7 @@ namespace skyline::gpu::interconnect {
                    throw exception("Unsupported TIC Header Type: {}", static_cast<u32>(textureControl.headerType));
                }

-                auto mappings{channelCtx.asCtx->gmmu.TranslateRange(textureControl.Iova(), guest.GetLayerSize() * (guest.layerCount - guest.baseArrayLayer))};
+                auto mappings{channelCtx.asCtx->gmmu.TranslateRange(textureControl.Iova(), guest.GetLayerStride() * (guest.layerCount - guest.baseArrayLayer))};
                guest.mappings.assign(mappings.begin(), mappings.end());
            } else if (auto textureView{poolTexture.view.lock()}; textureView != nullptr) {
                // If the entry already exists and the view is still valid then we return it directly
--- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp
+++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp
@ -10,7 +10,10 @@
 #include "adreno_aliasing.h"

 namespace skyline::gpu {
-    u32 GuestTexture::GetLayerSize() {
+    u32 GuestTexture::GetLayerStride() {
+        if (layerStride)
+            return layerStride;
+
        switch (tileConfig.mode) {
            case texture::TileMode::Linear:
                return layerStride = static_cast<u32>(format->GetSize(dimensions));
@ -134,7 +137,7 @@ namespace skyline::gpu {
            throw exception("Guest and host dimensions being different is not supported currently");

        auto pointer{mirror.data()};
-        auto size{format->GetSize(dimensions) * layerCount};
+        auto size{layerStride * layerCount};

        WaitOnBacking();

@ -158,12 +161,16 @@ namespace skyline::gpu {
            }
        }()};

-        if (guest->tileConfig.mode == texture::TileMode::Block)
-            texture::CopyBlockLinearToLinear(*guest, pointer, bufferData);
-        else if (guest->tileConfig.mode == texture::TileMode::Pitch)
-            texture::CopyPitchLinearToLinear(*guest, pointer, bufferData);
-        else if (guest->tileConfig.mode == texture::TileMode::Linear)
-            std::memcpy(bufferData, pointer, size);
+        for (size_t layer{}; layer < layerCount; ++layer) {
+            if (guest->tileConfig.mode == texture::TileMode::Block)
+                texture::CopyBlockLinearToLinear(*guest, pointer, bufferData);
+            else if (guest->tileConfig.mode == texture::TileMode::Pitch)
+                texture::CopyPitchLinearToLinear(*guest, pointer, bufferData);
+            else if (guest->tileConfig.mode == texture::TileMode::Linear)
+                std::memcpy(bufferData, pointer, size);
+            pointer += guest->GetLayerStride();
+            bufferData += layerStride;
+        }

        if (stagingBuffer && cycle.lock() != pCycle)
            WaitOnFence();
@ -263,12 +270,16 @@ namespace skyline::gpu {
    void Texture::CopyToGuest(u8 *hostBuffer) {
        auto guestOutput{mirror.data()};

-        if (guest->tileConfig.mode == texture::TileMode::Block)
-            texture::CopyLinearToBlockLinear(*guest, hostBuffer, guestOutput);
-        else if (guest->tileConfig.mode == texture::TileMode::Pitch)
-            texture::CopyLinearToPitchLinear(*guest, hostBuffer, guestOutput);
-        else if (guest->tileConfig.mode == texture::TileMode::Linear)
-            std::memcpy(hostBuffer, guestOutput, format->GetSize(dimensions));
+        for (size_t layer{}; layer < layerCount; ++layer) {
+            if (guest->tileConfig.mode == texture::TileMode::Block)
+                texture::CopyLinearToBlockLinear(*guest, hostBuffer, guestOutput);
+            else if (guest->tileConfig.mode == texture::TileMode::Pitch)
+                texture::CopyLinearToPitchLinear(*guest, hostBuffer, guestOutput);
+            else if (guest->tileConfig.mode == texture::TileMode::Linear)
+                std::memcpy(hostBuffer, guestOutput, layerStride);
+            guestOutput += guest->layerStride;
+            hostBuffer += layerStride;
+        }
    }

    Texture::TextureBufferCopy::TextureBufferCopy(std::shared_ptr<Texture> texture, std::shared_ptr<memory::StagingBuffer> stagingBuffer) : texture(std::move(texture)), stagingBuffer(std::move(stagingBuffer)) {}
@ -289,6 +300,7 @@ namespace skyline::gpu {
          usage(usage),
          mipLevels(mipLevels),
          layerCount(layerCount),
+          layerStride(static_cast<u32>(format->GetSize(dimensions))),
          sampleCount(sampleCount) {}

    Texture::Texture(GPU &pGpu, GuestTexture pGuest)
@ -300,6 +312,7 @@ namespace skyline::gpu {
          tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization
          mipLevels(1),
          layerCount(guest->layerCount),
+          layerStride(static_cast<u32>(format->GetSize(dimensions))),
          sampleCount(vk::SampleCountFlagBits::e1),
          flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat),
          usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) {
@ -485,7 +498,7 @@ namespace skyline::gpu {
        WaitOnFence();

        if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
-            auto size{format->GetSize(dimensions) * layerCount};
+            auto size{layerStride * layerCount};
            auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};

            auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
@ -519,7 +532,7 @@ namespace skyline::gpu {
            WaitOnFence();

        if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
-            auto size{format->GetSize(dimensions) * layerCount};
+            auto size{layerStride * layerCount};
            auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};

            CopyIntoStagingBuffer(commandBuffer, stagingBuffer);
--- a/app/src/main/cpp/skyline/gpu/texture/texture.h
+++ b/app/src/main/cpp/skyline/gpu/texture/texture.h
@ -238,15 +238,15 @@ namespace skyline::gpu {
        texture::Format format{};
        texture::TileConfig tileConfig{};
        texture::TextureType type{};
-        u16 baseArrayLayer{};
-        u16 layerCount{};
-        u32 layerStride{}; //!< An optional hint regarding the size of a single layer, it will be set to 0 when not available, GetLayerSize() should be used to retrieve this value
+        u32 baseArrayLayer{};
+        u32 layerCount{};
+        u32 layerStride{}; //!< An optional hint regarding the size of a single layer, it **should** be set to 0 when not available and should never be a non-0 value that doesn't reflect the correct layer stride
        vk::ComponentMapping swizzle{}; //!< Component swizzle derived from format requirements and the guest supplied swizzle
        vk::ImageAspectFlags aspect{};

        GuestTexture() {}

-        GuestTexture(Mappings mappings, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0)
+        GuestTexture(Mappings mappings, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u32 baseArrayLayer = 0, u32 layerCount = 1, u32 layerStride = 0)
            : mappings(mappings),
              dimensions(dimensions),
              format(format),
@ -257,7 +257,7 @@ namespace skyline::gpu {
              layerStride(layerStride),
              aspect(format->vkAspect) {}

-        GuestTexture(span <u8> mapping, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0)
+        GuestTexture(span<u8> mapping, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u32 baseArrayLayer = 0, u32 layerCount = 1, u32 layerStride = 0)
            : mappings(1, mapping),
              dimensions(dimensions),
              format(format),
@ -269,10 +269,11 @@ namespace skyline::gpu {
              aspect(format->vkAspect) {}

        /**
+         * @note This should be used over accessing the `layerStride` member directly when desiring the actual layer stride for calculations as it will automatically handle it not being filled in
         * @note Requires `dimensions`, `format` and `tileConfig` to be filled in
-         * @return The size of a single layer with alignment in bytes
+         * @return The size of a single layer with layout alignment in bytes
         */
-        u32 GetLayerSize();
+        u32 GetLayerStride();
    };

    class TextureManager;
@ -417,6 +418,7 @@ namespace skyline::gpu {
        vk::ImageUsageFlags usage;
        u32 mipLevels;
        u32 layerCount; //!< The amount of array layers in the image, utilized for efficient binding (Not to be confused with the depth or faces in a cubemap)
+        u32 layerStride; //!< The stride of a single array layer given linear tiling
        vk::SampleCountFlagBits sampleCount;

        /**
--- a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp
+++ b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp
@ -345,7 +345,7 @@ namespace skyline::service::hosbinder {

            gpu::texture::Dimensions dimensions(surface.width, surface.height);
            gpu::GuestTexture guestTexture(span<u8>{}, dimensions, format, tileConfig, gpu::texture::TextureType::e2D);
-            guestTexture.mappings[0] = span<u8>(nvMapHandleObj->GetPointer() + surface.offset, guestTexture.GetLayerSize());
+            guestTexture.mappings[0] = span<u8>(nvMapHandleObj->GetPointer() + surface.offset, guestTexture.GetLayerStride());
            buffer.texture = state.gpu->texture.FindOrCreate(guestTexture)->texture;
        }

--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
@ -86,7 +86,7 @@ namespace skyline::soc::gm20b::engine {
                                     gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
                                     gpu::texture::TextureType::e2D};

-        if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerSize())}; mappings.size() == 1) {
+        if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerStride())}; mappings.size() == 1) {
            srcTexture.mappings[0] = mappings[0];
        } else {
            Logger::Warn("DMA for split textures is unimplemented!");
@ -100,8 +100,8 @@ namespace skyline::soc::gm20b::engine {
                                     gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 },
                                     gpu::texture::TextureType::e2D};

-        u64 dstLayerAddress{*registers.offsetOut + dstTexture.GetLayerSize() * registers.dstSurface->layer};
-        if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerSize())}; mappings.size() == 1) {
+        u64 dstLayerAddress{*registers.offsetOut + dstTexture.GetLayerStride() * registers.dstSurface->layer};
+        if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerStride())}; mappings.size() == 1) {
            dstTexture.mappings[0] = mappings[0];
        } else {
            Logger::Warn("DMA for split textures is unimplemented!");
@ -145,7 +145,7 @@ namespace skyline::soc::gm20b::engine {
                                     gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.srcSurface->blockSize.Height(), .blockDepth = 1 },
                                     gpu::texture::TextureType::e2D};

-        if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerSize())}; mappings.size() == 1) {
+        if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerStride())}; mappings.size() == 1) {
            srcTexture.mappings[0] = mappings[0];
        } else {
            Logger::Warn("DMA for split textures is unimplemented!");
@ -158,7 +158,7 @@ namespace skyline::soc::gm20b::engine {
                                     gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
                                     gpu::texture::TextureType::e2D};

-        if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, dstTexture.GetLayerSize())}; mappings.size() == 1) {
+        if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, dstTexture.GetLayerStride())}; mappings.size() == 1) {
            dstTexture.mappings[0] = mappings[0];
        } else {
            Logger::Warn("DMA for split textures is unimplemented!");