From 06053d3caf201d9d1a3c8922ffc4378729c36862 Mon Sep 17 00:00:00 2001
From: Billy Laws <blaws05@gmail.com>
Date: Sun, 31 Jul 2022 15:05:51 +0100
Subject: [PATCH] Rewrite Fermi 2D engine to use the blit helper shader

Entirely rewrites the engine and interconnect code to take advantage of the subpixel and OOB blit support offered by the blit helper shader. The interconnect code is also cleaned up significantly, and the 'context' naming is dropped to avoid potential confusion with the 'context' used by the context lock.
---
 .../skyline/gpu/interconnect/blit_context.h   | 155 ------------------
 .../cpp/skyline/gpu/interconnect/fermi_2d.cpp | 148 +++++++++++++++++
 .../cpp/skyline/gpu/interconnect/fermi_2d.h   |  41 +++++
 .../main/cpp/skyline/soc/gm20b/channel.cpp    |   2 +-
 app/src/main/cpp/skyline/soc/gm20b/channel.h  |   7 +-
 .../skyline/soc/gm20b/engines/fermi_2d.cpp    |  40 +++--
 .../cpp/skyline/soc/gm20b/engines/fermi_2d.h  |  16 +-
 app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp |   4 +-
 8 files changed, 221 insertions(+), 192 deletions(-)
 delete mode 100644 app/src/main/cpp/skyline/gpu/interconnect/blit_context.h
 create mode 100644 app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp
 create mode 100644 app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.h

diff --git a/app/src/main/cpp/skyline/gpu/interconnect/blit_context.h b/app/src/main/cpp/skyline/gpu/interconnect/blit_context.h
deleted file mode 100644
index 80f6157f..00000000
--- a/app/src/main/cpp/skyline/gpu/interconnect/blit_context.h
+++ /dev/null
@@ -1,155 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
-// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
-
-#pragma once
-
-#include <gpu/texture/format.h>
-#include <gpu/texture/texture.h>
-#include <gpu/texture_manager.h>
-#include <gpu/buffer.h>
-#include <soc/gm20b/gmmu.h>
-#include <soc/gm20b/channel.h>
-#include <soc/gm20b/engines/fermi/types.h>
-
-namespace skyline::gpu::interconnect {
-    using IOVA = soc::gm20b::IOVA;
-    namespace fermi2d = skyline::soc::gm20b::engine::fermi2d::type;
-
-    /**
-     * @brief Handles translating Fermi 2D engine blit operations to Vulkan
-     */
-    class BlitContext {
-      private:
-        GPU &gpu;
-        soc::gm20b::ChannelContext &channelCtx;
-        gpu::interconnect::CommandExecutor &executor;
-
-        gpu::GuestTexture GetGuestTexture(const fermi2d::Surface &surface) {
-            auto determineFormat = [&](fermi2d::Surface::SurfaceFormat format) -> skyline::gpu::texture::Format {
-                #define FORMAT_CASE(fermiFmt, skFmt, fmtType) \
-                    case fermi2d::Surface::SurfaceFormat::fermiFmt ## fmtType: \
-                        return skyline::gpu::format::skFmt ## fmtType
-
-                #define FORMAT_SAME_CASE(fmt, type) FORMAT_CASE(fmt, fmt, type)
-
-                #define FORMAT_NORM_CASE(fermiFmt, skFmt) \
-                    FORMAT_CASE(fermiFmt, skFmt, Snorm); \
-                    FORMAT_CASE(fermiFmt, skFmt, Unorm)
-
-                #define FORMAT_SAME_NORM_CASE(fmt) FORMAT_NORM_CASE(fmt, fmt)
-
-                #define FORMAT_NORM_FLOAT_CASE(fermiFmt, skFmt) \
-                    FORMAT_NORM_CASE(fermiFmt, skFmt); \
-                    FORMAT_CASE(fermiFmt, skFmt, Float)
-
-                #define FORMAT_SAME_NORM_FLOAT_CASE(fmt) FORMAT_NORM_FLOAT_CASE(fmt, fmt)
-
-                switch (format) {
-                    FORMAT_SAME_NORM_CASE(R8);
-                    FORMAT_SAME_NORM_FLOAT_CASE(R16);
-                    FORMAT_SAME_NORM_CASE(R8G8);
-                    FORMAT_SAME_CASE(B5G6R5, Unorm);
-                    FORMAT_SAME_CASE(B5G5R5A1, Unorm);
-                    FORMAT_SAME_CASE(R32, Float);
-                    FORMAT_SAME_CASE(B10G11R11, Float);
-                    FORMAT_SAME_NORM_FLOAT_CASE(R16G16);
-                    FORMAT_SAME_CASE(R8G8B8A8, Unorm);
-                    FORMAT_SAME_CASE(R8G8B8A8, Srgb);
-                    FORMAT_NORM_CASE(R8G8B8X8, R8G8B8A8);
-                    FORMAT_CASE(R8G8B8X8, R8G8B8A8, Srgb);
-                    FORMAT_SAME_CASE(B8G8R8A8, Unorm);
-                    FORMAT_SAME_CASE(B8G8R8A8, Srgb);
-                    FORMAT_SAME_CASE(A2B10G10R10, Unorm);
-                    FORMAT_SAME_CASE(R32G32, Float);
-                    FORMAT_SAME_CASE(R16G16B16A16, Float);
-                    FORMAT_NORM_FLOAT_CASE(R16G16B16X16, R16G16B16A16);
-                    FORMAT_SAME_CASE(R32G32B32A32, Float);
-                    FORMAT_CASE(R32G32B32X32, R32G32B32A32, Float);
-
-                    default:
-                        throw exception("Cannot translate the supplied surface format: 0x{:X}", static_cast<u32>(format));
-                }
-
-                #undef FORMAT_CASE
-                #undef FORMAT_SAME_CASE
-                #undef FORMAT_NORM_CASE
-                #undef FORMAT_SAME_NORM_CASE
-                #undef FORMAT_NORM_FLOAT_CASE
-                #undef FORMAT_SAME_NORM_FLOAT_CASE
-            };
-
-            GuestTexture texture{};
-
-            texture.format = determineFormat(surface.format);
-            texture.aspect = texture.format->vkAspect;
-            texture.baseArrayLayer = 0;
-            texture.layerCount = 1;
-            texture.viewType = vk::ImageViewType::e2D;
-
-            if (surface.memoryLayout == fermi2d::MemoryLayout::Pitch) {
-                texture.dimensions = gpu::texture::Dimensions{surface.stride / texture.format->bpb, surface.height, 1};
-                texture.tileConfig = gpu::texture::TileConfig{
-                    .mode = gpu::texture::TileMode::Pitch,
-                    .pitch = surface.stride
-                };
-            } else {
-                texture.dimensions = gpu::texture::Dimensions{surface.width, surface.height, surface.depth};
-                texture.tileConfig = gpu::texture::TileConfig{
-                    .mode = gpu::texture::TileMode::Block,
-                    .blockHeight = surface.blockSize.Height(),
-                    .blockDepth = surface.blockSize.Depth(),
-                };
-            }
-
-            IOVA iova{surface.address};
-            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(iova, texture.GetSize())};
-            texture.mappings.assign(mappings.begin(), mappings.end());
-
-            return texture;
-        }
-
-      public:
-        BlitContext(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor) {}
-
-        void Blit(const fermi2d::Surface &srcSurface, const fermi2d::Surface &dstSurface, i32 srcX, i32 srcY, i32 srcWidth, i32 srcHeight, i32 dstX, i32 dstY, i32 dstWidth, i32 dstHeight, bool resolve, bool linearFilter) {
-            // TODO: OOB blit: https://github.com/Ryujinx/Ryujinx/blob/master/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs#L287
-            // TODO: When we support MSAA perform a resolve operation rather than blit when the `resolve` flag is set.
-            auto srcGuestTexture{GetGuestTexture(srcSurface)};
-            auto dstGuestTexture{GetGuestTexture(dstSurface)};
-
-            auto &textureManager{executor.AcquireTextureManager()};
-            auto srcTextureView{textureManager.FindOrCreate(srcGuestTexture, executor.tag)};
-            executor.AttachTexture(srcTextureView.get());
-
-            auto dstTextureView{textureManager.FindOrCreate(dstGuestTexture, executor.tag)};
-            executor.AttachTexture(dstTextureView.get());
-
-            auto getSubresourceLayers{[](const vk::ImageSubresourceRange &range, vk::ImageAspectFlags aspect) {
-                return vk::ImageSubresourceLayers{
-                    .aspectMask = aspect,
-                    .mipLevel = 0, // Blit engine only does one layer/mip level at a time
-                    .layerCount = 1,
-                    .baseArrayLayer = range.baseArrayLayer
-                };
-            }};
-
-            vk::ImageBlit region{
-                .srcSubresource = getSubresourceLayers(srcTextureView->range, srcTextureView->format->vkAspect),
-                .dstSubresource = getSubresourceLayers(dstTextureView->range, srcTextureView->range.aspectMask),
-                .srcOffsets = {{vk::Offset3D{srcX, srcY, 0}, vk::Offset3D{srcX + srcWidth, srcY + srcHeight, 1}}},
-                .dstOffsets = {{vk::Offset3D{dstX, dstY, 0}, vk::Offset3D{dstX + dstWidth, dstY + dstHeight, 1}}}
-            };
-
-            executor.AddOutsideRpCommand([region, srcTextureView, dstTextureView, linearFilter](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &) {
-                auto blitSrcImage{srcTextureView->texture->GetBacking()};
-                auto blitDstImage{dstTextureView->texture->GetBacking()};
-
-                commandBuffer.blitImage(blitSrcImage, vk::ImageLayout::eGeneral,
-                                        blitDstImage, vk::ImageLayout::eGeneral,
-                                        region,
-                                        linearFilter ? vk::Filter::eLinear : vk::Filter::eNearest);
-            });
-        }
-    };
-}
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp
new file mode 100644
index 00000000..8fac1a4f
--- /dev/null
+++ b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include <gpu/texture/format.h>
+#include <gpu/texture/texture.h>
+#include <gpu/texture_manager.h>
+#include <soc/gm20b/gmmu.h>
+#include <soc/gm20b/channel.h>
+#include "fermi_2d.h"
+
+namespace skyline::gpu::interconnect {
+    using IOVA = soc::gm20b::IOVA;
+    using MemoryLayout = skyline::soc::gm20b::engine::fermi2d::type::MemoryLayout;
+
+    gpu::GuestTexture Fermi2D::GetGuestTexture(const Surface &surface) {
+        auto determineFormat = [&](Surface::SurfaceFormat format) -> skyline::gpu::texture::Format {
+            #define FORMAT_CASE(fermiFmt, skFmt, fmtType) \
+                case Surface::SurfaceFormat::fermiFmt ## fmtType: \
+                    return skyline::gpu::format::skFmt ## fmtType
+
+            #define FORMAT_SAME_CASE(fmt, type) FORMAT_CASE(fmt, fmt, type)
+
+            switch (format) {
+                FORMAT_SAME_CASE(R8, Unorm);
+                FORMAT_SAME_CASE(R8, Snorm);
+
+                FORMAT_SAME_CASE(R16, Unorm);
+                FORMAT_SAME_CASE(R16, Snorm);
+                FORMAT_SAME_CASE(R16, Float);
+
+                FORMAT_SAME_CASE(R8G8, Unorm);
+                FORMAT_SAME_CASE(R8G8, Snorm);
+
+                FORMAT_SAME_CASE(B5G6R5, Unorm);
+
+                FORMAT_SAME_CASE(B5G5R5A1, Unorm);
+
+                FORMAT_SAME_CASE(R32, Float);
+
+                FORMAT_SAME_CASE(B10G11R11, Float);
+
+                FORMAT_SAME_CASE(R16G16, Unorm);
+                FORMAT_SAME_CASE(R16G16, Snorm);
+                FORMAT_SAME_CASE(R16G16, Float);
+
+                FORMAT_SAME_CASE(R8G8B8A8, Unorm);
+                FORMAT_SAME_CASE(R8G8B8A8, Srgb);
+
+                FORMAT_CASE(R8G8B8X8, R8G8B8A8, Unorm);
+                FORMAT_CASE(R8G8B8X8, R8G8B8A8, Snorm);
+                FORMAT_CASE(R8G8B8X8, R8G8B8A8, Srgb);
+
+                FORMAT_SAME_CASE(B8G8R8A8, Unorm);
+                FORMAT_SAME_CASE(B8G8R8A8, Srgb);
+
+                FORMAT_SAME_CASE(A2B10G10R10, Unorm);
+
+                FORMAT_SAME_CASE(R32G32, Float);
+
+                FORMAT_SAME_CASE(R16G16B16A16, Float);
+
+                FORMAT_CASE(R16G16B16X16, R16G16B16A16, Unorm);
+                FORMAT_CASE(R16G16B16X16, R16G16B16A16, Snorm);
+                FORMAT_CASE(R16G16B16X16, R16G16B16A16, Float);
+
+                FORMAT_SAME_CASE(R32G32B32A32, Float);
+
+                FORMAT_CASE(R32G32B32X32, R32G32B32A32, Float);
+
+                default:
+                    throw exception("Cannot translate the supplied surface format: 0x{:X}", static_cast<u32>(format));
+            }
+
+            #undef FORMAT_CASE
+            #undef FORMAT_SAME_CASE
+        };
+
+        GuestTexture texture{};
+
+        texture.format = determineFormat(surface.format);
+        texture.aspect = texture.format->vkAspect;
+        texture.baseArrayLayer = 0;
+        texture.layerCount = 1;
+        texture.viewType = vk::ImageViewType::e2D;
+
+        if (surface.memoryLayout == MemoryLayout::Pitch) {
+            texture.dimensions = gpu::texture::Dimensions{surface.stride / texture.format->bpb, surface.height, 1};
+            texture.tileConfig = gpu::texture::TileConfig{
+                .mode = gpu::texture::TileMode::Pitch,
+                .pitch = surface.stride
+            };
+        } else {
+            texture.dimensions = gpu::texture::Dimensions{surface.width, surface.height, surface.depth};
+            texture.tileConfig = gpu::texture::TileConfig{
+                .mode = gpu::texture::TileMode::Block,
+                .blockHeight = surface.blockSize.Height(),
+                .blockDepth = surface.blockSize.Depth(),
+            };
+        }
+
+        IOVA iova{surface.address};
+        auto mappings{channelCtx.asCtx->gmmu.TranslateRange(iova, texture.GetSize())};
+        texture.mappings.assign(mappings.begin(), mappings.end());
+
+        return texture;
+    }
+
+    Fermi2D::Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor) {}
+
+    void Fermi2D::Blit(const Surface &srcSurface, const Surface &dstSurface, float srcRectX, float srcRectY, u32 dstRectWidth, u32 dstRectHeight, u32 dstRectX, u32 dstRectY, float duDx, float dvDy, bool resolve, bool bilinearFilter) {
+        // TODO: When we support MSAA perform a resolve operation rather than blit when the `resolve` flag is set.
+        auto srcGuestTexture{GetGuestTexture(srcSurface)};
+        auto dstGuestTexture{GetGuestTexture(dstSurface)};
+
+        auto &textureManager{executor.AcquireTextureManager()};
+        auto srcTextureView{textureManager.FindOrCreate(srcGuestTexture, executor.tag)};
+        executor.AttachTexture(srcTextureView.get());
+
+        auto dstTextureView{textureManager.FindOrCreate(dstGuestTexture, executor.tag)};
+        executor.AttachTexture(dstTextureView.get());
+
+        gpu.helperShaders.blitHelperShader.Blit(
+            gpu,
+            {
+                .width = duDx * dstRectWidth,
+                .height = dvDy * dstRectHeight,
+                .x = srcRectX,
+                .y = srcRectY,
+            },
+            {
+                .width = static_cast<float>(dstRectWidth),
+                .height = static_cast<float>(dstRectHeight),
+                .x = static_cast<float>(dstRectX),
+                .y = static_cast<float>(dstRectY),
+            },
+            srcGuestTexture.dimensions, dstGuestTexture.dimensions,
+            duDx, dvDy,
+            bilinearFilter,
+            srcTextureView.get(), dstTextureView.get(),
+            [=](auto &&executionCallback) {
+                auto dst{dstTextureView.get()};
+                executor.AddSubpass(std::move(executionCallback), {{static_cast<i32>(dstRectX), static_cast<i32>(dstRectY)}, {dstRectWidth, dstRectHeight} }, {}, {dst});
+            }
+        );
+    }
+
+}
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.h b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.h
new file mode 100644
index 00000000..5dc82923
--- /dev/null
+++ b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.h
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include <gpu/texture/texture.h>
+#include <soc/gm20b/gmmu.h>
+#include <soc/gm20b/engines/fermi/types.h>
+
+namespace skyline::gpu {
+    class GPU;
+}
+
+namespace skyline::soc::gm20b {
+    struct ChannelContext;
+}
+
+namespace skyline::gpu::interconnect {
+    class CommandExecutor;
+
+    /**
+     * @brief Handles translating Fermi 2D engine blit operations to Vulkan
+     */
+    class Fermi2D {
+      private:
+        using IOVA = soc::gm20b::IOVA;
+        using Surface = skyline::soc::gm20b::engine::fermi2d::type::Surface;
+
+        GPU &gpu;
+        soc::gm20b::ChannelContext &channelCtx;
+        gpu::interconnect::CommandExecutor &executor;
+
+        gpu::GuestTexture GetGuestTexture(const Surface &surface);
+
+      public:
+        Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor);
+
+        void Blit(const Surface &srcSurface, const Surface &dstSurface, float srcRectX, float srcRectY, u32 dstRectWidth, u32 dstRectHeight, u32 dstRectX, u32 dstRectY, float duDx, float dvDy, bool resolve, bool bilinearFilter);
+    };
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp
index f0b6b76e..745c267b 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp
@@ -10,7 +10,7 @@ namespace skyline::soc::gm20b {
         : asCtx(std::move(pAsCtx)),
           executor(state),
           maxwell3D(std::make_unique<engine::maxwell3d::Maxwell3D>(state, *this, macroState, executor)),
-          fermi2D(std::make_unique<engine::fermi2d::Fermi2D>(state, *this, macroState, executor)),
+          fermi2D(state, *this, macroState, executor),
           maxwellDma(state, *this, executor),
           keplerCompute(state, *this),
           inline2Memory(*this),
diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.h b/app/src/main/cpp/skyline/soc/gm20b/channel.h
index 700d2fdf..c11f96c4 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/channel.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/channel.h
@@ -6,6 +6,7 @@
 #include <gpu/interconnect/command_executor.h>
 #include "macro/macro_state.h"
 #include "engines/engine.h"
+#include "engines/fermi_2d.h"
 #include "engines/maxwell_dma.h"
 #include "engines/kepler_compute.h"
 #include "engines/inline2memory.h"
@@ -16,10 +17,6 @@ namespace skyline::soc::gm20b {
         class Maxwell3D;
     }
 
-    namespace engine::fermi2d {
-        class Fermi2D;
-    }
-
     struct AddressSpaceContext;
 
     /**
@@ -31,7 +28,7 @@ namespace skyline::soc::gm20b {
         gpu::interconnect::CommandExecutor executor;
         MacroState macroState;
         std::unique_ptr<engine::maxwell3d::Maxwell3D> maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file
-        std::unique_ptr<engine::fermi2d::Fermi2D> fermi2D;
+        engine::fermi2d::Fermi2D fermi2D;
         engine::MaxwellDma maxwellDma;
         engine::KeplerCompute keplerCompute;
         engine::Inline2Memory inline2Memory;
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/fermi_2d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/fermi_2d.cpp
index d1e83a53..75c9b816 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/fermi_2d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/fermi_2d.cpp
@@ -9,7 +9,7 @@ namespace skyline::soc::gm20b::engine::fermi2d {
     Fermi2D::Fermi2D(const DeviceState &state, ChannelContext &channelCtx, MacroState &macroState, gpu::interconnect::CommandExecutor &executor)
         : MacroEngineBase(macroState),
           syncpoints(state.soc->host1x.syncpoints),
-          context(*state.gpu, channelCtx, executor),
+          interconnect(*state.gpu, channelCtx, executor),
           channelCtx(channelCtx) {}
 
     void Fermi2D::HandleMethod(u32 method, u32 argument) {
@@ -28,14 +28,18 @@ namespace skyline::soc::gm20b::engine::fermi2d {
             if (pixelsFromMemory.safeOverlap)
                 Logger::Warn("Safe overlap is unimplemented!");
 
-            constexpr u32 FractionalComponentSize{32};
+            auto fixedToFloating{[](i64 value) {
+                constexpr u32 FractionalComponentSize{32};
+
+                return static_cast<float>(value) / (1ULL << FractionalComponentSize);
+            }};
 
             // The 2D engine supports subpixel blit precision in the lower 32 bits of the src{X,Y}0 registers for filtering, we can safely ignore this in most cases though since the host driver will handle this in its own way
-            i32 srcX{static_cast<i32>(pixelsFromMemory.srcX0 >> FractionalComponentSize)};
-            i32 srcY{static_cast<i32>(pixelsFromMemory.srcY0 >> FractionalComponentSize)};
+            float srcX{fixedToFloating(pixelsFromMemory.srcX)};
+            float srcY{fixedToFloating(pixelsFromMemory.srcY)};
 
-            i32 srcWidth{static_cast<i32>((pixelsFromMemory.duDx * pixelsFromMemory.dstWidth) >> FractionalComponentSize)};
-            i32 srcHeight{static_cast<i32>((pixelsFromMemory.dvDy * pixelsFromMemory.dstHeight) >> FractionalComponentSize)};
+            float duDx{fixedToFloating(pixelsFromMemory.duDx)};
+            float dvDy{fixedToFloating(pixelsFromMemory.dvDy)};
 
             if (registers.pixelsFromMemory->sampleMode.origin == Registers::PixelsFromMemory::SampleModeOrigin::Center) {
                 // This is an MSAA resolve operation, sampling from the center of each pixel in order to resolve the final image from the MSAA samples
@@ -48,23 +52,17 @@ namespace skyline::soc::gm20b::engine::fermi2d {
                 /* 123
                    456 */
 
-                // Since we don't implement MSAA, any image that is supposed to have MSAA applied when drawing is just stored in the corner without any pixel scaling, so adjust width/height appropriately
-                srcWidth = pixelsFromMemory.dstWidth;
-                srcHeight = pixelsFromMemory.dstHeight;
-            } else {
-                // This is a regular blit operation, scaling from one image to another
-                // https://github.com/Ryujinx/Ryujinx/blob/c9c65af59edea05e7206a076cb818128c004384e/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs#L253
-                srcX -= (pixelsFromMemory.duDx >> FractionalComponentSize) >> 1;
-                srcY -= (pixelsFromMemory.dvDy >> FractionalComponentSize) >> 1;
+                // Since we don't implement MSAA, we can avoid any scaling at all by using a scale factor of 1
+                duDx = dvDy = 1.0f;
             }
 
-            context.Blit(src, dst,
-                         srcX, srcY,
-                         srcWidth, srcHeight,
-                         pixelsFromMemory.dstX0, pixelsFromMemory.dstY0,
-                         pixelsFromMemory.dstWidth, pixelsFromMemory.dstHeight,
-                         registers.pixelsFromMemory->sampleMode.origin == Registers::PixelsFromMemory::SampleModeOrigin::Center,
-                         pixelsFromMemory.sampleMode.filter == Registers::PixelsFromMemory::SampleModeFilter::Bilinear);
+            interconnect.Blit(src, dst,
+                              srcX, srcY,
+                              pixelsFromMemory.dstWidth, pixelsFromMemory.dstHeight,
+                              pixelsFromMemory.dstX0, pixelsFromMemory.dstY0,
+                              duDx, dvDy,
+                              registers.pixelsFromMemory->sampleMode.origin == Registers::PixelsFromMemory::SampleModeOrigin::Center,
+                              pixelsFromMemory.sampleMode.filter == Registers::PixelsFromMemory::SampleModeFilter::Bilinear);
         }
     }
 
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/fermi_2d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/fermi_2d.h
index f1e583e8..def16b12 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/fermi_2d.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/fermi_2d.h
@@ -4,7 +4,7 @@
 
 #pragma once
 
-#include <gpu/interconnect/blit_context.h>
+#include <gpu/interconnect/fermi_2d.h>
 #include "engine.h"
 
 namespace skyline::soc::gm20b {
@@ -18,7 +18,7 @@ namespace skyline::soc::gm20b::engine::fermi2d {
     class Fermi2D : public MacroEngineBase {
       private:
         host1x::SyncpointSet &syncpoints;
-        gpu::interconnect::BlitContext context;
+        gpu::interconnect::Fermi2D interconnect;
         ChannelContext &channelCtx;
 
         /**
@@ -71,15 +71,15 @@ namespace skyline::soc::gm20b::engine::fermi2d {
 
                 u32 _pad3_[8];
 
-                i32 dstX0;
-                i32 dstY0;
-                i32 dstWidth;
-                i32 dstHeight;
+                u32 dstX0;
+                u32 dstY0;
+                u32 dstWidth;
+                u32 dstHeight;
                 i64 duDx;
                 i64 dvDy;
-                i64 srcX0;
+                i64 srcX;
                 union {
-                    i64 srcY0;
+                    i64 srcY;
                     struct {
                         u32 _pad4_;
                         u32 trigger;
diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
index acf64fbb..f66e5336 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp
@@ -98,7 +98,7 @@ namespace skyline::soc::gm20b {
                     channelCtx.maxwell3D->HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
                     break;
                 case SubchannelId::TwoD:
-                    channelCtx.fermi2D->HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
+                    channelCtx.fermi2D.HandleMacroCall(method - engine::EngineMethodsEnd, argument, lastCall);
                     break;
                 default:
                     Logger::Warn("Called method 0x{:X} out of bounds for engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);
@@ -121,7 +121,7 @@ namespace skyline::soc::gm20b {
             case SubchannelId::Copy:
                 channelCtx.maxwellDma.CallMethod(method, argument);
             case SubchannelId::TwoD:
-                channelCtx.fermi2D->CallMethod(method, argument);
+                channelCtx.fermi2D.CallMethod(method, argument);
                 break;
             default:
                 Logger::Warn("Called method 0x{:X} in unimplemented engine 0x{:X}, args: 0x{:X}", method, subChannel, argument);