From 7d0b7f0b717c7a67804c7886e0afb14cbb35f25b Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Wed, 5 Apr 2023 14:57:52 +0100 Subject: [PATCH] Handle OOB blits by adding to the texture base offset The previous method would cause OOB reads for the last row to clamp, and adding an extra row would potentially encounter unmapped memory. Instead, offset the texture base address by the out-of-bounds read start, following the approach Ryujinx uses. --- .../cpp/skyline/gpu/interconnect/fermi_2d.cpp | 32 +++++++++++++------ .../cpp/skyline/gpu/interconnect/fermi_2d.h | 2 +- .../skyline/gpu/shaders/helper_shaders.cpp | 2 -- app/src/main/shaders/blit.frag | 4 --- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp index da618587..0adbf6e4 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp @@ -13,7 +13,7 @@ namespace skyline::gpu::interconnect { using IOVA = soc::gm20b::IOVA; using MemoryLayout = skyline::soc::gm20b::engine::fermi2d::type::MemoryLayout; - gpu::GuestTexture Fermi2D::GetGuestTexture(const Surface &surface) { + std::pair Fermi2D::GetGuestTexture(const Surface &surface, u32 oobReadStart, u32 oobReadWidth) { auto determineFormat = [&](Surface::SurfaceFormat format) -> skyline::gpu::texture::Format { #define FORMAT_CASE(fermiFmt, skFmt, fmtType) \ case Surface::SurfaceFormat::fermiFmt ## fmtType: \ @@ -84,8 +84,15 @@ namespace skyline::gpu::interconnect { texture.layerCount = 1; texture.viewType = vk::ImageViewType::e2D; + + u64 addressOffset{}; if (surface.memoryLayout == MemoryLayout::Pitch) { texture.dimensions = gpu::texture::Dimensions{surface.stride / texture.format->bpb, surface.height, 1}; + + // OpenGL games rely on reads wrapping around to the next line when reading out of bounds, emulate this behaviour by offsetting the address + if (oobReadStart && surface.width == (oobReadWidth + oobReadStart) && (oobReadWidth + 
oobReadStart) > texture.dimensions.width) + addressOffset += oobReadStart * texture.format->bpb; + texture.tileConfig = gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Pitch, .pitch = surface.stride @@ -99,11 +106,11 @@ namespace skyline::gpu::interconnect { }; } - IOVA iova{surface.address}; + u64 iova{u64{surface.address} + addressOffset}; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(iova, texture.GetSize())}; texture.mappings.assign(mappings.begin(), mappings.end()); - return texture; + return {texture, addressOffset != 0}; } Fermi2D::Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx) @@ -114,10 +121,19 @@ namespace skyline::gpu::interconnect { void Fermi2D::Blit(const Surface &srcSurface, const Surface &dstSurface, float srcRectX, float srcRectY, u32 dstRectWidth, u32 dstRectHeight, u32 dstRectX, u32 dstRectY, float duDx, float dvDy, SampleModeOrigin sampleOrigin, bool resolve, SampleModeFilter filter) { TRACE_EVENT("gpu", "Fermi2D::Blit"); - // TODO: When we support MSAA perform a resolve operation rather than blit when the `resolve` flag is set. - auto srcGuestTexture{GetGuestTexture(srcSurface)}; - auto dstGuestTexture{GetGuestTexture(dstSurface)}; + // Blit shader always samples from centre so adjust if necessary + float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX}; + float centredSrcRectY{sampleOrigin == SampleModeOrigin::Corner ? srcRectY - 0.5f : srcRectY}; + u32 oobReadStart{static_cast(centredSrcRectX)}; + u32 oobReadWidth{static_cast(duDx * static_cast(dstRectWidth))}; + + // TODO: When we support MSAA perform a resolve operation rather than blit when the `resolve` flag is set. 
+ auto [srcGuestTexture, srcWentOob]{GetGuestTexture(srcSurface, oobReadStart, oobReadWidth)}; + if (srcWentOob) + centredSrcRectX = 0.0f; + + auto [dstGuestTexture, dstWentOob]{GetGuestTexture(dstSurface)}; auto srcTextureView{gpu.texture.FindOrCreate(srcGuestTexture, executor.tag)}; executor.AttachDependency(srcTextureView); executor.AttachTexture(srcTextureView.get()); @@ -127,10 +143,6 @@ namespace skyline::gpu::interconnect { executor.AttachTexture(dstTextureView.get()); dstTextureView->texture->MarkGpuDirty(executor.usageTracker); - // Blit shader always samples from centre so adjust if necessary - float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX}; - float centredSrcRectY{sampleOrigin == SampleModeOrigin::Corner ? srcRectY - 0.5f : srcRectY}; - executor.AddCheckpoint("Before blit"); gpu.helperShaders.blitHelperShader.Blit( gpu, diff --git a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.h b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.h index a870cc3b..f9b86c24 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.h @@ -33,7 +33,7 @@ namespace skyline::gpu::interconnect { soc::gm20b::ChannelContext &channelCtx; gpu::interconnect::CommandExecutor &executor; - gpu::GuestTexture GetGuestTexture(const Surface &surface); + std::pair GetGuestTexture(const Surface &surface, u32 oobReadStart = 0, u32 oobReadWidth = 0); public: Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx); diff --git a/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp index 370df23c..c3dc829f 100644 --- a/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp +++ b/app/src/main/cpp/skyline/gpu/shaders/helper_shaders.cpp @@ -175,7 +175,6 @@ namespace skyline::gpu { struct FragmentPushConstantLayout { glsl::Vec2 srcOriginUV; glsl::Vec2 dstSrcScaleFactor; - float srcHeightRecip; }; constexpr static 
std::array PushConstantRanges{ @@ -254,7 +253,6 @@ namespace skyline::gpu { }, blit::FragmentPushConstantLayout{ .srcOriginUV = {srcRect.x / srcImageDimensions.width, srcRect.y / srcImageDimensions.height}, .dstSrcScaleFactor = {dstSrcScaleFactorX * (srcRect.width / srcImageDimensions.width), dstSrcScaleFactorY * (srcRect.height / srcImageDimensions.height)}, - .srcHeightRecip = 1.0f / srcImageDimensions.height }, GetPipeline(gpu, {dstImageView->format->vkFormat, diff --git a/app/src/main/shaders/blit.frag b/app/src/main/shaders/blit.frag index 6ee85767..d3ab6cd8 100644 --- a/app/src/main/shaders/blit.frag +++ b/app/src/main/shaders/blit.frag @@ -8,14 +8,10 @@ layout (push_constant) uniform constants { layout (offset = 16) vec2 srcOriginUV; vec2 dstSrcScaleFactor; - float srcHeightRecip; } PC; void main() { vec2 srcUV = dstUV * PC.dstSrcScaleFactor + PC.srcOriginUV; - // Account for out of bounds blits by moving to the next line of the source texture for the copy - srcUV.y += floor(srcUV.x) * PC.srcHeightRecip; - srcUV.x = srcUV.x - floor(srcUV.x); colour.rgba = texture(src, srcUV); }