From 70ee36e85c42bf44cc07d3e37da6e53d3f12530f Mon Sep 17 00:00:00 2001
From: TheASVigilante <65920585+TheASVigilante@users.noreply.github.com>
Date: Sun, 19 Feb 2023 15:19:02 +0100
Subject: [PATCH] Add support for 1D remapped buffer clears

---
 .../skyline/gpu/interconnect/maxwell_dma.cpp  | 31 +++++++++++++++++++
 .../skyline/gpu/interconnect/maxwell_dma.h    |  6 ++++
 .../skyline/soc/gm20b/engines/maxwell_dma.cpp | 11 +++++++
 3 files changed, 48 insertions(+)

diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp
index d2a59739..21dae9d0 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp
@@ -52,4 +52,35 @@ namespace skyline::gpu::interconnect {
             });
         });
     }
+
+    void MaxwellDma::Clear(span<u8> mapping, u32 value) {
+        // vkCmdFillBuffer can only write whole 32-bit words
+        if (!util::IsAligned(mapping.size(), 4))
+            throw exception("Cleared buffer's size is not aligned to 4 bytes!");
+
+        auto clearBuf{gpu.buffer.FindOrCreate(mapping, executor.tag, [this](std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
+            executor.AttachLockedBuffer(buffer, std::move(lock));
+        })};
+        executor.AttachBuffer(clearBuf);
+
+        clearBuf.GetBuffer()->BlockSequencedCpuBackingWrites();
+        clearBuf.GetBuffer()->MarkGpuDirty();
+
+        executor.AddOutsideRpCommand([clearBuf, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
+            // Order the fill after all previously recorded commands
+            commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, {}, vk::MemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eMemoryRead,
+                .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite
+            }, {}, {});
+
+            auto clearBufBinding{clearBuf.GetBinding(gpu)};
+            commandBuffer.fillBuffer(clearBufBinding.buffer, clearBufBinding.offset, clearBufBinding.size, value);
+
+            // Make the filled contents visible to every later command
+            commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
+                .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
+                .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
+            }, {}, {});
+        });
+    }
 }
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.h
index 071e7402..1eb0f80d 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.h
@@ -30,5 +30,11 @@ namespace skyline::gpu::interconnect {
         MaxwellDma(GPU &gpu, soc::gm20b::ChannelContext &channelCtx);
 
         void Copy(span<u8> dstMapping, span<u8> srcMapping);
+
+        /**
+         * @brief Fills the buffer backing the supplied mapping with a repeated 32-bit value
+         * @note The size of the mapping must be a multiple of 4 bytes, an exception is thrown otherwise
+         */
+        void Clear(span<u8> mapping, u32 value);
     };
 }
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
index 02ab2622..06f1930e 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp
@@ -89,7 +89,18 @@ namespace skyline::soc::gm20b::engine {
             auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, *registers.lineLengthIn * dstBpp)};
 
             if (registers.launchDma->remapEnable) [[unlikely]] {
+                // Remapped buffer clears
+                if ((registers.remapComponents->dstX == Registers::RemapComponents::Swizzle::ConstA) &&
+                    (registers.remapComponents->dstY == Registers::RemapComponents::Swizzle::ConstA) &&
+                    (registers.remapComponents->dstZ == Registers::RemapComponents::Swizzle::ConstA) &&
+                    (registers.remapComponents->dstW == Registers::RemapComponents::Swizzle::ConstA) &&
+                    (registers.remapComponents->ComponentSize() == 4)) {
+                    // Fill every destination mapping with the same constant, visiting each exactly once
+                    for (const auto &mapping : dstMappings)
+                        interconnect.Clear(mapping, *registers.remapConstA);
+                } else {
                 Logger::Warn("Remapped DMA copies are unimplemented!");
+                }
             } else {
                 if (srcMappings.size() != 1 || dstMappings.size() != 1) [[unlikely]]
                     channelCtx.asCtx->gmmu.Copy(u64{*registers.offsetOut}, u64{*registers.offsetIn}, *registers.lineLengthIn);