From da7e6a7df739ae50969f01ad25e5aa501ad21307 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sat, 28 May 2022 21:21:51 +0530 Subject: [PATCH] Replace Maxwell DMA `GuestTexture` usage with new swizzling API Maxwell DMA requires swizzled copies to/from textures. Previously it had to construct an arbitrary `GuestTexture` to do so, but the introduction of the cleaner API has made this redundant. This commit removes that workaround and replaces it with direct calls to the API, passing all the necessary values. --- app/src/main/cpp/skyline/gpu/texture/format.h | 18 ---- .../skyline/soc/gm20b/engines/maxwell_dma.cpp | 101 +++++++++--------- 2 files changed, 48 insertions(+), 71 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/texture/format.h b/app/src/main/cpp/skyline/gpu/texture/format.h index 819e679f..62052287 100644 --- a/app/src/main/cpp/skyline/gpu/texture/format.h +++ b/app/src/main/cpp/skyline/gpu/texture/format.h @@ -182,22 +182,4 @@ namespace skyline::gpu::format { #undef FORMAT_NORM_INT_FLOAT // @fmt:on - - inline const gpu::texture::FormatBase &GetFormatForBpp(u32 bytesPerPixel) { - switch (bytesPerPixel) { - case 1: - return R8Uint; - case 2: - return R8G8Uint; - case 4: - return R8G8B8A8Uint; - case 8: - return R16G16B16A16Uint; - case 16: - return R32G32B32A32Uint; - default: - Logger::Error("Couldn't convert bytes per pixel: {}", bytesPerPixel); - return R8Uint; - } - } } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp index 2024f1d2..1c83c74c 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_dma.cpp @@ -39,14 +39,14 @@ namespace skyline::soc::gm20b::engine { executor.Execute(); if (registers.launchDma->multiLineEnable) { if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch && - registers.launchDma->dstMemoryLayout == 
Registers::LaunchDma::MemoryLayout::BlockLinear) + registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear) CopyPitchToBlockLinear(); else if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear && registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch) CopyBlockLinearToPitch(); else Logger::Warn("Unimplemented multi-line copy type: {} -> {}!", - static_cast(registers.launchDma->srcMemoryLayout), static_cast(registers.launchDma->dstMemoryLayout)); + static_cast(registers.launchDma->srcMemoryLayout), static_cast(registers.launchDma->dstMemoryLayout)); } else { // 1D buffer copy // TODO: implement swizzled 1D copies based on VMM 'kind' @@ -77,117 +77,112 @@ namespace skyline::soc::gm20b::engine { void MaxwellDma::CopyPitchToBlockLinear() { if (registers.dstSurface->blockSize.Depth() > 1 || registers.dstSurface->depth > 1) { - Logger::Warn("3D DMA engine copies are unimplemented!"); + Logger::Warn("3D DMA engine copies are unimplemented"); return; } if (registers.dstSurface->blockSize.Width() != 1) { - Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented!"); + Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented"); return; } u32 bytesPerPixel{static_cast(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())}; if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchIn) { - Logger::Warn("Non-linear DMA source textures are not implemented!"); + Logger::Warn("Non-linear DMA source textures are not implemented"); return; } if (registers.dstSurface->origin.x || registers.dstSurface->origin.y) { - Logger::Warn("Non-zero origin DMA copies are not implemented!"); + Logger::Warn("Non-zero origin DMA copies are not implemented"); return; } if (*registers.lineLengthIn != registers.dstSurface->width) Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, 
registers.dstSurface->width); - gpu::GuestTexture srcTexture{span{}, - gpu::texture::Dimensions{*registers.lineLengthIn, *registers.lineCount, 1}, - gpu::format::GetFormatForBpp(bytesPerPixel), - gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear }, - gpu::texture::TextureType::e2D}; + gpu::texture::Dimensions srcDimensions{*registers.lineLengthIn, *registers.lineCount, 1}; + size_t srcStride{srcDimensions.width * srcDimensions.height * bytesPerPixel}; - if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerStride())}; mappings.size() == 1) { - srcTexture.mappings[0] = mappings[0]; - } else { - Logger::Warn("DMA for split textures is unimplemented!"); + auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcStride)}; + if (srcMappings.size() != 1) { + Logger::Warn("DMA for split textures is unimplemented"); return; } - // This represents a single layer view into a potentially multi-layer texture - gpu::GuestTexture dstTexture{span{}, - gpu::texture::Dimensions{*registers.lineLengthIn, registers.dstSurface->height, 1}, - gpu::format::GetFormatForBpp(bytesPerPixel), - gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 }, - gpu::texture::TextureType::e2D}; + gpu::texture::Dimensions dstDimensions{registers.dstSurface->width, registers.dstSurface->height, registers.dstSurface->depth}; + dstDimensions.width = *registers.lineLengthIn; // We do not support copying subrects so we need the width to match on the source and destination + size_t dstBlockHeight{registers.dstSurface->blockSize.Height()}, dstBlockDepth{registers.dstSurface->blockSize.Depth()}; + size_t dstLayerStride{gpu::texture::GetBlockLinearLayerSize(dstDimensions, 1, 1, bytesPerPixel, dstBlockHeight, dstBlockDepth)}; - u64 dstLayerAddress{*registers.offsetOut + dstTexture.GetLayerStride() * registers.dstSurface->layer}; - if (auto 
mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerStride())}; mappings.size() == 1) { - dstTexture.mappings[0] = mappings[0]; - } else { - Logger::Warn("DMA for split textures is unimplemented!"); + size_t dstLayerAddress{*registers.offsetOut + (registers.dstSurface->layer * dstLayerStride)}; + auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstLayerStride)}; + if (dstMappings.size() != 1) { + Logger::Warn("DMA for split textures is unimplemented"); return; } - Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcTexture.dimensions.width, srcTexture.dimensions.height, u64{*registers.offsetIn}, dstTexture.dimensions.width, dstTexture.dimensions.height, dstLayerAddress); + Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcDimensions.width, srcDimensions.height, u64{*registers.offsetIn}, dstDimensions.width, dstDimensions.height, dstLayerAddress); - gpu::texture::CopyLinearToBlockLinear(dstTexture, srcTexture.mappings.front().data(), dstTexture.mappings.front().data()); + gpu::texture::CopyLinearToBlockLinear( + dstDimensions, + 1, 1, bytesPerPixel, + dstBlockHeight, dstBlockDepth, + srcMappings.front().data(), dstMappings.front().data() + ); } - void MaxwellDma::CopyBlockLinearToPitch() { if (registers.srcSurface->blockSize.Depth() > 1 || registers.srcSurface->depth > 1) { - Logger::Warn("3D DMA engine copies are unimplemented!"); + Logger::Warn("3D DMA engine copies are unimplemented"); return; } if (registers.srcSurface->blockSize.Width() != 1) { - Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented!"); + Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented"); return; } u32 bytesPerPixel{static_cast(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())}; if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchOut) { - Logger::Warn("Non-linear DMA destination textures are not implemented!"); + 
Logger::Warn("Non-linear DMA destination textures are not implemented"); return; } if (registers.srcSurface->origin.x || registers.srcSurface->origin.y) { - Logger::Warn("Non-zero origin DMA copies are not implemented!"); + Logger::Warn("Non-zero origin DMA copies are not implemented"); return; } if (*registers.lineLengthIn != registers.srcSurface->width) Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width); - gpu::GuestTexture srcTexture{span{}, - gpu::texture::Dimensions{registers.srcSurface->width, registers.srcSurface->height, 1}, - gpu::format::GetFormatForBpp(bytesPerPixel), - gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.srcSurface->blockSize.Height(), .blockDepth = 1 }, - gpu::texture::TextureType::e2D}; + gpu::texture::Dimensions srcDimensions{registers.srcSurface->width, registers.srcSurface->height, registers.srcSurface->depth}; + srcDimensions.width = *registers.lineLengthIn; // We do not support copying subrects so we need the width to match on the source and destination + size_t srcBlockHeight{registers.srcSurface->blockSize.Height()}, srcBlockDepth{registers.srcSurface->blockSize.Depth()}; + size_t srcStride{gpu::texture::GetBlockLinearLayerSize(srcDimensions, 1, 1, bytesPerPixel, srcBlockHeight, srcBlockDepth)}; - if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerStride())}; mappings.size() == 1) { - srcTexture.mappings[0] = mappings[0]; - } else { - Logger::Warn("DMA for split textures is unimplemented!"); + auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcStride)}; + if (srcMappings.size() != 1) { + Logger::Warn("DMA for split textures is unimplemented"); return; } - gpu::GuestTexture dstTexture{span{}, - gpu::texture::Dimensions{*registers.lineLengthIn, *registers.lineCount, 1}, - gpu::format::GetFormatForBpp(bytesPerPixel), - gpu::texture::TileConfig{ .mode = 
gpu::texture::TileMode::Linear }, - gpu::texture::TextureType::e2D}; + gpu::texture::Dimensions dstDimensions{*registers.lineLengthIn, *registers.lineCount, 1}; + size_t dstStride{dstDimensions.width * dstDimensions.height * bytesPerPixel}; - if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, dstTexture.GetLayerStride())}; mappings.size() == 1) { - dstTexture.mappings[0] = mappings[0]; - } else { - Logger::Warn("DMA for split textures is unimplemented!"); + auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetOut, dstStride)}; + if (dstMappings.size() != 1) { + Logger::Warn("DMA for split textures is unimplemented"); return; } - Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcTexture.dimensions.width, srcTexture.dimensions.height, u64{*registers.offsetIn}, dstTexture.dimensions.width, dstTexture.dimensions.height, u64{*registers.offsetOut}); + Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcDimensions.width, srcDimensions.height, u64{*registers.offsetIn}, dstDimensions.width, dstDimensions.height, u64{*registers.offsetOut}); - gpu::texture::CopyBlockLinearToLinear(srcTexture, srcTexture.mappings.front().data(), dstTexture.mappings.front().data()); + gpu::texture::CopyBlockLinearToLinear( + srcDimensions, + 1, 1, bytesPerPixel, + srcBlockHeight, srcBlockDepth, + srcMappings.front().data(), dstMappings.front().data()); } void MaxwellDma::CallMethodBatchNonInc(u32 method, span arguments) {