From 35fde2cd0b19091ddb1e5ba9b0d623f02421ac89 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sat, 22 Jan 2022 05:29:23 +0530 Subject: [PATCH] Rework Blocklinear Texture Deswizzling Blocklinear texture decoding was broken for padding blocks, which were decoded incorrectly, resulting in major texture corruption for any texture whose width is not aligned to 64 bytes. This has now been fixed with neater code that uses lambdas and functions where necessary to avoid redundant repetition. --- app/src/main/cpp/skyline/gpu/texture/layout.h | 153 +++++++++--------- 1 file changed, 80 insertions(+), 73 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/texture/layout.h b/app/src/main/cpp/skyline/gpu/texture/layout.h index 0a44ccee..5d10445f 100644 --- a/app/src/main/cpp/skyline/gpu/texture/layout.h +++ b/app/src/main/cpp/skyline/gpu/texture/layout.h @@ -15,94 +15,101 @@ namespace skyline::gpu::texture { size_t GetBlockLinearLayerSize(const GuestTexture &guest) { u32 blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs u32 robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines - u32 surfaceHeight{guest.dimensions.height / guest.format->blockHeight}; //!< The height of the surface in lines - u32 surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; //!< The height of the surface in ROBs (Row Of Blocks) + u32 surfaceHeightLines{guest.dimensions.height / guest.format->blockHeight}; //!< The height of the surface in lines + u32 surfaceHeightRobs{util::AlignUp(surfaceHeightLines, robHeight) / robHeight}; //!< The height of the surface in ROBs (Row Of Blocks, incl. 
padding ROB) u32 robWidthBytes{util::AlignUp((guest.dimensions.width / guest.format->blockWidth) * guest.format->bpb, GobWidth)}; //!< The width of a ROB in bytes - u32 robWidthBlocks{robWidthBytes / GobWidth}; //!< The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1) - u32 robBytes{robWidthBytes * robHeight}; //!< The size of a ROB in bytes + u32 robWidthBlocks{robWidthBytes / GobWidth}; //!< The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1, incl. padding block) - return robBytes * surfaceHeightRobs; + return robWidthBytes * robHeight * surfaceHeightRobs; + } + + /** + * @brief Copies pixel data between a linear and blocklinear texture + */ + template + void CopyBlockLinearInternal(const GuestTexture &guest, u8 *blockLinear, u8 *linear, CopyFunction copyFunction) { + u32 blockHeight{guest.tileConfig.blockHeight}; + u32 robHeight{GobHeight * blockHeight}; + u32 surfaceHeightLines{guest.dimensions.height / guest.format->blockHeight}; + u32 surfaceHeightRobs{surfaceHeightLines / robHeight}; //!< The height of the surface in ROBs excluding padding ROBs + + u32 formatBpb{guest.format->bpb}; + u32 robWidthUnalignedBytes{(guest.dimensions.width / guest.format->blockWidth) * formatBpb}; + u32 robWidthBytes{util::AlignUp(robWidthUnalignedBytes, GobWidth)}; + u32 robWidthBlocks{robWidthUnalignedBytes / GobWidth}; + + bool hasPaddingBlock{robWidthUnalignedBytes != robWidthBytes}; + u32 blockPaddingOffset{hasPaddingBlock ? 
(GobWidth - (robWidthBytes - robWidthUnalignedBytes)) : 0}; + + u32 robBytes{robWidthUnalignedBytes * robHeight}; + u32 gobYOffset{robWidthUnalignedBytes * GobHeight}; + + u8 *sector{blockLinear}; + + auto deswizzleRob{[&](u8 *linearRob, u32 paddingY) { + auto deswizzleBlock{[&](u8 *linearBlock, auto copySector) __attribute__((always_inline)) { + for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs + #pragma clang loop unroll_count(32) + for (u32 index{}; index < SectorWidth * SectorHeight; index++) { // Every Y-axis GOB contains `SectorWidth * SectorHeight` sectors + u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis + u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis + + copySector(linearBlock + (yT * robWidthUnalignedBytes) + xT, xT); + } + + linearBlock += gobYOffset; // Increment the linear GOB to the next Y-axis GOB + } + }}; + + for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `robWidthBlocks` blocks (excl. 
padding block) + deswizzleBlock(linearRob, [&](u8 *linearSector, u32 xT) __attribute__((always_inline)) { + copyFunction(linearSector, sector, SectorWidth); + sector += SectorWidth; // `sectorWidth` bytes are of sequential image data + }); + + sector += paddingY; // Skip over any padding at the end of this block + linearRob += GobWidth; // Increment the linear block to the next block (As Block Width = 1 GOB Width) + } + + if (hasPaddingBlock) + deswizzleBlock(linearRob, [&](u8 *linearSector, u32 xT) __attribute__((always_inline)) { + #pragma clang loop unroll_count(4) + for (u32 pixelOffset{}; pixelOffset < SectorWidth; pixelOffset += formatBpb) { + if (xT < blockPaddingOffset) + copyFunction(linearSector + pixelOffset, sector, formatBpb); + sector += formatBpb; + xT += formatBpb; + } + }); + }}; + + u8 *linearRob{linear}; + for (u32 rob{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs (excl. padding ROB) + deswizzleRob(linearRob, 0); + linearRob += robBytes; // Increment the linear block to the next ROB + } + + if (surfaceHeightLines % robHeight != 0) { + blockHeight = (util::AlignUp(surfaceHeightLines, GobHeight) - (surfaceHeightRobs * robHeight)) / GobHeight; // Calculate the amount of Y GOBs which aren't padding + deswizzleRob(linearRob, (guest.tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight)); + } } /** * @brief Copies the contents of a blocklinear guest texture to a linear output buffer */ void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) { - u32 blockHeight{guest.tileConfig.blockHeight}; - u32 robHeight{GobHeight * blockHeight}; - u32 surfaceHeight{guest.dimensions.height / guest.format->blockHeight}; - u32 surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; - u32 robWidthBytes{util::AlignUp((guest.dimensions.width / guest.format->blockWidth) * guest.format->bpb, GobWidth)}; - u32 robWidthBlocks{robWidthBytes / GobWidth}; - u32 
robBytes{robWidthBytes * robHeight}; - u32 gobYOffset{robWidthBytes * GobHeight}; - - auto inputSector{guestInput}; - auto outputRob{linearOutput}; - - for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs - auto outputBlock{outputRob}; // We iterate through a block independently of the ROB - for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` Blocks - auto outputGob{outputBlock}; // We iterate through a GOB independently of the block - for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs - for (u32 index{}; index < SectorWidth * SectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors - u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis - u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis - std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, SectorWidth); - inputSector += SectorWidth; // `sectorWidth` bytes are of sequential image data - } - outputGob += gobYOffset; // Increment the output GOB to the next Y-axis GOB - } - inputSector += paddingY; // Increment the input sector to the next sector - outputBlock += GobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width) - } - outputRob += robBytes; // Increment the output block to the next ROB - - y += robHeight; // Increment the Y position to the next ROB - blockHeight = static_cast(std::min(static_cast(blockHeight), (surfaceHeight - y) / GobHeight)); // Calculate the amount of Y GOBs which aren't padding - paddingY = (guest.tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight); // Calculate the amount of padding between contiguous sectors - } + CopyBlockLinearInternal(guest, guestInput, linearOutput, std::memcpy); } /** * @brief Copies the contents of a linear input buffer to a blocklinear guest 
output buffer */ void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) { - u32 blockHeight{guest.tileConfig.blockHeight}; - u32 robHeight{GobHeight * blockHeight}; - u32 surfaceHeight{guest.dimensions.height / guest.format->blockHeight}; - u32 surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; - u32 robWidthBytes{util::AlignUp((guest.dimensions.width / guest.format->blockWidth) * guest.format->bpb, GobWidth)}; - u32 robWidthBlocks{robWidthBytes / GobWidth}; - u32 robBytes{robWidthBytes * robHeight}; - u32 gobYOffset{robWidthBytes * GobHeight}; - - auto outputSector{guestOutput}; - auto inputRob{linearInput}; - - for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { - auto outputBlock{inputRob}; - for (u32 block{}; block < robWidthBlocks; block++) { - auto inputGob{outputBlock}; - for (u32 gobY{}; gobY < blockHeight; gobY++) { - for (u32 index{}; index < SectorWidth * SectorHeight; index++) { - u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; - u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; - std::memcpy(outputSector, inputGob + (yT * robWidthBytes) + xT, SectorWidth); - outputSector += SectorWidth; - } - inputGob += gobYOffset; - } - outputSector += paddingY; - outputBlock += GobWidth; - } - inputRob += robBytes; - - y += robHeight; - blockHeight = static_cast(std::min(static_cast(blockHeight), (surfaceHeight - y) / GobHeight)); - paddingY = (guest.tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight); - } + CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8* src, u8* dst, size_t size) { + std::memcpy(dst, src, size); + }); } /**