Implement pitch swizzled copies

This commit is contained in:
TheASVigilante 2023-02-19 15:08:40 +01:00 committed by Billy Laws
parent 5c4bb1c44e
commit df0fd88991
2 changed files with 95 additions and 40 deletions

View File

@ -89,39 +89,43 @@ namespace skyline::gpu::texture {
} }
/** /**
* @brief Copies pixel data between a linear and blocklinear texture * @brief Copies pixel data between a pitch-linear and blocklinear texture
* @tparam BlockLinearToLinear Whether to copy from a blocklinear texture to a linear texture or a linear texture to a blocklinear texture * @tparam BlockLinearToPitch Whether to copy from a blocklinear texture to a pitch-linear texture or a pitch-linear texture to a blocklinear texture
*/ */
template<bool BlockLinearToLinear> template<bool BlockLinearToPitch>
void CopyBlockLinearInternal(Dimensions dimensions, void CopyBlockLinearInternal(Dimensions dimensions,
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
size_t gobBlockHeight, size_t gobBlockDepth, size_t gobBlockHeight, size_t gobBlockDepth,
u8 *blockLinear, u8 *linear) { u8 *blockLinear, u8 *pitch) {
size_t robWidthUnalignedBytes{util::DivideCeil<size_t>(dimensions.width, formatBlockWidth) * formatBpb}; size_t robWidthUnalignedBytes{util::DivideCeil<size_t>(dimensions.width, formatBlockWidth) * formatBpb};
size_t robWidthBytes{util::AlignUp(robWidthUnalignedBytes, GobWidth)}; size_t robWidthBytes{util::AlignUp(robWidthUnalignedBytes, GobWidth)};
size_t robWidthBlocks{robWidthUnalignedBytes / GobWidth}; size_t robWidthBlocks{robWidthUnalignedBytes / GobWidth};
if (formatBpb == 12) [[unlikely]]
formatBpb = 4;
size_t blockHeight{gobBlockHeight}; size_t blockHeight{gobBlockHeight};
size_t robHeight{GobHeight * blockHeight}; size_t robHeight{GobHeight * blockHeight};
size_t surfaceHeightLines{util::DivideCeil<size_t>(dimensions.height, formatBlockHeight)}; size_t surfaceHeightLines{util::DivideCeil<size_t>(dimensions.height, formatBlockHeight)};
size_t surfaceHeightRobs{surfaceHeightLines / robHeight}; //!< The height of the surface in ROBs excluding padding ROBs size_t surfaceHeightRobs{surfaceHeightLines / robHeight}; //!< The height of the surface in ROBs excluding padding ROBs
size_t blockDepth{std::min<size_t>(dimensions.depth, gobBlockDepth)}; size_t blockDepth{std::min<size_t>(dimensions.depth, gobBlockDepth)};
size_t blockPaddingZ{SectorWidth * SectorHeight * blockHeight * (gobBlockDepth - blockDepth)}; size_t blockPaddingZ{GobWidth * GobHeight * blockHeight * (gobBlockDepth - blockDepth)};
bool hasPaddingBlock{robWidthUnalignedBytes != robWidthBytes}; bool hasPaddingBlock{robWidthUnalignedBytes != robWidthBytes};
size_t blockPaddingOffset{hasPaddingBlock ? (GobWidth - (robWidthBytes - robWidthUnalignedBytes)) : 0}; size_t blockPaddingOffset{hasPaddingBlock ? (GobWidth - (robWidthBytes - robWidthUnalignedBytes)) : 0};
size_t robBytes{robWidthUnalignedBytes * robHeight}; size_t pitchWidthBytes{pitchAmount ? pitchAmount : robWidthUnalignedBytes};
size_t gobYOffset{robWidthUnalignedBytes * GobHeight}; size_t robBytes{pitchWidthBytes * robHeight};
size_t gobZOffset{robWidthUnalignedBytes * surfaceHeightLines}; size_t gobYOffset{pitchWidthBytes * GobHeight};
size_t gobZOffset{pitchWidthBytes * surfaceHeightLines};
u8 *sector{blockLinear}; u8 *sector{blockLinear};
auto deswizzleRob{[&](u8 *linearRob, auto isLastRob, size_t blockPaddingY = 0, size_t blockExtentY = 0) { auto deswizzleRob{[&](u8 *pitchRob, auto isLastRob, size_t blockPaddingY = 0, size_t blockExtentY = 0) {
auto deswizzleBlock{[&](u8 *linearBlock, auto copySector) __attribute__((always_inline)) { auto deswizzleBlock{[&](u8 *pitchBlock, auto copySector) __attribute__((always_inline)) {
for (size_t gobZ{}; gobZ < blockDepth; gobZ++) { // Every Block contains `blockDepth` Z-axis GOBs (Slices) for (size_t gobZ{}; gobZ < blockDepth; gobZ++) { // Every Block contains `blockDepth` Z-axis GOBs (Slices)
u8 *linearGob{linearBlock}; u8 *pitchGob{pitchBlock};
for (size_t gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs for (size_t gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs
#pragma clang loop unroll_count(SectorLinesInGob) #pragma clang loop unroll_count(SectorLinesInGob)
for (size_t index{}; index < SectorLinesInGob; index++) { for (size_t index{}; index < SectorLinesInGob; index++) {
@ -129,47 +133,49 @@ namespace skyline::gpu::texture {
size_t yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis size_t yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis
if constexpr (!isLastRob) { if constexpr (!isLastRob) {
copySector(linearGob + (yT * robWidthUnalignedBytes) + xT, xT); copySector(pitchGob + (yT * pitchWidthBytes) + xT, xT);
} else { } else {
if (gobY != blockHeight - 1 || yT < blockExtentY) if (gobY != blockHeight - 1 || yT < blockExtentY)
copySector(linearGob + (yT * robWidthUnalignedBytes) + xT, xT); copySector(pitchGob + (yT * pitchWidthBytes) + xT, xT);
else else
sector += SectorWidth; sector += SectorWidth;
} }
} }
linearGob += gobYOffset; // Increment the linear GOB to the next Y-axis GOB pitchGob += gobYOffset; // Increment the linear GOB to the next Y-axis GOB
} }
linearBlock += gobZOffset; // Increment the linear block to the next Z-axis GOB if constexpr (isLastRob)
sector += blockPaddingY; // Skip over any padding at the end of this slice
pitchBlock += gobZOffset; // Increment the linear block to the next Z-axis GOB
} }
sector += blockPaddingZ; // Skip over any padding Z-axis GOBs sector += blockPaddingZ; // Skip over any padding Z-axis GOBs
}}; }};
for (size_t block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` blocks (excl. padding block) for (size_t block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` blocks (excl. padding block)
deswizzleBlock(linearRob, [&](u8 *linearSector, size_t) __attribute__((always_inline)) { deswizzleBlock(pitchRob, [&](u8 *linearSector, size_t) __attribute__((always_inline)) {
if constexpr (BlockLinearToLinear) if constexpr (BlockLinearToPitch)
std::memcpy(linearSector, sector, SectorWidth); std::memcpy(linearSector, sector, SectorWidth);
else else
std::memcpy(sector, linearSector, SectorWidth); std::memcpy(sector, linearSector, SectorWidth);
sector += SectorWidth; // `sectorWidth` bytes are of sequential image data sector += SectorWidth; // `sectorWidth` bytes are of sequential image data
}); });
if constexpr (isLastRob) pitchRob += GobWidth; // Increment the linear block to the next block (As Block Width = 1 GOB Width)
sector += blockPaddingY; // Skip over any padding at the end of this block
linearRob += GobWidth; // Increment the linear block to the next block (As Block Width = 1 GOB Width)
} }
if (hasPaddingBlock) if (hasPaddingBlock)
deswizzleBlock(linearRob, [&](u8 *linearSector, size_t xT) __attribute__((always_inline)) { deswizzleBlock(pitchRob, [&](u8 *linearSector, size_t xT) __attribute__((always_inline)) {
#pragma clang loop unroll_count(4) #pragma clang loop unroll_count(4)
for (size_t pixelOffset{}; pixelOffset < SectorWidth; pixelOffset += formatBpb) { for (size_t pixelOffset{}; pixelOffset < SectorWidth; pixelOffset += formatBpb) {
if (xT < blockPaddingOffset) if (xT < blockPaddingOffset) {
if constexpr (BlockLinearToLinear) if constexpr (BlockLinearToPitch)
std::memcpy(linearSector + pixelOffset, sector, formatBpb); std::memcpy(linearSector + pixelOffset, sector, formatBpb);
else else
std::memcpy(sector, linearSector + pixelOffset, formatBpb); std::memcpy(sector, linearSector + pixelOffset, formatBpb);
}
sector += formatBpb; sector += formatBpb;
xT += formatBpb; xT += formatBpb;
@ -177,21 +183,20 @@ namespace skyline::gpu::texture {
}); });
}}; }};
u8 *linearRob{linear}; u8 *pitchRob{pitch};
for (size_t rob{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs (excl. padding ROB) for (size_t rob{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs (excl. padding ROB)
deswizzleRob(linearRob, std::false_type{}); deswizzleRob(pitchRob, std::false_type{});
linearRob += robBytes; // Increment the linear ROB to the next ROB pitchRob += robBytes; // Increment the linear ROB to the next ROB
} }
if (surfaceHeightLines % robHeight != 0) { if (surfaceHeightLines % robHeight != 0) {
blockHeight = (util::AlignUp(surfaceHeightLines, GobHeight) - (surfaceHeightRobs * robHeight)) / GobHeight; // Calculate the amount of Y GOBs which aren't padding blockHeight = (util::AlignUp(surfaceHeightLines, GobHeight) - (surfaceHeightRobs * robHeight)) / GobHeight; // Calculate the amount of Y GOBs which aren't padding
size_t alignedSurfaceLines{util::DivideCeil<size_t>(dimensions.height, formatBlockHeight)};
deswizzleRob( deswizzleRob(
linearRob, pitchRob,
std::true_type{}, std::true_type{},
(gobBlockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight), // Calculate padding at the end of a block to skip (gobBlockHeight - blockHeight) * (GobWidth * GobHeight), // Calculate padding at the end of a block to skip
util::IsAligned(alignedSurfaceLines, GobHeight) ? GobHeight : alignedSurfaceLines - util::AlignDown(alignedSurfaceLines, GobHeight) // Calculate the line relative to the start of the last GOB that is the cut-off point for the image util::IsAligned(surfaceHeightLines, GobHeight) ? GobHeight : surfaceHeightLines - util::AlignDown(surfaceHeightLines, GobHeight) // Calculate the line relative to the start of the last GOB that is the cut-off point for the image
); );
} }
} }
@ -199,34 +204,58 @@ namespace skyline::gpu::texture {
void CopyBlockLinearToLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, u8 *blockLinear, u8 *linear) { void CopyBlockLinearToLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, u8 *blockLinear, u8 *linear) {
CopyBlockLinearInternal<true>( CopyBlockLinearInternal<true>(
dimensions, dimensions,
formatBlockWidth, formatBlockHeight, formatBpb, formatBlockWidth, formatBlockHeight, formatBpb, 0,
gobBlockHeight, gobBlockDepth, gobBlockHeight, gobBlockDepth,
blockLinear, linear blockLinear, linear
); );
} }
void CopyBlockLinearToPitch(Dimensions dimensions,
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
size_t gobBlockHeight, size_t gobBlockDepth,
u8 *blockLinear, u8 *pitch) {
CopyBlockLinearInternal<true>(
dimensions,
formatBlockWidth, formatBlockHeight, formatBpb, pitchAmount,
gobBlockHeight, gobBlockDepth,
blockLinear, pitch
);
}
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear) { void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear) {
CopyBlockLinearInternal<true>( CopyBlockLinearInternal<true>(
guest.dimensions, guest.dimensions,
guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb, guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb, 0,
guest.tileConfig.blockHeight, guest.tileConfig.blockDepth, guest.tileConfig.blockHeight, guest.tileConfig.blockDepth,
blockLinear, linear blockLinear, linear
); );
} }
void CopyLinearToBlockLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, u8 *linear, u8 *blockLinear) { void CopyLinearToBlockLinear(Dimensions dimensions,
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb,
size_t gobBlockHeight, size_t gobBlockDepth,
u8 *linear, u8 *blockLinear) {
CopyBlockLinearInternal<false>(dimensions,
formatBlockWidth, formatBlockHeight, formatBpb, 0,
gobBlockHeight, gobBlockDepth,
blockLinear, linear
);
}
void CopyPitchToBlockLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount, size_t gobBlockHeight, size_t gobBlockDepth, u8 *pitch, u8 *blockLinear) {
CopyBlockLinearInternal<false>( CopyBlockLinearInternal<false>(
dimensions, dimensions,
formatBlockWidth, formatBlockHeight, formatBpb, formatBlockWidth, formatBlockHeight, formatBpb, pitchAmount,
gobBlockHeight, gobBlockDepth, gobBlockHeight, gobBlockDepth,
blockLinear, linear blockLinear, pitch
);
}
); );
} }
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear) { void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear) {
CopyBlockLinearInternal<false>( CopyBlockLinearInternal<false>(
guest.dimensions, guest.dimensions,
guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb, guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb, 0,
guest.tileConfig.blockHeight, guest.tileConfig.blockDepth, guest.tileConfig.blockHeight, guest.tileConfig.blockDepth,
blockLinear, linear blockLinear, linear
); );

View File

@ -40,21 +40,47 @@ namespace skyline::gpu::texture {
size_t gobBlockHeight, size_t gobBlockDepth, size_t gobBlockHeight, size_t gobBlockDepth,
u8 *blockLinear, u8 *linear); u8 *blockLinear, u8 *linear);
/**
* @brief Copies the contents of a blocklinear texture to a pitch texture
*/
void CopyBlockLinearToPitch(Dimensions dimensions,
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
size_t gobBlockHeight, size_t gobBlockDepth,
u8 *blockLinear, u8 *pitch);
/** /**
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer * @brief Copies the contents of a blocklinear guest texture to a linear output buffer
*/ */
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear); void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear);
/** /**
* @brief Copies the contents of a blocklinear texture to a linear output buffer * @brief Copies the contents of a linear buffer to a blocklinear texture
*/ */
void CopyLinearToBlockLinear(Dimensions dimensions, void CopyLinearToBlockLinear(Dimensions dimensions,
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb,
size_t gobBlockHeight, size_t gobBlockDepth,
u8 *linear, u8 *blockLinear);
/**
* @brief Copies the contents of a pitch texture to a blocklinear texture
*/
void CopyPitchToBlockLinear(Dimensions dimensions,
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
size_t gobBlockHeight, size_t gobBlockDepth, size_t gobBlockHeight, size_t gobBlockDepth,
u8 *linear, u8 *blockLinear); u8 *pitch, u8 *blockLinear);
/** /**
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer * @brief Copies the contents of a blocklinear guest texture to a linear output buffer
/**
* @brief Copies the contents of a pitch texture to a part of a blocklinear texture
*/
void CopyPitchToBlockLinearSubrect(Dimensions pitchDimensions, Dimensions blockLinearDimensions,
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
size_t gobBlockHeight, size_t gobBlockDepth,
u8 *pitch, u8 *blockLinear,
u32 originX, u32 originY);
/**
* @brief Copies the contents of a linear guest texture to a blocklinear texture
*/ */
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear); void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear);