mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-05 16:38:15 +01:00
Implement pitch swizzled copies
This commit is contained in:
parent
5c4bb1c44e
commit
df0fd88991
@ -89,39 +89,43 @@ namespace skyline::gpu::texture {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Copies pixel data between a linear and blocklinear texture
|
* @brief Copies pixel data between a pitch-linear and blocklinear texture
|
||||||
* @tparam BlockLinearToLinear Whether to copy from a blocklinear texture to a linear texture or a linear texture to a blocklinear texture
|
* @tparam BlockLinearToPitch Whether to copy from a blocklinear texture to a pitch-linear texture or a pitch-linear texture to a blocklinear texture
|
||||||
*/
|
*/
|
||||||
template<bool BlockLinearToLinear>
|
template<bool BlockLinearToPitch>
|
||||||
void CopyBlockLinearInternal(Dimensions dimensions,
|
void CopyBlockLinearInternal(Dimensions dimensions,
|
||||||
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb,
|
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
|
||||||
size_t gobBlockHeight, size_t gobBlockDepth,
|
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||||
u8 *blockLinear, u8 *linear) {
|
u8 *blockLinear, u8 *pitch) {
|
||||||
size_t robWidthUnalignedBytes{util::DivideCeil<size_t>(dimensions.width, formatBlockWidth) * formatBpb};
|
size_t robWidthUnalignedBytes{util::DivideCeil<size_t>(dimensions.width, formatBlockWidth) * formatBpb};
|
||||||
size_t robWidthBytes{util::AlignUp(robWidthUnalignedBytes, GobWidth)};
|
size_t robWidthBytes{util::AlignUp(robWidthUnalignedBytes, GobWidth)};
|
||||||
size_t robWidthBlocks{robWidthUnalignedBytes / GobWidth};
|
size_t robWidthBlocks{robWidthUnalignedBytes / GobWidth};
|
||||||
|
|
||||||
|
if (formatBpb == 12) [[unlikely]]
|
||||||
|
formatBpb = 4;
|
||||||
|
|
||||||
size_t blockHeight{gobBlockHeight};
|
size_t blockHeight{gobBlockHeight};
|
||||||
size_t robHeight{GobHeight * blockHeight};
|
size_t robHeight{GobHeight * blockHeight};
|
||||||
size_t surfaceHeightLines{util::DivideCeil<size_t>(dimensions.height, formatBlockHeight)};
|
size_t surfaceHeightLines{util::DivideCeil<size_t>(dimensions.height, formatBlockHeight)};
|
||||||
size_t surfaceHeightRobs{surfaceHeightLines / robHeight}; //!< The height of the surface in ROBs excluding padding ROBs
|
size_t surfaceHeightRobs{surfaceHeightLines / robHeight}; //!< The height of the surface in ROBs excluding padding ROBs
|
||||||
|
|
||||||
size_t blockDepth{std::min<size_t>(dimensions.depth, gobBlockDepth)};
|
size_t blockDepth{std::min<size_t>(dimensions.depth, gobBlockDepth)};
|
||||||
size_t blockPaddingZ{SectorWidth * SectorHeight * blockHeight * (gobBlockDepth - blockDepth)};
|
size_t blockPaddingZ{GobWidth * GobHeight * blockHeight * (gobBlockDepth - blockDepth)};
|
||||||
|
|
||||||
bool hasPaddingBlock{robWidthUnalignedBytes != robWidthBytes};
|
bool hasPaddingBlock{robWidthUnalignedBytes != robWidthBytes};
|
||||||
size_t blockPaddingOffset{hasPaddingBlock ? (GobWidth - (robWidthBytes - robWidthUnalignedBytes)) : 0};
|
size_t blockPaddingOffset{hasPaddingBlock ? (GobWidth - (robWidthBytes - robWidthUnalignedBytes)) : 0};
|
||||||
|
|
||||||
size_t robBytes{robWidthUnalignedBytes * robHeight};
|
size_t pitchWidthBytes{pitchAmount ? pitchAmount : robWidthUnalignedBytes};
|
||||||
size_t gobYOffset{robWidthUnalignedBytes * GobHeight};
|
size_t robBytes{pitchWidthBytes * robHeight};
|
||||||
size_t gobZOffset{robWidthUnalignedBytes * surfaceHeightLines};
|
size_t gobYOffset{pitchWidthBytes * GobHeight};
|
||||||
|
size_t gobZOffset{pitchWidthBytes * surfaceHeightLines};
|
||||||
|
|
||||||
u8 *sector{blockLinear};
|
u8 *sector{blockLinear};
|
||||||
|
|
||||||
auto deswizzleRob{[&](u8 *linearRob, auto isLastRob, size_t blockPaddingY = 0, size_t blockExtentY = 0) {
|
auto deswizzleRob{[&](u8 *pitchRob, auto isLastRob, size_t blockPaddingY = 0, size_t blockExtentY = 0) {
|
||||||
auto deswizzleBlock{[&](u8 *linearBlock, auto copySector) __attribute__((always_inline)) {
|
auto deswizzleBlock{[&](u8 *pitchBlock, auto copySector) __attribute__((always_inline)) {
|
||||||
for (size_t gobZ{}; gobZ < blockDepth; gobZ++) { // Every Block contains `blockDepth` Z-axis GOBs (Slices)
|
for (size_t gobZ{}; gobZ < blockDepth; gobZ++) { // Every Block contains `blockDepth` Z-axis GOBs (Slices)
|
||||||
u8 *linearGob{linearBlock};
|
u8 *pitchGob{pitchBlock};
|
||||||
for (size_t gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs
|
for (size_t gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs
|
||||||
#pragma clang loop unroll_count(SectorLinesInGob)
|
#pragma clang loop unroll_count(SectorLinesInGob)
|
||||||
for (size_t index{}; index < SectorLinesInGob; index++) {
|
for (size_t index{}; index < SectorLinesInGob; index++) {
|
||||||
@ -129,47 +133,49 @@ namespace skyline::gpu::texture {
|
|||||||
size_t yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis
|
size_t yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis
|
||||||
|
|
||||||
if constexpr (!isLastRob) {
|
if constexpr (!isLastRob) {
|
||||||
copySector(linearGob + (yT * robWidthUnalignedBytes) + xT, xT);
|
copySector(pitchGob + (yT * pitchWidthBytes) + xT, xT);
|
||||||
} else {
|
} else {
|
||||||
if (gobY != blockHeight - 1 || yT < blockExtentY)
|
if (gobY != blockHeight - 1 || yT < blockExtentY)
|
||||||
copySector(linearGob + (yT * robWidthUnalignedBytes) + xT, xT);
|
copySector(pitchGob + (yT * pitchWidthBytes) + xT, xT);
|
||||||
else
|
else
|
||||||
sector += SectorWidth;
|
sector += SectorWidth;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
linearGob += gobYOffset; // Increment the linear GOB to the next Y-axis GOB
|
pitchGob += gobYOffset; // Increment the pitch-linear GOB pointer to the next Y-axis GOB
|
||||||
}
|
}
|
||||||
|
|
||||||
linearBlock += gobZOffset; // Increment the linear block to the next Z-axis GOB
|
if constexpr (isLastRob)
|
||||||
|
sector += blockPaddingY; // Skip over any padding at the end of this slice
|
||||||
|
|
||||||
|
pitchBlock += gobZOffset; // Increment the pitch-linear block pointer to the next Z-axis GOB
|
||||||
}
|
}
|
||||||
|
|
||||||
sector += blockPaddingZ; // Skip over any padding Z-axis GOBs
|
sector += blockPaddingZ; // Skip over any padding Z-axis GOBs
|
||||||
}};
|
}};
|
||||||
|
|
||||||
for (size_t block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` blocks (excl. padding block)
|
for (size_t block{}; block < robWidthBlocks; block++) { // Every ROB contains `robWidthBlocks` blocks (excl. padding block)
|
||||||
deswizzleBlock(linearRob, [&](u8 *linearSector, size_t) __attribute__((always_inline)) {
|
deswizzleBlock(pitchRob, [&](u8 *linearSector, size_t) __attribute__((always_inline)) {
|
||||||
if constexpr (BlockLinearToLinear)
|
if constexpr (BlockLinearToPitch)
|
||||||
std::memcpy(linearSector, sector, SectorWidth);
|
std::memcpy(linearSector, sector, SectorWidth);
|
||||||
else
|
else
|
||||||
std::memcpy(sector, linearSector, SectorWidth);
|
std::memcpy(sector, linearSector, SectorWidth);
|
||||||
sector += SectorWidth; // `sectorWidth` bytes are of sequential image data
|
sector += SectorWidth; // Each sector is `SectorWidth` bytes of sequential image data
|
||||||
});
|
});
|
||||||
|
|
||||||
if constexpr (isLastRob)
|
pitchRob += GobWidth; // Increment the pitch-linear pointer to the next block (as block width = 1 GOB width)
|
||||||
sector += blockPaddingY; // Skip over any padding at the end of this block
|
|
||||||
linearRob += GobWidth; // Increment the linear block to the next block (As Block Width = 1 GOB Width)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hasPaddingBlock)
|
if (hasPaddingBlock)
|
||||||
deswizzleBlock(linearRob, [&](u8 *linearSector, size_t xT) __attribute__((always_inline)) {
|
deswizzleBlock(pitchRob, [&](u8 *linearSector, size_t xT) __attribute__((always_inline)) {
|
||||||
#pragma clang loop unroll_count(4)
|
#pragma clang loop unroll_count(4)
|
||||||
for (size_t pixelOffset{}; pixelOffset < SectorWidth; pixelOffset += formatBpb) {
|
for (size_t pixelOffset{}; pixelOffset < SectorWidth; pixelOffset += formatBpb) {
|
||||||
if (xT < blockPaddingOffset)
|
if (xT < blockPaddingOffset) {
|
||||||
if constexpr (BlockLinearToLinear)
|
if constexpr (BlockLinearToPitch)
|
||||||
std::memcpy(linearSector + pixelOffset, sector, formatBpb);
|
std::memcpy(linearSector + pixelOffset, sector, formatBpb);
|
||||||
else
|
else
|
||||||
std::memcpy(sector, linearSector + pixelOffset, formatBpb);
|
std::memcpy(sector, linearSector + pixelOffset, formatBpb);
|
||||||
|
}
|
||||||
|
|
||||||
sector += formatBpb;
|
sector += formatBpb;
|
||||||
xT += formatBpb;
|
xT += formatBpb;
|
||||||
@ -177,21 +183,20 @@ namespace skyline::gpu::texture {
|
|||||||
});
|
});
|
||||||
}};
|
}};
|
||||||
|
|
||||||
u8 *linearRob{linear};
|
u8 *pitchRob{pitch};
|
||||||
for (size_t rob{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs (excl. padding ROB)
|
for (size_t rob{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs (excl. padding ROB)
|
||||||
deswizzleRob(linearRob, std::false_type{});
|
deswizzleRob(pitchRob, std::false_type{});
|
||||||
linearRob += robBytes; // Increment the linear ROB to the next ROB
|
pitchRob += robBytes; // Increment the pitch-linear ROB pointer to the next ROB
|
||||||
}
|
}
|
||||||
|
|
||||||
if (surfaceHeightLines % robHeight != 0) {
|
if (surfaceHeightLines % robHeight != 0) {
|
||||||
blockHeight = (util::AlignUp(surfaceHeightLines, GobHeight) - (surfaceHeightRobs * robHeight)) / GobHeight; // Calculate the amount of Y GOBs which aren't padding
|
blockHeight = (util::AlignUp(surfaceHeightLines, GobHeight) - (surfaceHeightRobs * robHeight)) / GobHeight; // Calculate the amount of Y GOBs which aren't padding
|
||||||
|
|
||||||
size_t alignedSurfaceLines{util::DivideCeil<size_t>(dimensions.height, formatBlockHeight)};
|
|
||||||
deswizzleRob(
|
deswizzleRob(
|
||||||
linearRob,
|
pitchRob,
|
||||||
std::true_type{},
|
std::true_type{},
|
||||||
(gobBlockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight), // Calculate padding at the end of a block to skip
|
(gobBlockHeight - blockHeight) * (GobWidth * GobHeight), // Calculate padding at the end of a block to skip
|
||||||
util::IsAligned(alignedSurfaceLines, GobHeight) ? GobHeight : alignedSurfaceLines - util::AlignDown(alignedSurfaceLines, GobHeight) // Calculate the line relative to the start of the last GOB that is the cut-off point for the image
|
util::IsAligned(surfaceHeightLines, GobHeight) ? GobHeight : surfaceHeightLines - util::AlignDown(surfaceHeightLines, GobHeight) // Calculate the line relative to the start of the last GOB that is the cut-off point for the image
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -199,34 +204,58 @@ namespace skyline::gpu::texture {
|
|||||||
void CopyBlockLinearToLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, u8 *blockLinear, u8 *linear) {
|
void CopyBlockLinearToLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, u8 *blockLinear, u8 *linear) {
|
||||||
CopyBlockLinearInternal<true>(
|
CopyBlockLinearInternal<true>(
|
||||||
dimensions,
|
dimensions,
|
||||||
formatBlockWidth, formatBlockHeight, formatBpb,
|
formatBlockWidth, formatBlockHeight, formatBpb, 0,
|
||||||
gobBlockHeight, gobBlockDepth,
|
gobBlockHeight, gobBlockDepth,
|
||||||
blockLinear, linear
|
blockLinear, linear
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CopyBlockLinearToPitch(Dimensions dimensions,
|
||||||
|
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
|
||||||
|
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||||
|
u8 *blockLinear, u8 *pitch) {
|
||||||
|
CopyBlockLinearInternal<true>(
|
||||||
|
dimensions,
|
||||||
|
formatBlockWidth, formatBlockHeight, formatBpb, pitchAmount,
|
||||||
|
gobBlockHeight, gobBlockDepth,
|
||||||
|
blockLinear, pitch
|
||||||
|
);
|
||||||
|
}
|
||||||
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear) {
|
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear) {
|
||||||
CopyBlockLinearInternal<true>(
|
CopyBlockLinearInternal<true>(
|
||||||
guest.dimensions,
|
guest.dimensions,
|
||||||
guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb,
|
guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb, 0,
|
||||||
guest.tileConfig.blockHeight, guest.tileConfig.blockDepth,
|
guest.tileConfig.blockHeight, guest.tileConfig.blockDepth,
|
||||||
blockLinear, linear
|
blockLinear, linear
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CopyLinearToBlockLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, u8 *linear, u8 *blockLinear) {
|
void CopyLinearToBlockLinear(Dimensions dimensions,
|
||||||
CopyBlockLinearInternal<false>(
|
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb,
|
||||||
dimensions,
|
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||||
formatBlockWidth, formatBlockHeight, formatBpb,
|
u8 *linear, u8 *blockLinear) {
|
||||||
|
CopyBlockLinearInternal<false>(dimensions,
|
||||||
|
formatBlockWidth, formatBlockHeight, formatBpb, 0,
|
||||||
gobBlockHeight, gobBlockDepth,
|
gobBlockHeight, gobBlockDepth,
|
||||||
blockLinear, linear
|
blockLinear, linear
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CopyPitchToBlockLinear(Dimensions dimensions, size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount, size_t gobBlockHeight, size_t gobBlockDepth, u8 *pitch, u8 *blockLinear) {
|
||||||
|
CopyBlockLinearInternal<false>(
|
||||||
|
dimensions,
|
||||||
|
formatBlockWidth, formatBlockHeight, formatBpb, pitchAmount,
|
||||||
|
gobBlockHeight, gobBlockDepth,
|
||||||
|
blockLinear, pitch
|
||||||
|
);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear) {
|
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear) {
|
||||||
CopyBlockLinearInternal<false>(
|
CopyBlockLinearInternal<false>(
|
||||||
guest.dimensions,
|
guest.dimensions,
|
||||||
guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb,
|
guest.format->blockWidth, guest.format->blockHeight, guest.format->bpb, 0,
|
||||||
guest.tileConfig.blockHeight, guest.tileConfig.blockDepth,
|
guest.tileConfig.blockHeight, guest.tileConfig.blockDepth,
|
||||||
blockLinear, linear
|
blockLinear, linear
|
||||||
);
|
);
|
||||||
|
@ -40,21 +40,47 @@ namespace skyline::gpu::texture {
|
|||||||
size_t gobBlockHeight, size_t gobBlockDepth,
|
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||||
u8 *blockLinear, u8 *linear);
|
u8 *blockLinear, u8 *linear);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Copies the contents of a blocklinear texture to a pitch texture
|
||||||
|
*/
|
||||||
|
void CopyBlockLinearToPitch(Dimensions dimensions,
|
||||||
|
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
|
||||||
|
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||||
|
u8 *blockLinear, u8 *pitch);
|
||||||
/**
|
/**
|
||||||
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer
|
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer
|
||||||
*/
|
*/
|
||||||
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear);
|
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *blockLinear, u8 *linear);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Copies the contents of a blocklinear texture to a linear output buffer
|
* @brief Copies the contents of a linear buffer to a blocklinear texture
|
||||||
*/
|
*/
|
||||||
void CopyLinearToBlockLinear(Dimensions dimensions,
|
void CopyLinearToBlockLinear(Dimensions dimensions,
|
||||||
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb,
|
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb,
|
||||||
size_t gobBlockHeight, size_t gobBlockDepth,
|
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||||
u8 *linear, u8 *blockLinear);
|
u8 *linear, u8 *blockLinear);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Copies the contents of a pitch texture to a blocklinear texture
|
||||||
|
*/
|
||||||
|
void CopyPitchToBlockLinear(Dimensions dimensions,
|
||||||
|
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
|
||||||
|
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||||
|
u8 *pitch, u8 *blockLinear);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer
|
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer
|
||||||
|
/**
|
||||||
|
* @brief Copies the contents of a pitch texture to a part of a blocklinear texture
|
||||||
|
*/
|
||||||
|
void CopyPitchToBlockLinearSubrect(Dimensions pitchDimensions, Dimensions blockLinearDimensions,
|
||||||
|
size_t formatBlockWidth, size_t formatBlockHeight, size_t formatBpb, u32 pitchAmount,
|
||||||
|
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||||
|
u8 *pitch, u8 *blockLinear,
|
||||||
|
u32 originX, u32 originY);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Copies the contents of a linear guest texture to a blocklinear texture
|
||||||
*/
|
*/
|
||||||
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear);
|
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linear, u8 *blockLinear);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user