Implement CPU BCn Texture Decoding

Certain GPU vendors, such as ARM with Mali, do not support BCn textures at all, while others, such as AMD, only have partial support (BC1-BC3). Most guest titles use BCn textures, so on host GPUs without BCn support we need to decompress the textures on the CPU. This commit implements a CPU BCn texture decoder based on SwiftShader's BC decoder and adds the infrastructure required for the `GuestTexture` and `Texture` objects to have different formats.
This commit is contained in:
PixelyIon 2022-05-28 20:26:24 +05:30
parent fe615b1e03
commit 80c8fb8791
11 changed files with 1855 additions and 73 deletions

View File

@ -165,6 +165,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/buffer_manager.cpp ${source_DIR}/skyline/gpu/buffer_manager.cpp
${source_DIR}/skyline/gpu/command_scheduler.cpp ${source_DIR}/skyline/gpu/command_scheduler.cpp
${source_DIR}/skyline/gpu/descriptor_allocator.cpp ${source_DIR}/skyline/gpu/descriptor_allocator.cpp
${source_DIR}/skyline/gpu/texture/bc_decoder.cpp
${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/texture/texture.cpp
${source_DIR}/skyline/gpu/texture/layout.cpp ${source_DIR}/skyline/gpu/texture/layout.cpp
${source_DIR}/skyline/gpu/buffer.cpp ${source_DIR}/skyline/gpu/buffer.cpp

View File

@ -268,7 +268,7 @@ namespace skyline::gpu {
vk::PhysicalDeviceFloatControlsProperties, vk::PhysicalDeviceFloatControlsProperties,
vk::PhysicalDeviceSubgroupProperties>()}; vk::PhysicalDeviceSubgroupProperties>()};
traits = TraitManager(deviceFeatures2, enabledFeatures2, deviceExtensions, enabledExtensions, deviceProperties2); traits = TraitManager{deviceFeatures2, enabledFeatures2, deviceExtensions, enabledExtensions, deviceProperties2, physicalDevice};
traits.ApplyDriverPatches(context); traits.ApplyDriverPatches(context);
std::vector<const char *> pEnabledExtensions; std::vector<const char *> pEnabledExtensions;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,43 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <cstddef>
#include <cstdint>
namespace bcn {
    /**
     * @brief Decodes a BC1 encoded image to R8G8B8A8
     * @param src The BC1 encoded input data in a linear (deswizzled) layout
     * @param dst The buffer which the decoded R8G8B8A8 image is written to
     * @param width The width of the image
     * @param height The height of the image
     * @param hasAlphaChannel NOTE(review): presumably selects whether BC1's 1-bit alpha is decoded, confirm against bc_decoder.cpp
     * @note NOTE(review): the caller is presumably responsible for sizing dst for the fully decoded image, confirm against bc_decoder.cpp
     */
    void DecodeBc1(const uint8_t *src, uint8_t *dst, std::size_t width, std::size_t height, bool hasAlphaChannel);

    /**
     * @brief Decodes a BC2 encoded image to R8G8B8A8
     * @param src The BC2 encoded input data in a linear (deswizzled) layout
     * @param dst The buffer which the decoded R8G8B8A8 image is written to
     * @param width The width of the image
     * @param height The height of the image
     */
    void DecodeBc2(const uint8_t *src, uint8_t *dst, std::size_t width, std::size_t height);

    /**
     * @brief Decodes a BC3 encoded image to R8G8B8A8
     * @param src The BC3 encoded input data in a linear (deswizzled) layout
     * @param dst The buffer which the decoded R8G8B8A8 image is written to
     * @param width The width of the image
     * @param height The height of the image
     */
    void DecodeBc3(const uint8_t *src, uint8_t *dst, std::size_t width, std::size_t height);

    /**
     * @brief Decodes a BC4 encoded image to R8
     * @param src The BC4 encoded input data in a linear (deswizzled) layout
     * @param dst The buffer which the decoded R8 image is written to
     * @param width The width of the image
     * @param height The height of the image
     * @param isSigned If the source is the signed (SNORM) variant of the format rather than the unsigned (UNORM) one
     */
    void DecodeBc4(const uint8_t *src, uint8_t *dst, std::size_t width, std::size_t height, bool isSigned);

    /**
     * @brief Decodes a BC5 encoded image to R8G8
     * @param src The BC5 encoded input data in a linear (deswizzled) layout
     * @param dst The buffer which the decoded R8G8 image is written to
     * @param width The width of the image
     * @param height The height of the image
     * @param isSigned If the source is the signed (SNORM) variant of the format rather than the unsigned (UNORM) one
     */
    void DecodeBc5(const uint8_t *src, uint8_t *dst, std::size_t width, std::size_t height, bool isSigned);

    /**
     * @brief Decodes a BC6 encoded image to R16G16B16A16
     * @param src The BC6 encoded input data in a linear (deswizzled) layout
     * @param dst The buffer which the decoded R16G16B16A16 image is written to
     * @param width The width of the image
     * @param height The height of the image
     * @param isSigned If the source is the signed float (SFLOAT) variant of the format rather than the unsigned (UFLOAT) one
     */
    void DecodeBc6(const uint8_t *src, uint8_t *dst, std::size_t width, std::size_t height, bool isSigned);

    /**
     * @brief Decodes a BC7 encoded image to R8G8B8A8
     * @param src The BC7 encoded input data in a linear (deswizzled) layout
     * @param dst The buffer which the decoded R8G8B8A8 image is written to
     * @param width The width of the image
     * @param height The height of the image
     */
    void DecodeBc7(const uint8_t *src, uint8_t *dst, std::size_t width, std::size_t height);
}

View File

@ -90,18 +90,43 @@ namespace skyline::gpu::format {
FORMAT(E5B9G9R9Float, 32, eE5B9G9R9UfloatPack32); FORMAT(E5B9G9R9Float, 32, eE5B9G9R9UfloatPack32);
FORMAT_INT_FLOAT(R32G32, 32 * 2, eR32G32); FORMAT_INT_FLOAT(R32G32, 32 * 2, eR32G32);
FORMAT_NORM_INT_FLOAT(R16G16B16, 16 * 3, eR16G16B16);
FORMAT_NORM_INT_FLOAT(R16G16B16A16, 16 * 4, eR16G16B16A16); FORMAT_NORM_INT_FLOAT(R16G16B16A16, 16 * 4, eR16G16B16A16);
FORMAT_INT_FLOAT(R32G32B32A32, 32 * 4, eR32G32B32A32); FORMAT_INT_FLOAT(R32G32B32A32, 32 * 4, eR32G32B32A32);
// Compressed Colour Formats // Compressed Colour Formats
FORMAT_SUFF_UNORM_SRGB(BC1, 64, eBc1Rgba, Block, FORMAT_SUFF_UNORM_SRGB(BC1, 64, eBc1Rgba, Block,
.blockWidth = 4, .blockWidth = 4,
.blockHeight = 4 .blockHeight = 4
); );
FORMAT_SUFF_NORM(BC4, 64, eBc4, Block, FORMAT_SUFF_NORM(BC4, 64, eBc4, Block,
.blockWidth = 4, .blockWidth = 4,
.blockHeight = 4, .blockHeight = 4,
);
FORMAT_SUFF_UNORM_SRGB(BC2, 128, eBc2, Block,
.blockWidth = 4,
.blockHeight = 4
);
FORMAT_SUFF_UNORM_SRGB(BC3, 128, eBc3, Block,
.blockWidth = 4,
.blockHeight = 4
);
FORMAT_SUFF_NORM(BC5, 128, eBc5, Block,
.blockWidth = 4,
.blockHeight = 4,
);
FORMAT(Bc6HUfloat, 128, eBc6HUfloatBlock,
.blockWidth = 4,
.blockHeight = 4,
);
FORMAT(Bc6HSfloat, 128, eBc6HSfloatBlock,
.blockWidth = 4,
.blockHeight = 4,
);
FORMAT_SUFF_UNORM_SRGB(BC7, 128, eBc7, Block,
.blockWidth = 4,
.blockHeight = 4
); );
FORMAT_SUFF_UNORM_SRGB(Astc4x4, 128, eAstc4x4, Block, FORMAT_SUFF_UNORM_SRGB(Astc4x4, 128, eAstc4x4, Block,
@ -117,31 +142,6 @@ namespace skyline::gpu::format {
.blockHeight = 8 .blockHeight = 8
); );
FORMAT_SUFF_UNORM_SRGB(BC2, 128, eBc2, Block,
.blockWidth = 4,
.blockHeight = 4
);
FORMAT_SUFF_UNORM_SRGB(BC3, 128, eBc3, Block,
.blockWidth = 4,
.blockHeight = 4
);
FORMAT_SUFF_NORM(BC5, 128, eBc5, Block,
.blockWidth = 4,
.blockHeight = 4,
);
FORMAT(Bc6HUfloat, 128, eBc6HUfloatBlock,
.blockWidth = 4,
.blockHeight = 4,
);
FORMAT(Bc6HSfloat, 128, eBc6HSfloatBlock,
.blockWidth = 4,
.blockHeight = 4,
);
FORMAT_SUFF_UNORM_SRGB(BC7, 128, eBc7, Block,
.blockWidth = 4,
.blockHeight = 4
);
// Depth/Stencil Formats // Depth/Stencil Formats
// All of these have a G->R swizzle // All of these have a G->R swizzle
FORMAT(D16Unorm, 16, eD16Unorm, vka::eDepth, .swizzleMapping = { FORMAT(D16Unorm, 16, eD16Unorm, vka::eDepth, .swizzleMapping = {

View File

@ -54,7 +54,7 @@ namespace skyline::gpu::texture {
return isMultiLayer ? util::AlignUp(totalSize, layerAlignment) : totalSize; return isMultiLayer ? util::AlignUp(totalSize, layerAlignment) : totalSize;
} }
std::vector<MipLevelLayout> GetBlockLinearMipLayout(Dimensions dimensions, size_t formatBlockHeight, size_t formatBlockWidth, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, size_t levelCount) { std::vector<MipLevelLayout> GetBlockLinearMipLayout(Dimensions dimensions, size_t formatBlockHeight, size_t formatBlockWidth, size_t formatBpb, size_t targetFormatBlockHeight, size_t targetFormatBlockWidth, size_t targetFormatBpb, size_t gobBlockHeight, size_t gobBlockDepth, size_t levelCount) {
std::vector<MipLevelLayout> mipLevels; std::vector<MipLevelLayout> mipLevels;
mipLevels.reserve(levelCount); mipLevels.reserve(levelCount);
@ -63,9 +63,13 @@ namespace skyline::gpu::texture {
// Note: We don't need a separate gobsDepth variable here, since a GOB is always a single slice deep and the value would be the same as the depth dimension // Note: We don't need a separate gobsDepth variable here, since a GOB is always a single slice deep and the value would be the same as the depth dimension
for (size_t i{}; i < levelCount; i++) { for (size_t i{}; i < levelCount; i++) {
// The size of this level in the guest format with linear tiling
size_t linearSize{util::DivideCeil<size_t>(dimensions.width, formatBlockWidth) * formatBpb * util::DivideCeil<size_t>(dimensions.height, formatBlockHeight) * dimensions.depth};
// The size of this level in the target (decoded) format, this must use the target format's bytes-per-block rather than the guest format's; a target bpb of 0 means that the target format is the same as the guest format
size_t targetLinearSize{targetFormatBpb == 0 ? linearSize : util::DivideCeil<size_t>(dimensions.width, targetFormatBlockWidth) * targetFormatBpb * util::DivideCeil<size_t>(dimensions.height, targetFormatBlockHeight) * dimensions.depth};
mipLevels.emplace_back( mipLevels.emplace_back(
dimensions, dimensions,
util::DivideCeil<size_t>(dimensions.width, formatBlockWidth) * formatBpb * util::DivideCeil<size_t>(dimensions.height, formatBlockHeight) * dimensions.depth, linearSize,
targetLinearSize,
(GobWidth * gobsWidth) * (GobHeight * util::AlignUp(gobsHeight, gobBlockHeight)) * util::AlignUp(dimensions.depth, gobBlockDepth), (GobWidth * gobsWidth) * (GobHeight * util::AlignUp(gobsHeight, gobBlockHeight)) * util::AlignUp(dimensions.depth, gobBlockDepth),
gobBlockHeight, gobBlockDepth gobBlockHeight, gobBlockDepth
); );

View File

@ -23,10 +23,12 @@ namespace skyline::gpu::texture {
size_t levelCount, bool isMultiLayer); size_t levelCount, bool isMultiLayer);
/** /**
* @note The target format is the format of the texture after it has been decoded, if bpb is 0, the target format is the same as the source format
* @return A vector of metadata about every mipmapped level of the supplied block-linear surface * @return A vector of metadata about every mipmapped level of the supplied block-linear surface
*/ */
std::vector<MipLevelLayout> GetBlockLinearMipLayout(Dimensions dimensions, std::vector<MipLevelLayout> GetBlockLinearMipLayout(Dimensions dimensions,
size_t formatBlockHeight, size_t formatBlockWidth, size_t formatBpb, size_t formatBlockHeight, size_t formatBlockWidth, size_t formatBpb,
size_t targetFormatBlockHeight, size_t targetFormatBlockWidth, size_t targetFormatBpb,
size_t gobBlockHeight, size_t gobBlockDepth, size_t gobBlockHeight, size_t gobBlockDepth,
size_t levelCount); size_t levelCount);

View File

@ -8,6 +8,8 @@
#include "texture.h" #include "texture.h"
#include "layout.h" #include "layout.h"
#include "adreno_aliasing.h" #include "adreno_aliasing.h"
#include "bc_decoder.h"
#include "format.h"
namespace skyline::gpu { namespace skyline::gpu {
u32 GuestTexture::GetLayerStride() { u32 GuestTexture::GetLayerStride() {
@ -160,33 +162,42 @@ namespace skyline::gpu {
} }
}()}; }()};
std::vector<u8> deswizzleBuffer;
u8 *deswizzleOutput;
if (guest->format != format) {
deswizzleBuffer.resize(deswizzledSurfaceSize);
deswizzleOutput = deswizzleBuffer.data();
} else [[likely]] {
deswizzleOutput = bufferData;
}
auto guestLayerStride{guest->GetLayerStride()}; auto guestLayerStride{guest->GetLayerStride()};
if (levelCount == 1) { if (levelCount == 1) {
auto outputLayer{deswizzleOutput};
for (size_t layer{}; layer < layerCount; layer++) { for (size_t layer{}; layer < layerCount; layer++) {
if (guest->tileConfig.mode == texture::TileMode::Block) if (guest->tileConfig.mode == texture::TileMode::Block)
texture::CopyBlockLinearToLinear(*guest, pointer, bufferData); texture::CopyBlockLinearToLinear(*guest, pointer, outputLayer);
else if (guest->tileConfig.mode == texture::TileMode::Pitch) else if (guest->tileConfig.mode == texture::TileMode::Pitch)
texture::CopyPitchLinearToLinear(*guest, pointer, bufferData); texture::CopyPitchLinearToLinear(*guest, pointer, outputLayer);
else if (guest->tileConfig.mode == texture::TileMode::Linear) else if (guest->tileConfig.mode == texture::TileMode::Linear)
std::memcpy(bufferData, pointer, surfaceSize); std::memcpy(outputLayer, pointer, surfaceSize);
pointer += guestLayerStride; pointer += guestLayerStride;
bufferData += layerStride; outputLayer += deswizzledLayerStride;
} }
} else if (levelCount > 1 && guest->tileConfig.mode == texture::TileMode::Block) { } else if (levelCount > 1 && guest->tileConfig.mode == texture::TileMode::Block) {
// We need to generate a buffer that has all layers for a given mip level while Tegra X1 layout holds all mip levels for a given layer // We need to generate a buffer that has all layers for a given mip level while Tegra X1 layout holds all mip levels for a given layer
for (size_t layer{}; layer < layerCount; layer++) { for (size_t layer{}; layer < layerCount; layer++) {
auto inputLevel{pointer}, outputLevel{bufferData}; auto inputLevel{pointer}, outputLevel{deswizzleOutput};
for (size_t level{}; level < levelCount; ++level) { for (const auto &level : mipLayouts) {
const auto &mipLayout{mipLayouts[level]};
texture::CopyBlockLinearToLinear( texture::CopyBlockLinearToLinear(
mipLayout.dimensions, level.dimensions,
guest->format->blockWidth, guest->format->blockHeight, guest->format->bpb, guest->format->blockWidth, guest->format->blockHeight, guest->format->bpb,
mipLayout.blockHeight, mipLayout.blockDepth, level.blockHeight, level.blockDepth,
inputLevel, outputLevel + (layer * mipLayout.linearSize) // Offset into the current layer relative to the start of the current mip level inputLevel, outputLevel + (layer * level.linearSize) // Offset into the current layer relative to the start of the current mip level
); );
inputLevel += mipLayout.blockLinearSize; // Skip over the current mip level as we've deswizzled it inputLevel += level.blockLinearSize; // Skip over the current mip level as we've deswizzled it
outputLevel += layerCount * mipLayout.linearSize; // We need to offset the output buffer by the size of the previous mip level outputLevel += layerCount * level.linearSize; // We need to offset the output buffer by the size of the previous mip level
} }
pointer += guestLayerStride; // We need to offset the input buffer by the size of the previous guest layer, this can differ from inputLevel's value due to layer end padding or guest RT layer stride pointer += guestLayerStride; // We need to offset the input buffer by the size of the previous guest layer, this can differ from inputLevel's value due to layer end padding or guest RT layer stride
@ -195,6 +206,60 @@ namespace skyline::gpu {
throw exception("Mipmapped textures with tiling mode '{}' aren't supported", static_cast<int>(tiling)); throw exception("Mipmapped textures with tiling mode '{}' aren't supported", static_cast<int>(tiling));
} }
if (!deswizzleBuffer.empty()) {
for (const auto &level : mipLayouts) {
size_t levelHeight{level.dimensions.height * layerCount}; //!< The height of an image representing all layers in the entire level
switch (guest->format->vkFormat) {
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc1RgbaSrgbBlock:
bcn::DecodeBc1(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, true);
break;
case vk::Format::eBc2UnormBlock:
case vk::Format::eBc2SrgbBlock:
bcn::DecodeBc2(deswizzleOutput, bufferData, level.dimensions.width, levelHeight);
break;
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc3SrgbBlock:
bcn::DecodeBc3(deswizzleOutput, bufferData, level.dimensions.width, levelHeight);
break;
case vk::Format::eBc4UnormBlock:
bcn::DecodeBc4(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, false);
break;
case vk::Format::eBc4SnormBlock:
bcn::DecodeBc4(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, true);
break;
case vk::Format::eBc5UnormBlock:
bcn::DecodeBc5(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, false);
break;
case vk::Format::eBc5SnormBlock:
bcn::DecodeBc5(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, true);
break;
case vk::Format::eBc6HUfloatBlock:
bcn::DecodeBc6(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, false);
break;
case vk::Format::eBc6HSfloatBlock:
bcn::DecodeBc6(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, true);
break;
case vk::Format::eBc7UnormBlock:
case vk::Format::eBc7SrgbBlock:
bcn::DecodeBc7(deswizzleOutput, bufferData, level.dimensions.width, levelHeight);
break;
default:
throw exception("Unsupported guest format '{}'", vk::to_string(guest->format->vkFormat));
}
deswizzleOutput += level.linearSize * layerCount;
bufferData += level.targetLinearSize * layerCount;
}
}
if (stagingBuffer && cycle.lock() != pCycle) if (stagingBuffer && cycle.lock() != pCycle)
WaitOnFence(); WaitOnFence();
@ -235,7 +300,7 @@ namespace skyline::gpu {
.imageExtent = level.dimensions, .imageExtent = level.dimensions,
} }
); );
bufferOffset += level.linearSize * layerCount; bufferOffset += level.targetLinearSize * layerCount;
} }
}}; }};
@ -282,7 +347,7 @@ namespace skyline::gpu {
.imageExtent = level.dimensions, .imageExtent = level.dimensions,
} }
); );
bufferOffset += level.linearSize * levelCount; bufferOffset += level.targetLinearSize * levelCount;
} }
}}; }};
@ -326,17 +391,16 @@ namespace skyline::gpu {
// Note: See SynchronizeHostImpl for additional comments // Note: See SynchronizeHostImpl for additional comments
for (size_t layer{}; layer < layerCount; layer++) { for (size_t layer{}; layer < layerCount; layer++) {
auto outputLevel{guestOutput}, inputLevel{hostBuffer}; auto outputLevel{guestOutput}, inputLevel{hostBuffer};
for (size_t level{}; level < levelCount; ++level) { for (const auto &level : mipLayouts) {
const auto &mipLayout{mipLayouts[level]};
texture::CopyLinearToBlockLinear( texture::CopyLinearToBlockLinear(
mipLayout.dimensions, level.dimensions,
guest->format->blockWidth, guest->format->blockHeight, guest->format->bpb, guest->format->blockWidth, guest->format->blockHeight, guest->format->bpb,
mipLayout.blockHeight, mipLayout.blockDepth, level.blockHeight, level.blockDepth,
outputLevel, inputLevel + (layer * mipLayout.linearSize) outputLevel, inputLevel + (layer * level.linearSize)
); );
outputLevel += mipLayout.blockLinearSize; outputLevel += level.blockLinearSize;
inputLevel += layerCount * mipLayout.linearSize; inputLevel += layerCount * level.linearSize;
} }
guestOutput += guestLayerStride; guestOutput += guestLayerStride;
@ -366,32 +430,87 @@ namespace skyline::gpu {
layerCount(layerCount), layerCount(layerCount),
sampleCount(sampleCount) {} sampleCount(sampleCount) {}
/**
 * @brief Picks the format that the host texture should use for the supplied guest format
 * @param format The guest format to find a host-compatible format for
 * @param traits The trait manager which holds the per-BCn-family support flags (bcnSupport) of the host
 * @return The supplied format if it is not a BCn format or its BCn family is flagged as supported, otherwise an uncompressed format is returned in its place
 */
u32 CalculateLevelStride(const std::vector<texture::MipLevelLayout> &mipLayouts) { texture::Format ConvertHostCompatibleFormat(texture::Format format, const TraitManager &traits) {
u32 surfaceSize{}; auto bcnSupport{traits.bcnSupport};
// Fast path: when every BCn family is flagged as supported no format ever needs to be substituted
if (bcnSupport.all())
return format;
// bcnSupport is indexed by BCn family: [0] = BC1, [1] = BC2, [2] = BC3, [3] = BC4, [4] = BC5, [5] = BC6H, [6] = BC7
// The substituted format keeps the UNORM/SNORM/SRGB variant of the guest format
switch (format->vkFormat) {
case vk::Format::eBc1RgbaUnormBlock:
return bcnSupport[0] ? format : format::R8G8B8A8Unorm;
case vk::Format::eBc1RgbaSrgbBlock:
return bcnSupport[0] ? format : format::R8G8B8A8Srgb;
case vk::Format::eBc2UnormBlock:
return bcnSupport[1] ? format : format::R8G8B8A8Unorm;
case vk::Format::eBc2SrgbBlock:
return bcnSupport[1] ? format : format::R8G8B8A8Srgb;
case vk::Format::eBc3UnormBlock:
return bcnSupport[2] ? format : format::R8G8B8A8Unorm;
case vk::Format::eBc3SrgbBlock:
return bcnSupport[2] ? format : format::R8G8B8A8Srgb;
case vk::Format::eBc4UnormBlock:
return bcnSupport[3] ? format : format::R8Unorm;
case vk::Format::eBc4SnormBlock:
return bcnSupport[3] ? format : format::R8Snorm;
case vk::Format::eBc5UnormBlock:
return bcnSupport[4] ? format : format::R8G8Unorm;
case vk::Format::eBc5SnormBlock:
return bcnSupport[4] ? format : format::R8G8Snorm;
case vk::Format::eBc6HUfloatBlock:
case vk::Format::eBc6HSfloatBlock:
return bcnSupport[5] ? format : format::R16G16B16A16Float; // This is a signed 16-bit FP format, we don't have an unsigned 16-bit FP format
case vk::Format::eBc7UnormBlock:
return bcnSupport[6] ? format : format::R8G8B8A8Unorm;
case vk::Format::eBc7SrgbBlock:
return bcnSupport[6] ? format : format::R8G8B8A8Srgb;
// Any format that isn't BCn is passed through untouched
default:
return format;
}
}
/**
 * @brief Sums the linear size (linearSize) of every supplied mip level
 * @param mipLayouts The layouts of all mip levels to accumulate
 * @return The combined size in bytes of all supplied mip levels, this does not account for multiple layers
 */
size_t CalculateLevelStride(const std::vector<texture::MipLevelLayout> &mipLayouts) {
size_t surfaceSize{};
for (const auto &level : mipLayouts) for (const auto &level : mipLayouts)
surfaceSize += level.linearSize; surfaceSize += level.linearSize;
return surfaceSize; return surfaceSize;
} }
/**
 * @brief Sums the target-format linear size (targetLinearSize) of every supplied mip level
 * @param mipLayouts The layouts of all mip levels to accumulate
 * @return The combined size in bytes of all supplied mip levels in the target format, this does not account for multiple layers
 */
size_t CalculateTargetLevelStride(const std::vector<texture::MipLevelLayout> &mipLayouts) {
    size_t targetStride{};
    for (auto mip{mipLayouts.begin()}; mip != mipLayouts.end(); ++mip)
        targetStride += mip->targetLinearSize;
    return targetStride;
}
Texture::Texture(GPU &pGpu, GuestTexture pGuest) Texture::Texture(GPU &pGpu, GuestTexture pGuest)
: gpu(pGpu), : gpu(pGpu),
guest(std::move(pGuest)), guest(std::move(pGuest)),
dimensions(guest->dimensions), dimensions(guest->dimensions),
format(guest->format), format(ConvertHostCompatibleFormat(guest->format, gpu.traits)),
layout(vk::ImageLayout::eUndefined), layout(vk::ImageLayout::eUndefined),
tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization
layerCount(guest->layerCount), layerCount(guest->layerCount),
layerStride(static_cast<u32>(format->GetSize(dimensions))), deswizzledLayerStride(static_cast<u32>(guest->format->GetSize(dimensions))),
layerStride(format == guest->format ? deswizzledLayerStride : static_cast<u32>(format->GetSize(dimensions))),
levelCount(guest->mipLevelCount), levelCount(guest->mipLevelCount),
mipLayouts( mipLayouts(
texture::GetBlockLinearMipLayout( texture::GetBlockLinearMipLayout(
guest->dimensions, guest->dimensions,
guest->format->blockHeight, guest->format->blockWidth, guest->format->bpb, guest->format->blockHeight, guest->format->blockWidth, guest->format->bpb,
format->blockHeight, format->blockWidth, format->bpb,
guest->tileConfig.blockHeight, guest->tileConfig.blockDepth, guest->tileConfig.blockHeight, guest->tileConfig.blockDepth,
guest->mipLevelCount guest->mipLevelCount
) )
), ),
surfaceSize(CalculateLevelStride(mipLayouts) * layerCount), deswizzledSurfaceSize(CalculateLevelStride(mipLayouts) * layerCount),
surfaceSize(format == guest->format ? deswizzledSurfaceSize : (CalculateTargetLevelStride(mipLayouts) * layerCount)),
sampleCount(vk::SampleCountFlagBits::e1), sampleCount(vk::SampleCountFlagBits::e1),
flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat), flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat),
usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) { usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) {
@ -401,7 +520,7 @@ namespace skyline::gpu {
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
// First attempt to derive type from dimensions // First attempt to derive type from dimensions
auto imageType{guest->dimensions.GetType()}; auto imageType{dimensions.GetType()};
// Try to ensure that the image type is compatible with the given image view type since we can't create a 2D image view from a 1D image // Try to ensure that the image type is compatible with the given image view type since we can't create a 2D image view from a 1D image
if (imageType == vk::ImageType::e1D && guest->type != texture::TextureType::e1D && guest->type != texture::TextureType::e1DArray) { if (imageType == vk::ImageType::e1D && guest->type != texture::TextureType::e1D && guest->type != texture::TextureType::e1DArray) {
@ -450,8 +569,8 @@ namespace skyline::gpu {
} }
/**
 * @brief Transitions the texture into the GpuDirty state
 * @note This does nothing if the texture is already GpuDirty, has no guest texture, or if the host format differs from the guest format
 */
void Texture::MarkGpuDirty() { void Texture::MarkGpuDirty() {
if (dirtyState == DirtyState::GpuDirty || !guest) if (dirtyState == DirtyState::GpuDirty || !guest || format != guest->format)
return; return; // In addition to other checks, we also need to skip GPU dirty if the host format and guest format differ as we don't support re-encoding compressed textures which is when this generally occurs
// NOTE(review): the meaning of the 'false' argument isn't visible here, confirm against NCE::RetrapRegions
gpu.state.nce->RetrapRegions(*trapHandle, false); gpu.state.nce->RetrapRegions(*trapHandle, false);
dirtyState = DirtyState::GpuDirty; dirtyState = DirtyState::GpuDirty;
} }
@ -571,6 +690,15 @@ namespace skyline::gpu {
return; return;
} }
if (layout == vk::ImageLayout::eUndefined || format != guest->format) {
// If the state of the host texture is undefined then so can the guest
// If the texture has differing formats on the guest and host, we don't support converting back in that case as it may involve recompression of a decompressed texture
if (!skipTrap)
gpu.state.nce->RetrapRegions(*trapHandle, true);
dirtyState = DirtyState::Clean;
return;
}
TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
WaitOnBacking(); WaitOnBacking();
@ -600,8 +728,12 @@ namespace skyline::gpu {
if (dirtyState != DirtyState::GpuDirty || !guest) if (dirtyState != DirtyState::GpuDirty || !guest)
return; return;
if (layout == vk::ImageLayout::eUndefined) if (layout == vk::ImageLayout::eUndefined || format != guest->format) {
return; // If the state of the host texture is undefined then so can the guest // If the state of the host texture is undefined then so can the guest
// If the texture has differing formats on the guest and host, we don't support converting back in that case as it may involve recompression of a decompressed texture
dirtyState = DirtyState::Clean;
return;
}
TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer"); TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer");
@ -627,8 +759,8 @@ namespace skyline::gpu {
} }
std::shared_ptr<TextureView> Texture::GetView(vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format pFormat, vk::ComponentMapping mapping) { std::shared_ptr<TextureView> Texture::GetView(vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format pFormat, vk::ComponentMapping mapping) {
if (!pFormat) if (!pFormat || pFormat == guest->format)
pFormat = format; pFormat = format; // We want to use the texture's format if it isn't supplied or if the requested format matches the guest format then we want to use the host format just in case it is host incompatible and the host format differs from the guest format
auto viewFormat{pFormat->vkFormat}, textureFormat{format->vkFormat}; auto viewFormat{pFormat->vkFormat}, textureFormat{format->vkFormat};
if (gpu.traits.quirks.vkImageMutableFormatCostly && viewFormat != textureFormat && (!gpu.traits.quirks.adrenoRelaxedFormatAliasing || !texture::IsAdrenoAliasCompatible(viewFormat, textureFormat))) if (gpu.traits.quirks.vkImageMutableFormatCostly && viewFormat != textureFormat && (!gpu.traits.quirks.adrenoRelaxedFormatAliasing || !texture::IsAdrenoAliasCompatible(viewFormat, textureFormat)))

View File

@ -87,7 +87,7 @@ namespace skyline::gpu {
* @return The size of the texture in bytes * @return The size of the texture in bytes
*/ */
constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) const { constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) const {
// The width and height are rounded up to whole blocks, the updated expression does the division in size_t rather than u32
return util::DivideCeil(width, u32{blockWidth}) * util::DivideCeil(height, u32{blockHeight}) * bpb * depth; return util::DivideCeil<size_t>(width, size_t{blockWidth}) * util::DivideCeil<size_t>(height, size_t{blockHeight}) * bpb * depth;
} }
constexpr size_t GetSize(Dimensions dimensions) const { constexpr size_t GetSize(Dimensions dimensions) const {
@ -229,10 +229,11 @@ namespace skyline::gpu {
/**
 * @brief The dimensions and byte sizes of a single mip level, holding its size with linear tiling in both the source and target formats alongside its block-linear size and block parameters
 */
struct MipLevelLayout { struct MipLevelLayout {
Dimensions dimensions; //!< The dimensions of the mipmapped level, these are exact dimensions and not aligned to a GOB Dimensions dimensions; //!< The dimensions of the mipmapped level, these are exact dimensions and not aligned to a GOB
size_t linearSize; //!< The size of a linear image with this mipmapped level in bytes size_t linearSize; //!< The size of a linear image with this mipmapped level in bytes
size_t targetLinearSize; //!< The size of a linear image with this mipmapped level in bytes and using the target format, this will only differ from linearSize if the target format is supplied
size_t blockLinearSize; //!< The size of a blocklinear image with this mipmapped level in bytes size_t blockLinearSize; //!< The size of a blocklinear image with this mipmapped level in bytes
size_t blockHeight, blockDepth; //!< The block height and block depth set for the level size_t blockHeight, blockDepth; //!< The block height and block depth set for the level
// Member-wise constructor, every member is initialized directly from the parameter of the same name
constexpr MipLevelLayout(Dimensions dimensions, size_t linearSize, size_t blockLinearSize, size_t blockHeight, size_t blockDepth) : dimensions{dimensions}, linearSize{linearSize}, blockLinearSize{blockLinearSize}, blockHeight{blockHeight}, blockDepth{blockDepth} {} constexpr MipLevelLayout(Dimensions dimensions, size_t linearSize, size_t targetLinearSize, size_t blockLinearSize, size_t blockHeight, size_t blockDepth) : dimensions{dimensions}, linearSize{linearSize}, targetLinearSize{targetLinearSize}, blockLinearSize{blockLinearSize}, blockHeight{blockHeight}, blockDepth{blockDepth} {}
}; };
} }
@ -438,10 +439,12 @@ namespace skyline::gpu {
vk::ImageCreateFlags flags; vk::ImageCreateFlags flags;
vk::ImageUsageFlags usage; vk::ImageUsageFlags usage;
u32 layerCount; //!< The amount of array layers in the image u32 layerCount; //!< The amount of array layers in the image
u32 layerStride; //!< The stride of a single array layer given linear tiling, this does **not** consider mipmapping size_t deswizzledLayerStride{}; //!< The stride of a single layer given linear tiling using the guest format, this does **not** consider mipmapping
size_t layerStride{}; //!< The stride of a single layer given linear tiling, this does **not** consider mipmapping
u32 levelCount; u32 levelCount;
std::vector<texture::MipLevelLayout> mipLayouts; //!< The layout of each mip level in the guest texture std::vector<texture::MipLevelLayout> mipLayouts; //!< The layout of each mip level in the guest texture
u32 surfaceSize; //!< The size of the entire surface given linear tiling, this contains all mip levels and layers size_t deswizzledSurfaceSize{}; //!< The size of the guest surface with linear tiling, calculated with the guest format which may differ from the host format
size_t surfaceSize{}; //!< The size of the entire surface given linear tiling, this contains all mip levels and layers
vk::SampleCountFlagBits sampleCount; vk::SampleCountFlagBits sampleCount;
/** /**

View File

@ -5,7 +5,7 @@
#include "trait_manager.h" #include "trait_manager.h"
namespace skyline::gpu { namespace skyline::gpu {
TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2) : quirks(deviceProperties2.get<vk::PhysicalDeviceProperties2>().properties, deviceProperties2.get<vk::PhysicalDeviceDriverProperties>()) { TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice &physicalDevice) : quirks(deviceProperties2.get<vk::PhysicalDeviceProperties2>().properties, deviceProperties2.get<vk::PhysicalDeviceDriverProperties>()) {
bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{}; bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{};
bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present
@ -135,12 +135,28 @@ namespace skyline::gpu {
auto &subgroupProperties{deviceProperties2.get<vk::PhysicalDeviceSubgroupProperties>()}; auto &subgroupProperties{deviceProperties2.get<vk::PhysicalDeviceSubgroupProperties>()};
supportsSubgroupVote = static_cast<bool>(subgroupProperties.supportedOperations & vk::SubgroupFeatureFlagBits::eVote); supportsSubgroupVote = static_cast<bool>(subgroupProperties.supportedOperations & vk::SubgroupFeatureFlagBits::eVote);
subgroupSize = deviceProperties2.get<vk::PhysicalDeviceSubgroupProperties>().subgroupSize; subgroupSize = deviceProperties2.get<vk::PhysicalDeviceSubgroupProperties>().subgroupSize;
// Reports a format as supported if the physical device advertises any feature at all for it
auto isFormatSupported{[&physicalDevice](vk::Format format) {
    const auto properties{physicalDevice.getFormatProperties(format)};
    // We may get false positives here by not checking specifics but this is not seen in practice while the reverse often is of drivers (Such as Adreno 512.6xx drivers which don't report any support aside from buffer features but entirely support BC formats)
    if (properties.linearTilingFeatures)
        return true;
    if (properties.optimalTilingFeatures)
        return true;
    return static_cast<bool>(properties.bufferFeatures);
}};
bcnSupport[0] = isFormatSupported(vk::Format::eBc1RgbaUnormBlock) && isFormatSupported(vk::Format::eBc1RgbaSrgbBlock);
bcnSupport[1] = isFormatSupported(vk::Format::eBc2UnormBlock) && isFormatSupported(vk::Format::eBc2SrgbBlock);
bcnSupport[2] = isFormatSupported(vk::Format::eBc3UnormBlock) && isFormatSupported(vk::Format::eBc3SrgbBlock);
bcnSupport[3] = isFormatSupported(vk::Format::eBc4UnormBlock) && isFormatSupported(vk::Format::eBc4SnormBlock);
bcnSupport[4] = isFormatSupported(vk::Format::eBc5UnormBlock) && isFormatSupported(vk::Format::eBc5SnormBlock);
bcnSupport[5] = isFormatSupported(vk::Format::eBc6HSfloatBlock) && isFormatSupported(vk::Format::eBc6HUfloatBlock);
bcnSupport[6] = isFormatSupported(vk::Format::eBc7UnormBlock) && isFormatSupported(vk::Format::eBc7SrgbBlock);
} }
/**
 * @brief Formats the traits tracked by this TraitManager into a multi-line, human-readable string with one "* <trait>: <value>" entry per line
 * @return The formatted summary, it begins with a newline so it can be appended directly after a log prefix
 * @note The BCn support entry is produced with std::bitset::to_string() which emits the highest bit first, so it reads as BC7 down to BC1 from left to right
 */
std::string TraitManager::Summary() { std::string TraitManager::Summary() {
return fmt::format( return fmt::format(
"\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", "\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}\n* BCn Support: {}",
supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize, bcnSupport.to_string()
); );
} }
@ -213,8 +229,10 @@ namespace skyline::gpu {
Logger::Info("Applied BCeNabler patch"); Logger::Info("Applied BCeNabler patch");
else else
throw exception("Failed to apply BCeNabler patch!"); throw exception("Failed to apply BCeNabler patch!");
bcnSupport.set();
} else if (type == ADRENOTOOLS_BCN_BLOB) { } else if (type == ADRENOTOOLS_BCN_BLOB) {
Logger::Info("BCeNabler skipped, blob BCN support is present"); Logger::Info("BCeNabler skipped, blob BCN support is present");
bcnSupport.set();
} }
} }
} }

View File

@ -40,6 +40,8 @@ namespace skyline::gpu {
bool supportsSubgroupVote{}; //!< If subgroup votes are supported in shaders with SPV_KHR_subgroup_vote bool supportsSubgroupVote{}; //!< If subgroup votes are supported in shaders with SPV_KHR_subgroup_vote
u32 subgroupSize{}; //!< Size of a subgroup on the host GPU u32 subgroupSize{}; //!< Size of a subgroup on the host GPU
std::bitset<7> bcnSupport{}; //!< Bitmask of the supported BCn texture formats; bits 0 through 6 correspond to BC1, BC2, BC3, BC4, BC5, BC6H and BC7 respectively
/** /**
* @brief Manages a list of any vendor/device-specific errata in the host GPU * @brief Manages a list of any vendor/device-specific errata in the host GPU
*/ */
@ -85,7 +87,7 @@ namespace skyline::gpu {
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDeviceImagelessFramebufferFeatures>; vk::PhysicalDeviceImagelessFramebufferFeatures>;
TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2); TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice& physicalDevice);
/** /**
* @brief Applies driver specific binary patches to the driver (e.g. BCeNabler) * @brief Applies driver specific binary patches to the driver (e.g. BCeNabler)