mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-27 00:34:20 +01:00
Implement CPU BCn Texture Decoding
Certain GPU vendors, such as ARM's Mali, do not support BCn textures at all, while other vendors, such as AMD, only have partial support (BC1-BC3). Most titles on the guest utilize BC textures, so to address this on host GPUs without support for BCn we need to decompress the textures on the CPU. This commit implements a CPU BCn texture decoder based on SwiftShader's BC decoder; it also adds the necessary infrastructure to have different formats for the `GuestTexture` and `Texture` objects.
This commit is contained in:
parent
fe615b1e03
commit
80c8fb8791
@ -165,6 +165,7 @@ add_library(skyline SHARED
|
||||
${source_DIR}/skyline/gpu/buffer_manager.cpp
|
||||
${source_DIR}/skyline/gpu/command_scheduler.cpp
|
||||
${source_DIR}/skyline/gpu/descriptor_allocator.cpp
|
||||
${source_DIR}/skyline/gpu/texture/bc_decoder.cpp
|
||||
${source_DIR}/skyline/gpu/texture/texture.cpp
|
||||
${source_DIR}/skyline/gpu/texture/layout.cpp
|
||||
${source_DIR}/skyline/gpu/buffer.cpp
|
||||
|
@ -268,7 +268,7 @@ namespace skyline::gpu {
|
||||
vk::PhysicalDeviceFloatControlsProperties,
|
||||
vk::PhysicalDeviceSubgroupProperties>()};
|
||||
|
||||
traits = TraitManager(deviceFeatures2, enabledFeatures2, deviceExtensions, enabledExtensions, deviceProperties2);
|
||||
traits = TraitManager{deviceFeatures2, enabledFeatures2, deviceExtensions, enabledExtensions, deviceProperties2, physicalDevice};
|
||||
traits.ApplyDriverPatches(context);
|
||||
|
||||
std::vector<const char *> pEnabledExtensions;
|
||||
|
1577
app/src/main/cpp/skyline/gpu/texture/bc_decoder.cpp
Normal file
1577
app/src/main/cpp/skyline/gpu/texture/bc_decoder.cpp
Normal file
File diff suppressed because it is too large
Load Diff
43
app/src/main/cpp/skyline/gpu/texture/bc_decoder.h
Normal file
43
app/src/main/cpp/skyline/gpu/texture/bc_decoder.h
Normal file
@ -0,0 +1,43 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// CPU decoders for BCn block-compressed images, one entry point per BC format
// All decoders share the same leading parameters: `src` is the encoded BCn data that is read, `dst` is the buffer the decoded image is written into and `width`/`height` describe the image being decoded
// NOTE(review): The units of `width`/`height` and the size `dst` must have aren't visible from this header, the caller in texture.cpp passes a mip level's width and its height multiplied by the layer count — confirm against bc_decoder.cpp
// NOTE(review): `size_t` is used below while only <cstdint> is included, consider including <cstddef> explicitly
namespace bcn {
|
||||
/**
|
||||
* @brief Decodes a BC1 encoded image to R8G8B8A8
* @param hasAlphaChannel NOTE(review): Presumably selects whether BC1's alpha is decoded, the caller in texture.cpp always passes true — confirm against bc_decoder.cpp
|
||||
*/
|
||||
void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t width, size_t height, bool hasAlphaChannel);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC2 encoded image to R8G8B8A8
|
||||
*/
|
||||
void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t width, size_t height);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC3 encoded image to R8G8B8A8
|
||||
*/
|
||||
void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t width, size_t height);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC4 encoded image to R8
* @param isSigned The caller in texture.cpp passes true for the Snorm variant of the format and false for the Unorm variant
|
||||
*/
|
||||
void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t width, size_t height, bool isSigned);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC5 encoded image to R8G8
* @param isSigned The caller in texture.cpp passes true for the Snorm variant of the format and false for the Unorm variant
|
||||
*/
|
||||
void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t width, size_t height, bool isSigned);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC6 encoded image to R16G16B16A16
* @param isSigned The caller in texture.cpp passes true for the SFloat variant of the format and false for the UFloat variant
|
||||
*/
|
||||
void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t width, size_t height, bool isSigned);
|
||||
|
||||
/**
|
||||
* @brief Decodes a BC7 encoded image to R8G8B8A8
|
||||
*/
|
||||
void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t width, size_t height);
|
||||
}
|
@ -90,18 +90,43 @@ namespace skyline::gpu::format {
|
||||
FORMAT(E5B9G9R9Float, 32, eE5B9G9R9UfloatPack32);
|
||||
|
||||
FORMAT_INT_FLOAT(R32G32, 32 * 2, eR32G32);
|
||||
FORMAT_NORM_INT_FLOAT(R16G16B16, 16 * 3, eR16G16B16);
|
||||
FORMAT_NORM_INT_FLOAT(R16G16B16A16, 16 * 4, eR16G16B16A16);
|
||||
|
||||
FORMAT_INT_FLOAT(R32G32B32A32, 32 * 4, eR32G32B32A32);
|
||||
|
||||
// Compressed Colour Formats
|
||||
FORMAT_SUFF_UNORM_SRGB(BC1, 64, eBc1Rgba, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4
|
||||
);
|
||||
FORMAT_SUFF_NORM(BC4, 64, eBc4, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4,
|
||||
);
|
||||
FORMAT_SUFF_UNORM_SRGB(BC2, 128, eBc2, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4
|
||||
);
|
||||
FORMAT_SUFF_UNORM_SRGB(BC3, 128, eBc3, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4
|
||||
);
|
||||
FORMAT_SUFF_NORM(BC5, 128, eBc5, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4,
|
||||
);
|
||||
FORMAT(Bc6HUfloat, 128, eBc6HUfloatBlock,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4,
|
||||
);
|
||||
FORMAT(Bc6HSfloat, 128, eBc6HSfloatBlock,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4,
|
||||
);
|
||||
FORMAT_SUFF_UNORM_SRGB(BC7, 128, eBc7, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4
|
||||
);
|
||||
|
||||
FORMAT_SUFF_UNORM_SRGB(Astc4x4, 128, eAstc4x4, Block,
|
||||
@ -117,31 +142,6 @@ namespace skyline::gpu::format {
|
||||
.blockHeight = 8
|
||||
);
|
||||
|
||||
FORMAT_SUFF_UNORM_SRGB(BC2, 128, eBc2, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4
|
||||
);
|
||||
FORMAT_SUFF_UNORM_SRGB(BC3, 128, eBc3, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4
|
||||
);
|
||||
FORMAT_SUFF_NORM(BC5, 128, eBc5, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4,
|
||||
);
|
||||
FORMAT(Bc6HUfloat, 128, eBc6HUfloatBlock,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4,
|
||||
);
|
||||
FORMAT(Bc6HSfloat, 128, eBc6HSfloatBlock,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4,
|
||||
);
|
||||
FORMAT_SUFF_UNORM_SRGB(BC7, 128, eBc7, Block,
|
||||
.blockWidth = 4,
|
||||
.blockHeight = 4
|
||||
);
|
||||
|
||||
// Depth/Stencil Formats
|
||||
// All of these have a G->R swizzle
|
||||
FORMAT(D16Unorm, 16, eD16Unorm, vka::eDepth, .swizzleMapping = {
|
||||
|
@ -54,7 +54,7 @@ namespace skyline::gpu::texture {
|
||||
return isMultiLayer ? util::AlignUp(totalSize, layerAlignment) : totalSize;
|
||||
}
|
||||
|
||||
std::vector<MipLevelLayout> GetBlockLinearMipLayout(Dimensions dimensions, size_t formatBlockHeight, size_t formatBlockWidth, size_t formatBpb, size_t gobBlockHeight, size_t gobBlockDepth, size_t levelCount) {
|
||||
std::vector<MipLevelLayout> GetBlockLinearMipLayout(Dimensions dimensions, size_t formatBlockHeight, size_t formatBlockWidth, size_t formatBpb, size_t targetFormatBlockHeight, size_t targetFormatBlockWidth, size_t targetFormatBpb, size_t gobBlockHeight, size_t gobBlockDepth, size_t levelCount) {
|
||||
std::vector<MipLevelLayout> mipLevels;
|
||||
mipLevels.reserve(levelCount);
|
||||
|
||||
@ -63,9 +63,13 @@ namespace skyline::gpu::texture {
|
||||
// Note: We don't need a separate gobsDepth variable here, since a GOB is always a single slice deep and the value would be the same as the depth dimension
|
||||
|
||||
for (size_t i{}; i < levelCount; i++) {
|
||||
size_t linearSize{util::DivideCeil<size_t>(dimensions.width, formatBlockWidth) * formatBpb * util::DivideCeil<size_t>(dimensions.height, formatBlockHeight) * dimensions.depth};
|
||||
// Size of this level as a linear image in the target format, a target bpb of 0 means the target format is the same as the source format
// Note: The block count is computed with the target format's block dimensions so it must be scaled by the target format's bpb, scaling it by the source bpb yields a wrong size whenever the two formats differ
size_t targetLinearSize{targetFormatBpb == 0 ? linearSize : util::DivideCeil<size_t>(dimensions.width, targetFormatBlockWidth) * targetFormatBpb * util::DivideCeil<size_t>(dimensions.height, targetFormatBlockHeight) * dimensions.depth};
|
||||
|
||||
mipLevels.emplace_back(
|
||||
dimensions,
|
||||
util::DivideCeil<size_t>(dimensions.width, formatBlockWidth) * formatBpb * util::DivideCeil<size_t>(dimensions.height, formatBlockHeight) * dimensions.depth,
|
||||
linearSize,
|
||||
targetLinearSize,
|
||||
(GobWidth * gobsWidth) * (GobHeight * util::AlignUp(gobsHeight, gobBlockHeight)) * util::AlignUp(dimensions.depth, gobBlockDepth),
|
||||
gobBlockHeight, gobBlockDepth
|
||||
);
|
||||
|
@ -23,10 +23,12 @@ namespace skyline::gpu::texture {
|
||||
size_t levelCount, bool isMultiLayer);
|
||||
|
||||
/**
|
||||
* @note The target format is the format of the texture after it has been decoded, if bpb is 0, the target format is the same as the source format
|
||||
* @return A vector of metadata about every mipmapped level of the supplied block-linear surface
|
||||
*/
|
||||
std::vector<MipLevelLayout> GetBlockLinearMipLayout(Dimensions dimensions,
|
||||
size_t formatBlockHeight, size_t formatBlockWidth, size_t formatBpb,
|
||||
size_t targetFormatBlockHeight, size_t targetFormatBlockWidth, size_t targetFormatBpb,
|
||||
size_t gobBlockHeight, size_t gobBlockDepth,
|
||||
size_t levelCount);
|
||||
|
||||
|
@ -8,6 +8,8 @@
|
||||
#include "texture.h"
|
||||
#include "layout.h"
|
||||
#include "adreno_aliasing.h"
|
||||
#include "bc_decoder.h"
|
||||
#include "format.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
u32 GuestTexture::GetLayerStride() {
|
||||
@ -160,33 +162,42 @@ namespace skyline::gpu {
|
||||
}
|
||||
}()};
|
||||
|
||||
std::vector<u8> deswizzleBuffer;
|
||||
u8 *deswizzleOutput;
|
||||
if (guest->format != format) {
|
||||
deswizzleBuffer.resize(deswizzledSurfaceSize);
|
||||
deswizzleOutput = deswizzleBuffer.data();
|
||||
} else [[likely]] {
|
||||
deswizzleOutput = bufferData;
|
||||
}
|
||||
|
||||
auto guestLayerStride{guest->GetLayerStride()};
|
||||
if (levelCount == 1) {
|
||||
auto outputLayer{deswizzleOutput};
|
||||
for (size_t layer{}; layer < layerCount; layer++) {
|
||||
if (guest->tileConfig.mode == texture::TileMode::Block)
|
||||
texture::CopyBlockLinearToLinear(*guest, pointer, bufferData);
|
||||
texture::CopyBlockLinearToLinear(*guest, pointer, outputLayer);
|
||||
else if (guest->tileConfig.mode == texture::TileMode::Pitch)
|
||||
texture::CopyPitchLinearToLinear(*guest, pointer, bufferData);
|
||||
texture::CopyPitchLinearToLinear(*guest, pointer, outputLayer);
|
||||
else if (guest->tileConfig.mode == texture::TileMode::Linear)
|
||||
std::memcpy(bufferData, pointer, surfaceSize);
|
||||
std::memcpy(outputLayer, pointer, surfaceSize);
|
||||
pointer += guestLayerStride;
|
||||
bufferData += layerStride;
|
||||
outputLayer += deswizzledLayerStride;
|
||||
}
|
||||
} else if (levelCount > 1 && guest->tileConfig.mode == texture::TileMode::Block) {
|
||||
// We need to generate a buffer that has all layers for a given mip level while Tegra X1 layout holds all mip levels for a given layer
|
||||
for (size_t layer{}; layer < layerCount; layer++) {
|
||||
auto inputLevel{pointer}, outputLevel{bufferData};
|
||||
for (size_t level{}; level < levelCount; ++level) {
|
||||
const auto &mipLayout{mipLayouts[level]};
|
||||
auto inputLevel{pointer}, outputLevel{deswizzleOutput};
|
||||
for (const auto &level : mipLayouts) {
|
||||
texture::CopyBlockLinearToLinear(
|
||||
mipLayout.dimensions,
|
||||
level.dimensions,
|
||||
guest->format->blockWidth, guest->format->blockHeight, guest->format->bpb,
|
||||
mipLayout.blockHeight, mipLayout.blockDepth,
|
||||
inputLevel, outputLevel + (layer * mipLayout.linearSize) // Offset into the current layer relative to the start of the current mip level
|
||||
level.blockHeight, level.blockDepth,
|
||||
inputLevel, outputLevel + (layer * level.linearSize) // Offset into the current layer relative to the start of the current mip level
|
||||
);
|
||||
|
||||
inputLevel += mipLayout.blockLinearSize; // Skip over the current mip level as we've deswizzled it
|
||||
outputLevel += layerCount * mipLayout.linearSize; // We need to offset the output buffer by the size of the previous mip level
|
||||
inputLevel += level.blockLinearSize; // Skip over the current mip level as we've deswizzled it
|
||||
outputLevel += layerCount * level.linearSize; // We need to offset the output buffer by the size of the previous mip level
|
||||
}
|
||||
|
||||
pointer += guestLayerStride; // We need to offset the input buffer by the size of the previous guest layer, this can differ from inputLevel's value due to layer end padding or guest RT layer stride
|
||||
@ -195,6 +206,60 @@ namespace skyline::gpu {
|
||||
throw exception("Mipmapped textures with tiling mode '{}' aren't supported", static_cast<int>(tiling));
|
||||
}
|
||||
|
||||
if (!deswizzleBuffer.empty()) {
|
||||
for (const auto &level : mipLayouts) {
|
||||
size_t levelHeight{level.dimensions.height * layerCount}; //!< The height of an image representing all layers in the entire level
|
||||
switch (guest->format->vkFormat) {
|
||||
case vk::Format::eBc1RgbaUnormBlock:
|
||||
case vk::Format::eBc1RgbaSrgbBlock:
|
||||
bcn::DecodeBc1(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, true);
|
||||
break;
|
||||
|
||||
case vk::Format::eBc2UnormBlock:
|
||||
case vk::Format::eBc2SrgbBlock:
|
||||
bcn::DecodeBc2(deswizzleOutput, bufferData, level.dimensions.width, levelHeight);
|
||||
break;
|
||||
|
||||
case vk::Format::eBc3UnormBlock:
|
||||
case vk::Format::eBc3SrgbBlock:
|
||||
bcn::DecodeBc3(deswizzleOutput, bufferData, level.dimensions.width, levelHeight);
|
||||
break;
|
||||
|
||||
case vk::Format::eBc4UnormBlock:
|
||||
bcn::DecodeBc4(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, false);
|
||||
break;
|
||||
case vk::Format::eBc4SnormBlock:
|
||||
bcn::DecodeBc4(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, true);
|
||||
break;
|
||||
|
||||
case vk::Format::eBc5UnormBlock:
|
||||
bcn::DecodeBc5(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, false);
|
||||
break;
|
||||
case vk::Format::eBc5SnormBlock:
|
||||
bcn::DecodeBc5(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, true);
|
||||
break;
|
||||
|
||||
case vk::Format::eBc6HUfloatBlock:
|
||||
bcn::DecodeBc6(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, false);
|
||||
break;
|
||||
case vk::Format::eBc6HSfloatBlock:
|
||||
bcn::DecodeBc6(deswizzleOutput, bufferData, level.dimensions.width, levelHeight, true);
|
||||
break;
|
||||
|
||||
case vk::Format::eBc7UnormBlock:
|
||||
case vk::Format::eBc7SrgbBlock:
|
||||
bcn::DecodeBc7(deswizzleOutput, bufferData, level.dimensions.width, levelHeight);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw exception("Unsupported guest format '{}'", vk::to_string(guest->format->vkFormat));
|
||||
}
|
||||
|
||||
deswizzleOutput += level.linearSize * layerCount;
|
||||
bufferData += level.targetLinearSize * layerCount;
|
||||
}
|
||||
}
|
||||
|
||||
if (stagingBuffer && cycle.lock() != pCycle)
|
||||
WaitOnFence();
|
||||
|
||||
@ -235,7 +300,7 @@ namespace skyline::gpu {
|
||||
.imageExtent = level.dimensions,
|
||||
}
|
||||
);
|
||||
bufferOffset += level.linearSize * layerCount;
|
||||
bufferOffset += level.targetLinearSize * layerCount;
|
||||
}
|
||||
}};
|
||||
|
||||
@ -282,7 +347,7 @@ namespace skyline::gpu {
|
||||
.imageExtent = level.dimensions,
|
||||
}
|
||||
);
|
||||
bufferOffset += level.linearSize * levelCount;
|
||||
bufferOffset += level.targetLinearSize * layerCount; // A mip level in the linear buffer holds every layer of that level back-to-back, so the next level starts after layerCount (not levelCount) images
|
||||
}
|
||||
}};
|
||||
|
||||
@ -326,17 +391,16 @@ namespace skyline::gpu {
|
||||
// Note: See SynchronizeHostImpl for additional comments
|
||||
for (size_t layer{}; layer < layerCount; layer++) {
|
||||
auto outputLevel{guestOutput}, inputLevel{hostBuffer};
|
||||
for (size_t level{}; level < levelCount; ++level) {
|
||||
const auto &mipLayout{mipLayouts[level]};
|
||||
for (const auto &level : mipLayouts) {
|
||||
texture::CopyLinearToBlockLinear(
|
||||
mipLayout.dimensions,
|
||||
level.dimensions,
|
||||
guest->format->blockWidth, guest->format->blockHeight, guest->format->bpb,
|
||||
mipLayout.blockHeight, mipLayout.blockDepth,
|
||||
outputLevel, inputLevel + (layer * mipLayout.linearSize)
|
||||
level.blockHeight, level.blockDepth,
|
||||
outputLevel, inputLevel + (layer * level.linearSize)
|
||||
);
|
||||
|
||||
outputLevel += mipLayout.blockLinearSize;
|
||||
inputLevel += layerCount * mipLayout.linearSize;
|
||||
outputLevel += level.blockLinearSize;
|
||||
inputLevel += layerCount * level.linearSize;
|
||||
}
|
||||
|
||||
guestOutput += guestLayerStride;
|
||||
@ -366,32 +430,87 @@ namespace skyline::gpu {
|
||||
layerCount(layerCount),
|
||||
sampleCount(sampleCount) {}
|
||||
|
||||
u32 CalculateLevelStride(const std::vector<texture::MipLevelLayout> &mipLayouts) {
|
||||
u32 surfaceSize{};
|
||||
/**
 * @brief Picks the format a host texture should use for the supplied guest format, substituting BCn formats that the host doesn't support
 * @param format The guest format to convert
 * @param traits The host GPU traits, only `bcnSupport` is read and it is indexed in the order BC1, BC2, BC3, BC4, BC5, BC6H and BC7
 * @return `format` unchanged if it isn't a BCn format or the host supports it, otherwise an uncompressed format with a matching Unorm/Srgb/Snorm variant (RGBA8 for BC1/2/3/7, R8 for BC4, R8G8 for BC5 and RGBA16F for BC6H)
 */
texture::Format ConvertHostCompatibleFormat(texture::Format format, const TraitManager &traits) {
|
||||
auto bcnSupport{traits.bcnSupport};
|
||||
if (bcnSupport.all()) // Every BCn format is supported by the host so no format needs to be substituted
|
||||
return format;
|
||||
|
||||
switch (format->vkFormat) {
|
||||
// BC1 -> RGBA8
case vk::Format::eBc1RgbaUnormBlock:
|
||||
return bcnSupport[0] ? format : format::R8G8B8A8Unorm;
|
||||
case vk::Format::eBc1RgbaSrgbBlock:
|
||||
return bcnSupport[0] ? format : format::R8G8B8A8Srgb;
|
||||
|
||||
// BC2 -> RGBA8
case vk::Format::eBc2UnormBlock:
|
||||
return bcnSupport[1] ? format : format::R8G8B8A8Unorm;
|
||||
case vk::Format::eBc2SrgbBlock:
|
||||
return bcnSupport[1] ? format : format::R8G8B8A8Srgb;
|
||||
|
||||
// BC3 -> RGBA8
case vk::Format::eBc3UnormBlock:
|
||||
return bcnSupport[2] ? format : format::R8G8B8A8Unorm;
|
||||
case vk::Format::eBc3SrgbBlock:
|
||||
return bcnSupport[2] ? format : format::R8G8B8A8Srgb;
|
||||
|
||||
// BC4 -> R8
case vk::Format::eBc4UnormBlock:
|
||||
return bcnSupport[3] ? format : format::R8Unorm;
|
||||
case vk::Format::eBc4SnormBlock:
|
||||
return bcnSupport[3] ? format : format::R8Snorm;
|
||||
|
||||
// BC5 -> R8G8
case vk::Format::eBc5UnormBlock:
|
||||
return bcnSupport[4] ? format : format::R8G8Unorm;
|
||||
case vk::Format::eBc5SnormBlock:
|
||||
return bcnSupport[4] ? format : format::R8G8Snorm;
|
||||
|
||||
// BC6H -> RGBA16F, both the unsigned and signed variants share the same replacement
case vk::Format::eBc6HUfloatBlock:
|
||||
case vk::Format::eBc6HSfloatBlock:
|
||||
return bcnSupport[5] ? format : format::R16G16B16A16Float; // This is a signed 16-bit FP format, we don't have an unsigned 16-bit FP format
|
||||
|
||||
// BC7 -> RGBA8
case vk::Format::eBc7UnormBlock:
|
||||
return bcnSupport[6] ? format : format::R8G8B8A8Unorm;
|
||||
case vk::Format::eBc7SrgbBlock:
|
||||
return bcnSupport[6] ? format : format::R8G8B8A8Srgb;
|
||||
|
||||
default: // Any format that isn't BCn is passed through as-is
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Sums the `linearSize` of every supplied mip level
 * @param mipLayouts The layouts of all mip levels to accumulate
 * @return The combined linear size in bytes of all mip levels, the Texture constructor multiplies this by the layer count to get the deswizzled surface size
 */
size_t CalculateLevelStride(const std::vector<texture::MipLevelLayout> &mipLayouts) {
|
||||
size_t surfaceSize{};
|
||||
for (const auto &level : mipLayouts)
|
||||
surfaceSize += level.linearSize;
|
||||
return surfaceSize;
|
||||
}
|
||||
|
||||
/**
 * @brief Sums the `targetLinearSize` of every supplied mip level, this is the counterpart of CalculateLevelStride for the target format
 * @param mipLayouts The layouts of all mip levels to accumulate
 * @return The combined target-format linear size in bytes of all mip levels, the Texture constructor multiplies this by the layer count to get the surface size when the host and guest formats differ
 */
size_t CalculateTargetLevelStride(const std::vector<texture::MipLevelLayout> &mipLayouts) {
|
||||
size_t surfaceSize{};
|
||||
for (const auto &level : mipLayouts)
|
||||
surfaceSize += level.targetLinearSize;
|
||||
return surfaceSize;
|
||||
}
|
||||
|
||||
Texture::Texture(GPU &pGpu, GuestTexture pGuest)
|
||||
: gpu(pGpu),
|
||||
guest(std::move(pGuest)),
|
||||
dimensions(guest->dimensions),
|
||||
format(guest->format),
|
||||
format(ConvertHostCompatibleFormat(guest->format, gpu.traits)),
|
||||
layout(vk::ImageLayout::eUndefined),
|
||||
tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization
|
||||
layerCount(guest->layerCount),
|
||||
layerStride(static_cast<u32>(format->GetSize(dimensions))),
|
||||
deswizzledLayerStride(static_cast<u32>(guest->format->GetSize(dimensions))),
|
||||
layerStride(format == guest->format ? deswizzledLayerStride : static_cast<u32>(format->GetSize(dimensions))),
|
||||
levelCount(guest->mipLevelCount),
|
||||
mipLayouts(
|
||||
texture::GetBlockLinearMipLayout(
|
||||
guest->dimensions,
|
||||
guest->format->blockHeight, guest->format->blockWidth, guest->format->bpb,
|
||||
format->blockHeight, format->blockWidth, format->bpb,
|
||||
guest->tileConfig.blockHeight, guest->tileConfig.blockDepth,
|
||||
guest->mipLevelCount
|
||||
)
|
||||
),
|
||||
surfaceSize(CalculateLevelStride(mipLayouts) * layerCount),
|
||||
deswizzledSurfaceSize(CalculateLevelStride(mipLayouts) * layerCount),
|
||||
surfaceSize(format == guest->format ? deswizzledSurfaceSize : (CalculateTargetLevelStride(mipLayouts) * layerCount)),
|
||||
sampleCount(vk::SampleCountFlagBits::e1),
|
||||
flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat),
|
||||
usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) {
|
||||
@ -401,7 +520,7 @@ namespace skyline::gpu {
|
||||
usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
||||
|
||||
// First attempt to derive type from dimensions
|
||||
auto imageType{guest->dimensions.GetType()};
|
||||
auto imageType{dimensions.GetType()};
|
||||
|
||||
// Try to ensure that the image type is compatible with the given image view type since we can't create a 2D image view from a 1D image
|
||||
if (imageType == vk::ImageType::e1D && guest->type != texture::TextureType::e1D && guest->type != texture::TextureType::e1DArray) {
|
||||
@ -450,8 +569,8 @@ namespace skyline::gpu {
|
||||
}
|
||||
|
||||
void Texture::MarkGpuDirty() {
|
||||
if (dirtyState == DirtyState::GpuDirty || !guest)
|
||||
return;
|
||||
if (dirtyState == DirtyState::GpuDirty || !guest || format != guest->format)
|
||||
return; // In addition to other checks, we also need to skip GPU dirty if the host format and guest format differ as we don't support re-encoding compressed textures which is when this generally occurs
|
||||
gpu.state.nce->RetrapRegions(*trapHandle, false);
|
||||
dirtyState = DirtyState::GpuDirty;
|
||||
}
|
||||
@ -571,6 +690,15 @@ namespace skyline::gpu {
|
||||
return;
|
||||
}
|
||||
|
||||
if (layout == vk::ImageLayout::eUndefined || format != guest->format) {
|
||||
// If the state of the host texture is undefined then so can the guest
|
||||
// If the texture has differing formats on the guest and host, we don't support converting back in that case as it may involve recompression of a decompressed texture
|
||||
if (!skipTrap)
|
||||
gpu.state.nce->RetrapRegions(*trapHandle, true);
|
||||
dirtyState = DirtyState::Clean;
|
||||
return;
|
||||
}
|
||||
|
||||
TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
|
||||
|
||||
WaitOnBacking();
|
||||
@ -600,8 +728,12 @@ namespace skyline::gpu {
|
||||
if (dirtyState != DirtyState::GpuDirty || !guest)
|
||||
return;
|
||||
|
||||
if (layout == vk::ImageLayout::eUndefined)
|
||||
return; // If the state of the host texture is undefined then so can the guest
|
||||
if (layout == vk::ImageLayout::eUndefined || format != guest->format) {
|
||||
// If the state of the host texture is undefined then so can the guest
|
||||
// If the texture has differing formats on the guest and host, we don't support converting back in that case as it may involve recompression of a decompressed texture
|
||||
dirtyState = DirtyState::Clean;
|
||||
return;
|
||||
}
|
||||
|
||||
TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer");
|
||||
|
||||
@ -627,8 +759,8 @@ namespace skyline::gpu {
|
||||
}
|
||||
|
||||
std::shared_ptr<TextureView> Texture::GetView(vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format pFormat, vk::ComponentMapping mapping) {
|
||||
if (!pFormat)
|
||||
pFormat = format;
|
||||
if (!pFormat || (guest && pFormat == guest->format)) // The guest is checked before being dereferenced as a texture might not have one
|
||||
pFormat = format; // We want to use the texture's format if it isn't supplied or if the requested format matches the guest format then we want to use the host format just in case it is host incompatible and the host format differs from the guest format
|
||||
|
||||
auto viewFormat{pFormat->vkFormat}, textureFormat{format->vkFormat};
|
||||
if (gpu.traits.quirks.vkImageMutableFormatCostly && viewFormat != textureFormat && (!gpu.traits.quirks.adrenoRelaxedFormatAliasing || !texture::IsAdrenoAliasCompatible(viewFormat, textureFormat)))
|
||||
|
@ -87,7 +87,7 @@ namespace skyline::gpu {
|
||||
* @return The size of the texture in bytes
|
||||
*/
|
||||
constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) const {
|
||||
return util::DivideCeil(width, u32{blockWidth}) * util::DivideCeil(height, u32{blockHeight}) * bpb * depth;
|
||||
return util::DivideCeil<size_t>(width, size_t{blockWidth}) * util::DivideCeil<size_t>(height, size_t{blockHeight}) * bpb * depth;
|
||||
}
|
||||
|
||||
constexpr size_t GetSize(Dimensions dimensions) const {
|
||||
@ -229,10 +229,11 @@ namespace skyline::gpu {
|
||||
struct MipLevelLayout {
|
||||
Dimensions dimensions; //!< The dimensions of the mipmapped level, these are exact dimensions and not aligned to a GOB
|
||||
size_t linearSize; //!< The size of a linear image with this mipmapped level in bytes
|
||||
size_t targetLinearSize; //!< The size of a linear image with this mipmapped level in bytes and using the target format, this will only differ from linearSize if the target format is supplied
|
||||
size_t blockLinearSize; //!< The size of a blocklinear image with this mipmapped level in bytes
|
||||
size_t blockHeight, blockDepth; //!< The block height and block depth set for the level
|
||||
|
||||
constexpr MipLevelLayout(Dimensions dimensions, size_t linearSize, size_t blockLinearSize, size_t blockHeight, size_t blockDepth) : dimensions{dimensions}, linearSize{linearSize}, blockLinearSize{blockLinearSize}, blockHeight{blockHeight}, blockDepth{blockDepth} {}
|
||||
constexpr MipLevelLayout(Dimensions dimensions, size_t linearSize, size_t targetLinearSize, size_t blockLinearSize, size_t blockHeight, size_t blockDepth) : dimensions{dimensions}, linearSize{linearSize}, targetLinearSize{targetLinearSize}, blockLinearSize{blockLinearSize}, blockHeight{blockHeight}, blockDepth{blockDepth} {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -438,10 +439,12 @@ namespace skyline::gpu {
|
||||
vk::ImageCreateFlags flags;
|
||||
vk::ImageUsageFlags usage;
|
||||
u32 layerCount; //!< The amount of array layers in the image
|
||||
u32 layerStride; //!< The stride of a single array layer given linear tiling, this does **not** consider mipmapping
|
||||
size_t deswizzledLayerStride{}; //!< The stride of a single layer given linear tiling using the guest format, this does **not** consider mipmapping
|
||||
size_t layerStride{}; //!< The stride of a single layer given linear tiling, this does **not** consider mipmapping
|
||||
u32 levelCount;
|
||||
std::vector<texture::MipLevelLayout> mipLayouts; //!< The layout of each mip level in the guest texture
|
||||
u32 surfaceSize; //!< The size of the entire surface given linear tiling, this contains all mip levels and layers
|
||||
size_t deswizzledSurfaceSize{}; //!< The size of the guest surface with linear tiling, calculated with the guest format which may differ from the host format
|
||||
size_t surfaceSize{}; //!< The size of the entire surface given linear tiling, this contains all mip levels and layers
|
||||
vk::SampleCountFlagBits sampleCount;
|
||||
|
||||
/**
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include "trait_manager.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2) : quirks(deviceProperties2.get<vk::PhysicalDeviceProperties2>().properties, deviceProperties2.get<vk::PhysicalDeviceDriverProperties>()) {
|
||||
TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice &physicalDevice) : quirks(deviceProperties2.get<vk::PhysicalDeviceProperties2>().properties, deviceProperties2.get<vk::PhysicalDeviceDriverProperties>()) {
|
||||
bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{};
|
||||
bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present
|
||||
|
||||
@ -135,12 +135,28 @@ namespace skyline::gpu {
|
||||
auto &subgroupProperties{deviceProperties2.get<vk::PhysicalDeviceSubgroupProperties>()};
|
||||
supportsSubgroupVote = static_cast<bool>(subgroupProperties.supportedOperations & vk::SubgroupFeatureFlagBits::eVote);
|
||||
subgroupSize = deviceProperties2.get<vk::PhysicalDeviceSubgroupProperties>().subgroupSize;
|
||||
|
||||
auto isFormatSupported{[&physicalDevice](vk::Format format) {
|
||||
auto features{physicalDevice.getFormatProperties(format)};
|
||||
// We may get false positives here by not checking specifics but this is not seen in practice while the reverse often is of drivers (Such as Adreno 512.6xx drivers which don't report any support aside from buffer features but entirely support BC formats)
|
||||
return static_cast<bool>(features.linearTilingFeatures) ||
|
||||
static_cast<bool>(features.optimalTilingFeatures) ||
|
||||
static_cast<bool>(features.bufferFeatures);
|
||||
}};
|
||||
|
||||
bcnSupport[0] = isFormatSupported(vk::Format::eBc1RgbaUnormBlock) && isFormatSupported(vk::Format::eBc1RgbaSrgbBlock);
|
||||
bcnSupport[1] = isFormatSupported(vk::Format::eBc2UnormBlock) && isFormatSupported(vk::Format::eBc2SrgbBlock);
|
||||
bcnSupport[2] = isFormatSupported(vk::Format::eBc3UnormBlock) && isFormatSupported(vk::Format::eBc3SrgbBlock);
|
||||
bcnSupport[3] = isFormatSupported(vk::Format::eBc4UnormBlock) && isFormatSupported(vk::Format::eBc4SnormBlock);
|
||||
bcnSupport[4] = isFormatSupported(vk::Format::eBc5UnormBlock) && isFormatSupported(vk::Format::eBc5SnormBlock);
|
||||
bcnSupport[5] = isFormatSupported(vk::Format::eBc6HSfloatBlock) && isFormatSupported(vk::Format::eBc6HUfloatBlock);
|
||||
bcnSupport[6] = isFormatSupported(vk::Format::eBc7UnormBlock) && isFormatSupported(vk::Format::eBc7SrgbBlock);
|
||||
}
|
||||
|
||||
std::string TraitManager::Summary() {
|
||||
return fmt::format(
|
||||
"\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}",
|
||||
supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize
|
||||
"\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}\n* BCn Support: {}",
|
||||
supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize, bcnSupport.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
@ -213,8 +229,10 @@ namespace skyline::gpu {
|
||||
Logger::Info("Applied BCeNabler patch");
|
||||
else
|
||||
throw exception("Failed to apply BCeNabler patch!");
|
||||
bcnSupport.set();
|
||||
} else if (type == ADRENOTOOLS_BCN_BLOB) {
|
||||
Logger::Info("BCeNabler skipped, blob BCN support is present");
|
||||
bcnSupport.set();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -40,6 +40,8 @@ namespace skyline::gpu {
|
||||
bool supportsSubgroupVote{}; //!< If subgroup votes are supported in shaders with SPV_KHR_subgroup_vote
|
||||
u32 subgroupSize{}; //!< Size of a subgroup on the host GPU
|
||||
|
||||
std::bitset<7> bcnSupport{}; //!< Bitmask of BCn texture formats supported, it is ordered as BC1, BC2, BC3, BC4, BC5, BC6H and BC7
|
||||
|
||||
/**
|
||||
* @brief Manages a list of any vendor/device-specific errata in the host GPU
|
||||
*/
|
||||
@ -85,7 +87,7 @@ namespace skyline::gpu {
|
||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
||||
vk::PhysicalDeviceImagelessFramebufferFeatures>;
|
||||
|
||||
TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2);
|
||||
TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice& physicalDevice);
|
||||
|
||||
/**
|
||||
* @brief Applies driver specific binary patches to the driver (e.g. BCeNabler)
|
||||
|
Loading…
Reference in New Issue
Block a user