Implement Maxwell3D Samplers

Maxwell3D `TextureSamplerControl` (TSC) descriptors are now fully converted into Vulkan samplers. Aspects that need them are backed by extensions (border color, reduction mode), functionality that Vulkan doesn't support is approximated (certain sampler address modes), and fallbacks are used when an extension may not be present (border color).
This commit is contained in:
PixelyIon 2021-12-28 11:32:02 +05:30
parent e48a7d7009
commit 87c8dc94d2
6 changed files with 376 additions and 5 deletions

View File

@ -120,7 +120,7 @@ namespace skyline::gpu {
}
vk::raii::Device GPU::CreateDevice(const vk::raii::PhysicalDevice &physicalDevice, decltype(vk::DeviceQueueCreateInfo::queueCount) &vkQueueFamilyIndex, QuirkManager &quirks) {
auto deviceFeatures2{physicalDevice.getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT, vk::PhysicalDeviceShaderFloat16Int8Features, vk::PhysicalDeviceShaderAtomicInt64Features>()};
auto deviceFeatures2{physicalDevice.getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT, vk::PhysicalDeviceShaderFloat16Int8Features, vk::PhysicalDeviceShaderAtomicInt64Features>()};
decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features
#define FEAT_REQ(structName, feature) \

View File

@ -11,6 +11,7 @@
#include <soc/gm20b/engines/maxwell/types.h>
#include "command_executor.h"
#include "types/tsc.h"
namespace skyline::gpu::interconnect {
namespace maxwell3d = soc::gm20b::engine::maxwell3d::type;
@ -1547,6 +1548,211 @@ namespace skyline::gpu::interconnect {
private:
u32 bindlessTextureConstantBufferIndex{};
/* Samplers */
private:
struct Sampler : public vk::raii::Sampler, public FenceCycleDependency {
using vk::raii::Sampler::Sampler;
};
/**
 * @brief The state of the guest sampler pool alongside the host samplers created from it
 */
struct SamplerPool {
IOVA iova; //!< The address of the pool, its `high` and `low` members are written independently by the setters
u32 maximumIndex; //!< Written by SetSamplerPoolMaximumIndex, GetSampler derives the size of the translated range from it
span<TextureSamplerControl> samplerControls; //!< A cached view of the pool's TSCs, it is reset by every setter and re-translated by GetSampler when invalid
std::unordered_map<TextureSamplerControl, std::shared_ptr<Sampler>, util::ObjectHash<TextureSamplerControl>> samplers; //!< The host samplers created so far, keyed by the TSC they were created from
} samplerPool{};
public:
/**
 * @brief Sets the `high` component of the sampler pool's IOVA
 * @note The cached TSC view is reset, GetSampler translates the pool again when it finds the view invalid
 */
void SetSamplerPoolIovaHigh(u32 high) {
    samplerPool.samplerControls = nullptr;
    samplerPool.iova.high = high;
}
/**
 * @brief Sets the `low` component of the sampler pool's IOVA
 * @note The cached TSC view is reset, GetSampler translates the pool again when it finds the view invalid
 */
void SetSamplerPoolIovaLow(u32 low) {
    samplerPool.samplerControls = nullptr;
    samplerPool.iova.low = low;
}
/**
 * @brief Sets the maximum index of the sampler pool
 * @note The cached TSC view is reset, GetSampler translates the pool again when it finds the view invalid
 */
void SetSamplerPoolMaximumIndex(u32 index) {
    samplerPool.samplerControls = nullptr;
    samplerPool.maximumIndex = index;
}
/**
 * @brief Converts a TSC magnification/minification filter into the equivalent Vulkan filter
 * @note Encodings that aren't part of the enumeration fall back to nearest filtering
 */
vk::Filter ConvertSamplerFilter(TextureSamplerControl::Filter filter) {
    using TscFilter = TextureSamplerControl::Filter;
    using VkFilter = vk::Filter;
    switch (filter) {
        // @fmt:off
        case TscFilter::Nearest: return VkFilter::eNearest;
        case TscFilter::Linear:  return VkFilter::eLinear;
        // @fmt:on
    }

    // The filter is a 2-bit field read from guest memory, so encodings without an enumerator (0 and 3) can reach this point
    // Returning a defined value avoids flowing off the end of a non-void function which is undefined behavior
    return VkFilter::eNearest;
}
/**
 * @brief Converts a TSC mipmap filter into the equivalent Vulkan mipmap mode
 * @note Encodings that aren't part of the enumeration fall back to the same mode as `None`
 */
vk::SamplerMipmapMode ConvertSamplerMipFilter(TextureSamplerControl::MipFilter filter) {
    using TscFilter = TextureSamplerControl::MipFilter;
    using VkMode = vk::SamplerMipmapMode;
    switch (filter) {
        // @fmt:off
        case TscFilter::None:    return VkMode{}; // Vulkan has no mode that disables mipmapping so the value-initialized mode is used
        case TscFilter::Nearest: return VkMode::eNearest;
        case TscFilter::Linear:  return VkMode::eLinear;
        // @fmt:on
    }

    // The filter is a 2-bit field read from guest memory, so the encoding without an enumerator (0) can reach this point
    // Returning a defined value avoids flowing off the end of a non-void function which is undefined behavior
    return VkMode{};
}
/**
 * @brief Converts a TSC address mode into the closest Vulkan sampler address mode
 * @note Modes without a Vulkan counterpart are approximated by the nearest edge-clamping mode
 */
vk::SamplerAddressMode ConvertSamplerAddressMode(TextureSamplerControl::AddressMode mode) {
    using Tsc = TextureSamplerControl::AddressMode;
    using Vk = vk::SamplerAddressMode;
    switch (mode) {
        case Tsc::Repeat:
            return Vk::eRepeat;

        case Tsc::MirroredRepeat:
            return Vk::eMirroredRepeat;

        case Tsc::ClampToEdge:
        case Tsc::Clamp: // Vulkan doesn't support 'GL_CLAMP' so this is an approximation
            return Vk::eClampToEdge;

        case Tsc::ClampToBorder:
            return Vk::eClampToBorder;

        case Tsc::MirrorClampToEdge:
        case Tsc::MirrorClampToBorder: // Only supported mirror clamps are to edges so this is an approximation
        case Tsc::MirrorClamp: // Same as above
            return Vk::eMirrorClampToEdge;
    }
}
/**
 * @brief Converts a TSC depth comparison operation into the equivalent Vulkan comparison operation
 */
vk::CompareOp ConvertSamplerCompareOp(TextureSamplerControl::CompareOp compareOp) {
    using Tsc = TextureSamplerControl::CompareOp;
    vk::CompareOp converted{};
    switch (compareOp) {
        // @fmt:off
        case Tsc::Never:          converted = vk::CompareOp::eNever;          break;
        case Tsc::Less:           converted = vk::CompareOp::eLess;           break;
        case Tsc::Equal:          converted = vk::CompareOp::eEqual;          break;
        case Tsc::LessOrEqual:    converted = vk::CompareOp::eLessOrEqual;    break;
        case Tsc::Greater:        converted = vk::CompareOp::eGreater;        break;
        case Tsc::NotEqual:       converted = vk::CompareOp::eNotEqual;       break;
        case Tsc::GreaterOrEqual: converted = vk::CompareOp::eGreaterOrEqual; break;
        case Tsc::Always:         converted = vk::CompareOp::eAlways;         break;
        // @fmt:on
    }
    return converted;
}
/**
 * @brief Converts a TSC reduction filter into the equivalent Vulkan sampler reduction mode
 * @note Encodings that aren't part of the enumeration fall back to a weighted average
 */
vk::SamplerReductionMode ConvertSamplerReductionFilter(TextureSamplerControl::SamplerReduction reduction) {
    using TscReduction = TextureSamplerControl::SamplerReduction;
    using VkReduction = vk::SamplerReductionMode;
    switch (reduction) {
        // @fmt:off
        case TscReduction::WeightedAverage: return VkReduction::eWeightedAverage;
        case TscReduction::Min:             return VkReduction::eMin;
        case TscReduction::Max:             return VkReduction::eMax;
        // @fmt:on
    }

    // The reduction is a 2-bit field read from guest memory, so the encoding without an enumerator (3) can reach this point
    // Returning a defined value avoids flowing off the end of a non-void function which is undefined behavior
    return VkReduction::eWeightedAverage;
}
/**
 * @brief Selects the Vulkan border color for an RGBA border color when custom border colors can be used
 * @return The fixed border color that exactly equals the supplied color, or eFloatCustomEXT when there's none
 */
vk::BorderColor ConvertBorderColorWithCustom(float red, float green, float blue, float alpha) {
    if (alpha == 1.0f) {
        if (red == 1.0f && green == 1.0f && blue == 1.0f)
            return vk::BorderColor::eFloatOpaqueWhite;
        else if (red == 0.0f && green == 0.0f && blue == 0.0f)
            return vk::BorderColor::eFloatOpaqueBlack;
    } else if (red == 0.0f && green == 0.0f && blue == 0.0f && alpha == 0.0f) {
        // Transparent black is (0, 0, 0, 0), any other color with zero alpha has to be supplied as a custom color
        return vk::BorderColor::eFloatTransparentBlack;
    }

    return vk::BorderColor::eFloatCustomEXT;
}
/**
 * @brief Selects the Vulkan border color for an RGBA border color when only the fixed border colors can be used
 * @return The fixed border color that exactly equals the supplied color, or the closest approximation of it
 */
vk::BorderColor ConvertBorderColorFixed(float red, float green, float blue, float alpha) {
    if (alpha == 1.0f) {
        if (red == 1.0f && green == 1.0f && blue == 1.0f)
            return vk::BorderColor::eFloatOpaqueWhite;
        else if (red == 0.0f && green == 0.0f && blue == 0.0f)
            return vk::BorderColor::eFloatOpaqueBlack;
    } else if (red == 0.0f && green == 0.0f && blue == 0.0f && alpha == 0.0f) {
        // Transparent black is (0, 0, 0, 0), other colors with zero alpha are handled by the approximations below
        return vk::BorderColor::eFloatTransparentBlack;
    }

    // Approximations of a custom color using fixed colors
    if (red + green + blue > 1.0f)
        return vk::BorderColor::eFloatOpaqueWhite;
    else if (alpha > 0.0f)
        return vk::BorderColor::eFloatOpaqueBlack;
    else
        return vk::BorderColor::eFloatTransparentBlack;
}
/**
 * @brief Retrieves the host sampler for the TSC at the supplied index in the sampler pool
 * @param index The index of the TSC inside the sampler pool
 * @return A shared host sampler, samplers are cached by the contents of the TSC so identical TSCs yield the same sampler
 * @throws exception If the sampler pool isn't backed by exactly one mapping
 */
std::shared_ptr<Sampler> GetSampler(u32 index) {
    if (!samplerPool.samplerControls.valid()) {
        // The maximum index is the last valid index, so the pool contains one more TSC than its value
        auto poolSize{(static_cast<size_t>(samplerPool.maximumIndex) + 1) * sizeof(TextureSamplerControl)};
        auto mappings{channelCtx.asCtx->gmmu.TranslateRange(samplerPool.iova, poolSize)};
        if (mappings.size() != 1)
            throw exception("Sampler pool mapping count is unexpected: {}", mappings.size());
        samplerPool.samplerControls = mappings.front().cast<TextureSamplerControl>();
    }

    TextureSamplerControl &samplerControl{samplerPool.samplerControls[index]};
    auto &sampler{samplerPool.samplers[samplerControl]}; // Inserts an empty entry on a cache miss which is filled in at the end
    if (sampler)
        return sampler;

    auto convertAddressModeWithCheck{[&](TextureSamplerControl::AddressMode mode) {
        auto vkMode{ConvertSamplerAddressMode(mode)};
        if (vkMode == vk::SamplerAddressMode::eMirrorClampToEdge && !gpu.quirks.supportsSamplerMirrorClampToEdge) [[unlikely]] {
            Logger::Warn("Cannot use Mirror Clamp To Edge as Sampler Address Mode without host GPU support");
            return vk::SamplerAddressMode::eClampToEdge; // We use a normal clamp to edge to approximate it
        }
        return vkMode;
    }};

    auto maxAnisotropy{samplerControl.MaxAnisotropy()};
    vk::StructureChain<vk::SamplerCreateInfo, vk::SamplerReductionModeCreateInfoEXT, vk::SamplerCustomBorderColorCreateInfoEXT> samplerInfo{
        vk::SamplerCreateInfo{
            .magFilter = ConvertSamplerFilter(samplerControl.magFilter),
            .minFilter = ConvertSamplerFilter(samplerControl.minFilter),
            .mipmapMode = ConvertSamplerMipFilter(samplerControl.mipFilter),
            .addressModeU = convertAddressModeWithCheck(samplerControl.addressModeU),
            .addressModeV = convertAddressModeWithCheck(samplerControl.addressModeV),
            .addressModeW = convertAddressModeWithCheck(samplerControl.addressModeP),
            .mipLodBias = samplerControl.MipLodBias(),
            .anisotropyEnable = maxAnisotropy > 1.0f,
            .maxAnisotropy = maxAnisotropy,
            .compareEnable = samplerControl.depthCompareEnable,
            .compareOp = ConvertSamplerCompareOp(samplerControl.depthCompareOp),
            .minLod = samplerControl.MinLodClamp(),
            .maxLod = samplerControl.MaxLodClamp(),
            .unnormalizedCoordinates = false,
        }, vk::SamplerReductionModeCreateInfoEXT{
            .reductionMode = ConvertSamplerReductionFilter(samplerControl.reductionFilter),
        }, vk::SamplerCustomBorderColorCreateInfoEXT{
            .customBorderColor.float32 = {{samplerControl.borderColorR, samplerControl.borderColorG, samplerControl.borderColorB, samplerControl.borderColorA}},
            .format = vk::Format::eUndefined,
        },
    };

    // Extension structures are only left in the chain when the host supports them and they're actually required
    if (!gpu.quirks.supportsSamplerReductionMode)
        samplerInfo.unlink<vk::SamplerReductionModeCreateInfoEXT>();

    vk::BorderColor &borderColor{samplerInfo.get<vk::SamplerCreateInfo>().borderColor};
    if (gpu.quirks.supportsCustomBorderColor) {
        borderColor = ConvertBorderColorWithCustom(samplerControl.borderColorR, samplerControl.borderColorG, samplerControl.borderColorB, samplerControl.borderColorA);
        if (borderColor != vk::BorderColor::eFloatCustomEXT)
            samplerInfo.unlink<vk::SamplerCustomBorderColorCreateInfoEXT>();
    } else {
        borderColor = ConvertBorderColorFixed(samplerControl.borderColorR, samplerControl.borderColorG, samplerControl.borderColorB, samplerControl.borderColorA);
        samplerInfo.unlink<vk::SamplerCustomBorderColorCreateInfoEXT>();
    }

    return sampler = std::make_shared<Sampler>(gpu.vkDevice, samplerInfo.get<vk::SamplerCreateInfo>());
}
public:
void SetBindlessTextureConstantBufferIndex(u32 index) {
bindlessTextureConstantBufferIndex = index;

View File

@ -0,0 +1,136 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
#pragma once
#include <common/base.h>
namespace skyline::gpu::interconnect {
/**
* @brief The Texture Sampler Control is a descriptor used to configure the texture sampler in Maxwell GPUs
* @url https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_texture.xml#L367
* @url https://github.com/devkitPro/deko3d/blob/00c12d1f4809014f1cc22719dd2e3476735eec64/source/maxwell/texture_sampler_control_block.h
*/
struct TextureSamplerControl {
    enum class AddressMode : u32 {
        Repeat = 0,
        MirroredRepeat = 1,
        ClampToEdge = 2,
        ClampToBorder = 3,
        Clamp = 4,
        MirrorClampToEdge = 5,
        MirrorClampToBorder = 6,
        MirrorClamp = 7,
    };

    enum class CompareOp : u32 {
        Never = 0,
        Less = 1,
        Equal = 2,
        LessOrEqual = 3,
        Greater = 4,
        NotEqual = 5,
        GreaterOrEqual = 6,
        Always = 7,
    };

    enum class Filter : u32 {
        Nearest = 1,
        Linear = 2,
    };

    enum class MipFilter : u32 {
        None = 1,
        Nearest = 2,
        Linear = 3,
    };

    enum class SamplerReduction : u32 {
        WeightedAverage = 0,
        Min = 1,
        Max = 2,
    };

    // 0x00
    AddressMode addressModeU : 3;
    AddressMode addressModeV : 3;
    AddressMode addressModeP : 3;
    u32 depthCompareEnable : 1;
    CompareOp depthCompareOp : 3;
    u32 srgbConversion : 1;
    u32 fontFilterWidth : 3;
    u32 fontFilterHeight : 3;
    u32 maxAnisotropy : 3; //!< An index into the anisotropy LUT, use MaxAnisotropy() to retrieve the actual value
    u32 _pad0_ : 9;

    // 0x04
    Filter magFilter : 2;
    u32 _pad1_ : 2;
    Filter minFilter : 2;
    MipFilter mipFilter : 2;
    u32 cubemapAnisotropy : 1;
    u32 cubemapInterfaceFiltering : 1;
    SamplerReduction reductionFilter : 2;
    signed int mipLodBias : 13; //!< A signed fixed-point value, use MipLodBias() to retrieve it as a float
    u32 floatCoordNormalization : 1;
    u32 trilinearOptimization : 5;
    u32 _pad2_ : 1;

    // 0x08
    u32 minLodClamp : 12; //!< An unsigned fixed-point value, use MinLodClamp() to retrieve it as a float
    u32 maxLodClamp : 12; //!< An unsigned fixed-point value, use MaxLodClamp() to retrieve it as a float
    u32 srgbBorderColorR : 8;

    // 0x0C
    u32 _pad3_ : 12;
    u32 srgbBorderColorG : 8;
    u32 srgbBorderColorB : 8;
    u32 _pad4_ : 4;

    // 0x10
    float borderColorR;

    // 0x14
    float borderColorG;

    // 0x18
    float borderColorB;

    // 0x1C
    float borderColorA;

  private:
    /**
     * @brief Converts a fixed-point number into a floating-point number
     * @tparam FractionalBits The amount of low bits in the fixed-point number that hold the fractional part
     */
    template<typename T, size_t FractionalBits = 8>
    static constexpr float ConvertFixedToFloat(T fixed) {
        return static_cast<float>(fixed) / static_cast<float>(1 << FractionalBits);
    }

  public:
    bool operator==(const TextureSamplerControl &) const = default;

    /**
     * @return The maximum anisotropy that the 3-bit `maxAnisotropy` field maps to, in the range 1.0 to 16.0
     * @note All accessors are const so that they can also be called on immutable TSCs, such as the keys of a map
     */
    float MaxAnisotropy() const {
        constexpr size_t AnisotropyCount{8}; //!< The amount of unique anisotropy values that can be represented (2^3 — 3-bit value)
        constexpr std::array<float, AnisotropyCount> anisotropyLut{
            1.0f, 3.14f, 5.28f, 7.42f, 9.57f, 11.71f, 13.85f, 16.0f
        }; //!< A linear mapping of value range (0..7) to anisotropy range (1..16) calculated using `(index * 15 / 7) + 1`
        return anisotropyLut[maxAnisotropy];
    }

    /**
     * @return The mip LOD bias converted from fixed-point with 8 fractional bits
     */
    float MipLodBias() const {
        return ConvertFixedToFloat(mipLodBias);
    }

    /**
     * @return The minimum LOD clamp converted from fixed-point with 8 fractional bits
     */
    float MinLodClamp() const {
        return ConvertFixedToFloat(minLodClamp);
    }

    /**
     * @return The maximum LOD clamp converted from fixed-point with 8 fractional bits
     */
    float MaxLodClamp() const {
        return ConvertFixedToFloat(maxLodClamp);
    }
};
static_assert(sizeof(TextureSamplerControl) == 0x20);
}

View File

@ -5,7 +5,7 @@
namespace skyline::gpu {
QuirkManager::QuirkManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2) {
bool hasShaderAtomicInt64{}, hasShaderFloat16Int8Ext{};
bool hasCustomBorderColorExtension{}, hasShaderAtomicInt64{}, hasShaderFloat16Int8Ext{};
for (auto &extension : deviceExtensions) {
#define EXT_SET(name, property) \
@ -28,6 +28,9 @@ namespace skyline::gpu {
auto extensionVersion{extension.specVersion};
switch (util::Hash(extensionName)) {
EXT_SET("VK_EXT_index_type_uint8", supportsUint8Indices);
EXT_SET("VK_KHR_sampler_mirror_clamp_to_edge", supportsSamplerMirrorClampToEdge); // The extension is registered with the KHR prefix, no EXT-prefixed variant exists so the old name could never match
EXT_SET("VK_EXT_sampler_filter_minmax", supportsSamplerReductionMode);
EXT_SET("VK_EXT_custom_border_color", hasCustomBorderColorExtension);
EXT_SET("VK_EXT_provoking_vertex", supportsLastProvokingVertex);
EXT_SET("VK_EXT_vertex_attribute_divisor", supportsVertexAttributeDivisor);
EXT_SET("VK_EXT_shader_viewport_index_layer", supportsShaderViewportIndexLayer);
@ -53,6 +56,16 @@ namespace skyline::gpu {
FEAT_SET(vk::PhysicalDeviceFeatures2, features.shaderInt64, supportsInt64)
FEAT_SET(vk::PhysicalDeviceFeatures2, features.shaderStorageImageReadWithoutFormat, supportsImageReadWithoutFormat)
if (hasCustomBorderColorExtension) {
    bool hasCustomBorderColorFeature{};
    FEAT_SET(vk::PhysicalDeviceCustomBorderColorFeaturesEXT, customBorderColors, hasCustomBorderColorFeature)
    if (hasCustomBorderColorFeature) {
        // We only want to mark custom border colors as supported if it can be done without supplying a format
        // The braces keep the entire macro expansion inside the condition regardless of how FEAT_SET is defined
        FEAT_SET(vk::PhysicalDeviceCustomBorderColorFeaturesEXT, customBorderColorWithoutFormat, supportsCustomBorderColor)
    }
} else {
    // The custom border color feature structure has to leave the enabled feature chain when its extension is missing
    // Unlinking the vertex attribute divisor structure here left the wrong structure in the chain
    enabledFeatures2.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
}
if (supportsVertexAttributeDivisor) {
FEAT_SET(vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT, vertexAttributeInstanceRateDivisor, supportsVertexAttributeDivisor)
FEAT_SET(vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT, vertexAttributeInstanceRateZeroDivisor, supportsVertexAttributeZeroDivisor)
@ -85,6 +98,9 @@ namespace skyline::gpu {
}
std::string QuirkManager::Summary() {
return fmt::format("\n* Supports U8 Indices: {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", supportsUint8Indices, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsSubgroupVote, subgroupSize);
return fmt::format(
"\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}",
supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsSubgroupVote, subgroupSize
);
}
}

View File

@ -12,7 +12,10 @@ namespace skyline::gpu {
*/
class QuirkManager {
public:
bool supportsUint8Indices{}; //!< If the device supports using uint8 indices in index buffers
bool supportsUint8Indices{}; //!< If the device supports using uint8 indices in index buffers (with VK_EXT_index_type_uint8)
bool supportsSamplerMirrorClampToEdge{}; //!< If the device supports a mirrored clamp to edge as a sampler address mode (with VK_KHR_sampler_mirror_clamp_to_edge)
bool supportsSamplerReductionMode{}; //!< If the device supports explicitly specifying a reduction mode for sampling (with VK_EXT_sampler_filter_minmax)
bool supportsCustomBorderColor{}; //!< If the device supports a custom border color without format (VK_EXT_custom_border_color)
bool supportsLastProvokingVertex{}; //!< If the device supports setting the last vertex as the provoking vertex (with VK_EXT_provoking_vertex)
bool supportsLogicOp{}; //!< If the device supports framebuffer logical operations during blending
bool supportsVertexAttributeDivisor{}; //!< If the device supports a divisor for instance-rate vertex attributes (with VK_EXT_vertex_attribute_divisor)
@ -35,7 +38,7 @@ namespace skyline::gpu {
using DeviceProperties2 = vk::StructureChain<vk::PhysicalDeviceProperties2, vk::PhysicalDeviceFloatControlsProperties, vk::PhysicalDeviceSubgroupProperties>;
using DeviceFeatures2 = vk::StructureChain<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT, vk::PhysicalDeviceShaderFloat16Int8Features, vk::PhysicalDeviceShaderAtomicInt64Features>;
using DeviceFeatures2 = vk::StructureChain<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT, vk::PhysicalDeviceShaderFloat16Int8Features, vk::PhysicalDeviceShaderAtomicInt64Features>;
QuirkManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2);

View File

@ -467,6 +467,16 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
context.SetBindlessTextureConstantBufferIndex(bindlessTextureConstantBufferIndex);
})
MAXWELL3D_STRUCT_STRUCT_CASE(samplerPool, address, high, {
context.SetSamplerPoolIovaHigh(high);
})
MAXWELL3D_STRUCT_STRUCT_CASE(samplerPool, address, low, {
context.SetSamplerPoolIovaLow(low);
})
MAXWELL3D_STRUCT_CASE(samplerPool, maximumIndex, {
context.SetSamplerPoolMaximumIndex(maximumIndex);
})
default:
break;
}