Implement combined image samplers and make descriptor code common between quick/normal updates

This commit is contained in:
Billy Laws 2022-09-29 21:17:25 +01:00
parent ccf2d59351
commit 55b85d0691
4 changed files with 337 additions and 75 deletions

View File

@ -637,18 +637,65 @@ namespace skyline::gpu::interconnect::maxwell3d {
return view;
}
// TODO: EXEC ID FOR STORAGE BUFS PURGE REMAP
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers) {
u32 bindingIdx{};
size_t writeIdx{};
size_t bufferIdx{};
size_t imageIdx{};
size_t storageBufferIdx{};
/**
 * @brief A 32-bit texture handle, viewable either as the raw word read from a constant buffer or as its two packed indices
 * @note `textureIndex` is declared first (20 bits) and `samplerIndex` second (12 bits); presumably these land in the low and high bits of `raw` respectively on the targeted ABI — TODO confirm
 */
union BindlessHandle {
u32 raw; //!< The whole handle as a single 32-bit value
struct {
u32 textureIndex : 20; //!< Index handed to the texture lookup (and to the sampler lookup as its `textureIndex` argument)
u32 samplerIndex : 12; //!< Index handed to the sampler lookup as its `samplerIndex` argument
};
};
/**
 * @brief Reads the bindless handle belonging to one array element of a shader descriptor out of the stage's constant buffers
 * @param constantBuffers The constant buffers of the shader stage the descriptor belongs to
 * @param desc A shader descriptor exposing `cbuf_index`, `cbuf_offset` and `size_shift`, and optionally `has_secondary`, `secondary_cbuf_index` and `secondary_cbuf_offset`
 * @param arrayIdx Index of the element within the descriptor array, scaled into a byte offset by shifting left by `desc.size_shift`
 * @return The handle word from the primary constant buffer, OR-ed with the word from the secondary constant buffer when the descriptor declares one
 */
static BindlessHandle ReadBindlessHandle(InterconnectContext &ctx, std::array<ConstantBuffer, engine::ShaderStageConstantBufferCount> &constantBuffers, const auto &desc, size_t arrayIdx) {
    size_t elementOffset{arrayIdx << desc.size_shift};

    // The references are given an explicit type so that `Read<u32>` can be named without the `template` disambiguator
    ConstantBuffer &primary{constantBuffers[desc.cbuf_index]};
    size_t primaryOffset{desc.cbuf_offset + elementOffset};
    u32 rawHandle{primary.Read<u32>(ctx.executor, primaryOffset)};

    // Only descriptor types that carry a `has_secondary` member can split their handle across two constant buffers
    if constexpr (requires { desc.has_secondary; }) {
        if (desc.has_secondary) {
            ConstantBuffer &secondary{constantBuffers[desc.secondary_cbuf_index]};
            size_t secondaryOffset{desc.secondary_cbuf_offset + elementOffset};
            rawHandle |= secondary.Read<u32>(ctx.executor, secondaryOffset);
        }
    }

    return BindlessHandle{.raw = rawHandle};
}
/**
 * @brief Resolves a bindless handle into the image descriptor used for a combined image sampler write
 * @param desc The shader's texture descriptor, its `type` is forwarded to the texture lookup
 * @param handle The handle whose sampler/texture indices select the sampler and texture to bind
 * @return Descriptor info holding the sampler, the texture's view and the layout of the underlying texture
 * @note The texture is attached to the executor as a side effect
 */
static vk::DescriptorImageInfo GetTextureBinding(InterconnectContext &ctx, const Shader::TextureDescriptor &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) {
    auto boundSampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)};
    auto boundTexture{textures.GetTexture(ctx, handle.textureIndex, desc.type)};
    ctx.executor.AttachTexture(boundTexture);
    auto imageView{boundTexture->GetView()};

    vk::DescriptorImageInfo imageInfo{};
    imageInfo.sampler = **boundSampler;
    imageInfo.imageView = imageView;
    imageInfo.imageLayout = boundTexture->texture->layout;
    return imageInfo;
}
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures) {
SyncCachedStorageBufferViews(ctx.executor.executionNumber);
u32 writeIdx{};
auto writes{ctx.executor.allocator->AllocateUntracked<vk::WriteDescriptorSet>(descriptorInfo.totalWriteDescCount)};
u32 bufferIdx{};
auto bufferDescs{ctx.executor.allocator->AllocateUntracked<vk::DescriptorBufferInfo>(descriptorInfo.totalBufferDescCount)};
auto bufferDescDynamicBindings{ctx.executor.allocator->AllocateUntracked<DynamicBufferBinding>(descriptorInfo.totalBufferDescCount)};
u32 imageIdx{};
auto imageDescs{ctx.executor.allocator->AllocateUntracked<vk::DescriptorImageInfo>(descriptorInfo.totalImageDescCount)};
u32 storageBufferIdx{}; // Need to keep track of this since to index into the cached view array
u32 bindingIdx{};
/**
* @brief Adds descriptor writes for a single Vulkan descriptor type that uses buffer descriptors
* @param count Total number of descriptors to write, including array elements
*/
auto writeBufferDescs{[&](vk::DescriptorType type, const auto &descs, u32 count, auto getBufferCb) {
if (!descs.empty()) {
writes[writeIdx++] = {
@ -660,12 +707,42 @@ namespace skyline::gpu::interconnect::maxwell3d {
bindingIdx += descs.size();
// The underlying buffer bindings will be resolved from the dynamic ones during recording
for (const auto &desc : descs)
for (size_t arrayIdx{}; arrayIdx < desc.count; arrayIdx++)
for (u32 arrayIdx{}; arrayIdx < desc.count; arrayIdx++)
bufferDescDynamicBindings[bufferIdx++] = getBufferCb(desc, arrayIdx);
}
}};
auto writeImageDescs{[&](vk::DescriptorType type, const auto &descs, u32 count, auto getTextureCb, bool needsIndividualTextureBindingWrites) {
if (!descs.empty()) {
if (!needsIndividualTextureBindingWrites) {
writes[writeIdx++] = {
.dstBinding = bindingIdx,
.descriptorCount = count,
.descriptorType = type,
.pImageInfo = &imageDescs[imageIdx],
};
bindingIdx += descs.size();
}
for (const auto &desc : descs) {
if (needsIndividualTextureBindingWrites) {
writes[writeIdx++] = {
.dstBinding = bindingIdx++,
.descriptorCount = desc.count,
.descriptorType = type,
.pImageInfo = &imageDescs[imageIdx],
};
}
for (u32 arrayIdx{}; arrayIdx < desc.count; arrayIdx++)
imageDescs[imageIdx++] = getTextureCb(desc, arrayIdx);
}
}
}};
for (size_t i{}; i < shaderStages.size(); i++) {
const auto &stage{shaderStages[i]};
if (!stage.module)
@ -681,12 +758,20 @@ namespace skyline::gpu::interconnect::maxwell3d {
writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount,
[&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) {
auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx])};
// Storage buffer arrays all share the same view index: element zero uses the current index and then advances it, while every subsequent element steps back by one to land on that same view
// The parentheses are required, `storageBufferIdx - arrayIdx ? 1 : 0` parses as `(storageBufferIdx - arrayIdx) ? 1 : 0` which would collapse the view index to either 0 or 1
auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx - (arrayIdx ? 1 : 0)])};
storageBufferIdx += arrayIdx ? 0 : 1;
return binding;
});
writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount,
[&](const Shader::TextureDescriptor &desc, size_t arrayIdx) {
BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)};
return GetTextureBinding(ctx, desc, samplers, textures, handle);
}, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites);
}
// Since we don't implement all descriptor types the number of writes might not match what's expected
if (!writeIdx)
return nullptr;
@ -701,7 +786,9 @@ namespace skyline::gpu::interconnect::maxwell3d {
});
}
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, ConstantBuffers::QuickBind quickBind) {
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind) {
SyncCachedStorageBufferViews(ctx.executor.executionNumber);
size_t stageIndex{static_cast<size_t>(quickBind.stage)};
const auto &stageDescInfo{descriptorInfo.stages[stageIndex]};
const auto &cbufUsageInfo{stageDescInfo.cbufUsages[quickBind.index]};
@ -721,33 +808,54 @@ namespace skyline::gpu::interconnect::maxwell3d {
u32 imageIdx{};
auto imageDescs{ctx.executor.allocator->AllocateUntracked<vk::DescriptorImageInfo>(cbufUsageInfo.totalImageDescCount)};
auto writeBufferDescs{[&](vk::DescriptorType type, const auto &usages, const auto &descs, auto getBufferCb) {
/**
* @brief Unified function to add descriptor set writes for any descriptor type
* @note Since quick bind always results in one write per buffer, `needsIndividualTextureBindingWrites` is implicit
*/
auto writeDescs{[&]<bool ImageDesc, bool BufferDesc>(vk::DescriptorType type, const auto &usages, const auto &descs, auto getBindingCb) {
for (const auto &usage : usages) {
const auto &shaderDesc{descs[usage.shaderDescIdx]};
writes[writeIdx++] = {
writes[writeIdx] = {
.dstBinding = usage.binding,
.descriptorCount = shaderDesc.count,
.descriptorType = type,
.pBufferInfo = &bufferDescs[bufferIdx],
};
for (size_t i{}; i < shaderDesc.count; i++)
bufferDescDynamicBindings[bufferIdx++] = getBufferCb(usage, shaderDesc, i);
if constexpr (ImageDesc)
writes[writeIdx].pImageInfo = &imageDescs[imageIdx];
else if constexpr (BufferDesc)
writes[writeIdx].pBufferInfo = &bufferDescs[bufferIdx];
writeIdx++;
for (size_t i{}; i < shaderDesc.count; i++) {
if constexpr (ImageDesc)
imageDescs[imageIdx++] = getBindingCb(usage, shaderDesc, i);
else if constexpr (BufferDesc)
bufferDescDynamicBindings[bufferIdx++] = getBindingCb(usage, shaderDesc, i);
}
}
}};
writeBufferDescs(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, shaderInfo.constant_buffer_descriptors,
writeDescs.operator()<false, true>(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, shaderInfo.constant_buffer_descriptors,
[&](auto usage, const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) -> DynamicBufferBinding {
size_t cbufIdx{desc.index + arrayIdx};
return GetConstantBufferBinding(ctx, shaderInfo, stageConstantBuffers[cbufIdx].view, cbufIdx);
});
writeBufferDescs(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, shaderInfo.storage_buffers_descriptors,
writeDescs.operator()<false, true>(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, shaderInfo.storage_buffers_descriptors,
[&](auto usage, const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) {
return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.storageBufferIdx]);
});
writeDescs.operator()<true, false>(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, shaderInfo.texture_descriptors,
[&](auto usage, const Shader::TextureDescriptor &desc, size_t arrayIdx) {
BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)};
return GetTextureBinding(ctx, desc, samplers, textures, handle);
});
// Since we don't implement all descriptor types the number of writes might not match what's expected
if (!writeIdx)
return nullptr;

View File

@ -13,7 +13,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
SamplerPoolState::SamplerPoolState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine) : engine{manager, dirtyHandle, engine} {}
void SamplerPoolState::Flush(InterconnectContext &ctx) {
u32 maximumIndex{engine->samplerBinding.value == engine::SamplerBinding::Value::ViaHeaderBinding ? engine->texHeaderPool.maximumIndex : engine->texSamplerPool.maximumIndex};
useTexHeaderBinding = engine->samplerBinding.value == engine::SamplerBinding::Value::ViaHeaderBinding;
u32 maximumIndex{useTexHeaderBinding ? engine->texHeaderPool.maximumIndex : engine->texSamplerPool.maximumIndex};
auto mapping{ctx.channelCtx.asCtx->gmmu.LookupBlock(engine->texSamplerPool.offset)};
texSamplers = mapping.first.subspan(mapping.second).cast<TextureSamplerControl>().first(maximumIndex + 1);
@ -27,6 +28,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
/**
 * @brief Marks the sampler pool state as dirty and drops every cached per-index sampler pointer
 */
void Samplers::MarkAllDirty() {
    samplerPool.MarkDirty(true);

    // Reset every slot in place, the cache keeps its current size
    for (auto &cachedSampler : texSamplerCache)
        cachedSampler = nullptr;
}
static vk::Filter ConvertSamplerFilter(TextureSamplerControl::Filter filter) {
@ -137,14 +139,20 @@ namespace skyline::gpu::interconnect::maxwell3d {
return vk::BorderColor::eFloatTransparentBlack;
}
std::shared_ptr<vk::raii::Sampler> Samplers::GetSampler(InterconnectContext &ctx, u32 index) {
auto texSamplers{samplerPool.UpdateGet(ctx).texSamplers};
vk::raii::Sampler *Samplers::GetSampler(InterconnectContext &ctx, u32 samplerIndex, u32 textureIndex) {
const auto &samplerPoolObj{samplerPool.UpdateGet(ctx)};
u32 index{samplerPoolObj.useTexHeaderBinding ? textureIndex : samplerIndex};
auto texSamplers{samplerPoolObj.texSamplers};
if (texSamplers.size() != texSamplerCache.size()) {
texSamplerCache.resize(texSamplers.size());
std::fill(texSamplerCache.begin(), texSamplerCache.end(), nullptr);
} else if (texSamplerCache[index]) {
return texSamplerCache[index];
}
TextureSamplerControl &texSampler{texSamplers[index]};
auto &sampler{texSamplerCache[texSampler]};
if (sampler)
return sampler;
auto &sampler{texSamplerStore[texSampler]};
if (!sampler) {
auto convertAddressModeWithCheck{[&](TextureSamplerControl::AddressMode mode) {
auto vkMode{ConvertSamplerAddressMode(mode)};
if (vkMode == vk::SamplerAddressMode::eMirrorClampToEdge && !ctx.gpu.traits.supportsSamplerMirrorClampToEdge) [[unlikely]] {
@ -192,8 +200,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
samplerInfo.unlink<vk::SamplerCustomBorderColorCreateInfoEXT>();
}
sampler = std::make_shared<vk::raii::Sampler>(ctx.gpu.vkDevice, samplerInfo.get<vk::SamplerCreateInfo>());
return sampler;
sampler = std::make_unique<vk::raii::Sampler>(ctx.gpu.vkDevice, samplerInfo.get<vk::SamplerCreateInfo>());
}
texSamplerCache[index] = sampler.get();
return sampler.get();
}

View File

@ -23,6 +23,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
public:
span<TextureSamplerControl> texSamplers;
bool useTexHeaderBinding;
SamplerPoolState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine);
@ -35,13 +36,15 @@ namespace skyline::gpu::interconnect::maxwell3d {
private:
dirty::ManualDirtyState<SamplerPoolState> samplerPool;
tsl::robin_map<TextureSamplerControl, std::shared_ptr<vk::raii::Sampler>, util::ObjectHash<TextureSamplerControl>> texSamplerCache;
tsl::robin_map<TextureSamplerControl, std::unique_ptr<vk::raii::Sampler>, util::ObjectHash<TextureSamplerControl>> texSamplerStore;
std::vector<vk::raii::Sampler *> texSamplerCache;
public:
Samplers(DirtyManager &manager, const SamplerPoolState::EngineRegisters &engine);
void MarkAllDirty();
std::shared_ptr<vk::raii::Sampler> GetSampler(InterconnectContext &ctx, u32 index);
vk::raii::Sampler *GetSampler(InterconnectContext &ctx, u32 samplerIndex, u32 textureIndex);
};
}

View File

@ -0,0 +1,140 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
#pragma once
#include <common/base.h>
namespace skyline::gpu::interconnect {
#pragma pack(push, 1)
/**
* @brief The Texture Sampler Control is a descriptor used to configure the texture sampler in Maxwell GPUs
* @url https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_texture.xml#L367
* @url https://github.com/devkitPro/deko3d/blob/00c12d1f4809014f1cc22719dd2e3476735eec64/source/maxwell/texture_sampler_control_block.h
*/
struct TextureSamplerControl {
enum class AddressMode : u32 {
Repeat = 0,
MirroredRepeat = 1,
ClampToEdge = 2,
ClampToBorder = 3,
Clamp = 4,
MirrorClampToEdge = 5,
MirrorClampToBorder = 6,
MirrorClamp = 7,
};
enum class CompareOp : u32 {
Never = 0,
Less = 1,
Equal = 2,
LessOrEqual = 3,
Greater = 4,
NotEqual = 5,
GreaterOrEqual = 6,
Always = 7,
};
enum class Filter : u32 {
Nearest = 1,
Linear = 2,
};
enum class MipFilter : u32 {
None = 1,
Nearest = 2,
Linear = 3,
};
enum class SamplerReduction : u32 {
WeightedAverage = 0,
Min = 1,
Max = 2,
};
// 0x00
AddressMode addressModeU : 3;
AddressMode addressModeV : 3;
AddressMode addressModeP : 3;
u32 depthCompareEnable : 1;
CompareOp depthCompareOp : 3;
u32 srgbConversion : 1;
u32 fontFilterWidth : 3;
u32 fontFilterHeight : 3;
u32 maxAnisotropy : 3;
u32 _pad0_ : 9;
// 0x04
Filter magFilter : 2;
u32 _pad1_ : 2;
Filter minFilter : 2;
MipFilter mipFilter : 2;
u32 cubemapAnisotropy : 1;
u32 cubemapInterfaceFiltering : 1;
SamplerReduction reductionFilter : 2;
i32 mipLodBias : 13;
u32 floatCoordNormalization : 1;
u32 trilinearOptimization : 5;
u32 _pad2_ : 1;
// 0x08
u32 minLodClamp : 12;
u32 maxLodClamp : 12;
u32 srgbBorderColorR : 8;
// 0x0C
u32 _pad3_ : 12;
u32 srgbBorderColorG : 8;
u32 srgbBorderColorB : 8;
u32 _pad4_ : 4;
// 0x10
float borderColorR;
// 0x14
float borderColorG;
// 0x18
float borderColorB;
// 0x1C
float borderColorA;
private:
/**
* @brief Convert a fixed point integer to a floating point integer
*/
template<typename T, size_t FractionalBits = 8>
float ConvertFixedToFloat(T fixed) {
return static_cast<float>(fixed) / static_cast<float>(1 << FractionalBits);
};
public:
bool operator==(const TextureSamplerControl&) const = default;
float MaxAnisotropy() {
constexpr size_t AnisotropyCount{8}; //!< The amount of unique anisotropy values that can be represented (2^3 — 3-bit value)
constexpr std::array<float, AnisotropyCount> anisotropyLut{
1.0f, 3.14f, 5.28f, 7.42f, 9.57f, 11.71f, 13.85f, 16.0f
}; //!< A linear mapping of value range (0..7) to anisotropy range (1..16) calculated using `(index * 15 / 7) + 1`
return anisotropyLut[maxAnisotropy];
}
float MipLodBias() {
return ConvertFixedToFloat(mipLodBias);
}
float MinLodClamp() {
return ConvertFixedToFloat(minLodClamp);
}
float MaxLodClamp() {
return ConvertFixedToFloat(maxLodClamp);
}
};
static_assert(sizeof(TextureSamplerControl) == 0x20);
#pragma pack(pop)
}