Merge pull request #12186 from TellowKrinkle/MultiTextureComputeMetal

VideoBackends:Metal: Support multiple compute textures
This commit is contained in:
Mai 2023-11-30 17:46:02 -05:00 committed by GitHub
commit d67f54b175
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 87 additions and 45 deletions

View File

@ -386,9 +386,11 @@ void Metal::Gfx::SetSamplerState(u32 index, const SamplerState& state)
g_state_tracker->SetSampler(index, state);
}
void Metal::Gfx::SetComputeImageTexture(u32, AbstractTexture* texture, bool read, bool write)
void Metal::Gfx::SetComputeImageTexture(u32 index, AbstractTexture* texture, bool read, bool write)
{
g_state_tracker->SetComputeTexture(static_cast<const Texture*>(texture));
g_state_tracker->SetTexture(index + VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS,
texture ? static_cast<const Texture*>(texture)->GetMTLTexture() :
nullptr);
}
void Metal::Gfx::UnbindTexture(const AbstractTexture* texture)

View File

@ -61,12 +61,14 @@ public:
MRCOwned<id<MTLComputePipelineState>> pipeline);
id<MTLComputePipelineState> GetComputePipeline() const { return m_compute_pipeline; }
bool UsesTexture(u32 index) const { return m_textures & (1 << index); }
u32 GetTextures() const { return m_textures; }
u32 GetSamplers() const { return m_samplers; }
bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); }
private:
MRCOwned<id<MTLComputePipelineState>> m_compute_pipeline;
u32 m_textures = 0;
u32 m_samplers = 0;
u32 m_buffers = 0;
};
} // namespace Metal

View File

@ -67,5 +67,5 @@ Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineRef
MRCOwned<id<MTLComputePipelineState>> pipeline)
: Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline))
{
GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers);
GetArguments([reflection arguments], &m_textures, &m_samplers, &m_buffers);
}

View File

@ -90,7 +90,6 @@ public:
void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth);
void SetTexture(u32 idx, id<MTLTexture> texture);
void SetSampler(u32 idx, const SamplerState& sampler);
void SetComputeTexture(const Texture* texture);
void InvalidateUniforms(bool vertex, bool geometry, bool fragment);
void SetUtilityUniform(const void* buffer, size_t size);
void SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1);
@ -191,11 +190,18 @@ private:
MRCOwned<id<MTLTexture>> m_dummy_texture;
// Compute has a set of samplers and a set of writable images
static constexpr u32 MAX_COMPUTE_TEXTURES = VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS * 2;
static constexpr u32 MAX_PIXEL_TEXTURES = VideoCommon::MAX_PIXEL_SHADER_SAMPLERS;
static constexpr u32 MAX_TEXTURES = std::max(MAX_PIXEL_TEXTURES, MAX_COMPUTE_TEXTURES);
static constexpr u32 MAX_SAMPLERS =
std::max(VideoCommon::MAX_PIXEL_SHADER_SAMPLERS, VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS);
// MARK: State
u8 m_dirty_textures;
u16 m_dirty_textures;
u8 m_dirty_samplers;
static_assert(sizeof(m_dirty_textures) * 8 >= VideoCommon::MAX_PIXEL_SHADER_SAMPLERS,
"Make these bigger");
static_assert(sizeof(m_dirty_textures) * 8 >= MAX_TEXTURES, "Make this bigger");
static_assert(sizeof(m_dirty_samplers) * 8 >= MAX_SAMPLERS, "Make this bigger");
union Flags
{
struct
@ -206,7 +212,6 @@ private:
bool has_gx_ps_uniform : 1;
bool has_utility_vs_uniform : 1;
bool has_utility_ps_uniform : 1;
bool has_compute_texture : 1;
bool has_pipeline : 1;
bool has_scissor : 1;
bool has_viewport : 1;
@ -251,11 +256,11 @@ private:
Util::Viewport viewport;
const Pipeline* render_pipeline = nullptr;
const ComputePipeline* compute_pipeline = nullptr;
std::array<id<MTLTexture>, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> textures = {};
std::array<id<MTLSamplerState>, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> samplers = {};
std::array<float, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_min_lod;
std::array<float, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_max_lod;
std::array<SamplerState, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_states;
std::array<id<MTLTexture>, MAX_TEXTURES> textures = {};
std::array<id<MTLSamplerState>, MAX_SAMPLERS> samplers = {};
std::array<float, MAX_SAMPLERS> sampler_min_lod;
std::array<float, MAX_SAMPLERS> sampler_max_lod;
std::array<SamplerState, MAX_SAMPLERS> sampler_states;
const Texture* compute_texture = nullptr;
std::unique_ptr<u8[]> utility_uniform;
u32 utility_uniform_size = 0;

View File

@ -7,6 +7,7 @@
#include <bit>
#include <mutex>
#include "Common/Align.h"
#include "Common/Assert.h"
#include "Core/System.h"
@ -344,8 +345,8 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
m_current.cull_mode = MTLCullModeNone;
m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
m_flags.NewEncoder();
m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
m_dirty_textures = (1 << MAX_TEXTURES) - 1;
CheckScissor();
CheckViewport();
ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!");
@ -359,8 +360,8 @@ void Metal::StateTracker::BeginComputePass()
if (m_manual_buffer_upload)
[m_current_compute_encoder waitForFence:m_fence];
m_flags.NewEncoder();
m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
m_dirty_textures = (1 << MAX_TEXTURES) - 1;
}
void Metal::StateTracker::EndRenderPass()
@ -535,15 +536,6 @@ void Metal::StateTracker::SetSampler(u32 idx, const SamplerState& sampler)
SetSamplerForce(idx, sampler);
}
void Metal::StateTracker::SetComputeTexture(const Texture* texture)
{
if (m_state.compute_texture != texture)
{
m_state.compute_texture = texture;
m_flags.has_compute_texture = false;
}
}
void Metal::StateTracker::UnbindTexture(id<MTLTexture> texture)
{
for (size_t i = 0; i < std::size(m_state.textures); ++i)
@ -565,12 +557,15 @@ void Metal::StateTracker::InvalidateUniforms(bool vertex, bool geometry, bool fr
void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size)
{
// Shader often uses 16-byte aligned types
// Metal validation will complain if our upload is smaller than the struct with padding
size_t aligned_size = Common::AlignUp(size, 16);
if (m_state.utility_uniform_capacity < size)
{
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[size]);
m_state.utility_uniform_capacity = size;
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[aligned_size]);
m_state.utility_uniform_capacity = static_cast<u32>(aligned_size);
}
m_state.utility_uniform_size = size;
m_state.utility_uniform_size = static_cast<u32>(aligned_size);
memcpy(m_state.utility_uniform.get(), buffer, size);
m_flags.has_utility_vs_uniform = false;
m_flags.has_utility_ps_uniform = false;
@ -893,10 +888,31 @@ void Metal::StateTracker::PrepareCompute()
m_flags.has_pipeline = true;
[enc setComputePipelineState:pipe->GetComputePipeline()];
}
if (!m_flags.has_compute_texture && pipe->UsesTexture(0))
if (u32 dirty = m_dirty_textures & pipe->GetTextures())
{
m_flags.has_compute_texture = true;
[enc setTexture:m_state.compute_texture->GetMTLTexture() atIndex:0];
m_dirty_textures &= ~pipe->GetTextures();
// Since there are two sets of textures, it's likely there'll be a few in each
// Check each set separately to avoid doing too many unnecessary bindings
constexpr u32 lo_mask = (1 << VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS) - 1;
if (u32 lo = dirty & lo_mask)
{
NSRange range = RangeOfBits(lo);
[enc setTextures:&m_state.textures[range.location] withRange:range];
}
if (u32 hi = dirty & ~lo_mask)
{
NSRange range = RangeOfBits(hi);
[enc setTextures:&m_state.textures[range.location] withRange:range];
}
}
if (u32 dirty = m_dirty_samplers & pipe->GetSamplers())
{
m_dirty_samplers &= ~pipe->GetSamplers();
NSRange range = RangeOfBits(dirty);
[enc setSamplerStates:&m_state.samplers[range.location]
lodMinClamps:&m_state.sampler_min_lod[range.location]
lodMaxClamps:&m_state.sampler_max_lod[range.location]
withRange:range];
}
// Compute and render can't happen at the same time, so just reuse one of the flags
if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0))

View File

@ -11,6 +11,7 @@
#include "Common/MsgHandler.h"
#include "VideoCommon/Constants.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/Spirv.h"
@ -365,10 +366,10 @@ static const std::string_view COMPUTE_SHADER_HEADER = R"(
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
// All resources are packed into one descriptor set for compute.
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
#define SAMPLER_BINDING(x) layout(set = 1, binding = x)
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
#define IMAGE_BINDING(format, x) layout(format, set = 1, binding = x)
#define IMAGE_BINDING(format, x) layout(format, set = 3, binding = x)
// hlsl to glsl function translation
#define API_METAL 1
@ -462,20 +463,13 @@ std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo
MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0
MakeResourceBinding(spv::ExecutionModelFragment, 1, 1, 0, 1, 1), // ps/samp1
MakeResourceBinding(spv::ExecutionModelFragment, 1, 2, 0, 2, 2), // ps/samp2
MakeResourceBinding(spv::ExecutionModelFragment, 1, 3, 0, 3, 3), // ps/samp3
MakeResourceBinding(spv::ExecutionModelFragment, 1, 4, 0, 4, 4), // ps/samp4
MakeResourceBinding(spv::ExecutionModelFragment, 1, 5, 0, 5, 5), // ps/samp5
MakeResourceBinding(spv::ExecutionModelFragment, 1, 6, 0, 6, 6), // ps/samp6
MakeResourceBinding(spv::ExecutionModelFragment, 1, 7, 0, 7, 7), // ps/samp7
MakeResourceBinding(spv::ExecutionModelFragment, 1, 8, 0, 8, 8), // ps/samp8
// Dynamic list initialized below Fragment, 1, N, 0, N, N // ps/samp0-N
MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo
MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, 0, 0, 0, 0), // cs/output_image
// Dynamic list initialized below GLCompute, 1, N, 0, N, N, // cs/samp0-N
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo
// Dynamic list initialized below GLCompute, 3, N, 0, N, 0, // cs/img0-N
};
spirv_cross::CompilerMSL::Options options;
@ -503,6 +497,29 @@ std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
for (auto& binding : resource_bindings)
compiler.add_msl_resource_binding(binding);
if (stage == ShaderStage::Pixel)
{
for (u32 i = 0; i < VideoCommon::MAX_PIXEL_SHADER_SAMPLERS; i++) // ps/samp0-N
{
compiler.add_msl_resource_binding(
MakeResourceBinding(spv::ExecutionModelFragment, 1, i, 0, i, i));
}
}
else if (stage == ShaderStage::Compute)
{
u32 img = 0;
u32 smp = 0;
for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/samp0-N
{
compiler.add_msl_resource_binding(
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, i, 0, img++, smp++));
}
for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/img0-N
{
compiler.add_msl_resource_binding(
MakeResourceBinding(spv::ExecutionModelGLCompute, 3, i, 0, img++, 0));
}
}
std::string output(MSL_HEADER);
std::string compiled = compiler.compile();