mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-09 23:59:27 +01:00
Merge pull request #12186 from TellowKrinkle/MultiTextureComputeMetal
VideoBackends:Metal: Support multiple compute textures
This commit is contained in:
commit
d67f54b175
@ -386,9 +386,11 @@ void Metal::Gfx::SetSamplerState(u32 index, const SamplerState& state)
|
||||
g_state_tracker->SetSampler(index, state);
|
||||
}
|
||||
|
||||
void Metal::Gfx::SetComputeImageTexture(u32, AbstractTexture* texture, bool read, bool write)
|
||||
void Metal::Gfx::SetComputeImageTexture(u32 index, AbstractTexture* texture, bool read, bool write)
|
||||
{
|
||||
g_state_tracker->SetComputeTexture(static_cast<const Texture*>(texture));
|
||||
g_state_tracker->SetTexture(index + VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS,
|
||||
texture ? static_cast<const Texture*>(texture)->GetMTLTexture() :
|
||||
nullptr);
|
||||
}
|
||||
|
||||
void Metal::Gfx::UnbindTexture(const AbstractTexture* texture)
|
||||
|
@ -61,12 +61,14 @@ public:
|
||||
MRCOwned<id<MTLComputePipelineState>> pipeline);
|
||||
|
||||
id<MTLComputePipelineState> GetComputePipeline() const { return m_compute_pipeline; }
|
||||
bool UsesTexture(u32 index) const { return m_textures & (1 << index); }
|
||||
u32 GetTextures() const { return m_textures; }
|
||||
u32 GetSamplers() const { return m_samplers; }
|
||||
bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); }
|
||||
|
||||
private:
|
||||
MRCOwned<id<MTLComputePipelineState>> m_compute_pipeline;
|
||||
u32 m_textures = 0;
|
||||
u32 m_samplers = 0;
|
||||
u32 m_buffers = 0;
|
||||
};
|
||||
} // namespace Metal
|
||||
|
@ -67,5 +67,5 @@ Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineRef
|
||||
MRCOwned<id<MTLComputePipelineState>> pipeline)
|
||||
: Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline))
|
||||
{
|
||||
GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers);
|
||||
GetArguments([reflection arguments], &m_textures, &m_samplers, &m_buffers);
|
||||
}
|
||||
|
@ -90,7 +90,6 @@ public:
|
||||
void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth);
|
||||
void SetTexture(u32 idx, id<MTLTexture> texture);
|
||||
void SetSampler(u32 idx, const SamplerState& sampler);
|
||||
void SetComputeTexture(const Texture* texture);
|
||||
void InvalidateUniforms(bool vertex, bool geometry, bool fragment);
|
||||
void SetUtilityUniform(const void* buffer, size_t size);
|
||||
void SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1);
|
||||
@ -191,11 +190,18 @@ private:
|
||||
|
||||
MRCOwned<id<MTLTexture>> m_dummy_texture;
|
||||
|
||||
// Compute has a set of samplers and a set of writable images
|
||||
static constexpr u32 MAX_COMPUTE_TEXTURES = VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS * 2;
|
||||
static constexpr u32 MAX_PIXEL_TEXTURES = VideoCommon::MAX_PIXEL_SHADER_SAMPLERS;
|
||||
static constexpr u32 MAX_TEXTURES = std::max(MAX_PIXEL_TEXTURES, MAX_COMPUTE_TEXTURES);
|
||||
static constexpr u32 MAX_SAMPLERS =
|
||||
std::max(VideoCommon::MAX_PIXEL_SHADER_SAMPLERS, VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS);
|
||||
|
||||
// MARK: State
|
||||
u8 m_dirty_textures;
|
||||
u16 m_dirty_textures;
|
||||
u8 m_dirty_samplers;
|
||||
static_assert(sizeof(m_dirty_textures) * 8 >= VideoCommon::MAX_PIXEL_SHADER_SAMPLERS,
|
||||
"Make these bigger");
|
||||
static_assert(sizeof(m_dirty_textures) * 8 >= MAX_TEXTURES, "Make this bigger");
|
||||
static_assert(sizeof(m_dirty_samplers) * 8 >= MAX_SAMPLERS, "Make this bigger");
|
||||
union Flags
|
||||
{
|
||||
struct
|
||||
@ -206,7 +212,6 @@ private:
|
||||
bool has_gx_ps_uniform : 1;
|
||||
bool has_utility_vs_uniform : 1;
|
||||
bool has_utility_ps_uniform : 1;
|
||||
bool has_compute_texture : 1;
|
||||
bool has_pipeline : 1;
|
||||
bool has_scissor : 1;
|
||||
bool has_viewport : 1;
|
||||
@ -251,11 +256,11 @@ private:
|
||||
Util::Viewport viewport;
|
||||
const Pipeline* render_pipeline = nullptr;
|
||||
const ComputePipeline* compute_pipeline = nullptr;
|
||||
std::array<id<MTLTexture>, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> textures = {};
|
||||
std::array<id<MTLSamplerState>, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> samplers = {};
|
||||
std::array<float, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_min_lod;
|
||||
std::array<float, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_max_lod;
|
||||
std::array<SamplerState, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_states;
|
||||
std::array<id<MTLTexture>, MAX_TEXTURES> textures = {};
|
||||
std::array<id<MTLSamplerState>, MAX_SAMPLERS> samplers = {};
|
||||
std::array<float, MAX_SAMPLERS> sampler_min_lod;
|
||||
std::array<float, MAX_SAMPLERS> sampler_max_lod;
|
||||
std::array<SamplerState, MAX_SAMPLERS> sampler_states;
|
||||
const Texture* compute_texture = nullptr;
|
||||
std::unique_ptr<u8[]> utility_uniform;
|
||||
u32 utility_uniform_size = 0;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <bit>
|
||||
#include <mutex>
|
||||
|
||||
#include "Common/Align.h"
|
||||
#include "Common/Assert.h"
|
||||
|
||||
#include "Core/System.h"
|
||||
@ -344,8 +345,8 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
|
||||
m_current.cull_mode = MTLCullModeNone;
|
||||
m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
|
||||
m_flags.NewEncoder();
|
||||
m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
|
||||
m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
|
||||
m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
|
||||
m_dirty_textures = (1 << MAX_TEXTURES) - 1;
|
||||
CheckScissor();
|
||||
CheckViewport();
|
||||
ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!");
|
||||
@ -359,8 +360,8 @@ void Metal::StateTracker::BeginComputePass()
|
||||
if (m_manual_buffer_upload)
|
||||
[m_current_compute_encoder waitForFence:m_fence];
|
||||
m_flags.NewEncoder();
|
||||
m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
|
||||
m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
|
||||
m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
|
||||
m_dirty_textures = (1 << MAX_TEXTURES) - 1;
|
||||
}
|
||||
|
||||
void Metal::StateTracker::EndRenderPass()
|
||||
@ -535,15 +536,6 @@ void Metal::StateTracker::SetSampler(u32 idx, const SamplerState& sampler)
|
||||
SetSamplerForce(idx, sampler);
|
||||
}
|
||||
|
||||
void Metal::StateTracker::SetComputeTexture(const Texture* texture)
|
||||
{
|
||||
if (m_state.compute_texture != texture)
|
||||
{
|
||||
m_state.compute_texture = texture;
|
||||
m_flags.has_compute_texture = false;
|
||||
}
|
||||
}
|
||||
|
||||
void Metal::StateTracker::UnbindTexture(id<MTLTexture> texture)
|
||||
{
|
||||
for (size_t i = 0; i < std::size(m_state.textures); ++i)
|
||||
@ -565,12 +557,15 @@ void Metal::StateTracker::InvalidateUniforms(bool vertex, bool geometry, bool fr
|
||||
|
||||
void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size)
|
||||
{
|
||||
// Shader often uses 16-byte aligned types
|
||||
// Metal validation will complain if our upload is smaller than the struct with padding
|
||||
size_t aligned_size = Common::AlignUp(size, 16);
|
||||
if (m_state.utility_uniform_capacity < size)
|
||||
{
|
||||
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[size]);
|
||||
m_state.utility_uniform_capacity = size;
|
||||
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[aligned_size]);
|
||||
m_state.utility_uniform_capacity = static_cast<u32>(aligned_size);
|
||||
}
|
||||
m_state.utility_uniform_size = size;
|
||||
m_state.utility_uniform_size = static_cast<u32>(aligned_size);
|
||||
memcpy(m_state.utility_uniform.get(), buffer, size);
|
||||
m_flags.has_utility_vs_uniform = false;
|
||||
m_flags.has_utility_ps_uniform = false;
|
||||
@ -893,10 +888,31 @@ void Metal::StateTracker::PrepareCompute()
|
||||
m_flags.has_pipeline = true;
|
||||
[enc setComputePipelineState:pipe->GetComputePipeline()];
|
||||
}
|
||||
if (!m_flags.has_compute_texture && pipe->UsesTexture(0))
|
||||
if (u32 dirty = m_dirty_textures & pipe->GetTextures())
|
||||
{
|
||||
m_flags.has_compute_texture = true;
|
||||
[enc setTexture:m_state.compute_texture->GetMTLTexture() atIndex:0];
|
||||
m_dirty_textures &= ~pipe->GetTextures();
|
||||
// Since there are two sets of textures, it's likely there'll be a few in each
|
||||
// Check each set separately to avoid doing too many unnecessary bindings
|
||||
constexpr u32 lo_mask = (1 << VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS) - 1;
|
||||
if (u32 lo = dirty & lo_mask)
|
||||
{
|
||||
NSRange range = RangeOfBits(lo);
|
||||
[enc setTextures:&m_state.textures[range.location] withRange:range];
|
||||
}
|
||||
if (u32 hi = dirty & ~lo_mask)
|
||||
{
|
||||
NSRange range = RangeOfBits(hi);
|
||||
[enc setTextures:&m_state.textures[range.location] withRange:range];
|
||||
}
|
||||
}
|
||||
if (u32 dirty = m_dirty_samplers & pipe->GetSamplers())
|
||||
{
|
||||
m_dirty_samplers &= ~pipe->GetSamplers();
|
||||
NSRange range = RangeOfBits(dirty);
|
||||
[enc setSamplerStates:&m_state.samplers[range.location]
|
||||
lodMinClamps:&m_state.sampler_min_lod[range.location]
|
||||
lodMaxClamps:&m_state.sampler_max_lod[range.location]
|
||||
withRange:range];
|
||||
}
|
||||
// Compute and render can't happen at the same time, so just reuse one of the flags
|
||||
if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0))
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
#include "Common/MsgHandler.h"
|
||||
|
||||
#include "VideoCommon/Constants.h"
|
||||
#include "VideoCommon/DriverDetails.h"
|
||||
#include "VideoCommon/Spirv.h"
|
||||
|
||||
@ -365,10 +366,10 @@ static const std::string_view COMPUTE_SHADER_HEADER = R"(
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
|
||||
// All resources are packed into one descriptor set for compute.
|
||||
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
|
||||
#define SAMPLER_BINDING(x) layout(set = 1, binding = x)
|
||||
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
|
||||
#define IMAGE_BINDING(format, x) layout(format, set = 1, binding = x)
|
||||
#define IMAGE_BINDING(format, x) layout(format, set = 3, binding = x)
|
||||
|
||||
// hlsl to glsl function translation
|
||||
#define API_METAL 1
|
||||
@ -462,20 +463,13 @@ std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
|
||||
MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 1, 0, 1, 1), // ps/samp1
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 2, 0, 2, 2), // ps/samp2
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 3, 0, 3, 3), // ps/samp3
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 4, 0, 4, 4), // ps/samp4
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 5, 0, 5, 5), // ps/samp5
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 6, 0, 6, 6), // ps/samp6
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 7, 0, 7, 7), // ps/samp7
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 8, 0, 8, 8), // ps/samp8
|
||||
// Dynamic list initialized below Fragment, 1, N, 0, N, N // ps/samp0-N
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo
|
||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo
|
||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, 0, 0, 0, 0), // cs/output_image
|
||||
// Dynamic list initialized below GLCompute, 1, N, 0, N, N, // cs/samp0-N
|
||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo
|
||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo
|
||||
// Dynamic list initialized below GLCompute, 3, N, 0, MAX_COMPUTE_SHADER_SAMPLERS + N, 0, // cs/img0-N
|
||||
};
|
||||
|
||||
spirv_cross::CompilerMSL::Options options;
|
||||
@ -503,6 +497,29 @@ std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
|
||||
|
||||
for (auto& binding : resource_bindings)
|
||||
compiler.add_msl_resource_binding(binding);
|
||||
if (stage == ShaderStage::Pixel)
|
||||
{
|
||||
for (u32 i = 0; i < VideoCommon::MAX_PIXEL_SHADER_SAMPLERS; i++) // ps/samp0-N
|
||||
{
|
||||
compiler.add_msl_resource_binding(
|
||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, i, 0, i, i));
|
||||
}
|
||||
}
|
||||
else if (stage == ShaderStage::Compute)
|
||||
{
|
||||
u32 img = 0;
|
||||
u32 smp = 0;
|
||||
for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/samp0-N
|
||||
{
|
||||
compiler.add_msl_resource_binding(
|
||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, i, 0, img++, smp++));
|
||||
}
|
||||
for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/img0-N
|
||||
{
|
||||
compiler.add_msl_resource_binding(
|
||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 3, i, 0, img++, 0));
|
||||
}
|
||||
}
|
||||
|
||||
std::string output(MSL_HEADER);
|
||||
std::string compiled = compiler.compile();
|
||||
|
Loading…
x
Reference in New Issue
Block a user