mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
Merge pull request #12186 from TellowKrinkle/MultiTextureComputeMetal
VideoBackends:Metal: Support multiple compute textures
This commit is contained in:
commit
d67f54b175
@ -386,9 +386,11 @@ void Metal::Gfx::SetSamplerState(u32 index, const SamplerState& state)
|
|||||||
g_state_tracker->SetSampler(index, state);
|
g_state_tracker->SetSampler(index, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Metal::Gfx::SetComputeImageTexture(u32, AbstractTexture* texture, bool read, bool write)
|
void Metal::Gfx::SetComputeImageTexture(u32 index, AbstractTexture* texture, bool read, bool write)
|
||||||
{
|
{
|
||||||
g_state_tracker->SetComputeTexture(static_cast<const Texture*>(texture));
|
g_state_tracker->SetTexture(index + VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS,
|
||||||
|
texture ? static_cast<const Texture*>(texture)->GetMTLTexture() :
|
||||||
|
nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Metal::Gfx::UnbindTexture(const AbstractTexture* texture)
|
void Metal::Gfx::UnbindTexture(const AbstractTexture* texture)
|
||||||
|
@ -61,12 +61,14 @@ public:
|
|||||||
MRCOwned<id<MTLComputePipelineState>> pipeline);
|
MRCOwned<id<MTLComputePipelineState>> pipeline);
|
||||||
|
|
||||||
id<MTLComputePipelineState> GetComputePipeline() const { return m_compute_pipeline; }
|
id<MTLComputePipelineState> GetComputePipeline() const { return m_compute_pipeline; }
|
||||||
bool UsesTexture(u32 index) const { return m_textures & (1 << index); }
|
u32 GetTextures() const { return m_textures; }
|
||||||
|
u32 GetSamplers() const { return m_samplers; }
|
||||||
bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); }
|
bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MRCOwned<id<MTLComputePipelineState>> m_compute_pipeline;
|
MRCOwned<id<MTLComputePipelineState>> m_compute_pipeline;
|
||||||
u32 m_textures = 0;
|
u32 m_textures = 0;
|
||||||
|
u32 m_samplers = 0;
|
||||||
u32 m_buffers = 0;
|
u32 m_buffers = 0;
|
||||||
};
|
};
|
||||||
} // namespace Metal
|
} // namespace Metal
|
||||||
|
@ -67,5 +67,5 @@ Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineRef
|
|||||||
MRCOwned<id<MTLComputePipelineState>> pipeline)
|
MRCOwned<id<MTLComputePipelineState>> pipeline)
|
||||||
: Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline))
|
: Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline))
|
||||||
{
|
{
|
||||||
GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers);
|
GetArguments([reflection arguments], &m_textures, &m_samplers, &m_buffers);
|
||||||
}
|
}
|
||||||
|
@ -90,7 +90,6 @@ public:
|
|||||||
void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth);
|
void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth);
|
||||||
void SetTexture(u32 idx, id<MTLTexture> texture);
|
void SetTexture(u32 idx, id<MTLTexture> texture);
|
||||||
void SetSampler(u32 idx, const SamplerState& sampler);
|
void SetSampler(u32 idx, const SamplerState& sampler);
|
||||||
void SetComputeTexture(const Texture* texture);
|
|
||||||
void InvalidateUniforms(bool vertex, bool geometry, bool fragment);
|
void InvalidateUniforms(bool vertex, bool geometry, bool fragment);
|
||||||
void SetUtilityUniform(const void* buffer, size_t size);
|
void SetUtilityUniform(const void* buffer, size_t size);
|
||||||
void SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1);
|
void SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1);
|
||||||
@ -191,11 +190,18 @@ private:
|
|||||||
|
|
||||||
MRCOwned<id<MTLTexture>> m_dummy_texture;
|
MRCOwned<id<MTLTexture>> m_dummy_texture;
|
||||||
|
|
||||||
|
// Compute has a set of samplers and a set of writable images
|
||||||
|
static constexpr u32 MAX_COMPUTE_TEXTURES = VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS * 2;
|
||||||
|
static constexpr u32 MAX_PIXEL_TEXTURES = VideoCommon::MAX_PIXEL_SHADER_SAMPLERS;
|
||||||
|
static constexpr u32 MAX_TEXTURES = std::max(MAX_PIXEL_TEXTURES, MAX_COMPUTE_TEXTURES);
|
||||||
|
static constexpr u32 MAX_SAMPLERS =
|
||||||
|
std::max(VideoCommon::MAX_PIXEL_SHADER_SAMPLERS, VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS);
|
||||||
|
|
||||||
// MARK: State
|
// MARK: State
|
||||||
u8 m_dirty_textures;
|
u16 m_dirty_textures;
|
||||||
u8 m_dirty_samplers;
|
u8 m_dirty_samplers;
|
||||||
static_assert(sizeof(m_dirty_textures) * 8 >= VideoCommon::MAX_PIXEL_SHADER_SAMPLERS,
|
static_assert(sizeof(m_dirty_textures) * 8 >= MAX_TEXTURES, "Make this bigger");
|
||||||
"Make these bigger");
|
static_assert(sizeof(m_dirty_samplers) * 8 >= MAX_SAMPLERS, "Make this bigger");
|
||||||
union Flags
|
union Flags
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
@ -206,7 +212,6 @@ private:
|
|||||||
bool has_gx_ps_uniform : 1;
|
bool has_gx_ps_uniform : 1;
|
||||||
bool has_utility_vs_uniform : 1;
|
bool has_utility_vs_uniform : 1;
|
||||||
bool has_utility_ps_uniform : 1;
|
bool has_utility_ps_uniform : 1;
|
||||||
bool has_compute_texture : 1;
|
|
||||||
bool has_pipeline : 1;
|
bool has_pipeline : 1;
|
||||||
bool has_scissor : 1;
|
bool has_scissor : 1;
|
||||||
bool has_viewport : 1;
|
bool has_viewport : 1;
|
||||||
@ -251,11 +256,11 @@ private:
|
|||||||
Util::Viewport viewport;
|
Util::Viewport viewport;
|
||||||
const Pipeline* render_pipeline = nullptr;
|
const Pipeline* render_pipeline = nullptr;
|
||||||
const ComputePipeline* compute_pipeline = nullptr;
|
const ComputePipeline* compute_pipeline = nullptr;
|
||||||
std::array<id<MTLTexture>, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> textures = {};
|
std::array<id<MTLTexture>, MAX_TEXTURES> textures = {};
|
||||||
std::array<id<MTLSamplerState>, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> samplers = {};
|
std::array<id<MTLSamplerState>, MAX_SAMPLERS> samplers = {};
|
||||||
std::array<float, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_min_lod;
|
std::array<float, MAX_SAMPLERS> sampler_min_lod;
|
||||||
std::array<float, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_max_lod;
|
std::array<float, MAX_SAMPLERS> sampler_max_lod;
|
||||||
std::array<SamplerState, VideoCommon::MAX_PIXEL_SHADER_SAMPLERS> sampler_states;
|
std::array<SamplerState, MAX_SAMPLERS> sampler_states;
|
||||||
const Texture* compute_texture = nullptr;
|
const Texture* compute_texture = nullptr;
|
||||||
std::unique_ptr<u8[]> utility_uniform;
|
std::unique_ptr<u8[]> utility_uniform;
|
||||||
u32 utility_uniform_size = 0;
|
u32 utility_uniform_size = 0;
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include <bit>
|
#include <bit>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
|
#include "Common/Align.h"
|
||||||
#include "Common/Assert.h"
|
#include "Common/Assert.h"
|
||||||
|
|
||||||
#include "Core/System.h"
|
#include "Core/System.h"
|
||||||
@ -344,8 +345,8 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
|
|||||||
m_current.cull_mode = MTLCullModeNone;
|
m_current.cull_mode = MTLCullModeNone;
|
||||||
m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
|
m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
|
||||||
m_flags.NewEncoder();
|
m_flags.NewEncoder();
|
||||||
m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
|
m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
|
||||||
m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
|
m_dirty_textures = (1 << MAX_TEXTURES) - 1;
|
||||||
CheckScissor();
|
CheckScissor();
|
||||||
CheckViewport();
|
CheckViewport();
|
||||||
ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!");
|
ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!");
|
||||||
@ -359,8 +360,8 @@ void Metal::StateTracker::BeginComputePass()
|
|||||||
if (m_manual_buffer_upload)
|
if (m_manual_buffer_upload)
|
||||||
[m_current_compute_encoder waitForFence:m_fence];
|
[m_current_compute_encoder waitForFence:m_fence];
|
||||||
m_flags.NewEncoder();
|
m_flags.NewEncoder();
|
||||||
m_dirty_samplers = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
|
m_dirty_samplers = (1 << MAX_SAMPLERS) - 1;
|
||||||
m_dirty_textures = (1 << VideoCommon::MAX_PIXEL_SHADER_SAMPLERS) - 1;
|
m_dirty_textures = (1 << MAX_TEXTURES) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Metal::StateTracker::EndRenderPass()
|
void Metal::StateTracker::EndRenderPass()
|
||||||
@ -535,15 +536,6 @@ void Metal::StateTracker::SetSampler(u32 idx, const SamplerState& sampler)
|
|||||||
SetSamplerForce(idx, sampler);
|
SetSamplerForce(idx, sampler);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Metal::StateTracker::SetComputeTexture(const Texture* texture)
|
|
||||||
{
|
|
||||||
if (m_state.compute_texture != texture)
|
|
||||||
{
|
|
||||||
m_state.compute_texture = texture;
|
|
||||||
m_flags.has_compute_texture = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Metal::StateTracker::UnbindTexture(id<MTLTexture> texture)
|
void Metal::StateTracker::UnbindTexture(id<MTLTexture> texture)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < std::size(m_state.textures); ++i)
|
for (size_t i = 0; i < std::size(m_state.textures); ++i)
|
||||||
@ -565,12 +557,15 @@ void Metal::StateTracker::InvalidateUniforms(bool vertex, bool geometry, bool fr
|
|||||||
|
|
||||||
void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size)
|
void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size)
|
||||||
{
|
{
|
||||||
|
// Shader often uses 16-byte aligned types
|
||||||
|
// Metal validation will complain if our upload is smaller than the struct with padding
|
||||||
|
size_t aligned_size = Common::AlignUp(size, 16);
|
||||||
if (m_state.utility_uniform_capacity < size)
|
if (m_state.utility_uniform_capacity < size)
|
||||||
{
|
{
|
||||||
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[size]);
|
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[aligned_size]);
|
||||||
m_state.utility_uniform_capacity = size;
|
m_state.utility_uniform_capacity = static_cast<u32>(aligned_size);
|
||||||
}
|
}
|
||||||
m_state.utility_uniform_size = size;
|
m_state.utility_uniform_size = static_cast<u32>(aligned_size);
|
||||||
memcpy(m_state.utility_uniform.get(), buffer, size);
|
memcpy(m_state.utility_uniform.get(), buffer, size);
|
||||||
m_flags.has_utility_vs_uniform = false;
|
m_flags.has_utility_vs_uniform = false;
|
||||||
m_flags.has_utility_ps_uniform = false;
|
m_flags.has_utility_ps_uniform = false;
|
||||||
@ -893,10 +888,31 @@ void Metal::StateTracker::PrepareCompute()
|
|||||||
m_flags.has_pipeline = true;
|
m_flags.has_pipeline = true;
|
||||||
[enc setComputePipelineState:pipe->GetComputePipeline()];
|
[enc setComputePipelineState:pipe->GetComputePipeline()];
|
||||||
}
|
}
|
||||||
if (!m_flags.has_compute_texture && pipe->UsesTexture(0))
|
if (u32 dirty = m_dirty_textures & pipe->GetTextures())
|
||||||
{
|
{
|
||||||
m_flags.has_compute_texture = true;
|
m_dirty_textures &= ~pipe->GetTextures();
|
||||||
[enc setTexture:m_state.compute_texture->GetMTLTexture() atIndex:0];
|
// Since there's two sets of textures, it's likely there'll be a few in each
|
||||||
|
// Check each set separately to avoid doing too many unnecessary bindings
|
||||||
|
constexpr u32 lo_mask = (1 << VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS) - 1;
|
||||||
|
if (u32 lo = dirty & lo_mask)
|
||||||
|
{
|
||||||
|
NSRange range = RangeOfBits(lo);
|
||||||
|
[enc setTextures:&m_state.textures[range.location] withRange:range];
|
||||||
|
}
|
||||||
|
if (u32 hi = dirty & ~lo_mask)
|
||||||
|
{
|
||||||
|
NSRange range = RangeOfBits(hi);
|
||||||
|
[enc setTextures:&m_state.textures[range.location] withRange:range];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u32 dirty = m_dirty_samplers & pipe->GetSamplers())
|
||||||
|
{
|
||||||
|
m_dirty_samplers &= ~pipe->GetSamplers();
|
||||||
|
NSRange range = RangeOfBits(dirty);
|
||||||
|
[enc setSamplerStates:&m_state.samplers[range.location]
|
||||||
|
lodMinClamps:&m_state.sampler_min_lod[range.location]
|
||||||
|
lodMaxClamps:&m_state.sampler_max_lod[range.location]
|
||||||
|
withRange:range];
|
||||||
}
|
}
|
||||||
// Compute and render can't happen at the same time, so just reuse one of the flags
|
// Compute and render can't happen at the same time, so just reuse one of the flags
|
||||||
if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0))
|
if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0))
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
#include "Common/MsgHandler.h"
|
#include "Common/MsgHandler.h"
|
||||||
|
|
||||||
|
#include "VideoCommon/Constants.h"
|
||||||
#include "VideoCommon/DriverDetails.h"
|
#include "VideoCommon/DriverDetails.h"
|
||||||
#include "VideoCommon/Spirv.h"
|
#include "VideoCommon/Spirv.h"
|
||||||
|
|
||||||
@ -365,10 +366,10 @@ static const std::string_view COMPUTE_SHADER_HEADER = R"(
|
|||||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||||
|
|
||||||
// All resources are packed into one descriptor set for compute.
|
|
||||||
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
|
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
|
||||||
|
#define SAMPLER_BINDING(x) layout(set = 1, binding = x)
|
||||||
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
|
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
|
||||||
#define IMAGE_BINDING(format, x) layout(format, set = 1, binding = x)
|
#define IMAGE_BINDING(format, x) layout(format, set = 3, binding = x)
|
||||||
|
|
||||||
// hlsl to glsl function translation
|
// hlsl to glsl function translation
|
||||||
#define API_METAL 1
|
#define API_METAL 1
|
||||||
@ -462,20 +463,13 @@ std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
|
|||||||
MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo
|
MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo
|
MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo
|
MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0
|
// Dynamic list initialized below Fragment, 1, N, 0, N, N // ps/samp0-N
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 1, 0, 1, 1), // ps/samp1
|
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 2, 0, 2, 2), // ps/samp2
|
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 3, 0, 3, 3), // ps/samp3
|
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 4, 0, 4, 4), // ps/samp4
|
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 5, 0, 5, 5), // ps/samp5
|
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 6, 0, 6, 6), // ps/samp6
|
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 7, 0, 7, 7), // ps/samp7
|
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 1, 8, 0, 8, 8), // ps/samp8
|
|
||||||
MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo
|
MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo
|
||||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo
|
MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo
|
||||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, 0, 0, 0, 0), // cs/output_image
|
// Dynamic list initialized below GLCompute, 1, N, 0, N, N, // cs/samp0-N
|
||||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo
|
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo
|
||||||
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo
|
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo
|
||||||
|
// Dynamic list initialized below GLCompute, 3, N, 0, N, 0, // cs/img0-N
|
||||||
};
|
};
|
||||||
|
|
||||||
spirv_cross::CompilerMSL::Options options;
|
spirv_cross::CompilerMSL::Options options;
|
||||||
@ -503,6 +497,29 @@ std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
|
|||||||
|
|
||||||
for (auto& binding : resource_bindings)
|
for (auto& binding : resource_bindings)
|
||||||
compiler.add_msl_resource_binding(binding);
|
compiler.add_msl_resource_binding(binding);
|
||||||
|
if (stage == ShaderStage::Pixel)
|
||||||
|
{
|
||||||
|
for (u32 i = 0; i < VideoCommon::MAX_PIXEL_SHADER_SAMPLERS; i++) // ps/samp0-N
|
||||||
|
{
|
||||||
|
compiler.add_msl_resource_binding(
|
||||||
|
MakeResourceBinding(spv::ExecutionModelFragment, 1, i, 0, i, i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (stage == ShaderStage::Compute)
|
||||||
|
{
|
||||||
|
u32 img = 0;
|
||||||
|
u32 smp = 0;
|
||||||
|
for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/samp0-N
|
||||||
|
{
|
||||||
|
compiler.add_msl_resource_binding(
|
||||||
|
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, i, 0, img++, smp++));
|
||||||
|
}
|
||||||
|
for (u32 i = 0; i < VideoCommon::MAX_COMPUTE_SHADER_SAMPLERS; i++) // cs/img0-N
|
||||||
|
{
|
||||||
|
compiler.add_msl_resource_binding(
|
||||||
|
MakeResourceBinding(spv::ExecutionModelGLCompute, 3, i, 0, img++, 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string output(MSL_HEADER);
|
std::string output(MSL_HEADER);
|
||||||
std::string compiled = compiler.compile();
|
std::string compiled = compiler.compile();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user