Merge pull request #11028 from tellowkrinkle/MetalFixes

Various Metal renderer improvements
This commit is contained in:
JMC47 2022-10-24 15:22:37 -04:00 committed by GitHub
commit b66793194e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 284 additions and 29 deletions

View File

@ -87,6 +87,11 @@ const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{ const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{
{System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false}; {System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false};
const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
{System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};
const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE{{System::GFX, "Settings", "MTLUsePresentDrawable"},
false};
const Info<bool> GFX_SW_DUMP_OBJECTS{{System::GFX, "Settings", "SWDumpObjects"}, false}; const Info<bool> GFX_SW_DUMP_OBJECTS{{System::GFX, "Settings", "SWDumpObjects"}, false};
const Info<bool> GFX_SW_DUMP_TEV_STAGES{{System::GFX, "Settings", "SWDumpTevStages"}, false}; const Info<bool> GFX_SW_DUMP_TEV_STAGES{{System::GFX, "Settings", "SWDumpTevStages"}, false};
const Info<bool> GFX_SW_DUMP_TEV_TEX_FETCHES{{System::GFX, "Settings", "SWDumpTevTexFetches"}, const Info<bool> GFX_SW_DUMP_TEV_TEX_FETCHES{{System::GFX, "Settings", "SWDumpTevTexFetches"},

View File

@ -11,6 +11,7 @@ enum class AspectMode : int;
enum class ShaderCompilationMode : int; enum class ShaderCompilationMode : int;
enum class StereoMode : int; enum class StereoMode : int;
enum class FreelookControlType : int; enum class FreelookControlType : int;
enum class TriState : int;
namespace Config namespace Config
{ {
@ -75,6 +76,9 @@ extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE; extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION; extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION;
extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE;
extern const Info<bool> GFX_SW_DUMP_OBJECTS; extern const Info<bool> GFX_SW_DUMP_OBJECTS;
extern const Info<bool> GFX_SW_DUMP_TEV_STAGES; extern const Info<bool> GFX_SW_DUMP_TEV_STAGES;
extern const Info<bool> GFX_SW_DUMP_TEV_TEX_FETCHES; extern const Info<bool> GFX_SW_DUMP_TEV_TEX_FETCHES;

View File

@ -39,3 +39,5 @@ PRIVATE
${METAL_LIBRARY} ${METAL_LIBRARY}
${QUARTZCORE_LIBRARY} ${QUARTZCORE_LIBRARY}
) )
target_compile_options(videometal PRIVATE -fno-objc-arc)

View File

@ -36,6 +36,7 @@ std::vector<BBoxType> Metal::BoundingBox::Read(u32 index, u32 length)
{ {
g_state_tracker->EndRenderPass(); g_state_tracker->EndRenderPass();
g_state_tracker->FlushEncoders(); g_state_tracker->FlushEncoders();
g_state_tracker->NotifyOfCPUGPUSync();
g_state_tracker->WaitForFlushedEncoders(); g_state_tracker->WaitForFlushedEncoders();
return std::vector<BBoxType>(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length); return std::vector<BBoxType>(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length);
} }

View File

@ -56,6 +56,7 @@ void Metal::PerfQuery::FlushResults()
// There's a possibility that some active performance queries are unflushed // There's a possibility that some active performance queries are unflushed
g_state_tracker->FlushEncoders(); g_state_tracker->FlushEncoders();
g_state_tracker->NotifyOfCPUGPUSync();
std::unique_lock<std::mutex> lock(m_results_mtx); std::unique_lock<std::mutex> lock(m_results_mtx);
while (!IsFlushed()) while (!IsFlushed())

View File

@ -20,6 +20,7 @@ Metal::Renderer::Renderer(MRCOwned<CAMetalLayer*> layer, int width, int height,
m_layer(std::move(layer)) m_layer(std::move(layer))
{ {
UpdateActiveConfig(); UpdateActiveConfig();
[m_layer setDisplaySyncEnabled:g_ActiveConfig.bVSyncActive];
} }
Metal::Renderer::~Renderer() = default; Metal::Renderer::~Renderer() = default;
@ -454,8 +455,15 @@ void Metal::Renderer::PresentBackbuffer()
g_state_tracker->EndRenderPass(); g_state_tracker->EndRenderPass();
if (m_drawable) if (m_drawable)
{ {
[g_state_tracker->GetRenderCmdBuf() // PresentDrawable refuses to allow Dolphin to present faster than the display's refresh rate
addScheduledHandler:[drawable = std::move(m_drawable)](id) { [drawable present]; }]; // when windowed (or fullscreen with vsync enabled, but that's more understandable).
// On the other hand, it helps Xcode's GPU captures start and stop on frame boundaries
// which is convenient. Put it here as a default-off config, which we can override in Xcode.
if (g_ActiveConfig.bUsePresentDrawable)
[g_state_tracker->GetRenderCmdBuf() presentDrawable:m_drawable];
else
[g_state_tracker->GetRenderCmdBuf()
addScheduledHandler:[drawable = std::move(m_drawable)](id) { [drawable present]; }];
m_bb_texture->SetMTLTexture(nullptr); m_bb_texture->SetMTLTexture(nullptr);
m_drawable = nullptr; m_drawable = nullptr;
} }

View File

@ -34,7 +34,6 @@ public:
Uniform, Uniform,
Vertex, Vertex,
Index, Index,
TextureData,
Texels, Texels,
Last = Texels Last = Texels
}; };
@ -75,6 +74,14 @@ public:
return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire); return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire);
} }
void ReloadSamplers(); void ReloadSamplers();
/// Tells the tracker that the caller is about to block the CPU on GPU work.
/// If manual buffer upload is supported and currently on, and no upload or render
/// command buffer is open, manual buffer upload is switched off.
void NotifyOfCPUGPUSync()
{
  const bool upload_active = g_features.manual_buffer_upload && m_manual_buffer_upload;
  // Leave the mode alone while a command buffer is still being recorded.
  if (upload_active && !m_upload_cmdbuf && !m_current_render_cmdbuf)
    SetManualBufferUpload(false);
}
void SetPipeline(const Pipeline* pipe); void SetPipeline(const Pipeline* pipe);
void SetPipeline(const ComputePipeline* pipe); void SetPipeline(const ComputePipeline* pipe);
@ -106,6 +113,7 @@ public:
{ {
return (amt + static_cast<size_t>(align)) & ~static_cast<size_t>(align); return (amt + static_cast<size_t>(align)) & ~static_cast<size_t>(align);
} }
Map AllocateForTextureUpload(size_t amt);
Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align) Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align)
{ {
Preallocate(buffer_idx, amt); Preallocate(buffer_idx, amt);
@ -119,6 +127,7 @@ public:
static_cast<size_t>(align)) == 0); static_cast<size_t>(align)) == 0);
return CommitPreallocation(buffer_idx, Align(amt, align)); return CommitPreallocation(buffer_idx, Align(amt, align));
} }
id<MTLBlitCommandEncoder> GetUploadEncoder();
id<MTLBlitCommandEncoder> GetTextureUploadEncoder(); id<MTLBlitCommandEncoder> GetTextureUploadEncoder();
id<MTLCommandBuffer> GetRenderCmdBuf(); id<MTLCommandBuffer> GetRenderCmdBuf();
@ -142,18 +151,28 @@ private:
void Reset(size_t new_size); void Reset(size_t new_size);
}; };
struct Buffer struct CPUBuffer
{ {
UsageTracker usage; UsageTracker usage;
MRCOwned<id<MTLBuffer>> mtlbuffer; MRCOwned<id<MTLBuffer>> mtlbuffer;
void* buffer = nullptr; void* buffer = nullptr;
}; };
// An upload buffer that can be mirrored into a second, GPU-side buffer.
// Data is always written by the CPU through `buffer`; when manual buffer upload is
// active, the written range is blitted from `cpubuffer` into `gpubuffer`.
struct BufferPair
{
// Tracks the current write position and size of the buffer
UsageTracker usage;
// Buffer the CPU writes into (created with shared storage, write-combined)
MRCOwned<id<MTLBuffer>> cpubuffer;
// Private-storage copy target; only created when g_features.manual_buffer_upload is set
MRCOwned<id<MTLBuffer>> gpubuffer;
// CPU pointer to the contents of `cpubuffer`
void* buffer = nullptr;
// Offset up to which `cpubuffer` has already been copied into `gpubuffer`
size_t last_upload = 0;
};
struct Backref; struct Backref;
struct PerfQueryTracker; struct PerfQueryTracker;
std::shared_ptr<Backref> m_backref; std::shared_ptr<Backref> m_backref;
std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache; std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
MRCOwned<id<MTLFence>> m_fence;
MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf; MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder; MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf; MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf;
@ -165,7 +184,8 @@ private:
MRCOwned<MTLRenderPassDescriptor*> m_render_pass_desc[3]; MRCOwned<MTLRenderPassDescriptor*> m_render_pass_desc[3];
MRCOwned<MTLRenderPassDescriptor*> m_resolve_pass_desc; MRCOwned<MTLRenderPassDescriptor*> m_resolve_pass_desc;
Framebuffer* m_current_framebuffer; Framebuffer* m_current_framebuffer;
Buffer m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1]; CPUBuffer m_texture_upload_buffer;
BufferPair m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
u64 m_current_draw = 1; u64 m_current_draw = 1;
std::atomic<u64> m_last_finished_draw{0}; std::atomic<u64> m_last_finished_draw{0};
@ -250,9 +270,12 @@ private:
} m_state; } m_state;
u32 m_perf_query_tracker_counter = 0; u32 m_perf_query_tracker_counter = 0;
bool m_manual_buffer_upload = false;
void SetManualBufferUpload(bool enable);
std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker(); std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
void SetSamplerForce(u32 idx, const SamplerState& sampler); void SetSamplerForce(u32 idx, const SamplerState& sampler);
void Sync(BufferPair& buffer);
Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt); Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
void CheckViewport(); void CheckViewport();
void CheckScissor(); void CheckScissor();

View File

@ -45,12 +45,11 @@ static NSString* GetName(Metal::StateTracker::UploadBuffer buffer)
// clang-format off // clang-format off
switch (buffer) switch (buffer)
{ {
case Metal::StateTracker::UploadBuffer::TextureData: return @"Texture Data"; case Metal::StateTracker::UploadBuffer::Texels: return @"Texels";
case Metal::StateTracker::UploadBuffer::Texels: return @"Texels"; case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices";
case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices"; case Metal::StateTracker::UploadBuffer::Index: return @"Indices";
case Metal::StateTracker::UploadBuffer::Index: return @"Indices"; case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms";
case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms"; case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload";
case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload";
} }
// clang-format on // clang-format on
} }
@ -105,6 +104,7 @@ void Metal::StateTracker::UsageTracker::Reset(size_t new_size)
Metal::StateTracker::StateTracker() : m_backref(std::make_shared<Backref>(this)) Metal::StateTracker::StateTracker() : m_backref(std::make_shared<Backref>(this))
{ {
m_flags.should_apply_label = true; m_flags.should_apply_label = true;
m_fence = MRCTransfer([g_device newFence]);
for (MRCOwned<MTLRenderPassDescriptor*>& rpdesc : m_render_pass_desc) for (MRCOwned<MTLRenderPassDescriptor*>& rpdesc : m_render_pass_desc)
{ {
rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); rpdesc = MRCTransfer([MTLRenderPassDescriptor new]);
@ -141,9 +141,10 @@ Metal::StateTracker::~StateTracker()
// MARK: BufferPair Ops // MARK: BufferPair Ops
std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt)
{ {
Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)]; amt = (amt + 15) & ~15ull;
CPUBuffer& buffer = m_texture_upload_buffer;
u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
if (__builtin_expect(needs_new, false)) if (__builtin_expect(needs_new, false))
@ -155,11 +156,61 @@ std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_id
MTLResourceOptions options = MTLResourceOptions options =
MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
[buffer.mtlbuffer setLabel:GetName(buffer_idx)]; [buffer.mtlbuffer setLabel:@"Texture Upload Buffer"];
ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)"); ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)");
buffer.buffer = [buffer.mtlbuffer contents]; buffer.buffer = [buffer.mtlbuffer contents];
buffer.usage.Reset(newsize); buffer.usage.Reset(newsize);
} }
size_t pos = buffer.usage.Allocate(m_current_draw, amt);
Map ret = {buffer.mtlbuffer, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
DEBUG_ASSERT(pos <= buffer.usage.Size() &&
"Previous code should have guaranteed there was enough space");
return ret;
}
// Makes sure the upload buffer `buffer_idx` has room for `amt` bytes and returns the CPU
// pointer and buffer offset at the current write position. The position itself is only
// advanced later, by CommitPreallocation.
std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
{
BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
// Remember where writing stopped before PrepareForAllocation can move the position
size_t base_pos = buffer.usage.Pos();
bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
// A position of 0 after preparing presumably means the tracker wrapped back to the
// start of the buffer -- TODO confirm against UsageTracker
bool needs_upload = needs_new || buffer.usage.Pos() == 0;
if (m_manual_buffer_upload && needs_upload)
{
// Copy everything written since the last upload before the write position jumps,
// otherwise the range [last_upload, base_pos) would never reach the GPU buffer
if (base_pos != buffer.last_upload)
{
id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
[encoder copyFromBuffer:buffer.cpubuffer
sourceOffset:buffer.last_upload
toBuffer:buffer.gpubuffer
destinationOffset:buffer.last_upload
size:base_pos - buffer.last_upload];
}
buffer.last_upload = 0;
}
if (__builtin_expect(needs_new, false))
{
// Orphan buffer
// Replace it with one at least twice as large (minimum 4096 bytes), doubling further
// until the request fits
size_t newsize = std::max<size_t>(buffer.usage.Size() * 2, 4096);
while (newsize < amt)
newsize *= 2;
MTLResourceOptions options =
MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
[buffer.cpubuffer setLabel:GetName(buffer_idx)];
ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
buffer.buffer = [buffer.cpubuffer contents];
buffer.usage.Reset(newsize);
// The GPU-side mirror is only needed when the device uses manual buffer upload
if (g_features.manual_buffer_upload)
{
options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked;
buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
[buffer.gpubuffer setLabel:GetName(buffer_idx)];
ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
}
}
size_t pos = buffer.usage.Pos(); size_t pos = buffer.usage.Pos();
return std::make_pair(reinterpret_cast<char*>(buffer.buffer) + pos, pos); return std::make_pair(reinterpret_cast<char*>(buffer.buffer) + pos, pos);
} }
@ -167,17 +218,46 @@ std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_id
Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx, Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx,
size_t amt) size_t amt)
{ {
Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)]; BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
size_t pos = buffer.usage.Allocate(m_current_draw, amt); size_t pos = buffer.usage.Allocate(m_current_draw, amt);
Map ret = {nil, pos, reinterpret_cast<char*>(buffer.buffer) + pos}; Map ret = {nil, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
ret.gpu_buffer = buffer.mtlbuffer; ret.gpu_buffer = m_manual_buffer_upload ? buffer.gpubuffer : buffer.cpubuffer;
DEBUG_ASSERT(pos <= buffer.usage.Size() && DEBUG_ASSERT(pos <= buffer.usage.Size() &&
"Previous code should have guaranteed there was enough space"); "Previous code should have guaranteed there was enough space");
return ret; return ret;
} }
// Blits the not-yet-uploaded tail of `buffer` from its CPU buffer to its GPU buffer.
// Does nothing when manual buffer upload is off or nothing new has been written.
void Metal::StateTracker::Sync(BufferPair& buffer)
{
  if (!m_manual_buffer_upload)
    return;

  const size_t uploaded_end = buffer.last_upload;
  const size_t write_end = buffer.usage.Pos();
  if (write_end == uploaded_end)
    return;

  // Source and destination use the same offset, so the GPU buffer mirrors the CPU one
  [GetUploadEncoder() copyFromBuffer:buffer.cpubuffer
                        sourceOffset:uploaded_end
                            toBuffer:buffer.gpubuffer
                   destinationOffset:uploaded_end
                                size:write_end - uploaded_end];
  buffer.last_upload = write_end;
}
// MARK: Render Pass / Encoder Management // MARK: Render Pass / Encoder Management
// Returns the blit encoder used for buffer uploads, creating its command buffer and
// the encoder on first use.
id<MTLBlitCommandEncoder> Metal::StateTracker::GetUploadEncoder()
{
  if (m_upload_cmdbuf)
    return m_upload_encoder;

  @autoreleasepool
  {
    // Both objects come back autoreleased, so retain them to keep them past the pool
    id<MTLCommandBuffer> cmdbuf = [g_queue commandBuffer];
    [cmdbuf setLabel:@"Vertex Upload"];
    m_upload_cmdbuf = MRCRetain(cmdbuf);

    id<MTLBlitCommandEncoder> blit = [cmdbuf blitCommandEncoder];
    [blit setLabel:@"Vertex Upload"];
    m_upload_encoder = MRCRetain(blit);
  }
  return m_upload_encoder;
}
id<MTLBlitCommandEncoder> Metal::StateTracker::GetTextureUploadEncoder() id<MTLBlitCommandEncoder> Metal::StateTracker::GetTextureUploadEncoder()
{ {
if (!m_texture_upload_cmdbuf) if (!m_texture_upload_cmdbuf)
@ -270,6 +350,8 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]); MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
if (m_current_perf_query) if (m_current_perf_query)
[descriptor setVisibilityResultBuffer:nil]; [descriptor setVisibilityResultBuffer:nil];
if (m_manual_buffer_upload)
[m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex];
AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment(); AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
if (!attachment) if (!attachment)
attachment = m_current_framebuffer->GetDepthAttachment(); attachment = m_current_framebuffer->GetDepthAttachment();
@ -299,6 +381,8 @@ void Metal::StateTracker::BeginComputePass()
EndRenderPass(); EndRenderPass();
m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]); m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]);
[m_current_compute_encoder setLabel:@"Compute"]; [m_current_compute_encoder setLabel:@"Compute"];
if (m_manual_buffer_upload)
[m_current_compute_encoder waitForFence:m_fence];
m_flags.NewEncoder(); m_flags.NewEncoder();
m_dirty_samplers = 0xff; m_dirty_samplers = 0xff;
m_dirty_textures = 0xff; m_dirty_textures = 0xff;
@ -326,6 +410,20 @@ void Metal::StateTracker::FlushEncoders()
if (!m_current_render_cmdbuf) if (!m_current_render_cmdbuf)
return; return;
EndRenderPass(); EndRenderPass();
for (int i = 0; i <= static_cast<int>(UploadBuffer::Last); ++i)
Sync(m_upload_buffers[i]);
if (!m_manual_buffer_upload)
{
ASSERT(!m_upload_cmdbuf && "Should never be used!");
}
else if (m_upload_cmdbuf)
{
[m_upload_encoder updateFence:m_fence];
[m_upload_encoder endEncoding];
[m_upload_cmdbuf commit];
m_upload_encoder = nullptr;
m_upload_cmdbuf = nullptr;
}
if (m_texture_upload_cmdbuf) if (m_texture_upload_cmdbuf)
{ {
[m_texture_upload_encoder endEncoding]; [m_texture_upload_encoder endEncoding];
@ -355,6 +453,8 @@ void Metal::StateTracker::FlushEncoders()
m_last_render_cmdbuf = std::move(m_current_render_cmdbuf); m_last_render_cmdbuf = std::move(m_current_render_cmdbuf);
m_current_render_cmdbuf = nullptr; m_current_render_cmdbuf = nullptr;
m_current_draw++; m_current_draw++;
if (g_features.manual_buffer_upload && !m_manual_buffer_upload)
SetManualBufferUpload(true);
} }
void Metal::StateTracker::WaitForFlushedEncoders() void Metal::StateTracker::WaitForFlushedEncoders()
@ -368,6 +468,23 @@ void Metal::StateTracker::ReloadSamplers()
m_state.samplers[i] = g_object_cache->GetSampler(m_state.sampler_states[i]); m_state.samplers[i] = g_object_cache->GetSampler(m_state.sampler_states[i]);
} }
// Turns manual buffer upload on or off.
//
// Games that force a CPU-GPU sync (bbox, texture downloads, ...) get their next command
// buffer recorded with manual upload disabled, because the sync means the upload can no
// longer overlap with the previous draw. This is a large win for bbox-heavy games such
// as Super Paper Mario.
void Metal::StateTracker::SetManualBufferUpload(bool enabled)
{
  m_manual_buffer_upload = enabled;
  if (!enabled)
    return;

  // Sync() leaves last_upload untouched while manual upload is off, so bring every
  // buffer's upload position up to date before copying resumes.
  for (BufferPair& pair : m_upload_buffers)
    pair.last_upload = pair.usage.Pos();
}
// MARK: State Setters // MARK: State Setters
void Metal::StateTracker::SetPipeline(const Pipeline* pipe) void Metal::StateTracker::SetPipeline(const Pipeline* pipe)

View File

@ -6,6 +6,7 @@
#include "Common/Align.h" #include "Common/Align.h"
#include "Common/Assert.h" #include "Common/Assert.h"
#include "VideoBackends/Metal/MTLRenderer.h"
#include "VideoBackends/Metal/MTLStateTracker.h" #include "VideoBackends/Metal/MTLStateTracker.h"
Metal::Texture::Texture(MRCOwned<id<MTLTexture>> tex, const TextureConfig& config) Metal::Texture::Texture(MRCOwned<id<MTLTexture>> tex, const TextureConfig& config)
@ -50,6 +51,10 @@ void Metal::Texture::ResolveFromTexture(const AbstractTexture* src,
g_state_tracker->ResolveTexture(src_tex, m_tex, layer, level); g_state_tracker->ResolveTexture(src_tex, m_tex, layer, level);
} }
// Use a temporary buffer for large texture loads
// (Since the main upload buffer doesn't shrink after it grows)
static constexpr u32 STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4;
void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, // void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, //
const u8* buffer, size_t buffer_size) const u8* buffer, size_t buffer_size)
{ {
@ -59,8 +64,23 @@ void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, //
const u32 num_rows = Common::AlignUp(height, block_size) / block_size; const u32 num_rows = Common::AlignUp(height, block_size) / block_size;
const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length); const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length);
const u32 upload_size = source_pitch * num_rows; const u32 upload_size = source_pitch * num_rows;
StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::TextureData, MRCOwned<id<MTLBuffer>> tmp_buffer;
upload_size, StateTracker::AlignMask::Other); StateTracker::Map map;
if (upload_size > STAGING_TEXTURE_UPLOAD_THRESHOLD)
{
tmp_buffer = MRCTransfer([g_device
newBufferWithLength:upload_size
options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined]);
[tmp_buffer setLabel:@"Temp Texture Upload"];
map.gpu_buffer = tmp_buffer;
map.gpu_offset = 0;
map.cpu_buffer = [tmp_buffer contents];
}
else
{
map = g_state_tracker->AllocateForTextureUpload(upload_size);
}
memcpy(map.cpu_buffer, buffer, upload_size); memcpy(map.cpu_buffer, buffer, upload_size);
id<MTLBlitCommandEncoder> encoder = g_state_tracker->GetTextureUploadEncoder(); id<MTLBlitCommandEncoder> encoder = g_state_tracker->GetTextureUploadEncoder();
[encoder copyFromBuffer:map.gpu_buffer [encoder copyFromBuffer:map.gpu_buffer
@ -163,6 +183,7 @@ void Metal::StagingTexture::Flush()
{ {
// Flush while we wait, since who knows how long we'll be sitting here // Flush while we wait, since who knows how long we'll be sitting here
g_state_tracker->FlushEncoders(); g_state_tracker->FlushEncoders();
g_state_tracker->NotifyOfCPUGPUSync();
[m_wait_buffer waitUntilCompleted]; [m_wait_buffer waitUntilCompleted];
} }
m_wait_buffer = nullptr; m_wait_buffer = nullptr;

View File

@ -16,6 +16,10 @@ namespace Metal
{ {
struct DeviceFeatures struct DeviceFeatures
{ {
/// Manually copy buffer data to the GPU (instead of letting the GPU read from system memory)
/// On discrete GPUs, this tends to be faster if the copy is able to operate in parallel with a
/// previous render. This is the case unless a game uses features like bbox or texture downloads.
bool manual_buffer_upload;
bool subgroup_ops; bool subgroup_ops;
}; };

View File

@ -217,6 +217,27 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id<MTLDevice>
config->backend_info.AAModes.push_back(i); config->backend_info.AAModes.push_back(i);
} }
switch (config->iManuallyUploadBuffers)
{
case TriState::Off:
g_features.manual_buffer_upload = false;
break;
case TriState::On:
g_features.manual_buffer_upload = true;
break;
case TriState::Auto:
#if TARGET_OS_OSX
g_features.manual_buffer_upload = false;
if (@available(macOS 10.15, *))
if (![device hasUnifiedMemory])
g_features.manual_buffer_upload = true;
#else
// All iOS devices have unified memory
g_features.manual_buffer_upload = false;
#endif
break;
}
g_features.subgroup_ops = false; g_features.subgroup_ops = false;
if (@available(macOS 10.15, iOS 13, *)) if (@available(macOS 10.15, iOS 13, *))
{ {
@ -225,7 +246,7 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id<MTLDevice>
[device supportsFamily:MTLGPUFamilyMac2] || [device supportsFamily:MTLGPUFamilyApple6]; [device supportsFamily:MTLGPUFamilyMac2] || [device supportsFamily:MTLGPUFamilyApple6];
config->backend_info.bSupportsFramebufferFetch = [device supportsFamily:MTLGPUFamilyApple1]; config->backend_info.bSupportsFramebufferFetch = [device supportsFamily:MTLGPUFamilyApple1];
} }
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_SUBGROUP_INVOCATION_ID)) if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_SUBGROUP_OPS))
g_features.subgroup_ops = false; g_features.subgroup_ops = false;
#if TARGET_OS_OSX #if TARGET_OS_OSX
if (@available(macOS 11, *)) if (@available(macOS 11, *))
@ -378,6 +399,12 @@ static const std::string_view MSL_HEADER =
// These are usually when the compiler doesn't think a switch is exhaustive // These are usually when the compiler doesn't think a switch is exhaustive
"#pragma clang diagnostic ignored \"-Wreturn-type\"\n"; "#pragma clang diagnostic ignored \"-Wreturn-type\"\n";
// Textual substitutions applied to the compiled MSL source in TranslateShaderToMSL.
// Each entry is {text to search for, text to emit in its place}.
static constexpr std::pair<std::string_view, std::string_view> MSL_FIXUPS[] = {
// Force-unroll the lighting loop in ubershaders, which greatly reduces register pressure on AMD
{"for (uint chan = 0u; chan < 2u; chan++)",
"_Pragma(\"unroll\") for (uint chan = 0u; chan < 2u; chan++)"},
};
static constexpr spirv_cross::MSLResourceBinding static constexpr spirv_cross::MSLResourceBinding
MakeResourceBinding(spv::ExecutionModel stage, u32 set, u32 binding, // MakeResourceBinding(spv::ExecutionModel stage, u32 set, u32 binding, //
u32 msl_buffer, u32 msl_texture, u32 msl_sampler) u32 msl_buffer, u32 msl_texture, u32 msl_sampler)
@ -474,7 +501,27 @@ std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
for (auto& binding : resource_bindings) for (auto& binding : resource_bindings)
compiler.add_msl_resource_binding(binding); compiler.add_msl_resource_binding(binding);
std::string msl(MSL_HEADER); std::string output(MSL_HEADER);
msl += compiler.compile(); std::string compiled = compiler.compile();
return msl; std::string_view remaining = compiled;
while (!remaining.empty())
{
// Apply fixups
std::string_view piece = remaining;
std::string_view fixup_piece = {};
size_t next = piece.size();
for (const auto& fixup : MSL_FIXUPS)
{
size_t found = piece.find(fixup.first);
if (found == std::string_view::npos)
continue;
piece = piece.substr(0, found);
fixup_piece = fixup.second;
next = found + fixup.first.size();
}
output += piece;
output += fixup_piece;
remaining = remaining.substr(next);
}
return output;
} }

View File

@ -918,7 +918,7 @@ void VulkanContext::PopulateShaderSubgroupSupport()
m_supports_shader_subgroup_operations = m_supports_shader_subgroup_operations =
(subgroup_properties.supportedOperations & required_operations) == required_operations && (subgroup_properties.supportedOperations & required_operations) == required_operations &&
subgroup_properties.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT && subgroup_properties.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT &&
!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_SUBGROUP_INVOCATION_ID); !DriverDetails::HasBug(DriverDetails::BUG_BROKEN_SUBGROUP_OPS);
} }
bool VulkanContext::SupportsExclusiveFullscreen(const WindowSystemInfo& wsi, VkSurfaceKHR surface) bool VulkanContext::SupportsExclusiveFullscreen(const WindowSystemInfo& wsi, VkSurfaceKHR surface)

View File

@ -132,10 +132,14 @@ constexpr BugInfo m_known_bugs[] = {
-1.0, -1.0, true}, -1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKEN_VECTOR_BITWISE_AND, {API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_BROKEN_VECTOR_BITWISE_AND,
-1.0, -1.0, true}, -1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_ATI, DRIVER_PORTABILITY, Family::UNKNOWN, {API_VULKAN, OS_OSX, VENDOR_ATI, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_SUBGROUP_OPS,
BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true}, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_ATI, DRIVER_APPLE, Family::UNKNOWN, {API_VULKAN, OS_OSX, VENDOR_INTEL, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_SUBGROUP_OPS,
BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true}, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_ATI, DRIVER_APPLE, Family::UNKNOWN, BUG_BROKEN_SUBGROUP_OPS, -1.0,
-1.0, true},
{API_METAL, OS_OSX, VENDOR_INTEL, DRIVER_APPLE, Family::UNKNOWN, BUG_BROKEN_SUBGROUP_OPS, -1.0,
-1.0, true},
{API_OPENGL, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, {API_OPENGL, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN,
BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true}, BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true},
{API_VULKAN, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, {API_VULKAN, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN,

View File

@ -306,10 +306,15 @@ enum Bug
BUG_BROKEN_VECTOR_BITWISE_AND, BUG_BROKEN_VECTOR_BITWISE_AND,
// BUG: Accessing gl_SubgroupInvocationID causes the Metal shader compiler to crash. // BUG: Accessing gl_SubgroupInvocationID causes the Metal shader compiler to crash.
// Affected devices: AMD (macOS) // Affected devices: AMD (older macOS)
// BUG: gl_HelperInvocation always returns true, even for non-helper invocations
// Affected devices: AMD (newer macOS)
// BUG: Using subgroupMax in a shader that can discard results in garbage data
// (For some reason, this only happens at 4x+ IR on Metal, but 2x+ IR on MoltenVK)
// Affected devices: Intel (macOS)
// Started version: -1 // Started version: -1
// Ended version: -1 // Ended version: -1
BUG_BROKEN_SUBGROUP_INVOCATION_ID, BUG_BROKEN_SUBGROUP_OPS,
// BUG: Multi-threaded shader pre-compilation sometimes crashes // BUG: Multi-threaded shader pre-compilation sometimes crashes
// Used primarily in Videoconfig.cpp's GetNumAutoShaderPreCompilerThreads() // Used primarily in Videoconfig.cpp's GetNumAutoShaderPreCompilerThreads()

View File

@ -55,6 +55,8 @@ void VideoConfig::Refresh()
bVSync = Config::Get(Config::GFX_VSYNC); bVSync = Config::Get(Config::GFX_VSYNC);
iAdapter = Config::Get(Config::GFX_ADAPTER); iAdapter = Config::Get(Config::GFX_ADAPTER);
iManuallyUploadBuffers = Config::Get(Config::GFX_MTL_MANUALLY_UPLOAD_BUFFERS);
bUsePresentDrawable = Config::Get(Config::GFX_MTL_USE_PRESENT_DRAWABLE);
bWidescreenHack = Config::Get(Config::GFX_WIDESCREEN_HACK); bWidescreenHack = Config::Get(Config::GFX_WIDESCREEN_HACK);
aspect_mode = Config::Get(Config::GFX_ASPECT_RATIO); aspect_mode = Config::Get(Config::GFX_ASPECT_RATIO);

View File

@ -45,6 +45,13 @@ enum class ShaderCompilationMode : int
AsynchronousSkipRendering AsynchronousSkipRendering
}; };
// Three-way setting for options that can be forced either way or left to the backend.
enum class TriState : int
{
// Always disabled
Off,
// Always enabled
On,
// Let the backend decide (e.g. Metal picks manual buffer upload based on the device)
Auto
};
// NEVER inherit from this class. // NEVER inherit from this class.
struct VideoConfig final struct VideoConfig final
{ {
@ -149,6 +156,10 @@ struct VideoConfig final
// D3D only config, mostly to be merged into the above // D3D only config, mostly to be merged into the above
int iAdapter = 0; int iAdapter = 0;
// Metal only config
TriState iManuallyUploadBuffers = TriState::Auto;
bool bUsePresentDrawable = false;
// Enable API validation layers, currently only supported with Vulkan. // Enable API validation layers, currently only supported with Vulkan.
bool bEnableValidationLayer = false; bool bEnableValidationLayer = false;