From 99f0c3fa01e7efe5cabf65f634b6b7df4eca27b0 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 17 May 2023 20:15:33 -0500 Subject: [PATCH] VideoCommon: Add ability for backends to override bugs --- Source/Core/VideoBackends/Metal/MTLUtil.mm | 176 +++++++++++++++------ Source/Core/VideoCommon/DriverDetails.cpp | 11 ++ Source/Core/VideoCommon/DriverDetails.h | 3 + 3 files changed, 139 insertions(+), 51 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index 987d36cbfc..5870cc5ad5 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -89,6 +89,68 @@ void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, } } +/// For testing driver brokenness +static bool RenderSinglePixel(id dev, id vs, id fs, // + u32 px_in, u32* px_out) +{ + auto pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]); + [pdesc setVertexFunction:vs]; + [pdesc setFragmentFunction:fs]; + [[pdesc colorAttachments][0] setPixelFormat:MTLPixelFormatRGBA8Unorm]; + auto pipe = MRCTransfer([dev newRenderPipelineStateWithDescriptor:pdesc error:nil]); + if (!pipe) + return false; + auto buf = MRCTransfer([dev newBufferWithLength:4 options:MTLResourceStorageModeShared]); + memcpy([buf contents], &px_in, sizeof(px_in)); + auto tdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm + width:1 + height:1 + mipmapped:false]; + [tdesc setUsage:MTLTextureUsageRenderTarget]; + auto tex = MRCTransfer([dev newTextureWithDescriptor:tdesc]); + auto q = MRCTransfer([dev newCommandQueue]); + id cmdbuf = [q commandBuffer]; + + id upload_encoder = [cmdbuf blitCommandEncoder]; + [upload_encoder copyFromBuffer:buf + sourceOffset:0 + sourceBytesPerRow:4 + sourceBytesPerImage:4 + sourceSize:MTLSizeMake(1, 1, 1) + toTexture:tex + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + [upload_encoder endEncoding]; + + auto rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); + [[rpdesc colorAttachments][0] setTexture:tex]; + [[rpdesc colorAttachments][0] setLoadAction:MTLLoadActionLoad]; + [[rpdesc colorAttachments][0] setStoreAction:MTLStoreActionStore]; + id renc = [cmdbuf renderCommandEncoderWithDescriptor:rpdesc]; + [renc setRenderPipelineState:pipe]; + [renc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; + [renc endEncoding]; + + id download_encoder = [cmdbuf blitCommandEncoder]; + [download_encoder copyFromTexture:tex + sourceSlice:0 + sourceLevel:0 + sourceOrigin:MTLOriginMake(0, 0, 0) + sourceSize:MTLSizeMake(1, 1, 1) + toBuffer:buf + destinationOffset:0 + destinationBytesPerRow:4 + destinationBytesPerImage:4]; + [download_encoder endEncoding]; + + [cmdbuf commit]; + [cmdbuf waitUntilCompleted]; + + memcpy(px_out, [buf contents], sizeof(*px_out)); + return [cmdbuf status] == MTLCommandBufferStatusCompleted; +} + static bool DetectIntelGPUFBFetch(id dev) { // Even though it's nowhere in the feature set tables, some Intel GPUs support fbfetch! @@ -111,58 +173,14 @@ fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) { error:nil]); if (!lib) return false; - auto pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]); - [pdesc setVertexFunction:MRCTransfer([lib newFunctionWithName:@"fs_triangle"])]; - [pdesc setFragmentFunction:MRCTransfer([lib newFunctionWithName:@"fbfetch_test"])]; - [[pdesc colorAttachments][0] setPixelFormat:MTLPixelFormatRGBA8Unorm]; - auto pipe = MRCTransfer([dev newRenderPipelineStateWithDescriptor:pdesc error:nil]); - if (!pipe) - return false; - auto buf = MRCTransfer([dev newBufferWithLength:4 options:MTLResourceStorageModeShared]); - auto tdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm - width:1 - height:1 - mipmapped:false]; - [tdesc setUsage:MTLTextureUsageRenderTarget]; - auto tex = MRCTransfer([dev newTextureWithDescriptor:tdesc]); - auto q = MRCTransfer([dev newCommandQueue]); - u32 px = 0x11223344; - memcpy([buf contents], &px, 4); - id cmdbuf = [q commandBuffer]; - id upload_encoder = [cmdbuf blitCommandEncoder]; - [upload_encoder copyFromBuffer:buf - sourceOffset:0 - sourceBytesPerRow:4 - sourceBytesPerImage:4 - sourceSize:MTLSizeMake(1, 1, 1) - toTexture:tex - destinationSlice:0 - destinationLevel:0 - destinationOrigin:MTLOriginMake(0, 0, 0)]; - [upload_encoder endEncoding]; - auto rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); - [[rpdesc colorAttachments][0] setTexture:tex]; - [[rpdesc colorAttachments][0] setLoadAction:MTLLoadActionLoad]; - [[rpdesc colorAttachments][0] setStoreAction:MTLStoreActionStore]; - id renc = [cmdbuf renderCommandEncoderWithDescriptor:rpdesc]; - [renc setRenderPipelineState:pipe]; - [renc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; - [renc endEncoding]; - id download_encoder = [cmdbuf blitCommandEncoder]; - [download_encoder copyFromTexture:tex - sourceSlice:0 - sourceLevel:0 - sourceOrigin:MTLOriginMake(0, 0, 0) - sourceSize:MTLSizeMake(1, 1, 1) - toBuffer:buf - destinationOffset:0 - destinationBytesPerRow:4 - destinationBytesPerImage:4]; - [download_encoder endEncoding]; - [cmdbuf commit]; - [cmdbuf waitUntilCompleted]; u32 outpx; - memcpy(&outpx, [buf contents], 4); + bool ok = RenderSinglePixel(dev, // + MRCTransfer([lib newFunctionWithName:@"fs_triangle"]), // + MRCTransfer([lib newFunctionWithName:@"fbfetch_test"]), // + 0x11223344, &outpx); + if (!ok) + return false; + // Proper fbfetch will double contents, Haswell will return black, and Broadwell will do nothing if (outpx == 0x22446688) return true; // Skylake+ @@ -172,6 +190,52 @@ fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) { return false; // Haswell } +enum class DetectionResult +{ + Yes, + No, + Unsure +}; + +static DetectionResult DetectInvertedIsHelper(id dev) +{ + static constexpr const char* shader = R"( +vertex float4 fs_triangle(uint vid [[vertex_id]]) { + return float4(vid & 1 ? 3 : -1, vid & 2 ? 3 : -1, 0, 1); +} +fragment float4 is_helper_test() { + float val = metal::simd_is_helper_thread() ? 1 : 0.5; + return float4(val, metal::dfdx(val) + 0.5, metal::dfdy(val) + 0.5, 0); +} +)"; + + auto lib = MRCTransfer([dev newLibraryWithSource:[NSString stringWithUTF8String:shader] + options:nil + error:nil]); + if (!lib) + return DetectionResult::Unsure; + + u32 outpx; + bool ok = RenderSinglePixel(dev, // + MRCTransfer([lib newFunctionWithName:@"fs_triangle"]), // + MRCTransfer([lib newFunctionWithName:@"is_helper_test"]), // + 0, &outpx); + + // The pixel itself should not be a helper thread (0.5) + // The pixels to its right and below should be helper threads (1.0) + // Correctly working would therefore be 0.5 for the pixel and (0.5 + 0.5) for the derivatives + // Inverted would be 1.0 for the pixel and (-0.5 + 0.5) for the derivatives + if (!ok) + return DetectionResult::Unsure; + if (outpx == 0xffff80) + return DetectionResult::No; // Working correctly + if (outpx == 0x0000ff) + return DetectionResult::Yes; // Inverted + WARN_LOG_FMT(VIDEO, "metal::simd_is_helper_thread might be broken! Test shader returned {:06x}!", + outpx); + return DetectionResult::Unsure; +} + void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id device) { // Initialize DriverDetails first so we can use it later @@ -245,6 +309,16 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id [device supportsFamily:MTLGPUFamilyMac2] || [device supportsFamily:MTLGPUFamilyApple6]; config->backend_info.bSupportsFramebufferFetch = [device supportsFamily:MTLGPUFamilyApple1]; } + if (g_features.subgroup_ops) + { + DetectionResult result = DetectInvertedIsHelper(device); + if (result != DetectionResult::Unsure) + { + bool is_helper_inverted = result == DetectionResult::Yes; + if (is_helper_inverted != DriverDetails::HasBug(DriverDetails::BUG_INVERTED_IS_HELPER)) + DriverDetails::OverrideBug(DriverDetails::BUG_INVERTED_IS_HELPER, is_helper_inverted); + } + } #if TARGET_OS_OSX if (@available(macOS 11, *)) if (vendor == DriverDetails::VENDOR_INTEL) diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index 351a673414..14594a489c 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -217,4 +217,15 @@ bool HasBug(Bug bug) return false; return it->second.m_hasbug; } + +void OverrideBug(Bug bug, bool new_value) +{ + const auto [it, added] = m_bugs.try_emplace( + bug, BugInfo{m_api, m_os, m_vendor, m_driver, m_family, bug, -1, -1, false}); + if (it->second.m_hasbug != new_value) + { + // TODO: Report to DolphinAnalytics? + it->second.m_hasbug = new_value; + } +} } // namespace DriverDetails diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 64ff4b89af..5b0f2bfcea 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -350,4 +350,7 @@ void Init(API api, Vendor vendor, Driver driver, const double version, const Fam // Once Vendor and driver version is set, this will return if it has the applicable bug passed to // it. bool HasBug(Bug bug); + +// Overrides the current state of a bug +void OverrideBug(Bug bug, bool new_value); } // namespace DriverDetails