diff --git a/Source/Core/Common/CPUDetect.h b/Source/Core/Common/CPUDetect.h index be807e6fef..db377a2006 100644 --- a/Source/Core/Common/CPUDetect.h +++ b/Source/Core/Common/CPUDetect.h @@ -26,7 +26,6 @@ struct CPUInfo bool HTT = false; int num_cores = 0; - int logical_cpu_count = 0; bool bSSE = false; bool bSSE2 = false; diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp index 2cac9fb640..644d485ed8 100644 --- a/Source/Core/Common/x64CPUDetect.cpp +++ b/Source/Core/Common/x64CPUDetect.cpp @@ -5,6 +5,7 @@ #include #include +#include #include "Common/CommonTypes.h" #include "Common/Intrinsics.h" @@ -107,7 +108,6 @@ void CPUInfo::Detect() // Detect family and other misc stuff. bool ht = false; HTT = ht; - logical_cpu_count = 1; if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); @@ -121,9 +121,13 @@ void CPUInfo::Detect() // Detect AMD Zen1, Zen1+ and Zen2 if (family == 23) bZen1p2 = true; - logical_cpu_count = (cpu_id[1] >> 16) & 0xFF; ht = (cpu_id[3] >> 28) & 1; + // AMD CPUs before Zen faked this flag and didn't actually + // implement simultaneous multithreading (SMT; Intel calls it HTT) + // but rather some weird middle-ground between 1-2 cores + HTT = ht && (vendor == CPUVendor::Intel || family >= 23); + if ((cpu_id[3] >> 25) & 1) bSSE = true; if ((cpu_id[3] >> 26) & 1) @@ -201,35 +205,10 @@ void CPUInfo::Detect() bLongMode = true; } - num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count; - - if (max_ex_fn >= 0x80000008) - { - // Get number of cores. This is a bit complicated. Following AMD manual here. - __cpuid(cpu_id, 0x80000008); - int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF; - if (apic_id_core_id_size == 0) - { - if (ht) - { - // New mechanism for modern Intel CPUs. 
- if (vendor == CPUVendor::Intel) - { - __cpuidex(cpu_id, 0x00000004, 0x00000000); - int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1; - HTT = (cores_x_package < logical_cpu_count); - cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1; - num_cores = (cores_x_package > 1) ? cores_x_package : num_cores; - logical_cpu_count /= cores_x_package; - } - } - } - else - { - // Use AMD's new method. - num_cores = (cpu_id[2] & 0xFF) + 1; - } - } + // Query the number of hardware threads from the standard library: this is more + // reliable and easier than decoding the core count from the CPUID data ourselves. + // hardware_concurrency() may return 0 if the value is unknown, so clamp to at least 1. + num_cores = std::max(std::thread::hardware_concurrency(), 1u); } // Turn the CPU info into a string we can show diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index b10a20d918..1015bd47aa 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -86,7 +86,7 @@ const Info GFX_SHADER_COMPILATION_MODE{ {System::GFX, "Settings", "ShaderCompilationMode"}, ShaderCompilationMode::Synchronous}; const Info GFX_SHADER_COMPILER_THREADS{{System::GFX, "Settings", "ShaderCompilerThreads"}, 1}; const Info GFX_SHADER_PRECOMPILER_THREADS{ - {System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1}; + {System::GFX, "Settings", "ShaderPrecompilerThreads"}, -1}; const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE{ {System::GFX, "Settings", "SaveTextureCacheToState"}, true}; diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index c7f6412e8d..bb48a06500 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -132,6 +132,21 @@ constexpr BugInfo m_known_bugs[] = { -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_ATI, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true}, + // Default cases for broken MT precompilation + // Default cases get replaced by 
known-good places during init + {API_OPENGL, OS_ALL, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true}, + {API_VULKAN, OS_ALL, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true}, + // known good cases for broken MT precompilation + {API_OPENGL, OS_OSX, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, + {API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, + {API_OPENGL, OS_WINDOWS, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, + {API_VULKAN, OS_WINDOWS, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, false}, }; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 5e3f2ce161..a81ed3d6ff 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -314,6 +314,14 @@ enum Bug // Started version: -1 // Ended version: -1 BUG_BROKEN_SUBGROUP_INVOCATION_ID, + + // BUG: Multi-threaded shader pre-compilation sometimes crashes + // Used primarily in VideoConfig.cpp's VideoConfig::GetShaderPrecompilerThreads() + // Refer to https://github.com/dolphin-emu/dolphin/pull/9414 for initial validation coverage + // All untested platforms will report as having this bug so as to avoid crashes + // Note that things will most likely work out fine on D3D, + // so we didn't extend the Bug API to also support D3D + BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION }; // Initializes our internal vendor, device family, and driver version diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index a5c9834dc8..a835625691 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ 
b/Source/Core/VideoCommon/VideoConfig.cpp @@ -12,6 +12,7 @@ #include "Core/ConfigManager.h" #include "Core/Core.h" #include "Core/Movie.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/VideoCommon.h" @@ -177,6 +178,14 @@ static u32 GetNumAutoShaderCompilerThreads() return static_cast(std::min(std::max(cpu_info.num_cores - 3, 1), 4)); } +static u32 GetNumAutoShaderPreCompilerThreads() +{ + // Automatic number. We use max(cpus - 2, 1) here, i.e. there is no upper bound. + // We chose this because we don't want to limit our speed-up + // and at the same time leave two logical cores for the Dolphin UI and the rest of the OS. + return static_cast(std::max(cpu_info.num_cores - 2, 1)); +} + u32 VideoConfig::GetShaderPrecompilerThreads() const { if (!backend_info.bSupportsBackgroundCompiling) @@ -197,8 +206,17 @@ u32 VideoConfig::GetShaderPrecompilerThreads() const if (!backend_info.bSupportsBackgroundCompiling) return 0; + const bool bugDatabaseSupported = + backend_info.api_type == APIType::OpenGL || backend_info.api_type == APIType::Vulkan; + // Backends outside the bug database are assumed fine: DirectX has always worked in our tests in PR#9414 + const bool multiThreadingWorking = + !bugDatabaseSupported || + !DriverDetails::HasBug(DriverDetails::BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION); + if (iShaderPrecompilerThreads >= 0) return static_cast(iShaderPrecompilerThreads); + else if (multiThreadingWorking) + return GetNumAutoShaderPreCompilerThreads(); else - return GetNumAutoShaderCompilerThreads(); + return 1; }