From 7f57e27a6502e6ed8e9ec07d8e545ac3a29c969a Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Thu, 14 May 2015 23:14:34 +0200 Subject: [PATCH] x64CPUDetect: fix AVX2/BMI1/BMI2/HTT detection On non-Windows platforms, ECX was not being initialized to 0. --- Source/Core/Common/x64CPUDetect.cpp | 69 +++++++++++------------------ 1 file changed, 25 insertions(+), 44 deletions(-) diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp index d8825d3d50..eb195b6092 100644 --- a/Source/Core/Common/x64CPUDetect.cpp +++ b/Source/Core/Common/x64CPUDetect.cpp @@ -11,64 +11,45 @@ #ifndef _WIN32 -#if defined __FreeBSD__ +#ifdef __FreeBSD__ #include #include -#else -static inline void do_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) +#endif + +static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { -#if defined _LP64 - // Note: EBX is reserved on Mac OS X and in PIC on Linux, so it has to - // restored at the end of the asm block. - __asm__( - "cpuid;" - "movl %%ebx,%1;" - : "=a" (*eax), - "=S" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "a" (*eax) - : "rbx" - ); +#ifdef __FreeBSD__ + // Despite the name, this is just do_cpuid() with ECX as second input. + cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); #else + info[0] = function_id; // eax + info[2] = subfunction_id; // ecx __asm__( - "cpuid;" - "movl %%ebx,%1;" - : "=a" (*eax), - "=S" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "a" (*eax) - : "ebx" - ); + "cpuid" + : "=a" (info[0]), + "=b" (info[1]), + "=c" (info[2]), + "=d" (info[3]) + : "a" (function_id), + "c" (subfunction_id) + ); #endif } -#endif /* defined __FreeBSD__ */ -static void __cpuid(int info[4], int x) +static inline void __cpuid(int info[4], int function_id) { -#if defined __FreeBSD__ - do_cpuid((unsigned int)x, (unsigned int*)info); -#else - unsigned int eax = x, ebx = 0, ecx = 0, edx = 0; - do_cpuid(&eax, &ebx, &ecx, &edx); - info[0] = eax; - info[1] = ebx; - info[2] = ecx; - info[3] = edx; -#endif + return __cpuidex(info, function_id, 0); } #define _XCR_XFEATURE_ENABLED_MASK 0 -static unsigned long long _xgetbv(unsigned int index) +static u64 _xgetbv(u32 index) { - unsigned int eax, edx; + u32 eax, edx; __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); - return ((unsigned long long)edx << 32) | eax; + return ((u64)edx << 32) | eax; } -#endif +#endif // ifndef _WIN32 CPUInfo cpu_info; @@ -165,7 +146,7 @@ void CPUInfo::Detect() if (max_std_fn >= 7) { - __cpuid(cpu_id, 0x00000007); + __cpuidex(cpu_id, 0x00000007, 0x00000000); // careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed if ((cpu_id[1] >> 5) & 1) bAVX2 = bAVX; @@ -211,7 +192,7 @@ void CPUInfo::Detect() // New mechanism for modern Intel CPUs. if (vendor == VENDOR_INTEL) { - __cpuid(cpu_id, 0x00000004); + __cpuidex(cpu_id, 0x00000004, 0x00000000); int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1; HTT = (cores_x_package < logical_cpu_count); cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1;