x64CPUDetect: fix AVX2/BMI1/BMI2/HTT detection

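On non-Windows platforms, ECX was not initialized to 0 before executing CPUID, so leaves that take a sub-leaf index in ECX (leaf 4 and leaf 7 here) could return data for the wrong sub-leaf.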
This commit is contained in:
Tillmann Karras 2015-05-14 23:14:34 +02:00
parent 8290fa1e46
commit 7f57e27a65

View File

@ -11,64 +11,45 @@
#ifndef _WIN32 #ifndef _WIN32
#if defined __FreeBSD__ #ifdef __FreeBSD__
#include <sys/types.h> #include <sys/types.h>
#include <machine/cpufunc.h> #include <machine/cpufunc.h>
#else #endif
static inline void do_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx) static inline void __cpuidex(int info[4], int function_id, int subfunction_id)
{ {
#if defined _LP64 #ifdef __FreeBSD__
// Note: EBX is reserved on Mac OS X and in PIC on Linux, so it has to // Despite the name, this is just do_cpuid() with ECX as second input.
// be restored at the end of the asm block. cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
__asm__(
"cpuid;"
"movl %%ebx,%1;"
: "=a" (*eax),
"=S" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "a" (*eax)
: "rbx"
);
#else #else
info[0] = function_id; // eax
info[2] = subfunction_id; // ecx
__asm__( __asm__(
"cpuid;" "cpuid"
"movl %%ebx,%1;" : "=a" (info[0]),
: "=a" (*eax), "=b" (info[1]),
"=S" (*ebx), "=c" (info[2]),
"=c" (*ecx), "=d" (info[3])
"=d" (*edx) : "a" (function_id),
: "a" (*eax) "c" (subfunction_id)
: "ebx" );
);
#endif #endif
} }
#endif /* defined __FreeBSD__ */
static void __cpuid(int info[4], int x) static inline void __cpuid(int info[4], int function_id)
{ {
#if defined __FreeBSD__ return __cpuidex(info, function_id, 0);
do_cpuid((unsigned int)x, (unsigned int*)info);
#else
unsigned int eax = x, ebx = 0, ecx = 0, edx = 0;
do_cpuid(&eax, &ebx, &ecx, &edx);
info[0] = eax;
info[1] = ebx;
info[2] = ecx;
info[3] = edx;
#endif
} }
#define _XCR_XFEATURE_ENABLED_MASK 0 #define _XCR_XFEATURE_ENABLED_MASK 0
static unsigned long long _xgetbv(unsigned int index) static u64 _xgetbv(u32 index)
{ {
unsigned int eax, edx; u32 eax, edx;
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
return ((unsigned long long)edx << 32) | eax; return ((u64)edx << 32) | eax;
} }
#endif #endif // ifndef _WIN32
CPUInfo cpu_info; CPUInfo cpu_info;
@ -165,7 +146,7 @@ void CPUInfo::Detect()
if (max_std_fn >= 7) if (max_std_fn >= 7)
{ {
__cpuid(cpu_id, 0x00000007); __cpuidex(cpu_id, 0x00000007, 0x00000000);
// careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed // careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed
if ((cpu_id[1] >> 5) & 1) if ((cpu_id[1] >> 5) & 1)
bAVX2 = bAVX; bAVX2 = bAVX;
@ -211,7 +192,7 @@ void CPUInfo::Detect()
// New mechanism for modern Intel CPUs. // New mechanism for modern Intel CPUs.
if (vendor == VENDOR_INTEL) if (vendor == VENDOR_INTEL)
{ {
__cpuid(cpu_id, 0x00000004); __cpuidex(cpu_id, 0x00000004, 0x00000000);
int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1; int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1;
HTT = (cores_x_package < logical_cpu_count); HTT = (cores_x_package < logical_cpu_count);
cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1; cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1;