1
0
mirror of https://github.com/google/cpu_features.git synced 2025-04-28 15:33:37 +02:00

[NFC] refactor the code so it's easier to understand the execution flow (#161)

This commit is contained in:
Guillaume Chatelet 2021-06-23 14:21:05 +00:00 committed by GitHub
parent 40ef93802f
commit 646b80fa3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1181,41 +1181,11 @@ static bool GetDarwinSysCtlByName(const char* name) {
// Internal structure to hold the OS support for vector operations. // Internal structure to hold the OS support for vector operations.
// Avoid to recompute them since each call to cpuid is ~100 cycles. // Avoid to recompute them since each call to cpuid is ~100 cycles.
typedef struct { typedef struct {
bool have_sse_via_os; bool sse_registers;
bool have_sse_via_cpuid; bool avx_registers;
bool have_avx; bool avx512_registers;
bool have_avx512; bool amx_registers;
bool have_amx; } OsPreserves;
} OsSupport;
static const OsSupport kEmptyOsSupport;
static OsSupport CheckOsSupport(const uint32_t max_cpuid_leaf) {
const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
const bool have_xsave = IsBitSet(leaf_1.ecx, 26);
const bool have_osxsave = IsBitSet(leaf_1.ecx, 27);
const bool have_xcr0 = have_xsave && have_osxsave;
OsSupport os_support = kEmptyOsSupport;
if (have_xcr0) {
// AVX capable cpu will expose XCR0.
const uint32_t xcr0_eax = GetXCR0Eax();
os_support.have_sse_via_cpuid = HasXmmOsXSave(xcr0_eax);
os_support.have_avx = HasYmmOsXSave(xcr0_eax);
#if defined(CPU_FEATURES_OS_DARWIN)
os_support.have_avx512 = GetDarwinSysCtlByName("hw.optional.avx512f");
#else
os_support.have_avx512 = HasZmmOsXSave(xcr0_eax);
#endif // CPU_FEATURES_OS_DARWIN
os_support.have_amx = HasTmmOsXSave(xcr0_eax);
} else {
// Atom based or older cpus need to ask the OS for sse support.
os_support.have_sse_via_os = true;
}
return os_support;
}
#if defined(CPU_FEATURES_OS_WINDOWS) #if defined(CPU_FEATURES_OS_WINDOWS)
#if defined(CPU_FEATURES_MOCK_CPUID_X86) #if defined(CPU_FEATURES_MOCK_CPUID_X86)
@ -1227,8 +1197,124 @@ static bool GetWindowsIsProcessorFeaturePresent(DWORD ProcessorFeature) {
#endif #endif
#endif // CPU_FEATURES_OS_WINDOWS #endif // CPU_FEATURES_OS_WINDOWS
static void DetectSseViaOs(X86Features* features) { // Reference https://en.wikipedia.org/wiki/CPUID.
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info,
OsPreserves* os_preserves) {
const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7);
const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1);
const bool have_xsave = IsBitSet(leaf_1.ecx, 26);
const bool have_osxsave = IsBitSet(leaf_1.ecx, 27);
const bool have_xcr0 = have_xsave && have_osxsave;
const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8);
const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20);
const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4);
const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16);
X86Features* const features = &info->features;
info->family = extended_family + family;
info->model = (extended_model << 4) + model;
info->stepping = ExtractBitRange(leaf_1.eax, 3, 0);
features->fpu = IsBitSet(leaf_1.edx, 0);
features->tsc = IsBitSet(leaf_1.edx, 4);
features->cx8 = IsBitSet(leaf_1.edx, 8);
features->clfsh = IsBitSet(leaf_1.edx, 19);
features->mmx = IsBitSet(leaf_1.edx, 23);
features->ss = IsBitSet(leaf_1.edx, 27);
features->pclmulqdq = IsBitSet(leaf_1.ecx, 1);
features->smx = IsBitSet(leaf_1.ecx, 6);
features->cx16 = IsBitSet(leaf_1.ecx, 13);
features->dca = IsBitSet(leaf_1.ecx, 18);
features->movbe = IsBitSet(leaf_1.ecx, 22);
features->popcnt = IsBitSet(leaf_1.ecx, 23);
features->aes = IsBitSet(leaf_1.ecx, 25);
features->f16c = IsBitSet(leaf_1.ecx, 29);
features->rdrnd = IsBitSet(leaf_1.ecx, 30);
features->sgx = IsBitSet(leaf_7.ebx, 2);
features->bmi1 = IsBitSet(leaf_7.ebx, 3);
features->hle = IsBitSet(leaf_7.ebx, 4);
features->bmi2 = IsBitSet(leaf_7.ebx, 8);
features->erms = IsBitSet(leaf_7.ebx, 9);
features->rtm = IsBitSet(leaf_7.ebx, 11);
features->rdseed = IsBitSet(leaf_7.ebx, 18);
features->clflushopt = IsBitSet(leaf_7.ebx, 23);
features->clwb = IsBitSet(leaf_7.ebx, 24);
features->sha = IsBitSet(leaf_7.ebx, 29);
features->vaes = IsBitSet(leaf_7.ecx, 9);
features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10);
features->adx = IsBitSet(leaf_7.ebx, 19);
/////////////////////////////////////////////////////////////////////////////
// The following section is devoted to Vector Extensions.
/////////////////////////////////////////////////////////////////////////////
// CPU with AVX expose XCR0 which enables checking vector extensions OS
// support through cpuid.
if (have_xcr0) {
// Here we rely exclusively on cpuid for both CPU and OS support of vector
// extensions.
const uint32_t xcr0_eax = GetXCR0Eax();
os_preserves->sse_registers = HasXmmOsXSave(xcr0_eax);
os_preserves->avx_registers = HasYmmOsXSave(xcr0_eax);
#if defined(CPU_FEATURES_OS_DARWIN)
// On Darwin AVX512 support is On-demand.
// We have to query the OS instead of querying the Zmm save/restore state.
// https://github.com/apple/darwin-xnu/blob/8f02f2a044b9bb1ad951987ef5bab20ec9486310/osfmk/i386/fpu.c#L173-L199
os_preserves->avx512_registers =
GetDarwinSysCtlByName("hw.optional.avx512f");
#else
os_preserves->avx512_registers = HasZmmOsXSave(xcr0_eax);
#endif // CPU_FEATURES_OS_DARWIN
os_preserves->amx_registers = HasTmmOsXSave(xcr0_eax);
if (os_preserves->sse_registers) {
features->sse = IsBitSet(leaf_1.edx, 25);
features->sse2 = IsBitSet(leaf_1.edx, 26);
features->sse3 = IsBitSet(leaf_1.ecx, 0);
features->ssse3 = IsBitSet(leaf_1.ecx, 9);
features->sse4_1 = IsBitSet(leaf_1.ecx, 19);
features->sse4_2 = IsBitSet(leaf_1.ecx, 20);
}
if (os_preserves->avx_registers) {
features->fma3 = IsBitSet(leaf_1.ecx, 12);
features->avx = IsBitSet(leaf_1.ecx, 28);
features->avx2 = IsBitSet(leaf_7.ebx, 5);
}
if (os_preserves->avx512_registers) {
features->avx512f = IsBitSet(leaf_7.ebx, 16);
features->avx512cd = IsBitSet(leaf_7.ebx, 28);
features->avx512er = IsBitSet(leaf_7.ebx, 27);
features->avx512pf = IsBitSet(leaf_7.ebx, 26);
features->avx512bw = IsBitSet(leaf_7.ebx, 30);
features->avx512dq = IsBitSet(leaf_7.ebx, 17);
features->avx512vl = IsBitSet(leaf_7.ebx, 31);
features->avx512ifma = IsBitSet(leaf_7.ebx, 21);
features->avx512vbmi = IsBitSet(leaf_7.ecx, 1);
features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6);
features->avx512vnni = IsBitSet(leaf_7.ecx, 11);
features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3);
features->avx512_second_fma = HasSecondFMA(info->model);
features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3);
features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5);
features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8);
}
if (os_preserves->amx_registers) {
features->amx_bf16 = IsBitSet(leaf_7.edx, 22);
features->amx_tile = IsBitSet(leaf_7.edx, 24);
features->amx_int8 = IsBitSet(leaf_7.edx, 25);
}
} else {
// When XCR0 is not available (Atom based or older cpus) we need to defer to
// the OS via custom code.
#if defined(CPU_FEATURES_OS_WINDOWS) #if defined(CPU_FEATURES_OS_WINDOWS)
// Handling Windows platform through IsProcessorFeaturePresent.
// https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
features->sse = features->sse =
GetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); GetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE);
@ -1272,121 +1358,34 @@ static void DetectSseViaOs(X86Features* features) {
#else #else
#error "Unsupported fallback detection of SSE OS support." #error "Unsupported fallback detection of SSE OS support."
#endif #endif
} // Now that we have queried the OS for SSE support, we report this back to
// os_preserves. This is needed in case of AMD CPU's to enable testing of
// Reference https://en.wikipedia.org/wiki/CPUID. // sse4a (See ParseExtraAMDCpuId below).
static void ParseCpuId(const uint32_t max_cpuid_leaf, if (features->sse) os_preserves->sse_registers = true;
const OsSupport os_support, X86Info* info) {
const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7);
const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1);
const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8);
const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20);
const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4);
const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16);
X86Features* const features = &info->features;
info->family = extended_family + family;
info->model = (extended_model << 4) + model;
info->stepping = ExtractBitRange(leaf_1.eax, 3, 0);
features->fpu = IsBitSet(leaf_1.edx, 0);
features->tsc = IsBitSet(leaf_1.edx, 4);
features->cx8 = IsBitSet(leaf_1.edx, 8);
features->clfsh = IsBitSet(leaf_1.edx, 19);
features->mmx = IsBitSet(leaf_1.edx, 23);
features->ss = IsBitSet(leaf_1.edx, 27);
features->pclmulqdq = IsBitSet(leaf_1.ecx, 1);
features->smx = IsBitSet(leaf_1.ecx, 6);
features->cx16 = IsBitSet(leaf_1.ecx, 13);
features->dca = IsBitSet(leaf_1.ecx, 18);
features->movbe = IsBitSet(leaf_1.ecx, 22);
features->popcnt = IsBitSet(leaf_1.ecx, 23);
features->aes = IsBitSet(leaf_1.ecx, 25);
features->f16c = IsBitSet(leaf_1.ecx, 29);
features->rdrnd = IsBitSet(leaf_1.ecx, 30);
features->sgx = IsBitSet(leaf_7.ebx, 2);
features->bmi1 = IsBitSet(leaf_7.ebx, 3);
features->hle = IsBitSet(leaf_7.ebx, 4);
features->bmi2 = IsBitSet(leaf_7.ebx, 8);
features->erms = IsBitSet(leaf_7.ebx, 9);
features->rtm = IsBitSet(leaf_7.ebx, 11);
features->rdseed = IsBitSet(leaf_7.ebx, 18);
features->clflushopt = IsBitSet(leaf_7.ebx, 23);
features->clwb = IsBitSet(leaf_7.ebx, 24);
features->sha = IsBitSet(leaf_7.ebx, 29);
features->vaes = IsBitSet(leaf_7.ecx, 9);
features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10);
features->adx = IsBitSet(leaf_7.ebx, 19);
if (os_support.have_sse_via_os) {
DetectSseViaOs(features);
} else if (os_support.have_sse_via_cpuid) {
features->sse = IsBitSet(leaf_1.edx, 25);
features->sse2 = IsBitSet(leaf_1.edx, 26);
features->sse3 = IsBitSet(leaf_1.ecx, 0);
features->ssse3 = IsBitSet(leaf_1.ecx, 9);
features->sse4_1 = IsBitSet(leaf_1.ecx, 19);
features->sse4_2 = IsBitSet(leaf_1.ecx, 20);
}
if (os_support.have_avx) {
features->fma3 = IsBitSet(leaf_1.ecx, 12);
features->avx = IsBitSet(leaf_1.ecx, 28);
features->avx2 = IsBitSet(leaf_7.ebx, 5);
}
if (os_support.have_avx512) {
features->avx512f = IsBitSet(leaf_7.ebx, 16);
features->avx512cd = IsBitSet(leaf_7.ebx, 28);
features->avx512er = IsBitSet(leaf_7.ebx, 27);
features->avx512pf = IsBitSet(leaf_7.ebx, 26);
features->avx512bw = IsBitSet(leaf_7.ebx, 30);
features->avx512dq = IsBitSet(leaf_7.ebx, 17);
features->avx512vl = IsBitSet(leaf_7.ebx, 31);
features->avx512ifma = IsBitSet(leaf_7.ebx, 21);
features->avx512vbmi = IsBitSet(leaf_7.ecx, 1);
features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6);
features->avx512vnni = IsBitSet(leaf_7.ecx, 11);
features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3);
features->avx512_second_fma = HasSecondFMA(info->model);
features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3);
features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5);
features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8);
}
if (os_support.have_amx) {
features->amx_bf16 = IsBitSet(leaf_7.edx, 22);
features->amx_tile = IsBitSet(leaf_7.edx, 24);
features->amx_int8 = IsBitSet(leaf_7.edx, 25);
} }
} }
// Reference // Reference
// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented. // https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
static void ParseExtraAMDCpuId(X86Info* info, OsSupport os_support) { static void ParseExtraAMDCpuId(X86Info* info, OsPreserves os_preserves) {
const Leaf leaf_80000000 = CpuId(0x80000000); const Leaf leaf_80000000 = CpuId(0x80000000);
const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax; const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax;
const Leaf leaf_80000001 = SafeCpuId(max_extended_cpuid_leaf, 0x80000001); const Leaf leaf_80000001 = SafeCpuId(max_extended_cpuid_leaf, 0x80000001);
X86Features* const features = &info->features; X86Features* const features = &info->features;
if (os_support.have_sse_via_cpuid) { if (os_preserves.sse_registers) {
features->sse4a = IsBitSet(leaf_80000001.ecx, 6); features->sse4a = IsBitSet(leaf_80000001.ecx, 6);
} }
if (os_support.have_avx) { if (os_preserves.avx_registers) {
features->fma4 = IsBitSet(leaf_80000001.ecx, 16); features->fma4 = IsBitSet(leaf_80000001.ecx, 16);
} }
} }
static const X86Info kEmptyX86Info; static const X86Info kEmptyX86Info;
static const CacheInfo kEmptyCacheInfo; static const CacheInfo kEmptyCacheInfo;
static const OsPreserves kEmptyOsPreserves;
X86Info GetX86Info(void) { X86Info GetX86Info(void) {
X86Info info = kEmptyX86Info; X86Info info = kEmptyX86Info;
@ -1395,11 +1394,11 @@ X86Info GetX86Info(void) {
const bool is_amd = IsVendor(leaf_0, "AuthenticAMD"); const bool is_amd = IsVendor(leaf_0, "AuthenticAMD");
SetVendor(leaf_0, info.vendor); SetVendor(leaf_0, info.vendor);
if (is_intel || is_amd) { if (is_intel || is_amd) {
OsPreserves os_preserves = kEmptyOsPreserves;
const uint32_t max_cpuid_leaf = leaf_0.eax; const uint32_t max_cpuid_leaf = leaf_0.eax;
const OsSupport os_support = CheckOsSupport(max_cpuid_leaf); ParseCpuId(max_cpuid_leaf, &info, &os_preserves);
ParseCpuId(max_cpuid_leaf, os_support, &info);
if (is_amd) { if (is_amd) {
ParseExtraAMDCpuId(&info, os_support); ParseExtraAMDCpuId(&info, os_preserves);
} }
} }
return info; return info;