From 646b80fa3ac7761551aa3b65299758babef7eda2 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Wed, 23 Jun 2021 14:21:05 +0000 Subject: [PATCH] [NFC] refactor the code so it's easier to understand the execution flow (#161) --- src/cpuinfo_x86.c | 261 +++++++++++++++++++++++----------------------- 1 file changed, 130 insertions(+), 131 deletions(-) diff --git a/src/cpuinfo_x86.c b/src/cpuinfo_x86.c index 844a23d..60eef5a 100644 --- a/src/cpuinfo_x86.c +++ b/src/cpuinfo_x86.c @@ -1181,41 +1181,11 @@ static bool GetDarwinSysCtlByName(const char* name) { // Internal structure to hold the OS support for vector operations. // Avoid to recompute them since each call to cpuid is ~100 cycles. typedef struct { - bool have_sse_via_os; - bool have_sse_via_cpuid; - bool have_avx; - bool have_avx512; - bool have_amx; -} OsSupport; - -static const OsSupport kEmptyOsSupport; - -static OsSupport CheckOsSupport(const uint32_t max_cpuid_leaf) { - const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); - const bool have_xsave = IsBitSet(leaf_1.ecx, 26); - const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); - const bool have_xcr0 = have_xsave && have_osxsave; - - OsSupport os_support = kEmptyOsSupport; - - if (have_xcr0) { - // AVX capable cpu will expose XCR0. - const uint32_t xcr0_eax = GetXCR0Eax(); - os_support.have_sse_via_cpuid = HasXmmOsXSave(xcr0_eax); - os_support.have_avx = HasYmmOsXSave(xcr0_eax); -#if defined(CPU_FEATURES_OS_DARWIN) - os_support.have_avx512 = GetDarwinSysCtlByName("hw.optional.avx512f"); -#else - os_support.have_avx512 = HasZmmOsXSave(xcr0_eax); -#endif // CPU_FEATURES_OS_DARWIN - os_support.have_amx = HasTmmOsXSave(xcr0_eax); - } else { - // Atom based or older cpus need to ask the OS for sse support. - os_support.have_sse_via_os = true; - } - - return os_support; -} + bool sse_registers; + bool avx_registers; + bool avx512_registers; + bool amx_registers; +} OsPreserves; #if defined(CPU_FEATURES_OS_WINDOWS) #if defined(CPU_FEATURES_MOCK_CPUID_X86) @@ -1227,60 +1197,17 @@ static bool GetWindowsIsProcessorFeaturePresent(DWORD ProcessorFeature) { #endif #endif // CPU_FEATURES_OS_WINDOWS -static void DetectSseViaOs(X86Features* features) { -#if defined(CPU_FEATURES_OS_WINDOWS) - // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent - features->sse = - GetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); - features->sse2 = - GetWindowsIsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); - features->sse3 = - GetWindowsIsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE); -#elif defined(CPU_FEATURES_OS_DARWIN) - // Handling Darwin platform through sysctlbyname. - features->sse = GetDarwinSysCtlByName("hw.optional.sse"); - features->sse2 = GetDarwinSysCtlByName("hw.optional.sse2"); - features->sse3 = GetDarwinSysCtlByName("hw.optional.sse3"); - features->ssse3 = GetDarwinSysCtlByName("hw.optional.supplementalsse3"); - features->sse4_1 = GetDarwinSysCtlByName("hw.optional.sse4_1"); - features->sse4_2 = GetDarwinSysCtlByName("hw.optional.sse4_2"); -#elif defined(CPU_FEATURES_OS_LINUX_OR_ANDROID) - // Handling Linux platform through /proc/cpuinfo. - const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); - if (fd >= 0) { - StackLineReader reader; - StackLineReader_Initialize(&reader, fd); - for (;;) { - const LineResult result = StackLineReader_NextLine(&reader); - const StringView line = result.line; - StringView key, value; - if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { - if (CpuFeatures_StringView_IsEquals(key, str("flags"))) { - features->sse = CpuFeatures_StringView_HasWord(value, "sse"); - features->sse2 = CpuFeatures_StringView_HasWord(value, "sse2"); - features->sse3 = CpuFeatures_StringView_HasWord(value, "sse3"); - features->ssse3 = CpuFeatures_StringView_HasWord(value, "ssse3"); - features->sse4_1 = CpuFeatures_StringView_HasWord(value, "sse4_1"); - features->sse4_2 = CpuFeatures_StringView_HasWord(value, "sse4_2"); - break; - } - } - if (result.eof) break; - } - CpuFeatures_CloseFile(fd); - } -#else -#error "Unsupported fallback detection of SSE OS support." -#endif -} - // Reference https://en.wikipedia.org/wiki/CPUID. -static void ParseCpuId(const uint32_t max_cpuid_leaf, - const OsSupport os_support, X86Info* info) { +static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, + OsPreserves* os_preserves) { const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1); + const bool have_xsave = IsBitSet(leaf_1.ecx, 26); + const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); + const bool have_xcr0 = have_xsave && have_osxsave; + const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4); @@ -1321,72 +1248,144 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10); features->adx = IsBitSet(leaf_7.ebx, 19); - if (os_support.have_sse_via_os) { - DetectSseViaOs(features); - } else if (os_support.have_sse_via_cpuid) { - features->sse = IsBitSet(leaf_1.edx, 25); - features->sse2 = IsBitSet(leaf_1.edx, 26); - features->sse3 = IsBitSet(leaf_1.ecx, 0); - features->ssse3 = IsBitSet(leaf_1.ecx, 9); - features->sse4_1 = IsBitSet(leaf_1.ecx, 19); - features->sse4_2 = IsBitSet(leaf_1.ecx, 20); - } + ///////////////////////////////////////////////////////////////////////////// + // The following section is devoted to Vector Extensions. + ///////////////////////////////////////////////////////////////////////////// - if (os_support.have_avx) { - features->fma3 = IsBitSet(leaf_1.ecx, 12); - features->avx = IsBitSet(leaf_1.ecx, 28); - features->avx2 = IsBitSet(leaf_7.ebx, 5); - } + // CPU with AVX expose XCR0 which enables checking vector extensions OS + // support through cpuid. + if (have_xcr0) { + // Here we rely exclusively on cpuid for both CPU and OS support of vector + // extensions. + const uint32_t xcr0_eax = GetXCR0Eax(); + os_preserves->sse_registers = HasXmmOsXSave(xcr0_eax); + os_preserves->avx_registers = HasYmmOsXSave(xcr0_eax); +#if defined(CPU_FEATURES_OS_DARWIN) + // On Darwin AVX512 support is On-demand. + // We have to query the OS instead of querying the Zmm save/restore state. + // https://github.com/apple/darwin-xnu/blob/8f02f2a044b9bb1ad951987ef5bab20ec9486310/osfmk/i386/fpu.c#L173-L199 + os_preserves->avx512_registers = + GetDarwinSysCtlByName("hw.optional.avx512f"); +#else + os_preserves->avx512_registers = HasZmmOsXSave(xcr0_eax); +#endif // CPU_FEATURES_OS_DARWIN + os_preserves->amx_registers = HasTmmOsXSave(xcr0_eax); - if (os_support.have_avx512) { - features->avx512f = IsBitSet(leaf_7.ebx, 16); - features->avx512cd = IsBitSet(leaf_7.ebx, 28); - features->avx512er = IsBitSet(leaf_7.ebx, 27); - features->avx512pf = IsBitSet(leaf_7.ebx, 26); - features->avx512bw = IsBitSet(leaf_7.ebx, 30); - features->avx512dq = IsBitSet(leaf_7.ebx, 17); - features->avx512vl = IsBitSet(leaf_7.ebx, 31); - features->avx512ifma = IsBitSet(leaf_7.ebx, 21); - features->avx512vbmi = IsBitSet(leaf_7.ecx, 1); - features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6); - features->avx512vnni = IsBitSet(leaf_7.ecx, 11); - features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); - features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); - features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); - features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); - features->avx512_second_fma = HasSecondFMA(info->model); - features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3); - features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5); - features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8); - } - - if (os_support.have_amx) { - features->amx_bf16 = IsBitSet(leaf_7.edx, 22); - features->amx_tile = IsBitSet(leaf_7.edx, 24); - features->amx_int8 = IsBitSet(leaf_7.edx, 25); + if (os_preserves->sse_registers) { + features->sse = IsBitSet(leaf_1.edx, 25); + features->sse2 = IsBitSet(leaf_1.edx, 26); + features->sse3 = IsBitSet(leaf_1.ecx, 0); + features->ssse3 = IsBitSet(leaf_1.ecx, 9); + features->sse4_1 = IsBitSet(leaf_1.ecx, 19); + features->sse4_2 = IsBitSet(leaf_1.ecx, 20); + } + if (os_preserves->avx_registers) { + features->fma3 = IsBitSet(leaf_1.ecx, 12); + features->avx = IsBitSet(leaf_1.ecx, 28); + features->avx2 = IsBitSet(leaf_7.ebx, 5); + } + if (os_preserves->avx512_registers) { + features->avx512f = IsBitSet(leaf_7.ebx, 16); + features->avx512cd = IsBitSet(leaf_7.ebx, 28); + features->avx512er = IsBitSet(leaf_7.ebx, 27); + features->avx512pf = IsBitSet(leaf_7.ebx, 26); + features->avx512bw = IsBitSet(leaf_7.ebx, 30); + features->avx512dq = IsBitSet(leaf_7.ebx, 17); + features->avx512vl = IsBitSet(leaf_7.ebx, 31); + features->avx512ifma = IsBitSet(leaf_7.ebx, 21); + features->avx512vbmi = IsBitSet(leaf_7.ecx, 1); + features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6); + features->avx512vnni = IsBitSet(leaf_7.ecx, 11); + features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); + features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); + features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); + features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); + features->avx512_second_fma = HasSecondFMA(info->model); + features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3); + features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5); + features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8); + } + if (os_preserves->amx_registers) { + features->amx_bf16 = IsBitSet(leaf_7.edx, 22); + features->amx_tile = IsBitSet(leaf_7.edx, 24); + features->amx_int8 = IsBitSet(leaf_7.edx, 25); + } + } else { + // When XCR0 is not available (Atom based or older cpus) we need to defer to + // the OS via custom code. +#if defined(CPU_FEATURES_OS_WINDOWS) + // Handling Windows platform through IsProcessorFeaturePresent. + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + features->sse = + GetWindowsIsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE); + features->sse2 = + GetWindowsIsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); + features->sse3 = + GetWindowsIsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE); +#elif defined(CPU_FEATURES_OS_DARWIN) + // Handling Darwin platform through sysctlbyname. + features->sse = GetDarwinSysCtlByName("hw.optional.sse"); + features->sse2 = GetDarwinSysCtlByName("hw.optional.sse2"); + features->sse3 = GetDarwinSysCtlByName("hw.optional.sse3"); + features->ssse3 = GetDarwinSysCtlByName("hw.optional.supplementalsse3"); + features->sse4_1 = GetDarwinSysCtlByName("hw.optional.sse4_1"); + features->sse4_2 = GetDarwinSysCtlByName("hw.optional.sse4_2"); +#elif defined(CPU_FEATURES_OS_LINUX_OR_ANDROID) + // Handling Linux platform through /proc/cpuinfo. + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + const LineResult result = StackLineReader_NextLine(&reader); + const StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("flags"))) { + features->sse = CpuFeatures_StringView_HasWord(value, "sse"); + features->sse2 = CpuFeatures_StringView_HasWord(value, "sse2"); + features->sse3 = CpuFeatures_StringView_HasWord(value, "sse3"); + features->ssse3 = CpuFeatures_StringView_HasWord(value, "ssse3"); + features->sse4_1 = CpuFeatures_StringView_HasWord(value, "sse4_1"); + features->sse4_2 = CpuFeatures_StringView_HasWord(value, "sse4_2"); + break; + } + } + if (result.eof) break; + } + CpuFeatures_CloseFile(fd); + } +#else +#error "Unsupported fallback detection of SSE OS support." +#endif + // Now that we have queried the OS for SSE support, we report this back to + // os_preserves. This is needed in case of AMD CPU's to enable testing of + // sse4a (See ParseExtraAMDCpuId below). + if (features->sse) os_preserves->sse_registers = true; } } // Reference // https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented. -static void ParseExtraAMDCpuId(X86Info* info, OsSupport os_support) { +static void ParseExtraAMDCpuId(X86Info* info, OsPreserves os_preserves) { const Leaf leaf_80000000 = CpuId(0x80000000); const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax; const Leaf leaf_80000001 = SafeCpuId(max_extended_cpuid_leaf, 0x80000001); X86Features* const features = &info->features; - if (os_support.have_sse_via_cpuid) { + if (os_preserves.sse_registers) { features->sse4a = IsBitSet(leaf_80000001.ecx, 6); } - if (os_support.have_avx) { + if (os_preserves.avx_registers) { features->fma4 = IsBitSet(leaf_80000001.ecx, 16); } } static const X86Info kEmptyX86Info; static const CacheInfo kEmptyCacheInfo; +static const OsPreserves kEmptyOsPreserves; X86Info GetX86Info(void) { X86Info info = kEmptyX86Info; @@ -1395,11 +1394,11 @@ X86Info GetX86Info(void) { const bool is_amd = IsVendor(leaf_0, "AuthenticAMD"); SetVendor(leaf_0, info.vendor); if (is_intel || is_amd) { + OsPreserves os_preserves = kEmptyOsPreserves; const uint32_t max_cpuid_leaf = leaf_0.eax; - const OsSupport os_support = CheckOsSupport(max_cpuid_leaf); - ParseCpuId(max_cpuid_leaf, os_support, &info); + ParseCpuId(max_cpuid_leaf, &info, &os_preserves); if (is_amd) { - ParseExtraAMDCpuId(&info, os_support); + ParseExtraAMDCpuId(&info, os_preserves); } } return info;