From 33bd72c1bcf080e8d9cf48403e0ebcbb8bf5ab5a Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 21 Sep 2020 00:56:26 -0700 Subject: [PATCH] detect future Intel AVX/AMX features (#124) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add Ice Lake Server and Sapphire Rapids models The information contained in this commit was obtained from "Intel® Architecture Instruction Set Extensions and Future Features Programming Reference" document 319433-040 from https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html Signed-off-by: Jeff Hammond * Tiger Lake; Ice Lake NNP-I; SPR string Signed-off-by: Hammond, Jeff R * add AVX512_BF16 and AVX512_VP2INTERSECT detection Signed-off-by: Hammond, Jeff R * correction for KNM features: s/4VBMI2/4FMAPS/g Signed-off-by: Hammond, Jeff R * add AMX/TMUL bits from 319433-040 Signed-off-by: Hammond, Jeff R * add Intel copyright Fixes #128 --- include/cpuinfo_x86.h | 15 ++++++++++-- src/cpuinfo_x86.c | 50 ++++++++++++++++++++++++++++++++++++---- test/cpuinfo_x86_test.cc | 2 +- 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/include/cpuinfo_x86.h b/include/cpuinfo_x86.h index c1c68cb..60af05a 100644 --- a/include/cpuinfo_x86.h +++ b/include/cpuinfo_x86.h @@ -69,7 +69,13 @@ typedef struct { int avx512bitalg : 1; int avx512vpopcntdq : 1; int avx512_4vnniw : 1; - int avx512_4vbmi2 : 1; + int avx512_4fmaps : 1; + int avx512_bf16 : 1; + int avx512_vp2intersect : 1; + + int amx_bf16 : 1; + int amx_tile : 1; + int amx_int8 : 1; int pclmulqdq : 1; int smx : 1; @@ -188,7 +194,12 @@ typedef enum { X86_AVX512BITALG, X86_AVX512VPOPCNTDQ, X86_AVX512_4VNNIW, - X86_AVX512_4VBMI2, + X86_AVX512_4FMAPS, + X86_AVX512_BF16, + X86_AVX512_VP2INTERSECT, + X86_AMX_BF16, + X86_AMX_TILE, + X86_AMX_INT8, X86_PCLMULQDQ, X86_SMX, X86_SGX, diff --git a/src/cpuinfo_x86.c b/src/cpuinfo_x86.c index ab6838b..a75e52e 100644 --- 
a/src/cpuinfo_x86.c +++ b/src/cpuinfo_x86.c @@ -92,6 +92,8 @@ static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) { #define MASK_MASKREG 0x20 #define MASK_ZMM0_15 0x40 #define MASK_ZMM16_31 0x80 +#define MASK_XTILECFG 0x20000 +#define MASK_XTILEDATA 0x40000 static bool HasMask(uint32_t value, uint32_t mask) { return (value & mask) == mask; @@ -116,6 +118,13 @@ static bool HasZmmOsXSave(uint32_t xcr0_eax) { MASK_ZMM16_31); } +// Checks that operating system saves and restores AMX/TMUL state during context +// switches. +static bool HasTmmOsXSave(uint32_t xcr0_eax) { + return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 | + MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA); +} + static void SetVendor(const Leaf leaf, char* const vendor) { *(uint32_t*)(vendor) = leaf.ebx; *(uint32_t*)(vendor + 4) = leaf.edx; @@ -1046,12 +1055,14 @@ typedef struct { bool have_sse; bool have_avx; bool have_avx512; + bool have_amx; } OsSupport; // Reference https://en.wikipedia.org/wiki/CPUID. 
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* os_support) { const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); + const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1); const bool have_xsave = IsBitSet(leaf_1.ecx, 26); const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); @@ -1059,6 +1070,7 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* os_support->have_sse = HasXmmOsXSave(xcr0_eax); os_support->have_avx = HasYmmOsXSave(xcr0_eax); os_support->have_avx512 = HasZmmOsXSave(xcr0_eax); + os_support->have_amx = HasTmmOsXSave(xcr0_eax); const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); @@ -1129,7 +1141,15 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); - features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); + features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3); + features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5); + features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8); + } + + if (os_support->have_amx) { + features->amx_bf16 = IsBitSet(leaf_7.edx, 22); + features->amx_tile = IsBitSet(leaf_7.edx, 24); + features->amx_int8 = IsBitSet(leaf_7.edx, 25); } } @@ -1404,8 +1424,18 @@ int GetX86FeaturesEnumValue(const X86Features* features, return features->avx512vpopcntdq; case X86_AVX512_4VNNIW: return features->avx512_4vnniw; - case X86_AVX512_4VBMI2: - return features->avx512_4vbmi2; + case X86_AVX512_4FMAPS: + return features->avx512_4fmaps; + case X86_AVX512_BF16: + return features->avx512_bf16; + case X86_AVX512_VP2INTERSECT: + return features->avx512_vp2intersect; + case X86_AMX_BF16: + return features->amx_bf16; + case X86_AMX_TILE: + return features->amx_tile; + case X86_AMX_INT8: + return 
features->amx_int8; case X86_PCLMULQDQ: return features->pclmulqdq; case X86_SMX: @@ -1518,8 +1548,18 @@ const char* GetX86FeaturesEnumName(X86FeaturesEnum value) { return "avx512vpopcntdq"; case X86_AVX512_4VNNIW: return "avx512_4vnniw"; - case X86_AVX512_4VBMI2: - return "avx512_4vbmi2"; + case X86_AVX512_4FMAPS: + return "avx512_4fmaps"; + case X86_AVX512_BF16: + return "avx512_bf16"; + case X86_AVX512_VP2INTERSECT: + return "avx512_vp2intersect"; + case X86_AMX_BF16: + return "amx_bf16"; + case X86_AMX_TILE: + return "amx_tile"; + case X86_AMX_INT8: + return "amx_int8"; case X86_PCLMULQDQ: return "pclmulqdq"; case X86_SMX: diff --git a/test/cpuinfo_x86_test.cc b/test/cpuinfo_x86_test.cc index 10b9624..3c80eee 100644 --- a/test/cpuinfo_x86_test.cc +++ b/test/cpuinfo_x86_test.cc @@ -88,7 +88,7 @@ TEST(CpuidX86Test, SandyBridge) { EXPECT_FALSE(features.avx512bitalg); EXPECT_FALSE(features.avx512vpopcntdq); EXPECT_FALSE(features.avx512_4vnniw); - EXPECT_FALSE(features.avx512_4vbmi2); + EXPECT_FALSE(features.avx512_4fmaps); // All old cpu features should be set. EXPECT_TRUE(features.aes); EXPECT_TRUE(features.ssse3);