mirror of
				https://github.com/google/cpu_features.git
				synced 2025-10-31 05:00:44 +01:00 
			
		
		
		
	detect future Intel AVX/AMX features (#124)
* add Ice Lake Server and Sapphire Rapids models

  The information contained in this commit was obtained from the "Intel® Architecture Instruction Set Extensions and Future Features Programming Reference", document 319433-040, available from https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

  Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com>

* Tiger Lake; Ice Lake NNP-I; SPR string

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* add AVX512_BF16 and AVX512_VP2INTERSECT detection

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* correction for KNM features: s/4VBMI2/4FMAPS/g

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* add AMX/TMUL bits from 319433-040

  Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* add Intel copyright

Fixes #128
This commit is contained in:
		| @@ -69,7 +69,13 @@ typedef struct { | ||||
|   int avx512bitalg : 1; | ||||
|   int avx512vpopcntdq : 1; | ||||
|   int avx512_4vnniw : 1; | ||||
|   int avx512_4vbmi2 : 1; | ||||
|   int avx512_4fmaps : 1; | ||||
|   int avx512_bf16 : 1; | ||||
|   int avx512_vp2intersect : 1; | ||||
|  | ||||
|   int amx_bf16 : 1; | ||||
|   int amx_tile : 1; | ||||
|   int amx_int8 : 1; | ||||
|  | ||||
|   int pclmulqdq : 1; | ||||
|   int smx : 1; | ||||
| @@ -188,7 +194,12 @@ typedef enum { | ||||
|   X86_AVX512BITALG, | ||||
|   X86_AVX512VPOPCNTDQ, | ||||
|   X86_AVX512_4VNNIW, | ||||
|   X86_AVX512_4VBMI2, | ||||
|   X86_AVX512_4FMAPS, | ||||
|   X86_AVX512_BF16, | ||||
|   X86_AVX512_VP2INTERSECT, | ||||
|   X86_AMX_BF16, | ||||
|   X86_AMX_TILE, | ||||
|   X86_AMX_INT8, | ||||
|   X86_PCLMULQDQ, | ||||
|   X86_SMX, | ||||
|   X86_SGX, | ||||
|   | ||||
| @@ -92,6 +92,8 @@ static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) { | ||||
| #define MASK_MASKREG 0x20 | ||||
| #define MASK_ZMM0_15 0x40 | ||||
| #define MASK_ZMM16_31 0x80 | ||||
| #define MASK_XTILECFG 0x20000 | ||||
| #define MASK_XTILEDATA 0x40000 | ||||
|  | ||||
| static bool HasMask(uint32_t value, uint32_t mask) { | ||||
|   return (value & mask) == mask; | ||||
| @@ -116,6 +118,13 @@ static bool HasZmmOsXSave(uint32_t xcr0_eax) { | ||||
|                                MASK_ZMM16_31); | ||||
| } | ||||
|  | ||||
| // Checks that the operating system saves and restores AMX/TMUL state during context switches: | ||||
| // true only when xcr0_eax has every XMM, YMM, mask-register, ZMM and XTILECFG/XTILEDATA mask bit set. | ||||
| static bool HasTmmOsXSave(uint32_t xcr0_eax) { | ||||
|   return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 | | ||||
|                                MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA); | ||||
| } | ||||
|  | ||||
| static void SetVendor(const Leaf leaf, char* const vendor) { | ||||
|   *(uint32_t*)(vendor) = leaf.ebx; | ||||
|   *(uint32_t*)(vendor + 4) = leaf.edx; | ||||
| @@ -1046,12 +1055,14 @@ typedef struct { | ||||
|   bool have_sse; | ||||
|   bool have_avx; | ||||
|   bool have_avx512; | ||||
|   bool have_amx; | ||||
| } OsSupport; | ||||
|  | ||||
| // Reference https://en.wikipedia.org/wiki/CPUID. | ||||
| static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* os_support) { | ||||
|   const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); | ||||
|   const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); | ||||
|   const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1); | ||||
|  | ||||
|   const bool have_xsave = IsBitSet(leaf_1.ecx, 26); | ||||
|   const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); | ||||
| @@ -1059,6 +1070,7 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* | ||||
|   os_support->have_sse = HasXmmOsXSave(xcr0_eax); | ||||
|   os_support->have_avx = HasYmmOsXSave(xcr0_eax); | ||||
|   os_support->have_avx512 = HasZmmOsXSave(xcr0_eax); | ||||
|   os_support->have_amx = HasTmmOsXSave(xcr0_eax); | ||||
|  | ||||
|   const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); | ||||
|   const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); | ||||
| @@ -1129,7 +1141,15 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* | ||||
|     features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); | ||||
|     features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); | ||||
|     features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); | ||||
|     features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); | ||||
|     features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3); | ||||
|     features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5); | ||||
|     features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8); | ||||
|   } | ||||
|  | ||||
|   if (os_support->have_amx) { | ||||
|     features->amx_bf16 = IsBitSet(leaf_7.edx, 22); | ||||
|     features->amx_tile = IsBitSet(leaf_7.edx, 24); | ||||
|     features->amx_int8 = IsBitSet(leaf_7.edx, 25); | ||||
|   } | ||||
| } | ||||
|  | ||||
| @@ -1404,8 +1424,18 @@ int GetX86FeaturesEnumValue(const X86Features* features, | ||||
|       return features->avx512vpopcntdq; | ||||
|     case X86_AVX512_4VNNIW: | ||||
|       return features->avx512_4vnniw; | ||||
|     case X86_AVX512_4VBMI2: | ||||
|       return features->avx512_4vbmi2; | ||||
|     case X86_AVX512_4FMAPS: | ||||
|       return features->avx512_4fmaps; | ||||
|     case X86_AVX512_BF16: | ||||
|       return features->avx512_bf16; | ||||
|     case X86_AVX512_VP2INTERSECT: | ||||
|       return features->avx512_vp2intersect; | ||||
|     case X86_AMX_BF16: | ||||
|       return features->amx_bf16; | ||||
|     case X86_AMX_TILE: | ||||
|       return features->amx_tile; | ||||
|     case X86_AMX_INT8: | ||||
|       return features->amx_int8; | ||||
|     case X86_PCLMULQDQ: | ||||
|       return features->pclmulqdq; | ||||
|     case X86_SMX: | ||||
| @@ -1518,8 +1548,18 @@ const char* GetX86FeaturesEnumName(X86FeaturesEnum value) { | ||||
|       return "avx512vpopcntdq"; | ||||
|     case X86_AVX512_4VNNIW: | ||||
|       return "avx512_4vnniw"; | ||||
|     case X86_AVX512_4VBMI2: | ||||
|       return "avx512_4vbmi2"; | ||||
|     case X86_AVX512_4FMAPS: | ||||
|       return "avx512_4fmaps"; | ||||
|     case X86_AVX512_BF16: | ||||
|       return "avx512_bf16"; | ||||
|     case X86_AVX512_VP2INTERSECT: | ||||
|       return "avx512_vp2intersect"; | ||||
|     case X86_AMX_BF16: | ||||
|       return "amx_bf16"; | ||||
|     case X86_AMX_TILE: | ||||
|       return "amx_tile"; | ||||
|     case X86_AMX_INT8: | ||||
|       return "amx_int8"; | ||||
|     case X86_PCLMULQDQ: | ||||
|       return "pclmulqdq"; | ||||
|     case X86_SMX: | ||||
|   | ||||
| @@ -88,7 +88,7 @@ TEST(CpuidX86Test, SandyBridge) { | ||||
|   EXPECT_FALSE(features.avx512bitalg); | ||||
|   EXPECT_FALSE(features.avx512vpopcntdq); | ||||
|   EXPECT_FALSE(features.avx512_4vnniw); | ||||
|   EXPECT_FALSE(features.avx512_4vbmi2); | ||||
|   EXPECT_FALSE(features.avx512_4fmaps); | ||||
|   // All old cpu features should be set. | ||||
|   EXPECT_TRUE(features.aes); | ||||
|   EXPECT_TRUE(features.ssse3); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Jeff Hammond
					Jeff Hammond