mirror of
https://github.com/google/cpu_features.git
synced 2025-04-28 07:23:37 +02:00
detect AVX-512 FMA count (#125)
* add Ice Lake Server and Sapphire Rapids models The information contained in this commit was obtained from "Intel® Architecture Instruction Set Extensions and Future Features Programming Reference" document 319433-040 from https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com> * Tiger Lake; Ice Lake NNP-I; SPR string Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com> * second FMA features - incomplete and wrong Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com> * oops: use T/F not 2/1 Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com> * implement SKX lookup Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com> * add Intel copyright * cleanup AVX512 second FMA code 1) remove debug stuff 2) remove ICX - will add details when available Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com> * fix CPX detection Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com> * remove elses Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com> * remove curly braces from single-line conditional bodies Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com> * apply clang-format Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com> Fixes #120
This commit is contained in:
parent
76dafc7e3b
commit
17ffb65117
@ -69,10 +69,11 @@ typedef struct {
|
|||||||
int avx512bitalg : 1;
|
int avx512bitalg : 1;
|
||||||
int avx512vpopcntdq : 1;
|
int avx512vpopcntdq : 1;
|
||||||
int avx512_4vnniw : 1;
|
int avx512_4vnniw : 1;
|
||||||
|
int avx512_4vbmi2 : 1;
|
||||||
|
int avx512_second_fma : 1;
|
||||||
int avx512_4fmaps : 1;
|
int avx512_4fmaps : 1;
|
||||||
int avx512_bf16 : 1;
|
int avx512_bf16 : 1;
|
||||||
int avx512_vp2intersect : 1;
|
int avx512_vp2intersect : 1;
|
||||||
|
|
||||||
int amx_bf16 : 1;
|
int amx_bf16 : 1;
|
||||||
int amx_tile : 1;
|
int amx_tile : 1;
|
||||||
int amx_int8 : 1;
|
int amx_int8 : 1;
|
||||||
@ -194,6 +195,8 @@ typedef enum {
|
|||||||
X86_AVX512BITALG,
|
X86_AVX512BITALG,
|
||||||
X86_AVX512VPOPCNTDQ,
|
X86_AVX512VPOPCNTDQ,
|
||||||
X86_AVX512_4VNNIW,
|
X86_AVX512_4VNNIW,
|
||||||
|
X86_AVX512_4VBMI2,
|
||||||
|
X86_AVX512_SECOND_FMA,
|
||||||
X86_AVX512_4FMAPS,
|
X86_AVX512_4FMAPS,
|
||||||
X86_AVX512_BF16,
|
X86_AVX512_BF16,
|
||||||
X86_AVX512_VP2INTERSECT,
|
X86_AVX512_VP2INTERSECT,
|
||||||
|
@ -14,12 +14,13 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "cpuinfo_x86.h"
|
#include "cpuinfo_x86.h"
|
||||||
#include "internal/bit_utils.h"
|
|
||||||
#include "internal/cpuid_x86.h"
|
|
||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "internal/bit_utils.h"
|
||||||
|
#include "internal/cpuid_x86.h"
|
||||||
|
|
||||||
#if !defined(CPU_FEATURES_ARCH_X86)
|
#if !defined(CPU_FEATURES_ARCH_X86)
|
||||||
#error "Cannot compile cpuinfo_x86 on a non x86 platform."
|
#error "Cannot compile cpuinfo_x86 on a non x86 platform."
|
||||||
#endif
|
#endif
|
||||||
@ -125,6 +126,35 @@ static bool HasTmmOsXSave(uint32_t xcr0_eax) {
|
|||||||
MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA);
|
MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether the CPU identified by `model` is treated as having a second
// AVX-512 FMA unit. CPUID exposes no bit for this, so the answer is derived
// from the model number and, for model 0x55, from fixed character positions
// in the processor brand string.
//
// NOTE(review): the offsets below assume a brand string shaped like
// "Intel(R) Xeon(R) <SKU...>" with no leading padding - confirm against the
// strings FillX86BrandString actually produces.
static bool HasSecondFMA(uint32_t model) {
  switch (model) {
    case 0x66:  // Cannon Lake client
    case 0x7d:  // Ice Lake client
    case 0x7e:
      return false;
    case 0x55:  // Skylake server: needs the brand-string lookup below
      break;
    default:
      // This is the right default...
      return true;
  }

  // Zero-initialised so every index read below is defined even if the helper
  // writes fewer than 48 characters.
  char brand[49] = {0};
  FillX86BrandString(brand);

  // Anything that is not a Xeon keeps the default answer.
  if (brand[9] != 'X') return true;

  // First character of the SKU family that follows "Xeon(R) ".
  switch (brand[17]) {
    case 'S':  // Silver
    case 'B':  // Bronze
      return false;
    case 'G':  // Gold
      // Only the Gold 5xxx range is restricted; other Gold parts keep the
      // default.
      if (brand[22] != '5') return true;
      // Gold 5_20 and below lack the second FMA, except Gold 53__ and the
      // parts whose last two digits are "22".
      return brand[23] == '3' || (brand[24] == '2' && brand[25] == '2');
    case 'W':  // Xeon W 210x
      return brand[21] != '0';
    case 'D':  // Xeon D 21xx
      return !(brand[19] == '2' && brand[20] == '1');
    default:
      return true;
  }
}
|
||||||
|
|
||||||
static void SetVendor(const Leaf leaf, char* const vendor) {
|
static void SetVendor(const Leaf leaf, char* const vendor) {
|
||||||
*(uint32_t*)(vendor) = leaf.ebx;
|
*(uint32_t*)(vendor) = leaf.ebx;
|
||||||
*(uint32_t*)(vendor + 4) = leaf.edx;
|
*(uint32_t*)(vendor + 4) = leaf.edx;
|
||||||
@ -1059,7 +1089,8 @@ typedef struct {
|
|||||||
} OsSupport;
|
} OsSupport;
|
||||||
|
|
||||||
// Reference https://en.wikipedia.org/wiki/CPUID.
|
// Reference https://en.wikipedia.org/wiki/CPUID.
|
||||||
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* os_support) {
|
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info,
|
||||||
|
OsSupport* os_support) {
|
||||||
const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
|
const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
|
||||||
const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7);
|
const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7);
|
||||||
const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1);
|
const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1);
|
||||||
@ -1141,6 +1172,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport*
|
|||||||
features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
|
features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
|
||||||
features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
|
features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
|
||||||
features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
|
features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
|
||||||
|
features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3);
|
||||||
|
features->avx512_second_fma = HasSecondFMA(info->model);
|
||||||
features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3);
|
features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3);
|
||||||
features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5);
|
features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5);
|
||||||
features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8);
|
features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8);
|
||||||
@ -1153,7 +1186,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport*
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reference https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
|
// Reference
|
||||||
|
// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
|
||||||
static void ParseExtraAMDCpuId(X86Info* info, OsSupport os_support) {
|
static void ParseExtraAMDCpuId(X86Info* info, OsSupport os_support) {
|
||||||
const Leaf leaf_80000000 = CpuId(0x80000000);
|
const Leaf leaf_80000000 = CpuId(0x80000000);
|
||||||
const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax;
|
const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax;
|
||||||
@ -1281,10 +1315,14 @@ X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
|
|||||||
return INTEL_SPR;
|
return INTEL_SPR;
|
||||||
case CPUID(0x06, 0x8E):
|
case CPUID(0x06, 0x8E):
|
||||||
switch (info->stepping) {
|
switch (info->stepping) {
|
||||||
case 9: return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake
|
case 9:
|
||||||
case 10: return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake
|
return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake
|
||||||
case 11: return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
|
case 10:
|
||||||
default: return X86_UNKNOWN;
|
return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake
|
||||||
|
case 11:
|
||||||
|
return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
|
||||||
|
default:
|
||||||
|
return X86_UNKNOWN;
|
||||||
}
|
}
|
||||||
case CPUID(0x06, 0x9E):
|
case CPUID(0x06, 0x9E):
|
||||||
if (info->stepping > 9) {
|
if (info->stepping > 9) {
|
||||||
@ -1427,6 +1465,10 @@ int GetX86FeaturesEnumValue(const X86Features* features,
|
|||||||
return features->avx512vpopcntdq;
|
return features->avx512vpopcntdq;
|
||||||
case X86_AVX512_4VNNIW:
|
case X86_AVX512_4VNNIW:
|
||||||
return features->avx512_4vnniw;
|
return features->avx512_4vnniw;
|
||||||
|
case X86_AVX512_4VBMI2:
|
||||||
|
return features->avx512_4vbmi2;
|
||||||
|
case X86_AVX512_SECOND_FMA:
|
||||||
|
return features->avx512_second_fma;
|
||||||
case X86_AVX512_4FMAPS:
|
case X86_AVX512_4FMAPS:
|
||||||
return features->avx512_4fmaps;
|
return features->avx512_4fmaps;
|
||||||
case X86_AVX512_BF16:
|
case X86_AVX512_BF16:
|
||||||
@ -1551,6 +1593,10 @@ const char* GetX86FeaturesEnumName(X86FeaturesEnum value) {
|
|||||||
return "avx512vpopcntdq";
|
return "avx512vpopcntdq";
|
||||||
case X86_AVX512_4VNNIW:
|
case X86_AVX512_4VNNIW:
|
||||||
return "avx512_4vnniw";
|
return "avx512_4vnniw";
|
||||||
|
case X86_AVX512_4VBMI2:
|
||||||
|
return "avx512_4vbmi2";
|
||||||
|
case X86_AVX512_SECOND_FMA:
|
||||||
|
return "avx512_second_fma";
|
||||||
case X86_AVX512_4FMAPS:
|
case X86_AVX512_4FMAPS:
|
||||||
return "avx512_4fmaps";
|
return "avx512_4fmaps";
|
||||||
case X86_AVX512_BF16:
|
case X86_AVX512_BF16:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user