1
0
mirror of https://github.com/google/cpu_features.git synced 2025-04-28 07:23:37 +02:00

detect AVX-512 FMA count (#125)

* add Ice Lake Server and Sapphire Rapids models

The information contained in this commit was obtained from
"Intel® Architecture Instruction Set Extensions and Future Features Programming Reference" (document 319433-040), available at
https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com>

* Tiger Lake; Ice Lake NNP-I; SPR string

Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* second FMA features - incomplete and wrong

Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* oops: use T/F not 2/1

Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com>

* implement SKX lookup

Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* add Intel copyright

* cleanup AVX512 second FMA code

1) remove debug stuff
2) remove ICX - will add details when available

Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* fix CPX detection

Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* remove elses

Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* remove curly braces from single-line conditional bodies

Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

* apply clang-format

Signed-off-by: Hammond, Jeff R <jeff.r.hammond@intel.com>

Fixes #120
This commit is contained in:
Jeff Hammond 2020-09-22 00:29:46 -07:00 committed by GitHub
parent 76dafc7e3b
commit 17ffb65117
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 63 additions and 14 deletions

View File

@ -69,10 +69,11 @@ typedef struct {
int avx512bitalg : 1;
int avx512vpopcntdq : 1;
int avx512_4vnniw : 1;
int avx512_4vbmi2 : 1;
int avx512_second_fma : 1;
int avx512_4fmaps : 1;
int avx512_bf16 : 1;
int avx512_vp2intersect : 1;
int amx_bf16 : 1;
int amx_tile : 1;
int amx_int8 : 1;
@ -194,6 +195,8 @@ typedef enum {
X86_AVX512BITALG,
X86_AVX512VPOPCNTDQ,
X86_AVX512_4VNNIW,
X86_AVX512_4VBMI2,
X86_AVX512_SECOND_FMA,
X86_AVX512_4FMAPS,
X86_AVX512_BF16,
X86_AVX512_VP2INTERSECT,

View File

@ -14,12 +14,13 @@
// limitations under the License.
#include "cpuinfo_x86.h"
#include "internal/bit_utils.h"
#include "internal/cpuid_x86.h"
#include <stdbool.h>
#include <string.h>
#include "internal/bit_utils.h"
#include "internal/cpuid_x86.h"
#if !defined(CPU_FEATURES_ARCH_X86)
#error "Cannot compile cpuinfo_x86 on a non x86 platform."
#endif
@ -125,6 +126,35 @@ static bool HasTmmOsXSave(uint32_t xcr0_eax) {
MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA);
}
// Reports whether the CPU identified by `model` is treated as having a second
// AVX-512 FMA unit. The answer comes from a hard-coded lookup rather than a
// CPUID feature bit:
//   - models 0x66, 0x7d and 0x7e are always reported without one;
//   - model 0x55 is resolved by looking at fixed character positions of the
//     processor brand string;
//   - every other model is assumed to have one.
static bool HasSecondFMA(uint32_t model) {
  switch (model) {
    case 0x66:  // Cannon Lake client
    case 0x7d:  // Ice Lake client
    case 0x7e:
      return false;
    case 0x55:  // Skylake server: decided below from the brand string
      break;
    default:
      return true;  // unlisted models default to "has second FMA"
  }

  // Zero-filled so that positions past a short brand string compare unequal
  // to every character tested below.
  char brand[49] = {0};
  FillX86BrandString(brand);

  // Only Xeon-branded parts are inspected further; anything else on this
  // model is reported as having the second unit.
  // NOTE(review): the fixed offsets presumably rely on the
  // "Intel(R) Xeon(R) <tier> ..." brand string layout - confirm.
  if (brand[9] != 'X') return true;

  switch (brand[17]) {
    case 'S':  // Silver
    case 'B':  // Bronze
      return false;
    case 'G':  // Gold
      if (brand[22] != '5') return true;
      // Gold 5xxx: only 53xx parts, or numbers ending in "22", qualify.
      if (brand[23] == '3') return true;
      return brand[24] == '2' && brand[25] == '2';
    case 'W':  // Xeon W 210x
      return brand[21] != '0';
    case 'D':  // Xeon D 21xx
      return !(brand[19] == '2' && brand[20] == '1');
    default:
      return true;
  }
}
static void SetVendor(const Leaf leaf, char* const vendor) {
*(uint32_t*)(vendor) = leaf.ebx;
*(uint32_t*)(vendor + 4) = leaf.edx;
@ -1059,7 +1089,8 @@ typedef struct {
} OsSupport;
// Reference https://en.wikipedia.org/wiki/CPUID.
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* os_support) {
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info,
OsSupport* os_support) {
const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1);
const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7);
const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1);
@ -1141,6 +1172,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport*
features->avx512bitalg = IsBitSet(leaf_7.ecx, 12);
features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14);
features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2);
features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3);
features->avx512_second_fma = HasSecondFMA(info->model);
features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3);
features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5);
features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8);
@ -1153,7 +1186,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport*
}
}
// Reference https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
// Reference
// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented.
static void ParseExtraAMDCpuId(X86Info* info, OsSupport os_support) {
const Leaf leaf_80000000 = CpuId(0x80000000);
const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax;
@ -1281,10 +1315,14 @@ X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
return INTEL_SPR;
case CPUID(0x06, 0x8E):
switch (info->stepping) {
case 9: return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake
case 10: return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake
case 11: return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
default: return X86_UNKNOWN;
case 9:
return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake
case 10:
return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake
case 11:
return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture)
default:
return X86_UNKNOWN;
}
case CPUID(0x06, 0x9E):
if (info->stepping > 9) {
@ -1427,6 +1465,10 @@ int GetX86FeaturesEnumValue(const X86Features* features,
return features->avx512vpopcntdq;
case X86_AVX512_4VNNIW:
return features->avx512_4vnniw;
case X86_AVX512_4VBMI2:
return features->avx512_4vbmi2;
case X86_AVX512_SECOND_FMA:
return features->avx512_second_fma;
case X86_AVX512_4FMAPS:
return features->avx512_4fmaps;
case X86_AVX512_BF16:
@ -1551,6 +1593,10 @@ const char* GetX86FeaturesEnumName(X86FeaturesEnum value) {
return "avx512vpopcntdq";
case X86_AVX512_4VNNIW:
return "avx512_4vnniw";
case X86_AVX512_4VBMI2:
return "avx512_4vbmi2";
case X86_AVX512_SECOND_FMA:
return "avx512_second_fma";
case X86_AVX512_4FMAPS:
return "avx512_4fmaps";
case X86_AVX512_BF16: