Update features for AArch64 to Linux 5.8 (#122)

This adds the following features: dcpodp, sve2, sveaes, svepmull, svebitperm, svesha3, svesm4, flagm2, frint, svei8mm, svef32mm, svef64mm, svebf16, i8mm, bf16, dgh, rng and bti. With these, all features used by Linux 5.8 on AArch64 are supported. Fixes #126
2025-10-25 11:20:49 +02:00 · 2020-09-21 09:50:38 +02:00
parent 9e03e13ae7
commit 73d10ad25b
4 changed files with 168 additions and 32 deletions
--- a/include/cpuinfo_aarch64.h
+++ b/include/cpuinfo_aarch64.h
@@ -21,38 +21,56 @@
 CPU_FEATURES_START_CPP_NAMESPACE

 typedef struct {
-  int fp : 1;       // Floating-point.
-  int asimd : 1;    // Advanced SIMD.
-  int evtstrm : 1;  // Generic timer generated events.
-  int aes : 1;      // Hardware-accelerated Advanced Encryption Standard.
-  int pmull : 1;    // Polynomial multiply long.
-  int sha1 : 1;     // Hardware-accelerated SHA1.
-  int sha2 : 1;     // Hardware-accelerated SHA2-256.
-  int crc32 : 1;    // Hardware-accelerated CRC-32.
-  int atomics : 1;  // Armv8.1 atomic instructions.
-  int fphp : 1;     // Half-precision floating point support.
-  int asimdhp : 1;  // Advanced SIMD half-precision support.
-  int cpuid : 1;    // Access to certain ID registers.
-  int asimdrdm : 1; // Rounding Double Multiply Accumulate/Subtract.
-  int jscvt : 1;    // Support for JavaScript conversion.
-  int fcma : 1;     // Floating point complex numbers.
-  int lrcpc : 1;    // Support for weaker release consistency.
-  int dcpop : 1;    // Data persistence writeback.
-  int sha3 : 1;     // Hardware-accelerated SHA3.
-  int sm3 : 1;      // Hardware-accelerated SM3.
-  int sm4 : 1;      // Hardware-accelerated SM4.
-  int asimddp : 1;  // Dot product instruction.
-  int sha512 : 1;   // Hardware-accelerated SHA512.
-  int sve : 1;      // Scalable Vector Extension.
-  int asimdfhm : 1; // Additional half-precision instructions.
-  int dit : 1;      // Data independent timing.
-  int uscat : 1;    // Unaligned atomics support.
-  int ilrcpc : 1;   // Additional support for weaker release consistency.
-  int flagm : 1;    // Flag manipulation instructions.
-  int ssbs : 1;     // Speculative Store Bypass Safe PSTATE bit.
-  int sb : 1;       // Speculation barrier.
-  int paca : 1;     // Address authentication.
-  int pacg : 1;     // Generic authentication.
+  int fp : 1;         // Floating-point.
+  int asimd : 1;      // Advanced SIMD.
+  int evtstrm : 1;    // Generic timer generated events.
+  int aes : 1;        // Hardware-accelerated Advanced Encryption Standard.
+  int pmull : 1;      // Polynomial multiply long.
+  int sha1 : 1;       // Hardware-accelerated SHA1.
+  int sha2 : 1;       // Hardware-accelerated SHA2-256.
+  int crc32 : 1;      // Hardware-accelerated CRC-32.
+  int atomics : 1;    // Armv8.1 atomic instructions.
+  int fphp : 1;       // Half-precision floating point support.
+  int asimdhp : 1;    // Advanced SIMD half-precision support.
+  int cpuid : 1;      // Access to certain ID registers.
+  int asimdrdm : 1;   // Rounding Double Multiply Accumulate/Subtract.
+  int jscvt : 1;      // Support for JavaScript conversion.
+  int fcma : 1;       // Floating point complex numbers.
+  int lrcpc : 1;      // Support for weaker release consistency.
+  int dcpop : 1;      // Data persistence writeback.
+  int sha3 : 1;       // Hardware-accelerated SHA3.
+  int sm3 : 1;        // Hardware-accelerated SM3.
+  int sm4 : 1;        // Hardware-accelerated SM4.
+  int asimddp : 1;    // Dot product instruction.
+  int sha512 : 1;     // Hardware-accelerated SHA512.
+  int sve : 1;        // Scalable Vector Extension.
+  int asimdfhm : 1;   // Additional half-precision instructions.
+  int dit : 1;        // Data independent timing.
+  int uscat : 1;      // Unaligned atomics support.
+  int ilrcpc : 1;     // Additional support for weaker release consistency.
+  int flagm : 1;      // Flag manipulation instructions.
+  int ssbs : 1;       // Speculative Store Bypass Safe PSTATE bit.
+  int sb : 1;         // Speculation barrier.
+  int paca : 1;       // Address authentication.
+  int pacg : 1;       // Generic authentication.
+  int dcpodp : 1;     // Data cache clean to point of persistence.
+  int sve2 : 1;       // Scalable Vector Extension (version 2).
+  int sveaes : 1;     // SVE AES instructions.
+  int svepmull : 1;   // SVE polynomial multiply long instructions.
+  int svebitperm : 1; // SVE bit permute instructions.
+  int svesha3 : 1;    // SVE SHA3 instructions.
+  int svesm4 : 1;     // SVE SM4 instructions.
+  int flagm2 : 1;     // Additional flag manipulation instructions.
+  int frint : 1;      // Floating point to integer rounding.
+  int svei8mm : 1;    // SVE Int8 matrix multiplication instructions.
+  int svef32mm : 1;   // SVE FP32 matrix multiplication instruction.
+  int svef64mm : 1;   // SVE FP64 matrix multiplication instructions.
+  int svebf16 : 1;    // SVE BFloat16 instructions.
+  int i8mm : 1;       // Int8 matrix multiplication instructions.
+  int bf16 : 1;       // BFloat16 instructions.
+  int dgh : 1;        // Data Gathering Hint instruction.
+  int rng : 1;        // True random number generator support.
+  int bti : 1;        // Branch target identification.

  // Make sure to update Aarch64FeaturesEnum below if you add a field here.
 } Aarch64Features;
@@ -103,6 +121,24 @@ typedef enum {
  AARCH64_SB,
  AARCH64_PACA,
  AARCH64_PACG,
+  AARCH64_DCPODP,
+  AARCH64_SVE2,
+  AARCH64_SVEAES,
+  AARCH64_SVEPMULL,
+  AARCH64_SVEBITPERM,
+  AARCH64_SVESHA3,
+  AARCH64_SVESM4,
+  AARCH64_FLAGM2,
+  AARCH64_FRINT,
+  AARCH64_SVEI8MM,
+  AARCH64_SVEF32MM,
+  AARCH64_SVEF64MM,
+  AARCH64_SVEBF16,
+  AARCH64_I8MM,
+  AARCH64_BF16,
+  AARCH64_DGH,
+  AARCH64_RNG,
+  AARCH64_BTI,
  AARCH64_LAST_,
 } Aarch64FeaturesEnum;

--- a/include/internal/hwcaps.h
+++ b/include/internal/hwcaps.h
@@ -59,6 +59,25 @@ CPU_FEATURES_START_CPP_NAMESPACE
 #define AARCH64_HWCAP_PACA (1UL << 30)
 #define AARCH64_HWCAP_PACG (1UL << 31)

+#define AARCH64_HWCAP2_DCPODP (1UL << 0)
+#define AARCH64_HWCAP2_SVE2 (1UL << 1)
+#define AARCH64_HWCAP2_SVEAES (1UL << 2)
+#define AARCH64_HWCAP2_SVEPMULL (1UL << 3)
+#define AARCH64_HWCAP2_SVEBITPERM (1UL << 4)
+#define AARCH64_HWCAP2_SVESHA3 (1UL << 5)
+#define AARCH64_HWCAP2_SVESM4 (1UL << 6)
+#define AARCH64_HWCAP2_FLAGM2 (1UL << 7)
+#define AARCH64_HWCAP2_FRINT (1UL << 8)
+#define AARCH64_HWCAP2_SVEI8MM (1UL << 9)
+#define AARCH64_HWCAP2_SVEF32MM (1UL << 10)
+#define AARCH64_HWCAP2_SVEF64MM (1UL << 11)
+#define AARCH64_HWCAP2_SVEBF16 (1UL << 12)
+#define AARCH64_HWCAP2_I8MM (1UL << 13)
+#define AARCH64_HWCAP2_BF16 (1UL << 14)
+#define AARCH64_HWCAP2_DGH (1UL << 15)
+#define AARCH64_HWCAP2_RNG (1UL << 16)
+#define AARCH64_HWCAP2_BTI (1UL << 17)
+
 // http://elixir.free-electrons.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h
 #define ARM_HWCAP_SWP (1UL << 0)
 #define ARM_HWCAP_HALF (1UL << 1)
--- a/src/cpuinfo_aarch64.c
+++ b/src/cpuinfo_aarch64.c
@@ -55,6 +55,24 @@ DECLARE_SETTER_AND_GETTER(Aarch64Features, ssbs)
 DECLARE_SETTER_AND_GETTER(Aarch64Features, sb)
 DECLARE_SETTER_AND_GETTER(Aarch64Features, paca)
 DECLARE_SETTER_AND_GETTER(Aarch64Features, pacg)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, dcpodp)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, sve2)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, sveaes)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, svepmull)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, svebitperm)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, svesha3)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, svesm4)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, flagm2)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, frint)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, svei8mm)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, svef32mm)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, svef64mm)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, svebf16)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, i8mm)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, bf16)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, dgh)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, rng)
+DECLARE_SETTER_AND_GETTER(Aarch64Features, bti)

 static const CapabilityConfig kConfigs[] = {
  [AARCH64_FP] = {{AARCH64_HWCAP_FP, 0}, "fp", &set_fp, &get_fp},
@@ -89,6 +107,24 @@ static const CapabilityConfig kConfigs[] = {
  [AARCH64_SB] = {{AARCH64_HWCAP_SB, 0}, "sb", &set_sb, &get_sb},
  [AARCH64_PACA] = {{AARCH64_HWCAP_PACA, 0}, "paca", &set_paca, &get_paca},
  [AARCH64_PACG] = {{AARCH64_HWCAP_PACG, 0}, "pacg", &set_pacg, &get_pacg},
+  [AARCH64_DCPODP] = {{0, AARCH64_HWCAP2_DCPODP}, "dcpodp", &set_dcpodp, &get_dcpodp},
+  [AARCH64_SVE2] = {{0, AARCH64_HWCAP2_SVE2}, "sve2", &set_sve2, &get_sve2},
+  [AARCH64_SVEAES] = {{0, AARCH64_HWCAP2_SVEAES}, "sveaes", &set_sveaes, &get_sveaes},
+  [AARCH64_SVEPMULL] = {{0, AARCH64_HWCAP2_SVEPMULL}, "svepmull", &set_svepmull, &get_svepmull},
+  [AARCH64_SVEBITPERM] = {{0, AARCH64_HWCAP2_SVEBITPERM}, "svebitperm", &set_svebitperm, &get_svebitperm},
+  [AARCH64_SVESHA3] = {{0, AARCH64_HWCAP2_SVESHA3}, "svesha3", &set_svesha3, &get_svesha3},
+  [AARCH64_SVESM4] = {{0, AARCH64_HWCAP2_SVESM4}, "svesm4", &set_svesm4, &get_svesm4},
+  [AARCH64_FLAGM2] = {{0, AARCH64_HWCAP2_FLAGM2}, "flagm2", &set_flagm2, &get_flagm2},
+  [AARCH64_FRINT] = {{0, AARCH64_HWCAP2_FRINT}, "frint", &set_frint, &get_frint},
+  [AARCH64_SVEI8MM] = {{0, AARCH64_HWCAP2_SVEI8MM}, "svei8mm", &set_svei8mm, &get_svei8mm},
+  [AARCH64_SVEF32MM] = {{0, AARCH64_HWCAP2_SVEF32MM}, "svef32mm", &set_svef32mm, &get_svef32mm},
+  [AARCH64_SVEF64MM] = {{0, AARCH64_HWCAP2_SVEF64MM}, "svef64mm", &set_svef64mm, &get_svef64mm},
+  [AARCH64_SVEBF16] = {{0, AARCH64_HWCAP2_SVEBF16}, "svebf16", &set_svebf16, &get_svebf16},
+  [AARCH64_I8MM] = {{0, AARCH64_HWCAP2_I8MM}, "i8mm", &set_i8mm, &get_i8mm},
+  [AARCH64_BF16] = {{0, AARCH64_HWCAP2_BF16}, "bf16", &set_bf16, &get_bf16},
+  [AARCH64_DGH] = {{0, AARCH64_HWCAP2_DGH}, "dgh", &set_dgh, &get_dgh},
+  [AARCH64_RNG] = {{0, AARCH64_HWCAP2_RNG}, "rng", &set_rng, &get_rng},
+  [AARCH64_BTI] = {{0, AARCH64_HWCAP2_BTI}, "bti", &set_bti, &get_bti},
 };

 static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
--- a/test/cpuinfo_aarch64_test.cc
+++ b/test/cpuinfo_aarch64_test.cc
@@ -61,6 +61,33 @@ TEST(CpuinfoAarch64Test, FromHardwareCap) {
  EXPECT_FALSE(info.features.pacg);
 }

+TEST(CpuinfoAarch64Test, FromHardwareCap2) {
+  SetHardwareCapabilities(AARCH64_HWCAP_FP, AARCH64_HWCAP2_SVE2 | AARCH64_HWCAP2_BTI);
+  GetEmptyFilesystem();  // disabling /proc/cpuinfo
+  const auto info = GetAarch64Info();
+  EXPECT_TRUE(info.features.fp);
+
+  EXPECT_TRUE(info.features.sve2);
+  EXPECT_TRUE(info.features.bti);
+
+  EXPECT_FALSE(info.features.dcpodp);
+  EXPECT_FALSE(info.features.sveaes);
+  EXPECT_FALSE(info.features.svepmull);
+  EXPECT_FALSE(info.features.svebitperm);
+  EXPECT_FALSE(info.features.svesha3);
+  EXPECT_FALSE(info.features.svesm4);
+  EXPECT_FALSE(info.features.flagm2);
+  EXPECT_FALSE(info.features.frint);
+  EXPECT_FALSE(info.features.svei8mm);
+  EXPECT_FALSE(info.features.svef32mm);
+  EXPECT_FALSE(info.features.svef64mm);
+  EXPECT_FALSE(info.features.svebf16);
+  EXPECT_FALSE(info.features.i8mm);
+  EXPECT_FALSE(info.features.bf16);
+  EXPECT_FALSE(info.features.dgh);
+  EXPECT_FALSE(info.features.rng);
+}
+
 TEST(CpuinfoAarch64Test, ARMCortexA53) {
  DisableHardwareCapabilities();
  auto& fs = GetEmptyFilesystem();
@@ -119,6 +146,24 @@ CPU revision    : 3)");
  EXPECT_FALSE(info.features.sb);
  EXPECT_FALSE(info.features.paca);
  EXPECT_FALSE(info.features.pacg);
+  EXPECT_FALSE(info.features.dcpodp);
+  EXPECT_FALSE(info.features.sve2);
+  EXPECT_FALSE(info.features.sveaes);
+  EXPECT_FALSE(info.features.svepmull);
+  EXPECT_FALSE(info.features.svebitperm);
+  EXPECT_FALSE(info.features.svesha3);
+  EXPECT_FALSE(info.features.svesm4);
+  EXPECT_FALSE(info.features.flagm2);
+  EXPECT_FALSE(info.features.frint);
+  EXPECT_FALSE(info.features.svei8mm);
+  EXPECT_FALSE(info.features.svef32mm);
+  EXPECT_FALSE(info.features.svef64mm);
+  EXPECT_FALSE(info.features.svebf16);
+  EXPECT_FALSE(info.features.i8mm);
+  EXPECT_FALSE(info.features.bf16);
+  EXPECT_FALSE(info.features.dgh);
+  EXPECT_FALSE(info.features.rng);
+  EXPECT_FALSE(info.features.bti);
 }

 }  // namespace