From 73d10ad25b80d8006670d9b1e3504517818712cc Mon Sep 17 00:00:00 2001 From: Tamas Zsoldos <54321620+tamaszarm@users.noreply.github.com> Date: Mon, 21 Sep 2020 09:50:38 +0200 Subject: [PATCH] Update features for AArch64 to Linux 5.8 (#122) This adds the following features: dcpodp, sve2, sveaes, svepmull, svebitperm, svesha3, svesm4, flagm2, frint, svei8mm, svef32mm, svef64mm, svebf16, i8mm, bf16, dgh, rng and bti. With these, all features used by Linux 5.8 on AArch64 are supported. Fixes #126 --- include/cpuinfo_aarch64.h | 100 ++++++++++++++++++++++++----------- include/internal/hwcaps.h | 19 +++++++ src/cpuinfo_aarch64.c | 36 +++++++++++++ test/cpuinfo_aarch64_test.cc | 45 ++++++++++++++++ 4 files changed, 168 insertions(+), 32 deletions(-) diff --git a/include/cpuinfo_aarch64.h b/include/cpuinfo_aarch64.h index 5a7532e..a42ecdf 100644 --- a/include/cpuinfo_aarch64.h +++ b/include/cpuinfo_aarch64.h @@ -21,38 +21,56 @@ CPU_FEATURES_START_CPP_NAMESPACE typedef struct { - int fp : 1; // Floating-point. - int asimd : 1; // Advanced SIMD. - int evtstrm : 1; // Generic timer generated events. - int aes : 1; // Hardware-accelerated Advanced Encryption Standard. - int pmull : 1; // Polynomial multiply long. - int sha1 : 1; // Hardware-accelerated SHA1. - int sha2 : 1; // Hardware-accelerated SHA2-256. - int crc32 : 1; // Hardware-accelerated CRC-32. - int atomics : 1; // Armv8.1 atomic instructions. - int fphp : 1; // Half-precision floating point support. - int asimdhp : 1; // Advanced SIMD half-precision support. - int cpuid : 1; // Access to certain ID registers. - int asimdrdm : 1; // Rounding Double Multiply Accumulate/Subtract. - int jscvt : 1; // Support for JavaScript conversion. - int fcma : 1; // Floating point complex numbers. - int lrcpc : 1; // Support for weaker release consistency. - int dcpop : 1; // Data persistence writeback. - int sha3 : 1; // Hardware-accelerated SHA3. - int sm3 : 1; // Hardware-accelerated SM3. - int sm4 : 1; // Hardware-accelerated SM4. 
- int asimddp : 1; // Dot product instruction. - int sha512 : 1; // Hardware-accelerated SHA512. - int sve : 1; // Scalable Vector Extension. - int asimdfhm : 1; // Additional half-precision instructions. - int dit : 1; // Data independent timing. - int uscat : 1; // Unaligned atomics support. - int ilrcpc : 1; // Additional support for weaker release consistency. - int flagm : 1; // Flag manipulation instructions. - int ssbs : 1; // Speculative Store Bypass Safe PSTATE bit. - int sb : 1; // Speculation barrier. - int paca : 1; // Address authentication. - int pacg : 1; // Generic authentication. + int fp : 1; // Floating-point. + int asimd : 1; // Advanced SIMD. + int evtstrm : 1; // Generic timer generated events. + int aes : 1; // Hardware-accelerated Advanced Encryption Standard. + int pmull : 1; // Polynomial multiply long. + int sha1 : 1; // Hardware-accelerated SHA1. + int sha2 : 1; // Hardware-accelerated SHA2-256. + int crc32 : 1; // Hardware-accelerated CRC-32. + int atomics : 1; // Armv8.1 atomic instructions. + int fphp : 1; // Half-precision floating point support. + int asimdhp : 1; // Advanced SIMD half-precision support. + int cpuid : 1; // Access to certain ID registers. + int asimdrdm : 1; // Rounding Double Multiply Accumulate/Subtract. + int jscvt : 1; // Support for JavaScript conversion. + int fcma : 1; // Floating point complex numbers. + int lrcpc : 1; // Support for weaker release consistency. + int dcpop : 1; // Data persistence writeback. + int sha3 : 1; // Hardware-accelerated SHA3. + int sm3 : 1; // Hardware-accelerated SM3. + int sm4 : 1; // Hardware-accelerated SM4. + int asimddp : 1; // Dot product instruction. + int sha512 : 1; // Hardware-accelerated SHA512. + int sve : 1; // Scalable Vector Extension. + int asimdfhm : 1; // Additional half-precision instructions. + int dit : 1; // Data independent timing. + int uscat : 1; // Unaligned atomics support. + int ilrcpc : 1; // Additional support for weaker release consistency. 
+ int flagm : 1; // Flag manipulation instructions. + int ssbs : 1; // Speculative Store Bypass Safe PSTATE bit. + int sb : 1; // Speculation barrier. + int paca : 1; // Address authentication. + int pacg : 1; // Generic authentication. + int dcpodp : 1; // Data cache clean to point of persistence. + int sve2 : 1; // Scalable Vector Extension (version 2). + int sveaes : 1; // SVE AES instructions. + int svepmull : 1; // SVE polynomial multiply long instructions. + int svebitperm : 1; // SVE bit permute instructions. + int svesha3 : 1; // SVE SHA3 instructions. + int svesm4 : 1; // SVE SM4 instructions. + int flagm2 : 1; // Additional flag manipulation instructions. + int frint : 1; // Floating point to integer rounding. + int svei8mm : 1; // SVE Int8 matrix multiplication instructions. + int svef32mm : 1; // SVE FP32 matrix multiplication instruction. + int svef64mm : 1; // SVE FP64 matrix multiplication instructions. + int svebf16 : 1; // SVE BFloat16 instructions. + int i8mm : 1; // Int8 matrix multiplication instructions. + int bf16 : 1; // BFloat16 instructions. + int dgh : 1; // Data Gathering Hint instruction. + int rng : 1; // True random number generator support. + int bti : 1; // Branch target identification. // Make sure to update Aarch64FeaturesEnum below if you add a field here. 
} Aarch64Features; @@ -103,6 +121,24 @@ typedef enum { AARCH64_SB, AARCH64_PACA, AARCH64_PACG, + AARCH64_DCPODP, + AARCH64_SVE2, + AARCH64_SVEAES, + AARCH64_SVEPMULL, + AARCH64_SVEBITPERM, + AARCH64_SVESHA3, + AARCH64_SVESM4, + AARCH64_FLAGM2, + AARCH64_FRINT, + AARCH64_SVEI8MM, + AARCH64_SVEF32MM, + AARCH64_SVEF64MM, + AARCH64_SVEBF16, + AARCH64_I8MM, + AARCH64_BF16, + AARCH64_DGH, + AARCH64_RNG, + AARCH64_BTI, AARCH64_LAST_, } Aarch64FeaturesEnum; diff --git a/include/internal/hwcaps.h b/include/internal/hwcaps.h index eb54f5c..f0e91b3 100644 --- a/include/internal/hwcaps.h +++ b/include/internal/hwcaps.h @@ -59,6 +59,25 @@ CPU_FEATURES_START_CPP_NAMESPACE #define AARCH64_HWCAP_PACA (1UL << 30) #define AARCH64_HWCAP_PACG (1UL << 31) +#define AARCH64_HWCAP2_DCPODP (1UL << 0) +#define AARCH64_HWCAP2_SVE2 (1UL << 1) +#define AARCH64_HWCAP2_SVEAES (1UL << 2) +#define AARCH64_HWCAP2_SVEPMULL (1UL << 3) +#define AARCH64_HWCAP2_SVEBITPERM (1UL << 4) +#define AARCH64_HWCAP2_SVESHA3 (1UL << 5) +#define AARCH64_HWCAP2_SVESM4 (1UL << 6) +#define AARCH64_HWCAP2_FLAGM2 (1UL << 7) +#define AARCH64_HWCAP2_FRINT (1UL << 8) +#define AARCH64_HWCAP2_SVEI8MM (1UL << 9) +#define AARCH64_HWCAP2_SVEF32MM (1UL << 10) +#define AARCH64_HWCAP2_SVEF64MM (1UL << 11) +#define AARCH64_HWCAP2_SVEBF16 (1UL << 12) +#define AARCH64_HWCAP2_I8MM (1UL << 13) +#define AARCH64_HWCAP2_BF16 (1UL << 14) +#define AARCH64_HWCAP2_DGH (1UL << 15) +#define AARCH64_HWCAP2_RNG (1UL << 16) +#define AARCH64_HWCAP2_BTI (1UL << 17) + // http://elixir.free-electrons.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h #define ARM_HWCAP_SWP (1UL << 0) #define ARM_HWCAP_HALF (1UL << 1) diff --git a/src/cpuinfo_aarch64.c b/src/cpuinfo_aarch64.c index 2e43a25..6cd5308 100644 --- a/src/cpuinfo_aarch64.c +++ b/src/cpuinfo_aarch64.c @@ -55,6 +55,24 @@ DECLARE_SETTER_AND_GETTER(Aarch64Features, ssbs) DECLARE_SETTER_AND_GETTER(Aarch64Features, sb) DECLARE_SETTER_AND_GETTER(Aarch64Features, paca) 
DECLARE_SETTER_AND_GETTER(Aarch64Features, pacg) +DECLARE_SETTER_AND_GETTER(Aarch64Features, dcpodp) +DECLARE_SETTER_AND_GETTER(Aarch64Features, sve2) +DECLARE_SETTER_AND_GETTER(Aarch64Features, sveaes) +DECLARE_SETTER_AND_GETTER(Aarch64Features, svepmull) +DECLARE_SETTER_AND_GETTER(Aarch64Features, svebitperm) +DECLARE_SETTER_AND_GETTER(Aarch64Features, svesha3) +DECLARE_SETTER_AND_GETTER(Aarch64Features, svesm4) +DECLARE_SETTER_AND_GETTER(Aarch64Features, flagm2) +DECLARE_SETTER_AND_GETTER(Aarch64Features, frint) +DECLARE_SETTER_AND_GETTER(Aarch64Features, svei8mm) +DECLARE_SETTER_AND_GETTER(Aarch64Features, svef32mm) +DECLARE_SETTER_AND_GETTER(Aarch64Features, svef64mm) +DECLARE_SETTER_AND_GETTER(Aarch64Features, svebf16) +DECLARE_SETTER_AND_GETTER(Aarch64Features, i8mm) +DECLARE_SETTER_AND_GETTER(Aarch64Features, bf16) +DECLARE_SETTER_AND_GETTER(Aarch64Features, dgh) +DECLARE_SETTER_AND_GETTER(Aarch64Features, rng) +DECLARE_SETTER_AND_GETTER(Aarch64Features, bti) static const CapabilityConfig kConfigs[] = { [AARCH64_FP] = {{AARCH64_HWCAP_FP, 0}, "fp", &set_fp, &get_fp}, @@ -89,6 +107,24 @@ static const CapabilityConfig kConfigs[] = { [AARCH64_SB] = {{AARCH64_HWCAP_SB, 0}, "sb", &set_sb, &get_sb}, [AARCH64_PACA] = {{AARCH64_HWCAP_PACA, 0}, "paca", &set_paca, &get_paca}, [AARCH64_PACG] = {{AARCH64_HWCAP_PACG, 0}, "pacg", &set_pacg, &get_pacg}, + [AARCH64_DCPODP] = {{0, AARCH64_HWCAP2_DCPODP}, "dcpodp", &set_dcpodp, &get_dcpodp}, + [AARCH64_SVE2] = {{0, AARCH64_HWCAP2_SVE2}, "sve2", &set_sve2, &get_sve2}, + [AARCH64_SVEAES] = {{0, AARCH64_HWCAP2_SVEAES}, "sveaes", &set_sveaes, &get_sveaes}, + [AARCH64_SVEPMULL] = {{0, AARCH64_HWCAP2_SVEPMULL}, "svepmull", &set_svepmull, &get_svepmull}, + [AARCH64_SVEBITPERM] = {{0, AARCH64_HWCAP2_SVEBITPERM}, "svebitperm", &set_svebitperm, &get_svebitperm}, + [AARCH64_SVESHA3] = {{0, AARCH64_HWCAP2_SVESHA3}, "svesha3", &set_svesha3, &get_svesha3}, + [AARCH64_SVESM4] = {{0, AARCH64_HWCAP2_SVESM4}, "svesm4", &set_svesm4, 
&get_svesm4}, + [AARCH64_FLAGM2] = {{0, AARCH64_HWCAP2_FLAGM2}, "flagm2", &set_flagm2, &get_flagm2}, + [AARCH64_FRINT] = {{0, AARCH64_HWCAP2_FRINT}, "frint", &set_frint, &get_frint}, + [AARCH64_SVEI8MM] = {{0, AARCH64_HWCAP2_SVEI8MM}, "svei8mm", &set_svei8mm, &get_svei8mm}, + [AARCH64_SVEF32MM] = {{0, AARCH64_HWCAP2_SVEF32MM}, "svef32mm", &set_svef32mm, &get_svef32mm}, + [AARCH64_SVEF64MM] = {{0, AARCH64_HWCAP2_SVEF64MM}, "svef64mm", &set_svef64mm, &get_svef64mm}, + [AARCH64_SVEBF16] = {{0, AARCH64_HWCAP2_SVEBF16}, "svebf16", &set_svebf16, &get_svebf16}, + [AARCH64_I8MM] = {{0, AARCH64_HWCAP2_I8MM}, "i8mm", &set_i8mm, &get_i8mm}, + [AARCH64_BF16] = {{0, AARCH64_HWCAP2_BF16}, "bf16", &set_bf16, &get_bf16}, + [AARCH64_DGH] = {{0, AARCH64_HWCAP2_DGH}, "dgh", &set_dgh, &get_dgh}, + [AARCH64_RNG] = {{0, AARCH64_HWCAP2_RNG}, "rng", &set_rng, &get_rng}, + [AARCH64_BTI] = {{0, AARCH64_HWCAP2_BTI}, "bti", &set_bti, &get_bti}, }; static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig); diff --git a/test/cpuinfo_aarch64_test.cc b/test/cpuinfo_aarch64_test.cc index e208d35..1bd0648 100644 --- a/test/cpuinfo_aarch64_test.cc +++ b/test/cpuinfo_aarch64_test.cc @@ -61,6 +61,33 @@ TEST(CpuinfoAarch64Test, FromHardwareCap) { EXPECT_FALSE(info.features.pacg); } +TEST(CpuinfoAarch64Test, FromHardwareCap2) { + SetHardwareCapabilities(AARCH64_HWCAP_FP, AARCH64_HWCAP2_SVE2 | AARCH64_HWCAP2_BTI); + GetEmptyFilesystem(); // disabling /proc/cpuinfo + const auto info = GetAarch64Info(); + EXPECT_TRUE(info.features.fp); + + EXPECT_TRUE(info.features.sve2); + EXPECT_TRUE(info.features.bti); + + EXPECT_FALSE(info.features.dcpodp); + EXPECT_FALSE(info.features.sveaes); + EXPECT_FALSE(info.features.svepmull); + EXPECT_FALSE(info.features.svebitperm); + EXPECT_FALSE(info.features.svesha3); + EXPECT_FALSE(info.features.svesm4); + EXPECT_FALSE(info.features.flagm2); + EXPECT_FALSE(info.features.frint); + EXPECT_FALSE(info.features.svei8mm); + 
EXPECT_FALSE(info.features.svef32mm); + EXPECT_FALSE(info.features.svef64mm); + EXPECT_FALSE(info.features.svebf16); + EXPECT_FALSE(info.features.i8mm); + EXPECT_FALSE(info.features.bf16); + EXPECT_FALSE(info.features.dgh); + EXPECT_FALSE(info.features.rng); +} + TEST(CpuinfoAarch64Test, ARMCortexA53) { DisableHardwareCapabilities(); auto& fs = GetEmptyFilesystem(); @@ -119,6 +146,24 @@ CPU revision : 3)"); EXPECT_FALSE(info.features.sb); EXPECT_FALSE(info.features.paca); EXPECT_FALSE(info.features.pacg); + EXPECT_FALSE(info.features.dcpodp); + EXPECT_FALSE(info.features.sve2); + EXPECT_FALSE(info.features.sveaes); + EXPECT_FALSE(info.features.svepmull); + EXPECT_FALSE(info.features.svebitperm); + EXPECT_FALSE(info.features.svesha3); + EXPECT_FALSE(info.features.svesm4); + EXPECT_FALSE(info.features.flagm2); + EXPECT_FALSE(info.features.frint); + EXPECT_FALSE(info.features.svei8mm); + EXPECT_FALSE(info.features.svef32mm); + EXPECT_FALSE(info.features.svef64mm); + EXPECT_FALSE(info.features.svebf16); + EXPECT_FALSE(info.features.i8mm); + EXPECT_FALSE(info.features.bf16); + EXPECT_FALSE(info.features.dgh); + EXPECT_FALSE(info.features.rng); + EXPECT_FALSE(info.features.bti); } } // namespace