1
0
mirror of https://github.com/google/cpu_features.git synced 2025-04-27 15:12:30 +02:00

Add Windows Arm64 support (#291)

* Add Windows Arm64 support

Windows Arm64 support is added by detecting features via the Windows API function IsProcessorFeaturePresent. Added _M_ARM64 so that the CPU_FEATURES_ARCH_AARCH64 macro is also defined on Windows. Added initial code for Windows Arm64 testing and provided a test for Raspberry Pi 4. We can't use "define_introspection_and_hwcaps.inl" as a common file for all operating systems due to MSVC compiler error C2099: initializer is not a constant, so as a workaround a separate "define_introspection.inl" is used for Windows.

See also: #268, #284, #186

* [CMake] Add  windows_utils.h to PROCESSOR_IS_AARCH64

* Add detection of armv8.1 atomic instructions

* Update note on win-arm64 implementation and move to cpuinfo_aarch64.h

* Remove redundant #ifdef CPU_FEATURES_OS_WINDOWS

* Add note on FP/SIMD and Cryptographic Extension for win-arm64

* Add comments to Aarch64Info fields

Added comments to specify that implementer, part and variant are set to 0 on Windows, since the Windows API does not provide a way to get this information. For revision, added a comment that we use GetNativeSystemInfo.
This commit is contained in:
Mykola Hohsadze 2023-02-23 12:41:33 +02:00 committed by GitHub
parent 273af56a15
commit a6bf4f9031
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 373 additions and 16 deletions

View File

@ -79,6 +79,7 @@ macro(add_cpu_features_headers_and_sources HDRS_LIST_NAME SRCS_LIST_NAME)
list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_arm.h)
elseif(PROCESSOR_IS_AARCH64)
list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_aarch64.h)
list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/internal/windows_utils.h)
elseif(PROCESSOR_IS_X86)
list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_x86.h)
list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/internal/cpuid_x86.h)

View File

@ -39,7 +39,7 @@
#define CPU_FEATURES_ARCH_ARM
#endif
#if defined(__aarch64__)
#if (defined(__aarch64__) || defined(_M_ARM64))
#define CPU_FEATURES_ARCH_AARCH64
#endif

View File

@ -12,6 +12,100 @@
// See the License for the specific language governing permissions and
// limitations under the License.
////////////////////////////////////////////////////////////////////////////////
// A note on Windows AArch64 implementation
////////////////////////////////////////////////////////////////////////////////
// Getting cpu info via EL1 system registers is not possible, so we delegate it
// to the Windows API (i.e., IsProcessorFeaturePresent and GetNativeSystemInfo).
// The `implementer`, `variant` and `part` fields of the `Aarch64Info` struct
// are not used, so they are set to 0. To get `revision` we use
// `wProcessorRevision` from `SYSTEM_INFO`.
//
// Cryptographic Extension:
// -----------------------------------------------------------------------------
// According to documentation Arm Architecture Reference Manual for
// A-profile architecture. A2.3 The Armv8 Cryptographic Extension. The Armv8.0
// Cryptographic Extension provides instructions for the acceleration of
// encryption and decryption, and includes the following features: FEAT_AES,
// FEAT_PMULL, FEAT_SHA1, FEAT_SHA256.
// see: https://developer.arm.com/documentation/ddi0487/latest
//
// We use `PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE` to detect all Armv8.0 crypto
// features. This value reports all features or nothing, so even if you only
// have support for FEAT_AES and FEAT_PMULL, it will still return false.
//
// From Armv8.2, an implementation of the Armv8.0 Cryptographic Extension can
// include either or both of:
//
// • The AES functionality, including support for multiplication of 64-bit
// polynomials. The ID_AA64ISAR0_EL1.AES field indicates whether this
// functionality is supported.
// • The SHA1 and SHA2-256 functionality. The ID_AA64ISAR0_EL1.{SHA2, SHA1}
// fields indicate whether this functionality is supported.
//
// ID_AA64ISAR0_EL1.AES, bits [7:4]:
// Indicates support for AES instructions in AArch64 state. Defined values are:
// - 0b0000 No AES instructions implemented.
// - 0b0001 AESE, AESD, AESMC, and AESIMC instructions implemented.
// - 0b0010 As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit
// data quantities.
//
// FEAT_AES implements the functionality identified by the value 0b0001.
// FEAT_PMULL implements the functionality identified by the value 0b0010.
// From Armv8, the permitted values are 0b0000 and 0b0010.
//
// ID_AA64ISAR0_EL1.SHA1, bits [11:8]:
// Indicates support for SHA1 instructions in AArch64 state. Defined values are:
// - 0b0000 No SHA1 instructions implemented.
// - 0b0001 SHA1C, SHA1P, SHA1M, SHA1H, SHA1SU0, and SHA1SU1 instructions
// implemented.
//
// FEAT_SHA1 implements the functionality identified by the value 0b0001.
// From Armv8, the permitted values are 0b0000 and 0b0001.
// If the value of ID_AA64ISAR0_EL1.SHA2 is 0b0000, this field must have the
// value 0b0000.
//
// ID_AA64ISAR0_EL1.SHA2, bits [15:12]:
// Indicates support for SHA2 instructions in AArch64 state. Defined values are:
// - 0b0000 No SHA2 instructions implemented.
// - 0b0001 Implements instructions: SHA256H, SHA256H2, SHA256SU0, and
// SHA256SU1.
// - 0b0010 Implements instructions:
// • SHA256H, SHA256H2, SHA256SU0, and SHA256SU1.
// • SHA512H, SHA512H2, SHA512SU0, and SHA512SU1.
//
// FEAT_SHA256 implements the functionality identified by the value 0b0001.
// FEAT_SHA512 implements the functionality identified by the value 0b0010.
//
// In Armv8, the permitted values are 0b0000 and 0b0001.
// From Armv8.2, the permitted values are 0b0000, 0b0001, and 0b0010.
//
// If the value of ID_AA64ISAR0_EL1.SHA1 is 0b0000, this field must have the
// value 0b0000.
//
// If the value of this field is 0b0010, ID_AA64ISAR0_EL1.SHA3
// must have the value 0b0001.
//
// Other cryptographic features that we cannot detect, such as sha512, sha3,
// sm3, sm4, sveaes, svepmull, svesha3 and svesm4, are set to 0.
//
// FP/SIMD:
// -----------------------------------------------------------------------------
// FP/SIMD must be implemented on all Armv8.0 implementations, but
// implementations targeting specialized markets may support the following
// combinations:
//
// • No NEON or floating-point.
// • Full floating-point and SIMD support with exception trapping.
// • Full floating-point and SIMD support without exception trapping.
//
// ref:
// https://developer.arm.com/documentation/den0024/a/AArch64-Floating-point-and-NEON
//
// So, we use `PF_ARM_VFP_32_REGISTERS_AVAILABLE` and
// `PF_ARM_NEON_INSTRUCTIONS_AVAILABLE` to detect `fp` and `asimd`, respectively.
#ifndef CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_
#define CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_
@ -81,10 +175,11 @@ typedef struct {
typedef struct {
Aarch64Features features;
int implementer;
int variant;
int part;
int revision;
int implementer; // We set 0 for Windows.
int variant; // We set 0 for Windows.
int part; // We set 0 for Windows.
int revision; // We use GetNativeSystemInfo to get processor revision for
// Windows.
} Aarch64Info;
Aarch64Info GetAarch64Info(void);

View File

@ -34,5 +34,37 @@
#define PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
#endif
// Fallback definitions of the Arm feature identifiers that are passed to
// `IsProcessorFeaturePresent`. Each macro is defined here only when it is not
// already defined at this point, so a definition coming from the platform
// headers always wins.
// NOTE(review): the numeric values are presumably copied from the Windows SDK
// (winnt.h) - confirm against the SDK before changing any of them.

// Used to detect the `fp` feature.
#if !defined(PF_ARM_VFP_32_REGISTERS_AVAILABLE)
#define PF_ARM_VFP_32_REGISTERS_AVAILABLE 18
#endif
// Used to detect the `asimd` feature.
#if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#endif
// Used to detect `aes`, `pmull`, `sha1` and `sha2` together (all or nothing).
#if !defined(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE 30
#endif
// Used to detect the `crc32` feature.
#if !defined(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE 31
#endif
// Used to detect the `atomics` feature.
#if !defined(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34
#endif
// Used to detect the `asimddp` feature.
#if !defined(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
#endif
// Used to detect the `jscvt` feature.
#if !defined(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44
#endif
// Used to detect the `lrcpc` feature.
#if !defined(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
#endif
#endif // CPU_FEATURES_OS_WINDOWS
#endif // CPU_FEATURES_INCLUDE_INTERNAL_WINDOWS_UTILS_H_

138
src/impl_aarch64_windows.c Normal file
View File

@ -0,0 +1,138 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpu_features_macros.h"
#ifdef CPU_FEATURES_ARCH_AARCH64
#ifdef CPU_FEATURES_OS_WINDOWS
#include "cpuinfo_aarch64.h"
////////////////////////////////////////////////////////////////////////////////
// Definitions for introspection.
////////////////////////////////////////////////////////////////////////////////
// X-macro table with one LINE(...) row per AArch64 feature. The first argument
// is the `AARCH64_*` enum value and the second is the lower-case feature name
// (the same spelling as the `features.<name>` fields assigned in
// GetAarch64Info below). The three trailing arguments are intentionally left
// empty in this Windows table.
// NOTE(review): the empty columns presumably correspond to the cpuinfo flag
// and hwcap/hwcap2 masks used by the Linux table - confirm against
// "define_introspection_and_hwcaps.inl".
#define INTROSPECTION_TABLE \
LINE(AARCH64_FP, fp, , , ) \
LINE(AARCH64_ASIMD, asimd, , , ) \
LINE(AARCH64_EVTSTRM, evtstrm, , , ) \
LINE(AARCH64_AES, aes, , , ) \
LINE(AARCH64_PMULL, pmull, , , ) \
LINE(AARCH64_SHA1, sha1, , , ) \
LINE(AARCH64_SHA2, sha2, , , ) \
LINE(AARCH64_CRC32, crc32, , , ) \
LINE(AARCH64_ATOMICS, atomics, , , ) \
LINE(AARCH64_FPHP, fphp, , , ) \
LINE(AARCH64_ASIMDHP, asimdhp, , , ) \
LINE(AARCH64_CPUID, cpuid, , , ) \
LINE(AARCH64_ASIMDRDM, asimdrdm, , , ) \
LINE(AARCH64_JSCVT, jscvt, , , ) \
LINE(AARCH64_FCMA, fcma, , , ) \
LINE(AARCH64_LRCPC, lrcpc, , , ) \
LINE(AARCH64_DCPOP, dcpop, , , ) \
LINE(AARCH64_SHA3, sha3, , , ) \
LINE(AARCH64_SM3, sm3, , , ) \
LINE(AARCH64_SM4, sm4, , , ) \
LINE(AARCH64_ASIMDDP, asimddp, , , ) \
LINE(AARCH64_SHA512, sha512, , , ) \
LINE(AARCH64_SVE, sve, , , ) \
LINE(AARCH64_ASIMDFHM, asimdfhm, , , ) \
LINE(AARCH64_DIT, dit, , , ) \
LINE(AARCH64_USCAT, uscat, , , ) \
LINE(AARCH64_ILRCPC, ilrcpc, , , ) \
LINE(AARCH64_FLAGM, flagm, , , ) \
LINE(AARCH64_SSBS, ssbs, , , ) \
LINE(AARCH64_SB, sb, , , ) \
LINE(AARCH64_PACA, paca, , , ) \
LINE(AARCH64_PACG, pacg, , , ) \
LINE(AARCH64_DCPODP, dcpodp, , , ) \
LINE(AARCH64_SVE2, sve2, , , ) \
LINE(AARCH64_SVEAES, sveaes, , , ) \
LINE(AARCH64_SVEPMULL, svepmull, , , ) \
LINE(AARCH64_SVEBITPERM, svebitperm, , , ) \
LINE(AARCH64_SVESHA3, svesha3, , , ) \
LINE(AARCH64_SVESM4, svesm4, , , ) \
LINE(AARCH64_FLAGM2, flagm2, , , ) \
LINE(AARCH64_FRINT, frint, , , ) \
LINE(AARCH64_SVEI8MM, svei8mm, , , ) \
LINE(AARCH64_SVEF32MM, svef32mm, , , ) \
LINE(AARCH64_SVEF64MM, svef64mm, , , ) \
LINE(AARCH64_SVEBF16, svebf16, , , ) \
LINE(AARCH64_I8MM, i8mm, , , ) \
LINE(AARCH64_BF16, bf16, , , ) \
LINE(AARCH64_DGH, dgh, , , ) \
LINE(AARCH64_RNG, rng, , , ) \
LINE(AARCH64_BTI, bti, , , ) \
LINE(AARCH64_MTE, mte, , , ) \
LINE(AARCH64_ECV, ecv, , , ) \
LINE(AARCH64_AFP, afp, , , ) \
LINE(AARCH64_RPRES, rpres, , , )
// Prefixes handed to the shared introspection template included below.
// NOTE(review): the template presumably expands the table into the
// `GetAarch64Features*` helpers (e.g. GetAarch64FeaturesEnumName) - confirm in
// "define_introspection.inl", which is not visible here.
#define INTROSPECTION_PREFIX Aarch64
#define INTROSPECTION_ENUM_PREFIX AARCH64
#include "define_introspection.inl"
////////////////////////////////////////////////////////////////////////////////
// Implementation.
////////////////////////////////////////////////////////////////////////////////
#include <stdbool.h>
#include "internal/windows_utils.h"
#ifdef CPU_FEATURES_MOCK_CPUID_AARCH64
// Test builds define CPU_FEATURES_MOCK_CPUID_AARCH64 and supply these two
// functions themselves, so only declarations are emitted here.
extern bool GetWindowsIsProcessorFeaturePresent(DWORD);
// `(void)` makes this a real prototype; in C an empty parameter list would
// leave the arguments unchecked.
extern WORD GetWindowsNativeSystemInfoProcessorRevision(void);
#else   // CPU_FEATURES_MOCK_CPUID_AARCH64
// Thin wrapper over the Windows API `IsProcessorFeaturePresent`.
// Returns true when the API reports `dwProcessorFeature` as present.
static bool GetWindowsIsProcessorFeaturePresent(DWORD dwProcessorFeature) {
  return IsProcessorFeaturePresent(dwProcessorFeature);
}

// Returns the `wProcessorRevision` field that `GetNativeSystemInfo` fills in
// for the current machine.
static WORD GetWindowsNativeSystemInfoProcessorRevision(void) {
  SYSTEM_INFO system_info;
  GetNativeSystemInfo(&system_info);
  return system_info.wProcessorRevision;
}
#endif  // CPU_FEATURES_MOCK_CPUID_AARCH64
// All-zero template: objects with static storage duration are
// zero-initialized, so every feature bit and numeric field starts at 0.
static const Aarch64Info kEmptyAarch64Info;

// Builds the Aarch64Info for the current machine from the Windows API
// wrappers. Only `revision` and the features that have a matching
// `PF_ARM_*` identifier are filled in; every other field keeps the zero value
// copied from `kEmptyAarch64Info`.
Aarch64Info GetAarch64Info(void) {
  Aarch64Info info = kEmptyAarch64Info;

  info.revision = GetWindowsNativeSystemInfoProcessorRevision();

  // Query each single-feature identifier first, then publish the results.
  const bool has_fp =
      GetWindowsIsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE);
  const bool has_asimd =
      GetWindowsIsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);
  const bool has_crc32 = GetWindowsIsProcessorFeaturePresent(
      PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
  const bool has_asimddp =
      GetWindowsIsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
  const bool has_jscvt = GetWindowsIsProcessorFeaturePresent(
      PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE);
  const bool has_lrcpc = GetWindowsIsProcessorFeaturePresent(
      PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE);
  const bool has_atomics = GetWindowsIsProcessorFeaturePresent(
      PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
  // A single identifier covers the whole Armv8.0 crypto group, so the four
  // crypto features below are always reported together.
  const bool has_v8_crypto = GetWindowsIsProcessorFeaturePresent(
      PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);

  info.features.fp = has_fp;
  info.features.asimd = has_asimd;
  info.features.crc32 = has_crc32;
  info.features.asimddp = has_asimddp;
  info.features.jscvt = has_jscvt;
  info.features.lrcpc = has_lrcpc;
  info.features.atomics = has_atomics;

  info.features.aes = has_v8_crypto;
  info.features.sha1 = has_v8_crypto;
  info.features.sha2 = has_v8_crypto;
  info.features.pmull = has_v8_crypto;

  return info;
}
#endif // CPU_FEATURES_OS_WINDOWS
#endif // CPU_FEATURES_ARCH_AARCH64

View File

@ -71,7 +71,11 @@ endif()
##------------------------------------------------------------------------------
## cpuinfo_aarch64_test
if(PROCESSOR_IS_AARCH64)
add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/impl_aarch64_linux_or_android.c)
add_executable(cpuinfo_aarch64_test
cpuinfo_aarch64_test.cc
../src/impl_aarch64_linux_or_android.c
../src/impl_aarch64_windows.c)
# The mock switch only changes how this test's own sources are compiled, and
# nothing links against a test executable, so both requirements are PRIVATE.
target_compile_definitions(cpuinfo_aarch64_test PRIVATE CPU_FEATURES_MOCK_CPUID_AARCH64)
target_link_libraries(cpuinfo_aarch64_test PRIVATE all_libraries)
add_test(NAME cpuinfo_aarch64_test COMMAND cpuinfo_aarch64_test)
endif()

View File

@ -14,27 +14,88 @@
#include "cpuinfo_aarch64.h"
#include <set>
#include "filesystem_for_testing.h"
#include "gtest/gtest.h"
#include "hwcaps_for_testing.h"
#if defined(CPU_FEATURES_OS_WINDOWS)
#include "internal/windows_utils.h"
#endif // CPU_FEATURES_OS_WINDOWS
namespace cpu_features {
// Test double holding the values that the mocked Windows query functions
// report. On non-Windows builds the class is intentionally empty.
class FakeCpuAarch64 {
 public:
#if defined(CPU_FEATURES_OS_WINDOWS)
  // Returns true iff `dwProcessorFeature` was registered earlier through
  // SetWindowsIsProcessorFeaturePresent.
  bool GetWindowsIsProcessorFeaturePresent(DWORD dwProcessorFeature) const {
    return windows_isprocessorfeaturepresent_.count(dwProcessorFeature) != 0;
  }

  // Marks `dwProcessorFeature` as present; registering it twice is harmless
  // because the values are kept in a set.
  void SetWindowsIsProcessorFeaturePresent(DWORD dwProcessorFeature) {
    windows_isprocessorfeaturepresent_.insert(dwProcessorFeature);
  }

  // Returns the revision stored by the setter below (0 until it is called).
  WORD GetWindowsNativeSystemInfoProcessorRevision() const {
    return processor_revision_;
  }

  // Stores the processor revision that the mocked query should report.
  void SetWindowsNativeSystemInfoProcessorRevision(WORD wProcessorRevision) {
    processor_revision_ = wProcessorRevision;
  }

 private:
  // Feature identifiers currently reported as present.
  std::set<DWORD> windows_isprocessorfeaturepresent_;
  // Value-initialized, i.e. 0, until a test overrides it.
  WORD processor_revision_{};
#endif  // CPU_FEATURES_OS_WINDOWS
};
// Fake CPU owned by the currently running fixture; null when no fixture is
// active.
static FakeCpuAarch64* g_fake_cpu_instance = nullptr;

// Gives access to the active fake. Calling it while no fake is installed is a
// programming error and trips the assertion.
static FakeCpuAarch64& cpu() {
  assert(g_fake_cpu_instance != nullptr);
  FakeCpuAarch64& active_fake = *g_fake_cpu_instance;
  return active_fake;
}
#if defined(CPU_FEATURES_OS_WINDOWS)
// C-linkage entry points that the C implementation declares `extern` when it
// is built with CPU_FEATURES_MOCK_CPUID_AARCH64. Both simply forward to the
// fake CPU that is currently installed.
extern "C" {

bool GetWindowsIsProcessorFeaturePresent(DWORD dwProcessorFeature) {
  FakeCpuAarch64& fake = cpu();
  return fake.GetWindowsIsProcessorFeaturePresent(dwProcessorFeature);
}

WORD GetWindowsNativeSystemInfoProcessorRevision() {
  FakeCpuAarch64& fake = cpu();
  return fake.GetWindowsNativeSystemInfoProcessorRevision();
}

}  // extern "C"
#endif  // CPU_FEATURES_OS_WINDOWS
namespace {
void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }
class CpuidAarch64Test : public ::testing::Test {
protected:
void SetUp() override {
assert(g_fake_cpu_instance == nullptr);
g_fake_cpu_instance = new FakeCpuAarch64();
}
void TearDown() override {
delete g_fake_cpu_instance;
g_fake_cpu_instance = nullptr;
}
};
TEST(CpuinfoAarch64Test, Aarch64FeaturesEnum) {
const char *last_name = GetAarch64FeaturesEnumName(AARCH64_LAST_);
EXPECT_STREQ(last_name, "unknown_feature");
for (int i = static_cast<int>(AARCH64_FP); i != static_cast<int>(AARCH64_LAST_); ++i) {
const auto feature = static_cast<Aarch64FeaturesEnum>(i);
const char *name = GetAarch64FeaturesEnumName(feature);
ASSERT_FALSE(name == nullptr);
EXPECT_STRNE(name, "");
EXPECT_STRNE(name, last_name);
}
const char* last_name = GetAarch64FeaturesEnumName(AARCH64_LAST_);
EXPECT_STREQ(last_name, "unknown_feature");
for (int i = static_cast<int>(AARCH64_FP);
i != static_cast<int>(AARCH64_LAST_); ++i) {
const auto feature = static_cast<Aarch64FeaturesEnum>(i);
const char* name = GetAarch64FeaturesEnumName(feature);
ASSERT_FALSE(name == nullptr);
EXPECT_STRNE(name, "");
EXPECT_STRNE(name, last_name);
}
}
#if defined(CPU_FEATURES_OS_LINUX)
void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }
TEST(CpuinfoAarch64Test, FromHardwareCap) {
ResetHwcaps();
SetHardwareCapabilities(AARCH64_HWCAP_FP | AARCH64_HWCAP_AES, 0);
@ -184,6 +245,32 @@ CPU revision : 3)");
EXPECT_FALSE(info.features.afp);
EXPECT_FALSE(info.features.rpres);
}
#endif // CPU_FEATURES_OS_LINUX
#if defined(CPU_FEATURES_OS_WINDOWS)
// Simulates a Raspberry Pi 4 running Windows: revision 3 with only the FP,
// NEON and CRC32 identifiers reported as present, then checks the resulting
// Aarch64Info.
TEST_F(CpuidAarch64Test, WINDOWS_AARCH64_RPI4) {
  cpu().SetWindowsNativeSystemInfoProcessorRevision(0x03);
  cpu().SetWindowsIsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE);
  cpu().SetWindowsIsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);
  cpu().SetWindowsIsProcessorFeaturePresent(
      PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);

  const auto info = GetAarch64Info();

  // The Windows implementation never fills these fields, so they must stay 0.
  EXPECT_EQ(info.implementer, 0);
  EXPECT_EQ(info.variant, 0);
  EXPECT_EQ(info.part, 0);
  EXPECT_EQ(info.revision, 0x03);

  // Features whose identifier was registered above.
  EXPECT_TRUE(info.features.fp);
  EXPECT_TRUE(info.features.asimd);
  EXPECT_TRUE(info.features.crc32);

  // The crypto identifier was not registered, so the whole group is off.
  EXPECT_FALSE(info.features.aes);
  EXPECT_FALSE(info.features.sha1);
  EXPECT_FALSE(info.features.sha2);
  EXPECT_FALSE(info.features.pmull);

  // Remaining detectable features were not registered either.
  EXPECT_FALSE(info.features.atomics);
  EXPECT_FALSE(info.features.asimddp);
  EXPECT_FALSE(info.features.jscvt);
  EXPECT_FALSE(info.features.lrcpc);
}
#endif // CPU_FEATURES_OS_WINDOWS
} // namespace
} // namespace cpu_features