From 32b49eb5e7809052a28422cfde2f2745fbb0eb76 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Wed, 20 Oct 2021 17:02:52 +0200 Subject: [PATCH] Fixes wrong cache detection of old processors (#183) --- src/cpuinfo_x86.c | 60 ++++++++++++++++++++-------------------- test/cpuinfo_x86_test.cc | 53 +++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 30 deletions(-) diff --git a/src/cpuinfo_x86.c b/src/cpuinfo_x86.c index b38e9c2..3d9f1ac 100644 --- a/src/cpuinfo_x86.c +++ b/src/cpuinfo_x86.c @@ -1120,26 +1120,29 @@ static CacheLevelInfo GetCacheLevelInfo(const uint32_t reg) { } } -static void GetByteArrayFromRegister(uint32_t result[4], const uint32_t reg) { - for (int i = 0; i < 4; ++i) { - result[i] = ExtractBitRange(reg, (i + 1) * 8, i * 8); - } -} - +// From https://www.felixcloutier.com/x86/cpuid#tbl-3-12 static void ParseLeaf2(const int max_cpuid_leaf, CacheInfo* info) { Leaf leaf = SafeCpuId(max_cpuid_leaf, 2); - uint32_t registers[] = {leaf.eax, leaf.ebx, leaf.ecx, leaf.edx}; - for (int i = 0; i < 4; ++i) { - if (registers[i] & (1U << 31)) { - continue; // register does not contains valid information - } - uint32_t bytes[4]; - GetByteArrayFromRegister(bytes, registers[i]); - for (int j = 0; j < 4; ++j) { - if (bytes[j] == 0xFF) - break; // leaf 4 should be used to fetch cache information - info->levels[info->size] = GetCacheLevelInfo(bytes[j]); - } + // The least-significant byte in register EAX (register AL) will always return + // 01H. Software should ignore this value and not interpret it as an + // informational descriptor. + leaf.eax &= 0xFFFFFF00; // Zeroing out AL. 0 is the empty descriptor. + // The most significant bit (bit 31) of each register indicates whether the + // register contains valid information (set to 0) or is reserved (set to 1). + if (IsBitSet(leaf.eax, 31)) leaf.eax = 0; + if (IsBitSet(leaf.ebx, 31)) leaf.ebx = 0; + if (IsBitSet(leaf.ecx, 31)) leaf.ecx = 0; + if (IsBitSet(leaf.edx, 31)) leaf.edx = 0; + + uint8_t data[16]; +#if __STDC_VERSION__ >= 201112L + _Static_assert(sizeof(Leaf) == sizeof(data), "Leaf must be 16 bytes"); +#endif + memcpy(&data, &leaf, sizeof(data)); + for (size_t i = 0; i < sizeof(data); ++i) { + const uint8_t descriptor = data[i]; + if (descriptor == 0) continue; + info->levels[info->size] = GetCacheLevelInfo(descriptor); info->size++; } } @@ -1148,27 +1151,23 @@ static void ParseLeaf2(const int max_cpuid_leaf, CacheInfo* info) { // For newer AMD CPUs uses "CPUID, eax=0x8000001D" static void ParseCacheInfo(const int max_cpuid_leaf, uint32_t leaf_id, CacheInfo* info) { - info->size = 0; for (int cache_id = 0; cache_id < CPU_FEATURES_MAX_CACHE_LEVEL; cache_id++) { const Leaf leaf = SafeCpuIdEx(max_cpuid_leaf, leaf_id, cache_id); CacheType cache_type = ExtractBitRange(leaf.eax, 4, 0); - if (cache_type == CPU_FEATURE_CACHE_NULL) { - info->levels[cache_id] = kEmptyCacheLevelInfo; - continue; - } + if (cache_type == CPU_FEATURE_CACHE_NULL) continue; int level = ExtractBitRange(leaf.eax, 7, 5); int line_size = ExtractBitRange(leaf.ebx, 11, 0) + 1; int partitioning = ExtractBitRange(leaf.ebx, 21, 12) + 1; int ways = ExtractBitRange(leaf.ebx, 31, 22) + 1; int tlb_entries = leaf.ecx + 1; int cache_size = (ways * partitioning * line_size * (tlb_entries)); - info->levels[cache_id] = (CacheLevelInfo){.level = level, - .cache_type = cache_type, - .cache_size = cache_size, - .ways = ways, - .line_size = line_size, - .tlb_entries = tlb_entries, - .partitioning = partitioning}; + info->levels[info->size] = (CacheLevelInfo){.level = level, + .cache_type = cache_type, + .cache_size = cache_size, + .ways = ways, + .line_size = line_size, + .tlb_entries = tlb_entries, + .partitioning = partitioning}; info->size++; } } @@ -1454,6 +1453,7 @@ CacheInfo GetX86CacheInfo(void) { CacheInfo info = kEmptyCacheInfo; const Leaf leaf_0 = CpuId(0); if (IsVendor(leaf_0, CPU_FEATURES_VENDOR_GENUINE_INTEL)) { + info.size = 0; ParseLeaf2(leaf_0.eax, &info); ParseCacheInfo(leaf_0.eax, 4, &info); } else if (IsVendor(leaf_0, CPU_FEATURES_VENDOR_AUTHENTIC_AMD) || diff --git a/test/cpuinfo_x86_test.cc b/test/cpuinfo_x86_test.cc index d3373c9..a78ebdd 100644 --- a/test/cpuinfo_x86_test.cc +++ b/test/cpuinfo_x86_test.cc @@ -162,6 +162,7 @@ TEST_F(CpuidX86Test, SandyBridge) { EXPECT_FALSE(features.adx); } +const int UNDEF = -1; const int KiB = 1024; const int MiB = 1024 * KiB; @@ -926,6 +927,58 @@ flags : fpu mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 #endif // !defined(CPU_FEATURES_OS_WINDOWS) } +// https://www.felixcloutier.com/x86/cpuid#example-3-1--example-of-cache-and-tlb-interpretation +TEST_F(CpuidX86Test, P4_CacheInfo) { + cpu().SetLeaves({ + {{0x00000000, 0}, Leaf{0x00000002, 0x756E6547, 0x6C65746E, 0x49656E69}}, + {{0x00000001, 0}, Leaf{0x00000F0A, 0x00010808, 0x00000000, 0x3FEBFBFF}}, + {{0x00000002, 0}, Leaf{0x665B5001, 0x00000000, 0x00000000, 0x007A7000}}, + }); + + const auto info = GetX86CacheInfo(); + EXPECT_EQ(info.size, 5); + + EXPECT_EQ(info.levels[0].level, UNDEF); + EXPECT_EQ(info.levels[0].cache_type, CPU_FEATURE_CACHE_TLB); + EXPECT_EQ(info.levels[0].cache_size, 4 * KiB); + EXPECT_EQ(info.levels[0].ways, UNDEF); + EXPECT_EQ(info.levels[0].line_size, UNDEF); + EXPECT_EQ(info.levels[0].tlb_entries, 64); + EXPECT_EQ(info.levels[0].partitioning, 0); + + EXPECT_EQ(info.levels[1].level, UNDEF); + EXPECT_EQ(info.levels[1].cache_type, CPU_FEATURE_CACHE_TLB); + EXPECT_EQ(info.levels[1].cache_size, 4 * KiB); + EXPECT_EQ(info.levels[1].ways, UNDEF); + EXPECT_EQ(info.levels[1].line_size, UNDEF); + EXPECT_EQ(info.levels[1].tlb_entries, 64); + EXPECT_EQ(info.levels[1].partitioning, 0); + + EXPECT_EQ(info.levels[2].level, 1); + EXPECT_EQ(info.levels[2].cache_type, CPU_FEATURE_CACHE_DATA); + EXPECT_EQ(info.levels[2].cache_size, 8 * KiB); + EXPECT_EQ(info.levels[2].ways, 4); + EXPECT_EQ(info.levels[2].line_size, 64); + EXPECT_EQ(info.levels[2].tlb_entries, UNDEF); + EXPECT_EQ(info.levels[2].partitioning, 0); + + EXPECT_EQ(info.levels[3].level, 1); + EXPECT_EQ(info.levels[3].cache_type, CPU_FEATURE_CACHE_INSTRUCTION); + EXPECT_EQ(info.levels[3].cache_size, 12 * KiB); + EXPECT_EQ(info.levels[3].ways, 8); + EXPECT_EQ(info.levels[3].line_size, UNDEF); + EXPECT_EQ(info.levels[3].tlb_entries, UNDEF); + EXPECT_EQ(info.levels[3].partitioning, 0); + + EXPECT_EQ(info.levels[4].level, 2); + EXPECT_EQ(info.levels[4].cache_type, CPU_FEATURE_CACHE_DATA); + EXPECT_EQ(info.levels[4].cache_size, 256 * KiB); + EXPECT_EQ(info.levels[4].ways, 8); + EXPECT_EQ(info.levels[4].line_size, 64); + EXPECT_EQ(info.levels[4].tlb_entries, UNDEF); + EXPECT_EQ(info.levels[4].partitioning, 2); +} + // https://github.com/InstLatx64/InstLatx64/blob/master/GenuineIntel/GenuineIntel0000673_P3_KatmaiDP_CPUID.txt TEST_F(CpuidX86Test, P3) { // Pre AVX cpus don't have xsave