From: Tom Hughes Date: Fri, 10 Jun 2011 15:04:22 +0000 (+0000) Subject: Teach cachegrind/callgrind how to parse the cache description X-Git-Tag: svn/VALGRIND_3_7_0~427 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c5931781bc234502c67fe5ff4909106a863eb1f5;p=thirdparty%2Fvalgrind.git Teach cachegrind/callgrind how to parse the cache description in the CPUID data on recent Intel processors. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11810 --- diff --git a/cachegrind/cg-x86-amd64.c b/cachegrind/cg-x86-amd64.c index 6794319b75..6f3d2342b7 100644 --- a/cachegrind/cg-x86-amd64.c +++ b/cachegrind/cg-x86-amd64.c @@ -66,7 +66,7 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc) Int family; Int model; UChar info[16]; - Int i, trials; + Int i, j, trials; Bool L2_found = False; /* If we see L3 cache info, copy it into L3c. Then, at the end, copy it into *LLc. Hence if a L3 cache is specified, *LLc will @@ -83,13 +83,13 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc) } /* family/model needed to distinguish code reuse (currently 0x49) */ - VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore, + VG_(cpuid)(1, 0, &cpuid1_eax, &cpuid1_ignore, &cpuid1_ignore, &cpuid1_ignore); family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf); model = (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf); - VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4], - (Int*)&info[8], (Int*)&info[12]); + VG_(cpuid)(2, 0, (Int*)&info[0], (Int*)&info[4], + (Int*)&info[8], (Int*)&info[12]); trials = info[0] - 1; /* AL register - bits 0..7 of %eax */ info[0] = 0x0; /* reset AL */ @@ -237,6 +237,61 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc) case 0xf0: case 0xf1: break; + case 0xff: + j = 0; + VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4], + (Int*)&info[8], (Int*)&info[12]); + + while ((info[0] & 0x1f) != 0) { + UInt assoc = ((*(UInt *)&info[4] >> 22) & 0x3ff) + 1; + UInt parts = ((*(UInt *)&info[4] >> 12) & 0x3ff) + 1; + UInt line_size = (*(UInt *)&info[4] & 0x7ff) + 1; + UInt sets = *(UInt *)&info[8] + 1; + cache_t c; + + c.size = assoc * parts * line_size * sets / 1024; + c.assoc = assoc; + c.line_size = line_size; + + switch ((info[0] & 0xe0) >> 5) + { + case 1: + switch (info[0] & 0x1f) + { + case 1: *D1c = c; break; + case 2: *I1c = c; break; + case 3: VG_(dmsg)("warning: L1 unified cache ignored\n"); break; + default: VG_(dmsg)("warning: L1 cache of unknown type ignored\n"); break; + } + break; + case 2: + switch (info[0] & 0x1f) + { + case 1: VG_(dmsg)("warning: L2 data cache ignored\n"); break; + case 2: VG_(dmsg)("warning: L2 instruction cache ignored\n"); break; + case 3: *LLc = c; L2_found = True; break; + default: VG_(dmsg)("warning: L2 cache of unknown type ignored\n"); break; + } + break; + case 3: + switch (info[0] & 0x1f) + { + case 1: VG_(dmsg)("warning: L3 data cache ignored\n"); break; + case 2: VG_(dmsg)("warning: L3 instruction cache ignored\n"); break; + case 3: L3c = c; L3_found = True; break; + default: VG_(dmsg)("warning: L3 cache of unknown type ignored\n"); break; + } + break; + default: + VG_(dmsg)("warning: L%u cache ignored\n", (info[0] & 0xe0) >> 5); + break; + } + + VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4], + (Int*)&info[8], (Int*)&info[12]); + } + break; + default: VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), ignoring\n", info[i]); @@ -311,7 +366,7 @@ Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* LLc) UInt dummy, model; UInt I1i, D1i, L2i, L3i; - VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy); + VG_(cpuid)(0x80000000, 0, &ext_level, &dummy, &dummy, &dummy); if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) { VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n", @@ -319,10 +374,10 @@ Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* LLc) return -1; } - VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i); - VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &L3i); + VG_(cpuid)(0x80000005, 0, &dummy, &dummy, &D1i, &I1i); + VG_(cpuid)(0x80000006, 0, &dummy, &dummy, &L2i, &L3i); - VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy); + VG_(cpuid)(0x1, 0, &model, &dummy, &dummy, &dummy); /* Check for Duron bug */ if (model == 0x630) { @@ -367,7 +422,7 @@ Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc) return -1; } - VG_(cpuid)(0, &level, (int*)&vendor_id[0], + VG_(cpuid)(0, 0, &level, (int*)&vendor_id[0], (int*)&vendor_id[8], (int*)&vendor_id[4]); vendor_id[12] = '\0'; diff --git a/coregrind/m_cpuid.S b/coregrind/m_cpuid.S index 42df34344e..2947f79b4f 100644 --- a/coregrind/m_cpuid.S +++ b/coregrind/m_cpuid.S @@ -66,7 +66,7 @@ #endif /* - void VG_(cpuid)(UInt eax, + void VG_(cpuid)(UInt eax, UInt ecx, UInt* eax_ret, UInt* ebx_ret, UInt* ecx_ret, UInt* edx_ret) */ #if defined(VGA_x86) @@ -81,23 +81,24 @@ pushl %edx pushl %esi movl 8(%ebp), %eax + movl 12(%ebp), %ecx cpuid - movl 12(%ebp), %esi + movl 16(%ebp), %esi testl %esi, %esi jz 1f movl %eax, (%esi) 1: - movl 16(%ebp), %esi + movl 20(%ebp), %esi testl %esi, %esi jz 2f movl %ebx, (%esi) 2: - movl 20(%ebp), %esi + movl 24(%ebp), %esi testl %esi, %esi jz 3f movl %ecx, (%esi) 3: - movl 24(%ebp), %esi + movl 28(%ebp), %esi testl %esi, %esi jz 4f movl %edx, (%esi) @@ -118,13 +119,14 @@ movq %rsp, %rbp pushq %rbx movl %edi, %eax - movq %rdx, %rdi - movq %rcx, %r9 + movq %rcx, %rdi + movl %esi, %ecx + movq %rdx, %rsi /* eax_ret now in %rsi ebx_ret now in %rdi - ecx_ret now in %r9 - edx_ret now in %r8 + ecx_ret now in %r8 + edx_ret now in %r9 */ cpuid testq %rsi, %rsi @@ -135,13 +137,13 @@ jz 2f movl %ebx, (%rdi) 2: - testq %r9, %r9 + testq %r8, %r8 jz 3f - movl %ecx, (%r9) + movl %ecx, (%r8) 3: - testq %r8, %r8 + testq %r9, %r9 jz 4f - movl %edx, (%r8) + movl %edx, (%r9) 4: popq %rbx movq %rbp, %rsp diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c index bfa5fda122..8c3816808e 100644 --- a/coregrind/m_machine.c +++ b/coregrind/m_machine.c @@ -621,7 +621,7 @@ Bool VG_(machine_get_hwcaps)( void ) /* we can't do cpuid at all. Give up. */ return False; - VG_(cpuid)(0, &eax, &ebx, &ecx, &edx); + VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx); if (eax < 1) /* we can't ask for cpuid(x) for x > 0. Give up. */ return False; @@ -633,11 +633,11 @@ Bool VG_(machine_get_hwcaps)( void ) VG_(memcpy)(&vstr[8], &ecx, 4); vstr[12] = 0; - VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx); + VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx); max_extended = eax; /* get capabilities bits into edx */ - VG_(cpuid)(1, &eax, &ebx, &ecx, &edx); + VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx); have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */ have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */ @@ -653,7 +653,7 @@ Bool VG_(machine_get_hwcaps)( void ) have_lzcnt = False; if (0 == VG_(strcmp)(vstr, "AuthenticAMD") && max_extended >= 0x80000001) { - VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx); + VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ } @@ -691,7 +691,7 @@ Bool VG_(machine_get_hwcaps)( void ) /* we can't do cpuid at all. Give up. */ return False; - VG_(cpuid)(0, &eax, &ebx, &ecx, &edx); + VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx); if (eax < 1) /* we can't ask for cpuid(x) for x > 0. Give up. */ return False; @@ -703,11 +703,11 @@ Bool VG_(machine_get_hwcaps)( void ) VG_(memcpy)(&vstr[8], &ecx, 4); vstr[12] = 0; - VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx); + VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx); max_extended = eax; /* get capabilities bits into edx */ - VG_(cpuid)(1, &eax, &ebx, &ecx, &edx); + VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx); // we assume that SSE1 and SSE2 are available by default have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */ @@ -729,7 +729,7 @@ Bool VG_(machine_get_hwcaps)( void ) have_lzcnt = False; if (0 == VG_(strcmp)(vstr, "AuthenticAMD") && max_extended >= 0x80000001) { - VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx); + VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ } diff --git a/include/pub_tool_cpuid.h b/include/pub_tool_cpuid.h index 51a7b12ec6..eac41b1436 100644 --- a/include/pub_tool_cpuid.h +++ b/include/pub_tool_cpuid.h @@ -34,7 +34,7 @@ #if defined(VGA_x86) || defined(VGA_amd64) extern Bool VG_(has_cpuid) ( void ); -extern void VG_(cpuid) ( UInt eax, +extern void VG_(cpuid) ( UInt eax, UInt ecx, UInt* eax_ret, UInt* ebx_ret, UInt* ecx_ret, UInt* edx_ret ); #endif