From a12e858c35219aca79c2db244a5af1e39857d23a Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Mon, 13 Jun 2011 13:14:00 +0000 Subject: [PATCH] Try to handle LL caches which are of size 50% above a power of 2 (eg, 6MB, 12MB) and have a non-power-of-2 number of sets. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11812 --- cachegrind/cg-x86-amd64.c | 53 +++++++++++++++++++++++++++++++++++++ coregrind/m_libcbase.c | 9 +++++++ include/pub_tool_libcbase.h | 7 +++-- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/cachegrind/cg-x86-amd64.c b/cachegrind/cg-x86-amd64.c index 18f95d9592..8d6039b5a4 100644 --- a/cachegrind/cg-x86-amd64.c +++ b/cachegrind/cg-x86-amd64.c @@ -464,6 +464,59 @@ Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc) D1c->size *= 1024; LLc->size *= 1024; + /* If the LL cache config isn't something the simulation functions + can handle, try to adjust it so it is. Caches are characterised + by (total size T, line size L, associativity A), and then we + have + + number of sets S = T / (L * A) + + The required constraints are: + + * L must be a power of 2, but it always is in practice, so + no problem there + + * A can be any value >= 1 + + * T can be any value, but .. + + * S must be a power of 2. + + That sometimes gives a problem. For example, some Core iX based + Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288 + sets. The "fix" in this case is to increase the associativity + by 50% to 24, which reduces the number of sets to 8192, making + it a power of 2. That's what the following code does (handling + the "3/2 rescaling case".) We might need to deal with other + ratios later (5/4 ?). + + The "fix" is "justified" (cough, cough) by alleging that + increases of associativity above about 4 have very little effect + on the actual miss rate. It would be far more inaccurate to + fudge this by changing the size of the simulated cache -- + changing the associativity is a much better option. 
+ */ + if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) { + Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc); + if (/* stay sane */ + nSets >= 4 + /* nSets is not a power of 2 */ + && VG_(log2_64)( (ULong)nSets ) == -1 + /* nSets is exactly 50% above a power of 2; the % 3 test rejects values such as 13 that the truncating division below would otherwise let through */ + && (nSets % 3) == 0 && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1 + /* associativity can be increased by exactly 50% */ + && (LLc->assoc % 2) == 0 + ) { + /* # sets is 1.5 * a power of two, but the associativity is + even, so we can increase it by 50% and implicitly + scale the # sets down accordingly. */ + Int new_assoc = LLc->assoc + (LLc->assoc / 2); + VG_(dmsg)("warning: pretending that LL cache has associativity" + " %d instead of actual %d\n", new_assoc, LLc->assoc); + LLc->assoc = new_assoc; + } + } + return ret; } diff --git a/coregrind/m_libcbase.c b/coregrind/m_libcbase.c index 6d6a2740fd..c01bedcb32 100644 --- a/coregrind/m_libcbase.c +++ b/coregrind/m_libcbase.c @@ -794,6 +794,15 @@ Int VG_(log2) ( UInt x ) return -1; } +/* Ditto for 64 bit numbers: returns i if x == 2^i (0 <= i <= 63), else -1 (so 0 gives -1). */ +Int VG_(log2_64) ( ULong x ) +{ + Int i; + for (i = 0; i < 64; i++) { + if ((1ULL << i) == x) return i; + } + return -1; +} // Generic quick sort. void VG_(ssort)( void* base, SizeT nmemb, SizeT size, diff --git a/include/pub_tool_libcbase.h b/include/pub_tool_libcbase.h index 0e616913f4..bc7c9f4214 100644 --- a/include/pub_tool_libcbase.h +++ b/include/pub_tool_libcbase.h @@ -181,10 +181,13 @@ static void VG_(bzero_inline) ( void* s, SizeT sz ) extern void VG_(ssort)( void* base, SizeT nmemb, SizeT size, Int (*compar)(void*, void*) ); -/* Returns the base-2 logarithm of x. Returns -1 if x is not a power - of two. Nb: VG_(log2)(1) == 0. */ +/* Returns the base-2 logarithm of a 32 bit unsigned number. Returns + -1 if it is not a power of two. Nb: VG_(log2)(1) == 0. */ extern Int VG_(log2) ( UInt x ); +/* Ditto for 64 bit unsigned numbers. 
*/ +extern Int VG_(log2_64)( ULong x ); + // A pseudo-random number generator returning a random UInt. If pSeed // is NULL, it uses its own seed, which starts at zero. If pSeed is // non-NULL, it uses and updates whatever pSeed points at. -- 2.47.2