From: Florian Krohm <florian@eich-krohm.de>
Date: Thu, 18 Oct 2012 03:16:45 +0000 (+0000)
Subject: Change cache detection for x86/amd64 to fill in VexCacheInfo directly.
X-Git-Tag: svn/VALGRIND_3_9_0~615
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=52cb6c14fa2f3d642793241e902ff950469d3351;p=thirdparty%2Fvalgrind.git

Change cache detection for x86/amd64 to fill in VexCacheInfo directly.
New function write_cache_info to dump what was detected for debugging
purposes.
New function cache_info_is_sensible to ensure that autodetected
cache info lives up to the promises made in libvex.h.
Moved the trace-cache related kludgery to cachegrind where it belongs.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13053
---

diff --git a/cachegrind/cg-arch.c b/cachegrind/cg-arch.c
index e48dcaa486..be442d0b4f 100644
--- a/cachegrind/cg-arch.c
+++ b/cachegrind/cg-arch.c
@@ -304,13 +304,38 @@ configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
    d1 = locate_cache(ci, DATA_CACHE, 1);
    ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);
 
+   if (ll == NULL) {
+      VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
+   }
+
    if (ll && ci->num_levels > 2) {
       VG_(dmsg)("warning: L%u cache found, using its data for the "
                 "LL simulation.\n", ci->num_levels);
    }
 
    if (i1 && d1 && ll) {
-      *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
+      if (i1->is_trace_cache) {
+         /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
+          * conversion to byte size is a total guess;  treat the 12K and 16K
+          * cases the same since the cache byte size must be a power of two for
+          * everything to work!.  Also guessing 32 bytes for the line size...
+          */
+         UInt adjusted_size, guessed_line_size = 32;
+
+         if (i1->sizeB == 12 * 1024 || i1->sizeB == 16 * 1024) {
+            adjusted_size = 16 * 1024;
+         } else {
+            adjusted_size = 32 * 1024;
+         }
+         VG_(dmsg)("warning: %u KB micro-op instruction trace cache\n",
+                   i1->sizeB / 1024);
+         VG_(dmsg)("         Simulating a %d KB I-cache with %d B lines\n",
+                   adjusted_size, guessed_line_size);
+
+         *I1c = (cache_t) { adjusted_size, i1->assoc, guessed_line_size };
+      } else {
+         *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
+      }
       *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
       *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };
 
diff --git a/coregrind/m_cache.c b/coregrind/m_cache.c
index 3d3860e4f4..6c0ee314e5 100644
--- a/coregrind/m_cache.c
+++ b/coregrind/m_cache.c
@@ -35,6 +35,7 @@
 #include "pub_core_libcprint.h"
 #include "pub_core_mallocfree.h"
 #include "pub_core_machine.h"
+#include "pub_core_debuglog.h"
 #include "libvex.h"
 
 #if defined(VGA_x86) || defined(VGA_amd64)
@@ -45,24 +46,60 @@
 // Probably only works for Intel and AMD chips, and probably only for some of
 // them.
 
-static void 
-micro_ops_warn(Int actual_size, Int used_size, Int line_size)
+static void
+add_cache(VexCacheInfo *ci, VexCache cache)
 {
-   VG_(dmsg)("warning: Pentium 4 with %d KB micro-op instruction trace cache\n",
-             actual_size);
-   VG_(dmsg)("         Simulating a %d KB I-cache with %d B lines\n",
-             used_size, line_size);
+   static UInt num_allocated = 0;
+
+   if (ci->num_caches == num_allocated) {
+      num_allocated += 6;
+      ci->caches = VG_(realloc)("m_cache", ci->caches,
+                                num_allocated * sizeof *ci->caches);
+   }
+
+   if (ci->num_levels < cache.level) ci->num_levels = cache.level;
+   ci->caches[ci->num_caches++] = cache;
 }
 
-/* FIXME: Temporarily introduce cachegrind's cache_t structure here to
-   get Intel_cache_info to work. This function needs to be rewritten to
-   properly fill in VexCacheInfo. Absolutely no warnings about ignored
-   caches and such are appropriate here! */
-typedef struct {
-   Int size;       // bytes
-   Int assoc;
-   Int line_size;  // bytes
-} cache_t;
+/* Convenience macros */
+#define add_icache(level, size, assoc, linesize) \
+   do { \
+      add_cache(ci, \
+                VEX_CACHE_INIT(INSN_CACHE, level, size, linesize, assoc)); \
+   } while (0)
+
+#define add_dcache(level, size, assoc, linesize) \
+   do { \
+      add_cache(ci, \
+                VEX_CACHE_INIT(DATA_CACHE, level, size, linesize, assoc)); \
+   } while (0)
+
+#define add_ucache(level, size, assoc, linesize) \
+   do { \
+      add_cache(ci, \
+                VEX_CACHE_INIT(UNIFIED_CACHE, level, size, linesize, assoc)); \
+   } while (0)
+
+#define add_itcache(level, size, assoc) \
+   do { \
+      VexCache c = \
+          VEX_CACHE_INIT(INSN_CACHE, level, size, 0, assoc); \
+      c.is_trace_cache = True; \
+      add_cache(ci, c); \
+   } while (0)
+
+#define add_I1(size, assoc, linesize) add_icache(1, size, assoc, linesize)
+#define add_D1(size, assoc, linesize) add_dcache(1, size, assoc, linesize)
+#define add_U1(size, assoc, linesize) add_ucache(1, size, assoc, linesize)
+#define add_I2(size, assoc, linesize) add_icache(2, size, assoc, linesize)
+#define add_D2(size, assoc, linesize) add_dcache(2, size, assoc, linesize)
+#define add_U2(size, assoc, linesize) add_ucache(2, size, assoc, linesize)
+#define add_I3(size, assoc, linesize) add_icache(3, size, assoc, linesize)
+#define add_D3(size, assoc, linesize) add_dcache(3, size, assoc, linesize)
+#define add_U3(size, assoc, linesize) add_ucache(3, size, assoc, linesize)
+
+#define add_I1T(size, assoc) \
+   add_itcache(1, size, assoc)
 
 /* Intel method is truly wretched.  We have to do an insane indexing into an
  * array of pre-defined configurations for various parts of the memory
@@ -73,7 +110,7 @@ typedef struct {
  * is returned via *LLc.
  */
 static Int
-Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
+Intel_cache_info(Int level, VexCacheInfo *ci)
 {
    Int cpuid1_eax;
    Int cpuid1_ignore;
@@ -81,18 +118,10 @@ Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
    Int model;
    UChar info[16];
    Int   i, j, trials;
-   Bool  L2_found = False;
-   /* If we see L3 cache info, copy it into L3c.  Then, at the end,
-      copy it into *LLc.  Hence if a L3 cache is specified, *LLc will
-      eventually contain a description of it rather than the L2 cache.
-      The use of the L3c intermediary makes this process independent
-      of the order in which the cache specifications appear in
-      info[]. */
-   Bool  L3_found = False;
-   cache_t L3c = { 0, 0, 0 };
 
    if (level < 2) {
-      VG_(dmsg)("warning: CPUID level < 2 for Intel processor (%d)\n", level);
+      VG_(debugLog)(1, "cache", "warning: CPUID level < 2 for Intel "
+                    "processor (%d)\n", level);
       return -1;
    }
 
@@ -108,11 +137,16 @@ Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
    info[0] = 0x0;           /* reset AL */
 
    if (0 != trials) {
-      VG_(dmsg)("warning: non-zero CPUID trials for Intel processor (%d)\n",
-                trials);
+      VG_(debugLog)(1, "cache", "warning: non-zero CPUID trials for Intel "
+                    "processor (%d)\n", trials);
       return -1;
    }
 
+   ci->num_levels = 0;
+   ci->num_caches = 0;
+   ci->icaches_maintain_coherence = True;
+   ci->caches = NULL;
+
    for (i = 0; i < 16; i++) {
 
       switch (info[i]) {
@@ -132,16 +166,16 @@ Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
       case 0xca:
           break;
 
-      case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
-      case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
-      case 0x09: *I1c = (cache_t) { 32, 4, 64 }; break;
-      case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
+      case 0x06: add_I1( 8, 4, 32); break;
+      case 0x08: add_I1(16, 4, 32); break;
+      case 0x09: add_I1(32, 4, 64); break;
+      case 0x30: add_I1(32, 8, 64); break;
 
-      case 0x0a: *D1c = (cache_t) {  8, 2, 32 }; break;
-      case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
-      case 0x0d: *D1c = (cache_t) { 16, 4, 64 }; break;
-      case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break;
-      case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
+      case 0x0a: add_D1( 8, 2, 32); break;
+      case 0x0c: add_D1(16, 4, 32); break;
+      case 0x0d: add_D1(16, 4, 64); break;
+      case 0x0e: add_D1(24, 6, 64); break;
+      case 0x2c: add_D1(32, 8, 64); break;
 
       /* IA-64 info -- panic! */
       case 0x10: case 0x15: case 0x1a:
@@ -150,38 +184,39 @@ Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
          VG_(core_panic)("IA-64 cache detected?!");
 
       /* L3 cache info. */
-      case 0x22: L3c = (cache_t) { 512,    4, 64 }; L3_found = True; break;
-      case 0x23: L3c = (cache_t) { 1024,   8, 64 }; L3_found = True; break;
-      case 0x25: L3c = (cache_t) { 2048,   8, 64 }; L3_found = True; break;
-      case 0x29: L3c = (cache_t) { 4096,   8, 64 }; L3_found = True; break;
-      case 0x46: L3c = (cache_t) { 4096,   4, 64 }; L3_found = True; break;
-      case 0x47: L3c = (cache_t) { 8192,   8, 64 }; L3_found = True; break;
-      case 0x4a: L3c = (cache_t) { 6144,  12, 64 }; L3_found = True; break;
-      case 0x4b: L3c = (cache_t) { 8192,  16, 64 }; L3_found = True; break;
-      case 0x4c: L3c = (cache_t) { 12288, 12, 64 }; L3_found = True; break;
-      case 0x4d: L3c = (cache_t) { 16384, 16, 64 }; L3_found = True; break;
-      case 0xd0: L3c = (cache_t) { 512,    4, 64 }; L3_found = True; break;
-      case 0xd1: L3c = (cache_t) { 1024,   4, 64 }; L3_found = True; break;
-      case 0xd2: L3c = (cache_t) { 2048,   4, 64 }; L3_found = True; break;
-      case 0xd6: L3c = (cache_t) { 1024,   8, 64 }; L3_found = True; break;
-      case 0xd7: L3c = (cache_t) { 2048,   8, 64 }; L3_found = True; break;
-      case 0xd8: L3c = (cache_t) { 4096,   8, 64 }; L3_found = True; break;
-      case 0xdc: L3c = (cache_t) { 1536,  12, 64 }; L3_found = True; break;
-      case 0xdd: L3c = (cache_t) { 3072,  12, 64 }; L3_found = True; break;
-      case 0xde: L3c = (cache_t) { 6144,  12, 64 }; L3_found = True; break;
-      case 0xe2: L3c = (cache_t) { 2048,  16, 64 }; L3_found = True; break;
-      case 0xe3: L3c = (cache_t) { 4096,  16, 64 }; L3_found = True; break;
-      case 0xe4: L3c = (cache_t) { 8192,  16, 64 }; L3_found = True; break;
-      case 0xea: L3c = (cache_t) { 12288, 24, 64 }; L3_found = True; break;
-      case 0xeb: L3c = (cache_t) { 18432, 24, 64 }; L3_found = True; break;
-      case 0xec: L3c = (cache_t) { 24576, 24, 64 }; L3_found = True; break;
+      case 0x22: add_U3(512,    4, 64); break;
+      case 0x23: add_U3(1024,   8, 64); break;
+      case 0x25: add_U3(2048,   8, 64); break;
+      case 0x29: add_U3(4096,   8, 64); break;
+      case 0x46: add_U3(4096,   4, 64); break;
+      case 0x47: add_U3(8192,   8, 64); break;
+      case 0x4a: add_U3(6144,  12, 64); break;
+      case 0x4b: add_U3(8192,  16, 64); break;
+      case 0x4c: add_U3(12288, 12, 64); break;
+      case 0x4d: add_U3(16384, 16, 64); break;
+      case 0xd0: add_U3(512,    4, 64); break;
+      case 0xd1: add_U3(1024,   4, 64); break;
+      case 0xd2: add_U3(2048,   4, 64); break;
+      case 0xd6: add_U3(1024,   8, 64); break;
+      case 0xd7: add_U3(2048,   8, 64); break;
+      case 0xd8: add_U3(4096,   8, 64); break;
+      case 0xdc: add_U3(1536,  12, 64); break;
+      case 0xdd: add_U3(3072,  12, 64); break;
+      case 0xde: add_U3(6144,  12, 64); break;
+      case 0xe2: add_U3(2048,  16, 64); break;
+      case 0xe3: add_U3(4096,  16, 64); break;
+      case 0xe4: add_U3(8192,  16, 64); break;
+      case 0xea: add_U3(12288, 24, 64); break;
+      case 0xeb: add_U3(18432, 24, 64); break;
+      case 0xec: add_U3(24576, 24, 64); break;
 
       /* Described as "MLC" in Intel documentation */
-      case 0x21: *LLc = (cache_t) {  256, 8, 64 }; L2_found = True; break;
+      case 0x21: add_U2(256, 8, 64); break;
 
       /* These are sectored, whatever that means */
-      case 0x39: *LLc = (cache_t) {  128, 4, 64 }; L2_found = True; break;
-      case 0x3c: *LLc = (cache_t) {  256, 4, 64 }; L2_found = True; break;
+         // FIXME: I did not find these in the Intel docs
+      case 0x39: add_U2(128, 4, 64); break;
+      case 0x3c: add_U2(256, 4, 64); break;
 
       /* If a P6 core, this means "no L2 cache".
          If a P4 core, this means "no L3 cache".
@@ -190,27 +225,27 @@ Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
       case 0x40:
           break;
 
-      case 0x41: *LLc = (cache_t) {  128,  4, 32 }; L2_found = True; break;
-      case 0x42: *LLc = (cache_t) {  256,  4, 32 }; L2_found = True; break;
-      case 0x43: *LLc = (cache_t) {  512,  4, 32 }; L2_found = True; break;
-      case 0x44: *LLc = (cache_t) { 1024,  4, 32 }; L2_found = True; break;
-      case 0x45: *LLc = (cache_t) { 2048,  4, 32 }; L2_found = True; break;
-      case 0x48: *LLc = (cache_t) { 3072, 12, 64 }; L2_found = True; break;
-      case 0x4e: *LLc = (cache_t) { 6144, 24, 64 }; L2_found = True; break;
+      case 0x41: add_U2(  128,  4, 32); break;
+      case 0x42: add_U2(  256,  4, 32); break;
+      case 0x43: add_U2(  512,  4, 32); break;
+      case 0x44: add_U2( 1024,  4, 32); break;
+      case 0x45: add_U2( 2048,  4, 32); break;
+      case 0x48: add_U2( 3072, 12, 64); break;
+      case 0x4e: add_U2( 6144, 24, 64); break;
       case 0x49:
          if (family == 15 && model == 6) {
             /* On Xeon MP (family F, model 6), this is for L3 */
-            L3c = (cache_t) { 4096, 16, 64 }; L3_found = True;
+            add_U3(4096, 16, 64);
          } else {
-	    *LLc = (cache_t) { 4096, 16, 64 }; L2_found = True;
+	    add_U2(4096, 16, 64);
          }
          break;
 
       /* These are sectored, whatever that means */
-      case 0x60: *D1c = (cache_t) { 16, 8, 64 };  break;      /* sectored */
-      case 0x66: *D1c = (cache_t) {  8, 4, 64 };  break;      /* sectored */
-      case 0x67: *D1c = (cache_t) { 16, 4, 64 };  break;      /* sectored */
-      case 0x68: *D1c = (cache_t) { 32, 4, 64 };  break;      /* sectored */
+      case 0x60: add_D1(16, 8, 64);  break;      /* sectored */
+      case 0x66: add_D1( 8, 4, 64);  break;      /* sectored */
+      case 0x67: add_D1(16, 4, 64);  break;      /* sectored */
+      case 0x68: add_D1(32, 4, 64);  break;      /* sectored */
 
       /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
        * conversion to byte size is a total guess;  treat the 12K and 16K
@@ -218,37 +253,34 @@ Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
        * everything to work!.  Also guessing 32 bytes for the line size...
        */
       case 0x70:    /* 12K micro-ops, 8-way */
-         *I1c = (cache_t) { 16, 8, 32 };
-         micro_ops_warn(12, 16, 32);
+         add_I1T(12, 8);
          break;
       case 0x71:    /* 16K micro-ops, 8-way */
-         *I1c = (cache_t) { 16, 8, 32 };
-         micro_ops_warn(16, 16, 32);
+         add_I1T(16, 8);
          break;
       case 0x72:    /* 32K micro-ops, 8-way */
-         *I1c = (cache_t) { 32, 8, 32 };
-         micro_ops_warn(32, 32, 32);
+         add_I1T(32, 8);
          break;
 
       /* not sectored, whatever that might mean */
-      case 0x78: *LLc = (cache_t) { 1024, 4,  64 }; L2_found = True;  break;
+      case 0x78: add_U2(1024, 4,  64);  break;
 
       /* These are sectored, whatever that means */
-      case 0x79: *LLc = (cache_t) {  128, 8,  64 }; L2_found = True;  break;
-      case 0x7a: *LLc = (cache_t) {  256, 8,  64 }; L2_found = True;  break;
-      case 0x7b: *LLc = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
-      case 0x7c: *LLc = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
-      case 0x7d: *LLc = (cache_t) { 2048, 8,  64 }; L2_found = True;  break;
-      case 0x7e: *LLc = (cache_t) {  256, 8, 128 }; L2_found = True;  break;
-      case 0x7f: *LLc = (cache_t) {  512, 2,  64 }; L2_found = True;  break;
-      case 0x80: *LLc = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
-      case 0x81: *LLc = (cache_t) {  128, 8,  32 }; L2_found = True;  break;
-      case 0x82: *LLc = (cache_t) {  256, 8,  32 }; L2_found = True;  break;
-      case 0x83: *LLc = (cache_t) {  512, 8,  32 }; L2_found = True;  break;
-      case 0x84: *LLc = (cache_t) { 1024, 8,  32 }; L2_found = True;  break;
-      case 0x85: *LLc = (cache_t) { 2048, 8,  32 }; L2_found = True;  break;
-      case 0x86: *LLc = (cache_t) {  512, 4,  64 }; L2_found = True;  break;
-      case 0x87: *LLc = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
+      case 0x79: add_U2( 128, 8,  64);  break;
+      case 0x7a: add_U2( 256, 8,  64);  break;
+      case 0x7b: add_U2( 512, 8,  64);  break;
+      case 0x7c: add_U2(1024, 8,  64);  break;
+      case 0x7d: add_U2(2048, 8,  64);  break;
+      case 0x7e: add_U2( 256, 8, 128);  break;
+      case 0x7f: add_U2( 512, 2,  64);  break;
+      case 0x80: add_U2( 512, 8,  64);  break;
+      case 0x81: add_U2( 128, 8,  32);  break;
+      case 0x82: add_U2( 256, 8,  32);  break;
+      case 0x83: add_U2( 512, 8,  32);  break;
+      case 0x84: add_U2(1024, 8,  32);  break;
+      case 0x85: add_U2(2048, 8,  32);  break;
+      case 0x86: add_U2( 512, 4,  64);  break;
+      case 0x87: add_U2(1024, 8,  64);  break;
 
       /* Ignore prefetch information */
       case 0xf0: case 0xf1:
@@ -264,51 +296,50 @@ Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
             UInt parts = ((*(UInt *)&info[4] >> 12) & 0x3ff) + 1;
             UInt line_size = (*(UInt *)&info[4] & 0x7ff) + 1;
             UInt sets = *(UInt *)&info[8] + 1;
-            cache_t c;
 
-            c.size = assoc * parts * line_size * sets / 1024;
-            c.assoc = assoc;
-            c.line_size = line_size;
+            UInt size = assoc * parts * line_size * sets / 1024;
 
             switch ((info[0] & 0xe0) >> 5)
             {
             case 1:
                switch (info[0] & 0x1f)
                {
-               case 1: *D1c = c; break;
-               case 2: *I1c = c; break;
-               case 3: VG_(dmsg)("warning: L1 unified cache ignored\n"); break;
+               case 1: add_D1(size, assoc, line_size); break;
+               case 2: add_I1(size, assoc, line_size); break;
+               case 3: add_U1(size, assoc, line_size); break;
                default:
-                  VG_(dmsg)("warning: L1 cache of unknown type ignored\n");
+                  VG_(debugLog)(1, "cache",
+                                "warning: L1 cache of unknown type ignored\n");
                   break;
                }
                break;
             case 2:
                switch (info[0] & 0x1f)
                {
-               case 1: VG_(dmsg)("warning: L2 data cache ignored\n"); break;
-               case 2: VG_(dmsg)("warning: L2 instruction cache ignored\n");
-                  break;
-               case 3: *LLc = c; L2_found = True; break;
+               case 1: add_D2(size, assoc, line_size); break;
+               case 2: add_I2(size, assoc, line_size); break;
+               case 3: add_U2(size, assoc, line_size); break;
                default:
-                  VG_(dmsg)("warning: L2 cache of unknown type ignored\n");
+                  VG_(debugLog)(1, "cache",
+                                "warning: L2 cache of unknown type ignored\n");
                   break;
                }
                break;
             case 3:
                switch (info[0] & 0x1f)
                {
-               case 1: VG_(dmsg)("warning: L3 data cache ignored\n"); break;
-               case 2: VG_(dmsg)("warning: L3 instruction cache ignored\n");
-                  break;
-               case 3: L3c = c; L3_found = True; break;
+               case 1: add_D3(size, assoc, line_size); break;
+               case 2: add_I3(size, assoc, line_size); break;
+               case 3: add_U3(size, assoc, line_size); break;
                default:
-                  VG_(dmsg)("warning: L3 cache of unknown type ignored\n");
+                  VG_(debugLog)(1, "cache",
+                                "warning: L3 cache of unknown type ignored\n");
                   break;
                }
                break;
             default:
-               VG_(dmsg)("warning: L%u cache ignored\n", (info[0] & 0xe0) >> 5);
+               VG_(debugLog)(1, "cache", "warning: L%u cache ignored\n",
+                             (info[0] & 0xe0) >> 5);
                break;
             }
 
@@ -318,57 +349,16 @@ Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
          break;
 
       default:
-         VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), "
-                   "ignoring\n", info[i]);
+         VG_(debugLog)(1, "cache",
+                       "warning: Unknown Intel cache config value (0x%x), "
+                       "ignoring\n", info[i]);
          break;
       }
    }
 
-   /* If we found a L3 cache, throw away the L2 data and use the L3's
-      instead. */
-   if (L3_found) {
-      /* Can't warn here: as we're not necessarily in cachegrind */
-#if 0
-      VG_(dmsg)("warning: L3 cache found, using its data for the "
-                "LL simulation.\n");
-#endif
-      *LLc = L3c;
-      L2_found = True;
-   }
-
-   if (!L2_found)
-      VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
-
    return 0;
 }
 
-static Int
-Intel_cache_info(Int level, VexCacheInfo *ci)
-{
-   cache_t I1c, D1c, LLc;
-   Int ret;
-
-   ret = Intel_cache_info_aux(level, &I1c, &D1c, &LLc);
-
-   /* Map results to VexCacheInfo. This is lossy as we simply assume
-      there is an L2 here (where in fact it could have been an L3). It
-      is irrelevant for current usages but needs to be fixed! */
-   if (ret == 0) {
-      ci->num_levels = 2;
-      ci->num_caches = 3;
-      ci->icaches_maintain_coherence = True;
-      ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof *ci->caches);
-
-      ci->caches[0] = VEX_CACHE_INIT(DATA_CACHE, 1, D1c.size, D1c.line_size,
-                                     D1c.assoc);
-      ci->caches[1] = VEX_CACHE_INIT(INSN_CACHE, 1, I1c.size, I1c.line_size,
-                                     I1c.assoc);
-      ci->caches[2] = VEX_CACHE_INIT(UNIFIED_CACHE, 2, LLc.size, LLc.line_size,
-                                     LLc.assoc);
-   }
-   return ret;
-}
-
 /* AMD method is straightforward, just extract appropriate bits from the
  * result registers.
  *
@@ -428,8 +418,8 @@ AMD_cache_info(VexCacheInfo *ci)
    VG_(cpuid)(0x80000000, 0, &ext_level, &dummy, &dummy, &dummy);
 
    if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
-      VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n",
-                ext_level);
+      VG_(debugLog)(1, "cache", "warning: ext_level < 0x80000006 for AMD "
+                    "processor (0x%x)\n", ext_level);
       return -1;
    }
 
@@ -440,8 +430,8 @@ AMD_cache_info(VexCacheInfo *ci)
 
    /* Check for Duron bug */
    if (model == 0x630) {
-      VG_(dmsg)("warning: Buggy Duron stepping A0. "
-                "Assuming L2 size=65536 bytes\n");
+      VG_(debugLog)(1, "cache", "warning: Buggy Duron stepping A0. "
+                    "Assuming L2 size=65536 bytes\n");
       L2i = (64 << 16) | (L2i & 0xffff);
    }
 
@@ -473,7 +463,7 @@ AMD_cache_info(VexCacheInfo *ci)
    size      = (L2i >> 16) & 0xffff;
    assoc     = decode_AMD_cache_L2_L3_assoc((L2i >> 12) & 0xf);
    line_size = (L2i >>  0) & 0xff;
-   ci->caches[2] = VEX_CACHE_INIT(INSN_CACHE, 2, size, line_size, assoc);
+   ci->caches[2] = VEX_CACHE_INIT(UNIFIED_CACHE, 2, size, line_size, assoc);
 
    // L3, if any
    if (((L3i >> 18) & 0x3fff) > 0) {
@@ -484,7 +474,7 @@ AMD_cache_info(VexCacheInfo *ci)
       size      = ((L3i >> 18) & 0x3fff) * 512;
       assoc     = decode_AMD_cache_L2_L3_assoc((L3i >> 12) & 0xf);
       line_size = (L3i >>  0) & 0xff;
-      ci->caches[3] = VEX_CACHE_INIT(INSN_CACHE, 3, size, line_size, assoc);
+      ci->caches[3] = VEX_CACHE_INIT(UNIFIED_CACHE, 3, size, line_size, assoc);
    }
 
    return 0;
@@ -496,17 +486,13 @@ get_caches_from_CPUID(VexCacheInfo *ci)
    Int  level, ret, i;
    Char vendor_id[13];
 
-   if (!VG_(has_cpuid)()) {
-      VG_(dmsg)("CPUID instruction not supported\n");
-      return -1;
-   }
+   vg_assert(VG_(has_cpuid)());
 
    VG_(cpuid)(0, 0, &level, (int*)&vendor_id[0],
 	      (int*)&vendor_id[8], (int*)&vendor_id[4]);
    vendor_id[12] = '\0';
 
-   if (0 == level) {
-      VG_(dmsg)("CPUID level is 0, early Pentium?\n");
+   if (0 == level) {    // CPUID level is 0, early Pentium?
       return -1;
    }
 
@@ -530,7 +516,8 @@ get_caches_from_CPUID(VexCacheInfo *ci)
       ret = 0;
 
    } else {
-      VG_(dmsg)("CPU vendor ID not recognised (%s)\n", vendor_id);
+      VG_(debugLog)(1, "cache", "CPU vendor ID not recognised (%s)\n",
+                    vendor_id);
       return -1;
    }
 
@@ -542,8 +529,8 @@ get_caches_from_CPUID(VexCacheInfo *ci)
    return ret;
 }
 
-Bool
-VG_(machine_get_cache_info)(VexArchInfo *vai)
+static Bool
+get_cache_info(VexArchInfo *vai)
 {
    Int ret = get_caches_from_CPUID(&vai->hwcache_info); 
 
@@ -553,8 +540,8 @@ VG_(machine_get_cache_info)(VexArchInfo *vai)
 #elif defined(VGA_arm) || defined(VGA_ppc32) || defined(VGA_ppc64) || \
       defined(VGA_mips32)
 
-Bool
-VG_(machine_get_cache_info)(VexArchInfo *vai)
+static Bool
+get_cache_info(VexArchInfo *vai)
 {
    vai->hwcache_info.icaches_maintain_coherence = False;
 
@@ -563,8 +550,8 @@ VG_(machine_get_cache_info)(VexArchInfo *vai)
 
 #elif defined(VGA_s390x)
 
-Bool
-VG_(machine_get_cache_info)(VexArchInfo *vai)
+static Bool
+get_cache_info(VexArchInfo *vai)
 {
    vai->hwcache_info.icaches_maintain_coherence = True;
 
@@ -577,6 +564,116 @@ VG_(machine_get_cache_info)(VexArchInfo *vai)
 
 #endif
 
+/* Debug information */
+static void
+write_cache_info(const VexCacheInfo *ci)
+{
+   UInt i;
+
+   VG_(debugLog)(1, "cache", "Cache info:\n");
+   VG_(debugLog)(1, "cache", "  #levels = %u\n", ci->num_levels);
+   VG_(debugLog)(1, "cache", "  #caches = %u\n", ci->num_caches);
+   for (i = 0; i < ci->num_caches; ++i) {
+      VexCache *c = ci->caches + i;
+      const HChar *kind;
+      VG_(debugLog)(1, "cache", "     cache #%u:\n", i);
+      switch (c->kind) {
+      case INSN_CACHE:    kind = "insn";    break;
+      case DATA_CACHE:    kind = "data";    break;
+      case UNIFIED_CACHE: kind = "unified"; break;
+      default: kind = "unknown"; break;
+      }
+      VG_(debugLog)(1, "cache", "        kind = %s\n", kind);
+      VG_(debugLog)(1, "cache", "        level = %u\n", c->level);
+      VG_(debugLog)(1, "cache", "        size = %u bytes\n", c->sizeB);
+      VG_(debugLog)(1, "cache", "        linesize = %u bytes\n", c->line_sizeB);
+      VG_(debugLog)(1, "cache", "        assoc = %u\n", c->assoc);
+   }
+}
+
+static Bool
+cache_info_is_sensible(const VexCacheInfo *ci)
+{
+   UInt level, i;
+   Bool sensible = True;
+
+   /* There must be at most one cache of a given kind at the same level.
+      If there is a unified cache at a given level, no other cache may
+      exist at that level. */
+   for (level = 1; level <= ci->num_levels; ++level) {
+      UInt num_icache, num_dcache, num_ucache;
+
+      num_icache = num_dcache = num_ucache = 0;
+      for (i = 0; i < ci->num_caches; ++i) {
+         if (ci->caches[i].level == level) {
+            switch (ci->caches[i].kind) {
+            case INSN_CACHE:    ++num_icache; break;
+            case DATA_CACHE:    ++num_dcache; break;
+            case UNIFIED_CACHE: ++num_ucache; break;
+            }
+         }
+      }
+      if (num_icache == 0 && num_dcache == 0 && num_ucache == 0) {
+         VG_(debugLog)(1, "cache", "warning: No caches at level %u\n", level);
+         sensible = False;
+      }
+      if (num_icache > 1 || num_dcache > 1 || num_ucache > 1) {
+         VG_(debugLog)(1, "cache", "warning: More than one cache of a given "
+                       "kind at level %u\n", level);
+         sensible = False;
+      }
+      if (num_ucache != 0 && (num_icache > 0 || num_dcache > 0)) {
+         VG_(debugLog)(1, "cache", "warning: Unified cache and I/D cache "
+                       "at level %u\n", level);
+         sensible = False;
+      }
+   }
+
+   /* If there is a cache at level N > 1 there must be a cache at level N-1 */
+   for (level = 2; level <= ci->num_levels; ++level) {
+      Bool found = False;
+      for (i = 0; i < ci->num_caches; ++i) {
+         if (ci->caches[i].level == level - 1) {
+            found = True;
+            break;
+         }
+      }
+      if (! found) {
+         VG_(debugLog)(1, "cache", "warning: Cache at level %u but no cache "
+                       "at level %u\n", level, level - 1);
+         sensible = False;
+      }
+   }
+
+   return sensible;
+}
+
+
+/* Autodetect the cache information for this host and stuff it into
+   VexArchInfo::hwcache_info. Return True if successful. */
+Bool
+VG_(machine_get_cache_info)(VexArchInfo *vai)
+{
+   Bool ok = get_cache_info(vai);
+
+   if (! ok) {
+      VexCacheInfo *ci = &vai->hwcache_info;
+
+      VG_(debugLog)(1, "cache", "Autodetected cache info is not sensible\n");
+      write_cache_info(ci);  /* write out for debugging */
+
+      /* Reset cache info */
+      ci->num_levels = 0;
+      ci->num_caches = 0;
+      VG_(free)(ci->caches);
+      ci->caches = NULL;
+   } else {
+      VG_(debugLog)(1, "cache", "Autodetected cache info is sensible\n");
+   }
+
+   return ok;
+}
+
 /*--------------------------------------------------------------------*/
 /*--- end                                                          ---*/
 /*--------------------------------------------------------------------*/