case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break; /* fields: size in KB, assoc, line size in bytes */
case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
- case 0x0e:
- /* Real D1 cache configuration is:
- D1c = (cache_t) { 24, 6, 64 }; */
- VG_(message)(Vg_DebugMsg, "warning: 24Kb D1 cache detected, treating as 16Kb");
- *D1c = (cache_t) { 16, 4, 64 };
- break;
+ case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break;
case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
/* IA-64 info -- panic! */
case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
- case 0x48:
- /* Real L2 cache configuration is:
- *L2c = (cache_t) { 3072, 12, 64 }; L2_found = True; */
- VG_(message)(Vg_DebugMsg, "warning: 3Mb L2 cache detected, treating as 2Mb");
- *L2c = (cache_t) { 2048, 8, 64 }; L2_found = True;
- break;
+ case 0x48: *L2c = (cache_t) { 3072, 12, 64 }; L2_found = True; break;
case 0x49:
    if ((family == 15) && (model == 6))
        /* On Xeon MP (family F, model 6), this is for L3 */
        VG_(message)(Vg_DebugMsg, "warning: L3 cache detected but ignored");
    else {
        *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
    }
    break;
- case 0x4e:
- /* Real L2 cache configuration is:
- *L2c = (cache_t) { 6144, 24, 64 }; L2_found = True; */
- VG_(message)(Vg_DebugMsg, "warning: 6Mb L2 cache detected, treating as 4Mb");
- *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
- break;
+ case 0x4e: *L2c = (cache_t) { 6144, 24, 64 }; L2_found = True; break;
/* These are sectored, whatever that means */
case 0x60: *D1c = (cache_t) { 16, 8, 64 }; break; /* sectored */
</listitem>
<listitem>
- <para>Bit-selection hash function: the line(s) in the cache
+ <para>Bit-selection hash function: the set of line(s) in the cache
to which a memory block maps is chosen by the middle bits
M--(M+N-1) of the byte address, where:</para>
<itemizedlist>
<listitem>
<para>line size = 2^M bytes</para>
</listitem>
<listitem>
- <para>(cache size / line size) = 2^N bytes</para>
+ <para>(cache size / line size / associativity) = 2^N sets</para>
</listitem>
</itemizedlist>
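+
+<para>For example, here is a small sketch (not Cachegrind code) of how
+an address decomposes under this scheme, assuming 64-byte lines
+(M = 6) and 64 sets (N = 6):</para>
+<programlisting>
+#include &lt;stdio.h&gt;
+
+int main(void)
+{
+   unsigned long a = 0x12345678;        /* example byte address */
+   int M = 6;                           /* line size = 2^6 = 64 bytes */
+   int sets = 64;                       /* 2^N sets, N = 6 */
+   unsigned long set = (a >> M) % sets; /* middle bits M..(M+N-1) */
+   unsigned long tag = (a >> M) / sets; /* remaining high bits */
+   printf("set %lu, tag %#lx\n", set, tag);  /* set 25, tag 0x12345 */
+   return 0;
+}
+</programlisting>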
</listitem>
<listitem>
- <para>Inclusive L2 cache: the L2 cache replicates all the
- entries of the L1 cache. This is standard on Pentium chips,
- but AMD Opterons, Athlons and Durons
+ <para>Inclusive L2 cache: the L2 cache typically replicates all
+ the entries of the L1 caches, because fetching into L1 involves
+ fetching into L2 first (though this does not guarantee strict
+ inclusiveness, as lines evicted from L2 may still reside in L1). This is
+ standard on Pentium chips, but AMD Opterons, Athlons and Durons
use an exclusive L2 cache that only holds
blocks evicted from L1. Ditto most modern VIA CPUs.</para>
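+<para>A toy, single-set sketch (not Cachegrind code; one line tag per
+level, 0 meaning empty) makes the difference concrete:</para>
+<programlisting>
+#include &lt;stdio.h&gt;
+
+static unsigned long l1 = 0, l2 = 0;  /* one line tag per level; 0 = empty */
+
+static void access_inclusive(unsigned long tag)
+{
+   if (tag == l1) return;   /* L1 hit */
+   l2 = tag;                /* fetch into L2 first ... */
+   l1 = tag;                /* ... then into L1 */
+}
+
+static void access_exclusive(unsigned long tag)
+{
+   if (tag == l1) return;   /* L1 hit */
+   if (tag == l2) l2 = 0;   /* an L2 hit moves the line out of L2 */
+   if (l1 != 0) l2 = l1;    /* L1's victim drops into L2 ... */
+   l1 = tag;                /* ... and the new line enters L1 only */
+}
+
+int main(void)
+{
+   access_inclusive(0x100);
+   printf("inclusive: l1=%#lx l2=%#lx\n", l1, l2);  /* both hold 0x100 */
+   l1 = l2 = 0;
+   access_exclusive(0x100);
+   access_exclusive(0x200);
+   printf("exclusive: l1=%#lx l2=%#lx\n", l1, l2);  /* 0x200, 0x100 */
+   return 0;
+}
+</programlisting>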
</listitem>
(I1/D1/L2) of the cache from the command line using the
<computeroutput>--I1</computeroutput>,
<computeroutput>--D1</computeroutput> and
-<computeroutput>--L2</computeroutput> options.</para>
+<computeroutput>--L2</computeroutput> options.
+For the cache parameters to be valid for simulation, the number of
+sets (where associativity is the number of cache lines in each set)
+must be a power of two.</para>
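+
+<para>For example, to simulate a 32KB, 8-way D1 cache with 64-byte
+lines (sizes are given in bytes, so 32768/64/8 = 64 sets, a power of
+two; <computeroutput>myprog</computeroutput> stands in for the program
+under test):</para>
+<screen>
+valgrind --tool=cachegrind --D1=32768,8,64 ./myprog
+</screen>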
<para>On PowerPC platforms
Cachegrind cannot automatically
<para>If you are interested in simulating a cache with different
properties, it is not particularly hard to write your own cache
simulator, or to modify the existing ones in
-<computeroutput>vg_cachesim_I1.c</computeroutput>,
-<computeroutput>vg_cachesim_D1.c</computeroutput>,
-<computeroutput>vg_cachesim_L2.c</computeroutput> and
-<computeroutput>vg_cachesim_gen.c</computeroutput>. We'd be
+<computeroutput>cg_sim.c</computeroutput>. We'd be
interested to hear from anyone who does.</para>
</sect2>
Bool sectored; /* prefetch nearside cacheline on read */
int sets;
int sets_min_1;
- int assoc_bits;
int line_size_bits;
int tag_shift;
UWord tag_mask;
c->sets = (c->size / c->line_size) / c->assoc;
c->sets_min_1 = c->sets - 1;
- c->assoc_bits = VG_(log2)(c->assoc);
c->line_size_bits = VG_(log2)(c->line_size);
c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
c->tag_mask = ~((1<<c->tag_shift)-1);
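+ /* Example: size = 32768, assoc = 8, line_size = 64 gives
+    sets = 64, line_size_bits = 6, tag_shift = 12, tag_mask = ~0xfff. */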
int i, j;
UWord *set;
- /* Shifting is a bit faster than multiplying */
- set = &(c->tags[set_no << c->assoc_bits]);
+ set = &(c->tags[set_no * c->assoc]);
/* This loop is unrolled for just the first case, which is the most */
/* common. We can't unroll any further because it would screw up */
int i, j;
UWord *set, tmp_tag;
- /* Shifting is a bit faster than multiplying */
- set = &(c->tags[set_no << c->assoc_bits]);
+ set = &(c->tags[set_no * c->assoc]);
/* This loop is unrolled for just the first case, which is the most */
/* common. We can't unroll any further because it would screw up */
/* Access straddles two lines. */
/* Nb: this is a fast way of doing ((set1+1) % c->sets) */
else if (((set1 + 1) & (c->sets-1)) == set2) {
- UWord tag2 = (a+size-1) >> c->tag_shift;
+ UWord tag2 = (a+size-1) & c->tag_mask;
/* the call updates cache structures as side effect */
CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
/* We use the lower tag bits of each tag word as offset pointers into
 * the cache use info, so the associativity must not exceed
 * 2^tag_shift; i.e. some cache parameters don't work.
 */
- if (c->tag_shift < c->assoc_bits) {
+ if ((1<<c->tag_shift) < c->assoc) {
VG_(message)(Vg_DebugMsg,
"error: Use associativity < %d for cache use statistics!",
(1<<c->tag_shift) );
static __inline__
void cacheuse_update_hit(cache_t2* c, UInt high_idx, UInt low_idx, UInt use_mask)
{
- int idx = (high_idx << c->assoc_bits) | low_idx;
+ int idx = (high_idx * c->assoc) + low_idx;
c->use[idx].count ++;
c->use[idx].mask |= use_mask;
UWord *set, tmp_tag;
UInt use_mask;
- /* Shifting is a bit faster than multiplying */
- set = &(c->tags[set_no << c->assoc_bits]);
+ set = &(c->tags[set_no * c->assoc]);
use_mask =
c->line_start_mask[a & c->line_size_mask] &
c->line_end_mask[(a+size-1) & c->line_size_mask];
}
set[0] = tag | tmp_tag;
- cacheuse_L2_miss(c, (set_no << c->assoc_bits) | tmp_tag,
+ cacheuse_L2_miss(c, (set_no * c->assoc) + tmp_tag,
use_mask, a & ~c->line_size_mask);
return Miss;
{
UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
- UWord tag = a >> c->tag_shift;
+ UWord tag = a & c->tag_mask;
/* Access entirely within line. */
if (set1 == set2)
/* Access straddles two lines. */
/* Nb: this is a fast way of doing ((set1+1) % c->sets) */
else if (((set1 + 1) & (c->sets-1)) == set2) {
- UWord tag2 = a >> c->tag_shift;
+ UWord tag2 = a & c->tag_mask;
/* the call updates cache structures as side effect */
CacheResult res1 = cacheuse_isMiss(c, set1, tag);
/* First case: word entirely within line. */ \
if (set1 == set2) { \
\
- /* Shifting is a bit faster than multiplying */ \
- set = &(L.tags[set1 << L.assoc_bits]); \
+ set = &(L.tags[set1 * L.assoc]); \
use_mask = L.line_start_mask[a & L.line_size_mask] & \
L.line_end_mask[(a+size-1) & L.line_size_mask]; \
\
/* common. We can't unroll any further because it would screw up */\
/* if we have a direct-mapped (1-way) cache. */\
if (tag == (set[0] & L.tag_mask)) { \
- idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
+ idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
set[j] = set[j - 1]; \
} \
set[0] = tmp_tag; \
- idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
+ idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
set[j] = set[j - 1]; \
} \
set[0] = tag | tmp_tag; \
- idx = (set1 << L.assoc_bits) | tmp_tag; \
+ idx = (set1 * L.assoc) + tmp_tag; \
return update_##L##_use(&L, idx, \
use_mask, a &~ L.line_size_mask); \
\
/* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
} else if (((set1 + 1) & (L.sets-1)) == set2) { \
Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:L2 miss */ \
- set = &(L.tags[set1 << L.assoc_bits]); \
+ set = &(L.tags[set1 * L.assoc]); \
use_mask = L.line_start_mask[a & L.line_size_mask]; \
if (tag == (set[0] & L.tag_mask)) { \
- idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
+ idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
set[j] = set[j - 1]; \
} \
set[0] = tmp_tag; \
- idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
+ idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
set[j] = set[j - 1]; \
} \
set[0] = tag | tmp_tag; \
- idx = (set1 << L.assoc_bits) | tmp_tag; \
+ idx = (set1 * L.assoc) + tmp_tag; \
miss1 = update_##L##_use(&L, idx, \
use_mask, a &~ L.line_size_mask); \
block2: \
- set = &(L.tags[set2 << L.assoc_bits]); \
+ set = &(L.tags[set2 * L.assoc]); \
use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \
tag2 = (a+size-1) & L.tag_mask; \
if (tag2 == (set[0] & L.tag_mask)) { \
- idx = (set2 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
+ idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
set[j] = set[j - 1]; \
} \
set[0] = tmp_tag; \
- idx = (set2 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
+ idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
set[j] = set[j - 1]; \
} \
set[0] = tag2 | tmp_tag; \
- idx = (set2 << L.assoc_bits) | tmp_tag; \
+ idx = (set2 * L.assoc) + tmp_tag; \
miss2 = update_##L##_use(&L, idx, \
use_mask, (a+size-1) &~ L.line_size_mask); \
return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:L2_Hit; \
CacheModelResult cacheuse_L2_access(Addr memline, line_loaded* l1_loaded)
{
UInt setNo = (memline >> L2.line_size_bits) & (L2.sets_min_1);
- UWord* set = &(L2.tags[setNo << L2.assoc_bits]);
+ UWord* set = &(L2.tags[setNo * L2.assoc]);
UWord tag = memline & L2.tag_mask;
int i, j, idx;
CLG_DEBUG(6,"L2.Acc(Memline %#lx): Set %d\n", memline, setNo);
if (tag == (set[0] & L2.tag_mask)) {
- idx = (setNo << L2.assoc_bits) | (set[0] & ~L2.tag_mask);
+ idx = (setNo * L2.assoc) + (set[0] & ~L2.tag_mask);
l1_loaded->dep_use = &(L2.use[idx]);
CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
set[j] = set[j - 1];
}
set[0] = tmp_tag;
- idx = (setNo << L2.assoc_bits) | (tmp_tag & ~L2.tag_mask);
+ idx = (setNo * L2.assoc) + (tmp_tag & ~L2.tag_mask);
l1_loaded->dep_use = &(L2.use[idx]);
CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
set[j] = set[j - 1];
}
set[0] = tag | tmp_tag;
- idx = (setNo << L2.assoc_bits) | tmp_tag;
+ idx = (setNo * L2.assoc) + tmp_tag;
l1_loaded->dep_use = &(L2.use[idx]);
update_L2_use(idx, memline);
static
void check_cache(cache_t* cache, Char *name)
{
- /* First check they're all powers of two */
- if (-1 == VG_(log2)(cache->size)) {
+ /* Simulator requires line size and set count to be powers of two */
+ if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
+ (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc))) {
VG_(message)(Vg_UserMsg,
- "error: %s size of %dB not a power of two; aborting.",
- name, cache->size);
- VG_(exit)(1);
- }
-
- if (-1 == VG_(log2)(cache->assoc)) {
- VG_(message)(Vg_UserMsg,
- "error: %s associativity of %d not a power of two; aborting.",
- name, cache->assoc);
- VG_(exit)(1);
+ "error: %s set count not a power of two; aborting.",
+ name);
}
- if (-1 == VG_(log2)(cache->line_size)) {
+ if (-1 == VG_(log2)(cache->line_size)) {
VG_(message)(Vg_UserMsg,
"error: %s line size of %dB not a power of two; aborting.",
name, cache->line_size);