This patch is the first installment of the cache info reorganisation.

author Florian Krohm <florian@eich-krohm.de>

Sun, 7 Oct 2012 19:47:04 +0000 (19:47 +0000)

committer Florian Krohm <florian@eich-krohm.de>

Sun, 7 Oct 2012 19:47:04 +0000 (19:47 +0000)
author Florian Krohm <florian@eich-krohm.de>
Sun, 7 Oct 2012 19:47:04 +0000 (19:47 +0000)
committer Florian Krohm <florian@eich-krohm.de>
Sun, 7 Oct 2012 19:47:04 +0000 (19:47 +0000)
diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am

index f22fe17307cba2d708174c47180ee66ea46708c1..43a44af6c8f5b6b691915b62f036de9bbfdae647 100644 (file)
--- a/cachegrind/Makefile.am
+++ b/cachegrind/Makefile.am
@@ -41,13 +41,7 @@ endif
  
  CACHEGRIND_SOURCES_COMMON = \
         cg_main.c \
-       cg-arch.c \
-       cg-x86-amd64.c \
-       cg-ppc32.c \
-       cg-ppc64.c \
-       cg-arm.c   \
-       cg-s390x.c \
-       cg-mips32.c
+       cg-arch.c
  
  cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
         $(CACHEGRIND_SOURCES_COMMON)
@@ -88,5 +82,3 @@ cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LINK = \
         $(cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS) \
         $(cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDFLAGS)
  endif
-
-
diff --git a/cachegrind/cg-arch.c b/cachegrind/cg-arch.c

index 4afaab69977ba4d47534e948d10a246ad27a28be..c6847d99e98da79665c838da5336874aac1d5fe4 100644 (file)
--- a/cachegrind/cg-arch.c
+++ b/cachegrind/cg-arch.c
@@ -1,8 +1,5 @@
  /*--------------------------------------------------------------------*/
-/*--- Cachegrind: cache configuration.                             ---*/
-/*--- The architecture specific void VG_(configure_caches) are     ---*/
-/*--- located in the cg-<architecture>.c files.                    ---*/
-/*---                                                    cg-arch.c ---*/
+/*--- Cachegrind: cache configuration.                   cg-arch.c ---*/
  /*--------------------------------------------------------------------*/
  
  /*
@@ -35,9 +32,13 @@
  #include "pub_tool_libcbase.h"
  #include "pub_tool_libcprint.h"
  #include "pub_tool_options.h"
+#include "pub_tool_machine.h"
  
  #include "cg_arch.h"
  
+static void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc,
+                             Bool all_caches_clo_defined);
+
  // Checks cache config is ok.  Returns NULL if ok, or a pointer to an error
  // string otherwise.
  static Char* check_cache(cache_t* cache)
@@ -157,6 +158,65 @@ static void check_cache_or_override(Char* desc, cache_t* c, Bool clo_redefined)
     }
  }
  
+
+/* If the LL cache config isn't something the simulation functions
+   can handle, try to adjust it so it is.  Caches are characterised
+   by (total size T, line size L, associativity A), and then we
+   have
+
+     number of sets S = T / (L * A)
+
+   The required constraints are:
+
+   * L must be a power of 2, but it always is in practice, so
+     no problem there
+
+   * A can be any value >= 1
+
+   * T can be any value, but ..
+
+   * S must be a power of 2.
+
+   That sometimes gives a problem.  For example, some Core iX based
+   Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
+   sets.  The "fix" in this case is to increase the associativity
+   by 50% to 24, which reduces the number of sets to 8192, making
+   it a power of 2.  That's what the following code does (handling
+   the "3/2 rescaling case").  We might need to deal with other
+   ratios later (5/4 ?).
+
+   The "fix" is "justified" (cough, cough) by alleging that
+   increases of associativity above about 4 have very little effect
+   on the actual miss rate.  It would be far more inaccurate to
+   fudge this by changing the size of the simulated cache --
+   changing the associativity is a much better option.
+*/
+
+static void
+maybe_tweak_LLc(cache_t *LLc)
+{
+  if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {   /* only touch a fully positive config */
+      Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
+      if (/* stay sane */
+          nSets >= 4
+          /* nSets is not a power of 2 */
+          && VG_(log2_64)( (ULong)nSets ) == -1
+          /* nSets is 50% above a power of 2.  NOTE(review): the division truncates, so e.g. nSets == 13 passes too -- confirm acceptable */
+          && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
+          /* associativity can be increased by exactly 50% */
+          && (LLc->assoc % 2) == 0
+         ) {
+         /* # sets is 1.5 * a power of two, but the associativity is
+            even, so we can increase that up by 50% and implicitly
+            scale the # sets down accordingly. */
+         Int new_assoc = LLc->assoc + (LLc->assoc / 2);
+         VG_(dmsg)("warning: pretending that LL cache has associativity"
+                   " %d instead of actual %d\n", new_assoc, LLc->assoc);
+         LLc->assoc = new_assoc;   /* only the associativity is changed; size and line size stay as they were */
+      }
+   }
+}
+
  void VG_(post_clo_init_configure_caches)(cache_t* I1c,
                                           cache_t* D1c,
                                           cache_t* LLc,
@@ -174,7 +234,9 @@ void VG_(post_clo_init_configure_caches)(cache_t* I1c,
  
     // Set the cache config (using auto-detection, if supported by the
     // architecture).
-   VG_(configure_caches)( I1c, D1c, LLc, all_caches_clo_defined );
+   configure_caches( I1c, D1c, LLc, all_caches_clo_defined );
+
+   maybe_tweak_LLc( LLc );
  
     // Check the default/auto-detected values.
     // Allow the user to override invalid auto-detected caches
@@ -206,3 +268,172 @@ void VG_(print_cache_clo_opts)()
  "    --LL=<size>,<assoc>,<line_size>  set LL cache manually\n"
                 );
  }
+
+
+// Traverse the cache info and return the first cache whose kind and level
+// both match the request.  Return NULL if no such cache exists.
+static const VexCache *
+locate_cache(const VexCacheInfo *ci, VexCacheKind kind, UInt level)
+{
+   const VexCache *c;
+
+   for (c = ci->caches; c != ci->caches + ci->num_caches; ++c) {  // linear scan of the num_caches entries
+      if (c->level == level && c->kind == kind) {
+         return c;
+      }
+   }
+   return NULL;  // not found
+}
+
+
+// Gives the auto-detected configuration of I1, D1 and LL caches.  They get
+// overridden by any cache configurations specified on the command line.
+static void
+configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,   // out: always filled in
+                 Bool all_caches_clo_defined)   // True: no warning when falling back to defaults
+{
+   VexArchInfo vai;
+   const VexCacheInfo *ci;
+   const VexCache *i1, *d1, *ll;
+
+   VG_(machine_get_VexArchInfo)(NULL, &vai);
+   ci = &vai.hwcache_info;
+
+   // Extract what we need: L1 insn cache, L1 data cache, unified top-level cache
+   i1 = locate_cache(ci, INSN_CACHE, 1);
+   d1 = locate_cache(ci, DATA_CACHE, 1);
+   // FIXME: needs clarification for num_levels > 3 see also warning below
+   // FIXME: whether it needs adjustment
+   ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);
+
+   if (ll && ci->num_levels > 2) {
+      VG_(dmsg)("warning: L%u cache found, using its data for the "
+                "LL simulation.\n", ci->num_levels);
+   }
+
+   if (i1 && d1 && ll) {   // all three caches were found: use the queried values
+      *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
+      *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
+      *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };
+
+      return;
+   }
+
+   // Cache information could not be queried (at least one of the three
+   // caches is missing); fall back to an architecture-specific default.
+
+#if defined(VGA_ppc32)
+
+   // Default cache configuration
+   *I1c = (cache_t) {  65536, 2, 64 };
+   *D1c = (cache_t) {  65536, 2, 64 };
+   *LLc = (cache_t) { 262144, 8, 64 };
+
+#elif defined(VGA_ppc64)
+
+   // Default cache configuration
+   *I1c = (cache_t) {  65536, 2, 64 };
+   *D1c = (cache_t) {  65536, 2, 64 };
+   *LLc = (cache_t) { 262144, 8, 64 };
+
+#elif defined(VGA_arm)
+
+   // Set caches to default (for Cortex-A8 ?)
+   *I1c = (cache_t) {  16384, 4, 64 };
+   *D1c = (cache_t) {  16384, 4, 64 };
+   *LLc = (cache_t) { 262144, 8, 64 };
+
+#elif defined(VGA_s390x)
+   // z900
+   //
+   // Source:
+   // The microarchitecture of the IBM eServer z900 processor
+   // IBM Journal of Research and Development
+   // Volume 46, Number 4/5, pp 381-395, July/September 2002
+   //
+   // Split L1 I/D cache
+   // Size: 256 kB each
+   // Line size: 256 bytes
+   // 4-way set associative
+   // L2 cache: 16 MB x 2 (16 MB per 10 CPs)  (Charles Webb)
+
+   // z800
+   //
+   // Source:  Charles Webb from IBM
+   //
+   // Split L1 I/D cache
+   // Size: 256 kB each
+   // Line size: 256 bytes
+   // 4-way set associative
+   // L2 cache: 16 MB  (or half that size)
+
+   // z990
+   //
+   // The IBM eServer z990 microprocessor
+   // IBM Journal of Research and Development
+   // Volume 48, Number 3/4, pp 295-309, May/July 2004
+   //
+   // Split L1 I/D cache
+   // Size: 256 kB each
+   // Line size: 256 bytes
+   // 4-way set associative
+   // L2 cache: 32 MB x 4 (32 MB per book/node)  (Charles Webb)
+
+   // z890
+   //
+   // Source:  Charles Webb from IBM
+   //
+   // Split L1 I/D cache
+   // Size: 256 kB each
+   // Line size: 256 bytes
+   // 4-way set associative
+   // L2 cache: 32 MB  (or half that size)
+
+   // z9
+   //
+   // Source:  Charles Webb from IBM
+   //
+   // Split L1 I/D cache
+   // Size: 256 kB each
+   // Line size: 256 bytes
+   // 4-way set associative
+   // L2 cache: 40 MB x 4 (40 MB per book/node)
+
+   // fixs390: have a table for all models we support and check
+   // fixs390: VEX_S390X_MODEL(hwcaps)
+
+   // Default cache configuration is z10-EC  (Source: ECAG insn)
+   *I1c = (cache_t) {    65536,  4, 256 };
+   *D1c = (cache_t) {   131072,  8, 256 };
+   *LLc = (cache_t) { 50331648, 24, 256 };
+
+#elif defined(VGA_mips32)
+
+   // Set caches to default (for MIPS32-r2(mips 74kc))
+   *I1c = (cache_t) {  32768, 4, 32 };
+   *D1c = (cache_t) {  32768, 4, 32 };
+   *LLc = (cache_t) { 524288, 8, 32 };
+
+#elif defined(VGA_x86) || defined(VGA_amd64)
+
+   *I1c = (cache_t) {  65536, 2, 64 };
+   *D1c = (cache_t) {  65536, 2, 64 };
+   *LLc = (cache_t) { 262144, 8, 64 };
+
+#else
+
+#error "Unknown arch"
+
+#endif
+
+   if (!all_caches_clo_defined) {
+      const char warning[] =
+        "Warning: Cannot auto-detect cache config, using defaults.\n"
+        "         Run with -v to see.\n";
+      VG_(dmsg)("%s", warning);
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg-arm.c b/cachegrind/cg-arm.c

deleted file mode 100644 (file)

index 00badcd..0000000
--- a/cachegrind/cg-arm.c
+++ /dev/null
@@ -1,59 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- ARM-specific definitions.                           cg-arm.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Cachegrind, a Valgrind tool for cache
-   profiling programs.
-
-   Copyright (C) 2005-2012 Johan Bjork
-      jbjoerk@gmail.com
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file COPYING.
-*/
-
-#if defined(VGA_arm)
-
-#include "pub_tool_basics.h"
-#include "pub_tool_libcbase.h"
-#include "pub_tool_libcassert.h"
-#include "pub_tool_libcprint.h"
-
-#include "cg_arch.h"
-
-void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc,
-                           Bool all_caches_clo_defined)
-{
-   // Set caches to default (for Cortex-A8 ?)
-   *I1c = (cache_t) {  16384, 4, 64 };
-   *D1c = (cache_t) {  16384, 4, 64 };
-   *LLc = (cache_t) { 262144, 8, 64 };
-
-   if (!all_caches_clo_defined) {
-      VG_(message)(Vg_DebugMsg, 
-                   "Warning: Cannot auto-detect cache config on ARM, using one "
-                   "or more defaults\n");
-   }
-}
-
-#endif // #if defined(VGA_arm)
-
-/*--------------------------------------------------------------------*/
-/*--- end                                                 cg-arm.c ---*/
-/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg-mips32.c b/cachegrind/cg-mips32.c

deleted file mode 100644 (file)

index 5ad69c2..0000000
--- a/cachegrind/cg-mips32.c
+++ /dev/null
@@ -1,59 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- MIPS-specific definitions.                       cg-mips32.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Cachegrind, a Valgrind tool for cache
-   profiling programs.
-
-   Copyright (C) 2010-2012 RT-RK
-      mips-valgrind@rt-rk.com
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file COPYING.
-*/
-
-#if defined(VGA_mips32)
-
-#include "pub_tool_basics.h"
-#include "pub_tool_libcbase.h"
-#include "pub_tool_libcassert.h"
-#include "pub_tool_libcprint.h"
-
-#include "cg_arch.h"
-
-void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
-                           Bool all_caches_clo_defined)
-{
-   // Set caches to default (for MIPS32-r2(mips 74kc))
-   *I1c = (cache_t) {  32768, 4, 32 };
-   *D1c = (cache_t) {  32768, 4, 32 };
-   *L2c = (cache_t) { 524288, 8, 32 };
-
-   if (!all_caches_clo_defined) {
-      VG_(message)(Vg_DebugMsg, 
-                   "Warning: Cannot auto-detect cache config on MIPS32, using one "
-                   "or more defaults\n");
-   }
-}
-
-#endif // #if defined(VGA_mips32)
-
-/*--------------------------------------------------------------------*/
-/*--- end                                              cg-mips32.c ---*/
-/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg-ppc32.c b/cachegrind/cg-ppc32.c

deleted file mode 100644 (file)

index d0386d6..0000000
--- a/cachegrind/cg-ppc32.c
+++ /dev/null
@@ -1,68 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- PPC32-specific definitions.                       cg-ppc32.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Cachegrind, a Valgrind tool for cache
-   profiling programs.
-
-   Copyright (C) 2005-2012 Nicholas Nethercote
-      njn@valgrind.org
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file COPYING.
-*/
-
-#if defined(VGA_ppc32)
-
-#include "pub_tool_basics.h"
-#include "pub_tool_libcbase.h"
-#include "pub_tool_libcassert.h"
-#include "pub_tool_libcprint.h"
-
-#include "cg_arch.h"
-
-void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc,
-                           Bool all_caches_clo_defined)
-{
-   // Set caches to default.
-   *I1c = (cache_t) {  65536, 2, 64 };
-   *D1c = (cache_t) {  65536, 2, 64 };
-   *LLc = (cache_t) { 262144, 8, 64 };
-
-   // Warn if config not completely specified from cmd line.  Note that
-   // this message is slightly different from the one we give on x86/AMD64
-   // when auto-detection fails;  this lets us filter out this one (which is
-   // not important) in the regression test suite without filtering the
-   // x86/AMD64 one (which we want to see if it ever occurs in the
-   // regression test suite).
-   //
-   // If you change this message, please update
-   // cachegrind/tests/filter_stderr!
-   //
-   if (!all_caches_clo_defined) {
-      VG_(dmsg)("Warning: Cannot auto-detect cache config on PPC32, using one "
-                "or more defaults\n");
-   }
-}
-
-#endif // defined(VGA_ppc32)
-
-/*--------------------------------------------------------------------*/
-/*--- end                                                          ---*/
-/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg-ppc64.c b/cachegrind/cg-ppc64.c

deleted file mode 100644 (file)

index e594b99..0000000
--- a/cachegrind/cg-ppc64.c
+++ /dev/null
@@ -1,68 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- PPC64-specific definitions.                       cg-ppc64.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Cachegrind, a Valgrind tool for cache
-   profiling programs.
-
-   Copyright (C) 2005-2012 Nicholas Nethercote
-      njn@valgrind.org
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file COPYING.
-*/
-
-#if defined(VGA_ppc64)
-
-#include "pub_tool_basics.h"
-#include "pub_tool_libcbase.h"
-#include "pub_tool_libcassert.h"
-#include "pub_tool_libcprint.h"
-
-#include "cg_arch.h"
-
-void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc,
-                           Bool all_caches_clo_defined)
-{
-   // Set caches to default.
-   *I1c = (cache_t) {  65536, 2, 64 };
-   *D1c = (cache_t) {  65536, 2, 64 };
-   *LLc = (cache_t) { 262144, 8, 64 };
-
-   // Warn if config not completely specified from cmd line.  Note that
-   // this message is slightly different from the one we give on x86/AMD64
-   // when auto-detection fails;  this lets us filter out this one (which is
-   // not important) in the regression test suite without filtering the
-   // x86/AMD64 one (which we want to see if it ever occurs in the
-   // regression test suite).
-   //
-   // If you change this message, please update
-   // cachegrind/tests/filter_stderr!
-   //
-   if (!all_caches_clo_defined) {
-      VG_(dmsg)("Warning: Cannot auto-detect cache config on PPC64, using one "
-                "or more defaults\n");
-   }
-}
-
-#endif
-
-/*--------------------------------------------------------------------*/
-/*--- end                                                          ---*/
-/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg-s390x.c b/cachegrind/cg-s390x.c

deleted file mode 100644 (file)

index 3165efd..0000000
--- a/cachegrind/cg-s390x.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/* -*- mode: C; c-basic-offset: 3; -*- */
-
-/*--------------------------------------------------------------------*/
-/*--- s390x-specific definitions.                       cg-s390x.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Cachegrind, a Valgrind tool for cache
-   profiling programs.
-
-   Copyright IBM Corp. 2010-2012
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file COPYING.
-*/
-
-/* Contributed by Christian Borntraeger */
-
-#if defined(VGA_s390x)
-
-#include "pub_tool_basics.h"
-#include "pub_tool_libcbase.h"
-#include "pub_tool_libcassert.h"
-#include "pub_tool_libcprint.h"
-
-#include "cg_arch.h"
-
-void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc,
-                           Bool all_caches_clo_defined)
-{
-   // z900
-   //
-   // Source:
-   // The microarchitecture of the IBM eServer z900 processor
-   // IBM Journal of Research and Development
-   // Volume 46, Number 4/5, pp 381-395, July/September 2002
-   //
-   // Split L1 I/D cache
-   // Size: 256 kB each
-   // Line size: 256 bytes
-   // 4-way set associative
-   // L2 cache: 16 MB x 2 (16 MB per 10 CPs)  (Charles Webb)
-
-   // z800
-   //
-   // Source:  Charles Webb from IBM
-   //
-   // Split L1 I/D cache
-   // Size: 256 kB each
-   // Line size: 256 bytes
-   // 4-way set associative
-   // L2 cache: 16 MB  (or half that size)
-
-   // z990
-   //
-   // The IBM eServer z990 microprocessor
-   // IBM Journal of Research and Development
-   // Volume 48, Number 3/4, pp 295-309, May/July 2004 
-   //
-   // Split L1 I/D cache
-   // Size: 256 kB each
-   // Line size: 256 bytes
-   // 4-way set associative
-   // L2 cache: 32 MB x 4 (32 MB per book/node)  (Charles Webb)
-
-   // z890
-   //
-   // Source:  Charles Webb from IBM
-   //
-   // Split L1 I/D cache
-   // Size: 256 kB each
-   // Line size: 256 bytes
-   // 4-way set associative
-   // L2 cache: 32 MB  (or half that size)
-
-   // z9
-   //
-   // Source:  Charles Webb from IBM
-   //
-   // Split L1 I/D cache
-   // Size: 256 kB each
-   // Line size: 256 bytes
-   // 4-way set associative
-   // L2 cache: 40 MB x 4 (40 MB per book/node)
-
-
-   // Set caches to z10 default.
-   // See IBM Journal of Research and Development
-   // Issue Date: Jan. 2009
-   // Volume: 53 Issue:1
-   // fixs390: have a table for all available models and check /proc/cpuinfo
-   *I1c = (cache_t) {   65536,  4, 256 };
-   *D1c = (cache_t) {  131072,  8, 256 };
-   *LLc = (cache_t) {50331648, 24, 256 };
-
-   // Warn if config not completely specified from cmd line.  Note that
-   // this message is slightly different from the one we give on x86/AMD64
-   // when auto-detection fails;  this lets us filter out this one (which is
-   // not important) in the regression test suite without filtering the
-   // x86/AMD64 one (which we want to see if it ever occurs in the
-   // regression test suite).
-   //
-   // If you change this message, please update
-   // cachegrind/tests/filter_stderr!
-   //
-   if (!all_caches_clo_defined) {
-      VG_(dmsg)("Warning: Cannot auto-detect cache config, "
-                "assuming z10-EC cache configuration\n");
-   }
-}
-
-#endif
-
-/*--------------------------------------------------------------------*/
-/*--- end                                               cg-s390x.c ---*/
-/*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg_arch.h b/cachegrind/cg_arch.h

index 99d0cb1d337cd41322d6b3a7467a5d6a3a6fc08a..d35ec73bd963f404361c86308c8d7260957e047b 100644 (file)
--- a/cachegrind/cg_arch.h
+++ b/cachegrind/cg_arch.h
@@ -44,11 +44,6 @@ typedef struct {
  // initialized to UNDEFINED_CACHE.
  #define UNDEFINED_CACHE     { -1, -1, -1 }
  
-// Gives the auto-detected configuration of I1, D1 and LL caches.  They get
-// overridden by any cache configurations specified on the command line.
-void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc,
-                           Bool all_caches_clo_defined);
-
  // If arg is a command line option configuring I1 or D1 or LL cache,
  // then parses arg to set the relevant cache_t elements.
  // Returns True if arg is a cache command line option, False otherwise.
diff --git a/cachegrind/tests/filter_stderr b/cachegrind/tests/filter_stderr

index 0eeb91ea30e6dac9e26a651015bb6de56fd03c31..8b9dd78eaf5cc608dc7528da2c6a37a117d95a1b 100755 (executable)
--- a/cachegrind/tests/filter_stderr
+++ b/cachegrind/tests/filter_stderr
@@ -17,8 +17,6 @@ perl -p -e 's/((I1|D1|LL|LLi|LLd) *(misses|miss rate):)[ 0-9,()+rdw%\.]*$/\1/' |
  sed "/warning: Pentium 4 with 12 KB micro-op instruction trace cache/d" |
  sed "/Simulating a 16 KB I-cache with 32 B lines/d"   |
  sed "/warning: L3 cache found, using its data for the LL simulation./d" |
-sed "/Warning: Cannot auto-detect cache config on PPC.., using one or more defaults/d" |
-sed "/Warning: Cannot auto-detect cache config on ARM, using one or more defaults/d" |
-sed "/Warning: Cannot auto-detect cache config, assuming z10-EC cache configuration/d" |
-sed "/Warning: Cannot auto-detect cache config on MIPS.., using one or more defaults/d" |
+sed "/Warning: Cannot auto-detect cache config, using defaults./d" |
+sed "/Run with -v to see./d" |
  sed "/warning: pretending that LL cache has associativity .*$/d"
diff --git a/callgrind/Makefile.am b/callgrind/Makefile.am

index ae4ff4fc69ac84dc7b6fa66cd094d7e467f35d66..343f03470e0bd6415f2d9c1417421cfc3021df59 100644 (file)
--- a/callgrind/Makefile.am
+++ b/callgrind/Makefile.am
@@ -45,13 +45,7 @@ CALLGRIND_SOURCES_COMMON = \
         main.c \
         sim.c \
         threads.c \
-       ../cachegrind/cg-arch.c \
-       ../cachegrind/cg-x86-amd64.c \
-       ../cachegrind/cg-ppc32.c \
-       ../cachegrind/cg-ppc64.c \
-       ../cachegrind/cg-arm.c   \
-       ../cachegrind/cg-s390x.c \
-       ../cachegrind/cg-mips32.c
+       ../cachegrind/cg-arch.c
  
  CALLGRIND_CFLAGS_COMMON = -I$(top_srcdir)/cachegrind
  
@@ -94,4 +88,3 @@ callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LINK = \
         $(callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS) \
         $(callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDFLAGS)
  endif
-
diff --git a/callgrind/tests/filter_stderr b/callgrind/tests/filter_stderr

index 0842a936617487a6d1ee22933f4aa48ab2f5f478..3f6f448627d5bdd73a38fe0717136a89d0d52406 100755 (executable)
--- a/callgrind/tests/filter_stderr
+++ b/callgrind/tests/filter_stderr
@@ -26,8 +26,6 @@ perl -p -e 's/((Branches|Mispredicts|Mispred rate):)[ 0-9,()+condi%\.]*$/\1/' |
  sed "/warning: Pentium 4 with 12 KB micro-op instruction trace cache/d" |
  sed "/Simulating a 16 KB I-cache with 32 B lines/d"   |
  sed "/warning: L3 cache found, using its data for the LL simulation./d" |
-sed "/Warning: Cannot auto-detect cache config on PPC.., using one or more defaults/d" |
-sed "/Warning: Cannot auto-detect cache config on ARM, using one or more defaults/d" |
-sed "/Warning: Cannot auto-detect cache config, assuming z10-EC cache configuration/d" |
-sed "/Warning: Cannot auto-detect cache config on MIPS.., using one or more defaults/d" |
+sed "/Warning: Cannot auto-detect cache config, using defaults./d" |
+sed "/Run with -v to see./d" |
  sed "/warning: pretending that LL cache has associativity .*$/d"
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am

index 19fecf8b98aefcf502557e57d6bf3571dba773bc..d4eb8167f8ae1e040e6d03acd8f95913acf7e47d 100644 (file)
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -240,6 +240,7 @@ pkglib_LIBRARIES += libcoregrind-@VGCONF_ARCH_SEC@-@VGCONF_OS@.a
  endif
  
  COREGRIND_SOURCES_COMMON = \
+       m_cache.c \
         m_commandline.c \
         m_clientstate.c \
         m_cpuid.S \
diff --git a/cachegrind/cg-x86-amd64.c b/coregrind/m_cache.c

similarity index 71%

rename from cachegrind/cg-x86-amd64.c

rename to coregrind/m_cache.c

index 1eb6c996341cbd9378282bd89a7cfcfcd77d6582..3d3860e4f4459678821f9e4301072f442727a282 100644 (file)
--- a/cachegrind/cg-x86-amd64.c
+++ b/coregrind/m_cache.c
@@ -1,11 +1,12 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
  
  /*--------------------------------------------------------------------*/
-/*--- x86- and AMD64-specific definitions.          cg-x86-amd64.c ---*/
+/*--- Cache-related stuff.                               m_cache.c ---*/
  /*--------------------------------------------------------------------*/
  
  /*
-   This file is part of Cachegrind, a Valgrind tool for cache
-   profiling programs.
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
  
     Copyright (C) 2002-2012 Nicholas Nethercote
        njn@valgrind.org
@@ -28,38 +29,51 @@
     The GNU General Public License is contained in the file COPYING.
  */
  
-#if defined(VGA_x86) || defined(VGA_amd64)
+#include "pub_core_basics.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_mallocfree.h"
+#include "pub_core_machine.h"
+#include "libvex.h"
  
-#include "pub_tool_basics.h"
-#include "pub_tool_cpuid.h"
-#include "pub_tool_libcbase.h"
-#include "pub_tool_libcassert.h"
-#include "pub_tool_libcprint.h"
+#if defined(VGA_x86) || defined(VGA_amd64)
  
-#include "cg_arch.h"
+#include "pub_core_cpuid.h"
  
  // All CPUID info taken from sandpile.org/ia32/cpuid.htm */
  // Probably only works for Intel and AMD chips, and probably only for some of
-// them. 
+// them.
  
-static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
+static void 
+micro_ops_warn(Int actual_size, Int used_size, Int line_size)
  {
-   VG_(dmsg)("warning: Pentium 4 with %d KB micro-op instruction trace cache\n", 
+   VG_(dmsg)("warning: Pentium 4 with %d KB micro-op instruction trace cache\n",
               actual_size);
-   VG_(dmsg)("         Simulating a %d KB I-cache with %d B lines\n", 
+   VG_(dmsg)("         Simulating a %d KB I-cache with %d B lines\n",
               used_size, line_size);
  }
  
+/* FIXME: Temporarily introduce cachegrind's cache_t structure here to
+   get Intel_cache_info to work. This function needs to be rewritten to
+   properly fill in VexCacheInfo. Absolutely no warnings about ignored
+   caches and such are appropriate here! */
+typedef struct {
+   Int size;       // bytes (NOTE(review): the Intel table entries below look like KB -- confirm where they get scaled)
+   Int assoc;      // associativity
+   Int line_size;  // bytes
+} cache_t;
+
  /* Intel method is truly wretched.  We have to do an insane indexing into an
   * array of pre-defined configurations for various parts of the memory
   * hierarchy.
   * According to Intel Processor Identification, App Note 485.
- * 
+ *
   * If a L3 cache is found, then data for it rather than the L2
   * is returned via *LLc.
   */
-static
-Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
+static Int
+Intel_cache_info_aux(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
  {
     Int cpuid1_eax;
     Int cpuid1_ignore;
@@ -88,7 +102,7 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
     family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
     model =  (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
  
-   VG_(cpuid)(2, 0, (Int*)&info[0], (Int*)&info[4], 
+   VG_(cpuid)(2, 0, (Int*)&info[0], (Int*)&info[4],
                      (Int*)&info[8], (Int*)&info[12]);
     trials  = info[0] - 1;   /* AL register - bits 0..7 of %eax */
     info[0] = 0x0;           /* reset AL */
@@ -105,7 +119,7 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
  
        case 0x0:       /* ignore zeros */
            break;
-          
+
        /* TLB info, ignore */
        case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
        case 0x0b:
@@ -116,7 +130,7 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
        case 0xb0: case 0xb1: case 0xb2:
        case 0xb3: case 0xb4: case 0xba: case 0xc0:
        case 0xca:
-          break;      
+          break;
  
        case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
        case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
@@ -130,10 +144,10 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
        case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
  
        /* IA-64 info -- panic! */
-      case 0x10: case 0x15: case 0x1a: 
+      case 0x10: case 0x15: case 0x1a:
        case 0x88: case 0x89: case 0x8a: case 0x8d:
        case 0x90: case 0x96: case 0x9b:
-         VG_(tool_panic)("IA-64 cache detected?!");
+         VG_(core_panic)("IA-64 cache detected?!");
  
        /* L3 cache info. */
        case 0x22: L3c = (cache_t) { 512,    4, 64 }; L3_found = True; break;
@@ -169,7 +183,7 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
        case 0x39: *LLc = (cache_t) {  128, 4, 64 }; L2_found = True; break;
        case 0x3c: *LLc = (cache_t) {  256, 4, 64 }; L2_found = True; break;
  
-      /* If a P6 core, this means "no L2 cache".  
+      /* If a P6 core, this means "no L2 cache".
           If a P4 core, this means "no L3 cache".
           We don't know what core it is, so don't issue a warning.  To detect
           a missing L2 cache, we use 'L2_found'. */
@@ -201,20 +215,20 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
        /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
         * conversion to byte size is a total guess;  treat the 12K and 16K
         * cases the same since the cache byte size must be a power of two for
-       * everything to work!.  Also guessing 32 bytes for the line size... 
+       * everything to work!.  Also guessing 32 bytes for the line size...
         */
        case 0x70:    /* 12K micro-ops, 8-way */
-         *I1c = (cache_t) { 16, 8, 32 };  
+         *I1c = (cache_t) { 16, 8, 32 };
           micro_ops_warn(12, 16, 32);
-         break;  
+         break;
        case 0x71:    /* 16K micro-ops, 8-way */
-         *I1c = (cache_t) { 16, 8, 32 };  
-         micro_ops_warn(16, 16, 32); 
-         break;  
+         *I1c = (cache_t) { 16, 8, 32 };
+         micro_ops_warn(16, 16, 32);
+         break;
        case 0x72:    /* 32K micro-ops, 8-way */
-         *I1c = (cache_t) { 32, 8, 32 };  
-         micro_ops_warn(32, 32, 32); 
-         break;  
+         *I1c = (cache_t) { 32, 8, 32 };
+         micro_ops_warn(32, 32, 32);
+         break;
  
        /* not sectored, whatever that might mean */
        case 0x78: *LLc = (cache_t) { 1024, 4,  64 }; L2_found = True;  break;
@@ -242,7 +256,7 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
  
        case 0xff:
           j = 0;
-         VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4], 
+         VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4],
                              (Int*)&info[8], (Int*)&info[12]);
  
           while ((info[0] & 0x1f) != 0) {
@@ -264,25 +278,33 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
                 case 1: *D1c = c; break;
                 case 2: *I1c = c; break;
                 case 3: VG_(dmsg)("warning: L1 unified cache ignored\n"); break;
-               default: VG_(dmsg)("warning: L1 cache of unknown type ignored\n"); break;
+               default:
+                  VG_(dmsg)("warning: L1 cache of unknown type ignored\n");
+                  break;
                 }
                 break;
              case 2:
                 switch (info[0] & 0x1f)
                 {
                 case 1: VG_(dmsg)("warning: L2 data cache ignored\n"); break;
-               case 2: VG_(dmsg)("warning: L2 instruction cache ignored\n"); break;
+               case 2: VG_(dmsg)("warning: L2 instruction cache ignored\n");
+                  break;
                 case 3: *LLc = c; L2_found = True; break;
-               default: VG_(dmsg)("warning: L2 cache of unknown type ignored\n"); break;
+               default:
+                  VG_(dmsg)("warning: L2 cache of unknown type ignored\n");
+                  break;
                 }
                 break;
              case 3:
                 switch (info[0] & 0x1f)
                 {
                 case 1: VG_(dmsg)("warning: L3 data cache ignored\n"); break;
-               case 2: VG_(dmsg)("warning: L3 instruction cache ignored\n"); break;
+               case 2: VG_(dmsg)("warning: L3 instruction cache ignored\n");
+                  break;
                 case 3: L3c = c; L3_found = True; break;
-               default: VG_(dmsg)("warning: L3 cache of unknown type ignored\n"); break;
+               default:
+                  VG_(dmsg)("warning: L3 cache of unknown type ignored\n");
+                  break;
                 }
                 break;
              default:
@@ -290,21 +312,26 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
                 break;
              }
  
-            VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4], 
+            VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4],
                                 (Int*)&info[8], (Int*)&info[12]);
           }
           break;
  
        default:
-         VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), ignoring\n",
-                   info[i]);
+         VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), "
+                   "ignoring\n", info[i]);
           break;
        }
     }
  
-   /* If we found a L3 cache, throw away the L2 data and use the L3's instead. */
+   /* If we found a L3 cache, throw away the L2 data and use the L3's
+      instead. */
     if (L3_found) {
-      VG_(dmsg)("warning: L3 cache found, using its data for the LL simulation.\n");
+      /* Can't warn here: as we're not necessarily in cachegrind */
+#if 0
+      VG_(dmsg)("warning: L3 cache found, using its data for the "
+                "LL simulation.\n");
+#endif
        *LLc = L3c;
        L2_found = True;
     }
@@ -315,35 +342,63 @@ Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
     return 0;
  }
  
+/* Intel front end: run the legacy probe, repackage as VexCacheInfo. */
+static Int
+Intel_cache_info(Int level, VexCacheInfo *ci)
+{
+   /* Zeroed, as the probe only writes the caches CPUID reports. */
+   cache_t I1c = { 0, 0, 0 }, D1c = { 0, 0, 0 }, LLc = { 0, 0, 0 };
+   Int ret = Intel_cache_info_aux(level, &I1c, &D1c, &LLc);
+
+   /* Map results to VexCacheInfo (only on success). This is lossy: we
+      assume the LL data is an L2 where in fact it could have been an
+      L3. Irrelevant for current usages but needs to be fixed! */
+   if (ret == 0) {
+      ci->num_levels = 2;
+      ci->num_caches = 3;
+      ci->icaches_maintain_coherence = True;
+      ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof *ci->caches);
+
+      ci->caches[0] = VEX_CACHE_INIT(DATA_CACHE, 1, D1c.size, D1c.line_size,
+                                     D1c.assoc);
+      ci->caches[1] = VEX_CACHE_INIT(INSN_CACHE, 1, I1c.size, I1c.line_size,
+                                     I1c.assoc);
+      ci->caches[2] = VEX_CACHE_INIT(UNIFIED_CACHE, 2, LLc.size, LLc.line_size,
+                                     LLc.assoc);
+   }
+   return ret;
+}
+
  /* AMD method is straightforward, just extract appropriate bits from the
   * result registers.
   *
   * Bits, for D1 and I1:
- *  31..24  data L1 cache size in KBs    
- *  23..16  data L1 cache associativity (FFh=full)    
- *  15.. 8  data L1 cache lines per tag    
+ *  31..24  data L1 cache size in KBs
+ *  23..16  data L1 cache associativity (FFh=full)
+ *  15.. 8  data L1 cache lines per tag
   *   7.. 0  data L1 cache line size in bytes
   *
   * Bits, for L2:
   *  31..16  unified L2 cache size in KBs
   *  15..12  unified L2 cache associativity (0=off, FFh=full)
- *  11.. 8  unified L2 cache lines per tag    
+ *  11.. 8  unified L2 cache lines per tag
   *   7.. 0  unified L2 cache line size in bytes
   *
- * #3  The AMD K7 processor's L2 cache must be configured prior to relying 
+ * #3  The AMD K7 processor's L2 cache must be configured prior to relying
   *     upon this information. (Whatever that means -- njn)
   *
   * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
   * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
   * so we detect that.
- * 
+ *
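- * Returns 0 on success, non-zero on failure.  As with the Intel code
- * above, if a L3 cache is found, then data for it rather than the L2
- * is returned via *LLc.
+ * Returns 0 on success, non-zero on failure.  Unlike the Intel code
+ * above, a L3 cache, if found, does not replace the L2 data: it is
+ * recorded as an additional fourth entry in ci->caches.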
   */
  
  /* A small helper */
-static Int decode_AMD_cache_L2_L3_assoc ( Int bits_15_12 )
+static Int
+decode_AMD_cache_L2_L3_assoc ( Int bits_15_12 )
  {
     /* Decode a L2/L3 associativity indication.  It is encoded
        differently from the I1/D1 associativity.  Returns 1
@@ -362,17 +417,18 @@ static Int decode_AMD_cache_L2_L3_assoc ( Int bits_15_12 )
     }
  }
  
-static
-Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* LLc)
+static Int
+AMD_cache_info(VexCacheInfo *ci)
  {
     UInt ext_level;
     UInt dummy, model;
     UInt I1i, D1i, L2i, L3i;
-   
+   UInt size, line_size, assoc;
+
     VG_(cpuid)(0x80000000, 0, &ext_level, &dummy, &dummy, &dummy);
  
     if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
-      VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n", 
+      VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n",
                  ext_level);
        return -1;
     }
@@ -384,40 +440,60 @@ Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* LLc)
  
     /* Check for Duron bug */
     if (model == 0x630) {
-      VG_(dmsg)("warning: Buggy Duron stepping A0. Assuming L2 size=65536 bytes\n");
+      VG_(dmsg)("warning: Buggy Duron stepping A0. "
+                "Assuming L2 size=65536 bytes\n");
        L2i = (64 << 16) | (L2i & 0xffff);
     }
  
-   D1c->size      = (D1i >> 24) & 0xff;
-   D1c->assoc     = (D1i >> 16) & 0xff;
-   D1c->line_size = (D1i >>  0) & 0xff;
+   ci->num_levels = 2;
+   ci->num_caches = 3;
+   ci->icaches_maintain_coherence = True;
+
+   /* Check for L3 cache: it needs one more level and one more slot */
+   if (((L3i >> 18) & 0x3fff) > 0) {
+      ci->num_levels = 3;
+      ci->num_caches = 4;
+   }
+
+   ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof *ci->caches);
+
+   // D1: level-1 data cache
+   size      = (D1i >> 24) & 0xff;
+   assoc     = (D1i >> 16) & 0xff;
+   line_size = (D1i >>  0) & 0xff;
+   ci->caches[0] = VEX_CACHE_INIT(DATA_CACHE, 1, size, line_size, assoc);
  
-   I1c->size      = (I1i >> 24) & 0xff;
-   I1c->assoc     = (I1i >> 16) & 0xff;
-   I1c->line_size = (I1i >>  0) & 0xff;
+   // I1: level-1 instruction cache
+   size      = (I1i >> 24) & 0xff;
+   assoc     = (I1i >> 16) & 0xff;
+   line_size = (I1i >>  0) & 0xff;
+   ci->caches[1] = VEX_CACHE_INIT(INSN_CACHE, 1, size, line_size, assoc);
  
-   LLc->size      = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
-   LLc->assoc     = decode_AMD_cache_L2_L3_assoc((L2i >> 12) & 0xf);
-   LLc->line_size = (L2i >>  0) & 0xff;
+   // L2: unified, so not INSN_CACHE.  Nb: different bits used for L2
+   size      = (L2i >> 16) & 0xffff;
+   assoc     = decode_AMD_cache_L2_L3_assoc((L2i >> 12) & 0xf);
+   line_size = (L2i >>  0) & 0xff;
+   ci->caches[2] = VEX_CACHE_INIT(UNIFIED_CACHE, 2, size, line_size, assoc);
  
+   // L3, if any: unified as well; becomes the fourth entry
     if (((L3i >> 18) & 0x3fff) > 0) {
-      /* There's an L3 cache.  Replace *LLc contents with this info. */
+      /* There's an L3 cache. */
        /* NB: the test in the if is "if L3 size > 0 ".  I don't know if
           this is the right way to test presence-vs-absence of L3.  I
           can't see any guidance on this in the AMD documentation. */
-      LLc->size      = ((L3i >> 18) & 0x3fff) * 512;
-      LLc->assoc     = decode_AMD_cache_L2_L3_assoc((L3i >> 12) & 0xf);
-      LLc->line_size = (L3i >>  0) & 0xff;
-      VG_(dmsg)("warning: L3 cache found, using its data for the L2 simulation.\n");
+      size      = ((L3i >> 18) & 0x3fff) * 512;
+      assoc     = decode_AMD_cache_L2_L3_assoc((L3i >> 12) & 0xf);
+      line_size = (L3i >>  0) & 0xff;
+      ci->caches[3] = VEX_CACHE_INIT(UNIFIED_CACHE, 3, size, line_size, assoc);
     }
  
     return 0;
  }
  
-static 
-Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc)
+static Int
+get_caches_from_CPUID(VexCacheInfo *ci)
  {
-   Int  level, ret;
+   Int  level, ret, i;
     Char vendor_id[13];
  
     if (!VG_(has_cpuid)()) {
@@ -425,8 +501,8 @@ Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc)
        return -1;
     }
  
-   VG_(cpuid)(0, 0, &level, (int*)&vendor_id[0], 
-             (int*)&vendor_id[8], (int*)&vendor_id[4]);    
+   VG_(cpuid)(0, 0, &level, (int*)&vendor_id[0],
+             (int*)&vendor_id[8], (int*)&vendor_id[4]);
     vendor_id[12] = '\0';
  
     if (0 == level) {
@@ -436,22 +512,21 @@ Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc)
  
     /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
     if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
-      ret = Intel_cache_info(level, I1c, D1c, LLc);
+      ret = Intel_cache_info(level, ci);
  
     } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
-      ret = AMD_cache_info(I1c, D1c, LLc);
+      ret = AMD_cache_info(ci);
  
     } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
        /* Total kludge.  Pretend to be a VIA Nehemiah. */
-      D1c->size      = 64;
-      D1c->assoc     = 16;
-      D1c->line_size = 16;
-      I1c->size      = 64;
-      I1c->assoc     = 4;
-      I1c->line_size = 16;
-      LLc->size      = 64;
-      LLc->assoc     = 16;
-      LLc->line_size = 16;
+      ci->num_levels = 2;
+      ci->num_caches = 3;
+      ci->icaches_maintain_coherence = True;
+      ci->caches = VG_(malloc)("m_cache", ci->num_caches * sizeof *ci->caches);
+      ci->caches[0] = VEX_CACHE_INIT(DATA_CACHE,    1, 64, 16, 16);
+      ci->caches[1] = VEX_CACHE_INIT(INSN_CACHE,    1, 64, 16,  4);
+      ci->caches[2] = VEX_CACHE_INIT(UNIFIED_CACHE, 2, 64, 16, 16);
+
        ret = 0;
  
     } else {
@@ -460,88 +535,47 @@ Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc)
     }
  
     /* Successful!  Convert sizes from KB to bytes */
-   I1c->size *= 1024;
-   D1c->size *= 1024;
-   LLc->size *= 1024;
-
-   /* If the LL cache config isn't something the simulation functions
-      can handle, try to adjust it so it is.  Caches are characterised
-      by (total size T, line size L, associativity A), and then we
-      have
-
-        number of sets S = T / (L * A)
-
-      The required constraints are:
-
-      * L must be a power of 2, but it always is in practice, so
-        no problem there
-
-      * A can be any value >= 1
-
-      * T can be any value, but ..
-
-      * S must be a power of 2.
-
-      That sometimes gives a problem.  For example, some Core iX based
-      Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
-      sets.  The "fix" in this case is to increase the associativity
-      by 50% to 24, which reduces the number of sets to 8192, making
-      it a power of 2.  That's what the following code does (handing
-      the "3/2 rescaling case".)  We might need to deal with other
-      ratios later (5/4 ?).
-
-      The "fix" is "justified" (cough, cough) by alleging that
-      increases of associativity above about 4 have very little effect
-      on the actual miss rate.  It would be far more inaccurate to
-      fudge this by changing the size of the simulated cache --
-      changing the associativity is a much better option.
-   */
-   if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {
-      Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
-      if (/* stay sane */
-          nSets >= 4
-          /* nSets is not a power of 2 */
-          && VG_(log2_64)( (ULong)nSets ) == -1
-          /* nSets is 50% above a power of 2 */
-          && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
-          /* associativity can be increased by exactly 50% */
-          && (LLc->assoc % 2) == 0
-         ) {
-         /* # sets is 1.5 * a power of two, but the associativity is
-            even, so we can increase that up by 50% and implicitly
-            scale the # sets down accordingly. */
-         Int new_assoc = LLc->assoc + (LLc->assoc / 2);
-         VG_(dmsg)("warning: pretending that LL cache has associativity"
-                   " %d instead of actual %d\n", new_assoc, LLc->assoc);
-         LLc->assoc = new_assoc;
-      }
+   for (i = 0; ret == 0 && i < ci->num_caches; ++i) {
+      ci->caches[i].sizeB *= 1024;   /* not on failure: *ci is unset */
     }
  
     return ret;
  }
  
+Bool
+VG_(machine_get_cache_info)(VexArchInfo *vai)
+{
+   /* True iff CPUID detection worked; x86 I-caches are coherent anyway */
+   vai->hwcache_info.icaches_maintain_coherence = True;
+   return get_caches_from_CPUID(&vai->hwcache_info) == 0;
+}
+
+#elif defined(VGA_arm) || defined(VGA_ppc32) || defined(VGA_ppc64) || \
+      defined(VGA_mips32)
  
-void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc,
-                           Bool all_caches_clo_defined)
+Bool
+VG_(machine_get_cache_info)(VexArchInfo *vai)
  {
-   Int res;
-   
-   // Set caches to default.
-   *I1c = (cache_t) {  65536, 2, 64 };
-   *D1c = (cache_t) {  65536, 2, 64 };
-   *LLc = (cache_t) { 262144, 8, 64 };
-
-   // Then replace with any info we can get from CPUID.
-   res = get_caches_from_CPUID(I1c, D1c, LLc);
-
-   // Warn if CPUID failed and config not completely specified from cmd line.
-   if (res != 0 && !all_caches_clo_defined) {
-      VG_(dmsg)("Warning: Couldn't auto-detect cache config, using one "
-                "or more defaults \n");
-   }
+   vai->hwcache_info.icaches_maintain_coherence = False;
+   /* False means VG_(invalidate_icache) skips its early return. */
+   return False;   // not yet: nothing is auto-detected for these arches
  }
  
-#endif // defined(VGA_x86) || defined(VGA_amd64)
+#elif defined(VGA_s390x)
+
+Bool
+VG_(machine_get_cache_info)(VexArchInfo *vai)
+{
+   vai->hwcache_info.icaches_maintain_coherence = True;
+   /* True means VG_(invalidate_icache) returns early, doing nothing. */
+   return False;   // not yet: no cache auto-detection on s390x
+}
+
+#else
+
+#error "Unknown arch"
+
+#endif
  
  /*--------------------------------------------------------------------*/
  /*--- end                                                          ---*/
diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c

index 136426b4d260bdf5836564315fb304c4135f32e6..f0a7d7f3e24d28b90032e090ed590c36a707274f 100644 (file)
--- a/coregrind/m_libcproc.c
+++ b/coregrind/m_libcproc.c
@@ -724,6 +724,15 @@ void VG_(do_atfork_child)(ThreadId tid)
  
  void VG_(invalidate_icache) ( void *ptr, SizeT nbytes )
  {
+   if (nbytes == 0) return;    // nothing to do
+
+   // Get cache info
+   VexArchInfo vai;
+   VG_(machine_get_VexArchInfo)(NULL, &vai);
+
+   // If I-caches are coherent, nothing needs to be done here
+   if (vai.hwcache_info.icaches_maintain_coherence) return;
+
  #  if defined(VGA_ppc32) || defined(VGA_ppc64)
     Addr startaddr = (Addr) ptr;
     Addr endaddr   = startaddr + nbytes;
@@ -731,9 +740,6 @@ void VG_(invalidate_icache) ( void *ptr, SizeT nbytes )
     Addr addr;
     VexArchInfo vai;
  
-   if (nbytes == 0) return;
-   vg_assert(nbytes > 0);
-
     VG_(machine_get_VexArchInfo)( NULL, &vai );
     cls = vai.ppc_cache_line_szB;
  
@@ -750,15 +756,6 @@ void VG_(invalidate_icache) ( void *ptr, SizeT nbytes )
     }
     __asm__ __volatile__("sync; isync");
  
-#  elif defined(VGA_x86)
-   /* no need to do anything, hardware provides coherence */
-
-#  elif defined(VGA_amd64)
-   /* no need to do anything, hardware provides coherence */
-
-#  elif defined(VGA_s390x)
-   /* no need to do anything, hardware provides coherence */
-
  #  elif defined(VGP_arm_linux)
     /* ARM cache flushes are privileged, so we must defer to the kernel. */
     Addr startaddr = (Addr) ptr;
@@ -770,8 +767,6 @@ void VG_(invalidate_icache) ( void *ptr, SizeT nbytes )
                                   (UWord) nbytes, (UWord) 3);
     vg_assert( sres._isError == 0 );
  
-#  else
-#    error "Unknown ARCH"
  #  endif
  }
  
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c

index f2d90a211e792acee673c9a1bb25a54e27f7e560..9b4dabc1ae896b2d0a4e25bdc2b19378550d8287 100644 (file)
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -674,7 +674,8 @@ static UInt VG_(get_machine_model)(void)
  
  /* Determine what insn set and insn set variant the host has, and
     record it.  To be called once at system startup.  Returns False if
-   this a CPU incapable of running Valgrind. */
+   this is a CPU incapable of running Valgrind.
+   Also determine information about the caches on this host. */
  
  Bool VG_(machine_get_hwcaps)( void )
  {
@@ -731,26 +732,23 @@ Bool VG_(machine_get_hwcaps)( void )
          have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
       }
  
+     va = VexArchX86;
       if (have_sse2 && have_sse1) {
-        va          = VexArchX86;
          vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
          vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
          if (have_lzcnt)
             vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
          VG_(machine_x86_have_mxcsr) = 1;
-        return True;
-     }
-
-     if (have_sse1) {
-        va          = VexArchX86;
+     } else if (have_sse1) {
          vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
          VG_(machine_x86_have_mxcsr) = 1;
-        return True;
+     } else {
+       vai.hwcaps = 0; /*baseline - no sse at all*/
+       VG_(machine_x86_have_mxcsr) = 0;
       }
  
-     va         = VexArchX86;
-     vai.hwcaps = 0; /*baseline - no sse at all*/
-     VG_(machine_x86_have_mxcsr) = 0;
+     VG_(machine_get_cache_info)(&vai);
+
       return True;
     }
  
@@ -836,6 +834,9 @@ Bool VG_(machine_get_hwcaps)( void )
                  | (have_cx16  ? VEX_HWCAPS_AMD64_CX16  : 0)
                  | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
                  | (have_avx   ? VEX_HWCAPS_AMD64_AVX   : 0);
+
+     VG_(machine_get_cache_info)(&vai);
+
       return True;
     }
  
@@ -980,6 +981,7 @@ Bool VG_(machine_get_hwcaps)( void )
       if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
       if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
  
+     VG_(machine_get_cache_info)(&vai);
  
       /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
          called before we're ready to go. */
@@ -1105,6 +1107,8 @@ Bool VG_(machine_get_hwcaps)( void )
       if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
       if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
  
+     VG_(machine_get_cache_info)(&vai);
+
       /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
          called before we're ready to go. */
       return True;
@@ -1251,6 +1255,8 @@ Bool VG_(machine_get_hwcaps)( void )
  
       VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
  
+     VG_(machine_get_cache_info)(&vai);
+
       return True;
     }
  
@@ -1360,6 +1366,8 @@ Bool VG_(machine_get_hwcaps)( void )
       if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
       if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
  
+     VG_(machine_get_cache_info)(&vai);
+
       return True;
     }
  
@@ -1371,6 +1379,9 @@ Bool VG_(machine_get_hwcaps)( void )
           return False;
  
       vai.hwcaps = model;
+
+     VG_(machine_get_cache_info)(&vai);
+
       return True;
     }
  
diff --git a/coregrind/pub_core_cpuid.h b/coregrind/pub_core_cpuid.h

index 7f1d75035f034469126c2edda3a1f9f37ff37133..269ae57df8d420155de24040b2499ff994c4dd3d 100644 (file)
--- a/coregrind/pub_core_cpuid.h
+++ b/coregrind/pub_core_cpuid.h
@@ -36,7 +36,13 @@
  // CPUID instruction.
  //--------------------------------------------------------------------
  
-#include "pub_tool_cpuid.h"
+#if defined(VGA_x86) || defined(VGA_amd64)
+extern Bool VG_(has_cpuid) ( void );
+
+extern void VG_(cpuid) ( UInt eax, UInt ecx,
+                         UInt* eax_ret, UInt* ebx_ret,
+                         UInt* ecx_ret, UInt* edx_ret );
+#endif
  
  #endif   // __PUB_CORE_CPUID_H
  
diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h

index ded9b9af48209bdbe6e991e8a462d280645b3e72..11e590a8cf4700022e55430f0b3b3ad10888d2a6 100644 (file)
--- a/coregrind/pub_core_machine.h
+++ b/coregrind/pub_core_machine.h
@@ -199,9 +199,9 @@ void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
     this a CPU incapable of running Valgrind. */
  extern Bool VG_(machine_get_hwcaps)( void );
  
-/* Fetch host cpu info, as per above comment. */
-extern void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch*,
-                                          /*OUT*/VexArchInfo* );
+/* Determine information about the cache system this host has and
+   record it. Returns False if cache information cannot be auto-detected. */
+extern Bool VG_(machine_get_cache_info)( VexArchInfo * );
  
  /* Notify host cpu cache line size, as per above comment. */
  #if defined(VGA_ppc32)
diff --git a/include/Makefile.am b/include/Makefile.am

index a115754a65acbec56447e4604310ce87f7640bc8..41defb8ceb46804740c3ee6d75ad968a8ff8ee98 100644 (file)
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -8,7 +8,6 @@ nobase_pkginclude_HEADERS = \
         pub_tool_aspacemgr.h            \
         pub_tool_clientstate.h          \
         pub_tool_clreq.h                \
-       pub_tool_cpuid.h                \
         pub_tool_debuginfo.h            \
         pub_tool_errormgr.h             \
         pub_tool_execontext.h           \
diff --git a/include/pub_tool_cpuid.h b/include/pub_tool_cpuid.h

deleted file mode 100644 (file)

index 149131a..0000000
--- a/include/pub_tool_cpuid.h
+++ /dev/null
@@ -1,46 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- Interface to CPUID.                         pub_tool_cpuid.h ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Valgrind, a dynamic binary instrumentation
-   framework.
-
-   Copyright (C) 2000-2012 Julian Seward
-      jseward@acm.org
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file COPYING.
-*/
-
-#ifndef __PUB_TOOL_CPUID_H
-#define __PUB_TOOL_CPUID_H
-
-#if defined(VGA_x86) || defined(VGA_amd64)
-extern Bool VG_(has_cpuid) ( void );
-
-extern void VG_(cpuid) ( UInt eax, UInt ecx,
-                         UInt* eax_ret, UInt* ebx_ret,
-                         UInt* ecx_ret, UInt* edx_ret );
-#endif
-
-#endif   // __PUB_TOOL_CPUID_H
-
-/*--------------------------------------------------------------------*/
-/*--- end                                                          ---*/
-/*--------------------------------------------------------------------*/
diff --git a/include/pub_tool_machine.h b/include/pub_tool_machine.h

index 026db6b5749bcd4f1f89d43970bb559db2c6155d..08ab203a2093f7f9f157fefd7a63946d88721555 100644 (file)
--- a/include/pub_tool_machine.h
+++ b/include/pub_tool_machine.h
@@ -31,6 +31,8 @@
  #ifndef __PUB_TOOL_MACHINE_H
  #define __PUB_TOOL_MACHINE_H
  
+#include "libvex.h"                    // VexArchInfo
+
  #if defined(VGP_x86_linux)
  #  define VG_MIN_INSTR_SZB          1  // min length of native instruction
  #  define VG_MAX_INSTR_SZB         16  // max length of native instruction
@@ -164,6 +166,10 @@ extern void* VG_(fnptr_to_fnentry)( void* );
     (eg, AVX or non-AVX ?, for amd64). */
  extern Int VG_(machine_get_size_of_largest_guest_register) ( void );
  
+/* Return host cpu info. */
+extern void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch*,
+                                          /*OUT*/VexArchInfo* );
+
  #endif   // __PUB_TOOL_MACHINE_H
  
  /*--------------------------------------------------------------------*/
author	Florian Krohm <florian@eich-krohm.de>
	Sun, 7 Oct 2012 19:47:04 +0000 (19:47 +0000)
committer	Florian Krohm <florian@eich-krohm.de>
	Sun, 7 Oct 2012 19:47:04 +0000 (19:47 +0000)
cachegrind/Makefile.am		patch \| blob \| blame \| history
cachegrind/cg-arch.c		patch \| blob \| blame \| history
cachegrind/cg-arm.c	[deleted file]	patch \| blob \| blame \| history
cachegrind/cg-mips32.c	[deleted file]	patch \| blob \| blame \| history
cachegrind/cg-ppc32.c	[deleted file]	patch \| blob \| blame \| history
cachegrind/cg-ppc64.c	[deleted file]	patch \| blob \| blame \| history
cachegrind/cg-s390x.c	[deleted file]	patch \| blob \| blame \| history
cachegrind/cg_arch.h		patch \| blob \| blame \| history
cachegrind/tests/filter_stderr		patch \| blob \| blame \| history
callgrind/Makefile.am		patch \| blob \| blame \| history
callgrind/tests/filter_stderr		patch \| blob \| blame \| history
coregrind/Makefile.am		patch \| blob \| blame \| history
coregrind/m_cache.c	[moved from cachegrind/cg-x86-amd64.c with 71% similarity]	patch \| blob \| blame \| history
coregrind/m_libcproc.c		patch \| blob \| blame \| history
coregrind/m_machine.c		patch \| blob \| blame \| history
coregrind/pub_core_cpuid.h		patch \| blob \| blame \| history
coregrind/pub_core_machine.h		patch \| blob \| blame \| history
include/Makefile.am		patch \| blob \| blame \| history
include/pub_tool_cpuid.h	[deleted file]	patch \| blob \| blame \| history
include/pub_tool_machine.h		patch \| blob \| blame \| history