static Bool clo_branch_sim = False; /* do branch simulation? */
static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
+/*------------------------------------------------------------*/
+/*--- Cachesim configuration ---*/
+/*------------------------------------------------------------*/
+
+static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
+
/*------------------------------------------------------------*/
/*--- Types and Data Structures ---*/
/*------------------------------------------------------------*/
{
Event* evt;
tl_assert(isIRAtom(ea));
- tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+ tl_assert(datasize >= 1 && datasize <= min_line_size);
if (!clo_cache_sim)
return;
if (cgs->events_used == N_EVENTS)
Event* evt;
tl_assert(isIRAtom(ea));
- tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+ tl_assert(datasize >= 1 && datasize <= min_line_size);
if (!clo_cache_sim)
return;
// instructions will be done inaccurately, but they're
// very rare and this avoids errors from hitting more
// than two cache lines in the simulation.
- if (dataSize > MIN_LINE_SIZE)
- dataSize = MIN_LINE_SIZE;
+ if (dataSize > min_line_size)
+ dataSize = min_line_size;
if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
if (cas->dataHi != NULL)
dataSize *= 2; /* since it's a doubleword-CAS */
/* I don't think this can ever happen, but play safe. */
- if (dataSize > MIN_LINE_SIZE)
- dataSize = MIN_LINE_SIZE;
+ if (dataSize > min_line_size)
+ dataSize = min_line_size;
addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
break;
&clo_D1_cache,
&clo_LL_cache);
+ // min_line_size is used to make sure that we never feed the
+ // simulator an access that straddles more than two cache lines
+ // at any cache level.
+ min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
+ min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
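+ // E.g. I1/D1/LL line sizes of 64/64/32 give a min_line_size of 32,
+ // so any access of at most 32 bytes touches at most 2 lines.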
+
+ Int largest_load_or_store_size
+ = VG_(machine_get_size_of_largest_guest_register)();
+ if (min_line_size < largest_load_or_store_size) {
+ /* We can't continue, because the cache simulation might
+ straddle more than 2 lines, and it will assert. So let's
+ just stop before we start. */
+ VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
+ (Int)min_line_size);
+ VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
+ largest_load_or_store_size );
+ VG_(umsg)(" but it is not. Exiting now.\n");
+ VG_(exit)(1);
+ }
+
cachesim_I1_initcache(I1c);
cachesim_D1_initcache(D1c);
cachesim_LL_initcache(LLc);
static Bool hwcaps_done = False;
/* --- all archs --- */
-static VexArch va;
+static VexArch va = VexArch_INVALID;
static VexArchInfo vai;
#if defined(VGA_x86)
}
+/* Returns the size of the largest guest register that we will
+ simulate in this run. This depends on both the guest architecture
+ and on the specific capabilities we are simulating for that guest
+ (e.g. AVX or non-AVX, for amd64). Should return either 4, 8, 16
+ or 32. General rule: if in doubt, return a value larger than
+ reality.
+
+ This information is needed by Cachegrind and Callgrind to decide
+ the minimum cache line size they are prepared to simulate.
+ Basically, we require that the minimum cache line size is at least
+ as large as the largest register that might get transferred
+ to/from memory, so as to guarantee that any such transaction can
+ straddle at most 2 cache lines.
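+
+ For example, on amd64 with AVX the largest transfer is a 32-byte
+ YMM load or store; a 32-byte access that begins near the end of
+ one 32-byte line can spill into the next line, but never into a
+ third, so line sizes of 32 or more are safe.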
+*/
+Int VG_(machine_get_size_of_largest_guest_register) ( void )
+{
+ vg_assert(hwcaps_done);
+ /* Once hwcaps_done is True, we can fish around inside va/vai to
+ find the information we need. */
+
+# if defined(VGA_x86)
+ vg_assert(va == VexArchX86);
+ /* We don't support AVX, so 32 is out. At the other end, even if
+ we don't support any SSE, the X87 can generate 10 byte
+ transfers, so let's say 16 to be on the safe side. Hence the
+ answer is always 16. */
+ return 16;
+
+# elif defined(VGA_amd64)
+ /* if AVX then 32 else 16 */
+ return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
+
+# elif defined(VGA_ppc32)
+ /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
+ return 8;
+
+# elif defined(VGA_ppc64)
+ /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
+ return 8;
+
+# elif defined(VGA_s390x)
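+ /* The widest registers we simulate here are the 8-byte GPRs/FPRs. */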
+ return 8;
+
+# elif defined(VGA_arm)
+ /* Really it depends whether or not we have NEON, but let's just
+ assume we always do. */
+ return 16;
+
+# else
+# error "Unknown arch"
+# endif
+}
+
+
// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )