From: Julian Seward
Date: Sun, 3 Jun 2012 22:40:07 +0000 (+0000)
Subject: m_machine: add new function VG_(machine_get_size_of_largest_guest_register)
X-Git-Tag: svn/VALGRIND_3_8_0~263
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=76d7802c9f7f0678c1d509e267a7090172618ccc;p=thirdparty%2Fvalgrind.git

m_machine: add new function VG_(machine_get_size_of_largest_guest_register)

cachegrind: use the new function to abort startup if the minimum line
size is smaller than the size of the largest guest register.

Partially derived from a patch by Josef Weidendorfer.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@12605
---

diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 4b36204cec..ba2d740b79 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -69,6 +69,12 @@ static Bool clo_cache_sim = True; /* do cache simulation? */
 static Bool clo_branch_sim = False; /* do branch simulation? */
 static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
 
+/*------------------------------------------------------------*/
+/*--- Cachesim configuration                               ---*/
+/*------------------------------------------------------------*/
+
+static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
+
 /*------------------------------------------------------------*/
 /*--- Types and Data Structures                            ---*/
 /*------------------------------------------------------------*/
@@ -846,7 +852,7 @@ void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
 {
    Event* evt;
    tl_assert(isIRAtom(ea));
-   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+   tl_assert(datasize >= 1 && datasize <= min_line_size);
    if (!clo_cache_sim)
       return;
    if (cgs->events_used == N_EVENTS)
@@ -868,7 +874,7 @@ void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    Event* evt;
 
    tl_assert(isIRAtom(ea));
-   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+   tl_assert(datasize >= 1 && datasize <= min_line_size);
 
    if (!clo_cache_sim)
       return;
@@ -1058,8 +1064,8 @@ IRSB* cg_instrument ( VgCallbackClosure* closure,
             // instructions will be done inaccurately, but they're
             // very rare and this avoids errors from hitting more
             // than two cache lines in the simulation.
-            if (dataSize > MIN_LINE_SIZE)
-               dataSize = MIN_LINE_SIZE;
+            if (dataSize > min_line_size)
+               dataSize = min_line_size;
             if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
             if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
@@ -1085,8 +1091,8 @@ IRSB* cg_instrument ( VgCallbackClosure* closure,
             if (cas->dataHi != NULL)
                dataSize *= 2; /* since it's a doubleword-CAS */
             /* I don't think this can ever happen, but play safe. */
-            if (dataSize > MIN_LINE_SIZE)
-               dataSize = MIN_LINE_SIZE;
+            if (dataSize > min_line_size)
+               dataSize = min_line_size;
             addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
             addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
             break;
@@ -1724,6 +1730,26 @@ static void cg_post_clo_init(void)
                        &clo_D1_cache,
                        &clo_LL_cache);
 
+   // min_line_size is used to make sure that we never feed
+   // accesses to the simulator straddling more than two
+   // cache lines at any cache level
+   min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
+   min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
+
+   Int largest_load_or_store_size
+      = VG_(machine_get_size_of_largest_guest_register)();
+   if (min_line_size < largest_load_or_store_size) {
+      /* We can't continue, because the cache simulation might
+         straddle more than 2 lines, and it will assert.  So let's
+         just stop before we start. */
+      VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
+                (Int)min_line_size);
+      VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
+                largest_load_or_store_size );
+      VG_(umsg)(" but it is not.  Exiting now.\n");
+      VG_(exit)(1);
+   }
+
    cachesim_I1_initcache(I1c);
    cachesim_D1_initcache(D1c);
    cachesim_LL_initcache(LLc);
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index ffe41ee713..9980640ddf 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -367,7 +367,7 @@ SizeT VG_(thread_get_altstack_size)(ThreadId tid)
 static Bool hwcaps_done = False;
 
 /* --- all archs --- */
-static VexArch va;
+static VexArch va = VexArch_INVALID;
 static VexArchInfo vai;
 
 #if defined(VGA_x86)
@@ -1316,6 +1316,66 @@ void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
 }
 
 
+/* Returns the size of the largest guest register that we will
+   simulate in this run.  This depends on both the guest architecture
+   and on the specific capabilities we are simulating for that guest
+   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
+   or 32.  General rule: if in doubt, return a value larger than
+   reality.
+
+   This information is needed by Cachegrind and Callgrind to decide
+   what the minimum cache line size they are prepared to simulate is.
+   Basically require that the minimum cache line size is at least as
+   large as the largest register that might get transferred to/from
+   memory, so as to guarantee that any such transaction can straddle
+   at most 2 cache lines.
+*/
+Int VG_(machine_get_size_of_largest_guest_register) ( void )
+{
+   vg_assert(hwcaps_done);
+   /* Once hwcaps_done is True, we can fish around inside va/vai to
+      find the information we need. */
+
+#  if defined(VGA_x86)
+   vg_assert(va == VexArchX86);
+   /* We don't support AVX, so 32 is out.  At the other end, even if
+      we don't support any SSE, the X87 can generate 10 byte
+      transfers, so let's say 16 to be on the safe side.  Hence the
+      answer is always 16. */
+   return 16;
+
+#  elif defined(VGA_amd64)
+   /* if AVX then 32 else 16 */
+   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
+
+#  elif defined(VGA_ppc32)
+   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
+   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
+   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
+   return 8;
+
+#  elif defined(VGA_ppc64)
+   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
+   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
+   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
+   return 8;
+
+#  elif defined(VGA_s390x)
+   return 8;
+
+#  elif defined(VGA_arm)
+   /* Really it depends whether or not we have NEON, but let's just
+      assume we always do. */
+   return 16;
+
+#  else
+#    error "Unknown arch"
+#  endif
+}
+
+
 // Given a pointer to a function as obtained by "& functionname" in C,
 // produce a pointer to the actual entry point for the function.
 void* VG_(fnptr_to_fnentry)( void* f )
diff --git a/include/pub_tool_machine.h b/include/pub_tool_machine.h
index 49c700d3e1..6b30e69b10 100644
--- a/include/pub_tool_machine.h
+++ b/include/pub_tool_machine.h
@@ -152,6 +152,12 @@ extern SizeT VG_(thread_get_altstack_size) ( ThreadId tid );
 // ppc64-linux it isn't (sigh).
 extern void* VG_(fnptr_to_fnentry)( void* );
 
+/* Returns the size of the largest guest register that we will
+   simulate in this run.  This depends on both the guest architecture
+   and on the specific capabilities we are simulating for that guest
+   (eg, AVX or non-AVX ?, for amd64). */
+extern Int VG_(machine_get_size_of_largest_guest_register) ( void );
+
 #endif   // __PUB_TOOL_MACHINE_H
 
 /*--------------------------------------------------------------------*/
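
As an illustrative sketch (not part of the patch above), here is roughly how
another Valgrind tool might consume the new query from its post-CLO
initialisation, mirroring the check the patch adds to cg_post_clo_init.  The
tool name, the 16-byte line size, and the header locations given for
VG_(umsg) and VG_(exit) are assumptions made for the sketch; only
pub_tool_machine.h is confirmed by the patch itself.

#include "pub_tool_basics.h"
#include "pub_tool_machine.h"     /* VG_(machine_get_size_of_largest_guest_register) */
#include "pub_tool_libcprint.h"   /* VG_(umsg) -- assumed location */
#include "pub_tool_libcproc.h"    /* VG_(exit) -- assumed location */

static Int my_line_size = 16;  /* hypothetical simulated cache line size */

/* Would be registered via VG_(basic_tool_funcs) in the tool's
   pre_clo_init (not shown). */
static void my_post_clo_init ( void )
{
   /* Only valid once hwcaps have been determined, which is why the
      check belongs in post-CLO init rather than at registration time. */
   Int max_reg_size = VG_(machine_get_size_of_largest_guest_register)();
   if (my_line_size < max_reg_size) {
      /* A single load/store could then straddle more than two simulated
         lines; refuse to start rather than assert later. */
      VG_(umsg)("my_tool: line size (%d) is smaller than the largest "
                "guest register (%d); cannot continue.\n",
                my_line_size, max_reg_size);
      VG_(exit)(1);
   }
}

The constraint being enforced is the one stated in the m_machine.c comment:
the minimum simulated line size must be at least as large as the largest
register that can be transferred to or from memory, so that any single
access touches at most two cache lines.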