Add support for direct V256 shadow helper returns -- memcheck side.

author Julian Seward <jseward@acm.org>

Fri, 16 Aug 2013 08:31:29 +0000 (08:31 +0000)

committer Julian Seward <jseward@acm.org>

Fri, 16 Aug 2013 08:31:29 +0000 (08:31 +0000)
author Julian Seward <jseward@acm.org>
Fri, 16 Aug 2013 08:31:29 +0000 (08:31 +0000)
committer Julian Seward <jseward@acm.org>
Fri, 16 Aug 2013 08:31:29 +0000 (08:31 +0000)
diff --git a/memcheck/mc_include.h b/memcheck/mc_include.h

index 27815368dd80c6ee44b61d5a3a7cac83e9897bf1..24d53fd94780125d8fb272970b97d76a46caa31c 100644 (file)
--- a/memcheck/mc_include.h
+++ b/memcheck/mc_include.h
@@ -580,6 +580,8 @@ VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr, UWord );
  VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr, UWord );
  VG_REGPARM(2) void MC_(helperc_STOREV8)    ( Addr, UWord );
  
+VG_REGPARM(2) void  MC_(helperc_LOADV256be) ( /*OUT*/V256*, Addr );
+VG_REGPARM(2) void  MC_(helperc_LOADV256le) ( /*OUT*/V256*, Addr );
  VG_REGPARM(2) void  MC_(helperc_LOADV128be) ( /*OUT*/V128*, Addr );
  VG_REGPARM(2) void  MC_(helperc_LOADV128le) ( /*OUT*/V128*, Addr );
  VG_REGPARM(1) ULong MC_(helperc_LOADV64be)  ( Addr );
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c

index 4608e984123782d9f611ec77455ee6bcfb0f19b1..dbc347f2f7b8b1e8f02664d1a60040821acb7ffc 100644 (file)
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -1130,43 +1130,45 @@ static Bool parse_ignore_ranges ( const HChar* str0 )
  
  static
  __attribute__((noinline))
-void mc_LOADV128_slow ( /*OUT*/V128* res, Addr a, Bool bigendian )
-{
-   SizeT  nBits          = 128;
-   V128   vbits128;      /* result */
-   V128   pessim128;     /* only used when p-l-ok=yes */
-   SSizeT bytes_per_long = 64 / 8;
-   SSizeT szL            = nBits / 64;  /* Size in longs */
-   SSizeT szB            = bytes_per_long * szL;
+void mc_LOADVx_slow ( /*OUT*/ULong* res, Addr a, SizeT nBits, Bool bigendian )
+{
+   ULong  pessim[4];  /* only used when p-l-ok=yes */
+   SSizeT szB            = nBits / 8;
+   SSizeT szL            = szB / 8;  /* Size in longs */
     SSizeT i, j;          /* Must be signed. */
     SizeT  n_addrs_bad = 0;
     Addr   ai;
     UChar  vbits8;
     Bool   ok;
  
-   vbits128.w64[0] = V_BITS64_UNDEFINED;
-   vbits128.w64[1] = V_BITS64_UNDEFINED;
-   pessim128.w64[0] = V_BITS64_DEFINED;
-   pessim128.w64[1] = V_BITS64_DEFINED;
+   /* Code below assumes load size is a power of two and at least 64
+      bits. */
+   tl_assert((szB & (szB-1)) == 0 && szL > 0);
  
-   tl_assert(nBits == 128);
+   /* If this triggers, you probably just need to increase the size of
+      the pessim array. */
+   tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
  
-   /* Make up a 128-bit result V word, which contains the loaded data
-      for valid addresses and Defined for invalid addresses.  Iterate
-      over the bytes in the word, from the most significant down to
-      the least.  The vbits to return are calculated into vbits128.
-      Also compute the pessimising value to be used when
+   for (j=0 ; j < szL ; j++) {
+      pessim[j] = V_BITS64_DEFINED;
+      res[j] = V_BITS64_UNDEFINED;
+   }
+
+   /* Make up a result V word, which contains the loaded data for
+      valid addresses and Defined for invalid addresses.  Iterate over
+      the bytes in the word, from the most significant down to the
+      least.  The vbits to return are calculated into res[].  Also
+      compute the pessimising value to be used when
        --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
-      info can be gleaned from pessim128) but is used as a
+      info can be gleaned from the pessim array) but is used as a
        cross-check. */
     for (j = szL-1 ; j >= 0 ; j--) {
        ULong  vbits64     = V_BITS64_UNDEFINED;
        ULong  pessim64    = V_BITS64_DEFINED;
        UWord  long_index = byte_offset_w(szL, bigendian, j);
-      for (i = bytes_per_long-1; i >= 0; i--) {
+      for (i = 8-1; i >= 0; i--) {
           PROF_EVENT(31, "mc_LOADV128_slow(loop)");
-         ai = a + long_index*bytes_per_long + byte_offset_w(bytes_per_long,
-                                                            bigendian, i);
+         ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
           ok = get_vbits8(ai, &vbits8);
           vbits64 <<= 8;
           vbits64 |= vbits8;
@@ -1174,22 +1176,19 @@ void mc_LOADV128_slow ( /*OUT*/V128* res, Addr a, Bool bigendian )
           pessim64 <<= 8;
           pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
        }
-      vbits128.w64[long_index] = vbits64;
-      pessim128.w64[long_index] = pessim64;
+      res[long_index] = vbits64;
+      pessim[long_index] = pessim64;
     }
  
     /* In the common case, all the addresses involved are valid, so we
        just return the computed V bits and have done. */
-   if (LIKELY(n_addrs_bad == 0)) {
-      *res = vbits128;
+   if (LIKELY(n_addrs_bad == 0))
        return;
-   }
  
     /* If there's no possibility of getting a partial-loads-ok
        exemption, report the error and quit. */
     if (!MC_(clo_partial_loads_ok)) {
        MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
-      *res = vbits128;
        return;
     }
  
@@ -1199,7 +1198,7 @@ void mc_LOADV128_slow ( /*OUT*/V128* res, Addr a, Bool bigendian )
        false negatives.  If it doesn't apply, just report an addressing
        error in the usual way. */
  
-   /* Some code steps along byte strings in aligned word-sized chunks
+   /* Some code steps along byte strings in aligned chunks
        even when there is only a partially defined word at the end (eg,
        optimised strlen).  This is allowed by the memory model of
        modern machines, since an aligned load cannot span two pages and
@@ -1217,29 +1216,28 @@ void mc_LOADV128_slow ( /*OUT*/V128* res, Addr a, Bool bigendian )
     */
  
     /* "at least one of the addresses is invalid" */
-   tl_assert(pessim128.w64[0] != V_BITS64_DEFINED
-             || pessim128.w64[1] != V_BITS64_DEFINED);
+   ok = False;
+   for (j=0 ; j < szL ; j++)
+      ok |= pessim[j] != V_BITS64_DEFINED;
+   tl_assert(ok);
  
     if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
        /* Exemption applies.  Use the previously computed pessimising
-         value for vbits128 and return the combined result, but don't
-         flag an addressing error.  The pessimising value is Defined
-         for valid addresses and Undefined for invalid addresses. */
+         value and return the combined result, but don't flag an
+         addressing error.  The pessimising value is Defined for valid
+         addresses and Undefined for invalid addresses. */
        /* for assumption that doing bitwise or implements UifU */
        tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
        /* (really need "UifU" here...)
-         vbits128 UifU= pessim128  (is pessimised by it, iow) */
+         res[j] UifU= pessim[j]  (is pessimised by it, iow) */
        for (j = szL-1 ; j >= 0 ; j--)
-         vbits128.w64[j] |= pessim128.w64[j];
-      *res = vbits128;
+         res[j] |= pessim[j];
        return;
     }
  
     /* Exemption doesn't apply.  Flag an addressing error in the normal
        way. */
     MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
-
-   *res = vbits128;
  }
  
  
@@ -4207,28 +4205,29 @@ static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
  /* ------------------------ Size = 16 ------------------------ */
  
  static INLINE
-void mc_LOADV128 ( /*OUT*/V128* res, Addr a, Bool isBigEndian )
+void mc_LOADVx ( /*OUT*/ULong* res, Addr a, SizeT nBits, Bool isBigEndian )
  {
-   PROF_EVENT(200, "mc_LOADV128");
+   PROF_EVENT(200, "mc_LOADVx");
  
  #ifndef PERF_FAST_LOADV
-   mc_LOADV128_slow( res, a, isBigEndian );
+   mc_LOADVx_slow( res, a, nBits, isBigEndian );
     return;
  #else
     {
        UWord   sm_off16, vabits16;
        SecMap* sm;
        int j;
+      int nBytes = nBits / 8;
  
-      if (UNLIKELY( UNALIGNED_OR_HIGH(a,128) )) {
-         PROF_EVENT(201, "mc_LOADV128-slow1");
-         mc_LOADV128_slow( res, a, isBigEndian );
+      if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
+         PROF_EVENT(201, "mc_LOADVx-slow1");
+         mc_LOADVx_slow( res, a, nBits, isBigEndian );
           return;
        }
  
-      // Handle common cases quickly: a (and a+8) is suitably aligned,
-      // is mapped, and addressible.
-      for (j=0 ; j<2 ; ++j) {
+      /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
+         suitably aligned, is mapped, and addressible. */
+      for (j=0 ; j<nBytes/8 ; ++j) {
           sm       = get_secmap_for_reading_low(a + 8*j);
           sm_off16 = SM_OFF_16(a + 8*j);
           vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
@@ -4236,14 +4235,14 @@ void mc_LOADV128 ( /*OUT*/V128* res, Addr a, Bool isBigEndian )
           // Convert V bits from compact memory form to expanded
           // register form.
           if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
-            res->w64[j] = V_BITS64_DEFINED;
+            res[j] = V_BITS64_DEFINED;
           } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
-            res->w64[j] = V_BITS64_UNDEFINED;
+            res[j] = V_BITS64_UNDEFINED;
           } else {
              /* Slow case: some block of 8 bytes are not all-defined or
                 all-undefined. */
-            PROF_EVENT(202, "mc_LOADV128-slow2");
-            mc_LOADV128_slow( res, a, isBigEndian );
+            PROF_EVENT(202, "mc_LOADVx-slow2");
+            mc_LOADVx_slow( res, a, nBits, isBigEndian );
              return;
           }
        }
@@ -4252,16 +4251,24 @@ void mc_LOADV128 ( /*OUT*/V128* res, Addr a, Bool isBigEndian )
  #endif
  }
  
+VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
+{
+   mc_LOADVx(&res->w64[0], a, 256, True);
+}
+VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
+{
+   mc_LOADVx(&res->w64[0], a, 256, False);
+}
+
  VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
  {
-   mc_LOADV128(res, a, True);
+   mc_LOADVx(&res->w64[0], a, 128, True);
  }
  VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
  {
-   mc_LOADV128(res, a, False);
+   mc_LOADVx(&res->w64[0], a, 128, False);
  }
  
-
  /* ------------------------ Size = 8 ------------------------ */
  
  static INLINE
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c

index cebbec27c0c6859e7c9be6efd6966d7363869fb4..d04e6484d16c5312a1add3bd60f7c06ac205cdab 100644 (file)
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -4183,8 +4183,8 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
     The definedness of |guard| itself is not checked.  That is assumed
     to have been done before this point, by the caller. */
  static
-IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 
-                              IREndness end, IRType ty, 
+IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
+                              IREndness end, IRType ty,
                                IRAtom* addr, UInt bias, IRAtom* guard )
  {
     tl_assert(isOriginalAtom(mce,addr));
@@ -4202,8 +4202,12 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
     const HChar* hname            = NULL;
     Bool         ret_via_outparam = False;
  
-   if (end == Iend_LE) {   
+   if (end == Iend_LE) {
        switch (ty) {
+         case Ity_V256: helper = &MC_(helperc_LOADV256le);
+                        hname = "MC_(helperc_LOADV256le)";
+                        ret_via_outparam = True;
+                        break;
           case Ity_V128: helper = &MC_(helperc_LOADV128le);
                          hname = "MC_(helperc_LOADV128le)";
                          ret_via_outparam = True;
@@ -4225,6 +4229,10 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
        }
     } else {
        switch (ty) {
+         case Ity_V256: helper = &MC_(helperc_LOADV256be);
+                        hname = "MC_(helperc_LOADV256be)";
+                        ret_via_outparam = True;
+                        break;
           case Ity_V128: helper = &MC_(helperc_LOADV128be);
                          hname = "MC_(helperc_LOADV128be)";
                          ret_via_outparam = True;
@@ -4309,37 +4317,20 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
     definedness of |guard| before this point.
  */
  static
-IRAtom* expr2vbits_Load ( MCEnv* mce, 
-                          IREndness end, IRType ty, 
+IRAtom* expr2vbits_Load ( MCEnv* mce,
+                          IREndness end, IRType ty,
                            IRAtom* addr, UInt bias,
                            IRAtom* guard )
  {
     tl_assert(end == Iend_LE || end == Iend_BE);
     switch (shadowTypeV(ty)) {
-      case Ity_I8: 
-      case Ity_I16: 
-      case Ity_I32: 
+      case Ity_I8:
+      case Ity_I16:
+      case Ity_I32:
        case Ity_I64:
        case Ity_V128:
+      case Ity_V256:
           return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
-      case Ity_V256: {
-         /* V256-bit case -- phrased in terms of 64 bit units (Qs),
-            with Q3 being the most significant lane. */
-         if (end == Iend_BE) goto unhandled;
-         IRAtom* v64Q0
-            = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0,  guard);
-         IRAtom* v64Q1
-            = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8,  guard);
-         IRAtom* v64Q2
-            = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16, guard);
-         IRAtom* v64Q3
-            = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24, guard);
-         return assignNew( 'V', mce,
-                           Ity_V256,
-                           IRExpr_Qop(Iop_64x4toV256,
-                                      v64Q3, v64Q2, v64Q1, v64Q0));
-      }
-      unhandled:
        default:
           VG_(tool_panic)("expr2vbits_Load");
     }
author	Julian Seward <jseward@acm.org>
	Fri, 16 Aug 2013 08:31:29 +0000 (08:31 +0000)
committer	Julian Seward <jseward@acm.org>
	Fri, 16 Aug 2013 08:31:29 +0000 (08:31 +0000)
memcheck/mc_include.h		patch \| blob \| blame \| history
memcheck/mc_main.c		patch \| blob \| blame \| history
memcheck/mc_translate.c		patch \| blob \| blame \| history