return e;
}
+/* Similar to |chase|, but follows at most one level of tmp reference. */
+static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
+{
+ if (e == NULL || e->tag != Iex_RdTmp)
+ return e;
+ else
+ return env[(Int)e->Iex.RdTmp.tmp];
+}
+
static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
Int shift;
}
+/*---------------------------------------------------------------*/
+/*--- MSVC specific transformation hacks ---*/
+/*---------------------------------------------------------------*/
+
+/* The purpose of all this is to find MSVC's idiom for non-constant
+ bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
+ gcc's idiom "(a & ~c) | (b & c)". Motivation is that Memcheck
+ generates a lot of false positives from the MSVC version because it
+ doesn't understand that XORing an undefined bit with itself gives a
+ defined result.
+
+ This isn't a problem for the simple case "x ^ x", because iropt
+ folds it to a constant zero before Memcheck ever sees it. But in
+ this case we have an intervening "& c" which defeats the simple
+ case. So we have to carefully inspect all expressions rooted at an
+ XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
+ 7 other variants resulting from swapping the order of arguments to
+ the three binary operations. If we get a match, we then replace
+ the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
+
+ The key difficulty is to spot the two uses of "a". To normalise
+ the IR to maximise the chances of success, we first do a CSE pass,
+ with CSEing of loads enabled, since the two "a" expressions may be
+ loads, which need to be commoned up. Then we do a constant folding
+ pass, so as to remove any tmp-to-tmp assignment chains that would
+ make matching the original expression more difficult.
+*/
+
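+/* As a quick sanity check of the identity itself, a minimal
+   standalone sketch (illustrative only, not part of VEX) can verify
+   "a ^ ((a ^ b) & c) == (a & ~c) | (b & c)" exhaustively over all
+   8-bit values:
+
+      #include <assert.h>
+      int main ( void )
+      {
+         unsigned a, b, c;
+         for (a = 0; a < 256; a++)
+            for (b = 0; b < 256; b++)
+               for (c = 0; c < 256; c++)
+                  assert( (a ^ ((a ^ b) & c))
+                          == ((a & ~c) | (b & c)) );
+         return 0;
+      }
+
+   For each bit: if the c bit is 1, both sides give the b bit; if it
+   is 0, both sides give the a bit.
+*/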
+
+/* Helper function for debug printing */
+__attribute__((unused))
+static void print_flat_expr ( IRExpr** env, IRExpr* e )
+{
+ if (e == NULL) {
+ vex_printf("?");
+ return;
+ }
+ switch (e->tag) {
+ case Iex_Binop: {
+ ppIROp(e->Iex.Binop.op);
+ vex_printf("(");
+ print_flat_expr(env, e->Iex.Binop.arg1);
+ vex_printf(",");
+ print_flat_expr(env, e->Iex.Binop.arg2);
+ vex_printf(")");
+ break;
+ }
+ case Iex_Unop: {
+ ppIROp(e->Iex.Unop.op);
+ vex_printf("(");
+ print_flat_expr(env, e->Iex.Unop.arg);
+ vex_printf(")");
+ break;
+ }
+ case Iex_RdTmp:
+ ppIRTemp(e->Iex.RdTmp.tmp);
+ vex_printf("=");
+ print_flat_expr(env, chase(env, e));
+ break;
+ case Iex_Const:
+ case Iex_CCall:
+ case Iex_Load:
+ case Iex_ITE:
+ case Iex_Get:
+ ppIRExpr(e);
+ break;
+ default:
+ vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
+ vassert(0);
+ }
+}
+
+/* Spot a ^ ((a ^ b) & c) for a, b and c tmp-or-const (atoms),
+ or any of the other 7 variants generated by switching the order
+ of arguments to the outer ^, the inner ^ and the &.
+*/
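+
+/* For instance (an illustrative flat-IR fragment, with hypothetical
+   temp numbers, assuming t1, t2 and t5 are distinct atoms):
+
+      t3 = Xor32(t1,t2)
+      t4 = And32(t3,t5)
+      t6 = Xor32(t1,t4)
+
+   then calling this on the expression bound to t6, with opAND set to
+   Iop_And32 and opXOR set to Iop_Xor32, would match variant 1 and
+   hand back aa = t1, bb = t2, cc = t5. */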
+static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
+ /*OUT*/IRExpr** cc,
+ IRExpr** env, IRExpr* e,
+ IROp opAND, IROp opXOR)
+{
+# define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
+ && (_e)->Iex.Binop.op == (_op))
+# define ISATOM(_e) isIRAtom(_e)
+# define STEP(_e) chase1(env, (_e))
+# define LL(_e) ((_e)->Iex.Binop.arg1)
+# define RR(_e) ((_e)->Iex.Binop.arg2)
+
+ IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
+
+ /* This is common to all 8 cases */
+ if (!ISBIN(e, opXOR)) goto fail;
+
+ /*                      -----and------ */
+ /*                       --xor---      */
+ /* find variant 1: a1 ^ ((a2 ^ b) & c) */
+ /* find variant 2: a1 ^ ((b ^ a2) & c) */
+ a1 = and = xor = c = a2bL = a2bR = NULL;
+
+ a1 = LL(e);
+ and = STEP(RR(e));
+ if (!ISBIN(and, opAND)) goto v34;
+ xor = STEP(LL(and));
+ c = RR(and);
+ if (!ISBIN(xor, opXOR)) goto v34;
+ a2bL = LL(xor);
+ a2bR = RR(xor);
+
+ if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+ *aa = a1;
+ *bb = a2bR;
+ *cc = c;
+ return 1;
+ }
+ if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+ *aa = a1;
+ *bb = a2bL;
+ *cc = c;
+ return 2;
+ }
+
+ v34:
+ /*                 -----and------ */
+ /*                  --xor---      */
+ /* find variant 3: ((a2 ^ b) & c) ^ a1 */
+ /* find variant 4: ((b ^ a2) & c) ^ a1 */
+ a1 = and = xor = c = a2bL = a2bR = NULL;
+
+ a1 = RR(e);
+ and = STEP(LL(e));
+ if (!ISBIN(and, opAND)) goto v56;
+ xor = STEP(LL(and));
+ c = RR(and);
+ if (!ISBIN(xor, opXOR)) goto v56;
+ a2bL = LL(xor);
+ a2bR = RR(xor);
+
+ if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+ *aa = a1;
+ *bb = a2bR;
+ *cc = c;
+ return 3;
+ }
+ if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+ *aa = a1;
+ *bb = a2bL;
+ *cc = c;
+ return 4;
+ }
+
+ v56:
+ /*                      -----and------ */
+ /*                           --xor---  */
+ /* find variant 5: a1 ^ (c & (a2 ^ b)) */
+ /* find variant 6: a1 ^ (c & (b ^ a2)) */
+ a1 = and = xor = c = a2bL = a2bR = NULL;
+
+ a1 = LL(e);
+ and = STEP(RR(e));
+ if (!ISBIN(and, opAND)) goto v78;
+ xor = STEP(RR(and));
+ c = LL(and);
+ if (!ISBIN(xor, opXOR)) goto v78;
+ a2bL = LL(xor);
+ a2bR = RR(xor);
+
+ if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+ *aa = a1;
+ *bb = a2bR;
+ *cc = c;
+ vassert(5-5); // ATC: always fails, so we notice if this variant ever shows up
+ return 5;
+ }
+ if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+ *aa = a1;
+ *bb = a2bL;
+ *cc = c;
+ vassert(6-6); // ATC: likewise, deliberately fails
+ return 6;
+ }
+
+ v78:
+ /*                 -----and------ */
+ /*                      --xor---  */
+ /* find variant 7: (c & (a2 ^ b)) ^ a1 */
+ /* find variant 8: (c & (b ^ a2)) ^ a1 */
+ a1 = and = xor = c = a2bL = a2bR = NULL;
+
+ a1 = RR(e);
+ and = STEP(LL(e));
+ if (!ISBIN(and, opAND)) goto fail;
+ xor = STEP(RR(and));
+ c = LL(and);
+ if (!ISBIN(xor, opXOR)) goto fail;
+ a2bL = LL(xor);
+ a2bR = RR(xor);
+
+ if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+ *aa = a1;
+ *bb = a2bR;
+ *cc = c;
+ return 7;
+ }
+ if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+ *aa = a1;
+ *bb = a2bL;
+ *cc = c;
+ return 8;
+ }
+
+ fail:
+ return 0;
+
+# undef ISBIN
+# undef ISATOM
+# undef STEP
+# undef LL
+# undef RR
+}
+
+/* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
+ variants thereof generated by switching arguments around), return
+ the IRExpr* for (a & ~c) | (b & c). Else return NULL. */
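+
+/* Continuing the illustrative example above: for t6 = Xor32(t1,t4)
+   the rewritten (non-flat) expression handed back would be
+   Or32(And32(t1,Not32(t5)), And32(t2,t5)); the block is re-flattened
+   later, in do_MSVC_HACKS. */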
+static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
+{
+ if (e->tag != Iex_Binop)
+ return NULL;
+
+ const HChar* tyNm = NULL;
+ IROp opOR = Iop_INVALID;
+ IROp opAND = Iop_INVALID;
+ IROp opNOT = Iop_INVALID;
+ IROp opXOR = Iop_INVALID;
+ switch (e->Iex.Binop.op) {
+ case Iop_Xor32:
+ tyNm = "I32";
+ opOR = Iop_Or32; opAND = Iop_And32;
+ opNOT = Iop_Not32; opXOR = Iop_Xor32;
+ break;
+ case Iop_Xor16:
+ tyNm = "I16";
+ opOR = Iop_Or16; opAND = Iop_And16;
+ opNOT = Iop_Not16; opXOR = Iop_Xor16;
+ break;
+ case Iop_Xor8:
+ tyNm = "I8";
+ opOR = Iop_Or8; opAND = Iop_And8;
+ opNOT = Iop_Not8; opXOR = Iop_Xor8;
+ break;
+ default:
+ return NULL;
+ }
+
+ IRExpr* aa = NULL;
+ IRExpr* bb = NULL;
+ IRExpr* cc = NULL;
+ UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
+ if (variant > 0) {
+ static UInt ctr = 0;
+ if (0)
+ vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
+ "type %s, variant %u\n",
+ ++ctr, tyNm, variant);
+ /* It's vitally important that the returned aa, bb and cc are
+ atoms -- either constants or tmps. If it's anything else
+ (eg, a GET) then incorporating them in a tree at this point
+ in the SB may erroneously pull them forwards (eg of a PUT
+ that originally was after the GET) and so transform the IR
+ wrongly. spotBitfieldAssignment should guarantee only to
+ give us atoms, but we check here anyway. */
+ vassert(aa && isIRAtom(aa));
+ vassert(bb && isIRAtom(bb));
+ vassert(cc && isIRAtom(cc));
+ return IRExpr_Binop(
+ opOR,
+ IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
+ IRExpr_Binop(opAND, bb, cc)
+ );
+ }
+ return NULL;
+}
+
+
+/* |sb| is modified in-place. Visit all the IRExprs and, for those
+ which are allowed to be non-atomic, perform the XOR transform if
+ possible. This may make |sb| non-flat, but that's OK; the caller
+ can re-flatten it. Returns True iff any changes were made. */
+static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
+{
+ Int i;
+ Bool changed = False;
+
+ /* Make the tmp->expr environment, so we can use it for
+ chasing expressions. */
+ Int n_tmps = sb->tyenv->types_used;
+ IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
+ for (i = 0; i < n_tmps; i++)
+ env[i] = NULL;
+
+ for (i = 0; i < sb->stmts_used; i++) {
+ IRStmt* st = sb->stmts[i];
+ if (st->tag != Ist_WrTmp)
+ continue;
+ IRTemp t = st->Ist.WrTmp.tmp;
+ vassert(t >= 0 && t < n_tmps);
+ env[t] = st->Ist.WrTmp.data;
+ }
+
+ for (i = 0; i < sb->stmts_used; i++) {
+ IRStmt* st = sb->stmts[i];
+
+ switch (st->tag) {
+ case Ist_AbiHint:
+ vassert(isIRAtom(st->Ist.AbiHint.base));
+ vassert(isIRAtom(st->Ist.AbiHint.nia));
+ break;
+ case Ist_Put:
+ vassert(isIRAtom(st->Ist.Put.data));
+ break;
+ case Ist_PutI: {
+ IRPutI* puti = st->Ist.PutI.details;
+ vassert(isIRAtom(puti->ix));
+ vassert(isIRAtom(puti->data));
+ break;
+ }
+ case Ist_WrTmp: {
+ /* This is the one place where an expr (st->Ist.WrTmp.data) is
+ allowed to be more than just a constant or a tmp. */
+ IRExpr* mb_new_data
+ = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
+ if (mb_new_data) {
+ //ppIRSB(sb);
+ st->Ist.WrTmp.data = mb_new_data;
+ //ppIRSB(sb);
+ changed = True;
+ }
+ break;
+ }
+ case Ist_Store:
+ vassert(isIRAtom(st->Ist.Store.addr));
+ vassert(isIRAtom(st->Ist.Store.data));
+ break;
+ case Ist_StoreG: {
+ IRStoreG* sg = st->Ist.StoreG.details;
+ vassert(isIRAtom(sg->addr));
+ vassert(isIRAtom(sg->data));
+ vassert(isIRAtom(sg->guard));
+ break;
+ }
+ case Ist_LoadG: {
+ IRLoadG* lg = st->Ist.LoadG.details;
+ vassert(isIRAtom(lg->addr));
+ vassert(isIRAtom(lg->alt));
+ vassert(isIRAtom(lg->guard));
+ break;
+ }
+ case Ist_CAS: {
+ IRCAS* cas = st->Ist.CAS.details;
+ vassert(isIRAtom(cas->addr));
+ vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+ vassert(isIRAtom(cas->expdLo));
+ vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+ vassert(isIRAtom(cas->dataLo));
+ break;
+ }
+ case Ist_LLSC:
+ vassert(isIRAtom(st->Ist.LLSC.addr));
+ if (st->Ist.LLSC.storedata)
+ vassert(isIRAtom(st->Ist.LLSC.storedata));
+ break;
+ case Ist_Dirty: {
+ IRDirty* d = st->Ist.Dirty.details;
+ if (d->mFx != Ifx_None) {
+ vassert(isIRAtom(d->mAddr));
+ }
+ vassert(isIRAtom(d->guard));
+ for (Int j = 0; d->args[j]; j++) {
+ IRExpr* arg = d->args[j];
+ if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
+ vassert(isIRAtom(arg));
+ }
+ }
+ break;
+ }
+ case Ist_IMark:
+ case Ist_NoOp:
+ case Ist_MBE:
+ break;
+ case Ist_Exit:
+ vassert(isIRAtom(st->Ist.Exit.guard));
+ break;
+ default:
+ vex_printf("\n"); ppIRStmt(st);
+ vpanic("do_XOR_TRANSFORMS_IRSB");
+ }
+ }
+
+ vassert(isIRAtom(sb->next));
+ return changed;
+}
+
+
+static IRSB* do_MSVC_HACKS ( IRSB* sb )
+{
+ // Normalise as much as we can. This is the one-and-only place
+ // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
+ Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
+ if (any_cse_changes) {
+ // CSEing might have created dead code. Remove it.
+ sb = cprop_BB ( sb );
+ do_deadcode_BB(sb);
+ }
+
+ // Visit all atoms, do the transformation proper. |sb| is modified
+ // in-place.
+ Bool changed = do_XOR_TRANSFORM_IRSB(sb);
+
+ if (changed) {
+ // The transformation generates non-flat expressions, so we now
+ // need to re-flatten the block.
+ sb = flatten_BB(sb);
+ }
+
+ return sb;
+}
+
+
/*---------------------------------------------------------------*/
/*--- iropt main ---*/
/*---------------------------------------------------------------*/
preciseMemExnsFn, pxControl );
}
+ ///////////////////////////////////////////////////////////
+ // BEGIN MSVC optimised code transformation hacks
+ if (0)
+ bb = do_MSVC_HACKS(bb);
+ // END MSVC optimised code transformation hacks
+ ///////////////////////////////////////////////////////////
+
/* Now have a go at unrolling simple (single-BB) loops. If
successful, clean up the results as much as possible. */