return e;
}
+/* Similar to |chase|, but follows at most one level of tmp reference. */
+static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
+{
+ if (e == NULL || e->tag != Iex_RdTmp)
+ return e;
+ else
+ return env[(Int)e->Iex.RdTmp.tmp];
+}
+
static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
Int shift;
}
+/*---------------------------------------------------------------*/
+/*--- MSVC specific transformation hacks ---*/
+/*---------------------------------------------------------------*/
+
+/* The purpose of all this is to find MSVC's idiom for non-constant
+ bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
+ gcc's idiom "(a & ~c) | (b & c)". Motivation is that Memcheck
+ generates a lot of false positives from the MSVC version because it
+ doesn't understand that XORing an undefined bit with itself gives a
+ defined result.
+
+ This isn't a problem for the simple case "x ^ x", because iropt
+ folds it to a constant zero before Memcheck ever sees it. But in
+ this case we have an intervening "& c" which defeats the simple
+ case. So we have to carefully inspect all expressions rooted at an
+ XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
+ 7 other variants resulting from swapping the order of arguments to
+ the three binary operations. If we get a match, we then replace
+ the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
+
+ The key difficulty is to spot the two uses of "a". To normalise
+ the IR to maximise the chances of success, we first do a CSE pass,
+ with CSEing of loads enabled, since the two "a" expressions may be
+ loads, which need to be commoned up. Then we do a constant folding
+ pass, so as to remove any tmp-to-tmp assignment chains that would
+ make matching the original expression more difficult.
+*/
+
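+/* As a quick sanity check of the identity itself, a minimal
+   standalone sketch (illustrative only, not part of VEX) can verify
+   "a ^ ((a ^ b) & c) == (a & ~c) | (b & c)" exhaustively over all
+   8-bit values:
+
+      #include <assert.h>
+      int main ( void )
+      {
+         unsigned a, b, c;
+         for (a = 0; a < 256; a++)
+            for (b = 0; b < 256; b++)
+               for (c = 0; c < 256; c++)
+                  assert( (a ^ ((a ^ b) & c))
+                          == ((a & ~c) | (b & c)) );
+         return 0;
+      }
+
+   For each bit: if the c bit is 1, both sides give the b bit; if it
+   is 0, both sides give the a bit.
+*/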
+
+/* Helper function for debug printing */
+__attribute__((unused))
+static void print_flat_expr ( IRExpr** env, IRExpr* e )
+{
+ if (e == NULL) {
+ vex_printf("?");
+ return;
+ }
+ switch (e->tag) {
+ case Iex_Binop: {
+ ppIROp(e->Iex.Binop.op);
+ vex_printf("(");
+ print_flat_expr(env, e->Iex.Binop.arg1);
+ vex_printf(",");
+ print_flat_expr(env, e->Iex.Binop.arg2);
+ vex_printf(")");
+ break;
+ }
+ case Iex_Unop: {
+ ppIROp(e->Iex.Unop.op);
+ vex_printf("(");
+ print_flat_expr(env, e->Iex.Unop.arg);
+ vex_printf(")");
+ break;
+ }
+ case Iex_RdTmp:
+ ppIRTemp(e->Iex.RdTmp.tmp);
+ vex_printf("=");
+ print_flat_expr(env, chase(env, e));
+ break;
+ case Iex_Const:
+ case Iex_CCall:
+ case Iex_Load:
+ case Iex_ITE:
+ case Iex_Get:
+ ppIRExpr(e);
+ break;
+ default:
+ vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
+ vassert(0);
+ }
+}
+
+/* Spot a ^ ((a ^ b) & c) for a, b and c tmp-or-const (atoms),
+ or any of the other 7 variants generated by switching the order
+ of arguments to the outer ^, the inner ^ and the &.
+*/
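+
+/* For instance (an illustrative flat-IR fragment, with hypothetical
+   temp numbers, assuming t1, t2 and t5 are distinct atoms):
+
+      t3 = Xor32(t1,t2)
+      t4 = And32(t3,t5)
+      t6 = Xor32(t1,t4)
+
+   then calling this on the expression bound to t6, with opAND set to
+   Iop_And32 and opXOR set to Iop_Xor32, would match variant 1 and
+   hand back aa = t1, bb = t2, cc = t5. */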
+static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
+ /*OUT*/IRExpr** cc,
+ IRExpr** env, IRExpr* e,
+ IROp opAND, IROp opXOR)
+{
+# define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
+ && (_e)->Iex.Binop.op == (_op))
+# define ISATOM(_e) isIRAtom(_e)
+# define STEP(_e) chase1(env, (_e))
+# define LL(_e) ((_e)->Iex.Binop.arg1)
+# define RR(_e) ((_e)->Iex.Binop.arg2)
+
+ IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
+
+ /* This is common to all 8 cases */
+ if (!ISBIN(e, opXOR)) goto fail;
+
+ /*                      -----and------ */
+ /*                       --xor---      */
+ /* find variant 1: a1 ^ ((a2 ^ b) & c) */
+ /* find variant 2: a1 ^ ((b ^ a2) & c) */
+ a1 = and = xor = c = a2bL = a2bR = NULL;
+
+ a1 = LL(e);
+ and = STEP(RR(e));
+ if (!ISBIN(and, opAND)) goto v34;
+ xor = STEP(LL(and));
+ c = RR(and);
+ if (!ISBIN(xor, opXOR)) goto v34;
+ a2bL = LL(xor);
+ a2bR = RR(xor);
+
+ if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+ *aa = a1;
+ *bb = a2bR;
+ *cc = c;
+ return 1;
+ }
+ if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+ *aa = a1;
+ *bb = a2bL;
+ *cc = c;
+ return 2;
+ }
+
+ v34:
+ /*                 -----and------ */
+ /*                  --xor---      */
+ /* find variant 3: ((a2 ^ b) & c) ^ a1 */
+ /* find variant 4: ((b ^ a2) & c) ^ a1 */
+ a1 = and = xor = c = a2bL = a2bR = NULL;
+
+ a1 = RR(e);
+ and = STEP(LL(e));
+ if (!ISBIN(and, opAND)) goto v56;
+ xor = STEP(LL(and));
+ c = RR(and);
+ if (!ISBIN(xor, opXOR)) goto v56;
+ a2bL = LL(xor);
+ a2bR = RR(xor);
+
+ if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+ *aa = a1;
+ *bb = a2bR;
+ *cc = c;
+ return 3;
+ }
+ if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+ *aa = a1;
+ *bb = a2bL;
+ *cc = c;
+ return 4;
+ }
+
+ v56:
+ /*                      -----and------ */
+ /*                           --xor---  */
+ /* find variant 5: a1 ^ (c & (a2 ^ b)) */
+ /* find variant 6: a1 ^ (c & (b ^ a2)) */
+ a1 = and = xor = c = a2bL = a2bR = NULL;
+
+ a1 = LL(e);
+ and = STEP(RR(e));
+ if (!ISBIN(and, opAND)) goto v78;
+ xor = STEP(RR(and));
+ c = LL(and);
+ if (!ISBIN(xor, opXOR)) goto v78;
+ a2bL = LL(xor);
+ a2bR = RR(xor);
+
+ if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+ *aa = a1;
+ *bb = a2bR;
+ *cc = c;
+ vassert(5-5); // ATC: always fails, so we notice if this variant ever shows up
+ return 5;
+ }
+ if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+ *aa = a1;
+ *bb = a2bL;
+ *cc = c;
+ vassert(6-6); // ATC: likewise, deliberately fails
+ return 6;
+ }
+
+ v78:
+ /*                 -----and------ */
+ /*                      --xor---  */
+ /* find variant 7: (c & (a2 ^ b)) ^ a1 */
+ /* find variant 8: (c & (b ^ a2)) ^ a1 */
+ a1 = and = xor = c = a2bL = a2bR = NULL;
+
+ a1 = RR(e);
+ and = STEP(LL(e));
+ if (!ISBIN(and, opAND)) goto fail;
+ xor = STEP(RR(and));
+ c = LL(and);
+ if (!ISBIN(xor, opXOR)) goto fail;
+ a2bL = LL(xor);
+ a2bR = RR(xor);
+
+ if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+ *aa = a1;
+ *bb = a2bR;
+ *cc = c;
+ return 7;
+ }
+ if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+ *aa = a1;
+ *bb = a2bL;
+ *cc = c;
+ return 8;
+ }
+
+ fail:
+ return 0;
+
+# undef ISBIN
+# undef ISATOM
+# undef STEP
+# undef LL
+# undef RR
+}
+
+/* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
+ variants thereof generated by switching arguments around), return
+ the IRExpr* for (a & ~c) | (b & c). Else return NULL. */
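+
+/* Continuing the illustrative example above: for t6 = Xor32(t1,t4)
+   the rewritten (non-flat) expression handed back would be
+   Or32(And32(t1,Not32(t5)), And32(t2,t5)); the block is re-flattened
+   later, in do_MSVC_HACKS. */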
+static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
+{
+ if (e->tag != Iex_Binop)
+ return NULL;
+
+ const HChar* tyNm = NULL;
+ IROp opOR = Iop_INVALID;
+ IROp opAND = Iop_INVALID;
+ IROp opNOT = Iop_INVALID;
+ IROp opXOR = Iop_INVALID;
+ switch (e->Iex.Binop.op) {
+ case Iop_Xor32:
+ tyNm = "I32";
+ opOR = Iop_Or32; opAND = Iop_And32;
+ opNOT = Iop_Not32; opXOR = Iop_Xor32;
+ break;
+ case Iop_Xor16:
+ tyNm = "I16";
+ opOR = Iop_Or16; opAND = Iop_And16;
+ opNOT = Iop_Not16; opXOR = Iop_Xor16;
+ break;
+ case Iop_Xor8:
+ tyNm = "I8";
+ opOR = Iop_Or8; opAND = Iop_And8;
+ opNOT = Iop_Not8; opXOR = Iop_Xor8;
+ break;
+ default:
+ return NULL;
+ }
+
+ IRExpr* aa = NULL;
+ IRExpr* bb = NULL;
+ IRExpr* cc = NULL;
+ UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
+ if (variant > 0) {
+ static UInt ctr = 0;
+ if (0)
+ vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
+ "type %s, variant %u\n",
+ ++ctr, tyNm, variant);
+ /* It's vitally important that the returned aa, bb and cc are
+ atoms -- either constants or tmps. If it's anything else
+ (eg, a GET) then incorporating them in a tree at this point
+ in the SB may erroneously pull them forwards (eg of a PUT
+ that originally was after the GET) and so transform the IR
+ wrongly. spotBitfieldAssignment should guarantee only to
+ give us atoms, but we check here anyway. */
+ vassert(aa && isIRAtom(aa));
+ vassert(bb && isIRAtom(bb));
+ vassert(cc && isIRAtom(cc));
+ return IRExpr_Binop(
+ opOR,
+ IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
+ IRExpr_Binop(opAND, bb, cc)
+ );
+ }
+ return NULL;
+}
+
+
+/* |sb| is modified in-place. Visit all the IRExprs and, for those
+ which are allowed to be non-atomic, perform the XOR transform if
+ possible. This may make |sb| non-flat, but that's OK; the caller
+ can re-flatten it. Returns True iff any changes were made. */
+static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
+{
+ Int i;
+ Bool changed = False;
+
+ /* Make the tmp->expr environment, so we can use it for
+ chasing expressions. */
+ Int n_tmps = sb->tyenv->types_used;
+ IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
+ for (i = 0; i < n_tmps; i++)
+ env[i] = NULL;
+
+ for (i = 0; i < sb->stmts_used; i++) {
+ IRStmt* st = sb->stmts[i];
+ if (st->tag != Ist_WrTmp)
+ continue;
+ IRTemp t = st->Ist.WrTmp.tmp;
+ vassert(t >= 0 && t < n_tmps);
+ env[t] = st->Ist.WrTmp.data;
+ }
+
+ for (i = 0; i < sb->stmts_used; i++) {
+ IRStmt* st = sb->stmts[i];
+
+ switch (st->tag) {
+ case Ist_AbiHint:
+ vassert(isIRAtom(st->Ist.AbiHint.base));
+ vassert(isIRAtom(st->Ist.AbiHint.nia));
+ break;
+ case Ist_Put:
+ vassert(isIRAtom(st->Ist.Put.data));
+ break;
+ case Ist_PutI: {
+ IRPutI* puti = st->Ist.PutI.details;
+ vassert(isIRAtom(puti->ix));
+ vassert(isIRAtom(puti->data));
+ break;
+ }
+ case Ist_WrTmp: {
+ /* This is the one place where an expr (st->Ist.WrTmp.data) is
+ allowed to be more than just a constant or a tmp. */
+ IRExpr* mb_new_data
+ = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
+ if (mb_new_data) {
+ //ppIRSB(sb);
+ st->Ist.WrTmp.data = mb_new_data;
+ //ppIRSB(sb);
+ changed = True;
+ }
+ break;
+ }
+ case Ist_Store:
+ vassert(isIRAtom(st->Ist.Store.addr));
+ vassert(isIRAtom(st->Ist.Store.data));
+ break;
+ case Ist_StoreG: {
+ IRStoreG* sg = st->Ist.StoreG.details;
+ vassert(isIRAtom(sg->addr));
+ vassert(isIRAtom(sg->data));
+ vassert(isIRAtom(sg->guard));
+ break;
+ }
+ case Ist_LoadG: {
+ IRLoadG* lg = st->Ist.LoadG.details;
+ vassert(isIRAtom(lg->addr));
+ vassert(isIRAtom(lg->alt));
+ vassert(isIRAtom(lg->guard));
+ break;
+ }
+ case Ist_CAS: {
+ IRCAS* cas = st->Ist.CAS.details;
+ vassert(isIRAtom(cas->addr));
+ vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+ vassert(isIRAtom(cas->expdLo));
+ vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+ vassert(isIRAtom(cas->dataLo));
+ break;
+ }
+ case Ist_LLSC:
+ vassert(isIRAtom(st->Ist.LLSC.addr));
+ if (st->Ist.LLSC.storedata)
+ vassert(isIRAtom(st->Ist.LLSC.storedata));
+ break;
+ case Ist_Dirty: {
+ IRDirty* d = st->Ist.Dirty.details;
+ if (d->mFx != Ifx_None) {
+ vassert(isIRAtom(d->mAddr));
+ }
+ vassert(isIRAtom(d->guard));
+ for (Int j = 0; d->args[j]; j++) {
+ IRExpr* arg = d->args[j];
+ if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
+ vassert(isIRAtom(arg));
+ }
+ }
+ break;
+ }
+ case Ist_IMark:
+ case Ist_NoOp:
+ case Ist_MBE:
+ break;
+ case Ist_Exit:
+ vassert(isIRAtom(st->Ist.Exit.guard));
+ break;
+ default:
+ vex_printf("\n"); ppIRStmt(st);
+ vpanic("do_XOR_TRANSFORMS_IRSB");
+ }
+ }
+
+ vassert(isIRAtom(sb->next));
+ return changed;
+}
+
+
+static IRSB* do_MSVC_HACKS ( IRSB* sb )
+{
+ // Normalise as much as we can. This is the one-and-only place
+ // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
+ Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
+ if (any_cse_changes) {
+ // CSEing might have created dead code. Remove it.
+ sb = cprop_BB ( sb );
+ do_deadcode_BB(sb);
+ }
+
+ // Visit all atoms, do the transformation proper. |sb| is modified
+ // in-place.
+ Bool changed = do_XOR_TRANSFORM_IRSB(sb);
+
+ if (changed) {
+ // The transformation generates non-flat expressions, so we now
+ // need to re-flatten the block.
+ sb = flatten_BB(sb);
+ }
+
+ return sb;
+}
+
+
/*---------------------------------------------------------------*/
/*--- iropt main ---*/
/*---------------------------------------------------------------*/
preciseMemExnsFn, pxControl );
}
+ ///////////////////////////////////////////////////////////
+ // BEGIN MSVC optimised code transformation hacks
+ if (0)
+ bb = do_MSVC_HACKS(bb);
+ // END MSVC optimised code transformation hacks
+ ///////////////////////////////////////////////////////////
+
/* Now have a go at unrolling simple (single-BB) loops. If
successful, clean up the results as much as possible. */