From: Julian Seward
Date: Fri, 27 Feb 2015 13:33:56 +0000 (+0000)
Subject: Add machinery to try and transform A ^ ((A ^ B) & M)
X-Git-Tag: svn/VALGRIND_3_11_0^2~85
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a12b8636da77f2b1ed2be90e234c0013db9baddc;p=thirdparty%2Fvalgrind.git

Add machinery to try and transform A ^ ((A ^ B) & M)
into (A & ~M) | (B & M).  The former is MSVC's optimised idiom for
bitfield assignment, the latter is GCC's idiom.
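To see where the two idioms come from, consider a non-constant
assignment to a bitfield selected by mask M: both compilers must
merge the new bits B into the old word A.  A minimal sketch (the
function names below are invented for illustration and are not part
of this patch):

   /* Both return A with the M-selected bits replaced by B's bits:
      where M has a 1 the result takes B's bit, elsewhere A's. */
   unsigned msvc_idiom ( unsigned a, unsigned b, unsigned m )
   {
      return a ^ ((a ^ b) & m);   /* flip exactly the bits that differ */
   }
   unsigned gcc_idiom ( unsigned a, unsigned b, unsigned m )
   {
      return (a & ~m) | (b & m);  /* clear the field, then insert */
   }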
The MSVC form causes Memcheck problems because it doesn't understand
that (in this complex case) XORing an undefined value with itself
produces a defined result.
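The two forms are equivalent for all inputs: for each bit, where M is
1 both produce B's bit, and where M is 0 both produce A's bit.  A
standalone sanity check (again, not part of this patch) can verify
the identity exhaustively at 8 bits:

   #include <assert.h>
   #include <stdint.h>

   int main ( void )
   {
      /* Check  a ^ ((a ^ b) & m) == (a & ~m) | (b & m)  for all
         8-bit a, b and m. */
      for (unsigned a = 0; a < 256; a++)
         for (unsigned b = 0; b < 256; b++)
            for (unsigned m = 0; m < 256; m++)
               assert( (uint8_t)(a ^ ((a ^ b) & m))
                       == (uint8_t)((a & ~m) | (b & m)) );
      return 0;
   }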
Believed to be working but currently disabled.  To re-enable, change
if (0) to if (1) at line 6651.

When enabled, this fixes, to some extent, bug 344382.

git-svn-id: svn://svn.valgrind.org/vex/trunk@3097
---

diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c
index 5cc0bac884..7207b884be 100644
--- a/VEX/priv/ir_opt.c
+++ b/VEX/priv/ir_opt.c
@@ -1357,6 +1357,15 @@ static IRExpr* chase ( IRExpr** env, IRExpr* e )
    return e;
 }
 
+/* Similar to |chase|, but follows at most one level of tmp reference. */
+static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
+{
+   if (e == NULL || e->tag != Iex_RdTmp)
+      return e;
+   else
+      return env[(Int)e->Iex.RdTmp.tmp];
+}
+
 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
 {
    Int shift;
@@ -5936,6 +5945,433 @@ static Interval stmt_modifies_guest_state (
 }
 
 
+/*---------------------------------------------------------------*/
+/*--- MSVC specific transformation hacks                      ---*/
+/*---------------------------------------------------------------*/
+
+/* The purpose of all this is to find MSVC's idiom for non-constant
+   bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
+   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
+   generates a lot of false positives from the MSVC version because
+   it doesn't understand that XORing an undefined bit with itself
+   gives a defined result.
+
+   This isn't a problem for the simple case "x ^ x", because iropt
+   folds it to a constant zero before Memcheck ever sees it.  But in
+   this case we have an intervening "& c" which defeats the simple
+   case.  So we have to carefully inspect all expressions rooted at
+   an XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of
+   the 7 other variants resulting from swapping the order of
+   arguments to the three binary operations.  If we get a match, we
+   then replace the tree with "(a & ~c) | (b & c)", and Memcheck is
+   happy.
+
+   The key difficulty is to spot the two uses of "a".  To normalise
+   the IR to maximise the chances of success, we first do a CSE pass,
+   with CSEing of loads enabled, since the two "a" expressions may be
+   loads, which need to be commoned up.  Then we do a constant
+   folding pass, so as to remove any tmp-to-tmp assignment chains
+   that would make matching the original expression more difficult.
+*/
+
+
+/* Helper function for debug printing */
+__attribute__((unused))
+static void print_flat_expr ( IRExpr** env, IRExpr* e )
+{
+   if (e == NULL) {
+      vex_printf("?");
+      return;
+   }
+   switch (e->tag) {
+      case Iex_Binop: {
+         ppIROp(e->Iex.Binop.op);
+         vex_printf("(");
+         print_flat_expr(env, e->Iex.Binop.arg1);
+         vex_printf(",");
+         print_flat_expr(env, e->Iex.Binop.arg2);
+         vex_printf(")");
+         break;
+      }
+      case Iex_Unop: {
+         ppIROp(e->Iex.Unop.op);
+         vex_printf("(");
+         print_flat_expr(env, e->Iex.Unop.arg);
+         vex_printf(")");
+         break;
+      }
+      case Iex_RdTmp:
+         ppIRTemp(e->Iex.RdTmp.tmp);
+         vex_printf("=");
+         print_flat_expr(env, chase(env, e));
+         break;
+      case Iex_Const:
+      case Iex_CCall:
+      case Iex_Load:
+      case Iex_ITE:
+      case Iex_Get:
+         ppIRExpr(e);
+         break;
+      default:
+         vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
+         vassert(0);
+   }
+}
+
+/* Spot   a ^ ((a ^ b) & c)   for a, b and c tmp-or-const (atoms),
+   or any of the other 7 variants generated by switching the order
+   of arguments to the outer ^, the inner ^ and the &.
+*/
+static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
+                                     /*OUT*/IRExpr** cc,
+                                     IRExpr** env, IRExpr* e,
+                                     IROp opAND, IROp opXOR )
+{
+#  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
+                                && (_e)->Iex.Binop.op == (_op))
+#  define ISATOM(_e)    isIRAtom(_e)
+#  define STEP(_e)      chase1(env, (_e))
+#  define LL(_e)        ((_e)->Iex.Binop.arg1)
+#  define RR(_e)        ((_e)->Iex.Binop.arg2)
+
+   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
+
+   /* This is common to all 8 cases */
+   if (!ISBIN(e, opXOR)) goto fail;
+
+   /*                       -----and------  */
+   /*                        --xor---       */
+   /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
+   /* find variant 2:   a1 ^ ((b ^ a2) & c) */
+   a1 = and = xor = c = a2bL = a2bR = NULL;
+
+   a1   = LL(e);
+   and  = STEP(RR(e));
+   if (!ISBIN(and, opAND)) goto v34;
+   xor  = STEP(LL(and));
+   c    = RR(and);
+   if (!ISBIN(xor, opXOR)) goto v34;
+   a2bL = LL(xor);
+   a2bR = RR(xor);
+
+   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+      *aa = a1;
+      *bb = a2bR;
+      *cc = c;
+      return 1;
+   }
+   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+      *aa = a1;
+      *bb = a2bL;
+      *cc = c;
+      return 2;
+   }
+
+  v34:
+   /*                   -----and------       */
+   /*                    --xor---            */
+   /* find variant 3:   ((a2 ^ b) & c) ^ a1  */
+   /* find variant 4:   ((b ^ a2) & c) ^ a1  */
+   a1 = and = xor = c = a2bL = a2bR = NULL;
+
+   a1   = RR(e);
+   and  = STEP(LL(e));
+   if (!ISBIN(and, opAND)) goto v56;
+   xor  = STEP(LL(and));
+   c    = RR(and);
+   if (!ISBIN(xor, opXOR)) goto v56;
+   a2bL = LL(xor);
+   a2bR = RR(xor);
+
+   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+      *aa = a1;
+      *bb = a2bR;
+      *cc = c;
+      return 3;
+   }
+   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+      *aa = a1;
+      *bb = a2bL;
+      *cc = c;
+      return 4;
+   }
+
+  v56:
+   /*                       -----and------  */
+   /*                            --xor---   */
+   /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
+   /* find variant 6:   a1 ^ (c & (b ^ a2)) */
+   a1 = and = xor = c = a2bL = a2bR = NULL;
+
+   a1   = LL(e);
+   and  = STEP(RR(e));
+   if (!ISBIN(and, opAND)) goto v78;
+   xor  = STEP(RR(and));
+   c    = LL(and);
+   if (!ISBIN(xor, opXOR)) goto v78;
+   a2bL = LL(xor);
+   a2bR = RR(xor);
+
+   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+      *aa = a1;
+      *bb = a2bR;
+      *cc = c;
+      vassert(5-5); // ATC
+      return 5;
+   }
+   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+      *aa = a1;
+      *bb = a2bL;
+      *cc = c;
+      vassert(6-6); // ATC
+      return 6;
+   }
+
+  v78:
+   /*                   -----and------       */
+   /*                        --xor---        */
+   /* find variant 7:   (c & (a2 ^ b)) ^ a1  */
+   /* find variant 8:   (c & (b ^ a2)) ^ a1  */
+   a1 = and = xor = c = a2bL = a2bR = NULL;
+
+   a1   = RR(e);
+   and  = STEP(LL(e));
+   if (!ISBIN(and, opAND)) goto fail;
+   xor  = STEP(RR(and));
+   c    = LL(and);
+   if (!ISBIN(xor, opXOR)) goto fail;
+   a2bL = LL(xor);
+   a2bR = RR(xor);
+
+   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
+      *aa = a1;
+      *bb = a2bR;
+      *cc = c;
+      return 7;
+   }
+   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
+      *aa = a1;
+      *bb = a2bL;
+      *cc = c;
+      return 8;
+   }
+
+  fail:
+   return 0;
+
+#  undef ISBIN
+#  undef ISATOM
+#  undef STEP
+#  undef LL
+#  undef RR
+}
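+
+/* A worked example of the above (the IR temp names and mask value
+   are invented for illustration): given the flat tree
+      t3 = Xor32(t1, t2)
+      t4 = And32(t3, 0xFF:I32)
+      t5 = Xor32(t1, t4)
+   matching the rhs of t5 hits variant 1 with aa = t1, bb = t2 and
+   cc = 0xFF:I32, and the caller below rebuilds the expression as
+      Or32(And32(t1, Not32(0xFF:I32)), And32(t2, 0xFF:I32)). */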
+
+/* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
+   variants thereof generated by switching arguments around), return
+   the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
+static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
+{
+   if (e->tag != Iex_Binop)
+      return NULL;
+
+   const HChar* tyNm  = NULL;
+   IROp         opOR  = Iop_INVALID;
+   IROp         opAND = Iop_INVALID;
+   IROp         opNOT = Iop_INVALID;
+   IROp         opXOR = Iop_INVALID;
+   switch (e->Iex.Binop.op) {
+      case Iop_Xor32:
+         tyNm  = "I32";
+         opOR  = Iop_Or32;  opAND = Iop_And32;
+         opNOT = Iop_Not32; opXOR = Iop_Xor32;
+         break;
+      case Iop_Xor16:
+         tyNm  = "I16";
+         opOR  = Iop_Or16;  opAND = Iop_And16;
+         opNOT = Iop_Not16; opXOR = Iop_Xor16;
+         break;
+      case Iop_Xor8:
+         tyNm  = "I8";
+         opOR  = Iop_Or8;   opAND = Iop_And8;
+         opNOT = Iop_Not8;  opXOR = Iop_Xor8;
+         break;
+      default:
+         return NULL;
+   }
+
+   IRExpr* aa = NULL;
+   IRExpr* bb = NULL;
+   IRExpr* cc = NULL;
+   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e,
+                                         opAND, opXOR);
+   if (variant > 0) {
+      static UInt ctr = 0;
+      if (0)
+         vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
+                    "type %s, variant %u\n",
+                    ++ctr, tyNm, variant);
+      /* It's vitally important that the returned aa, bb and cc are
+         atoms -- either constants or tmps.  If it's anything else
+         (eg, a GET) then incorporating them in a tree at this point
+         in the SB may erroneously pull them forwards (eg of a PUT
+         that originally was after the GET) and so transform the IR
+         wrongly.  spotBitfieldAssignment should guarantee only to
+         give us atoms, but we check here anyway. */
+      vassert(aa && isIRAtom(aa));
+      vassert(bb && isIRAtom(bb));
+      vassert(cc && isIRAtom(cc));
+      return IRExpr_Binop(
+                opOR,
+                IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
+                IRExpr_Binop(opAND, bb, cc)
+             );
+   }
+   return NULL;
+}
+
+
+/* SB is modified in-place.  Visit all the IRExprs and, for those
+   which are allowed to be non-atomic, perform the XOR transform if
+   possible.  This makes |sb| be non-flat, but that's ok, the caller
+   can re-flatten it.  Returns True iff any changes were made. */
+static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
+{
+   Int  i;
+   Bool changed = False;
+
+   /* Make the tmp->expr environment, so we can use it for
+      chasing expressions. */
+   Int n_tmps = sb->tyenv->types_used;
+   IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
+   for (i = 0; i < n_tmps; i++)
+      env[i] = NULL;
+
+   for (i = 0; i < sb->stmts_used; i++) {
+      IRStmt* st = sb->stmts[i];
+      if (st->tag != Ist_WrTmp)
+         continue;
+      IRTemp t = st->Ist.WrTmp.tmp;
+      vassert(t >= 0 && t < n_tmps);
+      env[t] = st->Ist.WrTmp.data;
+   }
+
+   for (i = 0; i < sb->stmts_used; i++) {
+      IRStmt* st = sb->stmts[i];
+
+      switch (st->tag) {
+         case Ist_AbiHint:
+            vassert(isIRAtom(st->Ist.AbiHint.base));
+            vassert(isIRAtom(st->Ist.AbiHint.nia));
+            break;
+         case Ist_Put:
+            vassert(isIRAtom(st->Ist.Put.data));
+            break;
+         case Ist_PutI: {
+            IRPutI* puti = st->Ist.PutI.details;
+            vassert(isIRAtom(puti->ix));
+            vassert(isIRAtom(puti->data));
+            break;
+         }
+         case Ist_WrTmp: {
+            /* This is the one place where an expr (st->Ist.WrTmp.data)
+               is allowed to be more than just a constant or a tmp. */
+            IRExpr* mb_new_data
+               = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
+            if (mb_new_data) {
+               //ppIRSB(sb);
+               st->Ist.WrTmp.data = mb_new_data;
+               //ppIRSB(sb);
+               changed = True;
+            }
+            break;
+         }
+         case Ist_Store:
+            vassert(isIRAtom(st->Ist.Store.addr));
+            vassert(isIRAtom(st->Ist.Store.data));
+            break;
+         case Ist_StoreG: {
+            IRStoreG* sg = st->Ist.StoreG.details;
+            vassert(isIRAtom(sg->addr));
+            vassert(isIRAtom(sg->data));
+            vassert(isIRAtom(sg->guard));
+            break;
+         }
+         case Ist_LoadG: {
+            IRLoadG* lg = st->Ist.LoadG.details;
+            vassert(isIRAtom(lg->addr));
+            vassert(isIRAtom(lg->alt));
+            vassert(isIRAtom(lg->guard));
+            break;
+         }
+         case Ist_CAS: {
+            IRCAS* cas = st->Ist.CAS.details;
+            vassert(isIRAtom(cas->addr));
+            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+            vassert(isIRAtom(cas->expdLo));
+            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+            vassert(isIRAtom(cas->dataLo));
+            break;
+         }
+         case Ist_LLSC:
+            vassert(isIRAtom(st->Ist.LLSC.addr));
+            if (st->Ist.LLSC.storedata)
+               vassert(isIRAtom(st->Ist.LLSC.storedata));
+            break;
+         case Ist_Dirty: {
+            IRDirty* d = st->Ist.Dirty.details;
+            if (d->mFx != Ifx_None) {
+               vassert(isIRAtom(d->mAddr));
+            }
+            vassert(isIRAtom(d->guard));
+            for (Int j = 0; d->args[j]; j++) {
+               IRExpr* arg = d->args[j];
+               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
+                  vassert(isIRAtom(arg));
+               }
+            }
+            break;
+         }
+         case Ist_IMark:
+         case Ist_NoOp:
+         case Ist_MBE:
+            break;
+         case Ist_Exit:
+            vassert(isIRAtom(st->Ist.Exit.guard));
+            break;
+         default:
+            vex_printf("\n"); ppIRStmt(st);
+            vpanic("do_XOR_TRANSFORM_IRSB");
+      }
+   }
+
+   vassert(isIRAtom(sb->next));
+   return changed;
+}
+
+
+static IRSB* do_MSVC_HACKS ( IRSB* sb )
+{
+   // Normalise as much as we can.  This is the one-and-only place
+   // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
+   Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
+   if (any_cse_changes) {
+      // CSEing might have created dead code.  Remove it.
+      sb = cprop_BB ( sb );
+      do_deadcode_BB(sb);
+   }
+
+   // Visit all atoms, do the transformation proper.  sb is modified
+   // in-place.
+   Bool changed = do_XOR_TRANSFORM_IRSB(sb);
+
+   if (changed) {
+      // The transformation generates non-flat expressions, so we now
+      // need to re-flatten the block.
+      sb = flatten_BB(sb);
+   }
+
+   return sb;
+}
+
+
 /*---------------------------------------------------------------*/
 /*--- iropt main                                              ---*/
 /*---------------------------------------------------------------*/
@@ -6210,6 +6646,13 @@ IRSB* do_iropt_BB(
                                        preciseMemExnsFn, pxControl );
    }
 
+   ///////////////////////////////////////////////////////////
+   // BEGIN MSVC optimised code transformation hacks
+   if (0)
+      bb = do_MSVC_HACKS(bb);
+   // END MSVC optimised code transformation hacks
+   ///////////////////////////////////////////////////////////
+
    /* Now have a go at unrolling simple (single-BB) loops.  If
       successful, clean up the results as much as possible. */