From: Julian Seward
Date: Sat, 25 Aug 2007 23:07:44 +0000 (+0000)
Subject: Merge from CGTUNE branch:
X-Git-Tag: svn/VALGRIND_3_3_1^2~33
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d339da9abf21ef642f9e025bece705e87b990644;p=thirdparty%2Fvalgrind.git

Merge from CGTUNE branch:

r1769:

This commit provides a bunch of enhancements to the IR optimiser
(iropt) and to the various backend instruction selectors.
Unfortunately the changes are interrelated and cannot easily be
committed in pieces in any meaningful way.

Between them and the already-committed register allocation
enhancements (r1765, r1767), performance of Memcheck is improved by
0%-10%.  Improvements also apply to other tools, to a lesser extent.

Main changes are:

* Add new IR primops Iop_Left64/32/16/8 and Iop_CmpwNEZ64/32/16/8,
  which Memcheck uses to express some primitive operations on
  definedness (V) bits:

     Left(x)    = set all bits to the left of the rightmost 1 bit to 1
     CmpwNEZ(x) = if x == 0 then 0 else 0xFF...FF

  Left and CmpwNEZ are detailed in the Usenix 2005 paper (in which
  CmpwNEZ is called PCast).  The new primops expose opportunities for
  IR optimisation at tree-build time.  Prior to this change, Memcheck
  expressed Left and CmpwNEZ in terms of lower-level primitives
  (logical or, negation, compares, various casts), which was simpler
  but hindered further optimisation.

* Enhance the IR optimiser's tree builder so it can rewrite trees as
  they are constructed, according to useful identities, for example:

     CmpwNEZ64( Or64( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) )

  which gets rid of a CmpwNEZ64 operation - a win, as they are
  relatively expensive.  See functions fold_IRExpr_Binop and
  fold_IRExpr_Unop.

  Allowing the tree builder to rewrite trees also makes it possible
  to have a single implementation of certain transformation rules
  which were previously duplicated in the x86, amd64 and ppc
  instruction selectors, for example:

     32to1(1Uto32(x)) --> x

  This simplifies the instruction selectors and gives a central place
  to put such IR-level transformations, which is a Good Thing.

* Various minor refinements to the instruction selectors:

  - ppc64 generates 32Sto64 in 1 instruction instead of 2
  - x86 can now generate movsbl
  - x86 handles 64-bit integer Mux0X better, for cases typically
    arising from Memchecking of FP code
  - misc other patterns handled better

Overall these changes are a straight win: vex generates less code,
and does so a bit faster, since its register allocator has to chew
through fewer instructions.  The main risk is that of correctness:
making Left and CmpwNEZ explicit, and adding rewrite rules for them,
is a substantial change in the way Memcheck deals with undefined
value tracking, and I am concerned to ensure that the changes do not
cause false negatives.  I _think_ it's all correct so far.

r1770:

Get rid of Iop_Neg64/32/16/8 as they are no longer used by Memcheck,
and any uses generated by the front ends are so infrequent that
generating the equivalent Sub(0, ..) is good enough.  This gets rid
of quite a few lines of code.  Add isel cases for Sub(0, ..) patterns
so that the x86/amd64 backends still generate negl/negq where
possible.

r1771:

Handle Left64.  Fixes failure on none/tests/x86/insn_sse2.
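As a reference restatement (not taken from the patch; the helper names
left64 and cmpwNEZ64 are invented for illustration), the semantics of
the two new primops and the Or64 rewrite can be sketched in plain C:

   #include <assert.h>
   #include <stdint.h>

   /* Left64(x) = x | -x: the rightmost 1 bit of x and every bit to
      its left become 1 (this is how libvex_ir.h documents Iop_Left*). */
   static uint64_t left64 ( uint64_t x )
   {
      return x | (0ULL - x);
   }

   /* CmpwNEZ64(x) = all zeroes if x == 0, otherwise all ones (the
      "PCast" of the Usenix 2005 paper). */
   static uint64_t cmpwNEZ64 ( uint64_t x )
   {
      return x == 0 ? 0 : ~0ULL;
   }

   int main ( void )
   {
      static const uint64_t vals[]
         = { 0, 1, 2, 0x80, 0x8000000000000000ULL, ~0ULL,
             0xDEAD0000BEEFULL };
      const unsigned n = sizeof(vals)/sizeof(vals[0]);

      for (unsigned i = 0; i < n; i++) {
         for (unsigned j = 0; j < n; j++) {
            uint64_t x = vals[i], y = vals[j];
            /* Tree-build rewrite from fold_IRExpr_Unop:
                 CmpwNEZ64( Or64( CmpwNEZ64(x), y ) )
                    --> CmpwNEZ64( Or64( x, y ) )
               Sound because CmpwNEZ64(x) is zero exactly when x is
               zero, so it contributes the same nonzero-ness to the
               Or64 as x itself does. */
            assert( cmpwNEZ64(cmpwNEZ64(x) | y) == cmpwNEZ64(x | y) );
            /* Rewrite CmpNEZ64( Left64(x) ) --> CmpNEZ64(x): Left64
               preserves whether its argument is zero. */
            assert( (left64(x) != 0) == (x != 0) );
         }
      }
      return 0;
   }

The same reasoning carries over to the 32/16/8-bit variants.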
git-svn-id: svn://svn.valgrind.org/vex/trunk@1780 --- diff --git a/VEX/priv/guest-ppc/toIR.c b/VEX/priv/guest-ppc/toIR.c index 1098c86468..78f81a1672 100644 --- a/VEX/priv/guest-ppc/toIR.c +++ b/VEX/priv/guest-ppc/toIR.c @@ -783,7 +783,7 @@ static IROp mkSzOp ( IRType ty, IROp op8 ) op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 || - op8 == Iop_Not8 || op8 == Iop_Neg8 ); + op8 == Iop_Not8 ); adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : (ty==Ity_I32 ? 2 : 3)); return adj + op8; } diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c index 8f1deaa4a1..0a5c5de014 100644 --- a/VEX/priv/guest-x86/toIR.c +++ b/VEX/priv/guest-x86/toIR.c @@ -684,7 +684,7 @@ static IROp mkSizedOp ( IRType ty, IROp op8 ) || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 - || op8 == Iop_Not8 || op8 == Iop_Neg8); + || op8 == Iop_Not8); adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); return adj + op8; } @@ -2631,7 +2631,7 @@ UInt dis_Grp3 ( UChar sorb, Int sz, Int delta, Bool* decode_OK ) dst1 = newTemp(ty); assign(dst0, mkU(ty,0)); assign(src, getIReg(sz,eregOfRM(modrm))); - assign(dst1, unop(mkSizedOp(ty,Iop_Neg8), mkexpr(src))); + assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src))); setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); @@ -2693,7 +2693,7 @@ UInt dis_Grp3 ( UChar sorb, Int sz, Int delta, Bool* decode_OK ) dst1 = newTemp(ty); assign(dst0, mkU(ty,0)); assign(src, mkexpr(t1)); - assign(dst1, unop(mkSizedOp(ty,Iop_Neg8), mkexpr(src))); + assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src))); setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); storeLE( mkexpr(addr), mkexpr(dst1) ); DIP("neg%c %s\n", nameISize(sz), dis_buf); diff --git a/VEX/priv/host-amd64/isel.c b/VEX/priv/host-amd64/isel.c index 377c74903b..6f25c9fb9c 100644 --- a/VEX/priv/host-amd64/isel.c +++ b/VEX/priv/host-amd64/isel.c @@ -278,14 +278,21 @@ static Bool fitsIn32Bits ( ULong x ) return toBool(x == y1); } -//.. /* Is this a 32-bit zero expression? */ -//.. -//.. static Bool isZero32 ( IRExpr* e ) -//.. { -//.. return e->tag == Iex_Const -//.. && e->Iex.Const.con->tag == Ico_U32 -//.. && e->Iex.Const.con->Ico.U32 == 0; -//.. } +/* Is this a 64-bit zero expression? */ + +static Bool isZeroU64 ( IRExpr* e ) +{ + return e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U64 + && e->Iex.Const.con->Ico.U64 == 0ULL; +} + +static Bool isZeroU32 ( IRExpr* e ) +{ + return e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U32 + && e->Iex.Const.con->Ico.U32 == 0; +} /* Make a int reg-reg move. */ @@ -841,16 +848,17 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) AMD64AluOp aluOp; AMD64ShiftOp shOp; -//.. -//.. /* Pattern: Sub32(0,x) */ -//.. if (e->Iex.Binop.op == Iop_Sub32 && isZero32(e->Iex.Binop.arg1)) { -//.. HReg dst = newVRegI(env); -//.. HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2); -//.. addInstr(env, mk_iMOVsd_RR(reg,dst)); -//.. addInstr(env, X86Instr_Unary32(Xun_NEG,X86RM_Reg(dst))); -//.. return dst; -//.. } -//.. 
+ /* Pattern: Sub64(0,x) */ + /* and: Sub32(0,x) */ + if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1)) + || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) { + HReg dst = newVRegI(env); + HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, mk_iMOVsd_RR(reg,dst)); + addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst)); + return dst; + } + /* Is it an addition or logical style op? */ switch (e->Iex.Binop.op) { case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64: @@ -1449,14 +1457,41 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) AMD64RMI_Reg(tmp), dst)); return dst; } - case Iop_Neg8: - case Iop_Neg16: - case Iop_Neg32: - case Iop_Neg64: { + + case Iop_CmpwNEZ64: { HReg dst = newVRegI(env); - HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); - addInstr(env, mk_iMOVsd_RR(reg,dst)); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, mk_iMOVsd_RR(src,dst)); addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst)); + addInstr(env, AMD64Instr_Alu64R(Aalu_OR, + AMD64RMI_Reg(src), dst)); + addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst)); + return dst; + } + + case Iop_CmpwNEZ32: { + HReg src = newVRegI(env); + HReg dst = newVRegI(env); + HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, mk_iMOVsd_RR(pre,src)); + addInstr(env, AMD64Instr_MovZLQ(src,src)); + addInstr(env, mk_iMOVsd_RR(src,dst)); + addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst)); + addInstr(env, AMD64Instr_Alu64R(Aalu_OR, + AMD64RMI_Reg(src), dst)); + addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst)); + return dst; + } + + case Iop_Left8: + case Iop_Left16: + case Iop_Left32: + case Iop_Left64: { + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, mk_iMOVsd_RR(src, dst)); + addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst)); + addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst)); return dst; } @@ -1965,10 +2000,6 @@ static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) { MatchInfo mi; -//.. DECLARE_PATTERN(p_1Uto32_then_32to1); -//.. DECLARE_PATTERN(p_1Sto32_then_32to1); - - DECLARE_PATTERN(p_1Uto64_then_64to1); vassert(e); vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); @@ -2002,30 +2033,6 @@ static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) /* --- patterns rooted at: 64to1 --- */ - /* 64to1(1Uto64(expr1)) ==> expr1 */ - DEFINE_PATTERN( p_1Uto64_then_64to1, - unop(Iop_64to1, unop(Iop_1Uto64, bind(0))) ); - if (matchIRExpr(&mi,p_1Uto64_then_64to1,e)) { - IRExpr* expr1 = mi.bindee[0]; - return iselCondCode(env, expr1); - } - -//.. /* 32to1(1Uto32(expr1)) -- the casts are pointless, ignore them */ -//.. DEFINE_PATTERN(p_1Uto32_then_32to1, -//.. unop(Iop_32to1,unop(Iop_1Uto32,bind(0)))); -//.. if (matchIRExpr(&mi,p_1Uto32_then_32to1,e)) { -//.. IRExpr* expr1 = mi.bindee[0]; -//.. return iselCondCode(env, expr1); -//.. } -//.. -//.. /* 32to1(1Sto32(expr1)) -- the casts are pointless, ignore them */ -//.. DEFINE_PATTERN(p_1Sto32_then_32to1, -//.. unop(Iop_32to1,unop(Iop_1Sto32,bind(0)))); -//.. if (matchIRExpr(&mi,p_1Sto32_then_32to1,e)) { -//.. IRExpr* expr1 = mi.bindee[0]; -//.. return iselCondCode(env, expr1); -//.. } - /* 64to1 */ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) { HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); @@ -2168,53 +2175,6 @@ static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) } } -//.. /* CmpNE64(1Sto64(b), 0) ==> b */ -//.. { -//.. 
DECLARE_PATTERN(p_CmpNE64_1Sto64); -//.. DEFINE_PATTERN( -//.. p_CmpNE64_1Sto64, -//.. binop(Iop_CmpNE64, unop(Iop_1Sto64,bind(0)), mkU64(0))); -//.. if (matchIRExpr(&mi, p_CmpNE64_1Sto64, e)) { -//.. return iselCondCode(env, mi.bindee[0]); -//.. } -//.. } -//.. -//.. /* CmpNE64(x, 0) */ -//.. { -//.. DECLARE_PATTERN(p_CmpNE64_x_zero); -//.. DEFINE_PATTERN( -//.. p_CmpNE64_x_zero, -//.. binop(Iop_CmpNE64, bind(0), mkU64(0)) ); -//.. if (matchIRExpr(&mi, p_CmpNE64_x_zero, e)) { -//.. HReg hi, lo; -//.. IRExpr* x = mi.bindee[0]; -//.. HReg tmp = newVRegI(env); -//.. iselInt64Expr( &hi, &lo, env, x ); -//.. addInstr(env, mk_iMOVsd_RR(hi, tmp)); -//.. addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp)); -//.. return Xcc_NZ; -//.. } -//.. } -//.. -//.. /* CmpNE64 */ -//.. if (e->tag == Iex_Binop -//.. && e->Iex.Binop.op == Iop_CmpNE64) { -//.. HReg hi1, hi2, lo1, lo2; -//.. HReg tHi = newVRegI(env); -//.. HReg tLo = newVRegI(env); -//.. iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 ); -//.. iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 ); -//.. addInstr(env, mk_iMOVsd_RR(hi1, tHi)); -//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi)); -//.. addInstr(env, mk_iMOVsd_RR(lo1, tLo)); -//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo)); -//.. addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo)); -//.. switch (e->Iex.Binop.op) { -//.. case Iop_CmpNE64: return Xcc_NZ; -//.. default: vpanic("iselCondCode(x86): CmpXX64"); -//.. } -//.. } - ppIRExpr(e); vpanic("iselCondCode(amd64)"); } diff --git a/VEX/priv/host-ppc/hdefs.c b/VEX/priv/host-ppc/hdefs.c index cc21c1b484..60752ec7d4 100644 --- a/VEX/priv/host-ppc/hdefs.c +++ b/VEX/priv/host-ppc/hdefs.c @@ -2706,7 +2706,13 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, /* srawi (PPC32 p507) */ UInt n = srcR->Prh.Imm.imm16; vassert(!srcR->Prh.Imm.syned); - vassert(n > 0 && n < 32); + /* In 64-bit mode, we allow right shifts by zero bits + as that is a handy way to sign extend the lower 32 + bits into the upper 32 bits. */ + if (mode64) + vassert(n >= 0 && n < 32); + else + vassert(n > 0 && n < 32); p = mkFormX(p, 31, r_srcL, r_dst, n, 824, 0); } else { /* sraw (PPC32 p506) */ diff --git a/VEX/priv/host-ppc/isel.c b/VEX/priv/host-ppc/isel.c index 6dc8c1068c..c85d219cb8 100644 --- a/VEX/priv/host-ppc/isel.c +++ b/VEX/priv/host-ppc/isel.c @@ -1569,8 +1569,7 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e ) return r_dst; } case Iop_8Sto64: - case Iop_16Sto64: - case Iop_32Sto64: { + case Iop_16Sto64: { HReg r_dst = newVRegI(env); HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); UShort amt = toUShort(op_unop==Iop_8Sto64 ? 56 : @@ -1584,6 +1583,17 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e ) r_dst, r_dst, PPCRH_Imm(False,amt))); return r_dst; } + case Iop_32Sto64: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + vassert(mode64); + /* According to the IBM docs, in 64 bit mode, srawi r,r,0 + sign extends the lower 32 bits into the upper 32 bits. 
*/ + addInstr(env, + PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/, + r_dst, r_src, PPCRH_Imm(False,0))); + return r_dst; + } case Iop_Not8: case Iop_Not16: case Iop_Not32: @@ -1695,15 +1705,38 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e ) addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src)); return r_dst; } - case Iop_Neg8: - case Iop_Neg16: - case Iop_Neg32: - case Iop_Neg64: { + + case Iop_Left8: + case Iop_Left32: + case Iop_Left64: { + HReg r_src, r_dst; + if (op_unop == Iop_Left64 && !mode64) + goto irreducible; + r_dst = newVRegI(env); + r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src)); + addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src))); + return r_dst; + } + + case Iop_CmpwNEZ32: { HReg r_dst = newVRegI(env); HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); - if (op_unop == Iop_Neg64 && !mode64) - goto irreducible; addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src)); + addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src))); + addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/, + r_dst, r_dst, PPCRH_Imm(False, 31))); + return r_dst; + } + + case Iop_CmpwNEZ64: { + HReg r_dst = newVRegI(env); + HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + if (!mode64) goto irreducible; + addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src)); + addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src))); + addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/, + r_dst, r_dst, PPCRH_Imm(False, 63))); return r_dst; } @@ -1761,8 +1794,6 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e ) case Iop_32to16: case Iop_64to8: /* These are no-ops. */ - if (op_unop == Iop_Neg64 && !mode64) - goto irreducible; return iselWordExpr_R(env, e->Iex.Unop.arg); /* ReinterpF64asI64(e) */ @@ -2685,6 +2716,24 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, if (e->tag == Iex_Unop) { switch (e->Iex.Unop.op) { + /* CmpwNEZ64(e) */ + case Iop_CmpwNEZ64: { + HReg argHi, argLo; + HReg tmp1 = newVRegI(env); + HReg tmp2 = newVRegI(env); + iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg); + /* tmp1 = argHi | argLo */ + addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo))); + /* tmp2 = (tmp1 | -tmp1) >>s 31 */ + addInstr(env, PPCInstr_Unary(Pun_NEG,tmp2,tmp1)); + addInstr(env, PPCInstr_Alu(Palu_OR, tmp2, tmp2, PPCRH_Reg(tmp1))); + addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/, + tmp2, tmp2, PPCRH_Imm(False, 31))); + *rHi = tmp2; + *rLo = tmp2; /* yes, really tmp2 */ + return; + } + /* 32Sto64(e) */ case Iop_32Sto64: { HReg tHi = newVRegI(env); @@ -2754,22 +2803,6 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, *rLo = tLo; return; } - - case Iop_Neg64: { - HReg yLo, yHi; - HReg zero = newVRegI(env); - HReg tLo = newVRegI(env); - HReg tHi = newVRegI(env); - iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); - addInstr(env, PPCInstr_LI(zero, 0, False/*mode32*/)); - addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/, - tLo, zero, yLo)); - addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/, - tHi, zero, yHi)); - *rHi = tHi; - *rLo = tLo; - return; - } /* ReinterpF64asI64(e) */ /* Given an IEEE754 double, produce an I64 with the same bit diff --git a/VEX/priv/host-x86/hdefs.c b/VEX/priv/host-x86/hdefs.c index 0cd32bb32a..ec3b6566e4 100644 --- a/VEX/priv/host-x86/hdefs.c +++ b/VEX/priv/host-x86/hdefs.c @@ -1612,7 +1612,7 @@ X86Instr* genReload_X86 ( HReg rreg, Int offsetB, Bool mode64 ) /* The given instruction reads the specified 
vreg exactly once, and that vreg is currently located at the given spill offset. If - possible, return a variant of the instruction which instead + possible, return a variant of the instruction to one which instead references the spill slot directly. */ X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off ) @@ -2407,6 +2407,13 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); goto done; } + if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) { + /* movsbl */ + *p++ = 0x0F; + *p++ = 0xBE; + p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src); + goto done; + } break; case Xin_Set32: diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c index 4a6522f951..7edefeca0a 100644 --- a/VEX/priv/host-x86/isel.c +++ b/VEX/priv/host-x86/isel.c @@ -120,6 +120,20 @@ static Bool isZeroU8 ( IRExpr* e ) && e->Iex.Const.con->Ico.U8 == 0; } +static Bool isZeroU32 ( IRExpr* e ) +{ + return e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U32 + && e->Iex.Const.con->Ico.U32 == 0; +} + +static Bool isZeroU64 ( IRExpr* e ) +{ + return e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U64 + && e->Iex.Const.con->Ico.U64 == 0ULL; +} + /*---------------------------------------------------------*/ /*--- ISelEnv ---*/ @@ -730,7 +744,6 @@ static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) { MatchInfo mi; - DECLARE_PATTERN(p_32to1_then_1Uto8); IRType ty = typeOfIRExpr(env->type_env,e); vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); @@ -799,6 +812,15 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) X86AluOp aluOp; X86ShiftOp shOp; + /* Pattern: Sub32(0,x) */ + if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) { + HReg dst = newVRegI(env); + HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, mk_iMOVsd_RR(reg,dst)); + addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); + return dst; + } + /* Is it an addition or logical style op? 
*/ switch (e->Iex.Binop.op) { case Iop_Add8: case Iop_Add16: case Iop_Add32: @@ -1011,21 +1033,53 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) /* --------- UNARY OP --------- */ case Iex_Unop: { + /* 1Uto8(32to1(expr32)) */ - DEFINE_PATTERN(p_32to1_then_1Uto8, - unop(Iop_1Uto8,unop(Iop_32to1,bind(0)))); - if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) { - IRExpr* expr32 = mi.bindee[0]; - HReg dst = newVRegI(env); - HReg src = iselIntExpr_R(env, expr32); - addInstr(env, mk_iMOVsd_RR(src,dst) ); - addInstr(env, X86Instr_Alu32R(Xalu_AND, - X86RMI_Imm(1), dst)); - return dst; + if (e->Iex.Unop.op == Iop_1Uto8) { + DECLARE_PATTERN(p_32to1_then_1Uto8); + DEFINE_PATTERN(p_32to1_then_1Uto8, + unop(Iop_1Uto8,unop(Iop_32to1,bind(0)))); + if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) { + IRExpr* expr32 = mi.bindee[0]; + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, expr32); + addInstr(env, mk_iMOVsd_RR(src,dst) ); + addInstr(env, X86Instr_Alu32R(Xalu_AND, + X86RMI_Imm(1), dst)); + return dst; + } + } + + /* 8Uto32(LDle(expr32)) */ + if (e->Iex.Unop.op == Iop_8Uto32) { + DECLARE_PATTERN(p_LDle8_then_8Uto32); + DEFINE_PATTERN(p_LDle8_then_8Uto32, + unop(Iop_8Uto32, + IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); + if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) { + HReg dst = newVRegI(env); + X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); + addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); + return dst; + } + } + + /* 8Sto32(LDle(expr32)) */ + if (e->Iex.Unop.op == Iop_8Sto32) { + DECLARE_PATTERN(p_LDle8_then_8Sto32); + DEFINE_PATTERN(p_LDle8_then_8Sto32, + unop(Iop_8Sto32, + IRExpr_Load(Iend_LE,Ity_I8,bind(0))) ); + if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) { + HReg dst = newVRegI(env); + X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] ); + addInstr(env, X86Instr_LoadEX(1,True,amode,dst)); + return dst; + } } /* 16Uto32(LDle(expr32)) */ - { + if (e->Iex.Unop.op == Iop_16Uto32) { DECLARE_PATTERN(p_LDle16_then_16Uto32); DEFINE_PATTERN(p_LDle16_then_16Uto32, unop(Iop_16Uto32, @@ -1038,6 +1092,34 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) } } + /* 8Uto32(GET:I8) */ + if (e->Iex.Unop.op == Iop_8Uto32) { + if (e->Iex.Unop.arg->tag == Iex_Get) { + HReg dst; + X86AMode* amode; + vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8); + dst = newVRegI(env); + amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, + hregX86_EBP()); + addInstr(env, X86Instr_LoadEX(1,False,amode,dst)); + return dst; + } + } + + /* 16to32(GET:I16) */ + if (e->Iex.Unop.op == Iop_16Uto32) { + if (e->Iex.Unop.arg->tag == Iex_Get) { + HReg dst; + X86AMode* amode; + vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16); + dst = newVRegI(env); + amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, + hregX86_EBP()); + addInstr(env, X86Instr_LoadEX(2,False,amode,dst)); + return dst; + } + } + switch (e->Iex.Unop.op) { case Iop_8Uto16: case Iop_8Uto32: @@ -1128,13 +1210,25 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) X86RMI_Reg(tmp), dst)); return dst; } - case Iop_Neg8: - case Iop_Neg16: - case Iop_Neg32: { + + case Iop_CmpwNEZ32: { HReg dst = newVRegI(env); - HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg); - addInstr(env, mk_iMOVsd_RR(reg,dst)); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, mk_iMOVsd_RR(src,dst)); addInstr(env, X86Instr_Unary32(Xun_NEG,dst)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, + X86RMI_Reg(src), dst)); + addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst)); + return dst; + } + case Iop_Left8: + case Iop_Left16: + case Iop_Left32: { + HReg dst = 
newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, mk_iMOVsd_RR(src, dst)); + addInstr(env, X86Instr_Unary32(Xun_NEG, dst)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst)); return dst; } @@ -1547,9 +1641,6 @@ static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e ) static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) { MatchInfo mi; - DECLARE_PATTERN(p_32to1); - DECLARE_PATTERN(p_1Uto32_then_32to1); - DECLARE_PATTERN(p_1Sto32_then_32to1); vassert(e); vassert(typeOfIRExpr(env->type_env,e) == Ity_I1); @@ -1582,28 +1673,9 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) /* --- patterns rooted at: 32to1 --- */ - /* 32to1(1Uto32(e)) ==> e */ - DEFINE_PATTERN(p_1Uto32_then_32to1, - unop(Iop_32to1,unop(Iop_1Uto32,bind(0)))); - if (matchIRExpr(&mi,p_1Uto32_then_32to1,e)) { - IRExpr* expr1 = mi.bindee[0]; - return iselCondCode(env, expr1); - } - - /* 32to1(1Sto32(e)) ==> e */ - DEFINE_PATTERN(p_1Sto32_then_32to1, - unop(Iop_32to1,unop(Iop_1Sto32,bind(0)))); - if (matchIRExpr(&mi,p_1Sto32_then_32to1,e)) { - IRExpr* expr1 = mi.bindee[0]; - return iselCondCode(env, expr1); - } - - /* 32to1(expr32) */ - DEFINE_PATTERN(p_32to1, - unop(Iop_32to1,bind(0)) - ); - if (matchIRExpr(&mi,p_32to1,e)) { - X86RM* rm = iselIntExpr_RM(env, mi.bindee[0]); + if (e->tag == Iex_Unop + && e->Iex.Unop.op == Iop_32to1) { + X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg); addInstr(env, X86Instr_Test32(1,rm)); return Xcc_NZ; } @@ -1630,16 +1702,6 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) /* --- patterns rooted at: CmpNEZ32 --- */ - /* CmpNEZ32(1Sto32(b)) ==> b */ - { - DECLARE_PATTERN(p_CmpNEZ32_1Sto32); - DEFINE_PATTERN(p_CmpNEZ32_1Sto32, - unop(Iop_CmpNEZ32, unop(Iop_1Sto32,bind(0)))); - if (matchIRExpr(&mi, p_CmpNEZ32_1Sto32, e)) { - return iselCondCode(env, mi.bindee[0]); - } - } - /* CmpNEZ32(And32(x,y)) */ { DECLARE_PATTERN(p_CmpNEZ32_And32); @@ -1670,6 +1732,16 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) } } + /* CmpNEZ32(GET(..):I32) */ + if (e->tag == Iex_Unop + && e->Iex.Unop.op == Iop_CmpNEZ32 + && e->Iex.Unop.arg->tag == Iex_Get) { + X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset, + hregX86_EBP()); + addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am)); + return Xcc_NZ; + } + /* CmpNEZ32(x) */ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_CmpNEZ32) { @@ -1681,17 +1753,6 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) /* --- patterns rooted at: CmpNEZ64 --- */ - /* CmpNEZ64(1Sto64(b)) ==> b */ - { - DECLARE_PATTERN(p_CmpNEZ64_1Sto64); - DEFINE_PATTERN( - p_CmpNEZ64_1Sto64, - unop(Iop_CmpNEZ64, unop(Iop_1Sto64,bind(0)))); - if (matchIRExpr(&mi, p_CmpNEZ64_1Sto64, e)) { - return iselCondCode(env, mi.bindee[0]); - } - } - /* CmpNEZ64(Or64(x,y)) */ { DECLARE_PATTERN(p_CmpNEZ64_Or64); @@ -1839,6 +1900,7 @@ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) /* DO NOT CALL THIS DIRECTLY ! 
*/ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) { + MatchInfo mi; HWord fn = 0; /* helper fn for most SIMD64 stuff */ vassert(e); vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); @@ -1915,18 +1977,59 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) return; } - /* 64-bit Mux0X */ + /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */ + if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) { + X86RM* r8; + HReg e0Lo, e0Hi; + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); + iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); + r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); + addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) ); + addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) ); + addInstr(env, X86Instr_Push(X86RMI_Imm(0))); + addInstr(env, X86Instr_Test32(0xFF, r8)); + addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi)); + addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo)); + add_to_esp(env, 4); + *rHi = tHi; + *rLo = tLo; + return; + } + /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */ + if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) { + X86RM* r8; + HReg e0Lo, e0Hi; + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); + iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX); + r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); + addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) ); + addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) ); + addInstr(env, X86Instr_Push(X86RMI_Imm(0))); + addInstr(env, X86Instr_Test32(0xFF, r8)); + addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi)); + addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo)); + add_to_esp(env, 4); + *rHi = tHi; + *rLo = tLo; + return; + } + + /* 64-bit Mux0X: Mux0X(g, expr, expr) */ if (e->tag == Iex_Mux0X) { - X86RM* rm8; - HReg e0Lo, e0Hi, eXLo, eXHi; - HReg tLo = newVRegI(env); - HReg tHi = newVRegI(env); + X86RM* r8; + HReg e0Lo, e0Hi, eXLo, eXHi; + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0); iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX); addInstr(env, mk_iMOVsd_RR(eXHi, tHi)); addInstr(env, mk_iMOVsd_RR(eXLo, tLo)); - rm8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); - addInstr(env, X86Instr_Test32(0xFF, rm8)); + r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); + addInstr(env, X86Instr_Test32(0xFF, r8)); /* This assumes the first cmov32 doesn't trash the condition codes, so they are still available for the second cmov32 */ addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi)); @@ -1992,10 +2095,10 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) : e->Iex.Binop.op==Iop_And64 ? 
Xalu_AND : Xalu_XOR; iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); - addInstr(env, mk_iMOVsd_RR(xHi, tHi)); - addInstr(env, mk_iMOVsd_RR(xLo, tLo)); iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); + addInstr(env, mk_iMOVsd_RR(xHi, tHi)); addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi)); + addInstr(env, mk_iMOVsd_RR(xLo, tLo)); addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo)); *rHi = tHi; *rLo = tLo; @@ -2398,8 +2501,8 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) return; } - /* Neg64(e) */ - case Iop_Neg64: { + /* Left64(e) */ + case Iop_Left64: { HReg yLo, yHi; HReg tLo = newVRegI(env); HReg tHi = newVRegI(env); @@ -2411,11 +2514,75 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) /* tHi = 0 - yHi - carry */ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi)); addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi)); + /* So now we have tHi:tLo = -arg. To finish off, or 'arg' + back in, so as to give the final result + tHi:tLo = arg | -arg. */ + addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi)); *rHi = tHi; *rLo = tLo; return; } + /* --- patterns rooted at: CmpwNEZ64 --- */ + + /* CmpwNEZ64(e) */ + case Iop_CmpwNEZ64: { + + DECLARE_PATTERN(p_CmpwNEZ64_Or64); + DEFINE_PATTERN(p_CmpwNEZ64_Or64, + unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1)))); + if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) { + /* CmpwNEZ64(Or64(x,y)) */ + HReg xHi,xLo,yHi,yLo; + HReg xBoth = newVRegI(env); + HReg merged = newVRegI(env); + HReg tmp2 = newVRegI(env); + + iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]); + addInstr(env, mk_iMOVsd_RR(xHi,xBoth)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, + X86RMI_Reg(xLo),xBoth)); + + iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]); + addInstr(env, mk_iMOVsd_RR(yHi,merged)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, + X86RMI_Reg(yLo),merged)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, + X86RMI_Reg(xBoth),merged)); + + /* tmp2 = (merged | -merged) >>s 31 */ + addInstr(env, mk_iMOVsd_RR(merged,tmp2)); + addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, + X86RMI_Reg(merged), tmp2)); + addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); + *rHi = tmp2; + *rLo = tmp2; + return; + } else { + /* CmpwNEZ64(e) */ + HReg srcLo, srcHi; + HReg tmp1 = newVRegI(env); + HReg tmp2 = newVRegI(env); + /* srcHi:srcLo = arg */ + iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); + /* tmp1 = srcHi | srcLo */ + addInstr(env, mk_iMOVsd_RR(srcHi,tmp1)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, + X86RMI_Reg(srcLo), tmp1)); + /* tmp2 = (tmp1 | -tmp1) >>s 31 */ + addInstr(env, mk_iMOVsd_RR(tmp1,tmp2)); + addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2)); + addInstr(env, X86Instr_Alu32R(Xalu_OR, + X86RMI_Reg(tmp1), tmp2)); + addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2)); + *rHi = tmp2; + *rLo = tmp2; + return; + } + } + /* ReinterpF64asI64(e) */ /* Given an IEEE754 double, produce an I64 with the same bit pattern. 
*/ @@ -2829,12 +2996,12 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) if (e->tag == Iex_Mux0X) { if (ty == Ity_F64 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) { - X86RM* rm8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); - HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX); - HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0); - HReg dst = newVRegF(env); + X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); + HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX); + HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0); + HReg dst = newVRegF(env); addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst)); - addInstr(env, X86Instr_Test32(0xFF, rm8)); + addInstr(env, X86Instr_Test32(0xFF, r8)); addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst)); return dst; } @@ -3350,12 +3517,12 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) } /* if (e->tag == Iex_Binop) */ if (e->tag == Iex_Mux0X) { - X86RM* rm8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); - HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX); - HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0); - HReg dst = newVRegV(env); + X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond); + HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX); + HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0); + HReg dst = newVRegV(env); addInstr(env, mk_vMOVsd_RR(rX,dst)); - addInstr(env, X86Instr_Test32(0xFF, rm8)); + addInstr(env, X86Instr_Test32(0xFF, r8)); addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst)); return dst; } diff --git a/VEX/priv/ir/irdefs.c b/VEX/priv/ir/irdefs.c index 15d3dcc828..303bd11027 100644 --- a/VEX/priv/ir/irdefs.c +++ b/VEX/priv/ir/irdefs.c @@ -203,17 +203,20 @@ void ppIROp ( IROp op ) case Iop_CmpNEZ32: vex_printf("CmpNEZ32"); return; case Iop_CmpNEZ64: vex_printf("CmpNEZ64"); return; + case Iop_CmpwNEZ32: vex_printf("CmpwNEZ32"); return; + case Iop_CmpwNEZ64: vex_printf("CmpwNEZ64"); return; + + case Iop_Left8: vex_printf("Left8"); return; + case Iop_Left16: vex_printf("Left16"); return; + case Iop_Left32: vex_printf("Left32"); return; + case Iop_Left64: vex_printf("Left64"); return; + case Iop_CmpORD32U: vex_printf("CmpORD32U"); return; case Iop_CmpORD32S: vex_printf("CmpORD32S"); return; case Iop_CmpORD64U: vex_printf("CmpORD64U"); return; case Iop_CmpORD64S: vex_printf("CmpORD64S"); return; - case Iop_Neg8: vex_printf("Neg8"); return; - case Iop_Neg16: vex_printf("Neg16"); return; - case Iop_Neg32: vex_printf("Neg32"); return; - case Iop_Neg64: vex_printf("Neg64"); return; - case Iop_DivU32: vex_printf("DivU32"); return; case Iop_DivS32: vex_printf("DivS32"); return; case Iop_DivU64: vex_printf("DivU64"); return; @@ -1517,14 +1520,13 @@ void typeOfPrimop ( IROp op, case Iop_Shl64: case Iop_Shr64: case Iop_Sar64: BINARY(Ity_I64,Ity_I8, Ity_I64); - case Iop_Not8: case Iop_Neg8: + case Iop_Not8: UNARY(Ity_I8, Ity_I8); - case Iop_Not16: case Iop_Neg16: + case Iop_Not16: UNARY(Ity_I16, Ity_I16); - case Iop_Not32: case Iop_Neg32: + case Iop_Not32: UNARY(Ity_I32, Ity_I32); - case Iop_Neg64: case Iop_Not64: case Iop_CmpNEZ32x2: case Iop_CmpNEZ16x4: case Iop_CmpNEZ8x8: UNARY(Ity_I64, Ity_I64); @@ -1547,6 +1549,11 @@ void typeOfPrimop ( IROp op, case Iop_CmpNEZ32: UNARY_COMPARISON(Ity_I32); case Iop_CmpNEZ64: UNARY_COMPARISON(Ity_I64); + case Iop_Left8: UNARY(Ity_I8, Ity_I8); + case Iop_Left16: UNARY(Ity_I16,Ity_I16); + case Iop_CmpwNEZ32: case Iop_Left32: UNARY(Ity_I32,Ity_I32); + case Iop_CmpwNEZ64: case Iop_Left64: UNARY(Ity_I64,Ity_I64); + case Iop_MullU8: case Iop_MullS8: BINARY(Ity_I8,Ity_I8, Ity_I16); case Iop_MullU16: case Iop_MullS16: diff --git 
a/VEX/priv/ir/iropt.c b/VEX/priv/ir/iropt.c index db7aa6b0bc..4c3b0283fa 100644 --- a/VEX/priv/ir/iropt.c +++ b/VEX/priv/ir/iropt.c @@ -1011,19 +1011,6 @@ static IRExpr* fold_Expr ( IRExpr* e ) notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1))); break; - case Iop_Neg64: - e2 = IRExpr_Const(IRConst_U64( - - (e->Iex.Unop.arg->Iex.Const.con->Ico.U64))); - break; - case Iop_Neg32: - e2 = IRExpr_Const(IRConst_U32( - - (e->Iex.Unop.arg->Iex.Const.con->Ico.U32))); - break; - case Iop_Neg8: - e2 = IRExpr_Const(IRConst_U8(toUChar( - - (e->Iex.Unop.arg->Iex.Const.con->Ico.U8)))); - break; - case Iop_64to8: { ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; w64 &= 0xFFULL; @@ -1072,6 +1059,39 @@ static IRExpr* fold_Expr ( IRExpr* e ) ))); break; + case Iop_CmpwNEZ32: { + UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; + if (w32 == 0) + e2 = IRExpr_Const(IRConst_U32( 0 )); + else + e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF )); + break; + } + case Iop_CmpwNEZ64: { + ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; + if (w64 == 0) + e2 = IRExpr_Const(IRConst_U64( 0 )); + else + e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL )); + break; + } + + case Iop_Left32: { + UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32; + Int s32 = (Int)(u32 & 0xFFFFFFFF); + s32 = (s32 | (-s32)); + e2 = IRExpr_Const( IRConst_U32( (UInt)s32 )); + break; + } + + case Iop_Left64: { + ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64; + Long s64 = (Long)u64; + s64 = (s64 | (-s64)); + e2 = IRExpr_Const( IRConst_U64( (ULong)s64 )); + break; + } + default: goto unhandled; } @@ -1465,13 +1485,20 @@ static IRExpr* fold_Expr ( IRExpr* e ) e2 = IRExpr_Const(IRConst_U32(0)); } else - /* And32(0,x) ==> 0 */ - if (e->Iex.Binop.op == Iop_And32 + /* And32/Shl32(0,x) ==> 0 */ + if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32) && e->Iex.Binop.arg1->tag == Iex_Const && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) { e2 = IRExpr_Const(IRConst_U32(0)); } else + /* Or8(0,x) ==> x */ + if (e->Iex.Binop.op == Iop_Or8 + && e->Iex.Binop.arg1->tag == Iex_Const + && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) { + e2 = e->Iex.Binop.arg2; + } else + /* Or32(0,x) ==> x */ if (e->Iex.Binop.op == Iop_Or32 && e->Iex.Binop.arg1->tag == Iex_Const @@ -3698,6 +3725,94 @@ static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp ) 'single-shot', so once a binding is used, it is marked as no longer available, by setting its .bindee field to NULL. 
*/ +static inline Bool is_Unop ( IRExpr* e, IROp op ) { + return e->tag == Iex_Unop && e->Iex.Unop.op == op; +} +static inline Bool is_Binop ( IRExpr* e, IROp op ) { + return e->tag == Iex_Binop && e->Iex.Binop.op == op; +} + +static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 ) +{ + switch (op) { + case Iop_Or32: + /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */ + if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32)) + return IRExpr_Unop( Iop_CmpwNEZ32, + IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg, + a2->Iex.Unop.arg ) ); + break; + default: + break; + } + /* no reduction rule applies */ + return IRExpr_Binop( op, a1, a2 ); +} + +static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa ) +{ + switch (op) { + case Iop_CmpwNEZ64: + /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */ + if (is_Binop(aa, Iop_Or64) + && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64)) + return fold_IRExpr_Unop( + Iop_CmpwNEZ64, + IRExpr_Binop(Iop_Or64, + aa->Iex.Binop.arg1->Iex.Unop.arg, + aa->Iex.Binop.arg2)); + /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */ + if (is_Binop(aa, Iop_Or64) + && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64)) + return fold_IRExpr_Unop( + Iop_CmpwNEZ64, + IRExpr_Binop(Iop_Or64, + aa->Iex.Binop.arg1, + aa->Iex.Binop.arg2->Iex.Unop.arg)); + break; + case Iop_CmpNEZ64: + /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */ + if (is_Unop(aa, Iop_Left64)) + return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg); + break; + case Iop_CmpwNEZ32: + /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */ + if (is_Unop(aa, Iop_CmpwNEZ32)) + return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg ); + break; + case Iop_CmpNEZ32: + /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */ + if (is_Unop(aa, Iop_Left32)) + return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg); + break; + case Iop_Left32: + /* Left32( Left32(x) ) --> Left32(x) */ + if (is_Unop(aa, Iop_Left32)) + return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg ); + break; + case Iop_32to1: + /* 32to1( 1Uto32 ( x ) ) --> x */ + if (is_Unop(aa, Iop_1Uto32)) + return aa->Iex.Unop.arg; + /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */ + if (is_Unop(aa, Iop_CmpwNEZ32)) + return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg ); + break; + case Iop_64to1: + /* 64to1( 1Uto64 ( x ) ) --> x */ + if (is_Unop(aa, Iop_1Uto64)) + return aa->Iex.Unop.arg; + /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */ + if (is_Unop(aa, Iop_CmpwNEZ64)) + return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg ); + break; + default: + break; + } + /* no reduction rule applies */ + return IRExpr_Unop( op, aa ); +} + static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e ) { IRExpr* e2; @@ -3740,13 +3855,13 @@ static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e ) atbSubst_Expr(env, e->Iex.Triop.arg3) ); case Iex_Binop: - return IRExpr_Binop( + return fold_IRExpr_Binop( e->Iex.Binop.op, atbSubst_Expr(env, e->Iex.Binop.arg1), atbSubst_Expr(env, e->Iex.Binop.arg2) ); case Iex_Unop: - return IRExpr_Unop( + return fold_IRExpr_Unop( e->Iex.Unop.op, atbSubst_Expr(env, e->Iex.Unop.arg) ); diff --git a/VEX/priv/main/vex_util.c b/VEX/priv/main/vex_util.c index 6caca169e5..938509918f 100644 --- a/VEX/priv/main/vex_util.c +++ b/VEX/priv/main/vex_util.c @@ -441,6 +441,10 @@ UInt vprintf_wrk ( void(*sink)(HChar), PAD(len1); PUT('0'); PUT('x'); PUTSTR(intbuf); PAD(len3); break; } + case '%': { + PUT('%'); + break; + } default: /* no idea what it is. Print the format literally and move on. 
*/ diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index f781ed8e88..0dc8b2bd3c 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -422,7 +422,6 @@ typedef Iop_CmpNE8, Iop_CmpNE16, Iop_CmpNE32, Iop_CmpNE64, /* Tags for unary ops */ Iop_Not8, Iop_Not16, Iop_Not32, Iop_Not64, - Iop_Neg8, Iop_Neg16, Iop_Neg32, Iop_Neg64, /* -- Ordering not important after here. -- */ @@ -445,6 +444,8 @@ typedef /* As a sop to Valgrind-Memcheck, the following are useful. */ Iop_CmpNEZ8, Iop_CmpNEZ16, Iop_CmpNEZ32, Iop_CmpNEZ64, + Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */ + Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /* \x -> x | -x */ /* PowerPC-style 3-way integer comparisons. Without them it is difficult to simulate PPC efficiently.
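For reference, the x86/amd64 selectors in the patch lower Left32/64 as
mov; neg; or, and CmpwNEZ32/64 as the same sequence followed by an
arithmetic right shift by 31 or 63.  A small C sketch of those
identities (the helper names are invented, and the cast-and-shift line
assumes arithmetic right shift of signed values, as on the targets in
question):

   #include <assert.h>
   #include <stdint.h>

   /* Left32 as lowered by the isel code: mov; neg; or. */
   static uint32_t left32_lowered ( uint32_t x )
   {
      uint32_t dst = 0u - x;   /* NEG */
      dst |= x;                /* OR: dst = x | -x */
      return dst;
   }

   /* CmpwNEZ32 as lowered by the isel code: mov; neg; or; sar 31. */
   static uint32_t cmpwNEZ32_lowered ( uint32_t x )
   {
      uint32_t dst = 0u - x;   /* NEG */
      dst |= x;                /* OR: top bit is now 1 iff x != 0 */
      /* SAR 31: replicate the top bit across the word.  Assumes >>
         on int32_t is an arithmetic shift. */
      return (uint32_t)((int32_t)dst >> 31);
   }

   int main ( void )
   {
      static const uint32_t vals[] = { 0, 1, 2, 0x80, 0x80000000u, ~0u };
      for (unsigned i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) {
         uint32_t x = vals[i];
         /* Independent references: CmpwNEZ from its definition, Left
            as "all bits from the rightmost 1 upwards are set". */
         uint32_t cmpw_ref = (x == 0) ? 0 : ~0u;
         uint32_t left_ref = (x == 0) ? 0 : ~((x & (0u - x)) - 1u);
         assert( cmpwNEZ32_lowered(x) == cmpw_ref );
         assert( left32_lowered(x)    == left_ref );
      }
      return 0;
   }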