From: Julian Seward
Date: Thu, 8 Nov 2012 10:57:08 +0000 (+0000)
Subject: Improve accuracy of definedness tracking through the x86 PMOVMSKB and
X-Git-Tag: svn/VALGRIND_3_9_0^2~215
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f908842de884092621feff330e5e119aeb60d50f;p=thirdparty%2Fvalgrind.git

Improve accuracy of definedness tracking through the x86 PMOVMSKB and
BSF instructions, as the lack of it causes false positives (VEX side).
Fixes #308627.

Combined efforts of Patrick J. LoPresti and me.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2559
---
diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h
index bbcc95f60a..f687d90a93 100644
--- a/VEX/priv/guest_amd64_defs.h
+++ b/VEX/priv/guest_amd64_defs.h
@@ -141,8 +141,6 @@ extern void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*, HWord );
 extern ULong amd64g_calculate_mmx_pmaddwd ( ULong, ULong );
 extern ULong amd64g_calculate_mmx_psadbw ( ULong, ULong );
-extern ULong amd64g_calculate_mmx_pmovmskb ( ULong );
-extern ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo );
 extern ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi );
diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c
index 5f2c6c5d5e..8cc11ad51e 100644
--- a/VEX/priv/guest_amd64_helpers.c
+++ b/VEX/priv/guest_amd64_helpers.c
@@ -2997,21 +2997,6 @@ ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
 );
 }
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
-{
- ULong r = 0;
- if (xx & (1ULL << (64-1))) r |= (1<<7);
- if (xx & (1ULL << (56-1))) r |= (1<<6);
- if (xx & (1ULL << (48-1))) r |= (1<<5);
- if (xx & (1ULL << (40-1))) r |= (1<<4);
- if (xx & (1ULL << (32-1))) r |= (1<<3);
- if (xx & (1ULL << (24-1))) r |= (1<<2);
- if (xx & (1ULL << (16-1))) r |= (1<<1);
- if (xx & (1ULL << ( 8-1))) r |= (1<<0);
- return r;
-}
-
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
 {
@@ -3028,14 +3013,6 @@ ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
 return (ULong)t;
 }
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
-{
- ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi );
- ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo );
- return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
-}
-
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
 {
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 7474802525..84378f80b6 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -7789,11 +7789,15 @@ ULong dis_bs_E_G ( VexAbiInfo* vbi,
 /* First, widen src to 64 bits if it is not already. */
 assign( src64, widenUto64(mkexpr(src)) );
- /* Generate an 8-bit expression which is zero iff the
- original is zero, and nonzero otherwise */
+ /* Generate an 8-bit expression which is zero iff the original is
+ zero, and nonzero otherwise. Ask for a CmpNE version which, if
+ instrumented by Memcheck, is instrumented expensively, since
+ this may be used on the output of a preceding movmskb insn,
+ which has been known to be partially defined, and in need of
+ careful handling. */
 assign( src8,
 unop(Iop_1Uto8,
- binop(Iop_CmpNE64,
+ binop(Iop_ExpCmpNE64,
 mkexpr(src64), mkU64(0))) );
 /* Flags: Z is 1 iff source value is zero.
All others @@ -10277,14 +10281,15 @@ static Long dis_PMOVMSKB_128 ( VexAbiInfo* vbi, Prefix pfx, UInt rG = gregOfRexRM(pfx,modrm); IRTemp t0 = newTemp(Ity_I64); IRTemp t1 = newTemp(Ity_I64); - IRTemp t5 = newTemp(Ity_I64); + IRTemp t5 = newTemp(Ity_I32); assign(t0, getXMMRegLane64(rE, 0)); assign(t1, getXMMRegLane64(rE, 1)); - assign(t5, mkIRExprCCall( Ity_I64, 0/*regparms*/, - "amd64g_calculate_sse_pmovmskb", - &amd64g_calculate_sse_pmovmskb, - mkIRExprVec_2( mkexpr(t1), mkexpr(t0) ))); - putIReg32(rG, unop(Iop_64to32,mkexpr(t5))); + assign(t5, + unop(Iop_16Uto32, + binop(Iop_8HLto16, + unop(Iop_GetMSBs8x8, mkexpr(t1)), + unop(Iop_GetMSBs8x8, mkexpr(t0))))); + putIReg32(rG, mkexpr(t5)); DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE), nameIReg32(rG)); delta += 1; @@ -13443,7 +13448,7 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK, } /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in - mmx(G), turn them into a byte, and put zero-extend of it in + mmx(E), turn them into a byte, and put zero-extend of it in ireg(G). */ if (haveNo66noF2noF3(pfx) && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { @@ -13451,14 +13456,10 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK, if (epartIsReg(modrm)) { do_MMX_preamble(); t0 = newTemp(Ity_I64); - t1 = newTemp(Ity_I64); + t1 = newTemp(Ity_I32); assign(t0, getMMXReg(eregLO3ofRM(modrm))); - assign(t1, mkIRExprCCall( - Ity_I64, 0/*regparms*/, - "amd64g_calculate_mmx_pmovmskb", - &amd64g_calculate_mmx_pmovmskb, - mkIRExprVec_1(mkexpr(t0)))); - putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1))); + assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0)))); + putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1)); DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), nameIReg32(gregOfRexRM(pfx,modrm))); delta += 1; diff --git a/VEX/priv/guest_x86_defs.h b/VEX/priv/guest_x86_defs.h index af83cb7a3d..a47040af75 100644 --- a/VEX/priv/guest_x86_defs.h +++ b/VEX/priv/guest_x86_defs.h @@ -134,8 +134,6 @@ ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, extern ULong x86g_calculate_mmx_pmaddwd ( ULong, ULong ); extern ULong x86g_calculate_mmx_psadbw ( ULong, ULong ); -extern UInt x86g_calculate_mmx_pmovmskb ( ULong ); -extern UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); /* --- DIRTY HELPERS --- */ diff --git a/VEX/priv/guest_x86_helpers.c b/VEX/priv/guest_x86_helpers.c index 4676276618..35938c99cf 100644 --- a/VEX/priv/guest_x86_helpers.c +++ b/VEX/priv/guest_x86_helpers.c @@ -2513,21 +2513,6 @@ ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) ); } -/* CALLED FROM GENERATED CODE: CLEAN HELPER */ -UInt x86g_calculate_mmx_pmovmskb ( ULong xx ) -{ - UInt r = 0; - if (xx & (1ULL << (64-1))) r |= (1<<7); - if (xx & (1ULL << (56-1))) r |= (1<<6); - if (xx & (1ULL << (48-1))) r |= (1<<5); - if (xx & (1ULL << (40-1))) r |= (1<<4); - if (xx & (1ULL << (32-1))) r |= (1<<3); - if (xx & (1ULL << (24-1))) r |= (1<<2); - if (xx & (1ULL << (16-1))) r |= (1<<1); - if (xx & (1ULL << ( 8-1))) r |= (1<<0); - return r; -} - /* CALLED FROM GENERATED CODE: CLEAN HELPER */ ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy ) { @@ -2544,14 +2529,6 @@ ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy ) return (ULong)t; } -/* CALLED FROM GENERATED CODE: CLEAN HELPER */ -UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ) -{ - UInt rHi8 = x86g_calculate_mmx_pmovmskb ( w64hi ); - UInt rLo8 = x86g_calculate_mmx_pmovmskb ( w64lo ); - return ((rHi8 & 0xFF) << 8) | 
(rLo8 & 0xFF); -} - /*---------------------------------------------------------------*/ /*--- Helpers for dealing with segment overrides. ---*/ diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index e98762d5d8..1a17d89a2e 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -716,6 +716,7 @@ static IROp mkSizedOp ( IRType ty, IROp op8 ) || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 || op8 == Iop_CasCmpNE8 + || op8 == Iop_ExpCmpNE8 || op8 == Iop_Not8); adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); return adj + op8; @@ -6385,10 +6386,14 @@ UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds ) ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ), nameIReg(sz, gregOfRM(modrm))); - /* Generate an 8-bit expression which is zero iff the - original is zero, and nonzero otherwise */ + /* Generate an 8-bit expression which is zero iff the original is + zero, and nonzero otherwise. Ask for a CmpNE version which, if + instrumented by Memcheck, is instrumented expensively, since + this may be used on the output of a preceding movmskb insn, + which has been known to be partially defined, and in need of + careful handling. */ assign( src8, - unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8), + unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_ExpCmpNE8), mkexpr(src), mkU(ty,0))) ); /* Flags: Z is 1 iff source value is zero. All others @@ -9051,7 +9056,7 @@ DisResult disInstr_X86_WRK ( /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in - mmx(G), turn them into a byte, and put zero-extend of it in + mmx(E), turn them into a byte, and put zero-extend of it in ireg(G). */ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) { modrm = insn[2]; @@ -9060,11 +9065,7 @@ DisResult disInstr_X86_WRK ( t0 = newTemp(Ity_I64); t1 = newTemp(Ity_I32); assign(t0, getMMXReg(eregOfRM(modrm))); - assign(t1, mkIRExprCCall( - Ity_I32, 0/*regparms*/, - "x86g_calculate_mmx_pmovmskb", - &x86g_calculate_mmx_pmovmskb, - mkIRExprVec_1(mkexpr(t0)))); + assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0)))); putIReg(4, gregOfRM(modrm), mkexpr(t1)); DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)), nameIReg(4,gregOfRM(modrm))); @@ -10903,11 +10904,9 @@ DisResult disInstr_X86_WRK ( goto decode_success; } - /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in - xmm(G), turn them into a byte, and put zero-extend of it in - ireg(G). Doing this directly is just too cumbersome; give up - therefore and call a helper. */ - /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */ + /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes + in xmm(E), turn them into a byte, and put zero-extend of it in + ireg(G). 
*/ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) { modrm = insn[2]; if (epartIsReg(modrm)) { @@ -10916,11 +10915,11 @@ DisResult disInstr_X86_WRK ( assign(t0, getXMMRegLane64(eregOfRM(modrm), 0)); assign(t1, getXMMRegLane64(eregOfRM(modrm), 1)); t5 = newTemp(Ity_I32); - assign(t5, mkIRExprCCall( - Ity_I32, 0/*regparms*/, - "x86g_calculate_sse_pmovmskb", - &x86g_calculate_sse_pmovmskb, - mkIRExprVec_2( mkexpr(t1), mkexpr(t0) ))); + assign(t5, + unop(Iop_16Uto32, + binop(Iop_8HLto16, + unop(Iop_GetMSBs8x8, mkexpr(t1)), + unop(Iop_GetMSBs8x8, mkexpr(t0))))); putIReg(4, gregOfRM(modrm), mkexpr(t5)); DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)), nameIReg(4,gregOfRM(modrm))); diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c index 12963907af..98e90f7ba4 100644 --- a/VEX/priv/host_amd64_isel.c +++ b/VEX/priv/host_amd64_isel.c @@ -791,7 +791,7 @@ static ULong bitmask8_to_bytemask64 ( UShort w8 ) This should handle expressions of 64, 32, 16 and 8-bit type. All results are returned in a 64-bit register. For 32-, 16- and 8-bit - expressions, the upper 32/16/24 bits are arbitrary, so you should + expressions, the upper 32/48/56 bits are arbitrary, so you should mask or sign extend partial values if necessary. */ @@ -1586,6 +1586,25 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) /* These are no-ops. */ return iselIntExpr_R(env, e->Iex.Unop.arg); + case Iop_GetMSBs8x8: { + /* Note: the following assumes the helper is of + signature + UInt fn ( ULong ), and is not a regparm fn. + */ + HReg dst = newVRegI(env); + HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); + fn = (HWord)h_generic_calc_GetMSBs8x8; + addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) ); + addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 )); + /* MovxLQ is not exactly the right thing here. We just + need to get the bottom 8 bits of RAX into dst, and zero + out everything else. Assuming that the helper returns + a UInt with the top 24 bits zeroed out, it'll do, + though. 
*/ + addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst)); + return dst; + } + default: break; } @@ -2223,13 +2242,15 @@ static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) || e->Iex.Binop.op == Iop_CmpLE64S || e->Iex.Binop.op == Iop_CmpLE64U || e->Iex.Binop.op == Iop_CasCmpEQ64 - || e->Iex.Binop.op == Iop_CasCmpNE64)) { + || e->Iex.Binop.op == Iop_CasCmpNE64 + || e->Iex.Binop.op == Iop_ExpCmpNE64)) { HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1)); switch (e->Iex.Binop.op) { case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z; - case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ; + case Iop_CmpNE64: + case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ; case Iop_CmpLT64S: return Acc_L; case Iop_CmpLT64U: return Acc_B; case Iop_CmpLE64S: return Acc_LE; diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c index b70ce88d20..fdc9eed54e 100644 --- a/VEX/priv/host_generic_simd64.c +++ b/VEX/priv/host_generic_simd64.c @@ -1169,6 +1169,20 @@ ULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy ) ); } +UInt h_generic_calc_GetMSBs8x8 ( ULong xx ) +{ + UInt r = 0; + if (xx & (1ULL << (64-1))) r |= (1<<7); + if (xx & (1ULL << (56-1))) r |= (1<<6); + if (xx & (1ULL << (48-1))) r |= (1<<5); + if (xx & (1ULL << (40-1))) r |= (1<<4); + if (xx & (1ULL << (32-1))) r |= (1<<3); + if (xx & (1ULL << (24-1))) r |= (1<<2); + if (xx & (1ULL << (16-1))) r |= (1<<1); + if (xx & (1ULL << ( 8-1))) r |= (1<<0); + return r; +} + /* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */ /* Tuple/select functions for 16x2 vectors. */ diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h index 0858583f56..deef9449ed 100644 --- a/VEX/priv/host_generic_simd64.h +++ b/VEX/priv/host_generic_simd64.h @@ -123,6 +123,8 @@ extern ULong h_generic_calc_Max8Ux8 ( ULong, ULong ); extern ULong h_generic_calc_Min16Sx4 ( ULong, ULong ); extern ULong h_generic_calc_Min8Ux8 ( ULong, ULong ); +extern UInt h_generic_calc_GetMSBs8x8 ( ULong ); + /* 32-bit SIMD HELPERS */ extern UInt h_generic_calc_Add16x2 ( UInt, UInt ); diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c index d342d923de..5513d7157a 100644 --- a/VEX/priv/host_x86_isel.c +++ b/VEX/priv/host_x86_isel.c @@ -1293,6 +1293,23 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) /* These are no-ops. */ return iselIntExpr_R(env, e->Iex.Unop.arg); + case Iop_GetMSBs8x8: { + /* Note: the following assumes the helper is of + signature + UInt fn ( ULong ), and is not a regparm fn. 
+ */ + HReg xLo, xHi; + HReg dst = newVRegI(env); + HWord fn = (HWord)h_generic_calc_GetMSBs8x8; + iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg); + addInstr(env, X86Instr_Push(X86RMI_Reg(xHi))); + addInstr(env, X86Instr_Push(X86RMI_Reg(xLo))); + addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 )); + add_to_esp(env, 2*4); + addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst)); + return dst; + } + default: break; } @@ -1840,7 +1857,8 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) && (e->Iex.Binop.op == Iop_CmpEQ16 || e->Iex.Binop.op == Iop_CmpNE16 || e->Iex.Binop.op == Iop_CasCmpEQ16 - || e->Iex.Binop.op == Iop_CasCmpNE16)) { + || e->Iex.Binop.op == Iop_CasCmpNE16 + || e->Iex.Binop.op == Iop_ExpCmpNE16)) { HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); HReg r = newVRegI(env); @@ -1848,9 +1866,12 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r)); addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r))); switch (e->Iex.Binop.op) { - case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z; - case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ; - default: vpanic("iselCondCode(x86): CmpXX16"); + case Iop_CmpEQ16: case Iop_CasCmpEQ16: + return Xcc_Z; + case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16: + return Xcc_NZ; + default: + vpanic("iselCondCode(x86): CmpXX16"); } } @@ -1882,13 +1903,15 @@ static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e ) || e->Iex.Binop.op == Iop_CmpLE32S || e->Iex.Binop.op == Iop_CmpLE32U || e->Iex.Binop.op == Iop_CasCmpEQ32 - || e->Iex.Binop.op == Iop_CasCmpNE32)) { + || e->Iex.Binop.op == Iop_CasCmpNE32 + || e->Iex.Binop.op == Iop_ExpCmpNE32)) { HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1); X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2); addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1)); switch (e->Iex.Binop.op) { case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z; - case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ; + case Iop_CmpNE32: + case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ; case Iop_CmpLT32S: return Xcc_L; case Iop_CmpLT32U: return Xcc_B; case Iop_CmpLE32S: return Xcc_LE; diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index 99b2266c23..b356f60d1f 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -147,6 +147,8 @@ void ppIROp ( IROp op ) str = "CasCmpEQ"; base = Iop_CasCmpEQ8; break; case Iop_CasCmpNE8 ... Iop_CasCmpNE64: str = "CasCmpNE"; base = Iop_CasCmpNE8; break; + case Iop_ExpCmpNE8 ... Iop_ExpCmpNE64: + str = "ExpCmpNE"; base = Iop_ExpCmpNE8; break; case Iop_Not8 ... 
Iop_Not64: str = "Not"; base = Iop_Not8; break; /* other cases must explicitly "return;" */ @@ -581,6 +583,7 @@ void ppIROp ( IROp op ) case Iop_Reverse64_16x4: vex_printf("Reverse64_16x4"); return; case Iop_Reverse64_32x2: vex_printf("Reverse64_32x2"); return; case Iop_Abs32Fx2: vex_printf("Abs32Fx2"); return; + case Iop_GetMSBs8x8: vex_printf("GetMSBs8x8"); return; case Iop_CmpNEZ32x2: vex_printf("CmpNEZ32x2"); return; case Iop_CmpNEZ16x4: vex_printf("CmpNEZ16x4"); return; @@ -2271,18 +2274,18 @@ void typeOfPrimop ( IROp op, UNARY(Ity_I64, Ity_I64); case Iop_CmpEQ8: case Iop_CmpNE8: - case Iop_CasCmpEQ8: case Iop_CasCmpNE8: + case Iop_CasCmpEQ8: case Iop_CasCmpNE8: case Iop_ExpCmpNE8: COMPARISON(Ity_I8); case Iop_CmpEQ16: case Iop_CmpNE16: - case Iop_CasCmpEQ16: case Iop_CasCmpNE16: + case Iop_CasCmpEQ16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16: COMPARISON(Ity_I16); case Iop_CmpEQ32: case Iop_CmpNE32: - case Iop_CasCmpEQ32: case Iop_CasCmpNE32: + case Iop_CasCmpEQ32: case Iop_CasCmpNE32: case Iop_ExpCmpNE32: case Iop_CmpLT32S: case Iop_CmpLE32S: case Iop_CmpLT32U: case Iop_CmpLE32U: COMPARISON(Ity_I32); case Iop_CmpEQ64: case Iop_CmpNE64: - case Iop_CasCmpEQ64: case Iop_CasCmpNE64: + case Iop_CasCmpEQ64: case Iop_CasCmpNE64: case Iop_ExpCmpNE64: case Iop_CmpLT64S: case Iop_CmpLE64S: case Iop_CmpLT64U: case Iop_CmpLE64U: COMPARISON(Ity_I64); @@ -2296,6 +2299,7 @@ void typeOfPrimop ( IROp op, case Iop_Left16: UNARY(Ity_I16,Ity_I16); case Iop_CmpwNEZ32: case Iop_Left32: UNARY(Ity_I32,Ity_I32); case Iop_CmpwNEZ64: case Iop_Left64: UNARY(Ity_I64,Ity_I64); + case Iop_GetMSBs8x8: UNARY(Ity_I64, Ity_I8); case Iop_MullU8: case Iop_MullS8: BINARY(Ity_I8,Ity_I8, Ity_I16); diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c index b7e3d9a75d..1537df686f 100644 --- a/VEX/priv/ir_opt.c +++ b/VEX/priv/ir_opt.c @@ -1836,16 +1836,22 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e ) /* -- CmpNE -- */ case Iop_CmpNE8: + case Iop_CasCmpNE8: + case Iop_ExpCmpNE8: e2 = IRExpr_Const(IRConst_U1(toBool( ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8) != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))))); break; case Iop_CmpNE32: + case Iop_CasCmpNE32: + case Iop_ExpCmpNE32: e2 = IRExpr_Const(IRConst_U1(toBool( (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))); break; case Iop_CmpNE64: + case Iop_CasCmpNE64: + case Iop_ExpCmpNE64: e2 = IRExpr_Const(IRConst_U1(toBool( (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))); diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index f399bb25cd..99eaaaf6a0 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -437,6 +437,10 @@ typedef Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64, Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64, + /* Exactly like CmpNE8/16/32/64, but carrying the additional + hint that these needs expensive definedness tracking. */ + Iop_ExpCmpNE8, Iop_ExpCmpNE16, Iop_ExpCmpNE32, Iop_ExpCmpNE64, + /* -- Ordering not important after here. -- */ /* Widening multiplies */ @@ -991,6 +995,10 @@ typedef is undefined. */ Iop_Perm8x8, + /* MISC CONVERSION -- get high bits of each byte lane, a la + x86/amd64 pmovmskb */ + Iop_GetMSBs8x8, /* I64 -> I8 */ + /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate See floating-point equiwalents for details. */ Iop_Recip32x2, Iop_Rsqrte32x2,
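
[Editor's note, not part of the patch.] For context, the false positives addressed above typically come from SSE string scans in which a 16-byte load runs past the terminating NUL, so some byte lanes are undefined even though the PMOVMSKB/BSF outcome is fully decided by the defined lanes. The sketch below is illustrative only: the function name, the 16-byte alignment assumption, and the use of compiler intrinsics (GCC/Clang) are assumptions of the example; only the pmovmskb-then-bsf pattern and the partially defined input come from the commit message and bug #308627.

/* Illustrative sketch only -- not from the patch. */
#include <emmintrin.h>
#include <stddef.h>

static size_t strlen_sse2_sketch ( const char* s )
{
   /* Assumes 's' is 16-byte aligned, so each wide load stays on a
      valid page even when it reads past the terminating NUL.  The
      bytes after the NUL may never have been written. */
   const __m128i zeroes = _mm_setzero_si128();
   size_t i = 0;
   while (1) {
      __m128i chunk = _mm_load_si128( (const __m128i*)(s + i) );
      /* pmovmskb: one mask bit per byte lane (a pair of
         Iop_GetMSBs8x8 in the new translation above). */
      int mask = _mm_movemask_epi8( _mm_cmpeq_epi8(chunk, zeroes) );
      /* The mask != 0 test is the CmpNE/ExpCmpNE decision, and
         __builtin_ctz compiles to BSF/TZCNT (the dis_bs_E_G case). */
      if (mask != 0)
         return i + (size_t)__builtin_ctz( (unsigned)mask );
      i += 16;
   }
}

Roughly speaking, with the old clean-helper translation Memcheck could only approximate the PMOVMSKB result's definedness from its whole inputs, so the mask != 0 branch looked undefined once any loaded byte was; expressing the operation as Iop_GetMSBs8x8 and asking for the expensively instrumented Iop_ExpCmpNE* lets the defined low bit that actually decides the branch be tracked through precisely.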