From: Julian Seward
Date: Tue, 27 Jan 2015 23:17:02 +0000 (+0000)
Subject: AMD64 front end: translate AVX2 PMASKMOV load instructions (vector
X-Git-Tag: svn/VALGRIND_3_11_0^2~107
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=47e5a822b5bff7426f3fe00aaa66d497e78a8b85;p=thirdparty%2Fvalgrind.git

AMD64 front end: translate AVX2 PMASKMOV load instructions (vector
conditional loads) using IR conditional-load statements (IRLoadG)
instead of the previous, rather ingenious, hack.

AMD64 back end:

* Add instruction selection, a new Ain_CLoad instruction, and code
  emission for 32- and 64-bit conditional loads (IRLoadG).

* Handle dirty helper calls that return a value and are conditional.
  These result from Memcheck's instrumentation of IRLoadGs.

No functional change. This is a cleanup as part of supporting AVX2
PMASKMOV loads and stores via the existing IR facilities for
conditional loads and stores.

git-svn-id: svn://svn.valgrind.org/vex/trunk@3075
---

diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index f17f41bb7a..d0c223324a 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -27255,6 +27255,9 @@ static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, const VexAbiInfo* vbi,
    }
    delta += alen;
 
+   for (i = 0; i < sizeof(res)/sizeof(res[0]); i++)
+      res[i] = IRTemp_INVALID;
+
    for (i = 0; i < 2 * (isYMM ? 2 : 1) * (ty == Ity_I32 ? 2 : 1); i++) {
       res[i] = newTemp(ty);
       cond = newTemp(Ity_I1);
@@ -27263,19 +27266,15 @@ static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, const VexAbiInfo* vbi,
                     ty == Ity_I32 ? getYMMRegLane32( rV, i )
                                   : getYMMRegLane64( rV, i ),
                     mkU(ty, 0) ));
-      assign( res[i],
-              IRExpr_ITE(
-                 mkexpr(cond),
-                 loadLE(ty, IRExpr_ITE(
-                               mkexpr(cond),
-                               binop(Iop_Add64, mkexpr(addr),
-                                     mkU64(i*(ty == Ity_I32 ? 4 : 8))),
-                               getIReg64(R_RSP)
-                            )
-                 ),
-                 mkU(ty, 0)
-              )
-      );
+      stmt(
+         IRStmt_LoadG(
+            Iend_LE,
+            ty == Ity_I32 ? ILGop_Ident32 : ILGop_Ident64,
+            res[i],
+            binop(Iop_Add64, mkexpr(addr), mkU64(i * (ty == Ity_I32 ? 4 : 8))),
+            ty == Ity_I32 ? mkU32(0) : mkU64(0),
+            mkexpr(cond)
+         ));
    }
    switch (ty) {
       case Ity_I32:
diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index 7118e2ba8e..13925fab3f 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -745,6 +745,17 @@ AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
    vassert(cond != Acc_ALWAYS);
    return i;
 }
+AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
+                               AMD64AMode* addr, HReg dst ) {
+   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag = Ain_CLoad;
+   i->Ain.CLoad.cond = cond;
+   i->Ain.CLoad.szB  = szB;
+   i->Ain.CLoad.addr = addr;
+   i->Ain.CLoad.dst  = dst;
+   vassert(cond != Acc_ALWAYS);
+   return i;
+}
 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
    i->tag = Ain_MovxLQ;
@@ -1121,6 +1132,16 @@ void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
          vex_printf(",");
          ppHRegAMD64(i->Ain.CMov64.dst);
          return;
+      case Ain_CLoad:
+         vex_printf("if (%%rflags.%s) { ",
+                    showAMD64CondCode(i->Ain.CLoad.cond));
+         vex_printf("mov%c (", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
+         ppAMD64AMode(i->Ain.CLoad.addr);
+         vex_printf("), ");
+         (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+            (i->Ain.CLoad.dst);
+         vex_printf(" }");
+         return;
       case Ain_MovxLQ:
         vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
         ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
@@ -1463,6 +1484,10 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
          addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
          addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
          return;
+      case Ain_CLoad:
+         addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
+         addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
+         return;
       case Ain_MovxLQ:
          addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
         addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
@@ -1695,6 +1720,10 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
          mapRegs_AMD64RM(m, i->Ain.CMov64.src);
          mapReg(m, &i->Ain.CMov64.dst);
          return;
+      case Ain_CLoad:
+         mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
+         mapReg(m, &i->Ain.CLoad.dst);
+         return;
       case Ain_MovxLQ:
          mapReg(m, &i->Ain.MovxLQ.src);
          mapReg(m, &i->Ain.MovxLQ.dst);
@@ -2671,43 +2700,113 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
 
       case Ain_Call: {
-         if (i->Ain.Call.cond != Acc_ALWAYS
-             && i->Ain.Call.rloc.pri != RLPri_None) {
-            /* The call might not happen (it isn't unconditional) and it
-               returns a result. In this case we will need to generate a
-               control flow diamond to put 0x555..555 in the return
-               register(s) in the case where the call doesn't happen. If
-               this ever becomes necessary, maybe copy code from the ARM
-               equivalent. Until that day, just give up. */
-            goto bad;
-         }
-         /* As per detailed comment for Ain_Call in
-            getRegUsage_AMD64Instr above, %r11 is used as an address
-            temporary. */
-         /* jump over the following two insns if the condition does not
-            hold */
-         Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
-         if (i->Ain.Call.cond != Acc_ALWAYS) {
-            *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
-            *p++ = shortImm ? 10 : 13;
-            /* 10 or 13 bytes in the next two insns */
-         }
-         if (shortImm) {
-            /* 7 bytes: movl sign-extend(imm32), %r11 */
-            *p++ = 0x49;
-            *p++ = 0xC7;
-            *p++ = 0xC3;
-            p = emit32(p, (UInt)i->Ain.Call.target);
+         /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
+            above, %r11 is used as an address temporary. */
+         /* If we don't need to do any fixup actions in the case that the
+            call doesn't happen, just do the simple thing and emit
+            straight-line code. This is usually the case. */
+         if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
+             || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
+            /* jump over the following two insns if the condition does
+               not hold */
+            Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
+            if (i->Ain.Call.cond != Acc_ALWAYS) {
+               *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+               *p++ = shortImm ? 10 : 13;
+               /* 10 or 13 bytes in the next two insns */
+            }
+            if (shortImm) {
+               /* 7 bytes: movl sign-extend(imm32), %r11 */
+               *p++ = 0x49;
+               *p++ = 0xC7;
+               *p++ = 0xC3;
+               p = emit32(p, (UInt)i->Ain.Call.target);
+            } else {
+               /* 10 bytes: movabsq $target, %r11 */
+               *p++ = 0x49;
+               *p++ = 0xBB;
+               p = emit64(p, i->Ain.Call.target);
+            }
+            /* 3 bytes: call *%r11 */
+            *p++ = 0x41;
+            *p++ = 0xFF;
+            *p++ = 0xD3;
          } else {
-            /* 10 bytes: movabsq $target, %r11 */
+            Int delta;
+            /* Complex case. We have to generate an if-then-else diamond. */
+            // before:
+            //   j{!cond} else:
+            //   movabsq $target, %r11
+            //   call* %r11
+            // preElse:
+            //   jmp after:
+            // else:
+            //   movabsq $0x5555555555555555, %rax // possibly
+            //   movq %rax, %rdx // possibly
+            // after:
+
+            // before:
+            UChar* pBefore = p;
+
+            // j{!cond} else:
+            *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+            *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+            // movabsq $target, %r11
             *p++ = 0x49;
             *p++ = 0xBB;
             p = emit64(p, i->Ain.Call.target);
+
+            // call* %r11
+            *p++ = 0x41;
+            *p++ = 0xFF;
+            *p++ = 0xD3;
+
+            // preElse:
+            UChar* pPreElse = p;
+
+            // jmp after:
+            *p++ = 0xEB;
+            *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+            // else:
+            UChar* pElse = p;
+
+            /* Do the 'else' actions */
+            switch (i->Ain.Call.rloc.pri) {
+               case RLPri_Int:
+                  // movabsq $0x5555555555555555, %rax
+                  *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
+                  break;
+               case RLPri_2Int:
+                  vassert(0); //ATC
+                  // movabsq $0x5555555555555555, %rax
+                  *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
+                  // movq %rax, %rdx
+                  *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
+               case RLPri_None: case RLPri_INVALID: default:
+                  vassert(0);
+            }
+
+            // after:
+            UChar* pAfter = p;
+
+            // Fix up the branch offsets. The +2s in the offset
+            // calculations are there because x86 requires conditional
+            // branches to have their offset stated relative to the
+            // instruction immediately following the branch insn. And in
+            // both cases the branch insns are 2 bytes long.
+
+            // First, the "j{!cond} else:" at pBefore.
+            delta = (Int)(Long)(pElse - (pBefore + 2));
+            vassert(delta >= 0 && delta < 100/*arbitrary*/);
+            *(pBefore+1) = (UChar)delta;
+
+            // And secondly, the "jmp after:" at pPreElse.
+            delta = (Int)(Long)(pAfter - (pPreElse + 2));
+            vassert(delta >= 0 && delta < 100/*arbitrary*/);
+            *(pPreElse+1) = (UChar)delta;
          }
-         /* 3 bytes: call *%r11 */
-         *p++ = 0x41;
-         *p++ = 0xFF;
-         *p++ = 0xD3;
          goto done;
       }
 
@@ -2917,6 +3016,35 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
         }
         break;
 
+      case Ain_CLoad: {
+         vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
+
+         /* Only 32- or 64-bit variants are allowed. */
+         vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
+
+         /* Use ptmp for backpatching conditional jumps. */
+         ptmp = NULL;
+
+         /* jmp fwds if !condition */
+         *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
+         ptmp = p; /* fill in this bit later */
+         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+         /* Now the load. Either a normal 64 bit load or a normal 32 bit
+            load, which, by the default zero-extension rule, zeroes out
+            the upper half of the destination, as required. */
+         rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
+         *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
+         *p++ = 0x8B;
+         p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
+
+         /* Fix up the conditional branch */
+         Int delta = p - ptmp;
+         vassert(delta > 0 && delta < 40);
+         *ptmp = toUChar(delta-1);
+         goto done;
+      }
+
       case Ain_MovxLQ:
          /* No, _don't_ ask me why the sense of the args has to be
             different in the S vs Z case. I don't know. */
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index 09abfe0f7f..02c89e2434 100644
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -368,6 +368,7 @@ typedef
       Ain_XIndir,      /* indirect transfer to GA */
       Ain_XAssisted,   /* assisted transfer to GA */
       Ain_CMov64,      /* conditional move */
+      Ain_CLoad,       /* cond. load to int reg, 32 bit ZX or 64 bit only */
       Ain_MovxLQ,      /* reg-reg move, zx-ing/sx-ing top half */
       Ain_LoadEX,      /* mov{s,z}{b,w,l}q from mem to reg */
       Ain_Store,       /* store 32/16/8 bit value in memory */
@@ -505,6 +506,14 @@ typedef
             AMD64RM* src;
             HReg dst;
          } CMov64;
+         /* conditional load to int reg, 32 bit ZX or 64 bit only.
+            cond may not be Acc_ALWAYS. */
+         struct {
+            AMD64CondCode cond;
+            UChar szB; /* 4 or 8 only */
+            AMD64AMode* addr;
+            HReg dst;
+         } CLoad;
          /* reg-reg move, sx-ing/zx-ing top half */
          struct {
             Bool syned;
@@ -710,6 +719,8 @@ extern AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
 extern AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
                                           AMD64CondCode cond, IRJumpKind jk );
 extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
+extern AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
+                                      AMD64AMode* addr, HReg dst );
 extern AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst );
 extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
                                        AMD64AMode* src, HReg dst );
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index e6ca6f11f4..792629455e 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -4288,6 +4288,32 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
 
    switch (stmt->tag) {
 
+   /* --------- LOADG (guarded load) --------- */
+   case Ist_LoadG: {
+      IRLoadG* lg = stmt->Ist.LoadG.details;
+      if (lg->end != Iend_LE)
+         goto stmt_fail;
+
+      UChar szB = 0; /* invalid */
+      switch (lg->cvt) {
+         case ILGop_Ident32: szB = 4; break;
+         case ILGop_Ident64: szB = 8; break;
+         default: break;
+      }
+      if (szB == 0)
+         goto stmt_fail;
+
+      AMD64AMode* amAddr = iselIntExpr_AMode(env, lg->addr);
+      HReg rAlt = iselIntExpr_R(env, lg->alt);
+      HReg rDst = lookupIRTemp(env, lg->dst);
+      /* Get the alt value into the dst. We'll do a conditional load
+         which overwrites it -- or not -- with loaded data. */
+      addInstr(env, mk_iMOVsd_RR(rAlt, rDst));
+      AMD64CondCode cc = iselCondCode(env, lg->guard);
+      addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst));
+      return;
+   }
+
    /* --------- STORE --------- */
    case Ist_Store: {
       IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
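
As an aside on the front-end change: the per-lane behaviour that the
IRStmt_LoadG form above encodes can be modelled by a small standalone C
program (illustration only, not VEX code; the helper name masked_load_32
and the lane count are invented for this sketch). Each lane's guard is
the sign bit of the corresponding mask lane and the 'alt' value is 0, so
a masked-off lane becomes zero without any memory access at all --
unlike the replaced ITE scheme, which still issued a dummy load from
%rsp for such lanes.

/* Standalone model of a VPMASKMOVD-style conditional load as now
   expressed with IRStmt_LoadG: guard = sign bit of the mask lane,
   cvt = ILGop_Ident32, alt = 0.  Not VEX code. */
#include <stdint.h>
#include <stdio.h>

static void masked_load_32 ( uint32_t* dst, const int32_t* mask,
                             const uint32_t* src, int nLanes )
{
   for (int i = 0; i < nLanes; i++) {
      /* Guard: the lane is loaded iff the mask lane's MSB is set. */
      int guard = mask[i] < 0;
      /* When guard is 0, &src[i] is not accessed at all; the lane just
         takes the 'alt' value (zero), matching the IRLoadG semantics. */
      dst[i] = guard ? src[i] : 0;
   }
}

int main ( void )
{
   uint32_t src[4]  = { 11, 22, 33, 44 };
   int32_t  mask[4] = { -1, 0, -1, 0 };   /* load lanes 0 and 2 only */
   uint32_t dst[4];
   masked_load_32(dst, mask, src, 4);
   for (int i = 0; i < 4; i++)
      printf("%u ", dst[i]);              /* prints: 11 0 33 0 */
   printf("\n");
   return 0;
}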
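
The back-end changes also share one short-branch backpatching idiom: the
Ain_CLoad case and the conditional-call diamond both emit a Jcc with a
zero displacement byte, emit the guarded body, and then patch the
displacement once the body length is known. The 8-bit offset is relative
to the end of the 2-byte branch, which is where the 'delta - 1' and
'(pElse - (pBefore + 2))' calculations come from. A compilable sketch of
that pattern follows (the helper name emit_skip_if_not and the example
body bytes are invented, not VEX code):

/* Sketch of the "jcc forward over a guarded body, backpatch later"
   pattern used by Ain_CLoad and the conditional-call diamond. */
#include <assert.h>
#include <stdio.h>

typedef unsigned char UChar;

/* cond is the low nibble of the 0x70..0x7F short-Jcc opcodes; 'cond ^ 1'
   inverts it, as in the real emitter. */
static UChar* emit_skip_if_not ( UChar* p, UChar cond, UChar** dispByte )
{
   *p++ = (UChar)(0x70 + (0xF & (cond ^ 1)));  /* j{!cond} forward */
   *dispByte = p;                              /* patched later */
   *p++ = 0;                                   /* don't know how far yet */
   return p;
}

int main ( void )
{
   UChar  buf[32];
   UChar* p    = buf;
   UChar* ptmp = NULL;

   p = emit_skip_if_not(p, 0x4 /* the x86 'Z' condition nibble */, &ptmp);

   /* ... guarded body, e.g. the 2..8 byte mov emitted for Ain_CLoad ... */
   *p++ = 0x8B; *p++ = 0x07;                   /* movl (%rdi), %eax (example) */

   /* Patch: the offset runs from the end of the 2-byte Jcc to 'p'. */
   int delta = (int)(p - ptmp);
   assert(delta > 0 && delta < 127);
   *ptmp = (UChar)(delta - 1);

   printf("jcc displacement = %d bytes\n", *ptmp);  /* 2 here */
   return 0;
}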