From: Julian Seward Date: Mon, 2 Apr 2012 21:54:49 +0000 (+0000) Subject: Add translation chaining support for amd64, x86 and ARM (VEX side). X-Git-Tag: svn/VALGRIND_3_8_1^2~182^2~17 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c7e328f7310529480a24606771dc9e516a352973;p=thirdparty%2Fvalgrind.git Add translation chaining support for amd64, x86 and ARM (VEX side). See #296422. git-svn-id: svn://svn.valgrind.org/vex/branches/TCHAIN@2273 --- diff --git a/VEX/Makefile-gcc b/VEX/Makefile-gcc index a3611e07d8..76f7230478 100644 --- a/VEX/Makefile-gcc +++ b/VEX/Makefile-gcc @@ -88,7 +88,9 @@ CCFLAGS = -Wall -Wmissing-prototypes -Wshadow \ -Wpointer-arith -Wbad-function-cast -Wcast-qual \ -Wcast-align -Wmissing-declarations \ -Wno-pointer-sign \ - $(EXTRA_CFLAGS) -g -O2 -fstrict-aliasing + $(EXTRA_CFLAGS) -g -O2 -fstrict-aliasing \ + \ + -O #CC = icc #CCFLAGS = -g -Wall -wd981 -wd279 -wd1287 -wd869 -wd111 -wd188 -wd186 diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h index 6038dedfb5..93b6d12388 100644 --- a/VEX/priv/guest_amd64_defs.h +++ b/VEX/priv/guest_amd64_defs.h @@ -47,7 +47,6 @@ bb_to_IR.h. */ extern DisResult disInstr_AMD64 ( IRSB* irbb, - Bool put_IP, Bool (*resteerOkFn) ( void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c index 022edaa057..80b5a74bcd 100644 --- a/VEX/priv/guest_amd64_helpers.c +++ b/VEX/priv/guest_amd64_helpers.c @@ -3452,6 +3452,10 @@ extern void amd64g_dirtyhelper_AESKEYGENASSIST ( /* VISIBLE TO LIBVEX CLIENT */ void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) { + vex_state->host_EvC_FAILADDR = 0; + vex_state->host_EvC_COUNTER = 0; + vex_state->pad0 = 0; + vex_state->guest_RAX = 0; vex_state->guest_RCX = 0; vex_state->guest_RDX = 0; @@ -3522,7 +3526,7 @@ void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state ) vex_state->guest_GS_0x60 = 0; vex_state->guest_IP_AT_SYSCALL = 0; - /* vex_state->padding = 0; */ + vex_state->pad1 = 0; } diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index b94d0b60a9..a4a02037fd 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -1511,7 +1511,8 @@ static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, binop( mkSizedOp(tyE,Iop_CasCmpNE8), mkexpr(oldTmp), mkexpr(expTmp) ), Ijk_Boring, /*Ijk_NoRedir*/ - IRConst_U64( restart_point ) + IRConst_U64( restart_point ), + OFFB_RIP )); } @@ -2091,36 +2092,55 @@ static HChar nameISize ( Int size ) /*--- JMP helpers ---*/ /*------------------------------------------------------------*/ -static void jmp_lit( IRJumpKind kind, Addr64 d64 ) +static void jmp_lit( /*MOD*/DisResult* dres, + IRJumpKind kind, Addr64 d64 ) { - irsb->next = mkU64(d64); - irsb->jumpkind = kind; + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == 0); + vassert(dres->continueAt == 0); + vassert(dres->jk_StopHere == Ijk_INVALID); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = kind; + stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) ); } -static void jmp_treg( IRJumpKind kind, IRTemp t ) +static void jmp_treg( /*MOD*/DisResult* dres, + IRJumpKind kind, IRTemp t ) { - irsb->next = mkexpr(t); - irsb->jumpkind = kind; + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == 0); + vassert(dres->continueAt == 0); + vassert(dres->jk_StopHere == Ijk_INVALID); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = kind; + stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) ); } static -void jcc_01 
( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true ) +void jcc_01 ( /*MOD*/DisResult* dres, + AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true ) { Bool invert; AMD64Condcode condPos; + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == 0); + vassert(dres->continueAt == 0); + vassert(dres->jk_StopHere == Ijk_INVALID); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; condPos = positiveIse_AMD64Condcode ( cond, &invert ); if (invert) { stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), Ijk_Boring, - IRConst_U64(d64_false) ) ); - irsb->next = mkU64(d64_true); - irsb->jumpkind = Ijk_Boring; + IRConst_U64(d64_false), + OFFB_RIP ) ); + stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) ); } else { stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), Ijk_Boring, - IRConst_U64(d64_true) ) ); - irsb->next = mkU64(d64_false); - irsb->jumpkind = Ijk_Boring; + IRConst_U64(d64_true), + OFFB_RIP ) ); + stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) ); } } @@ -3966,7 +3986,7 @@ ULong dis_Grp4 ( VexAbiInfo* vbi, static ULong dis_Grp5 ( VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta, - DisResult* dres, Bool* decode_OK ) + /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK ) { Int len; UChar modrm; @@ -4009,8 +4029,8 @@ ULong dis_Grp5 ( VexAbiInfo* vbi, putIReg64(R_RSP, mkexpr(t2)); storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)"); - jmp_treg(Ijk_Call,t3); - dres->whatNext = Dis_StopHere; + jmp_treg(dres, Ijk_Call, t3); + vassert(dres->whatNext == Dis_StopHere); showSz = False; break; case 4: /* jmp Ev */ @@ -4019,8 +4039,8 @@ ULong dis_Grp5 ( VexAbiInfo* vbi, sz = 8; t3 = newTemp(Ity_I64); assign(t3, getIRegE(sz,pfx,modrm)); - jmp_treg(Ijk_Boring,t3); - dres->whatNext = Dis_StopHere; + jmp_treg(dres, Ijk_Boring, t3); + vassert(dres->whatNext == Dis_StopHere); showSz = False; break; default: @@ -4073,8 +4093,8 @@ ULong dis_Grp5 ( VexAbiInfo* vbi, putIReg64(R_RSP, mkexpr(t2)); storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)"); - jmp_treg(Ijk_Call,t3); - dres->whatNext = Dis_StopHere; + jmp_treg(dres, Ijk_Call, t3); + vassert(dres->whatNext == Dis_StopHere); showSz = False; break; case 4: /* JMP Ev */ @@ -4083,8 +4103,8 @@ ULong dis_Grp5 ( VexAbiInfo* vbi, sz = 8; t3 = newTemp(Ity_I64); assign(t3, loadLE(Ity_I64,mkexpr(addr))); - jmp_treg(Ijk_Boring,t3); - dres->whatNext = Dis_StopHere; + jmp_treg(dres, Ijk_Boring, t3); + vassert(dres->whatNext == Dis_StopHere); showSz = False; break; case 6: /* PUSH Ev */ @@ -4287,7 +4307,8 @@ void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx ) the insn is the last one in the basic block, and so emit a jump to the next insn, rather than just falling through. 
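
   (Under the new chaining scheme that jump is produced by jmp_lit,
   which, in outline, does

      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = kind;
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );

   and bb_to_IR later turns the trailing Put into the IRSB's
   next/jumpkind/offsIP fields.)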
*/ static -void dis_REP_op ( AMD64Condcode cond, +void dis_REP_op ( /*MOD*/DisResult* dres, + AMD64Condcode cond, void (*dis_OP)(Int, IRTemp, Prefix), Int sz, Addr64 rip, Addr64 rip_next, HChar* name, Prefix pfx ) @@ -4310,7 +4331,8 @@ void dis_REP_op ( AMD64Condcode cond, cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0)); } - stmt( IRStmt_Exit( cmp, Ijk_Boring, IRConst_U64(rip_next) ) ); + stmt( IRStmt_Exit( cmp, Ijk_Boring, + IRConst_U64(rip_next), OFFB_RIP ) ); if (haveASO(pfx)) putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); @@ -4321,12 +4343,15 @@ void dis_REP_op ( AMD64Condcode cond, dis_OP (sz, t_inc, pfx); if (cond == AMD64CondAlways) { - jmp_lit(Ijk_Boring,rip); + jmp_lit(dres, Ijk_Boring, rip); + vassert(dres->whatNext == Dis_StopHere); } else { stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond), Ijk_Boring, - IRConst_U64(rip) ) ); - jmp_lit(Ijk_Boring,rip_next); + IRConst_U64(rip), + OFFB_RIP ) ); + jmp_lit(dres, Ijk_Boring, rip_next); + vassert(dres->whatNext == Dis_StopHere); } DIP("%s%c\n", name, nameISize(sz)); } @@ -5130,7 +5155,8 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), Ijk_EmWarn, - IRConst_U64( guest_RIP_bbstart+delta ) + IRConst_U64( guest_RIP_bbstart+delta ), + OFFB_RIP ) ); @@ -5172,7 +5198,8 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), Ijk_EmWarn, - IRConst_U64( guest_RIP_bbstart+delta ) + IRConst_U64( guest_RIP_bbstart+delta ), + OFFB_RIP ) ); break; @@ -6108,7 +6135,8 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), Ijk_EmWarn, - IRConst_U64( guest_RIP_bbstart+delta ) + IRConst_U64( guest_RIP_bbstart+delta ), + OFFB_RIP ) ); @@ -8143,7 +8171,7 @@ ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok, //.. } static -void dis_ret ( VexAbiInfo* vbi, ULong d64 ) +void dis_ret ( /*MOD*/DisResult* dres, VexAbiInfo* vbi, ULong d64 ) { IRTemp t1 = newTemp(Ity_I64); IRTemp t2 = newTemp(Ity_I64); @@ -8153,7 +8181,8 @@ void dis_ret ( VexAbiInfo* vbi, ULong d64 ) assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); putIReg64(R_RSP, mkexpr(t3)); make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); - jmp_treg(Ijk_Ret,t2); + jmp_treg(dres, Ijk_Ret, t2); + vassert(dres->whatNext == Dis_StopHere); } @@ -8964,7 +8993,8 @@ static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)), mkU64(0)), Ijk_SigSEGV, - IRConst_U64(guest_RIP_curr_instr) + IRConst_U64(guest_RIP_curr_instr), + OFFB_RIP ) ); } @@ -11527,7 +11557,8 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK, IRStmt_Exit( binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)), Ijk_EmWarn, - IRConst_U64(guest_RIP_bbstart+delta) + IRConst_U64(guest_RIP_bbstart+delta), + OFFB_RIP ) ); goto decode_success; @@ -16954,7 +16985,8 @@ Long dis_ESC_NONE ( mk_amd64g_calculate_condition( (AMD64Condcode)(1 ^ (opc - 0x70))), Ijk_Boring, - IRConst_U64(guest_RIP_bbstart+delta) ) ); + IRConst_U64(guest_RIP_bbstart+delta), + OFFB_RIP ) ); dres->whatNext = Dis_ResteerC; dres->continueAt = d64; comment = "(assumed taken)"; @@ -16972,7 +17004,8 @@ Long dis_ESC_NONE ( stmt( IRStmt_Exit( mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)), Ijk_Boring, - IRConst_U64(d64) ) ); + IRConst_U64(d64), + OFFB_RIP ) ); dres->whatNext = Dis_ResteerC; dres->continueAt = guest_RIP_bbstart+delta; comment = "(assumed not taken)"; @@ -16980,10 +17013,9 @@ Long dis_ESC_NONE ( else { /* Conservative default translation - end the block at this point. 
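            jcc_01 now does all the work: it emits a side exit (an
            IRStmt_Exit, which also carries OFFB_RIP) for one arm of
            the branch, Puts the other arm's address to OFFB_RIP, and
            sets Dis_StopHere in the DisResult itself.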
*/ - jcc_01( (AMD64Condcode)(opc - 0x70), - guest_RIP_bbstart+delta, - d64 ); - dres->whatNext = Dis_StopHere; + jcc_01( dres, (AMD64Condcode)(opc - 0x70), + guest_RIP_bbstart+delta, d64 ); + vassert(dres->whatNext == Dis_StopHere); } DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment); return delta; @@ -17154,8 +17186,8 @@ Long dis_ESC_NONE ( DIP("rep nop (P4 pause)\n"); /* "observe" the hint. The Vex client needs to be careful not to cause very long delays as a result, though. */ - jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta); - dres->whatNext = Dis_StopHere; + jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta); + vassert(dres->whatNext == Dis_StopHere); return delta; } /* detect and handle NOPs specially */ @@ -17393,7 +17425,7 @@ Long dis_ESC_NONE ( if (haveF3(pfx) && !haveF2(pfx)) { if (opc == 0xA4) sz = 1; - dis_REP_op ( AMD64CondAlways, dis_MOVS, sz, + dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz, guest_RIP_curr_instr, guest_RIP_bbstart+delta, "rep movs", pfx ); dres->whatNext = Dis_StopHere; @@ -17414,7 +17446,7 @@ Long dis_ESC_NONE ( if (haveF3(pfx) && !haveF2(pfx)) { if (opc == 0xA6) sz = 1; - dis_REP_op ( AMD64CondZ, dis_CMPS, sz, + dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz, guest_RIP_curr_instr, guest_RIP_bbstart+delta, "repe cmps", pfx ); dres->whatNext = Dis_StopHere; @@ -17428,11 +17460,11 @@ Long dis_ESC_NONE ( if (haveF3(pfx) && !haveF2(pfx)) { if (opc == 0xAA) sz = 1; - dis_REP_op ( AMD64CondAlways, dis_STOS, sz, + dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz, guest_RIP_curr_instr, guest_RIP_bbstart+delta, "rep stos", pfx ); - dres->whatNext = Dis_StopHere; - return delta; + vassert(dres->whatNext == Dis_StopHere); + return delta; } /* AA/AB: stosb/stos{w,l,q} */ if (!haveF3(pfx) && !haveF2(pfx)) { @@ -17463,20 +17495,20 @@ Long dis_ESC_NONE ( if (haveF2(pfx) && !haveF3(pfx)) { if (opc == 0xAE) sz = 1; - dis_REP_op ( AMD64CondNZ, dis_SCAS, sz, + dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz, guest_RIP_curr_instr, guest_RIP_bbstart+delta, "repne scas", pfx ); - dres->whatNext = Dis_StopHere; + vassert(dres->whatNext == Dis_StopHere); return delta; } /* F3 AE/AF: repe scasb/repe scas{w,l,q} */ if (!haveF2(pfx) && haveF3(pfx)) { if (opc == 0xAE) sz = 1; - dis_REP_op ( AMD64CondZ, dis_SCAS, sz, + dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz, guest_RIP_curr_instr, guest_RIP_bbstart+delta, "repe scas", pfx ); - dres->whatNext = Dis_StopHere; + vassert(dres->whatNext == Dis_StopHere); return delta; } /* AE/AF: scasb/scas{w,l,q} */ @@ -17563,16 +17595,14 @@ Long dis_ESC_NONE ( if (have66orF2orF3(pfx)) goto decode_failure; d64 = getUDisp16(delta); delta += 2; - dis_ret(vbi, d64); - dres->whatNext = Dis_StopHere; + dis_ret(dres, vbi, d64); DIP("ret $%lld\n", d64); return delta; case 0xC3: /* RET */ if (have66orF2(pfx)) goto decode_failure; /* F3 is acceptable on AMD. */ - dis_ret(vbi, 0); - dres->whatNext = Dis_StopHere; + dis_ret(dres, vbi, 0); DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n"); return delta; @@ -17655,8 +17685,8 @@ Long dis_ESC_NONE ( return delta; case 0xCC: /* INT 3 */ - jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta); - dres->whatNext = Dis_StopHere; + jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta); + vassert(dres->whatNext == Dis_StopHere); DIP("int $0x3\n"); return delta; @@ -17808,7 +17838,7 @@ Long dis_ESC_NONE ( default: vassert(0); } - stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) ); + stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) ); DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? 
"l" : "", d64); return delta; @@ -17822,20 +17852,22 @@ Long dis_ESC_NONE ( if (haveASO(pfx)) { /* 32-bit */ stmt( IRStmt_Exit( binop(Iop_CmpEQ64, - unop(Iop_32Uto64, getIReg32(R_RCX)), - mkU64(0)), - Ijk_Boring, - IRConst_U64(d64)) - ); + unop(Iop_32Uto64, getIReg32(R_RCX)), + mkU64(0)), + Ijk_Boring, + IRConst_U64(d64), + OFFB_RIP + )); DIP("jecxz 0x%llx\n", d64); } else { /* 64-bit */ stmt( IRStmt_Exit( binop(Iop_CmpEQ64, getIReg64(R_RCX), mkU64(0)), - Ijk_Boring, - IRConst_U64(d64)) - ); + Ijk_Boring, + IRConst_U64(d64), + OFFB_RIP + )); DIP("jrcxz 0x%llx\n", d64); } return delta; @@ -17953,8 +17985,8 @@ Long dis_ESC_NONE ( dres->whatNext = Dis_ResteerU; dres->continueAt = d64; } else { - jmp_lit(Ijk_Call,d64); - dres->whatNext = Dis_StopHere; + jmp_lit(dres, Ijk_Call, d64); + vassert(dres->whatNext == Dis_StopHere); } DIP("call 0x%llx\n",d64); return delta; @@ -17969,8 +18001,8 @@ Long dis_ESC_NONE ( dres->whatNext = Dis_ResteerU; dres->continueAt = d64; } else { - jmp_lit(Ijk_Boring,d64); - dres->whatNext = Dis_StopHere; + jmp_lit(dres, Ijk_Boring, d64); + vassert(dres->whatNext == Dis_StopHere); } DIP("jmp 0x%llx\n", d64); return delta; @@ -17985,8 +18017,8 @@ Long dis_ESC_NONE ( dres->whatNext = Dis_ResteerU; dres->continueAt = d64; } else { - jmp_lit(Ijk_Boring,d64); - dres->whatNext = Dis_StopHere; + jmp_lit(dres, Ijk_Boring, d64); + vassert(dres->whatNext == Dis_StopHere); } DIP("jmp-8 0x%llx\n", d64); return delta; @@ -18153,8 +18185,8 @@ Long dis_ESC_0F ( /* It's important that all guest state is up-to-date at this point. So we declare an end-of-block here, which forces any cached guest state to be flushed. */ - jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed); - dres->whatNext = Dis_StopHere; + jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed); + vassert(dres->whatNext == Dis_StopHere); DIP("syscall\n"); return delta; @@ -18243,7 +18275,9 @@ Long dis_ESC_0F ( mk_amd64g_calculate_condition( (AMD64Condcode)(1 ^ (opc - 0x80))), Ijk_Boring, - IRConst_U64(guest_RIP_bbstart+delta) ) ); + IRConst_U64(guest_RIP_bbstart+delta), + OFFB_RIP + )); dres->whatNext = Dis_ResteerC; dres->continueAt = d64; comment = "(assumed taken)"; @@ -18262,7 +18296,9 @@ Long dis_ESC_0F ( mk_amd64g_calculate_condition((AMD64Condcode) (opc - 0x80)), Ijk_Boring, - IRConst_U64(d64) ) ); + IRConst_U64(d64), + OFFB_RIP + )); dres->whatNext = Dis_ResteerC; dres->continueAt = guest_RIP_bbstart+delta; comment = "(assumed not taken)"; @@ -18270,10 +18306,9 @@ Long dis_ESC_0F ( else { /* Conservative default translation - end the block at this point. */ - jcc_01( (AMD64Condcode)(opc - 0x80), - guest_RIP_bbstart+delta, - d64 ); - dres->whatNext = Dis_StopHere; + jcc_01( dres, (AMD64Condcode)(opc - 0x80), + guest_RIP_bbstart+delta, d64 ); + vassert(dres->whatNext == Dis_StopHere); } DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment); return delta; @@ -18985,7 +19020,6 @@ Long dis_ESC_0F3A ( static DisResult disInstr_AMD64_WRK ( /*OUT*/Bool* expect_CAS, - Bool put_IP, Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -19015,10 +19049,10 @@ DisResult disInstr_AMD64_WRK ( Prefix pfx = PFX_EMPTY; /* Set result defaults. 
*/ - dres.whatNext = Dis_Continue; - dres.len = 0; - dres.continueAt = 0; - + dres.whatNext = Dis_Continue; + dres.len = 0; + dres.continueAt = 0; + dres.jk_StopHere = Ijk_INVALID; *expect_CAS = False; vassert(guest_RIP_next_assumed == 0); @@ -19028,10 +19062,6 @@ DisResult disInstr_AMD64_WRK ( DIP("\t0x%llx: ", guest_RIP_bbstart+delta); - /* We may be asked to update the guest RIP before going further. */ - if (put_IP) - stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) ); - /* Spot "Special" instructions (see comment at top of file). */ { UChar* code = (UChar*)(guest_code + delta); @@ -19055,8 +19085,8 @@ DisResult disInstr_AMD64_WRK ( /* %RDX = client_request ( %RAX ) */ DIP("%%rdx = client_request ( %%rax )\n"); delta += 19; - jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta); + vassert(dres.whatNext == Dis_StopHere); goto decode_success; } else @@ -19080,8 +19110,8 @@ DisResult disInstr_AMD64_WRK ( assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); putIReg64(R_RSP, mkexpr(t2)); storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta)); - jmp_treg(Ijk_NoRedir,t1); - dres.whatNext = Dis_StopHere; + jmp_treg(&dres, Ijk_NoRedir, t1); + vassert(dres.whatNext == Dis_StopHere); goto decode_success; } /* We don't know what it is. */ @@ -19309,7 +19339,7 @@ DisResult disInstr_AMD64_WRK ( /* It's important that all ArchRegs carry their up-to-date value at this point. So we declare an end-of-block here, which forces any TempRegs caching ArchRegs to be flushed. */ - dres.whatNext = Dis_StopHere; + vassert(dres.whatNext == Dis_StopHere); DIP("int $0x%02x\n", (UInt)d64); break; } @@ -19452,9 +19482,9 @@ DisResult disInstr_AMD64_WRK ( insn, but nevertheless be paranoid and update it again right now. */ stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); - jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr); - dres.whatNext = Dis_StopHere; - dres.len = 0; + jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr); + vassert(dres.whatNext == Dis_StopHere); + dres.len = 0; /* We also need to say that a CAS is not expected now, regardless of what it might have been set to at the start of the function, since the IR that we've emitted just above (to synthesis a @@ -19467,6 +19497,20 @@ DisResult disInstr_AMD64_WRK ( decode_success: /* All decode successes end up here. */ + switch (dres.whatNext) { + case Dis_Continue: + stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) ); + break; + case Dis_ResteerU: + case Dis_ResteerC: + stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) ); + break; + case Dis_StopHere: + break; + default: + vassert(0); + } + DIP("\n"); dres.len = (Int)toUInt(delta - delta_start); return dres; @@ -19484,7 +19528,6 @@ DisResult disInstr_AMD64_WRK ( is located in host memory at &guest_code[delta]. */ DisResult disInstr_AMD64 ( IRSB* irsb_IN, - Bool put_IP, Bool (*resteerOkFn) ( void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -19514,7 +19557,7 @@ DisResult disInstr_AMD64 ( IRSB* irsb_IN, x1 = irsb_IN->stmts_used; expect_CAS = False; - dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn, + dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn, resteerCisOk, callback_opaque, delta, archinfo, abiinfo ); @@ -19547,7 +19590,7 @@ DisResult disInstr_AMD64 ( IRSB* irsb_IN, /* inconsistency detected. re-disassemble the instruction so as to generate a useful error message; then assert. 
*/ vex_traceflags |= VEX_TRACE_FE; - dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn, + dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn, resteerCisOk, callback_opaque, delta, archinfo, abiinfo ); diff --git a/VEX/priv/guest_arm_defs.h b/VEX/priv/guest_arm_defs.h index be6dd1cd41..22c35fa74f 100644 --- a/VEX/priv/guest_arm_defs.h +++ b/VEX/priv/guest_arm_defs.h @@ -41,7 +41,6 @@ bb_to_IR.h. */ extern DisResult disInstr_ARM ( IRSB* irbb, - Bool put_IP, Bool (*resteerOkFn) ( void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, diff --git a/VEX/priv/guest_arm_helpers.c b/VEX/priv/guest_arm_helpers.c index a545e34bf6..e588178e46 100644 --- a/VEX/priv/guest_arm_helpers.c +++ b/VEX/priv/guest_arm_helpers.c @@ -937,6 +937,9 @@ UInt LibVEX_GuestARM_get_cpsr ( /*IN*/VexGuestARMState* vex_state ) /* VISIBLE TO LIBVEX CLIENT */ void LibVEX_GuestARM_initialise ( /*OUT*/VexGuestARMState* vex_state ) { + vex_state->host_EvC_FAILADDR = 0; + vex_state->host_EvC_COUNTER = 0; + vex_state->guest_R0 = 0; vex_state->guest_R1 = 0; vex_state->guest_R2 = 0; @@ -1014,8 +1017,6 @@ void LibVEX_GuestARM_initialise ( /*OUT*/VexGuestARMState* vex_state ) vex_state->guest_ITSTATE = 0; vex_state->padding1 = 0; - vex_state->padding2 = 0; - vex_state->padding3 = 0; } diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c index 5935b1e3b3..dcf17871dd 100644 --- a/VEX/priv/guest_arm_toIR.c +++ b/VEX/priv/guest_arm_toIR.c @@ -1398,7 +1398,8 @@ static void mk_skip_over_A32_if_cond_is_false ( stmt( IRStmt_Exit( unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), Ijk_Boring, - IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)) + IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)), + OFFB_R15T )); } @@ -1414,7 +1415,8 @@ static void mk_skip_over_T16_if_cond_is_false ( stmt( IRStmt_Exit( unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), Ijk_Boring, - IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)) + IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)), + OFFB_R15T )); } @@ -1431,7 +1433,8 @@ static void mk_skip_over_T32_if_cond_is_false ( stmt( IRStmt_Exit( unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), Ijk_Boring, - IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)) + IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)), + OFFB_R15T )); } @@ -1448,7 +1451,8 @@ static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ ) IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(t), mkU32(0)), Ijk_NoDecode, - IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)) + IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)), + OFFB_R15T ) ); } @@ -11962,9 +11966,9 @@ static Bool decode_NV_instruction ( /*MOD*/DisResult* dres, UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1); putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4), IRTemp_INVALID/*because AL*/, Ijk_Boring ); - irsb->next = mkU32(dst); - irsb->jumpkind = Ijk_Call; - dres->whatNext = Dis_StopHere; + llPutIReg(15, mkU32(dst)); + dres->jk_StopHere = Ijk_Call; + dres->whatNext = Dis_StopHere; DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1); return True; } @@ -12040,7 +12044,6 @@ static Bool decode_NV_instruction ( /*MOD*/DisResult* dres, static DisResult disInstr_ARM_WRK ( - Bool put_IP, Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -12066,9 +12069,10 @@ DisResult disInstr_ARM_WRK ( // etc etc /* Set result defaults. 
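      As on amd64, jk_StopHere starts out as Ijk_INVALID; any path
      that sets whatNext to Dis_StopHere must also set jk_StopHere,
      since bb_to_IR asserts a valid kind when it ends the block.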
*/ - dres.whatNext = Dis_Continue; - dres.len = 4; - dres.continueAt = 0; + dres.whatNext = Dis_Continue; + dres.len = 4; + dres.continueAt = 0; + dres.jk_StopHere = Ijk_INVALID; /* Set default actions for post-insn handling of writes to r15, if required. */ @@ -12085,11 +12089,7 @@ DisResult disInstr_ARM_WRK ( DIP("\t(arm) 0x%x: ", (UInt)guest_R15_curr_instr_notENC); - /* We may be asked to update the guest R15 before going further. */ vassert(0 == (guest_R15_curr_instr_notENC & 3)); - if (put_IP) { - llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) ); - } /* ----------------------------------------------------------- */ @@ -12116,9 +12116,9 @@ DisResult disInstr_ARM_WRK ( /* orr r10,r10,r10 */) { /* R3 = client_request ( R4 ) */ DIP("r3 = client_request ( %%r4 )\n"); - irsb->next = mkU32( guest_R15_curr_instr_notENC + 20 ); - irsb->jumpkind = Ijk_ClientReq; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 )); + dres.jk_StopHere = Ijk_ClientReq; + dres.whatNext = Dis_StopHere; goto decode_success; } else @@ -12136,9 +12136,9 @@ DisResult disInstr_ARM_WRK ( /* branch-and-link-to-noredir R4 */ DIP("branch-and-link-to-noredir r4\n"); llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) ); - irsb->next = llGetIReg(4); - irsb->jumpkind = Ijk_NoRedir; - dres.whatNext = Dis_StopHere; + llPutIReg(15, llGetIReg(4)); + dres.jk_StopHere = Ijk_NoRedir; + dres.whatNext = Dis_StopHere; goto decode_success; } /* We don't know what it is. Set opc1/opc2 so decode_failure @@ -12977,9 +12977,9 @@ DisResult disInstr_ARM_WRK ( dres.continueAt = (Addr64)dst; } else { /* no; terminate the SB at this point. */ - irsb->next = mkU32(dst); - irsb->jumpkind = jk; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkU32(dst)); + dres.jk_StopHere = jk; + dres.whatNext = Dis_StopHere; } DIP("b%s 0x%x\n", link ? "l" : "", dst); } else { @@ -13002,7 +13002,8 @@ DisResult disInstr_ARM_WRK ( stmt( IRStmt_Exit( unop(Iop_Not1, unop(Iop_32to1, mkexpr(condT))), Ijk_Boring, - IRConst_U32(guest_R15_curr_instr_notENC+4) )); + IRConst_U32(guest_R15_curr_instr_notENC+4), + OFFB_R15T )); dres.whatNext = Dis_ResteerC; dres.continueAt = (Addr64)(Addr32)dst; comment = "(assumed taken)"; @@ -13021,7 +13022,8 @@ DisResult disInstr_ARM_WRK ( following this one. */ stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)), Ijk_Boring, - IRConst_U32(dst) )); + IRConst_U32(dst), + OFFB_R15T )); dres.whatNext = Dis_ResteerC; dres.continueAt = (Addr64)(Addr32) (guest_R15_curr_instr_notENC+4); @@ -13031,10 +13033,10 @@ DisResult disInstr_ARM_WRK ( /* Conservative default translation - end the block at this point. */ stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)), - jk, IRConst_U32(dst) )); - irsb->next = mkU32(guest_R15_curr_instr_notENC + 4); - irsb->jumpkind = Ijk_Boring; - dres.whatNext = Dis_StopHere; + jk, IRConst_U32(dst), OFFB_R15T )); + llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4)); + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; } DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND), dst, comment); @@ -13065,10 +13067,10 @@ DisResult disInstr_ARM_WRK ( putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4), IRTemp_INVALID/*because AL*/, Ijk_Boring ); } - irsb->next = mkexpr(dst); - irsb->jumpkind = link ? Ijk_Call - : (rM == 14 ? Ijk_Ret : Ijk_Boring); - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkexpr(dst)); + dres.jk_StopHere = link ? Ijk_Call + : (rM == 14 ? Ijk_Ret : Ijk_Boring); + dres.whatNext = Dis_StopHere; if (condT == IRTemp_INVALID) { DIP("b%sx r%u\n", link ? 
"l" : "", rM); } else { @@ -13363,9 +13365,9 @@ DisResult disInstr_ARM_WRK ( mk_skip_over_A32_if_cond_is_false( condT ); } // AL after here - irsb->next = mkU32( guest_R15_curr_instr_notENC + 4 ); - irsb->jumpkind = Ijk_Sys_syscall; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 )); + dres.jk_StopHere = Ijk_Sys_syscall; + dres.whatNext = Dis_StopHere; DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24); goto decode_success; } @@ -13415,7 +13417,8 @@ DisResult disInstr_ARM_WRK ( } stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)), /*Ijk_NoRedir*/Ijk_Boring, - IRConst_U32(guest_R15_curr_instr_notENC)) ); + IRConst_U32(guest_R15_curr_instr_notENC), + OFFB_R15T )); putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld), IRTemp_INVALID, Ijk_Boring); DIP("swp%s%s r%u, r%u, [r%u]\n", @@ -14142,10 +14145,9 @@ DisResult disInstr_ARM_WRK ( now. */ vassert(0 == (guest_R15_curr_instr_notENC & 3)); llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) ); - irsb->next = mkU32(guest_R15_curr_instr_notENC); - irsb->jumpkind = Ijk_NoDecode; - dres.whatNext = Dis_StopHere; - dres.len = 0; + dres.whatNext = Dis_StopHere; + dres.jk_StopHere = Ijk_NoDecode; + dres.len = 0; return dres; decode_success: @@ -14186,12 +14188,31 @@ DisResult disInstr_ARM_WRK ( binop(Iop_Xor32, mkexpr(r15guard), mkU32(1))), r15kind, - IRConst_U32(guest_R15_curr_instr_notENC + 4) + IRConst_U32(guest_R15_curr_instr_notENC + 4), + OFFB_R15T )); } - irsb->next = llGetIReg(15); - irsb->jumpkind = r15kind; - dres.whatNext = Dis_StopHere; + /* This seems crazy, but we're required to finish the insn with + a write to the guest PC. As usual we rely on ir_opt to tidy + up later. */ + llPutIReg(15, llGetIReg(15)); + dres.whatNext = Dis_StopHere; + dres.jk_StopHere = r15kind; + } else { + /* Set up the end-state in the normal way. */ + switch (dres.whatNext) { + case Dis_Continue: + llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC)); + break; + case Dis_ResteerU: + case Dis_ResteerC: + llPutIReg(15, mkU32(dres.continueAt)); + break; + case Dis_StopHere: + break; + default: + vassert(0); + } } return dres; @@ -14219,7 +14240,6 @@ static const UChar it_length_table[256]; /* fwds */ static DisResult disInstr_THUMB_WRK ( - Bool put_IP, Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -14249,9 +14269,10 @@ DisResult disInstr_THUMB_WRK ( // etc etc /* Set result defaults. */ - dres.whatNext = Dis_Continue; - dres.len = 2; - dres.continueAt = 0; + dres.whatNext = Dis_Continue; + dres.len = 2; + dres.continueAt = 0; + dres.jk_StopHere = Ijk_INVALID; /* Set default actions for post-insn handling of writes to r15, if required. */ @@ -14270,11 +14291,7 @@ DisResult disInstr_THUMB_WRK ( DIP("\t(thumb) 0x%x: ", (UInt)guest_R15_curr_instr_notENC); - /* We may be asked to update the guest R15 before going further. */ vassert(0 == (guest_R15_curr_instr_notENC & 1)); - if (put_IP) { - llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) ); - } /* ----------------------------------------------------------- */ /* Spot "Special" instructions (see comment at top of file). 
*/ @@ -14301,9 +14318,9 @@ DisResult disInstr_THUMB_WRK ( /* orr.w r10,r10,r10 */) { /* R3 = client_request ( R4 ) */ DIP("r3 = client_request ( %%r4 )\n"); - irsb->next = mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ); - irsb->jumpkind = Ijk_ClientReq; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 )); + dres.jk_StopHere = Ijk_ClientReq; + dres.whatNext = Dis_StopHere; goto decode_success; } else @@ -14323,9 +14340,9 @@ DisResult disInstr_THUMB_WRK ( /* branch-and-link-to-noredir R4 */ DIP("branch-and-link-to-noredir r4\n"); llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 )); - irsb->next = getIRegT(4); - irsb->jumpkind = Ijk_NoRedir; - dres.whatNext = Dis_StopHere; + llPutIReg(15, getIRegT(4)); + dres.jk_StopHere = Ijk_NoRedir; + dres.whatNext = Dis_StopHere; goto decode_success; } /* We don't know what it is. Set insn0 so decode_failure @@ -14982,9 +14999,9 @@ DisResult disInstr_THUMB_WRK ( vassert(rM == 15); assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) ); } - irsb->next = mkexpr(dst); - irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkexpr(dst)); + dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring; + dres.whatNext = Dis_StopHere; DIP("bx r%u (possibly switch to ARM mode)\n", rM); goto decode_success; } @@ -15006,9 +15023,9 @@ DisResult disInstr_THUMB_WRK ( assign( dst, getIRegT(rM) ); putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ), IRTemp_INVALID ); - irsb->next = mkexpr(dst); - irsb->jumpkind = Ijk_Call; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkexpr(dst)); + dres.jk_StopHere = Ijk_Call; + dres.whatNext = Dis_StopHere; DIP("blx r%u (possibly switch to ARM mode)\n", rM); goto decode_success; } @@ -15039,9 +15056,9 @@ DisResult disInstr_THUMB_WRK ( // stash pseudo-reg, and back up from that if we have to // restart. // uncond after here - irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ); - irsb->jumpkind = Ijk_Sys_syscall; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 )); + dres.jk_StopHere = Ijk_Sys_syscall; + dres.whatNext = Dis_StopHere; DIP("svc #0x%08x\n", imm8); goto decode_success; } @@ -15121,9 +15138,9 @@ DisResult disInstr_THUMB_WRK ( condT = IRTemp_INVALID; // now uncond /* non-interworking branch */ - irsb->next = binop(Iop_Or32, mkexpr(val), mkU32(1)); - irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring; - dres.whatNext = Dis_StopHere; + llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1))); + dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring; + dres.whatNext = Dis_StopHere; } DIP("mov r%u, r%u\n", rD, rM); goto decode_success; @@ -15178,7 +15195,8 @@ DisResult disInstr_THUMB_WRK ( UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1; stmt(IRStmt_Exit( mkexpr(kond), Ijk_Boring, - IRConst_U32(toUInt(dst)) )); + IRConst_U32(toUInt(dst)), + OFFB_R15T )); DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1); goto decode_success; } @@ -15322,9 +15340,9 @@ DisResult disInstr_THUMB_WRK ( it as is, no need to mess with it. Note, therefore, this is an interworking return. */ if (bitR) { - irsb->next = mkexpr(newPC); - irsb->jumpkind = Ijk_Ret; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkexpr(newPC)); + dres.jk_StopHere = Ijk_Ret; + dres.whatNext = Dis_StopHere; } DIP("pop {%s0x%04x}\n", bitR ? 
"pc," : "", regList & 0xFF); @@ -15869,9 +15887,9 @@ DisResult disInstr_THUMB_WRK ( mk_skip_over_T16_if_cond_is_false(condT); condT = IRTemp_INVALID; // now uncond - irsb->next = mkU32( dst | 1 /*CPSR.T*/ ); - irsb->jumpkind = Ijk_Boring; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ )); + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; DIP("b 0x%x\n", dst); goto decode_success; } @@ -15900,11 +15918,12 @@ DisResult disInstr_THUMB_WRK ( assign( kondT, mk_armg_calculate_condition(cond) ); stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)), Ijk_Boring, - IRConst_U32(dst | 1/*CPSR.T*/) )); - irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2) - | 1 /*CPSR.T*/ ); - irsb->jumpkind = Ijk_Boring; - dres.whatNext = Dis_StopHere; + IRConst_U32(dst | 1/*CPSR.T*/), + OFFB_R15T )); + llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) + | 1 /*CPSR.T*/ )); + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; DIP("b%s 0x%x\n", nCC(cond), dst); goto decode_success; } @@ -15982,17 +16001,17 @@ DisResult disInstr_THUMB_WRK ( if (isBL) { /* BL: unconditional T -> T call */ /* we're calling Thumb code, hence "| 1" */ - irsb->next = mkU32( dst | 1 ); + llPutIReg(15, mkU32( dst | 1 )); DIP("bl 0x%x (stay in Thumb mode)\n", dst); } else { /* BLX: unconditional T -> A call */ /* we're calling ARM code, hence "& 3" to align to a valid ARM insn address */ - irsb->next = mkU32( dst & ~3 ); + llPutIReg(15, mkU32( dst & ~3 )); DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3); } - irsb->jumpkind = Ijk_Call; - dres.whatNext = Dis_StopHere; + dres.whatNext = Dis_StopHere; + dres.jk_StopHere = Ijk_Call; goto decode_success; } } @@ -16057,15 +16076,15 @@ DisResult disInstr_THUMB_WRK ( condT = IRTemp_INVALID; // now uncond - /* Generate the IR. This might generate a write to R15, */ + /* Generate the IR. This might generate a write to R15. */ mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList); if (bL == 1 && (regList & (1<<15))) { // If we wrote to R15, we have an interworking return to // deal with. - irsb->next = llGetIReg(15); - irsb->jumpkind = Ijk_Ret; - dres.whatNext = Dis_StopHere; + llPutIReg(15, llGetIReg(15)); + dres.jk_StopHere = Ijk_Ret; + dres.whatNext = Dis_StopHere; } DIP("%sm%c%c r%u%s, {0x%04x}\n", @@ -16930,18 +16949,19 @@ DisResult disInstr_THUMB_WRK ( putIRegT(rT, mkexpr(newRt), IRTemp_INVALID); } - if (loadsPC) { - /* Presumably this is an interworking branch. */ - irsb->next = mkexpr(newRt); - irsb->jumpkind = Ijk_Boring; /* or _Ret ? */ - dres.whatNext = Dis_StopHere; - } - /* Update Rn if necessary. */ if (bW == 1) { vassert(rN != rT); // assured by validity check above putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID); } + + if (loadsPC) { + /* Presumably this is an interworking branch. */ + vassert(rN != 15); // assured by validity check above + llPutIReg(15, mkexpr(newRt)); + dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */ + dres.whatNext = Dis_StopHere; + } } if (bP == 1 && bW == 0) { @@ -17087,9 +17107,9 @@ DisResult disInstr_THUMB_WRK ( if (loadsPC) { /* Presumably this is an interworking branch. */ - irsb->next = mkexpr(newRt); - irsb->jumpkind = Ijk_Boring; /* or _Ret ? */ - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkexpr(newRt)); + dres.jk_StopHere = Ijk_Boring; /* or _Ret ? 
*/ + dres.whatNext = Dis_StopHere; } } @@ -17345,11 +17365,12 @@ DisResult disInstr_THUMB_WRK ( assign( kondT, mk_armg_calculate_condition(cond) ); stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)), Ijk_Boring, - IRConst_U32(dst | 1/*CPSR.T*/) )); - irsb->next = mkU32( (guest_R15_curr_instr_notENC + 4) - | 1 /*CPSR.T*/ ); - irsb->jumpkind = Ijk_Boring; - dres.whatNext = Dis_StopHere; + IRConst_U32(dst | 1/*CPSR.T*/), + OFFB_R15T )); + llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4) + | 1 /*CPSR.T*/ )); + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; DIP("b%s.w 0x%x\n", nCC(cond), dst); goto decode_success; } @@ -17390,9 +17411,9 @@ DisResult disInstr_THUMB_WRK ( // now uncond // branch to dst - irsb->next = mkU32( dst | 1 /*CPSR.T*/ ); - irsb->jumpkind = Ijk_Boring; - dres.whatNext = Dis_StopHere; + llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ )); + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; DIP("b.w 0x%x\n", dst); goto decode_success; } @@ -17423,16 +17444,17 @@ DisResult disInstr_THUMB_WRK ( assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea))); } - irsb->next - = binop(Iop_Or32, - binop(Iop_Add32, - getIRegT(15), - binop(Iop_Shl32, mkexpr(delta), mkU8(1)) - ), - mkU32(1) - ); - irsb->jumpkind = Ijk_Boring; - dres.whatNext = Dis_StopHere; + llPutIReg( + 15, + binop(Iop_Or32, + binop(Iop_Add32, + getIRegT(15), + binop(Iop_Shl32, mkexpr(delta), mkU8(1)) + ), + mkU32(1) + )); + dres.jk_StopHere = Ijk_Boring; + dres.whatNext = Dis_StopHere; DIP("tb%c [r%u, r%u%s]\n", bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : ""); goto decode_success; @@ -18199,60 +18221,29 @@ DisResult disInstr_THUMB_WRK ( now. */ vassert(0 == (guest_R15_curr_instr_notENC & 1)); llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) ); - irsb->next = mkU32(guest_R15_curr_instr_notENC | 1 /* CPSR.T */); - irsb->jumpkind = Ijk_NoDecode; - dres.whatNext = Dis_StopHere; - dres.len = 0; + dres.whatNext = Dis_StopHere; + dres.jk_StopHere = Ijk_NoDecode; + dres.len = 0; return dres; decode_success: /* All decode successes end up here. */ - DIP("\n"); - - vassert(dres.len == 2 || dres.len == 4 || dres.len == 20); - -#if 0 - // XXX is this necessary on Thumb? - /* Now then. Do we have an implicit jump to r15 to deal with? */ - if (r15written) { - /* If we get jump to deal with, we assume that there's been no - other competing branch stuff previously generated for this - insn. That's reasonable, in the sense that the ARM insn set - appears to declare as "Unpredictable" any instruction which - generates more than one possible new value for r15. Hence - just assert. The decoders themselves should check against - all such instructions which are thusly Unpredictable, and - decline to decode them. Hence we should never get here if we - have competing new values for r15, and hence it is safe to - assert here. */ - vassert(dres.whatNext == Dis_Continue); - vassert(irsb->next == NULL); - vassert(irsb->jumpkind == Ijk_Boring); - /* If r15 is unconditionally written, terminate the block by - jumping to it. If it's conditionally written, still - terminate the block (a shame, but we can't do side exits to - arbitrary destinations), but first jump to the next - instruction if the condition doesn't hold. */ - /* We can't use getIRegT(15) to get the destination, since that - will produce r15+4, which isn't what we want. Must use - llGetIReg(15) instead. 
*/ - if (r15guard == IRTemp_INVALID) { - /* unconditional */ - } else { - /* conditional */ - stmt( IRStmt_Exit( - unop(Iop_32to1, - binop(Iop_Xor32, - mkexpr(r15guard), mkU32(1))), - r15kind, - IRConst_U32(guest_R15_curr_instr_notENC + 4) - )); - } - irsb->next = llGetIReg(15); - irsb->jumpkind = r15kind; - dres.whatNext = Dis_StopHere; + vassert(dres.len == 4 || dres.len == 2 || dres.len == 20); + switch (dres.whatNext) { + case Dis_Continue: + llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1))); + break; + case Dis_ResteerU: + case Dis_ResteerC: + llPutIReg(15, mkU32(dres.continueAt)); + break; + case Dis_StopHere: + break; + default: + vassert(0); } -#endif + + DIP("\n"); return dres; @@ -18351,7 +18342,6 @@ static const UChar it_length_table[256] is located in host memory at &guest_code[delta]. */ DisResult disInstr_ARM ( IRSB* irsb_IN, - Bool put_IP, Bool (*resteerOkFn) ( void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -18380,12 +18370,12 @@ DisResult disInstr_ARM ( IRSB* irsb_IN, } if (isThumb) { - dres = disInstr_THUMB_WRK ( put_IP, resteerOkFn, + dres = disInstr_THUMB_WRK ( resteerOkFn, resteerCisOk, callback_opaque, &guest_code_IN[delta_ENCODED - 1], archinfo, abiinfo ); } else { - dres = disInstr_ARM_WRK ( put_IP, resteerOkFn, + dres = disInstr_ARM_WRK ( resteerOkFn, resteerCisOk, callback_opaque, &guest_code_IN[delta_ENCODED], archinfo, abiinfo ); diff --git a/VEX/priv/guest_generic_bb_to_IR.c b/VEX/priv/guest_generic_bb_to_IR.c index 32dca8c4f1..6066fe0ae5 100644 --- a/VEX/priv/guest_generic_bb_to_IR.c +++ b/VEX/priv/guest_generic_bb_to_IR.c @@ -140,6 +140,43 @@ static Bool const_False ( void* callback_opaque, Addr64 a ) { (In fact it's a VgInstrumentClosure.) */ +/* Regarding IP updating. dis_instr_fn (that does the guest specific + work of disassembling an individual instruction) must finish the + resulting IR with "PUT(guest_IP) = ". Hence in all cases it must + state the next instruction address. + + If the block is to be ended at that point, then this routine + (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to + make a transfer (of the right kind) to "GET(guest_IP)". Hence if + dis_instr_fn generates incorrect IP updates we will see it + immediately (due to jumping to the wrong next guest address). + + However it is also necessary to set this up so it can be optimised + nicely. The IRSB exit is defined to update the guest IP, so that + chaining works -- since the chain_me stubs expect the chain-to + address to be in the guest state. Hence what the IRSB next fields + will contain initially is (implicitly) + + PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next] + + which looks pretty strange at first. Eg so unconditional branch + to some address 0x123456 looks like this: + + PUT(guest_IP) = 0x123456; // dis_instr_fn generates this + // the exit + PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring + + after redundant-GET and -PUT removal by iropt, we get what we want: + + // the exit + PUT(guest_IP) [implicitly] = 0x123456; exit-Boring + + This makes the IRSB-end case the same as the side-exit case: update + IP, then transfer. There is no redundancy of representation for + the destination, and we use the destination specified by + dis_instr_fn, so any errors it makes show up sooner. 
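
   The same scheme covers conditional branches.  As a sketch (with
   made-up addresses), a conditional branch to 0x123456 whose
   fall-through address is 0x123460 comes out as

      if (cond) { PUT(guest_IP) = 0x123456; exit-Boring };
      PUT(guest_IP) = 0x123460;    // dis_instr_fn generates this
      // the exit
      PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring

   and after iropt the trailing GET once more folds down to the
   constant 0x123460.  Note that side exits themselves now carry the
   guest IP offset (IRStmt_Exit has gained an offsIP field), so the
   taken arm can be handled the same way as the block end.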
+*/ + IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge, /*OUT*/UInt* n_sc_extents, @@ -155,13 +192,15 @@ IRSB* bb_to_IR ( /*IN*/ IRType guest_word_type, /*IN*/ UInt (*needs_self_check)(void*,VexGuestExtents*), /*IN*/ Bool (*preamble_function)(void*,IRSB*), - /*IN*/ Int offB_TISTART, - /*IN*/ Int offB_TILEN + /*IN*/ Int offB_GUEST_TISTART, + /*IN*/ Int offB_GUEST_TILEN, + /*IN*/ Int offB_GUEST_IP, + /*IN*/ Int szB_GUEST_IP ) { Long delta; Int i, n_instrs, first_stmt_idx; - Bool resteerOK, need_to_put_IP, debug_print; + Bool resteerOK, debug_print; DisResult dres; IRStmt* imark; IRStmt* nop; @@ -185,6 +224,14 @@ IRSB* bb_to_IR ( vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns); vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64); + if (guest_word_type == Ity_I32) { + vassert(szB_GUEST_IP == 4); + vassert((offB_GUEST_IP % 4) == 0); + } else { + vassert(szB_GUEST_IP == 8); + vassert((offB_GUEST_IP % 8) == 0); + } + /* Start a new, empty extent. */ vge->n_used = 1; vge->base[0] = guest_IP_bbstart; @@ -297,13 +344,12 @@ IRSB* bb_to_IR ( ); } - /* for the first insn, the dispatch loop will have set - %IP, but for all the others we have to do it ourselves. */ - need_to_put_IP = toBool(n_instrs > 0); + if (debug_print && n_instrs > 0) + vex_printf("\n"); /* Finally, actually disassemble an instruction. */ + vassert(irsb->next == NULL); dres = dis_instr_fn ( irsb, - need_to_put_IP, resteerOKfn, toBool(n_cond_resteers_allowed > 0), callback_opaque, @@ -347,18 +393,22 @@ IRSB* bb_to_IR ( } } - /* If dis_instr_fn terminated the BB at this point, check it - also filled in the irsb->next field. */ - if (dres.whatNext == Dis_StopHere) { - vassert(irsb->next != NULL); - if (debug_print) { - vex_printf(" "); - vex_printf( "goto {"); - ppIRJumpKind(irsb->jumpkind); - vex_printf( "} "); - ppIRExpr( irsb->next ); - vex_printf( "\n"); - } + /* Individual insn disassembly may not mess with irsb->next. + This function is the only place where it can be set. */ + vassert(irsb->next == NULL); + vassert(irsb->jumpkind == Ijk_Boring); + vassert(irsb->offsIP == 0); + + /* Individual insn disassembly must finish the IR for each + instruction with an assignment to the guest PC. */ + vassert(first_stmt_idx < irsb->stmts_used); + /* it follows that irsb->stmts_used must be > 0 */ + { IRStmt* st = irsb->stmts[irsb->stmts_used-1]; + vassert(st); + vassert(st->tag == Ist_Put); + vassert(st->Ist.Put.offset == offB_GUEST_IP); + /* Really we should also check that the type of the Put'd data + == guest_word_type, but that's a bit expensive. */ } /* Update the VexGuestExtents we are constructing. */ @@ -370,36 +420,38 @@ IRSB* bb_to_IR ( vge->len[vge->n_used-1] = toUShort(toUInt( vge->len[vge->n_used-1] + dres.len )); n_instrs++; - if (debug_print) - vex_printf("\n"); /* Advance delta (inconspicuous but very important :-) */ delta += (Long)dres.len; switch (dres.whatNext) { case Dis_Continue: - vassert(irsb->next == NULL); + vassert(dres.continueAt == 0); + vassert(dres.jk_StopHere == Ijk_INVALID); if (n_instrs < vex_control.guest_max_insns) { /* keep going */ } else { - /* We have to stop. */ - irsb->next - = IRExpr_Const( - guest_word_type == Ity_I32 - ? IRConst_U32(toUInt(guest_IP_bbstart+delta)) - : IRConst_U64(guest_IP_bbstart+delta) - ); + /* We have to stop. See comment above re irsb field + settings here. 
*/ + irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type); + /* irsb->jumpkind must already by Ijk_Boring */ + irsb->offsIP = offB_GUEST_IP; goto done; } break; case Dis_StopHere: - vassert(irsb->next != NULL); + vassert(dres.continueAt == 0); + vassert(dres.jk_StopHere != Ijk_INVALID); + /* See comment above re irsb field settings here. */ + irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type); + irsb->jumpkind = dres.jk_StopHere; + irsb->offsIP = offB_GUEST_IP; goto done; + case Dis_ResteerU: case Dis_ResteerC: /* Check that we actually allowed a resteer .. */ vassert(resteerOK); - vassert(irsb->next == NULL); if (dres.whatNext == Dis_ResteerC) { vassert(n_cond_resteers_allowed > 0); n_cond_resteers_allowed--; @@ -628,10 +680,10 @@ IRSB* bb_to_IR ( = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) ); irsb->stmts[selfcheck_idx + i * 5 + 2] - = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) ); + = IRStmt_Put( offB_GUEST_TISTART, IRExpr_RdTmp(tistart_tmp) ); irsb->stmts[selfcheck_idx + i * 5 + 3] - = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) ); + = IRStmt_Put( offB_GUEST_TILEN, IRExpr_RdTmp(tilen_tmp) ); /* Generate the entry point descriptors */ if (abiinfo_both->host_ppc_calls_use_fndescrs) { @@ -685,11 +737,25 @@ IRSB* bb_to_IR ( /* Where we must restart if there's a failure: at the first extent, regardless of which extent the failure actually happened in. */ - guest_IP_bbstart_IRConst + guest_IP_bbstart_IRConst, + offB_GUEST_IP ); } /* for (i = 0; i < vge->n_used; i++) */ } + /* irsb->next must now be set, since we've finished the block. + Print it if necessary.*/ + vassert(irsb->next != NULL); + if (debug_print) { + vex_printf(" "); + vex_printf( "PUT(%d) = ", irsb->offsIP); + ppIRExpr( irsb->next ); + vex_printf( "; exit-"); + ppIRJumpKind(irsb->jumpkind); + vex_printf( "\n"); + vex_printf( "\n"); + } + return irsb; } diff --git a/VEX/priv/guest_generic_bb_to_IR.h b/VEX/priv/guest_generic_bb_to_IR.h index f623443b47..9c1e7409be 100644 --- a/VEX/priv/guest_generic_bb_to_IR.h +++ b/VEX/priv/guest_generic_bb_to_IR.h @@ -76,6 +76,13 @@ typedef enum { Dis_StopHere, Dis_Continue, Dis_ResteerU, Dis_ResteerC } whatNext; + /* For Dis_StopHere, we need to end the block and create a + transfer to whatever the NIA is. That will have presumably + been set by the IR generated for this insn. So we need to + know the jump kind to use. Should Ijk_INVALID in other Dis_ + cases. */ + IRJumpKind jk_StopHere; + /* For Dis_Resteer, this is the guest address we should continue at. Otherwise ignored (should be zero). */ Addr64 continueAt; @@ -112,10 +119,6 @@ typedef /* This is the IRSB to which the resulting IR is to be appended. */ /*OUT*/ IRSB* irbb, - /* Do we need to generate IR to set the guest IP for this insn, - or not? 
*/ - /*IN*/ Bool put_IP, - /* Return True iff resteering to the given addr is allowed (for branches/calls to destinations that are known at JIT-time) */ /*IN*/ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), @@ -173,8 +176,10 @@ IRSB* bb_to_IR ( /*IN*/ IRType guest_word_type, /*IN*/ UInt (*needs_self_check)(void*,VexGuestExtents*), /*IN*/ Bool (*preamble_function)(void*,IRSB*), - /*IN*/ Int offB_TISTART, - /*IN*/ Int offB_TILEN + /*IN*/ Int offB_GUEST_TISTART, + /*IN*/ Int offB_GUEST_TILEN, + /*IN*/ Int offB_GUEST_IP, + /*IN*/ Int szB_GUEST_IP ); diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index 85d3b0c311..d084e2e1f7 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -1500,7 +1500,7 @@ static void gen_SIGBUS_if_misaligned ( IRTemp addr, UChar align ) if (mode64) { vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64); stmt( - IRStmt_Exit( + IRStmt_Exit3( binop(Iop_CmpNE64, binop(Iop_And64, mkexpr(addr), mkU64(align-1)), mkU64(0)), @@ -1511,7 +1511,7 @@ static void gen_SIGBUS_if_misaligned ( IRTemp addr, UChar align ) } else { vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I32); stmt( - IRStmt_Exit( + IRStmt_Exit3( binop(Iop_CmpNE32, binop(Iop_And32, mkexpr(addr), mkU32(align-1)), mkU32(0)), @@ -2690,7 +2690,7 @@ static void putGST_masked ( PPC_GST reg, IRExpr* src, ULong mask ) so that Valgrind's dispatcher sees the warning. */ putGST( PPC_GST_EMWARN, mkU32(ew) ); stmt( - IRStmt_Exit( + IRStmt_Exit3( binop(Iop_CmpNE32, mkU32(ew), mkU32(EmWarn_NONE)), Ijk_EmWarn, mkSzConst( ty, nextInsnAddr()) )); @@ -4975,7 +4975,7 @@ void generate_lsw_sequence ( IRTemp tNBytes, // # bytes, :: Ity_I32 for (i = 0; i < maxBytes; i++) { /* if (nBytes < (i+1)) goto NIA; */ - stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), + stmt( IRStmt_Exit3( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), Ijk_Boring, mkSzConst( ty, nextInsnAddr()) )); /* when crossing into a new dest register, set it to zero. */ @@ -5026,7 +5026,7 @@ void generate_stsw_sequence ( IRTemp tNBytes, // # bytes, :: Ity_I32 for (i = 0; i < maxBytes; i++) { /* if (nBytes < (i+1)) goto NIA; */ - stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), + stmt( IRStmt_Exit3( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)), Ijk_Boring, mkSzConst( ty, nextInsnAddr() ) )); /* check for crossing into a new src register. */ @@ -5301,7 +5301,7 @@ static Bool dis_branch ( UInt theInstr, cond_ok is either zero or nonzero, since that's the cheapest way to compute it. Anding them together gives a value which is either zero or non zero and so that's what we must test - for in the IRStmt_Exit. */ + for in the IRStmt_Exit3. */ assign( ctr_ok, branch_ctr_ok( BO ) ); assign( cond_ok, branch_cond_ok( BO, BI ) ); assign( do_branch, @@ -5316,7 +5316,7 @@ static Bool dis_branch ( UInt theInstr, if (flag_LK) putGST( PPC_GST_LR, e_nia ); - stmt( IRStmt_Exit( + stmt( IRStmt_Exit3( binop(Iop_CmpNE32, mkexpr(do_branch), mkU32(0)), flag_LK ? 
Ijk_Call : Ijk_Boring, mkSzConst(ty, tgt) ) ); @@ -5351,7 +5351,7 @@ static Bool dis_branch ( UInt theInstr, if (flag_LK) putGST( PPC_GST_LR, e_nia ); - stmt( IRStmt_Exit( + stmt( IRStmt_Exit3( binop(Iop_CmpEQ32, mkexpr(cond_ok), mkU32(0)), Ijk_Boring, c_nia )); @@ -5391,7 +5391,7 @@ static Bool dis_branch ( UInt theInstr, if (flag_LK) putGST( PPC_GST_LR, e_nia ); - stmt( IRStmt_Exit( + stmt( IRStmt_Exit3( binop(Iop_CmpEQ32, mkexpr(do_branch), mkU32(0)), Ijk_Boring, c_nia )); @@ -5558,7 +5558,7 @@ static Bool do_trap ( UChar TO, if ((TO & b11100) == b11100 || (TO & b00111) == b00111) { /* Unconditional trap. Just do the exit without testing the arguments. */ - stmt( IRStmt_Exit( + stmt( IRStmt_Exit3( binop(opCMPEQ, const0, const0), Ijk_SigTRAP, mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia) @@ -5601,7 +5601,7 @@ static Bool do_trap ( UChar TO, tmp = binop(opAND, binop(opCMPORDU, argLe, argRe), const4); cond = binop(opOR, tmp, cond); } - stmt( IRStmt_Exit( + stmt( IRStmt_Exit3( binop(opCMPNE, cond, const0), Ijk_SigTRAP, mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia) diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index d0dc00e744..ca23acb042 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -289,7 +289,7 @@ if_not_condition_goto_computed(IRExpr *condition, IRExpr *target) { vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1); - stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(guest_IA_next_instr))); + stmt(IRStmt_Exit3(condition, Ijk_Boring, IRConst_U64(guest_IA_next_instr))); irsb->next = target; irsb->jumpkind = Ijk_Boring; @@ -303,7 +303,7 @@ if_condition_goto(IRExpr *condition, Addr64 target) { vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1); - stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(target))); + stmt(IRStmt_Exit3(condition, Ijk_Boring, IRConst_U64(target))); dis_res->whatNext = Dis_Continue; } @@ -8869,7 +8869,7 @@ void (*irgen)(IRTemp length, IRTemp start1, IRTemp start2), int lensize) stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), mkU64(guest_IA_curr_instr))); stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4))); - stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval, + stmt(IRStmt_Exit3(mkexpr(cond), Ijk_TInval, IRConst_U64(guest_IA_curr_instr))); ss.bytes = last_execute_target; @@ -8900,7 +8900,7 @@ s390_irgen_EX(UChar r1, IRTemp addr2) stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), mkU64(guest_IA_curr_instr))); stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4))); - stmt(IRStmt_Exit(IRExpr_Const(IRConst_U1(True)), Ijk_TInval, + stmt(IRStmt_Exit3(IRExpr_Const(IRConst_U1(True)), Ijk_TInval, IRConst_U64(guest_IA_curr_instr))); /* we know that this will be invalidated */ irsb->next = mkU64(guest_IA_next_instr); @@ -8958,7 +8958,7 @@ s390_irgen_EX(UChar r1, IRTemp addr2) /* and restart */ stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), mkU64(guest_IA_curr_instr))); stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4))); - stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval, + stmt(IRStmt_Exit3(mkexpr(cond), Ijk_TInval, IRConst_U64(guest_IA_curr_instr))); /* Now comes the actual translation */ @@ -9032,7 +9032,7 @@ s390_irgen_SRST(UChar r1, UChar r2) put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); put_gpr_dw0(r1, mkexpr(next)); put_gpr_dw0(r2, binop(Iop_Add64, mkexpr(address), mkU64(1))); - stmt(IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)), + stmt(IRStmt_Exit3(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)), Ijk_Boring, IRConst_U64(guest_IA_curr_instr))); // >= 256 
bytes done CC=3 s390_cc_set(3); @@ -9098,7 +9098,7 @@ s390_irgen_CLST(UChar r1, UChar r2) put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), mkU64(1))); put_gpr_dw0(r2, binop(Iop_Add64, get_gpr_dw0(r2), mkU64(1))); - stmt(IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)), + stmt(IRStmt_Exit3(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)), Ijk_Boring, IRConst_U64(guest_IA_curr_instr))); // >= 256 bytes done CC=3 s390_cc_set(3); @@ -9823,7 +9823,7 @@ s390_irgen_cas_32(UChar r1, UChar r3, IRTemp op2addr) Otherwise, store the old_value from memory in r1 and yield. */ assign(nequal, binop(Iop_CmpNE32, s390_call_calculate_cc(), mkU32(0))); put_gpr_w1(r1, mkite(mkexpr(nequal), mkexpr(old_mem), mkexpr(op1))); - stmt(IRStmt_Exit(mkexpr(nequal), Ijk_Yield, + stmt(IRStmt_Exit3(mkexpr(nequal), Ijk_Yield, IRConst_U64(guest_IA_next_instr))); } @@ -9872,7 +9872,7 @@ s390_irgen_CSG(UChar r1, UChar r3, IRTemp op2addr) Otherwise, store the old_value from memory in r1 and yield. */ assign(nequal, binop(Iop_CmpNE32, s390_call_calculate_cc(), mkU32(0))); put_gpr_dw0(r1, mkite(mkexpr(nequal), mkexpr(old_mem), mkexpr(op1))); - stmt(IRStmt_Exit(mkexpr(nequal), Ijk_Yield, + stmt(IRStmt_Exit3(mkexpr(nequal), Ijk_Yield, IRConst_U64(guest_IA_next_instr))); return "csg"; diff --git a/VEX/priv/guest_x86_defs.h b/VEX/priv/guest_x86_defs.h index 130d84d0f2..e0b15263d9 100644 --- a/VEX/priv/guest_x86_defs.h +++ b/VEX/priv/guest_x86_defs.h @@ -47,7 +47,6 @@ bb_to_IR.h. */ extern DisResult disInstr_X86 ( IRSB* irbb, - Bool put_IP, Bool (*resteerOkFn) ( void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, diff --git a/VEX/priv/guest_x86_helpers.c b/VEX/priv/guest_x86_helpers.c index d14d08b0fb..9f7a8f5c1b 100644 --- a/VEX/priv/guest_x86_helpers.c +++ b/VEX/priv/guest_x86_helpers.c @@ -2670,6 +2670,9 @@ ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, /* VISIBLE TO LIBVEX CLIENT */ void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state ) { + vex_state->host_EvC_FAILADDR = 0; + vex_state->host_EvC_COUNTER = 0; + vex_state->guest_EAX = 0; vex_state->guest_ECX = 0; vex_state->guest_EDX = 0; @@ -2727,8 +2730,6 @@ void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state ) vex_state->guest_IP_AT_SYSCALL = 0; vex_state->padding1 = 0; - vex_state->padding2 = 0; - vex_state->padding3 = 0; } diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index 4b15c61e7c..ee51b43b6b 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -768,7 +768,8 @@ static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, binop( mkSizedOp(tyE,Iop_CasCmpNE8), mkexpr(oldTmp), mkexpr(expTmp) ), Ijk_Boring, /*Ijk_NoRedir*/ - IRConst_U32( restart_point ) + IRConst_U32( restart_point ), + OFFB_EIP )); } @@ -1340,36 +1341,55 @@ static HChar nameISize ( Int size ) /*--- JMP helpers ---*/ /*------------------------------------------------------------*/ -static void jmp_lit( IRJumpKind kind, Addr32 d32 ) +static void jmp_lit( /*MOD*/DisResult* dres, + IRJumpKind kind, Addr32 d32 ) { - irsb->next = mkU32(d32); - irsb->jumpkind = kind; + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == 0); + vassert(dres->continueAt == 0); + vassert(dres->jk_StopHere == Ijk_INVALID); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = kind; + stmt( IRStmt_Put( OFFB_EIP, mkU32(d32) ) ); } -static void jmp_treg( IRJumpKind kind, IRTemp t ) +static void jmp_treg( /*MOD*/DisResult* dres, + IRJumpKind kind, IRTemp t ) { - 
irsb->next = mkexpr(t); - irsb->jumpkind = kind; + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == 0); + vassert(dres->continueAt == 0); + vassert(dres->jk_StopHere == Ijk_INVALID); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = kind; + stmt( IRStmt_Put( OFFB_EIP, mkexpr(t) ) ); } static -void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true ) +void jcc_01( /*MOD*/DisResult* dres, + X86Condcode cond, Addr32 d32_false, Addr32 d32_true ) { Bool invert; X86Condcode condPos; + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == 0); + vassert(dres->continueAt == 0); + vassert(dres->jk_StopHere == Ijk_INVALID); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; condPos = positiveIse_X86Condcode ( cond, &invert ); if (invert) { stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), Ijk_Boring, - IRConst_U32(d32_false) ) ); - irsb->next = mkU32(d32_true); - irsb->jumpkind = Ijk_Boring; + IRConst_U32(d32_false), + OFFB_EIP ) ); + stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) ); } else { stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), Ijk_Boring, - IRConst_U32(d32_true) ) ); - irsb->next = mkU32(d32_false); - irsb->jumpkind = Ijk_Boring; + IRConst_U32(d32_true), + OFFB_EIP ) ); + stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) ); } } @@ -1450,7 +1470,8 @@ IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual ) IRStmt_Exit( binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)), Ijk_MapFail, - IRConst_U32( guest_EIP_curr_instr ) + IRConst_U32( guest_EIP_curr_instr ), + OFFB_EIP ) ); @@ -3009,7 +3030,7 @@ UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK ) /* Group 5 extended opcodes. */ static UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, - DisResult* dres, Bool* decode_OK ) + /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK ) { Int len; UChar modrm; @@ -3054,13 +3075,13 @@ UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); putIReg(4, R_ESP, mkexpr(t2)); storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1)); - jmp_treg(Ijk_Call,t1); - dres->whatNext = Dis_StopHere; + jmp_treg(dres, Ijk_Call, t1); + vassert(dres->whatNext == Dis_StopHere); break; case 4: /* jmp Ev */ vassert(sz == 4); - jmp_treg(Ijk_Boring,t1); - dres->whatNext = Dis_StopHere; + jmp_treg(dres, Ijk_Boring, t1); + vassert(dres->whatNext == Dis_StopHere); break; case 6: /* PUSH Ev */ vassert(sz == 4 || sz == 2); @@ -3110,13 +3131,13 @@ UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); putIReg(4, R_ESP, mkexpr(t2)); storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len)); - jmp_treg(Ijk_Call,t1); - dres->whatNext = Dis_StopHere; + jmp_treg(dres, Ijk_Call, t1); + vassert(dres->whatNext == Dis_StopHere); break; case 4: /* JMP Ev */ vassert(sz == 4); - jmp_treg(Ijk_Boring,t1); - dres->whatNext = Dis_StopHere; + jmp_treg(dres, Ijk_Boring, t1); + vassert(dres->whatNext == Dis_StopHere); break; case 6: /* PUSH Ev */ vassert(sz == 4 || sz == 2); @@ -3253,7 +3274,8 @@ void dis_SCAS ( Int sz, IRTemp t_inc ) We assume the insn is the last one in the basic block, and so emit a jump to the next insn, rather than just falling through. 
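The jmp_lit/jmp_treg/jcc_01 hunks above all follow one new exit protocol: rather than writing irsb->next and irsb->jumpkind directly, the disassembler records how the block ends in the DisResult and emits an explicit IRStmt_Put of the guest program counter. A minimal sketch of that protocol, using the stmt/mkU32 helpers and the OFFB_EIP offset from guest_x86_toIR.c — the helper name end_block_at is illustrative only, not part of the patch:

   static void end_block_at ( /*MOD*/DisResult* dres,
                              IRJumpKind kind, Addr32 nextIP )
   {
      /* The block must still be open: no exit recorded yet. */
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      /* Record the exit in the DisResult, not in the IRSB. */
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = kind;
      /* Write the guest PC into the state explicitly; the back end
         no longer receives it implicitly via irsb->next. */
      stmt( IRStmt_Put( OFFB_EIP, mkU32(nextIP) ) );
   }

Callers then assert dres->whatNext == Dis_StopHere immediately afterwards, as the dis_Grp5 hunks above and the dis_ret change below show.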
*/ static -void dis_REP_op ( X86Condcode cond, +void dis_REP_op ( /*MOD*/DisResult* dres, + X86Condcode cond, void (*dis_OP)(Int, IRTemp), Int sz, Addr32 eip, Addr32 eip_next, HChar* name ) { @@ -3264,7 +3286,7 @@ void dis_REP_op ( X86Condcode cond, stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)), Ijk_Boring, - IRConst_U32(eip_next) ) ); + IRConst_U32(eip_next), OFFB_EIP ) ); putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); @@ -3272,12 +3294,14 @@ void dis_REP_op ( X86Condcode cond, dis_OP (sz, t_inc); if (cond == X86CondAlways) { - jmp_lit(Ijk_Boring,eip); + jmp_lit(dres, Ijk_Boring, eip); + vassert(dres->whatNext == Dis_StopHere); } else { stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond), Ijk_Boring, - IRConst_U32(eip) ) ); - jmp_lit(Ijk_Boring,eip_next); + IRConst_U32(eip), OFFB_EIP ) ); + jmp_lit(dres, Ijk_Boring, eip_next); + vassert(dres->whatNext == Dis_StopHere); } DIP("%s%c\n", name, nameISize(sz)); } @@ -3958,7 +3982,8 @@ UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), Ijk_EmWarn, - IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) + IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), + OFFB_EIP ) ); @@ -4000,7 +4025,8 @@ UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), Ijk_EmWarn, - IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) + IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), + OFFB_EIP ) ); break; @@ -4948,7 +4974,8 @@ UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), Ijk_EmWarn, - IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) + IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), + OFFB_EIP ) ); @@ -6811,13 +6838,15 @@ void dis_pop_segreg ( UInt sreg, Int sz ) } static -void dis_ret ( UInt d32 ) +void dis_ret ( /*MOD*/DisResult* dres, UInt d32 ) { - IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32); + IRTemp t1 = newTemp(Ity_I32); + IRTemp t2 = newTemp(Ity_I32); assign(t1, getIReg(4,R_ESP)); assign(t2, loadLE(Ity_I32,mkexpr(t1))); putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32))); - jmp_treg(Ijk_Ret,t2); + jmp_treg(dres, Ijk_Ret, t2); + vassert(dres->whatNext == Dis_StopHere); } /*------------------------------------------------------------*/ @@ -7523,7 +7552,8 @@ void set_EFLAGS_from_value ( IRTemp t1, binop(Iop_And32, mkexpr(t1), mkU32(1<<18)), mkU32(0) ), Ijk_EmWarn, - IRConst_U32( next_insn_EIP ) + IRConst_U32( next_insn_EIP ), + OFFB_EIP ) ); } @@ -7700,7 +7730,8 @@ static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)), mkU32(0)), Ijk_SigSEGV, - IRConst_U32(guest_EIP_curr_instr) + IRConst_U32(guest_EIP_curr_instr), + OFFB_EIP ) ); } @@ -7854,7 +7885,6 @@ static Bool can_be_used_with_LOCK_prefix ( UChar* opc ) static DisResult disInstr_X86_WRK ( /*OUT*/Bool* expect_CAS, - Bool put_IP, Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -7893,9 +7923,10 @@ DisResult disInstr_X86_WRK ( Bool pfx_lock = False; /* Set result defaults. */ - dres.whatNext = Dis_Continue; - dres.len = 0; - dres.continueAt = 0; + dres.whatNext = Dis_Continue; + dres.len = 0; + dres.continueAt = 0; + dres.jk_StopHere = Ijk_INVALID; *expect_CAS = False; @@ -7904,10 +7935,6 @@ DisResult disInstr_X86_WRK ( vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr); DIP("\t0x%x: ", guest_EIP_bbstart+delta); - /* We may be asked to update the guest EIP before going further. 
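Throughout these hunks IRStmt_Exit acquires a fourth argument: the guest-state offset of the program counter (OFFB_EIP here, OFFB_RIP on amd64), so that a taken side-exit can update the PC itself. The declaration these call sites assume looks roughly like this (the real one lives in the public IR header, which this patch excerpt does not show); the PPC and s390 hunks further up instead call IRStmt_Exit3, presumably the old three-argument form kept so that not-yet-converted front ends continue to build:

   /* Conditional side-exit: if guard evaluates to 1, write dst to
      the guest PC (held at guest-state offset offsIP) and leave the
      block with jump kind jk. */
   extern IRStmt* IRStmt_Exit ( IRExpr*    guard,
                                IRJumpKind jk,
                                IRConst*   dst,
                                Int        offsIP );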
*/ - if (put_IP) - stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) ); - /* Spot "Special" instructions (see comment at top of file). */ { UChar* code = (UChar*)(guest_code + delta); @@ -7926,8 +7953,8 @@ DisResult disInstr_X86_WRK ( /* %EDX = client_request ( %EAX ) */ DIP("%%edx = client_request ( %%eax )\n"); delta += 14; - jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta); + vassert(dres.whatNext == Dis_StopHere); goto decode_success; } else @@ -7949,8 +7976,8 @@ DisResult disInstr_X86_WRK ( assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); putIReg(4, R_ESP, mkexpr(t2)); storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta)); - jmp_treg(Ijk_NoRedir,t1); - dres.whatNext = Dis_StopHere; + jmp_treg(&dres, Ijk_NoRedir, t1); + vassert(dres.whatNext == Dis_StopHere); goto decode_success; } /* We don't know what it is. */ @@ -8537,7 +8564,8 @@ DisResult disInstr_X86_WRK ( IRStmt_Exit( binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), Ijk_EmWarn, - IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) + IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta), + OFFB_EIP ) ); goto decode_success; @@ -12729,7 +12757,8 @@ DisResult disInstr_X86_WRK ( stmt( IRStmt_Exit( binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)), Ijk_Boring, - IRConst_U32(d32) + IRConst_U32(d32), + OFFB_EIP )); DIP("jcxz 0x%x\n", d32); goto decode_success; @@ -12752,13 +12781,11 @@ DisResult disInstr_X86_WRK ( case 0xC2: /* RET imm16 */ d32 = getUDisp16(delta); delta += 2; - dis_ret(d32); - dres.whatNext = Dis_StopHere; + dis_ret(&dres, d32); DIP("ret %d\n", (Int)d32); break; case 0xC3: /* RET */ - dis_ret(0); - dres.whatNext = Dis_StopHere; + dis_ret(&dres, 0); DIP("ret\n"); break; @@ -12782,8 +12809,8 @@ DisResult disInstr_X86_WRK ( /* set %EFLAGS */ set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ ); /* goto new EIP value */ - jmp_treg(Ijk_Ret,t2); - dres.whatNext = Dis_StopHere; + jmp_treg(&dres, Ijk_Ret, t2); + vassert(dres.whatNext == Dis_StopHere); DIP("iret (very kludgey)\n"); break; @@ -12815,8 +12842,8 @@ DisResult disInstr_X86_WRK ( dres.whatNext = Dis_ResteerU; dres.continueAt = (Addr64)(Addr32)d32; } else { - jmp_lit(Ijk_Call,d32); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_Call, d32); + vassert(dres.whatNext == Dis_StopHere); } DIP("call 0x%x\n",d32); } @@ -13060,8 +13087,8 @@ DisResult disInstr_X86_WRK ( /* ------------------------ INT ------------------------ */ case 0xCC: /* INT 3 */ - jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta); + vassert(dres.whatNext == Dis_StopHere); DIP("int $0x3\n"); break; @@ -13082,8 +13109,8 @@ DisResult disInstr_X86_WRK ( This used to handle just 0x40-0x43; Jikes RVM uses a larger range (0x3F-0x49), and this allows some slack as well. 
*/ if (d32 >= 0x3F && d32 <= 0x4F) { - jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2); + vassert(dres.whatNext == Dis_StopHere); DIP("int $0x%x\n", (Int)d32); break; } @@ -13095,24 +13122,24 @@ DisResult disInstr_X86_WRK ( if (d32 == 0x80) { stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, mkU32(guest_EIP_curr_instr) ) ); - jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_Sys_int128, ((Addr32)guest_EIP_bbstart)+delta); + vassert(dres.whatNext == Dis_StopHere); DIP("int $0x80\n"); break; } if (d32 == 0x81) { stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, mkU32(guest_EIP_curr_instr) ) ); - jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_Sys_int129, ((Addr32)guest_EIP_bbstart)+delta); + vassert(dres.whatNext == Dis_StopHere); DIP("int $0x81\n"); break; } if (d32 == 0x82) { stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, mkU32(guest_EIP_curr_instr) ) ); - jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_Sys_int130, ((Addr32)guest_EIP_bbstart)+delta); + vassert(dres.whatNext == Dis_StopHere); DIP("int $0x82\n"); break; } @@ -13129,8 +13156,8 @@ DisResult disInstr_X86_WRK ( dres.whatNext = Dis_ResteerU; dres.continueAt = (Addr64)(Addr32)d32; } else { - jmp_lit(Ijk_Boring,d32); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_Boring, d32); + vassert(dres.whatNext == Dis_StopHere); } DIP("jmp-8 0x%x\n", d32); break; @@ -13143,8 +13170,8 @@ DisResult disInstr_X86_WRK ( dres.whatNext = Dis_ResteerU; dres.continueAt = (Addr64)(Addr32)d32; } else { - jmp_lit(Ijk_Boring,d32); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_Boring, d32); + vassert(dres.whatNext == Dis_StopHere); } DIP("jmp 0x%x\n", d32); break; @@ -13185,7 +13212,8 @@ DisResult disInstr_X86_WRK ( stmt( IRStmt_Exit( mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))), Ijk_Boring, - IRConst_U32(guest_EIP_bbstart+delta) ) ); + IRConst_U32(guest_EIP_bbstart+delta), + OFFB_EIP ) ); dres.whatNext = Dis_ResteerC; dres.continueAt = (Addr64)(Addr32)d32; comment = "(assumed taken)"; @@ -13204,7 +13232,8 @@ DisResult disInstr_X86_WRK ( stmt( IRStmt_Exit( mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)), Ijk_Boring, - IRConst_U32(d32) ) ); + IRConst_U32(d32), + OFFB_EIP ) ); dres.whatNext = Dis_ResteerC; dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta); comment = "(assumed not taken)"; @@ -13212,9 +13241,9 @@ DisResult disInstr_X86_WRK ( else { /* Conservative default translation - end the block at this point. 
*/ - jcc_01( (X86Condcode)(opc - 0x70), + jcc_01( &dres, (X86Condcode)(opc - 0x70), (Addr32)(guest_EIP_bbstart+delta), d32); - dres.whatNext = Dis_StopHere; + vassert(dres.whatNext == Dis_StopHere); } DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment); break; @@ -13227,7 +13256,8 @@ DisResult disInstr_X86_WRK ( stmt( IRStmt_Exit( binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)), Ijk_Boring, - IRConst_U32(d32) + IRConst_U32(d32), + OFFB_EIP )); DIP("jecxz 0x%x\n", d32); break; @@ -13268,7 +13298,7 @@ DisResult disInstr_X86_WRK ( default: vassert(0); } - stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) ); + stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) ); DIP("loop%s 0x%x\n", xtra, d32); break; @@ -13948,33 +13978,32 @@ DisResult disInstr_X86_WRK ( abyte = getIByte(delta); delta++; if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } - dres.whatNext = Dis_StopHere; switch (abyte) { /* According to the Intel manual, "repne movs" should never occur, but * in practice it has happened, so allow for it here... */ case 0xA4: sz = 1; /* REPNE MOVS */ case 0xA5: - dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig, - guest_EIP_bbstart+delta, "repne movs" ); + dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig, + guest_EIP_bbstart+delta, "repne movs" ); break; case 0xA6: sz = 1; /* REPNE CMP */ case 0xA7: - dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig, - guest_EIP_bbstart+delta, "repne cmps" ); + dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig, + guest_EIP_bbstart+delta, "repne cmps" ); break; case 0xAA: sz = 1; /* REPNE STOS */ case 0xAB: - dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig, - guest_EIP_bbstart+delta, "repne stos" ); + dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig, + guest_EIP_bbstart+delta, "repne stos" ); break; case 0xAE: sz = 1; /* REPNE SCAS */ case 0xAF: - dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig, - guest_EIP_bbstart+delta, "repne scas" ); + dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig, + guest_EIP_bbstart+delta, "repne scas" ); break; default: @@ -13991,37 +14020,36 @@ DisResult disInstr_X86_WRK ( abyte = getIByte(delta); delta++; if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } - dres.whatNext = Dis_StopHere; switch (abyte) { case 0xA4: sz = 1; /* REP MOVS */ case 0xA5: - dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig, - guest_EIP_bbstart+delta, "rep movs" ); + dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig, + guest_EIP_bbstart+delta, "rep movs" ); break; case 0xA6: sz = 1; /* REPE CMP */ case 0xA7: - dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig, - guest_EIP_bbstart+delta, "repe cmps" ); + dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig, + guest_EIP_bbstart+delta, "repe cmps" ); break; case 0xAA: sz = 1; /* REP STOS */ case 0xAB: - dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig, - guest_EIP_bbstart+delta, "rep stos" ); + dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig, + guest_EIP_bbstart+delta, "rep stos" ); break; case 0xAC: sz = 1; /* REP LODS */ case 0xAD: - dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig, - guest_EIP_bbstart+delta, "rep lods" ); + dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig, + guest_EIP_bbstart+delta, "rep lods" ); break; case 0xAE: sz = 1; /* REPE SCAS */ case 0xAF: - dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig, - guest_EIP_bbstart+delta, "repe scas" ); + dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig, + guest_EIP_bbstart+delta, "repe scas" ); break; case 0x90: /* REP NOP (PAUSE) */ @@ -14029,13 
+14057,12 @@ DisResult disInstr_X86_WRK ( DIP("rep nop (P4 pause)\n"); /* "observe" the hint. The Vex client needs to be careful not to cause very long delays as a result, though. */ - jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta); + vassert(dres.whatNext == Dis_StopHere); break; case 0xC3: /* REP RET -- same as normal ret? */ - dis_ret(0); - dres.whatNext = Dis_StopHere; + dis_ret(&dres, 0); DIP("rep ret\n"); break; @@ -14741,7 +14768,8 @@ DisResult disInstr_X86_WRK ( mk_x86g_calculate_condition((X86Condcode) (1 ^ (opc - 0x80))), Ijk_Boring, - IRConst_U32(guest_EIP_bbstart+delta) ) ); + IRConst_U32(guest_EIP_bbstart+delta), + OFFB_EIP ) ); dres.whatNext = Dis_ResteerC; dres.continueAt = (Addr64)(Addr32)d32; comment = "(assumed taken)"; @@ -14760,7 +14788,8 @@ DisResult disInstr_X86_WRK ( stmt( IRStmt_Exit( mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)), Ijk_Boring, - IRConst_U32(d32) ) ); + IRConst_U32(d32), + OFFB_EIP ) ); dres.whatNext = Dis_ResteerC; dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta); comment = "(assumed not taken)"; @@ -14768,9 +14797,9 @@ DisResult disInstr_X86_WRK ( else { /* Conservative default translation - end the block at this point. */ - jcc_01( (X86Condcode)(opc - 0x80), + jcc_01( &dres, (X86Condcode)(opc - 0x80), (Addr32)(guest_EIP_bbstart+delta), d32); - dres.whatNext = Dis_StopHere; + vassert(dres.whatNext == Dis_StopHere); } DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment); break; @@ -14896,8 +14925,8 @@ DisResult disInstr_X86_WRK ( point if the syscall needs to be restarted. */ stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, mkU32(guest_EIP_curr_instr) ) ); - jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/); + vassert(dres.whatNext == Dis_StopHere); DIP("sysenter"); break; @@ -15073,8 +15102,8 @@ DisResult disInstr_X86_WRK ( insn, but nevertheless be paranoid and update it again right now. */ stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) ); - jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr); - dres.whatNext = Dis_StopHere; + jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr); + vassert(dres.whatNext == Dis_StopHere); dres.len = 0; /* We also need to say that a CAS is not expected now, regardless of what it might have been set to at the start of the function, @@ -15088,6 +15117,20 @@ DisResult disInstr_X86_WRK ( decode_success: /* All decode successes end up here. */ + switch (dres.whatNext) { + case Dis_Continue: + stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) ); + break; + case Dis_ResteerU: + case Dis_ResteerC: + stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) ); + break; + case Dis_StopHere: + break; + default: + vassert(0); + } + DIP("\n"); dres.len = delta - delta_start; return dres; @@ -15105,7 +15148,6 @@ DisResult disInstr_X86_WRK ( is located in host memory at &guest_code[delta]. */ DisResult disInstr_X86 ( IRSB* irsb_IN, - Bool put_IP, Bool (*resteerOkFn) ( void*, Addr64 ), Bool resteerCisOk, void* callback_opaque, @@ -15131,7 +15173,7 @@ DisResult disInstr_X86 ( IRSB* irsb_IN, x1 = irsb_IN->stmts_used; expect_CAS = False; - dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn, + dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn, resteerCisOk, callback_opaque, delta, archinfo, abiinfo ); @@ -15151,7 +15193,7 @@ DisResult disInstr_X86 ( IRSB* irsb_IN, /* inconsistency detected. 
re-disassemble the instruction so as to generate a useful error message; then assert. */ vex_traceflags |= VEX_TRACE_FE; - dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn, + dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn, resteerCisOk, callback_opaque, delta, archinfo, abiinfo ); diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index 8b97772a94..479a0c56b7 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -118,13 +118,6 @@ HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); } HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); } HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); } -//.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); } -//.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); } -//.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); } -//.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); } -//.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); } -//.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); } -//.. HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); } HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); } HReg hregAMD64_XMM2 ( void ) { return mkHReg( 2, HRcVec128, False); } @@ -231,18 +224,6 @@ AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) { return am; } -//.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) { -//.. switch (am->tag) { -//.. case Xam_IR: -//.. return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg ); -//.. case Xam_IRRS: -//.. return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base, -//.. am->Xam.IRRS.index, am->Xam.IRRS.shift ); -//.. default: -//.. vpanic("dopyAMD64AMode"); -//.. } -//.. } - void ppAMD64AMode ( AMD64AMode* am ) { switch (am->tag) { case Aam_IR: @@ -538,10 +519,6 @@ HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) { HChar* showA87FpOp ( A87FpOp op ) { switch (op) { -//.. case Xfp_ADD: return "add"; -//.. case Xfp_SUB: return "sub"; -//.. case Xfp_MUL: return "mul"; -//.. case Xfp_DIV: return "div"; case Afp_SCALE: return "scale"; case Afp_ATAN: return "atan"; case Afp_YL2X: return "yl2x"; @@ -549,9 +526,6 @@ HChar* showA87FpOp ( A87FpOp op ) { case Afp_PREM: return "prem"; case Afp_PREM1: return "prem1"; case Afp_SQRT: return "sqrt"; -//.. case Xfp_ABS: return "abs"; -//.. case Xfp_NEG: return "chs"; -//.. case Xfp_MOV: return "mov"; case Afp_SIN: return "sin"; case Afp_COS: return "cos"; case Afp_TAN: return "tan"; @@ -717,16 +691,6 @@ AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) { vassert(sz == 4 || sz == 8); return i; } -//.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_Sh3232; -//.. i->Xin.Sh3232.op = op; -//.. i->Xin.Sh3232.amt = amt; -//.. i->Xin.Sh3232.src = src; -//.. i->Xin.Sh3232.dst = dst; -//.. vassert(op == Xsh_SHL || op == Xsh_SHR); -//.. return i; -//.. 
} AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) { AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); i->tag = Ain_Push; @@ -742,14 +706,37 @@ AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) vassert(regparms >= 0 && regparms <= 6); return i; } -AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) { - AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); - i->tag = Ain_Goto; - i->Ain.Goto.cond = cond; - i->Ain.Goto.dst = dst; - i->Ain.Goto.jk = jk; + +AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP, + AMD64CondCode cond, Bool toFastEP ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_XDirect; + i->Ain.XDirect.dstGA = dstGA; + i->Ain.XDirect.amRIP = amRIP; + i->Ain.XDirect.cond = cond; + i->Ain.XDirect.toFastEP = toFastEP; + return i; +} +AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP, + AMD64CondCode cond ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_XIndir; + i->Ain.XIndir.dstGA = dstGA; + i->Ain.XIndir.amRIP = amRIP; + i->Ain.XIndir.cond = cond; + return i; +} +AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP, + AMD64CondCode cond, IRJumpKind jk ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_XAssisted; + i->Ain.XAssisted.dstGA = dstGA; + i->Ain.XAssisted.amRIP = amRIP; + i->Ain.XAssisted.cond = cond; + i->Ain.XAssisted.jk = jk; return i; } + AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) { AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); i->tag = Ain_CMov64; @@ -863,72 +850,12 @@ AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr ) i->Ain.A87StSW.addr = addr; return i; } - -//.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_FpUnary; -//.. i->Xin.FpUnary.op = op; -//.. i->Xin.FpUnary.src = src; -//.. i->Xin.FpUnary.dst = dst; -//.. return i; -//.. } -//.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_FpBinary; -//.. i->Xin.FpBinary.op = op; -//.. i->Xin.FpBinary.srcL = srcL; -//.. i->Xin.FpBinary.srcR = srcR; -//.. i->Xin.FpBinary.dst = dst; -//.. return i; -//.. } -//.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_FpLdSt; -//.. i->Xin.FpLdSt.isLoad = isLoad; -//.. i->Xin.FpLdSt.sz = sz; -//.. i->Xin.FpLdSt.reg = reg; -//.. i->Xin.FpLdSt.addr = addr; -//.. vassert(sz == 4 || sz == 8); -//.. return i; -//.. } -//.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, -//.. HReg reg, AMD64AMode* addr ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_FpLdStI; -//.. i->Xin.FpLdStI.isLoad = isLoad; -//.. i->Xin.FpLdStI.sz = sz; -//.. i->Xin.FpLdStI.reg = reg; -//.. i->Xin.FpLdStI.addr = addr; -//.. vassert(sz == 2 || sz == 4 || sz == 8); -//.. return i; -//.. } -//.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_Fp64to32; -//.. i->Xin.Fp64to32.src = src; -//.. i->Xin.Fp64to32.dst = dst; -//.. return i; -//.. } -//.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_FpCMov; -//.. i->Xin.FpCMov.cond = cond; -//.. i->Xin.FpCMov.src = src; -//.. 
i->Xin.FpCMov.dst = dst; -//.. vassert(cond != Xcc_ALWAYS); -//.. return i; -//.. } AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) { AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); i->tag = Ain_LdMXCSR; i->Ain.LdMXCSR.addr = addr; return i; } -//.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_FpStSW_AX; -//.. return i; -//.. } AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) { AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); i->tag = Ain_SseUComIS; @@ -970,15 +897,6 @@ AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst ) i->Ain.SseSDSS.dst = dst; return i; } - -//.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) { -//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); -//.. i->tag = Xin_SseConst; -//.. i->Xin.SseConst.con = con; -//.. i->Xin.SseConst.dst = dst; -//.. vassert(hregClass(dst) == HRcVec128); -//.. return i; -//.. } AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg reg, AMD64AMode* addr ) { AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); @@ -1062,6 +980,19 @@ AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) { vassert(order >= 0 && order <= 0xFF); return i; } +AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter, + AMD64AMode* amFailAddr ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_EvCheck; + i->Ain.EvCheck.amCounter = amCounter; + i->Ain.EvCheck.amFailAddr = amFailAddr; + return i; +} +AMD64Instr* AMD64Instr_ProfInc ( void ) { + AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr)); + i->tag = Ain_ProfInc; + return i; +} void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) { @@ -1121,16 +1052,6 @@ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) showAMD64ScalarSz(i->Ain.Div.sz)); ppAMD64RM(i->Ain.Div.src); return; -//.. case Xin_Sh3232: -//.. vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op)); -//.. if (i->Xin.Sh3232.amt == 0) -//.. vex_printf(" %%cl,"); -//.. else -//.. vex_printf(" $%d,", i->Xin.Sh3232.amt); -//.. ppHRegAMD64(i->Xin.Sh3232.src); -//.. vex_printf(","); -//.. ppHRegAMD64(i->Xin.Sh3232.dst); -//.. return; case Ain_Push: vex_printf("pushq "); ppAMD64RMI(i->Ain.Push.src); @@ -1142,25 +1063,41 @@ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) i->Ain.Call.regparms ); vex_printf("0x%llx", i->Ain.Call.target); break; - case Ain_Goto: - if (i->Ain.Goto.cond != Acc_ALWAYS) { - vex_printf("if (%%rflags.%s) { ", - showAMD64CondCode(i->Ain.Goto.cond)); - } - if (i->Ain.Goto.jk != Ijk_Boring - && i->Ain.Goto.jk != Ijk_Call - && i->Ain.Goto.jk != Ijk_Ret) { - vex_printf("movl $"); - ppIRJumpKind(i->Ain.Goto.jk); - vex_printf(",%%ebp ; "); - } + + case Ain_XDirect: + vex_printf("(xDirect) "); + vex_printf("if (%%rflags.%s) { ", + showAMD64CondCode(i->Ain.XDirect.cond)); + vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA); + vex_printf("movq %%r11,"); + ppAMD64AMode(i->Ain.XDirect.amRIP); + vex_printf("; "); + vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }", + i->Ain.XDirect.toFastEP ? 
"fast" : "slow"); + return; + case Ain_XIndir: + vex_printf("(xIndir) "); + vex_printf("if (%%rflags.%s) { ", + showAMD64CondCode(i->Ain.XIndir.cond)); vex_printf("movq "); - ppAMD64RI(i->Ain.Goto.dst); - vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx"); - if (i->Ain.Goto.cond != Acc_ALWAYS) { - vex_printf(" }"); - } + ppHRegAMD64(i->Ain.XIndir.dstGA); + vex_printf(","); + ppAMD64AMode(i->Ain.XIndir.amRIP); + vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }"); + return; + case Ain_XAssisted: + vex_printf("(xAssisted) "); + vex_printf("if (%%rflags.%s) { ", + showAMD64CondCode(i->Ain.XAssisted.cond)); + vex_printf("movq "); + ppHRegAMD64(i->Ain.XAssisted.dstGA); + vex_printf(","); + ppAMD64AMode(i->Ain.XAssisted.amRIP); + vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp", + (Int)i->Ain.XAssisted.jk); + vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }"); return; + case Ain_CMov64: vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond)); ppAMD64RM(i->Ain.CMov64.src); @@ -1241,67 +1178,6 @@ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) vex_printf("fstsw "); ppAMD64AMode(i->Ain.A87StSW.addr); break; -//.. case Xin_FpUnary: -//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op)); -//.. ppHRegAMD64(i->Xin.FpUnary.src); -//.. vex_printf(","); -//.. ppHRegAMD64(i->Xin.FpUnary.dst); -//.. break; -//.. case Xin_FpBinary: -//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op)); -//.. ppHRegAMD64(i->Xin.FpBinary.srcL); -//.. vex_printf(","); -//.. ppHRegAMD64(i->Xin.FpBinary.srcR); -//.. vex_printf(","); -//.. ppHRegAMD64(i->Xin.FpBinary.dst); -//.. break; -//.. case Xin_FpLdSt: -//.. if (i->Xin.FpLdSt.isLoad) { -//.. vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); -//.. ppAMD64AMode(i->Xin.FpLdSt.addr); -//.. vex_printf(", "); -//.. ppHRegAMD64(i->Xin.FpLdSt.reg); -//.. } else { -//.. vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F'); -//.. ppHRegAMD64(i->Xin.FpLdSt.reg); -//.. vex_printf(", "); -//.. ppAMD64AMode(i->Xin.FpLdSt.addr); -//.. } -//.. return; -//.. case Xin_FpLdStI: -//.. if (i->Xin.FpLdStI.isLoad) { -//.. vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" : -//.. i->Xin.FpLdStI.sz==4 ? "l" : "w"); -//.. ppAMD64AMode(i->Xin.FpLdStI.addr); -//.. vex_printf(", "); -//.. ppHRegAMD64(i->Xin.FpLdStI.reg); -//.. } else { -//.. vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" : -//.. i->Xin.FpLdStI.sz==4 ? "l" : "w"); -//.. ppHRegAMD64(i->Xin.FpLdStI.reg); -//.. vex_printf(", "); -//.. ppAMD64AMode(i->Xin.FpLdStI.addr); -//.. } -//.. return; -//.. case Xin_Fp64to32: -//.. vex_printf("gdtof "); -//.. ppHRegAMD64(i->Xin.Fp64to32.src); -//.. vex_printf(","); -//.. ppHRegAMD64(i->Xin.Fp64to32.dst); -//.. return; -//.. case Xin_FpCMov: -//.. vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond)); -//.. ppHRegAMD64(i->Xin.FpCMov.src); -//.. vex_printf(","); -//.. ppHRegAMD64(i->Xin.FpCMov.dst); -//.. return; -//.. case Xin_FpLdStCW: -//.. vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw "); -//.. ppAMD64AMode(i->Xin.FpLdStCW.addr); -//.. return; -//.. case Xin_FpStSW_AX: -//.. vex_printf("fstsw %%ax"); -//.. return; case Ain_LdMXCSR: vex_printf("ldmxcsr "); ppAMD64AMode(i->Ain.LdMXCSR.addr); @@ -1334,10 +1210,6 @@ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) vex_printf(","); ppHRegAMD64(i->Ain.SseSDSS.dst); break; -//.. case Xin_SseConst: -//.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con); -//.. ppHRegAMD64(i->Xin.SseConst.dst); -//.. 
break; case Ain_SseLdSt: switch (i->Ain.SseLdSt.sz) { case 4: vex_printf("movss "); break; @@ -1403,7 +1275,16 @@ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 ) vex_printf(","); ppHRegAMD64(i->Ain.SseShuf.dst); return; - + case Ain_EvCheck: + vex_printf("(evCheck) decl "); + ppAMD64AMode(i->Ain.EvCheck.amCounter); + vex_printf("; jns nofail; jmp *"); + ppAMD64AMode(i->Ain.EvCheck.amFailAddr); + vex_printf("; nofail:"); + return; + case Ain_ProfInc: + vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)"); + return; default: vpanic("ppAMD64Instr"); } @@ -1470,12 +1351,6 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) addHRegUse(u, HRmModify, hregAMD64_RAX()); addHRegUse(u, HRmModify, hregAMD64_RDX()); return; -//.. case Xin_Sh3232: -//.. addHRegUse(u, HRmRead, i->Xin.Sh3232.src); -//.. addHRegUse(u, HRmModify, i->Xin.Sh3232.dst); -//.. if (i->Xin.Sh3232.amt == 0) -//.. addHRegUse(u, HRmRead, hregAMD64_ECX()); -//.. return; case Ain_Push: addRegUsage_AMD64RMI(u, i->Ain.Push.src); addHRegUse(u, HRmModify, hregAMD64_RSP()); @@ -1533,16 +1408,25 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) /* Upshot of this is that the assembler really must use r11, and no other, as a destination temporary. */ return; - case Ain_Goto: - addRegUsage_AMD64RI(u, i->Ain.Goto.dst); - addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */ - addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */ - if (i->Ain.Goto.jk != Ijk_Boring - && i->Ain.Goto.jk != Ijk_Call - && i->Ain.Goto.jk != Ijk_Ret) - /* note, this is irrelevant since rbp is not actually - available to the allocator. But still .. */ - addHRegUse(u, HRmWrite, hregAMD64_RBP()); + /* XDirect/XIndir/XAssisted are also a bit subtle. They + conditionally exit the block. Hence we only need to list (1) + the registers that they read, and (2) the registers that they + write in the case where the block is not exited. (2) is + empty, hence only (1) is relevant here. */ + case Ain_XDirect: + /* Don't bother to mention the write to %r11, since it is not + available to the allocator. */ + addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP); + return; + case Ain_XIndir: + /* Ditto re %r11 */ + addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA); + addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP); + return; + case Ain_XAssisted: + /* Ditto re %r11 and %rbp (the baseblock ptr) */ + addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA); + addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP); return; case Ain_CMov64: addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead); @@ -1594,39 +1478,9 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) case Ain_A87StSW: addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr); return; -//.. case Xin_FpUnary: -//.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src); -//.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst); -//.. return; -//.. case Xin_FpBinary: -//.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL); -//.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR); -//.. addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst); -//.. return; -//.. case Xin_FpLdSt: -//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr); -//.. addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead, -//.. i->Xin.FpLdSt.reg); -//.. return; -//.. case Xin_FpLdStI: -//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr); -//.. addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead, -//.. i->Xin.FpLdStI.reg); -//.. return; -//.. case Xin_Fp64to32: -//.. 
addHRegUse(u, HRmRead, i->Xin.Fp64to32.src); -//.. addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst); -//.. return; -//.. case Xin_FpCMov: -//.. addHRegUse(u, HRmRead, i->Xin.FpCMov.src); -//.. addHRegUse(u, HRmModify, i->Xin.FpCMov.dst); -//.. return; case Ain_LdMXCSR: addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr); return; -//.. case Xin_FpStSW_AX: -//.. addHRegUse(u, HRmWrite, hregAMD64_EAX()); -//.. return; case Ain_SseUComIS: addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL); addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR); @@ -1653,9 +1507,6 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr); addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg); return; -//.. case Xin_SseConst: -//.. addHRegUse(u, HRmWrite, i->Xin.SseConst.dst); -//.. return; case Ain_Sse32Fx4: vassert(i->Ain.Sse32Fx4.op != Asse_MOV); unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF @@ -1716,6 +1567,15 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->Ain.SseShuf.src); addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst); return; + case Ain_EvCheck: + /* We expect both amodes only to mention %rbp, so this is in + fact pointless, since %rbp isn't allocatable, but anyway.. */ + addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter); + addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr); + return; + case Ain_ProfInc: + addHRegUse(u, HRmWrite, hregAMD64_R11()); + return; default: ppAMD64Instr(i, mode64); vpanic("getRegUsage_AMD64Instr"); @@ -1766,17 +1626,21 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) case Ain_Div: mapRegs_AMD64RM(m, i->Ain.Div.src); return; -//.. case Xin_Sh3232: -//.. mapReg(m, &i->Xin.Sh3232.src); -//.. mapReg(m, &i->Xin.Sh3232.dst); -//.. return; case Ain_Push: mapRegs_AMD64RMI(m, i->Ain.Push.src); return; case Ain_Call: return; - case Ain_Goto: - mapRegs_AMD64RI(m, i->Ain.Goto.dst); + case Ain_XDirect: + mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP); + return; + case Ain_XIndir: + mapReg(m, &i->Ain.XIndir.dstGA); + mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP); + return; + case Ain_XAssisted: + mapReg(m, &i->Ain.XAssisted.dstGA); + mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP); return; case Ain_CMov64: mapRegs_AMD64RM(m, i->Ain.CMov64.src); @@ -1822,36 +1686,9 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) case Ain_A87StSW: mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr); return; -//.. case Xin_FpUnary: -//.. mapReg(m, &i->Xin.FpUnary.src); -//.. mapReg(m, &i->Xin.FpUnary.dst); -//.. return; -//.. case Xin_FpBinary: -//.. mapReg(m, &i->Xin.FpBinary.srcL); -//.. mapReg(m, &i->Xin.FpBinary.srcR); -//.. mapReg(m, &i->Xin.FpBinary.dst); -//.. return; -//.. case Xin_FpLdSt: -//.. mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr); -//.. mapReg(m, &i->Xin.FpLdSt.reg); -//.. return; -//.. case Xin_FpLdStI: -//.. mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr); -//.. mapReg(m, &i->Xin.FpLdStI.reg); -//.. return; -//.. case Xin_Fp64to32: -//.. mapReg(m, &i->Xin.Fp64to32.src); -//.. mapReg(m, &i->Xin.Fp64to32.dst); -//.. return; -//.. case Xin_FpCMov: -//.. mapReg(m, &i->Xin.FpCMov.src); -//.. mapReg(m, &i->Xin.FpCMov.dst); -//.. return; case Ain_LdMXCSR: mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr); return; -//.. case Xin_FpStSW_AX: -//.. 
return; case Ain_SseUComIS: mapReg(m, &i->Ain.SseUComIS.srcL); mapReg(m, &i->Ain.SseUComIS.srcR); @@ -1869,9 +1706,6 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) mapReg(m, &i->Ain.SseSDSS.src); mapReg(m, &i->Ain.SseSDSS.dst); return; -//.. case Xin_SseConst: -//.. mapReg(m, &i->Xin.SseConst.dst); -//.. return; case Ain_SseLdSt: mapReg(m, &i->Ain.SseLdSt.reg); mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr); @@ -1908,6 +1742,15 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) mapReg(m, &i->Ain.SseShuf.src); mapReg(m, &i->Ain.SseShuf.dst); return; + case Ain_EvCheck: + /* We expect both amodes only to mention %rbp, so this is in + fact pointless, since %rbp isn't allocatable, but anyway.. */ + mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter); + mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr); + return; + case Ain_ProfInc: + /* hardwires r11 -- nothing to modify. */ + return; default: ppAMD64Instr(i, mode64); vpanic("mapRegs_AMD64Instr"); @@ -2252,101 +2095,19 @@ static UChar* do_ffree_st ( UChar* p, Int n ) return p; } -//.. /* Emit fstp %st(i), 1 <= i <= 7 */ -//.. static UChar* do_fstp_st ( UChar* p, Int i ) -//.. { -//.. vassert(1 <= i && i <= 7); -//.. *p++ = 0xDD; -//.. *p++ = 0xD8+i; -//.. return p; -//.. } -//.. -//.. /* Emit fld %st(i), 0 <= i <= 6 */ -//.. static UChar* do_fld_st ( UChar* p, Int i ) -//.. { -//.. vassert(0 <= i && i <= 6); -//.. *p++ = 0xD9; -//.. *p++ = 0xC0+i; -//.. return p; -//.. } -//.. -//.. /* Emit f %st(0) */ -//.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op ) -//.. { -//.. switch (op) { -//.. case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break; -//.. case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break; -//.. case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; -//.. case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; -//.. case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; -//.. case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break; -//.. case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; -//.. case Xfp_MOV: break; -//.. case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */ -//.. *p++ = 0xD9; *p++ = 0xF2; /* fptan */ -//.. *p++ = 0xD9; *p++ = 0xF7; /* fincstp */ -//.. break; -//.. default: vpanic("do_fop1_st: unknown op"); -//.. } -//.. return p; -//.. } -//.. -//.. /* Emit f %st(i), 1 <= i <= 5 */ -//.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i ) -//.. { -//.. # define fake(_n) mkHReg((_n), HRcInt32, False) -//.. Int subopc; -//.. switch (op) { -//.. case Xfp_ADD: subopc = 0; break; -//.. case Xfp_SUB: subopc = 4; break; -//.. case Xfp_MUL: subopc = 1; break; -//.. case Xfp_DIV: subopc = 6; break; -//.. default: vpanic("do_fop2_st: unknown op"); -//.. } -//.. *p++ = 0xD8; -//.. p = doAMode_R(p, fake(subopc), fake(i)); -//.. return p; -//.. # undef fake -//.. } -//.. -//.. /* Push a 32-bit word on the stack. The word depends on tags[3:0]; -//.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[]. -//.. */ -//.. static UChar* push_word_from_tags ( UChar* p, UShort tags ) -//.. { -//.. UInt w; -//.. vassert(0 == (tags & ~0xF)); -//.. if (tags == 0) { -//.. /* pushl $0x00000000 */ -//.. *p++ = 0x6A; -//.. *p++ = 0x00; -//.. } -//.. else -//.. /* pushl $0xFFFFFFFF */ -//.. if (tags == 0xF) { -//.. *p++ = 0x6A; -//.. *p++ = 0xFF; -//.. } else { -//.. vassert(0); /* awaiting test case */ -//.. w = 0; -//.. if (tags & 1) w |= 0x000000FF; -//.. if (tags & 2) w |= 0x0000FF00; -//.. if (tags & 4) w |= 0x00FF0000; -//.. if (tags & 8) w |= 0xFF000000; -//.. *p++ = 0x68; -//.. p = emit32(p, w); -//.. 
} -//.. return p; -//.. } - /* Emit an instruction into buf and return the number of bytes used. Note that buf is not the insn's final place, and therefore it is - imperative to emit position-independent code. */ + imperative to emit position-independent code. If the emitted + instruction was a profiler inc, set *is_profInc to True, else + leave it unchanged. */ -Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, +Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, AMD64Instr* i, Bool mode64, - void* dispatch_unassisted, - void* dispatch_assisted ) + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ) { UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; UInt xtra; @@ -2545,35 +2306,6 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, goto bad; } } -//.. /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not -//.. allowed here. */ -//.. opc = subopc_imm = opc_imma = 0; -//.. switch (i->Xin.Alu32M.op) { -//.. case Xalu_ADD: opc = 0x01; subopc_imm = 0; break; -//.. case Xalu_SUB: opc = 0x29; subopc_imm = 5; break; -//.. default: goto bad; -//.. } -//.. switch (i->Xin.Alu32M.src->tag) { -//.. case Xri_Reg: -//.. *p++ = opc; -//.. p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg, -//.. i->Xin.Alu32M.dst); -//.. goto done; -//.. case Xri_Imm: -//.. if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) { -//.. *p++ = 0x83; -//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); -//.. *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32; -//.. goto done; -//.. } else { -//.. *p++ = 0x81; -//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst); -//.. p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32); -//.. goto done; -//.. } -//.. default: -//.. goto bad; -//.. } break; case Ain_Sh64: @@ -2756,21 +2488,6 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, } break; -//.. case Xin_Sh3232: -//.. vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR); -//.. if (i->Xin.Sh3232.amt == 0) { -//.. /* shldl/shrdl by %cl */ -//.. *p++ = 0x0F; -//.. if (i->Xin.Sh3232.op == Xsh_SHL) { -//.. *p++ = 0xA5; -//.. } else { -//.. *p++ = 0xAD; -//.. } -//.. p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst); -//.. goto done; -//.. } -//.. break; - case Ain_Push: switch (i->Ain.Push.src->tag) { case Armi_Mem: @@ -2822,117 +2539,167 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, goto done; } - case Ain_Goto: { - void* dispatch_to_use = NULL; - vassert(dispatch_unassisted != NULL); - vassert(dispatch_assisted != NULL); + case Ain_XDirect: { + /* NB: what goes on here has to be very closely coordinated with the + chainXDirect_AMD64 and unchainXDirect_AMD64 below. */ + /* We're generating chain-me requests here, so we need to be + sure this is actually allowed -- no-redir translations can't + use chain-me's. Hence: */ + vassert(disp_cp_chain_me_to_slowEP != NULL); + vassert(disp_cp_chain_me_to_fastEP != NULL); + + HReg r11 = hregAMD64_R11(); /* Use ptmp for backpatching conditional jumps. */ ptmp = NULL; /* First off, if this is conditional, create a conditional jump over the rest of it. */ - if (i->Ain.Goto.cond != Acc_ALWAYS) { + if (i->Ain.XDirect.cond != Acc_ALWAYS) { /* jmp fwds if !condition */ - *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1)); + *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1))); ptmp = p; /* fill in this bit later */ *p++ = 0; /* # of bytes to jump over; don't know how many yet. 
*/ } - /* If a non-boring, set %rbp (the guest state pointer) - appropriately. Since these numbers are all small positive - integers, we can get away with "movl $N, %ebp" rather than - the longer "movq $N, %rbp". Also, decide which dispatcher we - need to use. */ - dispatch_to_use = dispatch_assisted; - - /* movl $magic_number, %ebp */ - switch (i->Ain.Goto.jk) { - case Ijk_ClientReq: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break; - case Ijk_Sys_syscall: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break; - case Ijk_Sys_int32: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SYS_INT32); break; - case Ijk_Yield: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_YIELD); break; - case Ijk_EmWarn: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_EMWARN); break; - case Ijk_MapFail: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_MAPFAIL); break; - case Ijk_NoDecode: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_NODECODE); break; - case Ijk_TInval: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_TINVAL); break; - case Ijk_NoRedir: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_NOREDIR); break; - case Ijk_SigTRAP: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SIGTRAP); break; - case Ijk_SigSEGV: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SIGSEGV); break; - case Ijk_Ret: - case Ijk_Call: - case Ijk_Boring: - dispatch_to_use = dispatch_unassisted; - break; - default: - ppIRJumpKind(i->Ain.Goto.jk); - vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind"); + /* Update the guest RIP. */ + /* movabsq $dstGA, %r11 */ + *p++ = 0x49; + *p++ = 0xBB; + p = emit64(p, i->Ain.XDirect.dstGA); + /* movq %r11, amRIP */ + *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP); + *p++ = 0x89; + p = doAMode_M(p, r11, i->Ain.XDirect.amRIP); + + /* --- FIRST PATCHABLE BYTE follows --- */ + /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling + to) backs up the return address, so as to find the address of + the first patchable byte. So: don't change the length of the + two instructions below. */ + /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */ + *p++ = 0x49; + *p++ = 0xBB; + void* disp_cp_chain_me + = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP + : disp_cp_chain_me_to_slowEP; + p = emit64(p, Ptr_to_ULong(disp_cp_chain_me)); + /* call *%r11 */ + *p++ = 0x41; + *p++ = 0xFF; + *p++ = 0xD3; + /* --- END of PATCHABLE BYTES --- */ + + /* Fix up the conditional jump, if there was one. */ + if (i->Ain.XDirect.cond != Acc_ALWAYS) { + Int delta = p - ptmp; + vassert(delta > 0 && delta < 40); + *ptmp = toUChar(delta-1); } + goto done; + } - /* Get the destination address into %rax */ - if (i->Ain.Goto.dst->tag == Ari_Imm) { - /* movl sign-ext($immediate), %rax ; ret */ - *p++ = 0x48; - *p++ = 0xC7; - *p++ = 0xC0; - p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32); - } else { - vassert(i->Ain.Goto.dst->tag == Ari_Reg); - /* movq %reg, %rax ; ret */ - if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) { - *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX()); - *p++ = 0x89; - p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX()); - } + case Ain_XIndir: { + /* We're generating transfers that could lead indirectly to a + chain-me, so we need to be sure this is actually allowed -- + no-redir translations are not allowed to reach normal + translations without going through the scheduler. That means + no XDirects or XIndirs out from no-redir translations. + Hence: */ + vassert(disp_cp_xindir != NULL); + + /* Use ptmp for backpatching conditional jumps. 
*/ + ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. */ + if (i->Ain.XIndir.cond != Acc_ALWAYS) { + /* jmp fwds if !condition */ + *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1))); + ptmp = p; /* fill in this bit later */ + *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ } - /* Get the dispatcher address into %rdx. This has to happen - after the load of %rax since %rdx might be carrying the value - destined for %rax immediately prior to this Ain_Goto. */ - vassert(sizeof(ULong) == sizeof(void*)); + /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */ + *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP); + *p++ = 0x89; + p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP); + /* movabsq $disp_indir, %r11 */ + *p++ = 0x49; + *p++ = 0xBB; + p = emit64(p, Ptr_to_ULong(disp_cp_xindir)); + /* jmp *%r11 */ + *p++ = 0x41; + *p++ = 0xFF; + *p++ = 0xE3; - if (fitsIn32Bits(Ptr_to_ULong(dispatch_to_use))) { - /* movl sign-extend(imm32), %rdx */ - *p++ = 0x48; - *p++ = 0xC7; - *p++ = 0xC2; - p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use)); - } else { - /* movabsq $imm64, %rdx */ - *p++ = 0x48; - *p++ = 0xBA; - p = emit64(p, Ptr_to_ULong(dispatch_to_use)); + /* Fix up the conditional jump, if there was one. */ + if (i->Ain.XIndir.cond != Acc_ALWAYS) { + Int delta = p - ptmp; + vassert(delta > 0 && delta < 40); + *ptmp = toUChar(delta-1); + } + goto done; + } + + case Ain_XAssisted: { + /* Use ptmp for backpatching conditional jumps. */ + ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. */ + if (i->Ain.XAssisted.cond != Acc_ALWAYS) { + /* jmp fwds if !condition */ + *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1))); + ptmp = p; /* fill in this bit later */ + *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ } - /* jmp *%rdx */ + + /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */ + *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP); + *p++ = 0x89; + p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP); + /* movl $magic_number, %ebp. Since these numbers are all small positive + integers, we can get away with "movl $N, %ebp" rather than + the longer "movq $N, %rbp". */ + UInt trcval = 0; + switch (i->Ain.XAssisted.jk) { + case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; + case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; + case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break; + case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; + case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; + case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; + case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; + case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; + case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; + case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; + case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; + /* We don't expect to see the following being assisted. 
*/ + case Ijk_Ret: + case Ijk_Call: + /* fallthrough */ + default: + ppIRJumpKind(i->Ain.XAssisted.jk); + vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind"); + } + vassert(trcval != 0); + *p++ = 0xBD; + p = emit32(p, trcval); + /* movabsq $disp_assisted, %r11 */ + *p++ = 0x49; + *p++ = 0xBB; + p = emit64(p, Ptr_to_ULong(disp_cp_xassisted)); + /* jmp *%r11 */ + *p++ = 0x41; *p++ = 0xFF; - *p++ = 0xE2; + *p++ = 0xE3; /* Fix up the conditional jump, if there was one. */ - if (i->Ain.Goto.cond != Acc_ALWAYS) { + if (i->Ain.XAssisted.cond != Acc_ALWAYS) { Int delta = p - ptmp; - vassert(delta > 0 && delta < 30); + vassert(delta > 0 && delta < 40); *ptmp = toUChar(delta-1); } goto done; @@ -3164,165 +2931,6 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, } break; -//.. case Xin_FpUnary: -//.. /* gop %src, %dst -//.. --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst) -//.. */ -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src)); -//.. p = do_fop1_st(p, i->Xin.FpUnary.op); -//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst)); -//.. goto done; -//.. -//.. case Xin_FpBinary: -//.. if (i->Xin.FpBinary.op == Xfp_YL2X -//.. || i->Xin.FpBinary.op == Xfp_YL2XP1) { -//.. /* Have to do this specially. */ -//.. /* ffree %st7 ; fld %st(srcL) ; -//.. ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */ -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); -//.. *p++ = 0xD9; -//.. *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9; -//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); -//.. goto done; -//.. } -//.. if (i->Xin.FpBinary.op == Xfp_ATAN) { -//.. /* Have to do this specially. */ -//.. /* ffree %st7 ; fld %st(srcL) ; -//.. ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */ -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR)); -//.. *p++ = 0xD9; *p++ = 0xF3; -//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); -//.. goto done; -//.. } -//.. if (i->Xin.FpBinary.op == Xfp_PREM -//.. || i->Xin.FpBinary.op == Xfp_PREM1 -//.. || i->Xin.FpBinary.op == Xfp_SCALE) { -//.. /* Have to do this specially. */ -//.. /* ffree %st7 ; fld %st(srcR) ; -//.. ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ; -//.. fincstp ; ffree %st7 */ -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR)); -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL)); -//.. *p++ = 0xD9; -//.. switch (i->Xin.FpBinary.op) { -//.. case Xfp_PREM: *p++ = 0xF8; break; -//.. case Xfp_PREM1: *p++ = 0xF5; break; -//.. case Xfp_SCALE: *p++ = 0xFD; break; -//.. default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)"); -//.. } -//.. p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst)); -//.. *p++ = 0xD9; *p++ = 0xF7; -//.. p = do_ffree_st7(p); -//.. goto done; -//.. } -//.. /* General case */ -//.. /* gop %srcL, %srcR, %dst -//.. --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst) -//.. */ -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL)); -//.. p = do_fop2_st(p, i->Xin.FpBinary.op, -//.. 1+hregNumber(i->Xin.FpBinary.srcR)); -//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst)); -//.. goto done; -//.. -//.. case Xin_FpLdSt: -//.. 
vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8); -//.. if (i->Xin.FpLdSt.isLoad) { -//.. /* Load from memory into %fakeN. -//.. --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1) -//.. */ -//.. p = do_ffree_st7(p); -//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD; -//.. p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr); -//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg)); -//.. goto done; -//.. } else { -//.. /* Store from %fakeN into memory. -//.. --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode -//.. */ -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg)); -//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD; -//.. p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr); -//.. goto done; -//.. } -//.. break; -//.. -//.. case Xin_FpLdStI: -//.. if (i->Xin.FpLdStI.isLoad) { -//.. /* Load from memory into %fakeN, converting from an int. -//.. --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1) -//.. */ -//.. switch (i->Xin.FpLdStI.sz) { -//.. case 8: opc = 0xDF; subopc_imm = 5; break; -//.. case 4: opc = 0xDB; subopc_imm = 0; break; -//.. case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break; -//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)"); -//.. } -//.. p = do_ffree_st7(p); -//.. *p++ = opc; -//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); -//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg)); -//.. goto done; -//.. } else { -//.. /* Store from %fakeN into memory, converting to an int. -//.. --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode -//.. */ -//.. switch (i->Xin.FpLdStI.sz) { -//.. case 8: opc = 0xDF; subopc_imm = 7; break; -//.. case 4: opc = 0xDB; subopc_imm = 3; break; -//.. case 2: opc = 0xDF; subopc_imm = 3; break; -//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)"); -//.. } -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg)); -//.. *p++ = opc; -//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr); -//.. goto done; -//.. } -//.. break; -//.. -//.. case Xin_Fp64to32: -//.. /* ffree %st7 ; fld %st(src) */ -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src)); -//.. /* subl $4, %esp */ -//.. *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04; -//.. /* fstps (%esp) */ -//.. *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24; -//.. /* flds (%esp) */ -//.. *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24; -//.. /* addl $4, %esp */ -//.. *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04; -//.. /* fstp %st(1+dst) */ -//.. p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst)); -//.. goto done; -//.. -//.. case Xin_FpCMov: -//.. /* jmp fwds if !condition */ -//.. *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1); -//.. *p++ = 0; /* # of bytes in the next bit, which we don't know yet */ -//.. ptmp = p; -//.. -//.. /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */ -//.. p = do_ffree_st7(p); -//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src)); -//.. p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst)); -//.. -//.. /* Fill in the jump offset. */ -//.. *(ptmp-1) = p - ptmp; -//.. goto done; - case Ain_LdMXCSR: *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr)); *p++ = 0x0F; @@ -3330,12 +2938,6 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr); goto done; -//.. case Xin_FpStSW_AX: -//.. /* note, this emits fnstsw %ax, not fstsw %ax */ -//.. *p++ = 0xDF; -//.. *p++ = 0xE0; -//.. 
goto done; - case Ain_SseUComIS: /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */ /* ucomi[sd] %srcL, %srcR */ @@ -3395,45 +2997,6 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, vreg2ireg(i->Ain.SseSDSS.src) ); goto done; -//.. -//.. case Xin_FpCmp: -//.. /* gcmp %fL, %fR, %dst -//.. -> ffree %st7; fpush %fL ; fucomp %(fR+1) ; -//.. fnstsw %ax ; movl %eax, %dst -//.. */ -//.. /* ffree %st7 */ -//.. p = do_ffree_st7(p); -//.. /* fpush %fL */ -//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL)); -//.. /* fucomp %(fR+1) */ -//.. *p++ = 0xDD; -//.. *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))); -//.. /* fnstsw %ax */ -//.. *p++ = 0xDF; -//.. *p++ = 0xE0; -//.. /* movl %eax, %dst */ -//.. *p++ = 0x89; -//.. p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst); -//.. goto done; -//.. -//.. case Xin_SseConst: { -//.. UShort con = i->Xin.SseConst.con; -//.. p = push_word_from_tags(p, (con >> 12) & 0xF); -//.. p = push_word_from_tags(p, (con >> 8) & 0xF); -//.. p = push_word_from_tags(p, (con >> 4) & 0xF); -//.. p = push_word_from_tags(p, con & 0xF); -//.. /* movl (%esp), %xmm-dst */ -//.. *p++ = 0x0F; -//.. *p++ = 0x10; -//.. *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)); -//.. *p++ = 0x24; -//.. /* addl $16, %esp */ -//.. *p++ = 0x83; -//.. *p++ = 0xC4; -//.. *p++ = 0x10; -//.. goto done; -//.. } - case Ain_SseLdSt: if (i->Ain.SseLdSt.sz == 8) { *p++ = 0xF2; @@ -3505,8 +3068,6 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, case Asse_MAXF: *p++ = 0x5F; break; case Asse_MINF: *p++ = 0x5D; break; case Asse_MULF: *p++ = 0x59; break; -//.. case Xsse_RCPF: *p++ = 0x53; break; -//.. case Xsse_RSQRTF: *p++ = 0x52; break; case Asse_SQRTF: *p++ = 0x51; break; case Asse_SUBF: *p++ = 0x5C; break; case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; @@ -3563,8 +3124,6 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, case Asse_MAXF: *p++ = 0x5F; break; case Asse_MINF: *p++ = 0x5D; break; case Asse_MULF: *p++ = 0x59; break; -//.. case Xsse_RCPF: *p++ = 0x53; break; -//.. case Xsse_RSQRTF: *p++ = 0x52; break; case Asse_SQRTF: *p++ = 0x51; break; case Asse_SUBF: *p++ = 0x5C; break; case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break; @@ -3680,6 +3239,70 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, *p++ = (UChar)(i->Ain.SseShuf.order); goto done; + case Ain_EvCheck: { + /* We generate: + (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER) + (2 bytes) jns nofail expected taken + (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR) + nofail: + */ + /* This is heavily asserted re instruction lengths. It needs to + be. If we get given unexpected forms of .amCounter or + .amFailAddr -- basically, anything that's not of the form + uimm7(%rbp) -- they are likely to fail. */ + /* Note also that after the decl we must be very careful not to + read the carry flag, else we get a partial flags stall. + js/jns avoids that, though. */ + UChar* p0 = p; + /* --- decl 8(%rbp) --- */ + /* Need to compute the REX byte for the decl in order to prove + that we don't need it, since this is a 32-bit inc and all + registers involved in the amode are < r8. "fake(1)" because + there's no register in this encoding; instead the register + field is used as a sub opcode. The encoding for "decl r/m32" + is FF /1, hence the fake(1). */ + rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter)); + if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. 
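+         A worked sketch (assuming amCounter really is 8(%rbp)): the
+         modrm byte is 0x4D (mod=01, reg=/1, rm=rbp) followed by the
+         disp8 0x08, so the whole decl encodes as FF 4D 08 -- the 3
+         bytes asserted just below -- and the computed REX is the
+         do-nothing value 0x40, which is why it can be dropped.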
*/ + *p++ = 0xFF; + p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter); + vassert(p - p0 == 3); + /* --- jns nofail --- */ + *p++ = 0x79; + *p++ = 0x03; /* need to check this 0x03 after the next insn */ + vassert(p - p0 == 5); + /* --- jmp* 0(%rbp) --- */ + /* Once again, verify we don't need REX. The encoding is FF /4. + We don't need REX.W since by default FF /4 in 64-bit mode + implies a 64 bit load. */ + rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr)); + if (rex != 0x40) goto bad; + *p++ = 0xFF; + p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr); + vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */ + /* And crosscheck .. */ + vassert(evCheckSzB_AMD64() == 8); + goto done; + } + + case Ain_ProfInc: { + /* We generate movabsq $0, %r11 + incq (%r11) + in the expectation that a later call to LibVEX_patchProfCtr + will be used to fill in the immediate field once the right + value is known. + 49 BB 00 00 00 00 00 00 00 00 + 49 FF 03 + */ + *p++ = 0x49; *p++ = 0xBB; + *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; + *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; + *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03; + /* Tell the caller .. */ + vassert(!(*is_profInc)); + *is_profInc = True; + goto done; + } + default: goto bad; } @@ -3696,6 +3319,200 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, # undef fake } + +/* How big is an event check? See case for Ain_EvCheck in + emit_AMD64Instr just above. That crosschecks what this returns, so + we can tell if we're inconsistent. */ +Int evCheckSzB_AMD64 ( void ) +{ + return 8; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. */ +VexInvalRange chainXDirect_AMD64 ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ) +{ + /* What we're expecting to see is: + movabsq $disp_cp_chain_me_EXPECTED, %r11 + call *%r11 + viz + 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED> + 41 FF D3 + */ + UChar* p = (UChar*)place_to_chain; + vassert(p[0] == 0x49); + vassert(p[1] == 0xBB); + vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED)); + vassert(p[10] == 0x41); + vassert(p[11] == 0xFF); + vassert(p[12] == 0xD3); + /* And what we want to change it to is either: + (general case): + movabsq $place_to_jump_to, %r11 + jmpq *%r11 + viz + 49 BB <8 bytes value == place_to_jump_to> + 41 FF E3 + So it's the same length (convenient, huh) and we don't + need to change all the bits. + ---OR--- + in the case where the displacement falls within 32 bits + jmpq disp32 where disp32 is relative to the next insn + ud2; ud2; ud2; ud2 + viz + E9 <4 bytes == disp32> + 0F 0B 0F 0B 0F 0B 0F 0B + + In both cases the replacement has the same length as the original. + To remain sane & verifiable, + (1) limit the displacement for the short form to + (say) +/- one billion, so as to avoid wraparound + off-by-ones + (2) even if the short form is applicable, once every (say) + 1024 times use the long form anyway, so as to maintain + verifiability + */ + /* This is the delta we need to put into a JMP d32 insn. It's + relative to the start of the next insn, hence the -5. 
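+      Illustrative example only: if the E9 byte were at 0x1000 and
+      place_to_jump_to were 0x2000, delta would be
+      0x2000 - 0x1000 - 5 = 0xFFB, since the CPU adds the disp32 to
+      the address of the insn following the 5-byte jmp.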
*/ + Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5; + Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000; + + static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */ + if (shortOK) { + shortCTR++; // thread safety bleh + if (0 == (shortCTR & 0x3FF)) { + shortOK = False; + if (0) + vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, " + "using long jmp\n", shortCTR); + } + } + + /* And make the modifications. */ + if (shortOK) { + p[0] = 0xE9; + p[1] = (delta >> 0) & 0xFF; + p[2] = (delta >> 8) & 0xFF; + p[3] = (delta >> 16) & 0xFF; + p[4] = (delta >> 24) & 0xFF; + p[5] = 0x0F; p[6] = 0x0B; + p[7] = 0x0F; p[8] = 0x0B; + p[9] = 0x0F; p[10] = 0x0B; + p[11] = 0x0F; p[12] = 0x0B; + /* sanity check on the delta -- top 32 are all 0 or all 1 */ + delta >>= 32; + vassert(delta == 0LL || delta == -1LL); + } else { + /* Minimal modifications from the starting sequence. */ + *(ULong*)(&p[2]) = Ptr_to_ULong(place_to_jump_to); + p[12] = 0xE3; + } + VexInvalRange vir = {0, 0}; + return vir; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. */ +VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ) +{ + /* What we're expecting to see is either: + (general case) + movabsq $place_to_jump_to_EXPECTED, %r11 + jmpq *%r11 + viz + 49 BB <8 bytes value == place_to_jump_to_EXPECTED> + 41 FF E3 + ---OR--- + in the case where the displacement falls within 32 bits + jmpq d32 + ud2; ud2; ud2; ud2 + viz + E9 <4 bytes == disp32> + 0F 0B 0F 0B 0F 0B 0F 0B + */ + UChar* p = (UChar*)place_to_unchain; + Bool valid = False; + if (p[0] == 0x49 && p[1] == 0xBB + && *(ULong*)(&p[2]) == Ptr_to_ULong(place_to_jump_to_EXPECTED) + && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) { + /* it's the long form */ + valid = True; + } + else + if (p[0] == 0xE9 + && p[5] == 0x0F && p[6] == 0x0B + && p[7] == 0x0F && p[8] == 0x0B + && p[9] == 0x0F && p[10] == 0x0B + && p[11] == 0x0F && p[12] == 0x0B) { + /* It's the short form. Check the offset is right. */ + Int s32 = *(Int*)(&p[1]); + Long s64 = (Long)s32; + if ((UChar*)p + 5 + s64 == (UChar*)place_to_jump_to_EXPECTED) { + valid = True; + if (0) + vex_printf("QQQ unchainXDirect_AMD64: found short form\n"); + } + } + vassert(valid); + /* And what we want to change it to is: + movabsq $disp_cp_chain_me, %r11 + call *%r11 + viz + 49 BB <8 bytes value == disp_cp_chain_me> + 41 FF D3 + So it's the same length (convenient, huh). + */ + p[0] = 0x49; + p[1] = 0xBB; + *(ULong*)(&p[2]) = Ptr_to_ULong(disp_cp_chain_me); + p[10] = 0x41; + p[11] = 0xFF; + p[12] = 0xD3; + VexInvalRange vir = {0, 0}; + return vir; +} + + +/* Patch the counter address into a profile inc point, as previously + created by the Ain_ProfInc case for emit_AMD64Instr. 
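+   Only the 8 immediate bytes of the movabsq (p[2]..p[9]) get
+   rewritten, least significant byte first; the surrounding opcode
+   bytes are asserted and left alone. The zero-length VexInvalRange
+   reflects that amd64 needs no explicit insn-cache invalidation
+   here, in contrast to the ARM equivalents, which return a real
+   range.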
*/ +VexInvalRange patchProfInc_AMD64 ( void* place_to_patch, + ULong* location_of_counter ) +{ + vassert(sizeof(ULong*) == 8); + UChar* p = (UChar*)place_to_patch; + vassert(p[0] == 0x49); + vassert(p[1] == 0xBB); + vassert(p[2] == 0x00); + vassert(p[3] == 0x00); + vassert(p[4] == 0x00); + vassert(p[5] == 0x00); + vassert(p[6] == 0x00); + vassert(p[7] == 0x00); + vassert(p[8] == 0x00); + vassert(p[9] == 0x00); + vassert(p[10] == 0x49); + vassert(p[11] == 0xFF); + vassert(p[12] == 0x03); + ULong imm64 = (ULong)Ptr_to_ULong(location_of_counter); + p[2] = imm64 & 0xFF; imm64 >>= 8; + p[3] = imm64 & 0xFF; imm64 >>= 8; + p[4] = imm64 & 0xFF; imm64 >>= 8; + p[5] = imm64 & 0xFF; imm64 >>= 8; + p[6] = imm64 & 0xFF; imm64 >>= 8; + p[7] = imm64 & 0xFF; imm64 >>= 8; + p[8] = imm64 & 0xFF; imm64 >>= 8; + p[9] = imm64 & 0xFF; imm64 >>= 8; + VexInvalRange vir = {0, 0}; + return vir; +} + + /*---------------------------------------------------------------*/ /*--- end host_amd64_defs.c ---*/ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index 4e7ae05e54..bc63bd2f7f 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -363,10 +363,11 @@ typedef Ain_Alu32R, /* 32-bit add/sub/and/or/xor/cmp, dst=REG (a la Alu64R) */ Ain_MulL, /* widening multiply */ Ain_Div, /* div and mod */ -//.. Xin_Sh3232, /* shldl or shrdl */ Ain_Push, /* push 64-bit value on stack */ Ain_Call, /* call to address in register */ - Ain_Goto, /* conditional/unconditional jmp to dst */ + Ain_XDirect, /* direct transfer to GA */ + Ain_XIndir, /* indirect transfer to GA */ + Ain_XAssisted, /* assisted transfer to GA */ Ain_CMov64, /* conditional move */ Ain_MovxLQ, /* reg-reg move, zx-ing/sx-ing top half */ Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */ @@ -377,28 +378,17 @@ typedef Ain_ACAS, /* 8/16/32/64-bit lock;cmpxchg */ Ain_DACAS, /* lock;cmpxchg8b/16b (doubleword ACAS, 2 x 32-bit or 2 x 64-bit only) */ - Ain_A87Free, /* free up x87 registers */ Ain_A87PushPop, /* x87 loads/stores */ Ain_A87FpOp, /* x87 operations */ Ain_A87LdCW, /* load x87 control word */ Ain_A87StSW, /* store x87 status word */ -//.. -//.. Xin_FpUnary, /* FP fake unary op */ -//.. Xin_FpBinary, /* FP fake binary op */ -//.. Xin_FpLdSt, /* FP fake load/store */ -//.. Xin_FpLdStI, /* FP fake load/store, converting to/from Int */ -//.. Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single */ -//.. Xin_FpCMov, /* FP fake floating point conditional move */ Ain_LdMXCSR, /* load %mxcsr */ -//.. Xin_FpStSW_AX, /* fstsw %ax */ Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int register */ Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */ Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */ Ain_SseSDSS, /* scalar float32 to/from float64 */ -//.. -//.. Xin_SseConst, /* Generate restricted SSE literal */ Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment constraints, upper 96/64/0 bits arbitrary */ Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */ @@ -408,7 +398,9 @@ typedef Ain_Sse64FLo, /* SSE binary, 64F in lowest lane only */ Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */ Ain_SseCMov, /* SSE conditional move */ - Ain_SseShuf /* SSE2 shuffle (pshufd) */ + Ain_SseShuf, /* SSE2 shuffle (pshufd) */ + Ain_EvCheck, /* Event check */ + Ain_ProfInc /* 64-bit profile counter increment */ } AMD64InstrTag; @@ -470,13 +462,6 @@ typedef Int sz; /* 4 or 8 only */ AMD64RM* src; } Div; -//.. /* shld/shrd. 
op may only be Xsh_SHL or Xsh_SHR */ -//.. struct { -//.. X86ShiftOp op; -//.. UInt amt; /* shift amount, or 0 means %cl */ -//.. HReg src; -//.. HReg dst; -//.. } Sh3232; struct { AMD64RMI* src; } Push; @@ -487,13 +472,29 @@ typedef Addr64 target; Int regparms; /* 0 .. 6 */ } Call; - /* Pseudo-insn. Goto dst, on given condition (which could be - Acc_ALWAYS). */ - struct { + /* Update the guest RIP value, then exit requesting to chain + to it. May be conditional. */ + struct { + Addr64 dstGA; /* next guest address */ + AMD64AMode* amRIP; /* amode in guest state for RIP */ + AMD64CondCode cond; /* can be Acc_ALWAYS */ + Bool toFastEP; /* chain to the slow or fast point? */ + } XDirect; + /* Boring transfer to a guest address not known at JIT time. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + AMD64AMode* amRIP; + AMD64CondCode cond; /* can be Acc_ALWAYS */ + } XIndir; + /* Assisted transfer to a guest address, most general case. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + AMD64AMode* amRIP; + AMD64CondCode cond; /* can be Acc_ALWAYS */ IRJumpKind jk; - AMD64CondCode cond; - AMD64RI* dst; - } Goto; + } XAssisted; /* Mov src to dst on the given condition, which may not be the bogus Acc_ALWAYS. */ struct { @@ -588,11 +589,6 @@ typedef AMD64AMode* addr; } LdMXCSR; -//.. /* fstsw %ax */ -//.. struct { -//.. /* no fields */ -//.. } -//.. FpStSW_AX; /* ucomisd/ucomiss, then get %rflags into int register */ struct { UChar sz; /* 4 or 8 only */ @@ -620,12 +616,6 @@ typedef HReg src; HReg dst; } SseSDSS; -//.. -//.. /* Simplistic SSE[123] */ -//.. struct { -//.. UShort con; -//.. HReg dst; -//.. } SseConst; struct { Bool isLoad; UChar sz; /* 4, 8 or 16 only */ @@ -674,6 +664,15 @@ typedef HReg src; HReg dst; } SseShuf; + struct { + AMD64AMode* amCounter; + AMD64AMode* amFailAddr; + } EvCheck; + struct { + /* No fields. The address of the counter to inc is + installed later, post-translation, by patching it in, + as it is not known at translation time. */ + } ProfInc; } Ain; } @@ -689,10 +688,14 @@ extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, HReg ); extern AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ); extern AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* ); extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* ); -//.. extern AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp, UInt amt, HReg src, HReg dst ); extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* ); extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int ); -extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond, AMD64RI* dst ); +extern AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP, + AMD64CondCode cond, Bool toFastEP ); +extern AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP, + AMD64CondCode cond ); +extern AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP, + AMD64CondCode cond, IRJumpKind jk ); extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst ); extern AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ); extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned, @@ -709,21 +712,11 @@ extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op ); extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr ); extern AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr ); -//.. -//.. extern AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ); -//.. 
extern AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ); -//.. extern AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* ); -//.. extern AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* ); -//.. extern AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ); -//.. extern AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode, HReg src, HReg dst ); extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* ); -//.. extern AMD64Instr* AMD64Instr_FpStSW_AX ( void ); extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ); extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ); extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ); extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst ); -//.. -//.. extern AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ); extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* ); extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* ); extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg ); @@ -733,6 +726,9 @@ extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg ); extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg ); extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst ); extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ); +extern AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter, + AMD64AMode* amFailAddr ); +extern AMD64Instr* AMD64Instr_ProfInc ( void ); extern void ppAMD64Instr ( AMD64Instr*, Bool ); @@ -742,10 +738,13 @@ extern void ppAMD64Instr ( AMD64Instr*, Bool ); extern void getRegUsage_AMD64Instr ( HRegUsage*, AMD64Instr*, Bool ); extern void mapRegs_AMD64Instr ( HRegRemap*, AMD64Instr*, Bool ); extern Bool isMove_AMD64Instr ( AMD64Instr*, HReg*, HReg* ); -extern Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr*, - Bool, - void* dispatch_unassisted, - void* dispatch_assisted ); +extern Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, AMD64Instr* i, + Bool mode64, + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ); extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); @@ -753,9 +752,36 @@ extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void getAllocableRegs_AMD64 ( Int*, HReg** ); -extern HInstrArray* iselSB_AMD64 ( IRSB*, VexArch, - VexArchInfo*, - VexAbiInfo* ); +extern HInstrArray* iselSB_AMD64 ( IRSB*, + VexArch, + VexArchInfo*, + VexAbiInfo*, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ); + +/* How big is an event check? This is kind of a kludge because it + depends on the offsets of host_EvC_FAILADDR and host_EvC_COUNTER, + and so assumes that they are both <= 128, and so can use the short + offset encoding. This is all checked with assertions, so in the + worst case we will merely assert at startup. */ +extern Int evCheckSzB_AMD64 ( void ); + +/* Perform a chaining and unchaining of an XDirect jump. 
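+   Expected lifecycle, in sketch form: a freshly emitted XDirect
+   calls disp_cp_chain_me_to_{slow,fast}EP; once the destination
+   translation exists, chainXDirect_AMD64 rewrites that call into a
+   direct jump, and unchainXDirect_AMD64 restores the original
+   chain-me call (e.g. when the destination is discarded).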
*/ +extern VexInvalRange chainXDirect_AMD64 ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ); + +extern VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ); + +/* Patch the counter location into an existing ProfInc point. */ +extern VexInvalRange patchProfInc_AMD64 ( void* place_to_patch, + ULong* location_of_counter ); + #endif /* ndef __VEX_HOST_AMD64_DEFS_H */ diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c index bcd213fca1..63ba74c75f 100644 --- a/VEX/priv/host_amd64_isel.c +++ b/VEX/priv/host_amd64_isel.c @@ -112,12 +112,24 @@ static IRExpr* bind ( Int binder ) 64-bit virtual HReg, which holds the high half of the value. + - The host subarchitecture we are selecting insns for. + This is set at the start and does not change. + - The code array, that is, the insns selected so far. - A counter, for generating new virtual registers. - - The host subarchitecture we are selecting insns for. - This is set at the start and does not change. + - A Bool for indicating whether we may generate chain-me + instructions for control flow transfers, or whether we must use + XAssisted. + + - The maximum guest address of any guest insn in this block. + Actually, the address of the highest-addressed byte from any insn + in this block. Is set at the start and does not change. This is + used for detecting jumps which are definitely forward-edges from + this block, and therefore can be made (chained) to the fast entry + point of the destination, thereby avoiding the destination's + event check. Note, this is all host-independent. (JRS 20050201: well, kinda ... not completely. Compare with ISelEnv for X86.) @@ -125,17 +137,21 @@ static IRExpr* bind ( Int binder ) typedef struct { + /* Constant -- are set at the start and do not change. */ IRTypeEnv* type_env; HReg* vregmap; HReg* vregmapHI; Int n_vregmap; - HInstrArray* code; + UInt hwcaps; - Int vreg_ctr; + Bool chainingAllowed; + Addr64 max_ga; - UInt hwcaps; + /* These are modified as we go along. */ + HInstrArray* code; + Int vreg_ctr; } ISelEnv; @@ -4131,14 +4147,47 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /* --------- EXIT --------- */ case Ist_Exit: { - AMD64RI* dst; - AMD64CondCode cc; if (stmt->Ist.Exit.dst->tag != Ico_U64) vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value"); - dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst)); - cc = iselCondCode(env,stmt->Ist.Exit.guard); - addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst)); - return; + + AMD64CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); + AMD64AMode* amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP, + hregAMD64_RBP()); + + /* Case: boring transfer to known address */ + if (stmt->Ist.Exit.jk == Ijk_Boring) { + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; + if (0) vex_printf("%s", toFastEP ? "Y" : ","); + addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, + amRIP, cc, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. 
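+               (Typically we land here for no-redir translations,
+               which may not use chain-me requests; the assisted
+               transfer instead routes control back through the
+               scheduler.)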
*/ + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring)); + } + return; + } + + /* Case: assisted transfer to arbitrary address */ + switch (stmt->Ist.Exit.jk) { + case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn: { + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk)); + return; + } + default: + break; + } + + /* Do we ever expect to see any other kind? */ + goto stmt_fail; } default: break; @@ -4153,18 +4202,83 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /*--- ISEL: Basic block terminators (Nexts) ---*/ /*---------------------------------------------------------*/ -static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) +static void iselNext ( ISelEnv* env, + IRExpr* next, IRJumpKind jk, Int offsIP ) { - AMD64RI* ri; if (vex_traceflags & VEX_TRACE_VCODE) { - vex_printf("\n-- goto {"); + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); ppIRJumpKind(jk); - vex_printf("} "); - ppIRExpr(next); - vex_printf("\n"); + vex_printf( "\n"); + } + + /* Case: boring transfer to known address */ + if (next->tag == Iex_Const) { + IRConst* cdst = next->Iex.Const.con; + vassert(cdst->tag == Ico_U64); + if (jk == Ijk_Boring || jk == Ijk_Call) { + /* Boring transfer to known address */ + AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = ((Addr64)cdst->Ico.U64) > env->max_ga; + if (0) vex_printf("%s", toFastEP ? "X" : "."); + addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64, + amRIP, Acc_ALWAYS, + toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an indirect transfer, + as that's the cheapest alternative that is + allowable. */ + HReg r = iselIntExpr_R(env, next); + addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, + Ijk_Boring)); + } + return; + } + } + + /* Case: call/return (==boring) transfer to any address */ + switch (jk) { + case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { + HReg r = iselIntExpr_R(env, next); + AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); + if (env->chainingAllowed) { + addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS)); + } else { + addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, + Ijk_Boring)); + } + return; + } + default: + break; + } + + /* Case: some other kind of transfer to any address */ + switch (jk) { + case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoRedir: + case Ijk_Yield: case Ijk_SigTRAP: { + HReg r = iselIntExpr_R(env, next); + AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP()); + addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk)); + return; + } + default: + break; } - ri = iselIntExpr_RI(env, next); - addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri)); + + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); + ppIRJumpKind(jk); + vex_printf( "\n"); + vassert(0); // are we expecting any other kind? } @@ -4174,14 +4288,21 @@ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) /* Translate an entire SB to amd64 code. 
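+   Note that the selected block now starts with an Ain_EvCheck and,
+   if addProfInc is set, an Ain_ProfInc whose counter address gets
+   patched in after translation; see below.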
*/ -HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host, - VexArchInfo* archinfo_host, - VexAbiInfo* vbi/*UNUSED*/ ) +HInstrArray* iselSB_AMD64 ( IRSB* bb, + VexArch arch_host, + VexArchInfo* archinfo_host, + VexAbiInfo* vbi/*UNUSED*/, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ) { - Int i, j; - HReg hreg, hregHI; - ISelEnv* env; - UInt hwcaps_host = archinfo_host->hwcaps; + Int i, j; + HReg hreg, hregHI; + ISelEnv* env; + UInt hwcaps_host = archinfo_host->hwcaps; + AMD64AMode *amCounter, *amFailAddr; /* sanity ... */ vassert(arch_host == VexArchAMD64); @@ -4207,7 +4328,9 @@ HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host, env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); /* and finally ... */ - env->hwcaps = hwcaps_host; + env->chainingAllowed = chainingAllowed; + env->hwcaps = hwcaps_host; + env->max_ga = max_ga; /* For each IR temporary, allocate a suitably-kinded virtual register. */ @@ -4233,12 +4356,25 @@ HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host, } env->vreg_ctr = j; + /* The very first instruction must be an event check. */ + amCounter = AMD64AMode_IR(offs_Host_EvC_Counter, hregAMD64_RBP()); + amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP()); + addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr)); + + /* Possibly a block counter increment (for profiling). At this + point we don't know the address of the counter, so just pretend + it is zero. It will have to be patched later, but before this + translation is used, by a call to LibVEX_patchProfCtr. */ + if (addProfInc) { + addInstr(env, AMD64Instr_ProfInc()); + } + /* Ok, finally we can iterate over the statements. */ for (i = 0; i < bb->stmts_used; i++) if (bb->stmts[i]) - iselStmt(env,bb->stmts[i]); + iselStmt(env, bb->stmts[i]); - iselNext(env,bb->next,bb->jumpkind); + iselNext(env, bb->next, bb->jumpkind, bb->offsIP); /* record the number of vregs we used. 
*/ env->code->n_vregs = env->vreg_ctr; diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index 2f0ebf0265..a76e7153a7 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -1170,13 +1170,33 @@ ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg rD, ARMAMode1* amode ) { i->ARMin.LdSt8U.amode = amode; return i; } -//extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* ); -ARMInstr* ARMInstr_Goto ( IRJumpKind jk, ARMCondCode cond, HReg gnext ) { - ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); - i->tag = ARMin_Goto; - i->ARMin.Goto.jk = jk; - i->ARMin.Goto.cond = cond; - i->ARMin.Goto.gnext = gnext; +ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T, + ARMCondCode cond, Bool toFastEP ) { + ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); + i->tag = ARMin_XDirect; + i->ARMin.XDirect.dstGA = dstGA; + i->ARMin.XDirect.amR15T = amR15T; + i->ARMin.XDirect.cond = cond; + i->ARMin.XDirect.toFastEP = toFastEP; + return i; +} +ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T, + ARMCondCode cond ) { + ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); + i->tag = ARMin_XIndir; + i->ARMin.XIndir.dstGA = dstGA; + i->ARMin.XIndir.amR15T = amR15T; + i->ARMin.XIndir.cond = cond; + return i; +} +ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T, + ARMCondCode cond, IRJumpKind jk ) { + ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); + i->tag = ARMin_XAssisted; + i->ARMin.XAssisted.dstGA = dstGA; + i->ARMin.XAssisted.amR15T = amR15T; + i->ARMin.XAssisted.cond = cond; + i->ARMin.XAssisted.jk = jk; return i; } ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) { @@ -1479,6 +1499,21 @@ ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) { return i; } +ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter, + ARMAMode1* amFailAddr ) { + ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); + i->tag = ARMin_EvCheck; + i->ARMin.EvCheck.amCounter = amCounter; + i->ARMin.EvCheck.amFailAddr = amFailAddr; + return i; +} + +ARMInstr* ARMInstr_ProfInc ( void ) { + ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); + i->tag = ARMin_ProfInc; + return i; +} + /* ... */ void ppARMInstr ( ARMInstr* i ) { @@ -1564,28 +1599,47 @@ void ppARMInstr ( ARMInstr* i ) { return; case ARMin_Ld8S: goto unhandled; - case ARMin_Goto: - if (i->ARMin.Goto.cond != ARMcc_AL) { - vex_printf("if (%%cpsr.%s) { ", - showARMCondCode(i->ARMin.Goto.cond)); - } else { - vex_printf("if (1) { "); - } - if (i->ARMin.Goto.jk != Ijk_Boring - && i->ARMin.Goto.jk != Ijk_Call - && i->ARMin.Goto.jk != Ijk_Ret) { - vex_printf("mov r8, $"); - ppIRJumpKind(i->ARMin.Goto.jk); - vex_printf(" ; "); - } - vex_printf("mov r0, "); - ppHRegARM(i->ARMin.Goto.gnext); - vex_printf(" ; bx r14"); - if (i->ARMin.Goto.cond != ARMcc_AL) { - vex_printf(" }"); - } else { - vex_printf(" }"); - } + case ARMin_XDirect: + vex_printf("(xDirect) "); + vex_printf("if (%%cpsr.%s) { ", + showARMCondCode(i->ARMin.XDirect.cond)); + vex_printf("movw r12,0x%x; ", + (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF)); + vex_printf("movt r12,0x%x; ", + (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF)); + vex_printf("str r12,"); + ppARMAMode1(i->ARMin.XDirect.amR15T); + vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ", + i->ARMin.XDirect.toFastEP ? "fast" : "slow"); + vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ", + i->ARMin.XDirect.toFastEP ? 
"fast" : "slow"); + vex_printf("blx r12 }"); + return; + case ARMin_XIndir: + vex_printf("(xIndir) "); + vex_printf("if (%%cpsr.%s) { ", + showARMCondCode(i->ARMin.XIndir.cond)); + vex_printf("str "); + ppHRegARM(i->ARMin.XIndir.dstGA); + vex_printf(","); + ppARMAMode1(i->ARMin.XIndir.amR15T); + vex_printf("; movw r12,LO16($disp_cp_xindir); "); + vex_printf("movt r12,HI16($disp_cp_xindir); "); + vex_printf("blx r12 }"); + return; + case ARMin_XAssisted: + vex_printf("(xAssisted) "); + vex_printf("if (%%cpsr.%s) { ", + showARMCondCode(i->ARMin.XAssisted.cond)); + vex_printf("str "); + ppHRegARM(i->ARMin.XAssisted.dstGA); + vex_printf(","); + ppARMAMode1(i->ARMin.XAssisted.amR15T); + vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ", + (Int)i->ARMin.XAssisted.jk); + vex_printf("movw r12,LO16($disp_cp_xassisted); "); + vex_printf("movt r12,HI16($disp_cp_xassisted); "); + vex_printf("blx r12 }"); return; case ARMin_CMov: vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond)); @@ -1878,6 +1932,25 @@ void ppARMInstr ( ARMInstr* i ) { vex_printf(", "); vex_printf("%d", i->ARMin.Add32.imm32); return; + case ARMin_EvCheck: + vex_printf("(evCheck) ldr r12,"); + ppARMAMode1(i->ARMin.EvCheck.amCounter); + vex_printf("; subs r12,r12,$1; str r12,"); + ppARMAMode1(i->ARMin.EvCheck.amCounter); + vex_printf("; bpl nofail; ldr r12,"); + ppARMAMode1(i->ARMin.EvCheck.amFailAddr); + vex_printf("; bx r12; nofail:"); + return; + case ARMin_ProfInc: + vex_printf("(profInc) movw r12,LO16($NotKnownYet); " + "movw r12,HI16($NotKnownYet); " + "ldr r11,[r12]; " + "adds r11,r11,$1; " + "str r11,[r12]; " + "ldr r11,[r12+4]; " + "adc r11,r11,$0; " + "str r11,[r12+4]"); + return; default: unhandled: vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag); @@ -1945,18 +2018,21 @@ void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 ) return; case ARMin_Ld8S: goto unhandled; - case ARMin_Goto: - /* reads the reg holding the next guest addr */ - addHRegUse(u, HRmRead, i->ARMin.Goto.gnext); - /* writes it to the standard integer return register */ - addHRegUse(u, HRmWrite, hregARM_R0()); - /* possibly messes with the baseblock pointer */ - if (i->ARMin.Goto.jk != Ijk_Boring - && i->ARMin.Goto.jk != Ijk_Call - && i->ARMin.Goto.jk != Ijk_Ret) - /* note, this is irrelevant since r8 is not actually - available to the allocator. But still .. */ - addHRegUse(u, HRmWrite, hregARM_R8()); + /* XDirect/XIndir/XAssisted are also a bit subtle. They + conditionally exit the block. Hence we only need to list (1) + the registers that they read, and (2) the registers that they + write in the case where the block is not exited. (2) is + empty, hence only (1) is relevant here. */ + case ARMin_XDirect: + addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T); + return; + case ARMin_XIndir: + addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA); + addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T); + return; + case ARMin_XAssisted: + addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA); + addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T); return; case ARMin_CMov: addHRegUse(u, HRmWrite, i->ARMin.CMov.dst); @@ -2159,6 +2235,18 @@ void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 ) addHRegUse(u, HRmWrite, i->ARMin.Add32.rD); addHRegUse(u, HRmRead, i->ARMin.Add32.rN); return; + case ARMin_EvCheck: + /* We expect both amodes only to mention r8, so this is in + fact pointless, since r8 isn't allocatable, but + anyway.. 
*/ + addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter); + addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr); + addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */ + return; + case ARMin_ProfInc: + addHRegUse(u, HRmWrite, hregARM_R12()); + addHRegUse(u, HRmWrite, hregARM_R11()); + return; unhandled: default: ppARMInstr(i); @@ -2210,8 +2298,18 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 ) return; case ARMin_Ld8S: goto unhandled; - case ARMin_Goto: - i->ARMin.Goto.gnext = lookupHRegRemap(m, i->ARMin.Goto.gnext); + case ARMin_XDirect: + mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T); + return; + case ARMin_XIndir: + i->ARMin.XIndir.dstGA + = lookupHRegRemap(m, i->ARMin.XIndir.dstGA); + mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T); + return; + case ARMin_XAssisted: + i->ARMin.XAssisted.dstGA + = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA); + mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T); return; case ARMin_CMov: i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst); @@ -2329,6 +2427,17 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 ) case ARMin_Add32: i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD); i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN); + return; + case ARMin_EvCheck: + /* We expect both amodes only to mention r8, so this is in + fact pointless, since r8 isn't allocatable, but + anyway.. */ + mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter); + mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr); + return; + case ARMin_ProfInc: + /* hardwires r11 and r12 -- nothing to modify. */ + return; unhandled: default: ppARMInstr(i); @@ -2586,6 +2695,9 @@ static inline UChar qregNo ( HReg r ) (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \ (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0)) +#define XX______(zzx7,zzx6) \ + ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24)) + /* Generate a skeletal insn that involves an a RI84 shifter operand. Returns a word which is all zeroes apart from bits 25 and 11..0, since it is those that encode the shifter operand (at least to the @@ -2704,10 +2816,92 @@ static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 ) return p; } +/* Get an immediate into a register, using only that register, and + generating exactly 2 instructions, regardless of the value of the + immediate. This is used when generating sections of code that need + to be patched later, so as to guarantee a specific size. */ +static UInt* imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 ) +{ + if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) { + /* Generate movw rD, #low16 ; movt rD, #high16. */ + UInt lo16 = imm32 & 0xFFFF; + UInt hi16 = (imm32 >> 16) & 0xFFFF; + UInt instr; + instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD, + (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF, + lo16 & 0xF); + *p++ = instr; + instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD, + (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF, + hi16 & 0xF); + *p++ = instr; + } else { + vassert(0); /* lose */ + } + return p; +} -Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i, +/* Check whether p points at a 2-insn sequence cooked up by + imm32_to_iregNo_EXACTLY2(). */ +static Bool is_imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 ) +{ + if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) { + /* Generate movw rD, #low16 ; movt rD, #high16. 
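+      Worked example (illustrative): for rD == 12 and the ProfInc
+      magic imm32 == 0x65556555, lo16 == hi16 == 0x6555, so
+      i0 == 0xE306C555 (movw r12, #0x6555) and
+      i1 == 0xE346C555 (movt r12, #0x6555).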
*/ + UInt lo16 = imm32 & 0xFFFF; + UInt hi16 = (imm32 >> 16) & 0xFFFF; + UInt i0, i1; + i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD, + (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF, + lo16 & 0xF); + i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD, + (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF, + hi16 & 0xF); + return p[0] == i0 && p[1] == i1; + } else { + vassert(0); /* lose */ + } +} + + +static UInt* do_load_or_store32 ( UInt* p, + Bool isLoad, UInt rD, ARMAMode1* am ) +{ + vassert(rD <= 12); + vassert(am->tag == ARMam1_RI); // RR case is not handled + UInt bB = 0; + UInt bL = isLoad ? 1 : 0; + Int simm12; + UInt instr, bP; + if (am->ARMam1.RI.simm13 < 0) { + bP = 0; + simm12 = -am->ARMam1.RI.simm13; + } else { + bP = 1; + simm12 = am->ARMam1.RI.simm13; + } + vassert(simm12 >= 0 && simm12 <= 4095); + instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL), + iregNo(am->ARMam1.RI.reg), + rD); + instr |= simm12; + *p++ = instr; + return p; +} + + +/* Emit an instruction into buf and return the number of bytes used. + Note that buf is not the insn's final place, and therefore it is + imperative to emit position-independent code. If the emitted + instruction was a profiler inc, set *is_profInc to True, else + leave it unchanged. */ + +Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, ARMInstr* i, Bool mode64, - void* dispatch_unassisted, void* dispatch_assisted ) + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ) { UInt* p = (UInt*)buf; vassert(nbuf >= 32); @@ -2894,59 +3088,177 @@ Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i, } case ARMin_Ld8S: goto bad; - case ARMin_Goto: { - UInt instr; - IRJumpKind jk = i->ARMin.Goto.jk; - ARMCondCode cond = i->ARMin.Goto.cond; - UInt rnext = iregNo(i->ARMin.Goto.gnext); - Int trc = -1; - /* since we branch to lr(r13) to get back to dispatch: */ - vassert(dispatch_unassisted == NULL); - vassert(dispatch_assisted == NULL); - switch (jk) { - case Ijk_Ret: case Ijk_Call: case Ijk_Boring: - break; /* no need to set GST in these common cases */ - case Ijk_ClientReq: - trc = VEX_TRC_JMP_CLIENTREQ; break; - case Ijk_Sys_int128: - case Ijk_Sys_int129: - case Ijk_Sys_int130: - case Ijk_Yield: - case Ijk_EmWarn: - case Ijk_MapFail: - goto unhandled_jk; - case Ijk_NoDecode: - trc = VEX_TRC_JMP_NODECODE; break; - case Ijk_TInval: - trc = VEX_TRC_JMP_TINVAL; break; - case Ijk_NoRedir: - trc = VEX_TRC_JMP_NOREDIR; break; - case Ijk_Sys_sysenter: - case Ijk_SigTRAP: - case Ijk_SigSEGV: - goto unhandled_jk; - case Ijk_Sys_syscall: - trc = VEX_TRC_JMP_SYS_SYSCALL; break; - unhandled_jk: - default: - goto bad; + + case ARMin_XDirect: { + /* NB: what goes on here has to be very closely coordinated + with the chainXDirect_ARM and unchainXDirect_ARM below. */ + /* We're generating chain-me requests here, so we need to be + sure this is actually allowed -- no-redir translations + can't use chain-me's. Hence: */ + vassert(disp_cp_chain_me_to_slowEP != NULL); + vassert(disp_cp_chain_me_to_fastEP != NULL); + + /* Use ptmp for backpatching conditional jumps. */ + UInt* ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. Or at least, leave a space for + it that we will shortly fill in. 
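+         Sketch of the fixup arithmetic used below, assuming the
+         dstGA immediate needs a full movw/movt pair: the body is
+         then 6 insns, so p ends up 7 words past ptmp, delta == 28,
+         and (delta >> 2) - 2 == 5, i.e. a B<cond> word offset taken
+         relative to the branch's PC+8.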
*/ + if (i->ARMin.XDirect.cond != ARMcc_AL) { + vassert(i->ARMin.XDirect.cond != ARMcc_NV); + ptmp = p; + *p++ = 0; } - if (trc != -1) { - // mov{cond} r8, #trc - vassert(trc >= 0 && trc <= 255); - instr = (cond << 28) | 0x03A08000 | (0xFF & (UInt)trc); - *p++ = instr; + + /* Update the guest R15T. */ + /* movw r12, lo16(dstGA) */ + /* movt r12, hi16(dstGA) */ + /* str r12, amR15T */ + p = imm32_to_iregNo(p, /*r*/12, i->ARMin.XDirect.dstGA); + p = do_load_or_store32(p, False/*!isLoad*/, + /*r*/12, i->ARMin.XDirect.amR15T); + + /* --- FIRST PATCHABLE BYTE follows --- */ + /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're + calling to) backs up the return address, so as to find the + address of the first patchable byte. So: don't change the + number of instructions (3) below. */ + /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */ + /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */ + /* blx r12 (A1) */ + void* disp_cp_chain_me + = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP + : disp_cp_chain_me_to_slowEP; + p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, + (UInt)Ptr_to_ULong(disp_cp_chain_me)); + *p++ = 0xE12FFF3C; + /* --- END of PATCHABLE BYTES --- */ + + /* Fix up the conditional jump, if there was one. */ + if (i->ARMin.XDirect.cond != ARMcc_AL) { + Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */ + vassert(delta > 0 && delta < 40); + vassert((delta & 3) == 0); + UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond; + vassert(notCond <= 13); /* Neither AL nor NV */ + delta = (delta >> 2) - 2; + *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF); } - // mov{cond} r0, rnext - if (rnext != 0) { - instr = (cond << 28) | 0x01A00000 | rnext; - *p++ = instr; + goto done; + } + + case ARMin_XIndir: { + /* We're generating transfers that could lead indirectly to a + chain-me, so we need to be sure this is actually allowed + -- no-redir translations are not allowed to reach normal + translations without going through the scheduler. That + means no XDirects or XIndirs out from no-redir + translations. Hence: */ + vassert(disp_cp_xindir != NULL); + + /* Use ptmp for backpatching conditional jumps. */ + UInt* ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. Or at least, leave a space for + it that we will shortly fill in. */ + if (i->ARMin.XIndir.cond != ARMcc_AL) { + vassert(i->ARMin.XIndir.cond != ARMcc_NV); + ptmp = p; + *p++ = 0; + } + + /* Update the guest R15T. */ + /* str r-dstGA, amR15T */ + p = do_load_or_store32(p, False/*!isLoad*/, + iregNo(i->ARMin.XIndir.dstGA), + i->ARMin.XIndir.amR15T); + + /* movw r12, lo16(VG_(disp_cp_xindir)) */ + /* movt r12, hi16(VG_(disp_cp_xindir)) */ + /* bx r12 (A1) */ + p = imm32_to_iregNo(p, /*r*/12, + (UInt)Ptr_to_ULong(disp_cp_xindir)); + *p++ = 0xE12FFF1C; + + /* Fix up the conditional jump, if there was one. */ + if (i->ARMin.XIndir.cond != ARMcc_AL) { + Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */ + vassert(delta > 0 && delta < 40); + vassert((delta & 3) == 0); + UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond; + vassert(notCond <= 13); /* Neither AL nor NV */ + delta = (delta >> 2) - 2; + *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF); } - // bx{cond} r14 - instr =(cond << 28) | 0x012FFF1E; - *p++ = instr; goto done; } + + case ARMin_XAssisted: { + /* Use ptmp for backpatching conditional jumps. */ + UInt* ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. 
Or at least, leave a space for + it that we will shortly fill in. */ + if (i->ARMin.XAssisted.cond != ARMcc_AL) { + vassert(i->ARMin.XAssisted.cond != ARMcc_NV); + ptmp = p; + *p++ = 0; + } + + /* Update the guest R15T. */ + /* str r-dstGA, amR15T */ + p = do_load_or_store32(p, False/*!isLoad*/, + iregNo(i->ARMin.XAssisted.dstGA), + i->ARMin.XAssisted.amR15T); + + /* movw r8, $magic_number */ + UInt trcval = 0; + switch (i->ARMin.XAssisted.jk) { + case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; + case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; + //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break; + //case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; + //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; + //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; + case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; + //case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; + //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; + //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; + case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; + /* We don't expect to see the following being assisted. */ + //case Ijk_Ret: + //case Ijk_Call: + /* fallthrough */ + default: + ppIRJumpKind(i->ARMin.XAssisted.jk); + vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind"); + } + vassert(trcval != 0); + p = imm32_to_iregNo(p, /*r*/8, trcval); + + /* movw r12, lo16(VG_(disp_cp_xassisted)) */ + /* movt r12, hi16(VG_(disp_cp_xassisted)) */ + /* bx r12 (A1) */ + p = imm32_to_iregNo(p, /*r*/12, + (UInt)Ptr_to_ULong(disp_cp_xassisted)); + *p++ = 0xE12FFF1C; + + /* Fix up the conditional jump, if there was one. */ + if (i->ARMin.XAssisted.cond != ARMcc_AL) { + Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */ + vassert(delta > 0 && delta < 40); + vassert((delta & 3) == 0); + UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond; + vassert(notCond <= 13); /* Neither AL nor NV */ + delta = (delta >> 2) - 2; + *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF); + } + goto done; + } + case ARMin_CMov: { UInt instr = skeletal_RI84(i->ARMin.CMov.src); UInt subopc = X1101; /* MOV */ @@ -4099,6 +4411,62 @@ Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i, *p++ = insn; goto done; } + + case ARMin_EvCheck: { + /* We generate: + ldr r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER) + subs r12, r12, #1 (A1) + str r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER) + bpl nofail + ldr r12, [r8 + #0] 0 == offsetof(host_EvC_FAILADDR) + bx r12 + nofail: + */ + UInt* p0 = p; + p = do_load_or_store32(p, True/*isLoad*/, /*r*/12, + i->ARMin.EvCheck.amCounter); + *p++ = 0xE25CC001; /* subs r12, r12, #1 */ + p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12, + i->ARMin.EvCheck.amCounter); + *p++ = 0x5A000001; /* bpl nofail */ + p = do_load_or_store32(p, True/*isLoad*/, /*r*/12, + i->ARMin.EvCheck.amFailAddr); + *p++ = 0xE12FFF1C; /* bx r12 */ + /* nofail: */ + + /* Crosscheck */ + vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0); + goto done; + } + + case ARMin_ProfInc: { + /* We generate: + (ctrP is unknown now, so use 0x65556555 in the + expectation that a later call to LibVEX_patchProfCtr + will be used to fill in the immediate fields once the + right value is known.) 
+ movw r12, lo16(0x65556555) + movt r12, lo16(0x65556555) + ldr r11, [r12] + adds r11, r11, #1 + str r11, [r12] + ldr r11, [r12+4] + adc r11, r11, #0 + str r11, [r12+4] + */ + p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555); + *p++ = 0xE59CB000; + *p++ = 0xE29BB001; + *p++ = 0xE58CB000; + *p++ = 0xE59CB004; + *p++ = 0xE2ABB000; + *p++ = 0xE58CB004; + /* Tell the caller .. */ + vassert(!(*is_profInc)); + *is_profInc = True; + goto done; + } + /* ... */ default: goto bad; @@ -4114,6 +4482,109 @@ Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i, return ((UChar*)p) - &buf[0]; } + +/* How big is an event check? See case for Ain_EvCheck in + emit_ARMInstr just above. That crosschecks what this returns, so + we can tell if we're inconsistent. */ +Int evCheckSzB_ARM ( void ) +{ + return 24; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. */ +VexInvalRange chainXDirect_ARM ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ) +{ + /* What we're expecting to see is: + movw r12, lo16(disp_cp_chain_me_to_EXPECTED) + movt r12, hi16(disp_cp_chain_me_to_EXPECTED) + blx r12 + viz + <8 bytes generated by imm32_to_iregNo_EXACTLY2> + E1 2F FF 3C + */ + UInt* p = (UInt*)place_to_chain; + vassert(0 == (3 & (UInt)p)); + vassert(is_imm32_to_iregNo_EXACTLY2( + p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED))); + vassert(p[2] == 0xE12FFF3C); + /* And what we want to change it to is: + movw r12, lo16(place_to_jump_to) + movt r12, hi16(place_to_jump_to) + bx r12 + viz + <8 bytes generated by imm32_to_iregNo_EXACTLY2> + E1 2F FF 1C + The replacement has the same length as the original. + */ + (void)imm32_to_iregNo_EXACTLY2( + p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to)); + p[2] = 0xE12FFF1C; + VexInvalRange vir = {(HWord)p, 12}; + return vir; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. */ +VexInvalRange unchainXDirect_ARM ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ) +{ + /* What we're expecting to see is: + movw r12, lo16(place_to_jump_to_EXPECTED) + movt r12, lo16(place_to_jump_to_EXPECTED) + bx r12 + viz + <8 bytes generated by imm32_to_iregNo_EXACTLY2> + E1 2F FF 1C + */ + UInt* p = (UInt*)place_to_unchain; + vassert(0 == (3 & (UInt)p)); + vassert(is_imm32_to_iregNo_EXACTLY2( + p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED))); + vassert(p[2] == 0xE12FFF1C); + /* And what we want to change it to is: + movw r12, lo16(disp_cp_chain_me) + movt r12, hi16(disp_cp_chain_me) + blx r12 + viz + <8 bytes generated by imm32_to_iregNo_EXACTLY2> + E1 2F FF 3C + */ + (void)imm32_to_iregNo_EXACTLY2( + p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me)); + p[2] = 0xE12FFF3C; + VexInvalRange vir = {(HWord)p, 12}; + return vir; +} + + +/* Patch the counter address into a profile inc point, as previously + created by the Xin_ProfInc case for emit_ARMInstr. 
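+   Unlike the amd64 version, this returns a non-empty VexInvalRange
+   (the two rewritten movw/movt words), since the patched insns must
+   be flushed from the host's insn cache; the vassert on
+   sizeof(ULong*) below also pins this routine to 32-bit hosts.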
*/ +VexInvalRange patchProfInc_ARM ( void* place_to_patch, + ULong* location_of_counter ) +{ + vassert(sizeof(ULong*) == 4); + UInt* p = (UInt*)place_to_patch; + vassert(0 == (3 & (UInt)p)); + vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555)); + vassert(p[2] == 0xE59CB000); + vassert(p[3] == 0xE29BB001); + vassert(p[4] == 0xE58CB000); + vassert(p[5] == 0xE59CB004); + vassert(p[6] == 0xE2ABB000); + vassert(p[7] == 0xE58CB004); + imm32_to_iregNo_EXACTLY2(p, /*r*/12, + (UInt)Ptr_to_ULong(location_of_counter)); + VexInvalRange vir = {(HWord)p, 8}; + return vir; +} + + #undef BITS4 #undef X0000 #undef X0001 @@ -4136,6 +4607,7 @@ Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i, #undef XXX___XX #undef XXXXX__X #undef XXXXXXXX +#undef XX______ /*---------------------------------------------------------------*/ /*--- end host_arm_defs.c ---*/ diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index 0dea3f5c6f..7eb4f3e850 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -564,7 +564,9 @@ typedef ARMin_LdSt16, ARMin_LdSt8U, ARMin_Ld8S, - ARMin_Goto, + ARMin_XDirect, /* direct transfer to GA */ + ARMin_XIndir, /* indirect transfer to GA */ + ARMin_XAssisted, /* assisted transfer to GA */ ARMin_CMov, ARMin_Call, ARMin_Mul, @@ -604,9 +606,10 @@ typedef allocator demands them to consist of no more than two instructions. We will split this instruction into 2 or 3 ARM instructions on the emiting phase. - NOTE: source and destination registers should be different! */ - ARMin_Add32 + ARMin_Add32, + ARMin_EvCheck, /* Event check */ + ARMin_ProfInc /* 64-bit profile counter increment */ } ARMInstrTag; @@ -676,13 +679,30 @@ typedef HReg rD; ARMAMode2* amode; } Ld8S; - /* Pseudo-insn. Go to guest address gnext, on given - condition, which could be ARMcc_AL. */ + /* Update the guest R15T value, then exit requesting to chain + to it. May be conditional. Urr, use of Addr32 implicitly + assumes that wordsize(guest) == wordsize(host). */ + struct { + Addr32 dstGA; /* next guest address */ + ARMAMode1* amR15T; /* amode in guest state for R15T */ + ARMCondCode cond; /* can be ARMcc_AL */ + Bool toFastEP; /* chain to the slow or fast point? */ + } XDirect; + /* Boring transfer to a guest address not known at JIT time. + Not chainable. May be conditional. */ struct { + HReg dstGA; + ARMAMode1* amR15T; + ARMCondCode cond; /* can be ARMcc_AL */ + } XIndir; + /* Assisted transfer to a guest address, most general case. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + ARMAMode1* amR15T; + ARMCondCode cond; /* can be ARMcc_AL */ IRJumpKind jk; - ARMCondCode cond; - HReg gnext; - } Goto; + } XAssisted; /* Mov src to dst on the given condition, which may not be ARMcc_AL. */ struct { @@ -905,6 +925,15 @@ typedef HReg rN; UInt imm32; } Add32; + struct { + ARMAMode1* amCounter; + ARMAMode1* amFailAddr; + } EvCheck; + struct { + /* No fields. The address of the counter to inc is + installed later, post-translation, by patching it in, + as it is not known at translation time. 
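+         (Presumably via LibVEX_patchProfCtr, which on ARM would
+         land in patchProfInc_ARM in host_arm_defs.c.)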
*/ + } ProfInc; } ARMin; } ARMInstr; @@ -921,7 +950,12 @@ extern ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad, HReg, ARMAMode2* ); extern ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg, ARMAMode1* ); extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* ); -extern ARMInstr* ARMInstr_Goto ( IRJumpKind, ARMCondCode, HReg gnext ); +extern ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T, + ARMCondCode cond, Bool toFastEP ); +extern ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T, + ARMCondCode cond ); +extern ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T, + ARMCondCode cond, IRJumpKind jk ); extern ARMInstr* ARMInstr_CMov ( ARMCondCode, HReg dst, ARMRI84* src ); extern ARMInstr* ARMInstr_Call ( ARMCondCode, HWord, Int nArgRegs ); extern ARMInstr* ARMInstr_Mul ( ARMMulOp op ); @@ -957,6 +991,9 @@ extern ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp, HReg, HReg, HReg, extern ARMInstr* ARMInstr_NeonImm ( HReg, ARMNImm* ); extern ARMInstr* ARMInstr_NCMovQ ( ARMCondCode, HReg, HReg ); extern ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ); +extern ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter, + ARMAMode1* amFailAddr ); +extern ARMInstr* ARMInstr_ProfInc ( void ); extern void ppARMInstr ( ARMInstr* ); @@ -966,10 +1003,13 @@ extern void ppARMInstr ( ARMInstr* ); extern void getRegUsage_ARMInstr ( HRegUsage*, ARMInstr*, Bool ); extern void mapRegs_ARMInstr ( HRegRemap*, ARMInstr*, Bool ); extern Bool isMove_ARMInstr ( ARMInstr*, HReg*, HReg* ); -extern Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr*, - Bool, - void* dispatch_unassisted, - void* dispatch_assisted ); +extern Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, ARMInstr* i, + Bool mode64, + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ); extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); @@ -977,8 +1017,34 @@ extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void getAllocableRegs_ARM ( Int*, HReg** ); -extern HInstrArray* iselSB_ARM ( IRSB*, VexArch, - VexArchInfo*, VexAbiInfo* ); +extern HInstrArray* iselSB_ARM ( IRSB*, + VexArch, + VexArchInfo*, + VexAbiInfo*, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ); + +/* How big is an event check? This is kind of a kludge because it + depends on the offsets of host_EvC_FAILADDR and + host_EvC_COUNTER. */ +extern Int evCheckSzB_ARM ( void ); + +/* Perform a chaining and unchaining of an XDirect jump. */ +extern VexInvalRange chainXDirect_ARM ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ); + +extern VexInvalRange unchainXDirect_ARM ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ); + +/* Patch the counter location into an existing ProfInc point. */ +extern VexInvalRange patchProfInc_ARM ( void* place_to_patch, + ULong* location_of_counter ); + #endif /* ndef __VEX_HOST_ARM_DEFS_H */ diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c index e6955676ac..7ddd0775db 100644 --- a/VEX/priv/host_arm_isel.c +++ b/VEX/priv/host_arm_isel.c @@ -84,9 +84,6 @@ 32-bit virtual HReg, which holds the high half of the value. - - The name of the vreg in which we stash a copy of the link reg, so - helper functions don't kill it. - - The code array, that is, the insns selected so far. 
- A counter, for generating new virtual registers. @@ -94,23 +91,38 @@ - The host hardware capabilities word. This is set at the start and does not change. - Note, this is all host-independent. */ + - A Bool for indicating whether we may generate chain-me + instructions for control flow transfers, or whether we must use + XAssisted. + + - The maximum guest address of any guest insn in this block. + Actually, the address of the highest-addressed byte from any insn + in this block. Is set at the start and does not change. This is + used for detecting jumps which are definitely forward-edges from + this block, and therefore can be made (chained) to the fast entry + point of the destination, thereby avoiding the destination's + event check. + + Note, this is all (well, mostly) host-independent. +*/ typedef struct { + /* Constant -- are set at the start and do not change. */ IRTypeEnv* type_env; HReg* vregmap; HReg* vregmapHI; Int n_vregmap; - HReg savedLR; + UInt hwcaps; - HInstrArray* code; + Bool chainingAllowed; + Addr64 max_ga; + /* These are modified as we go along. */ + HInstrArray* code; Int vreg_ctr; - - UInt hwcaps; } ISelEnv; @@ -1514,7 +1526,7 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) } case Iop_64to8: { HReg rHi, rLo; - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { HReg tHi = newVRegI(env); HReg tLo = newVRegI(env); HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg); @@ -1819,7 +1831,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) /* read 64-bit IRTemp */ if (e->tag == Iex_RdTmp) { - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { HReg tHi = newVRegI(env); HReg tLo = newVRegI(env); HReg tmp = iselNeon64Expr(env, e); @@ -2028,7 +2040,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) /* It is convenient sometimes to call iselInt64Expr even when we have NEON support (e.g. in do_helper_call we need 64-bit arguments as 2 x 32 regs). 
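
The forward-edge test that max_ga enables is a single compare. A worked instance, with hypothetical addresses: if the block covers guest bytes 0x8000..0x8007 then max_ga == 0x8007, so an exit to 0x8010 is a definite forward edge and may chain to the destination's fast entry point, skipping its event check, whereas a branch back to 0x8000 (a self-loop) must enter through the slow point so that the event counter keeps getting decremented:

   Bool toFastEP = ((Addr64)dstGA) > env->max_ga;  /* as used below */
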
*/ - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { HReg tHi = newVRegI(env); HReg tLo = newVRegI(env); HReg tmp = iselNeon64Expr(env, e); @@ -5339,7 +5351,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) if (e->tag == Iex_Unop) { switch (e->Iex.Unop.op) { case Iop_ReinterpI64asF64: { - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { return iselNeon64Expr(env, e->Iex.Unop.arg); } else { HReg srcHi, srcLo; @@ -5631,7 +5643,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) return; } if (tyd == Ity_I64) { - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data); ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); addInstr(env, ARMInstr_NLdStD(False, dD, am)); @@ -5680,7 +5692,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) return; } if (tyd == Ity_I64) { - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { HReg addr = newVRegI(env); HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data); addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), @@ -5765,7 +5777,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) return; } if (ty == Ity_I64) { - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False)); @@ -5824,7 +5836,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) retty = typeOfIRTemp(env->type_env, d->tmp); if (retty == Ity_I64) { - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { HReg tmp = lookupIRTemp(env, d->tmp); addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(), hregARM_R0())); @@ -5878,7 +5890,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) move it into a result register pair. On a NEON capable CPU, the result register will be a 64 bit NEON register, so we must move it there instead. */ - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { HReg dst = lookupIRTemp(env, res); addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(), hregARM_R2())); @@ -5964,15 +5976,53 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /* --------- EXIT --------- */ case Ist_Exit: { - HReg gnext; - ARMCondCode cc; if (stmt->Ist.Exit.dst->tag != Ico_U32) vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value"); - gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); - cc = iselCondCode(env, stmt->Ist.Exit.guard); - addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR)); - addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext)); - return; + + ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); + ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), + stmt->Ist.Exit.offsIP); + + /* Case: boring transfer to known address */ + if (stmt->Ist.Exit.jk == Ijk_Boring + || stmt->Ist.Exit.jk == Ijk_Call + || stmt->Ist.Exit.jk == Ijk_Ret) { + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga; + if (0) vex_printf("%s", toFastEP ? "Y" : ","); + addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32, + amR15T, cc, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. 
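
The decision tree used here, and mirrored by iselNext below and by the x86 versions of both, can be condensed into one hypothetical helper. pick_exit and its dstKnown flag are for illustration only, not code from this patch:

   static ARMInstr* pick_exit ( ISelEnv* env, Bool dstKnown,
                                Addr32 dstGA, HReg rDst,
                                ARMAMode1* amR15T, ARMCondCode cc,
                                IRJumpKind jk )
   {
      Bool boring = jk == Ijk_Boring || jk == Ijk_Call || jk == Ijk_Ret;
      if (dstKnown && boring && env->chainingAllowed) {
         /* patchable, chainable direct transfer */
         Bool toFastEP = ((Addr64)dstGA) > env->max_ga;
         return ARMInstr_XDirect(dstGA, amR15T, cc, toFastEP);
      }
      if (boring && env->chainingAllowed)
         return ARMInstr_XIndir(rDst, amR15T, cc);   /* dst in a reg */
      return ARMInstr_XAssisted(rDst, amR15T, cc,
                                boring ? Ijk_Boring : jk); /* last resort */
   }
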
*/ + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring)); + } + return; + } + + /* Case: assisted transfer to arbitrary address */ + switch (stmt->Ist.Exit.jk) { + //case Ijk_MapFail: + //case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn: + case Ijk_NoDecode: + { + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, + stmt->Ist.Exit.jk)); + return; + } + default: + break; + } + + /* Do we ever expect to see any other kind? */ + goto stmt_fail; } default: break; @@ -5987,19 +6037,85 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /*--- ISEL: Basic block terminators (Nexts) ---*/ /*---------------------------------------------------------*/ -static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) +static void iselNext ( ISelEnv* env, + IRExpr* next, IRJumpKind jk, Int offsIP ) { - HReg rDst; if (vex_traceflags & VEX_TRACE_VCODE) { - vex_printf("\n-- goto {"); + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); ppIRJumpKind(jk); - vex_printf("} "); - ppIRExpr(next); - vex_printf("\n"); + vex_printf( "\n"); + } + + /* Case: boring transfer to known address */ + if (next->tag == Iex_Const) { + IRConst* cdst = next->Iex.Const.con; + vassert(cdst->tag == Ico_U32); + if (jk == Ijk_Boring || jk == Ijk_Call) { + /* Boring transfer to known address */ + ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = ((Addr64)cdst->Ico.U32) > env->max_ga; + if (0) vex_printf("%s", toFastEP ? "X" : "."); + addInstr(env, ARMInstr_XDirect(cdst->Ico.U32, + amR15T, ARMcc_AL, + toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. */ + HReg r = iselIntExpr_R(env, next); + addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, + Ijk_Boring)); + } + return; + } + } + + /* Case: call/return (==boring) transfer to any address */ + switch (jk) { + case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { + HReg r = iselIntExpr_R(env, next); + ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); + if (env->chainingAllowed) { + addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL)); + } else { + addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, + Ijk_Boring)); + } + return; + } + default: + break; } - rDst = iselIntExpr_R(env, next); - addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR)); - addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst)); + + /* Case: some other kind of transfer to any address */ + switch (jk) { + case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoDecode: + case Ijk_NoRedir: + //case Ijk_Sys_int128: + //case Ijk_Yield: case Ijk_SigTRAP: + { + HReg r = iselIntExpr_R(env, next); + ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP); + addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk)); + return; + } + default: + break; + } + + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); + ppIRJumpKind(jk); + vex_printf( "\n"); + vassert(0); // are we expecting any other kind? } @@ -6009,21 +6125,27 @@ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) /* Translate an entire SB to arm code. 
*/ -HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host, - VexArchInfo* archinfo_host, - VexAbiInfo* vbi/*UNUSED*/ ) +HInstrArray* iselSB_ARM ( IRSB* bb, + VexArch arch_host, + VexArchInfo* archinfo_host, + VexAbiInfo* vbi/*UNUSED*/, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ) { - Int i, j; - HReg hreg, hregHI; - ISelEnv* env; - UInt hwcaps_host = archinfo_host->hwcaps; - static UInt counter = 0; + Int i, j; + HReg hreg, hregHI; + ISelEnv* env; + UInt hwcaps_host = archinfo_host->hwcaps; + ARMAMode1 *amCounter, *amFailAddr; /* sanity ... */ vassert(arch_host == VexArchARM); /* hwcaps should not change from one ISEL call to another. */ - arm_hwcaps = hwcaps_host; + arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM) /* Make up an initial environment to use. */ env = LibVEX_Alloc(sizeof(ISelEnv)); @@ -6041,6 +6163,11 @@ HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host, env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); + /* and finally ... */ + env->chainingAllowed = chainingAllowed; + env->hwcaps = hwcaps_host; + env->max_ga = max_ga; + /* For each IR temporary, allocate a suitably-kinded virtual register. */ j = 0; @@ -6052,7 +6179,7 @@ HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host, case Ity_I16: case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break; case Ity_I64: - if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) { + if (hwcaps_host & VEX_HWCAPS_ARM_NEON) { hreg = mkHReg(j++, HRcFlt64, True); } else { hregHI = mkHReg(j++, HRcInt32, True); @@ -6070,21 +6197,27 @@ HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host, } env->vreg_ctr = j; - /* Keep a copy of the link reg, since any call to a helper function - will trash it, and we can't get back to the dispatcher once that - happens. */ - env->savedLR = newVRegI(env); - addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14())); + /* The very first instruction must be an event check. */ + amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter); + amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr); + addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr)); + + /* Possibly a block counter increment (for profiling). At this + point we don't know the address of the counter, so just pretend + it is zero. It will have to be patched later, but before this + translation is used, by a call to LibVEX_patchProfCtr. */ + if (addProfInc) { + addInstr(env, ARMInstr_ProfInc()); + } /* Ok, finally we can iterate over the statements. */ for (i = 0; i < bb->stmts_used; i++) - iselStmt(env,bb->stmts[i]); + iselStmt(env, bb->stmts[i]); - iselNext(env,bb->next,bb->jumpkind); + iselNext(env, bb->next, bb->jumpkind, bb->offsIP); /* record the number of vregs we used. 
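
A sketch of how a driver now has to call this; the two offsets name the host_EvC_* guest-state fields, and the ARM guest struct's field names are assumed here by analogy with the amd64 ones this patch initialises:

   HInstrArray* vcode
      = iselSB_ARM( irsb, VexArchARM, &archinfo_host, &abiinfo_both,
                    offsetof(VexGuestARMState, host_EvC_COUNTER),
                    offsetof(VexGuestARMState, host_EvC_FAILADDR),
                    True/*chainingAllowed*/,
                    False/*addProfInc*/,
                    max_ga );
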
*/ env->code->n_vregs = env->vreg_ctr; - counter++; return env->code; } diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index 25848a34fa..4471f4d835 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -647,12 +647,33 @@ X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms ) { vassert(regparms >= 0 && regparms <= 3); return i; } -X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) { - X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); - i->tag = Xin_Goto; - i->Xin.Goto.cond = cond; - i->Xin.Goto.dst = dst; - i->Xin.Goto.jk = jk; +X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP, + X86CondCode cond, Bool toFastEP ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_XDirect; + i->Xin.XDirect.dstGA = dstGA; + i->Xin.XDirect.amEIP = amEIP; + i->Xin.XDirect.cond = cond; + i->Xin.XDirect.toFastEP = toFastEP; + return i; +} +X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP, + X86CondCode cond ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_XIndir; + i->Xin.XIndir.dstGA = dstGA; + i->Xin.XIndir.amEIP = amEIP; + i->Xin.XIndir.cond = cond; + return i; +} +X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP, + X86CondCode cond, IRJumpKind jk ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_XAssisted; + i->Xin.XAssisted.dstGA = dstGA; + i->Xin.XAssisted.amEIP = amEIP; + i->Xin.XAssisted.cond = cond; + i->Xin.XAssisted.jk = jk; return i; } X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) { @@ -797,7 +818,6 @@ X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) { i->Xin.FpCmp.dst = dst; return i; } - X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) { X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); i->tag = Xin_SseConst; @@ -886,6 +906,19 @@ X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) { vassert(order >= 0 && order <= 0xFF); return i; } +X86Instr* X86Instr_EvCheck ( X86AMode* amCounter, + X86AMode* amFailAddr ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_EvCheck; + i->Xin.EvCheck.amCounter = amCounter; + i->Xin.EvCheck.amFailAddr = amFailAddr; + return i; +} +X86Instr* X86Instr_ProfInc ( void ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_ProfInc; + return i; +} void ppX86Instr ( X86Instr* i, Bool mode64 ) { vassert(mode64 == False); @@ -953,24 +986,36 @@ void ppX86Instr ( X86Instr* i, Bool mode64 ) { i->Xin.Call.regparms); vex_printf("0x%x", i->Xin.Call.target); break; - case Xin_Goto: - if (i->Xin.Goto.cond != Xcc_ALWAYS) { - vex_printf("if (%%eflags.%s) { ", - showX86CondCode(i->Xin.Goto.cond)); - } - if (i->Xin.Goto.jk != Ijk_Boring - && i->Xin.Goto.jk != Ijk_Call - && i->Xin.Goto.jk != Ijk_Ret) { - vex_printf("movl $"); - ppIRJumpKind(i->Xin.Goto.jk); - vex_printf(",%%ebp ; "); - } + case Xin_XDirect: + vex_printf("(xDirect) "); + vex_printf("if (%%eflags.%s) { ", + showX86CondCode(i->Xin.XDirect.cond)); + vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA); + ppX86AMode(i->Xin.XDirect.amEIP); + vex_printf("; "); + vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }", + i->Xin.XDirect.toFastEP ? 
"fast" : "slow"); + return; + case Xin_XIndir: + vex_printf("(xIndir) "); + vex_printf("if (%%eflags.%s) { movl ", + showX86CondCode(i->Xin.XIndir.cond)); + ppHRegX86(i->Xin.XIndir.dstGA); + vex_printf(","); + ppX86AMode(i->Xin.XIndir.amEIP); + vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }"); + return; + case Xin_XAssisted: + vex_printf("(xAssisted) "); + vex_printf("if (%%eflags.%s) { ", + showX86CondCode(i->Xin.XAssisted.cond)); vex_printf("movl "); - ppX86RI(i->Xin.Goto.dst); - vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx"); - if (i->Xin.Goto.cond != Xcc_ALWAYS) { - vex_printf(" }"); - } + ppHRegX86(i->Xin.XAssisted.dstGA); + vex_printf(","); + ppX86AMode(i->Xin.XAssisted.amEIP); + vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp", + (Int)i->Xin.XAssisted.jk); + vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }"); return; case Xin_CMov32: vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond)); @@ -1152,7 +1197,17 @@ void ppX86Instr ( X86Instr* i, Bool mode64 ) { vex_printf(","); ppHRegX86(i->Xin.SseShuf.dst); return; - + case Xin_EvCheck: + vex_printf("(evCheck) decl "); + ppX86AMode(i->Xin.EvCheck.amCounter); + vex_printf("; jns nofail; jmp *"); + ppX86AMode(i->Xin.EvCheck.amFailAddr); + vex_printf("; nofail:"); + return; + case Xin_ProfInc: + vex_printf("(profInc) addl $1,NotKnownYet; " + "adcl $0,NotKnownYet+4"); + return; default: vpanic("ppX86Instr"); } @@ -1258,16 +1313,21 @@ void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64) address temporary, depending on the regparmness: 0==EAX, 1==EDX, 2==ECX, 3==EDI. */ return; - case Xin_Goto: - addRegUsage_X86RI(u, i->Xin.Goto.dst); - addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */ - addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */ - if (i->Xin.Goto.jk != Ijk_Boring - && i->Xin.Goto.jk != Ijk_Call - && i->Xin.Goto.jk != Ijk_Ret) - /* note, this is irrelevant since ebp is not actually - available to the allocator. But still .. */ - addHRegUse(u, HRmWrite, hregX86_EBP()); + /* XDirect/XIndir/XAssisted are also a bit subtle. They + conditionally exit the block. Hence we only need to list (1) + the registers that they read, and (2) the registers that they + write in the case where the block is not exited. (2) is + empty, hence only (1) is relevant here. */ + case Xin_XDirect: + addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP); + return; + case Xin_XIndir: + addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA); + addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP); + return; + case Xin_XAssisted: + addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA); + addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP); return; case Xin_CMov32: addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead); @@ -1410,6 +1470,15 @@ void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64) addHRegUse(u, HRmRead, i->Xin.SseShuf.src); addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst); return; + case Xin_EvCheck: + /* We expect both amodes only to mention %ebp, so this is in + fact pointless, since %ebp isn't allocatable, but anyway.. */ + addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter); + addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr); + return; + case Xin_ProfInc: + /* does not use any registers. 
*/ + return; default: ppX86Instr(i, False); vpanic("getRegUsage_X86Instr"); @@ -1462,8 +1531,16 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 ) return; case Xin_Call: return; - case Xin_Goto: - mapRegs_X86RI(m, i->Xin.Goto.dst); + case Xin_XDirect: + mapRegs_X86AMode(m, i->Xin.XDirect.amEIP); + return; + case Xin_XIndir: + mapReg(m, &i->Xin.XIndir.dstGA); + mapRegs_X86AMode(m, i->Xin.XIndir.amEIP); + return; + case Xin_XAssisted: + mapReg(m, &i->Xin.XAssisted.dstGA); + mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP); return; case Xin_CMov32: mapRegs_X86RM(m, i->Xin.CMov32.src); @@ -1566,6 +1643,16 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 ) mapReg(m, &i->Xin.SseShuf.src); mapReg(m, &i->Xin.SseShuf.dst); return; + case Xin_EvCheck: + /* We expect both amodes only to mention %ebp, so this is in + fact pointless, since %ebp isn't allocatable, but anyway.. */ + mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter); + mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr); + return; + case Xin_ProfInc: + /* does not use any registers. */ + return; + default: ppX86Instr(i, mode64); vpanic("mapRegs_X86Instr"); @@ -1986,12 +2073,17 @@ static UChar* push_word_from_tags ( UChar* p, UShort tags ) /* Emit an instruction into buf and return the number of bytes used. Note that buf is not the insn's final place, and therefore it is - imperative to emit position-independent code. */ + imperative to emit position-independent code. If the emitted + instruction was a profiler inc, set *is_profInc to True, else + leave it unchanged. */ -Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, +Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, X86Instr* i, Bool mode64, - void* dispatch_unassisted, - void* dispatch_assisted ) + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ) { UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc; @@ -2306,110 +2398,153 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, *p++ = toUChar(0xD0 + irno); goto done; - case Xin_Goto: { - void* dispatch_to_use = NULL; - vassert(dispatch_unassisted != NULL); - vassert(dispatch_assisted != NULL); + case Xin_XDirect: { + /* NB: what goes on here has to be very closely coordinated with the + chainXDirect_X86 and unchainXDirect_X86 below. */ + /* We're generating chain-me requests here, so we need to be + sure this is actually allowed -- no-redir translations can't + use chain-me's. Hence: */ + vassert(disp_cp_chain_me_to_slowEP != NULL); + vassert(disp_cp_chain_me_to_fastEP != NULL); /* Use ptmp for backpatching conditional jumps. */ ptmp = NULL; /* First off, if this is conditional, create a conditional - jump over the rest of it. */ - if (i->Xin.Goto.cond != Xcc_ALWAYS) { + jump over the rest of it. */ + if (i->Xin.XDirect.cond != Xcc_ALWAYS) { /* jmp fwds if !condition */ - *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1))); + *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1))); ptmp = p; /* fill in this bit later */ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ } - /* If a non-boring, set %ebp (the guest state pointer) - appropriately. Also, decide which dispatcher we need to - use. 
*/ - dispatch_to_use = dispatch_assisted; - - /* movl $magic_number, %ebp */ - switch (i->Xin.Goto.jk) { - case Ijk_ClientReq: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break; - case Ijk_Sys_int128: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SYS_INT128); break; - case Ijk_Sys_int129: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SYS_INT129); break; - case Ijk_Sys_int130: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SYS_INT130); break; - case Ijk_Yield: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_YIELD); break; - case Ijk_EmWarn: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_EMWARN); break; - case Ijk_MapFail: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_MAPFAIL); break; - case Ijk_NoDecode: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_NODECODE); break; - case Ijk_TInval: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_TINVAL); break; - case Ijk_NoRedir: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_NOREDIR); break; - case Ijk_Sys_sysenter: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break; - case Ijk_SigTRAP: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SIGTRAP); break; - case Ijk_SigSEGV: - *p++ = 0xBD; - p = emit32(p, VEX_TRC_JMP_SIGSEGV); break; - case Ijk_Ret: - case Ijk_Call: - case Ijk_Boring: - dispatch_to_use = dispatch_unassisted; - break; - default: - ppIRJumpKind(i->Xin.Goto.jk); - vpanic("emit_X86Instr.Xin_Goto: unknown jump kind"); + /* Update the guest EIP. */ + /* movl $dstGA, amEIP */ + *p++ = 0xC7; + p = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP); + p = emit32(p, i->Xin.XDirect.dstGA); + + /* --- FIRST PATCHABLE BYTE follows --- */ + /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling + to) backs up the return address, so as to find the address of + the first patchable byte. So: don't change the length of the + two instructions below. */ + /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */ + *p++ = 0xBA; + void* disp_cp_chain_me + = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP + : disp_cp_chain_me_to_slowEP; + p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me)); + /* call *%edx */ + *p++ = 0xFF; + *p++ = 0xD2; + /* --- END of PATCHABLE BYTES --- */ + + /* Fix up the conditional jump, if there was one. */ + if (i->Xin.XDirect.cond != Xcc_ALWAYS) { + Int delta = p - ptmp; + vassert(delta > 0 && delta < 40); + *ptmp = toUChar(delta-1); } + goto done; + } - /* Get the destination address into %eax */ - if (i->Xin.Goto.dst->tag == Xri_Imm) { - /* movl $immediate, %eax */ - *p++ = 0xB8; - p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32); - } else { - vassert(i->Xin.Goto.dst->tag == Xri_Reg); - /* movl %reg, %eax */ - if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) { - *p++ = 0x89; - p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX()); - } + case Xin_XIndir: { + /* We're generating transfers that could lead indirectly to a + chain-me, so we need to be sure this is actually allowed -- + no-redir translations are not allowed to reach normal + translations without going through the scheduler. That means + no XDirects or XIndirs out from no-redir translations. + Hence: */ + vassert(disp_cp_xindir != NULL); + + /* Use ptmp for backpatching conditional jumps. */ + ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. */ + if (i->Xin.XIndir.cond != Xcc_ALWAYS) { + /* jmp fwds if !condition */ + *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1))); + ptmp = p; /* fill in this bit later */ + *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ } - /* Get the dispatcher address into %edx. 
This has to happen - after the load of %eax since %edx might be carrying the value - destined for %eax immediately prior to this Xin_Goto. */ - vassert(sizeof(UInt) == sizeof(void*)); - vassert(dispatch_to_use != NULL); - /* movl $imm32, %edx */ + /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ + *p++ = 0x89; + p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); + + /* movl $disp_indir, %edx */ *p++ = 0xBA; - p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use)); + p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir)); + /* jmp *%edx */ + *p++ = 0xFF; + *p++ = 0xE2; + + /* Fix up the conditional jump, if there was one. */ + if (i->Xin.XIndir.cond != Xcc_ALWAYS) { + Int delta = p - ptmp; + vassert(delta > 0 && delta < 40); + *ptmp = toUChar(delta-1); + } + goto done; + } + + case Xin_XAssisted: { + /* Use ptmp for backpatching conditional jumps. */ + ptmp = NULL; + + /* First off, if this is conditional, create a conditional + jump over the rest of it. */ + if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { + /* jmp fwds if !condition */ + *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1))); + ptmp = p; /* fill in this bit later */ + *p++ = 0; /* # of bytes to jump over; don't know how many yet. */ + } + + /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */ + *p++ = 0x89; + p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP); + /* movl $magic_number, %ebp. */ + UInt trcval = 0; + switch (i->Xin.XAssisted.jk) { + case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break; + case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break; + case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break; + case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break; + case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break; + case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break; + case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break; + case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break; + case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; + case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; + case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; + case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; + /* We don't expect to see the following being assisted. */ + case Ijk_Ret: + case Ijk_Call: + /* fallthrough */ + default: + ppIRJumpKind(i->Xin.XAssisted.jk); + vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind"); + } + vassert(trcval != 0); + *p++ = 0xBD; + p = emit32(p, trcval); + /* movl $disp_indir, %edx */ + *p++ = 0xBA; + p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted)); /* jmp *%edx */ *p++ = 0xFF; *p++ = 0xE2; /* Fix up the conditional jump, if there was one. */ - if (i->Xin.Goto.cond != Xcc_ALWAYS) { + if (i->Xin.XAssisted.cond != Xcc_ALWAYS) { Int delta = p - ptmp; - vassert(delta > 0 && delta < 20); + vassert(delta > 0 && delta < 40); *ptmp = toUChar(delta-1); } goto done; @@ -3088,6 +3223,63 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, *p++ = (UChar)(i->Xin.SseShuf.order); goto done; + case Xin_EvCheck: { + /* We generate: + (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER) + (2 bytes) jns nofail expected taken + (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR) + nofail: + */ + /* This is heavily asserted re instruction lengths. It needs to + be. If we get given unexpected forms of .amCounter or + .amFailAddr -- basically, anything that's not of the form + uimm7(%ebp) -- they are likely to fail. */ + /* Note also that after the decl we must be very careful not to + read the carry flag, else we get a partial flags stall. 
+ js/jns avoids that, though. */ + UChar* p0 = p; + /* --- decl 8(%ebp) --- */ + /* "fake(1)" because + there's no register in this encoding; + instead the register + field is used as a sub opcode. The + encoding for "decl r/m32" + is FF /1, hence the fake(1). */ + *p++ = 0xFF; + p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter); + vassert(p - p0 == 3); + /* --- jns nofail --- */ + *p++ = 0x79; + *p++ = 0x03; /* need to check this 0x03 after the next insn */ + vassert(p - p0 == 5); + /* --- jmp* 0(%ebp) --- */ + /* The encoding is FF /4. */ + *p++ = 0xFF; + p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr); + vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */ + /* And crosscheck .. */ + vassert(evCheckSzB_X86() == 8); + goto done; + } + + case Xin_ProfInc: { + /* We generate addl $1,NotKnownYet + adcl $0,NotKnownYet+4 + in the expectation that a later call to LibVEX_patchProfCtr + will be used to fill in the immediate fields once the right + value is known. + 83 05 00 00 00 00 01 + 83 15 00 00 00 00 00 + */ + *p++ = 0x83; *p++ = 0x05; + *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; + *p++ = 0x01; + *p++ = 0x83; *p++ = 0x15; + *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; + *p++ = 0x00; + /* Tell the caller .. */ + vassert(!(*is_profInc)); + *is_profInc = True; + goto done; + } + default: goto bad; } @@ -3104,6 +3296,140 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, # undef fake } + +/* How big is an event check? See case for Xin_EvCheck in + emit_X86Instr just above. That crosschecks what this returns, so + we can tell if we're inconsistent. */ +Int evCheckSzB_X86 ( void ) +{ + return 8; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. */ +VexInvalRange chainXDirect_X86 ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ) +{ + /* What we're expecting to see is: + movl $disp_cp_chain_me_EXPECTED, %edx + call *%edx + viz + BA <4 bytes value == disp_cp_chain_me_EXPECTED> + FF D2 + */ + UChar* p = (UChar*)place_to_chain; + vassert(p[0] == 0xBA); + vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)); + vassert(p[5] == 0xFF); + vassert(p[6] == 0xD2); + /* And what we want to change it to is: + jmp disp32 where disp32 is relative to the next insn + ud2; + viz + E9 <4 bytes == disp32> + 0F 0B + The replacement has the same length as the original. + */ + /* This is the delta we need to put into a JMP d32 insn. It's + relative to the start of the next insn, hence the -5. */ + Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5; + + /* And make the modifications. */ + p[0] = 0xE9; + p[1] = (delta >> 0) & 0xFF; + p[2] = (delta >> 8) & 0xFF; + p[3] = (delta >> 16) & 0xFF; + p[4] = (delta >> 24) & 0xFF; + p[5] = 0x0F; p[6] = 0x0B; + /* sanity check on the delta -- top 32 are all 0 or all 1 */ + delta >>= 32; + vassert(delta == 0LL || delta == -1LL); + VexInvalRange vir = {0, 0}; + return vir; +} + + +/* NB: what goes on here has to be very closely coordinated with the + emitInstr case for XDirect, above. */ +VexInvalRange unchainXDirect_X86 ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ) +{ + /* What we're expecting to see is: + jmp d32 + ud2; + viz + E9 <4 bytes == disp32> + 0F 0B + */ + UChar* p = (UChar*)place_to_unchain; + Bool valid = False; + if (p[0] == 0xE9 + && p[5] == 0x0F && p[6] == 0x0B) { + /* Check the offset is right. 
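
A worked instance of the displacement arithmetic used by the chainer above and by this reverse check (addresses hypothetical): for a site at 0x60001000 and a successor at 0x60002000, the jmp's disp32 is relative to the end of the 5-byte jump insn, so

   Long delta = 0x60002000 - 0x60001000 - 5;   /* == 0x00000FFB */
   /* chained site: E9 FB 0F 00 00  0F 0B   (jmp .+0xFFB ; ud2)
      and the unchain check recovers the target as
      site + 5 + 0xFFB == 0x60002000. */
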
*/ + Int s32 = *(Int*)(&p[1]); + if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) { + valid = True; + if (0) + vex_printf("QQQ unchainXDirect_X86: found valid\n"); + } + } + vassert(valid); + /* And what we want to change it to is: + movl $disp_cp_chain_me, %edx + call *%edx + viz + BA <4 bytes value == disp_cp_chain_me_EXPECTED> + FF D2 + So it's the same length (convenient, huh). + */ + p[0] = 0xBA; + *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me); + p[5] = 0xFF; + p[6] = 0xD2; + VexInvalRange vir = {0, 0}; + return vir; +} + + +/* Patch the counter address into a profile inc point, as previously + created by the Xin_ProfInc case for emit_X86Instr. */ +VexInvalRange patchProfInc_X86 ( void* place_to_patch, + ULong* location_of_counter ) +{ + vassert(sizeof(ULong*) == 4); + UChar* p = (UChar*)place_to_patch; + vassert(p[0] == 0x83); + vassert(p[1] == 0x05); + vassert(p[2] == 0x00); + vassert(p[3] == 0x00); + vassert(p[4] == 0x00); + vassert(p[5] == 0x00); + vassert(p[6] == 0x01); + vassert(p[7] == 0x83); + vassert(p[8] == 0x15); + vassert(p[9] == 0x00); + vassert(p[10] == 0x00); + vassert(p[11] == 0x00); + vassert(p[12] == 0x00); + vassert(p[13] == 0x00); + UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter); + p[2] = imm32 & 0xFF; imm32 >>= 8; + p[3] = imm32 & 0xFF; imm32 >>= 8; + p[4] = imm32 & 0xFF; imm32 >>= 8; + p[5] = imm32 & 0xFF; imm32 >>= 8; + imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter); + p[9] = imm32 & 0xFF; imm32 >>= 8; + p[10] = imm32 & 0xFF; imm32 >>= 8; + p[11] = imm32 & 0xFF; imm32 >>= 8; + p[12] = imm32 & 0xFF; imm32 >>= 8; + VexInvalRange vir = {0, 0}; + return vir; +} + + /*---------------------------------------------------------------*/ /*--- end host_x86_defs.c ---*/ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h index f68a426771..544f8df4a1 100644 --- a/VEX/priv/host_x86_defs.h +++ b/VEX/priv/host_x86_defs.h @@ -349,7 +349,9 @@ typedef Xin_Sh3232, /* shldl or shrdl */ Xin_Push, /* push (32-bit?) value on stack */ Xin_Call, /* call to address in register */ - Xin_Goto, /* conditional/unconditional jmp to dst */ + Xin_XDirect, /* direct transfer to GA */ + Xin_XIndir, /* indirect transfer to GA */ + Xin_XAssisted, /* assisted transfer to GA */ Xin_CMov32, /* conditional move */ Xin_LoadEX, /* mov{s,z}{b,w}l from mem to reg */ Xin_Store, /* store 16/8 bit value in memory */ @@ -378,7 +380,9 @@ typedef Xin_Sse64FLo, /* SSE binary, 64F in lowest lane only */ Xin_SseReRg, /* SSE binary general reg-reg, Re, Rg */ Xin_SseCMov, /* SSE conditional move */ - Xin_SseShuf /* SSE2 shuffle (pshufd) */ + Xin_SseShuf, /* SSE2 shuffle (pshufd) */ + Xin_EvCheck, /* Event check */ + Xin_ProfInc /* 64-bit profile counter increment */ } X86InstrTag; @@ -444,13 +448,30 @@ typedef Addr32 target; Int regparms; /* 0 .. 3 */ } Call; - /* Pseudo-insn. Goto dst, on given condition (which could be - Xcc_ALWAYS). */ - struct { + /* Update the guest EIP value, then exit requesting to chain + to it. May be conditional. Urr, use of Addr32 implicitly + assumes that wordsize(guest) == wordsize(host). */ + struct { + Addr32 dstGA; /* next guest address */ + X86AMode* amEIP; /* amode in guest state for EIP */ + X86CondCode cond; /* can be Xcc_ALWAYS */ + Bool toFastEP; /* chain to the slow or fast point? */ + } XDirect; + /* Boring transfer to a guest address not known at JIT time. + Not chainable. May be conditional. 
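
Decoding the patch done by patchProfInc_X86 above, with a hypothetical counter at 0x08100020:

   /* before: 83 05 00 00 00 00 01      addl $1, (0x0)
              83 15 00 00 00 00 00      adcl $0, (0x0)
      after:  83 05 20 00 10 08 01      addl $1, (0x08100020)
              83 15 24 00 10 08 00      adcl $0, (0x08100024)
      i.e. p[2..5] and p[9..12] receive the little-endian absolute
      addresses of the counter's low and high words respectively. */
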
*/ + struct { + HReg dstGA; + X86AMode* amEIP; + X86CondCode cond; /* can be Xcc_ALWAYS */ + } XIndir; + /* Assisted transfer to a guest address, most general case. + Not chainable. May be conditional. */ + struct { + HReg dstGA; + X86AMode* amEIP; + X86CondCode cond; /* can be Xcc_ALWAYS */ IRJumpKind jk; - X86CondCode cond; - X86RI* dst; - } Goto; + } XAssisted; /* Mov src to dst on the given condition, which may not be the bogus Xcc_ALWAYS. */ struct { @@ -615,6 +636,15 @@ typedef HReg src; HReg dst; } SseShuf; + struct { + X86AMode* amCounter; + X86AMode* amFailAddr; + } EvCheck; + struct { + /* No fields. The address of the counter to inc is + installed later, post-translation, by patching it in, + as it is not known at translation time. */ + } ProfInc; } Xin; } @@ -632,7 +662,12 @@ extern X86Instr* X86Instr_Div ( Bool syned, X86RM* ); extern X86Instr* X86Instr_Sh3232 ( X86ShiftOp, UInt amt, HReg src, HReg dst ); extern X86Instr* X86Instr_Push ( X86RMI* ); extern X86Instr* X86Instr_Call ( X86CondCode, Addr32, Int ); -extern X86Instr* X86Instr_Goto ( IRJumpKind, X86CondCode cond, X86RI* dst ); +extern X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP, + X86CondCode cond, Bool toFastEP ); +extern X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP, + X86CondCode cond ); +extern X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP, + X86CondCode cond, IRJumpKind jk ); extern X86Instr* X86Instr_CMov32 ( X86CondCode, X86RM* src, HReg dst ); extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, X86AMode* src, HReg dst ); @@ -663,6 +698,9 @@ extern X86Instr* X86Instr_Sse64FLo ( X86SseOp, HReg, HReg ); extern X86Instr* X86Instr_SseReRg ( X86SseOp, HReg, HReg ); extern X86Instr* X86Instr_SseCMov ( X86CondCode, HReg src, HReg dst ); extern X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ); +extern X86Instr* X86Instr_EvCheck ( X86AMode* amCounter, + X86AMode* amFailAddr ); +extern X86Instr* X86Instr_ProfInc ( void ); extern void ppX86Instr ( X86Instr*, Bool ); @@ -672,10 +710,13 @@ extern void ppX86Instr ( X86Instr*, Bool ); extern void getRegUsage_X86Instr ( HRegUsage*, X86Instr*, Bool ); extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool ); extern Bool isMove_X86Instr ( X86Instr*, HReg*, HReg* ); -extern Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr*, - Bool, - void* dispatch_unassisted, - void* dispatch_assisted ); +extern Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, + UChar* buf, Int nbuf, X86Instr* i, + Bool mode64, + void* disp_cp_chain_me_to_slowEP, + void* disp_cp_chain_me_to_fastEP, + void* disp_cp_xindir, + void* disp_cp_xassisted ); extern void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); @@ -685,9 +726,36 @@ extern void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, extern X86Instr* directReload_X86 ( X86Instr* i, HReg vreg, Short spill_off ); extern void getAllocableRegs_X86 ( Int*, HReg** ); -extern HInstrArray* iselSB_X86 ( IRSB*, VexArch, - VexArchInfo*, - VexAbiInfo* ); +extern HInstrArray* iselSB_X86 ( IRSB*, + VexArch, + VexArchInfo*, + VexAbiInfo*, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ); + +/* How big is an event check? This is kind of a kludge because it + depends on the offsets of host_EvC_FAILADDR and host_EvC_COUNTER, + and so assumes that they are both <= 128, and so can use the short + offset encoding. 
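
Concretely, the short form is the x86 ModRM disp8 encoding, a signed byte covering -128..+127; both fields must therefore sit within the first 128 bytes of the guest state for the fixed 8-byte event check to encode at all. The implied startup checks are of this shape (a sketch; the real enforcement is the instruction-length asserts in the emitter's Xin_EvCheck case):

   vassert(offs_Host_EvC_Counter  >= 0 && offs_Host_EvC_Counter  <= 127);
   vassert(offs_Host_EvC_FailAddr >= 0 && offs_Host_EvC_FailAddr <= 127);
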
This is all checked with assertions, so in the + worst case we will merely assert at startup. */ +extern Int evCheckSzB_X86 ( void ); + +/* Perform a chaining and unchaining of an XDirect jump. */ +extern VexInvalRange chainXDirect_X86 ( void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ); + +extern VexInvalRange unchainXDirect_X86 ( void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ); + +/* Patch the counter location into an existing ProfInc point. */ +extern VexInvalRange patchProfInc_X86 ( void* place_to_patch, + ULong* location_of_counter ); + #endif /* ndef __VEX_HOST_X86_DEFS_H */ diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c index 81896b3814..bad28a11b4 100644 --- a/VEX/priv/host_x86_isel.c +++ b/VEX/priv/host_x86_isel.c @@ -154,21 +154,38 @@ static Bool isZeroU64 ( IRExpr* e ) - The host subarchitecture we are selecting insns for. This is set at the start and does not change. - Note, this is all host-independent. */ + - A Bool for indicating whether we may generate chain-me + instructions for control flow transfers, or whether we must use + XAssisted. + + - The maximum guest address of any guest insn in this block. + Actually, the address of the highest-addressed byte from any insn + in this block. Is set at the start and does not change. This is + used for detecting jumps which are definitely forward-edges from + this block, and therefore can be made (chained) to the fast entry + point of the destination, thereby avoiding the destination's + event check. + + Note, this is all (well, mostly) host-independent. +*/ typedef struct { + /* Constant -- are set at the start and do not change. */ IRTypeEnv* type_env; HReg* vregmap; HReg* vregmapHI; Int n_vregmap; - HInstrArray* code; + UInt hwcaps; - Int vreg_ctr; + Bool chainingAllowed; + Addr64 max_ga; - UInt hwcaps; + /* These are modified as we go along. */ + HInstrArray* code; + Int vreg_ctr; } ISelEnv; @@ -4038,14 +4055,48 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /* --------- EXIT --------- */ case Ist_Exit: { - X86RI* dst; - X86CondCode cc; if (stmt->Ist.Exit.dst->tag != Ico_U32) - vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value"); - dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst)); - cc = iselCondCode(env,stmt->Ist.Exit.guard); - addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst)); - return; + vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value"); + + X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); + X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP, + hregX86_EBP()); + + /* Case: boring transfer to known address */ + if (stmt->Ist.Exit.jk == Ijk_Boring) { + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga; + if (0) vex_printf("%s", toFastEP ? "Y" : ","); + addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32, + amEIP, cc, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. 
*/ + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring)); + } + return; + } + + /* Case: assisted transfer to arbitrary address */ + switch (stmt->Ist.Exit.jk) { + case Ijk_MapFail: + case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn: { + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk)); + return; + } + default: + break; + } + + /* Do we ever expect to see any other kind? */ + goto stmt_fail; } default: break; @@ -4060,18 +4111,82 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /*--- ISEL: Basic block terminators (Nexts) ---*/ /*---------------------------------------------------------*/ -static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) +static void iselNext ( ISelEnv* env, + IRExpr* next, IRJumpKind jk, Int offsIP ) { - X86RI* ri; if (vex_traceflags & VEX_TRACE_VCODE) { - vex_printf("\n-- goto {"); + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); ppIRJumpKind(jk); - vex_printf("} "); - ppIRExpr(next); - vex_printf("\n"); + vex_printf( "\n"); } - ri = iselIntExpr_RI(env, next); - addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri)); + + /* Case: boring transfer to known address */ + if (next->tag == Iex_Const) { + IRConst* cdst = next->Iex.Const.con; + vassert(cdst->tag == Ico_U32); + if (jk == Ijk_Boring || jk == Ijk_Call) { + /* Boring transfer to known address */ + X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP()); + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards + edge. */ + Bool toFastEP + = ((Addr64)cdst->Ico.U32) > env->max_ga; + if (0) vex_printf("%s", toFastEP ? "X" : "."); + addInstr(env, X86Instr_XDirect(cdst->Ico.U32, + amEIP, Xcc_ALWAYS, + toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, + as that's the only alternative that is allowable. */ + HReg r = iselIntExpr_R(env, next); + addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, + Ijk_Boring)); + } + return; + } + } + + /* Case: call/return (==boring) transfer to any address */ + switch (jk) { + case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { + HReg r = iselIntExpr_R(env, next); + X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP()); + if (env->chainingAllowed) { + addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS)); + } else { + addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, + Ijk_Boring)); + } + return; + } + default: + break; + } + + /* Case: some other kind of transfer to any address */ + switch (jk) { + case Ijk_Sys_int128: case Ijk_ClientReq: case Ijk_NoRedir: + case Ijk_Yield: case Ijk_SigTRAP: { + HReg r = iselIntExpr_R(env, next); + X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP()); + addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk)); + return; + } + default: + break; + } + + vex_printf( "\n-- PUT(%d) = ", offsIP); + ppIRExpr( next ); + vex_printf( "; exit-"); + ppIRJumpKind(jk); + vex_printf( "\n"); + vassert(0); // are we expecting any other kind? } @@ -4081,14 +4196,21 @@ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) /* Translate an entire SB to x86 code. 
*/ -HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host, - VexArchInfo* archinfo_host, - VexAbiInfo* vbi/*UNUSED*/ ) +HInstrArray* iselSB_X86 ( IRSB* bb, + VexArch arch_host, + VexArchInfo* archinfo_host, + VexAbiInfo* vbi/*UNUSED*/, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr64 max_ga ) { Int i, j; HReg hreg, hregHI; ISelEnv* env; UInt hwcaps_host = archinfo_host->hwcaps; + X86AMode *amCounter, *amFailAddr; /* sanity ... */ vassert(arch_host == VexArchX86); @@ -4097,6 +4219,8 @@ HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host, | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT))); + vassert(sizeof(max_ga) == 8); + vassert((max_ga >> 32) == 0); /* Make up an initial environment to use. */ env = LibVEX_Alloc(sizeof(ISelEnv)); @@ -4115,7 +4239,9 @@ HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host, env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); /* and finally ... */ - env->hwcaps = hwcaps_host; + env->chainingAllowed = chainingAllowed; + env->hwcaps = hwcaps_host; + env->max_ga = max_ga; /* For each IR temporary, allocate a suitably-kinded virtual register. */ @@ -4140,11 +4266,24 @@ HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host, } env->vreg_ctr = j; + /* The very first instruction must be an event check. */ + amCounter = X86AMode_IR(offs_Host_EvC_Counter, hregX86_EBP()); + amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP()); + addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr)); + + /* Possibly a block counter increment (for profiling). At this + point we don't know the address of the counter, so just pretend + it is zero. It will have to be patched later, but before this + translation is used, by a call to LibVEX_patchProfCtr. */ + if (addProfInc) { + addInstr(env, X86Instr_ProfInc()); + } + /* Ok, finally we can iterate over the statements. */ for (i = 0; i < bb->stmts_used; i++) - iselStmt(env,bb->stmts[i]); + iselStmt(env, bb->stmts[i]); - iselNext(env,bb->next,bb->jumpkind); + iselNext(env, bb->next, bb->jumpkind, bb->offsIP); /* record the number of vregs we used. 
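
In C terms, the event check that now heads every translation behaves as below. The host_EvC_* fields live at the very start of the guest state, which is also why the IR sanity checker (ir_defs.c, below) rejects any offsIP < 16:

   /* Sketch: executed on entry through a block's slow entry point;
      the fast entry point begins immediately after these 8 bytes.
      goto_address() stands in for the emitted "jmp *amFailAddr". */
   static void evcheck_semantics ( VexGuestX86State* st )
   {
      if (--st->host_EvC_COUNTER < 0)
         goto_address( st->host_EvC_FAILADDR );  /* back to scheduler */
   }
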
*/ env->code->n_vregs = env->vreg_ctr; diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index 545df8e3ff..81d2e4227a 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -1241,10 +1241,11 @@ void ppIRStmt ( IRStmt* s ) case Ist_Exit: vex_printf( "if (" ); ppIRExpr(s->Ist.Exit.guard); - vex_printf( ") goto {"); - ppIRJumpKind(s->Ist.Exit.jk); - vex_printf("} "); + vex_printf( ") { PUT(%d) = ", s->Ist.Exit.offsIP); ppIRConst(s->Ist.Exit.dst); + vex_printf("; exit-"); + ppIRJumpKind(s->Ist.Exit.jk); + vex_printf(" } "); break; default: vpanic("ppIRStmt"); @@ -1279,10 +1280,10 @@ void ppIRSB ( IRSB* bb ) ppIRStmt(bb->stmts[i]); vex_printf( "\n"); } - vex_printf( " goto {"); - ppIRJumpKind(bb->jumpkind); - vex_printf( "} "); + vex_printf( " PUT(%d) = ", bb->offsIP ); ppIRExpr( bb->next ); + vex_printf( "; exit-"); + ppIRJumpKind(bb->jumpkind); vex_printf( "\n}\n"); } @@ -1713,12 +1714,14 @@ IRStmt* IRStmt_MBE ( IRMBusEvent event ) s->Ist.MBE.event = event; return s; } -IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ) { - IRStmt* s = LibVEX_Alloc(sizeof(IRStmt)); - s->tag = Ist_Exit; - s->Ist.Exit.guard = guard; - s->Ist.Exit.jk = jk; - s->Ist.Exit.dst = dst; +IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst, + Int offsIP ) { + IRStmt* s = LibVEX_Alloc(sizeof(IRStmt)); + s->tag = Ist_Exit; + s->Ist.Exit.guard = guard; + s->Ist.Exit.jk = jk; + s->Ist.Exit.dst = dst; + s->Ist.Exit.offsIP = offsIP; return s; } @@ -1746,6 +1749,7 @@ IRSB* emptyIRSB ( void ) bb->stmts = LibVEX_Alloc(bb->stmts_size * sizeof(IRStmt*)); bb->next = NULL; bb->jumpkind = Ijk_Boring; + bb->offsIP = 0; return bb; } @@ -1936,7 +1940,8 @@ IRStmt* deepCopyIRStmt ( IRStmt* s ) case Ist_Exit: return IRStmt_Exit(deepCopyIRExpr(s->Ist.Exit.guard), s->Ist.Exit.jk, - deepCopyIRConst(s->Ist.Exit.dst)); + deepCopyIRConst(s->Ist.Exit.dst), + s->Ist.Exit.offsIP); default: vpanic("deepCopyIRStmt"); } @@ -1963,7 +1968,7 @@ IRSB* deepCopyIRSB ( IRSB* bb ) sts2 = LibVEX_Alloc(bb2->stmts_used * sizeof(IRStmt*)); for (i = 0; i < bb2->stmts_used; i++) sts2[i] = deepCopyIRStmt(bb->stmts[i]); - bb2->stmts = sts2; + bb2->stmts = sts2; return bb2; } @@ -1973,6 +1978,7 @@ IRSB* deepCopyIRSBExceptStmts ( IRSB* bb ) bb2->tyenv = deepCopyIRTypeEnv(bb->tyenv); bb2->next = deepCopyIRExpr(bb->next); bb2->jumpkind = bb->jumpkind; + bb2->offsIP = bb->offsIP; return bb2; } @@ -3466,6 +3472,9 @@ void tcStmt ( IRSB* bb, IRStmt* stmt, IRType gWordTy ) sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: bad dst"); if (typeOfIRConst(stmt->Ist.Exit.dst) != gWordTy) sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: not :: guest word type"); + /* because it would intersect with host_EvC_* */ + if (stmt->Ist.Exit.offsIP < 16) + sanityCheckFail(bb,stmt,"IRStmt.Exit.offsIP: too low"); break; default: vpanic("tcStmt"); @@ -3592,6 +3601,10 @@ void sanityCheckIRSB ( IRSB* bb, HChar* caller, tcStmt( bb, bb->stmts[i], guest_word_size ); if (typeOfIRExpr(bb->tyenv,bb->next) != guest_word_size) sanityCheckFail(bb, NULL, "bb->next field has wrong type"); + /* because it would intersect with host_EvC_* */ + if (bb->offsIP < 16) + sanityCheckFail(bb, NULL, "bb->offsIP: too low"); + } /*---------------------------------------------------------------*/ diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c index a97ce58917..e6df9a15a6 100644 --- a/VEX/priv/ir_opt.c +++ b/VEX/priv/ir_opt.c @@ -467,7 +467,8 @@ static void flatten_Stmt ( IRSB* bb, IRStmt* st ) case Ist_Exit: e1 = flatten_Expr(bb, st->Ist.Exit.guard); addStmtToIRSB(bb, IRStmt_Exit(e1, 
st->Ist.Exit.jk, - st->Ist.Exit.dst)); + st->Ist.Exit.dst, + st->Ist.Exit.offsIP)); break; default: vex_printf("\n"); @@ -489,6 +490,7 @@ static IRSB* flatten_BB ( IRSB* in ) flatten_Stmt( out, in->stmts[i] ); out->next = flatten_Expr( out, in->next ); out->jumpkind = in->jumpkind; + out->offsIP = in->offsIP; return out; } @@ -815,6 +817,14 @@ static void redundant_put_removal_BB ( UInt key = 0; /* keep gcc -O happy */ HashHW* env = newHHW(); + + /* Initialise the running env with the fact that the final exit + writes the IP (or, whatever it claims to write. We don't + care.) */ + key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next)); + addToHHW(env, (HWord)key, 0); + + /* And now scan backwards through the statements. */ for (i = bb->stmts_used-1; i >= 0; i--) { st = bb->stmts[i]; @@ -823,13 +833,32 @@ static void redundant_put_removal_BB ( /* Deal with conditional exits. */ if (st->tag == Ist_Exit) { - /* Since control may not get beyond this point, we must empty - out the set, since we can no longer claim that the next - event for any part of the guest state is definitely a - write. */ - vassert(isIRAtom(st->Ist.Exit.guard)); + //Bool re_add; + /* Need to throw out from the env, any part of it which + doesn't overlap with the guest state written by this exit. + Since the exit only writes one section, it's simplest to + do this: (1) check whether env contains a write that + completely overlaps the write done by this exit; (2) empty + out env; and (3) if (1) was true, add the write done by + this exit. + + To make (1) a bit simpler, merely search for a write that + exactly matches the one done by this exit. That's safe + because it will fail as often or more often than a full + overlap check, and failure to find an overlapping write in + env is the safe case (we just nuke env if that + happens). */ + //vassert(isIRAtom(st->Ist.Exit.guard)); + /* (1) */ + //key = mk_key_GetPut(st->Ist.Exit.offsIP, + // typeOfIRConst(st->Ist.Exit.dst)); + //re_add = lookupHHW(env, NULL, key); + /* (2) */ for (j = 0; j < env->used; j++) env->inuse[j] = False; + /* (3) */ + //if (0 && re_add) + // addToHHW(env, (HWord)key, 0); continue; } @@ -926,10 +955,24 @@ static UInt num_nodes_visited; assumed to compute different values. After all the accesses may happen at different times and the guest state / memory can have changed in the meantime. */ + +/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable + wrapper that deals with the common tags-don't-match case, and a + slower out of line general case. Saves a few insns. 
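
Seeding env with the block's final offsIP write means an IP update that is dead on every path out of the block can now be deleted. A miniature instance of what the pass sees, shown in the new printer syntax (sketch; 68 standing in for offsIP):

   /* PUT(68) = 0x8000:I32     -- removable: no Get(68) and no exit
                                  before the block-final IP write
      ...
      PUT(68) = t9; exit-Boring */
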
*/ + +__attribute__((noinline)) +static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 ); + +inline static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) { if (e1->tag != e2->tag) return False; + return sameIRExprs_aux2(env, e1, e2); +} +__attribute__((noinline)) +static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) +{ if (num_nodes_visited++ > NODE_LIMIT) return False; switch (e1->tag) { @@ -996,6 +1039,7 @@ static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) return False; } +inline static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) { Bool same; @@ -2217,7 +2261,8 @@ static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) vex_printf("vex iropt: IRStmt_Exit became unconditional\n"); } } - return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst); + return IRStmt_Exit(fcond, st->Ist.Exit.jk, + st->Ist.Exit.dst, st->Ist.Exit.offsIP); } default: @@ -2294,6 +2339,7 @@ IRSB* cprop_BB ( IRSB* in ) out->next = subst_Expr( env, in->next ); out->jumpkind = in->jumpkind; + out->offsIP = in->offsIP; return out; } @@ -2519,6 +2565,8 @@ static Bool isOneU1 ( IRExpr* e ) = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst ); bb->jumpkind = bb->stmts[i_unconditional_exit]->Ist.Exit.jk; + bb->offsIP + = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP; for (i = i_unconditional_exit; i < bb->stmts_used; i++) bb->stmts[i] = IRStmt_NoOp(); } @@ -4470,7 +4518,8 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) return IRStmt_Exit( atbSubst_Expr(env, st->Ist.Exit.guard), st->Ist.Exit.jk, - st->Ist.Exit.dst + st->Ist.Exit.dst, + st->Ist.Exit.offsIP ); case Ist_IMark: return IRStmt_IMark(st->Ist.IMark.addr, @@ -4515,7 +4564,7 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) } } -/* notstatic */ void ado_treebuild_BB ( IRSB* bb ) +/* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb ) { Int i, j, k, m; Bool stmtPuts, stmtStores, invalidateMe; @@ -4523,19 +4572,37 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) IRStmt* st2; ATmpInfo env[A_NENV]; + Bool max_ga_known = False; + Addr64 max_ga = 0; + Int n_tmps = bb->tyenv->types_used; UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort)); /* Phase 1. Scan forwards in bb, counting use occurrences of each - temp. Also count occurrences in the bb->next field. */ + temp. Also count occurrences in the bb->next field. Take the + opportunity to also find the maximum guest address in the block, + since that will be needed later for deciding when we can safely + elide event checks. */ for (i = 0; i < n_tmps; i++) uses[i] = 0; for (i = 0; i < bb->stmts_used; i++) { st = bb->stmts[i]; - if (st->tag == Ist_NoOp) - continue; + switch (st->tag) { + case Ist_NoOp: + continue; + case Ist_IMark: { + Int len = st->Ist.IMark.len; + Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1; + max_ga_known = True; + if (mga > max_ga) + max_ga = mga; + break; + } + default: + break; + } aoccCount_Stmt( uses, st ); } aoccCount_Expr(uses, bb->next ); @@ -4707,6 +4774,8 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) by definition dead? */ bb->next = atbSubst_Expr(env, bb->next); bb->stmts_used = j; + + return max_ga_known ? max_ga : ~(Addr64)0; } diff --git a/VEX/priv/ir_opt.h b/VEX/priv/ir_opt.h index 9390a1c8a1..ded1c2d82b 100644 --- a/VEX/priv/ir_opt.h +++ b/VEX/priv/ir_opt.h @@ -60,9 +60,11 @@ extern void do_deadcode_BB ( IRSB* bb ); /* The tree-builder. Make (approximately) maximal safe trees. bb is - destructively modified. 
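
A worked instance of the Phase 1 scan that computes this return value: for IMark(0x8000, 4) followed by IMark(0x8004, 2), the highest addressed bytes are 0x8003 and 0x8005, so the function returns 0x8005. A (suspect) zero-length IMark is clamped to one byte:

   Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
   if (mga > max_ga) max_ga = mga;
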
*/ + destructively modified. Returns (unrelatedly, but useful later on) + the guest address of the highest addressed byte from any insn in + this block, or Addr64_MAX if unknown (can that ever happen?) */ extern -void ado_treebuild_BB ( IRSB* bb ); +Addr64 ado_treebuild_BB ( IRSB* bb ); #endif /* ndef __VEX_IR_OPT_H */ diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 5b818ae41d..521e63caf4 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -155,6 +155,17 @@ void LibVEX_Init ( vassert(VEX_HOST_WORDSIZE == sizeof(void*)); vassert(VEX_HOST_WORDSIZE == sizeof(HWord)); + /* These take a lot of space, so make sure we don't have + any unnoticed size regressions. */ + if (VEX_HOST_WORDSIZE == 4) { + vassert(sizeof(IRExpr) == 24); + vassert(sizeof(IRStmt) == 20 /* x86 */ + || sizeof(IRStmt) == 24 /* arm */); + } else { + vassert(sizeof(IRExpr) == 48); + vassert(sizeof(IRStmt) == 40); + } + /* Really start up .. */ vex_debuglevel = debuglevel; vex_valgrind_support = valgrind_support; @@ -183,9 +194,11 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) HInstr* (*directReload) ( HInstr*, HReg, Short ); void (*ppInstr) ( HInstr*, Bool ); void (*ppReg) ( HReg ); - HInstrArray* (*iselSB) ( IRSB*, VexArch, VexArchInfo*, - VexAbiInfo* ); - Int (*emit) ( UChar*, Int, HInstr*, Bool, void*, void* ); + HInstrArray* (*iselSB) ( IRSB*, VexArch, VexArchInfo*, VexAbiInfo*, + Int, Int, Bool, Bool, Addr64 ); + Int (*emit) ( /*MB_MOD*/Bool*, + UChar*, Int, HInstr*, Bool, + void*, void*, void*, void* ); IRExpr* (*specHelper) ( HChar*, IRExpr**, IRStmt**, Int ); Bool (*preciseMemExnsFn) ( Int, Int ); @@ -197,11 +210,13 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) HInstrArray* vcode; HInstrArray* rcode; Int i, j, k, out_used, guest_sizeB; - Int offB_TISTART, offB_TILEN; - UChar insn_bytes[48]; + Int offB_TISTART, offB_TILEN, offB_GUEST_IP, szB_GUEST_IP; + Int offB_HOST_EvC_COUNTER, offB_HOST_EvC_FAILADDR; + UChar insn_bytes[64]; IRType guest_word_type; IRType host_word_type; - Bool mode64; + Bool mode64, chainingAllowed; + Addr64 max_ga; guest_layout = NULL; available_real_regs = NULL; @@ -223,12 +238,27 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) host_word_type = Ity_INVALID; offB_TISTART = 0; offB_TILEN = 0; + offB_GUEST_IP = 0; + szB_GUEST_IP = 0; + offB_HOST_EvC_COUNTER = 0; + offB_HOST_EvC_FAILADDR = 0; mode64 = False; + chainingAllowed = False; vex_traceflags = vta->traceflags; vassert(vex_initdone); - vassert(vta->needs_self_check != NULL); + vassert(vta->needs_self_check != NULL); + vassert(vta->disp_cp_xassisted != NULL); + /* Both the chainers and the indir are either NULL or non-NULL. 
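   That is, chaining support is all-or-nothing: a client that wants
   chained translations must supply the two chain-me entry points and
   the indirect-jump entry point together, while disp_cp_xassisted is
   required in all cases.  A client-side sketch (the VG_-style names
   are hypothetical, not part of this patch):

      // opting in to chaining: all four continuation points supplied
      vta.disp_cp_chain_me_to_slowEP = (void*)VG_disp_cp_chain_me_to_slowEP;
      vta.disp_cp_chain_me_to_fastEP = (void*)VG_disp_cp_chain_me_to_fastEP;
      vta.disp_cp_xindir             = (void*)VG_disp_cp_xindir;
      vta.disp_cp_xassisted          = (void*)VG_disp_cp_xassisted;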
*/ + if (vta->disp_cp_chain_me_to_slowEP != NULL) { + vassert(vta->disp_cp_chain_me_to_fastEP != NULL); + vassert(vta->disp_cp_xindir != NULL); + chainingAllowed = True; + } else { + vassert(vta->disp_cp_chain_me_to_fastEP == NULL); + vassert(vta->disp_cp_xindir == NULL); + } vexSetAllocModeTEMP_and_clear(); vexAllocSanityCheck(); @@ -254,14 +284,12 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) ppInstr = (void(*)(HInstr*, Bool)) ppX86Instr; ppReg = (void(*)(HReg)) ppHRegX86; iselSB = iselSB_X86; - emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*)) + emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool, + void*,void*,void*,void*)) emit_X86Instr; host_is_bigendian = False; host_word_type = Ity_I32; vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps)); - /* jump-to-dispatcher scheme */ - vassert(vta->dispatch_unassisted != NULL); - vassert(vta->dispatch_assisted != NULL); break; case VexArchAMD64: @@ -279,16 +307,14 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) ppInstr = (void(*)(HInstr*, Bool)) ppAMD64Instr; ppReg = (void(*)(HReg)) ppHRegAMD64; iselSB = iselSB_AMD64; - emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*)) + emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool, + void*,void*,void*,void*)) emit_AMD64Instr; host_is_bigendian = False; host_word_type = Ity_I64; vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps)); - /* jump-to-dispatcher scheme */ - vassert(vta->dispatch_unassisted != NULL); - vassert(vta->dispatch_assisted != NULL); break; - +#if 0 case VexArchPPC32: mode64 = False; getAllocableRegs_PPC ( &n_available_real_regs, @@ -354,7 +380,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) vassert(vta->dispatch_unassisted == NULL); vassert(vta->dispatch_assisted == NULL); break; - +#endif case VexArchARM: mode64 = False; getAllocableRegs_ARM ( &n_available_real_regs, @@ -367,14 +393,12 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) ppInstr = (void(*)(HInstr*, Bool)) ppARMInstr; ppReg = (void(*)(HReg)) ppHRegARM; iselSB = iselSB_ARM; - emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*)) + emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool, + void*,void*,void*,void*)) emit_ARMInstr; host_is_bigendian = False; host_word_type = Ity_I32; vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_host.hwcaps)); - vassert(vta->dispatch_unassisted == NULL); - vassert(vta->dispatch_assisted == NULL); - /* return-to-dispatcher scheme */ break; default: @@ -385,14 +409,18 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) switch (vta->arch_guest) { case VexArchX86: - preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns; - disInstrFn = disInstr_X86; - specHelper = guest_x86_spechelper; - guest_sizeB = sizeof(VexGuestX86State); - guest_word_type = Ity_I32; - guest_layout = &x86guest_layout; - offB_TISTART = offsetof(VexGuestX86State,guest_TISTART); - offB_TILEN = offsetof(VexGuestX86State,guest_TILEN); + preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns; + disInstrFn = disInstr_X86; + specHelper = guest_x86_spechelper; + guest_sizeB = sizeof(VexGuestX86State); + guest_word_type = Ity_I32; + guest_layout = &x86guest_layout; + offB_TISTART = offsetof(VexGuestX86State,guest_TISTART); + offB_TILEN = offsetof(VexGuestX86State,guest_TILEN); + offB_GUEST_IP = offsetof(VexGuestX86State,guest_EIP); + szB_GUEST_IP = sizeof( ((VexGuestX86State*)0)->guest_EIP ); + offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = 
offsetof(VexGuestX86State,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestX86State) % 16); vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART) == 4); @@ -401,21 +429,25 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) break; case VexArchAMD64: - preciseMemExnsFn = guest_amd64_state_requires_precise_mem_exns; - disInstrFn = disInstr_AMD64; - specHelper = guest_amd64_spechelper; - guest_sizeB = sizeof(VexGuestAMD64State); - guest_word_type = Ity_I64; - guest_layout = &amd64guest_layout; - offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART); - offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN); + preciseMemExnsFn = guest_amd64_state_requires_precise_mem_exns; + disInstrFn = disInstr_AMD64; + specHelper = guest_amd64_spechelper; + guest_sizeB = sizeof(VexGuestAMD64State); + guest_word_type = Ity_I64; + guest_layout = &amd64guest_layout; + offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART); + offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN); + offB_GUEST_IP = offsetof(VexGuestAMD64State,guest_RIP); + szB_GUEST_IP = sizeof( ((VexGuestAMD64State*)0)->guest_RIP ); + offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestAMD64State) % 16); vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8); vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TILEN ) == 8); vassert(sizeof( ((VexGuestAMD64State*)0)->guest_NRADDR ) == 8); break; - +#if 0 case VexArchPPC32: preciseMemExnsFn = guest_ppc32_state_requires_precise_mem_exns; disInstrFn = disInstr_PPC; @@ -464,16 +496,20 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) vassert(sizeof( ((VexGuestS390XState*)0)->guest_TILEN ) == 8); vassert(sizeof( ((VexGuestS390XState*)0)->guest_NRADDR ) == 8); break; - +#endif case VexArchARM: - preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns; - disInstrFn = disInstr_ARM; - specHelper = guest_arm_spechelper; - guest_sizeB = sizeof(VexGuestARMState); - guest_word_type = Ity_I32; - guest_layout = &armGuest_layout; - offB_TISTART = offsetof(VexGuestARMState,guest_TISTART); - offB_TILEN = offsetof(VexGuestARMState,guest_TILEN); + preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns; + disInstrFn = disInstr_ARM; + specHelper = guest_arm_spechelper; + guest_sizeB = sizeof(VexGuestARMState); + guest_word_type = Ity_I32; + guest_layout = &armGuest_layout; + offB_TISTART = offsetof(VexGuestARMState,guest_TISTART); + offB_TILEN = offsetof(VexGuestARMState,guest_TILEN); + offB_GUEST_IP = offsetof(VexGuestARMState,guest_R15T); + szB_GUEST_IP = sizeof( ((VexGuestARMState*)0)->guest_R15T ); + offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR); vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestARMState) % 16); vassert(sizeof( ((VexGuestARMState*)0)->guest_TISTART) == 4); @@ -489,6 +525,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) VexTranslateResult res; res.status = VexTransOK; res.n_sc_extents = 0; + res.offs_profInc = -1; /* yet more sanity checks ... 
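   (An aside on the host_EvC_ offsets just computed: the field names
   suggest the intended shape of the event check placed at the start
   of each translation -- roughly, in pseudo-C,

      if ((Int)--vex_state.host_EvC_COUNTER < 0)   // too many events?
         goto *vex_state.host_EvC_FAILADDR;        // back to the scheduler

   though the real thing is emitted directly as machine code; see
   LibVEX_evCheckSzB below for its size.)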
*/ if (vta->arch_guest == vta->arch_host) { @@ -520,7 +557,9 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) vta->needs_self_check, vta->preamble_function, offB_TISTART, - offB_TILEN ); + offB_TILEN, + offB_GUEST_IP, + szB_GUEST_IP ); vexAllocSanityCheck(); @@ -627,7 +666,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) /* Turn it into virtual-registerised code. Build trees -- this also throws away any dead bindings. */ - ado_treebuild_BB( irsb ); + max_ga = ado_treebuild_BB( irsb ); if (vta->finaltidy) { irsb = vta->finaltidy(irsb); @@ -655,8 +694,19 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) " Instruction selection " "------------------------\n"); - vcode = iselSB ( irsb, vta->arch_host, &vta->archinfo_host, - &vta->abiinfo_both ); + /* No guest has its IP field at offset zero. If this fails it + means some transformation pass somewhere failed to update/copy + irsb->offsIP properly. */ + vassert(irsb->offsIP >= 16); + + vcode = iselSB ( irsb, vta->arch_host, + &vta->archinfo_host, + &vta->abiinfo_both, + offB_HOST_EvC_COUNTER, + offB_HOST_EvC_FAILADDR, + chainingAllowed, + vta->addProfInc, + max_ga ); vexAllocSanityCheck(); @@ -710,13 +760,19 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) out_used = 0; /* tracks along the host_bytes array */ for (i = 0; i < rcode->arr_used; i++) { - if (vex_traceflags & VEX_TRACE_ASM) { - ppInstr(rcode->arr[i], mode64); + HInstr* hi = rcode->arr[i]; + Bool hi_isProfInc = False; + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) { + ppInstr(hi, mode64); vex_printf("\n"); } - j = (*emit)( insn_bytes, sizeof insn_bytes, rcode->arr[i], mode64, - vta->dispatch_unassisted, vta->dispatch_assisted ); - if (vex_traceflags & VEX_TRACE_ASM) { + j = emit( &hi_isProfInc, + insn_bytes, sizeof insn_bytes, hi, mode64, + vta->disp_cp_chain_me_to_slowEP, + vta->disp_cp_chain_me_to_fastEP, + vta->disp_cp_xindir, + vta->disp_cp_xassisted ); + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) { for (k = 0; k < j; k++) if (insn_bytes[k] < 16) vex_printf("0%x ", (UInt)insn_bytes[k]); @@ -724,15 +780,23 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) vex_printf("%x ", (UInt)insn_bytes[k]); vex_printf("\n\n"); } - if (out_used + j > vta->host_bytes_size) { + if (UNLIKELY(out_used + j > vta->host_bytes_size)) { vexSetAllocModeTEMP_and_clear(); vex_traceflags = 0; res.status = VexTransOutputFull; return res; } - for (k = 0; k < j; k++) { - vta->host_bytes[out_used] = insn_bytes[k]; - out_used++; + if (UNLIKELY(hi_isProfInc)) { + vassert(vta->addProfInc); /* else where did it come from? */ + vassert(res.offs_profInc == -1); /* there can be only one (tm) */ + vassert(out_used >= 0); + res.offs_profInc = out_used; + } + { UChar* dst = &vta->host_bytes[out_used]; + for (k = 0; k < j; k++) { + dst[k] = insn_bytes[k]; + } + out_used += j; } vassert(out_used <= vta->host_bytes_size); } @@ -748,6 +812,94 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) } +/* --------- Chain/Unchain XDirects. 
--------- */ + +VexInvalRange LibVEX_Chain ( VexArch arch_host, + void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ) +{ + VexInvalRange (*chainXDirect)(void*, void*, void*) = NULL; + switch (arch_host) { + case VexArchX86: + chainXDirect = chainXDirect_X86; break; + case VexArchAMD64: + chainXDirect = chainXDirect_AMD64; break; + case VexArchARM: + chainXDirect = chainXDirect_ARM; break; + default: + vassert(0); + } + vassert(chainXDirect); + VexInvalRange vir + = chainXDirect(place_to_chain, disp_cp_chain_me_EXPECTED, + place_to_jump_to); + return vir; +} + +VexInvalRange LibVEX_UnChain ( VexArch arch_host, + void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ) +{ + VexInvalRange (*unchainXDirect)(void*, void*, void*) = NULL; + switch (arch_host) { + case VexArchX86: + unchainXDirect = unchainXDirect_X86; break; + case VexArchAMD64: + unchainXDirect = unchainXDirect_AMD64; break; + case VexArchARM: + unchainXDirect = unchainXDirect_ARM; break; + default: + vassert(0); + } + vassert(unchainXDirect); + VexInvalRange vir + = unchainXDirect(place_to_unchain, place_to_jump_to_EXPECTED, + disp_cp_chain_me); + return vir; +} + +Int LibVEX_evCheckSzB ( VexArch arch_host ) +{ + static Int cached = 0; /* DO NOT MAKE NON-STATIC */ + if (UNLIKELY(cached == 0)) { + switch (arch_host) { + case VexArchX86: + cached = evCheckSzB_X86(); break; + case VexArchAMD64: + cached = evCheckSzB_AMD64(); break; + case VexArchARM: + cached = evCheckSzB_ARM(); break; + default: + vassert(0); + } + } + return cached; +} + +VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host, + void* place_to_patch, + ULong* location_of_counter ) +{ + VexInvalRange (*patchProfInc)(void*,ULong*) = NULL; + switch (arch_host) { + case VexArchX86: + patchProfInc = patchProfInc_X86; break; + case VexArchAMD64: + patchProfInc = patchProfInc_AMD64; break; + case VexArchARM: + patchProfInc = patchProfInc_ARM; break; + default: + vassert(0); + } + vassert(patchProfInc); + VexInvalRange vir + = patchProfInc(place_to_patch, location_of_counter); + return vir; +} + + /* --------- Emulation warnings. --------- */ HChar* LibVEX_EmWarn_string ( VexEmWarn ew ) diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index a259fcce8e..efd9ccda57 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -483,6 +483,9 @@ typedef VexTransAccessFail, VexTransOutputFull } status; /* The number of extents that have a self-check (0 to 3) */ UInt n_sc_extents; + /* Offset in generated code of the profile inc, or -1 if + none. Needed for later patching. */ + Int offs_profInc; } VexTranslateResult; @@ -580,6 +583,10 @@ typedef /* IN: debug: trace vex activity at various points */ Int traceflags; + /* IN: profiling: add a 64 bit profiler counter increment to the + translation? */ + Bool addProfInc; + /* IN: address of the dispatcher entry points. Describes the places where generated code should jump to at the end of each bb. @@ -612,9 +619,13 @@ typedef The aim is to get back and forth between translations and the dispatcher without creating memory traffic to store return addresses. + + FIXME: update this comment */ - void* dispatch_unassisted; - void* dispatch_assisted; + void* disp_cp_chain_me_to_slowEP; + void* disp_cp_chain_me_to_fastEP; + void* disp_cp_xindir; + void* disp_cp_xassisted; } VexTranslateArgs; @@ -632,7 +643,60 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* ); would not be the result. Therefore chase_into_ok should disallow following into #2. 
That will force the caller to eventually request a new translation starting at #2, at which point Vex will - correctly observe the make-a-self-check flag. */ + correctly observe the make-a-self-check flag. + + FIXME: is this still up to date? */ + + +/*-------------------------------------------------------*/ +/*--- Patch existing translations ---*/ +/*-------------------------------------------------------*/ + +/* Indicates a host address range for which callers to the functions + below must request I-D cache syncing after the call. ::len == 0 is + ambiguous -- it could mean either zero bytes or the entire address + space, so we mean the former. */ +typedef + struct { + HWord start; + HWord len; + } + VexInvalRange; + +/* Chain an XDirect jump located at place_to_chain so it jumps to + place_to_jump_to. It is expected (and checked) that this site + currently contains a call to the dispatcher specified by + disp_cp_chain_me_EXPECTED. */ +extern +VexInvalRange LibVEX_Chain ( VexArch arch_host, + void* place_to_chain, + void* disp_cp_chain_me_EXPECTED, + void* place_to_jump_to ); + +/* Undo an XDirect jump located at place_to_unchain, so it is + converted back into a call to disp_cp_chain_me. It is expected + (and checked) that this site currently contains a jump directly to + the address specified by place_to_jump_to_EXPECTED. */ +extern +VexInvalRange LibVEX_UnChain ( VexArch arch_host, + void* place_to_unchain, + void* place_to_jump_to_EXPECTED, + void* disp_cp_chain_me ); + +/* Returns a constant -- the size of the event check that is put at + the start of every translation. This makes it possible to + calculate the fast entry point address if the slow entry point + address is known (the usual case), or vice versa. */ +extern +Int LibVEX_evCheckSzB ( VexArch arch_host ); + + +/* Patch the counter location into an existing ProfInc point. The + specified point is checked to make sure it is plausible. */ +extern +VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host, + void* place_to_patch, + ULong* location_of_counter ); /*-------------------------------------------------------*/ diff --git a/VEX/pub/libvex_guest_amd64.h b/VEX/pub/libvex_guest_amd64.h index 564f6a0e4e..7d5d354ed9 100644 --- a/VEX/pub/libvex_guest_amd64.h +++ b/VEX/pub/libvex_guest_amd64.h @@ -52,34 +52,39 @@ typedef struct { - /* 0 */ ULong guest_RAX; - /* 8 */ ULong guest_RCX; - /* 16 */ ULong guest_RDX; - /* 24 */ ULong guest_RBX; - /* 32 */ ULong guest_RSP; - /* 40 */ ULong guest_RBP; - /* 48 */ ULong guest_RSI; - /* 56 */ ULong guest_RDI; - /* 64 */ ULong guest_R8; - /* 72 */ ULong guest_R9; - /* 80 */ ULong guest_R10; - /* 88 */ ULong guest_R11; - /* 96 */ ULong guest_R12; - /* 104 */ ULong guest_R13; - /* 112 */ ULong guest_R14; - /* 120 */ ULong guest_R15; + /* Event check fail addr, counter, and padding to make RAX 16 + aligned. */ + /* 0 */ ULong host_EvC_FAILADDR; + /* 8 */ UInt host_EvC_COUNTER; + /* 12 */ UInt pad0; + /* 16 */ ULong guest_RAX; + /* 24 */ ULong guest_RCX; + /* 32 */ ULong guest_RDX; + /* 40 */ ULong guest_RBX; + /* 48 */ ULong guest_RSP; + /* 56 */ ULong guest_RBP; + /* 64 */ ULong guest_RSI; + /* 72 */ ULong guest_RDI; + /* 80 */ ULong guest_R8; + /* 88 */ ULong guest_R9; + /* 96 */ ULong guest_R10; + /* 104 */ ULong guest_R11; + /* 112 */ ULong guest_R12; + /* 120 */ ULong guest_R13; + /* 128 */ ULong guest_R14; + /* 136 */ ULong guest_R15; /* 4-word thunk used to calculate O S Z A C P flags. 
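   Rather than computing the rflags eagerly after every instruction,
   VEX records which operation most recently set them (guest_CC_OP)
   together with its operands (guest_CC_DEP1/guest_CC_DEP2, plus
   guest_CC_NDEP for flag state the operation depended on but did not
   define), and a helper materialises the actual flag bits only when
   they are read.  Schematically (a sketch of the idea; calc_flags is
   hypothetical, not the exact helper):

      ULong flags = calc_flags( guest_CC_OP,     // which op set the flags
                                guest_CC_DEP1,   // e.g. the result
                                guest_CC_DEP2,   // e.g. the 2nd operand
                                guest_CC_NDEP ); // e.g. the old C flag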
*/ - /* 128 */ ULong guest_CC_OP; - /* 136 */ ULong guest_CC_DEP1; - /* 144 */ ULong guest_CC_DEP2; - /* 152 */ ULong guest_CC_NDEP; + /* 144 */ ULong guest_CC_OP; + /* 152 */ ULong guest_CC_DEP1; + /* 160 */ ULong guest_CC_DEP2; + /* 168 */ ULong guest_CC_NDEP; /* The D flag is stored here, encoded as either -1 or +1 */ - /* 160 */ ULong guest_DFLAG; - /* 168 */ ULong guest_RIP; + /* 176 */ ULong guest_DFLAG; + /* 184 */ ULong guest_RIP; /* Bit 18 (AC) of eflags stored here, as either 0 or 1. */ /* ... */ ULong guest_ACFLAG; /* Bit 21 (ID) of eflags stored here, as either 0 or 1. */ - /* 176 */ ULong guest_IDFLAG; + /* 192 */ ULong guest_IDFLAG; /* Probably a lot more stuff too. D,ID flags 16 128-bit SSE registers @@ -89,14 +94,14 @@ typedef /* HACK to make tls on amd64-linux work. %fs only ever seems to hold zero, and so guest_FS_ZERO holds the 64-bit offset associated with a %fs value of zero. */ - /* 184 */ ULong guest_FS_ZERO; + /* 200 */ ULong guest_FS_ZERO; /* XMM registers. Note that these must be allocated consecutively in order that the SSE4.2 PCMP{E,I}STR{I,M} helpers can treat them as an array. XMM16 is a fake reg used as an intermediary in handling aforementioned insns. */ - /* 192 */ULong guest_SSEROUND; - /* 200 */U128 guest_XMM0; + /* 208 */ULong guest_SSEROUND; + /* 216 */U128 guest_XMM0; U128 guest_XMM1; U128 guest_XMM2; U128 guest_XMM3; @@ -118,14 +123,14 @@ typedef /* Note. Setting guest_FTOP to be ULong messes up the delicately-balanced PutI/GetI optimisation machinery. Therefore best to leave it as a UInt. */ - /* 456 */UInt guest_FTOP; + UInt guest_FTOP; ULong guest_FPREG[8]; - /* 528 */ UChar guest_FPTAG[8]; - /* 536 */ ULong guest_FPROUND; - /* 544 */ ULong guest_FC3210; + UChar guest_FPTAG[8]; + ULong guest_FPROUND; + ULong guest_FC3210; /* Emulation warnings */ - /* 552 */ UInt guest_EMWARN; + UInt guest_EMWARN; /* Translation-invalidation area description. Not used on amd64 (there is no invalidate-icache insn), but needed so as to @@ -161,7 +166,7 @@ typedef ULong guest_IP_AT_SYSCALL; /* Padding to make it have a 16-aligned size */ - ULong padding; + ULong pad1; } VexGuestAMD64State; diff --git a/VEX/pub/libvex_guest_arm.h b/VEX/pub/libvex_guest_arm.h index b6a6a4fa4c..19be179af6 100644 --- a/VEX/pub/libvex_guest_arm.h +++ b/VEX/pub/libvex_guest_arm.h @@ -42,6 +42,9 @@ typedef struct { /* 0 */ + /* Event check fail addr and counter. */ + UInt host_EvC_FAILADDR; /* 0 */ + UInt host_EvC_COUNTER; /* 4 */ UInt guest_R0; UInt guest_R1; UInt guest_R2; @@ -69,7 +72,7 @@ typedef /* 4-word thunk used to calculate N(sign) Z(zero) C(carry, unsigned overflow) and V(signed overflow) flags. */ - /* 64 */ + /* 72 */ UInt guest_CC_OP; UInt guest_CC_DEP1; UInt guest_CC_DEP2; @@ -108,11 +111,11 @@ typedef program counter at the last syscall insn (int 0x80/81/82, sysenter, syscall, svc). Used when backing up to restart a syscall that has been interrupted by a signal. */ - /* 116 */ + /* 124 */ UInt guest_IP_AT_SYSCALL; /* VFP state. D0 .. D15 must be 8-aligned. */ - /* 120 -- I guess there's 4 bytes of padding just prior to this?
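   (Offset bookkeeping, for the record: the two new UInt host_EvC_
   fields occupy bytes 0..7, so every later field moves up by 8 --
   guest_R0 from 0 to 8, the NZCV thunk from 64 to 72, and
   guest_IP_AT_SYSCALL from 116 to 124 -- leaving guest_D0 at 128,
   still 8-aligned.  Since 120 was already 8-aligned, the padding
   guessed at in the old comment most likely never existed, which is
   presumably why the new comment simply says 128.  Dropping
   padding2/padding3 below keeps the overall size a multiple of 16:
   8 bytes gained at the front, 8 bytes given back at the end.)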
*/ + /* 128 */ ULong guest_D0; ULong guest_D1; ULong guest_D2; @@ -193,8 +196,6 @@ typedef /* Padding to make it have a 16-aligned size */ UInt padding1; - UInt padding2; - UInt padding3; } VexGuestARMState; diff --git a/VEX/pub/libvex_guest_x86.h b/VEX/pub/libvex_guest_x86.h index 80ee423e4e..e0b1b7631f 100644 --- a/VEX/pub/libvex_guest_x86.h +++ b/VEX/pub/libvex_guest_x86.h @@ -141,40 +141,43 @@ */ typedef struct { - UInt guest_EAX; /* 0 */ + /* Event check fail addr and counter. */ + UInt host_EvC_FAILADDR; /* 0 */ + UInt host_EvC_COUNTER; /* 4 */ + UInt guest_EAX; /* 8 */ UInt guest_ECX; UInt guest_EDX; UInt guest_EBX; UInt guest_ESP; UInt guest_EBP; UInt guest_ESI; - UInt guest_EDI; /* 28 */ + UInt guest_EDI; /* 36 */ /* 4-word thunk used to calculate O S Z A C P flags. */ - UInt guest_CC_OP; /* 32 */ + UInt guest_CC_OP; /* 40 */ UInt guest_CC_DEP1; UInt guest_CC_DEP2; - UInt guest_CC_NDEP; /* 44 */ + UInt guest_CC_NDEP; /* 52 */ /* The D flag is stored here, encoded as either -1 or +1 */ - UInt guest_DFLAG; /* 48 */ + UInt guest_DFLAG; /* 56 */ /* Bit 21 (ID) of eflags stored here, as either 0 or 1. */ - UInt guest_IDFLAG; /* 52 */ + UInt guest_IDFLAG; /* 60 */ /* Bit 18 (AC) of eflags stored here, as either 0 or 1. */ - UInt guest_ACFLAG; /* 56 */ + UInt guest_ACFLAG; /* 64 */ /* EIP */ - UInt guest_EIP; /* 60 */ + UInt guest_EIP; /* 68 */ /* FPU */ - ULong guest_FPREG[8]; /* 64 */ - UChar guest_FPTAG[8]; /* 128 */ - UInt guest_FPROUND; /* 136 */ - UInt guest_FC3210; /* 140 */ - UInt guest_FTOP; /* 144 */ + ULong guest_FPREG[8]; /* 72 */ + UChar guest_FPTAG[8]; /* 136 */ + UInt guest_FPROUND; /* 144 */ + UInt guest_FC3210; /* 148 */ + UInt guest_FTOP; /* 152 */ /* SSE */ - UInt guest_SSEROUND; /* 148 */ - U128 guest_XMM0; /* 152 */ + UInt guest_SSEROUND; /* 156 */ + U128 guest_XMM0; /* 160 */ U128 guest_XMM1; U128 guest_XMM2; U128 guest_XMM3; @@ -220,8 +223,6 @@ typedef /* Padding to make it have a 16-aligned size */ UInt padding1; - UInt padding2; - UInt padding3; } VexGuestX86State; diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 3b4c7f006d..8cdcc1ccff 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -1672,8 +1672,9 @@ extern Bool eqIRAtom ( IRExpr*, IRExpr* ); guest to restart a syscall that has been interrupted by a signal. */ typedef - enum { - Ijk_Boring=0x16000, /* not interesting; just goto next */ + enum { + Ijk_INVALID=0x16000, + Ijk_Boring, /* not interesting; just goto next */ Ijk_Call, /* guest is doing a call */ Ijk_Ret, /* guest is doing a return */ Ijk_ClientReq, /* do guest client req before continuing */ @@ -2154,11 +2155,15 @@ typedef /* Conditional exit from the middle of an IRSB. ppIRStmt output: if (<guard>) goto {<jk>} <dst> eg. if (t69) goto {Boring} 0x4000AAA:I32 + If <guard> is true, the guest state is also updated by + PUT-ing <dst> at <offsIP>. This is done because a + taken exit must update the guest program counter.
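   For instance, a front end wishing to exit to 0x4000AAA when
   'guard' (some IRExpr* of type Ity_I1) holds might build, for the
   amd64 case (a sketch, using the guest RIP offset seen earlier in
   this patch):

      IRStmt* ex
         = IRStmt_Exit( guard,
                        Ijk_Boring,                   // no special action
                        IRConst_U64(0x4000AAAULL),    // <dst>: jump target
                        offsetof(VexGuestAMD64State,
                                 guest_RIP) );        // <offsIP>

   The old three-argument form survives only as the IRStmt_Exit3 hack
   below, which passes a zero offset.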
*/ struct { IRExpr* guard; /* Conditional expression */ IRJumpKind jk; /* Jump kind */ IRConst* dst; /* Jump target (constant only) */ + Int offsIP; /* Guest state offset for IP */ } Exit; } Ist; } @@ -2178,7 +2183,11 @@ extern IRStmt* IRStmt_LLSC ( IREndness end, IRTemp result, IRExpr* addr, IRExpr* storedata ); extern IRStmt* IRStmt_Dirty ( IRDirty* details ); extern IRStmt* IRStmt_MBE ( IRMBusEvent event ); -extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ); +extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst, + Int offsIP ); +// TEMP HACK +#define IRStmt_Exit3(__guard,__jk,__dst) IRStmt_Exit(__guard,__jk,__dst,0) + /* Deep-copy an IRStmt. */ extern IRStmt* deepCopyIRStmt ( IRStmt* ); @@ -2223,6 +2232,8 @@ extern void ppIRTypeEnv ( IRTypeEnv* ); executes all the way to the end, without a side exit - An indication of any special actions (JumpKind) needed for this final jump. + - Offset of the IP field in the guest state. This will be + updated before the final jump is done. "IRSB" stands for "IR Super Block". */ @@ -2234,6 +2245,7 @@ typedef Int stmts_used; IRExpr* next; IRJumpKind jumpkind; + Int offsIP; } IRSB; diff --git a/VEX/pub/libvex_trc_values.h b/VEX/pub/libvex_trc_values.h index b882d1d160..cf69444af1 100644 --- a/VEX/pub/libvex_trc_values.h +++ b/VEX/pub/libvex_trc_values.h @@ -80,6 +80,9 @@ #define VEX_TRC_JMP_SYS_SYSENTER 79 /* do syscall before continuing */ +#define VEX_TRC_JMP_BORING 95 /* return to sched, but just + keep going; no special action */ + #endif /* ndef __LIBVEX_TRC_VALUES_H */ /*---------------------------------------------------------------*/ diff --git a/VEX/switchback/switchback.c b/VEX/switchback/switchback.c index 1cf98ef766..990c7d3e08 100644 --- a/VEX/switchback/switchback.c +++ b/VEX/switchback/switchback.c @@ -867,6 +867,7 @@ void make_translation ( Addr64 guest_addr, Bool verbose ) vta.do_self_check = False; vta.traceflags = verbose ? 
TEST_FLAGS : DEBUG_TRACE_FLAGS; vta.dispatch = NULL; + vta.addProfInc = False; tres = LibVEX_Translate ( &vta ); diff --git a/VEX/test_main.c b/VEX/test_main.c index cb794086c0..f443580044 100644 --- a/VEX/test_main.c +++ b/VEX/test_main.c @@ -107,7 +107,7 @@ int main ( int argc, char** argv ) VexTranslateArgs vta; if (argc != 2) { - fprintf(stderr, "usage: vex file.org\n"); + fprintf(stderr, "usage: vex file.orig\n"); exit(1); } f = fopen(argv[1], "r"); @@ -176,8 +176,10 @@ int main ( int argc, char** argv ) vai_ppc32.ppc_cache_line_szB = 128; LibVEX_default_VexAbiInfo(&vbi); + vbi.guest_stack_redzone_size = 128; /* ----- Set up args for LibVEX_Translate ----- */ + #if 0 /* ppc32 -> ppc32 */ vta.arch_guest = VexArchPPC32; vta.archinfo_guest = vai_ppc32; @@ -196,6 +198,7 @@ int main ( int argc, char** argv ) vta.arch_host = VexArchX86; vta.archinfo_host = vai_x86; #endif + vta.abiinfo_both = vbi; vta.guest_bytes = origbuf; vta.guest_bytes_addr = (Addr64)orig_addr; @@ -205,7 +208,8 @@ int main ( int argc, char** argv ) vta.host_bytes = transbuf; vta.host_bytes_size = N_TRANSBUF; vta.host_bytes_used = &trans_used; -#if 0 /* no instrumentation */ + +#if 1 /* no instrumentation */ vta.instrument1 = NULL; vta.instrument2 = NULL; #endif @@ -213,19 +217,19 @@ int main ( int argc, char** argv ) vta.instrument1 = ac_instrument; vta.instrument2 = NULL; #endif -#if 1 /* memcheck */ +#if 0 /* memcheck */ vta.instrument1 = mc_instrument; vta.instrument2 = NULL; #endif vta.needs_self_check = needs_self_check; vta.preamble_function = NULL; vta.traceflags = TEST_FLAGS; -#if 1 /* x86, amd64 hosts */ - vta.dispatch_unassisted = (void*)0x12345678; - vta.dispatch_assisted = (void*)0x12345678; -#else /* ppc32, ppc64 hosts */ - vta.dispatch = NULL; -#endif + vta.addProfInc = False; + + vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678; + vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679; + vta.disp_cp_xindir = (void*)0x1234567A; + vta.disp_cp_xassisted = (void*)0x1234567B; vta.finaltidy = NULL; diff --git a/VEX/test_main.h b/VEX/test_main.h index 0005fd6c0f..0c537a0e51 100644 --- a/VEX/test_main.h +++ b/VEX/test_main.h @@ -2,15 +2,15 @@ /* Copy this file (test_main.h.in) to test_main.h, and edit */ /* DEBUG RUN, ON V */ -#if 0 +#if 1 #define TEST_VSUPPORT True #define TEST_N_ITERS 1 #define TEST_N_BBS 1 -#define TEST_FLAGS (1<<7) /* |(1<<2)|(1<<1) */ +#define TEST_FLAGS (1<<7)|(0<<6)|(1<<3)|(0<<2)|(0<<1)|(0<<0) #endif /* CHECKING RUN, ON V */ -#if 1 +#if 0 #define TEST_VSUPPORT True #define TEST_N_ITERS 1 #define TEST_N_BBS 100000
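How the new entry points are meant to fit together, seen from the
client side.  This is a sketch under assumptions: host_code is the
buffer vta.host_bytes points at, patch_site is the address of an
XDirect jump site inside some translation, tt_entry is another
translation's fast entry point, counter is a ULong owned by the
client, and flush_icache stands for whatever I-D cache maintenance
the host requires.  None of these names are part of this patch.

   // 1. Translate, requesting a patchable profile counter increment.
   vta.addProfInc = True;
   VexTranslateResult tres = LibVEX_Translate(&vta);
   if (tres.status == VexTransOK && tres.offs_profInc != -1) {
      VexInvalRange vir1
         = LibVEX_PatchProfInc( vta.arch_host,
                                &host_code[tres.offs_profInc], // ProfInc site
                                &counter );                    // 64-bit counter
      flush_icache((void*)vir1.start, vir1.len);
   }

   // 2. When the target of an XDirect jump becomes known, chain it up.
   VexInvalRange vir2
      = LibVEX_Chain( vta.arch_host,
                      patch_site,                      // place_to_chain
                      vta.disp_cp_chain_me_to_fastEP,  // what it must hold now
                      tt_entry );                      // where it should jump
   flush_icache((void*)vir2.start, vir2.len);

   // 3. Before discarding tt_entry's translation, undo the patch so the
   //    site falls back to calling the dispatcher.
   VexInvalRange vir3
      = LibVEX_UnChain( vta.arch_host,
                        patch_site,
                        tt_entry,                        // expected target
                        vta.disp_cp_chain_me_to_fastEP );// restore this call
   flush_icache((void*)vir3.start, vir3.len);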