See #296422.
git-svn-id: svn://svn.valgrind.org/vex/branches/TCHAIN@2273
-Wpointer-arith -Wbad-function-cast -Wcast-qual \
-Wcast-align -Wmissing-declarations \
-Wno-pointer-sign \
- $(EXTRA_CFLAGS) -g -O2 -fstrict-aliasing
+ $(EXTRA_CFLAGS) -g -O2 -fstrict-aliasing \
+ \
+ -O
#CC = icc
#CCFLAGS = -g -Wall -wd981 -wd279 -wd1287 -wd869 -wd111 -wd188 -wd186
bb_to_IR.h. */
extern
DisResult disInstr_AMD64 ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
{
+ vex_state->host_EvC_FAILADDR = 0;
+ vex_state->host_EvC_COUNTER = 0;
+ vex_state->pad0 = 0;
+
vex_state->guest_RAX = 0;
vex_state->guest_RCX = 0;
vex_state->guest_RDX = 0;
vex_state->guest_GS_0x60 = 0;
vex_state->guest_IP_AT_SYSCALL = 0;
- /* vex_state->padding = 0; */
+ vex_state->pad1 = 0;
}
binop( mkSizedOp(tyE,Iop_CasCmpNE8),
mkexpr(oldTmp), mkexpr(expTmp) ),
Ijk_Boring, /*Ijk_NoRedir*/
- IRConst_U64( restart_point )
+ IRConst_U64( restart_point ),
+ OFFB_RIP
));
}
/*--- JMP helpers ---*/
/*------------------------------------------------------------*/
-static void jmp_lit( IRJumpKind kind, Addr64 d64 )
+static void jmp_lit( /*MOD*/DisResult* dres,
+ IRJumpKind kind, Addr64 d64 )
{
- irsb->next = mkU64(d64);
- irsb->jumpkind = kind;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = kind;
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
}
-static void jmp_treg( IRJumpKind kind, IRTemp t )
+static void jmp_treg( /*MOD*/DisResult* dres,
+ IRJumpKind kind, IRTemp t )
{
- irsb->next = mkexpr(t);
- irsb->jumpkind = kind;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = kind;
+ stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
}
static
-void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
+void jcc_01 ( /*MOD*/DisResult* dres,
+ AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
Bool invert;
AMD64Condcode condPos;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_Boring;
condPos = positiveIse_AMD64Condcode ( cond, &invert );
if (invert) {
stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
Ijk_Boring,
- IRConst_U64(d64_false) ) );
- irsb->next = mkU64(d64_true);
- irsb->jumpkind = Ijk_Boring;
+ IRConst_U64(d64_false),
+ OFFB_RIP ) );
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
} else {
stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
Ijk_Boring,
- IRConst_U64(d64_true) ) );
- irsb->next = mkU64(d64_false);
- irsb->jumpkind = Ijk_Boring;
+ IRConst_U64(d64_true),
+ OFFB_RIP ) );
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
}
}
static
ULong dis_Grp5 ( VexAbiInfo* vbi,
Prefix pfx, Int sz, Long delta,
- DisResult* dres, Bool* decode_OK )
+ /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
Int len;
UChar modrm;
putIReg64(R_RSP, mkexpr(t2));
storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
- jmp_treg(Ijk_Call,t3);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Call, t3);
+ vassert(dres->whatNext == Dis_StopHere);
showSz = False;
break;
case 4: /* jmp Ev */
sz = 8;
t3 = newTemp(Ity_I64);
assign(t3, getIRegE(sz,pfx,modrm));
- jmp_treg(Ijk_Boring,t3);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Boring, t3);
+ vassert(dres->whatNext == Dis_StopHere);
showSz = False;
break;
default:
putIReg64(R_RSP, mkexpr(t2));
storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
- jmp_treg(Ijk_Call,t3);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Call, t3);
+ vassert(dres->whatNext == Dis_StopHere);
showSz = False;
break;
case 4: /* JMP Ev */
sz = 8;
t3 = newTemp(Ity_I64);
assign(t3, loadLE(Ity_I64,mkexpr(addr)));
- jmp_treg(Ijk_Boring,t3);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Boring, t3);
+ vassert(dres->whatNext == Dis_StopHere);
showSz = False;
break;
case 6: /* PUSH Ev */
the insn is the last one in the basic block, and so emit a jump to
the next insn, rather than just falling through. */
static
-void dis_REP_op ( AMD64Condcode cond,
+void dis_REP_op ( /*MOD*/DisResult* dres,
+ AMD64Condcode cond,
void (*dis_OP)(Int, IRTemp, Prefix),
Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
Prefix pfx )
cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
}
- stmt( IRStmt_Exit( cmp, Ijk_Boring, IRConst_U64(rip_next) ) );
+ stmt( IRStmt_Exit( cmp, Ijk_Boring,
+ IRConst_U64(rip_next), OFFB_RIP ) );
if (haveASO(pfx))
putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
dis_OP (sz, t_inc, pfx);
if (cond == AMD64CondAlways) {
- jmp_lit(Ijk_Boring,rip);
+ jmp_lit(dres, Ijk_Boring, rip);
+ vassert(dres->whatNext == Dis_StopHere);
} else {
stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
Ijk_Boring,
- IRConst_U64(rip) ) );
- jmp_lit(Ijk_Boring,rip_next);
+ IRConst_U64(rip),
+ OFFB_RIP ) );
+ jmp_lit(dres, Ijk_Boring, rip_next);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("%s%c\n", name, nameISize(sz));
}
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U64( guest_RIP_bbstart+delta )
+ IRConst_U64( guest_RIP_bbstart+delta ),
+ OFFB_RIP
)
);
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U64( guest_RIP_bbstart+delta )
+ IRConst_U64( guest_RIP_bbstart+delta ),
+ OFFB_RIP
)
);
break;
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U64( guest_RIP_bbstart+delta )
+ IRConst_U64( guest_RIP_bbstart+delta ),
+ OFFB_RIP
)
);
//.. }
static
-void dis_ret ( VexAbiInfo* vbi, ULong d64 )
+void dis_ret ( /*MOD*/DisResult* dres, VexAbiInfo* vbi, ULong d64 )
{
IRTemp t1 = newTemp(Ity_I64);
IRTemp t2 = newTemp(Ity_I64);
assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
putIReg64(R_RSP, mkexpr(t3));
make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
- jmp_treg(Ijk_Ret,t2);
+ jmp_treg(dres, Ijk_Ret, t2);
+ vassert(dres->whatNext == Dis_StopHere);
}
binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)),
mkU64(0)),
Ijk_SigSEGV,
- IRConst_U64(guest_RIP_curr_instr)
+ IRConst_U64(guest_RIP_curr_instr),
+ OFFB_RIP
)
);
}
IRStmt_Exit(
binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
Ijk_EmWarn,
- IRConst_U64(guest_RIP_bbstart+delta)
+ IRConst_U64(guest_RIP_bbstart+delta),
+ OFFB_RIP
)
);
goto decode_success;
mk_amd64g_calculate_condition(
(AMD64Condcode)(1 ^ (opc - 0x70))),
Ijk_Boring,
- IRConst_U64(guest_RIP_bbstart+delta) ) );
+ IRConst_U64(guest_RIP_bbstart+delta),
+ OFFB_RIP ) );
dres->whatNext = Dis_ResteerC;
dres->continueAt = d64;
comment = "(assumed taken)";
stmt( IRStmt_Exit(
mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
Ijk_Boring,
- IRConst_U64(d64) ) );
+ IRConst_U64(d64),
+ OFFB_RIP ) );
dres->whatNext = Dis_ResteerC;
dres->continueAt = guest_RIP_bbstart+delta;
comment = "(assumed not taken)";
else {
/* Conservative default translation - end the block at this
point. */
- jcc_01( (AMD64Condcode)(opc - 0x70),
- guest_RIP_bbstart+delta,
- d64 );
- dres->whatNext = Dis_StopHere;
+ jcc_01( dres, (AMD64Condcode)(opc - 0x70),
+ guest_RIP_bbstart+delta, d64 );
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment);
return delta;
DIP("rep nop (P4 pause)\n");
/* "observe" the hint. The Vex client needs to be careful not
to cause very long delays as a result, though. */
- jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
+ vassert(dres->whatNext == Dis_StopHere);
return delta;
}
/* detect and handle NOPs specially */
if (haveF3(pfx) && !haveF2(pfx)) {
if (opc == 0xA4)
sz = 1;
- dis_REP_op ( AMD64CondAlways, dis_MOVS, sz,
+ dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "rep movs", pfx );
dres->whatNext = Dis_StopHere;
if (haveF3(pfx) && !haveF2(pfx)) {
if (opc == 0xA6)
sz = 1;
- dis_REP_op ( AMD64CondZ, dis_CMPS, sz,
+ dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "repe cmps", pfx );
dres->whatNext = Dis_StopHere;
if (haveF3(pfx) && !haveF2(pfx)) {
if (opc == 0xAA)
sz = 1;
- dis_REP_op ( AMD64CondAlways, dis_STOS, sz,
+ dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "rep stos", pfx );
- dres->whatNext = Dis_StopHere;
- return delta;
+ vassert(dres->whatNext == Dis_StopHere);
+ return delta;
}
/* AA/AB: stosb/stos{w,l,q} */
if (!haveF3(pfx) && !haveF2(pfx)) {
if (haveF2(pfx) && !haveF3(pfx)) {
if (opc == 0xAE)
sz = 1;
- dis_REP_op ( AMD64CondNZ, dis_SCAS, sz,
+ dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "repne scas", pfx );
- dres->whatNext = Dis_StopHere;
+ vassert(dres->whatNext == Dis_StopHere);
return delta;
}
/* F3 AE/AF: repe scasb/repe scas{w,l,q} */
if (!haveF2(pfx) && haveF3(pfx)) {
if (opc == 0xAE)
sz = 1;
- dis_REP_op ( AMD64CondZ, dis_SCAS, sz,
+ dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "repe scas", pfx );
- dres->whatNext = Dis_StopHere;
+ vassert(dres->whatNext == Dis_StopHere);
return delta;
}
/* AE/AF: scasb/scas{w,l,q} */
if (have66orF2orF3(pfx)) goto decode_failure;
d64 = getUDisp16(delta);
delta += 2;
- dis_ret(vbi, d64);
- dres->whatNext = Dis_StopHere;
+ dis_ret(dres, vbi, d64);
DIP("ret $%lld\n", d64);
return delta;
case 0xC3: /* RET */
if (have66orF2(pfx)) goto decode_failure;
/* F3 is acceptable on AMD. */
- dis_ret(vbi, 0);
- dres->whatNext = Dis_StopHere;
+ dis_ret(dres, vbi, 0);
DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
return delta;
return delta;
case 0xCC: /* INT 3 */
- jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
+ vassert(dres->whatNext == Dis_StopHere);
DIP("int $0x3\n");
return delta;
default:
vassert(0);
}
- stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) );
+ stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64);
return delta;
if (haveASO(pfx)) {
/* 32-bit */
stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
- unop(Iop_32Uto64, getIReg32(R_RCX)),
- mkU64(0)),
- Ijk_Boring,
- IRConst_U64(d64))
- );
+ unop(Iop_32Uto64, getIReg32(R_RCX)),
+ mkU64(0)),
+ Ijk_Boring,
+ IRConst_U64(d64),
+ OFFB_RIP
+ ));
DIP("jecxz 0x%llx\n", d64);
} else {
/* 64-bit */
stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
getIReg64(R_RCX),
mkU64(0)),
- Ijk_Boring,
- IRConst_U64(d64))
- );
+ Ijk_Boring,
+ IRConst_U64(d64),
+ OFFB_RIP
+ ));
DIP("jrcxz 0x%llx\n", d64);
}
return delta;
dres->whatNext = Dis_ResteerU;
dres->continueAt = d64;
} else {
- jmp_lit(Ijk_Call,d64);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Call, d64);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("call 0x%llx\n",d64);
return delta;
dres->whatNext = Dis_ResteerU;
dres->continueAt = d64;
} else {
- jmp_lit(Ijk_Boring,d64);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Boring, d64);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("jmp 0x%llx\n", d64);
return delta;
dres->whatNext = Dis_ResteerU;
dres->continueAt = d64;
} else {
- jmp_lit(Ijk_Boring,d64);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Boring, d64);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("jmp-8 0x%llx\n", d64);
return delta;
/* It's important that all guest state is up-to-date
at this point. So we declare an end-of-block here, which
forces any cached guest state to be flushed. */
- jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
+ vassert(dres->whatNext == Dis_StopHere);
DIP("syscall\n");
return delta;
mk_amd64g_calculate_condition(
(AMD64Condcode)(1 ^ (opc - 0x80))),
Ijk_Boring,
- IRConst_U64(guest_RIP_bbstart+delta) ) );
+ IRConst_U64(guest_RIP_bbstart+delta),
+ OFFB_RIP
+ ));
dres->whatNext = Dis_ResteerC;
dres->continueAt = d64;
comment = "(assumed taken)";
mk_amd64g_calculate_condition((AMD64Condcode)
(opc - 0x80)),
Ijk_Boring,
- IRConst_U64(d64) ) );
+ IRConst_U64(d64),
+ OFFB_RIP
+ ));
dres->whatNext = Dis_ResteerC;
dres->continueAt = guest_RIP_bbstart+delta;
comment = "(assumed not taken)";
else {
/* Conservative default translation - end the block at
this point. */
- jcc_01( (AMD64Condcode)(opc - 0x80),
- guest_RIP_bbstart+delta,
- d64 );
- dres->whatNext = Dis_StopHere;
+ jcc_01( dres, (AMD64Condcode)(opc - 0x80),
+ guest_RIP_bbstart+delta, d64 );
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment);
return delta;
static
DisResult disInstr_AMD64_WRK (
/*OUT*/Bool* expect_CAS,
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
Prefix pfx = PFX_EMPTY;
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 0;
- dres.continueAt = 0;
-
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
*expect_CAS = False;
vassert(guest_RIP_next_assumed == 0);
DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
- /* We may be asked to update the guest RIP before going further. */
- if (put_IP)
- stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) );
-
/* Spot "Special" instructions (see comment at top of file). */
{
UChar* code = (UChar*)(guest_code + delta);
/* %RDX = client_request ( %RAX ) */
DIP("%%rdx = client_request ( %%rax )\n");
delta += 19;
- jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
+ vassert(dres.whatNext == Dis_StopHere);
goto decode_success;
}
else
assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
putIReg64(R_RSP, mkexpr(t2));
storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
- jmp_treg(Ijk_NoRedir,t1);
- dres.whatNext = Dis_StopHere;
+ jmp_treg(&dres, Ijk_NoRedir, t1);
+ vassert(dres.whatNext == Dis_StopHere);
goto decode_success;
}
/* We don't know what it is. */
/* It's important that all ArchRegs carry their up-to-date value
at this point. So we declare an end-of-block here, which
forces any TempRegs caching ArchRegs to be flushed. */
- dres.whatNext = Dis_StopHere;
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x%02x\n", (UInt)d64);
break;
}
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
- jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
+ vassert(dres.whatNext == Dis_StopHere);
+ dres.len = 0;
/* We also need to say that a CAS is not expected now, regardless
of what it might have been set to at the start of the function,
since the IR that we've emitted just above (to synthesise a
decode_success:
/* All decode successes end up here. */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
+
DIP("\n");
dres.len = (Int)toUInt(delta - delta_start);
return dres;
is located in host memory at &guest_code[delta]. */
DisResult disInstr_AMD64 ( IRSB* irsb_IN,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
x1 = irsb_IN->stmts_used;
expect_CAS = False;
- dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
resteerCisOk,
callback_opaque,
delta, archinfo, abiinfo );
/* inconsistency detected. re-disassemble the instruction so as
to generate a useful error message; then assert. */
vex_traceflags |= VEX_TRACE_FE;
- dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
resteerCisOk,
callback_opaque,
delta, archinfo, abiinfo );
bb_to_IR.h. */
extern
DisResult disInstr_ARM ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestARM_initialise ( /*OUT*/VexGuestARMState* vex_state )
{
+ vex_state->host_EvC_FAILADDR = 0;
+ vex_state->host_EvC_COUNTER = 0;
+
vex_state->guest_R0 = 0;
vex_state->guest_R1 = 0;
vex_state->guest_R2 = 0;
vex_state->guest_ITSTATE = 0;
vex_state->padding1 = 0;
- vex_state->padding2 = 0;
- vex_state->padding3 = 0;
}
stmt( IRStmt_Exit(
unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
Ijk_Boring,
- IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4))
+ IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
+ OFFB_R15T
));
}
stmt( IRStmt_Exit(
unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
Ijk_Boring,
- IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1))
+ IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
+ OFFB_R15T
));
}
stmt( IRStmt_Exit(
unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
Ijk_Boring,
- IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1))
+ IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
+ OFFB_R15T
));
}
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
Ijk_NoDecode,
- IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1))
+ IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
+ OFFB_R15T
)
);
}
UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
IRTemp_INVALID/*because AL*/, Ijk_Boring );
- irsb->next = mkU32(dst);
- irsb->jumpkind = Ijk_Call;
- dres->whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32(dst));
+ dres->jk_StopHere = Ijk_Call;
+ dres->whatNext = Dis_StopHere;
DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
return True;
}
static
DisResult disInstr_ARM_WRK (
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
// etc etc
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 4;
- dres.continueAt = 0;
+ dres.whatNext = Dis_Continue;
+ dres.len = 4;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
/* Set default actions for post-insn handling of writes to r15, if
required. */
DIP("\t(arm) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
- /* We may be asked to update the guest R15 before going further. */
vassert(0 == (guest_R15_curr_instr_notENC & 3));
- if (put_IP) {
- llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
- }
/* ----------------------------------------------------------- */
/* orr r10,r10,r10 */) {
/* R3 = client_request ( R4 ) */
DIP("r3 = client_request ( %%r4 )\n");
- irsb->next = mkU32( guest_R15_curr_instr_notENC + 20 );
- irsb->jumpkind = Ijk_ClientReq;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
+ dres.jk_StopHere = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
else
/* branch-and-link-to-noredir R4 */
DIP("branch-and-link-to-noredir r4\n");
llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
- irsb->next = llGetIReg(4);
- irsb->jumpkind = Ijk_NoRedir;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, llGetIReg(4));
+ dres.jk_StopHere = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
/* We don't know what it is. Set opc1/opc2 so decode_failure
dres.continueAt = (Addr64)dst;
} else {
/* no; terminate the SB at this point. */
- irsb->next = mkU32(dst);
- irsb->jumpkind = jk;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32(dst));
+ dres.jk_StopHere = jk;
+ dres.whatNext = Dis_StopHere;
}
DIP("b%s 0x%x\n", link ? "l" : "", dst);
} else {
stmt( IRStmt_Exit( unop(Iop_Not1,
unop(Iop_32to1, mkexpr(condT))),
Ijk_Boring,
- IRConst_U32(guest_R15_curr_instr_notENC+4) ));
+ IRConst_U32(guest_R15_curr_instr_notENC+4),
+ OFFB_R15T ));
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)dst;
comment = "(assumed taken)";
following this one. */
stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
Ijk_Boring,
- IRConst_U32(dst) ));
+ IRConst_U32(dst),
+ OFFB_R15T ));
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)
(guest_R15_curr_instr_notENC+4);
/* Conservative default translation - end the block at
this point. */
stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
- jk, IRConst_U32(dst) ));
- irsb->next = mkU32(guest_R15_curr_instr_notENC + 4);
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ jk, IRConst_U32(dst), OFFB_R15T ));
+ llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
}
DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
dst, comment);
putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
IRTemp_INVALID/*because AL*/, Ijk_Boring );
}
- irsb->next = mkexpr(dst);
- irsb->jumpkind = link ? Ijk_Call
- : (rM == 14 ? Ijk_Ret : Ijk_Boring);
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(dst));
+ dres.jk_StopHere = link ? Ijk_Call
+ : (rM == 14 ? Ijk_Ret : Ijk_Boring);
+ dres.whatNext = Dis_StopHere;
if (condT == IRTemp_INVALID) {
DIP("b%sx r%u\n", link ? "l" : "", rM);
} else {
mk_skip_over_A32_if_cond_is_false( condT );
}
// AL after here
- irsb->next = mkU32( guest_R15_curr_instr_notENC + 4 );
- irsb->jumpkind = Ijk_Sys_syscall;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
+ dres.jk_StopHere = Ijk_Sys_syscall;
+ dres.whatNext = Dis_StopHere;
DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
goto decode_success;
}
}
stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
/*Ijk_NoRedir*/Ijk_Boring,
- IRConst_U32(guest_R15_curr_instr_notENC)) );
+ IRConst_U32(guest_R15_curr_instr_notENC),
+ OFFB_R15T ));
putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
IRTemp_INVALID, Ijk_Boring);
DIP("swp%s%s r%u, r%u, [r%u]\n",
now. */
vassert(0 == (guest_R15_curr_instr_notENC & 3));
llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
- irsb->next = mkU32(guest_R15_curr_instr_notENC);
- irsb->jumpkind = Ijk_NoDecode;
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_NoDecode;
+ dres.len = 0;
return dres;
decode_success:
binop(Iop_Xor32,
mkexpr(r15guard), mkU32(1))),
r15kind,
- IRConst_U32(guest_R15_curr_instr_notENC + 4)
+ IRConst_U32(guest_R15_curr_instr_notENC + 4),
+ OFFB_R15T
));
}
- irsb->next = llGetIReg(15);
- irsb->jumpkind = r15kind;
- dres.whatNext = Dis_StopHere;
+ /* This seems crazy, but we're required to finish the insn with
+ a write to the guest PC. As usual we rely on ir_opt to tidy
+ up later. */
+ llPutIReg(15, llGetIReg(15));
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = r15kind;
+ } else {
+ /* Set up the end-state in the normal way. */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ llPutIReg(15, mkU32(dres.continueAt));
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
}
return dres;
static
DisResult disInstr_THUMB_WRK (
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
// etc etc
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 2;
- dres.continueAt = 0;
+ dres.whatNext = Dis_Continue;
+ dres.len = 2;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
/* Set default actions for post-insn handling of writes to r15, if
required. */
DIP("\t(thumb) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
- /* We may be asked to update the guest R15 before going further. */
vassert(0 == (guest_R15_curr_instr_notENC & 1));
- if (put_IP) {
- llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
- }
/* ----------------------------------------------------------- */
/* Spot "Special" instructions (see comment at top of file). */
/* orr.w r10,r10,r10 */) {
/* R3 = client_request ( R4 ) */
DIP("r3 = client_request ( %%r4 )\n");
- irsb->next = mkU32( (guest_R15_curr_instr_notENC + 20) | 1 );
- irsb->jumpkind = Ijk_ClientReq;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
+ dres.jk_StopHere = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
else
/* branch-and-link-to-noredir R4 */
DIP("branch-and-link-to-noredir r4\n");
llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
- irsb->next = getIRegT(4);
- irsb->jumpkind = Ijk_NoRedir;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, getIRegT(4));
+ dres.jk_StopHere = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
/* We don't know what it is. Set insn0 so decode_failure
vassert(rM == 15);
assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
}
- irsb->next = mkexpr(dst);
- irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(dst));
+ dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("bx r%u (possibly switch to ARM mode)\n", rM);
goto decode_success;
}
assign( dst, getIRegT(rM) );
putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
IRTemp_INVALID );
- irsb->next = mkexpr(dst);
- irsb->jumpkind = Ijk_Call;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(dst));
+ dres.jk_StopHere = Ijk_Call;
+ dres.whatNext = Dis_StopHere;
DIP("blx r%u (possibly switch to ARM mode)\n", rM);
goto decode_success;
}
// stash pseudo-reg, and back up from that if we have to
// restart.
// uncond after here
- irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2) | 1 );
- irsb->jumpkind = Ijk_Sys_syscall;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
+ dres.jk_StopHere = Ijk_Sys_syscall;
+ dres.whatNext = Dis_StopHere;
DIP("svc #0x%08x\n", imm8);
goto decode_success;
}
condT = IRTemp_INVALID;
// now uncond
/* non-interworking branch */
- irsb->next = binop(Iop_Or32, mkexpr(val), mkU32(1));
- irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
+ dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
}
DIP("mov r%u, r%u\n", rD, rM);
goto decode_success;
UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
stmt(IRStmt_Exit( mkexpr(kond),
Ijk_Boring,
- IRConst_U32(toUInt(dst)) ));
+ IRConst_U32(toUInt(dst)),
+ OFFB_R15T ));
DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
goto decode_success;
}
it as is, no need to mess with it. Note, therefore, this
is an interworking return. */
if (bitR) {
- irsb->next = mkexpr(newPC);
- irsb->jumpkind = Ijk_Ret;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(newPC));
+ dres.jk_StopHere = Ijk_Ret;
+ dres.whatNext = Dis_StopHere;
}
DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
mk_skip_over_T16_if_cond_is_false(condT);
condT = IRTemp_INVALID;
// now uncond
- irsb->next = mkU32( dst | 1 /*CPSR.T*/ );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("b 0x%x\n", dst);
goto decode_success;
}
assign( kondT, mk_armg_calculate_condition(cond) );
stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
Ijk_Boring,
- IRConst_U32(dst | 1/*CPSR.T*/) ));
- irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2)
- | 1 /*CPSR.T*/ );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ IRConst_U32(dst | 1/*CPSR.T*/),
+ OFFB_R15T ));
+ llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
+ | 1 /*CPSR.T*/ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("b%s 0x%x\n", nCC(cond), dst);
goto decode_success;
}
if (isBL) {
/* BL: unconditional T -> T call */
/* we're calling Thumb code, hence "| 1" */
- irsb->next = mkU32( dst | 1 );
+ llPutIReg(15, mkU32( dst | 1 ));
DIP("bl 0x%x (stay in Thumb mode)\n", dst);
} else {
/* BLX: unconditional T -> A call */
/* we're calling ARM code, hence "& 3" to align to a
valid ARM insn address */
- irsb->next = mkU32( dst & ~3 );
+ llPutIReg(15, mkU32( dst & ~3 ));
DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
}
- irsb->jumpkind = Ijk_Call;
- dres.whatNext = Dis_StopHere;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_Call;
goto decode_success;
}
}
condT = IRTemp_INVALID;
// now uncond
- /* Generate the IR. This might generate a write to R15, */
+ /* Generate the IR. This might generate a write to R15. */
mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
if (bL == 1 && (regList & (1<<15))) {
// If we wrote to R15, we have an interworking return to
// deal with.
- irsb->next = llGetIReg(15);
- irsb->jumpkind = Ijk_Ret;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, llGetIReg(15));
+ dres.jk_StopHere = Ijk_Ret;
+ dres.whatNext = Dis_StopHere;
}
DIP("%sm%c%c r%u%s, {0x%04x}\n",
putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
}
- if (loadsPC) {
- /* Presumably this is an interworking branch. */
- irsb->next = mkexpr(newRt);
- irsb->jumpkind = Ijk_Boring; /* or _Ret ? */
- dres.whatNext = Dis_StopHere;
- }
-
/* Update Rn if necessary. */
if (bW == 1) {
vassert(rN != rT); // assured by validity check above
putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID);
}
+
+ if (loadsPC) {
+ /* Presumably this is an interworking branch. */
+ vassert(rN != 15); // assured by validity check above
+ llPutIReg(15, mkexpr(newRt));
+ dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
+ dres.whatNext = Dis_StopHere;
+ }
}
if (bP == 1 && bW == 0) {
if (loadsPC) {
/* Presumably this is an interworking branch. */
- irsb->next = mkexpr(newRt);
- irsb->jumpkind = Ijk_Boring; /* or _Ret ? */
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(newRt));
+ dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
+ dres.whatNext = Dis_StopHere;
}
}
assign( kondT, mk_armg_calculate_condition(cond) );
stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
Ijk_Boring,
- IRConst_U32(dst | 1/*CPSR.T*/) ));
- irsb->next = mkU32( (guest_R15_curr_instr_notENC + 4)
- | 1 /*CPSR.T*/ );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ IRConst_U32(dst | 1/*CPSR.T*/),
+ OFFB_R15T ));
+ llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
+ | 1 /*CPSR.T*/ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("b%s.w 0x%x\n", nCC(cond), dst);
goto decode_success;
}
// now uncond
// branch to dst
- irsb->next = mkU32( dst | 1 /*CPSR.T*/ );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("b.w 0x%x\n", dst);
goto decode_success;
}
assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
}
- irsb->next
- = binop(Iop_Or32,
- binop(Iop_Add32,
- getIRegT(15),
- binop(Iop_Shl32, mkexpr(delta), mkU8(1))
- ),
- mkU32(1)
- );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(
+ 15,
+ binop(Iop_Or32,
+ binop(Iop_Add32,
+ getIRegT(15),
+ binop(Iop_Shl32, mkexpr(delta), mkU8(1))
+ ),
+ mkU32(1)
+ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("tb%c [r%u, r%u%s]\n",
bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
goto decode_success;
now. */
vassert(0 == (guest_R15_curr_instr_notENC & 1));
llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
- irsb->next = mkU32(guest_R15_curr_instr_notENC | 1 /* CPSR.T */);
- irsb->jumpkind = Ijk_NoDecode;
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_NoDecode;
+ dres.len = 0;
return dres;
decode_success:
/* All decode successes end up here. */
- DIP("\n");
-
- vassert(dres.len == 2 || dres.len == 4 || dres.len == 20);
-
-#if 0
- // XXX is this necessary on Thumb?
- /* Now then. Do we have an implicit jump to r15 to deal with? */
- if (r15written) {
- /* If we get jump to deal with, we assume that there's been no
- other competing branch stuff previously generated for this
- insn. That's reasonable, in the sense that the ARM insn set
- appears to declare as "Unpredictable" any instruction which
- generates more than one possible new value for r15. Hence
- just assert. The decoders themselves should check against
- all such instructions which are thusly Unpredictable, and
- decline to decode them. Hence we should never get here if we
- have competing new values for r15, and hence it is safe to
- assert here. */
- vassert(dres.whatNext == Dis_Continue);
- vassert(irsb->next == NULL);
- vassert(irsb->jumpkind == Ijk_Boring);
- /* If r15 is unconditionally written, terminate the block by
- jumping to it. If it's conditionally written, still
- terminate the block (a shame, but we can't do side exits to
- arbitrary destinations), but first jump to the next
- instruction if the condition doesn't hold. */
- /* We can't use getIRegT(15) to get the destination, since that
- will produce r15+4, which isn't what we want. Must use
- llGetIReg(15) instead. */
- if (r15guard == IRTemp_INVALID) {
- /* unconditional */
- } else {
- /* conditional */
- stmt( IRStmt_Exit(
- unop(Iop_32to1,
- binop(Iop_Xor32,
- mkexpr(r15guard), mkU32(1))),
- r15kind,
- IRConst_U32(guest_R15_curr_instr_notENC + 4)
- ));
- }
- irsb->next = llGetIReg(15);
- irsb->jumpkind = r15kind;
- dres.whatNext = Dis_StopHere;
+ vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ llPutIReg(15, mkU32(dres.continueAt));
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
}
-#endif
+
+ DIP("\n");
return dres;
is located in host memory at &guest_code[delta]. */
DisResult disInstr_ARM ( IRSB* irsb_IN,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
}
if (isThumb) {
- dres = disInstr_THUMB_WRK ( put_IP, resteerOkFn,
+ dres = disInstr_THUMB_WRK ( resteerOkFn,
resteerCisOk, callback_opaque,
&guest_code_IN[delta_ENCODED - 1],
archinfo, abiinfo );
} else {
- dres = disInstr_ARM_WRK ( put_IP, resteerOkFn,
+ dres = disInstr_ARM_WRK ( resteerOkFn,
resteerCisOk, callback_opaque,
&guest_code_IN[delta_ENCODED],
archinfo, abiinfo );
(In fact it's a VgInstrumentClosure.)
*/
+/* Regarding IP updating. dis_instr_fn (that does the guest specific
+ work of disassembling an individual instruction) must finish the
+ resulting IR with "PUT(guest_IP) = ". Hence in all cases it must
+ state the next instruction address.
+
+ If the block is to be ended at that point, then this routine
+ (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to
+ make a transfer (of the right kind) to "GET(guest_IP)". Hence if
+ dis_instr_fn generates incorrect IP updates we will see it
+ immediately (due to jumping to the wrong next guest address).
+
+ However it is also necessary to set this up so it can be optimised
+ nicely. The IRSB exit is defined to update the guest IP, so that
+ chaining works -- since the chain_me stubs expect the chain-to
+ address to be in the guest state. Hence what the IRSB next fields
+ will contain initially is (implicitly)
+
+ PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next]
+
+ which looks pretty strange at first. E.g. an unconditional branch
+ to some address 0x123456 looks like this:
+
+ PUT(guest_IP) = 0x123456; // dis_instr_fn generates this
+ // the exit
+ PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring
+
+ after redundant-GET and -PUT removal by iropt, we get what we want:
+
+ // the exit
+ PUT(guest_IP) [implicitly] = 0x123456; exit-Boring
+
+ This makes the IRSB-end case the same as the side-exit case: update
+ IP, then transfer. There is no redundancy of representation for
+ the destination, and we use the destination specified by
+ dis_instr_fn, so any errors it makes show up sooner.
+*/
+
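[Editorial note, not part of the patch: a minimal sketch of the pattern the comment above describes, assuming the IR constructors from libvex_ir.h and the DisResult fields introduced by this change. end_block_at, finish_irsb and offB_GUEST_IP are hypothetical names standing in for the per-arch helpers and IP offsets (OFFB_RIP, OFFB_EIP, OFFB_R15T).

   /* Front end (per-insn disassembler): end the block by writing the
      next guest IP into the guest state and recording how to leave.
      IRConst_U64 assumes a 64-bit guest word; a 32-bit guest would use
      IRConst_U32 instead. */
   static void end_block_at ( /*MOD*/DisResult* dres, IRSB* irsb,
                              Int offB_GUEST_IP, ULong next_ip,
                              IRJumpKind jk )
   {
      addStmtToIRSB( irsb,
                     IRStmt_Put( offB_GUEST_IP,
                                 IRExpr_Const(IRConst_U64(next_ip)) ) );
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = jk;
   }

   /* bb_to_IR: the block exit then reads the IP back out of the guest
      state, so the destination is stated exactly once and iropt can
      fold away the redundant PUT/GET pair. */
   static void finish_irsb ( IRSB* irsb, DisResult* dres,
                             Int offB_GUEST_IP, IRType guest_word_type )
   {
      irsb->next     = IRExpr_Get( offB_GUEST_IP, guest_word_type );
      irsb->jumpkind = dres->jk_StopHere;
      irsb->offsIP   = offB_GUEST_IP;
   }
End of editorial note.]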
IRSB* bb_to_IR (
/*OUT*/VexGuestExtents* vge,
/*OUT*/UInt* n_sc_extents,
/*IN*/ IRType guest_word_type,
/*IN*/ UInt (*needs_self_check)(void*,VexGuestExtents*),
/*IN*/ Bool (*preamble_function)(void*,IRSB*),
- /*IN*/ Int offB_TISTART,
- /*IN*/ Int offB_TILEN
+ /*IN*/ Int offB_GUEST_TISTART,
+ /*IN*/ Int offB_GUEST_TILEN,
+ /*IN*/ Int offB_GUEST_IP,
+ /*IN*/ Int szB_GUEST_IP
)
{
Long delta;
Int i, n_instrs, first_stmt_idx;
- Bool resteerOK, need_to_put_IP, debug_print;
+ Bool resteerOK, debug_print;
DisResult dres;
IRStmt* imark;
IRStmt* nop;
vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
+ if (guest_word_type == Ity_I32) {
+ vassert(szB_GUEST_IP == 4);
+ vassert((offB_GUEST_IP % 4) == 0);
+ } else {
+ vassert(szB_GUEST_IP == 8);
+ vassert((offB_GUEST_IP % 8) == 0);
+ }
+
/* Start a new, empty extent. */
vge->n_used = 1;
vge->base[0] = guest_IP_bbstart;
);
}
- /* for the first insn, the dispatch loop will have set
- %IP, but for all the others we have to do it ourselves. */
- need_to_put_IP = toBool(n_instrs > 0);
+ if (debug_print && n_instrs > 0)
+ vex_printf("\n");
/* Finally, actually disassemble an instruction. */
+ vassert(irsb->next == NULL);
dres = dis_instr_fn ( irsb,
- need_to_put_IP,
resteerOKfn,
toBool(n_cond_resteers_allowed > 0),
callback_opaque,
}
}
- /* If dis_instr_fn terminated the BB at this point, check it
- also filled in the irsb->next field. */
- if (dres.whatNext == Dis_StopHere) {
- vassert(irsb->next != NULL);
- if (debug_print) {
- vex_printf(" ");
- vex_printf( "goto {");
- ppIRJumpKind(irsb->jumpkind);
- vex_printf( "} ");
- ppIRExpr( irsb->next );
- vex_printf( "\n");
- }
+ /* Individual insn disassembly may not mess with irsb->next.
+ This function is the only place where it can be set. */
+ vassert(irsb->next == NULL);
+ vassert(irsb->jumpkind == Ijk_Boring);
+ vassert(irsb->offsIP == 0);
+
+ /* Individual insn disassembly must finish the IR for each
+ instruction with an assignment to the guest PC. */
+ vassert(first_stmt_idx < irsb->stmts_used);
+ /* it follows that irsb->stmts_used must be > 0 */
+ { IRStmt* st = irsb->stmts[irsb->stmts_used-1];
+ vassert(st);
+ vassert(st->tag == Ist_Put);
+ vassert(st->Ist.Put.offset == offB_GUEST_IP);
+ /* Really we should also check that the type of the Put'd data
+ == guest_word_type, but that's a bit expensive. */
}
/* Update the VexGuestExtents we are constructing. */
vge->len[vge->n_used-1]
= toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
n_instrs++;
- if (debug_print)
- vex_printf("\n");
/* Advance delta (inconspicuous but very important :-) */
delta += (Long)dres.len;
switch (dres.whatNext) {
case Dis_Continue:
- vassert(irsb->next == NULL);
+ vassert(dres.continueAt == 0);
+ vassert(dres.jk_StopHere == Ijk_INVALID);
if (n_instrs < vex_control.guest_max_insns) {
/* keep going */
} else {
- /* We have to stop. */
- irsb->next
- = IRExpr_Const(
- guest_word_type == Ity_I32
- ? IRConst_U32(toUInt(guest_IP_bbstart+delta))
- : IRConst_U64(guest_IP_bbstart+delta)
- );
+ /* We have to stop. See comment above re irsb field
+ settings here. */
+ irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
+ /* irsb->jumpkind must already by Ijk_Boring */
+ irsb->offsIP = offB_GUEST_IP;
goto done;
}
break;
case Dis_StopHere:
- vassert(irsb->next != NULL);
+ vassert(dres.continueAt == 0);
+ vassert(dres.jk_StopHere != Ijk_INVALID);
+ /* See comment above re irsb field settings here. */
+ irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
+ irsb->jumpkind = dres.jk_StopHere;
+ irsb->offsIP = offB_GUEST_IP;
goto done;
+
case Dis_ResteerU:
case Dis_ResteerC:
/* Check that we actually allowed a resteer .. */
vassert(resteerOK);
- vassert(irsb->next == NULL);
if (dres.whatNext == Dis_ResteerC) {
vassert(n_cond_resteers_allowed > 0);
n_cond_resteers_allowed--;
= IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
irsb->stmts[selfcheck_idx + i * 5 + 2]
- = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) );
+ = IRStmt_Put( offB_GUEST_TISTART, IRExpr_RdTmp(tistart_tmp) );
irsb->stmts[selfcheck_idx + i * 5 + 3]
- = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
+ = IRStmt_Put( offB_GUEST_TILEN, IRExpr_RdTmp(tilen_tmp) );
/* Generate the entry point descriptors */
if (abiinfo_both->host_ppc_calls_use_fndescrs) {
/* Where we must restart if there's a failure: at the
first extent, regardless of which extent the
failure actually happened in. */
- guest_IP_bbstart_IRConst
+ guest_IP_bbstart_IRConst,
+ offB_GUEST_IP
);
} /* for (i = 0; i < vge->n_used; i++) */
}
+ /* irsb->next must now be set, since we've finished the block.
+ Print it if necessary. */
+ vassert(irsb->next != NULL);
+ if (debug_print) {
+ vex_printf(" ");
+ vex_printf( "PUT(%d) = ", irsb->offsIP);
+ ppIRExpr( irsb->next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(irsb->jumpkind);
+ vex_printf( "\n");
+ vex_printf( "\n");
+ }
+
return irsb;
}
enum { Dis_StopHere, Dis_Continue,
Dis_ResteerU, Dis_ResteerC } whatNext;
+ /* For Dis_StopHere, we need to end the block and create a
+ transfer to whatever the NIA is. That will have presumably
+ been set by the IR generated for this insn. So we need to
+ know the jump kind to use. Should be Ijk_INVALID in the other Dis_
+ cases. */
+ IRJumpKind jk_StopHere;
+
/* For Dis_Resteer, this is the guest address we should continue
at. Otherwise ignored (should be zero). */
Addr64 continueAt;
/* This is the IRSB to which the resulting IR is to be appended. */
/*OUT*/ IRSB* irbb,
- /* Do we need to generate IR to set the guest IP for this insn,
- or not? */
- /*IN*/ Bool put_IP,
-
/* Return True iff resteering to the given addr is allowed (for
branches/calls to destinations that are known at JIT-time) */
/*IN*/ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
/*IN*/ IRType guest_word_type,
/*IN*/ UInt (*needs_self_check)(void*,VexGuestExtents*),
/*IN*/ Bool (*preamble_function)(void*,IRSB*),
- /*IN*/ Int offB_TISTART,
- /*IN*/ Int offB_TILEN
+ /*IN*/ Int offB_GUEST_TISTART,
+ /*IN*/ Int offB_GUEST_TILEN,
+ /*IN*/ Int offB_GUEST_IP,
+ /*IN*/ Int szB_GUEST_IP
);
if (mode64) {
vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64);
stmt(
- IRStmt_Exit(
+ IRStmt_Exit3(
binop(Iop_CmpNE64,
binop(Iop_And64, mkexpr(addr), mkU64(align-1)),
mkU64(0)),
} else {
vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I32);
stmt(
- IRStmt_Exit(
+ IRStmt_Exit3(
binop(Iop_CmpNE32,
binop(Iop_And32, mkexpr(addr), mkU32(align-1)),
mkU32(0)),
so that Valgrind's dispatcher sees the warning. */
putGST( PPC_GST_EMWARN, mkU32(ew) );
stmt(
- IRStmt_Exit(
+ IRStmt_Exit3(
binop(Iop_CmpNE32, mkU32(ew), mkU32(EmWarn_NONE)),
Ijk_EmWarn,
mkSzConst( ty, nextInsnAddr()) ));
for (i = 0; i < maxBytes; i++) {
/* if (nBytes < (i+1)) goto NIA; */
- stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
+ stmt( IRStmt_Exit3( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
Ijk_Boring,
mkSzConst( ty, nextInsnAddr()) ));
/* when crossing into a new dest register, set it to zero. */
for (i = 0; i < maxBytes; i++) {
/* if (nBytes < (i+1)) goto NIA; */
- stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
+ stmt( IRStmt_Exit3( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
Ijk_Boring,
mkSzConst( ty, nextInsnAddr() ) ));
/* check for crossing into a new src register. */
cond_ok is either zero or nonzero, since that's the cheapest
way to compute it. Anding them together gives a value which
is either zero or non zero and so that's what we must test
- for in the IRStmt_Exit. */
+ for in the IRStmt_Exit3. */
assign( ctr_ok, branch_ctr_ok( BO ) );
assign( cond_ok, branch_cond_ok( BO, BI ) );
assign( do_branch,
if (flag_LK)
putGST( PPC_GST_LR, e_nia );
- stmt( IRStmt_Exit(
+ stmt( IRStmt_Exit3(
binop(Iop_CmpNE32, mkexpr(do_branch), mkU32(0)),
flag_LK ? Ijk_Call : Ijk_Boring,
mkSzConst(ty, tgt) ) );
if (flag_LK)
putGST( PPC_GST_LR, e_nia );
- stmt( IRStmt_Exit(
+ stmt( IRStmt_Exit3(
binop(Iop_CmpEQ32, mkexpr(cond_ok), mkU32(0)),
Ijk_Boring,
c_nia ));
if (flag_LK)
putGST( PPC_GST_LR, e_nia );
- stmt( IRStmt_Exit(
+ stmt( IRStmt_Exit3(
binop(Iop_CmpEQ32, mkexpr(do_branch), mkU32(0)),
Ijk_Boring,
c_nia ));
if ((TO & b11100) == b11100 || (TO & b00111) == b00111) {
/* Unconditional trap. Just do the exit without
testing the arguments. */
- stmt( IRStmt_Exit(
+ stmt( IRStmt_Exit3(
binop(opCMPEQ, const0, const0),
Ijk_SigTRAP,
mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia)
tmp = binop(opAND, binop(opCMPORDU, argLe, argRe), const4);
cond = binop(opOR, tmp, cond);
}
- stmt( IRStmt_Exit(
+ stmt( IRStmt_Exit3(
binop(opCMPNE, cond, const0),
Ijk_SigTRAP,
mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia)
{
vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1);
- stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(guest_IA_next_instr)));
+ stmt(IRStmt_Exit3(condition, Ijk_Boring, IRConst_U64(guest_IA_next_instr)));
irsb->next = target;
irsb->jumpkind = Ijk_Boring;
{
vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1);
- stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(target)));
+ stmt(IRStmt_Exit3(condition, Ijk_Boring, IRConst_U64(target)));
dis_res->whatNext = Dis_Continue;
}
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART),
mkU64(guest_IA_curr_instr)));
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4)));
- stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval,
+ stmt(IRStmt_Exit3(mkexpr(cond), Ijk_TInval,
IRConst_U64(guest_IA_curr_instr)));
ss.bytes = last_execute_target;
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART),
mkU64(guest_IA_curr_instr)));
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4)));
- stmt(IRStmt_Exit(IRExpr_Const(IRConst_U1(True)), Ijk_TInval,
+ stmt(IRStmt_Exit3(IRExpr_Const(IRConst_U1(True)), Ijk_TInval,
IRConst_U64(guest_IA_curr_instr)));
/* we know that this will be invalidated */
irsb->next = mkU64(guest_IA_next_instr);
/* and restart */
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), mkU64(guest_IA_curr_instr)));
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4)));
- stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval,
+ stmt(IRStmt_Exit3(mkexpr(cond), Ijk_TInval,
IRConst_U64(guest_IA_curr_instr)));
/* Now comes the actual translation */
put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
put_gpr_dw0(r1, mkexpr(next));
put_gpr_dw0(r2, binop(Iop_Add64, mkexpr(address), mkU64(1)));
- stmt(IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)),
+ stmt(IRStmt_Exit3(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)),
Ijk_Boring, IRConst_U64(guest_IA_curr_instr)));
// >= 256 bytes done CC=3
s390_cc_set(3);
put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), mkU64(1)));
put_gpr_dw0(r2, binop(Iop_Add64, get_gpr_dw0(r2), mkU64(1)));
- stmt(IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)),
+ stmt(IRStmt_Exit3(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)),
Ijk_Boring, IRConst_U64(guest_IA_curr_instr)));
// >= 256 bytes done CC=3
s390_cc_set(3);
Otherwise, store the old_value from memory in r1 and yield. */
assign(nequal, binop(Iop_CmpNE32, s390_call_calculate_cc(), mkU32(0)));
put_gpr_w1(r1, mkite(mkexpr(nequal), mkexpr(old_mem), mkexpr(op1)));
- stmt(IRStmt_Exit(mkexpr(nequal), Ijk_Yield,
+ stmt(IRStmt_Exit3(mkexpr(nequal), Ijk_Yield,
IRConst_U64(guest_IA_next_instr)));
}
Otherwise, store the old_value from memory in r1 and yield. */
assign(nequal, binop(Iop_CmpNE32, s390_call_calculate_cc(), mkU32(0)));
put_gpr_dw0(r1, mkite(mkexpr(nequal), mkexpr(old_mem), mkexpr(op1)));
- stmt(IRStmt_Exit(mkexpr(nequal), Ijk_Yield,
+ stmt(IRStmt_Exit3(mkexpr(nequal), Ijk_Yield,
IRConst_U64(guest_IA_next_instr)));
return "csg";
bb_to_IR.h. */
extern
DisResult disInstr_X86 ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
+ vex_state->host_EvC_FAILADDR = 0;
+ vex_state->host_EvC_COUNTER = 0;
+
vex_state->guest_EAX = 0;
vex_state->guest_ECX = 0;
vex_state->guest_EDX = 0;
vex_state->guest_IP_AT_SYSCALL = 0;
vex_state->padding1 = 0;
- vex_state->padding2 = 0;
- vex_state->padding3 = 0;
}
binop( mkSizedOp(tyE,Iop_CasCmpNE8),
mkexpr(oldTmp), mkexpr(expTmp) ),
Ijk_Boring, /*Ijk_NoRedir*/
- IRConst_U32( restart_point )
+ IRConst_U32( restart_point ),
+ OFFB_EIP
));
}
/*--- JMP helpers ---*/
/*------------------------------------------------------------*/
-static void jmp_lit( IRJumpKind kind, Addr32 d32 )
+static void jmp_lit( /*MOD*/DisResult* dres,
+ IRJumpKind kind, Addr32 d32 )
{
- irsb->next = mkU32(d32);
- irsb->jumpkind = kind;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = kind;
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(d32) ) );
}
-static void jmp_treg( IRJumpKind kind, IRTemp t )
+static void jmp_treg( /*MOD*/DisResult* dres,
+ IRJumpKind kind, IRTemp t )
{
- irsb->next = mkexpr(t);
- irsb->jumpkind = kind;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = kind;
+ stmt( IRStmt_Put( OFFB_EIP, mkexpr(t) ) );
}
static
-void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
+void jcc_01( /*MOD*/DisResult* dres,
+ X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
{
Bool invert;
X86Condcode condPos;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_Boring;
condPos = positiveIse_X86Condcode ( cond, &invert );
if (invert) {
stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
Ijk_Boring,
- IRConst_U32(d32_false) ) );
- irsb->next = mkU32(d32_true);
- irsb->jumpkind = Ijk_Boring;
+ IRConst_U32(d32_false),
+ OFFB_EIP ) );
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) );
} else {
stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
Ijk_Boring,
- IRConst_U32(d32_true) ) );
- irsb->next = mkU32(d32_false);
- irsb->jumpkind = Ijk_Boring;
+ IRConst_U32(d32_true),
+ OFFB_EIP ) );
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) );
}
}
IRStmt_Exit(
binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
Ijk_MapFail,
- IRConst_U32( guest_EIP_curr_instr )
+ IRConst_U32( guest_EIP_curr_instr ),
+ OFFB_EIP
)
);
/* Group 5 extended opcodes. */
static
UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
- DisResult* dres, Bool* decode_OK )
+ /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
Int len;
UChar modrm;
assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
putIReg(4, R_ESP, mkexpr(t2));
storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
- jmp_treg(Ijk_Call,t1);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Call, t1);
+ vassert(dres->whatNext == Dis_StopHere);
break;
case 4: /* jmp Ev */
vassert(sz == 4);
- jmp_treg(Ijk_Boring,t1);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Boring, t1);
+ vassert(dres->whatNext == Dis_StopHere);
break;
case 6: /* PUSH Ev */
vassert(sz == 4 || sz == 2);
assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
putIReg(4, R_ESP, mkexpr(t2));
storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
- jmp_treg(Ijk_Call,t1);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Call, t1);
+ vassert(dres->whatNext == Dis_StopHere);
break;
case 4: /* JMP Ev */
vassert(sz == 4);
- jmp_treg(Ijk_Boring,t1);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Boring, t1);
+ vassert(dres->whatNext == Dis_StopHere);
break;
case 6: /* PUSH Ev */
vassert(sz == 4 || sz == 2);
We assume the insn is the last one in the basic block, and so emit a jump
to the next insn, rather than just falling through. */
static
-void dis_REP_op ( X86Condcode cond,
+void dis_REP_op ( /*MOD*/DisResult* dres,
+ X86Condcode cond,
void (*dis_OP)(Int, IRTemp),
Int sz, Addr32 eip, Addr32 eip_next, HChar* name )
{
stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
Ijk_Boring,
- IRConst_U32(eip_next) ) );
+ IRConst_U32(eip_next), OFFB_EIP ) );
putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
dis_OP (sz, t_inc);
if (cond == X86CondAlways) {
- jmp_lit(Ijk_Boring,eip);
+ jmp_lit(dres, Ijk_Boring, eip);
+ vassert(dres->whatNext == Dis_StopHere);
} else {
stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
Ijk_Boring,
- IRConst_U32(eip) ) );
- jmp_lit(Ijk_Boring,eip_next);
+ IRConst_U32(eip), OFFB_EIP ) );
+ jmp_lit(dres, Ijk_Boring, eip_next);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("%s%c\n", name, nameISize(sz));
}
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
+ OFFB_EIP
)
);
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
+ OFFB_EIP
)
);
break;
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
+ OFFB_EIP
)
);
}
static
-void dis_ret ( UInt d32 )
+void dis_ret ( /*MOD*/DisResult* dres, UInt d32 )
{
- IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32);
+ IRTemp t1 = newTemp(Ity_I32);
+ IRTemp t2 = newTemp(Ity_I32);
assign(t1, getIReg(4,R_ESP));
assign(t2, loadLE(Ity_I32,mkexpr(t1)));
putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
- jmp_treg(Ijk_Ret,t2);
+ jmp_treg(dres, Ijk_Ret, t2);
+ vassert(dres->whatNext == Dis_StopHere);
}
/*------------------------------------------------------------*/
binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
mkU32(0) ),
Ijk_EmWarn,
- IRConst_U32( next_insn_EIP )
+ IRConst_U32( next_insn_EIP ),
+ OFFB_EIP
)
);
}
binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
mkU32(0)),
Ijk_SigSEGV,
- IRConst_U32(guest_EIP_curr_instr)
+ IRConst_U32(guest_EIP_curr_instr),
+ OFFB_EIP
)
);
}
static
DisResult disInstr_X86_WRK (
/*OUT*/Bool* expect_CAS,
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
Bool pfx_lock = False;
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 0;
- dres.continueAt = 0;
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
*expect_CAS = False;
vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
DIP("\t0x%x: ", guest_EIP_bbstart+delta);
- /* We may be asked to update the guest EIP before going further. */
- if (put_IP)
- stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) );
-
/* Spot "Special" instructions (see comment at top of file). */
{
UChar* code = (UChar*)(guest_code + delta);
/* %EDX = client_request ( %EAX ) */
DIP("%%edx = client_request ( %%eax )\n");
delta += 14;
- jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta);
+ vassert(dres.whatNext == Dis_StopHere);
goto decode_success;
}
else
assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
putIReg(4, R_ESP, mkexpr(t2));
storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
- jmp_treg(Ijk_NoRedir,t1);
- dres.whatNext = Dis_StopHere;
+ jmp_treg(&dres, Ijk_NoRedir, t1);
+ vassert(dres.whatNext == Dis_StopHere);
goto decode_success;
}
/* We don't know what it is. */
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
+ OFFB_EIP
)
);
goto decode_success;
stmt( IRStmt_Exit(
binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
Ijk_Boring,
- IRConst_U32(d32)
+ IRConst_U32(d32),
+ OFFB_EIP
));
DIP("jcxz 0x%x\n", d32);
goto decode_success;
case 0xC2: /* RET imm16 */
d32 = getUDisp16(delta);
delta += 2;
- dis_ret(d32);
- dres.whatNext = Dis_StopHere;
+ dis_ret(&dres, d32);
DIP("ret %d\n", (Int)d32);
break;
case 0xC3: /* RET */
- dis_ret(0);
- dres.whatNext = Dis_StopHere;
+ dis_ret(&dres, 0);
DIP("ret\n");
break;
/* set %EFLAGS */
set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
/* goto new EIP value */
- jmp_treg(Ijk_Ret,t2);
- dres.whatNext = Dis_StopHere;
+ jmp_treg(&dres, Ijk_Ret, t2);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("iret (very kludgey)\n");
break;
dres.whatNext = Dis_ResteerU;
dres.continueAt = (Addr64)(Addr32)d32;
} else {
- jmp_lit(Ijk_Call,d32);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Call, d32);
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("call 0x%x\n",d32);
}
/* ------------------------ INT ------------------------ */
case 0xCC: /* INT 3 */
- jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x3\n");
break;
This used to handle just 0x40-0x43; Jikes RVM uses a larger
range (0x3F-0x49), and this allows some slack as well. */
if (d32 >= 0x3F && d32 <= 0x4F) {
- jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x%x\n", (Int)d32);
break;
}
if (d32 == 0x80) {
stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Sys_int128, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x80\n");
break;
}
if (d32 == 0x81) {
stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Sys_int129, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x81\n");
break;
}
if (d32 == 0x82) {
stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Sys_int130, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x82\n");
break;
}
dres.whatNext = Dis_ResteerU;
dres.continueAt = (Addr64)(Addr32)d32;
} else {
- jmp_lit(Ijk_Boring,d32);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Boring, d32);
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("jmp-8 0x%x\n", d32);
break;
dres.whatNext = Dis_ResteerU;
dres.continueAt = (Addr64)(Addr32)d32;
} else {
- jmp_lit(Ijk_Boring,d32);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Boring, d32);
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("jmp 0x%x\n", d32);
break;
stmt( IRStmt_Exit(
mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
Ijk_Boring,
- IRConst_U32(guest_EIP_bbstart+delta) ) );
+ IRConst_U32(guest_EIP_bbstart+delta),
+ OFFB_EIP ) );
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)d32;
comment = "(assumed taken)";
stmt( IRStmt_Exit(
mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
Ijk_Boring,
- IRConst_U32(d32) ) );
+ IRConst_U32(d32),
+ OFFB_EIP ) );
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
comment = "(assumed not taken)";
else {
/* Conservative default translation - end the block at this
point. */
- jcc_01( (X86Condcode)(opc - 0x70),
+ jcc_01( &dres, (X86Condcode)(opc - 0x70),
(Addr32)(guest_EIP_bbstart+delta), d32);
- dres.whatNext = Dis_StopHere;
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
break;
stmt( IRStmt_Exit(
binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
Ijk_Boring,
- IRConst_U32(d32)
+ IRConst_U32(d32),
+ OFFB_EIP
));
DIP("jecxz 0x%x\n", d32);
break;
default:
vassert(0);
}
- stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) );
+ stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) );
DIP("loop%s 0x%x\n", xtra, d32);
break;
abyte = getIByte(delta); delta++;
if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
- dres.whatNext = Dis_StopHere;
switch (abyte) {
/* According to the Intel manual, "repne movs" should never occur, but
* in practice it has happened, so allow for it here... */
case 0xA4: sz = 1; /* REPNE MOVS<sz> */
case 0xA5:
- dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repne movs" );
+ dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne movs" );
break;
case 0xA6: sz = 1; /* REPNE CMP<sz> */
case 0xA7:
- dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repne cmps" );
+ dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne cmps" );
break;
case 0xAA: sz = 1; /* REPNE STOS<sz> */
case 0xAB:
- dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repne stos" );
+ dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne stos" );
break;
case 0xAE: sz = 1; /* REPNE SCAS<sz> */
case 0xAF:
- dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repne scas" );
+ dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne scas" );
break;
default:
abyte = getIByte(delta); delta++;
if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
- dres.whatNext = Dis_StopHere;
switch (abyte) {
case 0xA4: sz = 1; /* REP MOVS<sz> */
case 0xA5:
- dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig,
- guest_EIP_bbstart+delta, "rep movs" );
+ dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep movs" );
break;
case 0xA6: sz = 1; /* REPE CMP<sz> */
case 0xA7:
- dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repe cmps" );
+ dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repe cmps" );
break;
case 0xAA: sz = 1; /* REP STOS<sz> */
case 0xAB:
- dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig,
- guest_EIP_bbstart+delta, "rep stos" );
+ dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep stos" );
break;
case 0xAC: sz = 1; /* REP LODS<sz> */
case 0xAD:
- dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig,
- guest_EIP_bbstart+delta, "rep lods" );
+ dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep lods" );
break;
case 0xAE: sz = 1; /* REPE SCAS<sz> */
case 0xAF:
- dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repe scas" );
+ dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repe scas" );
break;
case 0x90: /* REP NOP (PAUSE) */
DIP("rep nop (P4 pause)\n");
/* "observe" the hint. The Vex client needs to be careful not
to cause very long delays as a result, though. */
- jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
break;
case 0xC3: /* REP RET -- same as normal ret? */
- dis_ret(0);
- dres.whatNext = Dis_StopHere;
+ dis_ret(&dres, 0);
DIP("rep ret\n");
break;
mk_x86g_calculate_condition((X86Condcode)
(1 ^ (opc - 0x80))),
Ijk_Boring,
- IRConst_U32(guest_EIP_bbstart+delta) ) );
+ IRConst_U32(guest_EIP_bbstart+delta),
+ OFFB_EIP ) );
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)d32;
comment = "(assumed taken)";
stmt( IRStmt_Exit(
mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
Ijk_Boring,
- IRConst_U32(d32) ) );
+ IRConst_U32(d32),
+ OFFB_EIP ) );
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
comment = "(assumed not taken)";
else {
/* Conservative default translation - end the block at
this point. */
- jcc_01( (X86Condcode)(opc - 0x80),
+ jcc_01( &dres, (X86Condcode)(opc - 0x80),
(Addr32)(guest_EIP_bbstart+delta), d32);
- dres.whatNext = Dis_StopHere;
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
break;
point if the syscall needs to be restarted. */
stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("sysenter");
break;
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
+ vassert(dres.whatNext == Dis_StopHere);
dres.len = 0;
/* We also need to say that a CAS is not expected now, regardless
of what it might have been set to at the start of the function,
decode_success:
/* All decode successes end up here. */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) );
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
+
DIP("\n");
dres.len = delta - delta_start;
return dres;
is located in host memory at &guest_code[delta]. */
DisResult disInstr_X86 ( IRSB* irsb_IN,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
x1 = irsb_IN->stmts_used;
expect_CAS = False;
- dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
resteerCisOk,
callback_opaque,
delta, archinfo, abiinfo );
/* inconsistency detected. re-disassemble the instruction so as
to generate a useful error message; then assert. */
vex_traceflags |= VEX_TRACE_FE;
- dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
resteerCisOk,
callback_opaque,
delta, archinfo, abiinfo );
HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }
-//.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }
-//..
HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); }
HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); }
HReg hregAMD64_XMM2 ( void ) { return mkHReg( 2, HRcVec128, False); }
return am;
}
-//.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) {
-//.. switch (am->tag) {
-//.. case Xam_IR:
-//.. return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
-//.. case Xam_IRRS:
-//.. return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
-//.. am->Xam.IRRS.index, am->Xam.IRRS.shift );
-//.. default:
-//.. vpanic("dopyAMD64AMode");
-//.. }
-//.. }
-
void ppAMD64AMode ( AMD64AMode* am ) {
switch (am->tag) {
case Aam_IR:
HChar* showA87FpOp ( A87FpOp op ) {
switch (op) {
-//.. case Xfp_ADD: return "add";
-//.. case Xfp_SUB: return "sub";
-//.. case Xfp_MUL: return "mul";
-//.. case Xfp_DIV: return "div";
case Afp_SCALE: return "scale";
case Afp_ATAN: return "atan";
case Afp_YL2X: return "yl2x";
case Afp_PREM: return "prem";
case Afp_PREM1: return "prem1";
case Afp_SQRT: return "sqrt";
-//.. case Xfp_ABS: return "abs";
-//.. case Xfp_NEG: return "chs";
-//.. case Xfp_MOV: return "mov";
case Afp_SIN: return "sin";
case Afp_COS: return "cos";
case Afp_TAN: return "tan";
vassert(sz == 4 || sz == 8);
return i;
}
-//.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_Sh3232;
-//.. i->Xin.Sh3232.op = op;
-//.. i->Xin.Sh3232.amt = amt;
-//.. i->Xin.Sh3232.src = src;
-//.. i->Xin.Sh3232.dst = dst;
-//.. vassert(op == Xsh_SHL || op == Xsh_SHR);
-//.. return i;
-//.. }
AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_Push;
vassert(regparms >= 0 && regparms <= 6);
return i;
}
-AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) {
- AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag = Ain_Goto;
- i->Ain.Goto.cond = cond;
- i->Ain.Goto.dst = dst;
- i->Ain.Goto.jk = jk;
+
+AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond, Bool toFastEP ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_XDirect;
+ i->Ain.XDirect.dstGA = dstGA;
+ i->Ain.XDirect.amRIP = amRIP;
+ i->Ain.XDirect.cond = cond;
+ i->Ain.XDirect.toFastEP = toFastEP;
+ return i;
+}
+AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_XIndir;
+ i->Ain.XIndir.dstGA = dstGA;
+ i->Ain.XIndir.amRIP = amRIP;
+ i->Ain.XIndir.cond = cond;
+ return i;
+}
+AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond, IRJumpKind jk ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_XAssisted;
+ i->Ain.XAssisted.dstGA = dstGA;
+ i->Ain.XAssisted.amRIP = amRIP;
+ i->Ain.XAssisted.cond = cond;
+ i->Ain.XAssisted.jk = jk;
return i;
}
+
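/* Illustrative sketch only -- not part of this patch.  It shows how an
   instruction selector might use the new XDirect constructor for an
   unconditional direct block exit.  ISelEnv, addInstr, AMD64AMode_IR
   and hregAMD64_RBP are assumed to be the usual AMD64 backend helpers;
   the function name and the offsRIP parameter are hypothetical. */
static void example_direct_exit ( ISelEnv* env, Addr64 dstGA,
                                  Int offsRIP, Bool toFastEP )
{
   /* Amode for the guest RIP slot, addressed off the baseblock
      pointer (%rbp). */
   AMD64AMode* amRIP = AMD64AMode_IR(offsRIP, hregAMD64_RBP());
   /* Unconditional exit: write the new RIP, then request chaining
      to dstGA via the slow or fast entry point. */
   addInstr(env, AMD64Instr_XDirect(dstGA, amRIP, Acc_ALWAYS, toFastEP));
}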
AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_CMov64;
i->Ain.A87StSW.addr = addr;
return i;
}
-
-//.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpUnary;
-//.. i->Xin.FpUnary.op = op;
-//.. i->Xin.FpUnary.src = src;
-//.. i->Xin.FpUnary.dst = dst;
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpBinary;
-//.. i->Xin.FpBinary.op = op;
-//.. i->Xin.FpBinary.srcL = srcL;
-//.. i->Xin.FpBinary.srcR = srcR;
-//.. i->Xin.FpBinary.dst = dst;
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpLdSt;
-//.. i->Xin.FpLdSt.isLoad = isLoad;
-//.. i->Xin.FpLdSt.sz = sz;
-//.. i->Xin.FpLdSt.reg = reg;
-//.. i->Xin.FpLdSt.addr = addr;
-//.. vassert(sz == 4 || sz == 8);
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz,
-//.. HReg reg, AMD64AMode* addr ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpLdStI;
-//.. i->Xin.FpLdStI.isLoad = isLoad;
-//.. i->Xin.FpLdStI.sz = sz;
-//.. i->Xin.FpLdStI.reg = reg;
-//.. i->Xin.FpLdStI.addr = addr;
-//.. vassert(sz == 2 || sz == 4 || sz == 8);
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_Fp64to32;
-//.. i->Xin.Fp64to32.src = src;
-//.. i->Xin.Fp64to32.dst = dst;
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpCMov;
-//.. i->Xin.FpCMov.cond = cond;
-//.. i->Xin.FpCMov.src = src;
-//.. i->Xin.FpCMov.dst = dst;
-//.. vassert(cond != Xcc_ALWAYS);
-//.. return i;
-//.. }
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_LdMXCSR;
i->Ain.LdMXCSR.addr = addr;
return i;
}
-//.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpStSW_AX;
-//.. return i;
-//.. }
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_SseUComIS;
i->Ain.SseSDSS.dst = dst;
return i;
}
-
-//.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_SseConst;
-//.. i->Xin.SseConst.con = con;
-//.. i->Xin.SseConst.dst = dst;
-//.. vassert(hregClass(dst) == HRcVec128);
-//.. return i;
-//.. }
AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
HReg reg, AMD64AMode* addr ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
vassert(order >= 0 && order <= 0xFF);
return i;
}
+AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
+ AMD64AMode* amFailAddr ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_EvCheck;
+ i->Ain.EvCheck.amCounter = amCounter;
+ i->Ain.EvCheck.amFailAddr = amFailAddr;
+ return i;
+}
+AMD64Instr* AMD64Instr_ProfInc ( void ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_ProfInc;
+ return i;
+}
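/* Illustrative sketch only -- not part of this patch.  Planting an
   event check against the new host_EvC_COUNTER / host_EvC_FAILADDR
   guest-state fields.  As in the previous sketch, ISelEnv and addInstr
   are assumed isel helpers and the function name is hypothetical. */
static void example_plant_evcheck ( ISelEnv* env )
{
   /* Both amodes are small offsets off %rbp (the baseblock pointer),
      which is the form the Ain_EvCheck emitter expects. */
   AMD64AMode* amCounter
      = AMD64AMode_IR(offsetof(VexGuestAMD64State, host_EvC_COUNTER),
                      hregAMD64_RBP());
   AMD64AMode* amFailAddr
      = AMD64AMode_IR(offsetof(VexGuestAMD64State, host_EvC_FAILADDR),
                      hregAMD64_RBP());
   addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));
}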
void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
{
showAMD64ScalarSz(i->Ain.Div.sz));
ppAMD64RM(i->Ain.Div.src);
return;
-//.. case Xin_Sh3232:
-//.. vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op));
-//.. if (i->Xin.Sh3232.amt == 0)
-//.. vex_printf(" %%cl,");
-//.. else
-//.. vex_printf(" $%d,", i->Xin.Sh3232.amt);
-//.. ppHRegAMD64(i->Xin.Sh3232.src);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.Sh3232.dst);
-//.. return;
case Ain_Push:
vex_printf("pushq ");
ppAMD64RMI(i->Ain.Push.src);
i->Ain.Call.regparms );
vex_printf("0x%llx", i->Ain.Call.target);
break;
- case Ain_Goto:
- if (i->Ain.Goto.cond != Acc_ALWAYS) {
- vex_printf("if (%%rflags.%s) { ",
- showAMD64CondCode(i->Ain.Goto.cond));
- }
- if (i->Ain.Goto.jk != Ijk_Boring
- && i->Ain.Goto.jk != Ijk_Call
- && i->Ain.Goto.jk != Ijk_Ret) {
- vex_printf("movl $");
- ppIRJumpKind(i->Ain.Goto.jk);
- vex_printf(",%%ebp ; ");
- }
+
+ case Ain_XDirect:
+ vex_printf("(xDirect) ");
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.XDirect.cond));
+ vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
+ vex_printf("movq %%r11,");
+ ppAMD64AMode(i->Ain.XDirect.amRIP);
+ vex_printf("; ");
+ vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
+ i->Ain.XDirect.toFastEP ? "fast" : "slow");
+ return;
+ case Ain_XIndir:
+ vex_printf("(xIndir) ");
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.XIndir.cond));
vex_printf("movq ");
- ppAMD64RI(i->Ain.Goto.dst);
- vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx");
- if (i->Ain.Goto.cond != Acc_ALWAYS) {
- vex_printf(" }");
- }
+ ppHRegAMD64(i->Ain.XIndir.dstGA);
+ vex_printf(",");
+ ppAMD64AMode(i->Ain.XIndir.amRIP);
+ vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
+ return;
+ case Ain_XAssisted:
+ vex_printf("(xAssisted) ");
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.XAssisted.cond));
+ vex_printf("movq ");
+ ppHRegAMD64(i->Ain.XAssisted.dstGA);
+ vex_printf(",");
+ ppAMD64AMode(i->Ain.XAssisted.amRIP);
+ vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
+ (Int)i->Ain.XAssisted.jk);
+ vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
return;
+
case Ain_CMov64:
vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
ppAMD64RM(i->Ain.CMov64.src);
vex_printf("fstsw ");
ppAMD64AMode(i->Ain.A87StSW.addr);
break;
-//.. case Xin_FpUnary:
-//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
-//.. ppHRegAMD64(i->Xin.FpUnary.src);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.FpUnary.dst);
-//.. break;
-//.. case Xin_FpBinary:
-//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op));
-//.. ppHRegAMD64(i->Xin.FpBinary.srcL);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.FpBinary.srcR);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.FpBinary.dst);
-//.. break;
-//.. case Xin_FpLdSt:
-//.. if (i->Xin.FpLdSt.isLoad) {
-//.. vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
-//.. ppAMD64AMode(i->Xin.FpLdSt.addr);
-//.. vex_printf(", ");
-//.. ppHRegAMD64(i->Xin.FpLdSt.reg);
-//.. } else {
-//.. vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
-//.. ppHRegAMD64(i->Xin.FpLdSt.reg);
-//.. vex_printf(", ");
-//.. ppAMD64AMode(i->Xin.FpLdSt.addr);
-//.. }
-//.. return;
-//.. case Xin_FpLdStI:
-//.. if (i->Xin.FpLdStI.isLoad) {
-//.. vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
-//.. i->Xin.FpLdStI.sz==4 ? "l" : "w");
-//.. ppAMD64AMode(i->Xin.FpLdStI.addr);
-//.. vex_printf(", ");
-//.. ppHRegAMD64(i->Xin.FpLdStI.reg);
-//.. } else {
-//.. vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
-//.. i->Xin.FpLdStI.sz==4 ? "l" : "w");
-//.. ppHRegAMD64(i->Xin.FpLdStI.reg);
-//.. vex_printf(", ");
-//.. ppAMD64AMode(i->Xin.FpLdStI.addr);
-//.. }
-//.. return;
-//.. case Xin_Fp64to32:
-//.. vex_printf("gdtof ");
-//.. ppHRegAMD64(i->Xin.Fp64to32.src);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.Fp64to32.dst);
-//.. return;
-//.. case Xin_FpCMov:
-//.. vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond));
-//.. ppHRegAMD64(i->Xin.FpCMov.src);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.FpCMov.dst);
-//.. return;
-//.. case Xin_FpLdStCW:
-//.. vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw ");
-//.. ppAMD64AMode(i->Xin.FpLdStCW.addr);
-//.. return;
-//.. case Xin_FpStSW_AX:
-//.. vex_printf("fstsw %%ax");
-//.. return;
case Ain_LdMXCSR:
vex_printf("ldmxcsr ");
ppAMD64AMode(i->Ain.LdMXCSR.addr);
vex_printf(",");
ppHRegAMD64(i->Ain.SseSDSS.dst);
break;
-//.. case Xin_SseConst:
-//.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
-//.. ppHRegAMD64(i->Xin.SseConst.dst);
-//.. break;
case Ain_SseLdSt:
switch (i->Ain.SseLdSt.sz) {
case 4: vex_printf("movss "); break;
vex_printf(",");
ppHRegAMD64(i->Ain.SseShuf.dst);
return;
-
+ case Ain_EvCheck:
+ vex_printf("(evCheck) decl ");
+ ppAMD64AMode(i->Ain.EvCheck.amCounter);
+ vex_printf("; jns nofail; jmp *");
+ ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
+ vex_printf("; nofail:");
+ return;
+ case Ain_ProfInc:
+ vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
+ return;
default:
vpanic("ppAMD64Instr");
}
addHRegUse(u, HRmModify, hregAMD64_RAX());
addHRegUse(u, HRmModify, hregAMD64_RDX());
return;
-//.. case Xin_Sh3232:
-//.. addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
-//.. addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
-//.. if (i->Xin.Sh3232.amt == 0)
-//.. addHRegUse(u, HRmRead, hregAMD64_ECX());
-//.. return;
case Ain_Push:
addRegUsage_AMD64RMI(u, i->Ain.Push.src);
addHRegUse(u, HRmModify, hregAMD64_RSP());
/* Upshot of this is that the assembler really must use r11,
and no other, as a destination temporary. */
return;
- case Ain_Goto:
- addRegUsage_AMD64RI(u, i->Ain.Goto.dst);
- addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */
- addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */
- if (i->Ain.Goto.jk != Ijk_Boring
- && i->Ain.Goto.jk != Ijk_Call
- && i->Ain.Goto.jk != Ijk_Ret)
- /* note, this is irrelevant since rbp is not actually
- available to the allocator. But still .. */
- addHRegUse(u, HRmWrite, hregAMD64_RBP());
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
+ conditionally exit the block. Hence we only need to list (1)
+ the registers that they read, and (2) the registers that they
+ write in the case where the block is not exited. (2) is
+ empty, hence only (1) is relevant here. */
+ case Ain_XDirect:
+ /* Don't bother to mention the write to %r11, since it is not
+ available to the allocator. */
+ addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
+ return;
+ case Ain_XIndir:
+ /* Ditto re %r11 */
+ addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
+ addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
+ return;
+ case Ain_XAssisted:
+ /* Ditto re %r11 and %rbp (the baseblock ptr) */
+ addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
+ addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
return;
case Ain_CMov64:
addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
case Ain_A87StSW:
addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
return;
-//.. case Xin_FpUnary:
-//.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
-//.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
-//.. return;
-//.. case Xin_FpBinary:
-//.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
-//.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
-//.. addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
-//.. return;
-//.. case Xin_FpLdSt:
-//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr);
-//.. addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
-//.. i->Xin.FpLdSt.reg);
-//.. return;
-//.. case Xin_FpLdStI:
-//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr);
-//.. addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
-//.. i->Xin.FpLdStI.reg);
-//.. return;
-//.. case Xin_Fp64to32:
-//.. addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
-//.. addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
-//.. return;
-//.. case Xin_FpCMov:
-//.. addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
-//.. addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
-//.. return;
case Ain_LdMXCSR:
addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
return;
-//.. case Xin_FpStSW_AX:
-//.. addHRegUse(u, HRmWrite, hregAMD64_EAX());
-//.. return;
case Ain_SseUComIS:
addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
return;
-//.. case Xin_SseConst:
-//.. addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
-//.. return;
case Ain_Sse32Fx4:
vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
return;
+ case Ain_EvCheck:
+ /* We expect both amodes only to mention %rbp, so this is in
+ fact pointless, since %rbp isn't allocatable, but anyway.. */
+ addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
+ addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
+ return;
+ case Ain_ProfInc:
+ addHRegUse(u, HRmWrite, hregAMD64_R11());
+ return;
default:
ppAMD64Instr(i, mode64);
vpanic("getRegUsage_AMD64Instr");
case Ain_Div:
mapRegs_AMD64RM(m, i->Ain.Div.src);
return;
-//.. case Xin_Sh3232:
-//.. mapReg(m, &i->Xin.Sh3232.src);
-//.. mapReg(m, &i->Xin.Sh3232.dst);
-//.. return;
case Ain_Push:
mapRegs_AMD64RMI(m, i->Ain.Push.src);
return;
case Ain_Call:
return;
- case Ain_Goto:
- mapRegs_AMD64RI(m, i->Ain.Goto.dst);
+ case Ain_XDirect:
+ mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
+ return;
+ case Ain_XIndir:
+ mapReg(m, &i->Ain.XIndir.dstGA);
+ mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
+ return;
+ case Ain_XAssisted:
+ mapReg(m, &i->Ain.XAssisted.dstGA);
+ mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
return;
case Ain_CMov64:
mapRegs_AMD64RM(m, i->Ain.CMov64.src);
case Ain_A87StSW:
mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
return;
-//.. case Xin_FpUnary:
-//.. mapReg(m, &i->Xin.FpUnary.src);
-//.. mapReg(m, &i->Xin.FpUnary.dst);
-//.. return;
-//.. case Xin_FpBinary:
-//.. mapReg(m, &i->Xin.FpBinary.srcL);
-//.. mapReg(m, &i->Xin.FpBinary.srcR);
-//.. mapReg(m, &i->Xin.FpBinary.dst);
-//.. return;
-//.. case Xin_FpLdSt:
-//.. mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr);
-//.. mapReg(m, &i->Xin.FpLdSt.reg);
-//.. return;
-//.. case Xin_FpLdStI:
-//.. mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr);
-//.. mapReg(m, &i->Xin.FpLdStI.reg);
-//.. return;
-//.. case Xin_Fp64to32:
-//.. mapReg(m, &i->Xin.Fp64to32.src);
-//.. mapReg(m, &i->Xin.Fp64to32.dst);
-//.. return;
-//.. case Xin_FpCMov:
-//.. mapReg(m, &i->Xin.FpCMov.src);
-//.. mapReg(m, &i->Xin.FpCMov.dst);
-//.. return;
case Ain_LdMXCSR:
mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
return;
-//.. case Xin_FpStSW_AX:
-//.. return;
case Ain_SseUComIS:
mapReg(m, &i->Ain.SseUComIS.srcL);
mapReg(m, &i->Ain.SseUComIS.srcR);
mapReg(m, &i->Ain.SseSDSS.src);
mapReg(m, &i->Ain.SseSDSS.dst);
return;
-//.. case Xin_SseConst:
-//.. mapReg(m, &i->Xin.SseConst.dst);
-//.. return;
case Ain_SseLdSt:
mapReg(m, &i->Ain.SseLdSt.reg);
mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
mapReg(m, &i->Ain.SseShuf.src);
mapReg(m, &i->Ain.SseShuf.dst);
return;
+ case Ain_EvCheck:
+ /* We expect both amodes only to mention %rbp, so this is in
+ fact pointless, since %rbp isn't allocatable, but anyway.. */
+ mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
+ mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
+ return;
+ case Ain_ProfInc:
+ /* hardwires r11 -- nothing to modify. */
+ return;
default:
ppAMD64Instr(i, mode64);
vpanic("mapRegs_AMD64Instr");
return p;
}
-//.. /* Emit fstp %st(i), 1 <= i <= 7 */
-//.. static UChar* do_fstp_st ( UChar* p, Int i )
-//.. {
-//.. vassert(1 <= i && i <= 7);
-//.. *p++ = 0xDD;
-//.. *p++ = 0xD8+i;
-//.. return p;
-//.. }
-//..
-//.. /* Emit fld %st(i), 0 <= i <= 6 */
-//.. static UChar* do_fld_st ( UChar* p, Int i )
-//.. {
-//.. vassert(0 <= i && i <= 6);
-//.. *p++ = 0xD9;
-//.. *p++ = 0xC0+i;
-//.. return p;
-//.. }
-//..
-//.. /* Emit f<op> %st(0) */
-//.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op )
-//.. {
-//.. switch (op) {
-//.. case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
-//.. case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
-//.. case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
-//.. case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
-//.. case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
-//.. case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
-//.. case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
-//.. case Xfp_MOV: break;
-//.. case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */
-//.. *p++ = 0xD9; *p++ = 0xF2; /* fptan */
-//.. *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
-//.. break;
-//.. default: vpanic("do_fop1_st: unknown op");
-//.. }
-//.. return p;
-//.. }
-//..
-//.. /* Emit f<op> %st(i), 1 <= i <= 5 */
-//.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i )
-//.. {
-//.. # define fake(_n) mkHReg((_n), HRcInt32, False)
-//.. Int subopc;
-//.. switch (op) {
-//.. case Xfp_ADD: subopc = 0; break;
-//.. case Xfp_SUB: subopc = 4; break;
-//.. case Xfp_MUL: subopc = 1; break;
-//.. case Xfp_DIV: subopc = 6; break;
-//.. default: vpanic("do_fop2_st: unknown op");
-//.. }
-//.. *p++ = 0xD8;
-//.. p = doAMode_R(p, fake(subopc), fake(i));
-//.. return p;
-//.. # undef fake
-//.. }
-//..
-//.. /* Push a 32-bit word on the stack. The word depends on tags[3:0];
-//.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
-//.. */
-//.. static UChar* push_word_from_tags ( UChar* p, UShort tags )
-//.. {
-//.. UInt w;
-//.. vassert(0 == (tags & ~0xF));
-//.. if (tags == 0) {
-//.. /* pushl $0x00000000 */
-//.. *p++ = 0x6A;
-//.. *p++ = 0x00;
-//.. }
-//.. else
-//.. /* pushl $0xFFFFFFFF */
-//.. if (tags == 0xF) {
-//.. *p++ = 0x6A;
-//.. *p++ = 0xFF;
-//.. } else {
-//.. vassert(0); /* awaiting test case */
-//.. w = 0;
-//.. if (tags & 1) w |= 0x000000FF;
-//.. if (tags & 2) w |= 0x0000FF00;
-//.. if (tags & 4) w |= 0x00FF0000;
-//.. if (tags & 8) w |= 0xFF000000;
-//.. *p++ = 0x68;
-//.. p = emit32(p, w);
-//.. }
-//.. return p;
-//.. }
-
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
- imperative to emit position-independent code. */
+ imperative to emit position-independent code. If the emitted
+ instruction was a profiler inc, set *is_profInc to True, else
+ leave it unchanged. */
-Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
+Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, AMD64Instr* i,
Bool mode64,
- void* dispatch_unassisted,
- void* dispatch_assisted )
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted )
{
UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
UInt xtra;
goto bad;
}
}
-//.. /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
-//.. allowed here. */
-//.. opc = subopc_imm = opc_imma = 0;
-//.. switch (i->Xin.Alu32M.op) {
-//.. case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
-//.. case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
-//.. default: goto bad;
-//.. }
-//.. switch (i->Xin.Alu32M.src->tag) {
-//.. case Xri_Reg:
-//.. *p++ = opc;
-//.. p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
-//.. i->Xin.Alu32M.dst);
-//.. goto done;
-//.. case Xri_Imm:
-//.. if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
-//.. *p++ = 0x83;
-//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
-//.. *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32;
-//.. goto done;
-//.. } else {
-//.. *p++ = 0x81;
-//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
-//.. p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
-//.. goto done;
-//.. }
-//.. default:
-//.. goto bad;
-//.. }
break;
case Ain_Sh64:
}
break;
-//.. case Xin_Sh3232:
-//.. vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
-//.. if (i->Xin.Sh3232.amt == 0) {
-//.. /* shldl/shrdl by %cl */
-//.. *p++ = 0x0F;
-//.. if (i->Xin.Sh3232.op == Xsh_SHL) {
-//.. *p++ = 0xA5;
-//.. } else {
-//.. *p++ = 0xAD;
-//.. }
-//.. p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
-//.. goto done;
-//.. }
-//.. break;
-
case Ain_Push:
switch (i->Ain.Push.src->tag) {
case Armi_Mem:
goto done;
}
- case Ain_Goto: {
- void* dispatch_to_use = NULL;
- vassert(dispatch_unassisted != NULL);
- vassert(dispatch_assisted != NULL);
+ case Ain_XDirect: {
+ /* NB: what goes on here has to be very closely coordinated with the
+ chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations can't
+ use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
+
+ HReg r11 = hregAMD64_R11();
/* Use ptmp for backpatching conditional jumps. */
ptmp = NULL;
/* First off, if this is conditional, create a conditional
jump over the rest of it. */
- if (i->Ain.Goto.cond != Acc_ALWAYS) {
+ if (i->Ain.XDirect.cond != Acc_ALWAYS) {
/* jmp fwds if !condition */
- *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1));
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
ptmp = p; /* fill in this bit later */
*p++ = 0; /* # of bytes to jump over; don't know how many yet. */
}
- /* If a non-boring, set %rbp (the guest state pointer)
- appropriately. Since these numbers are all small positive
- integers, we can get away with "movl $N, %ebp" rather than
- the longer "movq $N, %rbp". Also, decide which dispatcher we
- need to use. */
- dispatch_to_use = dispatch_assisted;
-
- /* movl $magic_number, %ebp */
- switch (i->Ain.Goto.jk) {
- case Ijk_ClientReq:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
- case Ijk_Sys_syscall:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break;
- case Ijk_Sys_int32:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_INT32); break;
- case Ijk_Yield:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_YIELD); break;
- case Ijk_EmWarn:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_EMWARN); break;
- case Ijk_MapFail:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
- case Ijk_NoDecode:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_NODECODE); break;
- case Ijk_TInval:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_TINVAL); break;
- case Ijk_NoRedir:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
- case Ijk_SigTRAP:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
- case Ijk_SigSEGV:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
- case Ijk_Ret:
- case Ijk_Call:
- case Ijk_Boring:
- dispatch_to_use = dispatch_unassisted;
- break;
- default:
- ppIRJumpKind(i->Ain.Goto.jk);
- vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind");
+ /* Update the guest RIP. */
+ /* movabsq $dstGA, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, i->Ain.XDirect.dstGA);
+ /* movq %r11, amRIP */
+ *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
+ *p++ = 0x89;
+ p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
+
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
+ to) backs up the return address, so as to find the address of
+ the first patchable byte. So: don't change the length of the
+ two instructions below. */
+ /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ void* disp_cp_chain_me
+ = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+ p = emit64(p, Ptr_to_ULong(disp_cp_chain_me));
+ /* call *%r11 */
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xD3;
+ /* --- END of PATCHABLE BYTES --- */
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Ain.XDirect.cond != Acc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
}
+ goto done;
+ }
- /* Get the destination address into %rax */
- if (i->Ain.Goto.dst->tag == Ari_Imm) {
- /* movl sign-ext($immediate), %rax ; ret */
- *p++ = 0x48;
- *p++ = 0xC7;
- *p++ = 0xC0;
- p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32);
- } else {
- vassert(i->Ain.Goto.dst->tag == Ari_Reg);
- /* movq %reg, %rax ; ret */
- if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) {
- *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
- *p++ = 0x89;
- p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
- }
+ case Ain_XIndir: {
+ /* We're generating transfers that could lead indirectly to a
+ chain-me, so we need to be sure this is actually allowed --
+ no-redir translations are not allowed to reach normal
+ translations without going through the scheduler. That means
+ no XDirects or XIndirs out from no-redir translations.
+ Hence: */
+ vassert(disp_cp_xindir != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Ain.XIndir.cond != Acc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
}
- /* Get the dispatcher address into %rdx. This has to happen
- after the load of %rax since %rdx might be carrying the value
- destined for %rax immediately prior to this Ain_Goto. */
- vassert(sizeof(ULong) == sizeof(void*));
+ /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
+ *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
+ /* movabsq $disp_indir, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, Ptr_to_ULong(disp_cp_xindir));
+ /* jmp *%r11 */
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xE3;
- if (fitsIn32Bits(Ptr_to_ULong(dispatch_to_use))) {
- /* movl sign-extend(imm32), %rdx */
- *p++ = 0x48;
- *p++ = 0xC7;
- *p++ = 0xC2;
- p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));
- } else {
- /* movabsq $imm64, %rdx */
- *p++ = 0x48;
- *p++ = 0xBA;
- p = emit64(p, Ptr_to_ULong(dispatch_to_use));
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Ain.XIndir.cond != Acc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ }
+ goto done;
+ }
+
+ case Ain_XAssisted: {
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
}
- /* jmp *%rdx */
+
+ /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
+ *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
+ /* movl $magic_number, %ebp. Since these numbers are all small positive
+ integers, we can get away with "movl $N, %ebp" rather than
+ the longer "movq $N, %rbp". */
+ UInt trcval = 0;
+ switch (i->Ain.XAssisted.jk) {
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
+ case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
+ case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
+ case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
+ case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
+ case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
+ case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
+ case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
+ /* We don't expect to see the following being assisted. */
+ case Ijk_Ret:
+ case Ijk_Call:
+ /* fallthrough */
+ default:
+ ppIRJumpKind(i->Ain.XAssisted.jk);
+ vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
+ }
+ vassert(trcval != 0);
+ *p++ = 0xBD;
+ p = emit32(p, trcval);
+ /* movabsq $disp_assisted, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, Ptr_to_ULong(disp_cp_xassisted));
+ /* jmp *%r11 */
+ *p++ = 0x41;
*p++ = 0xFF;
- *p++ = 0xE2;
+ *p++ = 0xE3;
/* Fix up the conditional jump, if there was one. */
- if (i->Ain.Goto.cond != Acc_ALWAYS) {
+ if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
Int delta = p - ptmp;
- vassert(delta > 0 && delta < 30);
+ vassert(delta > 0 && delta < 40);
*ptmp = toUChar(delta-1);
}
goto done;
}
break;
-//.. case Xin_FpUnary:
-//.. /* gop %src, %dst
-//.. --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
-//.. */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
-//.. p = do_fop1_st(p, i->Xin.FpUnary.op);
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
-//.. goto done;
-//..
-//.. case Xin_FpBinary:
-//.. if (i->Xin.FpBinary.op == Xfp_YL2X
-//.. || i->Xin.FpBinary.op == Xfp_YL2XP1) {
-//.. /* Have to do this specially. */
-//.. /* ffree %st7 ; fld %st(srcL) ;
-//.. ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
-//.. *p++ = 0xD9;
-//.. *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9;
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
-//.. goto done;
-//.. }
-//.. if (i->Xin.FpBinary.op == Xfp_ATAN) {
-//.. /* Have to do this specially. */
-//.. /* ffree %st7 ; fld %st(srcL) ;
-//.. ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
-//.. *p++ = 0xD9; *p++ = 0xF3;
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
-//.. goto done;
-//.. }
-//.. if (i->Xin.FpBinary.op == Xfp_PREM
-//.. || i->Xin.FpBinary.op == Xfp_PREM1
-//.. || i->Xin.FpBinary.op == Xfp_SCALE) {
-//.. /* Have to do this specially. */
-//.. /* ffree %st7 ; fld %st(srcR) ;
-//.. ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
-//.. fincstp ; ffree %st7 */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
-//.. *p++ = 0xD9;
-//.. switch (i->Xin.FpBinary.op) {
-//.. case Xfp_PREM: *p++ = 0xF8; break;
-//.. case Xfp_PREM1: *p++ = 0xF5; break;
-//.. case Xfp_SCALE: *p++ = 0xFD; break;
-//.. default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)");
-//.. }
-//.. p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
-//.. *p++ = 0xD9; *p++ = 0xF7;
-//.. p = do_ffree_st7(p);
-//.. goto done;
-//.. }
-//.. /* General case */
-//.. /* gop %srcL, %srcR, %dst
-//.. --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
-//.. */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
-//.. p = do_fop2_st(p, i->Xin.FpBinary.op,
-//.. 1+hregNumber(i->Xin.FpBinary.srcR));
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
-//.. goto done;
-//..
-//.. case Xin_FpLdSt:
-//.. vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8);
-//.. if (i->Xin.FpLdSt.isLoad) {
-//.. /* Load from memory into %fakeN.
-//.. --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1)
-//.. */
-//.. p = do_ffree_st7(p);
-//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
-//.. p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
-//.. goto done;
-//.. } else {
-//.. /* Store from %fakeN into memory.
-//.. --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
-//.. */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
-//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
-//.. p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
-//.. goto done;
-//.. }
-//.. break;
-//..
-//.. case Xin_FpLdStI:
-//.. if (i->Xin.FpLdStI.isLoad) {
-//.. /* Load from memory into %fakeN, converting from an int.
-//.. --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
-//.. */
-//.. switch (i->Xin.FpLdStI.sz) {
-//.. case 8: opc = 0xDF; subopc_imm = 5; break;
-//.. case 4: opc = 0xDB; subopc_imm = 0; break;
-//.. case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
-//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)");
-//.. }
-//.. p = do_ffree_st7(p);
-//.. *p++ = opc;
-//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
-//.. goto done;
-//.. } else {
-//.. /* Store from %fakeN into memory, converting to an int.
-//.. --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
-//.. */
-//.. switch (i->Xin.FpLdStI.sz) {
-//.. case 8: opc = 0xDF; subopc_imm = 7; break;
-//.. case 4: opc = 0xDB; subopc_imm = 3; break;
-//.. case 2: opc = 0xDF; subopc_imm = 3; break;
-//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)");
-//.. }
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
-//.. *p++ = opc;
-//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
-//.. goto done;
-//.. }
-//.. break;
-//..
-//.. case Xin_Fp64to32:
-//.. /* ffree %st7 ; fld %st(src) */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
-//.. /* subl $4, %esp */
-//.. *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
-//.. /* fstps (%esp) */
-//.. *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
-//.. /* flds (%esp) */
-//.. *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
-//.. /* addl $4, %esp */
-//.. *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
-//.. /* fstp %st(1+dst) */
-//.. p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
-//.. goto done;
-//..
-//.. case Xin_FpCMov:
-//.. /* jmp fwds if !condition */
-//.. *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1);
-//.. *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
-//.. ptmp = p;
-//..
-//.. /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
-//.. p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
-//..
-//.. /* Fill in the jump offset. */
-//.. *(ptmp-1) = p - ptmp;
-//.. goto done;
-
case Ain_LdMXCSR:
*p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
*p++ = 0x0F;
p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
goto done;
-//.. case Xin_FpStSW_AX:
-//.. /* note, this emits fnstsw %ax, not fstsw %ax */
-//.. *p++ = 0xDF;
-//.. *p++ = 0xE0;
-//.. goto done;
-
case Ain_SseUComIS:
/* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
/* ucomi[sd] %srcL, %srcR */
vreg2ireg(i->Ain.SseSDSS.src) );
goto done;
-//..
-//.. case Xin_FpCmp:
-//.. /* gcmp %fL, %fR, %dst
-//.. -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
-//.. fnstsw %ax ; movl %eax, %dst
-//.. */
-//.. /* ffree %st7 */
-//.. p = do_ffree_st7(p);
-//.. /* fpush %fL */
-//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
-//.. /* fucomp %(fR+1) */
-//.. *p++ = 0xDD;
-//.. *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)));
-//.. /* fnstsw %ax */
-//.. *p++ = 0xDF;
-//.. *p++ = 0xE0;
-//.. /* movl %eax, %dst */
-//.. *p++ = 0x89;
-//.. p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst);
-//.. goto done;
-//..
-//.. case Xin_SseConst: {
-//.. UShort con = i->Xin.SseConst.con;
-//.. p = push_word_from_tags(p, (con >> 12) & 0xF);
-//.. p = push_word_from_tags(p, (con >> 8) & 0xF);
-//.. p = push_word_from_tags(p, (con >> 4) & 0xF);
-//.. p = push_word_from_tags(p, con & 0xF);
-//.. /* movl (%esp), %xmm-dst */
-//.. *p++ = 0x0F;
-//.. *p++ = 0x10;
-//.. *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst));
-//.. *p++ = 0x24;
-//.. /* addl $16, %esp */
-//.. *p++ = 0x83;
-//.. *p++ = 0xC4;
-//.. *p++ = 0x10;
-//.. goto done;
-//.. }
-
case Ain_SseLdSt:
if (i->Ain.SseLdSt.sz == 8) {
*p++ = 0xF2;
case Asse_MAXF: *p++ = 0x5F; break;
case Asse_MINF: *p++ = 0x5D; break;
case Asse_MULF: *p++ = 0x59; break;
-//.. case Xsse_RCPF: *p++ = 0x53; break;
-//.. case Xsse_RSQRTF: *p++ = 0x52; break;
case Asse_SQRTF: *p++ = 0x51; break;
case Asse_SUBF: *p++ = 0x5C; break;
case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
case Asse_MAXF: *p++ = 0x5F; break;
case Asse_MINF: *p++ = 0x5D; break;
case Asse_MULF: *p++ = 0x59; break;
-//.. case Xsse_RCPF: *p++ = 0x53; break;
-//.. case Xsse_RSQRTF: *p++ = 0x52; break;
case Asse_SQRTF: *p++ = 0x51; break;
case Asse_SUBF: *p++ = 0x5C; break;
case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
*p++ = (UChar)(i->Ain.SseShuf.order);
goto done;
+ case Ain_EvCheck: {
+ /* We generate:
+ (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
+ (2 bytes) jns nofail expected taken
+ (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
+ nofail:
+ */
+ /* This is heavily asserted re instruction lengths. It needs to
+ be. If we get given unexpected forms of .amCounter or
+ .amFailAddr -- basically, anything that's not of the form
+ uimm7(%rbp) -- the checks below are likely to fail. */
+ /* Note also that after the decl we must be very careful not to
+ read the carry flag, else we get a partial flags stall.
+ js/jns avoids that, though. */
+ UChar* p0 = p;
+ /* --- decl 8(%rbp) --- */
+ /* Need to compute the REX byte for the decl in order to prove
+ that we don't need it, since this is a 32-bit dec and all
+ registers involved in the amode are < r8. "fake(1)" because
+ there's no register in this encoding; instead the register
+ field is used as a sub opcode. The encoding for "decl r/m32"
+ is FF /1, hence the fake(1). */
+ rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter));
+ if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter);
+ vassert(p - p0 == 3);
+ /* --- jns nofail --- */
+ *p++ = 0x79;
+ *p++ = 0x03; /* need to check this 0x03 after the next insn */
+ vassert(p - p0 == 5);
+ /* --- jmp* 0(%rbp) --- */
+ /* Once again, verify we don't need REX. The encoding is FF /4.
+ We don't need REX.W since by default FF /4 in 64-bit mode
+ implies a 64 bit load. */
+ rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr));
+ if (rex != 0x40) goto bad;
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr);
+ vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
+ /* And crosscheck .. */
+ vassert(evCheckSzB_AMD64() == 8);
+ goto done;
+ }
+
+ case Ain_ProfInc: {
+ /* We generate movabsq $0, %r11
+ incq (%r11)
+ in the expectation that a later call to LibVEX_patchProfCtr
+ will be used to fill in the immediate field once the right
+ value is known.
+ 49 BB 00 00 00 00 00 00 00 00
+ 49 FF 03
+ */
+ *p++ = 0x49; *p++ = 0xBB;
+ *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
+ *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
+ *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
+ /* Tell the caller .. */
+ vassert(!(*is_profInc));
+ *is_profInc = True;
+ goto done;
+ }
+
default:
goto bad;
}
# undef fake
}
+
+/* How big is an event check? See case for Ain_EvCheck in
+ emit_AMD64Instr just above. That crosschecks what this returns, so
+ we can tell if we're inconsistent. */
+Int evCheckSzB_AMD64 ( void )
+{
+ return 8;
+}
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange chainXDirect_AMD64 ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to )
+{
+ /* What we're expecting to see is:
+ movabsq $disp_cp_chain_me_EXPECTED, %r11
+ call *%r11
+ viz
+ 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
+ 41 FF D3
+ */
+ UChar* p = (UChar*)place_to_chain;
+ vassert(p[0] == 0x49);
+ vassert(p[1] == 0xBB);
+ vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
+ vassert(p[10] == 0x41);
+ vassert(p[11] == 0xFF);
+ vassert(p[12] == 0xD3);
+ /* And what we want to change it to is either:
+ (general case):
+ movabsq $place_to_jump_to, %r11
+ jmpq *%r11
+ viz
+ 49 BB <8 bytes value == place_to_jump_to>
+ 41 FF E3
+ So it's the same length (convenient, huh) and we don't
+ need to change all the bits.
+ ---OR---
+ in the case where the displacement falls within 32 bits
+ jmpq disp32 where disp32 is relative to the next insn
+ ud2; ud2; ud2; ud2
+ viz
+ E9 <4 bytes == disp32>
+ 0F 0B 0F 0B 0F 0B 0F 0B
+
+ In both cases the replacement has the same length as the original.
+ To remain sane & verifiable,
+ (1) limit the displacement for the short form to
+ (say) +/- one billion, so as to avoid wraparound
+ off-by-ones
+ (2) even if the short form is applicable, once every (say)
+ 1024 times use the long form anyway, so as to maintain
+ verifiability
+ */
+ /* This is the delta we need to put into a JMP d32 insn. It's
+ relative to the start of the next insn, hence the -5. */
+ Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
+ Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
+
+ static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
+ if (shortOK) {
+ shortCTR++; // thread safety bleh
+ if (0 == (shortCTR & 0x3FF)) {
+ shortOK = False;
+ if (0)
+ vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
+ "using long jmp\n", shortCTR);
+ }
+ }
+
+ /* And make the modifications. */
+ if (shortOK) {
+ p[0] = 0xE9;
+ p[1] = (delta >> 0) & 0xFF;
+ p[2] = (delta >> 8) & 0xFF;
+ p[3] = (delta >> 16) & 0xFF;
+ p[4] = (delta >> 24) & 0xFF;
+ p[5] = 0x0F; p[6] = 0x0B;
+ p[7] = 0x0F; p[8] = 0x0B;
+ p[9] = 0x0F; p[10] = 0x0B;
+ p[11] = 0x0F; p[12] = 0x0B;
+ /* sanity check on the delta -- top 32 are all 0 or all 1 */
+ delta >>= 32;
+ vassert(delta == 0LL || delta == -1LL);
+ } else {
+ /* Minimal modifications from the starting sequence. */
+ *(ULong*)(&p[2]) = Ptr_to_ULong(place_to_jump_to);
+ p[12] = 0xE3;
+ }
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
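/* Sketch only -- not part of this patch.  The chain/unchain pair is
   expected to round-trip on the 13-byte movabsq/call-*%r11 sequence
   that Ain_XDirect emits.  'place', 'disp_cp_chain_me' and 'target'
   are hypothetical names for this example. */
static void example_chain_roundtrip ( void* place,
                                      void* disp_cp_chain_me,
                                      void* target )
{
   /* Redirect the exit so it jumps straight to 'target' ... */
   (void) chainXDirect_AMD64(place, disp_cp_chain_me, target);
   /* ... and later restore the original chain-me request.  The second
      argument is what the site is expected to currently jump to. */
   (void) unchainXDirect_AMD64(place, target, disp_cp_chain_me);
}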
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me )
+{
+ /* What we're expecting to see is either:
+ (general case)
+ movabsq $place_to_jump_to_EXPECTED, %r11
+ jmpq *%r11
+ viz
+ 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
+ 41 FF E3
+ ---OR---
+ in the case where the displacement falls within 32 bits
+ jmpq d32
+ ud2; ud2; ud2; ud2
+ viz
+ E9 <4 bytes == disp32>
+ 0F 0B 0F 0B 0F 0B 0F 0B
+ */
+ UChar* p = (UChar*)place_to_unchain;
+ Bool valid = False;
+ if (p[0] == 0x49 && p[1] == 0xBB
+ && *(ULong*)(&p[2]) == Ptr_to_ULong(place_to_jump_to_EXPECTED)
+ && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
+ /* it's the long form */
+ valid = True;
+ }
+ else
+ if (p[0] == 0xE9
+ && p[5] == 0x0F && p[6] == 0x0B
+ && p[7] == 0x0F && p[8] == 0x0B
+ && p[9] == 0x0F && p[10] == 0x0B
+ && p[11] == 0x0F && p[12] == 0x0B) {
+ /* It's the short form. Check the offset is right. */
+ Int s32 = *(Int*)(&p[1]);
+ Long s64 = (Long)s32;
+ if ((UChar*)p + 5 + s64 == (UChar*)place_to_jump_to_EXPECTED) {
+ valid = True;
+ if (0)
+ vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
+ }
+ }
+ vassert(valid);
+ /* And what we want to change it to is:
+ movabsq $disp_cp_chain_me, %r11
+ call *%r11
+ viz
+ 49 BB <8 bytes value == disp_cp_chain_me>
+ 41 FF D3
+ So it's the same length (convenient, huh).
+ */
+ p[0] = 0x49;
+ p[1] = 0xBB;
+ *(ULong*)(&p[2]) = Ptr_to_ULong(disp_cp_chain_me);
+ p[10] = 0x41;
+ p[11] = 0xFF;
+ p[12] = 0xD3;
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
+
+
+/* Patch the counter address into a profile inc point, as previously
+ created by the Ain_ProfInc case for emit_AMD64Instr. */
+VexInvalRange patchProfInc_AMD64 ( void* place_to_patch,
+ ULong* location_of_counter )
+{
+ vassert(sizeof(ULong*) == 8);
+ UChar* p = (UChar*)place_to_patch;
+ vassert(p[0] == 0x49);
+ vassert(p[1] == 0xBB);
+ vassert(p[2] == 0x00);
+ vassert(p[3] == 0x00);
+ vassert(p[4] == 0x00);
+ vassert(p[5] == 0x00);
+ vassert(p[6] == 0x00);
+ vassert(p[7] == 0x00);
+ vassert(p[8] == 0x00);
+ vassert(p[9] == 0x00);
+ vassert(p[10] == 0x49);
+ vassert(p[11] == 0xFF);
+ vassert(p[12] == 0x03);
+ ULong imm64 = (ULong)Ptr_to_ULong(location_of_counter);
+ p[2] = imm64 & 0xFF; imm64 >>= 8;
+ p[3] = imm64 & 0xFF; imm64 >>= 8;
+ p[4] = imm64 & 0xFF; imm64 >>= 8;
+ p[5] = imm64 & 0xFF; imm64 >>= 8;
+ p[6] = imm64 & 0xFF; imm64 >>= 8;
+ p[7] = imm64 & 0xFF; imm64 >>= 8;
+ p[8] = imm64 & 0xFF; imm64 >>= 8;
+ p[9] = imm64 & 0xFF; imm64 >>= 8;
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
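
(Illustrative aside, not part of this patch: a hedged sketch of how
patchProfInc_AMD64 is meant to be used.  The ProfInc stub is emitted
with an all-zeroes immediate, and the real counter address has to be
patched in before the translation is first run.  'translation' and
'profinc_offset' are hypothetical names used only for this example.)

static ULong example_counter = 0;   /* hypothetical per-block counter */

static void example_finish_profiled_translation ( UChar* translation,
                                                   Int profinc_offset )
{
   VexInvalRange vir
      = patchProfInc_AMD64( translation + profinc_offset,
                            &example_counter );
   /* 'vir' names the bytes that changed; the caller does whatever
      cache maintenance that implies. */
   (void)vir;
}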
+
+
/*---------------------------------------------------------------*/
/*--- end host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/
Ain_Alu32R, /* 32-bit add/sub/and/or/xor/cmp, dst=REG (a la Alu64R) */
Ain_MulL, /* widening multiply */
Ain_Div, /* div and mod */
-//.. Xin_Sh3232, /* shldl or shrdl */
Ain_Push, /* push 64-bit value on stack */
Ain_Call, /* call to address in register */
- Ain_Goto, /* conditional/unconditional jmp to dst */
+ Ain_XDirect, /* direct transfer to GA */
+ Ain_XIndir, /* indirect transfer to GA */
+ Ain_XAssisted, /* assisted transfer to GA */
Ain_CMov64, /* conditional move */
Ain_MovxLQ, /* reg-reg move, zx-ing/sx-ing top half */
Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
Ain_ACAS, /* 8/16/32/64-bit lock;cmpxchg */
Ain_DACAS, /* lock;cmpxchg8b/16b (doubleword ACAS, 2 x
32-bit or 2 x 64-bit only) */
-
Ain_A87Free, /* free up x87 registers */
Ain_A87PushPop, /* x87 loads/stores */
Ain_A87FpOp, /* x87 operations */
Ain_A87LdCW, /* load x87 control word */
Ain_A87StSW, /* store x87 status word */
-//..
-//.. Xin_FpUnary, /* FP fake unary op */
-//.. Xin_FpBinary, /* FP fake binary op */
-//.. Xin_FpLdSt, /* FP fake load/store */
-//.. Xin_FpLdStI, /* FP fake load/store, converting to/from Int */
-//.. Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single */
-//.. Xin_FpCMov, /* FP fake floating point conditional move */
Ain_LdMXCSR, /* load %mxcsr */
-//.. Xin_FpStSW_AX, /* fstsw %ax */
Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int
register */
Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */
Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */
Ain_SseSDSS, /* scalar float32 to/from float64 */
-//..
-//.. Xin_SseConst, /* Generate restricted SSE literal */
Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
constraints, upper 96/64/0 bits arbitrary */
Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */
Ain_Sse64FLo, /* SSE binary, 64F in lowest lane only */
Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */
Ain_SseCMov, /* SSE conditional move */
- Ain_SseShuf /* SSE2 shuffle (pshufd) */
+ Ain_SseShuf, /* SSE2 shuffle (pshufd) */
+ Ain_EvCheck, /* Event check */
+ Ain_ProfInc /* 64-bit profile counter increment */
}
AMD64InstrTag;
Int sz; /* 4 or 8 only */
AMD64RM* src;
} Div;
-//.. /* shld/shrd. op may only be Xsh_SHL or Xsh_SHR */
-//.. struct {
-//.. X86ShiftOp op;
-//.. UInt amt; /* shift amount, or 0 means %cl */
-//.. HReg src;
-//.. HReg dst;
-//.. } Sh3232;
struct {
AMD64RMI* src;
} Push;
Addr64 target;
Int regparms; /* 0 .. 6 */
} Call;
- /* Pseudo-insn. Goto dst, on given condition (which could be
- Acc_ALWAYS). */
- struct {
+ /* Update the guest RIP value, then exit requesting to chain
+ to it. May be conditional. */
+ struct {
+ Addr64 dstGA; /* next guest address */
+ AMD64AMode* amRIP; /* amode in guest state for RIP */
+ AMD64CondCode cond; /* can be Acc_ALWAYS */
+ Bool toFastEP; /* chain to the slow or fast point? */
+ } XDirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ AMD64AMode* amRIP;
+ AMD64CondCode cond; /* can be Acc_ALWAYS */
+ } XIndir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ AMD64AMode* amRIP;
+ AMD64CondCode cond; /* can be Acc_ALWAYS */
IRJumpKind jk;
- AMD64CondCode cond;
- AMD64RI* dst;
- } Goto;
+ } XAssisted;
/* Mov src to dst on the given condition, which may not
be the bogus Acc_ALWAYS. */
struct {
AMD64AMode* addr;
}
LdMXCSR;
-//.. /* fstsw %ax */
-//.. struct {
-//.. /* no fields */
-//.. }
-//.. FpStSW_AX;
/* ucomisd/ucomiss, then get %rflags into int register */
struct {
UChar sz; /* 4 or 8 only */
HReg src;
HReg dst;
} SseSDSS;
-//..
-//.. /* Simplistic SSE[123] */
-//.. struct {
-//.. UShort con;
-//.. HReg dst;
-//.. } SseConst;
struct {
Bool isLoad;
UChar sz; /* 4, 8 or 16 only */
HReg src;
HReg dst;
} SseShuf;
+ struct {
+ AMD64AMode* amCounter;
+ AMD64AMode* amFailAddr;
+ } EvCheck;
+ struct {
+ /* No fields. The address of the counter to inc is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } ProfInc;
} Ain;
}
extern AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst );
extern AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* );
extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );
-//.. extern AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp, UInt amt, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* );
extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int );
-extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond, AMD64RI* dst );
+extern AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond, Bool toFastEP );
+extern AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond );
+extern AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond, IRJumpKind jk );
extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
extern AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
extern AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr );
-//..
-//.. extern AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst );
-//.. extern AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst );
-//.. extern AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* );
-//.. extern AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* );
-//.. extern AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst );
-//.. extern AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* );
-//.. extern AMD64Instr* AMD64Instr_FpStSW_AX ( void );
extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst );
extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst );
-//..
-//.. extern AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst );
extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* );
extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
+ AMD64AMode* amFailAddr );
+extern AMD64Instr* AMD64Instr_ProfInc ( void );
extern void ppAMD64Instr ( AMD64Instr*, Bool );
extern void getRegUsage_AMD64Instr ( HRegUsage*, AMD64Instr*, Bool );
extern void mapRegs_AMD64Instr ( HRegRemap*, AMD64Instr*, Bool );
extern Bool isMove_AMD64Instr ( AMD64Instr*, HReg*, HReg* );
-extern Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr*,
- Bool,
- void* dispatch_unassisted,
- void* dispatch_assisted );
+extern Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, AMD64Instr* i,
+ Bool mode64,
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted );
extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
HReg rreg, Int offset, Bool );
extern void getAllocableRegs_AMD64 ( Int*, HReg** );
-extern HInstrArray* iselSB_AMD64 ( IRSB*, VexArch,
- VexArchInfo*,
- VexAbiInfo* );
+extern HInstrArray* iselSB_AMD64 ( IRSB*,
+ VexArch,
+ VexArchInfo*,
+ VexAbiInfo*,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga );
+
+/* How big is an event check?  This is kind of a kludge because it
+   depends on the offsets of host_EvC_FAILADDR and host_EvC_COUNTER:
+   it assumes both are < 128, so that the short offset encoding can
+   be used.  This is all checked with assertions, so in the worst
+   case we will merely assert at startup. */
+extern Int evCheckSzB_AMD64 ( void );
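
(Illustrative aside, not part of this patch: one plausible consumer of
this value.  Since the very first instruction of a translation is the
event check, the fast entry point -- the one that skips the check --
presumably sits evCheckSzB_AMD64() bytes past the slow one.  The
helper name below is hypothetical.)

static void* example_fast_entry_point ( void* slow_entry_point )
{
   return (void*)( (UChar*)slow_entry_point + evCheckSzB_AMD64() );
}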
+
+/* Perform chaining and unchaining of an XDirect jump. */
+extern VexInvalRange chainXDirect_AMD64 ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to );
+
+extern VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me );
+
+/* Patch the counter location into an existing ProfInc point. */
+extern VexInvalRange patchProfInc_AMD64 ( void* place_to_patch,
+ ULong* location_of_counter );
+
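(Illustrative aside, not part of this patch: a hedged sketch of the
chain/unchain cycle these declarations support.  A freshly emitted
XDirect exit calls disp_cp_chain_me; once the destination translation
exists it is rewritten into a direct jump, and the call can later be
reinstated if the destination is discarded.  All argument names come
from the declarations above.)

static void example_chain_cycle ( void* place_to_chain,
                                  void* disp_cp_chain_me,
                                  void* place_to_jump_to )
{
   /* chain: replace the call-to-chain-me with a jump to the target */
   VexInvalRange vir1
      = chainXDirect_AMD64( place_to_chain, disp_cp_chain_me,
                            place_to_jump_to );
   /* unchain: restore the original call-to-chain-me */
   VexInvalRange vir2
      = unchainXDirect_AMD64( place_to_chain, place_to_jump_to,
                              disp_cp_chain_me );
   (void)vir1; (void)vir2;
}
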
#endif /* ndef __VEX_HOST_AMD64_DEFS_H */
64-bit virtual HReg, which holds the high half
of the value.
+ - The host subarchitecture we are selecting insns for.
+ This is set at the start and does not change.
+
- The code array, that is, the insns selected so far.
- A counter, for generating new virtual registers.
- - The host subarchitecture we are selecting insns for.
- This is set at the start and does not change.
+ - A Bool for indicating whether we may generate chain-me
+ instructions for control flow transfers, or whether we must use
+ XAssisted.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any insn
+     in this block.  It is set at the start and does not change.  This is
+ used for detecting jumps which are definitely forward-edges from
+ this block, and therefore can be made (chained) to the fast entry
+ point of the destination, thereby avoiding the destination's
+ event check.
Note, this is all host-independent. (JRS 20050201: well, kinda
... not completely. Compare with ISelEnv for X86.)
typedef
struct {
+      /* Constants -- set at the start and do not change. */
IRTypeEnv* type_env;
HReg* vregmap;
HReg* vregmapHI;
Int n_vregmap;
- HInstrArray* code;
+ UInt hwcaps;
- Int vreg_ctr;
+ Bool chainingAllowed;
+ Addr64 max_ga;
- UInt hwcaps;
+ /* These are modified as we go along. */
+ HInstrArray* code;
+ Int vreg_ctr;
}
ISelEnv;
/* --------- EXIT --------- */
case Ist_Exit: {
- AMD64RI* dst;
- AMD64CondCode cc;
if (stmt->Ist.Exit.dst->tag != Ico_U64)
vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
- dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
- cc = iselCondCode(env,stmt->Ist.Exit.guard);
- addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
- return;
+
+ AMD64CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ AMD64AMode* amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP,
+ hregAMD64_RBP());
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring) {
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
+ addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
+ amRIP, cc, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn: {
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
default: break;
/*--- ISEL: Basic block terminators (Nexts) ---*/
/*---------------------------------------------------------*/
-static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+static void iselNext ( ISelEnv* env,
+ IRExpr* next, IRJumpKind jk, Int offsIP )
{
- AMD64RI* ri;
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
ppIRJumpKind(jk);
- vex_printf("} ");
- ppIRExpr(next);
- vex_printf("\n");
+ vex_printf( "\n");
+ }
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst* cdst = next->Iex.Const.con;
+ vassert(cdst->tag == Ico_U64);
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr64)cdst->Ico.U64) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
+ addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64,
+ amRIP, Acc_ALWAYS,
+ toFastEP));
+ } else {
+ /* .. very occasionally .. */
+            /* We can't use chaining, so ask for an assisted transfer,
+               as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, next);
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
+ HReg r = iselIntExpr_R(env, next);
+ AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
+ if (env->chainingAllowed) {
+ addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS));
+ } else {
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoRedir:
+ case Ijk_Yield: case Ijk_SigTRAP: {
+ HReg r = iselIntExpr_R(env, next);
+ AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk));
+ return;
+ }
+ default:
+ break;
}
- ri = iselIntExpr_RI(env, next);
- addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri));
+
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(jk);
+ vex_printf( "\n");
+ vassert(0); // are we expecting any other kind?
}
/* Translate an entire SB to amd64 code. */
-HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host,
- VexArchInfo* archinfo_host,
- VexAbiInfo* vbi/*UNUSED*/ )
+HInstrArray* iselSB_AMD64 ( IRSB* bb,
+ VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga )
{
- Int i, j;
- HReg hreg, hregHI;
- ISelEnv* env;
- UInt hwcaps_host = archinfo_host->hwcaps;
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ AMD64AMode *amCounter, *amFailAddr;
/* sanity ... */
vassert(arch_host == VexArchAMD64);
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
/* and finally ... */
- env->hwcaps = hwcaps_host;
+ env->chainingAllowed = chainingAllowed;
+ env->hwcaps = hwcaps_host;
+ env->max_ga = max_ga;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
}
env->vreg_ctr = j;
+ /* The very first instruction must be an event check. */
+ amCounter = AMD64AMode_IR(offs_Host_EvC_Counter, hregAMD64_RBP());
+ amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP());
+ addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));
+
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+ translation is used, by a call to LibVEX_patchProfCtr. */
+ if (addProfInc) {
+ addInstr(env, AMD64Instr_ProfInc());
+ }
+
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
if (bb->stmts[i])
- iselStmt(env,bb->stmts[i]);
+ iselStmt(env, bb->stmts[i]);
- iselNext(env,bb->next,bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
i->ARMin.LdSt8U.amode = amode;
return i;
}
-//extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* );
-ARMInstr* ARMInstr_Goto ( IRJumpKind jk, ARMCondCode cond, HReg gnext ) {
- ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
- i->tag = ARMin_Goto;
- i->ARMin.Goto.jk = jk;
- i->ARMin.Goto.cond = cond;
- i->ARMin.Goto.gnext = gnext;
+ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond, Bool toFastEP ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_XDirect;
+ i->ARMin.XDirect.dstGA = dstGA;
+ i->ARMin.XDirect.amR15T = amR15T;
+ i->ARMin.XDirect.cond = cond;
+ i->ARMin.XDirect.toFastEP = toFastEP;
+ return i;
+}
+ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_XIndir;
+ i->ARMin.XIndir.dstGA = dstGA;
+ i->ARMin.XIndir.amR15T = amR15T;
+ i->ARMin.XIndir.cond = cond;
+ return i;
+}
+ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond, IRJumpKind jk ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_XAssisted;
+ i->ARMin.XAssisted.dstGA = dstGA;
+ i->ARMin.XAssisted.amR15T = amR15T;
+ i->ARMin.XAssisted.cond = cond;
+ i->ARMin.XAssisted.jk = jk;
return i;
}
ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
return i;
}
+ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
+ ARMAMode1* amFailAddr ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_EvCheck;
+ i->ARMin.EvCheck.amCounter = amCounter;
+ i->ARMin.EvCheck.amFailAddr = amFailAddr;
+ return i;
+}
+
+ARMInstr* ARMInstr_ProfInc ( void ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_ProfInc;
+ return i;
+}
+
/* ... */
void ppARMInstr ( ARMInstr* i ) {
return;
case ARMin_Ld8S:
goto unhandled;
- case ARMin_Goto:
- if (i->ARMin.Goto.cond != ARMcc_AL) {
- vex_printf("if (%%cpsr.%s) { ",
- showARMCondCode(i->ARMin.Goto.cond));
- } else {
- vex_printf("if (1) { ");
- }
- if (i->ARMin.Goto.jk != Ijk_Boring
- && i->ARMin.Goto.jk != Ijk_Call
- && i->ARMin.Goto.jk != Ijk_Ret) {
- vex_printf("mov r8, $");
- ppIRJumpKind(i->ARMin.Goto.jk);
- vex_printf(" ; ");
- }
- vex_printf("mov r0, ");
- ppHRegARM(i->ARMin.Goto.gnext);
- vex_printf(" ; bx r14");
- if (i->ARMin.Goto.cond != ARMcc_AL) {
- vex_printf(" }");
- } else {
- vex_printf(" }");
- }
+ case ARMin_XDirect:
+ vex_printf("(xDirect) ");
+ vex_printf("if (%%cpsr.%s) { ",
+ showARMCondCode(i->ARMin.XDirect.cond));
+ vex_printf("movw r12,0x%x; ",
+ (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
+ vex_printf("movt r12,0x%x; ",
+ (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
+ vex_printf("str r12,");
+ ppARMAMode1(i->ARMin.XDirect.amR15T);
+ vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
+ i->ARMin.XDirect.toFastEP ? "fast" : "slow");
+ vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
+ i->ARMin.XDirect.toFastEP ? "fast" : "slow");
+ vex_printf("blx r12 }");
+ return;
+ case ARMin_XIndir:
+ vex_printf("(xIndir) ");
+ vex_printf("if (%%cpsr.%s) { ",
+ showARMCondCode(i->ARMin.XIndir.cond));
+ vex_printf("str ");
+ ppHRegARM(i->ARMin.XIndir.dstGA);
+ vex_printf(",");
+ ppARMAMode1(i->ARMin.XIndir.amR15T);
+ vex_printf("; movw r12,LO16($disp_cp_xindir); ");
+ vex_printf("movt r12,HI16($disp_cp_xindir); ");
+ vex_printf("blx r12 }");
+ return;
+ case ARMin_XAssisted:
+ vex_printf("(xAssisted) ");
+ vex_printf("if (%%cpsr.%s) { ",
+ showARMCondCode(i->ARMin.XAssisted.cond));
+ vex_printf("str ");
+ ppHRegARM(i->ARMin.XAssisted.dstGA);
+ vex_printf(",");
+ ppARMAMode1(i->ARMin.XAssisted.amR15T);
+ vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
+ (Int)i->ARMin.XAssisted.jk);
+ vex_printf("movw r12,LO16($disp_cp_xassisted); ");
+ vex_printf("movt r12,HI16($disp_cp_xassisted); ");
+ vex_printf("blx r12 }");
return;
case ARMin_CMov:
vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
vex_printf(", ");
vex_printf("%d", i->ARMin.Add32.imm32);
return;
+ case ARMin_EvCheck:
+ vex_printf("(evCheck) ldr r12,");
+ ppARMAMode1(i->ARMin.EvCheck.amCounter);
+ vex_printf("; subs r12,r12,$1; str r12,");
+ ppARMAMode1(i->ARMin.EvCheck.amCounter);
+ vex_printf("; bpl nofail; ldr r12,");
+ ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
+ vex_printf("; bx r12; nofail:");
+ return;
+ case ARMin_ProfInc:
+ vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
+                 "movt r12,HI16($NotKnownYet); "
+ "ldr r11,[r12]; "
+ "adds r11,r11,$1; "
+ "str r11,[r12]; "
+ "ldr r11,[r12+4]; "
+ "adc r11,r11,$0; "
+ "str r11,[r12+4]");
+ return;
default:
unhandled:
vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
return;
case ARMin_Ld8S:
goto unhandled;
- case ARMin_Goto:
- /* reads the reg holding the next guest addr */
- addHRegUse(u, HRmRead, i->ARMin.Goto.gnext);
- /* writes it to the standard integer return register */
- addHRegUse(u, HRmWrite, hregARM_R0());
- /* possibly messes with the baseblock pointer */
- if (i->ARMin.Goto.jk != Ijk_Boring
- && i->ARMin.Goto.jk != Ijk_Call
- && i->ARMin.Goto.jk != Ijk_Ret)
- /* note, this is irrelevant since r8 is not actually
- available to the allocator. But still .. */
- addHRegUse(u, HRmWrite, hregARM_R8());
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
+ conditionally exit the block. Hence we only need to list (1)
+ the registers that they read, and (2) the registers that they
+ write in the case where the block is not exited. (2) is
+ empty, hence only (1) is relevant here. */
+ case ARMin_XDirect:
+ addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
+ return;
+ case ARMin_XIndir:
+ addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
+ addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
+ return;
+ case ARMin_XAssisted:
+ addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
+ addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
return;
case ARMin_CMov:
addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
return;
+ case ARMin_EvCheck:
+ /* We expect both amodes only to mention r8, so this is in
+ fact pointless, since r8 isn't allocatable, but
+ anyway.. */
+ addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
+ addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
+ addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
+ return;
+ case ARMin_ProfInc:
+ addHRegUse(u, HRmWrite, hregARM_R12());
+ addHRegUse(u, HRmWrite, hregARM_R11());
+ return;
unhandled:
default:
ppARMInstr(i);
return;
case ARMin_Ld8S:
goto unhandled;
- case ARMin_Goto:
- i->ARMin.Goto.gnext = lookupHRegRemap(m, i->ARMin.Goto.gnext);
+ case ARMin_XDirect:
+ mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
+ return;
+ case ARMin_XIndir:
+ i->ARMin.XIndir.dstGA
+ = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
+ mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
+ return;
+ case ARMin_XAssisted:
+ i->ARMin.XAssisted.dstGA
+ = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
+ mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
return;
case ARMin_CMov:
i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
case ARMin_Add32:
i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
+ return;
+ case ARMin_EvCheck:
+ /* We expect both amodes only to mention r8, so this is in
+ fact pointless, since r8 isn't allocatable, but
+ anyway.. */
+ mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
+ mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
+ return;
+ case ARMin_ProfInc:
+ /* hardwires r11 and r12 -- nothing to modify. */
+ return;
unhandled:
default:
ppARMInstr(i);
(((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
(((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
+#define XX______(zzx7,zzx6) \
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
+
/* Generate a skeletal insn that involves an a RI84 shifter operand.
Returns a word which is all zeroes apart from bits 25 and 11..0,
since it is those that encode the shifter operand (at least to the
return p;
}
+/* Get an immediate into a register, using only that register, and
+ generating exactly 2 instructions, regardless of the value of the
+ immediate. This is used when generating sections of code that need
+ to be patched later, so as to guarantee a specific size. */
+static UInt* imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
+{
+ if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
+ /* Generate movw rD, #low16 ; movt rD, #high16. */
+ UInt lo16 = imm32 & 0xFFFF;
+ UInt hi16 = (imm32 >> 16) & 0xFFFF;
+ UInt instr;
+ instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
+ (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
+ lo16 & 0xF);
+ *p++ = instr;
+ instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
+ (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
+ hi16 & 0xF);
+ *p++ = instr;
+ } else {
+ vassert(0); /* lose */
+ }
+ return p;
+}
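
(Illustrative aside, not part of this patch: a standalone
recomputation of the movw/movt pair that imm32_to_iregNo_EXACTLY2
emits, handy for eyeballing against a disassembly.  It mirrors the
XXXXXXXX packing above, giving the ARMv7 A1 encodings movw =
0xE30xxxxx and movt = 0xE34xxxxx; the function name is hypothetical.)

#include <stdio.h>

static void show_movw_movt ( unsigned rD, unsigned imm32 )
{
   unsigned lo16 = imm32 & 0xFFFF;
   unsigned hi16 = (imm32 >> 16) & 0xFFFF;
   unsigned movw = 0xE3000000u | ((lo16 >> 12) << 16)
                               | (rD << 12) | (lo16 & 0xFFF);
   unsigned movt = 0xE3400000u | ((hi16 >> 12) << 16)
                               | (rD << 12) | (hi16 & 0xFFF);
   printf("movw r%u, #0x%04x  =>  %08x\n", rD, lo16, movw);
   printf("movt r%u, #0x%04x  =>  %08x\n", rD, hi16, movt);
}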
-Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
+/* Check whether p points at a 2-insn sequence cooked up by
+ imm32_to_iregNo_EXACTLY2(). */
+static Bool is_imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
+{
+ if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
+      /* Check for movw rD, #low16 ; movt rD, #high16. */
+ UInt lo16 = imm32 & 0xFFFF;
+ UInt hi16 = (imm32 >> 16) & 0xFFFF;
+ UInt i0, i1;
+ i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
+ (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
+ lo16 & 0xF);
+ i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
+ (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
+ hi16 & 0xF);
+ return p[0] == i0 && p[1] == i1;
+ } else {
+ vassert(0); /* lose */
+ }
+}
+
+
+static UInt* do_load_or_store32 ( UInt* p,
+ Bool isLoad, UInt rD, ARMAMode1* am )
+{
+ vassert(rD <= 12);
+ vassert(am->tag == ARMam1_RI); // RR case is not handled
+ UInt bB = 0;
+ UInt bL = isLoad ? 1 : 0;
+ Int simm12;
+ UInt instr, bP;
+ if (am->ARMam1.RI.simm13 < 0) {
+ bP = 0;
+ simm12 = -am->ARMam1.RI.simm13;
+ } else {
+ bP = 1;
+ simm12 = am->ARMam1.RI.simm13;
+ }
+ vassert(simm12 >= 0 && simm12 <= 4095);
+ instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
+ iregNo(am->ARMam1.RI.reg),
+ rD);
+ instr |= simm12;
+ *p++ = instr;
+ return p;
+}
+
+
+/* Emit an instruction into buf and return the number of bytes used.
+ Note that buf is not the insn's final place, and therefore it is
+ imperative to emit position-independent code. If the emitted
+ instruction was a profiler inc, set *is_profInc to True, else
+ leave it unchanged. */
+
+Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, ARMInstr* i,
Bool mode64,
- void* dispatch_unassisted, void* dispatch_assisted )
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted )
{
UInt* p = (UInt*)buf;
vassert(nbuf >= 32);
}
case ARMin_Ld8S:
goto bad;
- case ARMin_Goto: {
- UInt instr;
- IRJumpKind jk = i->ARMin.Goto.jk;
- ARMCondCode cond = i->ARMin.Goto.cond;
- UInt rnext = iregNo(i->ARMin.Goto.gnext);
- Int trc = -1;
- /* since we branch to lr(r13) to get back to dispatch: */
- vassert(dispatch_unassisted == NULL);
- vassert(dispatch_assisted == NULL);
- switch (jk) {
- case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
- break; /* no need to set GST in these common cases */
- case Ijk_ClientReq:
- trc = VEX_TRC_JMP_CLIENTREQ; break;
- case Ijk_Sys_int128:
- case Ijk_Sys_int129:
- case Ijk_Sys_int130:
- case Ijk_Yield:
- case Ijk_EmWarn:
- case Ijk_MapFail:
- goto unhandled_jk;
- case Ijk_NoDecode:
- trc = VEX_TRC_JMP_NODECODE; break;
- case Ijk_TInval:
- trc = VEX_TRC_JMP_TINVAL; break;
- case Ijk_NoRedir:
- trc = VEX_TRC_JMP_NOREDIR; break;
- case Ijk_Sys_sysenter:
- case Ijk_SigTRAP:
- case Ijk_SigSEGV:
- goto unhandled_jk;
- case Ijk_Sys_syscall:
- trc = VEX_TRC_JMP_SYS_SYSCALL; break;
- unhandled_jk:
- default:
- goto bad;
+
+ case ARMin_XDirect: {
+ /* NB: what goes on here has to be very closely coordinated
+ with the chainXDirect_ARM and unchainXDirect_ARM below. */
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations
+ can't use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ UInt* ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. Or at least, leave a space for
+ it that we will shortly fill in. */
+ if (i->ARMin.XDirect.cond != ARMcc_AL) {
+ vassert(i->ARMin.XDirect.cond != ARMcc_NV);
+ ptmp = p;
+ *p++ = 0;
}
- if (trc != -1) {
- // mov{cond} r8, #trc
- vassert(trc >= 0 && trc <= 255);
- instr = (cond << 28) | 0x03A08000 | (0xFF & (UInt)trc);
- *p++ = instr;
+
+ /* Update the guest R15T. */
+ /* movw r12, lo16(dstGA) */
+ /* movt r12, hi16(dstGA) */
+ /* str r12, amR15T */
+ p = imm32_to_iregNo(p, /*r*/12, i->ARMin.XDirect.dstGA);
+ p = do_load_or_store32(p, False/*!isLoad*/,
+ /*r*/12, i->ARMin.XDirect.amR15T);
+
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
+ calling to) backs up the return address, so as to find the
+ address of the first patchable byte. So: don't change the
+ number of instructions (3) below. */
+ /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
+ /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
+ /* blx r12 (A1) */
+ void* disp_cp_chain_me
+ = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+ p = imm32_to_iregNo_EXACTLY2(p, /*r*/12,
+ (UInt)Ptr_to_ULong(disp_cp_chain_me));
+ *p++ = 0xE12FFF3C;
+ /* --- END of PATCHABLE BYTES --- */
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->ARMin.XDirect.cond != ARMcc_AL) {
+ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
+ vassert(delta > 0 && delta < 40);
+ vassert((delta & 3) == 0);
+ UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
+ vassert(notCond <= 13); /* Neither AL nor NV */
+ delta = (delta >> 2) - 2;
+ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
}
- // mov{cond} r0, rnext
- if (rnext != 0) {
- instr = (cond << 28) | 0x01A00000 | rnext;
- *p++ = instr;
+ goto done;
+ }
+
+ case ARMin_XIndir: {
+ /* We're generating transfers that could lead indirectly to a
+ chain-me, so we need to be sure this is actually allowed
+ -- no-redir translations are not allowed to reach normal
+ translations without going through the scheduler. That
+ means no XDirects or XIndirs out from no-redir
+ translations. Hence: */
+ vassert(disp_cp_xindir != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ UInt* ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. Or at least, leave a space for
+ it that we will shortly fill in. */
+ if (i->ARMin.XIndir.cond != ARMcc_AL) {
+ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
+ ptmp = p;
+ *p++ = 0;
+ }
+
+ /* Update the guest R15T. */
+ /* str r-dstGA, amR15T */
+ p = do_load_or_store32(p, False/*!isLoad*/,
+ iregNo(i->ARMin.XIndir.dstGA),
+ i->ARMin.XIndir.amR15T);
+
+ /* movw r12, lo16(VG_(disp_cp_xindir)) */
+ /* movt r12, hi16(VG_(disp_cp_xindir)) */
+ /* bx r12 (A1) */
+ p = imm32_to_iregNo(p, /*r*/12,
+ (UInt)Ptr_to_ULong(disp_cp_xindir));
+ *p++ = 0xE12FFF1C;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->ARMin.XIndir.cond != ARMcc_AL) {
+ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
+ vassert(delta > 0 && delta < 40);
+ vassert((delta & 3) == 0);
+ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
+ vassert(notCond <= 13); /* Neither AL nor NV */
+ delta = (delta >> 2) - 2;
+ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
}
- // bx{cond} r14
- instr =(cond << 28) | 0x012FFF1E;
- *p++ = instr;
goto done;
}
+
+ case ARMin_XAssisted: {
+ /* Use ptmp for backpatching conditional jumps. */
+ UInt* ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. Or at least, leave a space for
+ it that we will shortly fill in. */
+ if (i->ARMin.XAssisted.cond != ARMcc_AL) {
+ vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
+ ptmp = p;
+ *p++ = 0;
+ }
+
+ /* Update the guest R15T. */
+ /* str r-dstGA, amR15T */
+ p = do_load_or_store32(p, False/*!isLoad*/,
+ iregNo(i->ARMin.XAssisted.dstGA),
+ i->ARMin.XAssisted.amR15T);
+
+ /* movw r8, $magic_number */
+ UInt trcval = 0;
+ switch (i->ARMin.XAssisted.jk) {
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
+ //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
+ //case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
+ //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
+ //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
+ //case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
+ //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
+ //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
+ /* We don't expect to see the following being assisted. */
+ //case Ijk_Ret:
+ //case Ijk_Call:
+ /* fallthrough */
+ default:
+ ppIRJumpKind(i->ARMin.XAssisted.jk);
+ vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
+ }
+ vassert(trcval != 0);
+ p = imm32_to_iregNo(p, /*r*/8, trcval);
+
+ /* movw r12, lo16(VG_(disp_cp_xassisted)) */
+ /* movt r12, hi16(VG_(disp_cp_xassisted)) */
+ /* bx r12 (A1) */
+ p = imm32_to_iregNo(p, /*r*/12,
+ (UInt)Ptr_to_ULong(disp_cp_xassisted));
+ *p++ = 0xE12FFF1C;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->ARMin.XAssisted.cond != ARMcc_AL) {
+ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
+ vassert(delta > 0 && delta < 40);
+ vassert((delta & 3) == 0);
+ UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
+ vassert(notCond <= 13); /* Neither AL nor NV */
+ delta = (delta >> 2) - 2;
+ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
+ }
+ goto done;
+ }
+
case ARMin_CMov: {
UInt instr = skeletal_RI84(i->ARMin.CMov.src);
UInt subopc = X1101; /* MOV */
*p++ = insn;
goto done;
}
+
+ case ARMin_EvCheck: {
+ /* We generate:
+ ldr r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
+ subs r12, r12, #1 (A1)
+ str r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
+ bpl nofail
+ ldr r12, [r8 + #0] 0 == offsetof(host_EvC_FAILADDR)
+ bx r12
+ nofail:
+ */
+ UInt* p0 = p;
+ p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
+ i->ARMin.EvCheck.amCounter);
+ *p++ = 0xE25CC001; /* subs r12, r12, #1 */
+ p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
+ i->ARMin.EvCheck.amCounter);
+ *p++ = 0x5A000001; /* bpl nofail */
+ p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
+ i->ARMin.EvCheck.amFailAddr);
+ *p++ = 0xE12FFF1C; /* bx r12 */
+ /* nofail: */
+
+ /* Crosscheck */
+ vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
+ goto done;
+ }
+
+ case ARMin_ProfInc: {
+ /* We generate:
+ (ctrP is unknown now, so use 0x65556555 in the
+ expectation that a later call to LibVEX_patchProfCtr
+ will be used to fill in the immediate fields once the
+ right value is known.)
+ movw r12, lo16(0x65556555)
+            movt r12, hi16(0x65556555)
+ ldr r11, [r12]
+ adds r11, r11, #1
+ str r11, [r12]
+ ldr r11, [r12+4]
+ adc r11, r11, #0
+ str r11, [r12+4]
+ */
+ p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
+      *p++ = 0xE59CB000; /* ldr  r11, [r12]     */
+      *p++ = 0xE29BB001; /* adds r11, r11, #1   */
+      *p++ = 0xE58CB000; /* str  r11, [r12]     */
+      *p++ = 0xE59CB004; /* ldr  r11, [r12, #4] */
+      *p++ = 0xE2ABB000; /* adc  r11, r11, #0   */
+      *p++ = 0xE58CB004; /* str  r11, [r12, #4] */
+ /* Tell the caller .. */
+ vassert(!(*is_profInc));
+ *is_profInc = True;
+ goto done;
+ }
+
/* ... */
default:
goto bad;
return ((UChar*)p) - &buf[0];
}
+
+/* How big is an event check?  See case for ARMin_EvCheck in
+ emit_ARMInstr just above. That crosschecks what this returns, so
+ we can tell if we're inconsistent. */
+Int evCheckSzB_ARM ( void )
+{
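+   /* 6 fixed-size Arm insns: ldr, subs, str, bpl, ldr, bx. */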
+ return 24;
+}
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange chainXDirect_ARM ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to )
+{
+ /* What we're expecting to see is:
+        movw r12, lo16(disp_cp_chain_me_EXPECTED)
+        movt r12, hi16(disp_cp_chain_me_EXPECTED)
+ blx r12
+ viz
+ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
+ E1 2F FF 3C
+ */
+ UInt* p = (UInt*)place_to_chain;
+ vassert(0 == (3 & (UInt)p));
+ vassert(is_imm32_to_iregNo_EXACTLY2(
+ p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
+ vassert(p[2] == 0xE12FFF3C);
+ /* And what we want to change it to is:
+ movw r12, lo16(place_to_jump_to)
+ movt r12, hi16(place_to_jump_to)
+ bx r12
+ viz
+ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
+ E1 2F FF 1C
+ The replacement has the same length as the original.
+ */
+ (void)imm32_to_iregNo_EXACTLY2(
+ p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to));
+ p[2] = 0xE12FFF1C;
+ VexInvalRange vir = {(HWord)p, 12};
+ return vir;
+}
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me )
+{
+ /* What we're expecting to see is:
+ movw r12, lo16(place_to_jump_to_EXPECTED)
+        movt r12, hi16(place_to_jump_to_EXPECTED)
+ bx r12
+ viz
+ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
+ E1 2F FF 1C
+ */
+ UInt* p = (UInt*)place_to_unchain;
+ vassert(0 == (3 & (UInt)p));
+ vassert(is_imm32_to_iregNo_EXACTLY2(
+ p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED)));
+ vassert(p[2] == 0xE12FFF1C);
+ /* And what we want to change it to is:
+ movw r12, lo16(disp_cp_chain_me)
+ movt r12, hi16(disp_cp_chain_me)
+ blx r12
+ viz
+ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
+ E1 2F FF 3C
+ */
+ (void)imm32_to_iregNo_EXACTLY2(
+ p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me));
+ p[2] = 0xE12FFF3C;
+ VexInvalRange vir = {(HWord)p, 12};
+ return vir;
+}
+
+
+/* Patch the counter address into a profile inc point, as previously
+   created by the ARMin_ProfInc case for emit_ARMInstr. */
+VexInvalRange patchProfInc_ARM ( void* place_to_patch,
+ ULong* location_of_counter )
+{
+ vassert(sizeof(ULong*) == 4);
+ UInt* p = (UInt*)place_to_patch;
+ vassert(0 == (3 & (UInt)p));
+ vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
+ vassert(p[2] == 0xE59CB000);
+ vassert(p[3] == 0xE29BB001);
+ vassert(p[4] == 0xE58CB000);
+ vassert(p[5] == 0xE59CB004);
+ vassert(p[6] == 0xE2ABB000);
+ vassert(p[7] == 0xE58CB004);
+ imm32_to_iregNo_EXACTLY2(p, /*r*/12,
+ (UInt)Ptr_to_ULong(location_of_counter));
+ VexInvalRange vir = {(HWord)p, 8};
+ return vir;
+}
+
+
#undef BITS4
#undef X0000
#undef X0001
#undef XXX___XX
#undef XXXXX__X
#undef XXXXXXXX
+#undef XX______
/*---------------------------------------------------------------*/
/*--- end host_arm_defs.c ---*/
ARMin_LdSt16,
ARMin_LdSt8U,
ARMin_Ld8S,
- ARMin_Goto,
+ ARMin_XDirect, /* direct transfer to GA */
+ ARMin_XIndir, /* indirect transfer to GA */
+ ARMin_XAssisted, /* assisted transfer to GA */
ARMin_CMov,
ARMin_Call,
ARMin_Mul,
allocator demands them to consist of no more than two instructions.
     We will split this instruction into 2 or 3 ARM instructions in the
     emitting phase.
-
NOTE: source and destination registers should be different! */
- ARMin_Add32
+ ARMin_Add32,
+ ARMin_EvCheck, /* Event check */
+ ARMin_ProfInc /* 64-bit profile counter increment */
}
ARMInstrTag;
HReg rD;
ARMAMode2* amode;
} Ld8S;
- /* Pseudo-insn. Go to guest address gnext, on given
- condition, which could be ARMcc_AL. */
+ /* Update the guest R15T value, then exit requesting to chain
+ to it. May be conditional. Urr, use of Addr32 implicitly
+ assumes that wordsize(guest) == wordsize(host). */
+ struct {
+ Addr32 dstGA; /* next guest address */
+ ARMAMode1* amR15T; /* amode in guest state for R15T */
+ ARMCondCode cond; /* can be ARMcc_AL */
+ Bool toFastEP; /* chain to the slow or fast point? */
+ } XDirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
struct {
+ HReg dstGA;
+ ARMAMode1* amR15T;
+ ARMCondCode cond; /* can be ARMcc_AL */
+ } XIndir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ ARMAMode1* amR15T;
+ ARMCondCode cond; /* can be ARMcc_AL */
IRJumpKind jk;
- ARMCondCode cond;
- HReg gnext;
- } Goto;
+ } XAssisted;
/* Mov src to dst on the given condition, which may not
be ARMcc_AL. */
struct {
HReg rN;
UInt imm32;
} Add32;
+ struct {
+ ARMAMode1* amCounter;
+ ARMAMode1* amFailAddr;
+ } EvCheck;
+ struct {
+ /* No fields. The address of the counter to inc is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } ProfInc;
} ARMin;
}
ARMInstr;
HReg, ARMAMode2* );
extern ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg, ARMAMode1* );
extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* );
-extern ARMInstr* ARMInstr_Goto ( IRJumpKind, ARMCondCode, HReg gnext );
+extern ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond, Bool toFastEP );
+extern ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond );
+extern ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond, IRJumpKind jk );
extern ARMInstr* ARMInstr_CMov ( ARMCondCode, HReg dst, ARMRI84* src );
extern ARMInstr* ARMInstr_Call ( ARMCondCode, HWord, Int nArgRegs );
extern ARMInstr* ARMInstr_Mul ( ARMMulOp op );
extern ARMInstr* ARMInstr_NeonImm ( HReg, ARMNImm* );
extern ARMInstr* ARMInstr_NCMovQ ( ARMCondCode, HReg, HReg );
extern ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 );
+extern ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
+ ARMAMode1* amFailAddr );
+extern ARMInstr* ARMInstr_ProfInc ( void );
extern void ppARMInstr ( ARMInstr* );
extern void getRegUsage_ARMInstr ( HRegUsage*, ARMInstr*, Bool );
extern void mapRegs_ARMInstr ( HRegRemap*, ARMInstr*, Bool );
extern Bool isMove_ARMInstr ( ARMInstr*, HReg*, HReg* );
-extern Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr*,
- Bool,
- void* dispatch_unassisted,
- void* dispatch_assisted );
+extern Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, ARMInstr* i,
+ Bool mode64,
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted );
extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
HReg rreg, Int offset, Bool );
extern void getAllocableRegs_ARM ( Int*, HReg** );
-extern HInstrArray* iselSB_ARM ( IRSB*, VexArch,
- VexArchInfo*, VexAbiInfo* );
+extern HInstrArray* iselSB_ARM ( IRSB*,
+ VexArch,
+ VexArchInfo*,
+ VexAbiInfo*,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga );
+
+/* How big is an event check? This is kind of a kludge because it
+ depends on the offsets of host_EvC_FAILADDR and
+ host_EvC_COUNTER. */
+extern Int evCheckSzB_ARM ( void );
+
+/* Perform chaining and unchaining of an XDirect jump. */
+extern VexInvalRange chainXDirect_ARM ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to );
+
+extern VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me );
+
+/* Patch the counter location into an existing ProfInc point. */
+extern VexInvalRange patchProfInc_ARM ( void* place_to_patch,
+ ULong* location_of_counter );
+
#endif /* ndef __VEX_HOST_ARM_DEFS_H */
32-bit virtual HReg, which holds the high half
of the value.
- - The name of the vreg in which we stash a copy of the link reg, so
- helper functions don't kill it.
-
- The code array, that is, the insns selected so far.
- A counter, for generating new virtual registers.
- The host hardware capabilities word. This is set at the start
and does not change.
- Note, this is all host-independent. */
+ - A Bool for indicating whether we may generate chain-me
+ instructions for control flow transfers, or whether we must use
+ XAssisted.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any insn
+     in this block.  It is set at the start and does not change.  This is
+ used for detecting jumps which are definitely forward-edges from
+ this block, and therefore can be made (chained) to the fast entry
+ point of the destination, thereby avoiding the destination's
+ event check.
+
+ Note, this is all (well, mostly) host-independent.
+*/
typedef
struct {
+      /* Constants -- set at the start and do not change. */
IRTypeEnv* type_env;
HReg* vregmap;
HReg* vregmapHI;
Int n_vregmap;
- HReg savedLR;
+ UInt hwcaps;
- HInstrArray* code;
+ Bool chainingAllowed;
+ Addr64 max_ga;
+ /* These are modified as we go along. */
+ HInstrArray* code;
Int vreg_ctr;
-
- UInt hwcaps;
}
ISelEnv;
}
case Iop_64to8: {
HReg rHi, rLo;
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg tHi = newVRegI(env);
HReg tLo = newVRegI(env);
HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
/* read 64-bit IRTemp */
if (e->tag == Iex_RdTmp) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg tHi = newVRegI(env);
HReg tLo = newVRegI(env);
HReg tmp = iselNeon64Expr(env, e);
/* It is convenient sometimes to call iselInt64Expr even when we
have NEON support (e.g. in do_helper_call we need 64-bit
arguments as 2 x 32 regs). */
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg tHi = newVRegI(env);
HReg tLo = newVRegI(env);
HReg tmp = iselNeon64Expr(env, e);
if (e->tag == Iex_Unop) {
switch (e->Iex.Unop.op) {
case Iop_ReinterpI64asF64: {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
return iselNeon64Expr(env, e->Iex.Unop.arg);
} else {
HReg srcHi, srcLo;
return;
}
if (tyd == Ity_I64) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
addInstr(env, ARMInstr_NLdStD(False, dD, am));
return;
}
if (tyd == Ity_I64) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg addr = newVRegI(env);
HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
return;
}
if (ty == Ity_I64) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
HReg dst = lookupIRTemp(env, tmp);
addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
retty = typeOfIRTemp(env->type_env, d->tmp);
if (retty == Ity_I64) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg tmp = lookupIRTemp(env, d->tmp);
addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
hregARM_R0()));
move it into a result register pair. On a NEON capable
CPU, the result register will be a 64 bit NEON
register, so we must move it there instead. */
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg dst = lookupIRTemp(env, res);
addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
hregARM_R2()));
/* --------- EXIT --------- */
case Ist_Exit: {
- HReg gnext;
- ARMCondCode cc;
if (stmt->Ist.Exit.dst->tag != Ico_U32)
vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
- gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
- cc = iselCondCode(env, stmt->Ist.Exit.guard);
- addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
- addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
- return;
+
+ ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
+ stmt->Ist.Exit.offsIP);
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring
+ || stmt->Ist.Exit.jk == Ijk_Call
+ || stmt->Ist.Exit.jk == Ijk_Ret) {
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
+ addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
+ amR15T, cc, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ //case Ijk_MapFail:
+ //case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn:
+ case Ijk_NoDecode:
+ {
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
+ stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
default: break;
/*--- ISEL: Basic block terminators (Nexts) ---*/
/*---------------------------------------------------------*/
-static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+static void iselNext ( ISelEnv* env,
+ IRExpr* next, IRJumpKind jk, Int offsIP )
{
- HReg rDst;
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
ppIRJumpKind(jk);
- vex_printf("} ");
- ppIRExpr(next);
- vex_printf("\n");
+ vex_printf( "\n");
+ }
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst* cdst = next->Iex.Const.con;
+ vassert(cdst->tag == Ico_U32);
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr64)cdst->Ico.U32) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
+ addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
+ amR15T, ARMcc_AL,
+ toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, next);
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
+ HReg r = iselIntExpr_R(env, next);
+ ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
+ if (env->chainingAllowed) {
+ addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
+ } else {
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
}
- rDst = iselIntExpr_R(env, next);
- addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
- addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst));
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoDecode:
+ case Ijk_NoRedir:
+ //case Ijk_Sys_int128:
+ //case Ijk_Yield: case Ijk_SigTRAP:
+ {
+ HReg r = iselIntExpr_R(env, next);
+ ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(jk);
+ vex_printf( "\n");
+ vassert(0); // are we expecting any other kind?
}
/* Translate an entire SB to arm code. */
-HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host,
- VexArchInfo* archinfo_host,
- VexAbiInfo* vbi/*UNUSED*/ )
+HInstrArray* iselSB_ARM ( IRSB* bb,
+ VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga )
{
- Int i, j;
- HReg hreg, hregHI;
- ISelEnv* env;
- UInt hwcaps_host = archinfo_host->hwcaps;
- static UInt counter = 0;
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ ARMAMode1 *amCounter, *amFailAddr;
/* sanity ... */
vassert(arch_host == VexArchARM);
/* hwcaps should not change from one ISEL call to another. */
- arm_hwcaps = hwcaps_host;
+ arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+ /* and finally ... */
+ env->chainingAllowed = chainingAllowed;
+ env->hwcaps = hwcaps_host;
+ env->max_ga = max_ga;
+
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
j = 0;
case Ity_I16:
case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
case Ity_I64:
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
hreg = mkHReg(j++, HRcFlt64, True);
} else {
hregHI = mkHReg(j++, HRcInt32, True);
}
env->vreg_ctr = j;
- /* Keep a copy of the link reg, since any call to a helper function
- will trash it, and we can't get back to the dispatcher once that
- happens. */
- env->savedLR = newVRegI(env);
- addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
+ /* The very first instruction must be an event check. */
+ amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
+ amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
+ addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
+
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+ translation is used, by a call to LibVEX_patchProfCtr. */
+ if (addProfInc) {
+ addInstr(env, ARMInstr_ProfInc());
+ }
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
- iselStmt(env,bb->stmts[i]);
+ iselStmt(env, bb->stmts[i]);
- iselNext(env,bb->next,bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
- counter++;
return env->code;
}
vassert(regparms >= 0 && regparms <= 3);
return i;
}
-X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) {
- X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
- i->tag = Xin_Goto;
- i->Xin.Goto.cond = cond;
- i->Xin.Goto.dst = dst;
- i->Xin.Goto.jk = jk;
+X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
+ X86CondCode cond, Bool toFastEP ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_XDirect;
+ i->Xin.XDirect.dstGA = dstGA;
+ i->Xin.XDirect.amEIP = amEIP;
+ i->Xin.XDirect.cond = cond;
+ i->Xin.XDirect.toFastEP = toFastEP;
+ return i;
+}
+X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
+ X86CondCode cond ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_XIndir;
+ i->Xin.XIndir.dstGA = dstGA;
+ i->Xin.XIndir.amEIP = amEIP;
+ i->Xin.XIndir.cond = cond;
+ return i;
+}
+X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
+ X86CondCode cond, IRJumpKind jk ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_XAssisted;
+ i->Xin.XAssisted.dstGA = dstGA;
+ i->Xin.XAssisted.amEIP = amEIP;
+ i->Xin.XAssisted.cond = cond;
+ i->Xin.XAssisted.jk = jk;
return i;
}
X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
i->Xin.FpCmp.dst = dst;
return i;
}
-
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
i->tag = Xin_SseConst;
vassert(order >= 0 && order <= 0xFF);
return i;
}
+X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
+ X86AMode* amFailAddr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_EvCheck;
+ i->Xin.EvCheck.amCounter = amCounter;
+ i->Xin.EvCheck.amFailAddr = amFailAddr;
+ return i;
+}
+X86Instr* X86Instr_ProfInc ( void ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_ProfInc;
+ return i;
+}
void ppX86Instr ( X86Instr* i, Bool mode64 ) {
vassert(mode64 == False);
i->Xin.Call.regparms);
vex_printf("0x%x", i->Xin.Call.target);
break;
- case Xin_Goto:
- if (i->Xin.Goto.cond != Xcc_ALWAYS) {
- vex_printf("if (%%eflags.%s) { ",
- showX86CondCode(i->Xin.Goto.cond));
- }
- if (i->Xin.Goto.jk != Ijk_Boring
- && i->Xin.Goto.jk != Ijk_Call
- && i->Xin.Goto.jk != Ijk_Ret) {
- vex_printf("movl $");
- ppIRJumpKind(i->Xin.Goto.jk);
- vex_printf(",%%ebp ; ");
- }
+ case Xin_XDirect:
+ vex_printf("(xDirect) ");
+ vex_printf("if (%%eflags.%s) { ",
+ showX86CondCode(i->Xin.XDirect.cond));
+ vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
+ ppX86AMode(i->Xin.XDirect.amEIP);
+ vex_printf("; ");
+ vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
+ i->Xin.XDirect.toFastEP ? "fast" : "slow");
+ return;
+ case Xin_XIndir:
+ vex_printf("(xIndir) ");
+ vex_printf("if (%%eflags.%s) { movl ",
+ showX86CondCode(i->Xin.XIndir.cond));
+ ppHRegX86(i->Xin.XIndir.dstGA);
+ vex_printf(",");
+ ppX86AMode(i->Xin.XIndir.amEIP);
+ vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
+ return;
+ case Xin_XAssisted:
+ vex_printf("(xAssisted) ");
+ vex_printf("if (%%eflags.%s) { ",
+ showX86CondCode(i->Xin.XAssisted.cond));
vex_printf("movl ");
- ppX86RI(i->Xin.Goto.dst);
- vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
- if (i->Xin.Goto.cond != Xcc_ALWAYS) {
- vex_printf(" }");
- }
+ ppHRegX86(i->Xin.XAssisted.dstGA);
+ vex_printf(",");
+ ppX86AMode(i->Xin.XAssisted.amEIP);
+ vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
+ (Int)i->Xin.XAssisted.jk);
+ vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
return;
case Xin_CMov32:
vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
vex_printf(",");
ppHRegX86(i->Xin.SseShuf.dst);
return;
-
+ case Xin_EvCheck:
+ vex_printf("(evCheck) decl ");
+ ppX86AMode(i->Xin.EvCheck.amCounter);
+ vex_printf("; jns nofail; jmp *");
+ ppX86AMode(i->Xin.EvCheck.amFailAddr);
+ vex_printf("; nofail:");
+ return;
+ case Xin_ProfInc:
+ vex_printf("(profInc) addl $1,NotKnownYet; "
+ "adcl $0,NotKnownYet+4");
+ return;
default:
vpanic("ppX86Instr");
}
address temporary, depending on the regparmness: 0==EAX,
1==EDX, 2==ECX, 3==EDI. */
return;
- case Xin_Goto:
- addRegUsage_X86RI(u, i->Xin.Goto.dst);
- addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
- addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
- if (i->Xin.Goto.jk != Ijk_Boring
- && i->Xin.Goto.jk != Ijk_Call
- && i->Xin.Goto.jk != Ijk_Ret)
- /* note, this is irrelevant since ebp is not actually
- available to the allocator. But still .. */
- addHRegUse(u, HRmWrite, hregX86_EBP());
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
+ conditionally exit the block. Hence we only need to list (1)
+ the registers that they read, and (2) the registers that they
+ write in the case where the block is not exited. (2) is
+ empty, hence only (1) is relevant here. */
+ case Xin_XDirect:
+ addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
+ return;
+ case Xin_XIndir:
+ addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
+ addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
+ return;
+ case Xin_XAssisted:
+ addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
+ addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
return;
case Xin_CMov32:
addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
return;
+ case Xin_EvCheck:
+ /* We expect both amodes only to mention %ebp, so this is in
+ fact pointless, since %ebp isn't allocatable, but anyway.. */
+ addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
+ addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
+ return;
+ case Xin_ProfInc:
+ /* does not use any registers. */
+ return;
default:
ppX86Instr(i, False);
vpanic("getRegUsage_X86Instr");
return;
case Xin_Call:
return;
- case Xin_Goto:
- mapRegs_X86RI(m, i->Xin.Goto.dst);
+ case Xin_XDirect:
+ mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
+ return;
+ case Xin_XIndir:
+ mapReg(m, &i->Xin.XIndir.dstGA);
+ mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
+ return;
+ case Xin_XAssisted:
+ mapReg(m, &i->Xin.XAssisted.dstGA);
+ mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
return;
case Xin_CMov32:
mapRegs_X86RM(m, i->Xin.CMov32.src);
mapReg(m, &i->Xin.SseShuf.src);
mapReg(m, &i->Xin.SseShuf.dst);
return;
+ case Xin_EvCheck:
+ /* We expect both amodes only to mention %ebp, so this is in
+ fact pointless, since %ebp isn't allocatable, but anyway.. */
+ mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
+ mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
+ return;
+ case Xin_ProfInc:
+ /* does not use any registers. */
+ return;
+
default:
ppX86Instr(i, mode64);
vpanic("mapRegs_X86Instr");
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
- imperative to emit position-independent code. */
+ imperative to emit position-independent code. If the emitted
+ instruction was a profiler inc, set *is_profInc to True, else
+ leave it unchanged. */
-Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
+Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, X86Instr* i,
Bool mode64,
- void* dispatch_unassisted,
- void* dispatch_assisted )
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted )
{
UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
*p++ = toUChar(0xD0 + irno);
goto done;
- case Xin_Goto: {
- void* dispatch_to_use = NULL;
- vassert(dispatch_unassisted != NULL);
- vassert(dispatch_assisted != NULL);
+ case Xin_XDirect: {
+ /* NB: what goes on here has to be very closely coordinated with the
+ chainXDirect_X86 and unchainXDirect_X86 below. */
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations can't
+ use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
/* Use ptmp for backpatching conditional jumps. */
ptmp = NULL;
/* First off, if this is conditional, create a conditional
- jump over the rest of it. */
- if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+ jump over the rest of it. */
+ if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
/* jmp fwds if !condition */
- *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1)));
+ *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
ptmp = p; /* fill in this bit later */
*p++ = 0; /* # of bytes to jump over; don't know how many yet. */
}
- /* If a non-boring, set %ebp (the guest state pointer)
- appropriately. Also, decide which dispatcher we need to
- use. */
- dispatch_to_use = dispatch_assisted;
-
- /* movl $magic_number, %ebp */
- switch (i->Xin.Goto.jk) {
- case Ijk_ClientReq:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
- case Ijk_Sys_int128:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_INT128); break;
- case Ijk_Sys_int129:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_INT129); break;
- case Ijk_Sys_int130:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_INT130); break;
- case Ijk_Yield:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_YIELD); break;
- case Ijk_EmWarn:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_EMWARN); break;
- case Ijk_MapFail:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
- case Ijk_NoDecode:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_NODECODE); break;
- case Ijk_TInval:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_TINVAL); break;
- case Ijk_NoRedir:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
- case Ijk_Sys_sysenter:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break;
- case Ijk_SigTRAP:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
- case Ijk_SigSEGV:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
- case Ijk_Ret:
- case Ijk_Call:
- case Ijk_Boring:
- dispatch_to_use = dispatch_unassisted;
- break;
- default:
- ppIRJumpKind(i->Xin.Goto.jk);
- vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
+ /* Update the guest EIP. */
+ /* movl $dstGA, amEIP */
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP);
+ p = emit32(p, i->Xin.XDirect.dstGA);
+
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
+ to) backs up the return address, so as to find the address of
+ the first patchable byte. So: don't change the length of the
+ two instructions below. */
+ /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
+ *p++ = 0xBA;
+ void* disp_cp_chain_me
+ = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+ p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me));
+ /* call *%edx */
+ *p++ = 0xFF;
+ *p++ = 0xD2;
+ /* --- END of PATCHABLE BYTES --- */
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
}
+ goto done;
+ }
- /* Get the destination address into %eax */
- if (i->Xin.Goto.dst->tag == Xri_Imm) {
- /* movl $immediate, %eax */
- *p++ = 0xB8;
- p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
- } else {
- vassert(i->Xin.Goto.dst->tag == Xri_Reg);
- /* movl %reg, %eax */
- if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
- *p++ = 0x89;
- p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
- }
+ case Xin_XIndir: {
+ /* We're generating transfers that could lead indirectly to a
+ chain-me, so we need to be sure this is actually allowed --
+ no-redir translations are not allowed to reach normal
+ translations without going through the scheduler. That means
+ no XDirects or XIndirs out from no-redir translations.
+ Hence: */
+ vassert(disp_cp_xindir != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
}
- /* Get the dispatcher address into %edx. This has to happen
- after the load of %eax since %edx might be carrying the value
- destined for %eax immediately prior to this Xin_Goto. */
- vassert(sizeof(UInt) == sizeof(void*));
- vassert(dispatch_to_use != NULL);
- /* movl $imm32, %edx */
+ /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
+
+ /* movl $disp_indir, %edx */
*p++ = 0xBA;
- p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));
+ p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
+ /* jmp *%edx */
+ *p++ = 0xFF;
+ *p++ = 0xE2;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ }
+ goto done;
+ }
+
+ case Xin_XAssisted: {
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+ }
+
+ /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP);
+ /* movl $magic_number, %ebp. */
+ UInt trcval = 0;
+ switch (i->Xin.XAssisted.jk) {
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
+ case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
+ case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
+ case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
+ case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
+ case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
+ case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
+ case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
+ /* We don't expect to see the following being assisted. */
+ case Ijk_Ret:
+ case Ijk_Call:
+ /* fallthrough */
+ default:
+ ppIRJumpKind(i->Xin.XAssisted.jk);
+ vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
+ }
+ vassert(trcval != 0);
+ *p++ = 0xBD;
+ p = emit32(p, trcval);
+ /* movl $disp_cp_xassisted, %edx */
+ *p++ = 0xBA;
+ p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted));
/* jmp *%edx */
*p++ = 0xFF;
*p++ = 0xE2;
/* Fix up the conditional jump, if there was one. */
- if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+ if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
Int delta = p - ptmp;
- vassert(delta > 0 && delta < 20);
+ vassert(delta > 0 && delta < 40);
*ptmp = toUChar(delta-1);
}
goto done;
*p++ = (UChar)(i->Xin.SseShuf.order);
goto done;
+ case Xin_EvCheck: {
+ /* We generate:
+ (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER)
+ (2 bytes) jns nofail expected taken
+ (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR)
+ nofail:
+ */
+ /* This is heavily asserted re instruction lengths. It needs to
+ be. If we get given unexpected forms of .amCounter or
+ .amFailAddr -- basically, anything that's not of the form
+ uimm7(%ebp) -- they are likely to fail. */
+ /* Note also that after the decl we must be very careful not to
+ read the carry flag, else we get a partial flags stall.
+ js/jns avoids that, though. */
+ UChar* p0 = p;
+ /* --- decl 4(%ebp) --- */
+ /* "fake(1)" because + there's no register in this encoding;
+ instead the register + field is used as a sub opcode. The
+ encoding for "decl r/m32" + is FF /1, hence the fake(1). */
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter);
+ vassert(p - p0 == 3);
+ /* --- jns nofail --- */
+ *p++ = 0x79;
+ *p++ = 0x03; /* need to check this 0x03 after the next insn */
+ vassert(p - p0 == 5);
+ /* --- jmp* 0(%ebp) --- */
+ /* The encoding is FF /4. */
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr);
+ vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
+ /* And crosscheck .. */
+ vassert(evCheckSzB_X86() == 8);
+ goto done;
+ }
+
+ case Xin_ProfInc: {
+ /* We generate addl $1,NotKnownYet
+ adcl $0,NotKnownYet+4
+ in the expectation that a later call to LibVEX_PatchProfInc
+ will be used to fill in the address fields once the right
+ value is known.
+ 83 05 00 00 00 00 01
+ 83 15 00 00 00 00 00
+ */
+ *p++ = 0x83; *p++ = 0x05;
+ *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
+ *p++ = 0x01;
+ *p++ = 0x83; *p++ = 0x15;
+ *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
+ *p++ = 0x00;
+ /* Tell the caller .. */
+ vassert(!(*is_profInc));
+ *is_profInc = True;
+ goto done;
+ }
+
default:
goto bad;
}
# undef fake
}
+
+/* How big is an event check? See case for Xin_EvCheck in
+ emit_X86Instr just above. That crosschecks what this returns, so
+ we can tell if we're inconsistent. */
+Int evCheckSzB_X86 ( void )
+{
+ return 8;
+}
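For concreteness, here is a sketch (not part of the patch) of the 8 bytes the Xin_EvCheck case is expected to produce when, as in iselSB_X86 below, amCounter is 4(%ebp) and amFailAddr is 0(%ebp); it assumes doAMode_M emits the disp8 form for an %ebp base, which is what makes the length assertions above hold:

   /* Illustrative only -- the expected event-check encoding. */
   static const unsigned char evcheck_expected[8] = {
      0xFF, 0x4D, 0x04,   /* decl 4(%ebp)   FF /1, modrm 0x4D, disp8 4 */
      0x79, 0x03,         /* jns  .+3       skip over the failure jump */
      0xFF, 0x65, 0x00    /* jmp  *0(%ebp)  FF /4, modrm 0x65, disp8 0 */
   };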
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange chainXDirect_X86 ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to )
+{
+ /* What we're expecting to see is:
+ movl $disp_cp_chain_me_EXPECTED, %edx
+ call *%edx
+ viz
+ BA <4 bytes value == disp_cp_chain_me_EXPECTED>
+ FF D2
+ */
+ UChar* p = (UChar*)place_to_chain;
+ vassert(p[0] == 0xBA);
+ vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
+ vassert(p[5] == 0xFF);
+ vassert(p[6] == 0xD2);
+ /* And what we want to change it to is:
+ jmp disp32 where disp32 is relative to the next insn
+ ud2;
+ viz
+ E9 <4 bytes == disp32>
+ 0F 0B
+ The replacement has the same length as the original.
+ */
+ /* This is the delta we need to put into a JMP d32 insn. It's
+ relative to the start of the next insn, hence the -5. */
+ Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
+
+ /* And make the modifications. */
+ p[0] = 0xE9;
+ p[1] = (delta >> 0) & 0xFF;
+ p[2] = (delta >> 8) & 0xFF;
+ p[3] = (delta >> 16) & 0xFF;
+ p[4] = (delta >> 24) & 0xFF;
+ p[5] = 0x0F; p[6] = 0x0B;
+ /* sanity check on the delta -- top 32 are all 0 or all 1 */
+ delta >>= 32;
+ vassert(delta == 0LL || delta == -1LL);
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
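A minimal sketch (standalone C, 32-bit addresses assumed) of the displacement computation done above: the rel32 in an E9 jump is measured from the end of the 5-byte instruction, hence the -5.

   /* Illustrative only. */
   unsigned int jmp_rel32 ( unsigned int insn_addr, unsigned int target )
   {
      return target - (insn_addr + 5);   /* relative to the next insn */
   }
   /* e.g. insn_addr = 0x50000000, target = 0x50000010  ->  0x0000000B */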
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_X86 ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me )
+{
+ /* What we're expecting to see is:
+ jmp d32
+ ud2;
+ viz
+ E9 <4 bytes == disp32>
+ 0F 0B
+ */
+ UChar* p = (UChar*)place_to_unchain;
+ Bool valid = False;
+ if (p[0] == 0xE9
+ && p[5] == 0x0F && p[6] == 0x0B) {
+ /* Check the offset is right. */
+ Int s32 = *(Int*)(&p[1]);
+ if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) {
+ valid = True;
+ if (0)
+ vex_printf("QQQ unchainXDirect_X86: found valid\n");
+ }
+ }
+ vassert(valid);
+ /* And what we want to change it to is:
+ movl $disp_cp_chain_me, %edx
+ call *%edx
+ viz
+ BA <4 bytes value == disp_cp_chain_me>
+ FF D2
+ So it's the same length (convenient, huh).
+ */
+ p[0] = 0xBA;
+ *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me);
+ p[5] = 0xFF;
+ p[6] = 0xD2;
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
+
+
+/* Patch the counter address into a profile inc point, as previously
+ created by the Xin_ProfInc case for emit_X86Instr. */
+VexInvalRange patchProfInc_X86 ( void* place_to_patch,
+ ULong* location_of_counter )
+{
+ vassert(sizeof(ULong*) == 4);
+ UChar* p = (UChar*)place_to_patch;
+ vassert(p[0] == 0x83);
+ vassert(p[1] == 0x05);
+ vassert(p[2] == 0x00);
+ vassert(p[3] == 0x00);
+ vassert(p[4] == 0x00);
+ vassert(p[5] == 0x00);
+ vassert(p[6] == 0x01);
+ vassert(p[7] == 0x83);
+ vassert(p[8] == 0x15);
+ vassert(p[9] == 0x00);
+ vassert(p[10] == 0x00);
+ vassert(p[11] == 0x00);
+ vassert(p[12] == 0x00);
+ vassert(p[13] == 0x00);
+ UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter);
+ p[2] = imm32 & 0xFF; imm32 >>= 8;
+ p[3] = imm32 & 0xFF; imm32 >>= 8;
+ p[4] = imm32 & 0xFF; imm32 >>= 8;
+ p[5] = imm32 & 0xFF; imm32 >>= 8;
+ imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter);
+ p[9] = imm32 & 0xFF; imm32 >>= 8;
+ p[10] = imm32 & 0xFF; imm32 >>= 8;
+ p[11] = imm32 & 0xFF; imm32 >>= 8;
+ p[12] = imm32 & 0xFF; imm32 >>= 8;
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
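In effect, the patched addl/adcl pair performs a 64-bit increment of *location_of_counter using 32-bit operations; a sketch of the equivalent C (illustrative only):

   /* Illustrative only. */
   void prof_inc_equiv ( unsigned long long* counter )
   {
      unsigned int lo = (unsigned int)*counter;
      unsigned int hi = (unsigned int)(*counter >> 32);
      unsigned int new_lo = lo + 1;                 /* addl $1, low word  */
      hi += (new_lo < lo) ? 1 : 0;                  /* adcl $0, high word */
      *counter = ((unsigned long long)hi << 32) | new_lo;
   }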
+
+
/*---------------------------------------------------------------*/
/*--- end host_x86_defs.c ---*/
/*---------------------------------------------------------------*/
Xin_Sh3232, /* shldl or shrdl */
Xin_Push, /* push (32-bit?) value on stack */
Xin_Call, /* call to address in register */
- Xin_Goto, /* conditional/unconditional jmp to dst */
+ Xin_XDirect, /* direct transfer to GA */
+ Xin_XIndir, /* indirect transfer to GA */
+ Xin_XAssisted, /* assisted transfer to GA */
Xin_CMov32, /* conditional move */
Xin_LoadEX, /* mov{s,z}{b,w}l from mem to reg */
Xin_Store, /* store 16/8 bit value in memory */
Xin_Sse64FLo, /* SSE binary, 64F in lowest lane only */
Xin_SseReRg, /* SSE binary general reg-reg, Re, Rg */
Xin_SseCMov, /* SSE conditional move */
- Xin_SseShuf /* SSE2 shuffle (pshufd) */
+ Xin_SseShuf, /* SSE2 shuffle (pshufd) */
+ Xin_EvCheck, /* Event check */
+ Xin_ProfInc /* 64-bit profile counter increment */
}
X86InstrTag;
Addr32 target;
Int regparms; /* 0 .. 3 */
} Call;
- /* Pseudo-insn. Goto dst, on given condition (which could be
- Xcc_ALWAYS). */
- struct {
+ /* Update the guest EIP value, then exit requesting to chain
+ to it. May be conditional. Urr, use of Addr32 implicitly
+ assumes that wordsize(guest) == wordsize(host). */
+ struct {
+ Addr32 dstGA; /* next guest address */
+ X86AMode* amEIP; /* amode in guest state for EIP */
+ X86CondCode cond; /* can be Xcc_ALWAYS */
+ Bool toFastEP; /* chain to the slow or fast point? */
+ } XDirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ X86AMode* amEIP;
+ X86CondCode cond; /* can be Xcc_ALWAYS */
+ } XIndir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ X86AMode* amEIP;
+ X86CondCode cond; /* can be Xcc_ALWAYS */
IRJumpKind jk;
- X86CondCode cond;
- X86RI* dst;
- } Goto;
+ } XAssisted;
/* Mov src to dst on the given condition, which may not
be the bogus Xcc_ALWAYS. */
struct {
HReg src;
HReg dst;
} SseShuf;
+ struct {
+ X86AMode* amCounter;
+ X86AMode* amFailAddr;
+ } EvCheck;
+ struct {
+ /* No fields. The address of the counter to inc is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } ProfInc;
} Xin;
}
extern X86Instr* X86Instr_Sh3232 ( X86ShiftOp, UInt amt, HReg src, HReg dst );
extern X86Instr* X86Instr_Push ( X86RMI* );
extern X86Instr* X86Instr_Call ( X86CondCode, Addr32, Int );
-extern X86Instr* X86Instr_Goto ( IRJumpKind, X86CondCode cond, X86RI* dst );
+extern X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
+ X86CondCode cond, Bool toFastEP );
+extern X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
+ X86CondCode cond );
+extern X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
+ X86CondCode cond, IRJumpKind jk );
extern X86Instr* X86Instr_CMov32 ( X86CondCode, X86RM* src, HReg dst );
extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
X86AMode* src, HReg dst );
extern X86Instr* X86Instr_SseReRg ( X86SseOp, HReg, HReg );
extern X86Instr* X86Instr_SseCMov ( X86CondCode, HReg src, HReg dst );
extern X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst );
+extern X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
+ X86AMode* amFailAddr );
+extern X86Instr* X86Instr_ProfInc ( void );
extern void ppX86Instr ( X86Instr*, Bool );
extern void getRegUsage_X86Instr ( HRegUsage*, X86Instr*, Bool );
extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool );
extern Bool isMove_X86Instr ( X86Instr*, HReg*, HReg* );
-extern Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr*,
- Bool,
- void* dispatch_unassisted,
- void* dispatch_assisted );
+extern Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, X86Instr* i,
+ Bool mode64,
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted );
extern void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
extern X86Instr* directReload_X86 ( X86Instr* i,
HReg vreg, Short spill_off );
extern void getAllocableRegs_X86 ( Int*, HReg** );
-extern HInstrArray* iselSB_X86 ( IRSB*, VexArch,
- VexArchInfo*,
- VexAbiInfo* );
+extern HInstrArray* iselSB_X86 ( IRSB*,
+ VexArch,
+ VexArchInfo*,
+ VexAbiInfo*,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga );
+
+/* How big is an event check? This is kind of a kludge because it
+ depends on the offsets of host_EvC_FAILADDR and host_EvC_COUNTER,
+ and so assumes that they are both < 128, so the short (disp8)
+ offset encoding can be used. This is all checked with assertions, so in the
+ worst case we will merely assert at startup. */
+extern Int evCheckSzB_X86 ( void );
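As a usage sketch (the helper name is made up, not part of this header): the fast entry point of a translation is simply its slow entry point plus the event-check size.

   /* Illustrative only. */
   static inline unsigned char* fastEP_of ( unsigned char* slowEP )
   {
      return slowEP + evCheckSzB_X86();   /* == slowEP + 8 on x86 */
   }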
+
+/* Perform a chaining and unchaining of an XDirect jump. */
+extern VexInvalRange chainXDirect_X86 ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to );
+
+extern VexInvalRange unchainXDirect_X86 ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me );
+
+/* Patch the counter location into an existing ProfInc point. */
+extern VexInvalRange patchProfInc_X86 ( void* place_to_patch,
+ ULong* location_of_counter );
+
#endif /* ndef __VEX_HOST_X86_DEFS_H */
- The host subarchitecture we are selecting insns for.
This is set at the start and does not change.
- Note, this is all host-independent. */
+ - A Bool for indicating whether we may generate chain-me
+ instructions for control flow transfers, or whether we must use
+ XAssisted.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any insn
+ in this block. Is set at the start and does not change. This is
+ used for detecting jumps which are definitely forward-edges from
+ this block, and therefore can be made (chained) to the fast entry
+ point of the destination, thereby avoiding the destination's
+ event check.
+
+ Note, this is all (well, mostly) host-independent.
+*/
typedef
struct {
+ /* Constant -- set at the start and do not change. */
IRTypeEnv* type_env;
HReg* vregmap;
HReg* vregmapHI;
Int n_vregmap;
- HInstrArray* code;
+ UInt hwcaps;
- Int vreg_ctr;
+ Bool chainingAllowed;
+ Addr64 max_ga;
- UInt hwcaps;
+ /* These are modified as we go along. */
+ HInstrArray* code;
+ Int vreg_ctr;
}
ISelEnv;
/* --------- EXIT --------- */
case Ist_Exit: {
- X86RI* dst;
- X86CondCode cc;
if (stmt->Ist.Exit.dst->tag != Ico_U32)
- vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
- dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
- cc = iselCondCode(env,stmt->Ist.Exit.guard);
- addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
- return;
+ vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
+
+ X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
+ hregX86_EBP());
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring) {
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
+ addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
+ amEIP, cc, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ case Ijk_MapFail:
+ case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn: {
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
default: break;
/*--- ISEL: Basic block terminators (Nexts) ---*/
/*---------------------------------------------------------*/
-static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+static void iselNext ( ISelEnv* env,
+ IRExpr* next, IRJumpKind jk, Int offsIP )
{
- X86RI* ri;
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
ppIRJumpKind(jk);
- vex_printf("} ");
- ppIRExpr(next);
- vex_printf("\n");
+ vex_printf( "\n");
}
- ri = iselIntExpr_RI(env, next);
- addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri));
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst* cdst = next->Iex.Const.con;
+ vassert(cdst->tag == Ico_U32);
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr64)cdst->Ico.U32) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
+ addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
+ amEIP, Xcc_ALWAYS,
+ toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, next);
+ addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
+ HReg r = iselIntExpr_R(env, next);
+ X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
+ if (env->chainingAllowed) {
+ addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
+ } else {
+ addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_Sys_int128: case Ijk_ClientReq: case Ijk_NoRedir:
+ case Ijk_Yield: case Ijk_SigTRAP: {
+ HReg r = iselIntExpr_R(env, next);
+ X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
+ addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(jk);
+ vex_printf( "\n");
+ vassert(0); // are we expecting any other kind?
}
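The forward-edge test used in both the Ist_Exit case and iselNext above reduces to a single comparison; a sketch, assuming the VEX basic types are in scope:

   /* Illustrative only: chain to the fast entry point iff the target is
      strictly beyond every insn in this block, i.e. a definite forward
      edge, so the destination's event check can safely be skipped. */
   static Bool exit_goes_to_fastEP ( Addr32 dstGA, Addr64 max_ga )
   {
      return ((Addr64)dstGA) > max_ga;
   }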
/* Translate an entire SB to x86 code. */
-HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host,
- VexArchInfo* archinfo_host,
- VexAbiInfo* vbi/*UNUSED*/ )
+HInstrArray* iselSB_X86 ( IRSB* bb,
+ VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga )
{
Int i, j;
HReg hreg, hregHI;
ISelEnv* env;
UInt hwcaps_host = archinfo_host->hwcaps;
+ X86AMode *amCounter, *amFailAddr;
/* sanity ... */
vassert(arch_host == VexArchX86);
| VEX_HWCAPS_X86_SSE2
| VEX_HWCAPS_X86_SSE3
| VEX_HWCAPS_X86_LZCNT)));
+ vassert(sizeof(max_ga) == 8);
+ vassert((max_ga >> 32) == 0);
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
/* and finally ... */
- env->hwcaps = hwcaps_host;
+ env->chainingAllowed = chainingAllowed;
+ env->hwcaps = hwcaps_host;
+ env->max_ga = max_ga;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
}
env->vreg_ctr = j;
+ /* The very first instruction must be an event check. */
+ amCounter = X86AMode_IR(offs_Host_EvC_Counter, hregX86_EBP());
+ amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
+ addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
+
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+ translation is used, by a call to LibVEX_PatchProfInc. */
+ if (addProfInc) {
+ addInstr(env, X86Instr_ProfInc());
+ }
+
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
- iselStmt(env,bb->stmts[i]);
+ iselStmt(env, bb->stmts[i]);
- iselNext(env,bb->next,bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
case Ist_Exit:
vex_printf( "if (" );
ppIRExpr(s->Ist.Exit.guard);
- vex_printf( ") goto {");
- ppIRJumpKind(s->Ist.Exit.jk);
- vex_printf("} ");
+ vex_printf( ") { PUT(%d) = ", s->Ist.Exit.offsIP);
ppIRConst(s->Ist.Exit.dst);
+ vex_printf("; exit-");
+ ppIRJumpKind(s->Ist.Exit.jk);
+ vex_printf(" } ");
break;
default:
vpanic("ppIRStmt");
ppIRStmt(bb->stmts[i]);
vex_printf( "\n");
}
- vex_printf( " goto {");
- ppIRJumpKind(bb->jumpkind);
- vex_printf( "} ");
+ vex_printf( " PUT(%d) = ", bb->offsIP );
ppIRExpr( bb->next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(bb->jumpkind);
vex_printf( "\n}\n");
}
s->Ist.MBE.event = event;
return s;
}
-IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ) {
- IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
- s->tag = Ist_Exit;
- s->Ist.Exit.guard = guard;
- s->Ist.Exit.jk = jk;
- s->Ist.Exit.dst = dst;
+IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
+ Int offsIP ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_Exit;
+ s->Ist.Exit.guard = guard;
+ s->Ist.Exit.jk = jk;
+ s->Ist.Exit.dst = dst;
+ s->Ist.Exit.offsIP = offsIP;
return s;
}
bb->stmts = LibVEX_Alloc(bb->stmts_size * sizeof(IRStmt*));
bb->next = NULL;
bb->jumpkind = Ijk_Boring;
+ bb->offsIP = 0;
return bb;
}
case Ist_Exit:
return IRStmt_Exit(deepCopyIRExpr(s->Ist.Exit.guard),
s->Ist.Exit.jk,
- deepCopyIRConst(s->Ist.Exit.dst));
+ deepCopyIRConst(s->Ist.Exit.dst),
+ s->Ist.Exit.offsIP);
default:
vpanic("deepCopyIRStmt");
}
sts2 = LibVEX_Alloc(bb2->stmts_used * sizeof(IRStmt*));
for (i = 0; i < bb2->stmts_used; i++)
sts2[i] = deepCopyIRStmt(bb->stmts[i]);
- bb2->stmts = sts2;
+ bb2->stmts = sts2;
return bb2;
}
bb2->tyenv = deepCopyIRTypeEnv(bb->tyenv);
bb2->next = deepCopyIRExpr(bb->next);
bb2->jumpkind = bb->jumpkind;
+ bb2->offsIP = bb->offsIP;
return bb2;
}
sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: bad dst");
if (typeOfIRConst(stmt->Ist.Exit.dst) != gWordTy)
sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: not :: guest word type");
+ /* because it would intersect with host_EvC_* */
+ if (stmt->Ist.Exit.offsIP < 16)
+ sanityCheckFail(bb,stmt,"IRStmt.Exit.offsIP: too low");
break;
default:
vpanic("tcStmt");
tcStmt( bb, bb->stmts[i], guest_word_size );
if (typeOfIRExpr(bb->tyenv,bb->next) != guest_word_size)
sanityCheckFail(bb, NULL, "bb->next field has wrong type");
+ /* because it would intersect with host_EvC_* */
+ if (bb->offsIP < 16)
+ sanityCheckFail(bb, NULL, "bb->offsIP: too low");
+
}
/*---------------------------------------------------------------*/
case Ist_Exit:
e1 = flatten_Expr(bb, st->Ist.Exit.guard);
addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
- st->Ist.Exit.dst));
+ st->Ist.Exit.dst,
+ st->Ist.Exit.offsIP));
break;
default:
vex_printf("\n");
flatten_Stmt( out, in->stmts[i] );
out->next = flatten_Expr( out, in->next );
out->jumpkind = in->jumpkind;
+ out->offsIP = in->offsIP;
return out;
}
UInt key = 0; /* keep gcc -O happy */
HashHW* env = newHHW();
+
+ /* Initialise the running env with the fact that the final exit
+ writes the IP (or whatever it claims to write; we don't
+ care). */
+ key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
+ addToHHW(env, (HWord)key, 0);
+
+ /* And now scan backwards through the statements. */
for (i = bb->stmts_used-1; i >= 0; i--) {
st = bb->stmts[i];
/* Deal with conditional exits. */
if (st->tag == Ist_Exit) {
- /* Since control may not get beyond this point, we must empty
- out the set, since we can no longer claim that the next
- event for any part of the guest state is definitely a
- write. */
- vassert(isIRAtom(st->Ist.Exit.guard));
+ //Bool re_add;
+ /* Need to throw out from the env, any part of it which
+ doesn't overlap with the guest state written by this exit.
+ Since the exit only writes one section, it's simplest to
+ do this: (1) check whether env contains a write that
+ completely overlaps the write done by this exit; (2) empty
+ out env; and (3) if (1) was true, add the write done by
+ this exit.
+
+ To make (1) a bit simpler, merely search for a write that
+ exactly matches the one done by this exit. That's safe
+ because it will fail as often or more often than a full
+ overlap check, and failure to find an overlapping write in
+ env is the safe case (we just nuke env if that
+ happens). */
+ //vassert(isIRAtom(st->Ist.Exit.guard));
+ /* (1) */
+ //key = mk_key_GetPut(st->Ist.Exit.offsIP,
+ // typeOfIRConst(st->Ist.Exit.dst));
+ //re_add = lookupHHW(env, NULL, key);
+ /* (2) */
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
+ /* (3) */
+ //if (0 && re_add)
+ // addToHHW(env, (HWord)key, 0);
continue;
}
assumed to compute different values. After all the accesses may happen
at different times and the guest state / memory can have changed in
the meantime. */
+
+/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
+ wrapper that deals with the common tags-don't-match case, and a
+ slower out of line general case. Saves a few insns. */
+
+__attribute__((noinline))
+static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
+
+inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
if (e1->tag != e2->tag) return False;
+ return sameIRExprs_aux2(env, e1, e2);
+}
+__attribute__((noinline))
+static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
+{
if (num_nodes_visited++ > NODE_LIMIT) return False;
switch (e1->tag) {
return False;
}
+inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
Bool same;
vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
}
}
- return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
+ return IRStmt_Exit(fcond, st->Ist.Exit.jk,
+ st->Ist.Exit.dst, st->Ist.Exit.offsIP);
}
default:
out->next = subst_Expr( env, in->next );
out->jumpkind = in->jumpkind;
+ out->offsIP = in->offsIP;
return out;
}
= IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
bb->jumpkind
= bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
+ bb->offsIP
+ = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
for (i = i_unconditional_exit; i < bb->stmts_used; i++)
bb->stmts[i] = IRStmt_NoOp();
}
return IRStmt_Exit(
atbSubst_Expr(env, st->Ist.Exit.guard),
st->Ist.Exit.jk,
- st->Ist.Exit.dst
+ st->Ist.Exit.dst,
+ st->Ist.Exit.offsIP
);
case Ist_IMark:
return IRStmt_IMark(st->Ist.IMark.addr,
}
}
-/* notstatic */ void ado_treebuild_BB ( IRSB* bb )
+/* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb )
{
Int i, j, k, m;
Bool stmtPuts, stmtStores, invalidateMe;
IRStmt* st2;
ATmpInfo env[A_NENV];
+ Bool max_ga_known = False;
+ Addr64 max_ga = 0;
+
Int n_tmps = bb->tyenv->types_used;
UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort));
/* Phase 1. Scan forwards in bb, counting use occurrences of each
- temp. Also count occurrences in the bb->next field. */
+ temp. Also count occurrences in the bb->next field. Take the
+ opportunity to also find the maximum guest address in the block,
+ since that will be needed later for deciding when we can safely
+ elide event checks. */
for (i = 0; i < n_tmps; i++)
uses[i] = 0;
for (i = 0; i < bb->stmts_used; i++) {
st = bb->stmts[i];
- if (st->tag == Ist_NoOp)
- continue;
+ switch (st->tag) {
+ case Ist_NoOp:
+ continue;
+ case Ist_IMark: {
+ Int len = st->Ist.IMark.len;
+ Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
+ max_ga_known = True;
+ if (mga > max_ga)
+ max_ga = mga;
+ break;
+ }
+ default:
+ break;
+ }
aoccCount_Stmt( uses, st );
}
aoccCount_Expr(uses, bb->next );
by definition dead? */
bb->next = atbSubst_Expr(env, bb->next);
bb->stmts_used = j;
+
+ return max_ga_known ? max_ga : ~(Addr64)0;
}
void do_deadcode_BB ( IRSB* bb );
/* The tree-builder. Make (approximately) maximal safe trees. bb is
- destructively modified. */
+ destructively modified. Returns (unrelatedly, but useful later on)
+ the guest address of the highest addressed byte from any insn in
+ this block, or Addr64_MAX if unknown (can that ever happen?) */
extern
-void ado_treebuild_BB ( IRSB* bb );
+Addr64 ado_treebuild_BB ( IRSB* bb );
#endif /* ndef __VEX_IR_OPT_H */
vassert(VEX_HOST_WORDSIZE == sizeof(void*));
vassert(VEX_HOST_WORDSIZE == sizeof(HWord));
+ /* These take a lot of space, so make sure we don't have
+ any unnoticed size regressions. */
+ if (VEX_HOST_WORDSIZE == 4) {
+ vassert(sizeof(IRExpr) == 24);
+ vassert(sizeof(IRStmt) == 20 /* x86 */
+ || sizeof(IRStmt) == 24 /* arm */);
+ } else {
+ vassert(sizeof(IRExpr) == 48);
+ vassert(sizeof(IRStmt) == 40);
+ }
+
/* Really start up .. */
vex_debuglevel = debuglevel;
vex_valgrind_support = valgrind_support;
HInstr* (*directReload) ( HInstr*, HReg, Short );
void (*ppInstr) ( HInstr*, Bool );
void (*ppReg) ( HReg );
- HInstrArray* (*iselSB) ( IRSB*, VexArch, VexArchInfo*,
- VexAbiInfo* );
- Int (*emit) ( UChar*, Int, HInstr*, Bool, void*, void* );
+ HInstrArray* (*iselSB) ( IRSB*, VexArch, VexArchInfo*, VexAbiInfo*,
+ Int, Int, Bool, Bool, Addr64 );
+ Int (*emit) ( /*MB_MOD*/Bool*,
+ UChar*, Int, HInstr*, Bool,
+ void*, void*, void*, void* );
IRExpr* (*specHelper) ( HChar*, IRExpr**, IRStmt**, Int );
Bool (*preciseMemExnsFn) ( Int, Int );
HInstrArray* vcode;
HInstrArray* rcode;
Int i, j, k, out_used, guest_sizeB;
- Int offB_TISTART, offB_TILEN;
- UChar insn_bytes[48];
+ Int offB_TISTART, offB_TILEN, offB_GUEST_IP, szB_GUEST_IP;
+ Int offB_HOST_EvC_COUNTER, offB_HOST_EvC_FAILADDR;
+ UChar insn_bytes[64];
IRType guest_word_type;
IRType host_word_type;
- Bool mode64;
+ Bool mode64, chainingAllowed;
+ Addr64 max_ga;
guest_layout = NULL;
available_real_regs = NULL;
host_word_type = Ity_INVALID;
offB_TISTART = 0;
offB_TILEN = 0;
+ offB_GUEST_IP = 0;
+ szB_GUEST_IP = 0;
+ offB_HOST_EvC_COUNTER = 0;
+ offB_HOST_EvC_FAILADDR = 0;
mode64 = False;
+ chainingAllowed = False;
vex_traceflags = vta->traceflags;
vassert(vex_initdone);
- vassert(vta->needs_self_check != NULL);
+ vassert(vta->needs_self_check != NULL);
+ vassert(vta->disp_cp_xassisted != NULL);
+ /* Both the chainers and the indir are either NULL or non-NULL. */
+ if (vta->disp_cp_chain_me_to_slowEP != NULL) {
+ vassert(vta->disp_cp_chain_me_to_fastEP != NULL);
+ vassert(vta->disp_cp_xindir != NULL);
+ chainingAllowed = True;
+ } else {
+ vassert(vta->disp_cp_chain_me_to_fastEP == NULL);
+ vassert(vta->disp_cp_xindir == NULL);
+ }
vexSetAllocModeTEMP_and_clear();
vexAllocSanityCheck();
ppInstr = (void(*)(HInstr*, Bool)) ppX86Instr;
ppReg = (void(*)(HReg)) ppHRegX86;
iselSB = iselSB_X86;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*))
emit_X86Instr;
host_is_bigendian = False;
host_word_type = Ity_I32;
vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps));
- /* jump-to-dispatcher scheme */
- vassert(vta->dispatch_unassisted != NULL);
- vassert(vta->dispatch_assisted != NULL);
break;
case VexArchAMD64:
ppInstr = (void(*)(HInstr*, Bool)) ppAMD64Instr;
ppReg = (void(*)(HReg)) ppHRegAMD64;
iselSB = iselSB_AMD64;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*))
emit_AMD64Instr;
host_is_bigendian = False;
host_word_type = Ity_I64;
vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps));
- /* jump-to-dispatcher scheme */
- vassert(vta->dispatch_unassisted != NULL);
- vassert(vta->dispatch_assisted != NULL);
break;
-
+#if 0
case VexArchPPC32:
mode64 = False;
getAllocableRegs_PPC ( &n_available_real_regs,
vassert(vta->dispatch_unassisted == NULL);
vassert(vta->dispatch_assisted == NULL);
break;
-
+#endif
case VexArchARM:
mode64 = False;
getAllocableRegs_ARM ( &n_available_real_regs,
ppInstr = (void(*)(HInstr*, Bool)) ppARMInstr;
ppReg = (void(*)(HReg)) ppHRegARM;
iselSB = iselSB_ARM;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*))
emit_ARMInstr;
host_is_bigendian = False;
host_word_type = Ity_I32;
vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_host.hwcaps));
- vassert(vta->dispatch_unassisted == NULL);
- vassert(vta->dispatch_assisted == NULL);
- /* return-to-dispatcher scheme */
break;
default:
switch (vta->arch_guest) {
case VexArchX86:
- preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns;
- disInstrFn = disInstr_X86;
- specHelper = guest_x86_spechelper;
- guest_sizeB = sizeof(VexGuestX86State);
- guest_word_type = Ity_I32;
- guest_layout = &x86guest_layout;
- offB_TISTART = offsetof(VexGuestX86State,guest_TISTART);
- offB_TILEN = offsetof(VexGuestX86State,guest_TILEN);
+ preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_X86;
+ specHelper = guest_x86_spechelper;
+ guest_sizeB = sizeof(VexGuestX86State);
+ guest_word_type = Ity_I32;
+ guest_layout = &x86guest_layout;
+ offB_TISTART = offsetof(VexGuestX86State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestX86State,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestX86State,guest_EIP);
+ szB_GUEST_IP = sizeof( ((VexGuestX86State*)0)->guest_EIP );
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestX86State) % 16);
vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART) == 4);
break;
case VexArchAMD64:
- preciseMemExnsFn = guest_amd64_state_requires_precise_mem_exns;
- disInstrFn = disInstr_AMD64;
- specHelper = guest_amd64_spechelper;
- guest_sizeB = sizeof(VexGuestAMD64State);
- guest_word_type = Ity_I64;
- guest_layout = &amd64guest_layout;
- offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART);
- offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN);
+ preciseMemExnsFn = guest_amd64_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_AMD64;
+ specHelper = guest_amd64_spechelper;
+ guest_sizeB = sizeof(VexGuestAMD64State);
+ guest_word_type = Ity_I64;
+ guest_layout = &amd64guest_layout;
+ offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestAMD64State,guest_RIP);
+ szB_GUEST_IP = sizeof( ((VexGuestAMD64State*)0)->guest_RIP );
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestAMD64State) % 16);
vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8);
vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TILEN ) == 8);
vassert(sizeof( ((VexGuestAMD64State*)0)->guest_NRADDR ) == 8);
break;
-
+#if 0
case VexArchPPC32:
preciseMemExnsFn = guest_ppc32_state_requires_precise_mem_exns;
disInstrFn = disInstr_PPC;
vassert(sizeof( ((VexGuestS390XState*)0)->guest_TILEN ) == 8);
vassert(sizeof( ((VexGuestS390XState*)0)->guest_NRADDR ) == 8);
break;
-
+#endif
case VexArchARM:
- preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns;
- disInstrFn = disInstr_ARM;
- specHelper = guest_arm_spechelper;
- guest_sizeB = sizeof(VexGuestARMState);
- guest_word_type = Ity_I32;
- guest_layout = &armGuest_layout;
- offB_TISTART = offsetof(VexGuestARMState,guest_TISTART);
- offB_TILEN = offsetof(VexGuestARMState,guest_TILEN);
+ preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_ARM;
+ specHelper = guest_arm_spechelper;
+ guest_sizeB = sizeof(VexGuestARMState);
+ guest_word_type = Ity_I32;
+ guest_layout = &armGuest_layout;
+ offB_TISTART = offsetof(VexGuestARMState,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestARMState,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestARMState,guest_R15T);
+ szB_GUEST_IP = sizeof( ((VexGuestARMState*)0)->guest_R15T );
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestARMState) % 16);
vassert(sizeof( ((VexGuestARMState*)0)->guest_TISTART) == 4);
VexTranslateResult res;
res.status = VexTransOK;
res.n_sc_extents = 0;
+ res.offs_profInc = -1;
/* yet more sanity checks ... */
if (vta->arch_guest == vta->arch_host) {
vta->needs_self_check,
vta->preamble_function,
offB_TISTART,
- offB_TILEN );
+ offB_TILEN,
+ offB_GUEST_IP,
+ szB_GUEST_IP );
vexAllocSanityCheck();
/* Turn it into virtual-registerised code. Build trees -- this
also throws away any dead bindings. */
- ado_treebuild_BB( irsb );
+ max_ga = ado_treebuild_BB( irsb );
if (vta->finaltidy) {
irsb = vta->finaltidy(irsb);
" Instruction selection "
"------------------------\n");
- vcode = iselSB ( irsb, vta->arch_host, &vta->archinfo_host,
- &vta->abiinfo_both );
+ /* No guest has its IP field at offset zero. If this fails it
+ means some transformation pass somewhere failed to update/copy
+ irsb->offsIP properly. */
+ vassert(irsb->offsIP >= 16);
+
+ vcode = iselSB ( irsb, vta->arch_host,
+ &vta->archinfo_host,
+ &vta->abiinfo_both,
+ offB_HOST_EvC_COUNTER,
+ offB_HOST_EvC_FAILADDR,
+ chainingAllowed,
+ vta->addProfInc,
+ max_ga );
vexAllocSanityCheck();
out_used = 0; /* tracks along the host_bytes array */
for (i = 0; i < rcode->arr_used; i++) {
- if (vex_traceflags & VEX_TRACE_ASM) {
- ppInstr(rcode->arr[i], mode64);
+ HInstr* hi = rcode->arr[i];
+ Bool hi_isProfInc = False;
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) {
+ ppInstr(hi, mode64);
vex_printf("\n");
}
- j = (*emit)( insn_bytes, sizeof insn_bytes, rcode->arr[i], mode64,
- vta->dispatch_unassisted, vta->dispatch_assisted );
- if (vex_traceflags & VEX_TRACE_ASM) {
+ j = emit( &hi_isProfInc,
+ insn_bytes, sizeof insn_bytes, hi, mode64,
+ vta->disp_cp_chain_me_to_slowEP,
+ vta->disp_cp_chain_me_to_fastEP,
+ vta->disp_cp_xindir,
+ vta->disp_cp_xassisted );
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) {
for (k = 0; k < j; k++)
if (insn_bytes[k] < 16)
vex_printf("0%x ", (UInt)insn_bytes[k]);
vex_printf("%x ", (UInt)insn_bytes[k]);
vex_printf("\n\n");
}
- if (out_used + j > vta->host_bytes_size) {
+ if (UNLIKELY(out_used + j > vta->host_bytes_size)) {
vexSetAllocModeTEMP_and_clear();
vex_traceflags = 0;
res.status = VexTransOutputFull;
return res;
}
- for (k = 0; k < j; k++) {
- vta->host_bytes[out_used] = insn_bytes[k];
- out_used++;
+ if (UNLIKELY(hi_isProfInc)) {
+ vassert(vta->addProfInc); /* else where did it come from? */
+ vassert(res.offs_profInc == -1); /* there can be only one (tm) */
+ vassert(out_used >= 0);
+ res.offs_profInc = out_used;
+ }
+ { UChar* dst = &vta->host_bytes[out_used];
+ for (k = 0; k < j; k++) {
+ dst[k] = insn_bytes[k];
+ }
+ out_used += j;
}
vassert(out_used <= vta->host_bytes_size);
}
}
+/* --------- Chain/Unchain XDirects. --------- */
+
+VexInvalRange LibVEX_Chain ( VexArch arch_host,
+ void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to )
+{
+ VexInvalRange (*chainXDirect)(void*, void*, void*) = NULL;
+ switch (arch_host) {
+ case VexArchX86:
+ chainXDirect = chainXDirect_X86; break;
+ case VexArchAMD64:
+ chainXDirect = chainXDirect_AMD64; break;
+ case VexArchARM:
+ chainXDirect = chainXDirect_ARM; break;
+ default:
+ vassert(0);
+ }
+ vassert(chainXDirect);
+ VexInvalRange vir
+ = chainXDirect(place_to_chain, disp_cp_chain_me_EXPECTED,
+ place_to_jump_to);
+ return vir;
+}
+
+VexInvalRange LibVEX_UnChain ( VexArch arch_host,
+ void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me )
+{
+ VexInvalRange (*unchainXDirect)(void*, void*, void*) = NULL;
+ switch (arch_host) {
+ case VexArchX86:
+ unchainXDirect = unchainXDirect_X86; break;
+ case VexArchAMD64:
+ unchainXDirect = unchainXDirect_AMD64; break;
+ case VexArchARM:
+ unchainXDirect = unchainXDirect_ARM; break;
+ default:
+ vassert(0);
+ }
+ vassert(unchainXDirect);
+ VexInvalRange vir
+ = unchainXDirect(place_to_unchain, place_to_jump_to_EXPECTED,
+ disp_cp_chain_me);
+ return vir;
+}
+
+Int LibVEX_evCheckSzB ( VexArch arch_host )
+{
+ static Int cached = 0; /* DO NOT MAKE NON-STATIC */
+ if (UNLIKELY(cached == 0)) {
+ switch (arch_host) {
+ case VexArchX86:
+ cached = evCheckSzB_X86(); break;
+ case VexArchAMD64:
+ cached = evCheckSzB_AMD64(); break;
+ case VexArchARM:
+ cached = evCheckSzB_ARM(); break;
+ default:
+ vassert(0);
+ }
+ }
+ return cached;
+}
+
+VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host,
+ void* place_to_patch,
+ ULong* location_of_counter )
+{
+ VexInvalRange (*patchProfInc)(void*,ULong*) = NULL;
+ switch (arch_host) {
+ case VexArchX86:
+ patchProfInc = patchProfInc_X86; break;
+ case VexArchAMD64:
+ patchProfInc = patchProfInc_AMD64; break;
+ case VexArchARM:
+ patchProfInc = patchProfInc_ARM; break;
+ default:
+ vassert(0);
+ }
+ vassert(patchProfInc);
+ VexInvalRange vir
+ = patchProfInc(place_to_patch, location_of_counter);
+ return vir;
+}
+
+
/* --------- Emulation warnings. --------- */
HChar* LibVEX_EmWarn_string ( VexEmWarn ew )
VexTransAccessFail, VexTransOutputFull } status;
/* The number of extents that have a self-check (0 to 3) */
UInt n_sc_extents;
+ /* Offset in generated code of the profile inc, or -1 if
+ none. Needed for later patching. */
+ Int offs_profInc;
}
VexTranslateResult;
/* IN: debug: trace vex activity at various points */
Int traceflags;
+ /* IN: profiling: add a 64 bit profiler counter increment to the
+ translation? */
+ Bool addProfInc;
+
/* IN: address of the dispatcher entry points. Describes the
places where generated code should jump to at the end of each
bb.
The aim is to get back and forth between translations and the
dispatcher without creating memory traffic to store return
addresses.
+
+ FIXME: update this comment
*/
- void* dispatch_unassisted;
- void* dispatch_assisted;
+ void* disp_cp_chain_me_to_slowEP;
+ void* disp_cp_chain_me_to_fastEP;
+ void* disp_cp_xindir;
+ void* disp_cp_xassisted;
}
VexTranslateArgs;
would not be the result. Therefore chase_into_ok should disallow
following into #2. That will force the caller to eventually
request a new translation starting at #2, at which point Vex will
- correctly observe the make-a-self-check flag. */
+ correctly observe the make-a-self-check flag.
+
+ FIXME: is this still up to date? */
+
+
+/*-------------------------------------------------------*/
+/*--- Patch existing translations ---*/
+/*-------------------------------------------------------*/
+
+/* Indicates a host address range for which callers to the functions
+ below must request I-D cache syncing after the call. ::len == 0 is
+ ambiguous -- it could mean either zero bytes or the entire address
+ space, so we mean the former. */
+typedef
+ struct {
+ HWord start;
+ HWord len;
+ }
+ VexInvalRange;
+
+/* Chain an XDirect jump located at place_to_chain so it jumps to
+ place_to_jump_to. It is expected (and checked) that this site
+ currently contains a call to the dispatcher specified by
+ disp_cp_chain_me_EXPECTED. */
+extern
+VexInvalRange LibVEX_Chain ( VexArch arch_host,
+ void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to );
+
+/* Undo an XDirect jump located at place_to_unchain, so it is
+ converted back into a call to disp_cp_chain_me. It is expected
+ (and checked) that this site currently contains a jump directly to
+ the address specified by place_to_jump_to_EXPECTED. */
+extern
+VexInvalRange LibVEX_UnChain ( VexArch arch_host,
+ void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me );
+
+/* Returns a constant -- the size of the event check that is put at
+ the start of every translation. This makes it possible to
+ calculate the fast entry point address if the slow entry point
+ address is known (the usual case), or vice versa. */
+extern
+Int LibVEX_evCheckSzB ( VexArch arch_host );
+
+
+/* Patch the counter location into an existing ProfInc point. The
+ specified point is checked to make sure it is plausible. */
+extern
+VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host,
+ void* place_to_patch,
+ ULong* location_of_counter );
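A hedged sketch of how a client might drive the chaining API; invalidate_icache is a hypothetical host-specific routine, not something LibVEX provides.

   /* Illustrative only. */
   static void example_chain ( VexArch arch_host,
                               void* site,
                               void* chain_me_stub,
                               void* target_translation )
   {
      VexInvalRange vir
         = LibVEX_Chain( arch_host, site, chain_me_stub,
                         target_translation );
      if (vir.len > 0)
         invalidate_icache( (void*)vir.start, vir.len );
   }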
/*-------------------------------------------------------*/
typedef
struct {
- /* 0 */ ULong guest_RAX;
- /* 8 */ ULong guest_RCX;
- /* 16 */ ULong guest_RDX;
- /* 24 */ ULong guest_RBX;
- /* 32 */ ULong guest_RSP;
- /* 40 */ ULong guest_RBP;
- /* 48 */ ULong guest_RSI;
- /* 56 */ ULong guest_RDI;
- /* 64 */ ULong guest_R8;
- /* 72 */ ULong guest_R9;
- /* 80 */ ULong guest_R10;
- /* 88 */ ULong guest_R11;
- /* 96 */ ULong guest_R12;
- /* 104 */ ULong guest_R13;
- /* 112 */ ULong guest_R14;
- /* 120 */ ULong guest_R15;
+ /* Event check fail addr, counter, and padding to make RAX
+ 16-aligned. */
+ /* 0 */ ULong host_EvC_FAILADDR;
+ /* 8 */ UInt host_EvC_COUNTER;
+ /* 12 */ UInt pad0;
+ /* 16 */ ULong guest_RAX;
+ /* 24 */ ULong guest_RCX;
+ /* 32 */ ULong guest_RDX;
+ /* 40 */ ULong guest_RBX;
+ /* 48 */ ULong guest_RSP;
+ /* 56 */ ULong guest_RBP;
+ /* 64 */ ULong guest_RSI;
+ /* 72 */ ULong guest_RDI;
+ /* 80 */ ULong guest_R8;
+ /* 88 */ ULong guest_R9;
+ /* 96 */ ULong guest_R10;
+ /* 104 */ ULong guest_R11;
+ /* 112 */ ULong guest_R12;
+ /* 120 */ ULong guest_R13;
+ /* 128 */ ULong guest_R14;
+ /* 136 */ ULong guest_R15;
/* 4-word thunk used to calculate O S Z A C P flags. */
- /* 128 */ ULong guest_CC_OP;
- /* 136 */ ULong guest_CC_DEP1;
- /* 144 */ ULong guest_CC_DEP2;
- /* 152 */ ULong guest_CC_NDEP;
+ /* 144 */ ULong guest_CC_OP;
+ /* 152 */ ULong guest_CC_DEP1;
+ /* 160 */ ULong guest_CC_DEP2;
+ /* 168 */ ULong guest_CC_NDEP;
/* The D flag is stored here, encoded as either -1 or +1 */
- /* 160 */ ULong guest_DFLAG;
- /* 168 */ ULong guest_RIP;
+ /* 176 */ ULong guest_DFLAG;
+ /* 184 */ ULong guest_RIP;
/* Bit 18 (AC) of eflags stored here, as either 0 or 1. */
/* ... */ ULong guest_ACFLAG;
/* Bit 21 (ID) of eflags stored here, as either 0 or 1. */
- /* 176 */ ULong guest_IDFLAG;
+ /* 192 */ ULong guest_IDFLAG;
/* Probably a lot more stuff too.
D,ID flags
16 128-bit SSE registers
/* HACK to make tls on amd64-linux work. %fs only ever seems to
hold zero, and so guest_FS_ZERO holds the 64-bit offset
associated with a %fs value of zero. */
- /* 184 */ ULong guest_FS_ZERO;
+ /* 200 */ ULong guest_FS_ZERO;
/* XMM registers. Note that these must be allocated
consecutively in order that the SSE4.2 PCMP{E,I}STR{I,M}
helpers can treat them as an array. XMM16 is a fake reg used
as an intermediary in handling aforementioned insns. */
- /* 192 */ULong guest_SSEROUND;
- /* 200 */U128 guest_XMM0;
+ /* 208 */ULong guest_SSEROUND;
+ /* 216 */U128 guest_XMM0;
U128 guest_XMM1;
U128 guest_XMM2;
U128 guest_XMM3;
/* Note. Setting guest_FTOP to be ULong messes up the
delicately-balanced PutI/GetI optimisation machinery.
Therefore best to leave it as a UInt. */
- /* 456 */UInt guest_FTOP;
+ UInt guest_FTOP;
ULong guest_FPREG[8];
- /* 528 */ UChar guest_FPTAG[8];
- /* 536 */ ULong guest_FPROUND;
- /* 544 */ ULong guest_FC3210;
+ UChar guest_FPTAG[8];
+ ULong guest_FPROUND;
+ ULong guest_FC3210;
/* Emulation warnings */
- /* 552 */ UInt guest_EMWARN;
+ UInt guest_EMWARN;
/* Translation-invalidation area description. Not used on amd64
(there is no invalidate-icache insn), but needed so as to
ULong guest_IP_AT_SYSCALL;
/* Padding to make it have an 16-aligned size */
- ULong padding;
+ ULong pad1;
}
VexGuestAMD64State;
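Since host_EvC_FAILADDR, host_EvC_COUNTER and pad0 now occupy the first 16 bytes, every guest offset above has shifted by 16 (the ARM and x86 states below follow the same pattern, shifted by 8). The commented offsets can be sanity-checked at compile time, for example with C11 _Static_assert, purely as an illustration:

#include <stddef.h>                 /* offsetof */
#include "libvex_guest_amd64.h"     /* VexGuestAMD64State */

_Static_assert( offsetof(VexGuestAMD64State, guest_RAX)   == 16,
                "guest_RAX must follow the 16-byte EvC header" );
_Static_assert( offsetof(VexGuestAMD64State, guest_CC_OP) == 144,
                "flag-thunk offsets shifted by 16" );
_Static_assert( offsetof(VexGuestAMD64State, guest_RIP)   == 184,
                "guest_RIP offset shifted by 16" );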
typedef
struct {
/* 0 */
+ /* Event check fail addr and counter. */
+ UInt host_EvC_FAILADDR; /* 0 */
+ UInt host_EvC_COUNTER; /* 4 */
UInt guest_R0;
UInt guest_R1;
UInt guest_R2;
/* 4-word thunk used to calculate N(sign) Z(zero) C(carry,
unsigned overflow) and V(signed overflow) flags. */
- /* 64 */
+ /* 72 */
UInt guest_CC_OP;
UInt guest_CC_DEP1;
UInt guest_CC_DEP2;
program counter at the last syscall insn (int 0x80/81/82,
sysenter, syscall, svc). Used when backing up to restart a
syscall that has been interrupted by a signal. */
- /* 116 */
+ /* 124 */
UInt guest_IP_AT_SYSCALL;
/* VFP state. D0 .. D15 must be 8-aligned. */
- /* 120 -- I guess there's 4 bytes of padding just prior to this? */
+ /* 128 */
ULong guest_D0;
ULong guest_D1;
ULong guest_D2;
/* Padding to make it have an 16-aligned size */
UInt padding1;
- UInt padding2;
- UInt padding3;
}
VexGuestARMState;
*/
typedef
struct {
- UInt guest_EAX; /* 0 */
+ /* Event check fail addr and counter. */
+ UInt host_EvC_FAILADDR; /* 0 */
+ UInt host_EvC_COUNTER; /* 4 */
+ UInt guest_EAX; /* 8 */
UInt guest_ECX;
UInt guest_EDX;
UInt guest_EBX;
UInt guest_ESP;
UInt guest_EBP;
UInt guest_ESI;
- UInt guest_EDI; /* 28 */
+ UInt guest_EDI; /* 36 */
/* 4-word thunk used to calculate O S Z A C P flags. */
- UInt guest_CC_OP; /* 32 */
+ UInt guest_CC_OP; /* 40 */
UInt guest_CC_DEP1;
UInt guest_CC_DEP2;
- UInt guest_CC_NDEP; /* 44 */
+ UInt guest_CC_NDEP; /* 52 */
/* The D flag is stored here, encoded as either -1 or +1 */
- UInt guest_DFLAG; /* 48 */
+ UInt guest_DFLAG; /* 56 */
/* Bit 21 (ID) of eflags stored here, as either 0 or 1. */
- UInt guest_IDFLAG; /* 52 */
+ UInt guest_IDFLAG; /* 60 */
/* Bit 18 (AC) of eflags stored here, as either 0 or 1. */
- UInt guest_ACFLAG; /* 56 */
+ UInt guest_ACFLAG; /* 64 */
/* EIP */
- UInt guest_EIP; /* 60 */
+ UInt guest_EIP; /* 68 */
/* FPU */
- ULong guest_FPREG[8]; /* 64 */
- UChar guest_FPTAG[8]; /* 128 */
- UInt guest_FPROUND; /* 136 */
- UInt guest_FC3210; /* 140 */
- UInt guest_FTOP; /* 144 */
+ ULong guest_FPREG[8]; /* 72 */
+ UChar guest_FPTAG[8]; /* 136 */
+ UInt guest_FPROUND; /* 144 */
+ UInt guest_FC3210; /* 148 */
+ UInt guest_FTOP; /* 152 */
/* SSE */
- UInt guest_SSEROUND; /* 148 */
- U128 guest_XMM0; /* 152 */
+ UInt guest_SSEROUND; /* 156 */
+ U128 guest_XMM0; /* 160 */
U128 guest_XMM1;
U128 guest_XMM2;
U128 guest_XMM3;
/* Padding to make it have an 16-aligned size */
UInt padding1;
- UInt padding2;
- UInt padding3;
}
VexGuestX86State;
guest to restart a syscall that has been interrupted by a signal.
*/
typedef
- enum {
- Ijk_Boring=0x16000, /* not interesting; just goto next */
+ enum {
+ Ijk_INVALID=0x16000,
+ Ijk_Boring, /* not interesting; just goto next */
Ijk_Call, /* guest is doing a call */
Ijk_Ret, /* guest is doing a return */
Ijk_ClientReq, /* do guest client req before continuing */
/* Conditional exit from the middle of an IRSB.
ppIRStmt output: if (<guard>) goto {<jk>} <dst>
eg. if (t69) goto {Boring} 0x4000AAA:I32
+ If <guard> is true, the guest state is also updated by
+ PUT-ing <dst> at <offsIP>. This is done because a
+ taken exit must update the guest program counter.
*/
struct {
IRExpr* guard; /* Conditional expression */
IRJumpKind jk; /* Jump kind */
IRConst* dst; /* Jump target (constant only) */
+ Int offsIP; /* Guest state offset for IP */
} Exit;
} Ist;
}
IRExpr* addr, IRExpr* storedata );
extern IRStmt* IRStmt_Dirty ( IRDirty* details );
extern IRStmt* IRStmt_MBE ( IRMBusEvent event );
-extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
+extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
+ Int offsIP );
+// TEMP HACK
+#define IRStmt_Exit3(__guard,__jk,__dst) IRStmt_Exit(__guard,__jk,__dst,0)
+
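In the front ends this means every side exit now names the guest IP slot explicitly. A sketch in the style of the amd64 front end, where stmt() appends a statement to the block under construction, OFFB_RIP is that front end's offset of guest_RIP, and emit_side_exit is an illustrative wrapper:

static void emit_side_exit ( IRExpr* guard, Addr64 dst )
{
   /* ppIRStmt form: if (guard) goto {Boring} dst
      and, when taken, guest_RIP (at OFFB_RIP) is PUT with dst. */
   stmt( IRStmt_Exit( guard,
                      Ijk_Boring,
                      IRConst_U64( dst ),
                      OFFB_RIP ) );
}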
/* Deep-copy an IRStmt. */
extern IRStmt* deepCopyIRStmt ( IRStmt* );
executes all the way to the end, without a side exit
- An indication of any special actions (JumpKind) needed
for this final jump.
+ - Offset of the IP field in the guest state. The field at this
+ offset is updated with 'next' before the final jump is done.
"IRSB" stands for "IR Super Block".
*/
Int stmts_used;
IRExpr* next;
IRJumpKind jumpkind;
+ Int offsIP;
}
IRSB;
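So the block-final jump is now described by three pieces of state rather than two. A sketch of how the code that finalises a superblock might fill them in, with mkU64 and OFFB_RIP as in the amd64 front end and finish_block an illustrative name:

static void finish_block ( /*MOD*/IRSB* irsb, Addr64 next_addr )
{
   irsb->next     = mkU64( next_addr );  /* where execution continues */
   irsb->jumpkind = Ijk_Boring;          /* no special action needed  */
   irsb->offsIP   = OFFB_RIP;            /* guest slot updated with
                                            'next' before the jump    */
}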
#define VEX_TRC_JMP_SYS_SYSENTER 79 /* do syscall before continuing */
+#define VEX_TRC_JMP_BORING 95 /* return to sched, but just
+ keep going; no special action */
+
#endif /* ndef __LIBVEX_TRC_VALUES_H */
/*---------------------------------------------------------------*/
vta.do_self_check = False;
vta.traceflags = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
vta.dispatch = NULL;
+ vta.addProfInc = False;
tres = LibVEX_Translate ( &vta );
VexTranslateArgs vta;
if (argc != 2) {
- fprintf(stderr, "usage: vex file.org\n");
+ fprintf(stderr, "usage: vex file.orig\n");
exit(1);
}
f = fopen(argv[1], "r");
vai_ppc32.ppc_cache_line_szB = 128;
LibVEX_default_VexAbiInfo(&vbi);
+ vbi.guest_stack_redzone_size = 128;
/* ----- Set up args for LibVEX_Translate ----- */
+
#if 0 /* ppc32 -> ppc32 */
vta.arch_guest = VexArchPPC32;
vta.archinfo_guest = vai_ppc32;
vta.arch_host = VexArchX86;
vta.archinfo_host = vai_x86;
#endif
+
vta.abiinfo_both = vbi;
vta.guest_bytes = origbuf;
vta.guest_bytes_addr = (Addr64)orig_addr;
vta.host_bytes = transbuf;
vta.host_bytes_size = N_TRANSBUF;
vta.host_bytes_used = &trans_used;
-#if 0 /* no instrumentation */
+
+#if 1 /* no instrumentation */
vta.instrument1 = NULL;
vta.instrument2 = NULL;
#endif
vta.instrument1 = ac_instrument;
vta.instrument2 = NULL;
#endif
-#if 1 /* memcheck */
+#if 0 /* memcheck */
vta.instrument1 = mc_instrument;
vta.instrument2 = NULL;
#endif
vta.needs_self_check = needs_self_check;
vta.preamble_function = NULL;
vta.traceflags = TEST_FLAGS;
-#if 1 /* x86, amd64 hosts */
- vta.dispatch_unassisted = (void*)0x12345678;
- vta.dispatch_assisted = (void*)0x12345678;
-#else /* ppc32, ppc64 hosts */
- vta.dispatch = NULL;
-#endif
+ vta.addProfInc = False;
+
+ vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
+ vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
+ vta.disp_cp_xindir = (void*)0x1234567A;
+ vta.disp_cp_xassisted = (void*)0x1234567B;
vta.finaltidy = NULL;
/* Copy this file (test_main.h.in) to test_main.h, and edit */
/* DEBUG RUN, ON V */
-#if 0
+#if 1
#define TEST_VSUPPORT True
#define TEST_N_ITERS 1
#define TEST_N_BBS 1
-#define TEST_FLAGS (1<<7) /* |(1<<2)|(1<<1) */
+#define TEST_FLAGS (1<<7)|(0<<6)|(1<<3)|(0<<2)|(0<<1)|(0<<0)
#endif
/* CHECKING RUN, ON V */
-#if 1
+#if 0
#define TEST_VSUPPORT True
#define TEST_N_ITERS 1
#define TEST_N_BBS 100000