/*------------------------------------------------------------*/
static
-HChar* sorbTxt ( Prefix pfx )
+HChar* segRegTxt ( Prefix pfx )
{
if (pfx & PFX_CS) return "%cs:";
if (pfx & PFX_DS) return "%ds:";
case 0x00: case 0x01: case 0x02: case 0x03:
/* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
{ UChar rm = toUChar(mod_reg_rm & 7);
- DIS(buf, "%s(%s)", sorbTxt(pfx), nameIRegRexB(8,pfx,rm));
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
*len = 1;
return disAMode_copy2tmp(
handleAddrOverrides(pfx, getIRegRexB(8,pfx,rm)));
{ UChar rm = toUChar(mod_reg_rm & 7);
Long d = getSDisp8(delta);
if (d == 0) {
- DIS(buf, "%s(%s)", sorbTxt(pfx), nameIRegRexB(8,pfx,rm));
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
} else {
- DIS(buf, "%s%lld(%s)", sorbTxt(pfx), d, nameIRegRexB(8,pfx,rm));
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
}
*len = 2;
return disAMode_copy2tmp(
/* ! 14 */ case 0x15: case 0x16: case 0x17:
{ UChar rm = toUChar(mod_reg_rm & 7);
Long d = getSDisp32(delta);
- DIS(buf, "%s%lld(%s)", sorbTxt(pfx), d, nameIRegRexB(8,pfx,rm));
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
*len = 5;
return disAMode_copy2tmp(
handleAddrOverrides(pfx,
case 0x05:
{ Long d = getSDisp32(delta);
*len = 5;
- DIS(buf, "%s%lld(%%rip)", sorbTxt(pfx), d);
+ DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
/* We need to know the next instruction's start address.
Try and figure out what it is, record the guess, and ask
the top-level driver logic (bbToIR_AMD64) to check we
if ((!index_is_SP) && (!base_is_BPor13)) {
if (scale == 0) {
- DIS(buf, "%s(%s,%s)", sorbTxt(pfx),
+ DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r));
} else {
- DIS(buf, "%s(%s,%s,%d)", sorbTxt(pfx),
+ DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r), 1<<scale);
}
if ((!index_is_SP) && base_is_BPor13) {
Long d = getSDisp32(delta);
- DIS(buf, "%s%lld(,%s,%d)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
nameIReg64rexX(pfx,index_r), 1<<scale);
*len = 6;
return
}
if (index_is_SP && (!base_is_BPor13)) {
- DIS(buf, "%s(%s)", sorbTxt(pfx), nameIRegRexB(8,pfx,base_r));
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
*len = 2;
return disAMode_copy2tmp(
handleAddrOverrides(pfx, getIRegRexB(8,pfx,base_r)));
if (index_is_SP && base_is_BPor13) {
Long d = getSDisp32(delta);
- DIS(buf, "%s%lld", sorbTxt(pfx), d);
+ DIS(buf, "%s%lld", segRegTxt(pfx), d);
*len = 6;
return disAMode_copy2tmp(
handleAddrOverrides(pfx, mkU64(d)));
Long d = getSDisp8(delta+1);
if (index_r == R_RSP && 0==getRexX(pfx)) {
- DIS(buf, "%s%lld(%s)", sorbTxt(pfx),
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
d, nameIRegRexB(8,pfx,base_r));
*len = 3;
return disAMode_copy2tmp(
binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
} else {
if (scale == 0) {
- DIS(buf, "%s%lld(%s,%s)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r));
} else {
- DIS(buf, "%s%lld(%s,%s,%d)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r), 1<<scale);
}
Long d = getSDisp32(delta+1);
if (index_r == R_RSP && 0==getRexX(pfx)) {
- DIS(buf, "%s%lld(%s)", sorbTxt(pfx),
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
d, nameIRegRexB(8,pfx,base_r));
*len = 6;
return disAMode_copy2tmp(
binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
} else {
if (scale == 0) {
- DIS(buf, "%s%lld(%s,%s)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r));
} else {
- DIS(buf, "%s%lld(%s,%s,%d)", sorbTxt(pfx), d,
+ DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
nameIRegRexB(8,pfx,base_r),
nameIReg64rexX(pfx,index_r), 1<<scale);
}
}
+/* Helper for deciding whether a given insn (starting at the opcode
+ byte) may validly be used with a LOCK prefix. The following insns
+ may be used with LOCK when their destination operand is in memory.
+ Note, this is slightly too permissive. Oh well. Note also, AFAICS
+ this is exactly the same for both 32-bit and 64-bit mode.
+
+ ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03
+ OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B
+ ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13
+ SBB 80 /3, 81 /3, 83 /3, 18, 19, 1A, 1B
+ AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23
+ SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B
+ XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33
+
+ DEC FE /1, FF /1
+ INC FE /0, FF /0
+
+ NEG F6 /3, F7 /3
+ NOT F6 /2, F7 /2
+
+ XCHG 86, 87
+
+ BTC 0F BB, 0F BA /7
+ BTR 0F B3, 0F BA /6
+ BTS 0F AB, 0F BA /5
+
+ CMPXCHG 0F B0, 0F B1
+ CMPXCHG8B 0F C7 /1
+
+ XADD 0F C0, 0F C1
+*/
+static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
+{
+ switch (opc[0]) {
+ case 0x00: case 0x01: case 0x02: case 0x03: return True;
+ case 0x08: case 0x09: case 0x0A: case 0x0B: return True;
+ case 0x10: case 0x11: case 0x12: case 0x13: return True;
+ case 0x18: case 0x19: case 0x1A: case 0x1B: return True;
+ case 0x20: case 0x21: case 0x22: case 0x23: return True;
+ case 0x28: case 0x29: case 0x2A: case 0x2B: return True;
+ case 0x30: case 0x31: case 0x32: case 0x33: return True;
+
+ /* Grp1 (80/81/83): sub-opcodes /0 .. /6 only, per table above */
+ case 0x80: case 0x81: case 0x83:
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6)
+ return True;
+ break;
+
+ /* FE/FF: /0 (INC) and /1 (DEC) only */
+ case 0xFE: case 0xFF:
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1)
+ return True;
+ break;
+
+ /* F6/F7: /2 (NOT) and /3 (NEG) only */
+ case 0xF6: case 0xF7:
+ if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3)
+ return True;
+ break;
+
+ case 0x86: case 0x87:
+ return True;
+
+ case 0x0F: {
+ switch (opc[1]) {
+ case 0xBB: case 0xB3: case 0xAB:
+ return True;
+ /* 0F BA: /5 (BTS), /6 (BTR), /7 (BTC) only */
+ case 0xBA:
+ if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7)
+ return True;
+ break;
+ case 0xB0: case 0xB1:
+ return True;
+ /* 0F C7: /1 (CMPXCHG8B) only */
+ case 0xC7:
+ if (gregLO3ofRM(opc[2]) == 1)
+ return True;
+ break;
+ case 0xC0: case 0xC1:
+ return True;
+ default:
+ break;
+ } /* switch (opc[1]) */
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (opc[0]) */
+
+ return False;
+}
+
+
/*------------------------------------------------------------*/
/*--- Disassemble a single instruction ---*/
/*------------------------------------------------------------*/
/* pfx holds the summary of prefixes. */
Prefix pfx = PFX_EMPTY;
+ /* do we need follow the insn with MBusEvent(BusUnlock) ? */
+ Bool unlock_bus_after_insn = False;
+
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 0;
/* Kludge re LOCK prefixes. We assume here that all code generated
by Vex is going to be run in a single-threaded context, in other
words that concurrent executions of Vex-generated translations
- will not happen. That is certainly the case for how the
- Valgrind-3.0 code line uses Vex. Given that assumption, it
- seems safe to ignore LOCK prefixes since there will never be any
- other thread running at the same time as this one. However, at
- least emit a memory fence on the basis that it would at least be
- prudent to flush any memory transactions from this thread as far
- as possible down the memory hierarchy. */
+ will not happen. So we don't need to worry too much about
+ preserving atomicity. However, mark the fact that the notional
+ hardware bus lock is being acquired (and, after the insn,
+ released), so that thread checking tools know this is a locked
+ insn.
+
+ We check for, and immediately reject, (most) inappropriate uses
+ of the LOCK prefix. Later (at decode_failure: and
+ decode_success:), if we've added a BusLock event, then we will
+ follow up with a BusUnlock event. How do we know execution will
+ actually ever get to the BusUnlock event? Because
+ can_be_used_with_LOCK_prefix rejects all control-flow changing
+ instructions.
+
+ One loophole, though: if a LOCK prefix insn (seg)faults, then
+ the BusUnlock event will never be reached. This could cause
+ tools which track bus hardware lock to lose track. Really, we
+ should explicitly release the lock after every insn, but that's
+ obviously way too expensive. Really, any tool which tracks the
+ state of the bus lock needs to ask V's core/tool interface to
+ notify it of signal deliveries. On delivery of SIGSEGV to the
+ guest, the tool will be notified, in which case it should
+ release the bus hardware lock if it is held.
+
+ Note, guest-x86/toIR.c contains identical logic.
+ */
if (pfx & PFX_LOCK) {
- /* vex_printf("vex amd64->IR: ignoring LOCK prefix on: ");
- insn_verbose = True; */
- stmt( IRStmt_MFence() );
+ if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
+ stmt( IRStmt_MBE(Imbe_BusLock) );
+ unlock_bus_after_insn = True;
+ DIP("lock ");
+ } else {
+ goto decode_failure;
+ }
}
delta += 3;
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("sfence\n");
goto decode_success;
}
delta += 3;
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? "l" : "m");
goto decode_success;
}
assign( addr, handleAddrOverrides(pfx, mkU64(d64)) );
putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
- sorbTxt(pfx), d64,
+ segRegTxt(pfx), d64,
nameIRegRAX(sz));
break;
assign( addr, handleAddrOverrides(pfx, mkU64(d64)) );
storeLE( mkexpr(addr), getIRegRAX(sz) );
DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
- sorbTxt(pfx), d64);
+ segRegTxt(pfx), d64);
break;
/* XXXX be careful here with moves to AH/BH/CH/DH */
/* ------------------------ XCHG ----------------------- */
+ /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
+ prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock)
+ and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is
+ used with an explicit LOCK prefix, we don't want to end up with
+ two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by
+ the generic LOCK logic at the top of disInstr. */
case 0x86: /* XCHG Gb,Eb */
sz = 1;
/* Fall through ... */
nameISize(sz), nameIRegG(sz, pfx, modrm),
nameIRegE(sz, pfx, modrm));
} else {
+ /* Need to add IRStmt_MBE(Imbe_BusLock). */
+ if (pfx & PFX_LOCK) {
+ /* check it's already been taken care of */
+ vassert(unlock_bus_after_insn);
+ } else {
+ vassert(!unlock_bus_after_insn);
+ stmt( IRStmt_MBE(Imbe_BusLock) );
+ unlock_bus_after_insn = True;
+ }
+ /* Because unlock_bus_after_insn is now True, generic logic
+ at the bottom of disInstr will add the
+ IRStmt_MBE(Imbe_BusUnlock). */
addr = disAMode ( &alen, pfx, delta, dis_buf, 0 );
assign( t1, loadLE(ty, mkexpr(addr)) );
assign( t2, getIRegG(sz, pfx, modrm) );
stmt( IRStmt_Dirty(d) );
/* CPUID is a serialising insn. So, just in case someone is
using it as a memory fence ... */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("cpuid\n");
break;
}
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
+ if (unlock_bus_after_insn)
+ stmt( IRStmt_MBE(Imbe_BusUnlock) );
jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
dres.whatNext = Dis_StopHere;
dres.len = 0;
decode_success:
/* All decode successes end up here. */
DIP("\n");
+ if (unlock_bus_after_insn)
+ stmt( IRStmt_MBE(Imbe_BusUnlock) );
dres.len = (Int)toUInt(delta - delta_start);
return dres;
}
return False;
}
DIP("isync\n");
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
break;
/* X-Form */
}
DIP("eieio\n");
/* Insert a memory fence, just to be on the safe side. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
break;
case 0x014: // lwarx (Load Word and Reserve Indexed, PPC32 p458)
DIP("%ssync\n", flag_L == 1 ? "lw" : "");
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
break;
/* 64bit Memsync */
putGST( PPC_GST_TILEN, mkSzImm(ty, lineszB) );
/* be paranoid ... */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
irsb->jumpkind = Ijk_TInval;
irsb->next = mkSzImm(ty, nextInsnAddr());
}
+/* Helper for deciding whether a given insn (starting at the opcode
+ byte) may validly be used with a LOCK prefix. The following insns
+ may be used with LOCK when their destination operand is in memory.
+ Note, this is slightly too permissive. Oh well. Note also, AFAICS
+ this is exactly the same for both 32-bit and 64-bit mode.
+
+ ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03
+ OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B
+ ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13
+ SBB 80 /3, 81 /3, 83 /3, 18, 19, 1A, 1B
+ AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23
+ SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B
+ XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33
+
+ DEC FE /1, FF /1
+ INC FE /0, FF /0
+
+ NEG F6 /3, F7 /3
+ NOT F6 /2, F7 /2
+
+ XCHG 86, 87
+
+ BTC 0F BB, 0F BA /7
+ BTR 0F B3, 0F BA /6
+ BTS 0F AB, 0F BA /5
+
+ CMPXCHG 0F B0, 0F B1
+ CMPXCHG8B 0F C7 /1
+
+ XADD 0F C0, 0F C1
+*/
+static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
+{
+ switch (opc[0]) {
+ case 0x00: case 0x01: case 0x02: case 0x03: return True;
+ case 0x08: case 0x09: case 0x0A: case 0x0B: return True;
+ case 0x10: case 0x11: case 0x12: case 0x13: return True;
+ case 0x18: case 0x19: case 0x1A: case 0x1B: return True;
+ case 0x20: case 0x21: case 0x22: case 0x23: return True;
+ case 0x28: case 0x29: case 0x2A: case 0x2B: return True;
+ case 0x30: case 0x31: case 0x32: case 0x33: return True;
+
+ /* Grp1 (80/81/83): sub-opcodes /0 .. /6 only, per table above */
+ case 0x80: case 0x81: case 0x83:
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6)
+ return True;
+ break;
+
+ /* FE/FF: /0 (INC) and /1 (DEC) only */
+ case 0xFE: case 0xFF:
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1)
+ return True;
+ break;
+
+ /* F6/F7: /2 (NOT) and /3 (NEG) only */
+ case 0xF6: case 0xF7:
+ if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3)
+ return True;
+ break;
+
+ case 0x86: case 0x87:
+ return True;
+
+ case 0x0F: {
+ switch (opc[1]) {
+ case 0xBB: case 0xB3: case 0xAB:
+ return True;
+ /* 0F BA: /5 (BTS), /6 (BTR), /7 (BTC) only */
+ case 0xBA:
+ if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7)
+ return True;
+ break;
+ case 0xB0: case 0xB1:
+ return True;
+ /* 0F C7: /1 (CMPXCHG8B) only */
+ case 0xC7:
+ if (gregOfRM(opc[2]) == 1)
+ return True;
+ break;
+ case 0xC0: case 0xC1:
+ return True;
+ default:
+ break;
+ } /* switch (opc[1]) */
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (opc[0]) */
+
+ return False;
+}
+
+
/*------------------------------------------------------------*/
/*--- Disassemble a single instruction ---*/
/*------------------------------------------------------------*/
IRType ty;
IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
Int alen;
- UChar opc, modrm, abyte;
+ UChar opc, modrm, abyte, pre;
UInt d32;
HChar dis_buf[50];
- Int am_sz, d_sz;
+ Int am_sz, d_sz, n_prefixes;
DisResult dres;
UChar* insn; /* used in SSE decoders */
indicating the prefix. */
UChar sorb = 0;
+ /* Gets set to True if a LOCK prefix is seen. */
+ Bool pfx_lock = False;
+
+ /* do we need follow the insn with MBusEvent(BusUnlock) ? */
+ Bool unlock_bus_after_insn = False;
+
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 0;
}
}
- /* Deal with prefixes. */
- /* Skip a LOCK prefix. */
- /* 2005 Jan 06: the following insns are observed to sometimes
- have a LOCK prefix:
- cmpxchgl %ecx,(%edx)
- cmpxchgl %edx,0x278(%ebx) etc
- xchgl %eax, (%ecx)
- xaddl %eax, (%ecx)
- We need to catch any such which appear to be being used as
- a memory barrier, for example lock addl $0,0(%esp)
- and emit an IR MFence construct.
- */
- if (getIByte(delta) == 0xF0) {
-
+ /* Handle a couple of weird-ass NOPs that have been observed in the
+ wild. */
+ {
UChar* code = (UChar*)(guest_code + delta);
-
- /* Various bits of kernel headers use the following as a memory
- barrier. Hence, first emit an MFence and then let the insn
- go through as usual. */
- /* F08344240000: lock addl $0, 0(%esp) */
- if (code[0] == 0xF0 && code[1] == 0x83 && code[2] == 0x44 &&
- code[3] == 0x24 && code[4] == 0x00 && code[5] == 0x00) {
- stmt( IRStmt_MFence() );
+ /* Sun's JVM 1.5.0 uses the following as a NOP:
+ 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
+ if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
+ && code[3] == 0x65 && code[4] == 0x90) {
+ DIP("%%es:%%cs:%%fs:%%gs:nop\n");
+ delta += 5;
+ goto decode_success;
}
- else
- if (0) {
- vex_printf("vex x86->IR: ignoring LOCK prefix on: ");
- /* insn_verbose = True; */
+ /* don't barf on recent binutils padding
+ 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%eax,%eax,1) */
+ if (code[0] == 0x66
+ && code[1] == 0x2E && code[2] == 0x0F && code[3] == 0x1F
+ && code[4] == 0x84 && code[5] == 0x00 && code[6] == 0x00
+ && code[7] == 0x00 && code[8] == 0x00 && code[9] == 0x00 ) {
+ DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
+ delta += 10;
+ goto decode_success;
}
+ }
- /* In any case, skip the prefix. */
- delta++;
- }
+ /* Normal instruction handling starts here. */
- /* Detect operand-size overrides. It is possible for more than one
- 0x66 to appear. */
- while (getIByte(delta) == 0x66) { sz = 2; delta++; };
-
- /* segment override prefixes come after the operand-size override,
- it seems */
- switch (getIByte(delta)) {
- case 0x3E: /* %DS: */
- case 0x26: /* %ES: */
- /* Sun's JVM 1.5.0 uses the following as a NOP:
- 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
- {
- UChar* code = (UChar*)(guest_code + delta);
- if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
- && code[3] == 0x65 && code[4] == 0x90) {
- DIP("%%es:%%cs:%%fs:%%gs:nop\n");
- delta += 5;
- goto decode_success;
- }
- /* else fall through */
- }
- case 0x64: /* %FS: */
- case 0x65: /* %GS: */
- sorb = getIByte(delta); delta++;
- break;
- case 0x2E: /* %CS: */
- /* 2E prefix on a conditional branch instruction is a
- branch-prediction hint, which can safely be ignored. */
- {
+ /* Deal with some but not all prefixes:
+ 66(oso)
+ F0(lock)
+ 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
+ Not dealt with (left in place):
+ F2 F3
+ */
+ n_prefixes = 0;
+ while (True) {
+ if (n_prefixes > 7) goto decode_failure;
+ pre = getUChar(delta);
+ switch (pre) {
+ case 0x66:
+ sz = 2;
+ break;
+ case 0xF0:
+ pfx_lock = True;
+ break;
+ case 0x3E: /* %DS: */
+ case 0x26: /* %ES: */
+ case 0x64: /* %FS: */
+ case 0x65: /* %GS: */
+ if (sorb != 0)
+ goto decode_failure; /* only one seg override allowed */
+ sorb = pre;
+ break;
+ case 0x2E: { /* %CS: */
+ /* 2E prefix on a conditional branch instruction is a
+ branch-prediction hint, which can safely be ignored. */
UChar op1 = getIByte(delta+1);
UChar op2 = getIByte(delta+2);
if ((op1 >= 0x70 && op1 <= 0x7F)
|| (op1 == 0xE3)
|| (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
- sorb = getIByte(delta); delta++;
- break;
- }
- }
- /* don't barf on recent binutils padding
- 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%eax,%eax,1)
- */
- {
- UChar* code = (UChar*)(guest_code + delta);
- if (sz == 2
- && code[-1] == 0x66
- && code[0] == 0x2E && code[1] == 0x0F && code[2] == 0x1F
- && code[3] == 0x84 && code[4] == 0x00 && code[5] == 0x00
- && code[6] == 0x00 && code[7] == 0x00 && code[8] == 0x00 ) {
- DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
- delta += 9;
- goto decode_success;
+ } else {
+ /* All other CS override cases are not handled */
+ goto decode_failure;
}
+ break;
}
- /* All other CS override cases are not handled */
- goto decode_failure;
- case 0x36: /* %SS: */
- /* SS override cases are not handled */
+ case 0x36: /* %SS: */
+ /* SS override cases are not handled */
+ goto decode_failure;
+ default:
+ goto not_a_prefix;
+ }
+ n_prefixes++;
+ delta++;
+ }
+
+ not_a_prefix:
+
+ /* Now we should be looking at the primary opcode byte or the
+ leading F2 or F3. Check that any LOCK prefix is actually
+ allowed. */
+
+ /* Kludge re LOCK prefixes. We assume here that all code generated
+ by Vex is going to be run in a single-threaded context, in other
+ words that concurrent executions of Vex-generated translations
+ will not happen. So we don't need to worry too much about
+ preserving atomicity. However, mark the fact that the notional
+ hardware bus lock is being acquired (and, after the insn,
+ released), so that thread checking tools know this is a locked
+ insn.
+
+ We check for, and immediately reject, (most) inappropriate uses
+ of the LOCK prefix. Later (at decode_failure: and
+ decode_success:), if we've added a BusLock event, then we will
+ follow up with a BusUnlock event. How do we know execution will
+ actually ever get to the BusUnlock event? Because
+ can_be_used_with_LOCK_prefix rejects all control-flow changing
+ instructions.
+
+ One loophole, though: if a LOCK prefix insn (seg)faults, then
+ the BusUnlock event will never be reached. This could cause
+ tools which track bus hardware lock to lose track. Really, we
+ should explicitly release the lock after every insn, but that's
+ obviously way too expensive. Really, any tool which tracks the
+ state of the bus lock needs to ask V's core/tool interface to
+ notify it of signal deliveries. On delivery of SIGSEGV to the
+ guest, the tool will be notified, in which case it should
+ release the bus hardware lock if it is held.
+
+ Note, guest-amd64/toIR.c contains identical logic.
+ */
+ if (pfx_lock) {
+ if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
+ stmt( IRStmt_MBE(Imbe_BusLock) );
+ unlock_bus_after_insn = True;
+ DIP("lock ");
+ } else {
goto decode_failure;
- default:
- break;
+ }
}
+
/* ---------------------------------------------------- */
/* --- The SSE decoder. --- */
/* ---------------------------------------------------- */
delta += 3;
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("sfence\n");
goto decode_success;
}
delta += 3;
/* Insert a memory fence. It's sometimes important that these
are carried through to the generated code. */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
goto decode_success;
}
/* ------------------------ XCHG ----------------------- */
+ /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
+ prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock)
+ and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is
+ used with an explicit LOCK prefix, we don't want to end up with
+ two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by
+ the generic LOCK logic at the top of disInstr. */
case 0x86: /* XCHG Gb,Eb */
sz = 1;
/* Fall through ... */
nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
nameIReg(sz,eregOfRM(modrm)));
} else {
+ /* Need to add IRStmt_MBE(Imbe_BusLock). */
+ if (pfx_lock) {
+ /* check it's already been taken care of */
+ vassert(unlock_bus_after_insn);
+ } else {
+ vassert(!unlock_bus_after_insn);
+ stmt( IRStmt_MBE(Imbe_BusLock) );
+ unlock_bus_after_insn = True;
+ }
+ /* Because unlock_bus_after_insn is now True, generic logic
+ at the bottom of disInstr will add the
+ IRStmt_MBE(Imbe_BusUnlock). */
addr = disAMode ( &alen, sorb, delta, dis_buf );
assign( t1, loadLE(ty,mkexpr(addr)) );
assign( t2, getIReg(sz,gregOfRM(modrm)) );
stmt( IRStmt_Dirty(d) );
/* CPUID is a serialising insn. So, just in case someone is
using it as a memory fence ... */
- stmt( IRStmt_MFence() );
+ stmt( IRStmt_MBE(Imbe_Fence) );
DIP("cpuid\n");
break;
}
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
+ if (unlock_bus_after_insn)
+ stmt( IRStmt_MBE(Imbe_BusUnlock) );
jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
dres.whatNext = Dis_StopHere;
dres.len = 0;
decode_success:
/* All decode successes end up here. */
DIP("\n");
-
+ if (unlock_bus_after_insn)
+ stmt( IRStmt_MBE(Imbe_BusUnlock) );
dres.len = delta - delta_start;
return dres;
}
}
/* --------- MEM FENCE --------- */
- case Ist_MFence:
- addInstr(env, AMD64Instr_MFence());
- return;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, AMD64Instr_MFence());
+ return;
+ case Imbe_BusLock:
+ case Imbe_BusUnlock:
+ return;
+ default:
+ break;
+ }
+ break;
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
}
/* --------- MEM FENCE --------- */
- case Ist_MFence:
- addInstr(env, PPCInstr_MFence());
- return;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, PPCInstr_MFence());
+ return;
+ case Imbe_BusLock:
+ case Imbe_BusUnlock:
+ return;
+ default:
+ break;
+ }
+ break;
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
}
/* --------- MEM FENCE --------- */
- case Ist_MFence:
- addInstr(env, X86Instr_MFence(env->hwcaps));
- return;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, X86Instr_MFence(env->hwcaps));
+ return;
+ case Imbe_BusLock:
+ case Imbe_BusUnlock:
+ return;
+ default:
+ break;
+ }
+ break;
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
}
}
+/* Pretty-print an IRMBusEvent; panics on an unknown event value. */
+void ppIRMBusEvent ( IRMBusEvent event )
+{
+ switch (event) {
+ case Imbe_Fence: vex_printf("Fence"); break;
+ case Imbe_BusLock: vex_printf("BusLock"); break;
+ case Imbe_BusUnlock: vex_printf("BusUnlock"); break;
+ default: vpanic("ppIRMBusEvent");
+ }
+}
+
void ppIRStmt ( IRStmt* s )
{
if (!s) {
case Ist_Dirty:
ppIRDirty(s->Ist.Dirty.details);
break;
- case Ist_MFence:
- vex_printf("IR-MFence");
+ case Ist_MBE:
+ vex_printf("IR-");
+ ppIRMBusEvent(s->Ist.MBE.event);
break;
case Ist_Exit:
vex_printf( "if (" );
s->Ist.Dirty.details = d;
return s;
}
-IRStmt* IRStmt_MFence ( void )
+IRStmt* IRStmt_MBE ( IRMBusEvent event )
{
- /* Just use a single static closure. */
- static IRStmt static_closure;
- static_closure.tag = Ist_MFence;
- return &static_closure;
+ /* Unlike the old MFence stmt, MBE carries a payload (the event
+ field), so a single shared static closure no longer suffices;
+ allocate a fresh statement per call. */
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_MBE;
+ s->Ist.MBE.event = event;
+ return s;
}
IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ) {
IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
deepCopyIRExpr(s->Ist.Store.data));
case Ist_Dirty:
return IRStmt_Dirty(deepCopyIRDirty(s->Ist.Dirty.details));
- case Ist_MFence:
- return IRStmt_MFence();
+ case Ist_MBE:
+ return IRStmt_MBE(s->Ist.MBE.event);
case Ist_Exit:
return IRStmt_Exit(deepCopyIRExpr(s->Ist.Exit.guard),
s->Ist.Exit.jk,
return True;
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
return True;
case Ist_Exit:
return isIRAtom(st->Ist.Exit.guard);
useBeforeDef_Expr(bb,stmt,d->mAddr,def_counts);
break;
case Ist_NoOp:
- case Ist_MFence:
+ case Ist_MBE:
break;
case Ist_Exit:
useBeforeDef_Expr(bb,stmt,stmt->Ist.Exit.guard,def_counts);
bad_dirty:
sanityCheckFail(bb,stmt,"IRStmt.Dirty: ill-formed");
case Ist_NoOp:
- case Ist_MFence:
+ break;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence: case Imbe_BusLock: case Imbe_BusUnlock:
+ break;
+ default: sanityCheckFail(bb,stmt,"IRStmt.MBE.event: unknown");
+ break;
+ }
break;
case Ist_Exit:
tcExpr( bb, stmt, stmt->Ist.Exit.guard, gWordTy );
addStmtToIRSB(bb, IRStmt_Dirty(d2));
break;
case Ist_NoOp:
- case Ist_MFence:
+ case Ist_MBE:
case Ist_IMark:
addStmtToIRSB(bb, st);
break;
crude solution is just to flush everything; we could easily
enough do a lot better if needed. */
/* Probably also overly-conservative, but also dump everything
- if we hit a memory fence. Ditto AbiHints.*/
+ if we hit a memory bus event (fence, lock, unlock). Ditto
+ AbiHints.*/
case Ist_AbiHint:
vassert(isIRAtom(st->Ist.AbiHint.base));
/* fall through */
- case Ist_MFence:
+ case Ist_MBE:
case Ist_Dirty:
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
case Ist_NoOp:
return IRStmt_NoOp();
- case Ist_MFence:
- return IRStmt_MFence();
+ case Ist_MBE:
+ return IRStmt_MBE(st->Ist.MBE.event);
case Ist_Exit: {
IRExpr* fcond;
return;
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
return;
case Ist_Exit:
addUses_Expr(set, st->Ist.Exit.guard);
/* ------ BEGIN invalidate aenv bindings ------ */
/* This is critical: remove from aenv any E' -> .. bindings
which might be invalidated by this statement. The only
- vulnerable kind of bindings are the GetIt kind.
+ vulnerable kind of bindings are the GetI kind.
Dirty call - dump (paranoia level -> 2)
Store - dump (ditto)
Put, PutI - dump unless no-overlap is proven (.. -> 1)
to do the no-overlap assessments needed for Put/PutI.
*/
switch (st->tag) {
- case Ist_Dirty: case Ist_Store:
+ case Ist_Dirty: case Ist_Store: case Ist_MBE:
paranoia = 2; break;
case Ist_Put: case Ist_PutI:
paranoia = 1; break;
case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
- case Ist_WrTmp: case Ist_MFence: case Ist_Exit:
+ case Ist_WrTmp: case Ist_Exit:
paranoia = 0; break;
default:
vpanic("do_cse_BB(1)");
case Ist_IMark:
return False;
- case Ist_MFence:
+ case Ist_MBE:
case Ist_AbiHint:
/* just be paranoid ... these should be rare. */
return True;
switch (st->tag) {
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
break;
case Ist_AbiHint:
deltaIRExpr(st->Ist.AbiHint.base, delta);
return;
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
return;
case Ist_Exit:
aoccCount_Expr(uses, st->Ist.Exit.guard);
return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
case Ist_NoOp:
return IRStmt_NoOp();
- case Ist_MFence:
- return IRStmt_MFence();
+ case Ist_MBE:
+ return IRStmt_MBE(st->Ist.MBE.event);
case Ist_Dirty:
d = st->Ist.Dirty.details;
d2 = emptyIRDirty();
question is marked as requiring precise
exceptions. */
|| (env[k].doesLoad && stmtPuts)
- /* probably overly conservative: a memory fence
+ /* probably overly conservative: a memory bus event
invalidates absolutely everything, so that all
computation prior to it is forced to complete before
- proceeding with the fence. */
- || st->tag == Ist_MFence
+ proceeding with the event (fence,lock,unlock). */
+ || st->tag == Ist_MBE
/* also be (probably overly) paranoid re AbiHints */
|| st->tag == Ist_AbiHint
);
break;
case Ist_NoOp:
case Ist_IMark:
- case Ist_MFence:
+ case Ist_MBE:
break;
case Ist_Exit:
vassert(isIRAtom(st->Ist.Exit.guard));
float, or a vector (SIMD) value. */
typedef
enum {
- Ity_INVALID=0x10FFF,
- Ity_I1=0x11000,
+ Ity_INVALID=0x11000,
+ Ity_I1,
Ity_I8,
Ity_I16,
Ity_I32,
/* IREndness is used in load IRExprs and store IRStmts. */
typedef
enum {
- Iend_LE=22, /* little endian */
- Iend_BE=33 /* big endian */
+ Iend_LE=0x12000, /* little endian */
+ Iend_BE /* big endian */
}
IREndness;
/* The various kinds of constant. */
typedef
enum {
- Ico_U1=0x12000,
+ Ico_U1=0x13000,
Ico_U8,
Ico_U16,
Ico_U32,
/* -- Do not change this ordering. The IR generators rely on
(eg) Iop_Add64 == IopAdd8 + 3. -- */
- Iop_INVALID=0x13000,
+ Iop_INVALID=0x14000,
Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64,
Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64,
/* Signless mul. MullS/MullU is elsewhere. */
in the comments for IRExpr. */
typedef
enum {
- Iex_Binder,
+ Iex_Binder=0x15000,
Iex_Get,
Iex_GetI,
Iex_RdTmp,
*/
typedef
enum {
- Ijk_Boring=0x14000, /* not interesting; just goto next */
+ Ijk_Boring=0x16000, /* not interesting; just goto next */
Ijk_Call, /* guest is doing a call */
Ijk_Ret, /* guest is doing a return */
Ijk_ClientReq, /* do guest client req before continuing */
/* Effects on resources (eg. registers, memory locations) */
typedef
enum {
- Ifx_None = 0x15000, /* no effect */
+ Ifx_None = 0x17000, /* no effect */
Ifx_Read, /* reads the resource */
Ifx_Write, /* writes the resource */
Ifx_Modify, /* modifies the resource */
IRExpr** args );
+/* --------------- Memory Bus Events --------------- */
+
+typedef
+ enum {
+ Imbe_Fence=0x18000,
+ Imbe_BusLock,
+ Imbe_BusUnlock
+ }
+ IRMBusEvent;
+
+extern void ppIRMBusEvent ( IRMBusEvent );
+
+
/* ------------------ Statements ------------------ */
/* The different kinds of statements. Their meaning is explained
they are required by some IR consumers such as tools that
instrument the code.
*/
+
typedef
enum {
- Ist_NoOp,
+ Ist_NoOp=0x19000,
Ist_IMark, /* META */
Ist_AbiHint, /* META */
Ist_Put,
Ist_WrTmp,
Ist_Store,
Ist_Dirty,
- Ist_MFence,
+ Ist_MBE, /* META (maybe) */
Ist_Exit
}
IRStmtTag;
IRDirty* details;
} Dirty;
- /* A memory fence.
- ppIRExpr output: IR-MFence
+ /* A memory bus event - a fence, or acquisition/release of the
+ hardware bus lock. IR optimisation treats all these as fences
+ across which no memory references may be moved.
+ ppIRExpr output: MBusEvent-Fence,
+ MBusEvent-BusLock, MBusEvent-BusUnlock.
*/
struct {
- } MFence;
+ IRMBusEvent event;
+ } MBE;
/* Conditional exit from the middle of an IRSB.
ppIRExpr output: if (<guard>) goto {<jk>} <dst>
extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data );
extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data );
extern IRStmt* IRStmt_Dirty ( IRDirty* details );
-extern IRStmt* IRStmt_MFence ( void );
+extern IRStmt* IRStmt_MBE ( IRMBusEvent event );
extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
/* Deep-copy an IRStmt. */
/* FIXME: put sensible values into the .hwcaps fields */
LibVEX_default_VexArchInfo(&vai_x86);
- vai_x86.hwcaps = 0;
+ vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
+ | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
LibVEX_default_VexArchInfo(&vai_amd64);
vai_amd64.hwcaps = 0;
LibVEX_default_VexAbiInfo(&vbi);
/* ----- Set up args for LibVEX_Translate ----- */
-#if 1 /* ppc32 -> ppc32 */
+#if 0 /* ppc32 -> ppc32 */
vta.arch_guest = VexArchPPC32;
vta.archinfo_guest = vai_ppc32;
vta.arch_host = VexArchPPC32;
vta.arch_host = VexArchAMD64;
vta.archinfo_host = vai_amd64;
#endif
-#if 0 /* x86 -> x86 */
+#if 1 /* x86 -> x86 */
vta.arch_guest = VexArchX86;
vta.archinfo_guest = vai_x86;
vta.arch_host = VexArchX86;
vta.dispatch = NULL;
#endif
+ vta.finaltidy = NULL;
+
for (i = 0; i < TEST_N_ITERS; i++)
tres = LibVEX_Translate ( &vta );