From: Julian Seward Date: Fri, 9 Nov 2007 21:15:04 +0000 (+0000) Subject: Merge changes from THRCHECK branch r1787. These changes are all to do X-Git-Tag: svn/VALGRIND_3_3_1^2~24 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=31735216ba899790ded9e720929b7c0363c0b92a;p=thirdparty%2Fvalgrind.git Merge changes from THRCHECK branch r1787. These changes are all to do with making x86/amd64 LOCK prefixes properly visible in the IR, since threading tools need to see them. Probably would be no bad thing for cachegrind/callgrind to notice them too, since asserting a bus lock on a multiprocessor is an expensive event that programmers might like to know about. * amd64 front end: handle LOCK prefixes a lot more accurately * x86 front end: ditto, and also a significant cleanup of prefix handling, which was a mess * To represent prefixes, remove the IR 'Ist_MFence' construction and replace it with something more general: an IR Memory Bus Event statement (Ist_MBE), which can represent lock acquisition, lock release, and memory fences. * Fix up all front ends and back ends to respectively generate and handle Ist_MBE. Fix up the middle end (iropt) to deal with them. git-svn-id: svn://svn.valgrind.org/vex/trunk@1793 --- diff --git a/VEX/priv/guest-amd64/toIR.c b/VEX/priv/guest-amd64/toIR.c index c467d4c1cf..67a2b321fc 100644 --- a/VEX/priv/guest-amd64/toIR.c +++ b/VEX/priv/guest-amd64/toIR.c @@ -1963,7 +1963,7 @@ void make_redzone_AbiHint ( VexAbiInfo* vbi, IRTemp new_rsp, HChar* who ) /*------------------------------------------------------------*/ static -HChar* sorbTxt ( Prefix pfx ) +HChar* segRegTxt ( Prefix pfx ) { if (pfx & PFX_CS) return "%cs:"; if (pfx & PFX_DS) return "%ds:"; @@ -2115,7 +2115,7 @@ IRTemp disAMode ( Int* len, Prefix pfx, Long delta, case 0x00: case 0x01: case 0x02: case 0x03: /* ! 04 */ /* ! 
05 */ case 0x06: case 0x07: { UChar rm = toUChar(mod_reg_rm & 7); - DIS(buf, "%s(%s)", sorbTxt(pfx), nameIRegRexB(8,pfx,rm)); + DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); *len = 1; return disAMode_copy2tmp( handleAddrOverrides(pfx, getIRegRexB(8,pfx,rm))); @@ -2129,9 +2129,9 @@ IRTemp disAMode ( Int* len, Prefix pfx, Long delta, { UChar rm = toUChar(mod_reg_rm & 7); Long d = getSDisp8(delta); if (d == 0) { - DIS(buf, "%s(%s)", sorbTxt(pfx), nameIRegRexB(8,pfx,rm)); + DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); } else { - DIS(buf, "%s%lld(%s)", sorbTxt(pfx), d, nameIRegRexB(8,pfx,rm)); + DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); } *len = 2; return disAMode_copy2tmp( @@ -2146,7 +2146,7 @@ IRTemp disAMode ( Int* len, Prefix pfx, Long delta, /* ! 14 */ case 0x15: case 0x16: case 0x17: { UChar rm = toUChar(mod_reg_rm & 7); Long d = getSDisp32(delta); - DIS(buf, "%s%lld(%s)", sorbTxt(pfx), d, nameIRegRexB(8,pfx,rm)); + DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); *len = 5; return disAMode_copy2tmp( handleAddrOverrides(pfx, @@ -2164,7 +2164,7 @@ IRTemp disAMode ( Int* len, Prefix pfx, Long delta, case 0x05: { Long d = getSDisp32(delta); *len = 5; - DIS(buf, "%s%lld(%%rip)", sorbTxt(pfx), d); + DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d); /* We need to know the next instruction's start address. 
Try and figure out what it is, record the guess, and ask the top-level driver logic (bbToIR_AMD64) to check we @@ -2207,11 +2207,11 @@ IRTemp disAMode ( Int* len, Prefix pfx, Long delta, if ((!index_is_SP) && (!base_is_BPor13)) { if (scale == 0) { - DIS(buf, "%s(%s,%s)", sorbTxt(pfx), + DIS(buf, "%s(%s,%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r), nameIReg64rexX(pfx,index_r)); } else { - DIS(buf, "%s(%s,%s,%d)", sorbTxt(pfx), + DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r), nameIReg64rexX(pfx,index_r), 1<= 0 && gregLO3ofRM(opc[1]) <= 6) + return True; + break; + + case 0xFE: case 0xFF: + if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1) + return True; + break; + + case 0xF6: case 0xF7: + if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3) + return True; + break; + + case 0x86: case 0x87: + return True; + + case 0x0F: { + switch (opc[1]) { + case 0xBB: case 0xB3: case 0xAB: + return True; + case 0xBA: + if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7) + return True; + break; + case 0xB0: case 0xB1: + return True; + case 0xC7: + if (gregLO3ofRM(opc[2]) == 1) + return True; + break; + case 0xC0: case 0xC1: + return True; + default: + break; + } /* switch (opc[1]) */ + break; + } + + default: + break; + } /* switch (opc[0]) */ + + return False; +} + + /*------------------------------------------------------------*/ /*--- Disassemble a single instruction ---*/ /*------------------------------------------------------------*/ @@ -8341,6 +8431,9 @@ DisResult disInstr_AMD64_WRK ( /* pfx holds the summary of prefixes. */ Prefix pfx = PFX_EMPTY; + /* do we need follow the insn with MBusEvent(BusUnlock) ? */ + Bool unlock_bus_after_insn = False; + /* Set result defaults. */ dres.whatNext = Dis_Continue; dres.len = 0; @@ -8477,17 +8570,40 @@ DisResult disInstr_AMD64_WRK ( /* Kludge re LOCK prefixes. 
We assume here that all code generated by Vex is going to be run in a single-threaded context, in other words that concurrent executions of Vex-generated translations - will not happen. That is certainly the case for how the - Valgrind-3.0 code line uses Vex. Given that assumption, it - seems safe to ignore LOCK prefixes since there will never be any - other thread running at the same time as this one. However, at - least emit a memory fence on the basis that it would at least be - prudent to flush any memory transactions from this thread as far - as possible down the memory hierarchy. */ + will not happen. So we don't need to worry too much about + preserving atomicity. However, mark the fact that the notional + hardware bus lock is being acquired (and, after the insn, + released), so that thread checking tools know this is a locked + insn. + + We check for, and immediately reject, (most) inappropriate uses + of the LOCK prefix. Later (at decode_failure: and + decode_success:), if we've added a BusLock event, then we will + follow up with a BusUnlock event. How do we know execution will + actually ever get to the BusUnlock event? Because + can_be_used_with_LOCK_prefix rejects all control-flow changing + instructions. + + One loophole, though: if a LOCK prefix insn (seg)faults, then + the BusUnlock event will never be reached. This could cause + tools which track bus hardware lock to lose track. Really, we + should explicitly release the lock after every insn, but that's + obviously way too expensive. Really, any tool which tracks the + state of the bus lock needs to ask V's core/tool interface to + notify it of signal deliveries. On delivery of SIGSEGV to the + guest, the tool will be notified, in which case it should + release the bus hardware lock if it is held. + + Note, guest-x86/toIR.c contains identical logic. 
+ */ if (pfx & PFX_LOCK) { - /* vex_printf("vex amd64->IR: ignoring LOCK prefix on: "); - insn_verbose = True; */ - stmt( IRStmt_MFence() ); + if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { + stmt( IRStmt_MBE(Imbe_BusLock) ); + unlock_bus_after_insn = True; + DIP("lock "); + } else { + goto decode_failure; + } } @@ -9557,7 +9673,7 @@ DisResult disInstr_AMD64_WRK ( delta += 3; /* Insert a memory fence. It's sometimes important that these are carried through to the generated code. */ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); DIP("sfence\n"); goto decode_success; } @@ -10336,7 +10452,7 @@ DisResult disInstr_AMD64_WRK ( delta += 3; /* Insert a memory fence. It's sometimes important that these are carried through to the generated code. */ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? "l" : "m"); goto decode_success; } @@ -12796,7 +12912,7 @@ DisResult disInstr_AMD64_WRK ( assign( addr, handleAddrOverrides(pfx, mkU64(d64)) ); putIRegRAX(sz, loadLE( ty, mkexpr(addr) )); DIP("mov%c %s0x%llx, %s\n", nameISize(sz), - sorbTxt(pfx), d64, + segRegTxt(pfx), d64, nameIRegRAX(sz)); break; @@ -12814,7 +12930,7 @@ DisResult disInstr_AMD64_WRK ( assign( addr, handleAddrOverrides(pfx, mkU64(d64)) ); storeLE( mkexpr(addr), getIRegRAX(sz) ); DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz), - sorbTxt(pfx), d64); + segRegTxt(pfx), d64); break; /* XXXX be careful here with moves to AH/BH/CH/DH */ @@ -13644,6 +13760,12 @@ DisResult disInstr_AMD64_WRK ( /* ------------------------ XCHG ----------------------- */ + /* XCHG reg,mem automatically asserts LOCK# even without a LOCK + prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock) + and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is + used with an explicit LOCK prefix, we don't want to end up with + two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by + the generic LOCK logic at the top of disInstr. 
*/ case 0x86: /* XCHG Gb,Eb */ sz = 1; /* Fall through ... */ @@ -13662,6 +13784,18 @@ DisResult disInstr_AMD64_WRK ( nameISize(sz), nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm)); } else { + /* Need to add IRStmt_MBE(Imbe_BusLock). */ + if (pfx & PFX_LOCK) { + /* check it's already been taken care of */ + vassert(unlock_bus_after_insn); + } else { + vassert(!unlock_bus_after_insn); + stmt( IRStmt_MBE(Imbe_BusLock) ); + unlock_bus_after_insn = True; + } + /* Because unlock_bus_after_insn is now True, generic logic + at the bottom of disInstr will add the + IRStmt_MBE(Imbe_BusUnlock). */ addr = disAMode ( &alen, pfx, delta, dis_buf, 0 ); assign( t1, loadLE(ty, mkexpr(addr)) ); assign( t2, getIRegG(sz, pfx, modrm) ); @@ -14169,7 +14303,7 @@ DisResult disInstr_AMD64_WRK ( stmt( IRStmt_Dirty(d) ); /* CPUID is a serialising insn. So, just in case someone is using it as a memory fence ... */ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); DIP("cpuid\n"); break; } @@ -14533,6 +14667,8 @@ DisResult disInstr_AMD64_WRK ( insn, but nevertheless be paranoid and update it again right now. */ stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); + if (unlock_bus_after_insn) + stmt( IRStmt_MBE(Imbe_BusUnlock) ); jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr); dres.whatNext = Dis_StopHere; dres.len = 0; @@ -14543,6 +14679,8 @@ DisResult disInstr_AMD64_WRK ( decode_success: /* All decode successes end up here. 
*/ DIP("\n"); + if (unlock_bus_after_insn) + stmt( IRStmt_MBE(Imbe_BusUnlock) ); dres.len = (Int)toUInt(delta - delta_start); return dres; } diff --git a/VEX/priv/guest-ppc/toIR.c b/VEX/priv/guest-ppc/toIR.c index 35431d3293..858833022d 100644 --- a/VEX/priv/guest-ppc/toIR.c +++ b/VEX/priv/guest-ppc/toIR.c @@ -4816,7 +4816,7 @@ static Bool dis_memsync ( UInt theInstr ) return False; } DIP("isync\n"); - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); break; /* X-Form */ @@ -4829,7 +4829,7 @@ static Bool dis_memsync ( UInt theInstr ) } DIP("eieio\n"); /* Insert a memory fence, just to be on the safe side. */ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); break; case 0x014: // lwarx (Load Word and Reserve Indexed, PPC32 p458) @@ -4918,7 +4918,7 @@ static Bool dis_memsync ( UInt theInstr ) DIP("%ssync\n", flag_L == 1 ? "lw" : ""); /* Insert a memory fence. It's sometimes important that these are carried through to the generated code. */ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); break; /* 64bit Memsync */ @@ -5662,7 +5662,7 @@ static Bool dis_cache_manage ( UInt theInstr, putGST( PPC_GST_TILEN, mkSzImm(ty, lineszB) ); /* be paranoid ... */ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); irsb->jumpkind = Ijk_TInval; irsb->next = mkSzImm(ty, nextInsnAddr()); diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c index 2f206ea551..b351ed05f3 100644 --- a/VEX/priv/guest-x86/toIR.c +++ b/VEX/priv/guest-x86/toIR.c @@ -7136,6 +7136,96 @@ static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, } +/* Helper for deciding whether a given insn (starting at the opcode + byte) may validly be used with a LOCK prefix. The following insns + may be used with LOCK when their destination operand is in memory. + Note, this is slightly too permissive. Oh well. Note also, AFAICS + this is exactly the same for both 32-bit and 64-bit mode. 
+ + ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03 + OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B + ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13 + SBB 80 /3, 81 /3, 83 /3, 18, 19, 1A, 1B + AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23 + SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B + XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33 + + DEC FE /1, FF /1 + INC FE /0, FF /0 + + NEG F6 /3, F7 /3 + NOT F6 /2, F7 /2 + + XCHG 86, 87 + + BTC 0F BB, 0F BA /7 + BTR 0F B3, 0F BA /6 + BTS 0F AB, 0F BA /5 + + CMPXCHG 0F B0, 0F B1 + CMPXCHG8B 0F C7 /1 + + XADD 0F C0, 0F C1 +*/ +static Bool can_be_used_with_LOCK_prefix ( UChar* opc ) +{ + switch (opc[0]) { + case 0x00: case 0x01: case 0x02: case 0x03: return True; + case 0x08: case 0x09: case 0x0A: case 0x0B: return True; + case 0x10: case 0x11: case 0x12: case 0x13: return True; + case 0x18: case 0x19: case 0x1A: case 0x1B: return True; + case 0x20: case 0x21: case 0x22: case 0x23: return True; + case 0x28: case 0x29: case 0x2A: case 0x2B: return True; + case 0x30: case 0x31: case 0x32: case 0x33: return True; + + case 0x80: case 0x81: case 0x83: + if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6) + return True; + break; + + case 0xFE: case 0xFF: + if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1) + return True; + break; + + case 0xF6: case 0xF7: + if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3) + return True; + break; + + case 0x86: case 0x87: + return True; + + case 0x0F: { + switch (opc[1]) { + case 0xBB: case 0xB3: case 0xAB: + return True; + case 0xBA: + if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7) + return True; + break; + case 0xB0: case 0xB1: + return True; + case 0xC7: + if (gregOfRM(opc[2]) == 1) + return True; + break; + case 0xC0: case 0xC1: + return True; + default: + break; + } /* switch (opc[1]) */ + break; + } + + default: + break; + } /* switch (opc[0]) */ + + return False; +} + + /*------------------------------------------------------------*/ /*--- Disassemble a single instruction ---*/ 
/*------------------------------------------------------------*/ @@ -7155,10 +7245,10 @@ DisResult disInstr_X86_WRK ( IRType ty; IRTemp addr, t0, t1, t2, t3, t4, t5, t6; Int alen; - UChar opc, modrm, abyte; + UChar opc, modrm, abyte, pre; UInt d32; HChar dis_buf[50]; - Int am_sz, d_sz; + Int am_sz, d_sz, n_prefixes; DisResult dres; UChar* insn; /* used in SSE decoders */ @@ -7178,6 +7268,12 @@ DisResult disInstr_X86_WRK ( indicating the prefix. */ UChar sorb = 0; + /* Gets set to True if a LOCK prefix is seen. */ + Bool pfx_lock = False; + + /* do we need follow the insn with MBusEvent(BusUnlock) ? */ + Bool unlock_bus_after_insn = False; + /* Set result defaults. */ dres.whatNext = Dis_Continue; dres.len = 0; @@ -7242,103 +7338,129 @@ DisResult disInstr_X86_WRK ( } } - /* Deal with prefixes. */ - /* Skip a LOCK prefix. */ - /* 2005 Jan 06: the following insns are observed to sometimes - have a LOCK prefix: - cmpxchgl %ecx,(%edx) - cmpxchgl %edx,0x278(%ebx) etc - xchgl %eax, (%ecx) - xaddl %eax, (%ecx) - We need to catch any such which appear to be being used as - a memory barrier, for example lock addl $0,0(%esp) - and emit an IR MFence construct. - */ - if (getIByte(delta) == 0xF0) { - + /* Handle a couple of weird-ass NOPs that have been observed in the + wild. */ + { UChar* code = (UChar*)(guest_code + delta); - - /* Various bits of kernel headers use the following as a memory - barrier. Hence, first emit an MFence and then let the insn - go through as usual. 
*/ - /* F08344240000: lock addl $0, 0(%esp) */ - if (code[0] == 0xF0 && code[1] == 0x83 && code[2] == 0x44 && - code[3] == 0x24 && code[4] == 0x00 && code[5] == 0x00) { - stmt( IRStmt_MFence() ); + /* Sun's JVM 1.5.0 uses the following as a NOP: + 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */ + if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64 + && code[3] == 0x65 && code[4] == 0x90) { + DIP("%%es:%%cs:%%fs:%%gs:nop\n"); + delta += 5; + goto decode_success; } - else - if (0) { - vex_printf("vex x86->IR: ignoring LOCK prefix on: "); - /* insn_verbose = True; */ + /* don't barf on recent binutils padding + 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%eax,%eax,1) */ + if (code[0] == 0x66 + && code[1] == 0x2E && code[2] == 0x0F && code[3] == 0x1F + && code[4] == 0x84 && code[5] == 0x00 && code[6] == 0x00 + && code[7] == 0x00 && code[8] == 0x00 && code[9] == 0x00 ) { + DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n"); + delta += 10; + goto decode_success; } + } - /* In any case, skip the prefix. */ - delta++; - } + /* Normal instruction handling starts here. */ - /* Detect operand-size overrides. It is possible for more than one - 0x66 to appear. */ - while (getIByte(delta) == 0x66) { sz = 2; delta++; }; - - /* segment override prefixes come after the operand-size override, - it seems */ - switch (getIByte(delta)) { - case 0x3E: /* %DS: */ - case 0x26: /* %ES: */ - /* Sun's JVM 1.5.0 uses the following as a NOP: - 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */ - { - UChar* code = (UChar*)(guest_code + delta); - if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64 - && code[3] == 0x65 && code[4] == 0x90) { - DIP("%%es:%%cs:%%fs:%%gs:nop\n"); - delta += 5; - goto decode_success; - } - /* else fall through */ - } - case 0x64: /* %FS: */ - case 0x65: /* %GS: */ - sorb = getIByte(delta); delta++; - break; - case 0x2E: /* %CS: */ - /* 2E prefix on a conditional branch instruction is a - branch-prediction hint, which can safely be ignored. 
*/ - { + /* Deal with some but not all prefixes: + 66(oso) + F0(lock) + 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:) + Not dealt with (left in place): + F2 F3 + */ + n_prefixes = 0; + while (True) { + if (n_prefixes > 7) goto decode_failure; + pre = getUChar(delta); + switch (pre) { + case 0x66: + sz = 2; + break; + case 0xF0: + pfx_lock = True; + break; + case 0x3E: /* %DS: */ + case 0x26: /* %ES: */ + case 0x64: /* %FS: */ + case 0x65: /* %GS: */ + if (sorb != 0) + goto decode_failure; /* only one seg override allowed */ + sorb = pre; + break; + case 0x2E: { /* %CS: */ + /* 2E prefix on a conditional branch instruction is a + branch-prediction hint, which can safely be ignored. */ UChar op1 = getIByte(delta+1); UChar op2 = getIByte(delta+2); if ((op1 >= 0x70 && op1 <= 0x7F) || (op1 == 0xE3) || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) { if (0) vex_printf("vex x86->IR: ignoring branch hint\n"); - sorb = getIByte(delta); delta++; - break; - } - } - /* don't barf on recent binutils padding - 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%eax,%eax,1) - */ - { - UChar* code = (UChar*)(guest_code + delta); - if (sz == 2 - && code[-1] == 0x66 - && code[0] == 0x2E && code[1] == 0x0F && code[2] == 0x1F - && code[3] == 0x84 && code[4] == 0x00 && code[5] == 0x00 - && code[6] == 0x00 && code[7] == 0x00 && code[8] == 0x00 ) { - DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n"); - delta += 9; - goto decode_success; + } else { + /* All other CS override cases are not handled */ + goto decode_failure; } + break; } - /* All other CS override cases are not handled */ - goto decode_failure; - case 0x36: /* %SS: */ - /* SS override cases are not handled */ + case 0x36: /* %SS: */ + /* SS override cases are not handled */ + goto decode_failure; + default: + goto not_a_prefix; + } + n_prefixes++; + delta++; + } + + not_a_prefix: + + /* Now we should be looking at the primary opcode byte or the + leading F2 or F3. Check that any LOCK prefix is actually + allowed. 
*/ + + /* Kludge re LOCK prefixes. We assume here that all code generated + by Vex is going to be run in a single-threaded context, in other + words that concurrent executions of Vex-generated translations + will not happen. So we don't need to worry too much about + preserving atomicity. However, mark the fact that the notional + hardware bus lock is being acquired (and, after the insn, + released), so that thread checking tools know this is a locked + insn. + + We check for, and immediately reject, (most) inappropriate uses + of the LOCK prefix. Later (at decode_failure: and + decode_success:), if we've added a BusLock event, then we will + follow up with a BusUnlock event. How do we know execution will + actually ever get to the BusUnlock event? Because + can_be_used_with_LOCK_prefix rejects all control-flow changing + instructions. + + One loophole, though: if a LOCK prefix insn (seg)faults, then + the BusUnlock event will never be reached. This could cause + tools which track bus hardware lock to lose track. Really, we + should explicitly release the lock after every insn, but that's + obviously way too expensive. Really, any tool which tracks the + state of the bus lock needs to ask V's core/tool interface to + notify it of signal deliveries. On delivery of SIGSEGV to the + guest, the tool will be notified, in which case it should + release the bus hardware lock if it is held. + + Note, guest-amd64/toIR.c contains identical logic. + */ + if (pfx_lock) { + if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { + stmt( IRStmt_MBE(Imbe_BusLock) ); + unlock_bus_after_insn = True; + DIP("lock "); + } else { goto decode_failure; - default: - break; + } } + /* ---------------------------------------------------- */ /* --- The SSE decoder. --- */ /* ---------------------------------------------------- */ @@ -8324,7 +8446,7 @@ DisResult disInstr_X86_WRK ( delta += 3; /* Insert a memory fence. 
It's sometimes important that these are carried through to the generated code. */ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); DIP("sfence\n"); goto decode_success; } @@ -9104,7 +9226,7 @@ DisResult disInstr_X86_WRK ( delta += 3; /* Insert a memory fence. It's sometimes important that these are carried through to the generated code. */ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m"); goto decode_success; } @@ -12184,6 +12306,12 @@ DisResult disInstr_X86_WRK ( /* ------------------------ XCHG ----------------------- */ + /* XCHG reg,mem automatically asserts LOCK# even without a LOCK + prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock) + and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is + used with an explicit LOCK prefix, we don't want to end up with + two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by + the generic LOCK logic at the top of disInstr. */ case 0x86: /* XCHG Gb,Eb */ sz = 1; /* Fall through ... */ @@ -12201,6 +12329,18 @@ DisResult disInstr_X86_WRK ( nameISize(sz), nameIReg(sz,gregOfRM(modrm)), nameIReg(sz,eregOfRM(modrm))); } else { + /* Need to add IRStmt_MBE(Imbe_BusLock). */ + if (pfx_lock) { + /* check it's already been taken care of */ + vassert(unlock_bus_after_insn); + } else { + vassert(!unlock_bus_after_insn); + stmt( IRStmt_MBE(Imbe_BusLock) ); + unlock_bus_after_insn = True; + } + /* Because unlock_bus_after_insn is now True, generic logic + at the bottom of disInstr will add the + IRStmt_MBE(Imbe_BusUnlock). */ addr = disAMode ( &alen, sorb, delta, dis_buf ); assign( t1, loadLE(ty,mkexpr(addr)) ); assign( t2, getIReg(sz,gregOfRM(modrm)) ); @@ -12699,7 +12839,7 @@ DisResult disInstr_X86_WRK ( stmt( IRStmt_Dirty(d) ); /* CPUID is a serialising insn. So, just in case someone is using it as a memory fence ... 
*/ - stmt( IRStmt_MFence() ); + stmt( IRStmt_MBE(Imbe_Fence) ); DIP("cpuid\n"); break; } @@ -13086,6 +13226,8 @@ DisResult disInstr_X86_WRK ( insn, but nevertheless be paranoid and update it again right now. */ stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) ); + if (unlock_bus_after_insn) + stmt( IRStmt_MBE(Imbe_BusUnlock) ); jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr); dres.whatNext = Dis_StopHere; dres.len = 0; @@ -13096,7 +13238,8 @@ DisResult disInstr_X86_WRK ( decode_success: /* All decode successes end up here. */ DIP("\n"); - + if (unlock_bus_after_insn) + stmt( IRStmt_MBE(Imbe_BusUnlock) ); dres.len = delta - delta_start; return dres; } diff --git a/VEX/priv/host-amd64/isel.c b/VEX/priv/host-amd64/isel.c index 2218347a76..356813cdf9 100644 --- a/VEX/priv/host-amd64/isel.c +++ b/VEX/priv/host-amd64/isel.c @@ -3763,9 +3763,18 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) } /* --------- MEM FENCE --------- */ - case Ist_MFence: - addInstr(env, AMD64Instr_MFence()); - return; + case Ist_MBE: + switch (stmt->Ist.MBE.event) { + case Imbe_Fence: + addInstr(env, AMD64Instr_MFence()); + return; + case Imbe_BusLock: + case Imbe_BusUnlock: + return; + default: + break; + } + break; /* --------- INSTR MARK --------- */ /* Doesn't generate any executable code ... */ diff --git a/VEX/priv/host-ppc/isel.c b/VEX/priv/host-ppc/isel.c index c85d219cb8..631d363722 100644 --- a/VEX/priv/host-ppc/isel.c +++ b/VEX/priv/host-ppc/isel.c @@ -3866,9 +3866,18 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) } /* --------- MEM FENCE --------- */ - case Ist_MFence: - addInstr(env, PPCInstr_MFence()); - return; + case Ist_MBE: + switch (stmt->Ist.MBE.event) { + case Imbe_Fence: + addInstr(env, PPCInstr_MFence()); + return; + case Imbe_BusLock: + case Imbe_BusUnlock: + return; + default: + break; + } + break; /* --------- INSTR MARK --------- */ /* Doesn't generate any executable code ... 
*/ diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c index 7edefeca0a..b174f369cd 100644 --- a/VEX/priv/host-x86/isel.c +++ b/VEX/priv/host-x86/isel.c @@ -3802,9 +3802,18 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) } /* --------- MEM FENCE --------- */ - case Ist_MFence: - addInstr(env, X86Instr_MFence(env->hwcaps)); - return; + case Ist_MBE: + switch (stmt->Ist.MBE.event) { + case Imbe_Fence: + addInstr(env, X86Instr_MFence(env->hwcaps)); + return; + case Imbe_BusLock: + case Imbe_BusUnlock: + return; + default: + break; + } + break; /* --------- INSTR MARK --------- */ /* Doesn't generate any executable code ... */ diff --git a/VEX/priv/ir/irdefs.c b/VEX/priv/ir/irdefs.c index cff1c5318e..f4d80b88ed 100644 --- a/VEX/priv/ir/irdefs.c +++ b/VEX/priv/ir/irdefs.c @@ -736,6 +736,16 @@ void ppIRJumpKind ( IRJumpKind kind ) } } +void ppIRMBusEvent ( IRMBusEvent event ) +{ + switch (event) { + case Imbe_Fence: vex_printf("Fence"); break; + case Imbe_BusLock: vex_printf("BusLock"); break; + case Imbe_BusUnlock: vex_printf("BusUnlock"); break; + default: vpanic("ppIRMBusEvent"); + } +} + void ppIRStmt ( IRStmt* s ) { if (!s) { @@ -781,8 +791,9 @@ void ppIRStmt ( IRStmt* s ) case Ist_Dirty: ppIRDirty(s->Ist.Dirty.details); break; - case Ist_MFence: - vex_printf("IR-MFence"); + case Ist_MBE: + vex_printf("IR-"); + ppIRMBusEvent(s->Ist.MBE.event); break; case Ist_Exit: vex_printf( "if (" ); @@ -1186,12 +1197,12 @@ IRStmt* IRStmt_Dirty ( IRDirty* d ) s->Ist.Dirty.details = d; return s; } -IRStmt* IRStmt_MFence ( void ) +IRStmt* IRStmt_MBE ( IRMBusEvent event ) { - /* Just use a single static closure. 
*/ - static IRStmt static_closure; - static_closure.tag = Ist_MFence; - return &static_closure; + IRStmt* s = LibVEX_Alloc(sizeof(IRStmt)); + s->tag = Ist_MBE; + s->Ist.MBE.event = event; + return s; } IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ) { IRStmt* s = LibVEX_Alloc(sizeof(IRStmt)); @@ -1387,8 +1398,8 @@ IRStmt* deepCopyIRStmt ( IRStmt* s ) deepCopyIRExpr(s->Ist.Store.data)); case Ist_Dirty: return IRStmt_Dirty(deepCopyIRDirty(s->Ist.Dirty.details)); - case Ist_MFence: - return IRStmt_MFence(); + case Ist_MBE: + return IRStmt_MBE(s->Ist.MBE.event); case Ist_Exit: return IRStmt_Exit(deepCopyIRExpr(s->Ist.Exit.guard), s->Ist.Exit.jk, @@ -2021,7 +2032,7 @@ Bool isFlatIRStmt ( IRStmt* st ) return True; case Ist_NoOp: case Ist_IMark: - case Ist_MFence: + case Ist_MBE: return True; case Ist_Exit: return isIRAtom(st->Ist.Exit.guard); @@ -2196,7 +2207,7 @@ void useBeforeDef_Stmt ( IRSB* bb, IRStmt* stmt, Int* def_counts ) useBeforeDef_Expr(bb,stmt,d->mAddr,def_counts); break; case Ist_NoOp: - case Ist_MFence: + case Ist_MBE: break; case Ist_Exit: useBeforeDef_Expr(bb,stmt,stmt->Ist.Exit.guard,def_counts); @@ -2500,7 +2511,14 @@ void tcStmt ( IRSB* bb, IRStmt* stmt, IRType gWordTy ) bad_dirty: sanityCheckFail(bb,stmt,"IRStmt.Dirty: ill-formed"); case Ist_NoOp: - case Ist_MFence: + break; + case Ist_MBE: + switch (stmt->Ist.MBE.event) { + case Imbe_Fence: case Imbe_BusLock: case Imbe_BusUnlock: + break; + default: sanityCheckFail(bb,stmt,"IRStmt.MBE.event: unknown"); + break; + } break; case Ist_Exit: tcExpr( bb, stmt, stmt->Ist.Exit.guard, gWordTy ); diff --git a/VEX/priv/ir/iropt.c b/VEX/priv/ir/iropt.c index 4c3b0283fa..4dcb0e36e7 100644 --- a/VEX/priv/ir/iropt.c +++ b/VEX/priv/ir/iropt.c @@ -442,7 +442,7 @@ static void flatten_Stmt ( IRSB* bb, IRStmt* st ) addStmtToIRSB(bb, IRStmt_Dirty(d2)); break; case Ist_NoOp: - case Ist_MFence: + case Ist_MBE: case Ist_IMark: addStmtToIRSB(bb, st); break; @@ -708,11 +708,12 @@ static void 
handle_gets_Stmt ( crude solution is just to flush everything; we could easily enough do a lot better if needed. */ /* Probably also overly-conservative, but also dump everything - if we hit a memory fence. Ditto AbiHints.*/ + if we hit a memory bus event (fence, lock, unlock). Ditto + AbiHints.*/ case Ist_AbiHint: vassert(isIRAtom(st->Ist.AbiHint.base)); /* fall through */ - case Ist_MFence: + case Ist_MBE: case Ist_Dirty: for (j = 0; j < env->used; j++) env->inuse[j] = False; @@ -1760,8 +1761,8 @@ static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st ) case Ist_NoOp: return IRStmt_NoOp(); - case Ist_MFence: - return IRStmt_MFence(); + case Ist_MBE: + return IRStmt_MBE(st->Ist.MBE.event); case Ist_Exit: { IRExpr* fcond; @@ -1967,7 +1968,7 @@ static void addUses_Stmt ( Bool* set, IRStmt* st ) return; case Ist_NoOp: case Ist_IMark: - case Ist_MFence: + case Ist_MBE: return; case Ist_Exit: addUses_Expr(set, st->Ist.Exit.guard); @@ -2535,7 +2536,7 @@ static Bool do_cse_BB ( IRSB* bb ) /* ------ BEGIN invalidate aenv bindings ------ */ /* This is critical: remove from aenv any E' -> .. bindings which might be invalidated by this statement. The only - vulnerable kind of bindings are the GetIt kind. + vulnerable kind of bindings are the GetI kind. Dirty call - dump (paranoia level -> 2) Store - dump (ditto) Put, PutI - dump unless no-overlap is proven (.. -> 1) @@ -2543,12 +2544,12 @@ static Bool do_cse_BB ( IRSB* bb ) to do the no-overlap assessments needed for Put/PutI. 
*/ switch (st->tag) { - case Ist_Dirty: case Ist_Store: + case Ist_Dirty: case Ist_Store: case Ist_MBE: paranoia = 2; break; case Ist_Put: case Ist_PutI: paranoia = 1; break; case Ist_NoOp: case Ist_IMark: case Ist_AbiHint: - case Ist_WrTmp: case Ist_MFence: case Ist_Exit: + case Ist_WrTmp: case Ist_Exit: paranoia = 0; break; default: vpanic("do_cse_BB(1)"); @@ -2963,7 +2964,7 @@ Bool guestAccessWhichMightOverlapPutI ( case Ist_IMark: return False; - case Ist_MFence: + case Ist_MBE: case Ist_AbiHint: /* just be paranoid ... these should be rare. */ return True; @@ -3206,7 +3207,7 @@ static void deltaIRStmt ( IRStmt* st, Int delta ) switch (st->tag) { case Ist_NoOp: case Ist_IMark: - case Ist_MFence: + case Ist_MBE: break; case Ist_AbiHint: deltaIRExpr(st->Ist.AbiHint.base, delta); @@ -3691,7 +3692,7 @@ static void aoccCount_Stmt ( UShort* uses, IRStmt* st ) return; case Ist_NoOp: case Ist_IMark: - case Ist_MFence: + case Ist_MBE: return; case Ist_Exit: aoccCount_Expr(uses, st->Ist.Exit.guard); @@ -3933,8 +3934,8 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len); case Ist_NoOp: return IRStmt_NoOp(); - case Ist_MFence: - return IRStmt_MFence(); + case Ist_MBE: + return IRStmt_MBE(st->Ist.MBE.event); case Ist_Dirty: d = st->Ist.Dirty.details; d2 = emptyIRDirty(); @@ -4093,11 +4094,11 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st ) question is marked as requiring precise exceptions. */ || (env[k].doesLoad && stmtPuts) - /* probably overly conservative: a memory fence + /* probably overly conservative: a memory bus event invalidates absolutely everything, so that all computation prior to it is forced to complete before - proceeding with the fence. */ - || st->tag == Ist_MFence + proceeding with the event (fence,lock,unlock). 
*/ + || st->tag == Ist_MBE /* also be (probably overly) paranoid re AbiHints */ || st->tag == Ist_AbiHint ); @@ -4265,7 +4266,7 @@ static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI, break; case Ist_NoOp: case Ist_IMark: - case Ist_MFence: + case Ist_MBE: break; case Ist_Exit: vassert(isIRAtom(st->Ist.Exit.guard)); diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 84daa6aea8..056dd23dbb 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -229,8 +229,8 @@ float, or a vector (SIMD) value. */ typedef enum { - Ity_INVALID=0x10FFF, - Ity_I1=0x11000, + Ity_INVALID=0x11000, + Ity_I1, Ity_I8, Ity_I16, Ity_I32, @@ -254,8 +254,8 @@ extern Int sizeofIRType ( IRType ); /* IREndness is used in load IRExprs and store IRStmts. */ typedef enum { - Iend_LE=22, /* little endian */ - Iend_BE=33 /* big endian */ + Iend_LE=0x12000, /* little endian */ + Iend_BE /* big endian */ } IREndness; @@ -267,7 +267,7 @@ typedef /* The various kinds of constant. */ typedef enum { - Ico_U1=0x12000, + Ico_U1=0x13000, Ico_U8, Ico_U16, Ico_U32, @@ -406,7 +406,7 @@ typedef /* -- Do not change this ordering. The IR generators rely on (eg) Iop_Add64 == IopAdd8 + 3. -- */ - Iop_INVALID=0x13000, + Iop_INVALID=0x14000, Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64, Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64, /* Signless mul. MullS/MullU is elsewhere. */ @@ -884,7 +884,7 @@ typedef in the comments for IRExpr. */ typedef enum { - Iex_Binder, + Iex_Binder=0x15000, Iex_Get, Iex_GetI, Iex_RdTmp, @@ -1181,7 +1181,7 @@ extern Bool eqIRAtom ( IRExpr*, IRExpr* ); */ typedef enum { - Ijk_Boring=0x14000, /* not interesting; just goto next */ + Ijk_Boring=0x16000, /* not interesting; just goto next */ Ijk_Call, /* guest is doing a call */ Ijk_Ret, /* guest is doing a return */ Ijk_ClientReq, /* do guest client req before continuing */ @@ -1254,7 +1254,7 @@ extern void ppIRJumpKind ( IRJumpKind ); /* Effects on resources (eg. 
registers, memory locations) */ typedef enum { - Ifx_None = 0x15000, /* no effect */ + Ifx_None = 0x17000, /* no effect */ Ifx_Read, /* reads the resource */ Ifx_Write, /* writes the resource */ Ifx_Modify, /* modifies the resource */ @@ -1316,6 +1316,19 @@ IRDirty* unsafeIRDirty_1_N ( IRTemp dst, IRExpr** args ); +/* --------------- Memory Bus Events --------------- */ + +typedef + enum { + Imbe_Fence=0x18000, + Imbe_BusLock, + Imbe_BusUnlock + } + IRMBusEvent; + +extern void ppIRMBusEvent ( IRMBusEvent ); + + /* ------------------ Statements ------------------ */ /* The different kinds of statements. Their meaning is explained @@ -1327,9 +1340,10 @@ IRDirty* unsafeIRDirty_1_N ( IRTemp dst, they are required by some IR consumers such as tools that instrument the code. */ + typedef enum { - Ist_NoOp, + Ist_NoOp=0x19000, Ist_IMark, /* META */ Ist_AbiHint, /* META */ Ist_Put, @@ -1337,7 +1351,7 @@ typedef Ist_WrTmp, Ist_Store, Ist_Dirty, - Ist_MFence, + Ist_MBE, /* META (maybe) */ Ist_Exit } IRStmtTag; @@ -1452,11 +1466,15 @@ typedef IRDirty* details; } Dirty; - /* A memory fence. - ppIRExpr output: IR-MFence + /* A memory bus event - a fence, or acquisition/release of the + hardware bus lock. IR optimisation treats all these as fences + across which no memory references may be moved. + ppIRExpr output: MBusEvent-Fence, + MBusEvent-BusLock, MBusEvent-BusUnlock. */ struct { - } MFence; + IRMBusEvent event; + } MBE; /* Conditional exit from the middle of an IRSB. ppIRExpr output: if () goto {} @@ -1481,7 +1499,7 @@ extern IRStmt* IRStmt_PutI ( IRRegArray* descr, IRExpr* ix, Int bias, extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data ); extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data ); extern IRStmt* IRStmt_Dirty ( IRDirty* details ); -extern IRStmt* IRStmt_MFence ( void ); +extern IRStmt* IRStmt_MBE ( IRMBusEvent event ); extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ); /* Deep-copy an IRStmt. 
*/ diff --git a/VEX/test_main.c b/VEX/test_main.c index 8ca122f040..6b171897c9 100644 --- a/VEX/test_main.c +++ b/VEX/test_main.c @@ -127,7 +127,8 @@ int main ( int argc, char** argv ) /* FIXME: put sensible values into the .hwcaps fields */ LibVEX_default_VexArchInfo(&vai_x86); - vai_x86.hwcaps = 0; + vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1 + | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3; LibVEX_default_VexArchInfo(&vai_amd64); vai_amd64.hwcaps = 0; @@ -139,7 +140,7 @@ int main ( int argc, char** argv ) LibVEX_default_VexAbiInfo(&vbi); /* ----- Set up args for LibVEX_Translate ----- */ -#if 1 /* ppc32 -> ppc32 */ +#if 0 /* ppc32 -> ppc32 */ vta.arch_guest = VexArchPPC32; vta.archinfo_guest = vai_ppc32; vta.arch_host = VexArchPPC32; @@ -151,7 +152,7 @@ int main ( int argc, char** argv ) vta.arch_host = VexArchAMD64; vta.archinfo_host = vai_amd64; #endif -#if 0 /* x86 -> x86 */ +#if 1 /* x86 -> x86 */ vta.arch_guest = VexArchX86; vta.archinfo_guest = vai_x86; vta.arch_host = VexArchX86; @@ -187,6 +188,8 @@ int main ( int argc, char** argv ) vta.dispatch = NULL; #endif + vta.finaltidy = NULL; + for (i = 0; i < TEST_N_ITERS; i++) tres = LibVEX_Translate ( &vta );