extern void amd64g_dirtyhelper_storeF80le ( ULong/*addr*/, ULong/*data*/ );
-extern void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st );
+extern void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st );
+extern void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
/*--- Misc integer helpers, including rotates and CPUID. ---*/
/*---------------------------------------------------------------*/
-/* Claim to be the following CPU (2 x ...):
+/* Claim to be the following CPU, which is probably representative of
+ the lowliest (earliest) amd64 offerings. It can do neither sse3
+ nor cx16.
+
+ vendor_id : AuthenticAMD
+ cpu family : 15
+ model : 5
+ model name : AMD Opteron (tm) Processor 848
+ stepping : 10
+ cpu MHz : 1797.682
+ cache size : 1024 KB
+ fpu : yes
+ fpu_exception : yes
+ cpuid level : 1
+ wp : yes
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep
+ mtrr pge mca cmov pat pse36 clflush mmx fxsr
+ sse sse2 syscall nx mmxext lm 3dnowext 3dnow
+ bogomips : 3600.62
+ TLB size : 1088 4K pages
+ clflush size : 64
+ cache_alignment : 64
+ address sizes : 40 bits physical, 48 bits virtual
+ power management: ts fid vid ttp
+*/
+void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
+{
+# define SET_ABCD(_a,_b,_c,_d) \
+ do { st->guest_RAX = (ULong)(_a); \
+ st->guest_RBX = (ULong)(_b); \
+ st->guest_RCX = (ULong)(_c); \
+ st->guest_RDX = (ULong)(_d); \
+ } while (0)
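+
+   /* Note: the (ULong) casts zero-extend the 32-bit leaf values into
+      the top halves of RAX..RDX, matching the zero-extension that
+      32-bit register writes perform in 64-bit mode. */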
+
+ switch (0xFFFFFFFF & st->guest_RAX) {
+ case 0x00000000:
+ SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
+ break;
+ case 0x00000001:
+ SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
+ break;
+ case 0x80000000:
+ SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
+ break;
+ case 0x80000002:
+ SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
+ break;
+ case 0x80000004:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000005:
+ SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ }
+# undef SET_ABCD
+}
+
+
+/* Claim to be the following CPU (2 x ...), which is sse3 and cx16
+ capable.
+
vendor_id : GenuineIntel
cpu family : 6
model : 15
address sizes : 36 bits physical, 48 bits virtual
power management:
*/
-void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st )
+void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
{
# define SET_ABCD(_a,_b,_c,_d) \
do { st->guest_RAX = (ULong)(_a); \
without prior written permission.
*/
-/* LIMITATIONS:
-
- LOCK prefix handling is only safe in the situation where
- Vex-generated code is run single-threadedly. (This is not the same
- as saying that Valgrind can't safely use Vex to run multithreaded
- programs). See comment attached to LOCK prefix handling in
- disInstr for details.
-*/
+/* Translates AMD64 code to IR. */
/* TODO:
All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
to ensure a 64-bit value is being written.
-//.. x87 FP Limitations:
-//..
-//.. * all arithmetic done at 64 bits
-//..
-//.. * no FP exceptions, except for handling stack over/underflow
-//..
-//.. * FP rounding mode observed only for float->int conversions
-//.. and int->float conversions which could lose accuracy, and
-//.. for float-to-float rounding. For all other operations,
-//.. round-to-nearest is used, regardless.
-//..
-//.. * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
-//.. simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
-//.. even when it isn't.
-//..
-//.. * some of the FCOM cases could do with testing -- not convinced
-//.. that the args are the right way round.
-//..
-//.. * FSAVE does not re-initialise the FPU; it should do
-//..
-//.. * FINIT not only initialises the FPU environment, it also
-//.. zeroes all the FP registers. It should leave the registers
-//.. unchanged.
-//..
-//.. RDTSC returns zero, always.
-//..
-//.. SAHF should cause eflags[1] == 1, and in fact it produces 0. As
-//.. per Intel docs this bit has no meaning anyway. Since PUSHF is the
-//.. only way to observe eflags[1], a proper fix would be to make that
-//.. bit be set by PUSHF.
-//..
-//.. This module uses global variables and so is not MT-safe (if that
-//.. should ever become relevant).
+ x87 FP Limitations:
+
+ * all arithmetic done at 64 bits
+
+ * no FP exceptions, except for handling stack over/underflow
+
+ * FP rounding mode observed only for float->int conversions and
+ int->float conversions which could lose accuracy, and for
+ float-to-float rounding. For all other operations,
+ round-to-nearest is used, regardless.
+
+ * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
+ simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
+ even when it isn't.
+
+ * some of the FCOM cases could do with testing -- not convinced
+ that the args are the right way round.
+
+ * FSAVE does not re-initialise the FPU; it should do
+
+ * FINIT not only initialises the FPU environment, it also zeroes
+ all the FP registers. It should leave the registers unchanged.
+
+ RDTSC returns zero, always.
+
+ SAHF should cause eflags[1] == 1, and in fact it produces 0. As
+ per Intel docs this bit has no meaning anyway. Since PUSHF is the
+ only way to observe eflags[1], a proper fix would be to make that
+ bit be set by PUSHF.
+
+ This module uses global variables and so is not MT-safe (if that
+ should ever become relevant).
*/
/* Notes re address size overrides (0x67).
that the preamble will never occur except in specific code
fragments designed for Valgrind to catch.
- No prefixes may precede a "Special" instruction. */
+ No prefixes may precede a "Special" instruction.
+*/
+
+/* casLE (implementation of lock-prefixed insns) and rep-prefixed
+ insns: the side-exit back to the start of the insn is done with
+ Ijk_Boring. This is quite wrong, it should be done with
+ Ijk_NoRedir, since otherwise the side exit, which is intended to
+ restart the instruction for whatever reason, could go somewhere
+ entirely else. Doing it right (with Ijk_NoRedir jumps) would make
+ no-redir jumps performance critical, at least for rep-prefixed
+ instructions, since all iterations thereof would involve such a
+ jump. It's not such a big deal with casLE since the side exit is
+ only taken if the CAS fails, that is, the location is contended,
+ which is relatively unlikely.
+*/
+
+/* LOCK prefixed instructions. These are translated using IR-level
+ CAS statements (IRCAS) and are believed to preserve atomicity, even
+ from the point of view of some other process racing against a
+ simulated one (presumably they communicate via a shared memory
+ segment).
+
+ Handlers which are aware of LOCK prefixes are:
+ dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
+ dis_cmpxchg_G_E (cmpxchg)
+ dis_Grp1 (add, or, adc, sbb, and, sub, xor)
+ dis_Grp3 (not, neg)
+ dis_Grp4 (inc, dec)
+ dis_Grp5 (inc, dec)
+ dis_Grp8_Imm (bts, btc, btr)
+ dis_bt_G_E (bts, btc, btr)
+ dis_xadd_G_E (xadd)
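+
+   The general shape of a locked-op translation is, as a sketch (not
+   literal IR):
+
+      t_old = LD:I<sz>(addr)              fetch the old value
+      t_new = <op>(t_old, ...)            compute the new value
+      CAS(addr, expd=t_old, new=t_new)    attempt the atomic update
+      if (value observed by CAS != t_old)
+         goto guest_RIP_curr_instr        contended; retry the insn
+
+   casLE (below) encapsulates the CAS and the retry side-exit.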
+*/
-/* Translates AMD64 code to IR. */
#include "libvex_basictypes.h"
#include "libvex_ir.h"
static void storeLE ( IRExpr* addr, IRExpr* data )
{
- stmt( IRStmt_Store(Iend_LE,addr,data) );
+ stmt( IRStmt_Store(Iend_LE, IRTemp_INVALID, addr, data) );
}
static IRExpr* loadLE ( IRType ty, IRExpr* data )
{
- return IRExpr_Load(Iend_LE,ty,data);
+ return IRExpr_Load(False, Iend_LE, ty, data);
}
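+
+/* (In this IR revision, IRStmt_Store carries an extra IRTemp which
+   receives a store-conditional result, IRTemp_INVALID marking a
+   plain store, and IRExpr_Load carries a leading Bool which marks a
+   load-linked load -- compare loadlinkedBE in the PPC front end.) */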
static IROp mkSizedOp ( IRType ty, IROp op8 )
unop(Iop_1Uto64,y)));
}
+/* Generate a compare-and-swap operation, operating on memory at
+ 'addr'. The expected value is 'expVal' and the new value is
+ 'newVal'. If the operation fails, then transfer control (with a
+ no-redir jump (XXX no -- see comment at top of this file)) to
+ 'restart_point', which is presumably the address of the guest
+ instruction again -- retrying, essentially. */
+static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
+ Addr64 restart_point )
+{
+ IRCAS* cas;
+ IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
+ IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
+ IRTemp oldTmp = newTemp(tyE);
+ IRTemp expTmp = newTemp(tyE);
+ vassert(tyE == tyN);
+ vassert(tyE == Ity_I64 || tyE == Ity_I32
+ || tyE == Ity_I16 || tyE == Ity_I8);
+ assign(expTmp, expVal);
+ cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
+ NULL, mkexpr(expTmp), NULL, newVal );
+ stmt( IRStmt_CAS(cas) );
+ stmt( IRStmt_Exit(
+ binop( mkSizedOp(tyE,Iop_CmpNE8), mkexpr(oldTmp), mkexpr(expTmp) ),
+ Ijk_Boring, /*Ijk_NoRedir*/
+ IRConst_U64( restart_point )
+ ));
+}
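+
+/* Typical use (see e.g. the dis_Grp1 memory cases below): fetch the
+   old value into 'dst0', compute the updated value into 'dst1', and
+   then, for the LOCK-prefixed variant, do
+
+      casLE( mkexpr(addr), mkexpr(dst0), mkexpr(dst1),
+             guest_RIP_curr_instr );
+
+   in place of a plain storeLE. */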
+
/*------------------------------------------------------------*/
/*--- Helpers for %rflags. ---*/
/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
appropriately.
+
+ Optionally, generate a store for the 'tres' value. This can either
+ be a normal store, or it can be a cas-with-possible-failure style
+ store:
+
+ if taddr is IRTemp_INVALID, then no store is generated.
+
+ if taddr is not IRTemp_INVALID, then a store (using taddr as
+ the address) is generated:
+
+ if texpVal is IRTemp_INVALID then a normal store is
+ generated, and restart_point must be zero (it is irrelevant).
+
+ if texpVal is not IRTemp_INVALID then a cas-style store is
+ generated. texpVal is the expected value, restart_point
+ is the restart point if the store fails, and texpVal must
+ have the same type as tres.
+
*/
static void helper_ADC ( Int sz,
- IRTemp tres, IRTemp ta1, IRTemp ta2 )
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
UInt thunkOp;
IRType ty = szToITy(sz);
IROp plus = mkSizedOp(ty, Iop_Add8);
IROp xor = mkSizedOp(ty, Iop_Xor8);
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+
switch (sz) {
case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
binop(plus,mkexpr(ta1),mkexpr(ta2)),
mkexpr(oldcn)) );
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
- appropriately.
+ appropriately. As with helper_ADC, possibly generate a store of
+ the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
- IRTemp tres, IRTemp ta1, IRTemp ta2 )
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
UInt thunkOp;
IRType ty = szToITy(sz);
IROp minus = mkSizedOp(ty, Iop_Sub8);
IROp xor = mkSizedOp(ty, Iop_Xor8);
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+
switch (sz) {
case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
binop(minus,mkexpr(ta1),mkexpr(ta2)),
mkexpr(oldcn)) );
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
assign( src, getIRegE(size,pfx,rm) );
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegG(size, pfx, rm, mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegG(size, pfx, rm, mkexpr(dst1));
} else {
assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
assign( src, loadLE(szToITy(size), mkexpr(addr)) );
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegG(size, pfx, rm, mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegG(size, pfx, rm, mkexpr(dst1));
} else {
assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
assign(src, getIRegG(size,pfx,rm));
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegE(size, pfx, rm, mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIRegE(size, pfx, rm, mkexpr(dst1));
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
assign(src, getIRegG(size,pfx,rm));
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
- storeLE(mkexpr(addr), mkexpr(dst1));
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
- storeLE(mkexpr(addr), mkexpr(dst1));
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (keep) {
+ if (pfx & PFX_LOCK) {
+ if (0) vex_printf("locked case\n" );
+ casLE( mkexpr(addr),
+ mkexpr(dst0)/*expval*/,
+ mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
+ } else {
+ if (0) vex_printf("nonlocked case\n");
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
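+         /* Note the ordering: the CAS (and its failure side-exit)
+            precedes the flag-thunk Puts below, so a failed CAS
+            leaves the flags untouched and simply retries the insn. */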
if (isAddSub(op8))
setFlags_DEP1_DEP2(op8, dst0, src, ty);
else
setFlags_DEP1(op8, dst1, ty);
- if (keep)
- storeLE(mkexpr(addr), mkexpr(dst1));
}
DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
}
else
if (op8 == Iop_Add8 && carrying) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
}
else
if (op8 == Iop_Sub8 && carrying) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
}
else
vpanic("dis_op_imm_A(amd64,guest)");
assign(src, mkU(ty,d64 & mask));
if (gregLO3ofRM(modrm) == 2 /* ADC */) {
- helper_ADC( sz, dst1, dst0, src );
+ helper_ADC( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
} else
if (gregLO3ofRM(modrm) == 3 /* SBB */) {
- helper_SBB( sz, dst1, dst0, src );
+ helper_SBB( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
if (isAddSub(op8))
assign(src, mkU(ty,d64 & mask));
if (gregLO3ofRM(modrm) == 2 /* ADC */) {
- helper_ADC( sz, dst1, dst0, src );
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else
if (gregLO3ofRM(modrm) == 3 /* SBB */) {
- helper_SBB( sz, dst1, dst0, src );
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (gregLO3ofRM(modrm) < 7) {
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
+ mkexpr(dst1)/*newVal*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
if (isAddSub(op8))
setFlags_DEP1_DEP2(op8, dst0, src, ty);
else
setFlags_DEP1(op8, dst1, ty);
}
- if (gregLO3ofRM(modrm) < 7)
- storeLE(mkexpr(addr), mkexpr(dst1));
-
delta += (len+d_sz);
DIP("%s%c $%lld, %s\n",
nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
src_val, dis_buf);
}
- /* Copy relevant bit from t2 into the carry flag. */
- /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
- stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
- stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
- stmt( IRStmt_Put(
- OFFB_CC_DEP1,
- binop(Iop_And64,
- binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
- mkU64(1))
- ));
-
/* Compute the new value into t2m, if non-BT. */
switch (gregLO3ofRM(modrm)) {
case 4: /* BT */
assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
break;
default:
+ /*NOTREACHED*/ /*the previous switch guards this*/
vassert(0);
}
if (epartIsReg(modrm)) {
putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
} else {
- storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(t_addr),
+ narrowTo(ty, mkexpr(t2))/*expd*/,
+ narrowTo(ty, mkexpr(t2m))/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ }
}
}
+ /* Copy relevant bit from t2 into the carry flag. */
+ /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
+ mkU64(1))
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
return delta;
}
*decode_OK = False;
return delta;
case 2: /* NOT */
- storeLE( mkexpr(addr), unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ dst1 = newTemp(ty);
+ assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
DIP("not%c %s\n", nameISize(sz), dis_buf);
break;
case 3: /* NEG */
assign(src, mkexpr(t1));
assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
mkexpr(src)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
- storeLE( mkexpr(addr), mkexpr(dst1) );
DIP("neg%c %s\n", nameISize(sz), dis_buf);
break;
case 4: /* MUL (unsigned widening) */
switch (gregLO3ofRM(modrm)) {
case 0: /* INC */
assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
- storeLE( mkexpr(addr), mkexpr(t2) );
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
setFlags_INC_DEC( True, t2, ty );
break;
case 1: /* DEC */
assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
- storeLE( mkexpr(addr), mkexpr(t2) );
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
setFlags_INC_DEC( False, t2, ty );
break;
default:
t2 = newTemp(ty);
assign(t2, binop(mkSizedOp(ty,Iop_Add8),
mkexpr(t1), mkU(ty,1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
setFlags_INC_DEC( True, t2, ty );
- storeLE(mkexpr(addr),mkexpr(t2));
break;
case 1: /* DEC */
t2 = newTemp(ty);
assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
mkexpr(t1), mkU(ty,1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
setFlags_INC_DEC( False, t2, ty );
- storeLE(mkexpr(addr),mkexpr(t2));
break;
case 2: /* call Ev */
/* Ignore any sz value and operate as if sz==8. */
UChar modrm;
Int len;
IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
- t_addr1, t_rsp, t_mask;
+ t_addr1, t_rsp, t_mask, t_new;
vassert(sz == 2 || sz == 4 || sz == 8);
t_fetched = t_bitno0 = t_bitno1 = t_bitno2
- = t_addr0 = t_addr1 = t_rsp = t_mask = IRTemp_INVALID;
+ = t_addr0 = t_addr1 = t_rsp
+ = t_mask = t_new = IRTemp_INVALID;
t_fetched = newTemp(Ity_I8);
+ t_new = newTemp(Ity_I8);
t_bitno0 = newTemp(Ity_I64);
t_bitno1 = newTemp(Ity_I64);
t_bitno2 = newTemp(Ity_I8);
if (op != BtOpNone) {
switch (op) {
- case BtOpSet:
- storeLE( mkexpr(t_addr1),
- binop(Iop_Or8, mkexpr(t_fetched),
- mkexpr(t_mask)) );
+ case BtOpSet:
+ assign( t_new,
+ binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
break;
- case BtOpComp:
- storeLE( mkexpr(t_addr1),
- binop(Iop_Xor8, mkexpr(t_fetched),
- mkexpr(t_mask)) );
+ case BtOpComp:
+ assign( t_new,
+ binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
break;
- case BtOpReset:
- storeLE( mkexpr(t_addr1),
- binop(Iop_And8, mkexpr(t_fetched),
- unop(Iop_Not8, mkexpr(t_mask))) );
+ case BtOpReset:
+ assign( t_new,
+ binop(Iop_And8, mkexpr(t_fetched),
+ unop(Iop_Not8, mkexpr(t_mask))) );
break;
default:
vpanic("dis_bt_G_E(amd64)");
}
+ if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) {
+ casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
+ mkexpr(t_new)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(t_addr1), mkexpr(t_new) );
+ }
}
-
+
/* Side effect done; now get selected bit into Carry flag */
/* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
/* Move reg operand from stack back to reg */
if (epartIsReg(modrm)) {
- /* t_esp still points at it. */
+ /* t_rsp still points at it. */
/* only write the reg if actually modifying it; doing otherwise
zeroes the top half erroneously when doing btl due to
standard zero-extend rule */
- if (op != BtOpNone)
+ if (op != BtOpNone)
putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(sz)) );
}
IRTemp addr = IRTemp_INVALID;
UChar rm = getUChar(delta0);
+ /* There are 3 cases to consider:
+
+      reg-reg: ignore any lock prefix, generate sequence based
+               on Mux0X (currently disabled, awaiting a test case)
+
+ reg-mem, not locked: ignore any lock prefix, generate sequence
+ based on Mux0X
+
+ reg-mem, locked: use IRCAS
+ */
+
if (epartIsReg(rm)) {
+ /* case 1 */
*ok = False;
return delta0;
/* awaiting test case */
assign( dest, getIRegE(size, pfx, rm) );
delta0++;
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ putIRegE(size, pfx, rm, mkexpr(dest2));
DIP("cmpxchg%c %s,%s\n", nameISize(size),
nameIRegG(size,pfx,rm),
nameIRegE(size,pfx,rm) );
- } else {
+ }
+ else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
+ /* case 2 */
addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
assign( dest, loadLE(ty, mkexpr(addr)) );
delta0 += len;
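+      /* If RAX equals the loaded value, the store writes 'src' and
+         RAX is unchanged; otherwise the old value is written back
+         and also lands in RAX.  (Mux0X(c,e0,eX) yields e0 when c is
+         zero, eX otherwise.) */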
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ storeLE( mkexpr(addr), mkexpr(dest2) );
DIP("cmpxchg%c %s,%s\n", nameISize(size),
nameIRegG(size,pfx,rm), dis_buf);
}
-
- assign( src, getIRegG(size, pfx, rm) );
- assign( acc, getIRegRAX(size) );
- setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
- assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
- assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
- assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
- putIRegRAX(size, mkexpr(acc2));
-
- if (epartIsReg(rm)) {
- putIRegE(size, pfx, rm, mkexpr(dest2));
- } else {
- storeLE( mkexpr(addr), mkexpr(dest2) );
- }
-
- *ok = True;
- return delta0;
-}
-
-static
-ULong dis_cmpxchg8b ( /*OUT*/Bool* ok,
- VexAbiInfo* vbi,
- Prefix pfx,
- Int sz,
- Long delta0 )
-{
- HChar dis_buf[50];
- Int len;
-
- IRType ty = szToITy(sz);
- IRTemp eq = newTemp(Ity_I8);
- IRTemp olda = newTemp(ty);
- IRTemp oldb = newTemp(ty);
- IRTemp oldc = newTemp(ty);
- IRTemp oldd = newTemp(ty);
- IRTemp newa = newTemp(Ity_I64);
- IRTemp newd = newTemp(Ity_I64);
- IRTemp oldml = newTemp(ty);
- IRTemp oldmh = newTemp(ty);
- IRTemp newml = newTemp(ty);
- IRTemp newmh = newTemp(ty);
- IRTemp addr = IRTemp_INVALID;
- IRTemp oldrf = newTemp(Ity_I64);
- IRTemp newrf = newTemp(Ity_I64);
- UChar rm = getUChar(delta0);
- vassert(sz == 4 || sz == 8); /* guaranteed by caller */
-
- if (epartIsReg(rm)) {
- *ok = False;
- return delta0;
+ else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
+ /* case 3 */
+ /* src is new value. acc is expected value. dest is old value.
+ Compute success from the output of the IRCAS, and steer the
+ new value for RAX accordingly: in case of success, RAX is
+ unchanged. */
+ addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ delta0 += len;
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ stmt( IRStmt_CAS(
+ mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
+ NULL, mkexpr(acc), NULL, mkexpr(src) )
+ ));
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ DIP("lock cmpxchg%c %s,%s\n", nameISize(size),
+ nameIRegG(size,pfx,rm), dis_buf);
}
-
- addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
- delta0 += len;
- DIP("cmpxchg%s %s\n", sz == 4 ? "8" : "16", dis_buf);
-
- if (sz == 4) {
- assign( olda, getIReg32( R_RAX ) );
- assign( oldb, getIReg32( R_RBX ) );
- assign( oldc, getIReg32( R_RCX ) );
- assign( oldd, getIReg32( R_RDX ) );
- assign( oldml, loadLE( Ity_I32, mkexpr(addr) ));
- assign( oldmh, loadLE( Ity_I32,
- binop(Iop_Add64,mkexpr(addr),mkU64(4)) ));
- assign(eq,
- unop(Iop_1Uto8,
- binop(Iop_CmpEQ32,
- binop(Iop_Or32,
- binop(Iop_Xor32,mkexpr(olda),mkexpr(oldml)),
- binop(Iop_Xor32,mkexpr(oldd),mkexpr(oldmh))),
- mkU32(0))));
- assign( newml, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(oldb)) );
- assign( newmh, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldc)) );
- assign( newa, IRExpr_Mux0X(mkexpr(eq),
- unop(Iop_32Uto64,mkexpr(oldml)),
- getIRegRAX(8)) );
- assign( newd, IRExpr_Mux0X(mkexpr(eq),
- unop(Iop_32Uto64,mkexpr(oldmh)),
- getIRegRDX(8)) );
-
- storeLE( mkexpr(addr), mkexpr(newml) );
- storeLE( binop(Iop_Add64,mkexpr(addr),mkU64(4)),
- mkexpr(newmh) );
- putIRegRAX( 8, mkexpr(newa) );
- putIRegRDX( 8, mkexpr(newd) );
- } else {
- assign( olda, getIReg64( R_RAX ) );
- assign( oldb, getIReg64( R_RBX ) );
- assign( oldc, getIReg64( R_RCX ) );
- assign( oldd, getIReg64( R_RDX ) );
- assign( oldml, loadLE( Ity_I64, mkexpr(addr) ));
- assign( oldmh, loadLE( Ity_I64,
- binop(Iop_Add64,mkexpr(addr),mkU64(8)) ));
- assign(eq,
- unop(Iop_1Uto8,
- binop(Iop_CmpEQ64,
- binop(Iop_Or64,
- binop(Iop_Xor64,mkexpr(olda),mkexpr(oldml)),
- binop(Iop_Xor64,mkexpr(oldd),mkexpr(oldmh))),
- mkU64(0))));
- assign( newml, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(oldb)) );
- assign( newmh, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldc)) );
- assign( newa, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(olda)) );
- assign( newd, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldd)) );
-
- storeLE( mkexpr(addr), mkexpr(newml) );
- storeLE( binop(Iop_Add64,mkexpr(addr),mkU64(8)),
- mkexpr(newmh) );
- putIRegRAX( 8, mkexpr(newa) );
- putIRegRDX( 8, mkexpr(newd) );
- }
-
- /* And set the flags. Z is set if original d:a == mem, else
- cleared. All others unchanged. (This is different from normal
- cmpxchg which just sets them according to SUB.). */
- assign( oldrf, binop(Iop_And64,
- mk_amd64g_calculate_rflags_all(),
- mkU64(~AMD64G_CC_MASK_Z)) );
- assign( newrf,
- binop(Iop_Or64,
- mkexpr(oldrf),
- binop(Iop_Shl64,
- binop(Iop_And64, unop(Iop_8Uto64, mkexpr(eq)), mkU64(1)),
- mkU8(AMD64G_CC_SHIFT_Z))
- ));
- stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
- stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
- stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newrf) ));
- /* Set NDEP even though it isn't used. This makes redundant-PUT
- elimination of previous stores to this field work better. */
- stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+ else vassert(0);
*ok = True;
return delta0;
}
-//.. //-- static
-//.. //-- Addr dis_cmpxchg8b ( UCodeBlock* cb,
-//.. //-- UChar sorb,
-//.. //-- Addr eip0 )
-//.. //-- {
-//.. //-- Int tal, tah, junkl, junkh, destl, desth, srcl, srch, accl, acch;
-//.. //-- HChar dis_buf[50];
-//.. //-- UChar rm;
-//.. //-- UInt pair;
-//.. //--
-//.. //-- rm = getUChar(eip0);
-//.. //-- accl = newTemp(cb);
-//.. //-- acch = newTemp(cb);
-//.. //-- srcl = newTemp(cb);
-//.. //-- srch = newTemp(cb);
-//.. //-- destl = newTemp(cb);
-//.. //-- desth = newTemp(cb);
-//.. //-- junkl = newTemp(cb);
-//.. //-- junkh = newTemp(cb);
-//.. //--
-//.. //-- vg_assert(!epartIsReg(rm));
-//.. //--
-//.. //-- pair = disAMode ( cb, sorb, eip0, dis_buf );
-//.. //-- tal = LOW24(pair);
-//.. //-- tah = newTemp(cb);
-//.. //-- uInstr2(cb, MOV, 4, TempReg, tal, TempReg, tah);
-//.. //-- uInstr2(cb, ADD, 4, Literal, 0, TempReg, tah);
-//.. //-- uLiteral(cb, 4);
-//.. //-- eip0 += HI8(pair);
-//.. //-- DIP("cmpxchg8b %s\n", dis_buf);
-//.. //--
-//.. //-- uInstr0(cb, CALLM_S, 0);
-//.. //--
-//.. //-- uInstr2(cb, LOAD, 4, TempReg, tah, TempReg, desth);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, desth);
-//.. //-- uInstr2(cb, LOAD, 4, TempReg, tal, TempReg, destl);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, destl);
-//.. //-- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, srch);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, srch);
-//.. //-- uInstr2(cb, GET, 4, ArchReg, R_EBX, TempReg, srcl);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, srcl);
-//.. //-- uInstr2(cb, GET, 4, ArchReg, R_EDX, TempReg, acch);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, acch);
-//.. //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, accl);
-//.. //-- uInstr1(cb, PUSH, 4, TempReg, accl);
-//.. //--
-//.. //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_cmpxchg8b));
-//.. //-- uFlagsRWU(cb, FlagsEmpty, FlagZ, FlagsEmpty);
-//.. //--
-//.. //-- uInstr1(cb, POP, 4, TempReg, accl);
-//.. //-- uInstr2(cb, PUT, 4, TempReg, accl, ArchReg, R_EAX);
-//.. //-- uInstr1(cb, POP, 4, TempReg, acch);
-//.. //-- uInstr2(cb, PUT, 4, TempReg, acch, ArchReg, R_EDX);
-//.. //-- uInstr1(cb, POP, 4, TempReg, srcl);
-//.. //-- uInstr2(cb, PUT, 4, TempReg, srcl, ArchReg, R_EBX);
-//.. //-- uInstr1(cb, POP, 4, TempReg, srch);
-//.. //-- uInstr2(cb, PUT, 4, TempReg, srch, ArchReg, R_ECX);
-//.. //-- uInstr1(cb, POP, 4, TempReg, destl);
-//.. //-- uInstr2(cb, STORE, 4, TempReg, destl, TempReg, tal);
-//.. //-- uInstr1(cb, POP, 4, TempReg, desth);
-//.. //-- uInstr2(cb, STORE, 4, TempReg, desth, TempReg, tah);
-//.. //--
-//.. //-- uInstr0(cb, CALLM_E, 0);
-//.. //--
-//.. //-- return eip0;
-//.. //-- }
-
/* Handle conditional move instructions of the form
cmovcc E(reg-or-mem), G(reg)
IRTemp tmpd = newTemp(ty);
IRTemp tmpt0 = newTemp(ty);
IRTemp tmpt1 = newTemp(ty);
- *decode_ok = True;
+
+ /* There are 3 cases to consider:
+
+ reg-reg: currently unhandled
+
+ reg-mem, not locked: ignore any lock prefix, generate 'naive'
+ (non-atomic) sequence
+
+ reg-mem, locked: use IRCAS
+ */
if (epartIsReg(rm)) {
+ /* case 1 */
*decode_ok = False;
return delta0;
- } else {
+ /* Currently we don't handle xadd_G_E with register operand. */
+ }
+ else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
+ /* case 2 */
IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
assign( tmpd, loadLE(ty, mkexpr(addr)) );
assign( tmpt0, getIRegG(sz, pfx, rm) );
- assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), mkexpr(tmpd), mkexpr(tmpt0)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
storeLE( mkexpr(addr), mkexpr(tmpt1) );
putIRegG(sz, pfx, rm, mkexpr(tmpd));
DIP("xadd%c %s, %s\n",
nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
+ *decode_ok = True;
return len+delta0;
}
+ else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
+ /* case 3 */
+ IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ assign( tmpd, loadLE(ty, mkexpr(addr)) );
+ assign( tmpt0, getIRegG(sz, pfx, rm) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
+ mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIRegG(sz, pfx, rm, mkexpr(tmpd));
+ DIP("xadd%c %s, %s\n",
+ nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
+ *decode_ok = True;
+ return len+delta0;
+ }
+ /*UNREACHED*/
+ vassert(0);
}
//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
/* Helper for deciding whether a given insn (starting at the opcode
byte) may validly be used with a LOCK prefix. The following insns
may be used with LOCK when their destination operand is in memory.
- Note, this is slightly too permissive. Oh well. Note also, AFAICS
- this is exactly the same for both 32-bit and 64-bit mode.
+ AFAICS this is exactly the same for both 32-bit and 64-bit mode.
- ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03
- OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B
- ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13
- SBB 81 /3, 81 /3, 83 /3, 18, 19, 1A, 1B
- AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23
- SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B
- XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33
+ ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
+ OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
+ ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
+   SBB    80 /3, 81 /3, 82 /x, 83 /3, 18, 19
+ AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
+ SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
+ XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
DEC FE /1, FF /1
INC FE /0, FF /0
NEG F6 /3, F7 /3
NOT F6 /2, F7 /2
- XCHG 86, 87
+ XCHG 86, 87
BTC 0F BB, 0F BA /7
BTR 0F B3, 0F BA /6
CMPXCHG8B 0F C7 /1
XADD 0F C0, 0F C1
+
+ ------------------------------
+
+ 80 /0 = addb $imm8, rm8
+ 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
+ 82 /0 = addb $imm8, rm8
+ 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
+
+ 00 = addb r8, rm8
+ 01 = addl r32, rm32 and addw r16, rm16
+
+ Same for ADD OR ADC SBB AND SUB XOR
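+   (Here 82 is an alias of 80; "/x" presumably denotes the same
+   /digit as in the corresponding 80 entry.)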
+
+ FE /1 = dec rm8
+ FF /1 = dec rm32 and dec rm16
+
+ FE /0 = inc rm8
+ FF /0 = inc rm32 and inc rm16
+
+ F6 /3 = neg rm8
+ F7 /3 = neg rm32 and neg rm16
+
+ F6 /2 = not rm8
+ F7 /2 = not rm32 and not rm16
+
+ 0F BB = btcw r16, rm16 and btcl r32, rm32
+   0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
+
+ Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
{
switch (opc[0]) {
- case 0x00: case 0x01: case 0x02: case 0x03: return True;
- case 0x08: case 0x09: case 0x0A: case 0x0B: return True;
- case 0x10: case 0x11: case 0x12: case 0x13: return True;
- case 0x18: case 0x19: case 0x1A: case 0x1B: return True;
- case 0x20: case 0x21: case 0x22: case 0x23: return True;
- case 0x28: case 0x29: case 0x2A: case 0x2B: return True;
- case 0x30: case 0x31: case 0x32: case 0x33: return True;
-
- case 0x80: case 0x81: case 0x83:
- if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6)
+ case 0x00: case 0x01: case 0x08: case 0x09:
+ case 0x10: case 0x11: case 0x18: case 0x19:
+ case 0x20: case 0x21: case 0x28: case 0x29:
+ case 0x30: case 0x31:
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0x80: case 0x81: case 0x82: case 0x83:
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
+ && !epartIsReg(opc[1]))
return True;
break;
case 0xFE: case 0xFF:
- if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1)
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
+ && !epartIsReg(opc[1]))
return True;
break;
case 0xF6: case 0xF7:
- if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3)
+ if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
+ && !epartIsReg(opc[1]))
return True;
break;
case 0x86: case 0x87:
- return True;
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
case 0x0F: {
switch (opc[1]) {
case 0xBB: case 0xB3: case 0xAB:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
case 0xBA:
- if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7)
+ if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
+ && !epartIsReg(opc[2]))
return True;
break;
case 0xB0: case 0xB1:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
case 0xC7:
- if (gregLO3ofRM(opc[2]) == 1)
+         if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]))
return True;
break;
case 0xC0: case 0xC1:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
default:
break;
} /* switch (opc[1]) */
static
DisResult disInstr_AMD64_WRK (
+ /*OUT*/Bool* expect_CAS,
Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
void* callback_opaque,
/* pfx holds the summary of prefixes. */
Prefix pfx = PFX_EMPTY;
- /* do we need follow the insn with MBusEvent(BusUnlock) ? */
- Bool unlock_bus_after_insn = False;
-
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 0;
dres.continueAt = 0;
+ *expect_CAS = False;
+
vassert(guest_RIP_next_assumed == 0);
vassert(guest_RIP_next_mustcheck == False);
case 0x67: pfx |= PFX_ASO; break;
case 0xF2: pfx |= PFX_F2; break;
case 0xF3: pfx |= PFX_F3; break;
- case 0xF0: pfx |= PFX_LOCK; break;
+ case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
case 0x2E: pfx |= PFX_CS; break;
case 0x3E: pfx |= PFX_DS; break;
case 0x26: pfx |= PFX_ES; break;
if (pfx & PFX_66) sz = 2;
if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
- /* Kludge re LOCK prefixes. We assume here that all code generated
- by Vex is going to be run in a single-threaded context, in other
- words that concurrent executions of Vex-generated translations
- will not happen. So we don't need to worry too much about
- preserving atomicity. However, mark the fact that the notional
- hardware bus lock is being acquired (and, after the insn,
- released), so that thread checking tools know this is a locked
- insn.
-
- We check for, and immediately reject, (most) inappropriate uses
- of the LOCK prefix. Later (at decode_failure: and
- decode_success:), if we've added a BusLock event, then we will
- follow up with a BusUnlock event. How do we know execution will
- actually ever get to the BusUnlock event? Because
- can_be_used_with_LOCK_prefix rejects all control-flow changing
- instructions.
-
- One loophole, though: if a LOCK prefix insn (seg)faults, then
- the BusUnlock event will never be reached. This could cause
- tools which track bus hardware lock to lose track. Really, we
- should explicitly release the lock after every insn, but that's
- obviously way too expensive. Really, any tool which tracks the
- state of the bus lock needs to ask V's core/tool interface to
- notify it of signal deliveries. On delivery of SIGSEGV to the
- guest, the tool will be notified, in which case it should
- release the bus hardware lock if it is held.
-
- Note, guest-x86/toIR.c contains identical logic.
- */
+ /* Now we should be looking at the primary opcode byte or the
+ leading F2 or F3. Check that any LOCK prefix is actually
+ allowed. */
+
if (pfx & PFX_LOCK) {
if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
- stmt( IRStmt_MBE(Imbe_BusLock) );
- unlock_bus_after_insn = True;
DIP("lock ");
} else {
+ *expect_CAS = False;
goto decode_failure;
}
}
nameISize(sz), nameIRegG(sz, pfx, modrm),
nameIRegE(sz, pfx, modrm));
} else {
- /* Need to add IRStmt_MBE(Imbe_BusLock). */
- if (pfx & PFX_LOCK) {
- /* check it's already been taken care of */
- vassert(unlock_bus_after_insn);
- } else {
- vassert(!unlock_bus_after_insn);
- stmt( IRStmt_MBE(Imbe_BusLock) );
- unlock_bus_after_insn = True;
- }
- /* Because unlock_bus_after_insn is now True, generic logic
- at the bottom of disInstr will add the
- IRStmt_MBE(Imbe_BusUnlock). */
+ *expect_CAS = True;
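+         /* xchg with a memory operand is locked even without an
+            explicit LOCK prefix, hence the unconditional CAS. */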
addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
assign( t1, loadLE(ty, mkexpr(addr)) );
assign( t2, getIRegG(sz, pfx, modrm) );
- storeLE( mkexpr(addr), mkexpr(t2) );
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
putIRegG( sz, pfx, modrm, mkexpr(t1) );
delta += alen;
DIP("xchg%c %s, %s\n", nameISize(sz),
if (!ok) goto decode_failure;
break;
}
+
case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
- Bool ok = True;
+ IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
+ IRTemp expdHi = newTemp(elemTy);
+ IRTemp expdLo = newTemp(elemTy);
+ IRTemp dataHi = newTemp(elemTy);
+ IRTemp dataLo = newTemp(elemTy);
+ IRTemp oldHi = newTemp(elemTy);
+ IRTemp oldLo = newTemp(elemTy);
+ IRTemp flags_old = newTemp(Ity_I64);
+ IRTemp flags_new = newTemp(Ity_I64);
+ IRTemp success = newTemp(Ity_I1);
+ IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
+ IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
+ IROp opCmpEQ = sz==4 ? Iop_CmpEQ32 : Iop_CmpEQ64;
+ IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
+ IRTemp expdHi64 = newTemp(Ity_I64);
+ IRTemp expdLo64 = newTemp(Ity_I64);
+
+ /* Translate this using a DCAS, even if there is no LOCK
+ prefix. Life is too short to bother with generating two
+ different translations for the with/without-LOCK-prefix
+ cases. */
+ *expect_CAS = True;
+
+ /* Decode, and generate address. */
if (have66orF2orF3(pfx)) goto decode_failure;
if (sz != 4 && sz != 8) goto decode_failure;
- delta = dis_cmpxchg8b ( &ok, vbi, pfx, sz, delta );
- break;
+ if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
+ goto decode_failure;
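+      /* 0F C7 /1 with a memory operand is the only valid encoding;
+         reject everything else. */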
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) goto decode_failure;
+ if (gregLO3ofRM(modrm) != 1) goto decode_failure;
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+
+      /* cmpxchg16b requires an alignment check: the real insn faults
+         on a non-16-aligned operand, which we report as SIGSEGV. */
+ if (sz == 8)
+ gen_SEGV_if_not_16_aligned( addr );
+
+ /* Get the expected and new values. */
+ assign( expdHi64, getIReg64(R_RDX) );
+ assign( expdLo64, getIReg64(R_RAX) );
+
+ /* These are the correctly-sized expected and new values.
+ However, we also get expdHi64/expdLo64 above as 64-bits
+ regardless, because we will need them later in the 32-bit
+ case (paradoxically). */
+ assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
+ : mkexpr(expdHi64) );
+ assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
+ : mkexpr(expdLo64) );
+ assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
+ assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
+
+ /* Do the DCAS */
+ stmt( IRStmt_CAS(
+ mkIRCAS( oldHi, oldLo,
+ Iend_LE, mkexpr(addr),
+ mkexpr(expdHi), mkexpr(expdLo),
+ mkexpr(dataHi), mkexpr(dataLo)
+ )));
+
+ /* success when oldHi:oldLo == expdHi:expdLo */
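+      /* Computed as ((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0,
+         which folds the two comparisons into a single test. */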
+ assign( success,
+ binop(opCmpEQ,
+ binop(opOR,
+ binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
+ binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
+ ),
+ zero
+ ));
+
+ /* If the DCAS is successful, that is to say oldHi:oldLo ==
+ expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
+ which is where they came from originally. Both the actual
+ contents of these two regs, and any shadow values, are
+ unchanged. If the DCAS fails then we're putting into
+ RDX:RAX the value seen in memory. */
+ /* Now of course there's a complication in the 32-bit case
+ (bah!): if the DCAS succeeds, we need to leave RDX:RAX
+ unchanged; but if we use the same scheme as in the 64-bit
+ case, we get hit by the standard rule that a write to the
+ bottom 32 bits of an integer register zeros the upper 32
+ bits. And so the upper halves of RDX and RAX mysteriously
+ become zero. So we have to stuff back in the original
+ 64-bit values which we previously stashed in
+ expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
+ /* It's just _so_ much fun ... */
+ putIRegRDX( 8,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
+ : mkexpr(oldHi),
+ mkexpr(expdHi64)
+ ));
+ putIRegRAX( 8,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
+ : mkexpr(oldLo),
+ mkexpr(expdLo64)
+ ));
+
+ /* Copy the success bit into the Z flag and leave the others
+ unchanged */
+ assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
+ assign(
+ flags_new,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(flags_old),
+ mkU64(~AMD64G_CC_MASK_Z)),
+ binop(Iop_Shl64,
+ binop(Iop_And64,
+ unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
+ mkU8(AMD64G_CC_SHIFT_Z)) ));
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ /* Set NDEP even though it isn't used. This makes
+ redundant-PUT elimination of previous stores to this field
+ work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ /* Sheesh. Aren't you glad it was me and not you that had to
+ write and validate all this grunge? */
+
+ DIP("cmpxchg8b %s\n", dis_buf);
+ break;
+
}
/* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
HChar* fName = NULL;
void* fAddr = NULL;
if (haveF2orF3(pfx)) goto decode_failure;
- if (archinfo->hwcaps == 0/*baseline, == SSE2*/) {
- fName = "amd64g_dirtyhelper_CPUID";
- fAddr = &amd64g_dirtyhelper_CPUID;
+ if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
+ |VEX_HWCAPS_AMD64_CX16)) {
+ fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
+ fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16;
+ /* This is a Core-2-like machine */
+ }
+ else {
+      /* Give a CPUID for at least a baseline machine, with no SSE3
+         and no CX16 */
+ fName = "amd64g_dirtyhelper_CPUID_baseline";
+ fAddr = &amd64g_dirtyhelper_CPUID_baseline;
}
- else
- vpanic("disInstr(amd64)(cpuid)");
vassert(fName); vassert(fAddr);
d = unsafeIRDirty_0_N ( 0/*regparms*/,
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
- if (unlock_bus_after_insn)
- stmt( IRStmt_MBE(Imbe_BusUnlock) );
jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
dres.whatNext = Dis_StopHere;
dres.len = 0;
+ /* We also need to say that a CAS is not expected now, regardless
+ of what it might have been set to at the start of the function,
+      since the IR that we've emitted just above (to synthesise a
+ SIGILL) does not involve any CAS, and presumably no other IR has
+ been emitted for this (non-decoded) insn. */
+ *expect_CAS = False;
return dres;
} /* switch (opc) for the main (primary) opcode switch. */
decode_success:
/* All decode successes end up here. */
DIP("\n");
- if (unlock_bus_after_insn)
- stmt( IRStmt_MBE(Imbe_BusUnlock) );
dres.len = (Int)toUInt(delta - delta_start);
return dres;
}
VexAbiInfo* abiinfo,
Bool host_bigendian_IN )
{
+ Int i, x1, x2;
+ Bool expect_CAS, has_CAS;
DisResult dres;
/* Set globals (see top of this file) */
guest_RIP_next_assumed = 0;
guest_RIP_next_mustcheck = False;
- dres = disInstr_AMD64_WRK ( put_IP, resteerOkFn, callback_opaque,
+ x1 = irsb_IN->stmts_used;
+ expect_CAS = False;
+ dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ callback_opaque,
delta, archinfo, abiinfo );
+ x2 = irsb_IN->stmts_used;
+ vassert(x2 >= x1);
/* If disInstr_AMD64_WRK tried to figure out the next rip, check it
got it right. Failure of this assertion is serious and denotes
guest_RIP_next_assumed );
vex_printf(" actual next %%rip = 0x%llx\n",
guest_RIP_curr_instr + dres.len );
- vpanic("bbToIR_AMD64: disInstr miscalculated next %rip");
+ vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
+ }
+
+ /* See comment at the top of disInstr_AMD64_WRK for meaning of
+ expect_CAS. Here, we (sanity-)check for the presence/absence of
+ IRCAS as directed by the returned expect_CAS value. */
+ has_CAS = False;
+ for (i = x1; i < x2; i++) {
+ if (irsb_IN->stmts[i]->tag == Ist_CAS)
+ has_CAS = True;
+ }
+
+ if (expect_CAS != has_CAS) {
+ /* inconsistency detected. re-disassemble the instruction so as
+ to generate a useful error message; then assert. */
+ vex_traceflags |= VEX_TRACE_FE;
+ dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ callback_opaque,
+ delta, archinfo, abiinfo );
+ for (i = x1; i < x2; i++) {
+ vex_printf("\t\t");
+ ppIRStmt(irsb_IN->stmts[i]);
+ vex_printf("\n");
+ }
+ /* Failure of this assertion is serious and denotes a bug in
+ disInstr. */
+ vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
}
return dres;
static void storeLE ( IRExpr* addr, IRExpr* data )
{
- stmt( IRStmt_Store(Iend_LE,addr,data) );
+ stmt( IRStmt_Store(Iend_LE, IRTemp_INVALID, addr, data) );
}
static IRExpr* unop ( IROp op, IRExpr* a )
static IRExpr* loadLE ( IRType ty, IRExpr* data )
{
- return IRExpr_Load(Iend_LE,ty,data);
+ return IRExpr_Load(False, Iend_LE, ty, data);
}
#if 0
vex_state->guest_EMWARN = EmWarn_NONE;
- vex_state->guest_RESVN = 0;
-
vex_state->guest_TISTART = 0;
vex_state->guest_TILEN = 0;
vex_state->guest_EMWARN = EmWarn_NONE;
- vex_state->guest_RESVN = 0;
+ vex_state->padding = 0;
vex_state->guest_TISTART = 0;
vex_state->guest_TILEN = 0;
vex_state->guest_IP_AT_SYSCALL = 0;
vex_state->guest_SPRG3_RO = 0;
+
+ vex_state->padding2 = 0;
}
/* Describe any sections to be regarded by Memcheck as
'always-defined'. */
- .n_alwaysDefd = 12,
+ .n_alwaysDefd = 11,
.alwaysDefd
= { /* 0 */ ALWAYSDEFD32(guest_CIA),
/* 3 */ ALWAYSDEFD32(guest_TILEN),
/* 4 */ ALWAYSDEFD32(guest_VSCR),
/* 5 */ ALWAYSDEFD32(guest_FPROUND),
- /* 6 */ ALWAYSDEFD32(guest_RESVN),
- /* 7 */ ALWAYSDEFD32(guest_NRADDR),
- /* 8 */ ALWAYSDEFD32(guest_NRADDR_GPR2),
- /* 9 */ ALWAYSDEFD32(guest_REDIR_SP),
- /* 10 */ ALWAYSDEFD32(guest_REDIR_STACK),
- /* 11 */ ALWAYSDEFD32(guest_IP_AT_SYSCALL)
+ /* 6 */ ALWAYSDEFD32(guest_NRADDR),
+ /* 7 */ ALWAYSDEFD32(guest_NRADDR_GPR2),
+ /* 8 */ ALWAYSDEFD32(guest_REDIR_SP),
+ /* 9 */ ALWAYSDEFD32(guest_REDIR_STACK),
+ /* 10 */ ALWAYSDEFD32(guest_IP_AT_SYSCALL)
}
};
/* 3 */ ALWAYSDEFD64(guest_TILEN),
/* 4 */ ALWAYSDEFD64(guest_VSCR),
/* 5 */ ALWAYSDEFD64(guest_FPROUND),
- /* 6 */ ALWAYSDEFD64(guest_RESVN),
- /* 7 */ ALWAYSDEFD64(guest_NRADDR),
- /* 8 */ ALWAYSDEFD64(guest_NRADDR_GPR2),
- /* 9 */ ALWAYSDEFD64(guest_REDIR_SP),
- /* 10 */ ALWAYSDEFD64(guest_REDIR_STACK),
- /* 11 */ ALWAYSDEFD64(guest_IP_AT_SYSCALL)
+ /* 6 */ ALWAYSDEFD64(guest_NRADDR),
+ /* 7 */ ALWAYSDEFD64(guest_NRADDR_GPR2),
+ /* 8 */ ALWAYSDEFD64(guest_REDIR_SP),
+ /* 9 */ ALWAYSDEFD64(guest_REDIR_STACK),
+ /* 10 */ ALWAYSDEFD64(guest_IP_AT_SYSCALL)
}
};
#define OFFB_EMWARN offsetofPPCGuestState(guest_EMWARN)
#define OFFB_TISTART offsetofPPCGuestState(guest_TISTART)
#define OFFB_TILEN offsetofPPCGuestState(guest_TILEN)
-#define OFFB_RESVN offsetofPPCGuestState(guest_RESVN)
#define OFFB_NRADDR offsetofPPCGuestState(guest_NRADDR)
#define OFFB_NRADDR_GPR2 offsetofPPCGuestState(guest_NRADDR_GPR2)
PPC_GST_EMWARN, // Emulation warnings
PPC_GST_TISTART,// For icbi: start of area to invalidate
PPC_GST_TILEN, // For icbi: length of area to invalidate
- PPC_GST_RESVN, // For lwarx/stwcx.
PPC_GST_IP_AT_SYSCALL, // the CIA of the most recently executed SC insn
PPC_GST_SPRG3_RO, // SPRG3
PPC_GST_MAX
stmt( IRStmt_WrTmp(dst, e) );
}
+/* This generates a normal (non store-conditional) store. */
static void storeBE ( IRExpr* addr, IRExpr* data )
{
- vassert(typeOfIRExpr(irsb->tyenv, addr) == Ity_I32 ||
- typeOfIRExpr(irsb->tyenv, addr) == Ity_I64);
- stmt( IRStmt_Store(Iend_BE,addr,data) );
+ IRType tyA = typeOfIRExpr(irsb->tyenv, addr);
+ vassert(tyA == Ity_I32 || tyA == Ity_I64);
+ stmt( IRStmt_Store(Iend_BE, IRTemp_INVALID, addr, data) );
}
static IRExpr* unop ( IROp op, IRExpr* a )
return IRExpr_Const(IRConst_U64(i));
}
+/* This generates a normal (non load-linked) load. */
static IRExpr* loadBE ( IRType ty, IRExpr* data )
{
- return IRExpr_Load(Iend_BE,ty,data);
+ return IRExpr_Load(False, Iend_BE, ty, data);
+}
+
+/* And this, a linked load. */
+static IRExpr* loadlinkedBE ( IRType ty, IRExpr* data )
+{
+ if (mode64) {
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ } else {
+ vassert(ty == Ity_I32);
+ }
+ return IRExpr_Load(True, Iend_BE, ty, data);
}
static IRExpr* mkOR1 ( IRExpr* arg1, IRExpr* arg2 )
}
/* IR narrows I32/I64 -> I8/I16/I32 */
-static IRExpr* mkSzNarrow8 ( IRType ty, IRExpr* src )
+static IRExpr* mkNarrowTo8 ( IRType ty, IRExpr* src )
{
vassert(ty == Ity_I32 || ty == Ity_I64);
return ty == Ity_I64 ? unop(Iop_64to8, src) : unop(Iop_32to8, src);
}
-static IRExpr* mkSzNarrow16 ( IRType ty, IRExpr* src )
+static IRExpr* mkNarrowTo16 ( IRType ty, IRExpr* src )
{
vassert(ty == Ity_I32 || ty == Ity_I64);
return ty == Ity_I64 ? unop(Iop_64to16, src) : unop(Iop_32to16, src);
}
-static IRExpr* mkSzNarrow32 ( IRType ty, IRExpr* src )
+static IRExpr* mkNarrowTo32 ( IRType ty, IRExpr* src )
{
vassert(ty == Ity_I32 || ty == Ity_I64);
return ty == Ity_I64 ? unop(Iop_64to32, src) : src;
}
/* Signed/Unsigned IR widens I8/I16/I32 -> I32/I64 */
-static IRExpr* mkSzWiden8 ( IRType ty, IRExpr* src, Bool sined )
+static IRExpr* mkWidenFrom8 ( IRType ty, IRExpr* src, Bool sined )
{
IROp op;
vassert(ty == Ity_I32 || ty == Ity_I64);
return unop(op, src);
}
-static IRExpr* mkSzWiden16 ( IRType ty, IRExpr* src, Bool sined )
+static IRExpr* mkWidenFrom16 ( IRType ty, IRExpr* src, Bool sined )
{
IROp op;
vassert(ty == Ity_I32 || ty == Ity_I64);
return unop(op, src);
}
-static IRExpr* mkSzWiden32 ( IRType ty, IRExpr* src, Bool sined )
+static IRExpr* mkWidenFrom32 ( IRType ty, IRExpr* src, Bool sined )
{
vassert(ty == Ity_I32 || ty == Ity_I64);
if (ty == Ity_I32)
/* non-zero rotate */ rot );
}
-#if 0
-/* ROTL32_64(src64, rot_amt5)
- Weirdo 32bit rotl on ppc64:
- rot32 = ROTL(src_lo32,y);
- return (rot32|rot32);
-*/
-static IRExpr* /* :: Ity_I64 */ ROTL32_64 ( IRExpr* src64,
- IRExpr* rot_amt )
-{
- IRExpr *mask, *rot32;
- vassert(mode64); // used only in 64bit mode
- vassert(typeOfIRExpr(irsb->tyenv,src64) == Ity_I64);
- vassert(typeOfIRExpr(irsb->tyenv,rot_amt) == Ity_I8);
-
- mask = binop(Iop_And8, rot_amt, mkU8(31));
- rot32 = ROTL( unop(Iop_64to32, src64), rot_amt );
-
- return binop(Iop_Or64,
- binop(Iop_Shl64, unop(Iop_32Uto64, rot32), mkU8(32)),
- unop(Iop_32Uto64, rot32));
-}
-#endif
-
-
/* Standard effective address calc: (rA + rB) */
static IRExpr* ea_rA_idxd ( UInt rA, UInt rB )
{
}
+/* Exit the trace if ADDR (intended to be a guest memory address) is
+ not ALIGN-aligned, generating a request for a SIGBUS followed by a
+ restart of the current insn. */
+static void gen_SIGBUS_if_misaligned ( IRTemp addr, UChar align )
+{
+ vassert(align == 4 || align == 8);
+ if (mode64) {
+ vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64);
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE64,
+ binop(Iop_And64, mkexpr(addr), mkU64(align-1)),
+ mkU64(0)),
+ Ijk_SigBUS,
+ IRConst_U64( guest_CIA_curr_instr )
+ )
+ );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I32);
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32,
+ binop(Iop_And32, mkexpr(addr), mkU32(align-1)),
+ mkU32(0)),
+ Ijk_SigBUS,
+ IRConst_U32( guest_CIA_curr_instr )
+ )
+ );
+ }
+}
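+
+/* For example, the translation of lwarx (which architecturally
+   requires a word-aligned EA) would be expected to call
+   gen_SIGBUS_if_misaligned( EA, 4 ) before its linked load. */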
+
+
/* Generate AbiHints which mark points at which the ELF or PowerOpen
ABIs say that the stack red zone (viz, -N(r1) .. -1(r1), for some
N) becomes undefined. That is at function calls and returns. ELF
binop( Iop_Shl32, getXER_CA32(), mkU8(29)),
getXER_BC32()));
- case PPC_GST_RESVN:
- return IRExpr_Get( OFFB_RESVN, ty);
-
default:
vex_printf("getGST(ppc): reg = %u", reg);
vpanic("getGST(ppc)");
stmt( IRStmt_Put( OFFB_TILEN, src) );
break;
- case PPC_GST_RESVN:
- vassert( ty_src == ty );
- stmt( IRStmt_Put( OFFB_RESVN, src) );
- break;
-
default:
vex_printf("putGST(ppc): reg = %u", reg);
vpanic("putGST(ppc)");
flag_OE ? "o" : "", flag_rC ? ".":"",
rD_addr, rA_addr, rB_addr);
// rD = rA + rB + XER[CA]
- assign( old_xer_ca, mkSzWiden32(ty, getXER_CA32(), False) );
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
binop( mkSzOp(ty, Iop_Add8),
mkexpr(rB), mkexpr(old_xer_ca))) );
rD_addr, rA_addr, rB_addr);
// rD = rA + (-1) + XER[CA]
// => Just another form of adde
- assign( old_xer_ca, mkSzWiden32(ty, getXER_CA32(), False) );
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
min_one = mkSzImm(ty, (Long)-1);
assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
binop( mkSzOp(ty, Iop_Add8),
rD_addr, rA_addr, rB_addr);
// rD = rA + (0) + XER[CA]
// => Just another form of adde
- assign( old_xer_ca, mkSzWiden32(ty, getXER_CA32(), False) );
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
assign( rD, binop( mkSzOp(ty, Iop_Add8),
mkexpr(rA), mkexpr(old_xer_ca)) );
set_XER_CA( ty, PPCG_FLAG_OP_ADDE,
flag_OE ? "o" : "", flag_rC ? ".":"",
rD_addr, rA_addr, rB_addr);
// rD = (log not)rA + rB + XER[CA]
- assign( old_xer_ca, mkSzWiden32(ty, getXER_CA32(), False) );
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
assign( rD, binop( mkSzOp(ty, Iop_Add8),
unop( mkSzOp(ty, Iop_Not8), mkexpr(rA)),
binop( mkSzOp(ty, Iop_Add8),
rD_addr, rA_addr);
// rD = (log not)rA + (-1) + XER[CA]
// => Just another form of subfe
- assign( old_xer_ca, mkSzWiden32(ty, getXER_CA32(), False) );
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
min_one = mkSzImm(ty, (Long)-1);
assign( rD, binop( mkSzOp(ty, Iop_Add8),
unop( mkSzOp(ty, Iop_Not8), mkexpr(rA)),
rD_addr, rA_addr);
// rD = (log not)rA + (0) + XER[CA]
// => Just another form of subfe
- assign( old_xer_ca, mkSzWiden32(ty, getXER_CA32(), False) );
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
assign( rD, binop( mkSzOp(ty, Iop_Add8),
unop( mkSzOp(ty, Iop_Not8),
mkexpr(rA)), mkexpr(old_xer_ca)) );
if (flag_L == 1) {
putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64S, a, b)));
} else {
- a = mkSzNarrow32( ty, a );
- b = mkSzNarrow32( ty, b );
+ a = mkNarrowTo32( ty, a );
+ b = mkNarrowTo32( ty, b );
putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32S, a, b)));
}
putCR0( crfD, getXER_SO() );
if (flag_L == 1) {
putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64U, a, b)));
} else {
- a = mkSzNarrow32( ty, a );
- b = mkSzNarrow32( ty, b );
+ a = mkNarrowTo32( ty, a );
+ b = mkNarrowTo32( ty, b );
putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32U, a, b)));
}
putCR0( crfD, getXER_SO() );
if (flag_L == 1) {
putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64S, a, b)));
} else {
- a = mkSzNarrow32( ty, a );
- b = mkSzNarrow32( ty, b );
+ a = mkNarrowTo32( ty, a );
+ b = mkNarrowTo32( ty, b );
putCR321(crfD, unop(Iop_32to8,binop(Iop_CmpORD32S, a, b)));
}
putCR0( crfD, getXER_SO() );
if (flag_L == 1) {
putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64U, a, b)));
} else {
- a = mkSzNarrow32( ty, a );
- b = mkSzNarrow32( ty, b );
+ a = mkNarrowTo32( ty, a );
+ b = mkNarrowTo32( ty, b );
putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32U, a, b)));
}
putCR0( crfD, getXER_SO() );
// Iop_Clz32 undefined for arg==0, so deal with that case:
irx = binop(Iop_CmpNE32, lo32, mkU32(0));
- assign(rA, mkSzWiden32(ty,
+ assign(rA, mkWidenFrom32(ty,
IRExpr_Mux0X( unop(Iop_1Uto8, irx),
mkU32(32),
unop(Iop_Clz32, lo32)),
case 0x22: // lbz (Load B & Zero, PPC32 p433)
DIP("lbz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
val = loadBE(Ity_I8, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden8(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
break;
case 0x23: // lbzu (Load B & Zero, Update, PPC32 p434)
}
DIP("lbzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
val = loadBE(Ity_I8, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden8(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
putIReg( rA_addr, mkexpr(EA) );
break;
case 0x2A: // lha (Load HW Alg, PPC32 p445)
DIP("lha r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
val = loadBE(Ity_I16, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden16(ty, val, True) );
+ putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
break;
case 0x2B: // lhau (Load HW Alg, Update, PPC32 p446)
}
DIP("lhau r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
val = loadBE(Ity_I16, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden16(ty, val, True) );
+ putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
putIReg( rA_addr, mkexpr(EA) );
break;
case 0x28: // lhz (Load HW & Zero, PPC32 p450)
DIP("lhz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
val = loadBE(Ity_I16, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden16(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
break;
case 0x29: // lhzu (Load HW & and Zero, Update, PPC32 p451)
}
DIP("lhzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
val = loadBE(Ity_I16, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden16(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
putIReg( rA_addr, mkexpr(EA) );
break;
case 0x20: // lwz (Load W & Zero, PPC32 p460)
DIP("lwz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
val = loadBE(Ity_I32, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden32(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
break;
case 0x21: // lwzu (Load W & Zero, Update, PPC32 p461))
}
DIP("lwzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
val = loadBE(Ity_I32, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden32(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
putIReg( rA_addr, mkexpr(EA) );
break;
return False;
}
val = loadBE(Ity_I8, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden8(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
putIReg( rA_addr, mkexpr(EA) );
break;
case 0x057: // lbzx (Load B & Zero, Indexed, PPC32 p436)
DIP("lbzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
val = loadBE(Ity_I8, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden8(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
break;
case 0x177: // lhaux (Load HW Alg, Update Indexed, PPC32 p447)
}
DIP("lhaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
val = loadBE(Ity_I16, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden16(ty, val, True) );
+ putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
putIReg( rA_addr, mkexpr(EA) );
break;
case 0x157: // lhax (Load HW Alg, Indexed, PPC32 p448)
DIP("lhax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
val = loadBE(Ity_I16, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden16(ty, val, True) );
+ putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
break;
case 0x137: // lhzux (Load HW & Zero, Update Indexed, PPC32 p452)
}
DIP("lhzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
val = loadBE(Ity_I16, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden16(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
putIReg( rA_addr, mkexpr(EA) );
break;
case 0x117: // lhzx (Load HW & Zero, Indexed, PPC32 p453)
DIP("lhzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
val = loadBE(Ity_I16, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden16(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
break;
case 0x037: // lwzux (Load W & Zero, Update Indexed, PPC32 p462)
}
DIP("lwzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
val = loadBE(Ity_I32, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden32(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
putIReg( rA_addr, mkexpr(EA) );
break;
case 0x017: // lwzx (Load W & Zero, Indexed, PPC32 p463)
DIP("lwzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
val = loadBE(Ity_I32, mkexpr(EA));
- putIReg( rD_addr, mkSzWiden32(ty, val, False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
break;
switch (opc1) {
case 0x26: // stb (Store B, PPC32 p509)
DIP("stb r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
- storeBE( mkexpr(EA), mkSzNarrow8(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
break;
case 0x27: // stbu (Store B, Update, PPC32 p510)
}
DIP("stbu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
putIReg( rA_addr, mkexpr(EA) );
- storeBE( mkexpr(EA), mkSzNarrow8(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
break;
case 0x2C: // sth (Store HW, PPC32 p522)
DIP("sth r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
- storeBE( mkexpr(EA), mkSzNarrow16(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
break;
case 0x2D: // sthu (Store HW, Update, PPC32 p524)
}
DIP("sthu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
putIReg( rA_addr, mkexpr(EA) );
- storeBE( mkexpr(EA), mkSzNarrow16(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
break;
case 0x24: // stw (Store W, PPC32 p530)
DIP("stw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
- storeBE( mkexpr(EA), mkSzNarrow32(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
break;
case 0x25: // stwu (Store W, Update, PPC32 p534)
}
DIP("stwu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
putIReg( rA_addr, mkexpr(EA) );
- storeBE( mkexpr(EA), mkSzNarrow32(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
break;
/* X Form : all these use EA_indexed */
}
DIP("stbux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
putIReg( rA_addr, mkexpr(EA) );
- storeBE( mkexpr(EA), mkSzNarrow8(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
break;
case 0x0D7: // stbx (Store B Indexed, PPC32 p512)
DIP("stbx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
- storeBE( mkexpr(EA), mkSzNarrow8(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
break;
case 0x1B7: // sthux (Store HW, Update Indexed, PPC32 p525)
}
DIP("sthux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
putIReg( rA_addr, mkexpr(EA) );
- storeBE( mkexpr(EA), mkSzNarrow16(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
break;
case 0x197: // sthx (Store HW Indexed, PPC32 p526)
DIP("sthx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
- storeBE( mkexpr(EA), mkSzNarrow16(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
break;
case 0x0B7: // stwux (Store W, Update Indexed, PPC32 p535)
}
DIP("stwux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
putIReg( rA_addr, mkexpr(EA) );
- storeBE( mkexpr(EA), mkSzNarrow32(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
break;
case 0x097: // stwx (Store W Indexed, PPC32 p536)
DIP("stwx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
- storeBE( mkexpr(EA), mkSzNarrow32(ty, mkexpr(rS)) );
+ storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
break;
DIP("lmw r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
for (r = rD_addr; r <= 31; r++) {
irx_addr = binop(Iop_Add32, mkexpr(EA), mkU32(ea_off));
- putIReg( r, mkSzWiden32(ty, loadBE(Ity_I32, irx_addr ),
- False) );
+ putIReg( r, mkWidenFrom32(ty, loadBE(Ity_I32, irx_addr ),
+ False) );
ea_off += 4;
}
break;
DIP("stmw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
for (r = rS_addr; r <= 31; r++) {
irx_addr = binop(Iop_Add32, mkexpr(EA), mkU32(ea_off));
- storeBE( irx_addr, mkSzNarrow32(ty, getIReg(r)) );
+ storeBE( irx_addr, mkNarrowTo32(ty, getIReg(r)) );
ea_off += 4;
}
break;
vassert(shift == 0 || shift == 8 || shift == 16 || shift == 24);
putIReg(
rD,
- mkSzWiden32(
+ mkWidenFrom32(
ty,
binop(
Iop_Or32,
- mkSzNarrow32(ty, getIReg(rD)),
+ mkNarrowTo32(ty, getIReg(rD)),
binop(
Iop_Shl32,
unop(
binop(mkSzOp(ty,Iop_Add8), e_EA, mkSzImm(ty,i)),
unop(Iop_32to8,
binop(Iop_Shr32,
- mkSzNarrow32(ty, getIReg(rS)),
+ mkNarrowTo32(ty, getIReg(rS)),
mkU8(toUChar(shift))))
);
shift -= 8;
IRType ty = mode64 ? Ity_I64 : Ity_I32;
IRTemp EA = newTemp(ty);
- IRTemp rS = newTemp(ty);
assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
hardware, I think as to whether or not contention is
likely. So we can just ignore it. */
DIP("lwarx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0);
- putIReg( rD_addr, mkSzWiden32(ty, loadBE(Ity_I32, mkexpr(EA)),
- False) );
- /* Take a reservation */
- putGST( PPC_GST_RESVN, mkexpr(EA) );
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 4 );
+
+ // and actually do the load
+ putIReg( rD_addr, mkWidenFrom32(ty, loadlinkedBE(Ity_I32, mkexpr(EA)),
+ False) );
break;
case 0x096: {
// stwcx. (Store Word Conditional Indexed, PPC32 p532)
- IRTemp resaddr = newTemp(ty);
+ // Note this has to handle stwcx. in both 32- and 64-bit modes,
+ // so isn't quite as straightforward as it might otherwise be.
+ IRTemp rS = newTemp(Ity_I32);
+ IRTemp resSC;
if (b0 != 1) {
vex_printf("dis_memsync(ppc)(stwcx.,b0)\n");
return False;
}
DIP("stwcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
- assign( rS, getIReg(rS_addr) );
- /* First set up as if the reservation failed */
- // Set CR0[LT GT EQ S0] = 0b000 || XER[SO]
- putCR321(0, mkU8(0<<1));
- putCR0(0, getXER_SO());
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 4 );
- /* Get the reservation address into a temporary, then
- clear it. */
- assign( resaddr, getGST(PPC_GST_RESVN) );
- putGST( PPC_GST_RESVN, mkSzImm(ty, 0) );
+ // Get the data to be stored, and narrow to 32 bits if necessary
+ assign( rS, mkNarrowTo32(ty, getIReg(rS_addr)) );
- /* Skip the rest if the reservation really did fail. */
- stmt( IRStmt_Exit(
- ( mode64 ?
- binop(Iop_CmpNE64, mkexpr(resaddr), mkexpr(EA)) :
- binop(Iop_CmpNE32, mkexpr(resaddr), mkexpr(EA)) ),
- Ijk_Boring,
- mkSzConst( ty, nextInsnAddr()) ));
+ // Do the store, and get success/failure bit into resSC
+ resSC = newTemp(Ity_I1);
+ stmt( IRStmt_Store(Iend_BE, resSC, mkexpr(EA), mkexpr(rS)) );
+
+ // Set CR0[LT GT EQ SO] = 0b000 || XER[SO] on failure
+ // Set CR0[LT GT EQ SO] = 0b001 || XER[SO] on success
+ putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1)));
+ putCR0(0, getXER_SO());
- /* Note for mode64:
+ /* Note:
If resaddr != lwarx_resaddr, CR0[EQ] is undefined, and
whether rS is stored is dependent on that value. */
-
- /* Success? Do the (32bit) store. Mark the store as
- snooped, so that threading tools can handle it differently
- if necessary. */
- stmt( IRStmt_MBE(Imbe_SnoopedStoreBegin) );
- storeBE( mkexpr(EA), mkSzNarrow32(ty, mkexpr(rS)) );
- stmt( IRStmt_MBE(Imbe_SnoopedStoreEnd) );
-
- // Set CR0[LT GT EQ S0] = 0b001 || XER[SO]
- putCR321(0, mkU8(1<<1));
+ /* So I guess we can just ignore this case? */
break;
}
in the documentation) is merely a hint bit to the
hardware, I think as to whether or not contention is
likely. So we can just ignore it. */
+ if (!mode64)
+ return False;
DIP("ldarx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0);
- putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
- // Take a reservation
- putGST( PPC_GST_RESVN, mkexpr(EA) );
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 8 );
+
+ // and actually do the load
+ putIReg( rD_addr, loadlinkedBE(Ity_I64, mkexpr(EA)) );
break;
case 0x0D6: { // stdcx. (Store DWord Condition Indexd, PPC64 p581)
- IRTemp resaddr = newTemp(ty);
+ // A marginally simplified version of the stwcx. case
+ IRTemp rS = newTemp(Ity_I64);
+ IRTemp resSC;
if (b0 != 1) {
vex_printf("dis_memsync(ppc)(stdcx.,b0)\n");
return False;
}
+ if (!mode64)
+ return False;
DIP("stdcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 8 );
+
+ // Get the data to be stored
assign( rS, getIReg(rS_addr) );
- // First set up as if the reservation failed
- // Set CR0[LT GT EQ S0] = 0b000 || XER[SO]
- putCR321(0, mkU8(0<<1));
+ // Do the store, and get success/failure bit into resSC
+ resSC = newTemp(Ity_I1);
+ stmt( IRStmt_Store(Iend_BE, resSC, mkexpr(EA), mkexpr(rS)) );
+
+ // Set CR0[LT GT EQ SO] = 0b000 || XER[SO] on failure
+ // Set CR0[LT GT EQ SO] = 0b001 || XER[SO] on success
+ putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1)));
putCR0(0, getXER_SO());
-
- // Get the reservation address into a temporary, then clear it.
- assign( resaddr, getGST(PPC_GST_RESVN) );
- putGST( PPC_GST_RESVN, mkSzImm(ty, 0) );
-
- // Skip the rest if the reservation really did fail.
- stmt( IRStmt_Exit( binop(Iop_CmpNE64, mkexpr(resaddr),
- mkexpr(EA)),
- Ijk_Boring,
- IRConst_U64(nextInsnAddr())) );
-
- // Success? Do the store
- storeBE( mkexpr(EA), mkexpr(rS) );
-
- // Set CR0[LT GT EQ S0] = 0b001 || XER[SO]
- putCR321(0, mkU8(1<<1));
+
+ /* Note:
+ If resaddr != lwarx_resaddr, CR0[EQ] is undefined, and
+ whether rS is stored is dependent on that value. */
+ /* So I guess we can just ignore this case? */
break;
}
assign( rS, getIReg(rS_addr) );
assign( rB, getIReg(rB_addr) );
- assign( rS_lo32, mkSzNarrow32(ty, mkexpr(rS)) );
- assign( rB_lo32, mkSzNarrow32(ty, mkexpr(rB)) );
+ assign( rS_lo32, mkNarrowTo32(ty, mkexpr(rS)) );
+ assign( rB_lo32, mkNarrowTo32(ty, mkexpr(rB)) );
if (opc1 == 0x1F) {
switch (opc2) {
binop( Iop_Sar32,
binop(Iop_Shl32, mkexpr(rB_lo32), mkU8(26)),
mkU8(31))) );
- assign( rA, mkSzWiden32(ty, e_tmp, /* Signed */False) );
+ assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */False) );
break;
}
IRExpr_Mux0X( mkexpr(outofrange),
mkexpr(sh_amt),
mkU32(31)) ) );
- assign( rA, mkSzWiden32(ty, e_tmp, /* Signed */True) );
+ assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */True) );
set_XER_CA( ty, PPCG_FLAG_OP_SRAW,
mkexpr(rA),
- mkSzWiden32(ty, mkexpr(rS_lo32), True),
- mkSzWiden32(ty, mkexpr(sh_amt), True ),
- mkSzWiden32(ty, getXER_CA32(), True) );
+ mkWidenFrom32(ty, mkexpr(rS_lo32), True),
+ mkWidenFrom32(ty, mkexpr(sh_amt), True ),
+ mkWidenFrom32(ty, getXER_CA32(), True) );
break;
}
set_XER_CA( ty, PPCG_FLAG_OP_SRAWI,
mkexpr(rA),
- mkSzWiden32(ty, mkexpr(rS_lo32), /* Syned */True),
+ mkWidenFrom32(ty, mkexpr(rS_lo32), /* Signed */True),
mkSzImm(ty, sh_imm),
- mkSzWiden32(ty, getXER_CA32(), /* Syned */False) );
+ mkWidenFrom32(ty, getXER_CA32(), /* Signed */False) );
break;
case 0x218: // srw (Shift Right Word, PPC32 p508)
binop(Iop_Shl32, mkexpr(rB_lo32),
mkU8(26)),
mkU8(31))));
- assign( rA, mkSzWiden32(ty, e_tmp, /* Signed */False) );
+ assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */False) );
break;
);
set_XER_CA( ty, PPCG_FLAG_OP_SRAD,
mkexpr(rA), mkexpr(rS), mkexpr(sh_amt),
- mkSzWiden32(ty, getXER_CA32(), /* Syned */False) );
+ mkWidenFrom32(ty, getXER_CA32(), /* Signed */False) );
break;
}
mkexpr(rA),
getIReg(rS_addr),
mkU64(sh_imm),
- mkSzWiden32(ty, getXER_CA32(), /* Syned */False) );
+ mkWidenFrom32(ty, getXER_CA32(), /* Signed */False) );
break;
case 0x21B: // srd (Shift Right DWord, PPC64 p574)
DIP("lhbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
assign( w1, unop(Iop_16Uto32, loadBE(Ity_I16, mkexpr(EA))) );
assign( w2, gen_byterev16(w1) );
- putIReg( rD_addr, mkSzWiden32(ty, mkexpr(w2),
- /* Signed */False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
+ /* Signed */False) );
break;
case 0x216: // lwbrx (Load Word Byte-Reverse Indexed, PPC32 p459)
DIP("lwbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
assign( w1, loadBE(Ity_I32, mkexpr(EA)) );
assign( w2, gen_byterev32(w1) );
- putIReg( rD_addr, mkSzWiden32(ty, mkexpr(w2),
- /* Signed */False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
+ /* Signed */False) );
break;
case 0x396: // sthbrx (Store Half Word Byte-Reverse Indexed, PPC32 p523)
DIP("sthbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
- assign( w1, mkSzNarrow32(ty, getIReg(rS_addr)) );
+ assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
storeBE( mkexpr(EA), unop(Iop_32to16, gen_byterev16(w1)) );
break;
case 0x296: // stwbrx (Store Word Byte-Reverse Indxd, PPC32 p531)
DIP("stwbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
- assign( w1, mkSzNarrow32(ty, getIReg(rS_addr)) );
+ assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
storeBE( mkexpr(EA), gen_byterev32(w1) );
break;
// implementation of mfocr (from the 2.02 arch spec)
if (b11to20 == 0) {
DIP("mfcr r%u\n", rD_addr);
- putIReg( rD_addr, mkSzWiden32(ty, getGST( PPC_GST_CR ),
- /* Signed */False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_CR ),
+ /* Signed */False) );
break;
}
if (b20 == 1 && b11 == 0) {
DIP("mfocrf r%u,%u\n", rD_addr, CRM);
- putIReg( rD_addr, mkSzWiden32(ty, getGST( PPC_GST_CR ),
- /* Signed */False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_CR ),
+ /* Signed */False) );
break;
}
/* not decodable */
switch (SPR) { // Choose a register...
case 0x1:
DIP("mfxer r%u\n", rD_addr);
- putIReg( rD_addr, mkSzWiden32(ty, getGST( PPC_GST_XER ),
- /* Signed */False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_XER ),
+ /* Signed */False) );
break;
case 0x8:
DIP("mflr r%u\n", rD_addr);
break;
case 0x100:
DIP("mfvrsave r%u\n", rD_addr);
- putIReg( rD_addr, mkSzWiden32(ty, getGST( PPC_GST_VRSAVE ),
- /* Signed */False) );
+ putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_VRSAVE ),
+ /* Signed */False) );
break;
case 0x103:
case 269:
DIP("mftbu r%u", rD_addr);
putIReg( rD_addr,
- mkSzWiden32(ty, unop(Iop_64HIto32, mkexpr(val)),
- /* Signed */False) );
+ mkWidenFrom32(ty, unop(Iop_64HIto32, mkexpr(val)),
+ /* Signed */False) );
break;
case 268:
DIP("mftb r%u", rD_addr);
shft = 4*(7-cr);
putGST_field( PPC_GST_CR,
binop(Iop_Shr32,
- mkSzNarrow32(ty, mkexpr(rS)),
+ mkNarrowTo32(ty, mkexpr(rS)),
mkU8(shft)), cr );
}
break;
switch (SPR) { // Choose a register...
case 0x1:
DIP("mtxer r%u\n", rS_addr);
- putGST( PPC_GST_XER, mkSzNarrow32(ty, mkexpr(rS)) );
+ putGST( PPC_GST_XER, mkNarrowTo32(ty, mkexpr(rS)) );
break;
case 0x8:
DIP("mtlr r%u\n", rS_addr);
break;
case 0x100:
DIP("mtvrsave r%u\n", rS_addr);
- putGST( PPC_GST_VRSAVE, mkSzNarrow32(ty, mkexpr(rS)) );
+ putGST( PPC_GST_VRSAVE, mkNarrowTo32(ty, mkexpr(rS)) );
break;
default:
UInt vD_off = vectorGuestRegOffset(vD_addr);
IRExpr** args = mkIRExprVec_3(
mkU32(vD_off),
- binop(Iop_And32, mkSzNarrow32(ty, mkexpr(EA)),
+ binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
mkU32(0xF)),
mkU32(0)/*left*/ );
if (!mode64) {
UInt vD_off = vectorGuestRegOffset(vD_addr);
IRExpr** args = mkIRExprVec_3(
mkU32(vD_off),
- binop(Iop_And32, mkSzNarrow32(ty, mkexpr(EA)),
+ binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
mkU32(0xF)),
mkU32(1)/*right*/ );
if (!mode64) {
DIP("stvebx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
assign( eb, binop(Iop_And8, mkU8(0xF),
unop(Iop_32to8,
- mkSzNarrow32(ty, mkexpr(EA)) )) );
+ mkNarrowTo32(ty, mkexpr(EA)) )) );
assign( idx, binop(Iop_Shl8,
binop(Iop_Sub8, mkU8(15), mkexpr(eb)),
mkU8(3)) );
DIP("stvehx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
assign( addr_aligned, addr_align(mkexpr(EA), 2) );
assign( eb, binop(Iop_And8, mkU8(0xF),
- mkSzNarrow8(ty, mkexpr(addr_aligned) )) );
+ mkNarrowTo8(ty, mkexpr(addr_aligned) )) );
assign( idx, binop(Iop_Shl8,
binop(Iop_Sub8, mkU8(14), mkexpr(eb)),
mkU8(3)) );
DIP("stvewx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
assign( addr_aligned, addr_align(mkexpr(EA), 4) );
assign( eb, binop(Iop_And8, mkU8(0xF),
- mkSzNarrow8(ty, mkexpr(addr_aligned) )) );
+ mkNarrowTo8(ty, mkexpr(addr_aligned) )) );
assign( idx, binop(Iop_Shl8,
binop(Iop_Sub8, mkU8(12), mkexpr(eb)),
mkU8(3)) );
without prior written permission.
*/
+/* Translates x86 code to IR. */
+
/* TODO:
All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
zeroes all the FP registers. It should leave the registers
unchanged.
- RDTSC returns one, always.
-
SAHF should cause eflags[1] == 1, and in fact it produces 0. As
per Intel docs this bit has no meaning anyway. Since PUSHF is the
only way to observe eflags[1], a proper fix would be to make that
happen. Programs that set it to 1 and then rely on the resulting
SIGBUSs to inform them of misaligned accesses will not work.
- Implementation sysenter is necessarily partial. sysenter is a kind
- of system call entry. When doing a sysenter, the return address is
- not known -- that is something that is beyond Vex's knowledge. So
- the generated IR forces a return to the scheduler, which can do
- what it likes to simulate the systemter, but it MUST set this
- thread's guest_EIP field with the continuation address before
- resuming execution. If that doesn't happen, the thread will jump
- to address zero, which is probably fatal.
+ Implementation of sysenter is necessarily partial. sysenter is a
+ kind of system call entry. When doing a sysenter, the return
+ address is not known -- that is something that is beyond Vex's
+ knowledge. So the generated IR forces a return to the scheduler,
+ which can do what it likes to simulate the sysenter, but it MUST
+ set this thread's guest_EIP field with the continuation address
+ before resuming execution. If that doesn't happen, the thread will
+ jump to address zero, which is probably fatal.
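+
+ (As a sketch only -- assuming the jmp_lit helper used elsewhere in
+ this file, and not necessarily the exact code -- the translation
+ amounts to
+
+    jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP*/);
+
+ with the scheduler expected to supply the real continuation
+ address via guest_EIP.)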
This module uses global variables and so is not MT-safe (if that
should ever become relevant).
The delta values are 32-bit ints, not 64-bit ints. That means
this module may not work right if run on a 64-bit host. That should
be fixed properly, really -- if anyone ever wants to use Vex to
- translate x86 code for execution on a 64-bit host. */
+ translate x86 code for execution on a 64-bit host.
+
+ casLE (implementation of lock-prefixed insns) and rep-prefixed
+ insns: the side-exit back to the start of the insn is done with
+ Ijk_Boring. This is quite wrong; it should be done with
+ Ijk_NoRedir, since otherwise the side exit, which is intended to
+ restart the instruction for whatever reason, could go somewhere
+ entirely else. Doing it right (with Ijk_NoRedir jumps) would make
+ no-redir jumps performance critical, at least for rep-prefixed
+ instructions, since all iterations thereof would involve such a
+ jump. It's not such a big deal with casLE since the side exit is
+ only taken if the CAS fails, that is, the location is contended,
+ which is relatively unlikely.
+*/
/* Performance holes:
No prefixes may precede a "Special" instruction.
*/
+/* LOCK prefixed instructions. These are translated using IR-level
+ CAS statements (IRCAS) and are believed to preserve atomicity, even
+ from the point of view of some other process racing against a
+ simulated one (presumably they communicate via a shared memory
+ segment).
+
+ Handlers which are aware of LOCK prefixes are:
+ dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
+ dis_cmpxchg_G_E (cmpxchg)
+ dis_Grp1 (add, or, adc, sbb, and, sub, xor)
+ dis_Grp3 (not, neg)
+ dis_Grp4 (inc, dec)
+ dis_Grp5 (inc, dec)
+ dis_Grp8_Imm (bts, btc, btr)
+ dis_bt_G_E (bts, btc, btr)
+ dis_xadd_G_E (xadd)
+*/
-/* Translates x86 code to IR. */
#include "libvex_basictypes.h"
#include "libvex_ir.h"
static void storeLE ( IRExpr* addr, IRExpr* data )
{
- stmt( IRStmt_Store(Iend_LE,addr,data) );
+ stmt( IRStmt_Store(Iend_LE, IRTemp_INVALID, addr, data) );
}
static IRExpr* unop ( IROp op, IRExpr* a )
static IRExpr* loadLE ( IRType ty, IRExpr* data )
{
- return IRExpr_Load(Iend_LE,ty,data);
+ return IRExpr_Load(False, Iend_LE, ty, data);
}
static IROp mkSizedOp ( IRType ty, IROp op8 )
unop(Iop_1Uto32,y)));
}
+/* Generate a compare-and-swap operation, operating on memory at
+ 'addr'. The expected value is 'expVal' and the new value is
+ 'newVal'. If the operation fails, then transfer control (with a
+ no-redir jump (XXX no -- see comment at top of this file)) to
+ 'restart_point', which is presumably the address of the guest
+ instruction again -- retrying, essentially. */
+static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
+ Addr32 restart_point )
+{
+ IRCAS* cas;
+ IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
+ IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
+ IRTemp oldTmp = newTemp(tyE);
+ IRTemp expTmp = newTemp(tyE);
+ vassert(tyE == tyN);
+ vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
+ assign(expTmp, expVal);
+ cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
+ NULL, mkexpr(expTmp), NULL, newVal );
+ stmt( IRStmt_CAS(cas) );
+ stmt( IRStmt_Exit(
+ binop( mkSizedOp(tyE,Iop_CmpNE8), mkexpr(oldTmp), mkexpr(expTmp) ),
+ Ijk_Boring, /*Ijk_NoRedir*/
+ IRConst_U32( restart_point )
+ ));
+}
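+
+/* Sketch of intended use (this mirrors the locked INC case in
+   dis_Grp4 below; the temporaries t1/t2 and 'addr' are as used
+   there): a LOCK-prefixed INC of a memory byte becomes, in outline,
+
+      assign(t1, loadLE(Ity_I8, mkexpr(addr)));
+      assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
+      casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+             guest_EIP_curr_instr );
+
+   so that if the location changes between the load and the CAS, the
+   whole insn is restarted rather than a stale value being stored. */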
+
/*------------------------------------------------------------*/
/*--- Helpers for %eflags. ---*/
/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
appropriately.
+
+ Optionally, generate a store for the 'tres' value. This can either
+ be a normal store, or it can be a cas-with-possible-failure style
+ store:
+
+ if taddr is IRTemp_INVALID, then no store is generated.
+
+ if taddr is not IRTemp_INVALID, then a store (using taddr as
+ the address) is generated:
+
+ if texpVal is IRTemp_INVALID then a normal store is
+ generated, and restart_point must be zero (it is irrelevant).
+
+ if texpVal is not IRTemp_INVALID then a cas-style store is
+ generated. texpVal is the expected value, restart_point
+ is the restart point if the store fails, and texpVal must
+ have the same type as tres.
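+
+ For example, the three call shapes (which appear, with concrete
+ temporaries, in dis_op2_G_E and dis_Grp1 below) are:
+
+    no store:     helper_ADC( sz, tres, ta1, ta2,
+                              IRTemp_INVALID, IRTemp_INVALID, 0 );
+    normal store: helper_ADC( sz, tres, ta1, ta2,
+                              taddr, IRTemp_INVALID, 0 );
+    cas store:    helper_ADC( sz, tres, ta1, ta2,
+                              taddr, texpVal, restart_point );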
*/
static void helper_ADC ( Int sz,
- IRTemp tres, IRTemp ta1, IRTemp ta2 )
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+ IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
UInt thunkOp;
IRType ty = szToITy(sz);
IROp plus = mkSizedOp(ty, Iop_Add8);
IROp xor = mkSizedOp(ty, Iop_Xor8);
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
vassert(sz == 1 || sz == 2 || sz == 4);
thunkOp = sz==4 ? X86G_CC_OP_ADCL
: (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);
binop(plus,mkexpr(ta1),mkexpr(ta2)),
mkexpr(oldcn)) );
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
- appropriately.
+ appropriately. As with helper_ADC, possibly generate a store of
+ the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
- IRTemp tres, IRTemp ta1, IRTemp ta2 )
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+ IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
UInt thunkOp;
IRType ty = szToITy(sz);
IROp minus = mkSizedOp(ty, Iop_Sub8);
IROp xor = mkSizedOp(ty, Iop_Xor8);
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
vassert(sz == 1 || sz == 2 || sz == 4);
thunkOp = sz==4 ? X86G_CC_OP_SBBL
: (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);
binop(minus,mkexpr(ta1),mkexpr(ta2)),
mkexpr(oldcn)) );
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
assign( src, getIReg(size,eregOfRM(rm)) );
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, gregOfRM(rm), mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, gregOfRM(rm), mkexpr(dst1));
} else {
assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
assign( src, loadLE(szToITy(size), mkexpr(addr)) );
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, gregOfRM(rm), mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, gregOfRM(rm), mkexpr(dst1));
} else {
assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
*/
static
UInt dis_op2_G_E ( UChar sorb,
+ Bool locked,
Bool addSubCarry,
IROp op8,
Bool keep,
assign(src, getIReg(size,gregOfRM(rm)));
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, eregOfRM(rm), mkexpr(dst1));
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
putIReg(size, eregOfRM(rm), mkexpr(dst1));
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
assign(src, getIReg(size,gregOfRM(rm)));
if (addSubCarry && op8 == Iop_Add8) {
- helper_ADC( size, dst1, dst0, src );
- storeLE(mkexpr(addr), mkexpr(dst1));
+ if (locked) {
+ /* cas-style store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else
if (addSubCarry && op8 == Iop_Sub8) {
- helper_SBB( size, dst1, dst0, src );
- storeLE(mkexpr(addr), mkexpr(dst1));
+ if (locked) {
+ /* cas-style store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (keep) {
+ if (locked) {
+ if (0) vex_printf("locked case\n");
+ casLE( mkexpr(addr),
+ mkexpr(dst0)/*expval*/,
+ mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
+ } else {
+ if (0) vex_printf("nonlocked case\n");
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
if (isAddSub(op8))
setFlags_DEP1_DEP2(op8, dst0, src, ty);
else
setFlags_DEP1(op8, dst1, ty);
- if (keep)
- storeLE(mkexpr(addr), mkexpr(dst1));
}
DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
}
else
if (op8 == Iop_Add8 && carrying) {
- helper_ADC( size, dst1, dst0, src );
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
}
else
if (op8 == Iop_Sub8 && carrying) {
- helper_SBB( size, dst1, dst0, src );
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
}
else
vpanic("dis_op_imm_A(x86,guest)");
static
-UInt dis_Grp1 ( UChar sorb,
+UInt dis_Grp1 ( UChar sorb, Bool locked,
Int delta, UChar modrm,
Int am_sz, Int d_sz, Int sz, UInt d32 )
{
assign(src, mkU(ty,d32 & mask));
if (gregOfRM(modrm) == 2 /* ADC */) {
- helper_ADC( sz, dst1, dst0, src );
+ helper_ADC( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
} else
if (gregOfRM(modrm) == 3 /* SBB */) {
- helper_SBB( sz, dst1, dst0, src );
+ helper_SBB( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
if (isAddSub(op8))
assign(src, mkU(ty,d32 & mask));
if (gregOfRM(modrm) == 2 /* ADC */) {
- helper_ADC( sz, dst1, dst0, src );
+ if (locked) {
+ /* cas-style store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else
if (gregOfRM(modrm) == 3 /* SBB */) {
- helper_SBB( sz, dst1, dst0, src );
+ if (locked) {
+ /* cas-style store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
} else {
assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (gregOfRM(modrm) < 7) {
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
+ mkexpr(dst1)/*newVal*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
if (isAddSub(op8))
setFlags_DEP1_DEP2(op8, dst0, src, ty);
else
setFlags_DEP1(op8, dst1, ty);
}
- if (gregOfRM(modrm) < 7)
- storeLE(mkexpr(addr), mkexpr(dst1));
-
delta += (len+d_sz);
DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
d32, dis_buf);
/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
static
UInt dis_Grp8_Imm ( UChar sorb,
+ Bool locked,
Int delta, UChar modrm,
Int am_sz, Int sz, UInt src_val,
Bool* decode_OK )
src_val, dis_buf);
}
- /* Copy relevant bit from t2 into the carry flag. */
- /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
- stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
- stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
- stmt( IRStmt_Put(
- OFFB_CC_DEP1,
- binop(Iop_And32,
- binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
- mkU32(1))
- ));
- /* Set NDEP even though it isn't used. This makes redundant-PUT
- elimination of previous stores to this field work better. */
- stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
-
/* Compute the new value into t2m, if non-BT. */
switch (gregOfRM(modrm)) {
case 4: /* BT */
vassert(0);
}
- /* Write the result back, if non-BT. */
+ /* Write the result back, if non-BT. If the CAS fails then we
+ side-exit from the trace at this point, and so the flag state is
+ not affected. This is of course as required. */
if (gregOfRM(modrm) != 4 /* BT */) {
if (epartIsReg(modrm)) {
- putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
+ putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
} else {
- storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ if (locked) {
+ casLE( mkexpr(t_addr),
+ narrowTo(ty, mkexpr(t2))/*expd*/,
+ narrowTo(ty, mkexpr(t2m))/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ }
}
}
+ /* Copy relevant bit from t2 into the carry flag. */
+ /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
+ mkU32(1))
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+
return delta;
}
/* Group 3 extended opcodes. */
static
-UInt dis_Grp3 ( UChar sorb, Int sz, Int delta, Bool* decode_OK )
+UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
{
UInt d32;
UChar modrm;
*decode_OK = True; /* may change this later */
modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
+ /* LOCK prefix only allowed with not and neg subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
if (epartIsReg(modrm)) {
switch (gregOfRM(modrm)) {
case 0: { /* TEST */
*decode_OK = False;
break;
case 2: /* NOT */
- storeLE( mkexpr(addr), unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ dst1 = newTemp(ty);
+ assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
DIP("not%c %s\n", nameISize(sz), dis_buf);
break;
case 3: /* NEG */
dst1 = newTemp(ty);
assign(dst0, mkU(ty,0));
assign(src, mkexpr(t1));
- assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
+ assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
+ mkexpr(dst0), mkexpr(src)));
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
- storeLE( mkexpr(addr), mkexpr(dst1) );
DIP("neg%c %s\n", nameISize(sz), dis_buf);
break;
case 4: /* MUL */
/* Group 4 extended opcodes. */
static
-UInt dis_Grp4 ( UChar sorb, Int delta, Bool* decode_OK )
+UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
{
Int alen;
UChar modrm;
*decode_OK = True;
modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
+ /* LOCK prefix only allowed with inc and dec subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
if (epartIsReg(modrm)) {
assign(t1, getIReg(1, eregOfRM(modrm)));
switch (gregOfRM(modrm)) {
switch (gregOfRM(modrm)) {
case 0: /* INC */
assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
- storeLE( mkexpr(addr), mkexpr(t2) );
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
setFlags_INC_DEC( True, t2, ty );
break;
case 1: /* DEC */
assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
- storeLE( mkexpr(addr), mkexpr(t2) );
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
setFlags_INC_DEC( False, t2, ty );
break;
default:
/* Group 5 extended opcodes. */
static
-UInt dis_Grp5 ( UChar sorb, Int sz, Int delta,
+UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
DisResult* dres, Bool* decode_OK )
{
Int len;
*decode_OK = True;
modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
+ /* LOCK prefix only allowed with inc and dec subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
if (epartIsReg(modrm)) {
assign(t1, getIReg(sz,eregOfRM(modrm)));
switch (gregOfRM(modrm)) {
t2 = newTemp(ty);
assign(t2, binop(mkSizedOp(ty,Iop_Add8),
mkexpr(t1), mkU(ty,1)));
+ if (locked) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
setFlags_INC_DEC( True, t2, ty );
- storeLE(mkexpr(addr),mkexpr(t2));
break;
case 1: /* DEC */
t2 = newTemp(ty);
assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
mkexpr(t1), mkU(ty,1)));
+ if (locked) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
setFlags_INC_DEC( False, t2, ty );
- storeLE(mkexpr(addr),mkexpr(t2));
break;
case 2: /* call Ev */
vassert(sz == 4);
static
-UInt dis_bt_G_E ( UChar sorb, Int sz, Int delta, BtOp op )
+UInt dis_bt_G_E ( UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
{
HChar dis_buf[50];
UChar modrm;
Int len;
IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
- t_addr1, t_esp, t_mask;
+ t_addr1, t_esp, t_mask, t_new;
vassert(sz == 2 || sz == 4);
t_fetched = t_bitno0 = t_bitno1 = t_bitno2
- = t_addr0 = t_addr1 = t_esp = t_mask = IRTemp_INVALID;
+ = t_addr0 = t_addr1 = t_esp
+ = t_mask = t_new = IRTemp_INVALID;
t_fetched = newTemp(Ity_I8);
+ t_new = newTemp(Ity_I8);
t_bitno0 = newTemp(Ity_I32);
t_bitno1 = newTemp(Ity_I32);
t_bitno2 = newTemp(Ity_I8);
if (op != BtOpNone) {
switch (op) {
- case BtOpSet:
- storeLE( mkexpr(t_addr1),
- binop(Iop_Or8, mkexpr(t_fetched),
- mkexpr(t_mask)) );
+ case BtOpSet:
+ assign( t_new,
+ binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
break;
- case BtOpComp:
- storeLE( mkexpr(t_addr1),
- binop(Iop_Xor8, mkexpr(t_fetched),
- mkexpr(t_mask)) );
+ case BtOpComp:
+ assign( t_new,
+ binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
break;
- case BtOpReset:
- storeLE( mkexpr(t_addr1),
- binop(Iop_And8, mkexpr(t_fetched),
- unop(Iop_Not8, mkexpr(t_mask))) );
+ case BtOpReset:
+ assign( t_new,
+ binop(Iop_And8, mkexpr(t_fetched),
+ unop(Iop_Not8, mkexpr(t_mask))) );
break;
default:
vpanic("dis_bt_G_E(x86)");
}
+ if (locked && !epartIsReg(modrm)) {
+ casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
+ mkexpr(t_new)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(t_addr1), mkexpr(t_new) );
+ }
}
/* Side effect done; now get selected bit into Carry flag */
static
UInt dis_cmpxchg_G_E ( UChar sorb,
+ Bool locked,
Int size,
Int delta0 )
{
IRType ty = szToITy(size);
IRTemp acc = newTemp(ty);
IRTemp src = newTemp(ty);
- //IRTemp res = newTemp(ty);
IRTemp dest = newTemp(ty);
IRTemp dest2 = newTemp(ty);
IRTemp acc2 = newTemp(ty);
IRTemp addr = IRTemp_INVALID;
UChar rm = getUChar(delta0);
+ /* There are 3 cases to consider:
+
+ reg-reg: ignore any lock prefix, generate sequence based
+ on Mux0X
+
+ reg-mem, not locked: ignore any lock prefix, generate sequence
+ based on Mux0X
+
+ reg-mem, locked: use IRCAS
+ */
if (epartIsReg(rm)) {
+ /* case 1 */
assign( dest, getIReg(size, eregOfRM(rm)) );
delta0++;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ putIReg(size, eregOfRM(rm), mkexpr(dest2));
DIP("cmpxchg%c %s,%s\n", nameISize(size),
nameIReg(size,gregOfRM(rm)),
nameIReg(size,eregOfRM(rm)) );
- } else {
+ }
+ else if (!epartIsReg(rm) && !locked) {
+ /* case 2 */
addr = disAMode ( &len, sorb, delta0, dis_buf );
assign( dest, loadLE(ty, mkexpr(addr)) );
delta0 += len;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ storeLE( mkexpr(addr), mkexpr(dest2) );
DIP("cmpxchg%c %s,%s\n", nameISize(size),
nameIReg(size,gregOfRM(rm)), dis_buf);
}
-
- assign( src, getIReg(size, gregOfRM(rm)) );
- assign( acc, getIReg(size, R_EAX) );
- //assign( res, binop( mkSizedOp(ty,Iop_Sub8), mkexpr(acc), mkexpr(dest) ));
- setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
- assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
- assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
- assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
- putIReg(size, R_EAX, mkexpr(acc2));
-
- if (epartIsReg(rm)) {
- putIReg(size, eregOfRM(rm), mkexpr(dest2));
- } else {
- storeLE( mkexpr(addr), mkexpr(dest2) );
+ else if (!epartIsReg(rm) && locked) {
+ /* case 3 */
+ /* src is new value. acc is expected value. dest is old value.
+ Compute success from the output of the IRCAS, and steer the
+ new value for EAX accordingly: in case of success, EAX is
+ unchanged. */
+ addr = disAMode ( &len, sorb, delta0, dis_buf );
+ delta0 += len;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ stmt( IRStmt_CAS(
+ mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
+ NULL, mkexpr(acc), NULL, mkexpr(src) )
+ ));
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ DIP("lock cmpxchg%c %s,%s\n", nameISize(size),
+ nameIReg(size,gregOfRM(rm)), dis_buf);
}
+ else vassert(0);
return delta0;
}
-//-- static
-//-- Addr dis_cmpxchg8b ( UCodeBlock* cb,
-//-- UChar sorb,
-//-- Addr eip0 )
-//-- {
-//-- Int tal, tah, junkl, junkh, destl, desth, srcl, srch, accl, acch;
-//-- HChar dis_buf[50];
-//-- UChar rm;
-//-- UInt pair;
-//--
-//-- rm = getUChar(eip0);
-//-- accl = newTemp(cb);
-//-- acch = newTemp(cb);
-//-- srcl = newTemp(cb);
-//-- srch = newTemp(cb);
-//-- destl = newTemp(cb);
-//-- desth = newTemp(cb);
-//-- junkl = newTemp(cb);
-//-- junkh = newTemp(cb);
-//--
-//-- vg_assert(!epartIsReg(rm));
-//--
-//-- pair = disAMode ( cb, sorb, eip0, dis_buf );
-//-- tal = LOW24(pair);
-//-- tah = newTemp(cb);
-//-- uInstr2(cb, MOV, 4, TempReg, tal, TempReg, tah);
-//-- uInstr2(cb, ADD, 4, Literal, 0, TempReg, tah);
-//-- uLiteral(cb, 4);
-//-- eip0 += HI8(pair);
-//-- DIP("cmpxchg8b %s\n", dis_buf);
-//--
-//-- uInstr0(cb, CALLM_S, 0);
-//--
-//-- uInstr2(cb, LOAD, 4, TempReg, tah, TempReg, desth);
-//-- uInstr1(cb, PUSH, 4, TempReg, desth);
-//-- uInstr2(cb, LOAD, 4, TempReg, tal, TempReg, destl);
-//-- uInstr1(cb, PUSH, 4, TempReg, destl);
-//-- uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, srch);
-//-- uInstr1(cb, PUSH, 4, TempReg, srch);
-//-- uInstr2(cb, GET, 4, ArchReg, R_EBX, TempReg, srcl);
-//-- uInstr1(cb, PUSH, 4, TempReg, srcl);
-//-- uInstr2(cb, GET, 4, ArchReg, R_EDX, TempReg, acch);
-//-- uInstr1(cb, PUSH, 4, TempReg, acch);
-//-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, accl);
-//-- uInstr1(cb, PUSH, 4, TempReg, accl);
-//--
-//-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_cmpxchg8b));
-//-- uFlagsRWU(cb, FlagsEmpty, FlagZ, FlagsEmpty);
-//--
-//-- uInstr1(cb, POP, 4, TempReg, accl);
-//-- uInstr2(cb, PUT, 4, TempReg, accl, ArchReg, R_EAX);
-//-- uInstr1(cb, POP, 4, TempReg, acch);
-//-- uInstr2(cb, PUT, 4, TempReg, acch, ArchReg, R_EDX);
-//-- uInstr1(cb, POP, 4, TempReg, srcl);
-//-- uInstr2(cb, PUT, 4, TempReg, srcl, ArchReg, R_EBX);
-//-- uInstr1(cb, POP, 4, TempReg, srch);
-//-- uInstr2(cb, PUT, 4, TempReg, srch, ArchReg, R_ECX);
-//-- uInstr1(cb, POP, 4, TempReg, destl);
-//-- uInstr2(cb, STORE, 4, TempReg, destl, TempReg, tal);
-//-- uInstr1(cb, POP, 4, TempReg, desth);
-//-- uInstr2(cb, STORE, 4, TempReg, desth, TempReg, tah);
-//--
-//-- uInstr0(cb, CALLM_E, 0);
-//--
-//-- return eip0;
-//-- }
-
-
/* Handle conditional move instructions of the form
cmovcc E(reg-or-mem), G(reg)
static
-UInt dis_xadd_G_E ( UChar sorb, Int sz, Int delta0, Bool* decodeOK )
+UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
+ Bool* decodeOK )
{
Int len;
UChar rm = getIByte(delta0);
HChar dis_buf[50];
- // Int tmpd = newTemp(cb);
- //Int tmpt = newTemp(cb);
-
IRType ty = szToITy(sz);
IRTemp tmpd = newTemp(ty);
IRTemp tmpt0 = newTemp(ty);
IRTemp tmpt1 = newTemp(ty);
+ /* There are 3 cases to consider:
+
+ reg-reg: currently unhandled
+
+ reg-mem, not locked: ignore any lock prefix, generate 'naive'
+ (non-atomic) sequence
+
+ reg-mem, locked: use IRCAS
+ */
+
if (epartIsReg(rm)) {
+ /* case 1 */
*decodeOK = False;
return delta0;
/* Currently we don't handle xadd_G_E with register operand. */
-#if 0
- uInstr2(cb, GET, sz, ArchReg, eregOfRM(rm), TempReg, tmpd);
- uInstr2(cb, GET, sz, ArchReg, gregOfRM(rm), TempReg, tmpt);
- uInstr2(cb, ADD, sz, TempReg, tmpd, TempReg, tmpt);
- setFlagsFromUOpcode(cb, ADD);
- uInstr2(cb, PUT, sz, TempReg, tmpd, ArchReg, gregOfRM(rm));
- uInstr2(cb, PUT, sz, TempReg, tmpt, ArchReg, eregOfRM(rm));
+ }
+ else if (!epartIsReg(rm) && !locked) {
+ /* case 2 */
+ IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
+ assign( tmpd, loadLE(ty, mkexpr(addr)) );
+ assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ storeLE( mkexpr(addr), mkexpr(tmpt1) );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
DIP("xadd%c %s, %s\n",
- nameISize(sz), nameIReg(sz,gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
- return 1+eip0;
-#endif
- } else {
+ nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
+ *decodeOK = True;
+ return len+delta0;
+ }
+ else if (!epartIsReg(rm) && locked) {
+ /* case 3 */
IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
assign( tmpd, loadLE(ty, mkexpr(addr)) );
assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
- assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), mkexpr(tmpd), mkexpr(tmpt0)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
+ mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
- storeLE( mkexpr(addr), mkexpr(tmpt1) );
putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
DIP("xadd%c %s, %s\n",
nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
*decodeOK = True;
return len+delta0;
}
+ /*UNREACHED*/
+ vassert(0);
}
/* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
/* Helper for deciding whether a given insn (starting at the opcode
byte) may validly be used with a LOCK prefix. The following insns
may be used with LOCK when their destination operand is in memory.
- Note, this is slightly too permissive. Oh well. Note also, AFAICS
- this is exactly the same for both 32-bit and 64-bit mode.
+ AFAICS this is exactly the same for both 32-bit and 64-bit mode.
- ADD 80 /0, 81 /0, 83 /0, 00, 01, 02, 03
- OR 80 /1, 81 /1, 83 /1, 08, 09, 0A, 0B
- ADC 80 /2, 81 /2, 83 /2, 10, 11, 12, 13
- SBB 81 /3, 81 /3, 83 /3, 18, 19, 1A, 1B
- AND 80 /4, 81 /4, 83 /4, 20, 21, 22, 23
- SUB 80 /5, 81 /5, 83 /5, 28, 29, 2A, 2B
- XOR 80 /6, 81 /6, 83 /6, 30, 31, 32, 33
+ ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
+ OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
+ ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
+ SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
+ AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
+ SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
+ XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
DEC FE /1, FF /1
INC FE /0, FF /0
NEG F6 /3, F7 /3
NOT F6 /2, F7 /2
- XCHG 86, 87
+ XCHG 86, 87
BTC 0F BB, 0F BA /7
BTR 0F B3, 0F BA /6
CMPXCHG8B 0F C7 /1
XADD 0F C0, 0F C1
+
+ ------------------------------
+
+ 80 /0 = addb $imm8, rm8
+ 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
+ 82 /0 = addb $imm8, rm8
+ 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
+
+ 00 = addb r8, rm8
+ 01 = addl r32, rm32 and addw r16, rm16
+
+ The same pattern holds for OR, ADC, SBB, AND, SUB and XOR.
+
+ FE /1 = dec rm8
+ FF /1 = dec rm32 and dec rm16
+
+ FE /0 = inc rm8
+ FF /0 = inc rm32 and inc rm16
+
+ F6 /3 = neg rm8
+ F7 /3 = neg rm32 and neg rm16
+
+ F6 /2 = not rm8
+ F7 /2 = not rm32 and not rm16
+
+ 0F BB = btcw r16, rm16 and btcl r32, rm32
+ 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
+
+ Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
{
switch (opc[0]) {
- case 0x00: case 0x01: case 0x02: case 0x03: return True;
- case 0x08: case 0x09: case 0x0A: case 0x0B: return True;
- case 0x10: case 0x11: case 0x12: case 0x13: return True;
- case 0x18: case 0x19: case 0x1A: case 0x1B: return True;
- case 0x20: case 0x21: case 0x22: case 0x23: return True;
- case 0x28: case 0x29: case 0x2A: case 0x2B: return True;
- case 0x30: case 0x31: case 0x32: case 0x33: return True;
-
- case 0x80: case 0x81: case 0x83:
- if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6)
+ case 0x00: case 0x01: case 0x08: case 0x09:
+ case 0x10: case 0x11: case 0x18: case 0x19:
+ case 0x20: case 0x21: case 0x28: case 0x29:
+ case 0x30: case 0x31:
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0x80: case 0x81: case 0x82: case 0x83:
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
+ && !epartIsReg(opc[1]))
return True;
break;
case 0xFE: case 0xFF:
- if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1)
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
+ && !epartIsReg(opc[1]))
return True;
break;
case 0xF6: case 0xF7:
- if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3)
+ if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
+ && !epartIsReg(opc[1]))
return True;
break;
case 0x86: case 0x87:
- return True;
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
case 0x0F: {
switch (opc[1]) {
case 0xBB: case 0xB3: case 0xAB:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
case 0xBA:
- if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7)
+ if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
+ && !epartIsReg(opc[2]))
return True;
break;
case 0xB0: case 0xB1:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
case 0xC7:
- if (gregOfRM(opc[2]) == 1)
+ if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]))
return True;
break;
case 0xC0: case 0xC1:
- return True;
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
default:
break;
} /* switch (opc[1]) */
/*--- Disassemble a single instruction ---*/
/*------------------------------------------------------------*/
-/* Disassemble a single instruction into IR. The instruction
- is located in host memory at &guest_code[delta]. */
-
+/* Disassemble a single instruction into IR. The instruction is
+ located in host memory at &guest_code[delta]. *expect_CAS is set
+ to True if the resulting IR is expected to contain an IRCAS
+ statement, and False if it's not expected to. This lets the
+ caller of disInstr_X86_WRK check that LOCK-prefixed instructions
+ are at least plausibly translated: a (validly) LOCK-prefixed
+ instruction should produce a translation containing an IRCAS,
+ and an instruction without a LOCK prefix should not.
+*/
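+
+/* A plausible shape for that caller-side check (a sketch only; the
+   actual check lives in disInstr_X86, outside this function):
+
+      Bool expect_CAS = False;
+      DisResult dres  = disInstr_X86_WRK( &expect_CAS, ... );
+      Bool saw_CAS    = False;
+      Int  i;
+      for (i = 0; i < irsb->stmts_used; i++)
+         if (irsb->stmts[i]->tag == Ist_CAS)
+            saw_CAS = True;
+      if (saw_CAS != expect_CAS)
+         vpanic("disInstr_X86: inconsistent LOCK/CAS translation");
+*/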
static
-DisResult disInstr_X86_WRK (
+DisResult disInstr_X86_WRK (
+ /*OUT*/Bool* expect_CAS,
Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
void* callback_opaque,
/* Gets set to True if a LOCK prefix is seen. */
Bool pfx_lock = False;
- /* do we need follow the insn with MBusEvent(BusUnlock) ? */
- Bool unlock_bus_after_insn = False;
-
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 0;
dres.continueAt = 0;
+ *expect_CAS = False;
+
addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
+ vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
DIP("\t0x%x: ", guest_EIP_bbstart+delta);
/* We may be asked to update the guest EIP before going further. */
break;
case 0xF0:
pfx_lock = True;
+ *expect_CAS = True;
break;
case 0x3E: /* %DS: */
case 0x26: /* %ES: */
leading F2 or F3. Check that any LOCK prefix is actually
allowed. */
- /* Kludge re LOCK prefixes. We assume here that all code generated
- by Vex is going to be run in a single-threaded context, in other
- words that concurrent executions of Vex-generated translations
- will not happen. So we don't need to worry too much about
- preserving atomicity. However, mark the fact that the notional
- hardware bus lock is being acquired (and, after the insn,
- released), so that thread checking tools know this is a locked
- insn.
-
- We check for, and immediately reject, (most) inappropriate uses
- of the LOCK prefix. Later (at decode_failure: and
- decode_success:), if we've added a BusLock event, then we will
- follow up with a BusUnlock event. How do we know execution will
- actually ever get to the BusUnlock event? Because
- can_be_used_with_LOCK_prefix rejects all control-flow changing
- instructions.
-
- One loophole, though: if a LOCK prefix insn (seg)faults, then
- the BusUnlock event will never be reached. This could cause
- tools which track bus hardware lock to lose track. Really, we
- should explicitly release the lock after every insn, but that's
- obviously way too expensive. Really, any tool which tracks the
- state of the bus lock needs to ask V's core/tool interface to
- notify it of signal deliveries. On delivery of SIGSEGV to the
- guest, the tool will be notified, in which case it should
- release the bus hardware lock if it is held.
-
- Note, guest-amd64/toIR.c contains identical logic.
- */
if (pfx_lock) {
if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
- stmt( IRStmt_MBE(Imbe_BusLock) );
- unlock_bus_after_insn = True;
DIP("lock ");
} else {
+ *expect_CAS = False;
goto decode_failure;
}
}
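+ /* (Unlike the old scheme, no bus-lock/unlock bookkeeping is
+    needed here: atomicity is now expressed by the IRCAS
+    statements which the individual instruction translators
+    emit.) */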
/* ------------------------ opl Gv, Ev ----------------- */
case 0x00: /* ADD Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Add8, True, 1, delta, "add" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Add8, True, 1, delta, "add" );
break;
case 0x01: /* ADD Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Add8, True, sz, delta, "add" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Add8, True, sz, delta, "add" );
break;
case 0x08: /* OR Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Or8, True, 1, delta, "or" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Or8, True, 1, delta, "or" );
break;
case 0x09: /* OR Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Or8, True, sz, delta, "or" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Or8, True, sz, delta, "or" );
break;
case 0x10: /* ADC Gb,Eb */
- delta = dis_op2_G_E ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Add8, True, 1, delta, "adc" );
break;
case 0x11: /* ADC Gv,Ev */
- delta = dis_op2_G_E ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Add8, True, sz, delta, "adc" );
break;
case 0x18: /* SBB Gb,Eb */
- delta = dis_op2_G_E ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Sub8, True, 1, delta, "sbb" );
break;
case 0x19: /* SBB Gv,Ev */
- delta = dis_op2_G_E ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Sub8, True, sz, delta, "sbb" );
break;
case 0x20: /* AND Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_And8, True, 1, delta, "and" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_And8, True, 1, delta, "and" );
break;
case 0x21: /* AND Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_And8, True, sz, delta, "and" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_And8, True, sz, delta, "and" );
break;
case 0x28: /* SUB Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, True, 1, delta, "sub" );
break;
case 0x29: /* SUB Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, True, sz, delta, "sub" );
break;
case 0x30: /* XOR Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Xor8, True, 1, delta, "xor" );
break;
case 0x31: /* XOR Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Xor8, True, sz, delta, "xor" );
break;
case 0x38: /* CMP Gb,Eb */
- delta = dis_op2_G_E ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, False, 1, delta, "cmp" );
break;
case 0x39: /* CMP Gv,Ev */
- delta = dis_op2_G_E ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, False, sz, delta, "cmp" );
break;
/* ------------------------ POP ------------------------ */
nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
nameIReg(sz,eregOfRM(modrm)));
} else {
- /* Need to add IRStmt_MBE(Imbe_BusLock). */
- if (pfx_lock) {
- /* check it's already been taken care of */
- vassert(unlock_bus_after_insn);
- } else {
- vassert(!unlock_bus_after_insn);
- stmt( IRStmt_MBE(Imbe_BusLock) );
- unlock_bus_after_insn = True;
- }
- /* Because unlock_bus_after_insn is now True, generic logic
- at the bottom of disInstr will add the
- IRStmt_MBE(Imbe_BusUnlock). */
+ *expect_CAS = True;
addr = disAMode ( &alen, sorb, delta, dis_buf );
assign( t1, loadLE(ty,mkexpr(addr)) );
assign( t2, getIReg(sz,gregOfRM(modrm)) );
- storeLE( mkexpr(addr), mkexpr(t2) );
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
delta += alen;
DIP("xchg%c %s, %s\n", nameISize(sz),
sz = 1;
d_sz = 1;
d32 = getUChar(delta + am_sz);
- delta = dis_Grp1 ( sorb, delta, modrm, am_sz, d_sz, sz, d32 );
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
break;
case 0x81: /* Grp1 Iv,Ev */
am_sz = lengthAMode(delta);
d_sz = sz;
d32 = getUDisp(d_sz, delta + am_sz);
- delta = dis_Grp1 ( sorb, delta, modrm, am_sz, d_sz, sz, d32 );
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
break;
case 0x83: /* Grp1 Ib,Ev */
am_sz = lengthAMode(delta);
d_sz = 1;
d32 = getSDisp8(delta + am_sz);
- delta = dis_Grp1 ( sorb, delta, modrm, am_sz, d_sz, sz, d32 );
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
break;
/* ------------------------ (Grp2 extensions) ---------- */
case 0xF6: { /* Grp3 Eb */
Bool decode_OK = True;
- delta = dis_Grp3 ( sorb, 1, delta, &decode_OK );
+ delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
}
case 0xF7: { /* Grp3 Ev */
Bool decode_OK = True;
- delta = dis_Grp3 ( sorb, sz, delta, &decode_OK );
+ delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
case 0xFE: { /* Grp4 Eb */
Bool decode_OK = True;
- delta = dis_Grp4 ( sorb, delta, &decode_OK );
+ delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
case 0xFF: { /* Grp5 Ev */
Bool decode_OK = True;
- delta = dis_Grp5 ( sorb, sz, delta, &dres, &decode_OK );
+ delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
modrm = getUChar(delta);
am_sz = lengthAMode(delta);
d32 = getSDisp8(delta + am_sz);
- delta = dis_Grp8_Imm ( sorb, delta, modrm, am_sz, sz, d32,
- &decode_OK );
+ delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
+ am_sz, sz, d32, &decode_OK );
if (!decode_OK)
goto decode_failure;
break;
/* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
case 0xA3: /* BT Gv,Ev */
- delta = dis_bt_G_E ( sorb, sz, delta, BtOpNone );
+ delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpNone );
break;
case 0xB3: /* BTR Gv,Ev */
- delta = dis_bt_G_E ( sorb, sz, delta, BtOpReset );
+ delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpReset );
break;
case 0xAB: /* BTS Gv,Ev */
- delta = dis_bt_G_E ( sorb, sz, delta, BtOpSet );
+ delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpSet );
break;
case 0xBB: /* BTC Gv,Ev */
- delta = dis_bt_G_E ( sorb, sz, delta, BtOpComp );
+ delta = dis_bt_G_E ( sorb, pfx_lock, sz, delta, BtOpComp );
break;
/* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
/* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
case 0xB0: /* CMPXCHG Gb,Eb */
- delta = dis_cmpxchg_G_E ( sorb, 1, delta );
+ delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
break;
case 0xB1: /* CMPXCHG Gv,Ev */
- delta = dis_cmpxchg_G_E ( sorb, sz, delta );
+ delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
break;
case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
- IRTemp m64_old = newTemp(Ity_I64);
- IRTemp m64_new = newTemp(Ity_I64);
- IRTemp da_old = newTemp(Ity_I64);
- IRTemp da_new = newTemp(Ity_I64);
- IRTemp cb_old = newTemp(Ity_I64);
+ IRTemp expdHi = newTemp(Ity_I32);
+ IRTemp expdLo = newTemp(Ity_I32);
+ IRTemp dataHi = newTemp(Ity_I32);
+ IRTemp dataLo = newTemp(Ity_I32);
+ IRTemp oldHi = newTemp(Ity_I32);
+ IRTemp oldLo = newTemp(Ity_I32);
IRTemp flags_old = newTemp(Ity_I32);
IRTemp flags_new = newTemp(Ity_I32);
- IRTemp cond = newTemp(Ity_I8);
+ IRTemp success = newTemp(Ity_I1);
+
+ /* Translate this using a DCAS, even if there is no LOCK
+ prefix. Life is too short to bother with generating two
+ different translations for the with/without-LOCK-prefix
+ cases. */
+ *expect_CAS = True;
/* Decode, and generate address. */
+ if (sz != 4) goto decode_failure;
modrm = getIByte(delta);
if (epartIsReg(modrm)) goto decode_failure;
if (gregOfRM(modrm) != 1) goto decode_failure;
addr = disAMode ( &alen, sorb, delta, dis_buf );
delta += alen;
- /* Fetch the old 64-bit values and compute the guard. */
- assign( m64_old, loadLE(Ity_I64, mkexpr(addr) ));
- assign( da_old, binop(Iop_32HLto64,
- getIReg(4,R_EDX), getIReg(4,R_EAX)) );
- assign( cb_old, binop(Iop_32HLto64,
- getIReg(4,R_ECX), getIReg(4,R_EBX)) );
-
- assign( cond,
- unop(Iop_1Uto8,
- binop(Iop_CmpEQ64, mkexpr(da_old), mkexpr(m64_old))) );
-
- /* Compute new %edx:%eax and m64 values, and put in place */
- assign( da_new,
- IRExpr_Mux0X(mkexpr(cond), mkexpr(m64_old), mkexpr(da_old)));
- assign( m64_new,
- IRExpr_Mux0X(mkexpr(cond), mkexpr(m64_old), mkexpr(cb_old)));
-
- putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(da_new)) );
- putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(da_new)) );
- storeLE( mkexpr(addr), mkexpr(m64_new) );
-
- /* Copy the guard into the Z flag and leave the others unchanged */
+ /* Get the expected and new values. */
+ assign( expdHi, getIReg(4,R_EDX) );
+ assign( expdLo, getIReg(4,R_EAX) );
+ assign( dataHi, getIReg(4,R_ECX) );
+ assign( dataLo, getIReg(4,R_EBX) );
+
+ /* Do the DCAS */
+ stmt( IRStmt_CAS(
+ mkIRCAS( oldHi, oldLo,
+ Iend_LE, mkexpr(addr),
+ mkexpr(expdHi), mkexpr(expdLo),
+ mkexpr(dataHi), mkexpr(dataLo)
+ )));
+
+ /* success when oldHi:oldLo == expdHi:expdLo */
+ assign( success,
+ binop(Iop_CmpEQ32,
+ binop(Iop_Or32,
+ binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
+ binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
+ ),
+ mkU32(0)
+ ));
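+ /* (Or-ing together the xors of the halves and comparing against
+    zero keeps the whole test in 32-bit arithmetic; the previous
+    version instead glued the halves together with Iop_32HLto64
+    and used a single 64-bit comparison.) */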
+
+ /* If the DCAS is successful, that is to say oldHi:oldLo ==
+ expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
+ which is where they came from originally. Both the actual
+ contents of these two regs, and any shadow values, are
+ unchanged. If the DCAS fails then we're putting into
+ EDX:EAX the value seen in memory. */
+ putIReg(4, R_EDX,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ mkexpr(oldHi),
+ mkexpr(expdHi)
+ ));
+ putIReg(4, R_EAX,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ mkexpr(oldLo),
+ mkexpr(expdLo)
+ ));
+
+ /* Copy the success bit into the Z flag and leave the others
+ unchanged */
assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
assign(
flags_new,
mkU32(~X86G_CC_MASK_Z)),
binop(Iop_Shl32,
binop(Iop_And32,
- unop(Iop_8Uto32, mkexpr(cond)), mkU32(1)),
+ unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
mkU8(X86G_CC_SHIFT_Z)) ));
stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
case 0xC0: { /* XADD Gb,Eb */
Bool decodeOK;
- delta = dis_xadd_G_E ( sorb, 1, delta, &decodeOK );
+ delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
if (!decodeOK) goto decode_failure;
break;
}
case 0xC1: { /* XADD Gv,Ev */
Bool decodeOK;
- delta = dis_xadd_G_E ( sorb, sz, delta, &decodeOK );
+ delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
if (!decodeOK) goto decode_failure;
break;
}
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
- if (unlock_bus_after_insn)
- stmt( IRStmt_MBE(Imbe_BusUnlock) );
jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
dres.whatNext = Dis_StopHere;
dres.len = 0;
+ /* We also need to say that a CAS is not expected now, regardless
+ of what it might have been set to at the start of the function,
+ since the IR that we've emitted just above (to synthesise a
+ SIGILL) does not involve any CAS, and presumably no other IR has
+ been emitted for this (non-decoded) insn. */
+ *expect_CAS = False;
return dres;
} /* switch (opc) for the main (primary) opcode switch. */
decode_success:
/* All decode successes end up here. */
DIP("\n");
- if (unlock_bus_after_insn)
- stmt( IRStmt_MBE(Imbe_BusUnlock) );
dres.len = delta - delta_start;
return dres;
}
VexAbiInfo* abiinfo,
Bool host_bigendian_IN )
{
+ Int i, x1, x2;
+ Bool expect_CAS, has_CAS;
DisResult dres;
/* Set globals (see top of this file) */
guest_EIP_curr_instr = (Addr32)guest_IP;
guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta);
- dres = disInstr_X86_WRK ( put_IP, resteerOkFn, callback_opaque,
- delta, archinfo );
+ x1 = irsb_IN->stmts_used;
+ expect_CAS = False;
+ dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ callback_opaque, delta, archinfo );
+ x2 = irsb_IN->stmts_used;
+ vassert(x2 >= x1);
+
+ /* See comment at the top of disInstr_X86_WRK for meaning of
+ expect_CAS. Here, we (sanity-)check for the presence/absence of
+ IRCAS as directed by the returned expect_CAS value. */
+ has_CAS = False;
+ for (i = x1; i < x2; i++) {
+ if (irsb_IN->stmts[i]->tag == Ist_CAS)
+ has_CAS = True;
+ }
+
+ if (expect_CAS != has_CAS) {
+ /* inconsistency detected. re-disassemble the instruction so as
+ to generate a useful error message; then assert. */
+ vex_traceflags |= VEX_TRACE_FE;
+ dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ callback_opaque, delta, archinfo );
+ for (i = x1; i < x2; i++) {
+ vex_printf("\t\t");
+ ppIRStmt(irsb_IN->stmts[i]);
+ vex_printf("\n");
+ }
+ /* Failure of this assertion is serious and denotes a bug in
+ disInstr. */
+ vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
+ }
return dres;
}
i->Ain.Bsfr64.dst = dst;
return i;
}
-AMD64Instr* AMD64Instr_MFence ( void )
-{
+AMD64Instr* AMD64Instr_MFence ( void ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_MFence;
return i;
}
+AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_ACAS;
+ i->Ain.ACAS.addr = addr;
+ i->Ain.ACAS.sz = sz;
+ vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
+ return i;
+}
+AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_DACAS;
+ i->Ain.DACAS.addr = addr;
+ i->Ain.DACAS.sz = sz;
+ vassert(sz == 8 || sz == 4);
+ return i;
+}
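+
+/* Register conventions for the two instructions above, fixed partly
+   by the amd64 ISA and partly by the emitter below: ACAS takes the
+   expected value in %rax and the new value in %rbx, and leaves the
+   old memory value in %rax; DACAS takes %rdx:%rax (expected) and
+   %rcx:%rbx (new), leaving the old value in %rdx:%rax.  The
+   instruction selector must set these registers up beforehand. */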
+
AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
case Ain_MFence:
vex_printf("mfence" );
return;
+ case Ain_ACAS:
+ vex_printf("lock cmpxchg%c ",
+ i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
+ : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
+ vex_printf("{%%rax->%%rbx},");
+ ppAMD64AMode(i->Ain.ACAS.addr);
+ return;
+ case Ain_DACAS:
+ vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
+ (Int)(2 * i->Ain.DACAS.sz));
+ ppAMD64AMode(i->Ain.DACAS.addr);
+ return;
case Ain_A87Free:
vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
break;
return;
case Ain_MFence:
return;
+ case Ain_ACAS:
+ addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
+ addHRegUse(u, HRmRead, hregAMD64_RBX());
+ addHRegUse(u, HRmModify, hregAMD64_RAX());
+ return;
+ case Ain_DACAS:
+ addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
+ addHRegUse(u, HRmRead, hregAMD64_RCX());
+ addHRegUse(u, HRmRead, hregAMD64_RBX());
+ addHRegUse(u, HRmModify, hregAMD64_RDX());
+ addHRegUse(u, HRmModify, hregAMD64_RAX());
+ return;
case Ain_A87Free:
return;
case Ain_A87PushPop:
return;
case Ain_MFence:
return;
+ case Ain_ACAS:
+ mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
+ return;
+ case Ain_DACAS:
+ mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
+ return;
case Ain_A87Free:
return;
case Ain_A87PushPop:
*p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
goto done;
+ case Ain_ACAS:
+ /* lock */
+ *p++ = 0xF0;
+ if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
+ /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
+ in %rbx. The new-value register is hardwired to %rbx, since
+ allowing an arbitrary register would mean dealing with byte
+ integer registers, which is too much hassle. (It could
+ equally be %rcx or %rdx.) */
+ rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
+ if (i->Ain.ACAS.sz != 8)
+ rex = clearWBit(rex);
+
+ *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
+ *p++ = 0x0F;
+ if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
+ p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
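+ /* (Example, assuming a simple (%rdi) amode and sz == 8: the
+    bytes emitted are F0 48 0F B1 1F, that is,
+    "lock cmpxchgq %rbx,(%rdi)".) */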
+ goto done;
+
+ case Ain_DACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
+ value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
+ aren't encoded in the insn. */
+ rex = rexAMode_M( fake(1), i->Ain.DACAS.addr );
+ if (i->Ain.DACAS.sz != 8)
+ rex = clearWBit(rex);
+ *p++ = rex;
+ *p++ = 0x0F;
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
+ goto done;
+
case Ain_A87Free:
vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
for (j = 0; j < i->Ain.A87Free.nregs; j++) {
Ain_Set64, /* convert condition code to 64-bit value */
Ain_Bsfr64, /* 64-bit bsf/bsr */
Ain_MFence, /* mem fence */
+ Ain_ACAS, /* 8/16/32/64-bit lock;cmpxchg */
+ Ain_DACAS, /* lock;cmpxchg8b/16b (doubleword ACAS, 2 x
+ 32-bit or 2 x 64-bit only) */
+
Ain_A87Free, /* free up x87 registers */
Ain_A87PushPop, /* x87 loads/stores */
Ain_A87FpOp, /* x87 operations */
On AMD64 we emit a real "mfence". */
struct {
} MFence;
+ struct {
+ AMD64AMode* addr;
+ UChar sz; /* 1, 2, 4 or 8 */
+ } ACAS;
+ struct {
+ AMD64AMode* addr;
+ UChar sz; /* 4 or 8 only */
+ } DACAS;
/* --- X87 --- */
extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst );
extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_MFence ( void );
+extern AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz );
+extern AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz );
+
extern AMD64Instr* AMD64Instr_A87Free ( Int nregs );
extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush );
extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
Int vreg_ctr;
- /* Currently (27 Jan 06) unused */
UInt hwcaps;
}
ISelEnv;
HReg dst = newVRegI(env);
AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
+ /* We can't handle big-endian or load-linked loads. */
if (e->Iex.Load.end != Iend_LE)
goto irreducible;
+ if (e->Iex.Load.isLL)
+ goto irreducible;
if (ty == Ity_I64) {
addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
}
/* special case: 64-bit load from memory */
- if (e->tag == Iex_Load && ty == Ity_I64 && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && ty == Ity_I64
+ && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
return AMD64RMI_Mem(am);
}
return lookupIRTemp(env, e->Iex.RdTmp.tmp);
}
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
AMD64AMode* am;
HReg res = newVRegV(env);
vassert(e->Iex.Load.ty == Ity_F32);
return res;
}
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
AMD64AMode* am;
HReg res = newVRegV(env);
vassert(e->Iex.Load.ty == Ity_F64);
return dst;
}
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
HReg dst = newVRegV(env);
AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
/* --------- STORE --------- */
case Ist_Store: {
- IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
- IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
- IREndness end = stmt->Ist.Store.end;
+ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+ IREndness end = stmt->Ist.Store.end;
+ IRTemp resSC = stmt->Ist.Store.resSC;
- if (tya != Ity_I64 || end != Iend_LE)
+ if (tya != Ity_I64 || end != Iend_LE || resSC != IRTemp_INVALID)
goto stmt_fail;
if (tyd == Ity_I64) {
case Imbe_Fence:
addInstr(env, AMD64Instr_MFence());
return;
- case Imbe_BusLock:
- case Imbe_BusUnlock:
- return;
default:
break;
}
break;
+ /* --------- ACAS --------- */
+ case Ist_CAS:
+ if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+ /* "normal" singleton CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* get: cas->expd into %rax, and cas->data into %rbx */
+ AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rData = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpd = iselIntExpr_R(env, cas->expdLo);
+ HReg rOld = lookupIRTemp(env, cas->oldLo);
+ vassert(cas->expdHi == NULL);
+ vassert(cas->dataHi == NULL);
+ addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
+ addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
+ addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
+ switch (ty) {
+ case Ity_I64: sz = 8; break;
+ case Ity_I32: sz = 4; break;
+ case Ity_I16: sz = 2; break;
+ case Ity_I8: sz = 1; break;
+ default: goto unhandled_cas;
+ }
+ addInstr(env, AMD64Instr_ACAS(am, sz));
+ addInstr(env, AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
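+ /* On success %rax (the old value) equals rExpd, which rOld
+    already holds; on failure (ZF clear, hence Acc_NZ) copy the
+    value actually observed in memory, left in %rax, into rOld. */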
+ return;
+ } else {
+ /* double CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* only 32-bit and 64-bit allowed in this case */
+ /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
+ /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
+ AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldHi = lookupIRTemp(env, cas->oldHi);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ switch (ty) {
+ case Ity_I64:
+ if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
+ goto unhandled_cas; /* we'd have to generate
+ cmpxchg16b, but the host
+ doesn't support that */
+ sz = 8;
+ break;
+ case Ity_I32:
+ sz = 4;
+ break;
+ default:
+ goto unhandled_cas;
+ }
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
+ addInstr(env, AMD64Instr_DACAS(am, sz));
+ addInstr(env,
+ AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
+ addInstr(env,
+ AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
+ return;
+ }
+ unhandled_cas:
+ break;
+
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
case Ist_IMark:
/* sanity ... */
vassert(arch_host == VexArchAMD64);
- vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3)));
+ vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3
+ |VEX_HWCAPS_AMD64_CX16)));
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
IREndness end = stmt->Ist.Store.end;
+ IRTemp resSC = stmt->Ist.Store.resSC;
- if (tya != Ity_I32 || end != Iend_LE)
+ if (tya != Ity_I32 || end != Iend_LE || resSC != IRTemp_INVALID)
goto stmt_fail;
reg = iselIntExpr_R(env, stmt->Ist.Store.data);
}
PPCInstr* PPCInstr_CMov ( PPCCondCode cond,
HReg dst, PPCRI* src ) {
- PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
i->tag = Pin_CMov;
i->Pin.CMov.cond = cond;
i->Pin.CMov.src = src;
if (sz == 8) vassert(mode64);
return i;
}
+PPCInstr* PPCInstr_LoadL ( UChar sz,
+ HReg dst, HReg src, Bool mode64 )
+{
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_LoadL;
+ i->Pin.LoadL.sz = sz;
+ i->Pin.LoadL.src = src;
+ i->Pin.LoadL.dst = dst;
+ vassert(sz == 4 || sz == 8);
+ if (sz == 8) vassert(mode64);
+ return i;
+}
PPCInstr* PPCInstr_Store ( UChar sz, PPCAMode* dst, HReg src,
Bool mode64 ) {
PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
if (sz == 8) vassert(mode64);
return i;
}
+PPCInstr* PPCInstr_StoreC ( UChar sz, HReg dst, HReg src, Bool mode64 ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_StoreC;
+ i->Pin.StoreC.sz = sz;
+ i->Pin.StoreC.src = src;
+ i->Pin.StoreC.dst = dst;
+ vassert(sz == 4 || sz == 8);
+ if (sz == 8) vassert(mode64);
+ return i;
+}
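+
+/* LoadL and StoreC model the PowerPC lwarx/ldarx and stwcx./stdcx.
+   reservation pairs: LoadL loads and acquires a reservation on the
+   address, and StoreC stores only if that reservation is still held,
+   recording success or failure in CR0.EQ.  The instruction selector
+   pairs them up to implement IR-level load-linked/store-conditional. */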
PPCInstr* PPCInstr_Set ( PPCCondCode cond, HReg dst ) {
PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
i->tag = Pin_Set;
ppPPCAMode(i->Pin.Load.src);
return;
}
+ case Pin_LoadL:
+ vex_printf("l%carx ", i->Pin.LoadL.sz==4 ? 'w' : 'd');
+ ppHRegPPC(i->Pin.LoadL.dst);
+ vex_printf(",%%r0,");
+ ppHRegPPC(i->Pin.LoadL.src);
+ return;
case Pin_Store: {
UChar sz = i->Pin.Store.sz;
Bool idxd = toBool(i->Pin.Store.dst->tag == Pam_RR);
ppPPCAMode(i->Pin.Store.dst);
return;
}
+ case Pin_StoreC:
+ vex_printf("st%ccx. ", i->Pin.StoreC.sz==4 ? 'w' : 'd');
+ ppHRegPPC(i->Pin.StoreC.src);
+ vex_printf(",%%r0,");
+ ppHRegPPC(i->Pin.StoreC.dst);
+ return;
case Pin_Set: {
PPCCondCode cc = i->Pin.Set.cond;
vex_printf("set (%s),", showPPCCondCode(cc));
/* Finally, there is the issue that the insn trashes a
register because the literal target address has to be
loaded into a register. %r10 seems a suitable victim.
- (Can't use %r0, as use ops that interpret it as value zero). */
+ (Can't use %r0, as some insns interpret it as value zero). */
addHRegUse(u, HRmWrite, hregPPC_GPR10(mode64));
/* Upshot of this is that the assembler really must use %r10,
and no other, as a destination temporary. */
addRegUsage_PPCAMode(u, i->Pin.Load.src);
addHRegUse(u, HRmWrite, i->Pin.Load.dst);
return;
+ case Pin_LoadL:
+ addHRegUse(u, HRmRead, i->Pin.LoadL.src);
+ addHRegUse(u, HRmWrite, i->Pin.LoadL.dst);
+ return;
case Pin_Store:
addHRegUse(u, HRmRead, i->Pin.Store.src);
addRegUsage_PPCAMode(u, i->Pin.Store.dst);
return;
+ case Pin_StoreC:
+ addHRegUse(u, HRmRead, i->Pin.StoreC.src);
+ addHRegUse(u, HRmRead, i->Pin.StoreC.dst);
+ return;
case Pin_Set:
addHRegUse(u, HRmWrite, i->Pin.Set.dst);
return;
mapRegs_PPCAMode(m, i->Pin.Load.src);
mapReg(m, &i->Pin.Load.dst);
return;
+ case Pin_LoadL:
+ mapReg(m, &i->Pin.LoadL.src);
+ mapReg(m, &i->Pin.LoadL.dst);
+ return;
case Pin_Store:
mapReg(m, &i->Pin.Store.src);
mapRegs_PPCAMode(m, i->Pin.Store.dst);
return;
+ case Pin_StoreC:
+ mapReg(m, &i->Pin.StoreC.src);
+ mapReg(m, &i->Pin.StoreC.dst);
+ return;
case Pin_Set:
mapReg(m, &i->Pin.Set.dst);
return;
case Ijk_TInval: trc = VEX_TRC_JMP_TINVAL; break;
case Ijk_NoRedir: trc = VEX_TRC_JMP_NOREDIR; break;
case Ijk_SigTRAP: trc = VEX_TRC_JMP_SIGTRAP; break;
+ case Ijk_SigBUS: trc = VEX_TRC_JMP_SIGBUS; break;
case Ijk_Ret:
case Ijk_Call:
case Ijk_Boring:
}
}
+ case Pin_LoadL: {
+ if (i->Pin.LoadL.sz == 4) {
+ p = mkFormX(p, 31, iregNo(i->Pin.LoadL.dst, mode64),
+ 0, iregNo(i->Pin.LoadL.src, mode64), 20, 0);
+ goto done;
+ }
+ if (i->Pin.LoadL.sz == 8 && mode64) {
+ p = mkFormX(p, 31, iregNo(i->Pin.LoadL.dst, mode64),
+ 0, iregNo(i->Pin.LoadL.src, mode64), 84, 0);
+ goto done;
+ }
+ goto bad;
+ }
+
case Pin_Set: {
/* Make the destination register be 1 or 0, depending on whether
the relevant condition holds. */
case Pin_MFence: {
p = mkFormX(p, 31, 0, 0, 0, 598, 0); // sync, PPC32 p616
-// CAB: Should this be isync?
-// p = mkFormXL(p, 19, 0, 0, 0, 150, 0); // isync, PPC32 p467
+ // CAB: Should this be isync?
+ // p = mkFormXL(p, 19, 0, 0, 0, 150, 0); // isync, PPC32 p467
goto done;
}
goto done;
}
+ case Pin_StoreC: {
+ if (i->Pin.StoreC.sz == 4) {
+ p = mkFormX(p, 31, iregNo(i->Pin.StoreC.src, mode64),
+ 0, iregNo(i->Pin.StoreC.dst, mode64), 150, 1);
+ goto done;
+ }
+ if (i->Pin.StoreC.sz == 8 && mode64) {
+ p = mkFormX(p, 31, iregNo(i->Pin.StoreC.src, mode64),
+ 0, iregNo(i->Pin.StoreC.dst, mode64), 214, 1);
+ goto done;
+ }
+ goto bad;
+ }
+
case Pin_FpUnary: {
UInt fr_dst = fregNo(i->Pin.FpUnary.dst);
UInt fr_src = fregNo(i->Pin.FpUnary.src);
Pin_Goto, /* conditional/unconditional jmp to dst */
Pin_CMov, /* conditional move */
Pin_Load, /* zero-extending load a 8|16|32|64 bit value from mem */
+ Pin_LoadL, /* load-linked (lwarx/ldarx) 32|64 bit value from mem */
Pin_Store, /* store a 8|16|32|64 bit value to mem */
+ Pin_StoreC, /* store-conditional (stwcx./stdcx.) 32|64 bit val */
Pin_Set, /* convert condition code to value 0 or 1 */
Pin_MfCR, /* move from condition register to GPR */
Pin_MFence, /* mem fence */
HReg dst;
PPCAMode* src;
} Load;
+ /* Load-and-reserve (lwarx, ldarx) */
+ struct {
+ UChar sz; /* 4|8 */
+ HReg dst;
+ HReg src;
+ } LoadL;
/* 64/32/16/8 bit stores */
struct {
UChar sz; /* 1|2|4|8 */
PPCAMode* dst;
HReg src;
} Store;
+ /* Store-conditional (stwcx., stdcx.) */
+ struct {
+ UChar sz; /* 4|8 */
+ HReg dst;
+ HReg src;
+ } StoreC;
/* Convert a ppc condition code to value 0 or 1. */
struct {
PPCCondCode cond;
extern PPCInstr* PPCInstr_CMov ( PPCCondCode, HReg dst, PPCRI* src );
extern PPCInstr* PPCInstr_Load ( UChar sz,
HReg dst, PPCAMode* src, Bool mode64 );
+extern PPCInstr* PPCInstr_LoadL ( UChar sz,
+ HReg dst, HReg src, Bool mode64 );
extern PPCInstr* PPCInstr_Store ( UChar sz, PPCAMode* dst,
HReg src, Bool mode64 );
+extern PPCInstr* PPCInstr_StoreC ( UChar sz, HReg dst, HReg src,
+ Bool mode64 );
extern PPCInstr* PPCInstr_Set ( PPCCondCode cond, HReg dst );
extern PPCInstr* PPCInstr_MfCR ( HReg dst );
extern PPCInstr* PPCInstr_MFence ( void );
addInstr(env, PPCInstr_Alu(
Palu_AND,
rtmp, rtmp,
- PPCRH_Imm(False/*signed*/, toUShort(nElems-1))));
+ PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
addInstr(env, PPCInstr_Shft(
Pshft_SHL,
env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
/* --------- LOAD --------- */
case Iex_Load: {
- HReg r_dst = newVRegI(env);
- PPCAMode* am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/ );
+ HReg r_dst;
+
if (e->Iex.Load.end != Iend_BE)
goto irreducible;
- addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
- r_dst, am_addr, mode64 ));
+
+ r_dst = newVRegI(env);
+
+ if (e->Iex.Load.isLL) {
+ /* lwarx or ldarx. Be simple; force address into a register. */
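+ /* (lwarx/ldarx exist only in indexed (RA,RB) form, and the
+    emitter passes RA=0, which reads as a literal zero; hence
+    the complete address must live in a single register, RB.) */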
+ HReg r_addr = iselWordExpr_R( env, e->Iex.Load.addr );
+ if (ty == Ity_I32) {
+ addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
+ }
+ else if (ty == Ity_I64 && mode64) {
+ addInstr(env, PPCInstr_LoadL( 8, r_dst, r_addr, mode64 ));
+ }
+ else
+ goto irreducible;
+ } else {
+ /* Normal load; use whatever amodes we can. */
+ PPCAMode* am_addr
+ = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/ );
+ addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
+ r_dst, am_addr, mode64 ));
+ }
+
return r_dst;
- break;
+ /*NOTREACHED*/
}
/* --------- BINARY OP --------- */
DECLARE_PATTERN(p_LDbe16_then_16Uto32);
DEFINE_PATTERN(p_LDbe16_then_16Uto32,
unop(Iop_16Uto32,
- IRExpr_Load(Iend_BE,Ity_I16,bind(0))) );
+ IRExpr_Load(False,Iend_BE,Ity_I16,bind(0))) );
if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
HReg r_dst = newVRegI(env);
- PPCAMode* amode = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/ );
+ PPCAMode* amode
+ = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/ );
addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
return r_dst;
}
vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
/* 64-bit load */
- if (e->tag == Iex_Load) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE && !e->Iex.Load.isLL) {
HReg tLo = newVRegI(env);
HReg tHi = newVRegI(env);
HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr);
return lookupIRTemp(env, e->Iex.RdTmp.tmp);
}
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE && !e->Iex.Load.isLL) {
PPCAMode* am_addr;
HReg r_dst = newVRegF(env);
vassert(e->Iex.Load.ty == Ity_F32);
}
/* --------- LOAD --------- */
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE && !e->Iex.Load.isLL) {
HReg r_dst = newVRegF(env);
PPCAMode* am_addr;
vassert(e->Iex.Load.ty == Ity_F64);
return dst;
}
- if (e->tag == Iex_Load) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE && !e->Iex.Load.isLL) {
PPCAMode* am_addr;
HReg v_dst = newVRegV(env);
vassert(e->Iex.Load.ty == Ity_V128);
/* --------- STORE --------- */
case Ist_Store: {
- IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
- IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
- IREndness end = stmt->Ist.Store.end;
+ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+ IREndness end = stmt->Ist.Store.end;
+ IRTemp resSC = stmt->Ist.Store.resSC;
- if ( end != Iend_BE ||
- (!mode64 && (tya != Ity_I32)) ||
- ( mode64 && (tya != Ity_I64)) )
+ if (end != Iend_BE)
+ goto stmt_fail;
+ if (!mode64 && (tya != Ity_I32))
+ goto stmt_fail;
+ if (mode64 && (tya != Ity_I64))
goto stmt_fail;
+ if (resSC != IRTemp_INVALID) {
+ /* deal with store-conditional */
+ HReg r_res = lookupIRTemp(env, resSC);
+ HReg r_a = iselWordExpr_R(env, stmt->Ist.Store.addr);
+ HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data);
+ HReg r_tmp = newVRegI(env);
+ if (tyd == Ity_I32 || (tyd == Ity_I64 && mode64)) {
+ addInstr(env, PPCInstr_StoreC( tyd==Ity_I32 ? 4 : 8,
+ r_a, r_src, mode64 ));
+ addInstr(env, PPCInstr_MfCR( r_tmp ));
+ addInstr(env, PPCInstr_Shft(
+ Pshft_SHR,
+ env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
+ r_tmp, r_tmp,
+ PPCRH_Imm(False/*unsigned*/, 29)));
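+ /* MfCR fetches the entire 32-bit CR image; CR0.EQ, which
+    st{w,d}cx. sets iff the store succeeded, sits at bit 29
+    counting from the lsb -- hence the shift by 29. */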
+ /* Probably unnecessary, since the IR dest type is Ity_I1,
+ and so we are entitled to leave whatever junk we like
+ drifting round in the upper 31 or 63 bits of r_res.
+ However, for the sake of conservativeness .. */
+ addInstr(env, PPCInstr_Alu(
+ Palu_AND,
+ r_res, r_tmp,
+ PPCRH_Imm(False/*unsigned*/, 1)));
+ return;
+ }
+ goto stmt_fail;
+ }
+
if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
(mode64 && (tyd == Ity_I64))) {
PPCAMode* am_addr
case Imbe_Fence:
addInstr(env, PPCInstr_MFence());
return;
- case Imbe_BusLock:
- case Imbe_BusUnlock:
- case Imbe_SnoopedStoreBegin:
- case Imbe_SnoopedStoreEnd:
- return;
default:
break;
}
i->Xin.Bsfr32.dst = dst;
return i;
}
-X86Instr* X86Instr_MFence ( UInt hwcaps )
-{
+X86Instr* X86Instr_MFence ( UInt hwcaps ) {
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
i->tag = Xin_MFence;
i->Xin.MFence.hwcaps = hwcaps;
|VEX_HWCAPS_X86_SSE3)));
return i;
}
+X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_ACAS;
+ i->Xin.ACAS.addr = addr;
+ i->Xin.ACAS.sz = sz;
+ vassert(sz == 4 || sz == 2 || sz == 1);
+ return i;
+}
+X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_DACAS;
+ i->Xin.DACAS.addr = addr;
+ return i;
+}
X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
vex_printf("mfence(%s)",
LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
return;
+ case Xin_ACAS:
+ vex_printf("lock cmpxchg%c ",
+ i->Xin.ACAS.sz==1 ? 'b'
+ : i->Xin.ACAS.sz==2 ? 'w' : 'l');
+ vex_printf("{%%eax->%%ebx},");
+ ppX86AMode(i->Xin.ACAS.addr);
+ return;
+ case Xin_DACAS:
+ vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
+ ppX86AMode(i->Xin.DACAS.addr);
+ return;
case Xin_FpUnary:
vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
ppHRegX86(i->Xin.FpUnary.src);
return;
case Xin_MFence:
return;
+ case Xin_ACAS:
+ addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
+ addHRegUse(u, HRmRead, hregX86_EBX());
+ addHRegUse(u, HRmModify, hregX86_EAX());
+ return;
+ case Xin_DACAS:
+ addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
+ addHRegUse(u, HRmRead, hregX86_ECX());
+ addHRegUse(u, HRmRead, hregX86_EBX());
+ addHRegUse(u, HRmModify, hregX86_EDX());
+ addHRegUse(u, HRmModify, hregX86_EAX());
+ return;
case Xin_FpUnary:
addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
return;
case Xin_MFence:
return;
+ case Xin_ACAS:
+ mapRegs_X86AMode(m, i->Xin.ACAS.addr);
+ return;
+ case Xin_DACAS:
+ mapRegs_X86AMode(m, i->Xin.DACAS.addr);
+ return;
case Xin_FpUnary:
mapReg(m, &i->Xin.FpUnary.src);
mapReg(m, &i->Xin.FpUnary.dst);
/*NOTREACHED*/
break;
+ case Xin_ACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value
+ in %ebx. The new-value register is hardwired to be %ebx
+ since letting it be any integer register gives the problem
+ that %sil and %dil are unaddressable on x86 and hence we
+ would have to resort to the same kind of trickery as with
+ byte-sized Xin.Store, just below. Given that this isn't
+ performance critical, it is simpler just to force the
+ register operand to %ebx (could equally be %ecx or %edx).
+ (Although %ebx is more consistent with cmpxchg8b.) */
+ if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
+ *p++ = 0x0F;
+ if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
+ p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
+ goto done;
+
+ case Xin_DACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg8b m64. Expected-value in %edx:%eax, new value
+ in %ecx:%ebx. All 4 regs are hardwired in the ISA, so
+ aren't encoded in the insn. */
+ *p++ = 0x0F;
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
+ goto done;
+
case Xin_Store:
if (i->Xin.Store.sz == 2) {
/* This case, at least, is simple, given that we can
Xin_Set32, /* convert condition code to 32-bit value */
Xin_Bsfr32, /* 32-bit bsf/bsr */
Xin_MFence, /* mem fence (not just sse2, but sse0 and 1 too) */
+ Xin_ACAS, /* 8/16/32-bit lock;cmpxchg */
+ Xin_DACAS, /* lock;cmpxchg8b (doubleword ACAS, 2 x 32-bit only) */
Xin_FpUnary, /* FP fake unary op */
Xin_FpBinary, /* FP fake binary op */
struct {
UInt hwcaps;
} MFence;
+ /* "lock;cmpxchg": mem address in .addr,
+ expected value in %eax, new value in %ebx */
+ struct {
+ X86AMode* addr;
+ UChar sz; /* 1, 2 or 4 */
+ } ACAS;
+ /* "lock;cmpxchg8b": mem address in .addr, expected value in
+ %edx:%eax, new value in %ecx:%ebx */
+ struct {
+ X86AMode* addr;
+ } DACAS;
/* X86 Floating point (fake 3-operand, "flat reg file" insns) */
struct {
extern X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst );
extern X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst );
extern X86Instr* X86Instr_MFence ( UInt hwcaps );
+extern X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz );
+extern X86Instr* X86Instr_DACAS ( X86AMode* addr );
extern X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst );
extern X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst );
HReg dst = newVRegI(env);
X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
+ /* We can't handle big-endian or load-linked loads. */
if (e->Iex.Load.end != Iend_LE)
goto irreducible;
+ if (e->Iex.Load.isLL)
+ goto irreducible;
if (ty == Ity_I32) {
addInstr(env, X86Instr_Alu32R(Xalu_MOV,
DECLARE_PATTERN(p_LDle8_then_8Uto32);
DEFINE_PATTERN(p_LDle8_then_8Uto32,
unop(Iop_8Uto32,
- IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
+ IRExpr_Load(False,Iend_LE,Ity_I8,bind(0))) );
if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
HReg dst = newVRegI(env);
X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
DECLARE_PATTERN(p_LDle8_then_8Sto32);
DEFINE_PATTERN(p_LDle8_then_8Sto32,
unop(Iop_8Sto32,
- IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
+ IRExpr_Load(False,Iend_LE,Ity_I8,bind(0))) );
if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
HReg dst = newVRegI(env);
X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
DECLARE_PATTERN(p_LDle16_then_16Uto32);
DEFINE_PATTERN(p_LDle16_then_16Uto32,
unop(Iop_16Uto32,
- IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
+ IRExpr_Load(False,Iend_LE,Ity_I16,bind(0))) );
if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
HReg dst = newVRegI(env);
X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
}
/* special case: 32-bit load from memory */
- if (e->tag == Iex_Load && ty == Ity_I32 && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && ty == Ity_I32
+ && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
return X86RMI_Mem(am);
}
}
/* 64-bit load */
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
HReg tLo, tHi;
X86AMode *am0, *am4;
vassert(e->Iex.Load.ty == Ity_I64);
return lookupIRTemp(env, e->Iex.RdTmp.tmp);
}
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
X86AMode* am;
HReg res = newVRegF(env);
vassert(e->Iex.Load.ty == Ity_F32);
return freg;
}
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
X86AMode* am;
HReg res = newVRegF(env);
vassert(e->Iex.Load.ty == Ity_F64);
return dst;
}
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE && !e->Iex.Load.isLL) {
HReg dst = newVRegV(env);
X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
DECLARE_PATTERN(p_zwiden_load64);
DEFINE_PATTERN(p_zwiden_load64,
unop(Iop_64UtoV128,
- IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
+ IRExpr_Load(False,Iend_LE,Ity_I64,bind(0))));
if (matchIRExpr(&mi, p_zwiden_load64, e)) {
X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
HReg dst = newVRegV(env);
/* --------- STORE --------- */
case Ist_Store: {
- IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
- IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
- IREndness end = stmt->Ist.Store.end;
+ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+ IREndness end = stmt->Ist.Store.end;
+ IRTemp resSC = stmt->Ist.Store.resSC;
- if (tya != Ity_I32 || end != Iend_LE)
+ if (tya != Ity_I32 || end != Iend_LE || resSC != IRTemp_INVALID)
goto stmt_fail;
if (tyd == Ity_I32) {
case Imbe_Fence:
addInstr(env, X86Instr_MFence(env->hwcaps));
return;
- case Imbe_BusLock:
- case Imbe_BusUnlock:
- return;
default:
break;
}
break;
+ /* --------- ACAS --------- */
+ case Ist_CAS:
+ if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+ /* "normal" singleton CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
+ X86AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ vassert(cas->expdHi == NULL);
+ vassert(cas->dataHi == NULL);
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
+ switch (ty) {
+ case Ity_I32: sz = 4; break;
+ case Ity_I16: sz = 2; break;
+ case Ity_I8: sz = 1; break;
+ default: goto unhandled_cas;
+ }
+ addInstr(env, X86Instr_ACAS(am, sz));
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EAX()), rOldLo));
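+ /* On failure (ZF clear, hence Xcc_NZ) %eax holds the value
+    actually seen in memory; copy it into rOldLo. On success
+    rOldLo already holds the (equal) expected value. */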
+ return;
+ } else {
+ /* double CAS */
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* only 32-bit allowed in this case */
+ /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
+ /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
+ X86AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldHi = lookupIRTemp(env, cas->oldHi);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ if (ty != Ity_I32)
+ goto unhandled_cas;
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
+ addInstr(env, X86Instr_DACAS(am));
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EDX()), rOldHi));
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EAX()), rOldLo));
+ return;
+ }
+ unhandled_cas:
+ break;
+
/* --------- INSTR MARK --------- */
/* Doesn't generate any executable code ... */
case Ist_IMark:
vex_printf( ")" );
break;
case Iex_Load:
- vex_printf( "LD%s:", e->Iex.Load.end==Iend_LE ? "le" : "be" );
+ vex_printf( "LD%s%s:", e->Iex.Load.end==Iend_LE ? "le" : "be",
+ e->Iex.Load.isLL ? "-LL" : "" );
ppIRType(e->Iex.Load.ty);
vex_printf( "(" );
ppIRExpr(e->Iex.Load.addr);
vex_printf(")");
}
+void ppIRCAS ( IRCAS* cas )
+{
+ /* Print even structurally invalid constructions, as an aid to
+ debugging. */
+ if (cas->oldHi != IRTemp_INVALID) {
+ ppIRTemp(cas->oldHi);
+ vex_printf(",");
+ }
+ ppIRTemp(cas->oldLo);
+ vex_printf(" = CAS%s(", cas->end==Iend_LE ? "le" : "be" );
+ ppIRExpr(cas->addr);
+ vex_printf("::");
+ if (cas->expdHi) {
+ ppIRExpr(cas->expdHi);
+ vex_printf(",");
+ }
+ ppIRExpr(cas->expdLo);
+ vex_printf("->");
+ if (cas->dataHi) {
+ ppIRExpr(cas->dataHi);
+ vex_printf(",");
+ }
+ ppIRExpr(cas->dataLo);
+ vex_printf(")");
+}
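+
+/* So, roughly, a double CAS prints as
+      t10,t11 = CASle(t2::t4,t5->t6,t7)
+   and a single CAS as
+      t11 = CASle(t2::t5->t7)
+*/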
+
void ppIRJumpKind ( IRJumpKind kind )
{
switch (kind) {
case Ijk_NoRedir: vex_printf("NoRedir"); break;
case Ijk_SigTRAP: vex_printf("SigTRAP"); break;
case Ijk_SigSEGV: vex_printf("SigSEGV"); break;
+ case Ijk_SigBUS: vex_printf("SigBUS"); break;
case Ijk_Sys_syscall: vex_printf("Sys_syscall"); break;
case Ijk_Sys_int32: vex_printf("Sys_int32"); break;
case Ijk_Sys_int128: vex_printf("Sys_int128"); break;
void ppIRMBusEvent ( IRMBusEvent event )
{
switch (event) {
- case Imbe_Fence: vex_printf("Fence"); break;
- case Imbe_BusLock: vex_printf("BusLock"); break;
- case Imbe_BusUnlock: vex_printf("BusUnlock"); break;
- case Imbe_SnoopedStoreBegin: vex_printf("SnoopedStoreBegin"); break;
- case Imbe_SnoopedStoreEnd: vex_printf("SnoopedStoreEnd"); break;
- default: vpanic("ppIRMBusEvent");
+ case Imbe_Fence: vex_printf("Fence"); break;
+ default: vpanic("ppIRMBusEvent");
}
}
ppIRExpr(s->Ist.WrTmp.data);
break;
case Ist_Store:
+ if (s->Ist.Store.resSC != IRTemp_INVALID) {
+ ppIRTemp(s->Ist.Store.resSC);
+ vex_printf( " = SC( " );
+ }
vex_printf( "ST%s(", s->Ist.Store.end==Iend_LE ? "le" : "be" );
ppIRExpr(s->Ist.Store.addr);
vex_printf( ") = ");
ppIRExpr(s->Ist.Store.data);
+ if (s->Ist.Store.resSC != IRTemp_INVALID)
+ vex_printf( " )" );
+ break;
+ case Ist_CAS:
+ ppIRCAS(s->Ist.CAS.details);
break;
case Ist_Dirty:
ppIRDirty(s->Ist.Dirty.details);
e->Iex.Unop.arg = arg;
return e;
}
-IRExpr* IRExpr_Load ( IREndness end, IRType ty, IRExpr* addr ) {
+IRExpr* IRExpr_Load ( Bool isLL, IREndness end, IRType ty, IRExpr* addr ) {
IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
e->tag = Iex_Load;
+ e->Iex.Load.isLL = isLL;
e->Iex.Load.end = end;
e->Iex.Load.ty = ty;
e->Iex.Load.addr = addr;
}
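+
+/* (isLL marks a load as load-linked.  On ppc it is realised with
+   lwarx/ldarx -- see PPCInstr_LoadL -- whereas the x86 and amd64
+   instruction selectors simply refuse such loads.) */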
+/* Constructors -- IRCAS */
+
+IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
+ IREndness end, IRExpr* addr,
+ IRExpr* expdHi, IRExpr* expdLo,
+ IRExpr* dataHi, IRExpr* dataLo ) {
+ IRCAS* cas = LibVEX_Alloc(sizeof(IRCAS));
+ cas->oldHi = oldHi;
+ cas->oldLo = oldLo;
+ cas->end = end;
+ cas->addr = addr;
+ cas->expdHi = expdHi;
+ cas->expdLo = expdLo;
+ cas->dataHi = dataHi;
+ cas->dataLo = dataLo;
+ return cas;
+}
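+/* Note on use (a sketch): a single CAS is built as
+
+      mkIRCAS( IRTemp_INVALID, oldLo, Iend_LE, addr,
+               NULL, expdLo, NULL, dataLo );
+
+   whereas a double CAS supplies all of oldHi, expdHi and dataHi.
+   Mixed forms are rejected by the sanity checker. */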
+
+
/* Constructors -- IRStmt */
IRStmt* IRStmt_NoOp ( void )
s->Ist.WrTmp.data = data;
return s;
}
-IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data ) {
- IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
- s->tag = Ist_Store;
- s->Ist.Store.end = end;
- s->Ist.Store.addr = addr;
- s->Ist.Store.data = data;
+IRStmt* IRStmt_Store ( IREndness end,
+ IRTemp resSC, IRExpr* addr, IRExpr* data ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_Store;
+ s->Ist.Store.end = end;
+ s->Ist.Store.resSC = resSC;
+ s->Ist.Store.addr = addr;
+ s->Ist.Store.data = data;
vassert(end == Iend_LE || end == Iend_BE);
return s;
}
+IRStmt* IRStmt_CAS ( IRCAS* cas ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_CAS;
+ s->Ist.CAS.details = cas;
+ return s;
+}
IRStmt* IRStmt_Dirty ( IRDirty* d )
{
IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
return IRExpr_Unop(e->Iex.Unop.op,
deepCopyIRExpr(e->Iex.Unop.arg));
case Iex_Load:
- return IRExpr_Load(e->Iex.Load.end,
+ return IRExpr_Load(e->Iex.Load.isLL,
+ e->Iex.Load.end,
e->Iex.Load.ty,
deepCopyIRExpr(e->Iex.Load.addr));
case Iex_Const:
return d2;
}
+IRCAS* deepCopyIRCAS ( IRCAS* cas )
+{
+ return mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
+ deepCopyIRExpr(cas->addr),
+ deepCopyIRExpr(cas->expdHi),
+ deepCopyIRExpr(cas->expdLo),
+ deepCopyIRExpr(cas->dataHi),
+ deepCopyIRExpr(cas->dataLo) );
+}
+
IRStmt* deepCopyIRStmt ( IRStmt* s )
{
switch (s->tag) {
deepCopyIRExpr(s->Ist.WrTmp.data));
case Ist_Store:
return IRStmt_Store(s->Ist.Store.end,
+ s->Ist.Store.resSC,
deepCopyIRExpr(s->Ist.Store.addr),
deepCopyIRExpr(s->Ist.Store.data));
+ case Ist_CAS:
+ return IRStmt_CAS(deepCopyIRCAS(s->Ist.CAS.details));
case Ist_Dirty:
return IRStmt_Dirty(deepCopyIRDirty(s->Ist.Dirty.details));
case Ist_MBE:
Int i;
IRExpr* e;
IRDirty* di;
+ IRCAS* cas;
switch (st->tag) {
case Ist_AbiHint:
case Ist_Store:
return toBool( isIRAtom(st->Ist.Store.addr)
&& isIRAtom(st->Ist.Store.data) );
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ return toBool( isIRAtom(cas->addr)
+ && (cas->expdHi ? isIRAtom(cas->expdHi) : True)
+ && isIRAtom(cas->expdLo)
+ && (cas->dataHi ? isIRAtom(cas->dataHi) : True)
+ && isIRAtom(cas->dataLo) );
case Ist_Dirty:
di = st->Ist.Dirty.details;
if (!isIRAtom(di->guard))
{
Int i;
IRDirty* d;
+ IRCAS* cas;
switch (stmt->tag) {
case Ist_IMark:
break;
useBeforeDef_Expr(bb,stmt,stmt->Ist.Store.addr,def_counts);
useBeforeDef_Expr(bb,stmt,stmt->Ist.Store.data,def_counts);
break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+ useBeforeDef_Expr(bb,stmt,cas->addr,def_counts);
+ if (cas->expdHi)
+ useBeforeDef_Expr(bb,stmt,cas->expdHi,def_counts);
+ useBeforeDef_Expr(bb,stmt,cas->expdLo,def_counts);
+ if (cas->dataHi)
+ useBeforeDef_Expr(bb,stmt,cas->dataHi,def_counts);
+ useBeforeDef_Expr(bb,stmt,cas->dataLo,def_counts);
+ break;
case Ist_Dirty:
d = stmt->Ist.Dirty.details;
for (i = 0; d->args[i] != NULL; i++)
{
Int i;
IRDirty* d;
+ IRCAS* cas;
+ IRType tyExpd, tyData;
IRTypeEnv* tyenv = bb->tyenv;
switch (stmt->tag) {
case Ist_IMark:
sanityCheckFail(bb,stmt,"IRStmt.Store.data: cannot Store :: Ity_I1");
if (stmt->Ist.Store.end != Iend_LE && stmt->Ist.Store.end != Iend_BE)
sanityCheckFail(bb,stmt,"Ist.Store.end: bogus endianness");
+ if (stmt->Ist.Store.resSC != IRTemp_INVALID
+ && typeOfIRTemp(tyenv, stmt->Ist.Store.resSC) != Ity_I1)
+ sanityCheckFail(bb,stmt,"Ist.Store.resSC: not :: Ity_I1");
+ break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+ /* make sure it's definitely either a CAS or a DCAS */
+ if (cas->oldHi == IRTemp_INVALID
+ && cas->expdHi == NULL && cas->dataHi == NULL) {
+ /* fine; it's a single cas */
+ }
+ else
+ if (cas->oldHi != IRTemp_INVALID
+ && cas->expdHi != NULL && cas->dataHi != NULL) {
+ /* fine; it's a double cas */
+ }
+ else {
+ /* it's some el-mutanto hybrid */
+ goto bad_cas;
+ }
+ /* check the address type */
+ tcExpr( bb, stmt, cas->addr, gWordTy );
+ if (typeOfIRExpr(tyenv, cas->addr) != gWordTy) goto bad_cas;
+ /* check types on the {old,expd,data}Lo components agree */
+ tyExpd = typeOfIRExpr(tyenv, cas->expdLo);
+ tyData = typeOfIRExpr(tyenv, cas->dataLo);
+ if (tyExpd != tyData) goto bad_cas;
+ if (tyExpd != typeOfIRTemp(tyenv, cas->oldLo))
+ goto bad_cas;
+ /* check the base element type is sane */
+ if (tyExpd == Ity_I8 || tyExpd == Ity_I16 || tyExpd == Ity_I32
+ || (gWordTy == Ity_I64 && tyExpd == Ity_I64)) {
+ /* fine */
+ } else {
+ goto bad_cas;
+ }
+ /* If it's a DCAS, check types on the {old,expd,data}Hi
+ components too */
+ if (cas->oldHi != IRTemp_INVALID) {
+ tyExpd = typeOfIRExpr(tyenv, cas->expdHi);
+ tyData = typeOfIRExpr(tyenv, cas->dataHi);
+ if (tyExpd != tyData) goto bad_cas;
+ if (tyExpd != typeOfIRTemp(tyenv, cas->oldHi))
+ goto bad_cas;
+ /* and finally check that oldLo and oldHi have the same
+ type. This forces equivalence amongst all 6 types. */
+ if (typeOfIRTemp(tyenv, cas->oldHi)
+ != typeOfIRTemp(tyenv, cas->oldLo))
+ goto bad_cas;
+ }
+ break;
+ bad_cas:
+ sanityCheckFail(bb,stmt,"IRStmt.CAS: ill-formed");
break;
case Ist_Dirty:
/* Mostly check for various kinds of ill-formed dirty calls. */
break;
bad_dirty:
sanityCheckFail(bb,stmt,"IRStmt.Dirty: ill-formed");
+ break;
case Ist_NoOp:
break;
case Ist_MBE:
switch (stmt->Ist.MBE.event) {
- case Imbe_Fence: case Imbe_BusLock: case Imbe_BusUnlock:
- case Imbe_SnoopedStoreBegin: case Imbe_SnoopedStoreEnd:
+ case Imbe_Fence:
break;
default: sanityCheckFail(bb,stmt,"IRStmt.MBE.event: unknown");
break;
def_counts[i] = 0;
for (i = 0; i < bb->stmts_used; i++) {
+ IRDirty* d;
+ IRCAS* cas;
stmt = bb->stmts[i];
+ /* Check any temps used by this statement. */
useBeforeDef_Stmt(bb,stmt,def_counts);
- if (stmt->tag == Ist_WrTmp) {
+ /* Now make note of any temps defd by this statement. */
+ switch (stmt->tag) {
+ case Ist_WrTmp:
if (stmt->Ist.WrTmp.tmp < 0 || stmt->Ist.WrTmp.tmp >= n_temps)
sanityCheckFail(bb, stmt,
"IRStmt.Tmp: destination tmp is out of range");
if (def_counts[stmt->Ist.WrTmp.tmp] > 1)
sanityCheckFail(bb, stmt,
"IRStmt.Tmp: destination tmp is assigned more than once");
- }
- else
- if (stmt->tag == Ist_Dirty
- && stmt->Ist.Dirty.details->tmp != IRTemp_INVALID) {
- IRDirty* d = stmt->Ist.Dirty.details;
- if (d->tmp < 0 || d->tmp >= n_temps)
- sanityCheckFail(bb, stmt,
- "IRStmt.Dirty: destination tmp is out of range");
- def_counts[d->tmp]++;
- if (def_counts[d->tmp] > 1)
- sanityCheckFail(bb, stmt,
- "IRStmt.Dirty: destination tmp is assigned more than once");
+ break;
+ case Ist_Store:
+ if (stmt->Ist.Store.resSC != IRTemp_INVALID) {
+ IRTemp resSC = stmt->Ist.Store.resSC;
+ if (resSC < 0 || resSC >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Store.resSC: destination tmp is out of range");
+ def_counts[resSC]++;
+ if (def_counts[resSC] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Store.resSC: destination tmp "
+ "is assigned more than once");
+ }
+ break;
+ case Ist_Dirty:
+ if (stmt->Ist.Dirty.details->tmp != IRTemp_INVALID) {
+ d = stmt->Ist.Dirty.details;
+ if (d->tmp < 0 || d->tmp >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Dirty: destination tmp is out of range");
+ def_counts[d->tmp]++;
+ if (def_counts[d->tmp] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Dirty: destination tmp is assigned more than once");
+ }
+ break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+
+ if (cas->oldHi != IRTemp_INVALID) {
+ if (cas->oldHi < 0 || cas->oldHi >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpHi is out of range");
+ def_counts[cas->oldHi]++;
+ if (def_counts[cas->oldHi] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpHi is assigned more than once");
+ }
+ if (cas->oldLo < 0 || cas->oldLo >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpLo is out of range");
+ def_counts[cas->oldLo]++;
+ if (def_counts[cas->oldLo] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpLo is assigned more than once");
+ break;
+ default:
+ /* explicitly handle the rest, so as to keep gcc quiet */
+ break;
}
}
case Iex_Binder: /* aha, what we were looking for. */
setBindee(mi, p->Iex.Binder.binder, e);
return True;
-#if 0
- case Iex_GetI:
- if (e->tag != Iex_GetI) return False;
- if (p->Iex.GetI.ty != e->Iex.GetI.ty) return False;
- /* we ignore the offset limit hints .. */
- if (!matchWrk(mi, p->Iex.GetI.offset, e->Iex.GetI.offset))
- return False;
- return True;
-#endif
case Iex_Unop:
if (e->tag != Iex_Unop) return False;
if (p->Iex.Unop.op != e->Iex.Unop.op) return False;
if (!matchWrk(mi, p->Iex.Unop.arg, e->Iex.Unop.arg))
return False;
return True;
case Iex_Load:
if (e->tag != Iex_Load) return False;
+ if (p->Iex.Load.isLL != e->Iex.Load.isLL) return False;
if (p->Iex.Load.end != e->Iex.Load.end) return False;
if (p->Iex.Load.ty != e->Iex.Load.ty) return False;
if (!matchWrk(mi, p->Iex.Load.addr, e->Iex.Load.addr))
case Iex_Load:
t1 = newIRTemp(bb->tyenv, ty);
addStmtToIRSB(bb, IRStmt_WrTmp(t1,
- IRExpr_Load(ex->Iex.Load.end,
+ IRExpr_Load(ex->Iex.Load.isLL,
+ ex->Iex.Load.end,
ex->Iex.Load.ty,
flatten_Expr(bb, ex->Iex.Load.addr))));
return IRExpr_RdTmp(t1);
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
Int i;
- IRExpr *e1, *e2;
+ IRExpr *e1, *e2, *e3, *e4, *e5;
IRDirty *d, *d2;
+ IRCAS *cas, *cas2;
switch (st->tag) {
case Ist_Put:
if (isIRAtom(st->Ist.Put.data)) {
case Ist_Store:
e1 = flatten_Expr(bb, st->Ist.Store.addr);
e2 = flatten_Expr(bb, st->Ist.Store.data);
- addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
+ addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end,
+ st->Ist.Store.resSC, e1,e2));
+ break;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ e1 = flatten_Expr(bb, cas->addr);
+ e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
+ e3 = flatten_Expr(bb, cas->expdLo);
+ e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
+ e5 = flatten_Expr(bb, cas->dataLo);
+ cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
+ e1, e2, e3, e4, e5 );
+ addStmtToIRSB(bb, IRStmt_CAS(cas2));
break;
case Ist_Dirty:
d = st->Ist.Dirty.details;
enough do a lot better if needed. */
/* Probably also overly-conservative, but also dump everything
if we hit a memory bus event (fence, lock, unlock). Ditto
- AbiHints.*/
+ AbiHints and CASs. */
case Ist_AbiHint:
vassert(isIRAtom(st->Ist.AbiHint.base));
vassert(isIRAtom(st->Ist.AbiHint.nia));
/* fall through */
case Ist_MBE:
case Ist_Dirty:
+ case Ist_CAS:
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
break;
case Iex_Load:
vassert(isIRAtom(ex->Iex.Load.addr));
return IRExpr_Load(
+ ex->Iex.Load.isLL,
ex->Iex.Load.end,
ex->Iex.Load.ty,
subst_Expr(env, ex->Iex.Load.addr)
vassert(isIRAtom(st->Ist.Store.data));
return IRStmt_Store(
st->Ist.Store.end,
+ st->Ist.Store.resSC,
fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
fold_Expr(subst_Expr(env, st->Ist.Store.data))
);
+ case Ist_CAS: {
+ IRCAS *cas, *cas2;
+ cas = st->Ist.CAS.details;
+ vassert(isIRAtom(cas->addr));
+ vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+ vassert(isIRAtom(cas->expdLo));
+ vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+ vassert(isIRAtom(cas->dataLo));
+ cas2 = mkIRCAS(
+ cas->oldHi, cas->oldLo, cas->end,
+ fold_Expr(subst_Expr(env, cas->addr)),
+ cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
+ fold_Expr(subst_Expr(env, cas->expdLo)),
+ cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
+ fold_Expr(subst_Expr(env, cas->dataLo))
+ );
+ return IRStmt_CAS(cas2);
+ }
+
case Ist_Dirty: {
Int i;
IRDirty *d, *d2;
{
Int i;
IRDirty* d;
+ IRCAS* cas;
switch (st->tag) {
case Ist_AbiHint:
addUses_Expr(set, st->Ist.AbiHint.base);
addUses_Expr(set, st->Ist.Store.addr);
addUses_Expr(set, st->Ist.Store.data);
return;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ addUses_Expr(set, cas->addr);
+ if (cas->expdHi)
+ addUses_Expr(set, cas->expdHi);
+ addUses_Expr(set, cas->expdLo);
+ if (cas->dataHi)
+ addUses_Expr(set, cas->dataHi);
+ addUses_Expr(set, cas->dataLo);
+ return;
case Ist_Dirty:
d = st->Ist.Dirty.details;
if (d->mFx != Ifx_None)
to do the no-overlap assessments needed for Put/PutI.
*/
switch (st->tag) {
- case Ist_Dirty: case Ist_Store: case Ist_MBE:
+ case Ist_Dirty: case Ist_Store: case Ist_MBE: case Ist_CAS:
paranoia = 2; break;
case Ist_Put: case Ist_PutI:
paranoia = 1; break;
/* just be paranoid ... these should be rare. */
return True;
+ case Ist_CAS:
+ /* This is unbelievably lame, but it's probably not
+ significant from a performance point of view. Really, a
+ CAS is a load-store op, so it should be safe to say False.
+ However, for now just be conservative and say True. */
+ return True;
+
case Ist_Dirty:
/* If the dirty call has any guest effects at all, give up.
Probably could do better. */
deltaIRExpr(st->Ist.Exit.guard, delta);
break;
case Ist_Store:
+ if (st->Ist.Store.resSC != IRTemp_INVALID)
+ st->Ist.Store.resSC += delta;
deltaIRExpr(st->Ist.Store.addr, delta);
deltaIRExpr(st->Ist.Store.data, delta);
break;
+ case Ist_CAS:
+ if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
+ st->Ist.CAS.details->oldHi += delta;
+ st->Ist.CAS.details->oldLo += delta;
+ deltaIRExpr(st->Ist.CAS.details->addr, delta);
+ if (st->Ist.CAS.details->expdHi)
+ deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
+ deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
+ if (st->Ist.CAS.details->dataHi)
+ deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
+ deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
+ break;
case Ist_Dirty:
d = st->Ist.Dirty.details;
deltaIRExpr(d->guard, delta);
{
Int i;
IRDirty* d;
+ IRCAS* cas;
switch (st->tag) {
case Ist_AbiHint:
aoccCount_Expr(uses, st->Ist.AbiHint.base);
aoccCount_Expr(uses, st->Ist.Store.addr);
aoccCount_Expr(uses, st->Ist.Store.data);
return;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ aoccCount_Expr(uses, cas->addr);
+ if (cas->expdHi)
+ aoccCount_Expr(uses, cas->expdHi);
+ aoccCount_Expr(uses, cas->expdLo);
+ if (cas->dataHi)
+ aoccCount_Expr(uses, cas->dataHi);
+ aoccCount_Expr(uses, cas->dataLo);
+ return;
case Ist_Dirty:
d = st->Ist.Dirty.details;
if (d->mFx != Ifx_None)
);
case Iex_Load:
return IRExpr_Load(
+ e->Iex.Load.isLL,
e->Iex.Load.end,
e->Iex.Load.ty,
atbSubst_Expr(env, e->Iex.Load.addr)
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
- Int i;
- IRDirty* d;
- IRDirty* d2;
+ Int i;
+ IRDirty *d, *d2;
+ IRCAS *cas, *cas2;
switch (st->tag) {
case Ist_AbiHint:
return IRStmt_AbiHint(
case Ist_Store:
return IRStmt_Store(
st->Ist.Store.end,
+ st->Ist.Store.resSC,
atbSubst_Expr(env, st->Ist.Store.addr),
atbSubst_Expr(env, st->Ist.Store.data)
);
return IRStmt_NoOp();
case Ist_MBE:
return IRStmt_MBE(st->Ist.MBE.event);
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ cas2 = mkIRCAS(
+ cas->oldHi, cas->oldLo, cas->end,
+ atbSubst_Expr(env, cas->addr),
+ cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
+ atbSubst_Expr(env, cas->expdLo),
+ cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
+ atbSubst_Expr(env, cas->dataLo)
+ );
+ return IRStmt_CAS(cas2);
case Ist_Dirty:
d = st->Ist.Dirty.details;
d2 = emptyIRDirty();
youngest. */
/* stmtPuts/stmtStores characterise what the stmt under
- consideration does. */
- stmtPuts = toBool(st->tag == Ist_Put
- || st->tag == Ist_PutI
- || st->tag == Ist_Dirty);
-
- stmtStores = toBool(st->tag == Ist_Store
- || st->tag == Ist_Dirty);
+ consideration does, or might do (it is always safe to err on
+ the side of True). */
+ stmtPuts
+ = toBool( st->tag == Ist_Put
+ || st->tag == Ist_PutI
+ || st->tag == Ist_Dirty );
+
+ /* stmtStores is True if this stmt writes memory, or might do so
+ (==> we don't want to reorder other loads or stores relative
+ to it). A load-linked also falls under this classification,
+ since we really ought to be conservative and not reorder any
+ other memory transactions relative to it. */
+ stmtStores
+ = toBool( st->tag == Ist_Store
+ || (st->tag == Ist_WrTmp
+ && st->Ist.WrTmp.data->tag == Iex_Load
+ && st->Ist.WrTmp.data->Iex.Load.isLL)
+ || st->tag == Ist_Dirty );
for (k = A_NENV-1; k >= 0; k--) {
if (env[k].bindee == NULL)
/*OUT*/Bool* hasVorFtemps,
IRSB* bb )
{
- Int i, j;
- IRStmt* st;
+ Int i, j;
+ IRStmt* st;
IRDirty* d;
+ IRCAS* cas;
*hasGetIorPutI = False;
*hasVorFtemps = False;
vassert(isIRAtom(st->Ist.Store.addr));
vassert(isIRAtom(st->Ist.Store.data));
break;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ vassert(isIRAtom(cas->addr));
+ vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+ vassert(isIRAtom(cas->expdLo));
+ vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+ vassert(isIRAtom(cas->dataLo));
+ break;
case Ist_Dirty:
d = st->Ist.Dirty.details;
vassert(isIRAtom(d->guard));
| VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3))
return "x86-sse1-sse2-sse3";
- return False;
+ return NULL;
}
static HChar* show_hwcaps_amd64 ( UInt hwcaps )
{
- /* Monotonic, SSE3 > baseline. */
- if (hwcaps == 0)
- return "amd64-sse2";
- if (hwcaps == VEX_HWCAPS_AMD64_SSE3)
- return "amd64-sse3";
- return False;
+ /* SSE3 and CX16 are orthogonal and > baseline, although we really
+ don't expect to come across anything which can do SSE3 but can't
+ do CX16. Still, we can handle that case. */
+ const UInt SSE3 = VEX_HWCAPS_AMD64_SSE3;
+ const UInt CX16 = VEX_HWCAPS_AMD64_CX16;
+ UInt c = hwcaps;
+ if (c == 0) return "amd64-sse2";
+ if (c == SSE3) return "amd64-sse3";
+ if (c == CX16) return "amd64-sse2-cx16";
+ if (c == (SSE3|CX16)) return "amd64-sse3-cx16";
+ return NULL;
}
static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
but not SSE1). LibVEX_Translate will check for nonsensical
combinations. */
-/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE) */
+/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with
+ cmpxchg8b. */
#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
-/* amd64: baseline capability is SSE2 */
+/* amd64: baseline capability is SSE2, with cmpxchg8b but not
+ cmpxchg16b. */
#define VEX_HWCAPS_AMD64_SSE3 (1<<4) /* SSE3 support */
+#define VEX_HWCAPS_AMD64_CX16 (1<<5) /* cmpxchg16b support */
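+/* For example, a CPU that can do both SSE3 and CX16 presents
+   hwcaps == (VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16),
+   which show_hwcaps_amd64 renders as "amd64-sse3-cx16". */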
/* ppc32: baseline capability is integer only */
-#define VEX_HWCAPS_PPC32_F (1<<5) /* basic (non-optional) FP */
-#define VEX_HWCAPS_PPC32_V (1<<6) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC32_FX (1<<7) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC32_GX (1<<8) /* Graphics extns
+#define VEX_HWCAPS_PPC32_F (1<<6) /* basic (non-optional) FP */
+#define VEX_HWCAPS_PPC32_V (1<<7) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC32_FX (1<<8) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC32_GX (1<<9) /* Graphics extns
(fres,frsqrte,fsel,stfiwx) */
/* ppc64: baseline capability is integer and basic FP insns */
-#define VEX_HWCAPS_PPC64_V (1<<9) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC64_FX (1<<10) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC64_GX (1<<11) /* Graphics extns
+#define VEX_HWCAPS_PPC64_V (1<<10) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC64_FX (1<<11) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC64_GX (1<<12) /* Graphics extns
(fres,frsqrte,fsel,stfiwx) */
/* arm: baseline capability is ARMv4 */
/* Emulation warnings */
/* 940 */ UInt guest_EMWARN;
- /* For lwarx/stwcx.: 0 == no reservation exists, non-0 == a
- reservation exists. */
- /* 944 */ UInt guest_RESVN;
-
/* For icbi: record start and length of area to invalidate */
- /* 948 */ UInt guest_TISTART;
- /* 952 */ UInt guest_TILEN;
+ /* 944 */ UInt guest_TISTART;
+ /* 948 */ UInt guest_TILEN;
/* Used to record the unredirected guest address at the start of
a translation whose start has been redirected. By reading
find out what the corresponding no-redirection address was.
Note, this is only set for wrap-style redirects, not for
replace-style ones. */
- /* 956 */ UInt guest_NRADDR;
- /* 960 */ UInt guest_NRADDR_GPR2; /* needed by aix */
+ /* 952 */ UInt guest_NRADDR;
+ /* 956 */ UInt guest_NRADDR_GPR2; /* needed by aix */
/* A grows-upwards stack for hidden saves/restores of LR and R2
needed for function interception and wrapping on ppc32-aix5.
A horrible hack. REDIR_SP points to the highest live entry,
and so starts at -1. */
- /* 964 */ UInt guest_REDIR_SP;
- /* 968 */ UInt guest_REDIR_STACK[VEX_GUEST_PPC32_REDIR_STACK_SIZE];
+ /* 960 */ UInt guest_REDIR_SP;
+ /* 964 */ UInt guest_REDIR_STACK[VEX_GUEST_PPC32_REDIR_STACK_SIZE];
/* Needed for AIX (but mandated for all guest architectures):
CIA at the last SC insn. Used when backing up to restart a
/* 1088 */ ULong guest_TISTART;
/* 1096 */ ULong guest_TILEN;
- /* For lwarx/stwcx.: 0 == no reservation exists, non-0 == a
- reservation exists. */
- /* 1104 */ ULong guest_RESVN;
-
/* Used to record the unredirected guest address at the start of
a translation whose start has been redirected. By reading
this pseudo-register shortly afterwards, the translation can
find out what the corresponding no-redirection address was.
Note, this is only set for wrap-style redirects, not for
replace-style ones. */
- /* 1112 */ ULong guest_NRADDR;
- /* 1120 */ ULong guest_NRADDR_GPR2;
+ /* 1104 */ ULong guest_NRADDR;
+ /* 1112 */ ULong guest_NRADDR_GPR2;
/* A grows-upwards stack for hidden saves/restores of LR and R2
needed for function interception and wrapping on ppc64-linux.
A horrible hack. REDIR_SP points to the highest live entry,
and so starts at -1. */
- /* 1128 */ ULong guest_REDIR_SP;
- /* 1136 */ ULong guest_REDIR_STACK[VEX_GUEST_PPC64_REDIR_STACK_SIZE];
+ /* 1120 */ ULong guest_REDIR_SP;
+ /* 1128 */ ULong guest_REDIR_STACK[VEX_GUEST_PPC64_REDIR_STACK_SIZE];
/* Needed for AIX: CIA at the last SC insn. Used when backing up
to restart a syscall that has been interrupted by a signal. */
- /* 1392 */ ULong guest_IP_AT_SYSCALL;
+ /* 1384 */ ULong guest_IP_AT_SYSCALL;
/* SPRG3, which AIUI is readonly in user space. Needed for
threading on AIX. */
/* ???? */ ULong guest_SPRG3_RO;
/* Padding to make it have an 8-aligned size */
- /* UInt padding; */
+ /* ???? */ ULong padding2;
}
VexGuestPPC64State;
IRExpr* arg; /* operand */
} Unop;
- /* A load from memory.
+ /* A load from memory. If .isLL is True then this load also
+ lodges a reservation (ppc-style lwarx/ldarx operation). In
+ that case the address must also be naturally aligned - any
+ misaligned address should be caught by a dominating IR check
+ and side exit. This alignment restriction exists because on
+ at least some LL/SC platforms (ppc), lwarx etc will trap with
+ SIGBUS on misaligned addresses, and we have to actually
+ generate lwarx on the host, and we don't want it trapping on
+ the host. (See the sketch after the IRExpr_* constructors
+ below.)
+
ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
*/
struct {
+ Bool isLL; /* True iff load makes a reservation */
IREndness end; /* Endian-ness of the load */
IRType ty; /* Type of the loaded value */
IRExpr* addr; /* Address being loaded from */
IRExpr* arg2, IRExpr* arg3 );
extern IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 );
extern IRExpr* IRExpr_Unop ( IROp op, IRExpr* arg );
-extern IRExpr* IRExpr_Load ( IREndness end, IRType ty, IRExpr* addr );
+extern IRExpr* IRExpr_Load ( Bool isLL, IREndness end,
+ IRType ty, IRExpr* addr );
extern IRExpr* IRExpr_Const ( IRConst* con );
extern IRExpr* IRExpr_CCall ( IRCallee* cee, IRType retty, IRExpr** args );
extern IRExpr* IRExpr_Mux0X ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );
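+
+/* Illustrative sketch only (not part of this patch): a normal
+   little-endian 32-bit load versus a load-linked of the same
+   location.  'addr' is assumed to be an already-built address atom.
+
+      IRExpr* ld   = IRExpr_Load(False, Iend_LE, Ity_I32, addr);
+      IRExpr* ldLL = IRExpr_Load(True,  Iend_LE, Ity_I32, addr);
+
+   The second form additionally lodges a reservation, and its
+   address must be naturally aligned, as described at Iex_Load. */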
Ijk_NoRedir, /* Jump to un-redirected guest addr */
Ijk_SigTRAP, /* current instruction synths SIGTRAP */
Ijk_SigSEGV, /* current instruction synths SIGSEGV */
+ Ijk_SigBUS, /* current instruction synths SIGBUS */
/* Unfortunately, various guest-dependent syscall kinds. They
all mean: do a syscall before continuing. */
Ijk_Sys_syscall, /* amd64 'syscall', ppc 'sc' */
typedef
enum {
Imbe_Fence=0x18000,
- Imbe_BusLock,
- Imbe_BusUnlock,
- Imbe_SnoopedStoreBegin,
- Imbe_SnoopedStoreEnd
}
IRMBusEvent;
extern void ppIRMBusEvent ( IRMBusEvent );
+/* --------------- Compare and Swap --------------- */
+
+/* This denotes an atomic compare and swap operation, either
+ a single-element one or a double-element one.
+
+ In the single-element case:
+
+ .addr is the memory address.
+ .end is the endianness with which memory is accessed
+
+ If the memory at .addr holds the same value as .expdLo, then
+ .dataLo is written there, else there is no write. In both
+ cases, the original value at .addr is copied into .oldLo.
+
+ Types: .expdLo, .dataLo and .oldLo must all have the same type.
+ It may be any integral type, viz: I8, I16, I32 or, for 64-bit
+ guests, I64.
+
+ .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
+ be NULL.
+
+ In the double-element case:
+
+ .addr is the memory address.
+ .end is the endianness with which memory is accessed
+
+ The operation is the same:
+
+ If the memory at .addr holds the same value as
+ .expdHi:.expdLo, then .dataHi:.dataLo is written there, else
+ there is no write. In both cases the original value at .addr
+ is copied into .oldHi:.oldLo.
+
+ Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
+ all have the same type, which may be any integral type, viz: I8,
+ I16, I32 or, for 64-bit guests, I64.
+
+ The double-element case is complicated by the issue of
+ endianness. In all cases, the two elements are understood to be
+ located adjacently in memory, starting at the address .addr.
+
+ If .end is Iend_LE, then the .xxxLo component is at the lower
+ address and the .xxxHi component is at the higher address, and
+ each component is itself stored little-endianly.
+
+ If .end is Iend_BE, then the .xxxHi component is at the lower
+ address and the .xxxLo component is at the higher address, and
+ each component is itself stored big-endianly.
+
+ This allows representing more cases than most architectures can
+ handle. For example, x86 cannot do DCAS on 8- or 16-bit elements.
+
+ How to know if the CAS succeeded?
+
+ * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
+ then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
+ stored at .addr, and the original value there was .oldLo (resp
+ .oldHi:.oldLo).
+
+ * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
+ then the CAS failed, and the original value at .addr was .oldLo
+ (resp. .oldHi:.oldLo).
+
+ Hence it is easy to know whether or not the CAS succeeded.
+*/
+typedef
+ struct {
+ IRTemp oldHi; /* old value of *addr is written here */
+ IRTemp oldLo;
+ IREndness end; /* endianness of the data in memory */
+ IRExpr* addr; /* store address */
+ IRExpr* expdHi; /* expected old value at *addr */
+ IRExpr* expdLo;
+ IRExpr* dataHi; /* new value for *addr */
+ IRExpr* dataLo;
+ }
+ IRCAS;
+
+extern void ppIRCAS ( IRCAS* cas );
+
+extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
+ IREndness end, IRExpr* addr,
+ IRExpr* expdHi, IRExpr* expdLo,
+ IRExpr* dataHi, IRExpr* dataLo );
+
+extern IRCAS* deepCopyIRCAS ( IRCAS* );
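+
+/* Example (an illustrative sketch, not part of the API): a 32-bit
+   atomic increment of the word at the address held in t2, on a
+   little-endian guest.  t1, t2 and t3 are assumed to be
+   already-allocated temps, with t3 holding the expected old value,
+   t1 receiving the actual old value, and 'bb' being the IRSB under
+   construction.
+
+      IRCAS* cas
+         = mkIRCAS( IRTemp_INVALID, t1, Iend_LE,
+                    IRExpr_RdTmp(t2),
+                    NULL, IRExpr_RdTmp(t3),
+                    NULL, IRExpr_Binop(Iop_Add32,
+                                       IRExpr_RdTmp(t3),
+                                       IRExpr_Const(IRConst_U32(1))) );
+      addStmtToIRSB(bb, IRStmt_CAS(cas));
+
+   This is exactly the case pretty-printed below as
+   t1 = CASle(t2 :: t3->Add32(t3,1)); the CAS succeeded iff t1
+   afterwards equals t3. */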
+
/* ------------------ Statements ------------------ */
/* The different kinds of statements. Their meaning is explained
Ist_PutI,
Ist_WrTmp,
Ist_Store,
+ Ist_CAS,
Ist_Dirty,
Ist_MBE, /* META (maybe) */
Ist_Exit
'st.Ist.Store.<fieldname>'.
For each kind of statement, we show what it looks like when
- pretty-printed with ppIRExpr().
+ pretty-printed with ppIRStmt().
*/
typedef
struct _IRStmt {
/* A no-op (usually resulting from IR optimisation). Can be
omitted without any effect.
- ppIRExpr output: IR-NoOp
+ ppIRStmt output: IR-NoOp
*/
struct {
} NoOp;
the IRSB). Contains the address and length of the
instruction.
- ppIRExpr output: ------ IMark(<addr>, <len>) ------,
+ ppIRStmt output: ------ IMark(<addr>, <len>) ------,
eg. ------ IMark(0x4000792, 5) ------,
*/
struct {
next (dynamic) instruction that will be executed. This is
to help Memcheck to origin tracking.
- ppIRExpr output: ====== AbiHint(<base>, <len>, <nia>) ======
+ ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
eg. ====== AbiHint(t1, 16, t2) ======
*/
struct {
} AbiHint;
/* Write a guest register, at a fixed offset in the guest state.
- ppIRExpr output: PUT(<offset>) = <data>, eg. PUT(60) = t1
+ ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
*/
struct {
Int offset; /* Offset into the guest state */
state. See the comment for GetI expressions for more
information.
- ppIRExpr output: PUTI<descr>[<ix>,<bias>] = <data>,
+ ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
eg. PUTI(64:8xF64)[t5,0] = t1
*/
struct {
reject any block containing a temporary which is not assigned
to exactly once.
- ppIRExpr output: t<tmp> = <data>, eg. t1 = 3
+ ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
*/
struct {
IRTemp tmp; /* Temporary (LHS of assignment) */
IRExpr* data; /* Expression (RHS of assignment) */
} WrTmp;
- /* Write a value to memory.
- ppIRExpr output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
+ /* Write a value to memory. Normally .resSC is
+ IRTemp_INVALID, denoting a normal store. If .resSC is
+ not IRTemp_INVALID, then this is a store-conditional,
+ which may fail or succeed depending on the outcome of a
+ previously lodged reservation on this address. .resSC is
+ written 1 if the store succeeds and 0 if it fails, and
+ must have type Ity_I1.
+
+ If .resSC is not IRTemp_INVALID, then also, the address
+ must be naturally aligned - any misaligned addresses
+ should be caught by a dominating IR check and side exit.
+ This alignment restriction exists because on at least some
+ LL/SC platforms (ppc), stwcx. etc will trap with SIGBUS on
+ misaligned addresses, and we have to actually generate
+ stwcx. on the host, and we don't want it trapping on the
+ host. (See the sketch after the IRStmt_* constructors
+ below.)
+
+ ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
*/
struct {
IREndness end; /* Endianness of the store */
+ IRTemp resSC; /* result of SC goes here (1 == success) */
IRExpr* addr; /* store address */
IRExpr* data; /* value to write */
} Store;
+ /* Do an atomic compare-and-swap operation. Semantics are
+ described above on a comment at the definition of IRCAS.
+
+ ppIRStmt output:
+ t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
+ eg
+ t1 = CASle(t2 :: t3->Add32(t3,1))
+ which denotes a 32-bit atomic increment
+ of a value at address t2
+
+ A double-element CAS may also be denoted, in which case <tmp>,
+ <expected> and <new> are all pairs of items, separated by
+ commas.
+ */
+ struct {
+ IRCAS* details;
+ } CAS;
+
/* Call (possibly conditionally) a C function that has side
effects (ie. is "dirty"). See the comments above the
IRDirty type declaration for more information.
- ppIRExpr output:
+ ppIRStmt output:
t<tmp> = DIRTY <guard> <effects>
::: <callee>(<args>)
eg.
- /* A memory bus event - a fence, or acquisition/release of the
- hardware bus lock. IR optimisation treats all these as fences
- across which no memory references may be moved.
- ppIRExpr output: MBusEvent-Fence,
- MBusEvent-BusLock, MBusEvent-BusUnlock.
+ /* A memory bus event - currently just a fence. IR optimisation
+ treats it as a barrier across which no memory references may
+ be moved.
+ ppIRStmt output: MBusEvent-Fence
*/
struct {
} MBE;
/* Conditional exit from the middle of an IRSB.
- ppIRExpr output: if (<guard>) goto {<jk>} <dst>
+ ppIRStmt output: if (<guard>) goto {<jk>} <dst>
eg. if (t69) goto {Boring} 0x4000AAA:I32
*/
struct {
extern IRStmt* IRStmt_PutI ( IRRegArray* descr, IRExpr* ix, Int bias,
IRExpr* data );
extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data );
-extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data );
+extern IRStmt* IRStmt_Store ( IREndness end,
+ IRTemp resSC, IRExpr* addr, IRExpr* data );
+extern IRStmt* IRStmt_CAS ( IRCAS* details );
extern IRStmt* IRStmt_Dirty ( IRDirty* details );
extern IRStmt* IRStmt_MBE ( IRMBusEvent event );
extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
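+
+/* Illustrative sketch only: a minimal load-linked/store-conditional
+   pair, of the kind a ppc front end might generate for lwarx/stwcx.
+   Here 'addr' is assumed to be an address atom, t0 and t1 Ity_I32
+   temps, tRes an Ity_I1 temp, and 'bb' the IRSB under construction.
+
+      addStmtToIRSB(bb,
+         IRStmt_WrTmp(t0,
+            IRExpr_Load(True, Iend_BE, Ity_I32, addr)));
+         (some computation producing a new value in t1)
+      addStmtToIRSB(bb,
+         IRStmt_Store(Iend_BE, tRes, addr, IRExpr_RdTmp(t1)));
+
+   The first statement lodges a reservation; the second succeeds
+   only if the reservation still holds, writing 1 to tRes on success
+   and 0 on failure.  Both addresses must be naturally aligned. */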
continuing */
#define VEX_TRC_JMP_SIGSEGV 87 /* deliver segv (SIGSEGV) before
continuing */
+#define VEX_TRC_JMP_SIGBUS 93 /* deliver SIGBUS before continuing */
+
#define VEX_TRC_JMP_EMWARN 63 /* deliver emulation warning before
continuing */
#define VEX_TRC_JMP_EMFAIL 83 /* emulation fatal error; abort system */