if (o == GOF(CTR) && is4) goto exactly1;
if (o == GOF(CIA) && is4) goto none;
if (o == GOF(IP_AT_SYSCALL) && is4) goto none;
- if (o == GOF(RESVN) && is4) goto none;
if (o == GOF(TISTART) && is4) goto none;
if (o == GOF(TILEN) && is4) goto none;
if (o == GOF(REDIR_SP) && is4) goto none;
if (o == GOF(CTR) && is8) goto exactly1;
if (o == GOF(CIA) && is8) goto none;
if (o == GOF(IP_AT_SYSCALL) && is8) goto none;
- if (o == GOF(RESVN) && is8) goto none;
if (o == GOF(TISTART) && is8) goto none;
if (o == GOF(TILEN) && is8) goto none;
if (o == GOF(REDIR_SP) && is8) goto none;
/*--- System calls ---*/
/*--------------------------------------------------------------------*/
-void h_pre_syscall ( ThreadId tid, UInt sysno )
+void h_pre_syscall ( ThreadId tid, UInt sysno,
+ UWord* args, UInt nArgs )
{
/* we don't do anything at the pre-syscall point */
}
# if defined(__NR_shmget)
ADD(1, __NR_shmget);
# endif
+# if defined(__NR_ipc) && defined(VKI_SHMAT)
+ ADD(1, __NR_ipc); /* ppc{32,64}-linux horrors */
+# endif
/* --------------- AIX5 --------------- */
}
-void h_post_syscall ( ThreadId tid, UInt sysno, SysRes res )
+void h_post_syscall ( ThreadId tid, UInt sysno,
+ UWord* args, UInt nArgs, SysRes res )
{
Word i, n;
UWordPair* pair;
/* Deal with the common case */
pair = VG_(indexXA)( post_syscall_table, i );
- if (pair->uw2 == 0) {
- /* the common case */
- VG_(set_syscall_return_shadows)(
- tid, /* retval */ (UWord)NONPTR, 0,
- /* error */ (UWord)NONPTR, 0
- );
- return;
- }
+ if (pair->uw2 == 0)
+ /* the common case */
+ goto res_NONPTR_err_NONPTR;
/* Special handling for all remaining cases */
tl_assert(pair->uw2 == 1);
syscall completes. */
post_reg_write_nonptr_or_unknown( tid, PC_OFF_FS_ZERO,
PC_SZB_FS_ZERO );
- VG_(set_syscall_return_shadows)(
- tid, /* retval */ (UWord)NONPTR, 0,
- /* error */ (UWord)NONPTR, 0
- );
- return;
+ goto res_NONPTR_err_NONPTR;
}
# endif
# if defined(__NR_brk)
// With brk(), result (of kernel syscall, not glibc wrapper) is a heap
// pointer. Make the shadow UNKNOWN.
- if (sysno == __NR_brk) {
- VG_(set_syscall_return_shadows)(
- tid, /* retval */ (UWord)UNKNOWN, 0,
- /* error */ (UWord)NONPTR, 0
- );
- return;
- }
+ if (sysno == __NR_brk)
+ goto res_UNKNOWN_err_NONPTR;
# endif
// With mmap, new_mem_mmap() has already been called and added the
) {
if (sr_isError(res)) {
// mmap() had an error, return value is a small negative integer
- VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)NONPTR, 0,
- /*err*/ (UWord)NONPTR, 0 );
- if (0) VG_(printf)("ZZZZZZZ mmap res -> NONPTR\n");
+ goto res_NONPTR_err_NONPTR;
} else {
- VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)UNKNOWN, 0,
- /*err*/ (UWord)NONPTR, 0 );
- if (0) VG_(printf)("ZZZZZZZ mmap res -> UNKNOWN\n");
+ goto res_UNKNOWN_err_NONPTR;
}
return;
}
# if defined(__NR_shmat)
if (sysno == __NR_shmat) {
if (sr_isError(res)) {
- VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)NONPTR, 0,
- /*err*/ (UWord)NONPTR, 0 );
- if (0) VG_(printf)("ZZZZZZZ shmat res -> NONPTR\n");
+ goto res_NONPTR_err_NONPTR;
} else {
- VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)UNKNOWN, 0,
- /*err*/ (UWord)NONPTR, 0 );
- if (0) VG_(printf)("ZZZZZZZ shmat res -> UNKNOWN\n");
+ goto res_UNKNOWN_err_NONPTR;
}
- return;
}
# endif
# if defined(__NR_shmget)
- if (sysno == __NR_shmget) {
+ if (sysno == __NR_shmget)
// FIXME: is this correct?
- VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)UNKNOWN, 0,
- /*err*/ (UWord)NONPTR, 0 );
- return;
+ goto res_UNKNOWN_err_NONPTR;
+# endif
+
+# if defined(__NR_ipc) && defined(VKI_SHMAT)
+ /* perhaps this should be further conditionalised with
+ && (defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)).
+ Note, this just copies the behaviour of __NR_shmget above.
+
+ JRS 2009 June 02: it seems that the return value from
+ sys_ipc(VKI_SHMAT, ...) doesn't have much relationship to the
+ result returned by the originating user-level shmat call. It's
+ different (and much lower) by a large but integral number of
+ pages. I don't have time to chase this right now. Observed on
+ ppc{32,64}-linux. Result appears to be false errors from apps
+ using shmat. It's confusing, though -- the problem shouldn't be
+ related to the actual numeric values returned by the syscall,
+ should it? Maybe some bad interaction with a
+ nonpointer-or-unknown heuristic? */
+ if (sysno == __NR_ipc) {
+ if (args[0] == VKI_SHMAT) {
+ goto res_UNKNOWN_err_NONPTR;
+ } else {
+ goto res_NONPTR_err_NONPTR;
+ }
}
# endif
post_syscall_table has .w2 == 1, which in turn implies there
should be special-case code for it above. */
tl_assert(0);
+
+ res_NONPTR_err_NONPTR:
+ VG_(set_syscall_return_shadows)( tid, /* retval */ (UWord)NONPTR, 0,
+ /* error */ (UWord)NONPTR, 0 );
+ return;
+
+ res_UNKNOWN_err_NONPTR:
+ VG_(set_syscall_return_shadows)( tid, /* retval */ (UWord)UNKNOWN, 0,
+ /* error */ (UWord)NONPTR, 0 );
+ return;
}
// ------------------ Store handlers ------------------ //
/* On 32 bit targets, we will use:
- check_store1 check_store2 check_store4_P
+ check_store1 check_store2 check_store4_P check_store4C_P
check_store4 (for 32-bit nonpointer stores)
check_store8_ms4B_ls4B (for 64-bit stores)
check_store16_ms4B_4B_4B_ls4B (for xmm/altivec stores)
On 64 bit targets, we will use:
- check_store1 check_store2 check_store4 check_store8_P
+ check_store1 check_store2 check_store4 check_store4C
+ check_store8_P check_store8C_P
check_store8_all8B (for 64-bit nonpointer stores)
check_store16_ms8B_ls8B (for xmm/altivec stores)
A "_P" handler writes a pointer to memory, and so has an extra
argument -- the pointer's shadow value. That implies that
- check_store4_P is only to be called on a 32 bit host and
- check_store8_P is only to be called on a 64 bit host. For all
+ check_store4{,C}_P are only to be called on a 32 bit host and
+ check_store8{,C}_P are only to be called on a 64 bit host. For all
other cases, and for the misaligned _P cases, the strategy is to
let the store go through, and then snoop around with
nonptr_or_unknown to fix up the shadow values of any affected
words. */
+/* Helpers for store-conditionals. Ugly kludge :-(
+ They all return 1 if the SC was successful and 0 if it failed. */
+static inline UWord do_store_conditional_32( Addr m/*dst*/, UInt t/*val*/ )
+{
+# if defined(VGA_ppc32) || defined(VGA_ppc64)
+ UWord success;
+ /* If this assertion fails, the underlying IR is (semantically) ill-formed
+ as per the IR spec for IRStmt_Store. */
+ tl_assert(VG_IS_4_ALIGNED(m));
+ __asm__ __volatile__(
+ "stwcx. %2,0,%1" "\n\t" /* data,0,addr */
+ "mfcr %0" "\n\t"
+ "srwi %0,%0,29" "\n\t" /* move relevant CR bit to LSB */
+ : /*out*/"=b"(success)
+ : /*in*/ "b"(m), "b"( (UWord)t )
+ : /*trash*/ "memory", "cc"
+ /* Note: srwi is OK even on a 64-bit host because we're only
+ after bit 29 (normal numbering) and we mask off all the
+ other junk just below. */
+ );
+ return success & (UWord)1;
+# else
+ tl_assert(0); /* not implemented on other platforms */
+# endif
+}
+
+static inline UWord do_store_conditional_64( Addr m/*dst*/, ULong t/*val*/ )
+{
+# if defined(VGA_ppc64)
+ UWord success;
+ /* If this assertion fails, the underlying IR is (semantically) ill-formed
+ as per the IR spec for IRStmt_Store. */
+ tl_assert(VG_IS_8_ALIGNED(m));
+ __asm__ __volatile__(
+ "stdcx. %2,0,%1" "\n\t" /* data,0,addr */
+ "mfcr %0" "\n\t"
+ "srdi %0,%0,29" "\n\t" /* move relevant CR bit to LSB */
+ : /*out*/"=b"(success)
+ : /*in*/ "b"(m), "b"( (UWord)t )
+ : /*trash*/ "memory", "cc"
+ );
+ return success & (UWord)1;
+# else
+ tl_assert(0); /* not implemented on other platforms */
+# endif
+}
+
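As an illustrative aside (not part of this change): the success bit both asm sequences extract is CR0.EQ, which mfcr leaves at bit 29 of the 32-bit CR image (counting the LSB as bit 0), hence the shift-by-29 and the final mask with 1. A minimal standalone sketch of the same extraction in plain C, with a made-up helper name:

static inline UWord cr0_eq_from_mfcr ( UWord cr )
{
   /* stwcx./stdcx. record success in CR0.EQ; after mfcr that field
      sits at bit 29 of the CR image, so shift it down and mask. */
   return (cr >> 29) & (UWord)1;  /* 1 => store-conditional succeeded */
}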
/* Apply nonptr_or_unknown to all the words intersecting
[a, a+len). */
static VG_REGPARM(2)
}
}
+// This handles 64 bit store-conditionals on 64 bit targets. It must
+// not be called on 32 bit targets.
+static VG_REGPARM(3)
+UWord check_store8C_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
+{
+ UWord success;
+ tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
+# if SC_SEGS
+ checkSeg(t_vseg);
+ checkSeg(mptr_vseg);
+# endif
+ check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
+ // Actually *do* the STORE here
+ success = do_store_conditional_64( m, t );
+ if (VG_IS_8_ALIGNED(m)) {
+ set_mem_vseg( m, t_vseg );
+ } else {
+ // straddling two words
+ nonptr_or_unknown_range(m, 8);
+ }
+ return success;
+}
+
// This handles 32 bit stores on 32 bit targets. It must
// not be called on 64 bit targets.
static VG_REGPARM(3)
}
}
+// This handles 32 bit store-conditionals on 32 bit targets. It must
+// not be called on 64 bit targets.
+static VG_REGPARM(3)
+UWord check_store4C_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
+{
+ UWord success;
+ tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
+# if SC_SEGS
+ checkSeg(t_vseg);
+ checkSeg(mptr_vseg);
+# endif
+ check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
+ // Actually *do* the STORE here
+ success = do_store_conditional_32( m, t );
+ if (VG_IS_4_ALIGNED(m)) {
+ set_mem_vseg( m, t_vseg );
+ } else {
+ // straddling two words
+ nonptr_or_unknown_range(m, 4);
+ }
+ return success;
+}
+
// Used for both 32 bit and 64 bit targets.
static VG_REGPARM(3)
void check_store4(Addr m, Seg* mptr_vseg, UWord t)
nonptr_or_unknown_range(m, 4);
}
+// Used for 32-bit store-conditionals on 64 bit targets only. It must
+// not be called on 32 bit targets.
+static VG_REGPARM(3)
+UWord check_store4C(Addr m, Seg* mptr_vseg, UWord t)
+{
+ UWord success;
+ tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
+# if SC_SEGS
+ checkSeg(mptr_vseg);
+# endif
+ check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
+ // Actually *do* the STORE here
+ success = do_store_conditional_32( m, t );
+ nonptr_or_unknown_range(m, 4);
+ return success;
+}
+
// Used for both 32 bit and 64 bit targets.
static VG_REGPARM(3)
void check_store2(Addr m, Seg* mptr_vseg, UWord t)
}
}
-/* Generate into 'ane', instrumentation for 'st'. Also copy 'st'
- itself into 'ane' (the caller does not do so). This is somewhat
+/* Generate into 'pce', instrumentation for 'st'. Also copy 'st'
+ itself into 'pce' (the caller does not do so). This is somewhat
complex and relies heavily on the assumption that the incoming IR
is in flat form.
the post-hoc ugly hack of inspecting and "improving" the
shadow data after the store, in the case where it isn't an
aligned word store.
+
+ Only word-sized values are shadowed. If this is a
+ store-conditional, .resSC will denote a non-word-typed
+ temp, and so we don't need to shadow it. Assert about the
+ type, tho. However, since we're not re-emitting the
+ original IRStmt_Store, but rather doing it as part of the
+ helper function, we need to actually do a SC in the
+ helper, and assign the result bit to .resSC. Ugly.
*/
IRExpr* data = st->Ist.Store.data;
IRExpr* addr = st->Ist.Store.addr;
IRType d_ty = typeOfIRExpr(pce->bb->tyenv, data);
IRExpr* addrv = schemeEw_Atom( pce, addr );
+ IRTemp resSC = st->Ist.Store.resSC;
+ if (resSC != IRTemp_INVALID) {
+ tl_assert(typeOfIRTemp(pce->bb->tyenv, resSC) == Ity_I1);
+ /* viz, not something we want to shadow */
+ /* also, throw out all store-conditional cases that
+ we can't handle */
+ if (pce->gWordTy == Ity_I32 && d_ty != Ity_I32)
+ goto unhandled;
+ if (pce->gWordTy == Ity_I64 && d_ty != Ity_I32 && d_ty != Ity_I64)
+ goto unhandled;
+ }
if (pce->gWordTy == Ity_I32) {
/* ------ 32 bit host/guest (cough, cough) ------ */
switch (d_ty) {
/* Integer word case */
case Ity_I32: {
IRExpr* datav = schemeEw_Atom( pce, data );
- gen_dirty_v_WWWW( pce,
- &check_store4_P, "check_store4_P",
- addr, addrv, data, datav );
+ if (resSC == IRTemp_INVALID) {
+ /* "normal" store */
+ gen_dirty_v_WWWW( pce,
+ &check_store4_P, "check_store4_P",
+ addr, addrv, data, datav );
+ } else {
+ /* store-conditional; need to snarf the success bit */
+ IRTemp resSC32
+ = gen_dirty_W_WWWW( pce,
+ &check_store4C_P,
+ "check_store4C_P",
+ addr, addrv, data, datav );
+ /* presumably resSC32 will really be Ity_I32. In
+ any case the IR sanity checker will complain
+ if it's not, when it sees the
+ following statement. */
+ assign( 'I', pce, resSC, unop(Iop_32to1, mkexpr(resSC32)) );
+ }
break;
}
/* Integer subword cases */
/* Integer word case */
case Ity_I64: {
IRExpr* datav = schemeEw_Atom( pce, data );
- gen_dirty_v_WWWW( pce,
- &check_store8_P, "check_store8_P",
- addr, addrv, data, datav );
+ if (resSC == IRTemp_INVALID) {
+ /* "normal" store */
+ gen_dirty_v_WWWW( pce,
+ &check_store8_P, "check_store8_P",
+ addr, addrv, data, datav );
+ } else {
+ IRTemp resSC64
+ = gen_dirty_W_WWWW( pce,
+ &check_store8C_P,
+ "check_store8C_P",
+ addr, addrv, data, datav );
+ assign( 'I', pce, resSC, unop(Iop_64to1, mkexpr(resSC64)) );
+ }
break;
}
/* Integer subword cases */
case Ity_I32:
- gen_dirty_v_WWW( pce,
- &check_store4, "check_store4",
- addr, addrv,
- uwiden_to_host_word( pce, data ));
+ if (resSC == IRTemp_INVALID) {
+ /* "normal" store */
+ gen_dirty_v_WWW( pce,
+ &check_store4, "check_store4",
+ addr, addrv,
+ uwiden_to_host_word( pce, data ));
+ } else {
+ /* store-conditional; need to snarf the success bit */
+ IRTemp resSC64
+ = gen_dirty_W_WWW( pce,
+ &check_store4C,
+ "check_store4C",
+ addr, addrv,
+ uwiden_to_host_word( pce, data ));
+ assign( 'I', pce, resSC, unop(Iop_64to1, mkexpr(resSC64)) );
+ }
break;
case Ity_I16:
gen_dirty_v_WWW( pce,
/*--- Memcheck running state, and tmp management. ---*/
/*------------------------------------------------------------*/
+/* Carries info about a particular tmp. The tmp's number is not
+ recorded, as this is implied by (equal to) its index in the tmpMap
+ in MCEnv. The tmp's type is also not recorded, as this is present
+ in MCEnv.sb->tyenv.
+
+ When .kind is Orig, .shadowV and .shadowB may give the identities
+ of the temps currently holding the associated definedness (shadowV)
+ and origin (shadowB) values, or these may be IRTemp_INVALID if code
+ to compute such values has not yet been emitted.
+
+ When .kind is VSh or BSh then the tmp holds a V- or B- value,
+ and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
+ illogical for a shadow tmp itself to be shadowed.
+*/
+typedef
+ enum { Orig=1, VSh=2, BSh=3 }
+ TempKind;
+
+typedef
+ struct {
+ TempKind kind;
+ IRTemp shadowV;
+ IRTemp shadowB;
+ }
+ TempMapEnt;
+
+
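As a hypothetical illustration (not part of this change) of the invariant stated above -- shadow temps are never themselves shadowed -- a sanity pass over a tmpMap built from these records might look like the sketch below; only the helper name is invented, the calls are the XArray/assert primitives already used here.

static void check_tmpMap_invariants ( XArray* /* of TempMapEnt */ tmpMap )
{
   Word i, n = VG_(sizeXA)( tmpMap );
   for (i = 0; i < n; i++) {
      TempMapEnt* ent = (TempMapEnt*)VG_(indexXA)( tmpMap, i );
      if (ent->kind != Orig) {
         /* VSh/BSh entries must never carry shadows of their own */
         tl_assert(ent->shadowV == IRTemp_INVALID);
         tl_assert(ent->shadowB == IRTemp_INVALID);
      }
   }
}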
/* Carries around state during memcheck instrumentation. */
typedef
struct _MCEnv {
/* MODIFIED: the superblock being constructed. IRStmts are
added. */
- IRSB* bb;
+ IRSB* sb;
Bool trace;
- /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
- original temps to their current their current shadow temp.
- Initially all entries are IRTemp_INVALID. Entries are added
- lazily since many original temps are not used due to
- optimisation prior to instrumentation. Note that floating
- point original tmps are shadowed by integer tmps of the same
- size, and Bit-typed original tmps are shadowed by the type
- Ity_I8. See comment below. */
- IRTemp* tmpMapV; /* V-bit tmp shadows */
- IRTemp* tmpMapB; /* origin tracking tmp shadows */
- Int n_originalTmps; /* for range checking */
+ /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
+ current kind and possibly shadow temps for each temp in the
+ IRSB being constructed. Note that it does not contain the
+ type of each tmp. If you want to know the type, look at the
+ relevant entry in sb->tyenv. It follows that at all times
+ during the instrumentation process, the valid indices for
+ tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
+ the total number of Orig, V- and B- temps allocated so far.
+
+ The reason for this strange split (types in one place, all
+ other info in another) is that we need the types to be
+ attached to sb so as to make it possible to do
+ "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
+ instrumentation process. */
+ XArray* /* of TempMapEnt */ tmpMap;
/* MODIFIED: indicates whether "bogus" literals have so far been
found. Starts off False, and may change to True. */
sanity checker should catch all such anomalies, however.
*/
+/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
+ both the table in mce->sb and to our auxiliary mapping. Note that
+ newTemp may cause mce->tmpMap to resize, hence previous results
+ from VG_(indexXA)(mce->tmpMap) are invalidated. */
+static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
+{
+ Word newIx;
+ TempMapEnt ent;
+ IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
+ ent.kind = kind;
+ ent.shadowV = IRTemp_INVALID;
+ ent.shadowB = IRTemp_INVALID;
+ newIx = VG_(addToXA)( mce->tmpMap, &ent );
+ tl_assert(newIx == (Word)tmp);
+ return tmp;
+}
+
+
/* Find the tmp currently shadowing the given original tmp. If none
so far exists, allocate one. */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
- tl_assert(orig < mce->n_originalTmps);
- if (mce->tmpMapV[orig] == IRTemp_INVALID) {
- mce->tmpMapV[orig]
- = newIRTemp(mce->bb->tyenv,
- shadowTypeV(mce->bb->tyenv->types[orig]));
+ TempMapEnt* ent;
+ /* VG_(indexXA) range-checks 'orig', hence no need to check
+ here. */
+ ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
+ tl_assert(ent->kind == Orig);
+ if (ent->shadowV == IRTemp_INVALID) {
+ IRTemp tmpV
+ = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
+ /* newTemp may cause mce->tmpMap to resize, hence previous results
+ from VG_(indexXA) are invalid. */
+ ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
+ tl_assert(ent->kind == Orig);
+ tl_assert(ent->shadowV == IRTemp_INVALID);
+ ent->shadowV = tmpV;
}
- return mce->tmpMapV[orig];
+ return ent->shadowV;
}
/* Allocate a new shadow for the given original tmp. This means any
necessary to give a new value to a shadow once it has been tested
for undefinedness, but unfortunately IR's SSA property disallows
this. Instead we must abandon the old shadow, allocate a new one
- and use that instead. */
+ and use that instead.
+
+ This is the same as findShadowTmpV, except we don't bother to see
+ if a shadow temp already existed -- we simply allocate a new one
+ regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
- tl_assert(orig < mce->n_originalTmps);
- mce->tmpMapV[orig]
- = newIRTemp(mce->bb->tyenv,
- shadowTypeV(mce->bb->tyenv->types[orig]));
+ TempMapEnt* ent;
+ /* VG_(indexXA) range-checks 'orig', hence no need to check
+ here. */
+ ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
+ tl_assert(ent->kind == Orig);
+ if (1) {
+ IRTemp tmpV
+ = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
+ /* newTemp may cause mce->tmpMap to resize, hence previous results
+ from VG_(indexXA) are invalid. */
+ ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
+ tl_assert(ent->kind == Orig);
+ ent->shadowV = tmpV;
+ }
}
{
if (a1->tag == Iex_Const)
return True;
- if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
- return True;
+ if (a1->tag == Iex_RdTmp) {
+ TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
+ return ent->kind == Orig;
+ }
return False;
}
{
if (a1->tag == Iex_Const)
return True;
- if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
- return True;
+ if (a1->tag == Iex_RdTmp) {
+ TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
+ return ent->kind == VSh || ent->kind == BSh;
+ }
return False;
}
ppIRStmt(st);
VG_(printf)("\n");
}
- addStmtToIRSB(mce->bb, st);
+ addStmtToIRSB(mce->sb, st);
}
/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
- stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
+ stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}
/* build various kinds of expressions */
an atom.
'ty' is the type of 'e' and hence the type that the new temporary
- needs to be. But passing it is redundant, since we can deduce the
- type merely by inspecting 'e'. So at least that fact to assert
- that the two types agree. */
-static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e ) {
- IRTemp t;
- IRType tyE = typeOfIRExpr(mce->bb->tyenv, e);
+ needs to be. But passing it in is redundant, since we can deduce
+ the type merely by inspecting 'e'. So at least use that fact to
+ assert that the two types agree. */
+static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
+{
+ TempKind k;
+ IRTemp t;
+ IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
- t = newIRTemp(mce->bb->tyenv, ty);
+ switch (cat) {
+ case 'V': k = VSh; break;
+ case 'B': k = BSh; break;
+ case 'C': k = Orig; break;
+ /* happens when we are making up new "orig"
+ expressions, for IRCAS handling */
+ default: tl_assert(0);
+ }
+ t = newTemp(mce, ty, k);
assign(cat, mce, t, e);
return mkexpr(t);
}
/* Note, dst_ty is a shadow type, not an original type. */
/* First of all, collapse vbits down to a single bit. */
tl_assert(isShadowAtom(mce,vbits));
- src_ty = typeOfIRExpr(mce->bb->tyenv, vbits);
+ src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
/* Fast-track some common cases */
if (src_ty == Ity_I32 && dst_ty == Ity_I32)
tl_assert(isShadowAtom(mce, vatom));
tl_assert(sameKindedAtoms(atom, vatom));
- ty = typeOfIRExpr(mce->bb->tyenv, vatom);
+ ty = typeOfIRExpr(mce->sb->tyenv, vatom);
/* sz is only used for constructing the error message */
sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
tl_assert(isShadowAtom(mce, vatom));
}
- ty = typeOfIRExpr(mce->bb->tyenv, vatom);
+ ty = typeOfIRExpr(mce->sb->tyenv, vatom);
tl_assert(ty != Ity_I1);
if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
/* later: no ... */
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
IRAtom* at;
- IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
- IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
+ IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
+ IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
tl_assert(isShadowAtom(mce,va1));
tl_assert(isShadowAtom(mce,va2));
IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
IRAtom* at;
- IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
- IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
- IRType t3 = typeOfIRExpr(mce->bb->tyenv, va3);
+ IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
+ IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
+ IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
tl_assert(isShadowAtom(mce,va1));
tl_assert(isShadowAtom(mce,va2));
tl_assert(isShadowAtom(mce,va3));
IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
IRAtom* at;
- IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
- IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
- IRType t3 = typeOfIRExpr(mce->bb->tyenv, va3);
- IRType t4 = typeOfIRExpr(mce->bb->tyenv, va4);
+ IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
+ IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
+ IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
+ IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
tl_assert(isShadowAtom(mce,va1));
tl_assert(isShadowAtom(mce,va2));
tl_assert(isShadowAtom(mce,va3));
tl_assert(isOriginalAtom(mce, exprvec[i]));
if (cee->mcx_mask & (1<<i))
continue;
- if (typeOfIRExpr(mce->bb->tyenv, exprvec[i]) != Ity_I64)
+ if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
mergeTy64 = False;
}
/* We need to have a place to park the V bits we're just about to
read. */
- datavbits = newIRTemp(mce->bb->tyenv, ty);
+ datavbits = newTemp(mce, ty, VSh);
di = unsafeIRDirty_1_N( datavbits,
1/*regparms*/,
hname, VG_(fnptr_to_fnentry)( helper ),
vbitsC = expr2vbits(mce, cond);
vbits0 = expr2vbits(mce, expr0);
vbitsX = expr2vbits(mce, exprX);
- ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
+ ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
return
mkUifU(mce, ty, assignNew('V', mce, ty,
return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
case Iex_Const:
- return definedOfType(shadowTypeV(typeOfIRExpr(mce->bb->tyenv, e)));
+ return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
case Iex_Qop:
return expr2vbits_Qop(
/* vatom is vbits-value and as such can only have a shadow type. */
tl_assert(isShadowAtom(mce,vatom));
- ty = typeOfIRExpr(mce->bb->tyenv, vatom);
+ ty = typeOfIRExpr(mce->sb->tyenv, vatom);
tyH = mce->hWordTy;
if (tyH == Ity_I32) {
/* Generate a shadow store. addr is always the original address atom.
You can pass in either originals or V-bits for the data atom, but
- obviously not both. */
+ obviously not both. guard :: Ity_I1 controls whether the store
+ really happens; NULL means it unconditionally does. Note that
+ guard itself is not checked for definedness; the caller of this
+ function must do that if necessary. */
static
void do_shadow_Store ( MCEnv* mce,
IREndness end,
IRAtom* addr, UInt bias,
- IRAtom* data, IRAtom* vdata )
+ IRAtom* data, IRAtom* vdata,
+ IRAtom* guard )
{
IROp mkAdd;
IRType ty, tyAddr;
tl_assert(isOriginalAtom(mce,addr));
tl_assert(isShadowAtom(mce,vdata));
- ty = typeOfIRExpr(mce->bb->tyenv, vdata);
+ if (guard) {
+ tl_assert(isOriginalAtom(mce, guard));
+ tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
+ }
+
+ ty = typeOfIRExpr(mce->sb->tyenv, vdata);
// If we're not doing undefined value checking, pretend that this value
// is "all valid". That lets Vex's optimiser remove some of the V bit
// shadow computation ops that precede it.
if (MC_(clo_mc_level) == 1) {
switch (ty) {
- case Ity_V128: c = IRConst_V128(V_BITS16_DEFINED); break; // V128 weirdness
+ case Ity_V128: // V128 weirdness
+ c = IRConst_V128(V_BITS16_DEFINED); break;
case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
hname, VG_(fnptr_to_fnentry)( helper ),
mkIRExprVec_2( addrHi64, vdataHi64 )
);
+ if (guard) diLo64->guard = guard;
+ if (guard) diHi64->guard = guard;
setHelperAnns( mce, diLo64 );
setHelperAnns( mce, diHi64 );
stmt( 'V', mce, IRStmt_Dirty(diLo64) );
zwidenToHostWord( mce, vdata ))
);
}
+ if (guard) di->guard = guard;
setHelperAnns( mce, di );
stmt( 'V', mce, IRStmt_Dirty(di) );
}
tl_assert(d->mAddr);
complainIfUndefined(mce, d->mAddr);
- tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
+ tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
tl_assert(tyAddr == mce->hWordTy); /* not really right */
}
/* Outputs: the destination temporary, if there is one. */
if (d->tmp != IRTemp_INVALID) {
dst = findShadowTmpV(mce, d->tmp);
- tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
+ tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
}
while (toDo >= 4) {
do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
NULL, /* original data */
- mkPCastTo( mce, Ity_I32, curr ) );
+ mkPCastTo( mce, Ity_I32, curr ),
+ NULL/*guard*/ );
toDo -= 4;
}
/* chew off 16-bit chunks */
while (toDo >= 2) {
do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
NULL, /* original data */
- mkPCastTo( mce, Ity_I16, curr ) );
+ mkPCastTo( mce, Ity_I16, curr ),
+ NULL/*guard*/ );
toDo -= 2;
}
tl_assert(toDo == 0); /* also need to handle 1-byte excess */
}
+
/* We have an ABI hint telling us that [base .. base+len-1] is to
become undefined ("writable"). Generate code to call a helper to
notify the A/V bit machinery of this fact.
}
+/* ------ Dealing with IRCAS (big and complex) ------ */
+
+/* FWDS */
+static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
+ IRAtom* baseaddr, Int offset );
+static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
+static void gen_store_b ( MCEnv* mce, Int szB,
+ IRAtom* baseaddr, Int offset, IRAtom* dataB,
+ IRAtom* guard );
+
+static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
+static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
+
+
+/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
+ IRExpr.Consts, else this asserts. If they are both Consts, it
+ doesn't do anything. So that just leaves the RdTmp case.
+
+ In which case: this assigns the shadow value SHADOW to the IR
+ shadow temporary associated with ORIG. That is, ORIG, being an
+ original temporary, will have a shadow temporary associated with
+ it. However, in the case envisaged here, there will so far have
+ been no IR emitted to actually write a shadow value into that
+ temporary. What this routine does is to (emit IR to) copy the
+ value in SHADOW into said temporary, so that after this call,
+ IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
+ value in SHADOW.
+
+ Point is to allow callers to compute "by hand" a shadow value for
+ ORIG, and force it to be associated with ORIG.
+
+ How do we know that the shadow associated with ORIG has not so far
+ been assigned to? Well, we don't know that per se, but suppose
+ it had. Then this routine would create a second assignment to it,
+ and later the IR sanity checker would barf. But that never
+ happens. QED.
+*/
+static void bind_shadow_tmp_to_orig ( UChar how,
+ MCEnv* mce,
+ IRAtom* orig, IRAtom* shadow )
+{
+ tl_assert(isOriginalAtom(mce, orig));
+ tl_assert(isShadowAtom(mce, shadow));
+ switch (orig->tag) {
+ case Iex_Const:
+ tl_assert(shadow->tag == Iex_Const);
+ break;
+ case Iex_RdTmp:
+ tl_assert(shadow->tag == Iex_RdTmp);
+ if (how == 'V') {
+ assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
+ shadow);
+ } else {
+ tl_assert(how == 'B');
+ assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
+ shadow);
+ }
+ break;
+ default:
+ tl_assert(0);
+ }
+}
+
+
+static
+void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
+{
+ /* Scheme is (both single- and double- cases):
+
+ 1. fetch data#,dataB (the proposed new value)
+
+ 2. fetch expd#,expdB (what we expect to see at the address)
+
+ 3. check definedness of address
+
+ 4. load old#,oldB from shadow memory; this also checks
+ addressability of the address
+
+ 5. the CAS itself
+
+ 6. complain if "expected == old" is undefined
+
+ 7. if "expected == old"
+ store data#,dataB to shadow memory
+
+ Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
+ 'data' but 7 stores 'data#'. Hence it is possible for the
+ shadow data to be incorrectly checked and/or updated:
+
+ * 6 could falsely complain if 4 read old# as undefined, but some
+ other thread wrote a defined value to the location after 4 but
+ before 5.
+
+ * 6 could falsely not-complain if 4 read old# as defined, but
+ some other thread wrote an undefined value to the location
+ after 4 but before 5.
+
+ * 7 is at least gated correctly, since the 'expected == old'
+ condition is derived from outputs of 5. However, the shadow
+ write could happen too late: imagine after 5 we are
+ descheduled, a different thread runs, writes a different
+ (shadow) value at the address, and then we resume, hence
+ overwriting the shadow value written by the other thread.
+
+ Because the original memory access is atomic, there's no way to
+ make both the original and shadow accesses into a single atomic
+ thing, hence this is unavoidable.
+
+ At least as Valgrind stands, I don't think it's a problem, since
+ we're single threaded *and* we guarantee that there are no
+ context switches during the execution of any specific superblock
+ -- context switches can only happen at superblock boundaries.
+
+ If Valgrind ever becomes MT in the future, then it might be more
+ of a problem. A possible kludge would be to artificially
+ associate a lock with the location, which we must acquire and
+ release around the transaction as a whole. Hmm, that probably
+ wouldn't work properly since it only guards us against other
+ threads doing CASs on the same location, not against other
+ threads doing normal reads and writes.
+ */
+ if (cas->oldHi == IRTemp_INVALID) {
+ do_shadow_CAS_single( mce, cas );
+ } else {
+ do_shadow_CAS_double( mce, cas );
+ }
+}
+
+
+static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
+{
+ IRAtom *vdataLo = NULL, *bdataLo = NULL;
+ IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
+ IRAtom *voldLo = NULL, *boldLo = NULL;
+ IRAtom *expd_eq_old_V = NULL, *expd_eq_old_B = NULL;
+ IRAtom *expd_eq_old = NULL;
+ IROp opCmpEQ;
+ Int elemSzB;
+ IRType elemTy;
+ Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
+
+ /* single CAS */
+ tl_assert(cas->oldHi == IRTemp_INVALID);
+ tl_assert(cas->expdHi == NULL);
+ tl_assert(cas->dataHi == NULL);
+
+ elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
+ switch (elemTy) {
+ case Ity_I8: elemSzB = 1; opCmpEQ = Iop_CmpEQ8; break;
+ case Ity_I16: elemSzB = 2; opCmpEQ = Iop_CmpEQ16; break;
+ case Ity_I32: elemSzB = 4; opCmpEQ = Iop_CmpEQ32; break;
+ case Ity_I64: elemSzB = 8; opCmpEQ = Iop_CmpEQ64; break;
+ default: tl_assert(0); /* IR defn disallows any other types */
+ }
+
+ /* 1. fetch data# (the proposed new value) */
+ tl_assert(isOriginalAtom(mce, cas->dataLo));
+ vdataLo
+ = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
+ tl_assert(isShadowAtom(mce, vdataLo));
+ if (otrak) {
+ bdataLo
+ = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
+ tl_assert(isShadowAtom(mce, bdataLo));
+ }
+
+ /* 2. fetch expected# (what we expect to see at the address) */
+ tl_assert(isOriginalAtom(mce, cas->expdLo));
+ vexpdLo
+ = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
+ tl_assert(isShadowAtom(mce, vexpdLo));
+ if (otrak) {
+ bexpdLo
+ = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
+ tl_assert(isShadowAtom(mce, bexpdLo));
+ }
+
+ /* 3. check definedness of address */
+ /* 4. fetch old# from shadow memory; this also checks
+ addressability of the address */
+ voldLo
+ = assignNew(
+ 'V', mce, elemTy,
+ expr2vbits_Load(
+ mce,
+ cas->end, elemTy, cas->addr, 0/*Addr bias*/
+ ));
+ if (otrak) {
+ boldLo
+ = assignNew('B', mce, Ity_I32,
+ gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
+ }
+
+ /* 5. the CAS itself */
+ stmt( 'C', mce, IRStmt_CAS(cas) );
+
+ /* 6. complain if "expected == old" is undefined */
+ /* Doing this directly interacts in a complex way with origin
+ tracking. Much easier to make up an expression tree and hand
+ that off to expr2vbits_Binop. We will need the expression
+ tree in any case in order to decide whether or not to do a
+ shadow store. */
+ /* Note that 'C' is kinda faking it; it is indeed a non-shadow
+ tree, but it's not copied from the input block. */
+ expd_eq_old
+ = assignNew('C', mce, Ity_I1,
+ binop(opCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
+
+ /* Compute into expd_eq_old_V the definedness for expd_eq_old.
+ First we need to ensure that cas->oldLo's V-shadow is bound to
+ voldLo, since expr2vbits_Binop will generate a use of it. */
+ bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
+ expd_eq_old_V
+ = expr2vbits_Binop( mce, opCmpEQ, cas->expdLo, mkexpr(cas->oldLo) );
+ if (otrak) {
+ bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
+ expd_eq_old_B
+ = gen_maxU32( mce, bexpdLo, boldLo );
+ }
+
+ /* Generate a complaint if expd_eq_old is undefined. As above,
+ first force expd_eq_old's definedness to be bound to its
+ V-shadow tmp. */
+ bind_shadow_tmp_to_orig('V', mce, expd_eq_old, expd_eq_old_V);
+ if (otrak)
+ bind_shadow_tmp_to_orig('B', mce, expd_eq_old, expd_eq_old_B);
+ complainIfUndefined(mce, expd_eq_old);
+
+ /* 7. if "expected == old"
+ store data# to shadow memory */
+ do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
+ NULL/*data*/, vdataLo/*vdata*/,
+ expd_eq_old/*guard for store*/ );
+ if (otrak) {
+ gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
+ bdataLo/*bdata*/,
+ expd_eq_old/*guard for store*/ );
+ }
+}
+
+
+static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
+{
+ IRAtom *vdataHi = NULL, *bdataHi = NULL;
+ IRAtom *vdataLo = NULL, *bdataLo = NULL;
+ IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
+ IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
+ IRAtom *voldHi = NULL, *boldHi = NULL;
+ IRAtom *voldLo = NULL, *boldLo = NULL;
+ IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
+ IRAtom *xHi_V = NULL, *xLo_V = NULL, *xHL_V = NULL;
+ IRAtom *xHi_B = NULL, *xLo_B = NULL, *xHL_B = NULL;
+ IRAtom *expd_eq_old_V = NULL, *expd_eq_old_B = NULL;
+ IRAtom *expd_eq_old = NULL, *zero = NULL;
+ IROp opCmpEQ, opOr, opXor;
+ Int elemSzB, memOffsLo, memOffsHi;
+ IRType elemTy;
+ Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
+
+ /* double CAS */
+ tl_assert(cas->oldHi != IRTemp_INVALID);
+ tl_assert(cas->expdHi != NULL);
+ tl_assert(cas->dataHi != NULL);
+
+ elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
+ switch (elemTy) {
+ case Ity_I8:
+ opCmpEQ = Iop_CmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
+ elemSzB = 1; zero = mkU8(0);
+ break;
+ case Ity_I16:
+ opCmpEQ = Iop_CmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
+ elemSzB = 2; zero = mkU16(0);
+ break;
+ case Ity_I32:
+ opCmpEQ = Iop_CmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
+ elemSzB = 4; zero = mkU32(0);
+ break;
+ case Ity_I64:
+ opCmpEQ = Iop_CmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
+ elemSzB = 8; zero = mkU64(0);
+ break;
+ default:
+ tl_assert(0); /* IR defn disallows any other types */
+ }
+
+ /* 1. fetch data# (the proposed new value) */
+ tl_assert(isOriginalAtom(mce, cas->dataHi));
+ tl_assert(isOriginalAtom(mce, cas->dataLo));
+ vdataHi
+ = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
+ vdataLo
+ = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
+ tl_assert(isShadowAtom(mce, vdataHi));
+ tl_assert(isShadowAtom(mce, vdataLo));
+ if (otrak) {
+ bdataHi
+ = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
+ bdataLo
+ = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
+ tl_assert(isShadowAtom(mce, bdataHi));
+ tl_assert(isShadowAtom(mce, bdataLo));
+ }
+
+ /* 2. fetch expected# (what we expect to see at the address) */
+ tl_assert(isOriginalAtom(mce, cas->expdHi));
+ tl_assert(isOriginalAtom(mce, cas->expdLo));
+ vexpdHi
+ = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
+ vexpdLo
+ = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
+ tl_assert(isShadowAtom(mce, vexpdHi));
+ tl_assert(isShadowAtom(mce, vexpdLo));
+ if (otrak) {
+ bexpdHi
+ = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
+ bexpdLo
+ = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
+ tl_assert(isShadowAtom(mce, bexpdHi));
+ tl_assert(isShadowAtom(mce, bexpdLo));
+ }
+
+ /* 3. check definedness of address */
+ /* 4. fetch old# from shadow memory; this also checks
+ addressability of the address */
+ if (cas->end == Iend_LE) {
+ memOffsLo = 0;
+ memOffsHi = elemSzB;
+ } else {
+ tl_assert(cas->end == Iend_BE);
+ memOffsLo = elemSzB;
+ memOffsHi = 0;
+ }
+ voldHi
+ = assignNew(
+ 'V', mce, elemTy,
+ expr2vbits_Load(
+ mce,
+ cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
+ ));
+ voldLo
+ = assignNew(
+ 'V', mce, elemTy,
+ expr2vbits_Load(
+ mce,
+ cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
+ ));
+ if (otrak) {
+ boldHi
+ = assignNew('B', mce, Ity_I32,
+ gen_load_b(mce, elemSzB, cas->addr,
+ memOffsHi/*addr bias*/));
+ boldLo
+ = assignNew('B', mce, Ity_I32,
+ gen_load_b(mce, elemSzB, cas->addr,
+ memOffsLo/*addr bias*/));
+ }
+
+ /* 5. the CAS itself */
+ stmt( 'C', mce, IRStmt_CAS(cas) );
+
+ /* 6. complain if "expected == old" is undefined */
+ /* Doing this directly interacts in a complex way with origin
+ tracking. Much easier to make up an expression tree and hand
+ that off to expr2vbits_Binop. We will need the expression
+ tree in any case in order to decide whether or not to do a
+ shadow store. */
+ /* Note that 'C' is kinda faking it; it is indeed a non-shadow
+ tree, but it's not copied from the input block. */
+ /*
+ xHi = oldHi ^ expdHi;
+ xLo = oldLo ^ expdLo;
+ xHL = xHi | xLo;
+ expd_eq_old = xHL == 0;
+ */
+
+ /* --- xHi = oldHi ^ expdHi --- */
+ xHi = assignNew('C', mce, elemTy,
+ binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
+ bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
+ xHi_V
+ = expr2vbits_Binop( mce, opXor, cas->expdHi, mkexpr(cas->oldHi));
+ if (otrak) {
+ bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
+ xHi_B = gen_maxU32( mce, bexpdHi, boldHi );
+ }
+
+ /* --- xLo = oldLo ^ expdLo --- */
+ xLo = assignNew('C', mce, elemTy,
+ binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
+ bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
+ xLo_V
+ = expr2vbits_Binop( mce, opXor, cas->expdLo, mkexpr(cas->oldLo));
+ if (otrak) {
+ bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
+ xLo_B = gen_maxU32( mce, bexpdLo, boldLo );
+ }
+
+ /* --- xHL = xHi | xLo --- */
+ xHL = assignNew('C', mce, elemTy,
+ binop(opOr, xHi, xLo));
+ bind_shadow_tmp_to_orig('V', mce, xHi, xHi_V);
+ bind_shadow_tmp_to_orig('V', mce, xLo, xLo_V);
+ xHL_V
+ = expr2vbits_Binop( mce, opOr, xHi, xLo );
+ if (otrak) {
+ bind_shadow_tmp_to_orig('B', mce, xHi, xHi_B);
+ bind_shadow_tmp_to_orig('B', mce, xLo, xLo_B);
+ xHL_B = gen_maxU32( mce, xHi_B, xLo_B );
+ }
+
+ /* --- expd_eq_old = xHL == 0 --- */
+ expd_eq_old
+ = assignNew('C', mce, Ity_I1,
+ binop(opCmpEQ, xHL, zero));
+ bind_shadow_tmp_to_orig('V', mce, xHL, xHL_V);
+ expd_eq_old_V
+ = expr2vbits_Binop( mce, opCmpEQ, xHL, zero);
+ if (otrak) {
+ expd_eq_old_B = xHL_B; /* since the zero literal isn't going to
+ contribute any interesting origin */
+ }
+
+ /* The backend's register allocator is probably on fire by now :-) */
+ /* Generate a complaint if expd_eq_old is undefined. As above,
+ first force expd_eq_old's definedness to be bound to its
+ V-shadow tmp. */
+ bind_shadow_tmp_to_orig('V', mce, expd_eq_old, expd_eq_old_V);
+ if (otrak)
+ bind_shadow_tmp_to_orig('B', mce, expd_eq_old, expd_eq_old_B);
+ complainIfUndefined(mce, expd_eq_old);
+
+ /* 7. if "expected == old"
+ store data# to shadow memory */
+ do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
+ NULL/*data*/, vdataHi/*vdata*/,
+ expd_eq_old/*guard for store*/ );
+ do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
+ NULL/*data*/, vdataLo/*vdata*/,
+ expd_eq_old/*guard for store*/ );
+ if (otrak) {
+ gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
+ bdataHi/*bdata*/,
+ expd_eq_old/*guard for store*/ );
+ gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
+ bdataLo/*bdata*/,
+ expd_eq_old/*guard for store*/ );
+ }
+}
+
+
/*------------------------------------------------------------*/
/*--- Memcheck main ---*/
/*------------------------------------------------------------*/
Int i;
IRExpr* e;
IRDirty* d;
+ IRCAS* cas;
switch (st->tag) {
case Ist_WrTmp:
e = st->Ist.WrTmp.data;
case Ist_IMark:
case Ist_MBE:
return False;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ return isBogusAtom(cas->addr)
+ || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
+ || isBogusAtom(cas->expdLo)
+ || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
+ || isBogusAtom(cas->dataLo);
default:
unhandled:
ppIRStmt(st);
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
- IRSB* bb_in,
+ IRSB* sb_in,
VexGuestLayout* layout,
VexGuestExtents* vge,
IRType gWordTy, IRType hWordTy )
Int i, j, first_stmt;
IRStmt* st;
MCEnv mce;
- IRSB* bb;
+ IRSB* sb_out;
if (gWordTy != hWordTy) {
/* We don't currently support this case. */
tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
/* Set up SB */
- bb = deepCopyIRSBExceptStmts(bb_in);
-
- /* Set up the running environment. Only .bb is modified as we go
- along. */
- mce.bb = bb;
+ sb_out = deepCopyIRSBExceptStmts(sb_in);
+
+ /* Set up the running environment. Both .sb and .tmpMap are
+ modified as we go along. Note that tmps are added to both
+ .sb->tyenv and .tmpMap together, so the valid index-set for
+ those two arrays should always be identical. */
+ VG_(memset)(&mce, 0, sizeof(mce));
+ mce.sb = sb_out;
mce.trace = verboze;
mce.layout = layout;
- mce.n_originalTmps = bb->tyenv->types_used;
mce.hWordTy = hWordTy;
mce.bogusLiterals = False;
- mce.tmpMapV = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
- mce.tmpMapB = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
- for (i = 0; i < mce.n_originalTmps; i++) {
- mce.tmpMapV[i] = IRTemp_INVALID;
- mce.tmpMapB[i] = IRTemp_INVALID;
+
+ mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
+ sizeof(TempMapEnt));
+ for (i = 0; i < sb_in->tyenv->types_used; i++) {
+ TempMapEnt ent;
+ ent.kind = Orig;
+ ent.shadowV = IRTemp_INVALID;
+ ent.shadowB = IRTemp_INVALID;
+ VG_(addToXA)( mce.tmpMap, &ent );
}
+ tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
/* Make a preliminary inspection of the statements, to see if there
are any dodgy-looking literals. If there are, we generate
bogus = False;
- for (i = 0; i < bb_in->stmts_used; i++) {
+ for (i = 0; i < sb_in->stmts_used; i++) {
- st = bb_in->stmts[i];
+ st = sb_in->stmts[i];
tl_assert(st);
tl_assert(isFlatIRStmt(st));
/* Copy verbatim any IR preamble preceding the first IMark */
- tl_assert(mce.bb == bb);
+ tl_assert(mce.sb == sb_out);
+ tl_assert(mce.sb != sb_in);
i = 0;
- while (i < bb_in->stmts_used && bb_in->stmts[i]->tag != Ist_IMark) {
+ while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
- st = bb_in->stmts[i];
+ st = sb_in->stmts[i];
tl_assert(st);
tl_assert(isFlatIRStmt(st));
- stmt( 'C', &mce, bb_in->stmts[i] );
+ stmt( 'C', &mce, sb_in->stmts[i] );
i++;
}
no-origin, as appropriate for a defined value.
*/
for (j = 0; j < i; j++) {
- if (bb_in->stmts[j]->tag == Ist_WrTmp) {
+ if (sb_in->stmts[j]->tag == Ist_WrTmp) {
/* findShadowTmpV checks its arg is an original tmp;
no need to assert that here. */
- IRTemp tmp_o = bb_in->stmts[j]->Ist.WrTmp.tmp;
+ IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
- IRType ty_v = typeOfIRTemp(bb->tyenv, tmp_v);
+ IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
if (MC_(clo_mc_level) == 3) {
IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
- tl_assert(typeOfIRTemp(bb->tyenv, tmp_b) == Ity_I32);
+ tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
}
if (0) {
/* Iterate over the remaining stmts to generate instrumentation. */
- tl_assert(bb_in->stmts_used > 0);
+ tl_assert(sb_in->stmts_used > 0);
tl_assert(i >= 0);
- tl_assert(i < bb_in->stmts_used);
- tl_assert(bb_in->stmts[i]->tag == Ist_IMark);
+ tl_assert(i < sb_in->stmts_used);
+ tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
- for (/* use current i*/; i < bb_in->stmts_used; i++) {
+ for (/* use current i*/; i < sb_in->stmts_used; i++) {
- st = bb_in->stmts[i];
- first_stmt = bb->stmts_used;
+ st = sb_in->stmts[i];
+ first_stmt = sb_out->stmts_used;
if (verboze) {
VG_(printf)("\n");
VG_(printf)("\n");
}
- if (MC_(clo_mc_level) == 3)
- schemeS( &mce, st );
+ if (MC_(clo_mc_level) == 3) {
+ /* See comments on case Ist_CAS below. */
+ if (st->tag != Ist_CAS)
+ schemeS( &mce, st );
+ }
/* Generate instrumentation code for each stmt ... */
do_shadow_Store( &mce, st->Ist.Store.end,
st->Ist.Store.addr, 0/* addr bias */,
st->Ist.Store.data,
- NULL /* shadow data */ );
+ NULL /* shadow data */,
+ NULL/*guard*/ );
+ /* If this is a store conditional, it writes to .resSC a
+ value indicating whether or not the store succeeded.
+ Just claim this value is always defined. In the
+ PowerPC interpretation of store-conditional,
+ definedness of the success indication depends on
+ whether the address of the store matches the
+ reservation address. But we can't tell that here (and
+ anyway, we're not being PowerPC-specific). At least we
+ are guaranteed that the definedness of the store
+ address, and its addressability, will be checked as per
+ normal. So it seems pretty safe to just say that the
+ success indication is always defined.
+
+ In schemeS, for origin tracking, we must
+ correspondingly set a no-origin value for the origin
+ shadow of resSC.
+ */
+ if (st->Ist.Store.resSC != IRTemp_INVALID) {
+ assign( 'V', &mce,
+ findShadowTmpV(&mce, st->Ist.Store.resSC),
+ definedOfType(
+ shadowTypeV(
+ typeOfIRTemp(mce.sb->tyenv,
+ st->Ist.Store.resSC)
+ )));
+ }
break;
case Ist_Exit:
st->Ist.AbiHint.nia );
break;
+ case Ist_CAS:
+ do_shadow_CAS( &mce, st->Ist.CAS.details );
+ /* Note, do_shadow_CAS copies the CAS itself to the output
+ block, because it needs to add instrumentation both
+ before and after it. Hence skip the copy below. Also
+ skip the origin-tracking stuff (call to schemeS) above,
+ since that's all tangled up with it too; do_shadow_CAS
+ does it all. */
+ break;
+
default:
VG_(printf)("\n");
ppIRStmt(st);
} /* switch (st->tag) */
if (0 && verboze) {
- for (j = first_stmt; j < bb->stmts_used; j++) {
+ for (j = first_stmt; j < sb_out->stmts_used; j++) {
VG_(printf)(" ");
- ppIRStmt(bb->stmts[j]);
+ ppIRStmt(sb_out->stmts[j]);
VG_(printf)("\n");
}
VG_(printf)("\n");
}
- /* ... and finally copy the stmt itself to the output. */
- stmt('C', &mce, st);
-
+ /* ... and finally copy the stmt itself to the output. Except,
+ skip the copy of IRCASs; see comments on case Ist_CAS
+ above. */
+ if (st->tag != Ist_CAS)
+ stmt('C', &mce, st);
}
/* Now we need to complain if the jump target is undefined. */
- first_stmt = bb->stmts_used;
+ first_stmt = sb_out->stmts_used;
if (verboze) {
- VG_(printf)("bb->next = ");
- ppIRExpr(bb->next);
+ VG_(printf)("sb_in->next = ");
+ ppIRExpr(sb_in->next);
VG_(printf)("\n\n");
}
- complainIfUndefined( &mce, bb->next );
+ complainIfUndefined( &mce, sb_in->next );
if (0 && verboze) {
- for (j = first_stmt; j < bb->stmts_used; j++) {
+ for (j = first_stmt; j < sb_out->stmts_used; j++) {
VG_(printf)(" ");
- ppIRStmt(bb->stmts[j]);
+ ppIRStmt(sb_out->stmts[j]);
VG_(printf)("\n");
}
VG_(printf)("\n");
}
- return bb;
+ /* If this fails, there's been some serious snafu with tmp management
+ that should be investigated. */
+ tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
+ VG_(deleteXA)( mce.tmpMap );
+
+ tl_assert(mce.sb == sb_out);
+ return sb_out;
}
/*------------------------------------------------------------*/
/*--- Origin tracking stuff ---*/
/*------------------------------------------------------------*/
+/* Almost identical to findShadowTmpV. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
- tl_assert(orig < mce->n_originalTmps);
- if (mce->tmpMapB[orig] == IRTemp_INVALID) {
- mce->tmpMapB[orig]
- = newIRTemp(mce->bb->tyenv, Ity_I32);
+ TempMapEnt* ent;
+ /* VG_(indexXA) range-checks 'orig', hence no need to check
+ here. */
+ ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
+ tl_assert(ent->kind == Orig);
+ if (ent->shadowB == IRTemp_INVALID) {
+ IRTemp tmpB
+ = newTemp( mce, Ity_I32, BSh );
+ /* newTemp may cause mce->tmpMap to resize, hence previous results
+ from VG_(indexXA) are invalid. */
+ ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
+ tl_assert(ent->kind == Orig);
+ tl_assert(ent->shadowB == IRTemp_INVALID);
+ ent->shadowB = tmpB;
}
- return mce->tmpMapB[orig];
+ return ent->shadowB;
}
static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
HChar* hName;
IRTemp bTmp;
IRDirty* di;
- IRType aTy = typeOfIRExpr( mce->bb->tyenv, baseaddr );
+ IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
IRAtom* ea = baseaddr;
if (offset != 0) {
: mkU64( (Long)(Int)offset );
ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
}
- bTmp = newIRTemp(mce->bb->tyenv, mce->hWordTy);
+ bTmp = newTemp(mce, mce->hWordTy, BSh);
switch (szB) {
case 1: hFun = (void*)&MC_(helperc_b_load1);
stmt( 'B', mce, IRStmt_Dirty(di) );
if (mce->hWordTy == Ity_I64) {
/* 64-bit host */
- IRTemp bTmp32 = newIRTemp(mce->bb->tyenv, Ity_I32);
+ IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
return mkexpr(bTmp32);
} else {
return mkexpr(bTmp);
}
}
+
+/* Generate a shadow store. guard :: Ity_I1 controls whether the
+ store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
- IRAtom* baseaddr, Int offset, IRAtom* dataB )
+ IRAtom* baseaddr, Int offset, IRAtom* dataB,
+ IRAtom* guard )
{
void* hFun;
HChar* hName;
IRDirty* di;
- IRType aTy = typeOfIRExpr( mce->bb->tyenv, baseaddr );
+ IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
IRAtom* ea = baseaddr;
+ if (guard) {
+ tl_assert(isOriginalAtom(mce, guard));
+ tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
+ }
if (offset != 0) {
IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
: mkU64( (Long)(Int)offset );
);
/* no need to mess with any annotations. This call accesses
neither guest state nor guest memory. */
+ if (guard) di->guard = guard;
stmt( 'B', mce, IRStmt_Dirty(di) );
}
static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
- IRType eTy = typeOfIRExpr(mce->bb->tyenv, e);
+ IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
if (eTy == Ity_I64)
return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
if (eTy == Ity_I32)
}
static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
- IRType eTy = typeOfIRExpr(mce->bb->tyenv, e);
+ IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
tl_assert(eTy == Ity_I32);
if (dstTy == Ity_I64)
return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
toDo = d->mSize;
/* chew off 32-bit chunks */
while (toDo >= 4) {
- gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr );
+ gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
+ NULL/*guard*/ );
toDo -= 4;
}
/* handle possible 16-bit excess */
while (toDo >= 2) {
- gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr );
+ gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
+ NULL/*guard*/ );
toDo -= 2;
}
tl_assert(toDo == 0); /* also need to handle 1-byte excess */
available (somewhere) */
tl_assert(isIRAtom(st->Ist.Store.addr));
dszB = sizeofIRType(
- typeOfIRExpr(mce->bb->tyenv, st->Ist.Store.data ));
+ typeOfIRExpr(mce->sb->tyenv, st->Ist.Store.data ));
dataB = schemeE( mce, st->Ist.Store.data );
- gen_store_b( mce, dszB, st->Ist.Store.addr, 0/*offset*/, dataB );
+ gen_store_b( mce, dszB, st->Ist.Store.addr, 0/*offset*/, dataB,
+ NULL/*guard*/ );
+ /* For the rationale behind this, see comments at the place
+ where the V-shadow for .resSC is constructed, in the main
+ loop in MC_(instrument). In short, we regard .resSC as
+ always-defined. */
+ if (st->Ist.Store.resSC != IRTemp_INVALID) {
+ assign( 'B', mce, findShadowTmpB(mce, st->Ist.Store.resSC),
+ mkU32(0) );
+ }
break;
}
case Ist_Put: {
Int b_offset
= MC_(get_otrack_shadow_offset)(
st->Ist.Put.offset,
- sizeofIRType(typeOfIRExpr(mce->bb->tyenv, st->Ist.Put.data))
+ sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
);
if (b_offset >= 0) {
/* FIXME: this isn't an atom! */