From: Julian Seward
Date: Thu, 8 Aug 2013 10:41:46 +0000 (+0000)
Subject: Fix #294285: --partial-loads-ok does not work for 16-byte SSE loads
X-Git-Tag: svn/VALGRIND_3_9_0~203
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f865a853ee324af9b8656635c35800828af8d58f;p=thirdparty%2Fvalgrind.git

Fix #294285: --partial-loads-ok does not work for 16-byte SSE loads
(core fixes for the memcheck handling of 128 bit loads)
(Patrick J. LoPresti, lopresti@gmail.com)

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13488
---

diff --git a/memcheck/mc_include.h b/memcheck/mc_include.h
index a658c7cfe8..27815368dd 100644
--- a/memcheck/mc_include.h
+++ b/memcheck/mc_include.h
@@ -578,15 +578,17 @@ VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr, UWord );
 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr, UWord );
 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr, UWord );
 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr, UWord );
-VG_REGPARM(2) void MC_(helperc_STOREV8)   ( Addr, UWord );
-
-VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr );
-VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr );
-VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr );
-VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr );
-VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr );
-VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr );
-VG_REGPARM(1) UWord MC_(helperc_LOADV8)   ( Addr );
+VG_REGPARM(2) void  MC_(helperc_STOREV8)   ( Addr, UWord );
+
+VG_REGPARM(2) void  MC_(helperc_LOADV128be) ( /*OUT*/V128*, Addr );
+VG_REGPARM(2) void  MC_(helperc_LOADV128le) ( /*OUT*/V128*, Addr );
+VG_REGPARM(1) ULong MC_(helperc_LOADV64be)  ( Addr );
+VG_REGPARM(1) ULong MC_(helperc_LOADV64le)  ( Addr );
+VG_REGPARM(1) UWord MC_(helperc_LOADV32be)  ( Addr );
+VG_REGPARM(1) UWord MC_(helperc_LOADV32le)  ( Addr );
+VG_REGPARM(1) UWord MC_(helperc_LOADV16be)  ( Addr );
+VG_REGPARM(1) UWord MC_(helperc_LOADV16le)  ( Addr );
+VG_REGPARM(1) UWord MC_(helperc_LOADV8)     ( Addr );
 
 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia );
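A note on the declarations above: helpers for loads of 64 bits or less hand their V bits back in an integer register, which cannot accommodate a 128-bit result, so the new LOADV128 variants write through an OUT pointer instead and take VG_REGPARM(2) rather than (1), the result pointer becoming an extra argument (in the instrumented call in mc_translate.c below, the IRExprP__VECRET marker stands in for that pointer). A stand-alone sketch of the convention follows; V128 and the V_BITS64_* constants here are stand-ins for the real memcheck definitions, not Valgrind source:

   /* Sketch: returning <=64 bits of V data directly versus 128 bits
      through an OUT parameter.  All names are stand-ins. */
   #include <stdio.h>

   typedef unsigned long long ULong;
   typedef struct { ULong w64[2]; } V128;

   #define V_BITS64_DEFINED   0x0000000000000000ULL
   #define V_BITS64_UNDEFINED 0xFFFFFFFFFFFFFFFFULL

   /* Up to 64 bits fit in one integer register... */
   static ULong load_vbits64 ( void )
   {
      return V_BITS64_DEFINED;
   }

   /* ...but a 128-bit result is written to caller-provided storage,
      mirroring void MC_(helperc_LOADV128le) ( OUT V128*, Addr ). */
   static void load_vbits128 ( V128* res )
   {
      res->w64[0] = V_BITS64_DEFINED;    /* low half: all defined */
      res->w64[1] = V_BITS64_UNDEFINED;  /* high half: all undefined */
   }

   int main ( void )
   {
      V128 v;
      load_vbits128(&v);
      printf("lo=%016llx hi=%016llx reg=%016llx\n",
             v.w64[0], v.w64[1], load_vbits64());
      return 0;
   }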
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 1f6328b13a..4608e98412 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -1128,6 +1128,121 @@ static Bool parse_ignore_ranges ( const HChar* str0 )
 
 /* --------------- Load/store slow cases. --------------- */
 
+static
+__attribute__((noinline))
+void mc_LOADV128_slow ( /*OUT*/V128* res, Addr a, Bool bigendian )
+{
+   SizeT nBits = 128;
+   V128 vbits128;    /* result */
+   V128 pessim128;   /* only used when p-l-ok=yes */
+   SSizeT bytes_per_long = 64 / 8;
+   SSizeT szL = nBits / 64;   /* Size in longs */
+   SSizeT szB = bytes_per_long * szL;
+   SSizeT i, j;               /* Must be signed. */
+   SizeT n_addrs_bad = 0;
+   Addr ai;
+   UChar vbits8;
+   Bool ok;
+
+   vbits128.w64[0] = V_BITS64_UNDEFINED;
+   vbits128.w64[1] = V_BITS64_UNDEFINED;
+   pessim128.w64[0] = V_BITS64_DEFINED;
+   pessim128.w64[1] = V_BITS64_DEFINED;
+
+   tl_assert(nBits == 128);
+
+   /* Make up a 128-bit result V word, which contains the loaded data
+      for valid addresses and Defined for invalid addresses.  Iterate
+      over the bytes in the word, from the most significant down to
+      the least.  The vbits to return are calculated into vbits128.
+      Also compute the pessimising value to be used when
+      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
+      info can be gleaned from pessim128) but is used as a
+      cross-check. */
+   for (j = szL-1 ; j >= 0 ; j--) {
+      ULong vbits64 = V_BITS64_UNDEFINED;
+      ULong pessim64 = V_BITS64_DEFINED;
+      UWord long_index = byte_offset_w(szL, bigendian, j);
+      for (i = bytes_per_long-1; i >= 0; i--) {
+         PROF_EVENT(31, "mc_LOADV128_slow(loop)");
+         ai = a + long_index*bytes_per_long
+                + byte_offset_w(bytes_per_long, bigendian, i);
+         ok = get_vbits8(ai, &vbits8);
+         vbits64 <<= 8;
+         vbits64 |= vbits8;
+         if (!ok) n_addrs_bad++;
+         pessim64 <<= 8;
+         pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
+      }
+      vbits128.w64[long_index] = vbits64;
+      pessim128.w64[long_index] = pessim64;
+   }
+
+   /* In the common case, all the addresses involved are valid, so we
+      just return the computed V bits and have done. */
+   if (LIKELY(n_addrs_bad == 0)) {
+      *res = vbits128;
+      return;
+   }
+
+   /* If there's no possibility of getting a partial-loads-ok
+      exemption, report the error and quit. */
+   if (!MC_(clo_partial_loads_ok)) {
+      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
+      *res = vbits128;
+      return;
+   }
+
+   /* The partial-loads-ok exemption might apply.  Find out if it
+      does.  If so, don't report an addressing error, but do return
+      Undefined for the bytes that are out of range, so as to avoid
+      false negatives.  If it doesn't apply, just report an addressing
+      error in the usual way. */
+
+   /* Some code steps along byte strings in aligned word-sized chunks
+      even when there is only a partially defined word at the end (eg,
+      optimised strlen).  This is allowed by the memory model of
+      modern machines, since an aligned load cannot span two pages and
+      thus cannot "partially fault".
+
+      Therefore, a load from a partially-addressable place is allowed
+      if all of the following hold:
+      - the command-line flag is set [by default, it isn't]
+      - it's an aligned load
+      - at least one of the addresses in the word *is* valid
+
+      Since this suppresses the addressing error, we avoid false
+      negatives by marking bytes undefined when they come from an
+      invalid address.
+   */
+
+   /* "at least one of the addresses is invalid" */
+   tl_assert(pessim128.w64[0] != V_BITS64_DEFINED
+             || pessim128.w64[1] != V_BITS64_DEFINED);
+
+   if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
+      /* Exemption applies.  Use the previously computed pessimising
+         value for vbits128 and return the combined result, but don't
+         flag an addressing error.  The pessimising value is Defined
+         for valid addresses and Undefined for invalid addresses. */
+      /* for assumption that doing bitwise or implements UifU */
+      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
+      /* (really need "UifU" here...)
+         vbits128 UifU= pessim128  (is pessimised by it, iow) */
+      for (j = szL-1 ; j >= 0 ; j--)
+         vbits128.w64[j] |= pessim128.w64[j];
+      *res = vbits128;
+      return;
+   }
+
+   /* Exemption doesn't apply.  Flag an addressing error in the normal
+      way. */
+   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
+
+   *res = vbits128;
+}
+
+
 static
 __attribute__((noinline))
 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
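The exemption branch above leans on the invariant it asserts: V_BIT_UNDEFINED == 1 and V_BIT_DEFINED == 0, so a plain bitwise OR of vbits128 with pessim128 implements UifU, making a byte undefined in the result if either the loaded V bits or the addressability check say so. A stand-alone sketch of that step, with stand-in values:

   /* Sketch of "vbits128 UifU= pessim128" from mc_LOADV128_slow,
      done here on a single 64-bit lane. */
   #include <assert.h>
   #include <stdio.h>

   typedef unsigned long long ULong;

   int main ( void )
   {
      /* V bits as loaded: bytes 4 and 5 were undefined in memory. */
      ULong vbits64  = 0x0000FFFF00000000ULL;
      /* Pessimising value: the top two bytes came from unaddressable
         memory, so they must end up undefined too. */
      ULong pessim64 = 0xFFFF000000000000ULL;

      /* Because undefined == 1, OR-ing unions the undefinedness. */
      ULong uifu = vbits64 | pessim64;

      assert(uifu == 0xFFFFFFFF00000000ULL);
      printf("vbits UifU pessim = %016llx\n", uifu);
      return 0;
   }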
@@ -4060,35 +4175,93 @@ static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
 
    On a 64-bit machine, it's more complex, since we're testing
    simultaneously for misalignment and for the address being at or
-   above 32G:
-
-     N_PRIMARY_BITS == 19, so
-     N_PRIMARY_MAP == 0x80000, so
-     N_PRIMARY_MAP-1 == 0x7FFFF, so
-     (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
-
-     MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
-             = ~ ( 0xFFFF | 0x7FFFF'0000 )
-             = ~ 0x7FFFF'FFFF
-             = 0xFFFF'FFF8'0000'0000
-
-     MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
-             = ~ ( 0xFFFE | 0x7FFFF'0000 )
-             = ~ 0x7FFFF'FFFE
-             = 0xFFFF'FFF8'0000'0001
-
-     MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
-             = ~ ( 0xFFFC | 0x7FFFF'0000 )
-             = ~ 0x7FFFF'FFFC
-             = 0xFFFF'FFF8'0000'0003
-
-     MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
-             = ~ ( 0xFFF8 | 0x7FFFF'0000 )
-             = ~ 0x7FFFF'FFF8
-             = 0xFFFF'FFF8'0000'0007
+   above 64G:
+
+     N_PRIMARY_BITS == 20, so
+     N_PRIMARY_MAP == 0x100000, so
+     N_PRIMARY_MAP-1 == 0xFFFFF, so
+     (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
+
+     MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
+             = ~ ( 0xFFFF | 0xF'FFFF'0000 )
+             = ~ 0xF'FFFF'FFFF
+             = 0xFFFF'FFF0'0000'0000
+
+     MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
+             = ~ ( 0xFFFE | 0xF'FFFF'0000 )
+             = ~ 0xF'FFFF'FFFE
+             = 0xFFFF'FFF0'0000'0001
+
+     MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
+             = ~ ( 0xFFFC | 0xF'FFFF'0000 )
+             = ~ 0xF'FFFF'FFFC
+             = 0xFFFF'FFF0'0000'0003
+
+     MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
+             = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
+             = ~ 0xF'FFFF'FFF8
+             = 0xFFFF'FFF0'0000'0007
 */
 
+/* ------------------------ Size = 16 ------------------------ */
+
+static INLINE
+void mc_LOADV128 ( /*OUT*/V128* res, Addr a, Bool isBigEndian )
+{
+   PROF_EVENT(200, "mc_LOADV128");
+
+#ifndef PERF_FAST_LOADV
+   mc_LOADV128_slow( res, a, isBigEndian );
+   return;
+#else
+   {
+      UWord   sm_off16, vabits16;
+      SecMap* sm;
+      int     j;
+
+      if (UNLIKELY( UNALIGNED_OR_HIGH(a,128) )) {
+         PROF_EVENT(201, "mc_LOADV128-slow1");
+         mc_LOADV128_slow( res, a, isBigEndian );
+         return;
+      }
+
+      // Handle common cases quickly: a (and a+8) is suitably aligned,
+      // is mapped, and addressable.
+      for (j = 0 ; j < 2 ; ++j) {
+         sm       = get_secmap_for_reading_low(a + 8*j);
+         sm_off16 = SM_OFF_16(a + 8*j);
+         vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
+
+         // Convert V bits from compact memory form to expanded
+         // register form.
+         if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
+            res->w64[j] = V_BITS64_DEFINED;
+         } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
+            res->w64[j] = V_BITS64_UNDEFINED;
+         } else {
+            /* Slow case: some block of 8 bytes is not all-defined or
+               all-undefined. */
+            PROF_EVENT(202, "mc_LOADV128-slow2");
+            mc_LOADV128_slow( res, a, isBigEndian );
+            return;
+         }
+      }
+      return;
+   }
+#endif
+}
+
+VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
+{
+   mc_LOADV128(res, a, True);
+}
+VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
+{
+   mc_LOADV128(res, a, False);
+}
+
+
 /* ------------------------ Size = 8 ------------------------ */
 
 static INLINE
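The updated comment stops at MASK(8), but the new 16-byte fast path guards itself with UNALIGNED_OR_HIGH(a,128), i.e. MASK(16). Extending the same arithmetic:

     MASK(16) = ~ ( (0x10000 - 16) | 0xF'FFFF'0000 )
              = ~ ( 0xFFF0 | 0xF'FFFF'0000 )
              = ~ 0xF'FFFF'FFF0
              = 0xFFFF'FFF0'0000'000F

The stand-alone check below verifies the quoted values plus this one; its MASK macro is derived from the comment's formula and is not the real Valgrind definition:

   /* Verify the 64-bit MASK(..) constants for N_PRIMARY_BITS == 20. */
   #include <assert.h>
   #include <stdio.h>

   typedef unsigned long long ULong;

   #define N_PRIMARY_BITS 20
   #define N_PRIMARY_MAP  (1ULL << N_PRIMARY_BITS)
   #define MASK(_szInBytes) \
      ( ~((0x10000ULL - (_szInBytes)) | ((N_PRIMARY_MAP - 1) << 16)) )

   int main ( void )
   {
      assert(MASK(1)  == 0xFFFFFFF000000000ULL);
      assert(MASK(2)  == 0xFFFFFFF000000001ULL);
      assert(MASK(4)  == 0xFFFFFFF000000003ULL);
      assert(MASK(8)  == 0xFFFFFFF000000007ULL);
      /* The new case: 16-byte accesses must be 16-aligned, below 64G. */
      assert(MASK(16) == 0xFFFFFFF00000000FULL);
      printf("MASK(16) = 0x%016llX\n", MASK(16));
      return 0;
   }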
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 93fdf3458e..079ca2a4dd 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -4186,12 +4186,6 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                               IREndness end, IRType ty,
                               IRAtom* addr, UInt bias, IRAtom* guard )
 {
-   void*        helper;
-   const HChar* hname;
-   IRDirty*     di;
-   IRTemp       datavbits;
-   IRAtom*      addrAct;
-
    tl_assert(isOriginalAtom(mce,addr));
    tl_assert(end == Iend_LE || end == Iend_BE);
 
@@ -4203,43 +4197,59 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
       data V bits from shadow memory. */
    ty = shadowTypeV(ty);
 
+   void*        helper           = NULL;
+   const HChar* hname            = NULL;
+   Bool         ret_via_outparam = False;
+
    if (end == Iend_LE) {
       switch (ty) {
-         case Ity_I64: helper = &MC_(helperc_LOADV64le);
-                       hname = "MC_(helperc_LOADV64le)";
-                       break;
-         case Ity_I32: helper = &MC_(helperc_LOADV32le);
-                       hname = "MC_(helperc_LOADV32le)";
-                       break;
-         case Ity_I16: helper = &MC_(helperc_LOADV16le);
-                       hname = "MC_(helperc_LOADV16le)";
-                       break;
-         case Ity_I8:  helper = &MC_(helperc_LOADV8);
-                       hname = "MC_(helperc_LOADV8)";
-                       break;
-         default:      ppIRType(ty);
-                       VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
+         case Ity_V128: helper = &MC_(helperc_LOADV128le);
+                        hname = "MC_(helperc_LOADV128le)";
+                        ret_via_outparam = True;
+                        break;
+         case Ity_I64:  helper = &MC_(helperc_LOADV64le);
+                        hname = "MC_(helperc_LOADV64le)";
+                        break;
+         case Ity_I32:  helper = &MC_(helperc_LOADV32le);
+                        hname = "MC_(helperc_LOADV32le)";
+                        break;
+         case Ity_I16:  helper = &MC_(helperc_LOADV16le);
+                        hname = "MC_(helperc_LOADV16le)";
+                        break;
+         case Ity_I8:   helper = &MC_(helperc_LOADV8);
+                        hname = "MC_(helperc_LOADV8)";
+                        break;
+         default:       ppIRType(ty);
+                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
       }
    } else {
       switch (ty) {
-         case Ity_I64: helper = &MC_(helperc_LOADV64be);
-                       hname = "MC_(helperc_LOADV64be)";
-                       break;
-         case Ity_I32: helper = &MC_(helperc_LOADV32be);
-                       hname = "MC_(helperc_LOADV32be)";
-                       break;
-         case Ity_I16: helper = &MC_(helperc_LOADV16be);
-                       hname = "MC_(helperc_LOADV16be)";
-                       break;
-         case Ity_I8:  helper = &MC_(helperc_LOADV8);
-                       hname = "MC_(helperc_LOADV8)";
-                       break;
-         default:      ppIRType(ty);
-                       VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
+         case Ity_V128: helper = &MC_(helperc_LOADV128be);
+                        hname = "MC_(helperc_LOADV128be)";
+                        ret_via_outparam = True;
+                        break;
+         case Ity_I64:  helper = &MC_(helperc_LOADV64be);
+                        hname = "MC_(helperc_LOADV64be)";
+                        break;
+         case Ity_I32:  helper = &MC_(helperc_LOADV32be);
+                        hname = "MC_(helperc_LOADV32be)";
+                        break;
+         case Ity_I16:  helper = &MC_(helperc_LOADV16be);
+                        hname = "MC_(helperc_LOADV16be)";
+                        break;
+         case Ity_I8:   helper = &MC_(helperc_LOADV8);
+                        hname = "MC_(helperc_LOADV8)";
+                        break;
+         default:       ppIRType(ty);
+                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
       }
    }
 
+   tl_assert(helper);
+   tl_assert(hname);
+
    /* Generate the actual address into addrAct. */
+   IRAtom* addrAct;
    if (bias == 0) {
       addrAct = addr;
    } else {
@@ -4254,11 +4264,22 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
 
    /* We need to have a place to park the V bits we're just about to
      read. */
-   datavbits = newTemp(mce, ty, VSh);
-   di = unsafeIRDirty_1_N( datavbits,
-                           1/*regparms*/,
-                           hname, VG_(fnptr_to_fnentry)( helper ),
-                           mkIRExprVec_1( addrAct ));
+   IRTemp datavbits = newTemp(mce, ty, VSh);
+
+   /* Here's the call. */
+   IRDirty* di;
+   if (ret_via_outparam) {
+      di = unsafeIRDirty_1_N( datavbits,
+                              2/*regparms*/,
+                              hname, VG_(fnptr_to_fnentry)( helper ),
+                              mkIRExprVec_2( IRExprP__VECRET, addrAct ) );
+   } else {
+      di = unsafeIRDirty_1_N( datavbits,
+                              1/*regparms*/,
+                              hname, VG_(fnptr_to_fnentry)( helper ),
+                              mkIRExprVec_1( addrAct ) );
+   }
+
    setHelperAnns( mce, di );
    if (guard) {
       di->guard = guard;
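Context for this hunk and the next: previously the Ity_V128 case of expr2vbits_Load split a 16-byte load into two 8-byte helper calls glued together with Iop_64HLtoV128, so the slow path checked each half as an independent 8-byte access. A 16-byte load overhanging the end of a block then had one half that was entirely unaddressable, and the partial-loads-ok rule, which needs at least one valid byte within the accessed word, could never apply to that half. Routing Ity_V128 through a single LOADV128 call lets the slow path see one aligned 16-byte access. A stand-alone analogy with stand-in types (not the VEX API):

   /* Same shadow bits either way; what changes is the granularity at
      which the access is checked. */
   #include <stdio.h>
   #include <string.h>

   typedef unsigned long long ULong;
   typedef struct { ULong w64[2]; } V128;

   /* Old scheme: two independent 8-byte shadow fetches. */
   static ULong load_shadow64 ( const ULong* shadow, int half )
   {
      return shadow[half];
   }

   /* New scheme: one 16-byte fetch through an OUT parameter. */
   static void load_shadow128 ( V128* res, const ULong* shadow )
   {
      res->w64[0] = shadow[0];
      res->w64[1] = shadow[1];
   }

   int main ( void )
   {
      ULong shadow[2] = { 0ULL, 0xFFFFFFFFFFFFFFFFULL };
      V128 oldway, newway;

      oldway.w64[0] = load_shadow64(shadow, 0);
      oldway.w64[1] = load_shadow64(shadow, 1);
      load_shadow128(&newway, shadow);

      printf("identical: %s\n",
             memcmp(&oldway, &newway, sizeof oldway) == 0 ? "yes" : "no");
      return 0;
   }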
@@ -4298,20 +4319,8 @@ IRAtom* expr2vbits_Load ( MCEnv* mce,
       case Ity_I16:
       case Ity_I32:
       case Ity_I64:
+      case Ity_V128:
          return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
-      case Ity_V128: {
-         IRAtom *v64hi, *v64lo;
-         if (end == Iend_LE) {
-            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0, guard);
-            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8, guard);
-         } else {
-            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0, guard);
-            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8, guard);
-         }
-         return assignNew( 'V', mce,
-                           Ity_V128,
-                           binop(Iop_64HLtoV128, v64hi, v64lo));
-      }
       case Ity_V256: {
         /* V256-bit case -- phrased in terms of 64 bit units (Qs),
            with Q3 being the most significant lane. */
@@ -4868,10 +4877,12 @@ void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
    /* Inputs: unmasked args
       Note: arguments are evaluated REGARDLESS of the guard expression */
    for (i = 0; d->args[i]; i++) {
-      if (d->cee->mcx_mask & (1<<i)) {
+      IRAtom* arg = d->args[i];
+      if ( (d->cee->mcx_mask & (1<<i))
+           || UNLIKELY(is_IRExprP__VECRET_or_BBPTR(arg)) ) {
          /* ignore this arg */
       } else {
-         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
+         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) );
          curr = mkUifU32(mce, here, curr);
       }
    }
@@ -5741,9 +5752,13 @@ static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
       }
       case Ist_Dirty:
          d = st->Ist.Dirty.details;
-         for (i = 0; d->args[i]; i++)
-            if (isBogusAtom(d->args[i]))
-               return True;
+         for (i = 0; d->args[i]; i++) {
+            IRAtom* atom = d->args[i];
+            if (LIKELY(!is_IRExprP__VECRET_or_BBPTR(atom))) {
+               if (isBogusAtom(atom))
+                  return True;
+            }
+         }
          if (isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
@@ -6629,10 +6644,12 @@ static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
    /* Inputs: unmasked args
       Note: arguments are evaluated REGARDLESS of the guard expression */
    for (i = 0; d->args[i]; i++) {
-      if (d->cee->mcx_mask & (1<<i)) {
+      IRAtom* arg = d->args[i];
+      if ( (d->cee->mcx_mask & (1<<i))
+           || UNLIKELY(is_IRExprP__VECRET_or_BBPTR(arg)) ) {
          /* ignore this arg */
       } else {
-         here = schemeE( mce, d->args[i] );
+         here = schemeE( mce, arg );
          curr = gen_maxU32( mce, curr, here );
       }
    }
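For reference, the kind of code this fix targets (a sketch; exact memcheck behaviour depends on version and flags): an aligned 16-byte SSE load that overhangs the end of a heap block, as vectorised strlen-style loops generate. The hardware permits it, since an aligned 16-byte load cannot cross a page boundary, and with --partial-loads-ok=yes memcheck should now suppress the addressing error and mark only the overhanging bytes undefined, as it already did for 8-byte loads:

   /* Demo for bug 294285: aligned 16-byte load past the end of a block.
      Assumes malloc returns 16-aligned storage (true for glibc on
      x86_64).  Build with e.g. gcc -g -msse2 demo.c. */
   #include <emmintrin.h>
   #include <stdio.h>
   #include <stdlib.h>

   int main ( void )
   {
      int i;
      char* p = malloc(20);   /* bytes 0..19 are addressable */
      if (!p) return 1;
      for (i = 0; i < 20; i++) p[i] = (char)i;

      /* Reads bytes 16..31: 4 valid, 12 past the end of the block.
         With --partial-loads-ok=yes this no longer reports an
         addressing error; bytes 20..31 are merely undefined. */
      __m128i v = _mm_load_si128((const __m128i*)(p + 16));

      /* Only the 4 valid bytes are used, so no value error either. */
      printf("%d\n", _mm_cvtsi128_si32(v) & 0xff);

      free(p);
      return 0;
   }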