_VG_USERREQ__HG_RESERVED4, /* Do not use */
_VG_USERREQ__HG_ARANGE_MAKE_UNTRACKED, /* Addr a, ulong len */
_VG_USERREQ__HG_ARANGE_MAKE_TRACKED, /* Addr a, ulong len */
- _VG_USERREQ__HG_PTHREAD_BARRIER_RESIZE_PRE /* pth_bar_t*, ulong */
+ _VG_USERREQ__HG_PTHREAD_BARRIER_RESIZE_PRE, /* pth_bar_t*, ulong */
+ _VG_USERREQ__HG_CLEAN_MEMORY_HEAPBLOCK /* Addr start_of_block */
} Vg_TCheckClientRequest;
_arg1, 0,0,0,0); \
} while (0)
+#define DO_CREQ_W_W(_resF, _dfltF, _creqF, _ty1F,_arg1F) \
+ do { \
+ long int _qzz_res, _arg1; \
+ /* assert(sizeof(_ty1F) == sizeof(long int)); */ \
+ _arg1 = (long int)(_arg1F); \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, (_dfltF), \
+ (_creqF), \
+ _arg1, 0,0,0,0); \
+ _resF = _qzz_res; \
+ } while (0)
+
#define DO_CREQ_v_WW(_creqF, _ty1F,_arg1F, _ty2F,_arg2F) \
do { \
long int _unused_res, _arg1, _arg2; \
void*,(_qzz_start), \
unsigned long,(_qzz_len))
+/* The same, but for the heap block starting at _qzz_blockstart. This
+   allows painting when we only know the address of an object, but not
+   its size, which is sometimes the case in C++ code involving
+   inheritance, and in which RTTI is not, for whatever reason,
+   available. Returns the number of bytes painted, which can be zero
+   for a zero-sized block. Hence a return value >= 0 indicates success
+   (the block was found), -1 indicates the block was not found, and -2
+   is returned when not running on Helgrind. */
+#define VALGRIND_HG_CLEAN_MEMORY_HEAPBLOCK(_qzz_blockstart) \
+ (__extension__ \
+ ({long int _npainted; \
+ DO_CREQ_W_W(_npainted, (-2)/*default*/, \
+ _VG_USERREQ__HG_CLEAN_MEMORY_HEAPBLOCK, \
+ void*,(_qzz_blockstart)); \
+ _npainted; \
+ }))
+
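As a usage illustration of the client request added above: a program that holds only a pointer to an object of unknown dynamic size can paint the whole underlying heap block and branch on the documented return values (>= 0 painted, -1 not found, -2 not on Helgrind). This is a minimal sketch, not part of the patch; the include path and the malloc'd size are assumptions.

/* Hypothetical client-side sketch (not part of this patch). Assumes the
   installed header is reachable as <valgrind/helgrind.h>. */
#include <stdio.h>
#include <stdlib.h>
#include <valgrind/helgrind.h>

int main ( void )
{
   void* obj = malloc(64);   /* stands in for an object of unknown size */
   long  n   = VALGRIND_HG_CLEAN_MEMORY_HEAPBLOCK(obj);
   if (n >= 0)
      printf("painted %ld bytes\n", n);             /* block found     */
   else if (n == -1)
      printf("not inside any heap block\n");        /* block not found */
   else
      printf("not running on Helgrind (%ld)\n", n); /* default is -2   */
   free(obj);
   return 0;
}
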
/* ----------------------------------------------------------
For error control.
---------------------------------------------------------- */
tl_assert(!xe->XE.Race.descr2);
/* First, see if it's in any heap block. Unfortunately this
- means a linear search through all allocated heap blocks. */
- HG_(mm_find_containing_block)(
- &xe->XE.Race.hctxt, &xe->XE.Race.haddr, &xe->XE.Race.hszB,
- xe->XE.Race.data_addr
- );
+ means a linear search through all allocated heap blocks. The
+ assertion says that if it's detected as a heap block, then we
+ must have an allocation context for it, since all heap blocks
+ should have an allocation context. */
+ Bool is_heapblock
+ = HG_(mm_find_containing_block)(
+ &xe->XE.Race.hctxt, &xe->XE.Race.haddr, &xe->XE.Race.hszB,
+ xe->XE.Race.data_addr
+ );
+ tl_assert(is_heapblock == (xe->XE.Race.hctxt != NULL));
if (!xe->XE.Race.hctxt) {
/* It's not in any heap block. See if we can map it to a
extern ULong HG_(stats__string_table_get_map_size) ( void );
/* For error creation: map 'data_addr' to a malloc'd chunk, if any.
- Slow linear search. This is an abuse of the normal file structure
- since this is exported by hg_main.c, not hg_errors.c. Oh Well. */
-void HG_(mm_find_containing_block)( /*OUT*/ExeContext** where,
+   Slow linear search, accelerated in some special cases by a normal
+   hash search of the mallocmeta table. This is an abuse of the normal
+   file structure since this is exported by hg_main.c, not hg_errors.c.
+   Oh Well. Returns True if found, False if not. Zero-sized blocks are
+   considered to contain the searched-for address if they equal that
+   address. */
+Bool HG_(mm_find_containing_block)( /*OUT*/ExeContext** where,
/*OUT*/Addr* payload,
/*OUT*/SizeT* szB,
Addr data_addr );
/* For error creation: map 'data_addr' to a malloc'd chunk, if any.
- Slow linear search. */
+ Slow linear search. With a bit of hash table help if 'data_addr'
+ is either the start of a block or up to 15 word-sized steps along
+ from the start of a block. */
static inline Bool addr_is_in_MM_Chunk( MallocMeta* mm, Addr a )
{
- if (a < mm->payload) return False;
- if (a >= mm->payload + mm->szB) return False;
- return True;
+ /* Accept 'a' as within 'mm' if 'mm's size is zero and 'a' points
+ right at it. */
+ if (UNLIKELY(mm->szB == 0 && a == mm->payload))
+ return True;
+ /* else normal interval rules apply */
+ if (LIKELY(a < mm->payload)) return False;
+ if (LIKELY(a >= mm->payload + mm->szB)) return False;
+ return True;
}
-void HG_(mm_find_containing_block)( /*OUT*/ExeContext** where,
+Bool HG_(mm_find_containing_block)( /*OUT*/ExeContext** where,
/*OUT*/Addr* payload,
/*OUT*/SizeT* szB,
Addr data_addr )
{
MallocMeta* mm;
+ Int i;
+ const Int n_fast_check_words = 16;
+
+ /* First, do a few fast searches on the basis that data_addr might
+ be exactly the start of a block or up to 15 words inside. This
+ can happen commonly via the creq
+ _VG_USERREQ__HG_CLEAN_MEMORY_HEAPBLOCK. */
+ for (i = 0; i < n_fast_check_words; i++) {
+ mm = VG_(HT_lookup)( hg_mallocmeta_table,
+ data_addr - (UWord)(UInt)i * sizeof(UWord) );
+ if (UNLIKELY(mm && addr_is_in_MM_Chunk(mm, data_addr)))
+ goto found;
+ }
+
/* Well, this totally sucks. But without using an interval tree or
- some such, it's hard to see how to do better. */
+ some such, it's hard to see how to do better. We have to check
+ every block in the entire table. */
VG_(HT_ResetIter)(hg_mallocmeta_table);
while ( (mm = VG_(HT_Next)(hg_mallocmeta_table)) ) {
- if (UNLIKELY(addr_is_in_MM_Chunk(mm, data_addr))) {
- *where = mm->where;
- *payload = mm->payload;
- *szB = mm->szB;
- return;
- }
+ if (UNLIKELY(addr_is_in_MM_Chunk(mm, data_addr)))
+ goto found;
}
+
+ /* Not found. Bah. */
+ return False;
+ /*NOTREACHED*/
+
+ found:
+ tl_assert(mm);
+ tl_assert(addr_is_in_MM_Chunk(mm, data_addr));
+ if (where) *where = mm->where;
+ if (payload) *payload = mm->payload;
+ if (szB) *szB = mm->szB;
+ return True;
}
}
break;
+ case _VG_USERREQ__HG_CLEAN_MEMORY_HEAPBLOCK: {
+ Addr payload = 0;
+ SizeT pszB = 0;
+ if (0) VG_(printf)("VG_USERREQ__HG_CLEAN_MEMORY_HEAPBLOCK(%#lx)\n",
+ args[1]);
+ if (HG_(mm_find_containing_block)(NULL, &payload, &pszB, args[1])) {
+ if (pszB > 0) {
+ evh__die_mem(payload, pszB);
+ evh__new_mem(payload, pszB);
+ }
+ *ret = pszB;
+ } else {
+ *ret = (UWord)-1;
+ }
+ break;
+ }
+
case _VG_USERREQ__HG_ARANGE_MAKE_UNTRACKED:
if (0) VG_(printf)("HG_ARANGE_MAKE_UNTRACKED(%#lx,%ld)\n",
args[1], args[2]);
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
+static UWord stats__vts__tick = 0; // # calls to VTS__tick
+static UWord stats__vts__join = 0; // # calls to VTS__join
+static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
+static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
+static UWord stats__vts__cmp_structural_slow = 0; // # calls to VTS__cmp_structural w/ slow case
+static UWord stats__vts__indexat_slow = 0; // # calls to VTS__indexAt_SLOW
+static UWord stats__vts_set__fadoa = 0; // # calls to vts_set__find_and_dealloc__or_add
+static UWord stats__vts_set__fadoa_d = 0; // # calls to vts_set__find_and_dealloc__or_add
+ // that lead to a deallocation
+
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
return a & ~(N_SECMAP_ARANGE - 1);
ScalarTS tmp;
VTS* res;
Word i, n;
+
+ stats__vts__tick++;
+
tl_assert(me);
tl_assert(is_sane_VTS(vts));
//if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
Thr* thr;
VTS* res;
+ stats__vts__join++;
+
tl_assert(a && a->ts);
tl_assert(b && b->ts);
useda = VG_(sizeXA)( a->ts );
Word ia, ib, useda, usedb;
ULong tyma, tymb;
+ stats__vts__cmpLEQ++;
+
tl_assert(a && a->ts);
tl_assert(b && b->ts);
useda = VG_(sizeXA)( a->ts );
/* Compute an arbitrary structural (total) ordering on the two args,
based on their VCs, so they can be looked up in a table, tree, etc.
- Returns -1, 0 or 1. (really just 'deriving Ord' :-)
+     Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
+     performance critical, so some effort is expended to make it as
+     fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
/* We just need to generate an arbitrary total ordering based on
a->ts and b->ts. Preferably do it in a way which comes across likely
differences relatively quickly. */
- Word i, useda, usedb;
- ScalarTS *tmpa, *tmpb;
-
- tl_assert(a && a->ts);
- tl_assert(b && b->ts);
- useda = VG_(sizeXA)( a->ts );
- usedb = VG_(sizeXA)( b->ts );
+ Word i;
+ Word useda = 0, usedb = 0;
+ ScalarTS *ctsa = NULL, *ctsb = NULL;
+
+ stats__vts__cmp_structural++;
+
+ tl_assert(a);
+ tl_assert(b);
+
+ VG_(getContentsXA_UNSAFE)( a->ts, (void**)&ctsa, &useda );
+ VG_(getContentsXA_UNSAFE)( b->ts, (void**)&ctsb, &usedb );
+
+ if (LIKELY(useda == usedb)) {
+ ScalarTS *tmpa = NULL, *tmpb = NULL;
+ stats__vts__cmp_structural_slow++;
+ /* Same length vectors. Find the first difference, if any, as
+ fast as possible. */
+ for (i = 0; i < useda; i++) {
+ tmpa = &ctsa[i];
+ tmpb = &ctsb[i];
+ if (LIKELY(tmpa->tym == tmpb->tym && tmpa->thr == tmpb->thr))
+ continue;
+ else
+ break;
+ }
+ if (UNLIKELY(i == useda)) {
+ /* They're identical. */
+ return 0;
+ } else {
+ tl_assert(i >= 0 && i < useda);
+ if (tmpa->tym < tmpb->tym) return -1;
+ if (tmpa->tym > tmpb->tym) return 1;
+ if (tmpa->thr < tmpb->thr) return -1;
+ if (tmpa->thr > tmpb->thr) return 1;
+ /* we just established them as non-identical, hence: */
+ }
+ /*NOTREACHED*/
+ tl_assert(0);
+ }
if (useda < usedb) return -1;
if (useda > usedb) return 1;
-
- /* Same length vectors, so let's step through them together. */
- tl_assert(useda == usedb);
- for (i = 0; i < useda; i++) {
- tmpa = VG_(indexXA)( a->ts, i );
- tmpb = VG_(indexXA)( b->ts, i );
- if (tmpa->tym < tmpb->tym) return -1;
- if (tmpa->tym > tmpb->tym) return 1;
- if (tmpa->thr < tmpb->thr) return -1;
- if (tmpa->thr > tmpb->thr) return 1;
- }
-
- /* They're identical. */
- return 0;
+ /*NOTREACHED*/
+ tl_assert(0);
}
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
UWord i, n;
+ stats__vts__indexat_slow++;
tl_assert(vts && vts->ts);
n = VG_(sizeXA)( vts->ts );
for (i = 0; i < n; i++) {
static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
{
UWord keyW, valW;
+ stats__vts_set__fadoa++;
/* lookup cand (by value) */
if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
/* found it */
tl_assert(valW == 0);
/* if this fails, cand (by ref) was already present (!) */
tl_assert(keyW != (UWord)cand);
+ stats__vts_set__fadoa_d++;
VTS__delete(cand);
return (VTS*)keyW;
} else {
VG_(printf)("%s","\n");
- VG_(printf)(" libhb: %'13llu msmcread (%'llu changed)\n",
+ VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
stats__msmcread, stats__msmcread_change);
- VG_(printf)(" libhb: %'13llu msmcwrite (%'llu changed)\n",
+ VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
stats__msmcwrite, stats__msmcwrite_change);
VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
stats__cmpLEQ_queries, stats__cmpLEQ_misses);
VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
stats__join2_queries, stats__join2_misses);
+ VG_(printf)("%s","\n");
+ VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
+ stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
+ VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
+ stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
+ VG_(printf)( " libhb: VTSset: find_and_dealloc__or_add %'lu (%'lu deallocd)\n",
+ stats__vts_set__fadoa, stats__vts_set__fadoa_d );
+ VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
+ stats__vts__indexat_slow );
+
VG_(printf)("%s","\n");
VG_(printf)(
" libhb: %ld entries in vts_table (approximately %lu bytes)\n",