From: Julian Seward
Date: Sun, 21 Dec 2008 10:43:10 +0000 (+0000)
Subject: Various changes:
X-Git-Tag: svn/VALGRIND_3_4_0~52
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=28f3b05d6853ccaa1f5a99921ee24d64e447f4b9;p=thirdparty%2Fvalgrind.git

Various changes:

* remove flags --trace-addr= and --trace-level=.  These no longer
  have any effect, so there's no point in having the associated
  flags.

* add flag --show-conflicts=no|yes [yes], which makes it possible to
  disable the conflicting-access collection machinery.  This makes
  Helgrind run much faster.  Perhaps useful in regression testing,
  when it is desired only to find out if a race exists, but not to
  collect enough information to easily diagnose it.

* add flag --conflict-cache-size= [1000000], which makes it possible
  to control how much memory is used for storage of information
  about historical (potentially-conflicting) accesses.

* Update comments on the conflicting-access machinery to more
  closely reflect the code.  Includes comments on the important
  aspects of the value N_OLDREF_ACCS.  Increase said constant from
  3 to 5.

* Fix bug in event_map_bind: when searching for an OldRef.accs[]
  entry that matches the current access, don't forget to also
  compare the access sizes.  The old code only compared the thread
  identity and the read/writeness.

* hg_main.c: disable Dwarf3 variable/type info reading by default.
  Mostly this provides little benefit and can cause Helgrind to use
  a lot more time and memory at startup.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@8845
---

diff --git a/helgrind/hg_basics.c b/helgrind/hg_basics.c
index 7c25109644..cb27b8ac20 100644
--- a/helgrind/hg_basics.c
+++ b/helgrind/hg_basics.c
@@ -70,15 +70,15 @@ Char* HG_(strdup) ( HChar* cc, const Char* s )
 
 /* Description of these flags is in hg_basics.h. */
 
-Bool HG_(clo_track_lockorders) = True;
+Bool  HG_(clo_track_lockorders) = True;
 
-Bool HG_(clo_cmp_race_err_addrs) = False;
+Bool  HG_(clo_cmp_race_err_addrs) = False;
 
-Addr HG_(clo_trace_addr) = 0;
+Bool  HG_(clo_show_conflicts) = True;
 
-Word HG_(clo_trace_level) = 0;
+UWord HG_(clo_conflict_cache_size) = 1000000;
 
-Word HG_(clo_sanity_flags) = 0;
+Word  HG_(clo_sanity_flags) = 0;
 
 /*--------------------------------------------------------------------*/
diff --git a/helgrind/hg_basics.h b/helgrind/hg_basics.h
index 0a295f7817..efcef1409d 100644
--- a/helgrind/hg_basics.h
+++ b/helgrind/hg_basics.h
@@ -72,11 +72,16 @@ extern Bool HG_(clo_track_lockorders);
    (regtesting) this is sometimes important. */
 extern Bool HG_(clo_cmp_race_err_addrs);
 
-/* Tracing memory accesses, so we can see what's going on.
-   clo_trace_addr is the address to monitor.  clo_trace_level = 0 for
-   no tracing, 1 for summary, 2 for detailed. */
-extern Addr HG_(clo_trace_addr);
-extern Word HG_(clo_trace_level);
+/* Show conflicting accesses?  This involves collecting and storing
+   large numbers of call stacks just in case we might need to show
+   them later, and so is expensive (although very useful).  Hence
+   allow it to be optionally disabled. */
+extern Bool HG_(clo_show_conflicts);
+
+/* Size of the conflicting-access cache, measured in terms of the
+   maximum possible number of elements in the previous-access map.
+   Must be between 10k and 10 million.  Default is 1 million. */
+extern UWord HG_(clo_conflict_cache_size);
 
 /* Sanity check level.  This is an or-ing of
    SCE_{THREADS,LOCKS,BIGRANGE,ACCESS,LAOG}.
 */
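By way of illustration (not part of the patch): with the two new flags
declared above, a Helgrind run that skips conflicting-access collection
and shrinks the history cache might look like this.  The program name
and the cache size are placeholder values; the size must stay within
the 10k..10 million bounds noted above.

   valgrind --tool=helgrind --show-conflicts=no \
            --conflict-cache-size=500000 ./myprog

--show-conflicts=no switches off collection of conflicting-access
stacks entirely, while --conflict-cache-size caps the previous-access
map at 500000 entries instead of the default 1000000.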
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c
index 328a273c8c..11048c99cc 100644
--- a/helgrind/hg_main.c
+++ b/helgrind/hg_main.c
@@ -3986,12 +3986,15 @@ static Bool hg_process_cmd_line_option ( Char* arg )
    else if (VG_CLO_STREQ(arg, "--cmp-race-err-addrs=yes"))
       HG_(clo_cmp_race_err_addrs) = True;
 
-   else if (VG_CLO_STREQN(13, arg, "--trace-addr=")) {
-      HG_(clo_trace_addr) = VG_(atoll16)(&arg[13]);
-      if (HG_(clo_trace_level) == 0)
-         HG_(clo_trace_level) = 1;
-   }
-   else VG_BNUM_CLO(arg, "--trace-level", HG_(clo_trace_level), 0, 2)
+   else if (VG_CLO_STREQ(arg, "--show-conflicts=no"))
+      HG_(clo_show_conflicts) = False;
+   else if (VG_CLO_STREQ(arg, "--show-conflicts=yes"))
+      HG_(clo_show_conflicts) = True;
+
+   /* If you change the 10k/10mill limits, remember to also change
+      them in assertions at the top of event_map_maybe_GC. */
+   else VG_BNUM_CLO(arg, "--conflict-cache-size",
+                    HG_(clo_conflict_cache_size), 10*1000, 10*1000*1000)
 
    /* "stuvwx" --> stuvwx (binary) */
    else if (VG_CLO_STREQN(18, arg, "--hg-sanity-flags=")) {
@@ -4024,9 +4027,9 @@ static void hg_print_usage ( void )
 {
    VG_(printf)(
-"   --track-lockorders=no|yes show lock ordering errors? [yes]\n"
-"   --trace-addr=0xXXYYZZ     show all state changes for address 0xXXYYZZ\n"
-"   --trace-level=0|1|2       verbosity level of --trace-addr [1]\n"
+"   --track-lockorders=no|yes  show lock ordering errors? [yes]\n"
+"   --show-conflicts=no|yes    show both stack traces in a race? [yes]\n"
+"   --conflict-cache-size=N    size of conflict history cache [1000000]\n"
    );
    VG_(replacement_malloc_print_usage)();
 }
@@ -4036,10 +4039,9 @@ static void hg_print_debug_usage ( void )
    VG_(replacement_malloc_print_debug_usage)();
    VG_(printf)("   --cmp-race-err-addrs=no|yes  are data addresses in "
               "race errors significant? [no]\n");
-   VG_(printf)("   --hg-sanity-flags=<XXXXXX>   sanity check "
+   VG_(printf)("   --hg-sanity-flags=<XXXXXX>  sanity check "
               "  at events (X = 0|1) [000000]\n");
    VG_(printf)("   --hg-sanity-flags values:\n");
-   VG_(printf)("      100000   crosscheck happens-before-graph searches\n");
    VG_(printf)("      010000   after changes to "
               "lock-order-acquisition-graph\n");
    VG_(printf)("      001000   at memory accesses (NB: not currently used)\n");
@@ -4198,7 +4200,11 @@ static void hg_pre_clo_init ( void )
                                   hg_cli__realloc,
                                   HG_CLI__MALLOC_REDZONE_SZB );
 
-   VG_(needs_var_info)(); /* optional */
+   /* 21 Dec 08: disabled this; it mostly causes H to start more
+      slowly and use significantly more memory, without very often
+      providing useful results.  The user can request to load this
+      information manually with --read-var-info=yes. */
+   if (0) VG_(needs_var_info)(); /* optional */
 
    VG_(track_new_mem_startup)     ( evh__new_mem_w_perms );
    VG_(track_new_mem_stack_signal)( evh__new_mem_w_tid );
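For readers unfamiliar with the VG_BNUM_CLO idiom above, the following
standalone C sketch shows the behaviour a bounded numeric option of
this shape implies: prefix match, integer parse, range check against
the same 10k..10M limits.  The function name and error handling are
illustrative inventions, not Valgrind's actual macro internals.

   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>

   /* Sketch only: accept "--conflict-cache-size=N" and range-check N.
      Returns 1 if the option was recognised, 0 otherwise. */
   static int parse_conflict_cache_size ( const char* arg,
                                          unsigned long* out )
   {
      const char* pfx = "--conflict-cache-size=";
      if (0 != strncmp(arg, pfx, strlen(pfx)))
         return 0;                              /* not this option */
      unsigned long n = strtoul(arg + strlen(pfx), NULL, 10);
      if (n < 10*1000UL || n > 10*1000*1000UL) {
         fprintf(stderr, "--conflict-cache-size: must be in "
                         "the range 10000 .. 10000000\n");
         exit(1);
      }
      *out = n;
      return 1;
   }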
diff --git a/helgrind/libhb_core.c b/helgrind/libhb_core.c
index 103b7c205f..99ba2962ac 100644
--- a/helgrind/libhb_core.c
+++ b/helgrind/libhb_core.c
@@ -2666,7 +2666,6 @@ inline static void gal_Free ( GroupAlloc* ga, void* p )
 //                                                             //
 /////////////////////////////////////////////////////////////////
 
-#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
 #define EVENT_MAP_GC_DISCARD_FRACTION 0.5
 
 /* This is in two parts:
@@ -2678,26 +2677,40 @@ inline static void gal_Free ( GroupAlloc* ga, void* p )
       only represent each one once.  The set is indexed/searched by
       ordering on the stack trace vectors.
 
-   2. An OSet of OldRefs.  These store information about each old ref
-      that we need to record.  It is indexed by address of the
+   2. A SparseWA of OldRefs.  These store information about each old
+      ref that we need to record.  It is indexed by address of the
       location for which the information is recorded.  For LRU
       purposes, each OldRef also contains a generation number,
       indicating when it was most recently accessed.
 
       The important part of an OldRef is, however, its accs[] array.
-      This is an array of N_OLDREF_ACCS pairs of Thr and a RCEC.  This
-      allows us to collect the last access-traceback by up to
-      N_OLDREF_ACCS different threads for this location.  The accs[]
-      array is a MTF-array.  If a pair falls off the end, that's too
-      bad -- we will lose info about that thread's access to this
-      location.
-
-      When this OSet becomes too big, we can throw away the entries
+      This is an array of N_OLDREF_ACCS entries which bind (thread,
+      R/W, size) triples to RCECs.  This allows us to collect the
+      last access-traceback by up to N_OLDREF_ACCS different triples
+      for this location.  The accs[] array is an MTF-array.  If a
+      binding falls off the end, that's too bad -- we will lose info
+      about that triple's access to this location.
+
+      When the SparseWA becomes too big, we can throw away the OldRefs
       whose generation numbers are below some threshold; hence doing
       approximate LRU discarding.  For each discarded OldRef we must
       of course decrement the reference count on all the RCECs it
       refers to, in order that entries from (1) eventually get
       discarded too.
+
+      A major improvement in reliability of this mechanism would be
+      to have a dynamically sized OldRef.accs[] array, so that no
+      entries ever fall off the end.  In investigations (Dec 08) it
+      appears that a major cause of the non-availability of
+      conflicting-access traces in race reports is the fixed size of
+      this array.  I suspect that for most OldRefs only a few entries
+      are used, but for a minority of cases there is an overflow,
+      leading to info lossage.  Investigations also suggest this is
+      very workload and scheduling sensitive.  Therefore a dynamic
+      sizing would be better.
+
+      However, dynamic sizing would defeat the use of a GroupAllocator
+      for OldRef structures.  And that's important for performance.
+      So it's not straightforward to do.
 */
@@ -2916,7 +2929,7 @@ static RCEC* get_RCEC ( Thr* thr )
 */
 
 typedef  struct { Thr* thr; RCEC* rcec; }  Thr_n_RCEC;
 
-#define N_OLDREF_ACCS 3
+#define N_OLDREF_ACCS 5
 
 typedef
    struct {
@@ -3007,16 +3020,25 @@ static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
 
    if (b) {
 
      /* We already have a record for this address.  We now need to
-        see if we have a stack trace pertaining to this thread's
-        access. */
+        see if we have a stack trace pertaining to this (thread,
+        R/W, size) triple. */
      tl_assert(keyW == a);
 
      ref = (OldRef*)valW;
      tl_assert(ref->magic == OldRef_MAGIC);
 
      tl_assert(thr);
      for (i = 0; i < N_OLDREF_ACCS; i++) {
-        if (ref->accs[i].thr == thr)
-           break;
+        if (ref->accs[i].thr != thr)
+           continue;
+        /* since .thr encodes both the accessing thread and the
+           read/writeness, we know now that at least those features
+           of the access match this entry.  So we just need to check
+           the size indication.  Do this by inspecting the lowest 2
+           bits of .rcec, which contain the encoded size info. */
+        if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
+           continue;
+        /* else we have a match, so stop looking. */
+        break;
      }
 
      if (i < N_OLDREF_ACCS) {
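The size comparison above works because the two low bits of each
stored rcec pointer carry an encoded access size: ptr_and_UWord(p, 3)
extracts just those bits, while ptr_and_UWord(p, ~3) recovers the real
pointer.  Here is a standalone sketch of that tagging scheme, assuming
the tagged structures are at least 4-byte aligned; the helper names
are illustrative, not Helgrind's own.

   #include <assert.h>
   #include <stdint.h>

   /* Sketch: pack a 2-bit size code into the low bits of an aligned
      pointer, and get both parts back out again. */
   static inline void* tag_szcode ( void* p, uintptr_t szCode ) {
      assert(((uintptr_t)p & 3) == 0);  /* alignment frees the 2 bits */
      return (void*)((uintptr_t)p | (szCode & 3));
   }
   static inline void* untag ( void* p ) {       /* the real pointer */
      return (void*)((uintptr_t)p & ~(uintptr_t)3);
   }
   static inline uintptr_t szcode_of ( void* p ) {  /* the size code */
      return (uintptr_t)p & 3;
   }

So the test ptr_and_UWord(ref->accs[i].rcec,3) !=
ptr_and_UWord(rcec,3) compares size codes only; thread identity and
read/writeness were already matched via .thr.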
@@ -3033,13 +3055,16 @@ static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
         ref->accs[i].rcec = rcec;
         tl_assert(ref->accs[i].thr == thr);
      } else {
-        /* No entry for this thread.  Shuffle all of them down one
-           slot, and put the new entry at the start of the array. */
+        /* No entry for this (thread, R/W, size) triple.  Shuffle all
+           of them down one slot, and put the new entry at the start
+           of the array. */
         if (ref->accs[N_OLDREF_ACCS-1].thr) {
            /* the last slot is in use.  We must dec the rc on the
               associated rcec. */
            tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
            stats__ctxt_rcdec2++;
+           if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
+              VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
            ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
         } else {
            tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
@@ -3070,9 +3095,9 @@ static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
      ref->gen = oldrefGen;
      ref->accs[0].rcec = rcec;
      ref->accs[0].thr = thr;
-     /* thr==NULL is used to signify an empty slot, so we can't
-        add a NULL thr. */
-     tl_assert(ptr_and_UWord(thr, ~3) != 0);
+     /* thr==NULL is used to signify an empty slot, so we can't add a
+        NULL thr. */
+     tl_assert(ptr_and_UWord(thr, ~3) != 0);
      for (j = 1; j < N_OLDREF_ACCS; j++) {
         ref->accs[j].thr = NULL;
         ref->accs[j].rcec = NULL;
@@ -3305,12 +3330,17 @@ static void event_map_maybe_GC ( void )
    UWord genMap_min  = 0;
    UWord genMap_size = 0;
 
-   if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
+   if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
       return;
 
    if (0)
      VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
 
+   /* Check for sane command line params.  Limit values must match
+      those in hg_process_cmd_line_option. */
+   tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
+   tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
+
    /* Check our counting is sane (expensive) */
    if (CHECK_CEM)
       tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
@@ -3432,7 +3462,8 @@ static void event_map_maybe_GC ( void )
      tl_assert(keyW >= maxGen);
      tl_assert(retained >= valW);
      if (retained - valW
-         > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
+         > (UWord)(HG_(clo_conflict_cache_size)
+                   * EVENT_MAP_GC_DISCARD_FRACTION)) {
         retained -= valW;
         maxGen = keyW;
      } else {
@@ -3688,7 +3719,7 @@ static inline SVal msm_read ( SVal svOld,
      tl_assert(is_sane_SVal_C(svNew));
    }
    tl_assert(svNew != SVal_INVALID);
-   if (svNew != svOld) {
+   if (svNew != svOld && HG_(clo_show_conflicts)) {
      if (SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
         stats__msm_read_change++;
@@ -3769,7 +3800,7 @@ static inline SVal msm_write ( SVal svOld,
      tl_assert(is_sane_SVal_C(svNew));
    }
    tl_assert(svNew != SVal_INVALID);
-   if (svNew != svOld) {
+   if (svNew != svOld && HG_(clo_show_conflicts)) {
      if (SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
         stats__msm_write_change++;
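Finally, for orientation, a standalone sketch of the shape of the
accs[] move-to-front discipline described above: on a hit the matched
binding moves to the front; on a miss the new binding goes in at the
front and the binding in the last slot falls off the end.  It
deliberately elides the RCEC reference counting and is not a
line-for-line copy of event_map_bind; types and names are simplified
stand-ins.

   #include <stdint.h>
   #include <string.h>

   #define N_ACCS 5                        /* mirrors N_OLDREF_ACCS */

   typedef struct { void* thr; void* rcec; } Acc; /* thr==NULL: empty */

   static void bind_mtf ( Acc accs[N_ACCS], void* thr, void* rcec )
   {
      int i;
      for (i = 0; i < N_ACCS; i++) {
         if (accs[i].thr == thr
             && ((uintptr_t)accs[i].rcec & 3) == ((uintptr_t)rcec & 3))
            break;              /* matching (thr, R/W, size) binding */
      }
      if (i < N_ACCS) {
         /* hit: slide entries 0..i-1 down one, overwriting the
            matched slot, and rebind the triple at the front to the
            new stack trace */
         memmove(&accs[1], &accs[0], i * sizeof(Acc));
      } else {
         /* miss: the binding in the last slot falls off the end (its
            rcec would need its reference count decremented here) */
         memmove(&accs[1], &accs[0], (N_ACCS-1) * sizeof(Acc));
      }
      accs[0].thr  = thr;
      accs[0].rcec = rcec;
   }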