From: Julian Seward Date: Fri, 5 Aug 2016 14:59:50 +0000 (+0000) Subject: Reimplement MC_(final_tidy) much more efficiently. This reduces its instruction X-Git-Tag: svn/VALGRIND_3_12_0~96 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7ce46139ec2191851423742ae8bddad313581662;p=thirdparty%2Fvalgrind.git Reimplement MC_(final_tidy) much more efficiently. This reduces its instruction count by a factor of about 4. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@15927 --- diff --git a/memcheck/mc_include.h b/memcheck/mc_include.h index 6dc9b2b78b..a8836546b0 100644 --- a/memcheck/mc_include.h +++ b/memcheck/mc_include.h @@ -788,6 +788,9 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, IRSB* MC_(final_tidy) ( IRSB* ); +/* Check some assertions to do with the instrumentation machinery. */ +void MC_(do_instrumentation_startup_checks)( void ); + #endif /* ndef __MC_INCLUDE_H */ /*--------------------------------------------------------------------*/ diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c index 1fe6b47f6f..b6ae8af530 100644 --- a/memcheck/mc_main.c +++ b/memcheck/mc_main.c @@ -8142,6 +8142,9 @@ static void mc_pre_clo_init(void) tl_assert(MASK(4) == 0xFFFFFFF000000003ULL); tl_assert(MASK(8) == 0xFFFFFFF000000007ULL); # endif + + /* Check some assertions to do with the instrumentation machinery. */ + MC_(do_instrumentation_startup_checks)(); } STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT)); diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index d50b53d49e..a009210786 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -6584,6 +6584,7 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, return sb_out; } + /*------------------------------------------------------------*/ /*--- Post-tree-build final tidying ---*/ /*------------------------------------------------------------*/ @@ -6602,17 +6603,69 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure, reference, which is kinda pointless. MC_(final_tidy) therefore looks for such repeated calls and removes all but the first. */ -/* A struct for recording which (helper, guard) pairs we have already + +/* With some testing on perf/bz2.c, on amd64 and x86, compiled with + gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to + get almost all the benefits of this transformation whilst causing + the slide-back case to just often enough to be verifiably + correct. For posterity, the numbers are: + + bz2-32 + + 1 111,840 -> 1,702,810; ratio 15.2 + 2 111,840 -> 1,656,644; ratio 14.8 + 3 111,840 -> 1,650,457; ratio 14.7 + 4 111,840 -> 1,649,103; ratio 14.7 + 5 111,840 -> 1,648,655; ratio 14.7 + 6 111,840 -> 1,648,435; ratio 14.7 + 7 111,840 -> 1,648,304; ratio 14.7 + 8 111,840 -> 1,648,304; ratio 14.7 + 10 111,840 -> 1,648,171; ratio 14.7 + 12 111,840 -> 1,648,109 ratio 14.7 + 16 111,840 -> 1,647,947; ratio 14.7 + 32 111,840 -> 1,647,881; ratio 14.7 + inf 111,840 -> 1,647,881; ratio 14.7 + + bz2-64 + + 1 106,628 -> 1,811,992; ratio 17.0 + 2 106,628 -> 1,797,805; ratio 16.9 + 3 106,628 -> 1,792,429; ratio 16.8 + 4 106,628 -> 1,791,037; ratio 16.8 + 5 106,628 -> 1,790,929; ratio 16.8 + 6 106,628 -> 1,790,810; ratio 16.8 + 7 106,628 -> 1,790,764; ratio 16.8 + 8 106,628 -> 1,790,764; ratio 16.8 + 10 106,628 -> 1,790,764; ratio 16.8 + 12 106,628 -> 1,790,764; ratio 16.8 + 16 106,628 -> 1,790,701; ratio 16.8 + 32 106,628 -> 1,790,671; ratio 16.8 + inf 106,628 -> 1,790,671; ratio 16.8 +*/ + +/* Structs for recording which (helper, guard) pairs we have already seen. */ + +#define N_TIDYING_PAIRS 16 + typedef struct { void* entry; IRExpr* guard; } Pair; +typedef + struct { + Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/]; + UInt pairsUsed; + } + Pairs; + + /* Return True if e1 and e2 definitely denote the same value (used to compare guards). Return False if unknown; False is the safe answer. Since guest registers and guest memory do not have the SSA property we must return False if any Gets or Loads appear in - the expression. */ + the expression. This implicitly assumes that e1 and e2 have the + same IR type, which is always true here -- the type is Ity_I1. */ static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) { @@ -6661,45 +6714,98 @@ static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 ) True if so. If not, add an entry. */ static -Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry ) +Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry ) { - Pair p; - Pair* pp; - Int i, n = VG_(sizeXA)( pairs ); + UInt i, n = tidyingEnv->pairsUsed; + tl_assert(n <= N_TIDYING_PAIRS); for (i = 0; i < n; i++) { - pp = VG_(indexXA)( pairs, i ); - if (pp->entry == entry && sameIRValue(pp->guard, guard)) + if (tidyingEnv->pairs[i].entry == entry + && sameIRValue(tidyingEnv->pairs[i].guard, guard)) return True; } - p.guard = guard; - p.entry = entry; - VG_(addToXA)( pairs, &p ); + /* (guard, entry) wasn't found in the array. Add it at the end. + If the array is already full, slide the entries one slot + backwards. This means we will lose to ability to detect + duplicates from the pair in slot zero, but that happens so + rarely that it's unlikely to have much effect on overall code + quality. Also, this strategy loses the check for the oldest + tracked exit (memory reference, basically) and so that is (I'd + guess) least likely to be re-used after this point. */ + tl_assert(i == n); + if (n == N_TIDYING_PAIRS) { + for (i = 1; i < N_TIDYING_PAIRS; i++) { + tidyingEnv[n-1] = tidyingEnv[n]; + } + tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry; + tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard; + } else { + tl_assert(n < N_TIDYING_PAIRS); + tidyingEnv->pairs[n].entry = entry; + tidyingEnv->pairs[n].guard = guard; + n++; + tidyingEnv->pairsUsed = n; + } return False; } static Bool is_helperc_value_checkN_fail ( const HChar* name ) { - return - 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)") - || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)") - || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)") - || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)") - || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)") - || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)") - || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)") - || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)"); + /* This is expensive because it happens a lot. We are checking to + see whether |name| is one of the following 8 strings: + + MC_(helperc_value_check8_fail_no_o) + MC_(helperc_value_check4_fail_no_o) + MC_(helperc_value_check0_fail_no_o) + MC_(helperc_value_check1_fail_no_o) + MC_(helperc_value_check8_fail_w_o) + MC_(helperc_value_check0_fail_w_o) + MC_(helperc_value_check1_fail_w_o) + MC_(helperc_value_check4_fail_w_o) + + To speed it up, check the common prefix just once, rather than + all 8 times. + */ + const HChar* prefix = "MC_(helperc_value_check"; + + HChar n, p; + while (True) { + n = *name; + p = *prefix; + if (p == 0) break; /* ran off the end of the prefix */ + /* We still have some prefix to use */ + if (n == 0) return False; /* have prefix, but name ran out */ + if (n != p) return False; /* have both pfx and name, but no match */ + name++; + prefix++; + } + + /* Check the part after the prefix. */ + tl_assert(*prefix == 0 && *name != 0); + return 0==VG_(strcmp)(name, "8_fail_no_o)") + || 0==VG_(strcmp)(name, "4_fail_no_o)") + || 0==VG_(strcmp)(name, "0_fail_no_o)") + || 0==VG_(strcmp)(name, "1_fail_no_o)") + || 0==VG_(strcmp)(name, "8_fail_w_o)") + || 0==VG_(strcmp)(name, "4_fail_w_o)") + || 0==VG_(strcmp)(name, "0_fail_w_o)") + || 0==VG_(strcmp)(name, "1_fail_w_o)"); } IRSB* MC_(final_tidy) ( IRSB* sb_in ) { - Int i; + Int i; IRStmt* st; IRDirty* di; IRExpr* guard; IRCallee* cee; Bool alreadyPresent; - XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1", - VG_(free), sizeof(Pair) ); + Pairs pairs; + + pairs.pairsUsed = 0; + + pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123; + pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456; + /* Scan forwards through the statements. Each time a call to one of the relevant helpers is seen, check if we have made a previous call to the same helper using the same guard @@ -6720,16 +6826,21 @@ IRSB* MC_(final_tidy) ( IRSB* sb_in ) guard 'guard'. Check if we have already seen a call to this function with the same guard. If so, delete it. If not, add it to the set of calls we do know about. */ - alreadyPresent = check_or_add( pairs, guard, cee->addr ); + alreadyPresent = check_or_add( &pairs, guard, cee->addr ); if (alreadyPresent) { sb_in->stmts[i] = IRStmt_NoOp(); if (0) VG_(printf)("XX\n"); } } - VG_(deleteXA)( pairs ); + + tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123); + tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456); + return sb_in; } +#undef N_TIDYING_PAIRS + /*------------------------------------------------------------*/ /*--- Origin tracking stuff ---*/ @@ -7485,6 +7596,62 @@ static void schemeS ( MCEnv* mce, IRStmt* st ) } +/*------------------------------------------------------------*/ +/*--- Startup assertion checking ---*/ +/*------------------------------------------------------------*/ + +void MC_(do_instrumentation_startup_checks)( void ) +{ + /* Make a best-effort check to see that is_helperc_value_checkN_fail + is working as we expect. */ + +# define CHECK(_expected, _string) \ + tl_assert((_expected) == is_helperc_value_checkN_fail(_string)) + + /* It should identify these 8, and no others, as targets. */ + CHECK(True, "MC_(helperc_value_check8_fail_no_o)"); + CHECK(True, "MC_(helperc_value_check4_fail_no_o)"); + CHECK(True, "MC_(helperc_value_check0_fail_no_o)"); + CHECK(True, "MC_(helperc_value_check1_fail_no_o)"); + CHECK(True, "MC_(helperc_value_check8_fail_w_o)"); + CHECK(True, "MC_(helperc_value_check0_fail_w_o)"); + CHECK(True, "MC_(helperc_value_check1_fail_w_o)"); + CHECK(True, "MC_(helperc_value_check4_fail_w_o)"); + + /* Ad-hoc selection of other strings gathered via a quick test. */ + CHECK(False, "amd64g_dirtyhelper_CPUID_avx2"); + CHECK(False, "amd64g_dirtyhelper_RDTSC"); + CHECK(False, "MC_(helperc_b_load1)"); + CHECK(False, "MC_(helperc_b_load2)"); + CHECK(False, "MC_(helperc_b_load4)"); + CHECK(False, "MC_(helperc_b_load8)"); + CHECK(False, "MC_(helperc_b_load16)"); + CHECK(False, "MC_(helperc_b_load32)"); + CHECK(False, "MC_(helperc_b_store1)"); + CHECK(False, "MC_(helperc_b_store2)"); + CHECK(False, "MC_(helperc_b_store4)"); + CHECK(False, "MC_(helperc_b_store8)"); + CHECK(False, "MC_(helperc_b_store16)"); + CHECK(False, "MC_(helperc_b_store32)"); + CHECK(False, "MC_(helperc_LOADV8)"); + CHECK(False, "MC_(helperc_LOADV16le)"); + CHECK(False, "MC_(helperc_LOADV32le)"); + CHECK(False, "MC_(helperc_LOADV64le)"); + CHECK(False, "MC_(helperc_LOADV128le)"); + CHECK(False, "MC_(helperc_LOADV256le)"); + CHECK(False, "MC_(helperc_STOREV16le)"); + CHECK(False, "MC_(helperc_STOREV32le)"); + CHECK(False, "MC_(helperc_STOREV64le)"); + CHECK(False, "MC_(helperc_STOREV8)"); + CHECK(False, "track_die_mem_stack_8"); + CHECK(False, "track_new_mem_stack_8_w_ECU"); + CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)"); + CHECK(False, "VG_(unknown_SP_update_w_ECU)"); + +# undef CHECK +} + + /*--------------------------------------------------------------------*/ /*--- end mc_translate.c ---*/ /*--------------------------------------------------------------------*/