From: Julian Seward Date: Sun, 14 Mar 2010 15:09:27 +0000 (+0000) Subject: Improve performance of the fallback path when a translation is not X-Git-Tag: svn/VALGRIND_3_6_0~333 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=10a6d4b0f6f81c982e65c5722728a4f4cbca1cd1;p=thirdparty%2Fvalgrind.git Improve performance of the fallback path when a translation is not found in the fast-cache. * reduce max loading of the per-sector TT hash tables from 80% to 65%. This reduces the number of required probes by a factor of 3. * when searching for a translation, don't visit the sectors in a fixed order. Instead, use an MTF array in which the most popular sectors (in terms of most likely to hold the translation we're looking for) are visited first. This reduces the number of required probes by another factor of 2. These improvements have no effect on small programs, but improve scalability on big apps. For an application comprising 300k translations, runtime on Memcheck is reduced by 3% and on None by about 20%. The average number of probes per fast-cache miss is reduced from around 22 to less than 5. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11091 --- diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c index fbc93ca3aa..a153b5b6b4 100644 --- a/coregrind/m_scheduler/scheduler.c +++ b/coregrind/m_scheduler/scheduler.c @@ -852,7 +852,7 @@ static void handle_tt_miss ( ThreadId tid ) /* Trivial event. Miss in the fast-cache. Do a full lookup for it. */ found = VG_(search_transtab)( NULL, ip, True/*upd_fast_cache*/ ); - if (!found) { + if (UNLIKELY(!found)) { /* Not found; we need to request a translation. */ if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done, True/*allow redirection*/ )) { diff --git a/coregrind/m_transtab.c b/coregrind/m_transtab.c index fbe02b1eb7..690c0325d5 100644 --- a/coregrind/m_transtab.c +++ b/coregrind/m_transtab.c @@ -71,7 +71,7 @@ /* Because each sector contains a hash table of TTEntries, we need to specify the maximum allowable loading, after which the sector is deemed full. */ -#define SECTOR_TT_LIMIT_PERCENT 80 +#define SECTOR_TT_LIMIT_PERCENT 65 /* The sector is deemed full when this many entries are in it. */ #define N_TTES_PER_SECTOR_USABLE \ @@ -207,6 +207,13 @@ static Int youngest_sector = -1; static Int tc_sector_szQ; +/* A list of sector numbers, in the order which they should be + searched to find translations. This is an optimisation to be used + when searching for translations and should not affect + correctness. -1 denotes "no entry". */ +static Int sector_search_order[N_SECTORS]; + + /* Fast helper for the TC. A direct-mapped cache which holds a set of recently used (guest address, host address) pairs. This array is referred to directly from m_dispatch/dispatch-.S. @@ -570,6 +577,44 @@ static Bool sanity_check_eclasses_in_sector ( Sector* sec ) static Bool sanity_check_redir_tt_tc ( void ); static Bool sanity_check_fastcache ( void ); +static Bool sanity_check_sector_search_order ( void ) +{ + Int i, j, nListed; + /* assert the array is the right size */ + vg_assert(N_SECTORS == (sizeof(sector_search_order) + / sizeof(sector_search_order[0]))); + /* Check it's of the form valid_sector_numbers ++ [-1, -1, ..] */ + for (i = 0; i < N_SECTORS; i++) { + if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS) + break; + } + nListed = i; + for (/* */; i < N_SECTORS; i++) { + if (sector_search_order[i] != -1) + break; + } + if (i != N_SECTORS) + return False; + /* Check each sector number only appears once */ + for (i = 0; i < N_SECTORS; i++) { + if (sector_search_order[i] == -1) + continue; + for (j = i+1; j < N_SECTORS; j++) { + if (sector_search_order[j] == sector_search_order[i]) + return False; + } + } + /* Check that the number of listed sectors equals the number + in use, by counting nListed back down. */ + for (i = 0; i < N_SECTORS; i++) { + if (sectors[i].tc != NULL) + nListed--; + } + if (nListed != 0) + return False; + return True; +} + static Bool sanity_check_all_sectors ( void ) { Int sno; @@ -587,10 +632,13 @@ static Bool sanity_check_all_sectors ( void ) return False; if ( !sanity_check_fastcache() ) return False; + if ( !sanity_check_sector_search_order() ) + return False; return True; } + /*-------------------------------------------------------------*/ /*--- Add/find translations ---*/ /*-------------------------------------------------------------*/ @@ -703,6 +751,9 @@ static void initialiseSector ( Int sno ) Sector* sec; vg_assert(isValidSector(sno)); + { Bool sane = sanity_check_sector_search_order(); + vg_assert(sane); + } sec = §ors[sno]; if (sec->tc == NULL) { @@ -742,6 +793,14 @@ static void initialiseSector ( Int sno ) sec->tt[i].n_tte2ec = 0; } + /* Add an entry in the sector_search_order */ + for (i = 0; i < N_SECTORS; i++) { + if (sector_search_order[i] == -1) + break; + } + vg_assert(i >= 0 && i < N_SECTORS); + sector_search_order[i] = sno; + if (VG_(clo_verbosity) > 2) VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno); @@ -786,6 +845,14 @@ static void initialiseSector ( Int sno ) } } + /* Sanity check: ensure it is already in + sector_search_order[]. */ + for (i = 0; i < N_SECTORS; i++) { + if (sector_search_order[i] == sno) + break; + } + vg_assert(i >= 0 && i < N_SECTORS); + if (VG_(clo_verbosity) > 2) VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno); } @@ -794,6 +861,10 @@ static void initialiseSector ( Int sno ) sec->tt_n_inuse = 0; invalidateFastCache(); + + { Bool sane = sanity_check_sector_search_order(); + vg_assert(sane); + } } static void invalidate_icache ( void *ptr, Int nbytes ) @@ -986,14 +1057,13 @@ Bool VG_(search_transtab) ( /*OUT*/AddrH* result, kstart = HASH_TT(guest_addr); vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR); - /* Search in all the sectors. Although the order should not matter, - it might be most efficient to search in the order youngest to - oldest. */ - sno = youngest_sector; + /* Search in all the sectors,using sector_search_order[] as a + heuristic guide as to what order to visit the sectors. */ for (i = 0; i < N_SECTORS; i++) { - if (sectors[sno].tc == NULL) - goto notfound; /* sector not in use. */ + sno = sector_search_order[i]; + if (UNLIKELY(sno == -1)) + return False; /* run out of sectors to search */ k = kstart; for (j = 0; j < N_TTES_PER_SECTOR; j++) { @@ -1007,6 +1077,14 @@ Bool VG_(search_transtab) ( /*OUT*/AddrH* result, §ors[sno].tt[k].count ); if (result) *result = (AddrH)sectors[sno].tt[k].tcptr; + /* pull this one one step closer to the front. For large + apps this more or less halves the number of required + probes. */ + if (i > 0) { + Int tmp = sector_search_order[i-1]; + sector_search_order[i-1] = sector_search_order[i]; + sector_search_order[i] = tmp; + } return True; } if (sectors[sno].tt[k].status == Empty) @@ -1019,10 +1097,6 @@ Bool VG_(search_transtab) ( /*OUT*/AddrH* result, /* If we fall off the end, all entries are InUse and not matching, or Deleted. In any case we did not find it in this sector. */ - - notfound: - /* move to the next oldest sector */ - sno = sno==0 ? (N_SECTORS-1) : (sno-1); } /* Not found in any sector. */ @@ -1498,6 +1572,10 @@ void VG_(init_tt_tc) ( void ) } } + /* Initialise the sector_search_order hint table. */ + for (i = 0; i < N_SECTORS; i++) + sector_search_order[i] = -1; + /* Initialise the fast caches. If not profiling (the usual case), we have to explicitly invalidate the fastN cache as invalidateFastCache() won't do that for us. */