From: Julian Seward Date: Thu, 28 Apr 2011 14:58:15 +0000 (+0000) Subject: Change the TT_FAST hash function from "insn_address >> 2" to X-Git-Tag: svn/VALGRIND_3_7_0~521 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5e6d4577de06b2d310ad674a9b57c5c75117edfa;p=thirdparty%2Fvalgrind.git Change the TT_FAST hash function from "insn_address >> 2" to "insn_address >> 1". The former is appropriate for ARM code, where all insns are 4-sized and 4-aligned, but not for Thumb code, where the minimum size and alignment is 2. The old scheme happened to work for Thumb (indeed, any hash function would), but caused huge amounts of conflict misses in the fast cache for some programs. The change has been observed to reduce conflict misses by up to 100 times, and in some cases, improves performance significantly for Thumb code. Performance of ARM code is unchanged or possibly a bit worse. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11716 --- diff --git a/coregrind/m_dispatch/dispatch-arm-linux.S b/coregrind/m_dispatch/dispatch-arm-linux.S index 8c92814153..f67aeefa7d 100644 --- a/coregrind/m_dispatch/dispatch-arm-linux.S +++ b/coregrind/m_dispatch/dispatch-arm-linux.S @@ -99,7 +99,7 @@ VG_(run_innerloop__dispatch_unprofiled): /* try a fast lookup in the translation cache */ // r0 = next guest, r1,r2,r3 scratch ldr r1, =VG_TT_FAST_MASK // r1 = VG_TT_FAST_MASK - and r2, r1, r0, LSR #2 // r2 = entry # + and r2, r1, r0, LSR #1 // r2 = entry # ldr r1, =VG_(tt_fast) // r1 = &tt_fast[0] add r1, r1, r2, LSL #3 // r1 = &tt_fast[entry#] ldr r3, [r1, #0] /* .guest */ @@ -144,7 +144,7 @@ VG_(run_innerloop__dispatch_profiled): /* try a fast lookup in the translation cache */ // r0 = next guest, r1,r2,r3 scratch ldr r1, =VG_TT_FAST_MASK // r1 = VG_TT_FAST_MASK - and r2, r1, r0, LSR #2 // r2 = entry # + and r2, r1, r0, LSR #1 // r2 = entry # ldr r1, =VG_(tt_fast) // r1 = &tt_fast[0] add r1, r1, r2, LSL #3 // r1 = &tt_fast[entry#] ldr r3, [r1, #0] /* .guest */ 
diff --git a/coregrind/pub_core_transtab_asm.h b/coregrind/pub_core_transtab_asm.h index 76e48db1b5..f76924764a 100644 --- a/coregrind/pub_core_transtab_asm.h +++ b/coregrind/pub_core_transtab_asm.h @@ -54,12 +54,16 @@ /* This macro isn't usable in asm land; nevertheless this seems like a good place to put it. */ + #if defined(VGA_x86) || defined(VGA_amd64) # define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) ) & VG_TT_FAST_MASK) -#elif defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm) -# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK) -#elif defined(VGA_s390x) + +#elif defined(VGA_s390x) || defined(VGA_arm) # define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) >> 1) & VG_TT_FAST_MASK) + +#elif defined(VGA_ppc32) || defined(VGA_ppc64) +# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK) + #else # error "VG_TT_FAST_HASH: unknown platform" #endif