From: Julian Seward
Date: Mon, 26 Dec 2005 17:58:58 +0000 (+0000)
Subject: More dispatcher tuning for ppc32/64. Makes a big difference for
X-Git-Tag: svn/VALGRIND_3_2_0~441
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=139021b70c35928e3cd7cb0ee6624a4788447b11;p=thirdparty%2Fvalgrind.git

More dispatcher tuning for ppc32/64. Makes a big difference for
perf/tinycc.

- run_thread_for_a_while: just clear this thread's reservation when
  starting, not all of them.

- use a different fast-cache hashing function for ppc32/64 than for
  x86/amd64. This allows the former to use all the fast-cache entries
  rather than just 1/4 of them.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5441
---

diff --git a/coregrind/m_dispatch/dispatch-ppc32-linux.S b/coregrind/m_dispatch/dispatch-ppc32-linux.S
index 643a781fc9..5103675b65 100644
--- a/coregrind/m_dispatch/dispatch-ppc32-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc32-linux.S
@@ -254,14 +254,14 @@ VG_(run_innerloop__dispatch_unprofiled):
         stw     3,OFFSET_ppc32_CIA(31)
 
         /* Are we out of timeslice?  If yes, defer to scheduler. */
-//      subic.  29,29,1
         subi    29,29,1
         cmplwi  29,0
         beq     counter_is_zero
 
         /* try a fast lookup in the translation cache */
-        /* r4=((r3<<2) & (VG_TT_FAST_MASK<<2)) */
-        rlwinm  4,3, 2, 32-2-VG_TT_FAST_BITS, 31-2
+        /* r4 = VG_TT_FAST_HASH(addr) * sizeof(ULong)
+              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 2 */
+        rlwinm  4,3, 0, 32-2-VG_TT_FAST_BITS, 31-2
         addis   5,4,VG_(tt_fast)@ha
         lwz     5,VG_(tt_fast)@l(5)
         lwz     6,4(5)   /* big-endian, so comparing 2nd 32bit word */
@@ -310,12 +310,14 @@ VG_(run_innerloop__dispatch_profiled):
         stw     3,OFFSET_ppc32_CIA(31)
 
         /* Are we out of timeslice?  If yes, defer to scheduler. */
-        addic.  29,29,-1
+        subi    29,29,1
+        cmplwi  29,0
         beq     counter_is_zero
 
         /* try a fast lookup in the translation cache */
-        /* r4=((r3<<2) & (VG_TT_FAST_MASK<<2)) */
-        rlwinm  4,3, 2, 32-2-VG_TT_FAST_BITS, 31-2
+        /* r4 = VG_TT_FAST_HASH(addr) * sizeof(ULong)
+              = ((r3 >>u 2) & VG_TT_FAST_MASK) << 2 */
+        rlwinm  4,3, 0, 32-2-VG_TT_FAST_BITS, 31-2
         addis   5,4,VG_(tt_fast)@ha
         lwz     5,VG_(tt_fast)@l(5)
         lwz     6,4(5)   /* big-endian, so comparing 2nd 32bit word */
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index 1e131199f3..d922e14364 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -331,8 +331,8 @@ static void block_signals(ThreadId tid)
    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
 }
 
-/* Use libc setjmp/longjmp.  longjmp must not restore signal mask
-   state, but does need to pass "val" through. */
+/* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
+   mask state, but does need to pass "val" through. */
 #define SCHEDSETJMP(tid, jumped, stmt) \
    do { \
       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid); \
@@ -343,7 +343,8 @@ static void block_signals(ThreadId tid)
          _qq_tst->sched_jmpbuf_valid = True; \
          stmt; \
       } else if (VG_(clo_trace_sched)) \
-         VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%d\n", __LINE__, tid, jumped); \
+         VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%d\n", \
+                     __LINE__, tid, jumped); \
       vg_assert(_qq_tst->sched_jmpbuf_valid); \
       _qq_tst->sched_jmpbuf_valid = False; \
    } while(0)
@@ -370,7 +371,6 @@ UInt run_thread_for_a_while ( ThreadId tid )
 
    /* Paranoia */
    vg_assert(VG_(is_valid_tid)(tid));
-   vg_assert(VG_(is_valid_tid)(tid));
    vg_assert(VG_(is_running_thread)(tid));
    vg_assert(!VG_(is_exiting)(tid));
 
@@ -408,11 +408,9 @@ UInt run_thread_for_a_while ( ThreadId tid )
       This should be abstractified and lifted out.
    */
-   { Int i;
-     /* Clear any existing reservation.  Be paranoid and clear them all. */
-     for (i = 0; i < VG_N_THREADS; i++)
-        VG_(threads)[i].arch.vex.guest_RESVN = 0;
-   }
+   /* Clear any existing reservation that this thread might have made
+      last time it was running. */
+   VG_(threads)[tid].arch.vex.guest_RESVN = 0;
 
    /* ppc guest_state vector regs must be 16byte aligned for
      loads/stores */
   vg_assert(VG_IS_16_ALIGNED(VG_(threads)[tid].arch.vex.guest_VR0));
@@ -422,7 +420,8 @@ UInt run_thread_for_a_while ( ThreadId tid )
    /* there should be no undealt-with signals */
    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
 
-   //VG_(printf)("running EIP = %p ESP=%p\n", VG_(threads)[tid].arch.m_eip, VG_(threads)[tid].arch.m_esp);
+   //VG_(printf)("running EIP = %p ESP=%p\n",
+   //VG_(threads)[tid].arch.m_eip, VG_(threads)[tid].arch.m_esp);
 
    vg_assert(VG_(my_fault));
    VG_(my_fault) = False;
diff --git a/coregrind/m_transtab.c b/coregrind/m_transtab.c
index 31705a7674..f04a210401 100644
--- a/coregrind/m_transtab.c
+++ b/coregrind/m_transtab.c
@@ -606,7 +606,7 @@ static inline UInt HASH_TT ( Addr64 key )
 
 static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count )
 {
-   UInt cno = ((UInt)key) & VG_TT_FAST_MASK;
+   UInt cno = (UInt)VG_TT_FAST_HASH(key);
    VG_(tt_fast)[cno] = tce;
    VG_(tt_fastN)[cno] = count;
    n_fast_updates++;
diff --git a/coregrind/pub_core_transtab_asm.h b/coregrind/pub_core_transtab_asm.h
index 6041dcda39..24626151ae 100644
--- a/coregrind/pub_core_transtab_asm.h
+++ b/coregrind/pub_core_transtab_asm.h
@@ -31,11 +31,31 @@
 #ifndef __PUB_CORE_TRANSTAB_ASM_H
 #define __PUB_CORE_TRANSTAB_ASM_H
 
-/* Constants for the fast translation lookup cache. */
+/* Constants for the fast translation lookup cache.  It is a direct
+   mapped cache, with 2^VG_TT_FAST_BITS entries.
+
+   On x86/amd64, the cache index is computed as
+   'address[VG_TT_FAST_BITS-1 : 0]'.
+
+   On ppc32/ppc64, the bottom two bits of instruction addresses are
+   zero, which means that function causes only 1/4 of the entries to
+   ever be used.  So instead the function is '(address >>u
+   2)[VG_TT_FAST_BITS-1 : 0]' on those targets. */
+
 #define VG_TT_FAST_BITS 15
 #define VG_TT_FAST_SIZE (1 << VG_TT_FAST_BITS)
 #define VG_TT_FAST_MASK ((VG_TT_FAST_SIZE) - 1)
 
+/* This macro isn't usable in asm land; nevertheless this seems
+   like a good place to put it. */
+#if defined(VGA_x86) || defined(VGA_amd64)
+#  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr))     ) & VG_TT_FAST_MASK)
+#elif defined(VGA_ppc32) || defined(VGA_ppc64)
+#  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK)
+#else
+#  error "VG_TT_FAST_HASH: unknown platform"
+#endif
+
 #endif   // __PUB_CORE_TRANSTAB_ASM_H
 
 /*--------------------------------------------------------------------*/
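
To illustrate the point made in the new pub_core_transtab_asm.h comment, here is a
minimal standalone sketch (not part of the patch): it counts how many of the
direct-mapped fast-cache slots each hash variant can ever reach when fed 4-byte
aligned guest addresses, as on ppc32/64. The VG_TT_FAST_* constants and the two
index expressions are copied from the header above; the helper names, the address
range and the fact that this runs as an ordinary user program are illustrative
assumptions only.

    /* fast_hash_demo.c -- hypothetical standalone demo; the constants
       mirror pub_core_transtab_asm.h above.  Build with: cc fast_hash_demo.c */
    #include <stdio.h>
    #include <string.h>

    typedef unsigned long UWord;

    #define VG_TT_FAST_BITS 15
    #define VG_TT_FAST_SIZE (1 << VG_TT_FAST_BITS)
    #define VG_TT_FAST_MASK ((VG_TT_FAST_SIZE) - 1)

    /* The two variants selected by VG_TT_FAST_HASH in the patch. */
    static UWord hash_x86 ( UWord a ) { return ( a      ) & VG_TT_FAST_MASK; }
    static UWord hash_ppc ( UWord a ) { return ( a >> 2 ) & VG_TT_FAST_MASK; }

    /* Feed a long run of 4-byte aligned addresses (ppc32/64 instruction
       addresses are always 4-aligned) and count the distinct slots hit. */
    static int slots_used ( UWord (*hash)(UWord) )
    {
       static unsigned char used[VG_TT_FAST_SIZE];
       UWord a;
       int   i, n = 0;
       memset(used, 0, sizeof used);
       for (a = 0x10000000UL; a < 0x10000000UL + 4UL*VG_TT_FAST_SIZE; a += 4)
          used[hash(a)] = 1;
       for (i = 0; i < VG_TT_FAST_SIZE; i++)
          n += used[i];
       return n;
    }

    int main ( void )
    {
       printf("x86-style index: %d of %d slots reachable\n",
              slots_used(hash_x86), VG_TT_FAST_SIZE);   /* 8192  == 1/4  */
       printf("ppc-style index: %d of %d slots reachable\n",
              slots_used(hash_ppc), VG_TT_FAST_SIZE);   /* 32768 == all  */
       return 0;
    }

With the x86/amd64-style index the two always-zero low bits end up in the slot
number, so only slot numbers that are multiples of 4 can ever occur. Shifting the
address right by 2 first, as the ppc32/64 variant of VG_TT_FAST_HASH does, makes
every slot reachable, so translations should evict each other from the fast cache
less often.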