Two performance improvements, prompted by perf/tinycc:
- run_thread_for_a_while: just clear this thread's reservation when
starting, not all of them.
- use a different fast-cache hashing function for ppc32/64 than for
  x86/amd64.  This allows the former to use all the fast-cache entries
  rather than just 1/4 of them (a short sketch of the effect follows
  the commit message).
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5441
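A minimal standalone sketch of that effect (not part of the patch; the
constants mirror VG_TT_FAST_BITS/MASK defined in pub_core_transtab_asm.h
below, and the addresses are made up): ppc instruction addresses are
4-byte aligned, so 'addr & MASK' only ever yields indices that are
multiples of 4, whereas '(addr >> 2) & MASK' uses consecutive indices.

   #include <stdio.h>

   #define FAST_BITS 15                       /* mirrors VG_TT_FAST_BITS */
   #define FAST_MASK ((1u << FAST_BITS) - 1)  /* mirrors VG_TT_FAST_MASK */

   int main(void)
   {
      /* Four consecutive ppc instruction addresses (always 4-byte aligned). */
      unsigned int addrs[] = { 0x10000000u, 0x10000004u,
                               0x10000008u, 0x1000000Cu };
      for (int i = 0; i < 4; i++) {
         unsigned int x86_idx = addrs[i] & FAST_MASK;        /* old scheme */
         unsigned int ppc_idx = (addrs[i] >> 2) & FAST_MASK; /* new scheme */
         printf("addr %#x -> x86-style index %u, ppc-style index %u\n",
                addrs[i], x86_idx, ppc_idx);
      }
      /* x86-style indices are 0,4,8,12 (only multiples of 4 are reachable);
         ppc-style indices are 0,1,2,3 (the whole table is reachable). */
      return 0;
   }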
stw 3,OFFSET_ppc32_CIA(31)
/* Are we out of timeslice? If yes, defer to scheduler. */
-// subic. 29,29,1
subi 29,29,1
cmplwi 29,0
beq counter_is_zero
/* try a fast lookup in the translation cache */
- /* r4=((r3<<2) & (VG_TT_FAST_MASK<<2)) */
- rlwinm 4,3, 2, 32-2-VG_TT_FAST_BITS, 31-2
+ /* r4 = VG_TT_FAST_HASH(addr) * sizeof(ULong*)
+ = ((r3 >>u 2) & VG_TT_FAST_MASK) << 2 */
+ rlwinm 4,3, 0, 32-2-VG_TT_FAST_BITS, 31-2
addis 5,4,VG_(tt_fast)@ha
lwz 5,VG_(tt_fast)@l(5)
lwz 6,4(5) /* big-endian, so comparing 2nd 32bit word */
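As a reading aid for the rlwinm change (a sketch only, assuming
VG_TT_FAST_BITS is 15 as in the header further down): rotating by 0 and
keeping bits 2..16 is just an AND with VG_TT_FAST_MASK << 2, so r4 ends
up holding the hash scaled by the 4-byte size of a VG_(tt_fast) entry
on ppc32.

   /* Sketch: C equivalent of
         rlwinm 4,3, 0, 32-2-VG_TT_FAST_BITS, 31-2
      where r3 is the guest address and r4 the resulting byte offset
      into VG_(tt_fast). */
   #define VG_TT_FAST_BITS 15
   #define VG_TT_FAST_MASK ((1u << VG_TT_FAST_BITS) - 1)

   static unsigned int r4_from_r3 ( unsigned int r3 )
   {
      /* ((r3 >> 2) & VG_TT_FAST_MASK) << 2  ==  r3 & (VG_TT_FAST_MASK << 2) */
      return r3 & (VG_TT_FAST_MASK << 2);
   }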
stw 3,OFFSET_ppc32_CIA(31)
/* Are we out of timeslice? If yes, defer to scheduler. */
- addic. 29,29,-1
+ subi 29,29,1
+ cmplwi 29,0
beq counter_is_zero
/* try a fast lookup in the translation cache */
- /* r4=((r3<<2) & (VG_TT_FAST_MASK<<2)) */
- rlwinm 4,3, 2, 32-2-VG_TT_FAST_BITS, 31-2
+ /* r4 = VG_TT_FAST_HASH(addr) * sizeof(ULong*)
+ = ((r3 >>u 2) & VG_TT_FAST_MASK) << 2 */
+ rlwinm 4,3, 0, 32-2-VG_TT_FAST_BITS, 31-2
addis 5,4,VG_(tt_fast)@ha
lwz 5,VG_(tt_fast)@l(5)
lwz 6,4(5) /* big-endian, so comparing 2nd 32bit word */
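The replaced 'addic.' form and the new 'subi'/'cmplwi' form branch under
the same condition: the dispatch counter in r29 is decremented and the
code defers to the scheduler when it reaches zero. In C terms, roughly
(the variable name is made up for the sketch):

   /* Sketch of the timeslice check; 'dispatch_ctr' stands in for r29. */
   dispatch_ctr--;                  /* subi   29,29,1         */
   if (dispatch_ctr == 0)           /* cmplwi 29,0 ; beq ...  */
      goto counter_is_zero;         /* defer to the scheduler */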
VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
}
-/* Use libc setjmp/longjmp. longjmp must not restore signal mask
- state, but does need to pass "val" through. */
+/* Use gcc's built-in setjmp/longjmp. longjmp must not restore signal
+ mask state, but does need to pass "val" through. */
#define SCHEDSETJMP(tid, jumped, stmt) \
do { \
ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid); \
(jumped) = __builtin_setjmp(_qq_tst->sched_jmpbuf); \
if ((jumped) == 0) { \
_qq_tst->sched_jmpbuf_valid = True; \
stmt; \
} else if (VG_(clo_trace_sched)) \
- VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%d\n", __LINE__, tid, jumped); \
+ VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%d\n", \
+ __LINE__, tid, jumped); \
vg_assert(_qq_tst->sched_jmpbuf_valid); \
_qq_tst->sched_jmpbuf_valid = False; \
} while(0)
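A hypothetical call site, to show how the macro is meant to be used (the
statement, the trc handling, and the choice of VG_TRC_FAULT_SIGNAL are
illustrative, not the scheduler's exact code): stmt runs with a valid
jump buffer installed, and anything that longjmps back lands just after
it with 'jumped' non-zero.

   /* Hypothetical usage (illustrative only): */
   Int  jumped;
   UInt trc = 0;
   SCHEDSETJMP(tid, jumped, trc = run_thread_for_a_while(tid));
   if (jumped) {
      /* We got here via longjmp (e.g. out of a fault handler), so the
         statement did not complete normally. */
      trc = VG_TRC_FAULT_SIGNAL;
   }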
/* Paranoia */
vg_assert(VG_(is_valid_tid)(tid));
- vg_assert(VG_(is_valid_tid)(tid));
vg_assert(VG_(is_running_thread)(tid));
vg_assert(!VG_(is_exiting)(tid));
This should be abstractified and lifted out.
*/
- { Int i;
- /* Clear any existing reservation. Be paranoid and clear them all. */
- for (i = 0; i < VG_N_THREADS; i++)
- VG_(threads)[i].arch.vex.guest_RESVN = 0;
- }
+ /* Clear any existing reservation that this thread might have made
+ last time it was running. */
+ VG_(threads)[tid].arch.vex.guest_RESVN = 0;
/* ppc guest_state vector regs must be 16byte aligned for loads/stores */
vg_assert(VG_IS_16_ALIGNED(VG_(threads)[tid].arch.vex.guest_VR0));
/* there should be no undealt-with signals */
//vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
- //VG_(printf)("running EIP = %p ESP=%p\n", VG_(threads)[tid].arch.m_eip, VG_(threads)[tid].arch.m_esp);
+ //VG_(printf)("running EIP = %p ESP=%p\n",
+ //VG_(threads)[tid].arch.m_eip, VG_(threads)[tid].arch.m_esp);
vg_assert(VG_(my_fault));
VG_(my_fault) = False;
static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count )
{
- UInt cno = ((UInt)key) & VG_TT_FAST_MASK;
+ UInt cno = (UInt)VG_TT_FAST_HASH(key);
VG_(tt_fast)[cno] = tce;
VG_(tt_fastN)[cno] = count;
n_fast_updates++;
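For completeness, the lookup side has to hash identically for an entry
written here ever to be found again; a sketch of that read path (the
function name is made up, and the caller would still have to check that
the returned entry really is for 'key'):

   /* Sketch only: read side of the fast cache, mirroring setFastCacheEntry. */
   static ULong* lookupFastCacheEntry ( Addr64 key )
   {
      UInt cno = (UInt)VG_TT_FAST_HASH(key);
      return VG_(tt_fast)[cno];  /* may point at some other translation;
                                    the caller must verify the guest address */
   }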
#ifndef __PUB_CORE_TRANSTAB_ASM_H
#define __PUB_CORE_TRANSTAB_ASM_H
-/* Constants for the fast translation lookup cache. */
+/* Constants for the fast translation lookup cache. It is a direct
+ mapped cache, with 2^VG_TT_FAST_BITS entries.
+
+ On x86/amd64, the cache index is computed as
+ 'address[VG_TT_FAST_BITS-1 : 0]'.
+
+ On ppc32/ppc64, the bottom two bits of instruction addresses are
+ zero, which means that function causes only 1/4 of the entries to
+ ever be used. So instead the function is '(address >>u
+ 2)[VG_TT_FAST_BITS-1 : 0]' on those targets. */
+
#define VG_TT_FAST_BITS 15
#define VG_TT_FAST_SIZE (1 << VG_TT_FAST_BITS)
#define VG_TT_FAST_MASK ((VG_TT_FAST_SIZE) - 1)
+/* This macro isn't usable in asm land; nevertheless this seems
+ like a good place to put it. */
+#if defined(VGA_x86) || defined(VGA_amd64)
+# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) ) & VG_TT_FAST_MASK)
+#elif defined(VGA_ppc32) || defined(VGA_ppc64)
+# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK)
+#else
+# error "VG_TT_FAST_HASH: unknown platform"
+#endif
+
#endif // __PUB_CORE_TRANSTAB_ASM_H
/*--------------------------------------------------------------------*/