From: Julian Seward Date: Fri, 25 Jan 2019 08:27:23 +0000 (+0100) Subject: Bug 402781 - Redo the cache used to process indirect branch targets. X-Git-Tag: VALGRIND_3_15_0~95 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f96d131ce24cb403cc7a43c19bb651dd25fbe122;p=thirdparty%2Fvalgrind.git Bug 402781 - Redo the cache used to process indirect branch targets. Implementation for x86-solaris and amd64-solaris. This completes the implementations for all targets. Note these two are untested because I don't have any way to test them. --- diff --git a/coregrind/m_dispatch/dispatch-amd64-solaris.S b/coregrind/m_dispatch/dispatch-amd64-solaris.S index 79bb512669..2cccf1ff9e 100644 --- a/coregrind/m_dispatch/dispatch-amd64-solaris.S +++ b/coregrind/m_dispatch/dispatch-amd64-solaris.S @@ -205,28 +205,89 @@ VG_(disp_cp_chain_me_to_fastEP): .global VG_(disp_cp_xindir) VG_(disp_cp_xindir): /* Where are we going? */ - movq OFFSET_amd64_RIP(%rbp), %rax + movq OFFSET_amd64_RIP(%rbp), %rax // "guest" /* stats only */ - addl $1, VG_(stats__n_xindirs_32) - - /* try a fast lookup in the translation cache */ - movabsq $VG_(tt_fast), %rcx - movq %rax, %rbx /* next guest addr */ - andq $VG_TT_FAST_MASK, %rbx /* entry# */ - shlq $4, %rbx /* entry# * sizeof(FastCacheEntry) */ - movq 0(%rcx,%rbx,1), %r10 /* .guest */ - movq 8(%rcx,%rbx,1), %r11 /* .host */ - cmpq %rax, %r10 - jnz fast_lookup_failed - - /* Found a match. Jump to .host. */ - jmp *%r11 - ud2 /* persuade insn decoders not to speculate past here */ - -fast_lookup_failed: + addl $1, VG_(stats__n_xIndirs_32) + + // LIVE: %rbp (guest state ptr), %rax (guest address to go to). + // We use 4 temporaries: + // %r9 (to point at the relevant FastCacheSet), + // %r10, %r11 and %r12 (scratch). + + /* Try a fast lookup in the translation cache. This is pretty much + a handcoded version of VG_(lookupInFastCache). */ + + // Compute %r9 = VG_TT_FAST_HASH(guest) + movq %rax, %r9 // guest + shrq $VG_TT_FAST_BITS, %r9 // (guest >> VG_TT_FAST_BITS) + xorq %rax, %r9 // (guest >> VG_TT_FAST_BITS) ^ guest + andq $VG_TT_FAST_MASK, %r9 // setNo + + // Compute %r9 = &VG_(tt_fast)[%r9] + shlq $VG_FAST_CACHE_SET_BITS, %r9 // setNo * sizeof(FastCacheSet) + movabsq $VG_(tt_fast), %r10 // &VG_(tt_fast)[0] + leaq (%r10, %r9), %r9 // &VG_(tt_fast)[setNo] + + // LIVE: %rbp (guest state ptr), %rax (guest addr), %r9 (cache set) + // try way 0 + cmpq %rax, FCS_g0(%r9) // cmp against .guest0 + jnz 1f + // hit at way 0 + jmp *FCS_h0(%r9) // goto .host0 + ud2 + +1: // try way 1 + cmpq %rax, FCS_g1(%r9) // cmp against .guest1 + jnz 2f + // hit at way 1; swap upwards + /* stats only */ + addl $1, VG_(stats__n_xIndir_hits1_32) + movq FCS_g0(%r9), %r10 // r10 = old .guest0 + movq FCS_h0(%r9), %r11 // r11 = old .host0 + movq FCS_h1(%r9), %r12 // r12 = old .host1 + movq %rax, FCS_g0(%r9) // new .guest0 = guest + movq %r12, FCS_h0(%r9) // new .host0 = old .host1 + movq %r10, FCS_g1(%r9) // new .guest1 = old .guest0 + movq %r11, FCS_h1(%r9) // new .host1 = old .host0 + jmp *%r12 // goto old .host1 a.k.a. new .host0 + ud2 + +2: // try way 2 + cmpq %rax, FCS_g2(%r9) // cmp against .guest2 + jnz 3f + // hit at way 2; swap upwards + /* stats only */ + addl $1, VG_(stats__n_xIndir_hits2_32) + movq FCS_g1(%r9), %r10 + movq FCS_h1(%r9), %r11 + movq FCS_h2(%r9), %r12 + movq %rax, FCS_g1(%r9) + movq %r12, FCS_h1(%r9) + movq %r10, FCS_g2(%r9) + movq %r11, FCS_h2(%r9) + jmp *%r12 + ud2 + +3: // try way 3 + cmpq %rax, FCS_g3(%r9) // cmp against .guest3 + jnz 4f + // hit at way 3; swap upwards + /* stats only */ + addl $1, VG_(stats__n_xIndir_hits3_32) + movq FCS_g2(%r9), %r10 + movq FCS_h2(%r9), %r11 + movq FCS_h3(%r9), %r12 + movq %rax, FCS_g2(%r9) + movq %r12, FCS_h2(%r9) + movq %r10, FCS_g3(%r9) + movq %r11, FCS_h3(%r9) + jmp *%r12 + ud2 + +4: // fast lookup failed /* stats only */ - addl $1, VG_(stats__n_xindir_misses_32) + addl $1, VG_(stats__n_xIndir_misses_32) movq $VG_TRC_INNER_FASTMISS, %rax movq $0, %rdx diff --git a/coregrind/m_dispatch/dispatch-x86-solaris.S b/coregrind/m_dispatch/dispatch-x86-solaris.S index aec5b3a876..c7d23f20f6 100644 --- a/coregrind/m_dispatch/dispatch-x86-solaris.S +++ b/coregrind/m_dispatch/dispatch-x86-solaris.S @@ -198,26 +198,88 @@ VG_(disp_cp_chain_me_to_fastEP): .global VG_(disp_cp_xindir) VG_(disp_cp_xindir): /* Where are we going? */ - movl OFFSET_x86_EIP(%ebp), %eax + movl OFFSET_x86_EIP(%ebp), %eax // "guest" /* stats only */ - addl $1, VG_(stats__n_xindirs_32) - - /* try a fast lookup in the translation cache */ - movl %eax, %ebx /* next guest addr */ - andl $VG_TT_FAST_MASK, %ebx /* entry# */ - movl 0+VG_(tt_fast)(,%ebx,8), %esi /* .guest */ - movl 4+VG_(tt_fast)(,%ebx,8), %edi /* .host */ - cmpl %eax, %esi - jnz fast_lookup_failed - - /* Found a match. Jump to .host. */ - jmp *%edi - ud2 /* persuade insn decoders not to speculate past here */ - -fast_lookup_failed: + addl $1, VG_(stats__n_xIndirs_32) + + // LIVE: %ebp (guest state ptr), %eax (guest address to go to). + // We use 4 temporaries: + // %esi (to point at the relevant FastCacheSet), + // %ebx, %ecx and %edx (scratch). + + /* Try a fast lookup in the translation cache. This is pretty much + a handcoded version of VG_(lookupInFastCache). */ + + // Compute %esi = VG_TT_FAST_HASH(guest) + movl %eax, %esi // guest + shrl $VG_TT_FAST_BITS, %esi // (guest >> VG_TT_FAST_BITS) + xorl %eax, %esi // (guest >> VG_TT_FAST_BITS) ^ guest + andl $VG_TT_FAST_MASK, %esi // setNo + + // Compute %esi = &VG_(tt_fast)[%esi] + shll $VG_FAST_CACHE_SET_BITS, %esi // setNo * sizeof(FastCacheSet) + leal VG_(tt_fast)(%esi), %esi // &VG_(tt_fast)[setNo] + + // LIVE: %ebp (guest state ptr), %eax (guest addr), %esi (cache set) + // try way 0 + cmpl %eax, FCS_g0(%esi) // cmp against .guest0 + jnz 1f + // hit at way 0 + jmp *FCS_h0(%esi) // goto .host0 + ud2 + +1: // try way 1 + cmpl %eax, FCS_g1(%esi) // cmp against .guest1 + jnz 2f + // hit at way 1; swap upwards + /* stats only */ + addl $1, VG_(stats__n_xIndir_hits1_32) + movl FCS_g0(%esi), %ebx // ebx = old .guest0 + movl FCS_h0(%esi), %ecx // ecx = old .host0 + movl FCS_h1(%esi), %edx // edx = old .host1 + movl %eax, FCS_g0(%esi) // new .guest0 = guest + movl %edx, FCS_h0(%esi) // new .host0 = old .host1 + movl %ebx, FCS_g1(%esi) // new .guest1 = old .guest0 + movl %ecx, FCS_h1(%esi) // new .host1 = old .host0 + jmp *%edx // goto old .host1 a.k.a. new .host0 + ud2 + +2: // try way 2 + cmpl %eax, FCS_g2(%esi) // cmp against .guest2 + jnz 3f + // hit at way 2; swap upwards + /* stats only */ + addl $1, VG_(stats__n_xIndir_hits2_32) + movl FCS_g1(%esi), %ebx + movl FCS_h1(%esi), %ecx + movl FCS_h2(%esi), %edx + movl %eax, FCS_g1(%esi) + movl %edx, FCS_h1(%esi) + movl %ebx, FCS_g2(%esi) + movl %ecx, FCS_h2(%esi) + jmp *%edx + ud2 + +3: // try way 3 + cmpl %eax, FCS_g3(%esi) // cmp against .guest3 + jnz 4f + // hit at way 3; swap upwards + /* stats only */ + addl $1, VG_(stats__n_xIndir_hits3_32) + movl FCS_g2(%esi), %ebx + movl FCS_h2(%esi), %ecx + movl FCS_h3(%esi), %edx + movl %eax, FCS_g2(%esi) + movl %edx, FCS_h2(%esi) + movl %ebx, FCS_g3(%esi) + movl %ecx, FCS_h3(%esi) + jmp *%edx + ud2 + +4: // fast lookup failed /* stats only */ - addl $1, VG_(stats__n_xindir_misses_32) + addl $1, VG_(stats__n_xIndir_misses_32) movl $VG_TRC_INNER_FASTMISS, %eax movl $0, %edx