From: Julian Seward Date: Fri, 20 Apr 2012 02:19:35 +0000 (+0000) Subject: Fill in some more bits to do with t-chaining for ppc64 X-Git-Tag: svn/VALGRIND_3_8_0~350^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=86dc56291e5600fb7d8b9e7b126cce6698ac25ec;p=thirdparty%2Fvalgrind.git Fill in some more bits to do with t-chaining for ppc64 (still doesn't work) (Valgrind side) git-svn-id: svn://svn.valgrind.org/valgrind/branches/TCHAIN@12513 --- diff --git a/coregrind/m_dispatch/dispatch-ppc32-linux.S b/coregrind/m_dispatch/dispatch-ppc32-linux.S index 61c7bab502..4a2c5d3b87 100644 --- a/coregrind/m_dispatch/dispatch-ppc32-linux.S +++ b/coregrind/m_dispatch/dispatch-ppc32-linux.S @@ -62,7 +62,7 @@ VG_(disp_run_translations): /* r4 holds guest_state */ /* r5 holds host_addr */ - /* ----- entry point to VG_(run_innerloop) ----- */ + /* ----- entry point to VG_(disp_run_translations) ----- */ /* For Linux/ppc32 we need the SysV ABI, which uses LR->4(parent_sp), CR->anywhere. (The AIX ABI, used on Darwin, @@ -104,7 +104,7 @@ VG_(disp_run_translations): stfd 14,352(1) LafterFP1: - /* General reg save area : 72 bytes */ + /* General reg save area : 76 bytes */ stw 31,348(1) stw 30,344(1) stw 29,340(1) @@ -289,8 +289,8 @@ LafterFP8: vcmpequw. 
8,6,7 /* CR[24] = 1 if v6 == v7 */ bt 24,invariant_violation /* branch if all_equal */ #endif -LafterVMX8: +LafterVMX8: /* otherwise we're OK */ b remove_frame @@ -410,7 +410,7 @@ VG_(disp_cp_chain_me_to_slowEP): handing the caller the pair (Chain_me_S, RA) */ li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP mflr 7 - /* 8 = imm32 r30, disp_cp_chain_me_to_slowEP + /* 8 = imm32-fixed2 r30, disp_cp_chain_me_to_slowEP 4 = mtctr r30 4 = btctr */ @@ -426,7 +426,7 @@ VG_(disp_cp_chain_me_to_fastEP): handing the caller the pair (Chain_me_S, RA) */ li 6, VG_TRC_CHAIN_ME_TO_FAST_EP mflr 7 - /* 8 = imm32 r30, disp_cp_chain_me_to_fastEP + /* 8 = imm32-fixed2 r30, disp_cp_chain_me_to_fastEP 4 = mtctr r30 4 = btctr */ diff --git a/coregrind/m_dispatch/dispatch-ppc64-linux.S b/coregrind/m_dispatch/dispatch-ppc64-linux.S index 4c08a7e67e..4068d2c5fb 100644 --- a/coregrind/m_dispatch/dispatch-ppc64-linux.S +++ b/coregrind/m_dispatch/dispatch-ppc64-linux.S @@ -39,57 +39,61 @@ /* References to globals via the TOC */ /* - .globl vgPlain_tt_fast + .globl vgPlain_tt_fast .lcomm vgPlain_tt_fast,4,4 .type vgPlain_tt_fast, @object */ - .section ".toc","aw" +.section ".toc","aw" .tocent__vgPlain_tt_fast: .tc vgPlain_tt_fast[TC],vgPlain_tt_fast -.tocent__vgPlain_tt_fastN: - .tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN -.tocent__vgPlain_dispatch_ctr: - .tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr +.tocent__vgPlain_stats__n_xindirs: + .tc vgPlain_stats__n_xindirs[TC],vgPlain_stats__n_xindirs +.tocent__vgPlain_stats__n_xindir_misses: + .tc vgPlain_stats__n_xindir_misses[TC],vgPlain_stats__n_xindir_misses .tocent__vgPlain_machine_ppc64_has_VMX: .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX /*------------------------------------------------------------*/ /*--- ---*/ -/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/ -/*--- run all translations except no-redir ones. ---*/ +/*--- The dispatch loop. 
VG_(disp_run_translations) is ---*/ +/*--- used to run all translations, ---*/ +/*--- including no-redir ones. ---*/ /*--- ---*/ /*------------------------------------------------------------*/ /*----------------------------------------------------*/ -/*--- Preamble (set everything up) ---*/ +/*--- Entry and preamble (set everything up) ---*/ /*----------------------------------------------------*/ /* signature: -UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling ); +void VG_(disp_run_translations)( UWord* two_words, + void* guest_state, + Addr host_addr ); */ .section ".text" .align 2 -.globl VG_(run_innerloop) +.globl VG_(disp_run_translations) .section ".opd","aw" .align 3 -VG_(run_innerloop): -.quad .VG_(run_innerloop),.TOC.@tocbase,0 +VG_(disp_run_translations): +.quad .VG_(disp_run_translations),.TOC.@tocbase,0 .previous -.type .VG_(run_innerloop),@function -.globl .VG_(run_innerloop) -.VG_(run_innerloop): - /* r3 holds guest_state */ - /* r4 holds do_profiling */ - - /* ----- entry point to VG_(run_innerloop) ----- */ +.type .VG_(disp_run_translations),@function +.globl .VG_(disp_run_translations) +.VG_(disp_run_translations): + /* r3 holds two_words */ + /* r4 holds guest_state */ + /* r5 holds host_addr */ + + /* ----- entry point to VG_(disp_run_translations) ----- */ /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */ /* Save lr, cr */ - mflr 0 - std 0,16(1) - mfcr 0 - std 0,8(1) + mflr 6 + std 6,16(1) + mfcr 6 + std 6,8(1) /* New stack frame */ stdu 1,-624(1) /* sp should maintain 16-byte alignment */ @@ -116,7 +120,7 @@ VG_(run_innerloop): stfd 15,488(1) stfd 14,480(1) - /* General reg save area : 144 bytes */ + /* General reg save area : 152 bytes */ std 31,472(1) std 30,464(1) std 29,456(1) @@ -135,58 +139,56 @@ VG_(run_innerloop): std 16,352(1) std 15,344(1) std 14,336(1) - /* Probably not necessary to save r13 (thread-specific ptr), - as VEX stays clear of it... but what the hey. 
*/ std 13,328(1) + std 3,104(1) /* save two_words for later */ /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI. The Linux kernel might not actually use VRSAVE for its intended purpose, but it should be harmless to preserve anyway. */ - /* r3, r4 are live here, so use r5 */ - ld 5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) - ld 5,0(5) - cmpldi 5,0 + /* r3, r4, r5 are live here, so use r6 */ + ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) + ld 6,0(6) + cmpldi 6,0 beq .LafterVMX1 /* VRSAVE save word : 32 bytes */ - mfspr 5,256 /* vrsave reg is spr number 256 */ - stw 5,324(1) + mfspr 6,256 /* vrsave reg is spr number 256 */ + stw 6,324(1) /* Alignment padding : 4 bytes */ /* Vector reg save area (quadword aligned) : 192 bytes */ - li 5,304 - stvx 31,5,1 - li 5,288 - stvx 30,5,1 - li 5,272 - stvx 29,5,1 - li 5,256 - stvx 28,5,1 - li 5,240 - stvx 27,5,1 - li 5,224 - stvx 26,5,1 - li 5,208 - stvx 25,5,1 - li 5,192 - stvx 24,5,1 - li 5,176 - stvx 23,5,1 - li 5,160 - stvx 22,5,1 - li 5,144 - stvx 21,5,1 - li 5,128 - stvx 20,5,1 + li 6,304 + stvx 31,6,1 + li 6,288 + stvx 30,6,1 + li 6,272 + stvx 29,6,1 + li 6,256 + stvx 28,6,1 + li 6,240 + stvx 27,6,1 + li 6,224 + stvx 26,6,1 + li 6,208 + stvx 25,6,1 + li 6,192 + stvx 24,6,1 + li 6,176 + stvx 23,6,1 + li 6,160 + stvx 22,6,1 + li 6,144 + stvx 21,6,1 + li 6,128 + stvx 20,6,1 .LafterVMX1: /* Local variable space... 
*/ - /* r3 holds guest_state */ - /* r4 holds do_profiling */ - mr 31,3 - std 3,104(1) /* spill orig guest_state ptr */ + /* r3 holds two_words */ + /* r4 holds guest_state */ + /* r5 holds host_addr */ /* 96(sp) used later to check FPSCR[RM] */ /* 88(sp) used later to load fpscr with zero */ @@ -201,13 +203,6 @@ VG_(run_innerloop): 0(sp) : back-chain */ -// CAB TODO: Use a caller-saved reg for orig guest_state ptr -// - rem to set non-allocateable in isel.c - - /* hold dispatch_ctr (=32bit value) in r29 */ - ld 29,.tocent__vgPlain_dispatch_ctr@toc(2) - lwz 29,0(29) /* 32-bit zero-extending load */ - /* set host FPU control word to the default mode expected by VEX-generated code. See comments in libvex.h for more info. */ @@ -215,16 +210,16 @@ VG_(run_innerloop): fsub 3,3,3 is not a reliable way to do this, since if f3 holds a NaN or similar then we don't necessarily wind up with zero. */ - li 5,0 - stw 5,88(1) + li 6,0 + stw 6,88(1) lfs 3,88(1) mtfsf 0xFF,3 /* fpscr = lo32 of f3 */ /* set host AltiVec control word to the default mode expected by VEX-generated code. */ - ld 5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) - ld 5,0(5) - cmpldi 5,0 + ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) + ld 6,0(6) + cmpldi 6,0 beq .LafterVMX2 vspltisw 3,0x0 /* generate zero */ @@ -234,196 +229,34 @@ VG_(run_innerloop): /* make a stack frame for the code we are calling */ stdu 1,-48(1) - /* fetch %CIA into r3 */ - ld 3,OFFSET_ppc64_CIA(31) - - /* fall into main loop (the right one) */ - /* r4 = do_profiling. It's probably trashed after here, - but that's OK: we don't need it after here. 
*/ - cmplwi 4,0 - beq .VG_(run_innerloop__dispatch_unprofiled) - b .VG_(run_innerloop__dispatch_profiled) - /*NOTREACHED*/ - - -/*----------------------------------------------------*/ -/*--- NO-PROFILING (standard) dispatcher ---*/ -/*----------------------------------------------------*/ - - .section ".text" - .align 2 - .globl VG_(run_innerloop__dispatch_unprofiled) - .section ".opd","aw" - .align 3 -VG_(run_innerloop__dispatch_unprofiled): - .quad .VG_(run_innerloop__dispatch_unprofiled),.TOC.@tocbase,0 - .previous - .type .VG_(run_innerloop__dispatch_unprofiled),@function - .globl .VG_(run_innerloop__dispatch_unprofiled) -.VG_(run_innerloop__dispatch_unprofiled): - /* At entry: Live regs: - r1 (=sp) - r2 (toc pointer) - r3 (=CIA = next guest address) - r29 (=dispatch_ctr) - r31 (=guest_state) - Stack state: - 144(r1) (=var space for FPSCR[RM]) - */ - /* Has the guest state pointer been messed with? If yes, exit. - Also set up & VG_(tt_fast) early in an attempt at better - scheduling. */ - ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */ - rldicl. 0,31,0,63 - bne .gsp_changed - - /* save the jump address in the guest state */ - std 3,OFFSET_ppc64_CIA(31) - - /* Are we out of timeslice? If yes, defer to scheduler. */ - subi 29,29,1 - cmpldi 29,0 - beq .counter_is_zero - - /* try a fast lookup in the translation cache */ - /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) - = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */ - rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */ - sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */ - add 5,5,4 /* & VG_(tt_fast)[entry#] */ - ld 6,0(5) /* .guest */ - ld 7,8(5) /* .host */ - cmpd 3,6 - bne .fast_lookup_failed - - /* Found a match. Call .host. */ - mtctr 7 - bctrl - - /* On return from guest code: - r3 holds destination (original) address. - r31 may be unchanged (guest_state), or may indicate further - details of the control transfer requested to *r3. 
- */ - /* start over */ - b .VG_(run_innerloop__dispatch_unprofiled) - /*NOTREACHED*/ - .size .VG_(run_innerloop), .-.VG_(run_innerloop) - - -/*----------------------------------------------------*/ -/*--- PROFILING dispatcher (can be much slower) ---*/ -/*----------------------------------------------------*/ - - .section ".text" - .align 2 - .globl VG_(run_innerloop__dispatch_profiled) - .section ".opd","aw" - .align 3 -VG_(run_innerloop__dispatch_profiled): - .quad .VG_(run_innerloop__dispatch_profiled),.TOC.@tocbase,0 - .previous - .type .VG_(run_innerloop__dispatch_profiled),@function - .globl .VG_(run_innerloop__dispatch_profiled) -.VG_(run_innerloop__dispatch_profiled): - /* At entry: Live regs: - r1 (=sp) - r2 (toc pointer) - r3 (=CIA = next guest address) - r29 (=dispatch_ctr) - r31 (=guest_state) - Stack state: - 144(r1) (=var space for FPSCR[RM]) - */ - /* Has the guest state pointer been messed with? If yes, exit. - Also set up & VG_(tt_fast) early in an attempt at better - scheduling. */ - ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */ - rldicl. 0,31,0,63 - bne .gsp_changed - - /* save the jump address in the guest state */ - std 3,OFFSET_ppc64_CIA(31) + /* Set up the guest state ptr */ + mr 31,4 /* r31 (generated code gsp) = r4 */ - /* Are we out of timeslice? If yes, defer to scheduler. 
*/ - subi 29,29,1 - cmpldi 29,0 - beq .counter_is_zero - - /* try a fast lookup in the translation cache */ - /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) - = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */ - rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */ - sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */ - add 5,5,4 /* & VG_(tt_fast)[entry#] */ - ld 6,0(5) /* .guest */ - ld 7,8(5) /* .host */ - cmpd 3,6 - bne .fast_lookup_failed - - /* increment bb profile counter VG_(tt_fastN)[x] (=32bit val) */ - ld 9, .tocent__vgPlain_tt_fastN@toc(2) - srdi 4, 4,1 /* entry# * sizeof(UInt*) */ - ldx 9, 9,4 /* r7 = VG_(tt_fastN)[VG_TT_HASH(addr)] */ - lwz 6, 0(9) /* *(UInt*)r7 ++ */ - addi 6, 6,1 - stw 6, 0(9) - - /* Found a match. Call .host. */ - mtctr 7 - bctrl - - /* On return from guest code: - r3 holds destination (original) address. - r31 may be unchanged (guest_state), or may indicate further - details of the control transfer requested to *r3. - */ - /* start over */ - b .VG_(run_innerloop__dispatch_profiled) + /* and jump into the code cache. Chained translations in + the code cache run, until for whatever reason, they can't + continue. When that happens, the translation in question + will jump (or call) to one of the continuation points + VG_(cp_...) below. */ + mtctr 5 + bctr /*NOTREACHED*/ - .size .VG_(run_a_noredir_translation), .-.VG_(run_a_noredir_translation) - /*----------------------------------------------------*/ -/*--- exit points ---*/ +/*--- Postamble and exit. ---*/ /*----------------------------------------------------*/ -.gsp_changed: - /* Someone messed with the gsp (in r31). Have to - defer to scheduler to resolve this. dispatch ctr - is not yet decremented, so no need to increment. */ - /* %CIA is NOT up to date here. First, need to write - %r3 back to %CIA, but without trashing %r31 since - that holds the value we want to return to the scheduler. - Hence use %r5 transiently for the guest state pointer. 
*/ - ld 5,152(1) /* original guest_state ptr */ - std 3,OFFSET_ppc64_CIA(5) - mr 3,31 /* r3 = new gsp value */ - b .run_innerloop_exit - /*NOTREACHED*/ - -.counter_is_zero: - /* %CIA is up to date */ - /* back out decrement of the dispatch counter */ - addi 29,29,1 - li 3,VG_TRC_INNER_COUNTERZERO - b .run_innerloop_exit - -.fast_lookup_failed: - /* %CIA is up to date */ - /* back out decrement of the dispatch counter */ - addi 29,29,1 - li 3,VG_TRC_INNER_FASTMISS - b .run_innerloop_exit - - +.postamble: + /* At this point, r6 and r7 contain two + words to be returned to the caller. r6 + holds a TRC value, and r7 optionally may + hold another word (for CHAIN_ME exits, the + address of the place to patch.) */ -/* All exits from the dispatcher go through here. - r3 holds the return value. -*/ -.run_innerloop_exit: /* We're leaving. Check that nobody messed with - VSCR or FPSCR. */ + VSCR or FPSCR in ways we don't expect. */ + /* Using r11 - value used again further on, so don't trash! */ + ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) + ld 11,0(11) /* Set fpscr back to a known state, since vex-generated code may have messed with fpscr[rm]. */ @@ -434,10 +267,7 @@ VG_(run_innerloop__dispatch_profiled): addi 1,1,16 mtfsf 0xFF,3 /* fpscr = f3 */ - /* Using r11 - value used again further on, so don't trash! */ - ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) - ld 11,0(11) - cmpldi 11,0 + cmpldi 11,0 /* Do we have altivec? */ beq .LafterVMX8 /* Check VSCR[NJ] == 1 */ @@ -451,31 +281,18 @@ VG_(run_innerloop__dispatch_profiled): vspltw 7,7,0x3 /* flags-word to all lanes */ vcmpequw. 
8,6,7 /* CR[24] = 1 if v6 == v7 */ bt 24,.invariant_violation /* branch if all_equal */ -.LafterVMX8: +.LafterVMX8: /* otherwise we're OK */ - b .run_innerloop_exit_REALLY - + b .remove_frame .invariant_violation: - li 3,VG_TRC_INVARIANT_FAILED - b .run_innerloop_exit_REALLY - -.run_innerloop_exit_REALLY: - /* r3 holds VG_TRC_* value to return */ - - /* Return to parent stack */ - addi 1,1,48 - - /* Write ctr to VG_(dispatch_ctr) (=32bit value) */ - ld 5,.tocent__vgPlain_dispatch_ctr@toc(2) - stw 29,0(5) + li 6,VG_TRC_INVARIANT_FAILED + li 7,0 + /* fall through */ - /* Restore cr */ - lwz 0,44(1) - mtcr 0 - - /* Restore callee-saved registers... */ +.remove_frame: + /* Restore FP regs */ /* Floating-point regs */ lfd 31,616(1) @@ -497,31 +314,11 @@ VG_(run_innerloop__dispatch_profiled): lfd 15,488(1) lfd 14,480(1) - /* General regs */ - ld 31,472(1) - ld 30,464(1) - ld 29,456(1) - ld 28,448(1) - ld 27,440(1) - ld 26,432(1) - ld 25,424(1) - ld 24,416(1) - ld 23,408(1) - ld 22,400(1) - ld 21,392(1) - ld 20,384(1) - ld 19,376(1) - ld 18,368(1) - ld 17,360(1) - ld 16,352(1) - ld 15,344(1) - ld 14,336(1) - ld 13,328(1) - - /* r11 already holds VG_(machine_ppc64_has_VMX) value */ - cmpldi 11,0 + /* r11 already holds VG_(machine_ppc64_has_VMX) value */ + cmplwi 11,0 beq .LafterVMX9 + /* Restore Altivec regs */ /* VRSAVE */ lwz 4,324(1) mfspr 4,256 /* VRSAVE reg is spr number 256 */ @@ -553,7 +350,33 @@ VG_(run_innerloop__dispatch_profiled): lvx 20,4,1 .LafterVMX9: - /* reset cr, lr, sp */ + /* restore int regs, including importantly r3 (two_words) */ + addi 1,1,48 + ld 31,472(1) + ld 30,464(1) + ld 29,456(1) + ld 28,448(1) + ld 27,440(1) + ld 26,432(1) + ld 25,424(1) + ld 24,416(1) + ld 23,408(1) + ld 22,400(1) + ld 21,392(1) + ld 20,384(1) + ld 19,376(1) + ld 18,368(1) + ld 17,360(1) + ld 16,352(1) + ld 15,344(1) + ld 14,336(1) + ld 13,328(1) + std 3,104(1) + /* Stash return values */ + std 6,0(3) + std 7,8(3) + + /* restore lr & sp, and leave */ ld 0,632(1) /* 
stack_size + 8 */ mtcr 0 ld 0,640(1) /* stack_size + 16 */ @@ -562,94 +385,146 @@ VG_(run_innerloop__dispatch_profiled): blr -/*------------------------------------------------------------*/ -/*--- ---*/ -/*--- A special dispatcher, for running no-redir ---*/ -/*--- translations. Just runs the given translation once. ---*/ -/*--- ---*/ -/*------------------------------------------------------------*/ +/*----------------------------------------------------*/ +/*--- Continuation points ---*/ +/*----------------------------------------------------*/ -/* signature: -void VG_(run_a_noredir_translation) ( UWord* argblock ); -*/ +/* ------ Chain me to slow entry point ------ */ + .section ".text" + .align 2 + .globl VG_(disp_cp_chain_me_to_slowEP) + .section ".opd","aw" + .align 3 +VG_(disp_cp_chain_me_to_slowEP): + .quad .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0 + .previous + .type .VG_(disp_cp_chain_me_to_slowEP),@function + .globl .VG_(disp_cp_chain_me_to_slowEP) +.VG_(disp_cp_chain_me_to_slowEP): + /* We got called. The return address indicates + where the patching needs to happen. Collect + the return address and, exit back to C land, + handing the caller the pair (Chain_me_S, RA) */ + li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP + mflr 7 + /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP + 4 = mtctr r30 + 4 = btctr + */ + subi 7,7,20+4+4 + b .postamble + +/* ------ Chain me to fast entry point ------ */ + .section ".text" + .align 2 + .globl VG_(disp_cp_chain_me_to_fastEP) + .section ".opd","aw" + .align 3 +VG_(disp_cp_chain_me_to_fastEP): + .quad .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0 + .previous + .type .VG_(disp_cp_chain_me_to_fastEP),@function + .globl .VG_(disp_cp_chain_me_to_fastEP) +.VG_(disp_cp_chain_me_to_fastEP): + /* We got called. The return address indicates + where the patching needs to happen. 
Collect + the return address and, exit back to C land, + handing the caller the pair (Chain_me_S, RA) */ + li 6, VG_TRC_CHAIN_ME_TO_FAST_EP + mflr 7 + /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP + 4 = mtctr r30 + 4 = btctr + */ + subi 7,7,20+4+4 + b .postamble + +/* ------ Indirect but boring jump ------ */ + .section ".text" + .align 2 + .globl VG_(disp_cp_xindir) + .section ".opd","aw" + .align 3 +VG_(disp_cp_xindir): + .quad .VG_(disp_cp_xindir),.TOC.@tocbase,0 + .previous + .type .VG_(disp_cp_xindir),@function + .globl .VG_(disp_cp_xindir) +.VG_(disp_cp_xindir): + /* Where are we going? */ + ld 3,OFFSET_ppc64_CIA(31) -/* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args - and 2 to carry results: - 0: input: ptr to translation - 1: input: ptr to guest state - 2: output: next guest PC - 3: output: guest state pointer afterwards (== thread return code) -*/ -.section ".text" -.align 2 -.globl VG_(run_a_noredir_translation) -.section ".opd","aw" -.align 3 -VG_(run_a_noredir_translation): -.quad .VG_(run_a_noredir_translation),.TOC.@tocbase,0 -.previous -.type .VG_(run_a_noredir_translation),@function -.globl .VG_(run_a_noredir_translation) -.VG_(run_a_noredir_translation): - /* save callee-save int regs, & lr */ - stdu 1,-512(1) - std 14,256(1) - std 15,264(1) - std 16,272(1) - std 17,280(1) - std 18,288(1) - std 19,296(1) - std 20,304(1) - std 21,312(1) - std 22,320(1) - std 23,328(1) - std 24,336(1) - std 25,344(1) - std 26,352(1) - std 27,360(1) - std 28,368(1) - std 29,376(1) - std 30,384(1) - std 31,392(1) - mflr 31 - std 31,400(1) - std 2,408(1) /* also preserve R2, just in case .. 
*/ - - std 3,416(1) - ld 31,8(3) - ld 30,0(3) - mtlr 30 - blrl - - ld 4,416(1) - std 3, 16(4) - std 31,24(4) - - ld 14,256(1) - ld 15,264(1) - ld 16,272(1) - ld 17,280(1) - ld 18,288(1) - ld 19,296(1) - ld 20,304(1) - ld 21,312(1) - ld 22,320(1) - ld 23,328(1) - ld 24,336(1) - ld 25,344(1) - ld 26,352(1) - ld 27,360(1) - ld 28,368(1) - ld 29,376(1) - ld 30,384(1) - ld 31,400(1) - mtlr 31 - ld 31,392(1) - ld 2,408(1) /* also preserve R2, just in case .. */ - - addi 1,1,512 - blr + /* stats only */ + ld 5, .tocent__vgPlain_stats__n_xindirs@toc(2) + ld 6,0(5) + addi 6,6,1 + std 6,0(5) + + /* r5 = &VG_(tt_fast) */ + ld 5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */ + /* try a fast lookup in the translation cache */ + /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) + = ((r3 >>u 2) & VG_TT_FAST_MASK) << 4 */ + rldicl 4,3, 62, 64-VG_TT_FAST_BITS /* entry# */ + sldi 4,4,4 /* entry# * sizeof(FastCacheEntry) */ + add 5,5,4 /* & VG_(tt_fast)[entry#] */ + ld 6,0(5) /* .guest */ + ld 7,8(5) /* .host */ + cmpd 3,6 + bne .fast_lookup_failed + + /* Found a match. Jump to .host. 
*/ + mtctr 7 + bctr + +.fast_lookup_failed: + /* stats only */ + ld 5, .tocent__vgPlain_stats__n_xindir_misses@toc(2) + ld 6,0(5) + addi 6,6,1 + std 6,0(5) + + li 6,VG_TRC_INNER_FASTMISS + li 7,0 + b .postamble + /*NOTREACHED*/ + +/* ------ Assisted jump ------ */ +.section ".text" + .align 2 + .globl VG_(disp_cp_xassisted) + .section ".opd","aw" + .align 3 +VG_(disp_cp_xassisted): + .quad .VG_(disp_cp_xassisted),.TOC.@tocbase,0 + .previous + .type .VG_(disp_cp_xassisted),@function + .globl .VG_(disp_cp_xassisted) +.VG_(disp_cp_xassisted): + /* r31 contains the TRC */ + mr 6,31 + li 7,0 + b .postamble + +/* ------ Event check failed ------ */ + .section ".text" + .align 2 + .globl VG_(disp_cp_evcheck_fail) + .section ".opd","aw" + .align 3 +VG_(disp_cp_evcheck_fail): + .quad .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0 + .previous + .type .VG_(disp_cp_evcheck_fail),@function + .globl .VG_(disp_cp_evcheck_fail) +.VG_(disp_cp_evcheck_fail): + li 6,VG_TRC_INNER_COUNTERZERO + li 7,0 + b .postamble + + +.size .VG_(disp_run_translations), .-.VG_(disp_run_translations) /* Let the linker know we don't need an executable stack */ .section .note.GNU-stack,"",@progbits diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c index 09e290801a..6fc5f50fc4 100644 --- a/coregrind/m_scheduler/scheduler.c +++ b/coregrind/m_scheduler/scheduler.c @@ -871,8 +871,9 @@ void run_thread_for_a_while ( /*OUT*/HWord* two_words, //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0); /* Set up event counter stuff for the run. 
*/ - tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP; - tst->arch.vex.host_EvC_FAILADDR = (HWord)&VG_(disp_cp_evcheck_fail); + tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP; + tst->arch.vex.host_EvC_FAILADDR + = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) ); if (0) { vki_sigset_t m; @@ -917,7 +918,7 @@ void run_thread_for_a_while ( /*OUT*/HWord* two_words, vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1); vg_assert(tst->arch.vex.host_EvC_FAILADDR - == (HWord)&VG_(disp_cp_evcheck_fail)); + == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) ); done_this_time = *dispatchCtrP - ((Int)tst->arch.vex.host_EvC_COUNTER + 1); diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c index 8c572fb307..af67d10c7c 100644 --- a/coregrind/m_translate.c +++ b/coregrind/m_translate.c @@ -906,6 +906,7 @@ static void gen_PUSH ( IRSB* bb, IRExpr* e ) Int offB_REDIR_SP = offsetof(VexGuestPPC64State,guest_REDIR_SP); Int offB_REDIR_STACK = offsetof(VexGuestPPC64State,guest_REDIR_STACK); Int offB_EMWARN = offsetof(VexGuestPPC64State,guest_EMWARN); + Int offB_CIA = offsetof(VexGuestPPC64State,guest_CIA); Bool is64 = True; IRType ty_Word = Ity_I64; IROp op_CmpNE = Iop_CmpNE64; @@ -919,6 +920,7 @@ static void gen_PUSH ( IRSB* bb, IRExpr* e ) Int offB_REDIR_SP = offsetof(VexGuestPPC32State,guest_REDIR_SP); Int offB_REDIR_STACK = offsetof(VexGuestPPC32State,guest_REDIR_STACK); Int offB_EMWARN = offsetof(VexGuestPPC32State,guest_EMWARN); + Int offB_CIA = offsetof(VexGuestPPC32State,guest_CIA); Bool is64 = False; IRType ty_Word = Ity_I32; IROp op_CmpNE = Iop_CmpNE32; @@ -970,7 +972,8 @@ static void gen_PUSH ( IRSB* bb, IRExpr* e ) mkU(0) ), Ijk_EmFail, - is64 ? IRConst_U64(0) : IRConst_U32(0) + is64 ? 
IRConst_U64(0) : IRConst_U32(0), + offB_CIA ) ); @@ -997,6 +1000,7 @@ static IRTemp gen_POP ( IRSB* bb ) Int offB_REDIR_SP = offsetof(VexGuestPPC64State,guest_REDIR_SP); Int offB_REDIR_STACK = offsetof(VexGuestPPC64State,guest_REDIR_STACK); Int offB_EMWARN = offsetof(VexGuestPPC64State,guest_EMWARN); + Int offB_CIA = offsetof(VexGuestPPC64State,guest_CIA); Bool is64 = True; IRType ty_Word = Ity_I64; IROp op_CmpNE = Iop_CmpNE64; @@ -1008,6 +1012,7 @@ static IRTemp gen_POP ( IRSB* bb ) Int offB_REDIR_SP = offsetof(VexGuestPPC32State,guest_REDIR_SP); Int offB_REDIR_STACK = offsetof(VexGuestPPC32State,guest_REDIR_STACK); Int offB_EMWARN = offsetof(VexGuestPPC32State,guest_EMWARN); + Int offB_CIA = offsetof(VexGuestPPC32State,guest_CIA); Bool is64 = False; IRType ty_Word = Ity_I32; IROp op_CmpNE = Iop_CmpNE32; @@ -1049,7 +1054,8 @@ static IRTemp gen_POP ( IRSB* bb ) mkU(0) ), Ijk_EmFail, - is64 ? IRConst_U64(0) : IRConst_U32(0) + is64 ? IRConst_U64(0) : IRConst_U32(0), + offB_CIA ) ); @@ -1514,57 +1520,20 @@ Bool VG_(translate) ( ThreadId tid, hassle, because we don't expect them to get used often. So don't bother. */ if (allow_redirection) { - vta.disp_cp_chain_me_to_slowEP = (void*) &VG_(disp_cp_chain_me_to_slowEP); - vta.disp_cp_chain_me_to_fastEP = (void*) &VG_(disp_cp_chain_me_to_fastEP); - vta.disp_cp_xindir = (void*) &VG_(disp_cp_xindir); + vta.disp_cp_chain_me_to_slowEP + = VG_(fnptr_to_fnentry)( &VG_(disp_cp_chain_me_to_slowEP) ); + vta.disp_cp_chain_me_to_fastEP + = VG_(fnptr_to_fnentry)( &VG_(disp_cp_chain_me_to_fastEP) ); + vta.disp_cp_xindir + = VG_(fnptr_to_fnentry)( &VG_(disp_cp_xindir) ); } else { vta.disp_cp_chain_me_to_slowEP = NULL; vta.disp_cp_chain_me_to_fastEP = NULL; vta.disp_cp_xindir = NULL; } - /* Thins doesn't involve chaining and so is always allowable. 
*/ - vta.disp_cp_xassisted = (void*) &VG_(disp_cp_xassisted); - -#if 0 - // FIXME tidy this up and make profiling work again -# if defined(VGA_x86) || defined(VGA_amd64) - if (!allow_redirection) { - /* It's a no-redir translation. Will be run with the - nonstandard dispatcher VG_(run_a_noredir_translation) and so - needs a nonstandard return point. */ - vta.dispatch_assisted - = (void*) &VG_(run_a_noredir_translation__return_point); - vta.dispatch_unassisted - = vta.dispatch_assisted; - } - else - if (VG_(clo_profile_flags) > 0) { - /* normal translation; although we're profiling. */ - vta.dispatch_assisted - = (void*) &VG_(run_innerloop__dispatch_assisted_profiled); - vta.dispatch_unassisted - = (void*) &VG_(run_innerloop__dispatch_unassisted_profiled); - } - else { - /* normal translation and we're not profiling (the normal case) */ - vta.dispatch_assisted - = (void*) &VG_(run_innerloop__dispatch_assisted_unprofiled); - vta.dispatch_unassisted - = (void*) &VG_(run_innerloop__dispatch_unassisted_unprofiled); - } - -# elif defined(VGA_ppc32) || defined(VGA_ppc64) \ - || defined(VGA_arm) || defined(VGA_s390x) - /* See comment in libvex.h. This target uses a - return-to-link-register scheme to get back to the dispatcher, so - both fields are NULL. */ - vta.dispatch_assisted = NULL; - vta.dispatch_unassisted = NULL; - -# else -# error "Unknown arch" -# endif -#endif /* 0 */ + /* This doesn't involve chaining and so is always allowable. */ + vta.disp_cp_xassisted + = VG_(fnptr_to_fnentry)( &VG_(disp_cp_xassisted) ); /* Sheesh. Finally, actually _do_ the translation! */ tres = LibVEX_Translate ( &vta ); diff --git a/coregrind/m_transtab.c b/coregrind/m_transtab.c index a8e5eb512c..8ae48cbc66 100644 --- a/coregrind/m_transtab.c +++ b/coregrind/m_transtab.c @@ -805,8 +805,10 @@ static void unchain_one ( VexArch vex_arch, UChar* place_to_patch = ((HChar*)tte->tcptr) + ie->from_offs; UChar* disp_cp_chain_me - = ie->to_fastEP ? 
&VG_(disp_cp_chain_me_to_fastEP) - : &VG_(disp_cp_chain_me_to_slowEP); + = VG_(fnptr_to_fnentry)( + ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP) + : &VG_(disp_cp_chain_me_to_slowEP) + ); UChar* place_to_jump_to_EXPECTED = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr; diff --git a/coregrind/pub_core_dispatch.h b/coregrind/pub_core_dispatch.h index 5b61f876a0..efb5aebe26 100644 --- a/coregrind/pub_core_dispatch.h +++ b/coregrind/pub_core_dispatch.h @@ -66,13 +66,13 @@ void VG_(disp_run_translations)( HWord* two_words, /* We need to know addresses of the continuation-point (cp_) labels so we can tell VEX what they are. They will get baked into the code - VEX generates. The UChar is entirely mythical, but we need to + VEX generates. The type is entirely mythical, but we need to state _some_ type, so as to keep gcc happy. */ -UChar VG_(disp_cp_chain_me_to_slowEP); -UChar VG_(disp_cp_chain_me_to_fastEP); -UChar VG_(disp_cp_xindir); -UChar VG_(disp_cp_xassisted); -UChar VG_(disp_cp_evcheck_fail); +void VG_(disp_cp_chain_me_to_slowEP)(void); +void VG_(disp_cp_chain_me_to_fastEP)(void); +void VG_(disp_cp_xindir)(void); +void VG_(disp_cp_xassisted)(void); +void VG_(disp_cp_evcheck_fail)(void); #endif // __PUB_CORE_DISPATCH_H