/*------------------------------------------------------------*/
/*--- ---*/
-/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
-/*--- run all translations except no-redir ones. ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/*----------------------------------------------------*/
-/*--- Preamble (set everything up) ---*/
+/*--- Entry and preamble (set everything up) ---*/
/*----------------------------------------------------*/
/* signature:
-UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+void VG_(disp_run_translations)( UWord* two_words,
+ void* guest_state,
+ Addr host_addr );
*/
.text
-.globl VG_(run_innerloop)
-.type VG_(run_innerloop), @function
-VG_(run_innerloop):
- /* r3 holds guest_state */
- /* r4 holds do_profiling */
+.globl VG_(disp_run_translations)
+.type VG_(disp_run_translations), @function
+VG_(disp_run_translations):
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
/* ----- entry point to VG_(run_innerloop) ----- */
/* For Linux/ppc32 we need the SysV ABI, which uses
*/
/* Save lr */
- mflr 0
- stw 0,4(1)
+ mflr 6
+ stw 6,4(1)
/* New stack frame */
stwu 1,-496(1) /* sp should maintain 16-byte alignment */
/* Save callee-saved registers... */
- /* r3, r4 are live here, so use r5 */
- lis 5,VG_(machine_ppc32_has_FP)@ha
- lwz 5,VG_(machine_ppc32_has_FP)@l(5)
- cmplwi 5,0
+ /* r3, r4, r5 are live here, so use r6 */
+ lis 6,VG_(machine_ppc32_has_FP)@ha
+ lwz 6,VG_(machine_ppc32_has_FP)@l(6)
+ cmplwi 6,0
beq LafterFP1
/* Floating-point reg save area : 144 bytes */
stw 16,288(1)
stw 15,284(1)
stw 14,280(1)
- /* Probably not necessary to save r13 (thread-specific ptr),
- as VEX stays clear of it... but what the hey. */
stw 13,276(1)
+ stw 3,272(1) /* save two_words for later */
/* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
The Linux kernel might not actually use VRSAVE for its intended
purpose, but it should be harmless to preserve anyway. */
- /* r3, r4 are live here, so use r5 */
- lis 5,VG_(machine_ppc32_has_VMX)@ha
- lwz 5,VG_(machine_ppc32_has_VMX)@l(5)
- cmplwi 5,0
+ /* r3, r4, r5 are live here, so use r6 */
+ lis 6,VG_(machine_ppc32_has_VMX)@ha
+ lwz 6,VG_(machine_ppc32_has_VMX)@l(6)
+ cmplwi 6,0
beq LafterVMX1
#ifdef HAS_ALTIVEC
/* VRSAVE save word : 32 bytes */
- mfspr 5,256 /* vrsave reg is spr number 256 */
- stw 5,244(1)
+ mfspr 6,256 /* vrsave reg is spr number 256 */
+ stw 6,244(1)
/* Alignment padding : 4 bytes */
/* Vector reg save area (quadword aligned) : 192 bytes */
- li 5,224
- stvx 31,5,1
- li 5,208
- stvx 30,5,1
- li 5,192
- stvx 29,5,1
- li 5,176
- stvx 28,5,1
- li 5,160
- stvx 27,5,1
- li 5,144
- stvx 26,5,1
- li 5,128
- stvx 25,5,1
- li 5,112
- stvx 25,5,1
- li 5,96
- stvx 23,5,1
- li 5,80
- stvx 22,5,1
- li 5,64
- stvx 21,5,1
- li 5,48
- stvx 20,5,1
+ li 6,224
+ stvx 31,6,1
+ li 6,208
+ stvx 30,6,1
+ li 6,192
+ stvx 29,6,1
+ li 6,176
+ stvx 28,6,1
+ li 6,160
+ stvx 27,6,1
+ li 6,144
+ stvx 26,6,1
+ li 6,128
+ stvx 25,6,1
+ li 6,112
+ stvx 24,6,1 /* was stvx 25 (already saved at 128(1)); this slot is v24 */
+ li 6,96
+ stvx 23,6,1
+ li 6,80
+ stvx 22,6,1
+ li 6,64
+ stvx 21,6,1
+ li 6,48
+ stvx 20,6,1
#endif
LafterVMX1:
/* Save cr */
- mfcr 0
- stw 0,44(1)
+ mfcr 6
+ stw 6,44(1)
/* Local variable space... */
/* 32(sp) used later to check FPSCR[RM] */
- /* r3 holds guest_state */
- /* r4 holds do_profiling */
- mr 31,3 /* r31 (generated code gsp) = r3 */
- stw 3,28(1) /* spill orig guest_state ptr */
+ /* r3 holds two_words */
+ /* r4 holds guest_state */
+ /* r5 holds host_addr */
/* 24(sp) used later to stop ctr reg being clobbered */
/* 20(sp) used later to load fpscr with zero */
0(sp) : back-chain
*/
- /* CAB TODO: Use a caller-saved reg for orig guest_state ptr
- - rem to set non-allocateable in isel.c */
-
- /* hold dispatch_ctr in r29 */
- lis 5,VG_(dispatch_ctr)@ha
- lwz 29,VG_(dispatch_ctr)@l(5)
-
/* set host FPU control word to the default mode expected
by VEX-generated code. See comments in libvex.h for
more info. */
- lis 5,VG_(machine_ppc32_has_FP)@ha
- lwz 5,VG_(machine_ppc32_has_FP)@l(5)
- cmplwi 5,0
+ lis 6,VG_(machine_ppc32_has_FP)@ha
+ lwz 6,VG_(machine_ppc32_has_FP)@l(6)
+ cmplwi 6,0
beq LafterFP2
/* get zero into f3 (tedious) */
/* note: fsub 3,3,3 is not a reliable way to do this,
since if f3 holds a NaN or similar then we don't necessarily
wind up with zero. */
- li 5,0
- stw 5,20(1)
+ li 6,0
+ stw 6,20(1)
lfs 3,20(1)
mtfsf 0xFF,3 /* fpscr = f3 */
LafterFP2:
/* set host AltiVec control word to the default mode expected
by VEX-generated code. */
- lis 5,VG_(machine_ppc32_has_VMX)@ha
- lwz 5,VG_(machine_ppc32_has_VMX)@l(5)
- cmplwi 5,0
+ lis 6,VG_(machine_ppc32_has_VMX)@ha
+ lwz 6,VG_(machine_ppc32_has_VMX)@l(6)
+ cmplwi 6,0
beq LafterVMX2
#ifdef HAS_ALTIVEC
/* make a stack frame for the code we are calling */
stwu 1,-16(1)
- /* fetch %CIA into r3 */
- lwz 3,OFFSET_ppc32_CIA(31)
-
- /* fall into main loop (the right one) */
- /* r4 = do_profiling. It's probably trashed after here,
- but that's OK: we don't need it after here. */
- cmplwi 4,0
- beq VG_(run_innerloop__dispatch_unprofiled)
- b VG_(run_innerloop__dispatch_profiled)
- /*NOTREACHED*/
-
-/*----------------------------------------------------*/
-/*--- NO-PROFILING (standard) dispatcher ---*/
-/*----------------------------------------------------*/
-
-.global VG_(run_innerloop__dispatch_unprofiled)
-VG_(run_innerloop__dispatch_unprofiled):
- /* At entry: Live regs:
- r1 (=sp)
- r3 (=CIA = next guest address)
- r29 (=dispatch_ctr)
- r31 (=guest_state)
- */
- /* Has the guest state pointer been messed with? If yes, exit.
- Also set up & VG_(tt_fast) early in an attempt at better
- scheduling. */
- lis 5,VG_(tt_fast)@ha
- addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */
- andi. 0,31,1
- bne gsp_changed
-
- /* save the jump address in the guest state */
- stw 3,OFFSET_ppc32_CIA(31)
-
- /* Are we out of timeslice? If yes, defer to scheduler. */
- subi 29,29,1
- cmplwi 29,0
- beq counter_is_zero
-
- /* try a fast lookup in the translation cache */
- /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
- = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
- rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */
- add 5,5,4 /* & VG_(tt_fast)[entry#] */
- lwz 6,0(5) /* .guest */
- lwz 7,4(5) /* .host */
- cmpw 3,6
- bne fast_lookup_failed
-
- /* Found a match. Call .host. */
- mtctr 7
- bctrl
+ /* Set up the guest state ptr */
+ mr 31,4 /* r31 (generated code gsp) = r4 */
- /* On return from guest code:
- r3 holds destination (original) address.
- r31 may be unchanged (guest_state), or may indicate further
- details of the control transfer requested to *r3.
- */
- /* start over */
- b VG_(run_innerloop__dispatch_unprofiled)
+ /* and jump into the code cache. Chained translations in
+ the code cache run, until for whatever reason, they can't
+ continue. When that happens, the translation in question
+ will jump (or call) to one of the continuation points
+ VG_(cp_...) below. */
+ mtctr 5
+ bctr
/*NOTREACHED*/
/*----------------------------------------------------*/
-/*--- PROFILING dispatcher (can be much slower) ---*/
+/*--- Postamble and exit. ---*/
/*----------------------------------------------------*/
-.global VG_(run_innerloop__dispatch_profiled)
-VG_(run_innerloop__dispatch_profiled):
- /* At entry: Live regs:
- r1 (=sp)
- r3 (=CIA = next guest address)
- r29 (=dispatch_ctr)
- r31 (=guest_state)
- */
- /* Has the guest state pointer been messed with? If yes, exit.
- Also set up & VG_(tt_fast) early in an attempt at better
- scheduling. */
- lis 5,VG_(tt_fast)@ha
- addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */
- andi. 0,31,1
- bne gsp_changed
+postamble:
+ /* At this point, r6 and r7 contain two
+ words to be returned to the caller. r6
+ holds a TRC value, and r7 optionally may
+ hold another word (for CHAIN_ME exits, the
+ address of the place to patch.) */
- /* save the jump address in the guest state */
- stw 3,OFFSET_ppc32_CIA(31)
-
- /* Are we out of timeslice? If yes, defer to scheduler. */
- subi 29,29,1
- cmplwi 29,0
- beq counter_is_zero
-
- /* try a fast lookup in the translation cache */
- /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
- = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
- rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */
- add 5,5,4 /* & VG_(tt_fast)[entry#] */
- lwz 6,0(5) /* .guest */
- lwz 7,4(5) /* .host */
- cmpw 3,6
- bne fast_lookup_failed
-
- /* increment bb profile counter */
- srwi 4,4,1 /* entry# * sizeof(UInt*) */
- addis 6,4,VG_(tt_fastN)@ha
- lwz 9,VG_(tt_fastN)@l(6)
- lwz 8,0(9)
- addi 8,8,1
- stw 8,0(9)
-
- /* Found a match. Call .host. */
- mtctr 7
- bctrl
-
- /* On return from guest code:
- r3 holds destination (original) address.
- r31 may be unchanged (guest_state), or may indicate further
- details of the control transfer requested to *r3.
- */
- /* start over */
- b VG_(run_innerloop__dispatch_profiled)
- /*NOTREACHED*/
-
-/*----------------------------------------------------*/
-/*--- exit points ---*/
-/*----------------------------------------------------*/
-
-gsp_changed:
- /* Someone messed with the gsp (in r31). Have to
- defer to scheduler to resolve this. dispatch ctr
- is not yet decremented, so no need to increment. */
- /* %CIA is NOT up to date here. First, need to write
- %r3 back to %CIA, but without trashing %r31 since
- that holds the value we want to return to the scheduler.
- Hence use %r5 transiently for the guest state pointer. */
- lwz 5,44(1) /* original guest_state ptr */
- stw 3,OFFSET_ppc32_CIA(5)
- mr 3,31 /* r3 = new gsp value */
- b run_innerloop_exit
- /*NOTREACHED*/
-
-counter_is_zero:
- /* %CIA is up to date */
- /* back out decrement of the dispatch counter */
- addi 29,29,1
- li 3,VG_TRC_INNER_COUNTERZERO
- b run_innerloop_exit
-
-fast_lookup_failed:
- /* %CIA is up to date */
- /* back out decrement of the dispatch counter */
- addi 29,29,1
- li 3,VG_TRC_INNER_FASTMISS
- b run_innerloop_exit
-
-
-
-/* All exits from the dispatcher go through here.
- r3 holds the return value.
-*/
-run_innerloop_exit:
/* We're leaving. Check that nobody messed with
- VSCR or FPSCR. */
-
+ VSCR or FPSCR in ways we don't expect. */
/* Using r10 - value used again further on, so don't trash! */
lis 10,VG_(machine_ppc32_has_FP)@ha
lwz 10,VG_(machine_ppc32_has_FP)@l(10)
- cmplwi 10,0
+
+ /* Using r11 - value used again further on, so don't trash! */
+ lis 11,VG_(machine_ppc32_has_VMX)@ha
+ lwz 11,VG_(machine_ppc32_has_VMX)@l(11)
+
+ cmplwi 10,0 /* Do we have FP ? */
beq LafterFP8
/* Set fpscr back to a known state, since vex-generated code
mtfsf 0xFF,3 /* fpscr = f3 */
LafterFP8:
- /* Using r11 - value used again further on, so don't trash! */
- lis 11,VG_(machine_ppc32_has_VMX)@ha
- lwz 11,VG_(machine_ppc32_has_VMX)@l(11)
- cmplwi 11,0
+ cmplwi 11,0 /* Do we have altivec? */
beq LafterVMX8
#ifdef HAS_ALTIVEC
LafterVMX8:
/* otherwise we're OK */
- b run_innerloop_exit_REALLY
-
+ b remove_frame
invariant_violation:
- li 3,VG_TRC_INVARIANT_FAILED
- b run_innerloop_exit_REALLY
-
-run_innerloop_exit_REALLY:
- /* r3 holds VG_TRC_* value to return */
-
- /* Return to parent stack */
- addi 1,1,16
-
- /* Write ctr to VG(dispatch_ctr) */
- lis 5,VG_(dispatch_ctr)@ha
- stw 29,VG_(dispatch_ctr)@l(5)
-
- /* Restore cr */
- lwz 0,44(1)
- mtcr 0
-
- /* Restore callee-saved registers... */
+ li 6,VG_TRC_INVARIANT_FAILED
+ li 7,0
+ /* fall through */
+remove_frame:
+ /* Restore FP regs */
/* r10 already holds VG_(machine_ppc32_has_FP) value */
cmplwi 10,0
beq LafterFP9
lfd 14,352(1)
LafterFP9:
- /* General regs */
- lwz 31,348(1)
- lwz 30,344(1)
- lwz 29,340(1)
- lwz 28,336(1)
- lwz 27,332(1)
- lwz 26,328(1)
- lwz 25,324(1)
- lwz 24,320(1)
- lwz 23,316(1)
- lwz 22,312(1)
- lwz 21,308(1)
- lwz 20,304(1)
- lwz 19,300(1)
- lwz 18,296(1)
- lwz 17,292(1)
- lwz 16,288(1)
- lwz 15,284(1)
- lwz 14,280(1)
- lwz 13,276(1)
-
/* r11 already holds VG_(machine_ppc32_has_VMX) value */
cmplwi 11,0
beq LafterVMX9
+ /* Restore Altivec regs */
#ifdef HAS_ALTIVEC
/* VRSAVE */
lwz 4,244(1)
#endif
LafterVMX9:
- /* reset lr & sp */
+ /* restore int regs, including importantly r3 (two_words) */
+ addi 1,1,16
+ lwz 31,348(1)
+ lwz 30,344(1)
+ lwz 29,340(1)
+ lwz 28,336(1)
+ lwz 27,332(1)
+ lwz 26,328(1)
+ lwz 25,324(1)
+ lwz 24,320(1)
+ lwz 23,316(1)
+ lwz 22,312(1)
+ lwz 21,308(1)
+ lwz 20,304(1)
+ lwz 19,300(1)
+ lwz 18,296(1)
+ lwz 17,292(1)
+ lwz 16,288(1)
+ lwz 15,284(1)
+ lwz 14,280(1)
+ lwz 13,276(1)
+ lwz 3,272(1)
+ /* Stash return values */
+ stw 6,0(3)
+ stw 7,4(3)
+
+ /* restore lr & sp, and leave */
lwz 0,500(1) /* stack_size + 4 */
mtlr 0
addi 1,1,496 /* stack_size */
blr
-.size VG_(run_innerloop), .-VG_(run_innerloop)
-/*------------------------------------------------------------*/
-/*--- ---*/
-/*--- A special dispatcher, for running no-redir ---*/
-/*--- translations. Just runs the given translation once. ---*/
-/*--- ---*/
-/*------------------------------------------------------------*/
+/*----------------------------------------------------*/
+/*--- Continuation points ---*/
+/*----------------------------------------------------*/
-/* signature:
-void VG_(run_a_noredir_translation) ( UWord* argblock );
-*/
+/* ------ Chain me to slow entry point ------ */
+.global VG_(disp_cp_chain_me_to_slowEP)
+VG_(disp_cp_chain_me_to_slowEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and exit back to C land,
+ handing the caller the pair (Chain_me_S, RA) */
+ li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP
+ mflr 7
+ /* 8 = imm32 r30, disp_cp_chain_me_to_slowEP
+ 4 = mtctr r30
+ 4 = bctrl
+ */
+ subi 7,7,8+4+4
+ b postamble
+
+/* ------ Chain me to fast entry point ------ */
+.global VG_(disp_cp_chain_me_to_fastEP)
+VG_(disp_cp_chain_me_to_fastEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and exit back to C land,
+ handing the caller the pair (Chain_me_F, RA) */
+ li 6, VG_TRC_CHAIN_ME_TO_FAST_EP
+ mflr 7
+ /* 8 = imm32 r30, disp_cp_chain_me_to_fastEP
+ 4 = mtctr r30
+ 4 = bctrl
+ */
+ subi 7,7,8+4+4
+ b postamble
-/* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args
- and 2 to carry results:
- 0: input: ptr to translation
- 1: input: ptr to guest state
- 2: output: next guest PC
- 3: output: guest state pointer afterwards (== thread return code)
-*/
-.global VG_(run_a_noredir_translation)
-.type VG_(run_a_noredir_translation), @function
-VG_(run_a_noredir_translation):
- /* save callee-save int regs, & lr */
- stwu 1,-256(1)
- stw 14,128(1)
- stw 15,132(1)
- stw 16,136(1)
- stw 17,140(1)
- stw 18,144(1)
- stw 19,148(1)
- stw 20,152(1)
- stw 21,156(1)
- stw 22,160(1)
- stw 23,164(1)
- stw 24,168(1)
- stw 25,172(1)
- stw 26,176(1)
- stw 27,180(1)
- stw 28,184(1)
- stw 29,188(1)
- stw 30,192(1)
- stw 31,196(1)
- mflr 31
- stw 31,200(1)
-
- stw 3,204(1)
- lwz 31,4(3)
- lwz 30,0(3)
- mtlr 30
- blrl
-
- lwz 4,204(1)
- stw 3, 8(4)
- stw 31,12(4)
-
- lwz 14,128(1)
- lwz 15,132(1)
- lwz 16,136(1)
- lwz 17,140(1)
- lwz 18,144(1)
- lwz 19,148(1)
- lwz 20,152(1)
- lwz 21,156(1)
- lwz 22,160(1)
- lwz 23,164(1)
- lwz 24,168(1)
- lwz 25,172(1)
- lwz 26,176(1)
- lwz 27,180(1)
- lwz 28,184(1)
- lwz 29,188(1)
- lwz 30,192(1)
- lwz 31,200(1)
- mtlr 31
- lwz 31,196(1)
- addi 1,1,256
- blr
-.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)
+/* ------ Indirect but boring jump ------ */
+.global VG_(disp_cp_xindir)
+VG_(disp_cp_xindir):
+ /* Where are we going? */
+ lwz 3,OFFSET_ppc32_CIA(31)
+ /* stats only */
+ lis 5,VG_(stats__n_xindirs)@ha
+ addi 5,5,VG_(stats__n_xindirs)@l
+ lwz 6,4(5)
+ addic. 6,6,1
+ stw 6,4(5)
+ lwz 6,0(5)
+ addze 6,6
+ stw 6,0(5)
+
+ /* r5 = &VG_(tt_fast) */
+ lis 5,VG_(tt_fast)@ha
+ addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */
+
+ /* try a fast lookup in the translation cache */
+ /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+ = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
+ rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */
+ add 5,5,4 /* & VG_(tt_fast)[entry#] */
+ lwz 6,0(5) /* .guest */
+ lwz 7,4(5) /* .host */
+ cmpw 3,6
+ bne fast_lookup_failed
+
+ /* Found a match. Jump to .host. */
+ mtctr 7
+ bctr
+
+fast_lookup_failed:
+ /* stats only */
+ lis 5,VG_(stats__n_xindir_misses)@ha
+ addi 5,5,VG_(stats__n_xindir_misses)@l
+ lwz 6,4(5)
+ addic. 6,6,1
+ stw 6,4(5)
+ lwz 6,0(5)
+ addze 6,6
+ stw 6,0(5)
+
+ li 6,VG_TRC_INNER_FASTMISS
+ li 7,0
+ b postamble
+ /*NOTREACHED*/
+
+/* ------ Assisted jump ------ */
+.global VG_(disp_cp_xassisted)
+VG_(disp_cp_xassisted):
+ /* r31 contains the TRC */
+ mr 6,31
+ li 7,0
+ b postamble
+
+/* ------ Event check failed ------ */
+.global VG_(disp_cp_evcheck_fail)
+VG_(disp_cp_evcheck_fail):
+ li 6,VG_TRC_INNER_COUNTERZERO
+ li 7,0
+ b postamble
+
+
+.size VG_(disp_run_translations), .-VG_(disp_run_translations)
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits