]> git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Add translation chaining support for ppc32 (tested) and to
authorJulian Seward <jseward@acm.org>
Fri, 20 Apr 2012 00:14:02 +0000 (00:14 +0000)
committerJulian Seward <jseward@acm.org>
Fri, 20 Apr 2012 00:14:02 +0000 (00:14 +0000)
a large extent for ppc64 (incomplete, untested) (Valgrind side)

git-svn-id: svn://svn.valgrind.org/valgrind/branches/TCHAIN@12512

coregrind/m_dispatch/dispatch-arm-linux.S
coregrind/m_dispatch/dispatch-ppc32-linux.S
coregrind/m_dispatch/dispatch-x86-linux.S
docs/internals/t-chaining-notes.txt
memcheck/mc_machine.c

index 8ac0dab333c6e6563ee8e069781a4db48f2f1085..2c0ea625cb439eb2c6a3bfcd29692d255c5fad28 100644 (file)
@@ -40,8 +40,9 @@
 
 /*------------------------------------------------------------*/
 /*---                                                      ---*/
-/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
-/*--- run all translations except no-redir ones.           ---*/
+/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
+/*--- used to run all translations,                        ---*/
+/*--- including no-redir ones.                             ---*/
 /*---                                                      ---*/
 /*------------------------------------------------------------*/
 
index edf6065fe94d715f136de4cde5a79ad7daeb2352..61c7bab5029a6fadb2b9752c68c9c801b2c24bec 100644 (file)
 
 /*------------------------------------------------------------*/
 /*---                                                      ---*/
-/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
-/*--- run all translations except no-redir ones.           ---*/
+/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
+/*--- used to run all translations,                        ---*/
+/*--- including no-redir ones.                             ---*/
 /*---                                                      ---*/
 /*------------------------------------------------------------*/
 
 /*----------------------------------------------------*/
-/*--- Preamble (set everything up)                 ---*/
+/*--- Entry and preamble (set everything up)       ---*/
 /*----------------------------------------------------*/
 
 /* signature:
-UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+void VG_(disp_run_translations)( UWord* two_words,
+                                 void*  guest_state,
+                                 Addr   host_addr );
 */
 .text
-.globl  VG_(run_innerloop)
-.type  VG_(run_innerloop), @function
-VG_(run_innerloop):
-       /* r3 holds guest_state */
-       /* r4 holds do_profiling */
+.globl  VG_(disp_run_translations)
+.type  VG_(disp_run_translations), @function
+VG_(disp_run_translations):
+       /* r3 holds two_words */
+       /* r4 holds guest_state */
+        /* r5 holds host_addr */
 
         /* ----- entry point to VG_(run_innerloop) ----- */
         /* For Linux/ppc32 we need the SysV ABI, which uses
@@ -66,17 +70,17 @@ VG_(run_innerloop):
         */
 
         /* Save lr */
-        mflr    0
-        stw     0,4(1)
+        mflr    6
+        stw     6,4(1)
 
         /* New stack frame */
         stwu    1,-496(1)  /* sp should maintain 16-byte alignment */
 
         /* Save callee-saved registers... */
-       /* r3, r4 are live here, so use r5 */
-        lis     5,VG_(machine_ppc32_has_FP)@ha
-        lwz     5,VG_(machine_ppc32_has_FP)@l(5)
-        cmplwi  5,0
+       /* r3, r4, r5 are live here, so use r6 */
+        lis     6,VG_(machine_ppc32_has_FP)@ha
+        lwz     6,VG_(machine_ppc32_has_FP)@l(6)
+        cmplwi  6,0
         beq     LafterFP1
 
         /* Floating-point reg save area : 144 bytes */
@@ -119,67 +123,65 @@ LafterFP1:
         stw     16,288(1)
         stw     15,284(1)
         stw     14,280(1)
-        /* Probably not necessary to save r13 (thread-specific ptr),
-           as VEX stays clear of it... but what the hey. */
         stw     13,276(1)
+        stw     3,272(1)  /* save two_words for later */
 
         /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
            The Linux kernel might not actually use VRSAVE for its intended
            purpose, but it should be harmless to preserve anyway. */
-       /* r3, r4 are live here, so use r5 */
-        lis     5,VG_(machine_ppc32_has_VMX)@ha
-        lwz     5,VG_(machine_ppc32_has_VMX)@l(5)
-        cmplwi  5,0
+       /* r3, r4, r5 are live here, so use r6 */
+        lis     6,VG_(machine_ppc32_has_VMX)@ha
+        lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
+        cmplwi  6,0
         beq     LafterVMX1
 
 #ifdef HAS_ALTIVEC
         /* VRSAVE save word : 32 bytes */
-        mfspr   5,256         /* vrsave reg is spr number 256 */
-        stw     5,244(1)
+        mfspr   6,256         /* vrsave reg is spr number 256 */
+        stw     6,244(1)
 
         /* Alignment padding : 4 bytes */
 
         /* Vector reg save area (quadword aligned) : 192 bytes */
-        li      5,224
-        stvx    31,5,1
-        li      5,208
-        stvx    30,5,1
-        li      5,192
-        stvx    29,5,1
-        li      5,176
-        stvx    28,5,1
-        li      5,160
-        stvx    27,5,1
-        li      5,144
-        stvx    26,5,1
-        li      5,128
-        stvx    25,5,1
-        li      5,112
-        stvx    25,5,1
-        li      5,96
-        stvx    23,5,1
-        li      5,80
-        stvx    22,5,1
-        li      5,64
-        stvx    21,5,1
-        li      5,48
-        stvx    20,5,1
+        li      6,224
+        stvx    31,6,1
+        li      6,208
+        stvx    30,6,1
+        li      6,192
+        stvx    29,6,1
+        li      6,176
+        stvx    28,6,1
+        li      6,160
+        stvx    27,6,1
+        li      6,144
+        stvx    26,6,1
+        li      6,128
+        stvx    25,6,1
+        li      6,112
+        stvx    24,6,1
+        li      6,96
+        stvx    23,6,1
+        li      6,80
+        stvx    22,6,1
+        li      6,64
+        stvx    21,6,1
+        li      6,48
+        stvx    20,6,1
 #endif
         
 LafterVMX1:
 
         /* Save cr */
-        mfcr    0
-        stw     0,44(1)
+        mfcr    6
+        stw     6,44(1)
 
         /* Local variable space... */
 
         /* 32(sp) used later to check FPSCR[RM] */
 
-        /* r3 holds guest_state */
-        /* r4 holds do_profiling */
-        mr      31,3      /* r31 (generated code gsp) = r3 */
-        stw     3,28(1)   /* spill orig guest_state ptr */
+       /* r3 holds two_words */
+       /* r4 holds guest_state */
+        /* r5 holds host_addr */
 
         /* 24(sp) used later to stop ctr reg being clobbered */
         /* 20(sp) used later to load fpscr with zero */
@@ -190,36 +192,29 @@ LafterVMX1:
            0(sp)  : back-chain
         */
 
-        /* CAB TODO: Use a caller-saved reg for orig guest_state ptr
-           - rem to set non-allocateable in isel.c */
-
-        /* hold dispatch_ctr in r29 */
-        lis     5,VG_(dispatch_ctr)@ha
-        lwz     29,VG_(dispatch_ctr)@l(5)
-
         /* set host FPU control word to the default mode expected 
            by VEX-generated code.  See comments in libvex.h for
            more info. */
-        lis     5,VG_(machine_ppc32_has_FP)@ha
-        lwz     5,VG_(machine_ppc32_has_FP)@l(5)
-        cmplwi  5,0
+        lis     6,VG_(machine_ppc32_has_FP)@ha
+        lwz     6,VG_(machine_ppc32_has_FP)@l(6)
+        cmplwi  6,0
         beq     LafterFP2
 
         /* get zero into f3 (tedious) */
         /* note: fsub 3,3,3 is not a reliable way to do this, 
            since if f3 holds a NaN or similar then we don't necessarily
            wind up with zero. */
-        li      5,0
-        stw     5,20(1)
+        li      6,0
+        stw     6,20(1)
         lfs     3,20(1)
         mtfsf   0xFF,3   /* fpscr = f3 */
 LafterFP2:
 
         /* set host AltiVec control word to the default mode expected 
            by VEX-generated code. */
-        lis     5,VG_(machine_ppc32_has_VMX)@ha
-        lwz     5,VG_(machine_ppc32_has_VMX)@l(5)
-        cmplwi  5,0
+        lis     6,VG_(machine_ppc32_has_VMX)@ha
+        lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
+        cmplwi  6,0
         beq     LafterVMX2
 
 #ifdef HAS_ALTIVEC
@@ -232,172 +227,40 @@ LafterVMX2:
         /* make a stack frame for the code we are calling */
         stwu    1,-16(1)
 
-        /* fetch %CIA into r3 */
-        lwz     3,OFFSET_ppc32_CIA(31)
-
-        /* fall into main loop (the right one) */
-       /* r4 = do_profiling.  It's probably trashed after here,
-           but that's OK: we don't need it after here. */
-       cmplwi  4,0
-       beq     VG_(run_innerloop__dispatch_unprofiled)
-       b       VG_(run_innerloop__dispatch_profiled)
-       /*NOTREACHED*/
-
-/*----------------------------------------------------*/
-/*--- NO-PROFILING (standard) dispatcher           ---*/
-/*----------------------------------------------------*/
-
-.global        VG_(run_innerloop__dispatch_unprofiled)
-VG_(run_innerloop__dispatch_unprofiled):
-       /* At entry: Live regs:
-               r1  (=sp)
-               r3  (=CIA = next guest address)
-               r29 (=dispatch_ctr)
-               r31 (=guest_state)
-       */
-       /* Has the guest state pointer been messed with?  If yes, exit.
-           Also set up & VG_(tt_fast) early in an attempt at better
-           scheduling. */
-        lis    5,VG_(tt_fast)@ha
-        addi    5,5,VG_(tt_fast)@l   /* & VG_(tt_fast) */
-        andi.   0,31,1
-        bne    gsp_changed
-
-        /* save the jump address in the guest state */
-        stw     3,OFFSET_ppc32_CIA(31)
-
-        /* Are we out of timeslice?  If yes, defer to scheduler. */
-       subi    29,29,1
-       cmplwi  29,0
-        beq    counter_is_zero
-
-        /* try a fast lookup in the translation cache */
-        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
-              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
-       rlwinm  4,3,1, 29-VG_TT_FAST_BITS, 28   /* entry# * 8 */
-       add     5,5,4   /* & VG_(tt_fast)[entry#] */
-       lwz     6,0(5)   /* .guest */
-       lwz     7,4(5)   /* .host */
-        cmpw    3,6
-        bne     fast_lookup_failed
-
-        /* Found a match.  Call .host. */
-        mtctr   7
-        bctrl
+        /* Set up the guest state ptr */
+        mr      31,4      /* r31 (generated code gsp) = r4 */
 
-        /* On return from guest code:
-          r3  holds destination (original) address.
-           r31 may be unchanged (guest_state), or may indicate further
-           details of the control transfer requested to *r3.
-        */
-       /* start over */
-       b       VG_(run_innerloop__dispatch_unprofiled)
+        /* and jump into the code cache.  Chained translations in
+           the code cache run, until for whatever reason, they can't
+           continue.  When that happens, the translation in question
+           will jump (or call) to one of the continuation points
+           VG_(cp_...) below. */
+        mtctr   5
+        bctr
        /*NOTREACHED*/
 
 /*----------------------------------------------------*/
-/*--- PROFILING dispatcher (can be much slower)    ---*/
+/*--- Postamble and exit.                          ---*/
 /*----------------------------------------------------*/
 
-.global        VG_(run_innerloop__dispatch_profiled)
-VG_(run_innerloop__dispatch_profiled):
-       /* At entry: Live regs:
-               r1 (=sp)
-               r3  (=CIA = next guest address)
-               r29 (=dispatch_ctr)
-               r31 (=guest_state)
-       */
-       /* Has the guest state pointer been messed with?  If yes, exit.
-           Also set up & VG_(tt_fast) early in an attempt at better
-           scheduling. */
-        lis    5,VG_(tt_fast)@ha
-        addi    5,5,VG_(tt_fast)@l   /* & VG_(tt_fast) */
-        andi.   0,31,1
-        bne    gsp_changed
+postamble:
+        /* At this point, r6 and r7 contain two
+           words to be returned to the caller.  r6
+           holds a TRC value, and r7 optionally may
+           hold another word (for CHAIN_ME exits, the
+           address of the place to patch.) */
 
-        /* save the jump address in the guest state */
-        stw     3,OFFSET_ppc32_CIA(31)
-
-        /* Are we out of timeslice?  If yes, defer to scheduler. */
-       subi    29,29,1
-       cmplwi  29,0
-        beq    counter_is_zero
-
-        /* try a fast lookup in the translation cache */
-        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
-              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
-       rlwinm  4,3,1, 29-VG_TT_FAST_BITS, 28   /* entry# * 8 */
-       add     5,5,4   /* & VG_(tt_fast)[entry#] */
-       lwz     6,0(5)   /* .guest */
-       lwz     7,4(5)   /* .host */
-        cmpw    3,6
-        bne     fast_lookup_failed
-
-        /* increment bb profile counter */
-       srwi    4,4,1   /* entry# * sizeof(UInt*) */
-        addis   6,4,VG_(tt_fastN)@ha
-        lwz     9,VG_(tt_fastN)@l(6)
-        lwz     8,0(9)
-        addi    8,8,1
-        stw     8,0(9)
-
-        /* Found a match.  Call .host. */
-        mtctr   7
-        bctrl
-
-        /* On return from guest code:
-          r3  holds destination (original) address.
-           r31 may be unchanged (guest_state), or may indicate further
-           details of the control transfer requested to *r3.
-        */
-       /* start over */
-       b       VG_(run_innerloop__dispatch_profiled)
-       /*NOTREACHED*/
-
-/*----------------------------------------------------*/
-/*--- exit points                                  ---*/
-/*----------------------------------------------------*/
-
-gsp_changed:
-       /* Someone messed with the gsp (in r31).  Have to
-           defer to scheduler to resolve this.  dispatch ctr
-          is not yet decremented, so no need to increment. */
-       /* %CIA is NOT up to date here.  First, need to write
-          %r3 back to %CIA, but without trashing %r31 since
-          that holds the value we want to return to the scheduler.
-          Hence use %r5 transiently for the guest state pointer. */
-        lwz     5,44(1)         /* original guest_state ptr */
-        stw     3,OFFSET_ppc32_CIA(5)
-       mr      3,31            /* r3 = new gsp value */
-       b       run_innerloop_exit
-       /*NOTREACHED*/
-
-counter_is_zero:
-       /* %CIA is up to date */
-       /* back out decrement of the dispatch counter */
-        addi    29,29,1
-        li      3,VG_TRC_INNER_COUNTERZERO
-        b       run_innerloop_exit
-
-fast_lookup_failed:
-       /* %CIA is up to date */
-       /* back out decrement of the dispatch counter */
-        addi    29,29,1
-        li      3,VG_TRC_INNER_FASTMISS
-       b       run_innerloop_exit
-
-
-
-/* All exits from the dispatcher go through here.
-   r3 holds the return value. 
-*/
-run_innerloop_exit: 
         /* We're leaving.  Check that nobody messed with
-           VSCR or FPSCR. */
-
+           VSCR or FPSCR in ways we don't expect. */
         /* Using r10 - value used again further on, so don't trash! */
         lis     10,VG_(machine_ppc32_has_FP)@ha
         lwz     10,VG_(machine_ppc32_has_FP)@l(10)
-        cmplwi  10,0
+
+       /* Using r11 - value used again further on, so don't trash! */
+        lis     11,VG_(machine_ppc32_has_VMX)@ha
+        lwz     11,VG_(machine_ppc32_has_VMX)@l(11)
+
+        cmplwi  10,0    /* Do we have FP ? */
         beq     LafterFP8
 
        /* Set fpscr back to a known state, since vex-generated code
@@ -410,10 +273,7 @@ run_innerloop_exit:
         mtfsf   0xFF,3   /* fpscr = f3 */
 LafterFP8:
 
-       /* Using r11 - value used again further on, so don't trash! */
-        lis     11,VG_(machine_ppc32_has_VMX)@ha
-        lwz     11,VG_(machine_ppc32_has_VMX)@l(11)
-        cmplwi  11,0
+        cmplwi  11,0    /* Do we have altivec? */
         beq     LafterVMX8
 
 #ifdef HAS_ALTIVEC
@@ -432,29 +292,15 @@ LafterFP8:
 LafterVMX8:
 
        /* otherwise we're OK */
-        b       run_innerloop_exit_REALLY
-
+        b       remove_frame
 
 invariant_violation:
-        li      3,VG_TRC_INVARIANT_FAILED
-        b       run_innerloop_exit_REALLY
-
-run_innerloop_exit_REALLY:
-        /* r3 holds VG_TRC_* value to return */
-
-        /* Return to parent stack */
-        addi    1,1,16
-
-        /* Write ctr to VG(dispatch_ctr) */
-        lis     5,VG_(dispatch_ctr)@ha
-        stw     29,VG_(dispatch_ctr)@l(5)
-
-        /* Restore cr */
-        lwz     0,44(1)
-        mtcr    0
-
-        /* Restore callee-saved registers... */
+        li      6,VG_TRC_INVARIANT_FAILED
+        li      7,0
+        /* fall through */
 
+remove_frame:
+        /* Restore FP regs */
         /* r10 already holds VG_(machine_ppc32_has_FP) value */
         cmplwi  10,0
         beq     LafterFP9
@@ -480,31 +326,11 @@ run_innerloop_exit_REALLY:
         lfd     14,352(1)
 LafterFP9:
 
-        /* General regs */
-        lwz     31,348(1)
-        lwz     30,344(1)
-        lwz     29,340(1)
-        lwz     28,336(1)
-        lwz     27,332(1)
-        lwz     26,328(1)
-        lwz     25,324(1)
-        lwz     24,320(1)
-        lwz     23,316(1)
-        lwz     22,312(1)
-        lwz     21,308(1)
-        lwz     20,304(1)
-        lwz     19,300(1)
-        lwz     18,296(1)
-        lwz     17,292(1)
-        lwz     16,288(1)
-        lwz     15,284(1)
-        lwz     14,280(1)
-        lwz     13,276(1)
-
         /* r11 already holds VG_(machine_ppc32_has_VMX) value */
         cmplwi  11,0
         beq     LafterVMX9
 
+        /* Restore Altivec regs */
 #ifdef HAS_ALTIVEC
         /* VRSAVE */
         lwz     4,244(1)
@@ -538,92 +364,142 @@ LafterFP9:
 #endif
 LafterVMX9:
 
-        /* reset lr & sp */
+        /* restore int regs, including importantly r3 (two_words) */
+        addi    1,1,16
+        lwz     31,348(1)
+        lwz     30,344(1)
+        lwz     29,340(1)
+        lwz     28,336(1)
+        lwz     27,332(1)
+        lwz     26,328(1)
+        lwz     25,324(1)
+        lwz     24,320(1)
+        lwz     23,316(1)
+        lwz     22,312(1)
+        lwz     21,308(1)
+        lwz     20,304(1)
+        lwz     19,300(1)
+        lwz     18,296(1)
+        lwz     17,292(1)
+        lwz     16,288(1)
+        lwz     15,284(1)
+        lwz     14,280(1)
+        lwz     13,276(1)
+        lwz     3,272(1)
+        /* Stash return values */
+        stw     6,0(3)
+        stw     7,4(3)
+
+        /* restore lr & sp, and leave */
         lwz     0,500(1)  /* stack_size + 4 */
         mtlr    0
         addi    1,1,496   /* stack_size */
         blr
-.size VG_(run_innerloop), .-VG_(run_innerloop)
 
 
-/*------------------------------------------------------------*/
-/*---                                                      ---*/
-/*--- A special dispatcher, for running no-redir           ---*/
-/*--- translations.  Just runs the given translation once. ---*/
-/*---                                                      ---*/
-/*------------------------------------------------------------*/
+/*----------------------------------------------------*/
+/*--- Continuation points                          ---*/
+/*----------------------------------------------------*/
 
-/* signature:
-void VG_(run_a_noredir_translation) ( UWord* argblock );
-*/
+/* ------ Chain me to slow entry point ------ */
+.global VG_(disp_cp_chain_me_to_slowEP)
+VG_(disp_cp_chain_me_to_slowEP):
+        /* We got called.  The return address indicates
+           where the patching needs to happen.  Collect
+           the return address and exit back to C land,
+           handing the caller the pair (Chain_me_S, RA) */
+        li   6, VG_TRC_CHAIN_ME_TO_SLOW_EP
+        mflr 7
+        /* 8 = imm32 r30, disp_cp_chain_me_to_slowEP
+           4 = mtctr r30
+           4 = bctrl
+        */
+        subi 7,7,8+4+4
+        b    postamble
+
+/* ------ Chain me to fast entry point ------ */
+.global VG_(disp_cp_chain_me_to_fastEP)
+VG_(disp_cp_chain_me_to_fastEP):
+        /* We got called.  The return address indicates
+           where the patching needs to happen.  Collect
+           the return address and, exit back to C land,
+           handing the caller the pair (Chain_me_F, RA) */
+        li   6, VG_TRC_CHAIN_ME_TO_FAST_EP
+        mflr 7
+        /* 8 = imm32 r30, disp_cp_chain_me_to_fastEP
+           4 = mtctr r30
+           4 = bctrl
+        */
+        subi 7,7,8+4+4
+        b    postamble
 
-/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
-   and 2 to carry results:
-      0: input:  ptr to translation
-      1: input:  ptr to guest state
-      2: output: next guest PC
-      3: output: guest state pointer afterwards (== thread return code)
-*/
-.global VG_(run_a_noredir_translation)
-.type VG_(run_a_noredir_translation), @function
-VG_(run_a_noredir_translation):
-       /* save callee-save int regs, & lr */
-       stwu 1,-256(1)
-       stw  14,128(1)
-       stw  15,132(1)
-       stw  16,136(1)
-       stw  17,140(1)
-       stw  18,144(1)
-       stw  19,148(1)
-       stw  20,152(1)
-       stw  21,156(1)
-       stw  22,160(1)
-       stw  23,164(1)
-       stw  24,168(1)
-       stw  25,172(1)
-       stw  26,176(1)
-       stw  27,180(1)
-       stw  28,184(1)
-       stw  29,188(1)
-       stw  30,192(1)
-       stw  31,196(1)
-       mflr 31
-       stw  31,200(1)
-
-       stw  3,204(1)
-       lwz  31,4(3)
-       lwz  30,0(3)
-       mtlr 30
-       blrl
-
-       lwz  4,204(1)
-       stw  3,  8(4)
-       stw  31,12(4)
-
-       lwz  14,128(1)
-       lwz  15,132(1)
-       lwz  16,136(1)
-       lwz  17,140(1)
-       lwz  18,144(1)
-       lwz  19,148(1)
-       lwz  20,152(1)
-       lwz  21,156(1)
-       lwz  22,160(1)
-       lwz  23,164(1)
-       lwz  24,168(1)
-       lwz  25,172(1)
-       lwz  26,176(1)
-       lwz  27,180(1)
-       lwz  28,184(1)
-       lwz  29,188(1)
-       lwz  30,192(1)
-       lwz  31,200(1)
-       mtlr 31
-       lwz  31,196(1)
-       addi 1,1,256
-       blr
-.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)
+/* ------ Indirect but boring jump ------ */
+.global VG_(disp_cp_xindir)
+VG_(disp_cp_xindir):
+        /* Where are we going? */
+        lwz     3,OFFSET_ppc32_CIA(31)
 
+        /* stats only */
+        lis     5,VG_(stats__n_xindirs)@ha
+        addi    5,5,VG_(stats__n_xindirs)@l
+        lwz     6,4(5)
+        addic.  6,6,1
+        stw     6,4(5)
+        lwz     6,0(5)
+        addze   6,6
+        stw     6,0(5)
+        
+        /* r5 = &VG_(tt_fast) */
+        lis    5,VG_(tt_fast)@ha
+        addi    5,5,VG_(tt_fast)@l   /* & VG_(tt_fast) */
+
+        /* try a fast lookup in the translation cache */
+        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
+              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
+       rlwinm  4,3,1, 29-VG_TT_FAST_BITS, 28   /* entry# * 8 */
+       add     5,5,4   /* & VG_(tt_fast)[entry#] */
+       lwz     6,0(5)   /* .guest */
+       lwz     7,4(5)   /* .host */
+        cmpw    3,6
+        bne     fast_lookup_failed
+
+        /* Found a match.  Jump to .host. */
+        mtctr   7
+        bctr
+
+fast_lookup_failed:
+        /* stats only */
+        lis     5,VG_(stats__n_xindir_misses)@ha
+        addi    5,5,VG_(stats__n_xindir_misses)@l
+        lwz     6,4(5)
+        addic.  6,6,1
+        stw     6,4(5)
+        lwz     6,0(5)
+        addze   6,6
+        stw     6,0(5)
+
+        li      6,VG_TRC_INNER_FASTMISS
+        li      7,0
+        b       postamble
+       /*NOTREACHED*/
+
+/* ------ Assisted jump ------ */
+.global VG_(disp_cp_xassisted)
+VG_(disp_cp_xassisted):
+        /* r31 contains the TRC */
+        mr      6,31
+        li      7,0
+        b       postamble
+
+/* ------ Event check failed ------ */
+.global VG_(disp_cp_evcheck_fail)
+VG_(disp_cp_evcheck_fail):
+        li      6,VG_TRC_INNER_COUNTERZERO
+        li      7,0
+        b       postamble
+
+        
+.size VG_(disp_run_translations), .-VG_(disp_run_translations)
 
 /* Let the linker know we don't need an executable stack */
 .section .note.GNU-stack,"",@progbits
index eaf10cf372c6692f59c7c22cb61f3e274654ff17..f34d8bfca445409bb101e5845afff1ec78cefe66 100644 (file)
@@ -39,8 +39,9 @@
 
 /*------------------------------------------------------------*/
 /*---                                                      ---*/
-/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
-/*--- run all translations except no-redir ones.           ---*/
+/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
+/*--- used to run all translations,                        ---*/
+/*--- including no-redir ones.                             ---*/
 /*---                                                      ---*/
 /*------------------------------------------------------------*/
 
index 1f4e367770ab206f7413f6243263bd64f4562c73..ed1d5e3216d690df85046eab25512cd513064934 100644 (file)
@@ -1,6 +1,6 @@
 
 DO NOT MERGE
-~~~~~~~~~~~
+~~~~~~~~~~~~
 
 Changes memcheck/tests/Makefile.am w.r.t. -mfloat-abi=softfp
 Ditto none/tests/arm/Makefile.am
@@ -28,16 +28,26 @@ host_x86_defs.c, host_amd64_defs.c: return proper VexInvalRange
 records from the patchers, instead of {0,0}, so that transparent
 self hosting works properly.
 
+host_ppc_defs.h: is RdWrLR still needed?  If not delete.
+
+ditto ARM, Ld8S
+
+make sure IRStmt_Exit3 is completely gone.
+
+all backends: iselStmt(Ist_Exit) vs iselNext: make sure that the same
+JKs are handled, else it's not safe against branch sense switching
+
 
 Optimisations
 ~~~~~~~~~~~~~
 all targets: change VG_(stats__n_xindirs) to a 32 bit counter, and
-empty out every now and again.
+empty out every now and again.  Ditto VG_(stats__n_xindir_misses).
 
 amd64: XDirect: write const value to guest_RIP using single
 insn when the value is < 0x8000'0000
 
 arm: chain_XDirect: generate short form jumps when possible
+ppc: chain_XDirect: generate short form jumps when possible
 
 arm codegen: Generate ORRS for CmpwNEZ32(Or32(x,y))
 
index 3be57a5ca0bfdab4764c2dadab657722edf31e82..66551d96dea54af98901e4b09cb78e4e27b6d17b 100644 (file)
@@ -393,7 +393,7 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
 
    if (o == GOF(CIA)       && sz == 4) return -1;
    if (o == GOF(IP_AT_SYSCALL) && sz == 4) return -1; /* slot unused */
-   if (o == GOF(FPROUND)   && sz == 4) return -1;
+   if (o == GOF(FPROUND)   && sz == 1) return -1;
    if (o == GOF(VRSAVE)    && sz == 4) return -1;
    if (o == GOF(EMWARN)    && sz == 4) return -1;
    if (o == GOF(TISTART)   && sz == 4) return -1;