From: Julian Seward Date: Fri, 20 Apr 2012 00:14:02 +0000 (+0000) Subject: Add translation chaining support for ppc32 (tested) and to X-Git-Tag: svn/VALGRIND_3_8_0~350^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6d68ec0346c26487800b257331b8c1b4c4d22f7a;p=thirdparty%2Fvalgrind.git Add translation chaining support for ppc32 (tested) and to a large extent for ppc64 (incomplete, untested) (Valgrind side) git-svn-id: svn://svn.valgrind.org/valgrind/branches/TCHAIN@12512 --- diff --git a/coregrind/m_dispatch/dispatch-arm-linux.S b/coregrind/m_dispatch/dispatch-arm-linux.S index 8ac0dab333..2c0ea625cb 100644 --- a/coregrind/m_dispatch/dispatch-arm-linux.S +++ b/coregrind/m_dispatch/dispatch-arm-linux.S @@ -40,8 +40,9 @@ /*------------------------------------------------------------*/ /*--- ---*/ -/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/ -/*--- run all translations except no-redir ones. ---*/ +/*--- The dispatch loop. VG_(disp_run_translations) is ---*/ +/*--- used to run all translations, ---*/ +/*--- including no-redir ones. ---*/ /*--- ---*/ /*------------------------------------------------------------*/ diff --git a/coregrind/m_dispatch/dispatch-ppc32-linux.S b/coregrind/m_dispatch/dispatch-ppc32-linux.S index edf6065fe9..61c7bab502 100644 --- a/coregrind/m_dispatch/dispatch-ppc32-linux.S +++ b/coregrind/m_dispatch/dispatch-ppc32-linux.S @@ -39,24 +39,28 @@ /*------------------------------------------------------------*/ /*--- ---*/ -/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/ -/*--- run all translations except no-redir ones. ---*/ +/*--- The dispatch loop. VG_(disp_run_translations) is ---*/ +/*--- used to run all translations, ---*/ +/*--- including no-redir ones. 
---*/ /*--- ---*/ /*------------------------------------------------------------*/ /*----------------------------------------------------*/ -/*--- Preamble (set everything up) ---*/ +/*--- Entry and preamble (set everything up) ---*/ /*----------------------------------------------------*/ /* signature: -UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling ); +void VG_(disp_run_translations)( UWord* two_words, + void* guest_state, + Addr host_addr ); */ .text -.globl VG_(run_innerloop) -.type VG_(run_innerloop), @function -VG_(run_innerloop): - /* r3 holds guest_state */ - /* r4 holds do_profiling */ +.globl VG_(disp_run_translations) +.type VG_(disp_run_translations), @function +VG_(disp_run_translations): + /* r3 holds two_words */ + /* r4 holds guest_state */ + /* r5 holds host_addr */ /* ----- entry point to VG_(run_innerloop) ----- */ /* For Linux/ppc32 we need the SysV ABI, which uses @@ -66,17 +70,17 @@ VG_(run_innerloop): */ /* Save lr */ - mflr 0 - stw 0,4(1) + mflr 6 + stw 6,4(1) /* New stack frame */ stwu 1,-496(1) /* sp should maintain 16-byte alignment */ /* Save callee-saved registers... */ - /* r3, r4 are live here, so use r5 */ - lis 5,VG_(machine_ppc32_has_FP)@ha - lwz 5,VG_(machine_ppc32_has_FP)@l(5) - cmplwi 5,0 + /* r3, r4, r5 are live here, so use r6 */ + lis 6,VG_(machine_ppc32_has_FP)@ha + lwz 6,VG_(machine_ppc32_has_FP)@l(6) + cmplwi 6,0 beq LafterFP1 /* Floating-point reg save area : 144 bytes */ @@ -119,67 +123,65 @@ LafterFP1: stw 16,288(1) stw 15,284(1) stw 14,280(1) - /* Probably not necessary to save r13 (thread-specific ptr), - as VEX stays clear of it... but what the hey. */ stw 13,276(1) + stw 3,272(1) /* save two_words for later */ /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI. The Linux kernel might not actually use VRSAVE for its intended purpose, but it should be harmless to preserve anyway. 
*/ - /* r3, r4 are live here, so use r5 */ - lis 5,VG_(machine_ppc32_has_VMX)@ha - lwz 5,VG_(machine_ppc32_has_VMX)@l(5) - cmplwi 5,0 + /* r3, r4, r5 are live here, so use r6 */ + lis 6,VG_(machine_ppc32_has_VMX)@ha + lwz 6,VG_(machine_ppc32_has_VMX)@l(6) + cmplwi 6,0 beq LafterVMX1 #ifdef HAS_ALTIVEC /* VRSAVE save word : 32 bytes */ - mfspr 5,256 /* vrsave reg is spr number 256 */ - stw 5,244(1) + mfspr 6,256 /* vrsave reg is spr number 256 */ + stw 6,244(1) /* Alignment padding : 4 bytes */ /* Vector reg save area (quadword aligned) : 192 bytes */ - li 5,224 - stvx 31,5,1 - li 5,208 - stvx 30,5,1 - li 5,192 - stvx 29,5,1 - li 5,176 - stvx 28,5,1 - li 5,160 - stvx 27,5,1 - li 5,144 - stvx 26,5,1 - li 5,128 - stvx 25,5,1 - li 5,112 - stvx 25,5,1 - li 5,96 - stvx 23,5,1 - li 5,80 - stvx 22,5,1 - li 5,64 - stvx 21,5,1 - li 5,48 - stvx 20,5,1 + li 6,224 + stvx 31,6,1 + li 6,208 + stvx 30,6,1 + li 6,192 + stvx 29,6,1 + li 6,176 + stvx 28,6,1 + li 6,160 + stvx 27,6,1 + li 6,144 + stvx 26,6,1 + li 6,128 + stvx 25,6,1 + li 6,112 + stvx 25,6,1 + li 6,96 + stvx 23,6,1 + li 6,80 + stvx 22,6,1 + li 6,64 + stvx 21,6,1 + li 6,48 + stvx 20,6,1 #endif LafterVMX1: /* Save cr */ - mfcr 0 - stw 0,44(1) + mfcr 6 + stw 6,44(1) /* Local variable space... */ /* 32(sp) used later to check FPSCR[RM] */ - /* r3 holds guest_state */ - /* r4 holds do_profiling */ - mr 31,3 /* r31 (generated code gsp) = r3 */ - stw 3,28(1) /* spill orig guest_state ptr */ + /* r3 holds two_words */ + /* r4 holds guest_state */ + /* r5 holds host_addr */ /* 24(sp) used later to stop ctr reg being clobbered */ /* 20(sp) used later to load fpscr with zero */ @@ -190,36 +192,29 @@ LafterVMX1: 0(sp) : back-chain */ - /* CAB TODO: Use a caller-saved reg for orig guest_state ptr - - rem to set non-allocateable in isel.c */ - - /* hold dispatch_ctr in r29 */ - lis 5,VG_(dispatch_ctr)@ha - lwz 29,VG_(dispatch_ctr)@l(5) - /* set host FPU control word to the default mode expected by VEX-generated code. 
See comments in libvex.h for more info. */ - lis 5,VG_(machine_ppc32_has_FP)@ha - lwz 5,VG_(machine_ppc32_has_FP)@l(5) - cmplwi 5,0 + lis 6,VG_(machine_ppc32_has_FP)@ha + lwz 6,VG_(machine_ppc32_has_FP)@l(6) + cmplwi 6,0 beq LafterFP2 /* get zero into f3 (tedious) */ /* note: fsub 3,3,3 is not a reliable way to do this, since if f3 holds a NaN or similar then we don't necessarily wind up with zero. */ - li 5,0 - stw 5,20(1) + li 6,0 + stw 6,20(1) lfs 3,20(1) mtfsf 0xFF,3 /* fpscr = f3 */ LafterFP2: /* set host AltiVec control word to the default mode expected by VEX-generated code. */ - lis 5,VG_(machine_ppc32_has_VMX)@ha - lwz 5,VG_(machine_ppc32_has_VMX)@l(5) - cmplwi 5,0 + lis 6,VG_(machine_ppc32_has_VMX)@ha + lwz 6,VG_(machine_ppc32_has_VMX)@l(6) + cmplwi 6,0 beq LafterVMX2 #ifdef HAS_ALTIVEC @@ -232,172 +227,40 @@ LafterVMX2: /* make a stack frame for the code we are calling */ stwu 1,-16(1) - /* fetch %CIA into r3 */ - lwz 3,OFFSET_ppc32_CIA(31) - - /* fall into main loop (the right one) */ - /* r4 = do_profiling. It's probably trashed after here, - but that's OK: we don't need it after here. */ - cmplwi 4,0 - beq VG_(run_innerloop__dispatch_unprofiled) - b VG_(run_innerloop__dispatch_profiled) - /*NOTREACHED*/ - -/*----------------------------------------------------*/ -/*--- NO-PROFILING (standard) dispatcher ---*/ -/*----------------------------------------------------*/ - -.global VG_(run_innerloop__dispatch_unprofiled) -VG_(run_innerloop__dispatch_unprofiled): - /* At entry: Live regs: - r1 (=sp) - r3 (=CIA = next guest address) - r29 (=dispatch_ctr) - r31 (=guest_state) - */ - /* Has the guest state pointer been messed with? If yes, exit. - Also set up & VG_(tt_fast) early in an attempt at better - scheduling. */ - lis 5,VG_(tt_fast)@ha - addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */ - andi. 0,31,1 - bne gsp_changed - - /* save the jump address in the guest state */ - stw 3,OFFSET_ppc32_CIA(31) - - /* Are we out of timeslice? If yes, defer to scheduler. 
*/ - subi 29,29,1 - cmplwi 29,0 - beq counter_is_zero - - /* try a fast lookup in the translation cache */ - /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) - = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */ - rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */ - add 5,5,4 /* & VG_(tt_fast)[entry#] */ - lwz 6,0(5) /* .guest */ - lwz 7,4(5) /* .host */ - cmpw 3,6 - bne fast_lookup_failed - - /* Found a match. Call .host. */ - mtctr 7 - bctrl + /* Set up the guest state ptr */ + mr 31,4 /* r31 (generated code gsp) = r4 */ - /* On return from guest code: - r3 holds destination (original) address. - r31 may be unchanged (guest_state), or may indicate further - details of the control transfer requested to *r3. - */ - /* start over */ - b VG_(run_innerloop__dispatch_unprofiled) + /* and jump into the code cache. Chained translations in + the code cache run, until for whatever reason, they can't + continue. When that happens, the translation in question + will jump (or call) to one of the continuation points + VG_(cp_...) below. */ + mtctr 5 + bctr /*NOTREACHED*/ /*----------------------------------------------------*/ -/*--- PROFILING dispatcher (can be much slower) ---*/ +/*--- Postamble and exit. ---*/ /*----------------------------------------------------*/ -.global VG_(run_innerloop__dispatch_profiled) -VG_(run_innerloop__dispatch_profiled): - /* At entry: Live regs: - r1 (=sp) - r3 (=CIA = next guest address) - r29 (=dispatch_ctr) - r31 (=guest_state) - */ - /* Has the guest state pointer been messed with? If yes, exit. - Also set up & VG_(tt_fast) early in an attempt at better - scheduling. */ - lis 5,VG_(tt_fast)@ha - addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */ - andi. 0,31,1 - bne gsp_changed +postamble: + /* At this point, r6 and r7 contain two + words to be returned to the caller. r6 + holds a TRC value, and r7 optionally may + hold another word (for CHAIN_ME exits, the + address of the place to patch.) 
*/ - /* save the jump address in the guest state */ - stw 3,OFFSET_ppc32_CIA(31) - - /* Are we out of timeslice? If yes, defer to scheduler. */ - subi 29,29,1 - cmplwi 29,0 - beq counter_is_zero - - /* try a fast lookup in the translation cache */ - /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) - = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */ - rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */ - add 5,5,4 /* & VG_(tt_fast)[entry#] */ - lwz 6,0(5) /* .guest */ - lwz 7,4(5) /* .host */ - cmpw 3,6 - bne fast_lookup_failed - - /* increment bb profile counter */ - srwi 4,4,1 /* entry# * sizeof(UInt*) */ - addis 6,4,VG_(tt_fastN)@ha - lwz 9,VG_(tt_fastN)@l(6) - lwz 8,0(9) - addi 8,8,1 - stw 8,0(9) - - /* Found a match. Call .host. */ - mtctr 7 - bctrl - - /* On return from guest code: - r3 holds destination (original) address. - r31 may be unchanged (guest_state), or may indicate further - details of the control transfer requested to *r3. - */ - /* start over */ - b VG_(run_innerloop__dispatch_profiled) - /*NOTREACHED*/ - -/*----------------------------------------------------*/ -/*--- exit points ---*/ -/*----------------------------------------------------*/ - -gsp_changed: - /* Someone messed with the gsp (in r31). Have to - defer to scheduler to resolve this. dispatch ctr - is not yet decremented, so no need to increment. */ - /* %CIA is NOT up to date here. First, need to write - %r3 back to %CIA, but without trashing %r31 since - that holds the value we want to return to the scheduler. - Hence use %r5 transiently for the guest state pointer. 
*/ - lwz 5,44(1) /* original guest_state ptr */ - stw 3,OFFSET_ppc32_CIA(5) - mr 3,31 /* r3 = new gsp value */ - b run_innerloop_exit - /*NOTREACHED*/ - -counter_is_zero: - /* %CIA is up to date */ - /* back out decrement of the dispatch counter */ - addi 29,29,1 - li 3,VG_TRC_INNER_COUNTERZERO - b run_innerloop_exit - -fast_lookup_failed: - /* %CIA is up to date */ - /* back out decrement of the dispatch counter */ - addi 29,29,1 - li 3,VG_TRC_INNER_FASTMISS - b run_innerloop_exit - - - -/* All exits from the dispatcher go through here. - r3 holds the return value. -*/ -run_innerloop_exit: /* We're leaving. Check that nobody messed with - VSCR or FPSCR. */ - + VSCR or FPSCR in ways we don't expect. */ /* Using r10 - value used again further on, so don't trash! */ lis 10,VG_(machine_ppc32_has_FP)@ha lwz 10,VG_(machine_ppc32_has_FP)@l(10) - cmplwi 10,0 + + /* Using r11 - value used again further on, so don't trash! */ + lis 11,VG_(machine_ppc32_has_VMX)@ha + lwz 11,VG_(machine_ppc32_has_VMX)@l(11) + + cmplwi 10,0 /* Do we have FP ? */ beq LafterFP8 /* Set fpscr back to a known state, since vex-generated code @@ -410,10 +273,7 @@ run_innerloop_exit: mtfsf 0xFF,3 /* fpscr = f3 */ LafterFP8: - /* Using r11 - value used again further on, so don't trash! */ - lis 11,VG_(machine_ppc32_has_VMX)@ha - lwz 11,VG_(machine_ppc32_has_VMX)@l(11) - cmplwi 11,0 + cmplwi 11,0 /* Do we have altivec? */ beq LafterVMX8 #ifdef HAS_ALTIVEC @@ -432,29 +292,15 @@ LafterFP8: LafterVMX8: /* otherwise we're OK */ - b run_innerloop_exit_REALLY - + b remove_frame invariant_violation: - li 3,VG_TRC_INVARIANT_FAILED - b run_innerloop_exit_REALLY - -run_innerloop_exit_REALLY: - /* r3 holds VG_TRC_* value to return */ - - /* Return to parent stack */ - addi 1,1,16 - - /* Write ctr to VG(dispatch_ctr) */ - lis 5,VG_(dispatch_ctr)@ha - stw 29,VG_(dispatch_ctr)@l(5) - - /* Restore cr */ - lwz 0,44(1) - mtcr 0 - - /* Restore callee-saved registers... 
*/ + li 6,VG_TRC_INVARIANT_FAILED + li 7,0 + /* fall through */ +remove_frame: + /* Restore FP regs */ /* r10 already holds VG_(machine_ppc32_has_FP) value */ cmplwi 10,0 beq LafterFP9 @@ -480,31 +326,11 @@ run_innerloop_exit_REALLY: lfd 14,352(1) LafterFP9: - /* General regs */ - lwz 31,348(1) - lwz 30,344(1) - lwz 29,340(1) - lwz 28,336(1) - lwz 27,332(1) - lwz 26,328(1) - lwz 25,324(1) - lwz 24,320(1) - lwz 23,316(1) - lwz 22,312(1) - lwz 21,308(1) - lwz 20,304(1) - lwz 19,300(1) - lwz 18,296(1) - lwz 17,292(1) - lwz 16,288(1) - lwz 15,284(1) - lwz 14,280(1) - lwz 13,276(1) - /* r11 already holds VG_(machine_ppc32_has_VMX) value */ cmplwi 11,0 beq LafterVMX9 + /* Restore Altivec regs */ #ifdef HAS_ALTIVEC /* VRSAVE */ lwz 4,244(1) @@ -538,92 +364,142 @@ LafterFP9: #endif LafterVMX9: - /* reset lr & sp */ + /* restore int regs, including importantly r3 (two_words) */ + addi 1,1,16 + lwz 31,348(1) + lwz 30,344(1) + lwz 29,340(1) + lwz 28,336(1) + lwz 27,332(1) + lwz 26,328(1) + lwz 25,324(1) + lwz 24,320(1) + lwz 23,316(1) + lwz 22,312(1) + lwz 21,308(1) + lwz 20,304(1) + lwz 19,300(1) + lwz 18,296(1) + lwz 17,292(1) + lwz 16,288(1) + lwz 15,284(1) + lwz 14,280(1) + lwz 13,276(1) + lwz 3,272(1) + /* Stash return values */ + stw 6,0(3) + stw 7,4(3) + + /* restore lr & sp, and leave */ lwz 0,500(1) /* stack_size + 4 */ mtlr 0 addi 1,1,496 /* stack_size */ blr -.size VG_(run_innerloop), .-VG_(run_innerloop) -/*------------------------------------------------------------*/ -/*--- ---*/ -/*--- A special dispatcher, for running no-redir ---*/ -/*--- translations. Just runs the given translation once. 
---*/ -/*--- ---*/ -/*------------------------------------------------------------*/ +/*----------------------------------------------------*/ +/*--- Continuation points ---*/ +/*----------------------------------------------------*/ -/* signature: -void VG_(run_a_noredir_translation) ( UWord* argblock ); -*/ +/* ------ Chain me to slow entry point ------ */ +.global VG_(disp_cp_chain_me_to_slowEP) +VG_(disp_cp_chain_me_to_slowEP): + /* We got called. The return address indicates + where the patching needs to happen. Collect + the return address and exit back to C land, + handing the caller the pair (Chain_me_S, RA) */ + li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP + mflr 7 + /* 8 = imm32 r30, disp_cp_chain_me_to_slowEP + 4 = mtctr r30 + 4 = bctr + */ + subi 7,7,8+4+4 + b postamble + +/* ------ Chain me to fast entry point ------ */ +.global VG_(disp_cp_chain_me_to_fastEP) +VG_(disp_cp_chain_me_to_fastEP): + /* We got called. The return address indicates + where the patching needs to happen. Collect + the return address and exit back to C land, + handing the caller the pair (Chain_me_F, RA) */ + li 6, VG_TRC_CHAIN_ME_TO_FAST_EP + mflr 7 + /* 8 = imm32 r30, disp_cp_chain_me_to_fastEP + 4 = mtctr r30 + 4 = bctr + */ + subi 7,7,8+4+4 + b postamble -/* Run a no-redir translation. 
argblock points to 4 UWords, 2 to carry args - and 2 to carry results: - 0: input: ptr to translation - 1: input: ptr to guest state - 2: output: next guest PC - 3: output: guest state pointer afterwards (== thread return code) -*/ -.global VG_(run_a_noredir_translation) -.type VG_(run_a_noredir_translation), @function -VG_(run_a_noredir_translation): - /* save callee-save int regs, & lr */ - stwu 1,-256(1) - stw 14,128(1) - stw 15,132(1) - stw 16,136(1) - stw 17,140(1) - stw 18,144(1) - stw 19,148(1) - stw 20,152(1) - stw 21,156(1) - stw 22,160(1) - stw 23,164(1) - stw 24,168(1) - stw 25,172(1) - stw 26,176(1) - stw 27,180(1) - stw 28,184(1) - stw 29,188(1) - stw 30,192(1) - stw 31,196(1) - mflr 31 - stw 31,200(1) - - stw 3,204(1) - lwz 31,4(3) - lwz 30,0(3) - mtlr 30 - blrl - - lwz 4,204(1) - stw 3, 8(4) - stw 31,12(4) - - lwz 14,128(1) - lwz 15,132(1) - lwz 16,136(1) - lwz 17,140(1) - lwz 18,144(1) - lwz 19,148(1) - lwz 20,152(1) - lwz 21,156(1) - lwz 22,160(1) - lwz 23,164(1) - lwz 24,168(1) - lwz 25,172(1) - lwz 26,176(1) - lwz 27,180(1) - lwz 28,184(1) - lwz 29,188(1) - lwz 30,192(1) - lwz 31,200(1) - mtlr 31 - lwz 31,196(1) - addi 1,1,256 - blr -.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation) +/* ------ Indirect but boring jump ------ */ +.global VG_(disp_cp_xindir) +VG_(disp_cp_xindir): + /* Where are we going? */ + lwz 3,OFFSET_ppc32_CIA(31) + /* stats only */ + lis 5,VG_(stats__n_xindirs)@ha + addi 5,5,VG_(stats__n_xindirs)@l + lwz 6,4(5) + addic. 
6,6,1 + stw 6,4(5) + lwz 6,0(5) + addze 6,6 + stw 6,0(5) + + /* r5 = &VG_(tt_fast) */ + lis 5,VG_(tt_fast)@ha + addi 5,5,VG_(tt_fast)@l /* & VG_(tt_fast) */ + + /* try a fast lookup in the translation cache */ + /* r4 = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry) + = ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */ + rlwinm 4,3,1, 29-VG_TT_FAST_BITS, 28 /* entry# * 8 */ + add 5,5,4 /* & VG_(tt_fast)[entry#] */ + lwz 6,0(5) /* .guest */ + lwz 7,4(5) /* .host */ + cmpw 3,6 + bne fast_lookup_failed + + /* Found a match. Jump to .host. */ + mtctr 7 + bctr + +fast_lookup_failed: + /* stats only */ + lis 5,VG_(stats__n_xindir_misses)@ha + addi 5,5,VG_(stats__n_xindir_misses)@l + lwz 6,4(5) + addic. 6,6,1 + stw 6,4(5) + lwz 6,0(5) + addze 6,6 + stw 6,0(5) + + li 6,VG_TRC_INNER_FASTMISS + li 7,0 + b postamble + /*NOTREACHED*/ + +/* ------ Assisted jump ------ */ +.global VG_(disp_cp_xassisted) +VG_(disp_cp_xassisted): + /* r31 contains the TRC */ + mr 6,31 + li 7,0 + b postamble + +/* ------ Event check failed ------ */ +.global VG_(disp_cp_evcheck_fail) +VG_(disp_cp_evcheck_fail): + li 6,VG_TRC_INNER_COUNTERZERO + li 7,0 + b postamble + + +.size VG_(disp_run_translations), .-VG_(disp_run_translations) /* Let the linker know we don't need an executable stack */ .section .note.GNU-stack,"",@progbits diff --git a/coregrind/m_dispatch/dispatch-x86-linux.S b/coregrind/m_dispatch/dispatch-x86-linux.S index eaf10cf372..f34d8bfca4 100644 --- a/coregrind/m_dispatch/dispatch-x86-linux.S +++ b/coregrind/m_dispatch/dispatch-x86-linux.S @@ -39,8 +39,9 @@ /*------------------------------------------------------------*/ /*--- ---*/ -/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/ -/*--- run all translations except no-redir ones. ---*/ +/*--- The dispatch loop. VG_(disp_run_translations) is ---*/ +/*--- used to run all translations, ---*/ +/*--- including no-redir ones. 
---*/ /*--- ---*/ /*------------------------------------------------------------*/ diff --git a/docs/internals/t-chaining-notes.txt b/docs/internals/t-chaining-notes.txt index 1f4e367770..ed1d5e3216 100644 --- a/docs/internals/t-chaining-notes.txt +++ b/docs/internals/t-chaining-notes.txt @@ -1,6 +1,6 @@ DO NOT MERGE -~~~~~~~~~~~ +~~~~~~~~~~~~ Changes memcheck/tests/Makefile.am w.r.t. -mfloat-abi=softfp Ditto none/tests/arm/Makefile.am @@ -28,16 +28,26 @@ host_x86_defs.c, host_amd64_defs.c: return proper VexInvalRange records from the patchers, instead of {0,0}, so that transparent self hosting works properly. +host_ppc_defs.h: is RdWrLR still needed? If not delete. + +ditto ARM, Ld8S + +make sure IRStmt_Exit3 is completely gone. + +all backends: iselStmt(Ist_Exit) vs iselNext: make sure that the same +JKs are handled, else it's not safe against branch sense switching + Optimisations ~~~~~~~~~~~~~ all targets: change VG_(stats__n_xindirs) to a 32 bit counter, and -empty out every now and again. +empty out every now and again. Ditto VG_(stats__n_xindir_misses). amd64: XDirect: write const value to guest_RIP using single insn when the value is < 0x8000'0000 arm: chain_XDirect: generate short form jumps when possible +ppc: chain_XDirect: generate short form jumps when possible arm codegen: Generate ORRS for CmpwNEZ32(Or32(x,y)) diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c index 3be57a5ca0..66551d96de 100644 --- a/memcheck/mc_machine.c +++ b/memcheck/mc_machine.c @@ -393,7 +393,7 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB ) if (o == GOF(CIA) && sz == 4) return -1; if (o == GOF(IP_AT_SYSCALL) && sz == 4) return -1; /* slot unused */ - if (o == GOF(FPROUND) && sz == 4) return -1; + if (o == GOF(FPROUND) && sz == 1) return -1; if (o == GOF(VRSAVE) && sz == 4) return -1; if (o == GOF(EMWARN) && sz == 4) return -1; if (o == GOF(TISTART) && sz == 4) return -1;