From: Cerion Armour-Brown Date: Thu, 22 Dec 2005 15:53:12 +0000 (+0000) Subject: fixed up ppc64 assembly with .opd sections X-Git-Tag: svn/VALGRIND_3_2_0~470 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4d4bc5d1f6cafdfff5f869ee83fc1dfa3bb6a500;p=thirdparty%2Fvalgrind.git fixed up ppc64 assembly with .opd sections. do_syscall_for_client_WRK() needed a bigger stack frame to avoid the linkage area. Always use dot_prefix for label calls. No longer wrapping assembly with .section ".text" ... .previous - ppc64 doesn't like it; it seems we can't 'stack' more than one section to pop off with .previous. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5405 --- diff --git a/coregrind/m_libcassert.c b/coregrind/m_libcassert.c index 773b601d44..f5e4a17ff1 100644 --- a/coregrind/m_libcassert.c +++ b/coregrind/m_libcassert.c @@ -78,8 +78,8 @@ #elif defined(VGP_ppc64_linux) # define GET_REAL_PC_SP_AND_FP(pc, sp, fp) \ asm("mflr 0;" /* r0 = lr */ \ - "bl m_libcassert_get_ip;" /* lr = pc */ \ - "m_libcassert_get_ip:\n" \ + "bl .m_libcassert_get_ip;" /* lr = pc */ \ + ".m_libcassert_get_ip:\n" \ "mflr %0;" \ "mtlr 0;" /* restore lr */ \ "mr %1,1;" \ diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c index 403b8fd1ff..a8987e7c6d 100644 --- a/coregrind/m_machine.c +++ b/coregrind/m_machine.c @@ -373,7 +373,7 @@ Bool VG_(machine_get_hwcaps)( void ) /* VG_(printf)("FP %d VMX %d\n", (Int)have_fp, (Int)have_vmx); */ /* We can only support 3 cases, not 4 (vmx but no fp). So make - fp a prerequisite for vmx. */ + fp a prerequisite for vmx. */ if (have_vmx && !have_fp) have_vmx = False; diff --git a/coregrind/m_main.c b/coregrind/m_main.c index 97ba2d9a9f..0bb7161a43 100644 --- a/coregrind/m_main.c +++ b/coregrind/m_main.c @@ -2850,9 +2850,9 @@ asm("\n" ); #elif defined(VGP_ppc64_linux) asm("\n" - ".text\n" /* PPC64 ELF ABI says '_start' points to a function descriptor. So we must have one, and that is what goes into the .opd section. 
*/ + "\t.align 2\n" "\t.global _start\n" "\t.section \".opd\",\"aw\"\n" "\t.align 3\n" @@ -2886,7 +2886,6 @@ asm("\n" "\tbl ._start_in_C\n" "\tnop\n" "\ttrap\n" - ".previous\n" ); #else #error "_start: needs implementation on this platform" diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c index e920271bb6..a9d060c890 100644 --- a/coregrind/m_signals.c +++ b/coregrind/m_signals.c @@ -477,11 +477,18 @@ extern void my_sigreturn(void); ".previous\n" #elif defined(VGP_ppc64_linux) # define _MYSIG(name) \ - ".text\n" \ + ".align 2\n" \ + ".globl my_sigreturn\n" \ + ".section \".opd\",\"aw\"\n" \ + ".align 3\n" \ "my_sigreturn:\n" \ + ".quad .my_sigreturn,.TOC.@tocbase,0\n" \ + ".previous\n" \ + ".type .my_sigreturn,@function\n" \ + ".globl .my_sigreturn\n" \ + ".my_sigreturn:\n" \ " li 0, " #name "\n" \ - " sc\n" \ - ".previous\n" + " sc\n" #else # error Unknown platform #endif diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c index ae28f49e14..db956566cb 100644 --- a/coregrind/m_syscall.c +++ b/coregrind/m_syscall.c @@ -218,7 +218,15 @@ asm( bottom but of [1]. */ extern void do_syscall_WRK ( ULong* argblock ); asm( -".text\n" +".align 2\n" +".globl do_syscall_WRK\n" +".section \".opd\",\"aw\"\n" +".align 3\n" +"do_syscall_WRK:\n" +".quad .do_syscall_WRK,.TOC.@tocbase,0\n" +".previous\n" +".type .do_syscall_WRK,@function\n" +".globl .do_syscall_WRK\n" ".do_syscall_WRK:\n" " std 3,-16(1)\n" /* stash arg */ " ld 8, 48(3)\n" /* sc arg 6 */ @@ -236,7 +244,6 @@ asm( " andi. 
3,3,1\n" " std 3,8(5)\n" /* argblock[1] = cr0.s0 & 1 */ " blr\n" -".previous\n" ); #else # error Unknown platform diff --git a/coregrind/m_syswrap/syscall-ppc64-linux.S b/coregrind/m_syswrap/syscall-ppc64-linux.S index 3d8134f5b4..0a31c24621 100644 --- a/coregrind/m_syswrap/syscall-ppc64-linux.S +++ b/coregrind/m_syswrap/syscall-ppc64-linux.S @@ -70,14 +70,22 @@ /* from vki_arch.h */ #define VKI_SIG_SETMASK 2 +.align 2 +.globl ML_(do_syscall_for_client_WRK) +.section ".opd","aw" +.align 3 +ML_(do_syscall_for_client_WRK): +.quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0 +.previous +.type .ML_(do_syscall_for_client_WRK),@function .globl .ML_(do_syscall_for_client_WRK) .ML_(do_syscall_for_client_WRK): /* make a stack frame */ - stdu 1,-64(1) - std 31,56(1) - std 30,48(1) - std 29,40(1) - std 28,32(1) + stdu 1,-80(1) + std 31,72(1) + std 30,64(1) + std 29,56(1) + std 28,48(1) mr 31,3 /* syscall number */ mr 30,4 /* guest_state */ mr 29,6 /* postmask */ @@ -122,11 +130,11 @@ /* now safe from signals */ /* pop off stack frame */ -5: ld 28,32(1) - ld 29,40(1) - ld 30,48(1) - ld 31,56(1) - addi 1,1,64 +5: ld 28,48(1) + ld 29,56(1) + ld 30,64(1) + ld 31,72(1) + addi 1,1,80 blr /* failure: return -ve error code */ @@ -149,8 +157,7 @@ ML_(blksys_complete): .long 3b ML_(blksys_committed): .long 4b ML_(blksys_finished): .long 5b -.previous - + /* Let the linker know we don't need an executable stack */ .section .note.GNU-stack,"",@progbits diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c index 35c068dca2..ef75719382 100644 --- a/coregrind/m_syswrap/syswrap-ppc64-linux.c +++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c @@ -74,7 +74,7 @@ void ML_(call_on_new_stack_0_1) ( Addr stack, address, the second word is the TOC ptr (r2), and the third word is the static chain value. 
*/ asm( -".text\n" +" .align 2\n" " .globl vgModuleLocal_call_on_new_stack_0_1\n" " .section \".opd\",\"aw\"\n" " .align 3\n" @@ -122,7 +122,6 @@ asm( " mtcr 0\n\t" // CAB: Need this? " bctr\n\t" // jump to dst " trap\n" // should never get here -".previous\n" ); @@ -166,7 +165,15 @@ ULong do_syscall_clone_ppc64_linux ( Word (*fn)(void *), Int* parent_tid, void/*vki_modify_ldt_t*/ * ); asm( -".text\n" +" .align 2\n" +" .globl do_syscall_clone_ppc64_linux\n" +" .section \".opd\",\"aw\"\n" +" .align 3\n" +"do_syscall_clone_ppc64_linux:\n" +" .quad .do_syscall_clone_ppc64_linux,.TOC.@tocbase,0\n" +" .previous\n" +" .type .do_syscall_clone_ppc64_linux,@function\n" +" .globl .do_syscall_clone_ppc64_linux\n" ".do_syscall_clone_ppc64_linux:\n" " stdu 1,-64(1)\n" " std 29,40(1)\n" @@ -229,7 +236,6 @@ asm( " ld 31,56(1)\n" " addi 1,1,64\n" " blr\n" -".previous\n" ); #undef __NR_CLONE diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S index 8423fedca5..1da47dde6d 100644 --- a/coregrind/m_trampoline.S +++ b/coregrind/m_trampoline.S @@ -296,11 +296,28 @@ VG_(trampoline_stuff_end): /* a leading page of unexecutable code */ UD2_PAGE +.align 2 .global VG_(trampoline_stuff_start) +.section ".opd","aw" +.align 3 VG_(trampoline_stuff_start): +.quad .VG_(trampoline_stuff_start),.TOC.@tocbase,0 +.previous +.type .VG_(trampoline_stuff_start),@function +.global .VG_(trampoline_stuff_start) +.VG_(trampoline_stuff_start): + +.align 2 .global VG_(trampoline_stuff_end) +.section ".opd","aw" +.align 3 VG_(trampoline_stuff_end): +.quad .VG_(trampoline_stuff_end),.TOC.@tocbase,0 +.previous +.type .VG_(trampoline_stuff_end),@function +.global .VG_(trampoline_stuff_end) +.VG_(trampoline_stuff_end): # undef UD2_16 # undef UD2_64 diff --git a/coregrind/vki_unistd-ppc64-linux.h b/coregrind/vki_unistd-ppc64-linux.h index 17f640c508..ffd5397c4e 100644 --- a/coregrind/vki_unistd-ppc64-linux.h +++ b/coregrind/vki_unistd-ppc64-linux.h @@ -309,4 +309,4 @@ #define __NR_inotify_rm_watch 277 
-#endif /* __VKI_UNISTD_PPC32_LINUX_H */ +#endif /* __VKI_UNISTD_PPC64_LINUX_H */ diff --git a/docs/internals/performance.txt b/docs/internals/performance.txt index ddeeda61a9..ff2075c31f 100644 --- a/docs/internals/performance.txt +++ b/docs/internals/performance.txt @@ -16,7 +16,7 @@ Post 3.1.0: Saved 1--3% on a few programs. - r5345,r5346,r5352: Julian improved the dispatcher so that x86 and AMD64 use jumps instead of call/return for calling translations. - Also, on x86, amd64 and ppc32, --profile-flags style profiling was + Also, on x86, amd64, ppc32 and ppc64, --profile-flags style profiling was removed from the despatch loop unless --profile-flags is being used. Improved Nulgrind performance typically by 10--20%, and Memcheck performance typically by 2--20%.