do_syscall_for_client_WRK() needed a bigger stack to avoid the linkage area.
Always use the dot prefix (e.g. ".name") for label calls.
No longer wrapping assembly with
.section ".text"
...
.previous
- ppc64 doesn't like it; it seems we can't 'stack' more than one section to pop off with .previous.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5405
#elif defined(VGP_ppc64_linux)
# define GET_REAL_PC_SP_AND_FP(pc, sp, fp) \
asm("mflr 0;" /* r0 = lr */ \
- "bl m_libcassert_get_ip;" /* lr = pc */ \
- "m_libcassert_get_ip:\n" \
+ "bl .m_libcassert_get_ip;" /* lr = pc */ \
+ ".m_libcassert_get_ip:\n" \
"mflr %0;" \
"mtlr 0;" /* restore lr */ \
"mr %1,1;" \
/* VG_(printf)("FP %d VMX %d\n", (Int)have_fp, (Int)have_vmx); */
/* We can only support 3 cases, not 4 (vmx but no fp). So make
- fp a prerequisite for vmx. */
+ fp a prerequisite for vmx. */
if (have_vmx && !have_fp)
have_vmx = False;
);
#elif defined(VGP_ppc64_linux)
asm("\n"
- ".text\n"
/* PPC64 ELF ABI says '_start' points to a function descriptor.
So we must have one, and that is what goes into the .opd section. */
+ "\t.align 2\n"
"\t.global _start\n"
"\t.section \".opd\",\"aw\"\n"
"\t.align 3\n"
"\tbl ._start_in_C\n"
"\tnop\n"
"\ttrap\n"
- ".previous\n"
);
#else
#error "_start: needs implementation on this platform"
".previous\n"
#elif defined(VGP_ppc64_linux)
# define _MYSIG(name) \
- ".text\n" \
+ ".align 2\n" \
+ ".globl my_sigreturn\n" \
+ ".section \".opd\",\"aw\"\n" \
+ ".align 3\n" \
"my_sigreturn:\n" \
+ ".quad .my_sigreturn,.TOC.@tocbase,0\n" \
+ ".previous\n" \
+ ".type .my_sigreturn,@function\n" \
+ ".globl .my_sigreturn\n" \
+ ".my_sigreturn:\n" \
" li 0, " #name "\n" \
- " sc\n" \
- ".previous\n"
+ " sc\n"
#else
# error Unknown platform
#endif
bottom bit of [1]. */
extern void do_syscall_WRK ( ULong* argblock );
asm(
-".text\n"
+".align 2\n"
+".globl do_syscall_WRK\n"
+".section \".opd\",\"aw\"\n"
+".align 3\n"
+"do_syscall_WRK:\n"
+".quad .do_syscall_WRK,.TOC.@tocbase,0\n"
+".previous\n"
+".type .do_syscall_WRK,@function\n"
+".globl .do_syscall_WRK\n"
".do_syscall_WRK:\n"
" std 3,-16(1)\n" /* stash arg */
" ld 8, 48(3)\n" /* sc arg 6 */
" andi. 3,3,1\n"
" std 3,8(5)\n" /* argblock[1] = cr0.so & 1 */
" blr\n"
-".previous\n"
);
#else
# error Unknown platform
/* from vki_arch.h */
#define VKI_SIG_SETMASK 2
+.align 2
+.globl ML_(do_syscall_for_client_WRK)
+.section ".opd","aw"
+.align 3
+ML_(do_syscall_for_client_WRK):
+.quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0
+.previous
+.type .ML_(do_syscall_for_client_WRK),@function
.globl .ML_(do_syscall_for_client_WRK)
.ML_(do_syscall_for_client_WRK):
/* make a stack frame */
- stdu 1,-64(1)
- std 31,56(1)
- std 30,48(1)
- std 29,40(1)
- std 28,32(1)
+ stdu 1,-80(1)
+ std 31,72(1)
+ std 30,64(1)
+ std 29,56(1)
+ std 28,48(1)
mr 31,3 /* syscall number */
mr 30,4 /* guest_state */
mr 29,6 /* postmask */
/* now safe from signals */
/* pop off stack frame */
-5: ld 28,32(1)
- ld 29,40(1)
- ld 30,48(1)
- ld 31,56(1)
- addi 1,1,64
+5: ld 28,48(1)
+ ld 29,56(1)
+ ld 30,64(1)
+ ld 31,72(1)
+ addi 1,1,80
blr
/* failure: return -ve error code */
ML_(blksys_committed): .long 4b
ML_(blksys_finished): .long 5b
-.previous
-
+
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
address, the second word is the TOC ptr (r2), and the third word is
the static chain value. */
asm(
-".text\n"
+" .align 2\n"
" .globl vgModuleLocal_call_on_new_stack_0_1\n"
" .section \".opd\",\"aw\"\n"
" .align 3\n"
" mtcr 0\n\t" // CAB: Need this?
" bctr\n\t" // jump to dst
" trap\n" // should never get here
-".previous\n"
);
Int* parent_tid,
void/*vki_modify_ldt_t*/ * );
asm(
-".text\n"
+" .align 2\n"
+" .globl do_syscall_clone_ppc64_linux\n"
+" .section \".opd\",\"aw\"\n"
+" .align 3\n"
+"do_syscall_clone_ppc64_linux:\n"
+" .quad .do_syscall_clone_ppc64_linux,.TOC.@tocbase,0\n"
+" .previous\n"
+" .type .do_syscall_clone_ppc64_linux,@function\n"
+" .globl .do_syscall_clone_ppc64_linux\n"
".do_syscall_clone_ppc64_linux:\n"
" stdu 1,-64(1)\n"
" std 29,40(1)\n"
" ld 31,56(1)\n"
" addi 1,1,64\n"
" blr\n"
-".previous\n"
);
#undef __NR_CLONE
/* a leading page of unexecutable code */
UD2_PAGE
+.align 2
.global VG_(trampoline_stuff_start)
+.section ".opd","aw"
+.align 3
VG_(trampoline_stuff_start):
+.quad .VG_(trampoline_stuff_start),.TOC.@tocbase,0
+.previous
+.type .VG_(trampoline_stuff_start),@function
+.global .VG_(trampoline_stuff_start)
+.VG_(trampoline_stuff_start):
+
+.align 2
.global VG_(trampoline_stuff_end)
+.section ".opd","aw"
+.align 3
VG_(trampoline_stuff_end):
+.quad .VG_(trampoline_stuff_end),.TOC.@tocbase,0
+.previous
+.type .VG_(trampoline_stuff_end),@function
+.global .VG_(trampoline_stuff_end)
+.VG_(trampoline_stuff_end):
# undef UD2_16
# undef UD2_64
#define __NR_inotify_rm_watch 277
-#endif /* __VKI_UNISTD_PPC32_LINUX_H */
+#endif /* __VKI_UNISTD_PPC64_LINUX_H */
Saved 1--3% on a few programs.
- r5345,r5346,r5352: Julian improved the dispatcher so that x86 and
AMD64 use jumps instead of call/return for calling translations.
- Also, on x86, amd64 and ppc32, --profile-flags style profiling was
+ Also, on x86, amd64, ppc32 and ppc64, --profile-flags style profiling was
removed from the despatch loop unless --profile-flags is being used.
Improved Nulgrind performance typically by 10--20%, and Memcheck
performance typically by 2--20%.