#include "core_asm.h"
#include "vki_unistd.h"
+#include "libvex_guest_offsets.h"
/*
Perform a Linux syscall with the "syscall" instruction.
1: // PARENT or ERROR
ret
-
.globl VG_(sigreturn)
VG_(sigreturn): /* issue the rt_sigreturn system call */
movq $__NR_rt_sigreturn, %rax /* syscall number; no argument registers are set up */
syscall /* NOTE(review): no ret follows; presumably rt_sigreturn never comes back here -- confirm */
+/*----------------------------------------------------------------*/
+/*
+ Perform a syscall for the client. This will run a syscall
+ with the client's specific per-thread signal mask.
+
+ The structure of this function is such that, if the syscall is
+ interrupted by a signal, we can determine exactly what
+ execution state we were in with respect to the execution of
+ the syscall by examining the value of %rip in the signal
+ handler. This means that we can always do the appropriate
+ thing to precisely emulate the kernel's signal/syscall
+ interactions.
+
+ The syscall number is taken from the argument, even though it
+ should also be in guest_state->guest_RAX. The syscall result
+ is written back to guest_state->guest_RAX on completion.
+
+ Returns 0 if the syscall was successfully called (even if the
+ syscall itself failed), or a -ve error code if one of the
+ sigprocmasks failed (there's no way to determine which one
+ failed).
+
+ VGA_(interrupted_syscall)() does the thread state fixup in the
+ case where we were interrupted by a signal.
+
+ Prototype:
+
+ Int VGA_(_client_syscall)(Int syscallno, // rdi
+ void* guest_state, // rsi
+ const vki_sigset_t *sysmask, // rdx
+ const vki_sigset_t *postmask, // rcx
+ Int nsigwords) // r8
+
+*/
+
+/* from vki_arch.h */
+#define VKI_SIG_SETMASK 2
+
+.globl VGA_(_client_syscall)
+VGA_(_client_syscall):
+ /* Save the callee-saved regs (rbx, rbp, r12-r15). They are popped
+ again, in reverse order, at label 5 just before returning. */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+#define FSZ ((4+1)*4) /* 4 args + ret addr. NOTE(review): never referenced between here and the #undef below -- looks like a leftover, confirm and drop */
+
+#define PUSH_di_si_dx_cx_8 \
+ pushq %rdi ; \
+ pushq %rsi ; \
+ pushq %rdx ; \
+ pushq %rcx ; \
+ pushq %r8
+
+#define POP_di_si_dx_cx_8 \
+ popq %r8 ; \
+ popq %rcx ; \
+ popq %rdx ; \
+ popq %rsi ; \
+ popq %rdi
+
+1: /* Even though we can't take a signal until the sigprocmask completes,
+ start the range early.
+ If rip is in the range [1,2), the syscall hasn't been started yet.
+ The address of this label is exported as VGA_(blksys_setup). */
+
+ /* Set the signal mask which should be current during the syscall. */
+ /* Save and restore all 5 arg regs round the call. This is easier
+ than figuring out the minimal set to save/restore. (The syscall
+ instruction itself overwrites %rcx and %r11, and %rdi/%rsi/%rdx
+ are reloaded below with the sigprocmask arguments.) */
+
+ PUSH_di_si_dx_cx_8
+
+ movq $__NR_rt_sigprocmask, %rax // syscall #
+ movq $VKI_SIG_SETMASK, %rdi // how: replace the mask outright
+ movq %rdx, %rsi // set = sysmask (mask to run the syscall under)
+ movq %rcx, %rdx // oldset = postmask (previous mask is stored here)
+ movq %r8, %r10 // sigsetsize = nsigwords
+ syscall
+
+ POP_di_si_dx_cx_8
+
+ testl %eax, %eax
+ js 5f /* sigprocmask failed: bail out with its -ve result still in %rax */
+
+ /* OK, that worked. Now do the syscall proper. */
+
+ PUSH_di_si_dx_cx_8
+
+ movq %rsi, %rax /* rax --> VexGuestAMD64State * */
+ pushq %rdi /* syscallno -> stack (parked while the arg regs are loaded) */
+ movq OFFSET_amd64_RDI(%rax), %rdi // guest syscall arg 1
+ movq OFFSET_amd64_RSI(%rax), %rsi // guest syscall arg 2
+ movq OFFSET_amd64_RDX(%rax), %rdx // guest syscall arg 3
+ movq OFFSET_amd64_R10(%rax), %r10 // guest syscall arg 4
+ movq OFFSET_amd64_R8(%rax), %r8 // guest syscall arg 5
+ movq OFFSET_amd64_R9(%rax), %r9 // guest syscall arg 6
+ popq %rax /* syscallno -> %rax */
+
+ /* If rip==2, then the syscall was either just about
+ to start, or was interrupted and the kernel was
+ restarting it. The address of label 2 is exported as
+ VGA_(blksys_restart). */
+2: syscall
+3: /* In the range [3, 4), the syscall result is in %rax,
+ but hasn't been committed to RAX. The address of label 3 is
+ exported as VGA_(blksys_complete). */
+
+ POP_di_si_dx_cx_8
+
+ movq %rax, OFFSET_amd64_RAX(%rsi) /* save back to RAX (%rsi is guest_state again after the pops) */
+
+4: /* Re-block signals. If rip is in [4,5), then the syscall is complete and
+ we needn't worry about it. The address of label 4 is exported as
+ VGA_(blksys_committed). */
+ PUSH_di_si_dx_cx_8
+
+ movq $__NR_rt_sigprocmask, %rax // syscall #
+ movq $VKI_SIG_SETMASK, %rdi // how: replace the mask outright
+ movq %rcx, %rsi // set = postmask (the mask saved by the first call)
+ xorq %rdx, %rdx // oldset = NULL: previous mask not wanted
+ movq %r8, %r10 // sigsetsize = nsigwords
+ syscall
+
+ POP_di_si_dx_cx_8
+
+5: /* now safe from signals. %rax is the return value: the result of
+ whichever rt_sigprocmask ran last (the pops do not touch it).
+ The address of label 5 is exported as VGA_(blksys_finished). */
+
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+#undef FSZ
+ ret
+
+.section .rodata
+/* export the ranges so that VGA_(interrupted_syscall) can do the
+ right thing. Each symbol below is a 64-bit word holding the address
+ of the nearest preceding numeric label of that number, i.e. the
+ labels 1: .. 5: inside VGA_(_client_syscall). */
+
+.globl VGA_(blksys_setup)
+.globl VGA_(blksys_restart)
+.globl VGA_(blksys_complete)
+.globl VGA_(blksys_committed)
+.globl VGA_(blksys_finished)
+VGA_(blksys_setup): .quad 1b /* start of the sequence: first sigprocmask set-up */
+VGA_(blksys_restart): .quad 2b /* the syscall instruction itself */
+VGA_(blksys_complete): .quad 3b /* syscall done, result still only in %rax */
+VGA_(blksys_committed): .quad 4b /* result stored; re-blocking signals */
+VGA_(blksys_finished): .quad 5b /* end of the sequence */
+.previous
+
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
*/
#include "core.h"
+#include "ume.h" /* for jmp_with_stack */
/* COPIED FROM /usr/include/asm-i386/prctl.h (amd64-linux) */
#define ARCH_GET_GS 0x1004
+/* ---------------------------------------------------------------------
+ Stacks, thread wrappers, clone
+ Note. Why is this stuff here?
+ ------------------------------------------------------------------ */
-// See the comment accompanying the declaration of VGA_(thread_syscall)() in
-// coregrind/core.h for an explanation of what this does, and why.
-asm(
-".text\n"
-" .type vgArch_do_thread_syscall,@function\n"
-
-".globl vgArch_do_thread_syscall\n"
-"vgArch_do_thread_syscall:\n"
-" pushq %r15\n"
-" pushq %r14\n"
-" pushq %r13\n"
-" pushq %r12\n"
-" pushq %rbx\n"
-" pushq %rbp\n"
-".vgArch_sys_before:\n"
-
-/* Params:
- rdi = UWord sys
- rsi = UWord arg1
- rdx = UWord arg2
- rcx = UWord arg3
- r8 = UWord arg4
- r9 = UWord arg5
- Stack now looks like this (remaining args pushed R->L):
- Int poststate 80
- Int* statep 72
- HWord* result 64
- UWord arg6 56
- ReturnAddress 48
- r15 40
- r14 32
- r13 24
- r12 16
- rbx 8
- rbp 0+rsp
-*/
-
-/* Convert function calling convention --> syscall calling convention */
-" movq %rdi, %rax\n" /* syscall */
-" movq %rsi, %rdi\n" /* arg1 */
-" movq %rdx, %rsi\n" /* arg2 */
-" movq %rcx, %rdx\n" /* arg3 */
-" movq %r8, %r10\n" /* arg4 */
-" movq %r9, %r8\n" /* arg5 */
-" movq 56(%rsp), %r9\n" /* arg6 */ /* last arg from stack */
-".vgArch_sys_restarted:\n"
-" syscall\n"
-".vgArch_sys_after:\n"
-" movq 64(%rsp),%rbx\n" /* rbx = HWord* result */
-" movq %rax, (%rbx)\n" /* write the syscall retval */
-
-" movl 72(%esp),%ebx\n" /* rbx = Int* stateP */
-" testl %ebx, %ebx\n"
-" jz 1f\n"
-
-" movl 80(%rsp),%ecx\n" /* write the post state (must be after retval write) */
-" movl %ecx,(%rbx)\n"
-
-".vgArch_sys_done:\n" /* OK, all clear from here */
-"1: popq %rbp\n"
-" popq %rbx\n"
-" popq %r12\n"
-" popq %r13\n"
-" popq %r14\n"
-" popq %r15\n"
-" ret\n"
-" .size vgArch_do_thread_syscall,.-vgArch_do_thread_syscall\n"
-".previous\n"
-
-".section .rodata\n"
-" .globl vgArch_sys_before\n"
-"vgArch_sys_before: .long .vgArch_sys_before\n"
-" .globl vgArch_sys_restarted\n"
-"vgArch_sys_restarted: .long .vgArch_sys_restarted\n"
-" .globl vgArch_sys_after\n"
-"vgArch_sys_after: .long .vgArch_sys_after\n"
-" .globl vgArch_sys_done\n"
-"vgArch_sys_done: .long .vgArch_sys_done\n"
-".previous\n"
-);
-
-
+/* These are addresses within VGA_(_client_syscall), the assembly routine. See syscall.S for details. */
+extern const Addr VGA_(blksys_setup);
+extern const Addr VGA_(blksys_restart);
+extern const Addr VGA_(blksys_complete);
+extern const Addr VGA_(blksys_committed);
+extern const Addr VGA_(blksys_finished);
// Back up to restart a system call: move the guest instruction pointer
// back over the syscall instruction it has just passed, then check that
// the bytes now pointed at really are that instruction.
void VGA_(restart_syscall)(ThreadArchState *arch)
{
- I_die_here;
-#if 0
- arch->vex.guest_EIP -= 2; // sizeof(int $0x80)
+ arch->vex.guest_RIP -= 2; // sizeof(syscall): the opcode is the two bytes 0F 05
/* Make sure our caller is actually sane, and we're really backing
back over a syscall.
- int $0x80 == CD 80
+ syscall == 0F 05
*/
{
- UChar *p = (UChar *)arch->vex.guest_EIP;
+ UChar *p = (UChar *)arch->vex.guest_RIP; /* bytes at the rewound address */
- if (p[0] != 0xcd || p[1] != 0x80)
+ if (p[0] != 0x0F || p[1] != 0x05) /* report what was found before asserting */
VG_(message)(Vg_DebugMsg,
"?! restarting over syscall at %p %02x %02x\n",
- arch->vex.guest_EIP, p[0], p[1]);
+ arch->vex.guest_RIP, p[0], p[1]);
- vg_assert(p[0] == 0xcd && p[1] == 0x80);
+ vg_assert(p[0] == 0x0F && p[1] == 0x05); /* refuse to go on if this is not a syscall insn */
}
-#endif
}
-/* ---------------------------------------------------------------------
- Stacks, thread wrappers, clone
- Note. Why is this stuff here?
- ------------------------------------------------------------------ */
+/*
+ Fix up the VCPU state when a syscall is interrupted by a signal.
+
+ To do this, we determine the precise state of the syscall by
+ looking at the (real) rip at the time the signal happened. The
+ syscall sequence looks like:
+
+ 1. unblock signals
+ 2. perform syscall
+ 3. save result to RAX
+ 4. re-block signals
+
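+ The numbers below are the labels 1: .. 5: in VGA_(_client_syscall),
+ exported as VGA_(blksys_setup) .. VGA_(blksys_finished).
+
+ If a signal
+ happens at   Then      Why?
+ [1-2)        restart   nothing has happened (restart syscall)
+ [2]          restart   syscall hasn't started, or kernel wants to restart
+ [3-4)        save      syscall complete, but results not saved
+ [4-5)        -         syscall complete, results saved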
+
+ Sometimes we never want to restart an interrupted syscall (because
+ sigaction says not to), so we only restart if "restart" is True.
+
+ This will also call VG_(post_syscall)() if the syscall has actually
+ completed (either because it was interrupted, or because it
+ actually finished). It will not call VG_(post_syscall)() if the
+ syscall is set up for restart, which means that the pre-wrapper may
+ get called multiple times.
+ */
+/* NB: meant to track the x86 version -- keep the two in step.
+
+   tid     - thread whose syscall was interrupted by a signal
+   uc      - signal context; supplies the real instruction pointer and,
+             when the syscall had completed, its real return value
+   restart - if True, a syscall caught exactly at the restart point is
+             re-run; if False it is completed with -VKI_EINTR instead
+
+   On return tst->syscallno is -1, unless the signal arrived outside
+   the syscall sequence, in which case nothing is changed. */
+void VGA_(interrupted_syscall)(ThreadId tid,
+                               struct vki_ucontext *uc,
+                               Bool restart)
+{
+   static const Bool debug = 0;   /* set non-zero to trace the decisions below */
+
+   ThreadState     *tst     = VG_(get_ThreadState)(tid);
+   ThreadArchState *th_regs = &tst->arch;
+   Word             ip      = UCONTEXT_INSTR_PTR(uc);
+
+   if (debug)
+      VG_(printf)("interrupted_syscall: ip=%p; restart=%d eax=%d\n",
+                  ip, restart, UCONTEXT_SYSCALL_NUM(uc));
+
+   if (ip < VGA_(blksys_setup) || ip >= VGA_(blksys_finished)) {
+      /* The signal did not land inside the syscall sequence, so there
+         is nothing to fix up.  Like every other diagnostic in this
+         function, the message is only emitted when debugging; it used
+         to be printed unconditionally. */
+      if (debug)
+         VG_(printf)(" not in syscall (%p - %p)\n",
+                     VGA_(blksys_setup), VGA_(blksys_finished));
+      vg_assert(tst->syscallno == -1);
+      return;
+   }
+
+   vg_assert(tst->syscallno != -1);
+
+   if (ip >= VGA_(blksys_setup) && ip < VGA_(blksys_restart)) {
+      /* syscall hasn't even started; go around again */
+      if (debug)
+         VG_(printf)(" not started: restart\n");
+      VGA_(restart_syscall)(th_regs);
+   } else if (ip == VGA_(blksys_restart)) {
+      /* We're either about to run the syscall, or it was interrupted
+         and the kernel restarted it.  Restart if asked, otherwise
+         EINTR it. */
+      if (restart)
+         VGA_(restart_syscall)(th_regs);
+      else {
+         th_regs->vex.PLATFORM_SYSCALL_RET = -VKI_EINTR;
+         VG_(post_syscall)(tid);
+      }
+   } else if (ip >= VGA_(blksys_complete) && ip < VGA_(blksys_committed)) {
+      /* Syscall complete, but result hasn't been written back yet.
+         The saved real CPU %rax has the result, which we need to move
+         to RAX. */
+      if (debug)
+         VG_(printf)(" completed: ret=%d\n", UCONTEXT_SYSCALL_RET(uc));
+      th_regs->vex.PLATFORM_SYSCALL_RET = UCONTEXT_SYSCALL_RET(uc);
+      VG_(post_syscall)(tid);
+   } else if (ip >= VGA_(blksys_committed) && ip < VGA_(blksys_finished)) {
+      /* Result committed, but the signal mask has not been restored;
+         we expect our caller (the signal handler) will have fixed
+         this up. */
+      if (debug)
+         VG_(printf)(" all done\n");
+      VG_(post_syscall)(tid);
+   } else
+      /* Only reachable if ip lies strictly between the restart and
+         complete labels, i.e. part-way through the syscall insn. */
+      VG_(core_panic)("?? strange syscall interrupt state?");
+
+   tst->syscallno = -1;
+}
+
+extern void VGA_(_client_syscall)(Int syscallno, /* assembly routine in syscall.S; syscall number to issue */
+ void* guest_state, /* guest register block: syscall args are read from it, the result is written to its RAX */
+ const vki_sigset_t *syscall_mask, /* signal mask installed while the syscall runs */
+ const vki_sigset_t *restore_mask, /* receives the previous mask, which is reinstalled afterwards */
+ Int nsigwords); /* size argument handed on to rt_sigprocmask */
+ /* NOTE(review): the header comment on the assembly routine documents
+ an Int return (0, or a -ve error if a sigprocmask failed), but it is
+ declared void here, so that status is discarded -- confirm this is
+ intended. */
+void VGA_(client_syscall)(Int syscallno, ThreadState *tst,
+ const vki_sigset_t *syscall_mask)
+{ /* Run syscall number syscallno on behalf of thread tst, using the
+ guest registers in tst->arch.vex, with syscall_mask installed as
+ the signal mask for the duration of the call. */
+ vki_sigset_t saved; /* scratch: the assembly routine stores the prior mask here and restores from it */
+ VGA_(_client_syscall)(syscallno, &tst->arch.vex,
+ syscall_mask, &saved, _VKI_NSIG_WORDS * sizeof(UWord)); /* last arg is a byte count, despite the parameter being named nsigwords */
+}
+
/*
Allocate a stack for this thread.
pUInt++)
*pUInt = FILL;
/* rsp is left at top of stack */
- rsp = pUInt;
+ rsp = (ULong*)pUInt;
if (0)
VG_(printf)("stack for tid %d at %p (%x); esp=%p\n",
------------------------------------------------------------------ */
/* These are addresses within VGA_(client_syscall). See syscall.S for details. */
-extern const Word VGA_(blksys_setup);
-extern const Word VGA_(blksys_restart);
-extern const Word VGA_(blksys_complete);
-extern const Word VGA_(blksys_committed);
-extern const Word VGA_(blksys_finished);
+extern const Addr VGA_(blksys_setup);
+extern const Addr VGA_(blksys_restart);
+extern const Addr VGA_(blksys_complete);
+extern const Addr VGA_(blksys_committed);
+extern const Addr VGA_(blksys_finished);
// Back up to restart a system call.
void VGA_(restart_syscall)(ThreadArchState *arch)
3. save result to EAX
4. re-block signals
- If a signal happens at Then Why?
- 1-2 restart nothing has happened (restart syscall)
- 2 restart syscall hasn't started, or kernel wants to restart
- 2-3 save syscall complete, but results not saved
- 3-4 - syscall complete, results saved
+ If a signal
+ happens at Then Why?
+ [1-2) restart nothing has happened (restart syscall)
+ [2] restart syscall hasn't started, or kernel wants to restart
+ [2-3) save syscall complete, but results not saved
+ [3-4) syscall complete, results saved
Sometimes we never want to restart an interrupted syscall (because
sigaction says not to), so we only restart if "restart" is True.
syscall is set up for restart, which means that the pre-wrapper may
get called multiple times.
*/
+/* NB: this is identical to the amd64 version */
void VGA_(interrupted_syscall)(ThreadId tid,
struct vki_ucontext *uc,
Bool restart)