From: Paul Floyd Date: Mon, 30 May 2022 20:57:34 +0000 (+0200) Subject: Indent and add more comments for FreeBSD syscall code X-Git-Tag: VALGRIND_3_20_0~59 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2b370744e5fe1830e4454d14af9d127aa9ed7acd;p=thirdparty%2Fvalgrind.git Indent and add more comments for FreeBSD syscall code After working on an issue that turns out to seem to be with the FreeBSD kernel sched_uler I played a lot with the Valgrind syscall and scheduler code. I've kept the comments and the reformatting. --- diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c index ad3d71602e..1e49ed4121 100644 --- a/coregrind/m_syscall.c +++ b/coregrind/m_syscall.c @@ -735,6 +735,10 @@ asm( ); #elif defined(VGP_amd64_freebsd) +/* Convert function calling convention --> SYSCALL_STD calling + convention + PJF - not sure why we don't use SYSCALL0 convention like x86 + */ extern UWord do_syscall_WRK ( UWord syscall_no, /* %rdi */ UWord a1, /* %rsi */ @@ -751,8 +755,6 @@ extern UWord do_syscall_WRK ( asm( ".text\n" "do_syscall_WRK:\n" - /* Convert function calling convention --> syscall calling - convention */ " pushq %rbp\n" " movq %rsp, %rbp\n" " movq %rdi, %rax\n" /* syscall_no */ @@ -761,21 +763,21 @@ asm( " movq %rcx, %rdx\n" /* a3 */ " movq %r8, %r10\n" /* a4 */ " movq %r9, %r8\n" /* a5 */ -" movq 16(%rbp), %r9\n" /* a6 last arg from stack, account for %rbp */ +" movq 16(%rbp), %r9\n" /* a6 last arg from stack, account for %rbp */ " movq 24(%rbp), %r11\n" /* a7 from stack */ " pushq %r11\n" " movq 32(%rbp), %r11\n" /* a8 from stack */ " pushq %r11\n" -" subq $8,%rsp\n" /* fake return addr */ +" subq $8,%rsp\n" /* fake return addr */ " syscall\n" " jb 1f\n" -" movq 48(%rbp),%rsi\n" -" movq %rdx, (%rsi)\n" +" movq 48(%rbp),%rsi\n" /* success */ +" movq %rdx, (%rsi)\n" /* second return value */ " movq %rbp, %rsp\n" " popq %rbp\n" " ret\n" -"1:\n" -" movq 40(%rbp), %rsi\n" +"1:\n" /* error path */ +" movq 40(%rbp), %rsi\n" /* flags */ " movl $1,(%rsi)\n" " movq %rbp, %rsp\n" " popq %rbp\n" diff --git a/coregrind/m_syswrap/syscall-amd64-freebsd.S b/coregrind/m_syswrap/syscall-amd64-freebsd.S index b435842c81..55d53f0b76 100644 --- a/coregrind/m_syswrap/syscall-amd64-freebsd.S +++ b/coregrind/m_syswrap/syscall-amd64-freebsd.S @@ -77,115 +77,124 @@ .globl ML_(do_syscall_for_client_WRK) ML_(do_syscall_for_client_WRK): - /* save callee-saved regs */ - pushq %rbp - movq %rsp, %rbp - pushq %rdi // -8(%rbp) syscallno - pushq %rsi // -16(%rbp) guest_state - pushq %rdx // -24(%rbp) sysmask - pushq %rcx // -32(%rbp) postmask - pushq %r8 // -40(%rbp) sigsetSzB - -1: /* Even though we can't take a signal until the sigprocmask completes, - start the range early. - If eip is in the range [1,2), the syscall hasn't been started yet */ - - /* Set the signal mask which should be current during the syscall. */ - /* Save and restore all 5 arg regs round the call. This is easier - than figuring out the minimal set to save/restore. */ - - movq $__NR_sigprocmask, %rax // syscall # - movq $VKI_SIG_SETMASK, %rdi // how - movq %rdx, %rsi // sysmask - movq %rcx, %rdx // postmask - syscall - - jb 7f /* sigprocmask failed */ - - /* OK, that worked. Now do the syscall proper. */ - - /* 6 register parameters */ - movq -16(%rbp), %r11 /* r11 = VexGuestAMD64State * */ - movq OFFSET_amd64_RDI(%r11), %rdi - movq OFFSET_amd64_RSI(%r11), %rsi - movq OFFSET_amd64_RDX(%r11), %rdx - movq OFFSET_amd64_R10(%r11), %r10 - movq OFFSET_amd64_R8(%r11), %r8 - movq OFFSET_amd64_R9(%r11), %r9 + /* save callee-saved regs */ + pushq %rbp + movq %rsp, %rbp + pushq %rdi // -8(%rbp) syscallno + pushq %rsi // -16(%rbp) guest_state + pushq %rdx // -24(%rbp) sysmask + pushq %rcx // -32(%rbp) postmask + pushq %r8 // -40(%rbp) sigsetSzB + +1: /* Even though we can't take a signal until the sigprocmask completes, + start the range early. + If eip is in the range [1,2), the syscall hasn't been started yet */ + + /* Set the signal mask which should be current during the syscall. */ + /* Save and restore all 5 arg regs round the call. This is easier + than figuring out the minimal set to save/restore. */ + + movq $__NR_sigprocmask, %rax // syscall # + movq $VKI_SIG_SETMASK, %rdi // how + movq %rdx, %rsi // sysmask + movq %rcx, %rdx // postmask + syscall + + jb 7f /* sigprocmask failed */ + + /* OK, that worked. Now do the syscall proper. */ + + /* 6 register parameters */ + movq -16(%rbp), %r11 /* r11 = VexGuestAMD64State * */ + movq OFFSET_amd64_RDI(%r11), %rdi + movq OFFSET_amd64_RSI(%r11), %rsi + movq OFFSET_amd64_RDX(%r11), %rdx + movq OFFSET_amd64_R10(%r11), %r10 + movq OFFSET_amd64_R8(%r11), %r8 + movq OFFSET_amd64_R9(%r11), %r9 /* 2 stack parameters plus return address (ignored by syscall) */ - movq OFFSET_amd64_RSP(%r11), %r11 /* r11 = simulated RSP */ - movq 16(%r11), %rax - pushq %rax - movq 8(%r11), %rax - pushq %rax - /* (fake) return address. */ - movq 0(%r11), %rax - pushq %rax + /* @todo PJF there is a potential bug here + * syscall can take up to 8 arguments + * but when syscall0 or syscall198 is being used + * one argument is used for the syscall0/198 id + * and one for the actual id and in this case + * there could be 3 stack parameters. + * However, only mmap takes 8 arguments + * and only on x86. It would be an unlikely combination, + * but this might break one day. */ + movq OFFSET_amd64_RSP(%r11), %r11 /* r11 = simulated RSP */ + movq 16(%r11), %rax + pushq %rax + movq 8(%r11), %rax + pushq %rax + /* (fake) return address. */ + movq 0(%r11), %rax + pushq %rax /* syscallno */ - movq -8(%rbp), %rax - - /* If rip==2, then the syscall was either just about - to start, or was interrupted and the kernel was - restarting it. */ -2: syscall -3: /* In the range [3, 4), the syscall result is in %rax, - but hasn't been committed to RAX. */ - - /* stack contents: 3 words for syscall above, plus our prologue */ - setc 0(%rsp) /* stash returned carry flag */ - - movq -16(%rbp), %r11 /* r11 = VexGuestAMD64State * */ - movq %rax, OFFSET_amd64_RAX(%r11) /* save back to RAX */ - movq %rdx, OFFSET_amd64_RDX(%r11) /* save back to RDX */ - - /* save carry flag to VEX */ - xorq %rax, %rax - movb 0(%rsp), %al - movq %rax, %rdi /* arg1 = new flag */ - movq %r11, %rsi /* arg2 = vex state */ - addq $24, %rsp /* remove syscall parameters */ - movq $0x1, ML_(blksys_saving_cflag) - call LibVEX_GuestAMD64_put_rflag_c - movq $0x0, ML_(blksys_saving_cflag) - -4: /* Re-block signals. If eip is in [4,5), then the syscall - is complete and we needn't worry about it. */ - - movq $__NR_sigprocmask, %rax // syscall # - movq $VKI_SIG_SETMASK, %rdi // how - movq -32(%rbp), %rsi // postmask - xorq %rdx, %rdx // NULL - syscall - - jb 7f /* sigprocmask failed */ + movq -8(%rbp), %rax + + /* If rip==2, then the syscall was either just about + to start, or was interrupted and the kernel was + restarting it. */ +2: syscall +3: /* In the range [3, 4), the syscall result is in %rax, + but hasn't been committed to RAX. */ + + /* stack contents: 3 words for syscall above, plus our prologue */ + setc 0(%rsp) /* stash returned carry flag */ + + movq -16(%rbp), %r11 /* r11 = VexGuestAMD64State * */ + movq %rax, OFFSET_amd64_RAX(%r11) /* save back to RAX */ + movq %rdx, OFFSET_amd64_RDX(%r11) /* save back to RDX */ + + /* save carry flag to VEX */ + xorq %rax, %rax + movb 0(%rsp), %al + movq %rax, %rdi /* arg1 = new flag */ + movq %r11, %rsi /* arg2 = vex state */ + addq $24, %rsp /* remove syscall parameters */ + movq $0x1, ML_(blksys_saving_cflag) + call LibVEX_GuestAMD64_put_rflag_c + movq $0x0, ML_(blksys_saving_cflag) + +4: /* Re-block signals. If eip is in [4,5), then the syscall + is complete and we needn't worry about it. */ + + movq $__NR_sigprocmask, %rax // syscall # + movq $VKI_SIG_SETMASK, %rdi // how + movq -32(%rbp), %rsi // postmask + xorq %rdx, %rdx // NULL + syscall + + jb 7f /* sigprocmask failed */ 5: /* now safe from signals */ - xorq %rax,%rax - movq -8(%rbp), %rdi - movq -16(%rbp), %rsi - movq -24(%rbp), %rdx - movq -32(%rbp), %rcx - movq -40(%rbp), %r8 - movq %rbp, %rsp - popq %rbp - ret + xorq %rax,%rax + movq -8(%rbp), %rdi + movq -16(%rbp), %rsi + movq -24(%rbp), %rdx + movq -32(%rbp), %rcx + movq -40(%rbp), %r8 + movq %rbp, %rsp + popq %rbp + ret 7: /* failure: return 0x8000 | error code */ - orq $0x8000, %rax - movq -8(%rbp), %rdi - movq -16(%rbp), %rsi - movq -24(%rbp), %rdx - movq -32(%rbp), %rcx - movq -40(%rbp), %r8 - movq %rbp, %rsp - popq %rbp - ret + orq $0x8000, %rax + movq -8(%rbp), %rdi + movq -16(%rbp), %rsi + movq -24(%rbp), %rdx + movq -32(%rbp), %rcx + movq -40(%rbp), %r8 + movq %rbp, %rsp + popq %rbp + ret .section .rodata -/* export the ranges so that - VG_(fixup_guest_state_after_syscall_interrupted) can do the - right thing */ + /* export the ranges so that + VG_(fixup_guest_state_after_syscall_interrupted) can do the + right thing */ .globl ML_(blksys_setup) .globl ML_(blksys_restart) diff --git a/coregrind/m_syswrap/syscall-x86-freebsd.S b/coregrind/m_syswrap/syscall-x86-freebsd.S index d4a6c90812..523d3d2e0d 100644 --- a/coregrind/m_syswrap/syscall-x86-freebsd.S +++ b/coregrind/m_syswrap/syscall-x86-freebsd.S @@ -38,40 +38,40 @@ /*----------------------------------------------------------------*/ /* - Perform a syscall for the client. This will run a syscall - with the client's specific per-thread signal mask. - - The structure of this function is such that, if the syscall is - interrupted by a signal, we can determine exactly what - execution state we were in with respect to the execution of - the syscall by examining the value of %eip in the signal - handler. This means that we can always do the appropriate - thing to precisely emulate the kernel's signal/syscall - interactions. - - The syscall number is taken from the argument, even though it - should also be in regs->m_eax. The syscall result is written - back to regs->m_eax on completion. - - Returns 0 if the syscall was successfully called (even if the - syscall itself failed), or a -ve error code if one of the - sigprocmasks failed (there's no way to determine which one - failed). - - VG_(fixup_guest_state_after_syscall_interrupted) does the - thread state fixup in the case where we were interrupted by a - signal. + Perform a syscall for the client. This will run a syscall + with the client's specific per-thread signal mask. + + The structure of this function is such that, if the syscall is + interrupted by a signal, we can determine exactly what + execution state we were in with respect to the execution of + the syscall by examining the value of %eip in the signal + handler. This means that we can always do the appropriate + thing to precisely emulate the kernel's signal/syscall + interactions. + + The syscall number is taken from the argument, even though it + should also be in regs->m_eax. The syscall result is written + back to regs->m_eax on completion. + + Returns 0 if the syscall was successfully called (even if the + syscall itself failed), or a -ve error code if one of the + sigprocmasks failed (there's no way to determine which one + failed). + + VG_(fixup_guest_state_after_syscall_interrupted) does the + thread state fixup in the case where we were interrupted by a + signal. Prototype: - Int ML_(do_syscall_for_client_WRK)( - Int syscallno, // ebp+8 - void* guest_state, // ebp+12 - const vki_sigset_t *sysmask, // ebp+16 - const vki_sigset_t *postmask, // ebp+20 - Int sigsetSzB) // ebp+24 + Int ML_(do_syscall_for_client_WRK)( + Int syscallno, // ebp+8 + void* guest_state, // ebp+12 + const vki_sigset_t *sysmask, // ebp+16 + const vki_sigset_t *postmask, // ebp+20 + Int sigsetSzB) // ebp+24 - Note that sigsetSzB is totally ignored (and irrelevant). + Note that sigsetSzB is totally ignored (and irrelevant). */ /* from vki-darwin.h, checked at startup by m_vki.c */ @@ -80,101 +80,101 @@ .globl ML_(do_syscall_for_client_WRK) ML_(do_syscall_for_client_WRK): /* establish stack frame */ - push %ebp - mov %esp, %ebp - subl $8, %esp /* 16-byte align stack */ + push %ebp + mov %esp, %ebp + subl $8, %esp /* 16-byte align stack */ 1: /* Even though we can't take a signal until the - sigprocmask completes, start the range early. - If eip is in the range [1,2), the syscall hasn't been started yet */ - - /* Set the signal mask which should be current during the syscall. */ - pushl 20(%ebp) - pushl 16(%ebp) - pushl $VKI_SIG_SETMASK - pushl $0xcafebabe /* totally fake return address */ - movl $__NR_sigprocmask, %eax - int $0x80 - jc 7f /* sigprocmask failed */ - addl $16,%esp - - /* Copy syscall parameters to the stack - assume no more than 8 - * plus the return address */ - /* do_syscall8 */ - /* stack is currently aligned assuming 8 parameters */ - movl 12(%ebp), %edx - movl OFFSET_x86_ESP(%edx), %edx /* edx = simulated ESP */ - movl 28+4(%edx), %eax - pushl %eax - movl 24+4(%edx), %eax - pushl %eax - movl 20+4(%edx), %eax - pushl %eax - movl 16+4(%edx), %eax - pushl %eax - movl 12+4(%edx), %eax - pushl %eax - movl 8+4(%edx), %eax - pushl %eax - movl 4+4(%edx), %eax - pushl %eax - movl 0+4(%edx), %eax - pushl %eax - /* return address */ - movl 0(%edx), %eax - pushl %eax - - /* Put syscall number in eax */ - movl 8(%ebp), %eax - - /* If eip==2, then the syscall was either just about to start, - or was interrupted and the kernel was restarting it. */ -2: int $0x80 /* UNIX (GrP fixme should be sysenter?) */ - -3: /* In the range [3, 4), the syscall result is in %eax and %edx and C, - but hasn't been committed to the thread state. */ - setc 0(%esp) /* stash returned carry flag */ - movl 12(%ebp), %ecx - movl %eax, OFFSET_x86_EAX(%ecx) /* save EAX to vex */ - movl %edx, OFFSET_x86_EDX(%ecx) /* save EDX to vex */ - /* save carry flag to vex */ - subl $12, %esp - movl %ecx, 4(%esp) - movl $0, 0(%esp) - movb 12(%esp), %al - movb %al, 0(%esp) + sigprocmask completes, start the range early. + If eip is in the range [1,2), the syscall hasn't been started yet */ + + /* Set the signal mask which should be current during the syscall. */ + pushl 20(%ebp) + pushl 16(%ebp) + pushl $VKI_SIG_SETMASK + pushl $0xcafebabe /* totally fake return address */ + movl $__NR_sigprocmask, %eax + int $0x80 + jc 7f /* sigprocmask failed */ + addl $16,%esp + + /* Copy syscall parameters to the stack - assume no more than 8 + * plus the return address */ + /* do_syscall8 */ + /* stack is currently aligned assuming 8 parameters */ + movl 12(%ebp), %edx + movl OFFSET_x86_ESP(%edx), %edx /* edx = simulated ESP */ + movl 28+4(%edx), %eax + pushl %eax + movl 24+4(%edx), %eax + pushl %eax + movl 20+4(%edx), %eax + pushl %eax + movl 16+4(%edx), %eax + pushl %eax + movl 12+4(%edx), %eax + pushl %eax + movl 8+4(%edx), %eax + pushl %eax + movl 4+4(%edx), %eax + pushl %eax + movl 0+4(%edx), %eax + pushl %eax + /* return address */ + movl 0(%edx), %eax + pushl %eax + + /* Put syscall number in eax */ + movl 8(%ebp), %eax + + /* If eip==2, then the syscall was either just about to start, + or was interrupted and the kernel was restarting it. */ +2: int $0x80 /* UNIX (GrP fixme should be sysenter?) */ + +3: /* In the range [3, 4), the syscall result is in %eax and %edx and C, + but hasn't been committed to the thread state. */ + setc 0(%esp) /* stash returned carry flag */ + movl 12(%ebp), %ecx + movl %eax, OFFSET_x86_EAX(%ecx) /* save EAX to vex */ + movl %edx, OFFSET_x86_EDX(%ecx) /* save EDX to vex */ + /* save carry flag to vex */ + subl $12, %esp + movl %ecx, 4(%esp) + movl $0, 0(%esp) + movb 12(%esp), %al + movb %al, 0(%esp) movl $0x1, ML_(blksys_saving_cflag) - call LibVEX_GuestX86_put_eflag_c + call LibVEX_GuestX86_put_eflag_c movl $0x0, ML_(blksys_saving_cflag) - addl $12, %esp - -4: /* Re-block signals. If eip is in [4,5), then the syscall is - complete and we needn't worry about it. */ - /* Set up for __pthread_sigmask(SIG_SETMASK, postmask, NULL) */ - pushl $0 - pushl 20(%ebp) - pushl $VKI_SIG_SETMASK - pushl $0xcafef00d /* totally fake return address */ - movl $__NR_sigprocmask, %eax - int $0x80 /* should be sysenter? */ - jc 7f /* sigprocmask failed */ - addl $16,%esp - -5: /* now safe from signals */ - movl $0, %eax /* SUCCESS */ - movl %ebp, %esp - popl %ebp - ret - -7: /* failure: return 0x8000 | error code */ - /* Note that we enter here with %esp being 16 too low - (4 extra words on the stack). But because we're nuking - the stack frame now, that doesn't matter. */ - andl $0x7FFF, %eax - orl $0x8000, %eax - movl %ebp, %esp - popl %ebp - ret + addl $12, %esp + +4: /* Re-block signals. If eip is in [4,5), then the syscall is + complete and we needn't worry about it. */ + /* Set up for __pthread_sigmask(SIG_SETMASK, postmask, NULL) */ + pushl $0 + pushl 20(%ebp) + pushl $VKI_SIG_SETMASK + pushl $0xcafef00d /* totally fake return address */ + movl $__NR_sigprocmask, %eax + int $0x80 /* should be sysenter? */ + jc 7f /* sigprocmask failed */ + addl $16,%esp + +5: /* now safe from signals */ + movl $0, %eax /* SUCCESS */ + movl %ebp, %esp + popl %ebp + ret + +7: /* failure: return 0x8000 | error code */ + /* Note that we enter here with %esp being 16 too low + (4 extra words on the stack). But because we're nuking + the stack frame now, that doesn't matter. */ + andl $0x7FFF, %eax + orl $0x8000, %eax + movl %ebp, %esp + popl %ebp + ret .section .rodata /* export the ranges so that diff --git a/coregrind/m_syswrap/syswrap-freebsd.c b/coregrind/m_syswrap/syswrap-freebsd.c index 8b051952d4..b8f5250b94 100644 --- a/coregrind/m_syswrap/syswrap-freebsd.c +++ b/coregrind/m_syswrap/syswrap-freebsd.c @@ -1763,7 +1763,7 @@ PRE(sys_seteuid) // int stat(char *path, struct freebsd11_stat *sb); PRE(sys_freebsd11_stat) { - PRINT("sys_stat ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",ARG1,(char *)ARG1,ARG2); + PRINT("sys_freebsd11_stat ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",ARG1,(char *)ARG1,ARG2); PRE_REG_READ2(int, "stat", char *, path, struct freebsd11_stat *, sb); PRE_MEM_RASCIIZ( "stat(path)", ARG1 ); PRE_MEM_WRITE( "stat(sb)", ARG2, sizeof(struct vki_freebsd11_stat) ); @@ -1792,7 +1792,7 @@ POST(sys_freebsd11_fstat) // int lstat(const char * restrict path, struct stat * restrict sb); PRE(sys_freebsd11_lstat) { - PRINT("sys_lstat ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",ARG1,(char *)ARG1,ARG2); + PRINT("sys_freebsd11_lstat ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",ARG1,(char *)ARG1,ARG2); PRE_REG_READ2(sb, "lstat", const char *, path, struct freebsd11_stat *, sb); PRE_MEM_RASCIIZ( "lstat(path)", ARG1 ); PRE_MEM_WRITE( "lstat(sb)", ARG2, sizeof(struct vki_freebsd11_stat) ); diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c index 333fa09b1d..5824a1dbea 100644 --- a/coregrind/m_syswrap/syswrap-main.c +++ b/coregrind/m_syswrap/syswrap-main.c @@ -585,6 +585,11 @@ void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical, UWord *stack = (UWord *)gst->guest_RSP; // FreeBSD supports different calling conventions + // @todo PJF this all seems over complicated to me + // SYSCALL_STD is OK but for the other + // two here we overwrite canonical->sysno with + // the final syscall number but then in do_syscall_for_client + // we switch real_syscallno back to __NR_syscall or __NR___syscall switch (gst->guest_RAX) { case __NR_syscall: canonical->klass = VG_FREEBSD_SYSCALL0; @@ -595,7 +600,7 @@ void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical, canonical->sysno = gst->guest_RDI; break; default: - canonical->klass = 0; + canonical->klass = VG_FREEBSD_SYSCALL_STD; canonical->sysno = gst->guest_RAX; break; } diff --git a/include/vki/vki-scnums-freebsd.h b/include/vki/vki-scnums-freebsd.h index bb1228a553..9e924c081f 100644 --- a/include/vki/vki-scnums-freebsd.h +++ b/include/vki/vki-scnums-freebsd.h @@ -29,8 +29,17 @@ #include "config.h" +// this is the syscall format used by e.g., libc functions like 'write' +// this is the one used 99.999% of the time +// the two others are only for experimental or testing use +// (but we use them in the scalar tests). #define VG_FREEBSD_SYSCALL_STD 0 +// this is the syscall format used by 'syscall' #define VG_FREEBSD_SYSCALL0 1 +// this is the syscall format used by '__syscall' +// it is the same as VG_FREEBSD_SYSCALL0 except that +// it ensures that 64bit argument alignment is correct +// that makes no difference for amd64, x86 not sure #define VG_FREEBSD_SYSCALL198 2 // From sys/syscall.h