From 1271ae8557c73db1f433e6d63d8aa93ce65fe361 Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Fri, 12 Apr 2002 10:26:32 +0000 Subject: [PATCH] New files to support pthreads. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@51 --- coregrind/arch/x86-linux/vg_libpthread.c | 411 ++++++ coregrind/vg_libpthread.c | 411 ++++++ coregrind/vg_scheduler.c | 1723 ++++++++++++++++++++++ vg_libpthread.c | 411 ++++++ vg_scheduler.c | 1723 ++++++++++++++++++++++ 5 files changed, 4679 insertions(+) create mode 100644 coregrind/arch/x86-linux/vg_libpthread.c create mode 100644 coregrind/vg_libpthread.c create mode 100644 coregrind/vg_scheduler.c create mode 100644 vg_libpthread.c create mode 100644 vg_scheduler.c diff --git a/coregrind/arch/x86-linux/vg_libpthread.c b/coregrind/arch/x86-linux/vg_libpthread.c new file mode 100644 index 0000000000..e8d60430be --- /dev/null +++ b/coregrind/arch/x86-linux/vg_libpthread.c @@ -0,0 +1,411 @@ + +/* This is a replacement for the standard libpthread.so. It is loaded + as part of the client's image (if required) and directs pthread + calls through to Valgrind's request mechanism. + + A couple of caveats. + + 1. Since it's a binary-compatible replacement for an existing library, + we must take care to used exactly the same data layouts, etc, as + the standard pthread.so does. + + 2. Since this runs as part of the client, there are no specific + restrictions on what headers etc we can include, so long as + this libpthread.so does not end up having dependencies on .so's + which the real one doesn't. + + Later ... it appears we cannot call file-related stuff in libc here, + perhaps fair enough. Be careful what you call from here. Even exit() + doesn't work (gives infinite recursion and then stack overflow); hence + myexit(). Also fprintf doesn't seem safe. +*/ + +#include "valgrind.h" /* For the request-passing mechanism */ +#include "vg_include.h" /* For the VG_USERREQ__* constants */ + +#include +#include +#include + + +/* --------------------------------------------------------------------- + Helpers. We have to be pretty self-sufficient. + ------------------------------------------------------------------ */ + +static +void myexit ( int arg ) +{ + int __res; + __asm__ volatile ("movl %%ecx, %%ebx ; int $0x80" + : "=a" (__res) + : "0" (__NR_exit), + "c" (arg) ); + /* We don't bother to mention the fact that this asm trashes %ebx, + since it won't return. If you ever do let it return ... fix + this! */ +} + + +/* Give up without using printf etc, since they seem to give + segfaults. */ +static +void ensure_valgrind ( char* caller ) +{ + char* str; + int is_valgrind = RUNNING_ON_VALGRIND; + if (!is_valgrind) { + str = "\nvalgrind-ed process: vg_libpthread.so: " + "pthread call when\n"; + write(2, str, strlen(str)); + str = "not running on valgrind; aborting! " + "This is probably a bug in\n"; + write(2, str, strlen(str)); + str = "valgrind. Please report it to me at: " + "jseward@acm.org. Thanks.\n"; + write(2, str, strlen(str)); + str = "unexpectedly called function is: "; + write(2, str, strlen(str)); + write(2, caller, strlen(caller)); + str = "\n\n"; + write(2, str, strlen(str)); + myexit(1); + } +} + + +static +void barf ( char* str ) +{ + char buf[100]; + buf[0] = 0; + strcat(buf, "\nvg_libpthread.so: "); + strcat(buf, str); + strcat(buf, "\n\n"); + write(2, buf, strlen(buf)); + myexit(1); +} + + + +/* --------------------------------------------------------------------- + Pass pthread_ calls to Valgrind's request mechanism. 
+ ------------------------------------------------------------------ */ + +int +pthread_create (pthread_t *__restrict __thread, + __const pthread_attr_t *__restrict __attr, + void *(*__start_routine) (void *), + void *__restrict __arg) +{ + int res; + ensure_valgrind("pthread_create"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_CREATE, + __thread, __attr, __start_routine, __arg); + return res; +} + + + +int +pthread_join (pthread_t __th, void **__thread_return) +{ + int res; + ensure_valgrind("pthread_join"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_JOIN, + __th, __thread_return, 0, 0); + return res; +} + + +/* What are these? Anybody know? I don't. */ + +void _pthread_cleanup_push_defer ( void ) +{ + // char* str = "_pthread_cleanup_push_defer\n"; + // write(2, str, strlen(str)); +} + +void _pthread_cleanup_pop_restore ( void ) +{ + // char* str = "_pthread_cleanup_pop_restore\n"; + // write(2, str, strlen(str)); +} + + +static int thread_specific_errno[VG_N_THREADS]; + +int* __errno_location ( void ) +{ + int tid; + ensure_valgrind("__errno_location"); + VALGRIND_MAGIC_SEQUENCE(tid, 0 /* default */, + VG_USERREQ__PTHREAD_GET_THREADID, + 0, 0, 0, 0); + /* 'cos I'm paranoid ... */ + if (tid < 0 || tid >= VG_N_THREADS) + barf("__errno_location: invalid ThreadId"); + return & thread_specific_errno[tid]; +} + + +int pthread_mutexattr_init(pthread_mutexattr_t *attr) +{ + char* str = "pthread_mutexattr_init\n"; + write(2, str, strlen(str)); + return 0; +} + +int pthread_mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *mutexattr) +{ + int res; + char* str = "pthread_mutex_init\n"; + write(2, str, strlen(str)); + ensure_valgrind("pthread_mutex_init"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_INIT, + mutex, mutexattr, 0, 0); + return res; +} + +int pthread_mutexattr_destroy(pthread_mutexattr_t *attr) +{ + char* str = "pthread_mutexattr_destroy\n"; + write(2, str, strlen(str)); + return 0; +} + +int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_lock-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_LOCK, + mutex, 0, 0, 0); + return res; + } +} + +int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_unlock-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_UNLOCK, + mutex, 0, 0, 0); + return res; + } +} + +pthread_t pthread_self(void) +{ + int tid; + ensure_valgrind("pthread_self"); + VALGRIND_MAGIC_SEQUENCE(tid, 0 /* default */, + VG_USERREQ__PTHREAD_GET_THREADID, + 0, 0, 0, 0); + if (tid < 0 || tid >= VG_N_THREADS) + barf("pthread_self: invalid ThreadId"); + return tid; +} + +int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_destroy-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_DESTROY, + mutex, 0, 0, 0); + } + return res; +} + + +int pthread_setcanceltype(int type, int *oldtype) +{ + char* str = "pthread_setcanceltype\n"; + write(2, str, strlen(str)); + return 0; +} + + +int pthread_cancel(pthread_t thread) +{ + int res; + ensure_valgrind("pthread_cancel"); + 
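+   /* As with the wrappers above, this just ships the request off to
+      the scheduler; do_pthread_cancel() in vg_scheduler.c (also added
+      in this patch) makes the cancellee look as though it had
+      returned PTHREAD_CANCELED. */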
VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_CANCEL, + thread, 0, 0, 0); + return res; +} + + +/* --------------------------------------------------------------------- + These are here (I think) because they are deemed cancellation + points by POSIX. For the moment we'll simply pass the call along + to the corresponding thread-unaware (?) libc routine. + ------------------------------------------------------------------ */ + +#include +#include +#include +#include +#include +#include + +extern +int __libc_sigaction + (int signum, + const struct sigaction *act, + struct sigaction *oldact); +int sigaction(int signum, + const struct sigaction *act, + struct sigaction *oldact) +{ + // char* str = "sigaction\n"; + // write(2, str, strlen(str)); + return __libc_sigaction(signum, act, oldact); +} + + +extern +int __libc_connect(int sockfd, + const struct sockaddr *serv_addr, + socklen_t addrlen); +int connect(int sockfd, + const struct sockaddr *serv_addr, + socklen_t addrlen) +{ + // char* str = "connect\n"; + // write(2, str, strlen(str)); + return __libc_connect(sockfd, serv_addr, addrlen); +} + + +extern +int __libc_fcntl(int fd, int cmd, long arg); +int fcntl(int fd, int cmd, long arg) +{ + // char* str = "fcntl\n"; + // write(2, str, strlen(str)); + return __libc_fcntl(fd, cmd, arg); +} + + +extern +ssize_t __libc_write(int fd, const void *buf, size_t count); +ssize_t write(int fd, const void *buf, size_t count) +{ + // char* str = "write\n"; + // write(2, str, strlen(str)); + return __libc_write(fd, buf, count); +} + + +extern +ssize_t __libc_read(int fd, void *buf, size_t count); +ssize_t read(int fd, void *buf, size_t count) +{ + // char* str = "read\n"; + // write(2, str, strlen(str)); + return __libc_read(fd, buf, count); +} + + +extern +int __libc_open(const char *pathname, int flags); +int open(const char *pathname, int flags) +{ + // char* str = "open\n"; + // write(2, str, strlen(str)); + return __libc_open(pathname, flags); +} + + +extern +int __libc_close(int fd); +int close(int fd) +{ + // char* str = "open\n"; + // write(2, str, strlen(str)); + return __libc_close(fd); +} + + +extern +int __libc_accept(int s, struct sockaddr *addr, socklen_t *addrlen); +int accept(int s, struct sockaddr *addr, socklen_t *addrlen) +{ + // char* str = "accept\n"; + // write(2, str, strlen(str)); + return __libc_accept(s, addr, addrlen); +} + + +extern +pid_t __libc_fork(void); +pid_t fork(void) +{ + // char* str = "fork\n"; + // write(2, str, strlen(str)); + return __libc_fork(); +} + + +extern +pid_t __libc_waitpid(pid_t pid, int *status, int options); +pid_t waitpid(pid_t pid, int *status, int options) +{ + // char* str = "waitpid\n"; + // write(2, str, strlen(str)); + return __libc_waitpid(pid, status, options); +} + + +extern +int __libc_nanosleep(const struct timespec *req, struct timespec *rem); +int nanosleep(const struct timespec *req, struct timespec *rem) +{ + return __libc_nanosleep(req, rem); +} + +extern +int __libc_fsync(int fd); +int fsync(int fd) +{ + return __libc_fsync(fd); +} + +/* I've no idea what these are, but they get called quite a lot. + Anybody know? 
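+   (Best guess: they are glibc's internal stdio stream-locking hooks,
+   which the real libpthread overrides with locking versions; empty
+   stubs here presumably just leave stdio unlocked.)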
*/ + +#undef _IO_flockfile +void _IO_flockfile ( _IO_FILE * file ) +{ + // char* str = "_IO_flockfile\n"; + // write(2, str, strlen(str)); + // barf("_IO_flockfile"); +} + +#undef _IO_funlockfile +void _IO_funlockfile ( _IO_FILE * file ) +{ + // char* str = "_IO_funlockfile\n"; + // write(2, str, strlen(str)); + //barf("_IO_funlockfile"); +} + diff --git a/coregrind/vg_libpthread.c b/coregrind/vg_libpthread.c new file mode 100644 index 0000000000..e8d60430be --- /dev/null +++ b/coregrind/vg_libpthread.c @@ -0,0 +1,411 @@ + +/* This is a replacement for the standard libpthread.so. It is loaded + as part of the client's image (if required) and directs pthread + calls through to Valgrind's request mechanism. + + A couple of caveats. + + 1. Since it's a binary-compatible replacement for an existing library, + we must take care to used exactly the same data layouts, etc, as + the standard pthread.so does. + + 2. Since this runs as part of the client, there are no specific + restrictions on what headers etc we can include, so long as + this libpthread.so does not end up having dependencies on .so's + which the real one doesn't. + + Later ... it appears we cannot call file-related stuff in libc here, + perhaps fair enough. Be careful what you call from here. Even exit() + doesn't work (gives infinite recursion and then stack overflow); hence + myexit(). Also fprintf doesn't seem safe. +*/ + +#include "valgrind.h" /* For the request-passing mechanism */ +#include "vg_include.h" /* For the VG_USERREQ__* constants */ + +#include +#include +#include + + +/* --------------------------------------------------------------------- + Helpers. We have to be pretty self-sufficient. + ------------------------------------------------------------------ */ + +static +void myexit ( int arg ) +{ + int __res; + __asm__ volatile ("movl %%ecx, %%ebx ; int $0x80" + : "=a" (__res) + : "0" (__NR_exit), + "c" (arg) ); + /* We don't bother to mention the fact that this asm trashes %ebx, + since it won't return. If you ever do let it return ... fix + this! */ +} + + +/* Give up without using printf etc, since they seem to give + segfaults. */ +static +void ensure_valgrind ( char* caller ) +{ + char* str; + int is_valgrind = RUNNING_ON_VALGRIND; + if (!is_valgrind) { + str = "\nvalgrind-ed process: vg_libpthread.so: " + "pthread call when\n"; + write(2, str, strlen(str)); + str = "not running on valgrind; aborting! " + "This is probably a bug in\n"; + write(2, str, strlen(str)); + str = "valgrind. Please report it to me at: " + "jseward@acm.org. Thanks.\n"; + write(2, str, strlen(str)); + str = "unexpectedly called function is: "; + write(2, str, strlen(str)); + write(2, caller, strlen(caller)); + str = "\n\n"; + write(2, str, strlen(str)); + myexit(1); + } +} + + +static +void barf ( char* str ) +{ + char buf[100]; + buf[0] = 0; + strcat(buf, "\nvg_libpthread.so: "); + strcat(buf, str); + strcat(buf, "\n\n"); + write(2, buf, strlen(buf)); + myexit(1); +} + + + +/* --------------------------------------------------------------------- + Pass pthread_ calls to Valgrind's request mechanism. 
+ ------------------------------------------------------------------ */ + +int +pthread_create (pthread_t *__restrict __thread, + __const pthread_attr_t *__restrict __attr, + void *(*__start_routine) (void *), + void *__restrict __arg) +{ + int res; + ensure_valgrind("pthread_create"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_CREATE, + __thread, __attr, __start_routine, __arg); + return res; +} + + + +int +pthread_join (pthread_t __th, void **__thread_return) +{ + int res; + ensure_valgrind("pthread_join"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_JOIN, + __th, __thread_return, 0, 0); + return res; +} + + +/* What are these? Anybody know? I don't. */ + +void _pthread_cleanup_push_defer ( void ) +{ + // char* str = "_pthread_cleanup_push_defer\n"; + // write(2, str, strlen(str)); +} + +void _pthread_cleanup_pop_restore ( void ) +{ + // char* str = "_pthread_cleanup_pop_restore\n"; + // write(2, str, strlen(str)); +} + + +static int thread_specific_errno[VG_N_THREADS]; + +int* __errno_location ( void ) +{ + int tid; + ensure_valgrind("__errno_location"); + VALGRIND_MAGIC_SEQUENCE(tid, 0 /* default */, + VG_USERREQ__PTHREAD_GET_THREADID, + 0, 0, 0, 0); + /* 'cos I'm paranoid ... */ + if (tid < 0 || tid >= VG_N_THREADS) + barf("__errno_location: invalid ThreadId"); + return & thread_specific_errno[tid]; +} + + +int pthread_mutexattr_init(pthread_mutexattr_t *attr) +{ + char* str = "pthread_mutexattr_init\n"; + write(2, str, strlen(str)); + return 0; +} + +int pthread_mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *mutexattr) +{ + int res; + char* str = "pthread_mutex_init\n"; + write(2, str, strlen(str)); + ensure_valgrind("pthread_mutex_init"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_INIT, + mutex, mutexattr, 0, 0); + return res; +} + +int pthread_mutexattr_destroy(pthread_mutexattr_t *attr) +{ + char* str = "pthread_mutexattr_destroy\n"; + write(2, str, strlen(str)); + return 0; +} + +int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_lock-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_LOCK, + mutex, 0, 0, 0); + return res; + } +} + +int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_unlock-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_UNLOCK, + mutex, 0, 0, 0); + return res; + } +} + +pthread_t pthread_self(void) +{ + int tid; + ensure_valgrind("pthread_self"); + VALGRIND_MAGIC_SEQUENCE(tid, 0 /* default */, + VG_USERREQ__PTHREAD_GET_THREADID, + 0, 0, 0, 0); + if (tid < 0 || tid >= VG_N_THREADS) + barf("pthread_self: invalid ThreadId"); + return tid; +} + +int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_destroy-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_DESTROY, + mutex, 0, 0, 0); + } + return res; +} + + +int pthread_setcanceltype(int type, int *oldtype) +{ + char* str = "pthread_setcanceltype\n"; + write(2, str, strlen(str)); + return 0; +} + + +int pthread_cancel(pthread_t thread) +{ + int res; + ensure_valgrind("pthread_cancel"); + 
VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_CANCEL, + thread, 0, 0, 0); + return res; +} + + +/* --------------------------------------------------------------------- + These are here (I think) because they are deemed cancellation + points by POSIX. For the moment we'll simply pass the call along + to the corresponding thread-unaware (?) libc routine. + ------------------------------------------------------------------ */ + +#include +#include +#include +#include +#include +#include + +extern +int __libc_sigaction + (int signum, + const struct sigaction *act, + struct sigaction *oldact); +int sigaction(int signum, + const struct sigaction *act, + struct sigaction *oldact) +{ + // char* str = "sigaction\n"; + // write(2, str, strlen(str)); + return __libc_sigaction(signum, act, oldact); +} + + +extern +int __libc_connect(int sockfd, + const struct sockaddr *serv_addr, + socklen_t addrlen); +int connect(int sockfd, + const struct sockaddr *serv_addr, + socklen_t addrlen) +{ + // char* str = "connect\n"; + // write(2, str, strlen(str)); + return __libc_connect(sockfd, serv_addr, addrlen); +} + + +extern +int __libc_fcntl(int fd, int cmd, long arg); +int fcntl(int fd, int cmd, long arg) +{ + // char* str = "fcntl\n"; + // write(2, str, strlen(str)); + return __libc_fcntl(fd, cmd, arg); +} + + +extern +ssize_t __libc_write(int fd, const void *buf, size_t count); +ssize_t write(int fd, const void *buf, size_t count) +{ + // char* str = "write\n"; + // write(2, str, strlen(str)); + return __libc_write(fd, buf, count); +} + + +extern +ssize_t __libc_read(int fd, void *buf, size_t count); +ssize_t read(int fd, void *buf, size_t count) +{ + // char* str = "read\n"; + // write(2, str, strlen(str)); + return __libc_read(fd, buf, count); +} + + +extern +int __libc_open(const char *pathname, int flags); +int open(const char *pathname, int flags) +{ + // char* str = "open\n"; + // write(2, str, strlen(str)); + return __libc_open(pathname, flags); +} + + +extern +int __libc_close(int fd); +int close(int fd) +{ + // char* str = "open\n"; + // write(2, str, strlen(str)); + return __libc_close(fd); +} + + +extern +int __libc_accept(int s, struct sockaddr *addr, socklen_t *addrlen); +int accept(int s, struct sockaddr *addr, socklen_t *addrlen) +{ + // char* str = "accept\n"; + // write(2, str, strlen(str)); + return __libc_accept(s, addr, addrlen); +} + + +extern +pid_t __libc_fork(void); +pid_t fork(void) +{ + // char* str = "fork\n"; + // write(2, str, strlen(str)); + return __libc_fork(); +} + + +extern +pid_t __libc_waitpid(pid_t pid, int *status, int options); +pid_t waitpid(pid_t pid, int *status, int options) +{ + // char* str = "waitpid\n"; + // write(2, str, strlen(str)); + return __libc_waitpid(pid, status, options); +} + + +extern +int __libc_nanosleep(const struct timespec *req, struct timespec *rem); +int nanosleep(const struct timespec *req, struct timespec *rem) +{ + return __libc_nanosleep(req, rem); +} + +extern +int __libc_fsync(int fd); +int fsync(int fd) +{ + return __libc_fsync(fd); +} + +/* I've no idea what these are, but they get called quite a lot. + Anybody know? 
*/ + +#undef _IO_flockfile +void _IO_flockfile ( _IO_FILE * file ) +{ + // char* str = "_IO_flockfile\n"; + // write(2, str, strlen(str)); + // barf("_IO_flockfile"); +} + +#undef _IO_funlockfile +void _IO_funlockfile ( _IO_FILE * file ) +{ + // char* str = "_IO_funlockfile\n"; + // write(2, str, strlen(str)); + //barf("_IO_funlockfile"); +} + diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c new file mode 100644 index 0000000000..c1a26ad2c9 --- /dev/null +++ b/coregrind/vg_scheduler.c @@ -0,0 +1,1723 @@ + +/*--------------------------------------------------------------------*/ +/*--- A user-space pthreads implementation. vg_scheduler.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + +#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and + VG_USERREQ__DO_LEAK_CHECK */ + +/* BORKAGE as of 11 Apr 02 + +Note! This implementation is so poor as to not be suitable for use by +anyone at all! + +- properly save scheduler private state in signal delivery frames. + +- fd-poll optimisation (don't select with empty sets) + +- signals interrupting read/write and nanosleep, and take notice + of SA_RESTART or not + +- return bogus RA: %EAX trashed, so pthread_joiner gets nonsense + exit codes + +- when a thread is done mark its stack as noaccess */ + + +/* --------------------------------------------------------------------- + Types and globals for the scheduler. + ------------------------------------------------------------------ */ + +/* type ThreadId is defined in vg_include.h. */ + +/* struct ThreadState is defined in vg_include.h. */ + +/* Private globals. A statically allocated array of threads. */ +static ThreadState vg_threads[VG_N_THREADS]; + + +/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */ +jmp_buf VG_(scheduler_jmpbuf); +/* ... and if so, here's the signal which caused it to do so. */ +Int VG_(longjmpd_on_signal); + + +/* Machinery to keep track of which threads are waiting on which + fds. */ +typedef + struct { + /* The thread which made the request. */ + ThreadId tid; + + /* The next two fields describe the request. */ + /* File descriptor waited for. -1 means this slot is not in use */ + Int fd; + /* The syscall number the fd is used in. */ + Int syscall_no; + + /* False => still waiting for select to tell us the fd is ready + to go. True => the fd is ready, but the results have not yet + been delivered back to the calling thread. 
Once the latter + happens, this entire record is marked as no longer in use, by + making the fd field be -1. */ + Bool ready; + } + VgWaitedOnFd; + +static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS]; + + + +typedef + struct { + /* Is this slot in use, or free? */ + Bool in_use; + /* If in_use, is this mutex held by some thread, or not? */ + Bool held; + /* if held==True, owner indicates who by. */ + ThreadId owner; + } + VgMutex; + +static VgMutex vg_mutexes[VG_N_MUTEXES]; + +/* Forwards */ +static void do_nontrivial_clientreq ( ThreadId tid ); + + +/* --------------------------------------------------------------------- + Helper functions for the scheduler. + ------------------------------------------------------------------ */ + +static +void pp_sched_status ( void ) +{ + Int i; + VG_(printf)("\nsched status:\n"); + for (i = 0; i < VG_N_THREADS; i++) { + if (vg_threads[i].status == VgTs_Empty) continue; + VG_(printf)("tid %d: ", i); + switch (vg_threads[i].status) { + case VgTs_Runnable: VG_(printf)("Runnable\n"); break; + case VgTs_WaitFD: VG_(printf)("WaitFD\n"); break; + case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n", + vg_threads[i].joiner); break; + case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break; + default: VG_(printf)("???"); break; + } + } + VG_(printf)("\n"); +} + +static +void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no ) +{ + Int i; + + vg_assert(fd != -1); /* avoid total chaos */ + + for (i = 0; i < VG_N_WAITING_FDS; i++) + if (vg_waiting_fds[i].fd == -1) + break; + + if (i == VG_N_WAITING_FDS) + VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low"); + /* + VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n", + tid, fd, i); + */ + vg_waiting_fds[i].fd = fd; + vg_waiting_fds[i].tid = tid; + vg_waiting_fds[i].ready = False; + vg_waiting_fds[i].syscall_no = syscall_no; +} + + + +static +void print_sched_event ( ThreadId tid, Char* what ) +{ + if (1) + VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what ); +} + + +static +Char* name_of_sched_event ( UInt event ) +{ + switch (event) { + case VG_TRC_EBP_JMP_SPECIAL: return "JMP_SPECIAL"; + case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL"; + case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ"; + case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO"; + case VG_TRC_INNER_FASTMISS: return "FASTMISS"; + case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL"; + default: return "??UNKNOWN??"; + } +} + + +/* Create a translation of the client basic block beginning at + orig_addr, and add it to the translation cache & translation table. + This probably doesn't really belong here, but, hey ... +*/ +void VG_(create_translation_for) ( Addr orig_addr ) +{ + Addr trans_addr; + TTEntry tte; + Int orig_size, trans_size; + /* Ensure there is space to hold a translation. */ + VG_(maybe_do_lru_pass)(); + VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size ); + /* Copy data at trans_addr into the translation cache. + Returned pointer is to the code, not to the 4-byte + header. */ + /* Since the .orig_size and .trans_size fields are + UShort, be paranoid. */ + vg_assert(orig_size > 0 && orig_size < 65536); + vg_assert(trans_size > 0 && trans_size < 65536); + tte.orig_size = orig_size; + tte.orig_addr = orig_addr; + tte.trans_size = trans_size; + tte.trans_addr = VG_(copy_to_transcache) + ( trans_addr, trans_size ); + tte.mru_epoch = VG_(current_epoch); + /* Free the intermediary -- was allocated by VG_(emit_code). */ + VG_(jitfree)( (void*)trans_addr ); + /* Add to trans tab and set back pointer. 
*/ + VG_(add_to_trans_tab) ( &tte ); + /* Update stats. */ + VG_(this_epoch_in_count) ++; + VG_(this_epoch_in_osize) += orig_size; + VG_(this_epoch_in_tsize) += trans_size; + VG_(overall_in_count) ++; + VG_(overall_in_osize) += orig_size; + VG_(overall_in_tsize) += trans_size; + /* Record translated area for SMC detection. */ + VG_(smc_mark_original) ( orig_addr, orig_size ); +} + + +/* Allocate a completely empty ThreadState record. */ +static +ThreadId vg_alloc_ThreadState ( void ) +{ + Int i; + for (i = 0; i < VG_N_THREADS; i++) { + if (vg_threads[i].status == VgTs_Empty) + return i; + } + VG_(printf)("vg_alloc_ThreadState: no free slots available\n"); + VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n"); + VG_(panic)("VG_N_THREADS is too low"); + /*NOTREACHED*/ +} + + +ThreadState* VG_(get_thread_state) ( ThreadId tid ) +{ + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status != VgTs_Empty); + return & vg_threads[tid]; +} + + +/* Find an unused VgMutex record. */ +static +MutexId vg_alloc_VgMutex ( void ) +{ + Int i; + for (i = 0; i < VG_N_MUTEXES; i++) { + if (!vg_mutexes[i].in_use) + return i; + } + VG_(printf)("vg_alloc_VgMutex: no free slots available\n"); + VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n"); + VG_(panic)("VG_N_MUTEXES is too low"); + /*NOTREACHED*/ +} + + +/* Copy the saved state of a thread into VG_(baseBlock), ready for it + to be run. */ +__inline__ +void VG_(load_thread_state) ( ThreadId tid ) +{ + Int i; + VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax; + VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx; + VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx; + VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx; + VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi; + VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi; + VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp; + VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp; + VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags; + VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i]; + + VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax; + VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx; + VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx; + VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx; + VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi; + VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi; + VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp; + VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp; + VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags; +} + + +/* Copy the state of a thread from VG_(baseBlock), presumably after it + has been descheduled. For sanity-check purposes, fill the vacated + VG_(baseBlock) with garbage so as to make the system more likely to + fail quickly if we erroneously continue to poke around inside + VG_(baseBlock) without first doing a load_thread_state(). 
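+   The intended pairing is therefore VG_(load_thread_state)(tid), run
+   the thread, VG_(save_thread_state)(tid) -- exactly what
+   run_thread_for_a_while() below does.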
+*/ +__inline__ +void VG_(save_thread_state) ( ThreadId tid ) +{ + Int i; + const UInt junk = 0xDEADBEEF; + + vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)]; + vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)]; + vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)]; + vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)]; + vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)]; + vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)]; + vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; + vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)]; + vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)]; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; + + vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; + vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; + vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)]; + vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)]; + vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)]; + vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)]; + vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)]; + vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)]; + vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)]; + + /* Fill it up with junk. */ + VG_(baseBlock)[VGOFF_(m_eax)] = junk; + VG_(baseBlock)[VGOFF_(m_ebx)] = junk; + VG_(baseBlock)[VGOFF_(m_ecx)] = junk; + VG_(baseBlock)[VGOFF_(m_edx)] = junk; + VG_(baseBlock)[VGOFF_(m_esi)] = junk; + VG_(baseBlock)[VGOFF_(m_edi)] = junk; + VG_(baseBlock)[VGOFF_(m_ebp)] = junk; + VG_(baseBlock)[VGOFF_(m_esp)] = junk; + VG_(baseBlock)[VGOFF_(m_eflags)] = junk; + VG_(baseBlock)[VGOFF_(m_eip)] = junk; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk; +} + + +/* Run the thread tid for a while, and return a VG_TRC_* value to the + scheduler indicating what happened. */ +static +UInt run_thread_for_a_while ( ThreadId tid ) +{ + UInt trc = 0; + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status != VgTs_Empty); + vg_assert(VG_(bbs_to_go) > 0); + + VG_(load_thread_state) ( tid ); + if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) { + /* try this ... */ + trc = VG_(run_innerloop)(); + /* We get here if the client didn't take a fault. */ + } else { + /* We get here if the client took a fault, which caused our + signal handler to longjmp. */ + vg_assert(trc == 0); + trc = VG_TRC_UNRESUMABLE_SIGNAL; + } + VG_(save_thread_state) ( tid ); + return trc; +} + + +/* Increment the LRU epoch counter. */ +static +void increment_epoch ( void ) +{ + VG_(current_epoch)++; + if (VG_(clo_verbosity) > 2) { + UInt tt_used, tc_used; + VG_(get_tt_tc_used) ( &tt_used, &tc_used ); + VG_(message)(Vg_UserMsg, + "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d", + VG_(bbs_done), + VG_(this_epoch_in_count), + VG_(this_epoch_in_osize), + VG_(this_epoch_in_tsize), + VG_(this_epoch_out_count), + VG_(this_epoch_out_osize), + VG_(this_epoch_out_tsize), + tt_used, tc_used + ); + } + VG_(this_epoch_in_count) = 0; + VG_(this_epoch_in_osize) = 0; + VG_(this_epoch_in_tsize) = 0; + VG_(this_epoch_out_count) = 0; + VG_(this_epoch_out_osize) = 0; + VG_(this_epoch_out_tsize) = 0; +} + + +/* Initialise the scheduler. Create a single "main" thread ready to + run, with special ThreadId of zero. This is called at startup; the + caller takes care to park the client's state is parked in + VG_(baseBlock). 
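+   In other words: on entry the main thread's registers are already
+   sitting in VG_(baseBlock), and all we do here is copy them into
+   slot zero with VG_(save_thread_state)().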
+*/ +void VG_(scheduler_init) ( void ) +{ + Int i; + Addr startup_esp; + ThreadId tid_main; + + startup_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) { + VG_(printf)("%esp at startup = %p is not near %p; aborting\n", + startup_esp, VG_STARTUP_STACK_MASK); + VG_(panic)("unexpected %esp at startup"); + } + + for (i = 0; i < VG_N_THREADS; i++) { + vg_threads[i].stack_size = 0; + vg_threads[i].stack_base = (Addr)NULL; + } + + for (i = 0; i < VG_N_WAITING_FDS; i++) + vg_waiting_fds[i].fd = -1; /* not in use */ + + for (i = 0; i < VG_N_MUTEXES; i++) + vg_mutexes[i].in_use = False; + + /* Assert this is thread zero, which has certain magic + properties. */ + tid_main = vg_alloc_ThreadState(); + vg_assert(tid_main == 0); + + vg_threads[tid_main].status = VgTs_Runnable; + vg_threads[tid_main].joiner = VG_INVALID_THREADID; + vg_threads[tid_main].retval = NULL; /* not important */ + + /* Copy VG_(baseBlock) state to tid_main's slot. */ + VG_(save_thread_state) ( tid_main ); +} + + +/* What if fd isn't a valid fd? */ +static +void set_fd_nonblocking ( Int fd ) +{ + Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); + vg_assert(!VG_(is_kerror)(res)); + res |= VKI_O_NONBLOCK; + res = VG_(fcntl)( fd, VKI_F_SETFL, res ); + vg_assert(!VG_(is_kerror)(res)); +} + +static +void set_fd_blocking ( Int fd ) +{ + Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); + vg_assert(!VG_(is_kerror)(res)); + res &= ~VKI_O_NONBLOCK; + res = VG_(fcntl)( fd, VKI_F_SETFL, res ); + vg_assert(!VG_(is_kerror)(res)); +} + +static +Bool fd_is_blockful ( Int fd ) +{ + Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); + vg_assert(!VG_(is_kerror)(res)); + return (res & VKI_O_NONBLOCK) ? False : True; +} + + + +/* Do a purely thread-local request for tid, and put the result in its + %EDX, without changing its scheduling state in any way, nor that of + any other threads. Return True if so. + + If the request is non-trivial, return False; a more capable but + slower mechanism will deal with it. +*/ +static +Bool maybe_do_trivial_clientreq ( ThreadId tid ) +{ +# define SIMPLE_RETURN(vvv) \ + { vg_threads[tid].m_edx = (vvv); \ + return True; \ + } + + UInt* arg = (UInt*)(vg_threads[tid].m_eax); + UInt req_no = arg[0]; + switch (req_no) { + case VG_USERREQ__MALLOC: + SIMPLE_RETURN( + (UInt)VG_(client_malloc) ( arg[1], Vg_AllocMalloc ) + ); + case VG_USERREQ__BUILTIN_NEW: + SIMPLE_RETURN( + (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNew ) + ); + case VG_USERREQ__BUILTIN_VEC_NEW: + SIMPLE_RETURN( + (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNewVec ) + ); + case VG_USERREQ__FREE: + VG_(client_free) ( (void*)arg[1], Vg_AllocMalloc ); + SIMPLE_RETURN(0); /* irrelevant */ + case VG_USERREQ__BUILTIN_DELETE: + VG_(client_free) ( (void*)arg[1], Vg_AllocNew ); + SIMPLE_RETURN(0); /* irrelevant */ + case VG_USERREQ__BUILTIN_VEC_DELETE: + VG_(client_free) ( (void*)arg[1], Vg_AllocNewVec ); + SIMPLE_RETURN(0); /* irrelevant */ + case VG_USERREQ__CALLOC: + SIMPLE_RETURN( + (UInt)VG_(client_calloc) ( arg[1], arg[2] ) + ); + case VG_USERREQ__REALLOC: + SIMPLE_RETURN( + (UInt)VG_(client_realloc) ( (void*)arg[1], arg[2] ) + ); + case VG_USERREQ__MEMALIGN: + SIMPLE_RETURN( + (UInt)VG_(client_memalign) ( arg[1], arg[2] ) + ); + default: + /* Too hard; wimp out. 
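+         Anything with scheduling consequences -- thread creation,
+         join, mutex operations -- is left for
+         do_nontrivial_clientreq(), reached via phase 3 of the
+         scheduler loop.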
*/ + return False; + } +# undef SIMPLE_RETURN +} + + +static +void sched_do_syscall ( ThreadId tid ) +{ + UInt saved_eax; + UInt res, syscall_no; + UInt fd; + Bool might_block, assumed_nonblocking; + Bool orig_fd_blockness; + Char msg_buf[100]; + + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + syscall_no = vg_threads[tid].m_eax; /* syscall number */ + + if (syscall_no == __NR_nanosleep) { + ULong t_now, t_awaken; + struct vki_timespec* req; + req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */ + t_now = VG_(read_microsecond_timer)(); + t_awaken + = t_now + + (ULong)1000000ULL * (ULong)(req->tv_sec) + + (ULong)( (UInt)(req->tv_nsec) / 1000 ); + vg_threads[tid].status = VgTs_Sleeping; + vg_threads[tid].awaken_at = t_awaken; + if (1) { + VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu", + t_now, t_awaken-t_now); + print_sched_event(tid, msg_buf); + } + /* Force the scheduler to run something else for a while. */ + return; + } + + switch (syscall_no) { + case __NR_read: + case __NR_write: + assumed_nonblocking + = False; + might_block + = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */); + break; + default: + might_block = False; + assumed_nonblocking = True; + } + + if (assumed_nonblocking) { + /* We think it's non-blocking. Just do it in the normal way. */ + VG_(perform_assumed_nonblocking_syscall)(tid); + /* The thread is still runnable. */ + return; + } + + /* It might block. Take evasive action. */ + switch (syscall_no) { + case __NR_read: + case __NR_write: + fd = vg_threads[tid].m_ebx; break; + default: + vg_assert(3+3 == 7); + } + + /* Set the fd to nonblocking, and do the syscall, which will return + immediately, in order to lodge a request with the Linux kernel. + We later poll for I/O completion using select(). */ + + orig_fd_blockness = fd_is_blockful(fd); + set_fd_nonblocking(fd); + vg_assert(!fd_is_blockful(fd)); + VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */); + + /* This trashes the thread's %eax; we have to preserve it. */ + saved_eax = vg_threads[tid].m_eax; + KERNEL_DO_SYSCALL(tid,res); + + /* Restore original blockfulness of the fd. */ + if (orig_fd_blockness) + set_fd_blocking(fd); + else + set_fd_nonblocking(fd); + + if (res != -VKI_EWOULDBLOCK) { + /* It didn't block; it went through immediately. So finish off + in the normal way. Don't restore %EAX, since that now + (correctly) holds the result of the call. */ + VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); + /* We're still runnable. */ + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + } else { + + /* It would have blocked. First, restore %EAX to what it was + before our speculative call. */ + vg_threads[tid].m_eax = saved_eax; + /* Put this fd in a table of fds on which we are waiting for + completion. The arguments for select() later are constructed + from this table. */ + add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */); + /* Deschedule thread until an I/O completion happens. */ + vg_threads[tid].status = VgTs_WaitFD; + if (1) { + VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd); + print_sched_event(tid, msg_buf); + } + + } +} + + +/* Find out which of the fds in vg_waiting_fds are now ready to go, by + making enquiries with select(), and mark them as ready. We have to + wait for the requesting threads to fall into the the WaitFD state + before we can actually finally deliver the results, so this + procedure doesn't do that; complete_blocked_syscalls() does it. 
+ + It might seem odd that a thread which has done a blocking syscall + is not in WaitFD state; the way this can happen is if it initially + becomes WaitFD, but then a signal is delivered to it, so it becomes + Runnable for a while. In this case we have to wait for the + sighandler to return, whereupon the WaitFD state is resumed, and + only at that point can the I/O result be delivered to it. However, + this point may be long after the fd is actually ready. + + So, poll_for_ready_fds() merely detects fds which are ready. + complete_blocked_syscalls() does the second half of the trick, + possibly much later: it delivers the results from ready fds to + threads in WaitFD state. +*/ +void poll_for_ready_fds ( void ) +{ + vki_ksigset_t saved_procmask; + vki_fd_set readfds; + vki_fd_set writefds; + vki_fd_set exceptfds; + struct vki_timeval timeout; + Int fd, fd_max, i, n_ready, syscall_no, n_ok; + ThreadId tid; + Bool rd_ok, wr_ok, ex_ok; + Char msg_buf[100]; + + /* Awaken any sleeping threads whose sleep has expired. */ + { + struct vki_timespec * rem; + ULong t_now = VG_(read_microsecond_timer)(); + for (tid = 0; tid < VG_N_THREADS; tid++) { + if (vg_threads[tid].status != VgTs_Sleeping) + continue; + if (t_now >= vg_threads[tid].awaken_at) { + /* Resume this thread. Set to zero the remaining-time (second) + arg of nanosleep, since it's used up all its time. */ + vg_assert(vg_threads[tid].m_eax == __NR_nanosleep); + rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */ + if (rem != NULL) { + rem->tv_sec = 0; + rem->tv_nsec = 0; + } + /* Make the syscall return 0 (success). */ + vg_threads[tid].m_eax = 0; + /* Reschedule this thread. */ + vg_threads[tid].status = VgTs_Runnable; + if (1) { + VG_(sprintf)(msg_buf, "at %lu: nanosleep done", + t_now); + print_sched_event(tid, msg_buf); + } + } + } + } + + timeout.tv_sec = 0; + timeout.tv_usec = 0; + + VKI_FD_ZERO(&readfds); + VKI_FD_ZERO(&writefds); + VKI_FD_ZERO(&exceptfds); + fd_max = -1; + for (i = 0; i < VG_N_WAITING_FDS; i++) { + if (vg_waiting_fds[i].fd == -1 /* not in use */) + continue; + if (vg_waiting_fds[i].ready /* already ready? */) + continue; + fd = vg_waiting_fds[i].fd; + /* VG_(printf)("adding QUERY for fd %d\n", fd); */ + if (fd > fd_max) + fd_max = fd; + tid = vg_waiting_fds[i].tid; + vg_assert(tid >= 0 && tid < VG_N_THREADS); + syscall_no = vg_waiting_fds[i].syscall_no; + switch (syscall_no) { + case __NR_read: + VKI_FD_SET(fd, &readfds); break; + case __NR_write: + VKI_FD_SET(fd, &writefds); break; + default: + VG_(panic)("poll_for_ready_fds: unexpected syscall"); + /*NOTREACHED*/ + break; + } + } + + /* BLOCK ALL SIGNALS. We don't want the complication of select() + getting interrupted. */ + VG_(block_all_host_signals)( &saved_procmask ); + + n_ready = VG_(select) + ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout); + if (VG_(is_kerror)(n_ready)) { + VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready); + VG_(panic)("poll_for_ready_fds: select failed?!"); + /*NOTREACHED*/ + } + + /* UNBLOCK ALL SIGNALS */ + VG_(restore_host_signals)( &saved_procmask ); + + /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */ + + if (n_ready == 0) + return; + + /* Inspect all the fds we know about, and handle any completions that + have happened. 
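+      ("Handle" here just means mark the fd as ready; the deferred
+      syscall itself is finished off later, by
+      complete_blocked_syscalls().)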
*/ + /* + VG_(printf)("\n\n"); + for (fd = 0; fd < 100; fd++) + if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) { + VG_(printf)("X"); } else { VG_(printf)("."); }; + VG_(printf)("\n\nfd_max = %d\n", fd_max); + */ + + for (fd = 0; fd <= fd_max; fd++) { + rd_ok = VKI_FD_ISSET(fd, &readfds); + wr_ok = VKI_FD_ISSET(fd, &writefds); + ex_ok = VKI_FD_ISSET(fd, &exceptfds); + + n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0); + if (n_ok == 0) + continue; + if (n_ok > 1) { + VG_(printf)("offending fd = %d\n", fd); + VG_(panic)("poll_for_ready_fds: multiple events on fd"); + } + + /* An I/O event completed for fd. Find the thread which + requested this. */ + for (i = 0; i < VG_N_WAITING_FDS; i++) { + if (vg_waiting_fds[i].fd == -1 /* not in use */) + continue; + if (vg_waiting_fds[i].fd == fd) + break; + } + + /* And a bit more paranoia ... */ + vg_assert(i >= 0 && i < VG_N_WAITING_FDS); + + /* Mark the fd as ready. */ + vg_assert(! vg_waiting_fds[i].ready); + vg_waiting_fds[i].ready = True; + } +} + + +/* See comment attached to poll_for_ready_fds() for explaination. */ +void complete_blocked_syscalls ( void ) +{ + Int fd, i, res, syscall_no; + ThreadId tid; + Char msg_buf[100]; + + /* Inspect all the outstanding fds we know about. */ + + for (i = 0; i < VG_N_WAITING_FDS; i++) { + if (vg_waiting_fds[i].fd == -1 /* not in use */) + continue; + if (! vg_waiting_fds[i].ready) + continue; + + fd = vg_waiting_fds[i].fd; + tid = vg_waiting_fds[i].tid; + vg_assert(tid >= 0 && tid < VG_N_THREADS); + + /* The thread actually has to be waiting for the I/O event it + requested before we can deliver the result! */ + if (vg_threads[tid].status != VgTs_WaitFD) + continue; + + /* Ok, actually do it! We can safely use %EAX as the syscall + number, because the speculative call made by + sched_do_syscall() doesn't change %EAX in the case where the + call would have blocked. */ + + syscall_no = vg_waiting_fds[i].syscall_no; + vg_assert(syscall_no == vg_threads[tid].m_eax); + KERNEL_DO_SYSCALL(tid,res); + VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); + + /* Reschedule. */ + vg_threads[tid].status = VgTs_Runnable; + /* Mark slot as no longer in use. */ + vg_waiting_fds[i].fd = -1; + /* pp_sched_status(); */ + if (1) { + VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd); + print_sched_event(tid, msg_buf); + } + } +} + + +static +void nanosleep_for_a_while ( void ) +{ + Int res; + struct vki_timespec req; + struct vki_timespec rem; + req.tv_sec = 0; + req.tv_nsec = 20 * 1000 * 1000; + res = VG_(nanosleep)( &req, &rem ); + /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */ + vg_assert(res == 0); +} + + +/* --------------------------------------------------------------------- + The scheduler proper. + ------------------------------------------------------------------ */ + +/* Run user-space threads until either + * Deadlock occurs + * One thread asks to shutdown Valgrind + * The specified number of basic blocks has gone by. +*/ +VgSchedReturnCode VG_(scheduler) ( void ) +{ + ThreadId tid, tid_next; + UInt trc; + UInt dispatch_ctr_SAVED; + Int done_this_time, n_in_fdwait; + Char msg_buf[100]; + Addr trans_addr; + + /* For the LRU structures, records when the epoch began. */ + ULong lru_epoch_started_at = 0; + + /* Start with the root thread. tid in general indicates the + currently runnable/just-finished-running thread. */ + tid = 0; + + /* This is the top level scheduler loop. It falls into three + phases. 
*/ + while (True) { + + /* ======================= Phase 1 of 3 ======================= + Handle I/O completions and signals. This may change the + status of various threads. Then select a new thread to run, + or declare deadlock, or sleep if there are no runnable + threads but some are blocked on I/O. */ + + /* Age the LRU structures if an epoch has been completed. */ + if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) { + lru_epoch_started_at = VG_(bbs_done); + increment_epoch(); + } + + /* Was a debug-stop requested? */ + if (VG_(bbs_to_go) == 0) + goto debug_stop; + + /* Do the following loop until a runnable thread is found, or + deadlock is detected. */ + while (True) { + + /* For stats purposes only. */ + VG_(num_scheduling_events_MAJOR) ++; + + /* See if any I/O operations which we were waiting for have + completed, and, if so, make runnable the relevant waiting + threads. */ + poll_for_ready_fds(); + complete_blocked_syscalls(); + + /* See if there are any signals which need to be delivered. If + so, choose thread(s) to deliver them to, and build signal + delivery frames on those thread(s) stacks. */ + VG_(deliver_signals)( 0 /*HACK*/ ); + VG_(do_sanity_checks)(0 /*HACK*/, False); + + /* Try and find a thread (tid) to run. */ + tid_next = tid; + n_in_fdwait = 0; + while (True) { + tid_next++; + if (tid_next >= VG_N_THREADS) tid_next = 0; + if (vg_threads[tid_next].status == VgTs_WaitFD) + n_in_fdwait ++; + if (vg_threads[tid_next].status == VgTs_Runnable) + break; /* We can run this one. */ + if (tid_next == tid) + break; /* been all the way round */ + } + tid = tid_next; + + if (vg_threads[tid].status == VgTs_Runnable) { + /* Found a suitable candidate. Fall out of this loop, so + we can advance to stage 2 of the scheduler: actually + running the thread. */ + break; + } + + /* We didn't find a runnable thread. Now what? */ + if (n_in_fdwait == 0) { + /* No runnable threads and non in fd-wait either. Not + good. */ + pp_sched_status(); + return VgSrc_Deadlock; + } + + /* At least one thread is in a fd-wait state. Delay for a + while, and go round again, in the hope that eventually a + thread becomes runnable. */ + nanosleep_for_a_while(); + // pp_sched_status(); + // VG_(printf)(".\n"); + } + + + /* ======================= Phase 2 of 3 ======================= + Wahey! We've finally decided that thread tid is runnable, so + we now do that. Run it for as much of a quanta as possible. + Trivial requests are handled and the thread continues. The + aim is not to do too many of Phase 1 since it is expensive. */ + + if (0) + VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS); + + /* Figure out how many bbs to ask vg_run_innerloop to do. Note + that it decrements the counter before testing it for zero, so + that if VG_(dispatch_ctr) is set to N you get at most N-1 + iterations. Also this means that VG_(dispatch_ctr) must + exceed zero before entering the innerloop. Also also, the + decrement is done before the bb is actually run, so you + always get at least one decrement even if nothing happens. + */ + if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM) + VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1; + else + VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1; + + /* ... and remember what we asked for. */ + dispatch_ctr_SAVED = VG_(dispatch_ctr); + + /* Actually run thread tid. */ + while (True) { + + /* For stats purposes only. 
*/ + VG_(num_scheduling_events_MINOR) ++; + + if (0) + VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs", + tid, VG_(dispatch_ctr) - 1 ); + + trc = run_thread_for_a_while ( tid ); + + /* Deal quickly with trivial scheduling events, and resume the + thread. */ + + if (trc == VG_TRC_INNER_FASTMISS) { + vg_assert(VG_(dispatch_ctr) > 0); + + /* Trivial event. Miss in the fast-cache. Do a full + lookup for it. */ + trans_addr + = VG_(search_transtab) ( vg_threads[tid].m_eip ); + if (trans_addr == (Addr)0) { + /* Not found; we need to request a translation. */ + VG_(create_translation_for)( vg_threads[tid].m_eip ); + trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip ); + if (trans_addr == (Addr)0) + VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry"); + } + continue; /* with this thread */ + } + + if (trc == VG_TRC_EBP_JMP_CLIENTREQ) { + Bool is_triv = maybe_do_trivial_clientreq(tid); + if (is_triv) { + /* NOTE: a trivial request is something like a call to + malloc() or free(). It DOES NOT change the + Runnability of this thread nor the status of any + other thread; it is purely thread-local. */ + continue; /* with this thread */ + } + } + + /* It's a non-trivial event. Give up running this thread and + handle things the expensive way. */ + break; + } + + /* ======================= Phase 3 of 3 ======================= + Handle non-trivial thread requests, mostly pthread stuff. */ + + /* Ok, we've fallen out of the dispatcher for a + non-completely-trivial reason. First, update basic-block + counters. */ + + done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1; + vg_assert(done_this_time >= 0); + VG_(bbs_to_go) -= (ULong)done_this_time; + VG_(bbs_done) += (ULong)done_this_time; + + if (0 && trc != VG_TRC_INNER_FASTMISS) + VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d", + tid, done_this_time, (Int)trc ); + + if (0 && trc != VG_TRC_INNER_FASTMISS) + VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s", + tid, VG_(bbs_done), + name_of_sched_event(trc) ); + + /* Examine the thread's return code to figure out why it + stopped, and handle requests. */ + + switch (trc) { + + case VG_TRC_INNER_FASTMISS: + VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS"); + /*NOTREACHED*/ + break; + + case VG_TRC_INNER_COUNTERZERO: + /* Timeslice is out. Let a new thread be scheduled, + simply by doing nothing, causing us to arrive back at + Phase 1. */ + if (VG_(bbs_to_go) == 0) { + goto debug_stop; + } + vg_assert(VG_(dispatch_ctr) == 0); + break; + + case VG_TRC_UNRESUMABLE_SIGNAL: + /* It got a SIGSEGV/SIGBUS, which we need to deliver right + away. Again, do nothing, so we wind up back at Phase + 1, whereupon the signal will be "delivered". */ + break; + + case VG_TRC_EBP_JMP_SPECIAL: { + Addr next_eip = vg_threads[tid].m_eip; + if (next_eip == (Addr) & VG_(signalreturn_bogusRA)) { + /* vthread tid is returning from a signal handler; + modify its stack/regs accordingly. */ + VG_(signal_returns)(tid); + } + else + if (next_eip == (Addr) & VG_(shutdown)) { + return VgSrc_Shutdown; + } else { + VG_(panic)("vg_schedule: VG_TRC_EBP_JMP_SPECIAL"); + } + break; + } + + case VG_TRC_EBP_JMP_SYSCALL: + /* Do a syscall for the vthread tid. This could cause it + to become non-runnable. */ + sched_do_syscall(tid); + break; + + case VG_TRC_EBP_JMP_CLIENTREQ: + /* Do a client request for the vthread tid. Note that + some requests will have been handled by + maybe_do_trivial_clientreq(), so we don't expect to see + those here. 
+ */ + if (0) { + VG_(sprintf)(msg_buf, "request 0x%x", + vg_threads[tid].m_eax); + print_sched_event(tid, msg_buf); + } + /* Do a non-trivial client request for thread tid. tid's + %EAX points to a short vector of argument words, the + first of which is the request code. The result of the + request is put in tid's %EDX. Alternatively, perhaps + the request causes tid to become non-runnable and/or + other blocked threads become runnable. In general we + can and often do mess with the state of arbitrary + threads at this point. */ + do_nontrivial_clientreq(tid); + break; + + default: + VG_(printf)("\ntrc = %d\n", trc); + VG_(panic)("VG_(scheduler), phase 3: " + "unexpected thread return code"); + /* NOTREACHED */ + break; + + } /* switch (trc) */ + + /* That completes Phase 3 of 3. Return now to the top of the + main scheduler loop, to Phase 1 of 3. */ + + } /* top-level scheduler loop */ + + + /* NOTREACHED */ + VG_(panic)("scheduler: post-main-loop ?!"); + /* NOTREACHED */ + + debug_stop: + /* If we exited because of a debug stop, print the translation + of the last block executed -- by translating it again, and + throwing away the result. */ + VG_(printf)( + "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n"); + VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL ); + VG_(printf)("\n"); + VG_(printf)( + "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n"); + + return VgSrc_BbsDone; +} + + +/* --------------------------------------------------------------------- + The pthread implementation. + ------------------------------------------------------------------ */ + +#include +#include + +#if !defined(PTHREAD_STACK_MIN) +# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB) +#endif + +/* /usr/include/bits/pthreadtypes.h: + typedef unsigned long int pthread_t; +*/ + +/* RUNS ON SIMD CPU! + This is the return address that pthread_create uses. +*/ +static +void do_pthread_create_bogusRA ( void ) +{ + /* Tell the scheduler that this thread has returned. */ + Int res; + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_CREATE_BOGUSRA, + 0, 0, 0, 0); + VG_(panic)("do_pthread_create_bogusRA: shouldn't be still alive!"); +} + + +static +void do_pthread_cancel ( ThreadId tid_canceller, + pthread_t tid_cancellee ) +{ + Char msg_buf[100]; + /* We want make is appear that this thread has returned to + do_pthread_create_bogusRA with PTHREAD_CANCELED as the + return value. So: simple: put PTHREAD_CANCELED into %EAX + and &do_pthread_create_bogusRA into %EIP and keep going! */ + if (1) { + VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller); + print_sched_event(tid_cancellee, msg_buf); + } + vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED; + vg_threads[tid_cancellee].m_eip = (UInt)&do_pthread_create_bogusRA; + vg_threads[tid_cancellee].status = VgTs_Runnable; +} + + + +/* Thread tid is exiting, by returning from the function it was + created with. The main complication here is to resume any thread + waiting to join with this one. */ +static +void do_pthread_create_exit_by_returning ( ThreadId tid ) +{ + ThreadId jnr; /* joiner, the thread calling pthread_join. */ + UInt* jnr_args; + void** jnr_thread_return; + Char msg_buf[100]; + + /* Mark it as not in use. Leave the stack in place so the next + user of this slot doesn't reallocate it. 
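+      (If nobody has joined us yet we don't actually become Empty;
+      we park in VgTs_WaitJoiner so our exit code survives until the
+      join arrives.)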
*/ + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status != VgTs_Empty); + + vg_threads[tid].retval = (void*)vg_threads[tid].m_eax; + + if (vg_threads[tid].joiner == VG_INVALID_THREADID) { + /* No one has yet done a join on me */ + vg_threads[tid].status = VgTs_WaitJoiner; + if (1) { + VG_(sprintf)(msg_buf, + "root fn returns, waiting for a call pthread_join(%d)", + tid); + print_sched_event(tid, msg_buf); + } + } else { + /* Some is waiting; make their join call return with success, + putting my exit code in the place specified by the caller's + thread_return param. This is all very horrible, since we + need to consult the joiner's arg block -- pointed to by its + %EAX -- in order to extract the 2nd param of its pthread_join + call. TODO: free properly the slot (also below). + */ + jnr = vg_threads[tid].joiner; + vg_assert(jnr >= 0 && jnr < VG_N_THREADS); + vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee); + jnr_args = (UInt*)vg_threads[jnr].m_eax; + jnr_thread_return = (void**)(jnr_args[2]); + if (jnr_thread_return != NULL) + *jnr_thread_return = vg_threads[tid].retval; + vg_threads[jnr].m_edx = 0; /* success */ + vg_threads[jnr].status = VgTs_Runnable; + vg_threads[tid].status = VgTs_Empty; /* bye! */ + if (1) { + VG_(sprintf)(msg_buf, + "root fn returns, to find a waiting pthread_join(%d)", tid); + print_sched_event(tid, msg_buf); + VG_(sprintf)(msg_buf, + "my pthread_join(%d) returned; resuming", tid); + print_sched_event(jnr, msg_buf); + } + } + + /* Return value is irrelevant; this thread will not get + rescheduled. */ +} + + +static +void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return ) +{ + Char msg_buf[100]; + + /* jee, the joinee, is the thread specified as an arg in thread + tid's call to pthread_join. So tid is the join-er. */ + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + if (jee == tid) { + vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */ + vg_threads[tid].status = VgTs_Runnable; + return; + } + + if (jee < 0 + || jee >= VG_N_THREADS + || vg_threads[jee].status == VgTs_Empty) { + /* Invalid thread to join to. */ + vg_threads[tid].m_edx = EINVAL; + vg_threads[tid].status = VgTs_Runnable; + return; + } + + if (vg_threads[jee].joiner != VG_INVALID_THREADID) { + /* Someone already did join on this thread */ + vg_threads[tid].m_edx = EINVAL; + vg_threads[tid].status = VgTs_Runnable; + return; + } + + /* if (vg_threads[jee].detached) ... */ + + /* Perhaps the joinee has already finished? If so return + immediately with its return code, and free up the slot. TODO: + free it properly (also above). */ + if (vg_threads[jee].status == VgTs_WaitJoiner) { + vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID); + vg_threads[tid].m_edx = 0; /* success */ + if (thread_return != NULL) + *thread_return = vg_threads[jee].retval; + vg_threads[tid].status = VgTs_Runnable; + vg_threads[jee].status = VgTs_Empty; /* bye! */ + if (1) { + VG_(sprintf)(msg_buf, + "someone called pthread_join() on me; bye!"); + print_sched_event(jee, msg_buf); + VG_(sprintf)(msg_buf, + "my pthread_join(%d) returned immediately", + jee ); + print_sched_event(tid, msg_buf); + } + return; + } + + /* Ok, so we'll have to wait on jee. */ + vg_threads[jee].joiner = tid; + vg_threads[tid].status = VgTs_WaitJoinee; + if (1) { + VG_(sprintf)(msg_buf, + "blocking on call of pthread_join(%d)", jee ); + print_sched_event(tid, msg_buf); + } + /* So tid's join call does not return just now. 
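+
+      (It is completed later by do_pthread_create_exit_by_returning,
+      above: when jee eventually returns, that routine copies jee's
+      exit value through the thread_return slot in our argument
+      block and sets our %EDX to 0, and we become Runnable again,
+      so the client finally sees its pthread_join succeed.)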
*/ +} + + +static +void do_pthread_create ( ThreadId parent_tid, + pthread_t* thread, + pthread_attr_t* attr, + void* (*start_routine)(void *), + void* arg ) +{ + Addr new_stack; + UInt new_stk_szb; + ThreadId tid; + Char msg_buf[100]; + + /* Paranoia ... */ + vg_assert(sizeof(pthread_t) == sizeof(UInt)); + + vg_assert(vg_threads[parent_tid].status != VgTs_Empty); + + tid = vg_alloc_ThreadState(); + + /* If we've created the main thread's tid, we're in deep trouble :) */ + vg_assert(tid != 0); + + /* Copy the parent's CPU state into the child's, in a roundabout + way (via baseBlock). */ + VG_(load_thread_state)(parent_tid); + VG_(save_thread_state)(tid); + + /* Consider allocating the child a stack, if the one it already has + is inadequate. */ + new_stk_szb = PTHREAD_STACK_MIN; + + if (new_stk_szb > vg_threads[tid].stack_size) { + /* Again, for good measure :) We definitely don't want to be + allocating a stack for the main thread. */ + vg_assert(tid != 0); + /* for now, we don't handle the case of anything other than + assigning it for the first time. */ + vg_assert(vg_threads[tid].stack_size == 0); + vg_assert(vg_threads[tid].stack_base == (Addr)NULL); + new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb ); + vg_threads[tid].stack_base = new_stack; + vg_threads[tid].stack_size = new_stk_szb; + vg_threads[tid].m_esp + = new_stack + new_stk_szb + - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; + } + if (VG_(clo_instrument)) + VGM_(make_noaccess)( vg_threads[tid].m_esp, + VG_AR_CLIENT_STACKBASE_REDZONE_SZB ); + + /* push arg */ + vg_threads[tid].m_esp -= 4; + * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg; + + /* push (magical) return address */ + vg_threads[tid].m_esp -= 4; + * (UInt*)(vg_threads[tid].m_esp) = (UInt)do_pthread_create_bogusRA; + + if (VG_(clo_instrument)) + VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 ); + + /* this is where we start */ + vg_threads[tid].m_eip = (UInt)start_routine; + + if (1) { + VG_(sprintf)(msg_buf, + "new thread, created by %d", parent_tid ); + print_sched_event(tid, msg_buf); + } + + /* store the thread id in *thread. */ + // if (VG_(clo_instrument)) + // ***** CHECK *thread is writable + *thread = (pthread_t)tid; + + /* return zero */ + vg_threads[tid].joiner = VG_INVALID_THREADID; + vg_threads[tid].status = VgTs_Runnable; + vg_threads[tid].m_edx = 0; /* success */ +} + + +/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t + is a struct with at least 5 words: + typedef struct + { + int __m_reserved; -- Reserved for future use + int __m_count; -- Depth of recursive locking + _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck) + int __m_kind; -- Mutex kind: fast, recursive or errcheck + struct _pthread_fastlock __m_lock; -- Underlying fast lock + } pthread_mutex_t; + Ours is just a single word, an index into vg_mutexes[]. + For now I'll park it in the __m_reserved field. + + Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have + a zero __m_count field (see /usr/include/pthread.h). So I'll + use zero to mean non-inited, and 1 to mean inited. + + How convenient. +*/ + +static +void initialise_mutex ( pthread_mutex_t *mutex ) +{ + MutexId mid; + /* vg_alloc_MutexId aborts if we can't allocate a mutex, for + whatever reason. 
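+
+      For the record, there are two ways in here: a client mutex
+      statically initialised as, say,
+
+         pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
+
+      arrives at do_pthread_mutex_lock with __m_count == 0 and gets
+      initialised lazily on first lock, whereas an explicit
+      pthread_mutex_init call comes through do_pthread_mutex_init.
+      Either way we end up here.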
*/ +VG_(printf)("initialise_mutex %p\n", mutex); + mid = vg_alloc_VgMutex(); + vg_mutexes[mid].in_use = True; + vg_mutexes[mid].held = False; + vg_mutexes[mid].owner = VG_INVALID_THREADID; /* irrelevant */ + mutex->__m_reserved = mid; + mutex->__m_count = 1; /* initialised */ +} + +/* Allocate a new MutexId and write it into *mutex. Ideally take + notice of the attributes in *mutexattr. */ +static +void do_pthread_mutex_init ( ThreadId tid, + pthread_mutex_t *mutex, + const pthread_mutexattr_t *mutexattr) +{ + /* Paranoia ... */ +VG_(printf)("mutex_init %d %p %p\n", tid, mutex, mutexattr); + + vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt)); + + initialise_mutex(mutex); + /* + RETURN VALUE + pthread_mutex_init always returns 0. The other mutex functions + return 0 on success and a non-zero error code on error. + */ + /* THIS THREAD returns with 0. */ + vg_threads[tid].m_edx = 0; +} + + +static +void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex ) +{ + MutexId mid; + Char msg_buf[100]; + +VG_(printf)("mutex_lock %d %p\n", tid, mutex); + + /* *mutex contains the MutexId, or one of the magic values + PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it + now. See comment(s) above re use of __m_count to indicated + initialisation status. + */ + + /* POSIX doesn't mandate this, but for sanity ... */ + if (mutex == NULL) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + if (mutex->__m_count == 0) { + initialise_mutex(mutex); + } + + mid = mutex->__m_reserved; + if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + /* Assert initialised. */ + vg_assert(mutex->__m_count == 1); + + /* Assume tid valid. */ + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + if (vg_mutexes[mid].held) { + if (vg_mutexes[mid].owner == tid) { + vg_threads[tid].m_edx = EDEADLK; + return; + } + /* Someone else has it; we have to wait. */ + vg_threads[tid].status = VgTs_WaitMX; + vg_threads[tid].waited_on_mid = mid; + /* No assignment to %EDX, since we're blocking. */ + if (1) { + VG_(sprintf)(msg_buf, "wait for mutex %d", mid ); + print_sched_event(tid, msg_buf); + } + } else { + /* We get it! */ + vg_mutexes[mid].held = True; + vg_mutexes[mid].owner = tid; + /* return 0 (success). */ + vg_threads[tid].m_edx = 0; + } +} + + +static +void do_pthread_mutex_unlock ( ThreadId tid, + pthread_mutex_t *mutex ) +{ + MutexId mid; + Int i; + Char msg_buf[100]; + +VG_(printf)("mutex_unlock %d %p\n", tid, mutex); + + if (mutex == NULL + || mutex->__m_count != 1) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + mid = mutex->__m_reserved; + if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + /* Assume tid valid */ + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + /* Barf if we don't currently hold the mutex. */ + if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) { + vg_threads[tid].m_edx = EPERM; + return; + } + + /* Find some arbitrary thread waiting on this mutex, and make it + runnable. If none are waiting, mark the mutex as not held. */ + for (i = 0; i < VG_N_THREADS; i++) { + if (vg_threads[i].status == VgTs_Empty) + continue; + if (vg_threads[i].status == VgTs_WaitMX + && vg_threads[i].waited_on_mid == mid) + break; + } + + vg_assert(i <= VG_N_THREADS); + if (i == VG_N_THREADS) { + /* Nobody else is waiting on it. 
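+
+         (Contrast the else branch below: when a waiter i is found,
+         ownership is handed straight over -- .held stays True and
+         only .owner changes -- so there is no window in which the
+         mutex appears unlocked.)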
*/ + vg_mutexes[mid].held = False; + } else { + /* Notionally transfer the hold to thread i, whose + pthread_mutex_lock() call now returns with 0 (success). */ + vg_mutexes[mid].owner = i; + vg_threads[i].status = VgTs_Runnable; + vg_threads[i].m_edx = 0; /* pth_lock() success */ + if (1) { + VG_(sprintf)(msg_buf, "acquire mutex %d, resume", mid ); + print_sched_event(tid, msg_buf); + } + } + + /* In either case, our (tid's) pth_unlock() returns with 0 + (success). */ + vg_threads[tid].m_edx = 0; /* Success. */ +} + + +static void do_pthread_mutex_destroy ( ThreadId tid, + pthread_mutex_t *mutex ) +{ + MutexId mid; + +VG_(printf)("mutex_destroy %d %p\n", tid, mutex); + + if (mutex == NULL + || mutex->__m_count != 1) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + mid = mutex->__m_reserved; + if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + /* Assume tid valid */ + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + /* Barf if the mutex is currently held. */ + if (vg_mutexes[mid].held) { + vg_threads[tid].m_edx = EBUSY; + return; + } + + mutex->__m_count = 0; /* uninitialised */ + vg_mutexes[mid].in_use = False; + vg_threads[tid].m_edx = 0; +} + + +/* --------------------------------------------------------------------- + Handle non-trivial client requests. + ------------------------------------------------------------------ */ + +static +void do_nontrivial_clientreq ( ThreadId tid ) +{ + UInt* arg = (UInt*)(vg_threads[tid].m_eax); + UInt req_no = arg[0]; + switch (req_no) { + + case VG_USERREQ__PTHREAD_CREATE: + do_pthread_create( tid, + (pthread_t*)arg[1], + (pthread_attr_t*)arg[2], + (void*(*)(void*))arg[3], + (void*)arg[4] ); + break; + + case VG_USERREQ__PTHREAD_CREATE_BOGUSRA: + do_pthread_create_exit_by_returning( tid ); + break; + + case VG_USERREQ__PTHREAD_JOIN: + do_pthread_join( tid, arg[1], (void**)(arg[2]) ); + break; + + /* Sigh ... this probably will cause huge numbers of major + (expensive) scheduling events, for no real reason. + Perhaps should be classified as a trivial-request. 
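+
+         If it ever is reclassified, the obvious sketch (untested,
+         just noting the idea) is one more case in
+         maybe_do_trivial_clientreq:
+
+            case VG_USERREQ__PTHREAD_GET_THREADID:
+               SIMPLE_RETURN(tid);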
*/ + case VG_USERREQ__PTHREAD_GET_THREADID: + vg_threads[tid].m_edx = tid; + break; + + case VG_USERREQ__PTHREAD_MUTEX_INIT: + do_pthread_mutex_init( tid, + (pthread_mutex_t *)(arg[1]), + (pthread_mutexattr_t *)(arg[2]) ); + break; + + case VG_USERREQ__PTHREAD_MUTEX_LOCK: + do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) ); + break; + + case VG_USERREQ__PTHREAD_MUTEX_UNLOCK: + do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) ); + break; + + case VG_USERREQ__PTHREAD_MUTEX_DESTROY: + do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) ); + break; + + case VG_USERREQ__PTHREAD_CANCEL: + do_pthread_cancel( tid, (pthread_t)(arg[1]) ); + break; + + case VG_USERREQ__MAKE_NOACCESS: + case VG_USERREQ__MAKE_WRITABLE: + case VG_USERREQ__MAKE_READABLE: + case VG_USERREQ__DISCARD: + case VG_USERREQ__CHECK_WRITABLE: + case VG_USERREQ__CHECK_READABLE: + case VG_USERREQ__MAKE_NOACCESS_STACK: + case VG_USERREQ__RUNNING_ON_VALGRIND: + case VG_USERREQ__DO_LEAK_CHECK: + vg_threads[tid].m_edx = VG_(handle_client_request) ( arg ); + break; + + default: + VG_(printf)("panic'd on private request = 0x%x\n", arg[0] ); + VG_(panic)("handle_private_client_pthread_request: " + "unknown request"); + /*NOTREACHED*/ + break; + } +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_scheduler.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/vg_libpthread.c b/vg_libpthread.c new file mode 100644 index 0000000000..e8d60430be --- /dev/null +++ b/vg_libpthread.c @@ -0,0 +1,411 @@ + +/* This is a replacement for the standard libpthread.so. It is loaded + as part of the client's image (if required) and directs pthread + calls through to Valgrind's request mechanism. + + A couple of caveats. + + 1. Since it's a binary-compatible replacement for an existing library, + we must take care to used exactly the same data layouts, etc, as + the standard pthread.so does. + + 2. Since this runs as part of the client, there are no specific + restrictions on what headers etc we can include, so long as + this libpthread.so does not end up having dependencies on .so's + which the real one doesn't. + + Later ... it appears we cannot call file-related stuff in libc here, + perhaps fair enough. Be careful what you call from here. Even exit() + doesn't work (gives infinite recursion and then stack overflow); hence + myexit(). Also fprintf doesn't seem safe. +*/ + +#include "valgrind.h" /* For the request-passing mechanism */ +#include "vg_include.h" /* For the VG_USERREQ__* constants */ + +#include +#include +#include + + +/* --------------------------------------------------------------------- + Helpers. We have to be pretty self-sufficient. + ------------------------------------------------------------------ */ + +static +void myexit ( int arg ) +{ + int __res; + __asm__ volatile ("movl %%ecx, %%ebx ; int $0x80" + : "=a" (__res) + : "0" (__NR_exit), + "c" (arg) ); + /* We don't bother to mention the fact that this asm trashes %ebx, + since it won't return. If you ever do let it return ... fix + this! */ +} + + +/* Give up without using printf etc, since they seem to give + segfaults. */ +static +void ensure_valgrind ( char* caller ) +{ + char* str; + int is_valgrind = RUNNING_ON_VALGRIND; + if (!is_valgrind) { + str = "\nvalgrind-ed process: vg_libpthread.so: " + "pthread call when\n"; + write(2, str, strlen(str)); + str = "not running on valgrind; aborting! 
" + "This is probably a bug in\n"; + write(2, str, strlen(str)); + str = "valgrind. Please report it to me at: " + "jseward@acm.org. Thanks.\n"; + write(2, str, strlen(str)); + str = "unexpectedly called function is: "; + write(2, str, strlen(str)); + write(2, caller, strlen(caller)); + str = "\n\n"; + write(2, str, strlen(str)); + myexit(1); + } +} + + +static +void barf ( char* str ) +{ + char buf[100]; + buf[0] = 0; + strcat(buf, "\nvg_libpthread.so: "); + strcat(buf, str); + strcat(buf, "\n\n"); + write(2, buf, strlen(buf)); + myexit(1); +} + + + +/* --------------------------------------------------------------------- + Pass pthread_ calls to Valgrind's request mechanism. + ------------------------------------------------------------------ */ + +int +pthread_create (pthread_t *__restrict __thread, + __const pthread_attr_t *__restrict __attr, + void *(*__start_routine) (void *), + void *__restrict __arg) +{ + int res; + ensure_valgrind("pthread_create"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_CREATE, + __thread, __attr, __start_routine, __arg); + return res; +} + + + +int +pthread_join (pthread_t __th, void **__thread_return) +{ + int res; + ensure_valgrind("pthread_join"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_JOIN, + __th, __thread_return, 0, 0); + return res; +} + + +/* What are these? Anybody know? I don't. */ + +void _pthread_cleanup_push_defer ( void ) +{ + // char* str = "_pthread_cleanup_push_defer\n"; + // write(2, str, strlen(str)); +} + +void _pthread_cleanup_pop_restore ( void ) +{ + // char* str = "_pthread_cleanup_pop_restore\n"; + // write(2, str, strlen(str)); +} + + +static int thread_specific_errno[VG_N_THREADS]; + +int* __errno_location ( void ) +{ + int tid; + ensure_valgrind("__errno_location"); + VALGRIND_MAGIC_SEQUENCE(tid, 0 /* default */, + VG_USERREQ__PTHREAD_GET_THREADID, + 0, 0, 0, 0); + /* 'cos I'm paranoid ... 
*/ + if (tid < 0 || tid >= VG_N_THREADS) + barf("__errno_location: invalid ThreadId"); + return & thread_specific_errno[tid]; +} + + +int pthread_mutexattr_init(pthread_mutexattr_t *attr) +{ + char* str = "pthread_mutexattr_init\n"; + write(2, str, strlen(str)); + return 0; +} + +int pthread_mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *mutexattr) +{ + int res; + char* str = "pthread_mutex_init\n"; + write(2, str, strlen(str)); + ensure_valgrind("pthread_mutex_init"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_INIT, + mutex, mutexattr, 0, 0); + return res; +} + +int pthread_mutexattr_destroy(pthread_mutexattr_t *attr) +{ + char* str = "pthread_mutexattr_destroy\n"; + write(2, str, strlen(str)); + return 0; +} + +int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_lock-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_LOCK, + mutex, 0, 0, 0); + return res; + } +} + +int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_unlock-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_UNLOCK, + mutex, 0, 0, 0); + return res; + } +} + +pthread_t pthread_self(void) +{ + int tid; + ensure_valgrind("pthread_self"); + VALGRIND_MAGIC_SEQUENCE(tid, 0 /* default */, + VG_USERREQ__PTHREAD_GET_THREADID, + 0, 0, 0, 0); + if (tid < 0 || tid >= VG_N_THREADS) + barf("pthread_self: invalid ThreadId"); + return tid; +} + +int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + int res; + if (!(RUNNING_ON_VALGRIND)) { + char* str = "pthread_mutex_destroy-NOT-INSIDE-VALGRIND\n"; + write(2, str, strlen(str)); + return 0; + } else { + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_MUTEX_DESTROY, + mutex, 0, 0, 0); + } + return res; +} + + +int pthread_setcanceltype(int type, int *oldtype) +{ + char* str = "pthread_setcanceltype\n"; + write(2, str, strlen(str)); + return 0; +} + + +int pthread_cancel(pthread_t thread) +{ + int res; + ensure_valgrind("pthread_cancel"); + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_CANCEL, + thread, 0, 0, 0); + return res; +} + + +/* --------------------------------------------------------------------- + These are here (I think) because they are deemed cancellation + points by POSIX. For the moment we'll simply pass the call along + to the corresponding thread-unaware (?) libc routine. 
+ ------------------------------------------------------------------ */ + +#include +#include +#include +#include +#include +#include + +extern +int __libc_sigaction + (int signum, + const struct sigaction *act, + struct sigaction *oldact); +int sigaction(int signum, + const struct sigaction *act, + struct sigaction *oldact) +{ + // char* str = "sigaction\n"; + // write(2, str, strlen(str)); + return __libc_sigaction(signum, act, oldact); +} + + +extern +int __libc_connect(int sockfd, + const struct sockaddr *serv_addr, + socklen_t addrlen); +int connect(int sockfd, + const struct sockaddr *serv_addr, + socklen_t addrlen) +{ + // char* str = "connect\n"; + // write(2, str, strlen(str)); + return __libc_connect(sockfd, serv_addr, addrlen); +} + + +extern +int __libc_fcntl(int fd, int cmd, long arg); +int fcntl(int fd, int cmd, long arg) +{ + // char* str = "fcntl\n"; + // write(2, str, strlen(str)); + return __libc_fcntl(fd, cmd, arg); +} + + +extern +ssize_t __libc_write(int fd, const void *buf, size_t count); +ssize_t write(int fd, const void *buf, size_t count) +{ + // char* str = "write\n"; + // write(2, str, strlen(str)); + return __libc_write(fd, buf, count); +} + + +extern +ssize_t __libc_read(int fd, void *buf, size_t count); +ssize_t read(int fd, void *buf, size_t count) +{ + // char* str = "read\n"; + // write(2, str, strlen(str)); + return __libc_read(fd, buf, count); +} + + +extern +int __libc_open(const char *pathname, int flags); +int open(const char *pathname, int flags) +{ + // char* str = "open\n"; + // write(2, str, strlen(str)); + return __libc_open(pathname, flags); +} + + +extern +int __libc_close(int fd); +int close(int fd) +{ + // char* str = "open\n"; + // write(2, str, strlen(str)); + return __libc_close(fd); +} + + +extern +int __libc_accept(int s, struct sockaddr *addr, socklen_t *addrlen); +int accept(int s, struct sockaddr *addr, socklen_t *addrlen) +{ + // char* str = "accept\n"; + // write(2, str, strlen(str)); + return __libc_accept(s, addr, addrlen); +} + + +extern +pid_t __libc_fork(void); +pid_t fork(void) +{ + // char* str = "fork\n"; + // write(2, str, strlen(str)); + return __libc_fork(); +} + + +extern +pid_t __libc_waitpid(pid_t pid, int *status, int options); +pid_t waitpid(pid_t pid, int *status, int options) +{ + // char* str = "waitpid\n"; + // write(2, str, strlen(str)); + return __libc_waitpid(pid, status, options); +} + + +extern +int __libc_nanosleep(const struct timespec *req, struct timespec *rem); +int nanosleep(const struct timespec *req, struct timespec *rem) +{ + return __libc_nanosleep(req, rem); +} + +extern +int __libc_fsync(int fd); +int fsync(int fd) +{ + return __libc_fsync(fd); +} + +/* I've no idea what these are, but they get called quite a lot. + Anybody know? */ + +#undef _IO_flockfile +void _IO_flockfile ( _IO_FILE * file ) +{ + // char* str = "_IO_flockfile\n"; + // write(2, str, strlen(str)); + // barf("_IO_flockfile"); +} + +#undef _IO_funlockfile +void _IO_funlockfile ( _IO_FILE * file ) +{ + // char* str = "_IO_funlockfile\n"; + // write(2, str, strlen(str)); + //barf("_IO_funlockfile"); +} + diff --git a/vg_scheduler.c b/vg_scheduler.c new file mode 100644 index 0000000000..c1a26ad2c9 --- /dev/null +++ b/vg_scheduler.c @@ -0,0 +1,1723 @@ + +/*--------------------------------------------------------------------*/ +/*--- A user-space pthreads implementation. 
vg_scheduler.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, an x86 protected-mode emulator + designed for debugging and profiling binaries on x86-Unixes. + + Copyright (C) 2000-2002 Julian Seward + jseward@acm.org + Julian_Seward@muraroa.demon.co.uk + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file LICENSE. +*/ + +#include "vg_include.h" +#include "vg_constants.h" + +#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and + VG_USERREQ__DO_LEAK_CHECK */ + +/* BORKAGE as of 11 Apr 02 + +Note! This implementation is so poor as to not be suitable for use by +anyone at all! + +- properly save scheduler private state in signal delivery frames. + +- fd-poll optimisation (don't select with empty sets) + +- signals interrupting read/write and nanosleep, and take notice + of SA_RESTART or not + +- return bogus RA: %EAX trashed, so pthread_joiner gets nonsense + exit codes + +- when a thread is done mark its stack as noaccess */ + + +/* --------------------------------------------------------------------- + Types and globals for the scheduler. + ------------------------------------------------------------------ */ + +/* type ThreadId is defined in vg_include.h. */ + +/* struct ThreadState is defined in vg_include.h. */ + +/* Private globals. A statically allocated array of threads. */ +static ThreadState vg_threads[VG_N_THREADS]; + + +/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */ +jmp_buf VG_(scheduler_jmpbuf); +/* ... and if so, here's the signal which caused it to do so. */ +Int VG_(longjmpd_on_signal); + + +/* Machinery to keep track of which threads are waiting on which + fds. */ +typedef + struct { + /* The thread which made the request. */ + ThreadId tid; + + /* The next two fields describe the request. */ + /* File descriptor waited for. -1 means this slot is not in use */ + Int fd; + /* The syscall number the fd is used in. */ + Int syscall_no; + + /* False => still waiting for select to tell us the fd is ready + to go. True => the fd is ready, but the results have not yet + been delivered back to the calling thread. Once the latter + happens, this entire record is marked as no longer in use, by + making the fd field be -1. */ + Bool ready; + } + VgWaitedOnFd; + +static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS]; + + + +typedef + struct { + /* Is this slot in use, or free? */ + Bool in_use; + /* If in_use, is this mutex held by some thread, or not? */ + Bool held; + /* if held==True, owner indicates who by. */ + ThreadId owner; + } + VgMutex; + +static VgMutex vg_mutexes[VG_N_MUTEXES]; + +/* Forwards */ +static void do_nontrivial_clientreq ( ThreadId tid ); + + +/* --------------------------------------------------------------------- + Helper functions for the scheduler. 
+ ------------------------------------------------------------------ */ + +static +void pp_sched_status ( void ) +{ + Int i; + VG_(printf)("\nsched status:\n"); + for (i = 0; i < VG_N_THREADS; i++) { + if (vg_threads[i].status == VgTs_Empty) continue; + VG_(printf)("tid %d: ", i); + switch (vg_threads[i].status) { + case VgTs_Runnable: VG_(printf)("Runnable\n"); break; + case VgTs_WaitFD: VG_(printf)("WaitFD\n"); break; + case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n", + vg_threads[i].joiner); break; + case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break; + default: VG_(printf)("???"); break; + } + } + VG_(printf)("\n"); +} + +static +void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no ) +{ + Int i; + + vg_assert(fd != -1); /* avoid total chaos */ + + for (i = 0; i < VG_N_WAITING_FDS; i++) + if (vg_waiting_fds[i].fd == -1) + break; + + if (i == VG_N_WAITING_FDS) + VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low"); + /* + VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n", + tid, fd, i); + */ + vg_waiting_fds[i].fd = fd; + vg_waiting_fds[i].tid = tid; + vg_waiting_fds[i].ready = False; + vg_waiting_fds[i].syscall_no = syscall_no; +} + + + +static +void print_sched_event ( ThreadId tid, Char* what ) +{ + if (1) + VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what ); +} + + +static +Char* name_of_sched_event ( UInt event ) +{ + switch (event) { + case VG_TRC_EBP_JMP_SPECIAL: return "JMP_SPECIAL"; + case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL"; + case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ"; + case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO"; + case VG_TRC_INNER_FASTMISS: return "FASTMISS"; + case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL"; + default: return "??UNKNOWN??"; + } +} + + +/* Create a translation of the client basic block beginning at + orig_addr, and add it to the translation cache & translation table. + This probably doesn't really belong here, but, hey ... +*/ +void VG_(create_translation_for) ( Addr orig_addr ) +{ + Addr trans_addr; + TTEntry tte; + Int orig_size, trans_size; + /* Ensure there is space to hold a translation. */ + VG_(maybe_do_lru_pass)(); + VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size ); + /* Copy data at trans_addr into the translation cache. + Returned pointer is to the code, not to the 4-byte + header. */ + /* Since the .orig_size and .trans_size fields are + UShort, be paranoid. */ + vg_assert(orig_size > 0 && orig_size < 65536); + vg_assert(trans_size > 0 && trans_size < 65536); + tte.orig_size = orig_size; + tte.orig_addr = orig_addr; + tte.trans_size = trans_size; + tte.trans_addr = VG_(copy_to_transcache) + ( trans_addr, trans_size ); + tte.mru_epoch = VG_(current_epoch); + /* Free the intermediary -- was allocated by VG_(emit_code). */ + VG_(jitfree)( (void*)trans_addr ); + /* Add to trans tab and set back pointer. */ + VG_(add_to_trans_tab) ( &tte ); + /* Update stats. */ + VG_(this_epoch_in_count) ++; + VG_(this_epoch_in_osize) += orig_size; + VG_(this_epoch_in_tsize) += trans_size; + VG_(overall_in_count) ++; + VG_(overall_in_osize) += orig_size; + VG_(overall_in_tsize) += trans_size; + /* Record translated area for SMC detection. */ + VG_(smc_mark_original) ( orig_addr, orig_size ); +} + + +/* Allocate a completely empty ThreadState record. 
*/ +static +ThreadId vg_alloc_ThreadState ( void ) +{ + Int i; + for (i = 0; i < VG_N_THREADS; i++) { + if (vg_threads[i].status == VgTs_Empty) + return i; + } + VG_(printf)("vg_alloc_ThreadState: no free slots available\n"); + VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n"); + VG_(panic)("VG_N_THREADS is too low"); + /*NOTREACHED*/ +} + + +ThreadState* VG_(get_thread_state) ( ThreadId tid ) +{ + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status != VgTs_Empty); + return & vg_threads[tid]; +} + + +/* Find an unused VgMutex record. */ +static +MutexId vg_alloc_VgMutex ( void ) +{ + Int i; + for (i = 0; i < VG_N_MUTEXES; i++) { + if (!vg_mutexes[i].in_use) + return i; + } + VG_(printf)("vg_alloc_VgMutex: no free slots available\n"); + VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n"); + VG_(panic)("VG_N_MUTEXES is too low"); + /*NOTREACHED*/ +} + + +/* Copy the saved state of a thread into VG_(baseBlock), ready for it + to be run. */ +__inline__ +void VG_(load_thread_state) ( ThreadId tid ) +{ + Int i; + VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax; + VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx; + VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx; + VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx; + VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi; + VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi; + VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp; + VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp; + VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags; + VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i]; + + VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax; + VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx; + VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx; + VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx; + VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi; + VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi; + VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp; + VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp; + VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags; +} + + +/* Copy the state of a thread from VG_(baseBlock), presumably after it + has been descheduled. For sanity-check purposes, fill the vacated + VG_(baseBlock) with garbage so as to make the system more likely to + fail quickly if we erroneously continue to poke around inside + VG_(baseBlock) without first doing a load_thread_state(). 
+*/ +__inline__ +void VG_(save_thread_state) ( ThreadId tid ) +{ + Int i; + const UInt junk = 0xDEADBEEF; + + vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)]; + vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)]; + vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)]; + vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)]; + vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)]; + vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)]; + vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; + vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)]; + vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)]; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; + + vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; + vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; + vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)]; + vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)]; + vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)]; + vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)]; + vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)]; + vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)]; + vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)]; + + /* Fill it up with junk. */ + VG_(baseBlock)[VGOFF_(m_eax)] = junk; + VG_(baseBlock)[VGOFF_(m_ebx)] = junk; + VG_(baseBlock)[VGOFF_(m_ecx)] = junk; + VG_(baseBlock)[VGOFF_(m_edx)] = junk; + VG_(baseBlock)[VGOFF_(m_esi)] = junk; + VG_(baseBlock)[VGOFF_(m_edi)] = junk; + VG_(baseBlock)[VGOFF_(m_ebp)] = junk; + VG_(baseBlock)[VGOFF_(m_esp)] = junk; + VG_(baseBlock)[VGOFF_(m_eflags)] = junk; + VG_(baseBlock)[VGOFF_(m_eip)] = junk; + + for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) + VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk; +} + + +/* Run the thread tid for a while, and return a VG_TRC_* value to the + scheduler indicating what happened. */ +static +UInt run_thread_for_a_while ( ThreadId tid ) +{ + UInt trc = 0; + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status != VgTs_Empty); + vg_assert(VG_(bbs_to_go) > 0); + + VG_(load_thread_state) ( tid ); + if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) { + /* try this ... */ + trc = VG_(run_innerloop)(); + /* We get here if the client didn't take a fault. */ + } else { + /* We get here if the client took a fault, which caused our + signal handler to longjmp. */ + vg_assert(trc == 0); + trc = VG_TRC_UNRESUMABLE_SIGNAL; + } + VG_(save_thread_state) ( tid ); + return trc; +} + + +/* Increment the LRU epoch counter. */ +static +void increment_epoch ( void ) +{ + VG_(current_epoch)++; + if (VG_(clo_verbosity) > 2) { + UInt tt_used, tc_used; + VG_(get_tt_tc_used) ( &tt_used, &tc_used ); + VG_(message)(Vg_UserMsg, + "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d", + VG_(bbs_done), + VG_(this_epoch_in_count), + VG_(this_epoch_in_osize), + VG_(this_epoch_in_tsize), + VG_(this_epoch_out_count), + VG_(this_epoch_out_osize), + VG_(this_epoch_out_tsize), + tt_used, tc_used + ); + } + VG_(this_epoch_in_count) = 0; + VG_(this_epoch_in_osize) = 0; + VG_(this_epoch_in_tsize) = 0; + VG_(this_epoch_out_count) = 0; + VG_(this_epoch_out_osize) = 0; + VG_(this_epoch_out_tsize) = 0; +} + + +/* Initialise the scheduler. Create a single "main" thread ready to + run, with special ThreadId of zero. This is called at startup; the + caller takes care to park the client's state is parked in + VG_(baseBlock). 
+*/ +void VG_(scheduler_init) ( void ) +{ + Int i; + Addr startup_esp; + ThreadId tid_main; + + startup_esp = VG_(baseBlock)[VGOFF_(m_esp)]; + if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) { + VG_(printf)("%esp at startup = %p is not near %p; aborting\n", + startup_esp, VG_STARTUP_STACK_MASK); + VG_(panic)("unexpected %esp at startup"); + } + + for (i = 0; i < VG_N_THREADS; i++) { + vg_threads[i].stack_size = 0; + vg_threads[i].stack_base = (Addr)NULL; + } + + for (i = 0; i < VG_N_WAITING_FDS; i++) + vg_waiting_fds[i].fd = -1; /* not in use */ + + for (i = 0; i < VG_N_MUTEXES; i++) + vg_mutexes[i].in_use = False; + + /* Assert this is thread zero, which has certain magic + properties. */ + tid_main = vg_alloc_ThreadState(); + vg_assert(tid_main == 0); + + vg_threads[tid_main].status = VgTs_Runnable; + vg_threads[tid_main].joiner = VG_INVALID_THREADID; + vg_threads[tid_main].retval = NULL; /* not important */ + + /* Copy VG_(baseBlock) state to tid_main's slot. */ + VG_(save_thread_state) ( tid_main ); +} + + +/* What if fd isn't a valid fd? */ +static +void set_fd_nonblocking ( Int fd ) +{ + Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); + vg_assert(!VG_(is_kerror)(res)); + res |= VKI_O_NONBLOCK; + res = VG_(fcntl)( fd, VKI_F_SETFL, res ); + vg_assert(!VG_(is_kerror)(res)); +} + +static +void set_fd_blocking ( Int fd ) +{ + Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); + vg_assert(!VG_(is_kerror)(res)); + res &= ~VKI_O_NONBLOCK; + res = VG_(fcntl)( fd, VKI_F_SETFL, res ); + vg_assert(!VG_(is_kerror)(res)); +} + +static +Bool fd_is_blockful ( Int fd ) +{ + Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); + vg_assert(!VG_(is_kerror)(res)); + return (res & VKI_O_NONBLOCK) ? False : True; +} + + + +/* Do a purely thread-local request for tid, and put the result in its + %EDX, without changing its scheduling state in any way, nor that of + any other threads. Return True if so. + + If the request is non-trivial, return False; a more capable but + slower mechanism will deal with it. +*/ +static +Bool maybe_do_trivial_clientreq ( ThreadId tid ) +{ +# define SIMPLE_RETURN(vvv) \ + { vg_threads[tid].m_edx = (vvv); \ + return True; \ + } + + UInt* arg = (UInt*)(vg_threads[tid].m_eax); + UInt req_no = arg[0]; + switch (req_no) { + case VG_USERREQ__MALLOC: + SIMPLE_RETURN( + (UInt)VG_(client_malloc) ( arg[1], Vg_AllocMalloc ) + ); + case VG_USERREQ__BUILTIN_NEW: + SIMPLE_RETURN( + (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNew ) + ); + case VG_USERREQ__BUILTIN_VEC_NEW: + SIMPLE_RETURN( + (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNewVec ) + ); + case VG_USERREQ__FREE: + VG_(client_free) ( (void*)arg[1], Vg_AllocMalloc ); + SIMPLE_RETURN(0); /* irrelevant */ + case VG_USERREQ__BUILTIN_DELETE: + VG_(client_free) ( (void*)arg[1], Vg_AllocNew ); + SIMPLE_RETURN(0); /* irrelevant */ + case VG_USERREQ__BUILTIN_VEC_DELETE: + VG_(client_free) ( (void*)arg[1], Vg_AllocNewVec ); + SIMPLE_RETURN(0); /* irrelevant */ + case VG_USERREQ__CALLOC: + SIMPLE_RETURN( + (UInt)VG_(client_calloc) ( arg[1], arg[2] ) + ); + case VG_USERREQ__REALLOC: + SIMPLE_RETURN( + (UInt)VG_(client_realloc) ( (void*)arg[1], arg[2] ) + ); + case VG_USERREQ__MEMALIGN: + SIMPLE_RETURN( + (UInt)VG_(client_memalign) ( arg[1], arg[2] ) + ); + default: + /* Too hard; wimp out. 
*/ + return False; + } +# undef SIMPLE_RETURN +} + + +static +void sched_do_syscall ( ThreadId tid ) +{ + UInt saved_eax; + UInt res, syscall_no; + UInt fd; + Bool might_block, assumed_nonblocking; + Bool orig_fd_blockness; + Char msg_buf[100]; + + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + syscall_no = vg_threads[tid].m_eax; /* syscall number */ + + if (syscall_no == __NR_nanosleep) { + ULong t_now, t_awaken; + struct vki_timespec* req; + req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */ + t_now = VG_(read_microsecond_timer)(); + t_awaken + = t_now + + (ULong)1000000ULL * (ULong)(req->tv_sec) + + (ULong)( (UInt)(req->tv_nsec) / 1000 ); + vg_threads[tid].status = VgTs_Sleeping; + vg_threads[tid].awaken_at = t_awaken; + if (1) { + VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu", + t_now, t_awaken-t_now); + print_sched_event(tid, msg_buf); + } + /* Force the scheduler to run something else for a while. */ + return; + } + + switch (syscall_no) { + case __NR_read: + case __NR_write: + assumed_nonblocking + = False; + might_block + = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */); + break; + default: + might_block = False; + assumed_nonblocking = True; + } + + if (assumed_nonblocking) { + /* We think it's non-blocking. Just do it in the normal way. */ + VG_(perform_assumed_nonblocking_syscall)(tid); + /* The thread is still runnable. */ + return; + } + + /* It might block. Take evasive action. */ + switch (syscall_no) { + case __NR_read: + case __NR_write: + fd = vg_threads[tid].m_ebx; break; + default: + vg_assert(3+3 == 7); + } + + /* Set the fd to nonblocking, and do the syscall, which will return + immediately, in order to lodge a request with the Linux kernel. + We later poll for I/O completion using select(). */ + + orig_fd_blockness = fd_is_blockful(fd); + set_fd_nonblocking(fd); + vg_assert(!fd_is_blockful(fd)); + VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */); + + /* This trashes the thread's %eax; we have to preserve it. */ + saved_eax = vg_threads[tid].m_eax; + KERNEL_DO_SYSCALL(tid,res); + + /* Restore original blockfulness of the fd. */ + if (orig_fd_blockness) + set_fd_blocking(fd); + else + set_fd_nonblocking(fd); + + if (res != -VKI_EWOULDBLOCK) { + /* It didn't block; it went through immediately. So finish off + in the normal way. Don't restore %EAX, since that now + (correctly) holds the result of the call. */ + VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); + /* We're still runnable. */ + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + } else { + + /* It would have blocked. First, restore %EAX to what it was + before our speculative call. */ + vg_threads[tid].m_eax = saved_eax; + /* Put this fd in a table of fds on which we are waiting for + completion. The arguments for select() later are constructed + from this table. */ + add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */); + /* Deschedule thread until an I/O completion happens. */ + vg_threads[tid].status = VgTs_WaitFD; + if (1) { + VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd); + print_sched_event(tid, msg_buf); + } + + } +} + + +/* Find out which of the fds in vg_waiting_fds are now ready to go, by + making enquiries with select(), and mark them as ready. We have to + wait for the requesting threads to fall into the the WaitFD state + before we can actually finally deliver the results, so this + procedure doesn't do that; complete_blocked_syscalls() does it. 
+ + It might seem odd that a thread which has done a blocking syscall + is not in WaitFD state; the way this can happen is if it initially + becomes WaitFD, but then a signal is delivered to it, so it becomes + Runnable for a while. In this case we have to wait for the + sighandler to return, whereupon the WaitFD state is resumed, and + only at that point can the I/O result be delivered to it. However, + this point may be long after the fd is actually ready. + + So, poll_for_ready_fds() merely detects fds which are ready. + complete_blocked_syscalls() does the second half of the trick, + possibly much later: it delivers the results from ready fds to + threads in WaitFD state. +*/ +void poll_for_ready_fds ( void ) +{ + vki_ksigset_t saved_procmask; + vki_fd_set readfds; + vki_fd_set writefds; + vki_fd_set exceptfds; + struct vki_timeval timeout; + Int fd, fd_max, i, n_ready, syscall_no, n_ok; + ThreadId tid; + Bool rd_ok, wr_ok, ex_ok; + Char msg_buf[100]; + + /* Awaken any sleeping threads whose sleep has expired. */ + { + struct vki_timespec * rem; + ULong t_now = VG_(read_microsecond_timer)(); + for (tid = 0; tid < VG_N_THREADS; tid++) { + if (vg_threads[tid].status != VgTs_Sleeping) + continue; + if (t_now >= vg_threads[tid].awaken_at) { + /* Resume this thread. Set to zero the remaining-time (second) + arg of nanosleep, since it's used up all its time. */ + vg_assert(vg_threads[tid].m_eax == __NR_nanosleep); + rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */ + if (rem != NULL) { + rem->tv_sec = 0; + rem->tv_nsec = 0; + } + /* Make the syscall return 0 (success). */ + vg_threads[tid].m_eax = 0; + /* Reschedule this thread. */ + vg_threads[tid].status = VgTs_Runnable; + if (1) { + VG_(sprintf)(msg_buf, "at %lu: nanosleep done", + t_now); + print_sched_event(tid, msg_buf); + } + } + } + } + + timeout.tv_sec = 0; + timeout.tv_usec = 0; + + VKI_FD_ZERO(&readfds); + VKI_FD_ZERO(&writefds); + VKI_FD_ZERO(&exceptfds); + fd_max = -1; + for (i = 0; i < VG_N_WAITING_FDS; i++) { + if (vg_waiting_fds[i].fd == -1 /* not in use */) + continue; + if (vg_waiting_fds[i].ready /* already ready? */) + continue; + fd = vg_waiting_fds[i].fd; + /* VG_(printf)("adding QUERY for fd %d\n", fd); */ + if (fd > fd_max) + fd_max = fd; + tid = vg_waiting_fds[i].tid; + vg_assert(tid >= 0 && tid < VG_N_THREADS); + syscall_no = vg_waiting_fds[i].syscall_no; + switch (syscall_no) { + case __NR_read: + VKI_FD_SET(fd, &readfds); break; + case __NR_write: + VKI_FD_SET(fd, &writefds); break; + default: + VG_(panic)("poll_for_ready_fds: unexpected syscall"); + /*NOTREACHED*/ + break; + } + } + + /* BLOCK ALL SIGNALS. We don't want the complication of select() + getting interrupted. */ + VG_(block_all_host_signals)( &saved_procmask ); + + n_ready = VG_(select) + ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout); + if (VG_(is_kerror)(n_ready)) { + VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready); + VG_(panic)("poll_for_ready_fds: select failed?!"); + /*NOTREACHED*/ + } + + /* UNBLOCK ALL SIGNALS */ + VG_(restore_host_signals)( &saved_procmask ); + + /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */ + + if (n_ready == 0) + return; + + /* Inspect all the fds we know about, and handle any completions that + have happened. 
*/ + /* + VG_(printf)("\n\n"); + for (fd = 0; fd < 100; fd++) + if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) { + VG_(printf)("X"); } else { VG_(printf)("."); }; + VG_(printf)("\n\nfd_max = %d\n", fd_max); + */ + + for (fd = 0; fd <= fd_max; fd++) { + rd_ok = VKI_FD_ISSET(fd, &readfds); + wr_ok = VKI_FD_ISSET(fd, &writefds); + ex_ok = VKI_FD_ISSET(fd, &exceptfds); + + n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0); + if (n_ok == 0) + continue; + if (n_ok > 1) { + VG_(printf)("offending fd = %d\n", fd); + VG_(panic)("poll_for_ready_fds: multiple events on fd"); + } + + /* An I/O event completed for fd. Find the thread which + requested this. */ + for (i = 0; i < VG_N_WAITING_FDS; i++) { + if (vg_waiting_fds[i].fd == -1 /* not in use */) + continue; + if (vg_waiting_fds[i].fd == fd) + break; + } + + /* And a bit more paranoia ... */ + vg_assert(i >= 0 && i < VG_N_WAITING_FDS); + + /* Mark the fd as ready. */ + vg_assert(! vg_waiting_fds[i].ready); + vg_waiting_fds[i].ready = True; + } +} + + +/* See comment attached to poll_for_ready_fds() for explaination. */ +void complete_blocked_syscalls ( void ) +{ + Int fd, i, res, syscall_no; + ThreadId tid; + Char msg_buf[100]; + + /* Inspect all the outstanding fds we know about. */ + + for (i = 0; i < VG_N_WAITING_FDS; i++) { + if (vg_waiting_fds[i].fd == -1 /* not in use */) + continue; + if (! vg_waiting_fds[i].ready) + continue; + + fd = vg_waiting_fds[i].fd; + tid = vg_waiting_fds[i].tid; + vg_assert(tid >= 0 && tid < VG_N_THREADS); + + /* The thread actually has to be waiting for the I/O event it + requested before we can deliver the result! */ + if (vg_threads[tid].status != VgTs_WaitFD) + continue; + + /* Ok, actually do it! We can safely use %EAX as the syscall + number, because the speculative call made by + sched_do_syscall() doesn't change %EAX in the case where the + call would have blocked. */ + + syscall_no = vg_waiting_fds[i].syscall_no; + vg_assert(syscall_no == vg_threads[tid].m_eax); + KERNEL_DO_SYSCALL(tid,res); + VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); + + /* Reschedule. */ + vg_threads[tid].status = VgTs_Runnable; + /* Mark slot as no longer in use. */ + vg_waiting_fds[i].fd = -1; + /* pp_sched_status(); */ + if (1) { + VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd); + print_sched_event(tid, msg_buf); + } + } +} + + +static +void nanosleep_for_a_while ( void ) +{ + Int res; + struct vki_timespec req; + struct vki_timespec rem; + req.tv_sec = 0; + req.tv_nsec = 20 * 1000 * 1000; + res = VG_(nanosleep)( &req, &rem ); + /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */ + vg_assert(res == 0); +} + + +/* --------------------------------------------------------------------- + The scheduler proper. + ------------------------------------------------------------------ */ + +/* Run user-space threads until either + * Deadlock occurs + * One thread asks to shutdown Valgrind + * The specified number of basic blocks has gone by. +*/ +VgSchedReturnCode VG_(scheduler) ( void ) +{ + ThreadId tid, tid_next; + UInt trc; + UInt dispatch_ctr_SAVED; + Int done_this_time, n_in_fdwait; + Char msg_buf[100]; + Addr trans_addr; + + /* For the LRU structures, records when the epoch began. */ + ULong lru_epoch_started_at = 0; + + /* Start with the root thread. tid in general indicates the + currently runnable/just-finished-running thread. */ + tid = 0; + + /* This is the top level scheduler loop. It falls into three + phases. 
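+
+      In outline:
+
+         Phase 1: poll for I/O completions, deliver pending signals,
+                  and pick a Runnable thread (or declare deadlock,
+                  or nanosleep briefly if everyone is blocked on
+                  I/O);
+
+         Phase 2: run that thread for up to VG_SCHEDULING_QUANTUM
+                  basic blocks, absorbing trivial events (fast-cache
+                  misses, trivial client requests) without leaving
+                  the dispatch loop;
+
+         Phase 3: handle whatever non-trivial event finally stopped
+                  it (syscall, client request, special jump, end of
+                  timeslice, unresumable signal), then go round
+                  again.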
*/ + while (True) { + + /* ======================= Phase 1 of 3 ======================= + Handle I/O completions and signals. This may change the + status of various threads. Then select a new thread to run, + or declare deadlock, or sleep if there are no runnable + threads but some are blocked on I/O. */ + + /* Age the LRU structures if an epoch has been completed. */ + if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) { + lru_epoch_started_at = VG_(bbs_done); + increment_epoch(); + } + + /* Was a debug-stop requested? */ + if (VG_(bbs_to_go) == 0) + goto debug_stop; + + /* Do the following loop until a runnable thread is found, or + deadlock is detected. */ + while (True) { + + /* For stats purposes only. */ + VG_(num_scheduling_events_MAJOR) ++; + + /* See if any I/O operations which we were waiting for have + completed, and, if so, make runnable the relevant waiting + threads. */ + poll_for_ready_fds(); + complete_blocked_syscalls(); + + /* See if there are any signals which need to be delivered. If + so, choose thread(s) to deliver them to, and build signal + delivery frames on those thread(s) stacks. */ + VG_(deliver_signals)( 0 /*HACK*/ ); + VG_(do_sanity_checks)(0 /*HACK*/, False); + + /* Try and find a thread (tid) to run. */ + tid_next = tid; + n_in_fdwait = 0; + while (True) { + tid_next++; + if (tid_next >= VG_N_THREADS) tid_next = 0; + if (vg_threads[tid_next].status == VgTs_WaitFD) + n_in_fdwait ++; + if (vg_threads[tid_next].status == VgTs_Runnable) + break; /* We can run this one. */ + if (tid_next == tid) + break; /* been all the way round */ + } + tid = tid_next; + + if (vg_threads[tid].status == VgTs_Runnable) { + /* Found a suitable candidate. Fall out of this loop, so + we can advance to stage 2 of the scheduler: actually + running the thread. */ + break; + } + + /* We didn't find a runnable thread. Now what? */ + if (n_in_fdwait == 0) { + /* No runnable threads and non in fd-wait either. Not + good. */ + pp_sched_status(); + return VgSrc_Deadlock; + } + + /* At least one thread is in a fd-wait state. Delay for a + while, and go round again, in the hope that eventually a + thread becomes runnable. */ + nanosleep_for_a_while(); + // pp_sched_status(); + // VG_(printf)(".\n"); + } + + + /* ======================= Phase 2 of 3 ======================= + Wahey! We've finally decided that thread tid is runnable, so + we now do that. Run it for as much of a quanta as possible. + Trivial requests are handled and the thread continues. The + aim is not to do too many of Phase 1 since it is expensive. */ + + if (0) + VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS); + + /* Figure out how many bbs to ask vg_run_innerloop to do. Note + that it decrements the counter before testing it for zero, so + that if VG_(dispatch_ctr) is set to N you get at most N-1 + iterations. Also this means that VG_(dispatch_ctr) must + exceed zero before entering the innerloop. Also also, the + decrement is done before the bb is actually run, so you + always get at least one decrement even if nothing happens. + */ + if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM) + VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1; + else + VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1; + + /* ... and remember what we asked for. */ + dispatch_ctr_SAVED = VG_(dispatch_ctr); + + /* Actually run thread tid. */ + while (True) { + + /* For stats purposes only. 
*/ + VG_(num_scheduling_events_MINOR) ++; + + if (0) + VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs", + tid, VG_(dispatch_ctr) - 1 ); + + trc = run_thread_for_a_while ( tid ); + + /* Deal quickly with trivial scheduling events, and resume the + thread. */ + + if (trc == VG_TRC_INNER_FASTMISS) { + vg_assert(VG_(dispatch_ctr) > 0); + + /* Trivial event. Miss in the fast-cache. Do a full + lookup for it. */ + trans_addr + = VG_(search_transtab) ( vg_threads[tid].m_eip ); + if (trans_addr == (Addr)0) { + /* Not found; we need to request a translation. */ + VG_(create_translation_for)( vg_threads[tid].m_eip ); + trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip ); + if (trans_addr == (Addr)0) + VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry"); + } + continue; /* with this thread */ + } + + if (trc == VG_TRC_EBP_JMP_CLIENTREQ) { + Bool is_triv = maybe_do_trivial_clientreq(tid); + if (is_triv) { + /* NOTE: a trivial request is something like a call to + malloc() or free(). It DOES NOT change the + Runnability of this thread nor the status of any + other thread; it is purely thread-local. */ + continue; /* with this thread */ + } + } + + /* It's a non-trivial event. Give up running this thread and + handle things the expensive way. */ + break; + } + + /* ======================= Phase 3 of 3 ======================= + Handle non-trivial thread requests, mostly pthread stuff. */ + + /* Ok, we've fallen out of the dispatcher for a + non-completely-trivial reason. First, update basic-block + counters. */ + + done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1; + vg_assert(done_this_time >= 0); + VG_(bbs_to_go) -= (ULong)done_this_time; + VG_(bbs_done) += (ULong)done_this_time; + + if (0 && trc != VG_TRC_INNER_FASTMISS) + VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d", + tid, done_this_time, (Int)trc ); + + if (0 && trc != VG_TRC_INNER_FASTMISS) + VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s", + tid, VG_(bbs_done), + name_of_sched_event(trc) ); + + /* Examine the thread's return code to figure out why it + stopped, and handle requests. */ + + switch (trc) { + + case VG_TRC_INNER_FASTMISS: + VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS"); + /*NOTREACHED*/ + break; + + case VG_TRC_INNER_COUNTERZERO: + /* Timeslice is out. Let a new thread be scheduled, + simply by doing nothing, causing us to arrive back at + Phase 1. */ + if (VG_(bbs_to_go) == 0) { + goto debug_stop; + } + vg_assert(VG_(dispatch_ctr) == 0); + break; + + case VG_TRC_UNRESUMABLE_SIGNAL: + /* It got a SIGSEGV/SIGBUS, which we need to deliver right + away. Again, do nothing, so we wind up back at Phase + 1, whereupon the signal will be "delivered". */ + break; + + case VG_TRC_EBP_JMP_SPECIAL: { + Addr next_eip = vg_threads[tid].m_eip; + if (next_eip == (Addr) & VG_(signalreturn_bogusRA)) { + /* vthread tid is returning from a signal handler; + modify its stack/regs accordingly. */ + VG_(signal_returns)(tid); + } + else + if (next_eip == (Addr) & VG_(shutdown)) { + return VgSrc_Shutdown; + } else { + VG_(panic)("vg_schedule: VG_TRC_EBP_JMP_SPECIAL"); + } + break; + } + + case VG_TRC_EBP_JMP_SYSCALL: + /* Do a syscall for the vthread tid. This could cause it + to become non-runnable. */ + sched_do_syscall(tid); + break; + + case VG_TRC_EBP_JMP_CLIENTREQ: + /* Do a client request for the vthread tid. Note that + some requests will have been handled by + maybe_do_trivial_clientreq(), so we don't expect to see + those here. 
+ */ + if (0) { + VG_(sprintf)(msg_buf, "request 0x%x", + vg_threads[tid].m_eax); + print_sched_event(tid, msg_buf); + } + /* Do a non-trivial client request for thread tid. tid's + %EAX points to a short vector of argument words, the + first of which is the request code. The result of the + request is put in tid's %EDX. Alternatively, perhaps + the request causes tid to become non-runnable and/or + other blocked threads become runnable. In general we + can and often do mess with the state of arbitrary + threads at this point. */ + do_nontrivial_clientreq(tid); + break; + + default: + VG_(printf)("\ntrc = %d\n", trc); + VG_(panic)("VG_(scheduler), phase 3: " + "unexpected thread return code"); + /* NOTREACHED */ + break; + + } /* switch (trc) */ + + /* That completes Phase 3 of 3. Return now to the top of the + main scheduler loop, to Phase 1 of 3. */ + + } /* top-level scheduler loop */ + + + /* NOTREACHED */ + VG_(panic)("scheduler: post-main-loop ?!"); + /* NOTREACHED */ + + debug_stop: + /* If we exited because of a debug stop, print the translation + of the last block executed -- by translating it again, and + throwing away the result. */ + VG_(printf)( + "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n"); + VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL ); + VG_(printf)("\n"); + VG_(printf)( + "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n"); + + return VgSrc_BbsDone; +} + + +/* --------------------------------------------------------------------- + The pthread implementation. + ------------------------------------------------------------------ */ + +#include +#include + +#if !defined(PTHREAD_STACK_MIN) +# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB) +#endif + +/* /usr/include/bits/pthreadtypes.h: + typedef unsigned long int pthread_t; +*/ + +/* RUNS ON SIMD CPU! + This is the return address that pthread_create uses. +*/ +static +void do_pthread_create_bogusRA ( void ) +{ + /* Tell the scheduler that this thread has returned. */ + Int res; + VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, + VG_USERREQ__PTHREAD_CREATE_BOGUSRA, + 0, 0, 0, 0); + VG_(panic)("do_pthread_create_bogusRA: shouldn't be still alive!"); +} + + +static +void do_pthread_cancel ( ThreadId tid_canceller, + pthread_t tid_cancellee ) +{ + Char msg_buf[100]; + /* We want make is appear that this thread has returned to + do_pthread_create_bogusRA with PTHREAD_CANCELED as the + return value. So: simple: put PTHREAD_CANCELED into %EAX + and &do_pthread_create_bogusRA into %EIP and keep going! */ + if (1) { + VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller); + print_sched_event(tid_cancellee, msg_buf); + } + vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED; + vg_threads[tid_cancellee].m_eip = (UInt)&do_pthread_create_bogusRA; + vg_threads[tid_cancellee].status = VgTs_Runnable; +} + + + +/* Thread tid is exiting, by returning from the function it was + created with. The main complication here is to resume any thread + waiting to join with this one. */ +static +void do_pthread_create_exit_by_returning ( ThreadId tid ) +{ + ThreadId jnr; /* joiner, the thread calling pthread_join. */ + UInt* jnr_args; + void** jnr_thread_return; + Char msg_buf[100]; + + /* Mark it as not in use. Leave the stack in place so the next + user of this slot doesn't reallocate it. 
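The effect do_pthread_cancel arranges above (the cancelled thread's joiner sees PTHREAD_CANCELED, because %EAX is forced to that value and %EIP to the exit stub) can be checked from ordinary client code. A minimal test case, plain pthreads and nothing Valgrind-specific; note that a reference libpthread defers cancellation to a cancellation point such as sleep(), whereas the replacement here cancels immediately.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static void* spin ( void* arg )
{
   for (;;)
      sleep(1);       /* a cancellation point in the reference implementation */
   return NULL;
}

int main ( void )
{
   pthread_t th;
   void* ret;
   pthread_create(&th, NULL, spin, NULL);
   sleep(1);
   pthread_cancel(th);
   pthread_join(th, &ret);
   printf("%s\n", ret == PTHREAD_CANCELED ? "joined a cancelled thread"
                                          : "thread returned normally");
   return 0;
}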
*/ + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status != VgTs_Empty); + + vg_threads[tid].retval = (void*)vg_threads[tid].m_eax; + + if (vg_threads[tid].joiner == VG_INVALID_THREADID) { + /* No one has yet done a join on me */ + vg_threads[tid].status = VgTs_WaitJoiner; + if (1) { + VG_(sprintf)(msg_buf, + "root fn returns, waiting for a call pthread_join(%d)", + tid); + print_sched_event(tid, msg_buf); + } + } else { + /* Some is waiting; make their join call return with success, + putting my exit code in the place specified by the caller's + thread_return param. This is all very horrible, since we + need to consult the joiner's arg block -- pointed to by its + %EAX -- in order to extract the 2nd param of its pthread_join + call. TODO: free properly the slot (also below). + */ + jnr = vg_threads[tid].joiner; + vg_assert(jnr >= 0 && jnr < VG_N_THREADS); + vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee); + jnr_args = (UInt*)vg_threads[jnr].m_eax; + jnr_thread_return = (void**)(jnr_args[2]); + if (jnr_thread_return != NULL) + *jnr_thread_return = vg_threads[tid].retval; + vg_threads[jnr].m_edx = 0; /* success */ + vg_threads[jnr].status = VgTs_Runnable; + vg_threads[tid].status = VgTs_Empty; /* bye! */ + if (1) { + VG_(sprintf)(msg_buf, + "root fn returns, to find a waiting pthread_join(%d)", tid); + print_sched_event(tid, msg_buf); + VG_(sprintf)(msg_buf, + "my pthread_join(%d) returned; resuming", tid); + print_sched_event(jnr, msg_buf); + } + } + + /* Return value is irrelevant; this thread will not get + rescheduled. */ +} + + +static +void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return ) +{ + Char msg_buf[100]; + + /* jee, the joinee, is the thread specified as an arg in thread + tid's call to pthread_join. So tid is the join-er. */ + vg_assert(tid >= 0 && tid < VG_N_THREADS); + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + if (jee == tid) { + vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */ + vg_threads[tid].status = VgTs_Runnable; + return; + } + + if (jee < 0 + || jee >= VG_N_THREADS + || vg_threads[jee].status == VgTs_Empty) { + /* Invalid thread to join to. */ + vg_threads[tid].m_edx = EINVAL; + vg_threads[tid].status = VgTs_Runnable; + return; + } + + if (vg_threads[jee].joiner != VG_INVALID_THREADID) { + /* Someone already did join on this thread */ + vg_threads[tid].m_edx = EINVAL; + vg_threads[tid].status = VgTs_Runnable; + return; + } + + /* if (vg_threads[jee].detached) ... */ + + /* Perhaps the joinee has already finished? If so return + immediately with its return code, and free up the slot. TODO: + free it properly (also above). */ + if (vg_threads[jee].status == VgTs_WaitJoiner) { + vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID); + vg_threads[tid].m_edx = 0; /* success */ + if (thread_return != NULL) + *thread_return = vg_threads[jee].retval; + vg_threads[tid].status = VgTs_Runnable; + vg_threads[jee].status = VgTs_Empty; /* bye! */ + if (1) { + VG_(sprintf)(msg_buf, + "someone called pthread_join() on me; bye!"); + print_sched_event(jee, msg_buf); + VG_(sprintf)(msg_buf, + "my pthread_join(%d) returned immediately", + jee ); + print_sched_event(tid, msg_buf); + } + return; + } + + /* Ok, so we'll have to wait on jee. */ + vg_threads[jee].joiner = tid; + vg_threads[tid].status = VgTs_WaitJoinee; + if (1) { + VG_(sprintf)(msg_buf, + "blocking on call of pthread_join(%d)", jee ); + print_sched_event(tid, msg_buf); + } + /* So tid's join call does not return just now. 
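The round trip implemented by do_pthread_create_exit_by_returning and do_pthread_join above, seen from the client side: the root function's return value must end up in the joiner's thread_return slot, whichever of the two threads gets there first. A minimal test case (ordinary pthreads usage):

#include <pthread.h>
#include <stdio.h>

static void* root_fn ( void* arg )
{
   return (void*)((long)arg + 1);   /* "exit by returning" from the root fn */
}

int main ( void )
{
   pthread_t th;
   void* ret;
   pthread_create(&th, NULL, root_fn, (void*)41L);
   pthread_join(th, &ret);                   /* picks up root_fn's result */
   printf("joined, got %ld\n", (long)ret);   /* 42 */
   return 0;
}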
*/ +} + + +static +void do_pthread_create ( ThreadId parent_tid, + pthread_t* thread, + pthread_attr_t* attr, + void* (*start_routine)(void *), + void* arg ) +{ + Addr new_stack; + UInt new_stk_szb; + ThreadId tid; + Char msg_buf[100]; + + /* Paranoia ... */ + vg_assert(sizeof(pthread_t) == sizeof(UInt)); + + vg_assert(vg_threads[parent_tid].status != VgTs_Empty); + + tid = vg_alloc_ThreadState(); + + /* If we've created the main thread's tid, we're in deep trouble :) */ + vg_assert(tid != 0); + + /* Copy the parent's CPU state into the child's, in a roundabout + way (via baseBlock). */ + VG_(load_thread_state)(parent_tid); + VG_(save_thread_state)(tid); + + /* Consider allocating the child a stack, if the one it already has + is inadequate. */ + new_stk_szb = PTHREAD_STACK_MIN; + + if (new_stk_szb > vg_threads[tid].stack_size) { + /* Again, for good measure :) We definitely don't want to be + allocating a stack for the main thread. */ + vg_assert(tid != 0); + /* for now, we don't handle the case of anything other than + assigning it for the first time. */ + vg_assert(vg_threads[tid].stack_size == 0); + vg_assert(vg_threads[tid].stack_base == (Addr)NULL); + new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb ); + vg_threads[tid].stack_base = new_stack; + vg_threads[tid].stack_size = new_stk_szb; + vg_threads[tid].m_esp + = new_stack + new_stk_szb + - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; + } + if (VG_(clo_instrument)) + VGM_(make_noaccess)( vg_threads[tid].m_esp, + VG_AR_CLIENT_STACKBASE_REDZONE_SZB ); + + /* push arg */ + vg_threads[tid].m_esp -= 4; + * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg; + + /* push (magical) return address */ + vg_threads[tid].m_esp -= 4; + * (UInt*)(vg_threads[tid].m_esp) = (UInt)do_pthread_create_bogusRA; + + if (VG_(clo_instrument)) + VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 ); + + /* this is where we start */ + vg_threads[tid].m_eip = (UInt)start_routine; + + if (1) { + VG_(sprintf)(msg_buf, + "new thread, created by %d", parent_tid ); + print_sched_event(tid, msg_buf); + } + + /* store the thread id in *thread. */ + // if (VG_(clo_instrument)) + // ***** CHECK *thread is writable + *thread = (pthread_t)tid; + + /* return zero */ + vg_threads[tid].joiner = VG_INVALID_THREADID; + vg_threads[tid].status = VgTs_Runnable; + vg_threads[tid].m_edx = 0; /* success */ +} + + +/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t + is a struct with at least 5 words: + typedef struct + { + int __m_reserved; -- Reserved for future use + int __m_count; -- Depth of recursive locking + _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck) + int __m_kind; -- Mutex kind: fast, recursive or errcheck + struct _pthread_fastlock __m_lock; -- Underlying fast lock + } pthread_mutex_t; + Ours is just a single word, an index into vg_mutexes[]. + For now I'll park it in the __m_reserved field. + + Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have + a zero __m_count field (see /usr/include/pthread.h). So I'll + use zero to mean non-inited, and 1 to mean inited. + + How convenient. +*/ + +static +void initialise_mutex ( pthread_mutex_t *mutex ) +{ + MutexId mid; + /* vg_alloc_MutexId aborts if we can't allocate a mutex, for + whatever reason. 
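A standalone sketch of the one-word mutex encoding just described, using a stand-in struct with the same two fields (the real layout comes from the pthread.h this code targets, quoted above). The Fake* names and the id allocator are made up for the sketch; the point is only that a statically initialised mutex has __m_count == 0, so that field can serve as the "initialised yet?" flag while __m_reserved parks the MutexId.

#include <stdio.h>

typedef struct { int __m_reserved; int __m_count; } FakeMutex;

#define FAKE_MUTEX_INITIALIZER { 0, 0 }   /* __m_count == 0: not initialised */

static int next_mutex_id = 0;

static void fake_initialise_mutex ( FakeMutex* m )
{
   if (m->__m_count == 0) {
      m->__m_reserved = next_mutex_id++;   /* park a MutexId here */
      m->__m_count    = 1;                 /* 1 == initialised */
   }
}

int main ( void )
{
   FakeMutex m = FAKE_MUTEX_INITIALIZER;
   fake_initialise_mutex(&m);
   fake_initialise_mutex(&m);              /* second call is a no-op */
   printf("id = %d, inited = %d\n", m.__m_reserved, m.__m_count);  /* 0, 1 */
   return 0;
}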
*/ +VG_(printf)("initialise_mutex %p\n", mutex); + mid = vg_alloc_VgMutex(); + vg_mutexes[mid].in_use = True; + vg_mutexes[mid].held = False; + vg_mutexes[mid].owner = VG_INVALID_THREADID; /* irrelevant */ + mutex->__m_reserved = mid; + mutex->__m_count = 1; /* initialised */ +} + +/* Allocate a new MutexId and write it into *mutex. Ideally take + notice of the attributes in *mutexattr. */ +static +void do_pthread_mutex_init ( ThreadId tid, + pthread_mutex_t *mutex, + const pthread_mutexattr_t *mutexattr) +{ + /* Paranoia ... */ +VG_(printf)("mutex_init %d %p %p\n", tid, mutex, mutexattr); + + vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt)); + + initialise_mutex(mutex); + /* + RETURN VALUE + pthread_mutex_init always returns 0. The other mutex functions + return 0 on success and a non-zero error code on error. + */ + /* THIS THREAD returns with 0. */ + vg_threads[tid].m_edx = 0; +} + + +static +void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex ) +{ + MutexId mid; + Char msg_buf[100]; + +VG_(printf)("mutex_lock %d %p\n", tid, mutex); + + /* *mutex contains the MutexId, or one of the magic values + PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it + now. See comment(s) above re use of __m_count to indicated + initialisation status. + */ + + /* POSIX doesn't mandate this, but for sanity ... */ + if (mutex == NULL) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + if (mutex->__m_count == 0) { + initialise_mutex(mutex); + } + + mid = mutex->__m_reserved; + if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + /* Assert initialised. */ + vg_assert(mutex->__m_count == 1); + + /* Assume tid valid. */ + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + if (vg_mutexes[mid].held) { + if (vg_mutexes[mid].owner == tid) { + vg_threads[tid].m_edx = EDEADLK; + return; + } + /* Someone else has it; we have to wait. */ + vg_threads[tid].status = VgTs_WaitMX; + vg_threads[tid].waited_on_mid = mid; + /* No assignment to %EDX, since we're blocking. */ + if (1) { + VG_(sprintf)(msg_buf, "wait for mutex %d", mid ); + print_sched_event(tid, msg_buf); + } + } else { + /* We get it! */ + vg_mutexes[mid].held = True; + vg_mutexes[mid].owner = tid; + /* return 0 (success). */ + vg_threads[tid].m_edx = 0; + } +} + + +static +void do_pthread_mutex_unlock ( ThreadId tid, + pthread_mutex_t *mutex ) +{ + MutexId mid; + Int i; + Char msg_buf[100]; + +VG_(printf)("mutex_unlock %d %p\n", tid, mutex); + + if (mutex == NULL + || mutex->__m_count != 1) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + mid = mutex->__m_reserved; + if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + /* Assume tid valid */ + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + /* Barf if we don't currently hold the mutex. */ + if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) { + vg_threads[tid].m_edx = EPERM; + return; + } + + /* Find some arbitrary thread waiting on this mutex, and make it + runnable. If none are waiting, mark the mutex as not held. */ + for (i = 0; i < VG_N_THREADS; i++) { + if (vg_threads[i].status == VgTs_Empty) + continue; + if (vg_threads[i].status == VgTs_WaitMX + && vg_threads[i].waited_on_mid == mid) + break; + } + + vg_assert(i <= VG_N_THREADS); + if (i == VG_N_THREADS) { + /* Nobody else is waiting on it. 
*/ + vg_mutexes[mid].held = False; + } else { + /* Notionally transfer the hold to thread i, whose + pthread_mutex_lock() call now returns with 0 (success). */ + vg_mutexes[mid].owner = i; + vg_threads[i].status = VgTs_Runnable; + vg_threads[i].m_edx = 0; /* pth_lock() success */ + if (1) { + VG_(sprintf)(msg_buf, "acquire mutex %d, resume", mid ); + print_sched_event(tid, msg_buf); + } + } + + /* In either case, our (tid's) pth_unlock() returns with 0 + (success). */ + vg_threads[tid].m_edx = 0; /* Success. */ +} + + +static void do_pthread_mutex_destroy ( ThreadId tid, + pthread_mutex_t *mutex ) +{ + MutexId mid; + +VG_(printf)("mutex_destroy %d %p\n", tid, mutex); + + if (mutex == NULL + || mutex->__m_count != 1) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + mid = mutex->__m_reserved; + if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) { + vg_threads[tid].m_edx = EINVAL; + return; + } + + /* Assume tid valid */ + vg_assert(vg_threads[tid].status == VgTs_Runnable); + + /* Barf if the mutex is currently held. */ + if (vg_mutexes[mid].held) { + vg_threads[tid].m_edx = EBUSY; + return; + } + + mutex->__m_count = 0; /* uninitialised */ + vg_mutexes[mid].in_use = False; + vg_threads[tid].m_edx = 0; +} + + +/* --------------------------------------------------------------------- + Handle non-trivial client requests. + ------------------------------------------------------------------ */ + +static +void do_nontrivial_clientreq ( ThreadId tid ) +{ + UInt* arg = (UInt*)(vg_threads[tid].m_eax); + UInt req_no = arg[0]; + switch (req_no) { + + case VG_USERREQ__PTHREAD_CREATE: + do_pthread_create( tid, + (pthread_t*)arg[1], + (pthread_attr_t*)arg[2], + (void*(*)(void*))arg[3], + (void*)arg[4] ); + break; + + case VG_USERREQ__PTHREAD_CREATE_BOGUSRA: + do_pthread_create_exit_by_returning( tid ); + break; + + case VG_USERREQ__PTHREAD_JOIN: + do_pthread_join( tid, arg[1], (void**)(arg[2]) ); + break; + + /* Sigh ... this probably will cause huge numbers of major + (expensive) scheduling events, for no real reason. + Perhaps should be classified as a trivial-request. */ + case VG_USERREQ__PTHREAD_GET_THREADID: + vg_threads[tid].m_edx = tid; + break; + + case VG_USERREQ__PTHREAD_MUTEX_INIT: + do_pthread_mutex_init( tid, + (pthread_mutex_t *)(arg[1]), + (pthread_mutexattr_t *)(arg[2]) ); + break; + + case VG_USERREQ__PTHREAD_MUTEX_LOCK: + do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) ); + break; + + case VG_USERREQ__PTHREAD_MUTEX_UNLOCK: + do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) ); + break; + + case VG_USERREQ__PTHREAD_MUTEX_DESTROY: + do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) ); + break; + + case VG_USERREQ__PTHREAD_CANCEL: + do_pthread_cancel( tid, (pthread_t)(arg[1]) ); + break; + + case VG_USERREQ__MAKE_NOACCESS: + case VG_USERREQ__MAKE_WRITABLE: + case VG_USERREQ__MAKE_READABLE: + case VG_USERREQ__DISCARD: + case VG_USERREQ__CHECK_WRITABLE: + case VG_USERREQ__CHECK_READABLE: + case VG_USERREQ__MAKE_NOACCESS_STACK: + case VG_USERREQ__RUNNING_ON_VALGRIND: + case VG_USERREQ__DO_LEAK_CHECK: + vg_threads[tid].m_edx = VG_(handle_client_request) ( arg ); + break; + + default: + VG_(printf)("panic'd on private request = 0x%x\n", arg[0] ); + VG_(panic)("handle_private_client_pthread_request: " + "unknown request"); + /*NOTREACHED*/ + break; + } +} + + +/*--------------------------------------------------------------------*/ +/*--- end vg_scheduler.c ---*/ +/*--------------------------------------------------------------------*/ -- 2.47.3
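Finally, a much-simplified standalone model of the argument-block convention consumed by do_nontrivial_clientreq: the blocked thread's %EAX points at a word vector whose first element is the request code and whose remaining elements are the call's arguments, and whatever the handler computes plays the role of the value written back into %EDX. The request numbers and the Word type below are made up for the sketch (the real code uses the VG_USERREQ__* constants and 32-bit words on x86).

#include <stdio.h>

typedef unsigned long Word;

#define REQ_GET_THREADID  100   /* made-up request numbers */
#define REQ_PTHREAD_JOIN  101

static Word fake_clientreq ( Word tid, Word* arg )
{
   switch (arg[0]) {
      case REQ_GET_THREADID:
         return tid;                              /* result -> "%EDX" */
      case REQ_PTHREAD_JOIN: {
         void** thread_return = (void**)arg[2];   /* 2nd arg of pthread_join */
         if (thread_return != NULL)
            *thread_return = (void*)42;           /* pretend the joinee returned 42 */
         return 0;                                /* join succeeded */
      }
      default:
         return (Word)-1;
   }
}

int main ( void )
{
   void* joinee_result = NULL;
   Word  join_args[3] = { REQ_PTHREAD_JOIN, 7 /* joinee tid */,
                          (Word)&joinee_result };
   Word  id_args[1]   = { REQ_GET_THREADID };

   printf("GET_THREADID -> %lu\n", fake_clientreq(3, id_args));      /* 3 */
   printf("JOIN         -> %lu, *thread_return = %ld\n",
          fake_clientreq(3, join_args), (long)joinee_result);        /* 0, 42 */
   return 0;
}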