From: Dave Hart Date: Sat, 27 Aug 2011 05:24:07 +0000 (+0000) Subject: [Bug 2000] ntpd worker threads must block signals expected in main thread. X-Git-Tag: NTP_4_2_7P209~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fb893471c631039cc43bd76ba5214fee5f0c315f;p=thirdparty%2Fntp.git [Bug 2000] ntpd worker threads must block signals expected in main thread. bk: 4e587f77s8Thg5sdevuqgmAE5LyNkw --- diff --git a/ChangeLog b/ChangeLog index 653d52195..57c867431 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,5 @@ +* [Bug 2000] ntpd worker threads must block signals expected in main + thread. * [Bug 2001] add ntpq -c timerstats like ntpdc -c timerstats. (4.2.7p208) 2011/08/24 Released by Harlan Stenn * Fix the CLOCK_MONOTONIC TRACE() message. diff --git a/include/ntpd.h b/include/ntpd.h index 4ea22bdba..dfd74bdcc 100644 --- a/include/ntpd.h +++ b/include/ntpd.h @@ -296,6 +296,20 @@ extern char * fstostr(time_t); /* NTP timescale seconds */ /* ntpd.c */ extern void parse_cmdline_opts(int *, char ***); +/* + * Signals we catch for debugging. + */ +#define MOREDEBUGSIG SIGUSR1 +#define LESSDEBUGSIG SIGUSR2 +/* + * Signals which terminate us gracefully. + */ +#ifndef SYS_WINNT +# define SIGDIE1 SIGHUP +# define SIGDIE3 SIGQUIT +# define SIGDIE2 SIGINT +# define SIGDIE4 SIGTERM +#endif /* SYS_WINNT */ /* diff --git a/libntp/syssignal.c b/libntp/syssignal.c index 447595194..3f1848135 100644 --- a/libntp/syssignal.c +++ b/libntp/syssignal.c @@ -29,8 +29,8 @@ signal_no_reset( struct sigaction ovec; ZERO(vec); - vec.sa_handler = func; sigemptyset(&vec.sa_mask); + vec.sa_handler = func; /* Added for PPS clocks on Solaris 7 which get EINTR errors */ # ifdef SIGPOLL diff --git a/libntp/work_thread.c b/libntp/work_thread.c index ea471c26f..d020c12c2 100644 --- a/libntp/work_thread.c +++ b/libntp/work_thread.c @@ -8,6 +8,7 @@ #include #include +#include #ifndef SYS_WINNT #include #endif @@ -62,6 +63,9 @@ u_int WINAPI blocking_thread(void *); #else void * blocking_thread(void *); #endif +#ifndef SYS_WINNT +static void block_thread_signals(sigset_t *); +#endif void @@ -407,6 +411,7 @@ start_blocking_thread_internal( int is_pipe; int flags; size_t stacksize; + sigset_t saved_sig_mask; # ifdef NEED_PTHREAD_INIT /* @@ -463,9 +468,11 @@ start_blocking_thread_internal( pthread_attr_setscope(&thr_attr, PTHREAD_SCOPE_SYSTEM); #endif c->thread_ref = emalloc_zero(sizeof(*c->thread_ref)); + block_thread_signals(&saved_sig_mask); rc = pthread_create(c->thread_ref, &thr_attr, &blocking_thread, c); saved_errno = errno; + pthread_sigmask(SIG_SETMASK, &saved_sig_mask, NULL); pthread_attr_destroy(&thr_attr); if (0 != rc) { errno = saved_errno; @@ -475,6 +482,56 @@ start_blocking_thread_internal( } #endif + +/* + * block_thread_signals() + * + * Temporarily block signals used by ntpd main thread, so that signal + * mask inherited by child threads leaves them blocked. Returns prior + * active signal mask via pmask, to be restored by the main thread + * after pthread_create(). + */ +#ifndef SYS_WINNT +void +block_thread_signals( + sigset_t * pmask + ) +{ + sigset_t block; + + sigemptyset(&block); +# ifdef HAVE_SIGNALED_IO +# ifdef SIGIO + sigaddset(&block, SIGIO); +# endif /* SIGIO */ +# ifdef SIGIO + sigaddset(&block, SIGPOLL); +# endif /* SIGIO */ +# endif /* HAVE_SIGNALED_IO */ + sigaddset(&block, SIGALRM); + sigaddset(&block, MOREDEBUGSIG); + sigaddset(&block, LESSDEBUGSIG); +# ifdef SIGDIE1 + sigaddset(&block, SIGDIE1); +# endif +# ifdef SIGDIE2 + sigaddset(&block, SIGDIE2); +# endif +# ifdef SIGDIE3 + sigaddset(&block, SIGDIE3); +# endif +# ifdef SIGDIE4 + sigaddset(&block, SIGDIE4); +# endif +# ifdef SIGBUS + sigaddset(&block, SIGBUS); +# endif + sigemptyset(pmask); + pthread_sigmask(SIG_BLOCK, &block, pmask); +} +#endif /* !SYS_WINNT */ + + /* * prepare_child_sems() * diff --git a/ntpd/ntp_timer.c b/ntpd/ntp_timer.c index 485d3778a..ece53dd04 100644 --- a/ntpd/ntp_timer.c +++ b/ntpd/ntp_timer.c @@ -105,14 +105,47 @@ static RETSIGTYPE alarming (int); #if !defined(VMS) # if !defined SYS_WINNT || defined(SYS_CYGWIN32) # ifdef HAVE_TIMER_CREATE - static timer_t timer_id; - struct itimerspec itimer; +static timer_t timer_id; +typedef struct itimerspec intervaltimer; +# define itv_frac tv_nsec # else - struct itimerval itimer; +typedef struct itimerval intervaltimer; +# define itv_frac tv_usec # endif +intervaltimer itimer; # endif #endif +#if !defined(SYS_WINNT) && !defined(VMS) +void set_timer_or_die(const intervaltimer *); +#endif + + +#if !defined(SYS_WINNT) && !defined(VMS) +void +set_timer_or_die( + const intervaltimer * ptimer + ) +{ + const char * setfunc; + int rc; + +# ifdef HAVE_TIMER_CREATE + setfunc = "timer_settime"; + rc = timer_settime(timer_id, 0, &itimer, NULL); +# else + setfunc = "setitimer"; + rc = setitimer(ITIMER_REAL, &itimer, NULL); +# endif + if (-1 == rc) { + msyslog(LOG_ERR, "interval timer %s failed, %m\n", + setfunc); + exit(1); + } +} +#endif /* !SYS_WINNT && !VMS */ + + /* * reinit_timer - reinitialize interval timer after a clock step. */ @@ -120,40 +153,27 @@ void reinit_timer(void) { #if !defined(SYS_WINNT) && !defined(VMS) -# ifdef HAVE_TIMER_CREATE + ZERO(itimer); +# ifdef HAVE_TIMER_CREATE timer_gettime(timer_id, &itimer); - if (itimer.it_value.tv_sec < 0 || itimer.it_value.tv_sec > (1< (1< (1 << EVENT_TIMEOUT)) + itimer.it_value.tv_sec = (1 << EVENT_TIMEOUT); + if (itimer.it_value.itv_frac < 0) + itimer.it_value.itv_frac = 0; + if (0 == itimer.it_value.tv_sec && + 0 == itimer.it_value.itv_frac) + itimer.it_value.tv_sec = (1 << EVENT_TIMEOUT); + itimer.it_interval.tv_sec = (1 << EVENT_TIMEOUT); + itimer.it_interval.itv_frac = 0; + set_timer_or_die(&itimer); # endif /* VMS */ } + /* * init_timer - initialize the timer data structures */ @@ -163,7 +183,7 @@ init_timer(void) /* * Initialize... */ - alarm_flag = 0; + alarm_flag = FALSE; alarm_overflow = 0; adjust_timer = 1; stats_timer = HOUR; @@ -182,20 +202,16 @@ init_timer(void) */ # ifndef VMS # ifdef HAVE_TIMER_CREATE - if (timer_create(CLOCK_REALTIME, NULL, &timer_id) == TC_ERR) { - fprintf (stderr, "timer create FAILED\n"); - exit (0); + if (TC_ERR == timer_create(CLOCK_REALTIME, NULL, &timer_id)) { + msyslog(LOG_ERR, "timer_create failed, %m\n"); + exit(1); } - signal_no_reset(SIGALRM, alarming); - itimer.it_interval.tv_sec = itimer.it_value.tv_sec = (1<p_link; - if (peer->flags & FLAG_REFCLOCK) - refclock_timer(peer); + for (p = peer_list; p != NULL; p = next_peer) { + next_peer = p->p_link; + if (FLAG_REFCLOCK & p->flags) + refclock_timer(p); } #endif /* REFCLOCK */ } @@ -285,11 +307,10 @@ timer(void) * careful here, since the peer structure might go away as the * result of the call. */ - for (peer = peer_list; peer != NULL; peer = next_peer) { - next_peer = peer->p_link; - if (peer->action != NULL && peer->nextaction <= - current_time) - (*peer->action)(peer); + for (p = peer_list; p != NULL; p = next_peer) { + next_peer = p->p_link; + if (p->action != NULL && p->nextaction <= current_time) + (*p->action)(p); /* * Restrain the non-burst packet rate not more @@ -297,17 +318,15 @@ timer(void) * usually tripped using iburst and minpoll of * 128 s or less. */ - if (peer->throttle > 0) - peer->throttle--; - if (peer->nextdate <= current_time) { + if (p->throttle > 0) + p->throttle--; + if (p->nextdate <= current_time) { #ifdef REFCLOCK - if (peer->flags & FLAG_REFCLOCK) - refclock_transmit(peer); + if (FLAG_REFCLOCK & p->flags) + refclock_transmit(p); else - transmit(peer); -#else /* REFCLOCK */ - transmit(peer); -#endif /* REFCLOCK */ +#endif /* REFCLOCK */ + transmit(p); } } @@ -430,24 +449,40 @@ alarming( int sig ) { -#if !defined(VMS) - if (initializing) - return; - if (alarm_flag) - alarm_overflow++; - else - alarm_flag++; -#else /* VMS AST routine */ +#ifdef DEBUG + const char *msg = "alarming: initializing TRUE\n"; +#endif + if (!initializing) { - if (alarm_flag) alarm_overflow++; - else alarm_flag = 1; /* increment is no good */ + if (alarm_flag) { + alarm_overflow++; +#ifdef DEBUG + msg = "alarming: overflow\n"; +#endif + } else { +# ifndef VMS + alarm_flag++; +# else + /* VMS AST routine, increment is no good */ + alarm_flag = 1; +# endif +#ifdef DEBUG + msg = "alarming: normal\n"; +#endif + } } - lib$addx(&vmsinc,&vmstimer,&vmstimer); - sys$setimr(0,&vmstimer,alarming,alarming,0); -#endif /* VMS */ +# ifdef VMS + lib$addx(&vmsinc, &vmstimer, &vmstimer); + sys$setimr(0, &vmstimer, alarming, alarming, 0); +# endif +#ifdef DEBUG + if (debug) + write(2, msg, strlen(msg)); +#endif } #endif /* SYS_WINNT */ + void timer_interfacetimeout(u_long timeout) { diff --git a/ntpd/ntpd.c b/ntpd/ntpd.c index 4b3c1d48c..42cbf2477 100644 --- a/ntpd/ntpd.c +++ b/ntpd/ntpd.c @@ -457,6 +457,7 @@ ntpdmain( struct rlimit rl; # endif # endif + # ifdef HAVE_UMASK uv = umask(0); if (uv) @@ -464,7 +465,6 @@ ntpdmain( else umask(022); # endif - progname = argv[0]; initializing = TRUE; /* mark that we are initializing */ parse_cmdline_opts(&argc, &argv); @@ -953,7 +953,7 @@ getgroup: cap_free(caps); } # endif /* HAVE_LINUX_CAPABILITIES */ - root_dropped = 1; + root_dropped = TRUE; fork_deferred_worker(); } /* if (droproot) */ # endif /* HAVE_DROPROOT */ @@ -966,7 +966,8 @@ getgroup: * between checking for alarms and doing the select(). * Mostly harmless, I think. */ - /* On VMS, I suspect that select() can't be interrupted + /* + * On VMS, I suspect that select() can't be interrupted * by a "signal" either, so I take the easy way out and * have select() time out after one second. * System clock updates really aren't time-critical, @@ -980,74 +981,73 @@ getgroup: # else /* normal I/O */ BLOCK_IO_AND_ALARM(); - was_alarmed = 0; - for (;;) - { + was_alarmed = FALSE; + for (;;) { # ifndef HAVE_SIGNALED_IO fd_set rdfdes; int nfound; # endif - if (alarm_flag) /* alarmed? */ - { - was_alarmed = 1; - alarm_flag = 0; + if (alarm_flag) { /* alarmed? */ + was_alarmed = TRUE; + alarm_flag = FALSE; } - if (!was_alarmed && has_full_recv_buffer() == ISC_FALSE) - { + if (!was_alarmed && !has_full_recv_buffer()) { /* * Nothing to do. Wait for something. */ # ifndef HAVE_SIGNALED_IO rdfdes = activefds; -# if defined(VMS) || defined(SYS_VXWORKS) +# if !defined(VMS) && !defined(SYS_VXWORKS) + nfound = select(maxactivefd + 1, &rdfdes, NULL, + NULL, NULL); +# else /* VMS, VxWorks */ /* make select() wake up after one second */ { struct timeval t1; - t1.tv_sec = 1; t1.tv_usec = 0; - nfound = select(maxactivefd+1, &rdfdes, (fd_set *)0, - (fd_set *)0, &t1); + t1.tv_sec = 1; + t1.tv_usec = 0; + nfound = select(maxactivefd + 1, + &rdfdes, NULL, NULL, + &t1); } -# else - nfound = select(maxactivefd+1, &rdfdes, (fd_set *)0, - (fd_set *)0, (struct timeval *)0); -# endif /* VMS */ - if (nfound > 0) - { +# endif /* VMS, VxWorks */ + if (nfound > 0) { l_fp ts; get_systime(&ts); - (void)input_handler(&ts); - } - else if (nfound == -1 && errno != EINTR) + input_handler(&ts); + } else if (nfound == -1 && errno != EINTR) { msyslog(LOG_ERR, "select() error: %m"); + } # ifdef DEBUG - else if (debug > 5) + else if (debug > 4) { msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound); + } else { + DPRINTF(1, ("select() returned %d: %m\n", nfound)); + } # endif /* DEBUG */ # else /* HAVE_SIGNALED_IO */ wait_for_signal(); # endif /* HAVE_SIGNALED_IO */ - if (alarm_flag) /* alarmed? */ - { - was_alarmed = 1; - alarm_flag = 0; + if (alarm_flag) { /* alarmed? */ + was_alarmed = TRUE; + alarm_flag = FALSE; } } - if (was_alarmed) - { + if (was_alarmed) { UNBLOCK_IO_AND_ALARM(); /* * Out here, signals are unblocked. Call timer routine * to process expiry. */ timer(); - was_alarmed = 0; + was_alarmed = FALSE; BLOCK_IO_AND_ALARM(); } @@ -1063,27 +1063,24 @@ getgroup: tsa = pts; # endif rbuf = get_full_recv_buffer(); - while (rbuf != NULL) - { - if (alarm_flag) - { - was_alarmed = 1; - alarm_flag = 0; + while (rbuf != NULL) { + if (alarm_flag) { + was_alarmed = TRUE; + alarm_flag = FALSE; } UNBLOCK_IO_AND_ALARM(); - if (was_alarmed) - { /* avoid timer starvation during lengthy I/O handling */ + if (was_alarmed) { + /* avoid timer starvation during lengthy I/O handling */ timer(); - was_alarmed = 0; + was_alarmed = FALSE; } /* * Call the data procedure to handle each received * packet. */ - if (rbuf->receiver != NULL) /* This should always be true */ - { + if (rbuf->receiver != NULL) { # ifdef DEBUG_TIMING l_fp dts = pts; @@ -1092,9 +1089,9 @@ getgroup: collect_timing(rbuf, "buffer processing delay", 1, &dts); bufcount++; # endif - (rbuf->receiver)(rbuf); + (*rbuf->receiver)(rbuf); } else { - msyslog(LOG_ERR, "receive buffer corruption - receiver found to be NULL - ABORTING"); + msyslog(LOG_ERR, "fatal: receive buffer callback NULL"); abort(); }