From: Thibault Godouet Date: Thu, 9 Jun 2016 14:59:20 +0000 (+0100) Subject: Merge branch 'master' into suspend X-Git-Tag: ver3_3_0~7^2~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1beddcbf06453a1ac1cba15344cf092aafe301a9;p=thirdparty%2Ffcron.git Merge branch 'master' into suspend --- 1beddcbf06453a1ac1cba15344cf092aafe301a9 diff --cc conf.c index 6ea88ea,06e11ca..b9877ba --- a/conf.c +++ b/conf.c @@@ -839,16 -853,173 +853,16 @@@ add_line_to_file(cl_t * cl, cf_t * cf, Set(cl->cl_mailto, cl->cl_file->cf_user); } - /* job has been stopped during execution: insert it in lavg or serial queue - * if it was in one at fcron's stops. */ - /* NOTE: runatreboot is prioritary over jobs that were still running - * when fcron stops, because the former will get run quicker as they are not - * put into the serial queue. runatreboot jobs will be handled later on. */ - if (cl->cl_numexe > 0 && !is_runatreboot(cl->cl_option)) { - - cl->cl_numexe = 0; - if (is_lavg(cl->cl_option)) { - if (!is_strict(cl->cl_option)) - add_lavg_job(cl, -1); - } - else if (is_serial(cl->cl_option) - || is_serial_once(cl->cl_option)) - add_serial_job(cl, -1); - else { - /* job has been stopped during execution : - * launch it again */ - warn("job '%s' did not finish : running it again.", cl->cl_shell); - set_serial_once(cl->cl_option); - add_serial_job(cl, -1); - } - } - - if (is_system_startup || is_volatile(cl->cl_option)) { - clear_hasrun(cl->cl_option); - } - - if (is_runonce(cl->cl_option) && is_hasrun(cl->cl_option)) { - /* if we get here, then is_system_startup is_volatile are both false */ - /* do nothing: don't re-schedule or add to the job queue */ - explain("job '%s' with runonce set has already run since last " - "system startup: not re-scheduling.", cl->cl_shell); + /* make sure the timefreq is valid on @-lines or we could end up with + * infinite loops */ - if (!is_td(cl->cl_option) && cl->cl_timefreq < 10) { ++ if (!is_td(cl->cl_option) && cl->cl_timefreq < 1) { + error("Invalid timefreq %ld for job '%s': setting to 1 day", + cl->cl_timefreq, cl->cl_shell); + cl->cl_timefreq = 3600 * 24; } - else if (is_td(cl->cl_option)) { - - /* set the time and date of the next execution */ - if (is_system_startup && is_runatreboot(cl->cl_option)) { - - if (is_notice_notrun(cl->cl_option)) { - - if (cl->cl_runfreq == 1) { - /* %-line */ - set_next_exe_notrun(cl, SYSDOWN_RUNATREBOOT); - } - else { - /* set next exe and mail user */ - time_t since = cl->cl_nextexe; - - cl->cl_nextexe = now; - mail_notrun_time_t(cl, SYSDOWN, since); - } - - } - else { - cl->cl_nextexe = now; - } - - insert_nextexe(cl); - - } - else if (cl->cl_nextexe <= now) { - if (cl->cl_nextexe == 0) - /* the is a line from a new file */ - set_next_exe(cl, NO_GOTO, -1); - else if (cl->cl_runfreq == 1 && is_notice_notrun(cl->cl_option)) - set_next_exe_notrun(cl, SYSDOWN); - else if (is_bootrun(cl->cl_option) && t_save != 0 - && cl->cl_runfreq != 1) { - if (cl->cl_remain > 0 && --cl->cl_remain > 0) { - debug(" cl_remain: %d", cl->cl_remain); - } - else { - /* run bootrun jobs */ - cl->cl_remain = cl->cl_runfreq; - debug(" boot-run '%s'", cl->cl_shell); - if (!is_lavg(cl->cl_option)) { - set_serial_once(cl->cl_option); - add_serial_job(cl, -1); - } - else - add_lavg_job(cl, -1); - } - set_next_exe(cl, STD, -1); - } - else { - if (is_notice_notrun(cl->cl_option)) { - /* set next exe and mail user */ - time_t since = cl->cl_nextexe; - - set_next_exe(cl, NO_GOTO, -1); - mail_notrun_time_t(cl, SYSDOWN, since); - } - else - set_next_exe(cl, NO_GOTO, -1); - } - } - else { - /* value of nextexe is valid : just insert line in queue */ - insert_nextexe(cl); - } - } - else { /* is_td(cl->cl_option) */ - if (cl->cl_timefreq < 1) { - error("Invalid timefreq %ld for job '%s': setting to 1 day", - cl->cl_timefreq, cl->cl_shell); - cl->cl_timefreq = 3600 * 24; - } - - /* standard @-lines */ - if (is_system_startup && is_runatreboot(cl->cl_option)) { - cl->cl_nextexe = now; - } - /* t_save == 0 means this is a new file, hence a new line */ - else if (t_save == 0 || is_volatile(cl->cl_option) - || (is_system_startup && (is_rebootreset(cl->cl_option) - || is_runonce(cl->cl_option)))) { - /* cl_first is always saved to disk for a volatile line */ - if (cl->cl_first == LONG_MAX) { - cl->cl_nextexe = TIME_T_MAX; - } - else { - cl->cl_nextexe = now + cl->cl_first; - if (cl->cl_nextexe < now || cl->cl_nextexe > TIME_T_MAX) { - /* there was an integer overflow! */ - error - ("Error while setting next exe time for job '%s': cl_nextexe" - " overflowed (case1). now=%lu, cl_timefreq=%lu, cl_nextexe=%lu.", - cl->cl_shell, now, cl->cl_timefreq, cl->cl_nextexe); - error - ("Setting cl_nextexe to TIME_T_MAX to prevent an infinite loop."); - cl->cl_nextexe = TIME_T_MAX; - } - } - } - else { - if (cl->cl_nextexe != LONG_MAX) { - cl->cl_nextexe += slept; - if (cl->cl_nextexe < now || cl->cl_nextexe > TIME_T_MAX) { - /* either there was an integer overflow, or the slept time is incorrect - * (e.g. fcron didn't shut down cleanly and the fcrontab wasn't saved correctly) */ - error - ("Error while setting next exe time for job '%s': cl_nextexe" - " overflowed (case2). now=%lu, cl_timefreq=%lu, cl_nextexe=%lu. " - "Did fcron shut down cleanly?", - cl->cl_shell, now, cl->cl_timefreq, cl->cl_nextexe); - error - ("Setting cl_nextexe to now+cl_timefreq to prevent an infinite loop."); - cl->cl_nextexe = now + cl->cl_timefreq; - error("next execution will now be at %ld.", cl->cl_nextexe); - } - } - } - - insert_nextexe(cl); - } - - if (debug_opt && !(is_runonce(cl->cl_option) && is_hasrun(cl->cl_option))) { - struct tm *ftime; - ftime = localtime(&(cl->cl_nextexe)); - debug(" cmd '%s' next exec %04d-%02d-%02d wday:%d %02d:%02d:%02d" - " (system time)", - cl->cl_shell, (ftime->tm_year + 1900), (ftime->tm_mon + 1), - ftime->tm_mday, ftime->tm_wday, ftime->tm_hour, ftime->tm_min, - ftime->tm_sec); - } + set_next_exe_startup(cl, is_system_startup ? CONTEXT_BOOT : CONTEXT_DEFAULT, + sleep_duration); /* add the current line to the list, and allocate a new line */ if ((cl->cl_id = next_id++) >= ULONG_MAX - 1) { diff --cc database.c index 2bd04fb,c672e6e..6c6d051 --- a/database.c +++ b/database.c @@@ -1335,212 -1336,6 +1335,212 @@@ set_next_exe_notrun(cl_t * line, char c } +void +reschedule_all_on_resume(const time_t sleep_duration) +/* walk through all files and lines, update the schedule and run as appropriate */ +{ + cf_t *file = NULL; + + for (file = file_base; file; file = file->cf_next) { + cl_t *line = NULL; + + debug("Re-scheduling %s's jobs...", file->cf_user); + + for (line = file->cf_line_base; line; line = line->cl_next) { + set_next_exe_startup(line, CONTEXT_RESUME, sleep_duration); + } + + } +} + +void +set_next_exe_startup(struct cl_t *cl, const int context, + const time_t sleep_duration) + /* Schedule the next execution at startup (or a new file, + * or after a computer suspend/hibernation */ +{ + int is_new_file = (sleep_duration == now) ? 1 : 0; + + /* if job was stopped during execution: insert it in lavg or serial queue + * if it was in one when fcron stopped. + * This only applies to fcron startup and not system resume, as in the latter case + * the job would still be running in the background: in that case we leave it + * to finish normally and we don't run them again. */ + /* NOTE: + * - runatreboot has higher priority than jobs that were still running + * when fcron stopped, because the former will get run quicker as they are not + * put into the serial queue. runatreboot jobs will be handled later on. */ + if (context != CONTEXT_RESUME && cl->cl_numexe > 0 + && !is_runatreboot(cl->cl_option)) { + + cl->cl_numexe = 0; + if (is_lavg(cl->cl_option)) { + if (!is_strict(cl->cl_option)) + add_lavg_job(cl, -1); + } + else if (is_serial(cl->cl_option) + || is_serial_once(cl->cl_option)) + add_serial_job(cl, -1); + else { + /* job has been stopped during execution : + * launch it again */ + warn("job '%s' did not finish : running it again.", cl->cl_shell); + set_serial_once(cl->cl_option); + add_serial_job(cl, -1); + } + } + + if (context == CONTEXT_BOOT + || (context == CONTEXT_DEFAULT && is_volatile(cl->cl_option))) { + clear_hasrun(cl->cl_option); + } + + if (is_runonce(cl->cl_option) && is_hasrun(cl->cl_option)) { + /* if we get here, then context != CONTEXT_BOOT and_volatile is false */ + /* do nothing: don't re-schedule or add to the job queue */ + explain("job '%s' with runonce set has already run since last " + "system startup: not re-scheduling.", cl->cl_shell); + } + else if (is_td(cl->cl_option)) { + + /* set the time and date of the next execution */ + if (context == CONTEXT_BOOT && is_runatreboot(cl->cl_option)) { + + if (is_notice_notrun(cl->cl_option)) { + + if (cl->cl_runfreq == 1) { + /* %-line */ + set_next_exe_notrun(cl, SYSDOWN_RUNATREBOOT); + } + else { + /* set next exe and mail user */ + time_t since = cl->cl_nextexe; + + cl->cl_nextexe = now; + mail_notrun_time_t(cl, SYSDOWN, since); + } + + } + else { + cl->cl_nextexe = now; + } + + insert_nextexe(cl); + + } + else if (cl->cl_nextexe <= now) { + if (cl->cl_nextexe == 0) + /* the is a line from a new file */ + set_next_exe(cl, NO_GOTO, -1); + else if (cl->cl_runfreq == 1 && is_notice_notrun(cl->cl_option)) + set_next_exe_notrun(cl, SYSDOWN); + else if (is_bootrun(cl->cl_option) && !is_new_file + && cl->cl_runfreq != 1) { + if (cl->cl_remain > 0 && --cl->cl_remain > 0) { + debug(" cl_remain: %d", cl->cl_remain); + } + else { + /* run bootrun jobs */ + cl->cl_remain = cl->cl_runfreq; + debug(" boot-run '%s'", cl->cl_shell); + if (!is_lavg(cl->cl_option)) { + set_serial_once(cl->cl_option); + add_serial_job(cl, -1); + } + else + add_lavg_job(cl, -1); + } + set_next_exe(cl, STD, -1); + } + else { + if (is_notice_notrun(cl->cl_option)) { + /* set next exe and mail user */ + time_t since = cl->cl_nextexe; + + set_next_exe(cl, NO_GOTO, -1); + mail_notrun_time_t(cl, SYSDOWN, since); + + } + else + set_next_exe(cl, NO_GOTO, -1); + } + } + else { + /* value of nextexe is valid : just insert line in queue unless + * this is a system resume, in which case the line will be there + * already: */ + if (context != CONTEXT_RESUME) { + insert_nextexe(cl); + } + } + } + else { /* is_td(cl->cl_option) */ - if (cl->cl_timefreq < 10) { ++ if (cl->cl_timefreq < 1) { + error("Invalid timefreq %ld for job '%s': setting to 1 day", + cl->cl_timefreq, cl->cl_shell); + cl->cl_timefreq = 3600 * 24; + } + + /* standard @-lines */ + if (context == CONTEXT_BOOT && is_runatreboot(cl->cl_option)) { + cl->cl_nextexe = now; + } + else if (is_new_file || is_volatile(cl->cl_option) + || (context == CONTEXT_BOOT && (is_rebootreset(cl->cl_option) + || is_runonce(cl->cl_option)))) { + /* cl_first is always saved to disk for a volatile line */ + if (cl->cl_first == LONG_MAX) { + cl->cl_nextexe = TIME_T_MAX; + } + else { + cl->cl_nextexe = now + cl->cl_first; + if (cl->cl_nextexe < now || cl->cl_nextexe > TIME_T_MAX) { + /* there was an integer overflow! */ + error + ("Error while setting next exe time for job '%s': cl_nextexe" + " overflowed (case1). now=%lu, cl_timefreq=%lu, cl_nextexe=%lu.", + cl->cl_shell, now, cl->cl_timefreq, cl->cl_nextexe); + error + ("Setting cl_nextexe to TIME_T_MAX to prevent an infinite loop."); + cl->cl_nextexe = TIME_T_MAX; + } + } + } + else { + if (cl->cl_nextexe != LONG_MAX) { + cl->cl_nextexe += sleep_duration; + if (cl->cl_nextexe < now || cl->cl_nextexe > TIME_T_MAX) { + /* either there was an integer overflow, or the sleep_duration time is incorrect + * (e.g. fcron didn't shut down cleanly and the fcrontab wasn't saved correctly) */ + error + ("Error while setting next exe time for job '%s': cl_nextexe" + " overflowed (case2). now=%lu, cl_timefreq=%lu, cl_nextexe=%lu. " + "Did fcron shut down cleanly?", + cl->cl_shell, now, cl->cl_timefreq, cl->cl_nextexe); + error + ("Setting cl_nextexe to now+cl_timefreq to prevent an infinite loop."); + cl->cl_nextexe = now + cl->cl_timefreq; + error("next execution will now be at %ld.", cl->cl_nextexe); + } + } + } + + insert_nextexe(cl); + } + + if (debug_opt && !(is_runonce(cl->cl_option) && is_hasrun(cl->cl_option))) { + struct tm *ftime; + ftime = localtime(&(cl->cl_nextexe)); + debug(" cmd '%s' next exec %04d-%02d-%02d wday:%d %02d:%02d:%02d" + " (system time)", + cl->cl_shell, (ftime->tm_year + 1900), (ftime->tm_mon + 1), + ftime->tm_mday, ftime->tm_wday, ftime->tm_hour, ftime->tm_min, + ftime->tm_sec); + } + +} + + void mail_notrun_time_t(cl_t * line, char context, time_t since_time_t) /* Same as mail_notrun() but with 'since' defined as a time_t instead of a struct tm */ diff --cc doc/en/todo.sgml index 68c123f,6c5956f..a1341c7 --- a/doc/en/todo.sgml +++ b/doc/en/todo.sgml @@@ -26,14 -26,8 +26,20 @@@ A copy of the license is included in gf Option to compile and install from git sources without generating the doc + - register in OS suspend/hibernate mechanism to stop fcron when going to sleep and start it again when resuming from sleep (see FAQ entry). ++ On Linux systems, replace suspendfile by clock_gettime() calls: CLOCK_BOOTTIME - CLOCK_MONOTONIC will give us the total suspend duration. Should be simpler, more elegant and accurate. ++ ++ ++ On Linux systems, use timerfd_create()/timerfd_settime(TFD_TIMER_ABSTIME|TFD_TIMER_CANCEL_ON_SET) to get notified of time jumps. On other systems, keep SIGCONT with a hook in systemd/pm-utils. ++ + + add systemd suspend hooks for fcron (contribution welcome) + + - add a 'runatresume' option, to run when the computer resumes? (similar to runatreboot) ++ add a 'runatresume' (or 'runonclockchanges'?) option, to run when the computer resumes? (similar to runatreboot) + + + use ask_user() in boot-install diff --cc fcron.c index dce220e,5e9bee0..15dc37c --- a/fcron.c +++ b/fcron.c @@@ -44,9 -44,6 +44,9 @@@ RETSIGTYPE sigterm_handler(int x) RETSIGTYPE sigchild_handler(int x); RETSIGTYPE sigusr1_handler(int x); RETSIGTYPE sigusr2_handler(int x); +RETSIGTYPE sigcont_handler(int x); +long int get_suspend_duration(time_t slept_from); - void check_suspend(time_t slept_from, time_t planned_sleep); ++void check_suspend(time_t slept_from, time_t nwt); int parseopt(int argc, char *argv[]); void get_lock(void); int is_system_reboot(void); @@@ -524,165 -521,6 +525,167 @@@ sigusr2_handler(int x sig_debug = 1; } +RETSIGTYPE +sigcont_handler(int x) + /* used to notify fcron of a system resume after suspend. + * However this signal could also be received in other cases. */ +{ + sig_cont = 1; +} + +long int +get_suspend_duration(time_t slept_from) + /* Return the amount of time the system was suspended (to mem or disk). + * Return 0 on error. + * + * The idea is that: + * 1) the OS sends the STOP signal to the main fcron process when suspending + * 2) the OS writes the suspend duration (as a string) into suspendfile, + * and then sends the CONT signal to the main fcron process when resuming. + * + * The main reason to do it this way instead of killing fcron and restarting + * it on resume is to better handle jobs that may already be running. + * (e.g. don't run them again when the machine resumes) */ +{ + int fd = -1; + char buf[TERM_LEN]; + int read_len = 0; - long int suspend_duration = 0; /* default value to return on error */ ++ long int suspend_duration = 0; /* default value to return on error */ + struct stat s; + + if (sig_cont <= 0) { + /* signal not raised -- do nothing */ + return 0; + } + + /* the signal CONT was raised: reset the signal and check the suspendfile */ + sig_cont = 0; + + fd = open(suspendfile, O_RDONLY | O_NONBLOCK); + if (fd == -1) { + /* If the file doesn't exist, then we assume the user/system + * did a manual 'kill -STOP' / 'kill -CONT' and doesn't intend + * for fcron to account for any suspend time. + * This is not considered as an error. */ + if (errno != ENOENT) { + error_e("Could not open suspend file '%s'", suspendfile); + } + goto cleanup_return; + } + + /* check the file is a 'normal' file (e.g. not a link) and only writable + * by root -- don't allow attacker to affect job schedules, + * or delete the suspendfile */ + if (fstat(fd, &s) < 0) { + error_e("could not fstat() suspend file '%s'", suspendfile); + goto cleanup_return; + } + if (!S_ISREG(s.st_mode) || s.st_nlink != 1) { + error_e("suspend file %s is not a regular file", suspendfile); + goto cleanup_return; + } + + if (s.st_mode & S_IWOTH || s.st_uid != rootuid || s.st_gid != rootgid) { + error("suspend file %s must be owned by %s:%s and not writable by" - " others.", suspendfile, ROOTNAME, ROOTGROUP); ++ " others.", suspendfile, ROOTNAME, ROOTGROUP); + goto cleanup_return; + } + + /* read the content of the suspendfile into the buffer */ + read_len = read(fd, buf, sizeof(buf) - 1); + if (read_len < 0) { + /* we have to run this immediately or errno may be changed */ + error_e("Could not read suspend file '%s'", suspendfile); + goto unlink_cleanup_return; + } + if (read_len < 0) { + goto unlink_cleanup_return; + } + buf[read_len] = '\0'; + + errno = 0; + suspend_duration = strtol(buf, NULL, 10); + if (errno != 0) { + error_e("Count not parse suspend duration '%s'", buf); + suspend_duration = 0; + goto unlink_cleanup_return; + } + else if (suspend_duration < 0) { + warn("Read negative suspend_duration (%ld): ignoring."); + suspend_duration = 0; + goto unlink_cleanup_return; + } + else { + debug("Read suspend_duration of '%ld' from suspend file '%s'", + suspend_duration, suspendfile); + + if (now < slept_from + suspend_duration) { + long int time_slept = now - slept_from; + + /* we can have a couple of seconds more due to rounding up, + * but anything more should be an invalid value in suspendfile */ + explain("Suspend duration %lds in suspend file '%s' is longer than " + "we slept. This could be due to rounding. " + "Reverting to time slept %lds.", + suspend_duration, suspendfile, time_slept); + suspend_duration = time_slept; + } + } + - unlink_cleanup_return: ++ unlink_cleanup_return: + if (unlink(suspendfile) < 0) { + warn_e("Could not remove suspend file '%s'", suspendfile); + return 0; + } + - cleanup_return: ++ cleanup_return: + if (fd >= 0 && xclose(&fd) < 0) { + warn_e("Could not xclose() suspend file '%s'", suspendfile); + } + +#ifdef HAVE_SIGNAL + signal(SIGCONT, sigcont_handler); + siginterrupt(SIGCONT, 0); +#endif + + return suspend_duration; + +} + +void - check_suspend(time_t slept_from, time_t planned_sleep) ++check_suspend(time_t slept_from, time_t nwt) + /* Check if the machine was suspended (to mem or disk), and if so + * reschedule jobs accordingly */ +{ + long int suspend_duration; /* amount of time the system was suspended */ - long int actual_sleep; /* time we actually slept */ ++ long int time_diff; /* estimate of suspend_duration (as fallback) */ + + suspend_duration = get_suspend_duration(slept_from); + + /* Also check if there was an unaccounted sleep duration, in case + * the OS is not configured to let fcron properly know about suspends + * via suspendfile. + * This is not perfect as we may miss some suspend time if fcron + * is woken up before the timer expiry, e.g. due to a signal + * or activity on a socket (fcrondyn). + * NOTE: the +5 second is arbitrary -- just a way to make sure + * we don't get any false positive. If the suspend or hibernate + * is very short it seems fine to simply ignore it anyway */ - actual_sleep = now - slept_from; - if (suspend_duration <= 0 && (actual_sleep - planned_sleep) > 5) { - suspend_duration = actual_sleep - planned_sleep; ++ time_diff = now - nwt; ++ if (suspend_duration <= 0 && time_diff > 5) { ++ suspend_duration = time_diff; + } + + if (suspend_duration > 0) { ++ long int actual_sleep = now - slept_from; ++ long int scheduled_sleep = nwt - slept_from; + explain("suspend/hibernate detected: we woke up after %lus" + " instead of %lus. The system was suspended for %lus.", - actual_sleep, planned_sleep, suspend_duration); ++ actual_sleep, scheduled_sleep, suspend_duration); + reschedule_all_on_resume(suspend_duration); + } +} + int main(int argc, char **argv) @@@ -814,12 -651,10 +817,13 @@@ siginterrupt(SIGUSR1, 0); signal(SIGUSR2, sigusr2_handler); siginterrupt(SIGUSR2, 0); + signal(SIGCONT, sigcont_handler); + siginterrupt(SIGCONT, 0); - /* we don't want SIGPIPE to kill fcron, and don't need to handle it */ + /* we don't want SIGPIPE to kill fcron, and don't need to handle it as when ignored + * write() on a pipe closed at the other end will return EPIPE */ signal(SIGPIPE, SIG_IGN); #elif HAVE_SIGSET + /* FIXME: check for errors */ sigset(SIGTERM, sigterm_handler); sigset(SIGHUP, sighup_handler); sigset(SIGCHLD, sigchild_handler); @@@ -940,14 -774,13 +944,14 @@@ main_loop( * sleep, and then test all jobs and execute if needed. */ { time_t save; /* time remaining until next save */ - time_t stime; /* time to sleep until next job - * execution */ + time_t slept_from; /* time it was when we went into sleep */ + time_t nwt; /* next wake time */ #ifdef HAVE_GETTIMEOFDAY - struct timeval tv; /* we use usec field to get more precision */ + struct timeval now_tv; /* we use usec field to get more precision */ #endif - #ifdef FCRONDYN + #if defined(FCRONDYN) && defined(HAVE_GETTIMEOFDAY) int retcode = 0; + struct timeval sleep_tv; /* we use usec field to get more precision */ #endif debug("Entering main loop"); @@@ -969,48 -804,89 +975,96 @@@ for (;;) { + /* remember when we started to sleep -- this is to detect suspend/hibernate */ + slept_from = time(NULL); + #ifdef HAVE_GETTIMEOFDAY #ifdef FCRONDYN - gettimeofday(&tv, NULL); - tv.tv_sec = (stime > 1) ? stime - 1 : 0; - /* we set tv_usec to slightly more than necessary so as - * we don't wake up too early, in which case we would - * have to sleep again for some time */ - tv.tv_usec = 1001000 - tv.tv_usec; - /* On some systems (BSD, etc), tv_usec cannot be greater than 999999 */ - if (tv.tv_usec > 999999) - tv.tv_usec = 999999; + gettimeofday(&now_tv, NULL); + debug("now gettimeofday tv_sec=%ld, tv_usec=%ld %s", now_tv.tv_sec, + now_tv.tv_usec, ctime(&nwt)); + + if (nwt <= now_tv.tv_sec) { + sleep_tv.tv_sec = 0; + sleep_tv.tv_usec = 0; + } + else { + /* compensate for any time spent in the loop, + * so as we wake up exactly at the beginning of the second */ + sleep_tv.tv_sec = nwt - now_tv.tv_sec - 1; + /* we set tv_usec to slightly more than necessary so as we don't wake + * up too early (e.g. due to rounding to the system clock granularity), + * in which case we would have to go back to sleep again */ + sleep_tv.tv_usec = 1000000 + min_sleep_usec - now_tv.tv_usec; + } + + if (sleep_tv.tv_usec > 999999) { + /* On some systems (BSD, etc), tv_usec cannot be greater than 999999 */ + sleep_tv.tv_usec = 999999; + } + else if (sleep_tv.tv_sec == 0 && sleep_tv.tv_usec < min_sleep_usec) { + /* to prevent any infinite loop, sleep at least 1ms */ + debug + ("We'll sleep for a tiny bit to avoid any risk of infinite loop"); + sleep_tv.tv_usec = min_sleep_usec; + } /* note: read_set is set in socket.c */ - if ((retcode = select(set_max_fd + 1, &read_set, NULL, NULL, &tv)) < 0 + debug("nwt=%s, sleep sec=%ld, usec=%ld", ctime(&nwt), sleep_tv.tv_sec, + sleep_tv.tv_usec); + if ((retcode = + select(set_max_fd + 1, &read_set, NULL, NULL, &sleep_tv)) < 0 && errno != EINTR) die_e("select returned %d", errno); - #else - if (stime > 1) - sleep(stime - 1); - gettimeofday(&tv, NULL); + #else /* FCRONDYN */ + if (nwt - now > 0) { + sleep(nwt - now - 1); + } + gettimeofday(&now_tv, NULL); /* we set tv_usec to slightly more than necessary to avoid * infinite loop */ - usleep(1001000 - tv.tv_usec); + usleep(1000000 + min_sleep_usec - now_tv.tv_usec); #endif /* FCRONDYN */ - #else - sleep(stime); + #else /* HAVE_GETTIMEOFDAY */ + if (nwt - now > 0) { + sleep(nwt - now); + } #endif /* HAVE_GETTIMEOFDAY */ - now = time(NULL); + debug("\n"); + now = my_time(); + #ifdef HAVE_GETTIMEOFDAY + if (debug_opt) { + gettimeofday(&now_tv, NULL); + debug("now=%ld now_tv sec=%ld usec=%ld", now, now_tv.tv_sec, + now_tv.tv_usec); + } + #endif + debug("\n"); + check_signal(); + - check_suspend(slept_from, stime); ++ check_suspend(slept_from, nwt); + #ifdef HAVE_GETTIMEOFDAY + if (debug_opt) { + gettimeofday(&now_tv, NULL); + debug("after check_signal: now_tv sec=%ld usec=%ld", now_tv.tv_sec, + now_tv.tv_usec); + } + #endif test_jobs(); + #ifdef HAVE_GETTIMEOFDAY + if (debug_opt) { + gettimeofday(&now_tv, NULL); + debug("after test_jobs: now_tv sec=%ld usec=%ld", now_tv.tv_sec, + now_tv.tv_usec); + } + #endif - while (serial_num > 0 && serial_running < serial_max_running) + while (serial_num > 0 && serial_running < serial_max_running) { run_serial_job(); + } if (once) { explain("Running with option once : exiting ... ");