From: Jeff Trawick Date: Sat, 18 Sep 2004 00:33:56 +0000 (+0000) Subject: Unix MPMs: Shut down the server more quickly when child processes are X-Git-Tag: 2.1.1~244 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=130c053d26b3b5a4a459024c94e5ee9113a688a0;p=thirdparty%2Fapache%2Fhttpd.git Unix MPMs: Shut down the server more quickly when child processes are slow to exit. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@105195 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/CHANGES b/CHANGES index b995a527b92..a0e2201f770 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,9 @@ Changes with Apache 2.1.0-dev [Remove entries to the current 2.0 section below, when backported] + *) Unix MPMs: Shut down the server more quickly when child processes are + slow to exit. [Joe Orton, Jeff Trawick] + *) mod_info: Added listing of the Request Hooks and added more build information like 'httpd -V' contains. Changed output to XHTML. [Paul Querna] diff --git a/server/mpm_common.c b/server/mpm_common.c index 101a0af7793..e4c6de073ad 100644 --- a/server/mpm_common.c +++ b/server/mpm_common.c @@ -61,22 +61,55 @@ #ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES void ap_reclaim_child_processes(int terminate) { - int i; - long int waittime = 1024 * 16; /* in usecs */ + apr_time_t waittime = 1024 * 16; apr_status_t waitret; - int tries; + int i; int not_dead_yet; int max_daemons; + apr_time_t starttime = apr_time_now(); + /* this table of actions and elapsed times tells what action is taken + * at which elapsed time from starting the reclaim + */ + struct { + enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action; + apr_time_t action_time; + } action_table[] = { + {DO_NOTHING, 0}, /* dummy entry for iterations where we reap + * children but take no action against + * stragglers + */ + {SEND_SIGTERM, apr_time_from_sec(3)}, + {SEND_SIGTERM, apr_time_from_sec(5)}, + {SEND_SIGTERM, apr_time_from_sec(7)}, + {SEND_SIGKILL, apr_time_from_sec(9)}, + {GIVEUP, apr_time_from_sec(10)} + }; + int cur_action; /* index of action we decided to take this + * iteration + */ + int next_action = 1; /* index of first real action */ ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons); - for (tries = terminate ? 4 : 1; tries <= 9; ++tries) { - /* don't want to hold up progress any more than - * necessary, but we need to allow children a few moments to exit. - * Set delay with an exponential backoff. - */ + do { apr_sleep(waittime); + /* don't let waittime get longer than 1 second; otherwise, we don't + * react quickly to the last child exiting, and taking action can + * be delayed + */ waittime = waittime * 4; + if (waittime > apr_time_from_sec(1)) { + waittime = apr_time_from_sec(1); + } + + /* see what action to take, if any */ + if (action_table[next_action].action_time <= apr_time_now() - starttime) { + cur_action = next_action; + ++next_action; + } + else { + cur_action = 0; /* nothing to do */ + } /* now see who is done */ not_dead_yet = 0; @@ -95,32 +128,28 @@ void ap_reclaim_child_processes(int terminate) } ++not_dead_yet; - switch (tries) { - case 1: /* 16ms */ - case 2: /* 82ms */ - case 3: /* 344ms */ - case 4: /* 16ms */ + switch(action_table[cur_action].action) { + case DO_NOTHING: break; - - case 5: /* 82ms */ - case 6: /* 344ms */ - case 7: /* 1.4sec */ + + case SEND_SIGTERM: /* ok, now it's being annoying */ ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf, - "child process %ld still did not exit, " + "child process %" APR_PID_T_FMT + " still did not exit, " "sending a SIGTERM", - (long)pid); + pid); kill(pid, SIGTERM); break; - - case 8: /* 6 sec */ - /* die child scum */ + + case SEND_SIGKILL: ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, - "child process %ld still did not exit, " + "child process %" APR_PID_T_FMT + " still did not exit, " "sending a SIGKILL", - (long)pid); + pid); #ifndef BEOS kill(pid, SIGKILL); #else @@ -133,8 +162,8 @@ void ap_reclaim_child_processes(int terminate) kill_thread(pid); #endif break; - - case 9: /* 14 sec */ + + case GIVEUP: /* gave it our best shot, but alas... If this really * is a child we are trying to kill and it really hasn't * exited, we will likely fail to bind to the port @@ -142,9 +171,10 @@ void ap_reclaim_child_processes(int terminate) */ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, - "could not make child process %ld exit, " + "could not make child process %" APR_PID_T_FMT + " exit, " "attempting to continue anyway", - (long)pid); + pid); break; } } @@ -153,11 +183,8 @@ void ap_reclaim_child_processes(int terminate) apr_proc_other_child_refresh_all(APR_OC_REASON_RESTART); #endif - if (!not_dead_yet) { - /* nothing left to wait for */ - break; - } - } + } while (not_dead_yet > 0 && + action_table[cur_action].action != GIVEUP); } #endif /* AP_MPM_WANT_RECLAIM_CHILD_PROCESSES */