#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_reclaim_child_processes(int terminate)
{
- int i;
- long int waittime = 1024 * 16; /* in usecs */
+ apr_time_t waittime = 1024 * 16;
apr_status_t waitret;
- int tries;
+ int i;
int not_dead_yet;
int max_daemons;
+ apr_time_t starttime = apr_time_now();
+ /* this table of actions and elapsed times tells what action is taken
+ * at which elapsed time from starting the reclaim
+ */
+ struct {
+ enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action;
+ apr_time_t action_time;
+ } action_table[] = {
+ {DO_NOTHING, 0}, /* dummy entry for iterations where we reap
+ * children but take no action against
+ * stragglers
+ */
+ {SEND_SIGTERM, apr_time_from_sec(3)},
+ {SEND_SIGTERM, apr_time_from_sec(5)},
+ {SEND_SIGTERM, apr_time_from_sec(7)},
+ {SEND_SIGKILL, apr_time_from_sec(9)},
+ {GIVEUP, apr_time_from_sec(10)}
+ };
+ int cur_action; /* index of action we decided to take this
+ * iteration
+ */
+ int next_action = 1; /* index of first real action */
ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);
- for (tries = terminate ? 4 : 1; tries <= 9; ++tries) {
- /* don't want to hold up progress any more than
- * necessary, but we need to allow children a few moments to exit.
- * Set delay with an exponential backoff.
- */
+ do {
apr_sleep(waittime);
+ /* don't let waittime get longer than 1 second; otherwise, we don't
+ * react quickly to the last child exiting, and taking action can
+ * be delayed
+ */
waittime = waittime * 4;
+ if (waittime > apr_time_from_sec(1)) {
+ waittime = apr_time_from_sec(1);
+ }
+
+ /* see what action to take, if any */
+ if (action_table[next_action].action_time <= apr_time_now() - starttime) {
+ cur_action = next_action;
+ ++next_action;
+ }
+ else {
+ cur_action = 0; /* nothing to do */
+ }
/* now see who is done */
not_dead_yet = 0;
}
++not_dead_yet;
- switch (tries) {
- case 1: /* 16ms */
- case 2: /* 82ms */
- case 3: /* 344ms */
- case 4: /* 16ms */
+ switch(action_table[cur_action].action) {
+ case DO_NOTHING:
break;
-
- case 5: /* 82ms */
- case 6: /* 344ms */
- case 7: /* 1.4sec */
+
+ case SEND_SIGTERM:
/* ok, now it's being annoying */
ap_log_error(APLOG_MARK, APLOG_WARNING,
0, ap_server_conf,
- "child process %ld still did not exit, "
+ "child process %" APR_PID_T_FMT
+ " still did not exit, "
"sending a SIGTERM",
- (long)pid);
+ pid);
kill(pid, SIGTERM);
break;
-
- case 8: /* 6 sec */
- /* die child scum */
+
+ case SEND_SIGKILL:
ap_log_error(APLOG_MARK, APLOG_ERR,
0, ap_server_conf,
- "child process %ld still did not exit, "
+ "child process %" APR_PID_T_FMT
+ " still did not exit, "
"sending a SIGKILL",
- (long)pid);
+ pid);
#ifndef BEOS
kill(pid, SIGKILL);
#else
kill_thread(pid);
#endif
break;
-
- case 9: /* 14 sec */
+
+ case GIVEUP:
/* gave it our best shot, but alas... If this really
* is a child we are trying to kill and it really hasn't
* exited, we will likely fail to bind to the port
*/
ap_log_error(APLOG_MARK, APLOG_ERR,
0, ap_server_conf,
- "could not make child process %ld exit, "
+ "could not make child process %" APR_PID_T_FMT
+ " exit, "
"attempting to continue anyway",
- (long)pid);
+ pid);
break;
}
}
apr_proc_other_child_refresh_all(APR_OC_REASON_RESTART);
#endif
- if (!not_dead_yet) {
- /* nothing left to wait for */
- break;
- }
- }
+ } while (not_dead_yet > 0 &&
+ action_table[cur_action].action != GIVEUP);
}
#endif /* AP_MPM_WANT_RECLAIM_CHILD_PROCESSES */