From: Jeff Trawick <trawick@apache.org>
Date: Thu, 2 Dec 2004 17:39:22 +0000 (+0000)
Subject: worker MPM: Fix a problem which could cause httpd processes to
X-Git-Tag: 2.1.2~21
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=592750b026b9a5d3a7f401796ec1763ca2b79984;p=thirdparty%2Fapache%2Fhttpd.git

worker MPM: Fix a problem which could cause httpd processes to
remain active after shutdown.

The problem occurred when a scoreboard entry currently
in use by an exiting child process was used for a new child
process.  At that point, the MPM forgot about the exiting
child process, so ap_reclaim_child_processes() wouldn't be
able to forceably terminate it.

(An exiting child process may *never* exit due to a stuck
or long-running request being handled on one of the threads.)


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@109510 13f79535-47bb-0310-9956-ffa450edef68
---

diff --git a/CHANGES b/CHANGES
index c8f0d56264b..2c7990d28c5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,9 @@ Changes with Apache 2.1.2-dev
 
   [Remove entries to the current 2.0 section below, when backported]
 
+  *) worker MPM: Fix a problem which could cause httpd processes to
+     remain active after shutdown.  [Jeff Trawick]
+
   *) core: Error out on sections that are missing an argument instead of
      silently consuming the section. PR 25460.
      [Geoffrey Young, Paul Querna]
diff --git a/include/mpm_common.h b/include/mpm_common.h
index 85a4db56068..e10f840c3df 100644
--- a/include/mpm_common.h
+++ b/include/mpm_common.h
@@ -59,7 +59,7 @@ extern "C" {
  * Make sure all child processes that have been spawned by the parent process
  * have died.  This includes process registered as "other_children".
  * @warning This is only defined if the MPM defines 
- *          MPM_NEEDS_RECLAIM_CHILD_PROCESS
+ *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
  * @param terminate Either 1 or 0.  If 1, send the child processes SIGTERM
  *        each time through the loop.  If 0, give the process time to die
  *        on its own before signalling it.
@@ -67,11 +67,42 @@ extern "C" {
  *  MPM_CHILD_PID -- Get the pid from the specified spot in the scoreboard
  *  MPM_NOTE_CHILD_KILLED -- Note the child died in the scoreboard
  * </pre>
+ * @tip The MPM child processes which are reclaimed are those listed
+ * in the scoreboard as well as those currently registered via
+ * ap_register_extra_mpm_process().
  */
 #ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
 void ap_reclaim_child_processes(int terminate);
 #endif
 
+/**
+ * Tell ap_reclaim_child_processes() about an MPM child process which has no
+ * entry in the scoreboard.
+ * @warning This is only defined if the MPM defines
+ *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+ * @param pid The process id of an MPM child process which should be
+ * reclaimed when ap_reclaim_child_processes() is called.
+ * @tip If an extra MPM child process terminates prior to calling
+ * ap_reclaim_child_processes(), remove it from the list of such processes
+ * by calling ap_unregister_extra_mpm_process().
+ */
+#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+void ap_register_extra_mpm_process(pid_t pid);
+#endif
+
+/**
+ * Unregister an MPM child process which was previously registered by a
+ * call to ap_register_extra_mpm_process().
+ * @warning This is only defined if the MPM defines
+ *          AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+ * @param pid The process id of an MPM child process which no longer needs to
+ * be reclaimed.
+ * @return 1 if the process was found and removed, 0 otherwise
+ */
+#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+int ap_unregister_extra_mpm_process(pid_t pid);
+#endif
+
 /**
  * Determine if any child process has died.  If no child process died, then
  * this process sleeps for the amount of time specified by the MPM defined
diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c
index b13facdd333..764036b8344 100644
--- a/server/mpm/worker/worker.c
+++ b/server/mpm/worker/worker.c
@@ -1310,6 +1310,21 @@ static int make_child(server_rec *s, int slot)
         clean_child_exit(0);
     }
     /* else */
+    if (ap_scoreboard_image->parent[slot].pid != 0) {
+        /* This new child process is squatting on the scoreboard
+         * entry owned by an exiting child process, which cannot
+         * exit until all active requests complete.
+         * Don't forget about this exiting child process, or we
+         * won't be able to kill it if it doesn't exit by the
+         * time the server is shut down.
+         */
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
+                     "taking over scoreboard slot from %" APR_PID_T_FMT "%s",
+                     ap_scoreboard_image->parent[slot].pid,
+                     ap_scoreboard_image->parent[slot].quiescing ?
+                         " (quiescing)" : "");
+        ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid);
+    }
     ap_scoreboard_image->parent[slot].quiescing = 0;
     ap_scoreboard_image->parent[slot].pid = pid;
     return 0;
@@ -1524,6 +1539,9 @@ static void server_main_loop(int remaining_children_to_start)
                     make_child(ap_server_conf, child_slot);
                     --remaining_children_to_start;
                 }
+            }
+            else if (ap_unregister_extra_mpm_process(pid.pid) == 1) {
+                /* handled */
 #if APR_HAS_OTHER_CHILD
             }
             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
diff --git a/server/mpm_common.c b/server/mpm_common.c
index 58fd3113ea7..e2f9adb8a86 100644
--- a/server/mpm_common.c
+++ b/server/mpm_common.c
@@ -59,11 +59,120 @@
 #endif
 
 #ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+
+typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t;
+
+typedef struct extra_process_t {
+    struct extra_process_t *next;
+    pid_t pid;
+} extra_process_t;
+
+static extra_process_t *extras;
+
+void ap_register_extra_mpm_process(pid_t pid)
+{
+    extra_process_t *p = (extra_process_t *)malloc(sizeof(extra_process_t));
+
+    p->next = extras;
+    p->pid = pid;
+    extras = p;
+}
+
+int ap_unregister_extra_mpm_process(pid_t pid)
+{
+    extra_process_t *cur = extras;
+    extra_process_t *prev = NULL;
+
+    while (cur && cur->pid != pid) {
+        prev = cur;
+        cur = cur->next;
+    }
+
+    if (cur) {
+        if (prev) {
+            prev->next = cur->next;
+        }
+        else {
+            extras = cur->next;
+        }
+        free(cur);
+        return 1; /* found */
+    }
+    else {
+        /* we don't know about any such process */
+        return 0;
+    }
+}
+
+static int reclaim_one_pid(pid_t pid, action_t action)
+{
+    apr_proc_t proc;
+    apr_status_t waitret;
+
+    proc.pid = pid;
+    waitret = apr_proc_wait(&proc, NULL, NULL, APR_NOWAIT);
+    if (waitret != APR_CHILD_NOTDONE) {
+        return 1;
+    }
+
+    switch(action) {
+    case DO_NOTHING:
+        break;
+        
+    case SEND_SIGTERM:
+        /* ok, now it's being annoying */
+        ap_log_error(APLOG_MARK, APLOG_WARNING,
+                     0, ap_server_conf,
+                     "child process %" APR_PID_T_FMT
+                     " still did not exit, "
+                     "sending a SIGTERM",
+                     pid);
+        kill(pid, SIGTERM);
+        break;
+        
+    case SEND_SIGKILL:
+        ap_log_error(APLOG_MARK, APLOG_ERR,
+                     0, ap_server_conf,
+                     "child process %" APR_PID_T_FMT
+                     " still did not exit, "
+                     "sending a SIGKILL",
+                     pid);
+#ifndef BEOS
+        kill(pid, SIGKILL);
+#else
+        /* sending a SIGKILL kills the entire team on BeOS, and as
+         * httpd thread is part of that team it removes any chance
+         * of ever doing a restart.  To counter this I'm changing to
+         * use a kinder, gentler way of killing a specific thread
+         * that is just as effective.
+         */
+        kill_thread(pid);
+#endif
+        break;
+                
+    case GIVEUP:
+        /* gave it our best shot, but alas...  If this really
+         * is a child we are trying to kill and it really hasn't
+         * exited, we will likely fail to bind to the port
+         * after the restart.
+         */
+        ap_log_error(APLOG_MARK, APLOG_ERR,
+                     0, ap_server_conf,
+                     "could not make child process %" APR_PID_T_FMT
+                     " exit, "
+                     "attempting to continue anyway",
+                     pid);
+        break;
+    }
+    
+    return 0;
+}
+
 void ap_reclaim_child_processes(int terminate)
 {
     apr_time_t waittime = 1024 * 16;
-    apr_status_t waitret;
     int i;
+    extra_process_t *cur_extra;
     int not_dead_yet;
     int max_daemons;
     apr_time_t starttime = apr_time_now();
@@ -71,7 +180,7 @@ void ap_reclaim_child_processes(int terminate)
      * at which elapsed time from starting the reclaim
      */
     struct {
-        enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action;
+        action_t action;
         apr_time_t action_time;
     } action_table[] = {
         {DO_NOTHING, 0}, /* dummy entry for iterations where we reap
@@ -115,70 +224,31 @@ void ap_reclaim_child_processes(int terminate)
         not_dead_yet = 0;
         for (i = 0; i < max_daemons; ++i) {
             pid_t pid = MPM_CHILD_PID(i);
-            apr_proc_t proc;
 
-            if (pid == 0)
-                continue;
+            if (pid == 0) {
+                continue; /* not every scoreboard entry is in use */
+            }
 
-            proc.pid = pid;
-            waitret = apr_proc_wait(&proc, NULL, NULL, APR_NOWAIT);
-            if (waitret != APR_CHILD_NOTDONE) {
+            if (reclaim_one_pid(pid, action_table[cur_action].action)) {
                 MPM_NOTE_CHILD_KILLED(i);
-                continue;
             }
-
-            ++not_dead_yet;
-            switch(action_table[cur_action].action) {
-            case DO_NOTHING:
-                break;
-                
-            case SEND_SIGTERM:
-                /* ok, now it's being annoying */
-                ap_log_error(APLOG_MARK, APLOG_WARNING,
-                             0, ap_server_conf,
-                             "child process %" APR_PID_T_FMT
-                             " still did not exit, "
-                             "sending a SIGTERM",
-                             pid);
-                kill(pid, SIGTERM);
-                break;
-                
-            case SEND_SIGKILL:
-                ap_log_error(APLOG_MARK, APLOG_ERR,
-                             0, ap_server_conf,
-                             "child process %" APR_PID_T_FMT
-                             " still did not exit, "
-                             "sending a SIGKILL",
-                             pid);
-#ifndef BEOS
-                kill(pid, SIGKILL);
-#else
-                /* sending a SIGKILL kills the entire team on BeOS, and as
-                 * httpd thread is part of that team it removes any chance
-                 * of ever doing a restart.  To counter this I'm changing to
-                 * use a kinder, gentler way of killing a specific thread
-                 * that is just as effective.
-                 */
-                kill_thread(pid);
-#endif
-                break;
-                
-            case GIVEUP:
-                /* gave it our best shot, but alas...  If this really
-                 * is a child we are trying to kill and it really hasn't
-                 * exited, we will likely fail to bind to the port
-                 * after the restart.
-                 */
-                ap_log_error(APLOG_MARK, APLOG_ERR,
-                             0, ap_server_conf,
-                             "could not make child process %" APR_PID_T_FMT
-                             " exit, "
-                             "attempting to continue anyway",
-                             pid);
-                break;
+            else {
+                ++not_dead_yet;
             }
         }
 
+        cur_extra = extras;
+        while (cur_extra) {
+            extra_process_t *next = cur_extra->next;
+
+            if (reclaim_one_pid(cur_extra->pid, action_table[cur_action].action)) {
+                AP_DEBUG_ASSERT(1 == ap_unregister_extra_mpm_process(cur_extra->pid));
+            }
+            else {
+                ++not_dead_yet;
+            }
+            cur_extra = next;
+        }
 #if APR_HAS_OTHER_CHILD
         apr_proc_other_child_refresh_all(APR_OC_REASON_RESTART);
 #endif