]> git.ipfire.org Git - thirdparty/apache/httpd.git/commitdiff
worker mpm: don't take down the whole server for a transient
authorGreg Ames <gregames@apache.org>
Wed, 25 May 2005 19:36:20 +0000 (19:36 +0000)
committerGreg Ames <gregames@apache.org>
Wed, 25 May 2005 19:36:20 +0000 (19:36 +0000)
thread creation failure. PR 34514

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.0.x@178518 13f79535-47bb-0310-9956-ffa450edef68

CHANGES
STATUS
server/mpm/worker/worker.c

diff --git a/CHANGES b/CHANGES
index 9a4a9a045b5bb22380136f29f2a7ea4650cf5b62..304477f0e0331662e6e98a11ff51a3813e8a2a5c 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,8 @@
 Changes with Apache 2.0.55
   
+  *) worker mpm: don't take down the whole server for a transient
+     thread creation failure. PR 34514 [Greg Ames]
+  
   *) mod_rewrite: use buffered I/O to improve performance with large
      RewriteMap txt: files.  [Greg Ames]
 
diff --git a/STATUS b/STATUS
index 2c91f1c30d032cb310694da0b0562dd560899db0..c0533dac208ac4fac7b52ea563272d60faef3a72 100644 (file)
--- a/STATUS
+++ b/STATUS
@@ -122,13 +122,6 @@ PATCHES TO BACKPORT FROM TRUNK:
        +1: jorton, trawick
        [yes, I will write a CHANGES entry too]
 
-    *) worker mpm: don't take down the whole server for a transient
-       thread creation failure. PR 34514
-         http://svn.apache.org/viewcvs.cgi?rev=168182&view=rev
-         http://svn.apache.org/viewcvs.cgi?rev=168649&view=rev
-           (ignore changes to event MPM, which don't apply to 2.0.x)
-       +1: trawick, jorton, gregames
-
     *) several changes to improve logging of connection-oriented errors, including
        ap_log_cerror() API (needs minor bump in addition to changes below)
          http://cvs.apache.org/viewcvs.cgi/httpd-2.0/server/core.c?r1=1.289&r2=1.291
index 9fe8f5e5888bb5ac6ed0456b7341fa8adc60dfed..fe078ece5d840f21801fc443495511e5110254e6 100644 (file)
@@ -134,6 +134,7 @@ static int resource_shortage = 0;
 static fd_queue_t *worker_queue;
 static fd_queue_info_t *worker_queue_info;
 static int mpm_state = AP_MPMQ_STARTING;
+static int sick_child_detected;
 
 /* The structure used to pass unique initialization info to each thread */
 typedef struct {
@@ -872,14 +873,8 @@ static void create_listener_thread(thread_starter *ts)
     if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
                      "apr_thread_create: unable to create listener thread");
-        /* In case system resources are maxxed out, we don't want
-         * Apache running away with the CPU trying to fork over and
-         * over and over again if we exit.
-         * XXX Jeff doesn't see how Apache is going to try to fork again since
-         * the exit code is APEXIT_CHILDFATAL
-         */
-        apr_sleep(apr_time_from_sec(10));
-        clean_child_exit(APEXIT_CHILDFATAL);
+        /* let the parent decide how bad this really is */
+        clean_child_exit(APEXIT_CHILDSICK);
     }
     apr_os_thread_get(&listener_os_thread, ts->listener);
 }
@@ -956,11 +951,8 @@ static void * APR_THREAD_FUNC start_threads(apr_thread_t *thd, void *dummy)
             if (rv != APR_SUCCESS) {
                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
                     "apr_thread_create: unable to create worker thread");
-                /* In case system resources are maxxed out, we don't want
-                   Apache running away with the CPU trying to fork over and
-                   over and over again if we exit. */
-                apr_sleep(apr_time_from_sec(10));
-                clean_child_exit(APEXIT_CHILDFATAL);
+                /* let the parent decide how bad this really is */
+                clean_child_exit(APEXIT_CHILDSICK);
             }
             threads_created++;
         }
@@ -1153,11 +1145,8 @@ static void child_main(int child_num_arg)
     if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
                      "apr_thread_create: unable to create worker thread");
-        /* In case system resources are maxxed out, we don't want
-           Apache running away with the CPU trying to fork over and
-           over and over again if we exit. */
-        apr_sleep(apr_time_from_sec(10));
-        clean_child_exit(APEXIT_CHILDFATAL);
+        /* let the parent decide how bad this really is */
+        clean_child_exit(APEXIT_CHILDSICK);
     }
 
     mpm_state = AP_MPMQ_RUNNING;
@@ -1345,6 +1334,7 @@ static void perform_idle_server_maintenance(void)
     int free_slots[MAX_SPAWN_RATE];
     int last_non_dead;
     int total_non_dead;
+    int active_thread_count = 0;
 
     /* initialize the free_list */
     free_length = 0;
@@ -1382,14 +1372,16 @@ static void perform_idle_server_maintenance(void)
              * So we hopefully won't need to fork more if we count it.
              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
              */
-            if (status <= SERVER_READY && status != SERVER_DEAD &&
-                    !ps->quiescing &&
-                    ps->generation == ap_my_generation &&
-                 /* XXX the following shouldn't be necessary if we clean up 
-                  *     properly after seg faults, but we're not yet    GLA 
-                  */     
-                    ps->pid != 0) {
-                ++idle_thread_count;
+            if (ps->pid != 0) { /* XXX just set all_dead_threads in outer for
+                                   loop if no pid?  not much else matters */
+                if (status <= SERVER_READY && status != SERVER_DEAD &&
+                        !ps->quiescing &&
+                        ps->generation == ap_my_generation) {
+                    ++idle_thread_count;
+                }
+                if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
+                    ++active_thread_count;
+                }
             }
         }
         if (any_dead_threads && totally_free_length < idle_spawn_rate
@@ -1420,6 +1412,28 @@ static void perform_idle_server_maintenance(void)
             ++total_non_dead;
         }
     }
+
+    if (sick_child_detected) {
+        if (active_thread_count > 0) {
+            /* some child processes appear to be working.  don't kill the
+             * whole server.
+             */
+            sick_child_detected = 0;
+        }
+        else {
+            /* looks like a basket case.  give up.  
+             */
+            shutdown_pending = 1;
+            child_fatal = 1;
+            ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
+                         ap_server_conf,
+                         "No active workers found..."
+                         " Apache is exiting!");
+            /* the child already logged the failure details */
+            return;
+        }
+    }
+                                                    
     ap_max_daemons_limit = last_non_dead + 1;
 
     if (idle_thread_count > max_spare_threads) {
@@ -1493,6 +1507,12 @@ static void server_main_loop(int remaining_children_to_start)
                 child_fatal = 1;
                 return;
             }
+            else if (processed_status == APEXIT_CHILDSICK) {
+                /* tell perform_idle_server_maintenance to check into this
+                 * on the next timer pop
+                 */
+                sick_child_detected = 1;
+            }
             /* non-fatal death... note that it's gone in the scoreboard. */
             child_slot = find_child_by_pid(&pid);
             if (child_slot >= 0) {