git.ipfire.org Git - thirdparty/apache/httpd.git/commitdiff
Merge /httpd/httpd/trunk:r1899858,1899865,1899884,1900991
author: Stefan Eissing <icing@apache.org>
Wed, 25 May 2022 12:09:36 +0000 (12:09 +0000)
committer: Stefan Eissing <icing@apache.org>
Wed, 25 May 2022 12:09:36 +0000 (12:09 +0000)
  *) mpm_event, mpm_worker: Handle children killed pathologically.

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1901234 13f79535-47bb-0310-9956-ffa450edef68

changes-entries/event_early_killed_children.txt [new file with mode: 0644]
server/mpm/event/event.c
server/mpm/worker/worker.c

diff --git a/changes-entries/event_early_killed_children.txt b/changes-entries/event_early_killed_children.txt
new file mode 100644 (file)
index 0000000..db968c6
--- /dev/null
@@ -0,0 +1,2 @@
+  *) MPM event: Restart children processes killed before idle maintenance.
+     PR 65769.  [Yann Ylavic, Ruediger Pluem]
index ff260ef9489f2a5eaa0a9b007bf0c0f15363b541..3672f4496344d9276c3b49a80f2559cbdf6e76a4 100644 (file)
@@ -3058,6 +3058,7 @@ static void server_main_loop(int remaining_children_to_start)
 {
     int num_buckets = retained->mpm->num_buckets;
     int max_daemon_used = 0;
+    int successive_kills = 0;
     int child_slot;
     apr_exit_why_e exitwhy;
     int status, processed_status;
@@ -3136,11 +3137,30 @@ static void server_main_loop(int remaining_children_to_start)
             /* Don't perform idle maintenance when a child dies,
              * only do it when there's a timeout.  Remember only a
              * finite number of children can die, and it's pretty
-             * pathological for a lot to die suddenly.
+             * pathological for a lot to die suddenly.  If a child is
+             * killed by a signal (faulting) we want to restart it ASAP
+             * though, up to 3 successive faults or we stop this until
+             * a timeout happens again (to avoid the flood of fork()ed
+             * processes that keep being killed early).
              */
-            continue;
+            if (child_slot < 0 || !APR_PROC_CHECK_SIGNALED(exitwhy)) {
+                continue;
+            }
+            if (++successive_kills >= 3) {
+                if (successive_kills % 10 == 3) {
+                    ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
+                                 ap_server_conf, APLOGNO(10392)
+                                 "children are killed successively!");
+                }
+                continue;
+            }
+            ++remaining_children_to_start;
+        }
+        else {
+            successive_kills = 0;
         }
-        else if (remaining_children_to_start) {
+
+        if (remaining_children_to_start) {
             /* we hit a 1 second timeout in which none of the previous
              * generation of children needed to be reaped... so assume
              * they're all done, and pick up the slack if any is left.
index 7e3a5542406896d7e6b54397a66b3512c145e69b..0907db3281cc4c071dfefe26786f4fa36b6a9c24 100644 (file)
@@ -1590,6 +1590,7 @@ static void perform_idle_server_maintenance(int child_bucket)
 static void server_main_loop(int remaining_children_to_start)
 {
     int num_buckets = retained->mpm->num_buckets;
+    int successive_kills = 0;
     ap_generation_t old_gen;
     int child_slot;
     apr_exit_why_e exitwhy;
@@ -1684,11 +1685,30 @@ static void server_main_loop(int remaining_children_to_start)
             /* Don't perform idle maintenance when a child dies,
              * only do it when there's a timeout.  Remember only a
              * finite number of children can die, and it's pretty
-             * pathological for a lot to die suddenly.
+             * pathological for a lot to die suddenly.  If a child is
+             * killed by a signal (faulting) we want to restart it ASAP
+             * though, up to 3 successive faults or we stop this until
+             * a timeout happens again (to avoid the flood of fork()ed
+             * processes that keep being killed early).
              */
-            continue;
+            if (child_slot < 0 || !APR_PROC_CHECK_SIGNALED(exitwhy)) {
+                continue;
+            }
+            if (++successive_kills >= 3) {
+                if (successive_kills % 10 == 3) {
+                    ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
+                                 ap_server_conf, APLOGNO(10392)
+                                 "children are killed successively!");
+                }
+                continue;
+            }
+            ++remaining_children_to_start;
+        }
+        else {
+            successive_kills = 0;
         }
-        else if (remaining_children_to_start) {
+
+        if (remaining_children_to_start) {
             /* we hit a 1 second timeout in which none of the previous
              * generation of children needed to be reaped... so assume
              * they're all done, and pick up the slack if any is left.