From: Jeff Trawick Date: Fri, 17 Aug 2012 15:45:28 +0000 (+0000) Subject: mpm_event, mpm_worker: Remain active amidst prevalent child process X-Git-Tag: 2.2.23~39 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=55d785f174aa65d6955608a3bcc9f14f186aa418;p=thirdparty%2Fapache%2Fhttpd.git mpm_event, mpm_worker: Remain active amidst prevalent child process resource shortages. Submitted by: trawick Reviewed by: wrowe, rjung git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.2.x@1374330 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/CHANGES b/CHANGES index eafa09bbcfc..6941b58c14f 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,9 @@ Changes with Apache 2.2.23 envvars: Fix insecure handling of LD_LIBRARY_PATH that could lead to the current working directory to be searched for DSOs. [Stefan Fritsch] + *) mpm_event, mpm_worker: Remain active amidst prevalent child process + resource shortages. [Jeff Trawick] + *) mod_proxy_balancer: Restore balancing after a failed worker has recovered when using lbmethod_bybusyness. PR 48735. [Jeff Trawick] diff --git a/STATUS b/STATUS index 00d5eea4cd1..7801ecb384c 100644 --- a/STATUS +++ b/STATUS @@ -112,14 +112,6 @@ PATCHES ACCEPTED TO BACKPORT FROM TRUNK: 2.2.x patch: http://people.apache.org/~jim/patches/mod_proxy_ajp-erroroverride.patch +1: igalic, jim, rpluem - * mpm_event, mpm_worker: Remain active amidst prevalent child process - resource shortages. - trunk patch: http://svn.apache.org/viewvc?rev=1363557&view=rev - trunk patch: http://svn.apache.org/viewvc?rev=1364601&view=rev - 2.4.x patch: trunk patch works - 2.2.x patch: http://people.apache.org/~trawick/r1363557_1364601_to_2.2.x.txt - +1: trawick, wrowe, rjung - PATCHES PROPOSED TO BACKPORT FROM TRUNK: [ New proposals should be added at the end of the list ] diff --git a/server/mpm/experimental/event/event.c b/server/mpm/experimental/event/event.c index bbee899ea11..82efd0cd651 100644 --- a/server/mpm/experimental/event/event.c +++ b/server/mpm/experimental/event/event.c @@ -148,6 +148,7 @@ static int ap_daemons_limit = 0; static int server_limit = DEFAULT_SERVER_LIMIT; static int first_server_limit = 0; static int thread_limit = DEFAULT_THREAD_LIMIT; +static int had_healthy_child = 0; static int first_thread_limit = 0; static int changed_limit_at_restart; static int dying = 0; @@ -1657,6 +1658,7 @@ static void perform_idle_server_maintenance(void) int any_dying_threads = 0; int any_dead_threads = 0; int all_dead_threads = 1; + int child_threads_active = 0; if (i >= ap_max_daemons_limit && totally_free_length == idle_spawn_rate) @@ -1686,10 +1688,11 @@ static void perform_idle_server_maintenance(void) ++idle_thread_count; } if (status >= SERVER_READY && status < SERVER_GRACEFUL) { - ++active_thread_count; + ++child_threads_active; } } } + active_thread_count += child_threads_active; if (any_dead_threads && totally_free_length < idle_spawn_rate && free_length < MAX_SPAWN_RATE @@ -1713,6 +1716,9 @@ static void perform_idle_server_maintenance(void) } ++free_length; } + else if (child_threads_active == ap_threads_per_child) { + had_healthy_child = 1; + } /* XXX if (!ps->quiescing) is probably more reliable GLA */ if (!any_dying_threads) { last_non_dead = i; @@ -1721,21 +1727,23 @@ static void perform_idle_server_maintenance(void) } if (sick_child_detected) { - if (active_thread_count > 0) { - /* some child processes appear to be working. don't kill the - * whole server. + if (had_healthy_child) { + /* Assume this is a transient error, even though it may not be. Leave + * the server up in case it is able to serve some requests or the + * problem will be resolved. */ sick_child_detected = 0; } else { - /* looks like a basket case. give up. + /* looks like a basket case, as no child ever fully initialized; give up. */ shutdown_pending = 1; child_fatal = 1; ap_log_error(APLOG_MARK, APLOG_ALERT, 0, ap_server_conf, - "No active workers found..." - " Apache is exiting!"); + "A resource shortage or other unrecoverable failure " + "was encountered before any child process initialized " + "successfully... httpd is exiting!"); /* the child already logged the failure details */ return; } @@ -2198,6 +2206,7 @@ static int worker_pre_config(apr_pool_t * pconf, apr_pool_t * plog, ap_pid_fname = DEFAULT_PIDLOG; ap_lock_fname = DEFAULT_LOCKFILE; ap_max_requests_per_child = DEFAULT_MAX_REQUESTS_PER_CHILD; + had_healthy_child = 0; ap_extended_status = 0; #ifdef AP_MPM_WANT_SET_MAX_MEM_FREE ap_max_mem_free = APR_ALLOCATOR_MAX_FREE_UNLIMITED; diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c index f13f639426f..edf33fc5c45 100644 --- a/server/mpm/worker/worker.c +++ b/server/mpm/worker/worker.c @@ -124,6 +124,7 @@ static int first_server_limit = 0; static int thread_limit = DEFAULT_THREAD_LIMIT; static int first_thread_limit = 0; static int changed_limit_at_restart; +static int had_healthy_child = 0; static int dying = 0; static int workers_may_exit = 0; static int start_thread_may_exit = 0; @@ -1419,6 +1420,7 @@ static void perform_idle_server_maintenance(void) int any_dying_threads = 0; int any_dead_threads = 0; int all_dead_threads = 1; + int child_threads_active = 0; if (i >= ap_max_daemons_limit && totally_free_length == idle_spawn_rate) break; @@ -1449,10 +1451,11 @@ static void perform_idle_server_maintenance(void) ++idle_thread_count; } if (status >= SERVER_READY && status < SERVER_GRACEFUL) { - ++active_thread_count; + ++child_threads_active; } } } + active_thread_count += child_threads_active; if (any_dead_threads && totally_free_length < idle_spawn_rate && free_length < MAX_SPAWN_RATE && (!ps->pid /* no process in the slot */ @@ -1475,6 +1478,9 @@ static void perform_idle_server_maintenance(void) } ++free_length; } + else if (child_threads_active == ap_threads_per_child) { + had_healthy_child = 1; + } /* XXX if (!ps->quiescing) is probably more reliable GLA */ if (!any_dying_threads) { last_non_dead = i; @@ -1483,21 +1489,23 @@ static void perform_idle_server_maintenance(void) } if (sick_child_detected) { - if (active_thread_count > 0) { - /* some child processes appear to be working. don't kill the - * whole server. + if (had_healthy_child) { + /* Assume this is a transient error, even though it may not be. Leave + * the server up in case it is able to serve some requests or the + * problem will be resolved. */ sick_child_detected = 0; } else { - /* looks like a basket case. give up. + /* looks like a basket case, as no child ever fully initialized; give up. */ shutdown_pending = 1; child_fatal = 1; ap_log_error(APLOG_MARK, APLOG_ALERT, 0, ap_server_conf, - "No active workers found..." - " Apache is exiting!"); + "A resource shortage or other unrecoverable failure " + "was encountered before any child process initialized " + "successfully... httpd is exiting!"); /* the child already logged the failure details */ return; } @@ -2011,6 +2019,7 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog, ap_pid_fname = DEFAULT_PIDLOG; ap_lock_fname = DEFAULT_LOCKFILE; ap_max_requests_per_child = DEFAULT_MAX_REQUESTS_PER_CHILD; + had_healthy_child = 0; ap_extended_status = 0; #ifdef AP_MPM_WANT_SET_MAX_MEM_FREE ap_max_mem_free = APR_ALLOCATOR_MAX_FREE_UNLIMITED;