From: Graham Leggett Date: Mon, 13 Dec 2021 10:43:53 +0000 (+0000) Subject: Backport: X-Git-Tag: candidate-2.4.52-rc1~62 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4819c58566d2274b95d56b16fe51aa80461633da;p=thirdparty%2Fapache%2Fhttpd.git Backport: *) mpm_event: Restart stopping of idle children after a load peak. PR 65626. trunk patch: http://svn.apache.org/r1894285 http://svn.apache.org/r1894286 http://svn.apache.org/r1894291 http://svn.apache.org/r1895550 http://svn.apache.org/r1895553 http://svn.apache.org/r1895630 backport PR: https://github.com/apache/httpd/pull/276 2.4.x patch: https://patch-diff.githubusercontent.com/raw/apache/httpd/pull/276.patch +1: ylavic, rpluem, minfrin ylavic: updated with r1894291 for correctness of perform_idle_server_maintenance() w.r.t. num_buckets > 1 and ease merging of r1895553, with r1895550 for correctness of active_daemons used in r1895553 and r1895630. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1895871 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/CHANGES b/CHANGES index 3046315d50a..9961b1fe84b 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,9 @@ -*- coding: utf-8 -*- Changes with Apache 2.4.52 + *) mpm_event: Restart stopping of idle children after a load peak. PR 65626. + [Yann Ylavic, Ruediger Pluem] + *) mod_http2: fixes 2 regressions in server limit handling. 1. When reaching server limits, such as MaxRequestsPerChild, the HTTP/2 connection send a GOAWAY frame much too early on new diff --git a/STATUS b/STATUS index 28f0a5a6f2d..89fb3fa5fe3 100644 --- a/STATUS +++ b/STATUS @@ -145,19 +145,6 @@ RELEASE SHOWSTOPPERS: PATCHES ACCEPTED TO BACKPORT FROM TRUNK: [ start all new proposals below, under PATCHES PROPOSED. ] - *) mpm_event: Restart stopping of idle children after a load peak. PR 65626. - trunk patch: http://svn.apache.org/r1894285 - http://svn.apache.org/r1894286 - http://svn.apache.org/r1894291 - http://svn.apache.org/r1895550 - http://svn.apache.org/r1895553 - http://svn.apache.org/r1895630 - backport PR: https://github.com/apache/httpd/pull/276 - 2.4.x patch: https://patch-diff.githubusercontent.com/raw/apache/httpd/pull/276.patch - +1: ylavic, rpluem, minfrin - ylavic: updated with r1894291 for correctness of perform_idle_server_maintenance() - w.r.t. num_buckets > 1 and ease merging of r1895553, with r1895550 for - correctness of active_daemons used in r1895553 and r1895630. PATCHES PROPOSED TO BACKPORT FROM TRUNK: diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c index dddff35da06..c2d53ff0735 100644 --- a/server/mpm/event/event.c +++ b/server/mpm/event/event.c @@ -167,8 +167,6 @@ static int ap_daemons_to_start = 0; /* StartServers */ static int min_spare_threads = 0; /* MinSpareThreads */ static int max_spare_threads = 0; /* MaxSpareThreads */ static int active_daemons_limit = 0; /* MaxRequestWorkers / ThreadsPerChild */ -static int active_daemons = 0; /* workers that still active, i.e. are - not shutting down gracefully */ static int max_workers = 0; /* MaxRequestWorkers */ static int server_limit = 0; /* ServerLimit */ static int thread_limit = 0; /* ThreadLimit */ @@ -389,7 +387,10 @@ typedef struct event_retained_data { * Not kept up-to-date when shutdown is pending. */ int total_daemons; - + /* + * Workers that still active, i.e. are not shutting down gracefully. + */ + int active_daemons; /* * idle_spawn_rate is the number of children that will be spawned on the * next maintenance cycle if there aren't enough idle servers. It is @@ -2759,7 +2760,7 @@ static int make_child(server_rec * s, int slot, int bucket) ap_scoreboard_image->parent[slot].quiescing = 0; ap_scoreboard_image->parent[slot].not_accepting = 0; event_note_child_started(slot, pid); - active_daemons++; + retained->active_daemons++; retained->total_daemons++; return 0; } @@ -2780,41 +2781,39 @@ static void startup_children(int number_to_start) } } -static void perform_idle_server_maintenance(int child_bucket, int num_buckets) +static void perform_idle_server_maintenance(int child_bucket) { - int i, j; + int num_buckets = retained->mpm->num_buckets; int idle_thread_count = 0; - worker_score *ws; process_score *ps; int free_length = 0; int free_slots[MAX_SPAWN_RATE]; int last_non_dead = -1; int active_thread_count = 0; + int i, j; for (i = 0; i < server_limit; ++i) { - /* Initialization to satisfy the compiler. It doesn't know - * that threads_per_child is always > 0 */ - int status = SERVER_DEAD; - int child_threads_active = 0; - int bucket = i % num_buckets; - + if (num_buckets > 1 && (i % num_buckets) != child_bucket) { + /* We only care about child_bucket in this call */ + continue; + } if (i >= retained->max_daemons_limit && free_length == retained->idle_spawn_rate[child_bucket]) { /* short cut if all active processes have been examined and * enough empty scoreboard slots have been found */ - break; } + ps = &ap_scoreboard_image->parent[i]; if (ps->pid != 0) { + int child_threads_active = 0; if (ps->quiescing == 1) { ps->quiescing = 2; - active_daemons--; + retained->active_daemons--; } for (j = 0; j < threads_per_child; j++) { - ws = &ap_scoreboard_image->servers[i][j]; - status = ws->status; + int status = ap_scoreboard_image->servers[i][j].status; /* We consider a starting server as idle because we started it * at least a cycle ago, and if it still hasn't finished starting @@ -2823,26 +2822,26 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) * This depends on the ordering of SERVER_READY and SERVER_STARTING. */ if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting - && ps->generation == retained->mpm->my_generation - && bucket == child_bucket) - { + && ps->generation == retained->mpm->my_generation) { ++idle_thread_count; } if (status >= SERVER_READY && status < SERVER_GRACEFUL) { ++child_threads_active; } } + active_thread_count += child_threads_active; + if (child_threads_active == threads_per_child) { + had_healthy_child = 1; + } last_non_dead = i; } - active_thread_count += child_threads_active; - if (!ps->pid - && bucket == child_bucket - && free_length < retained->idle_spawn_rate[child_bucket]) + else if (free_length < retained->idle_spawn_rate[child_bucket]) { free_slots[free_length++] = i; - else if (child_threads_active == threads_per_child) - had_healthy_child = 1; + } } + retained->max_daemons_limit = last_non_dead + 1; + if (retained->sick_child_detected) { if (had_healthy_child) { /* Assume this is a transient error, even though it may not be. Leave @@ -2851,6 +2850,10 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) */ retained->sick_child_detected = 0; } + else if (child_bucket < num_buckets - 1) { + /* check for had_healthy_child up to the last child bucket */ + return; + } else { /* looks like a basket case, as no child ever fully initialized; give up. */ @@ -2866,18 +2869,16 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) } } - retained->max_daemons_limit = last_non_dead + 1; - - if (idle_thread_count > max_spare_threads / num_buckets) - { + if (idle_thread_count > max_spare_threads / num_buckets) { /* * Child processes that we ask to shut down won't die immediately * but may stay around for a long time when they finish their * requests. If the server load changes many times, many such * gracefully finishing processes may accumulate, filling up the * scoreboard. To avoid running out of scoreboard entries, we - * don't shut down more processes when the total number of processes - * is high. + * don't shut down more processes if there are stopping ones + * already (i.e. active_daemons != total_daemons) and not enough + * slack space in the scoreboard for a graceful restart. * * XXX It would be nice if we could * XXX - kill processes without keepalive connections first @@ -2885,22 +2886,29 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) * XXX depending on server load, later be able to resurrect them * or kill them */ - if (retained->total_daemons <= active_daemons_limit && - retained->total_daemons < server_limit) { - /* Kill off one child */ + int do_kill = (retained->active_daemons == retained->total_daemons + || (server_limit - retained->total_daemons > + active_daemons_limit)); + ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, + "%shutting down one child: " + "active daemons %d / active limit %d / " + "total daemons %d / ServerLimit %d / " + "idle threads %d / max workers %d", + (do_kill) ? "S" : "Not s", + retained->active_daemons, active_daemons_limit, + retained->total_daemons, server_limit, + idle_thread_count, max_workers); + if (do_kill) { ap_mpm_podx_signal(all_buckets[child_bucket].pod, AP_MPM_PODX_GRACEFUL); - retained->idle_spawn_rate[child_bucket] = 1; - } else { - ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, - "Not shutting down child: total daemons %d / " - "active limit %d / ServerLimit %d", - retained->total_daemons, active_daemons_limit, - server_limit); } + else { + /* Wait for dying daemon(s) to exit */ + } + retained->idle_spawn_rate[child_bucket] = 1; } else if (idle_thread_count < min_spare_threads / num_buckets) { - if (active_thread_count >= max_workers) { + if (active_thread_count >= max_workers / num_buckets) { if (0 == idle_thread_count) { if (!retained->maxclients_reported) { ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484) @@ -2931,16 +2939,16 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) if (free_length > retained->idle_spawn_rate[child_bucket]) { free_length = retained->idle_spawn_rate[child_bucket]; } - if (free_length + active_daemons > active_daemons_limit) { - if (active_daemons < active_daemons_limit) { - free_length = active_daemons_limit - active_daemons; + if (free_length + retained->active_daemons > active_daemons_limit) { + if (retained->active_daemons < active_daemons_limit) { + free_length = active_daemons_limit - retained->active_daemons; } else { ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, "server is at active daemons limit, spawning " "of %d children cancelled: %d/%d active, " "rate %d", free_length, - active_daemons, active_daemons_limit, + retained->active_daemons, active_daemons_limit, retained->idle_spawn_rate[child_bucket]); free_length = 0; } @@ -2953,14 +2961,14 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) "spawning %d children, there are around %d idle " "threads, %d active children, and %d children " "that are shutting down", free_length, - idle_thread_count, active_daemons, + idle_thread_count, retained->active_daemons, retained->total_daemons); } for (i = 0; i < free_length; ++i) { ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, "Spawning new child: slot %d active / " "total daemons: %d/%d", - free_slots[i], active_daemons, + free_slots[i], retained->active_daemons, retained->total_daemons); make_child(ap_server_conf, free_slots[i], child_bucket); } @@ -2981,8 +2989,9 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) } } -static void server_main_loop(int remaining_children_to_start, int num_buckets) +static void server_main_loop(int remaining_children_to_start) { + int num_buckets = retained->mpm->num_buckets; int child_slot; apr_exit_why_e exitwhy; int status, processed_status; @@ -3037,7 +3046,7 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets) event_note_child_killed(child_slot, 0, 0); ps = &ap_scoreboard_image->parent[child_slot]; if (!ps->quiescing) - active_daemons--; + retained->active_daemons--; ps->quiescing = 0; /* NOTE: We don't dec in the (child_slot < 0) case! */ retained->total_daemons--; @@ -3091,7 +3100,7 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets) } for (i = 0; i < num_buckets; i++) { - perform_idle_server_maintenance(i, num_buckets); + perform_idle_server_maintenance(i); } } } @@ -3168,7 +3177,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) retained->mpm->mpm_state = AP_MPMQ_RUNNING; - server_main_loop(remaining_children_to_start, num_buckets); + server_main_loop(remaining_children_to_start); retained->mpm->mpm_state = AP_MPMQ_STOPPING; if (retained->mpm->shutdown_pending && retained->mpm->is_ungraceful) { @@ -3298,8 +3307,6 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) "SIGHUP received. Attempting to restart"); } - active_daemons = 0; - return OK; } @@ -3466,7 +3473,6 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, if (!retained) { retained = ap_retained_data_create(userdata_key, sizeof(*retained)); retained->mpm = ap_unixd_mpm_get_retained_data(); - retained->max_daemons_limit = -1; if (retained->mpm->module_loads) { test_atomics = 1; } diff --git a/server/mpm/mpmt_os2/mpmt_os2.c b/server/mpm/mpmt_os2/mpmt_os2.c index 77fc3e7f05c..b3adb03adba 100644 --- a/server/mpm/mpmt_os2/mpmt_os2.c +++ b/server/mpm/mpmt_os2/mpmt_os2.c @@ -79,7 +79,7 @@ int ap_min_spare_threads = 0; int ap_max_spare_threads = 0; /* Keep track of a few interesting statistics */ -int ap_max_daemons_limit = -1; +int ap_max_daemons_limit = 0; /* volatile just in case */ static int volatile shutdown_pending; @@ -344,8 +344,8 @@ static void spawn_child(int slot) "error spawning child, slot %d", slot); } - if (ap_max_daemons_limit < slot) { - ap_max_daemons_limit = slot; + if (slot + 1 > ap_max_daemons_limit) { + ap_max_daemons_limit = slot + 1; } ap_scoreboard_image->parent[slot].pid = proc_rc.codeTerminate; diff --git a/server/mpm/prefork/prefork.c b/server/mpm/prefork/prefork.c index 1cbe542a6d4..0bdfd5ae743 100644 --- a/server/mpm/prefork/prefork.c +++ b/server/mpm/prefork/prefork.c @@ -1288,7 +1288,6 @@ static int prefork_pre_config(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp if (!retained) { retained = ap_retained_data_create(userdata_key, sizeof(*retained)); retained->mpm = ap_unixd_mpm_get_retained_data(); - retained->max_daemons_limit = -1; retained->idle_spawn_rate = 1; } retained->mpm->mpm_state = AP_MPMQ_STARTING; diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c index bd56f618bd8..5cf8ead94e1 100644 --- a/server/mpm/worker/worker.c +++ b/server/mpm/worker/worker.c @@ -1365,11 +1365,10 @@ static void startup_children(int number_to_start) } } -static void perform_idle_server_maintenance(int child_bucket, int num_buckets) +static void perform_idle_server_maintenance(int child_bucket) { - int i, j; + int num_buckets = retained->mpm->num_buckets; int idle_thread_count; - worker_score *ws; process_score *ps; int free_length; int totally_free_length = 0; @@ -1377,6 +1376,7 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) int last_non_dead; int total_non_dead; int active_thread_count = 0; + int i, j; /* initialize the free_list */ free_length = 0; @@ -1388,13 +1388,15 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) for (i = 0; i < ap_daemons_limit; ++i) { /* Initialization to satisfy the compiler. It doesn't know * that threads_per_child is always > 0 */ - int status = SERVER_DEAD; int any_dying_threads = 0; int any_dead_threads = 0; int all_dead_threads = 1; int child_threads_active = 0; - int bucket = i % num_buckets; + if (num_buckets > 1 && (i % num_buckets) != child_bucket) { + /* We only care about child_bucket in this call */ + continue; + } if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate[child_bucket]) { /* short cut if all active processes have been examined and @@ -1404,8 +1406,7 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) } ps = &ap_scoreboard_image->parent[i]; for (j = 0; j < threads_per_child; j++) { - ws = &ap_scoreboard_image->servers[i][j]; - status = ws->status; + int status = ap_scoreboard_image->servers[i][j].status; /* XXX any_dying_threads is probably no longer needed GLA */ any_dying_threads = any_dying_threads || @@ -1425,8 +1426,7 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) loop if no pid? not much else matters */ if (status <= SERVER_READY && !ps->quiescing && - ps->generation == retained->mpm->my_generation && - bucket == child_bucket) { + ps->generation == retained->mpm->my_generation) { ++idle_thread_count; } if (status >= SERVER_READY && status < SERVER_GRACEFUL) { @@ -1436,7 +1436,6 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) } active_thread_count += child_threads_active; if (any_dead_threads - && bucket == child_bucket && totally_free_length < retained->idle_spawn_rate[child_bucket] && free_length < MAX_SPAWN_RATE / num_buckets && (!ps->pid /* no process in the slot */ @@ -1464,11 +1463,15 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) } /* XXX if (!ps->quiescing) is probably more reliable GLA */ if (!any_dying_threads) { - last_non_dead = i; ++total_non_dead; } + if (ps->pid != 0) { + last_non_dead = i; + } } + retained->max_daemons_limit = last_non_dead + 1; + if (retained->sick_child_detected) { if (had_healthy_child) { /* Assume this is a transient error, even though it may not be. Leave @@ -1477,6 +1480,10 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) */ retained->sick_child_detected = 0; } + else if (child_bucket < num_buckets - 1) { + /* check for had_healthy_child up to the last child bucket */ + return; + } else { /* looks like a basket case, as no child ever fully initialized; give up. */ @@ -1492,8 +1499,6 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) } } - retained->max_daemons_limit = last_non_dead + 1; - if (idle_thread_count > max_spare_threads / num_buckets) { /* Kill off one child */ ap_mpm_podx_signal(all_buckets[child_bucket].pod, @@ -1504,7 +1509,7 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) /* terminate the free list */ if (free_length == 0) { /* scoreboard is full, can't fork */ - if (active_thread_count >= ap_daemons_limit * threads_per_child) { + if (active_thread_count >= max_workers / num_buckets) { /* no threads are "inactive" - starting, stopping, etc. */ /* have we reached MaxRequestWorkers, or just getting close? */ if (0 == idle_thread_count) { @@ -1567,8 +1572,9 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) } } -static void server_main_loop(int remaining_children_to_start, int num_buckets) +static void server_main_loop(int remaining_children_to_start) { + int num_buckets = retained->mpm->num_buckets; ap_generation_t old_gen; int child_slot; apr_exit_why_e exitwhy; @@ -1682,7 +1688,7 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets) } for (i = 0; i < num_buckets; i++) { - perform_idle_server_maintenance(i, num_buckets); + perform_idle_server_maintenance(i); } } } @@ -1764,7 +1770,7 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) apr_proc_mutex_defname()); retained->mpm->mpm_state = AP_MPMQ_RUNNING; - server_main_loop(remaining_children_to_start, num_buckets); + server_main_loop(remaining_children_to_start); retained->mpm->mpm_state = AP_MPMQ_STOPPING; if (retained->mpm->shutdown_pending && retained->mpm->is_ungraceful) { @@ -2019,7 +2025,6 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog, if (!retained) { retained = ap_retained_data_create(userdata_key, sizeof(*retained)); retained->mpm = ap_unixd_mpm_get_retained_data(); - retained->max_daemons_limit = -1; } retained->mpm->mpm_state = AP_MPMQ_STARTING; if (retained->mpm->baton != retained) {