From: Stefan Eissing Date: Thu, 15 Jul 2021 08:29:19 +0000 (+0000) Subject: * mod_http2: X-Git-Tag: 2.5.0-alpha2-ci-test-only~894 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2aa1c1415a733a29bf7e501805fb4329c7752788;p=thirdparty%2Fapache%2Fhttpd.git * mod_http2: - Added a timeout to h2 worker cleanup to exit latest after 5 seconds of waiting on idle workers to terminate. This happens after all connections have been processed. a WARNING is logged in case workers lagged behind. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1891566 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/changes-entries/h2_workers_dynamic.txt b/changes-entries/h2_workers_dynamic.txt index 1d3115c7249..2de102ea21a 100644 --- a/changes-entries/h2_workers_dynamic.txt +++ b/changes-entries/h2_workers_dynamic.txt @@ -6,6 +6,9 @@ create H2MinWorkers threads and add up to H2MaxWorkers when needed. These additional workers time out when idle after H2MaxWorkerIdleSeconds and disappear again. + - Added a timeout to h2 worker cleanup to exit latest after 5 seconds of + waiting on idle workers to terminate. This happens after all connections + have been processed. a WARNING is logged in case workers lagged behind. - When the shutdown of a child is detected (e.g. graceful shutdown), the module will terminate all idle workers above H2MinWorkers right away. This detection currently only happens when a HTTP/2 connection is active. diff --git a/modules/http2/h2_session.c b/modules/http2/h2_session.c index 4cc2c96be30..453a98fd36c 100644 --- a/modules/http2/h2_session.c +++ b/modules/http2/h2_session.c @@ -774,7 +774,7 @@ static apr_status_t session_cleanup(h2_session *session, const char *trigger) * connection when sending the next request, this has the effect * that at least this one request will fail. */ - ap_log_cerror(APLOG_MARK, APLOG_WARNING, 0, c, + ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, H2_SSSN_LOG(APLOGNO(03199), session, "connection disappeared without proper " "goodbye, clients will be confused, should not happen")); @@ -799,13 +799,17 @@ static apr_status_t session_pool_cleanup(void *data) h2_session *session; if ((session = h2_ctx_get_session(c))) { + int mpm_state = 0; + int level; + + ap_mpm_query(AP_MPMQ_MPM_STATE, &mpm_state); + level = (AP_MPMQ_STOPPING == mpm_state)? APLOG_DEBUG : APLOG_WARNING; /* if the session is still there, now is the last chance * to perform cleanup. Normally, cleanup should have happened - * earlier in the connection pre_close. Main reason is that - * any ongoing requests on secondary connections might still access - * data which has, at this time, already been freed. An example - * is mod_ssl that uses request hooks. */ - ap_log_cerror(APLOG_MARK, APLOG_WARNING, 0, c, + * earlier in the connection pre_close. + * However, when the server is stopping, it may shutdown connections + * without running the pre_close hooks. Do not want about that. */ + ap_log_cerror(APLOG_MARK, level, 0, c, H2_SSSN_LOG(APLOGNO(10020), session, "session cleanup triggered by pool cleanup. " "this should have happened earlier already.")); diff --git a/modules/http2/h2_version.h b/modules/http2/h2_version.h index 38a21904449..785a172386f 100644 --- a/modules/http2/h2_version.h +++ b/modules/http2/h2_version.h @@ -27,7 +27,7 @@ * @macro * Version number of the http2 module as c string */ -#define MOD_HTTP2_VERSION "1.15.21" +#define MOD_HTTP2_VERSION "1.15.22" /** * @macro @@ -35,7 +35,7 @@ * release. This is a 24 bit number with 8 bits for major number, 8 bits * for minor and 8 bits for patch. Version 1.2.3 becomes 0x010203. */ -#define MOD_HTTP2_VERSION_NUM 0x010f15 +#define MOD_HTTP2_VERSION_NUM 0x010f16 #endif /* mod_h2_h2_version_h */ diff --git a/modules/http2/h2_workers.c b/modules/http2/h2_workers.c index 7832ac0e383..601c8edb4c8 100644 --- a/modules/http2/h2_workers.c +++ b/modules/http2/h2_workers.c @@ -94,7 +94,7 @@ static apr_status_t activate_slot(h2_workers *workers, h2_slot *slot) if (rv != APR_SUCCESS) goto cleanup; } - ap_log_error(APLOG_MARK, APLOG_WARNING, 0, workers->s, + ap_log_error(APLOG_MARK, APLOG_TRACE3, 0, workers->s, "h2_workers: new thread for slot %d", slot->id); /* thread will either immediately start work or add itself @@ -319,16 +319,45 @@ static void workers_abort_idle(h2_workers *workers) static apr_status_t workers_pool_cleanup(void *data) { h2_workers *workers = data; + apr_time_t timout = apr_time_from_sec(1); + apr_status_t rv; + int i, n = 5; + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s, + "h2_workers: cleanup %d workers idling", + (int)apr_atomic_read32(&workers->worker_count)); workers_abort_idle(workers); - /* wait for all the workers to become zombies and join them */ + /* wait for all the workers to become zombies and join them. + * this gets called after the mpm shuts down and all connections + * have either been handled (graceful) or we are forced exiting + * (ungrateful). Either way, we show limited patience. */ apr_thread_mutex_lock(workers->lock); - if (apr_atomic_read32(&workers->worker_count)) { - apr_thread_cond_wait(workers->all_done, workers->lock); + for (i = 0; i < n; ++i) { + if (!apr_atomic_read32(&workers->worker_count)) { + break; + } + rv = apr_thread_cond_timedwait(workers->all_done, workers->lock, timout); + if (APR_TIMEUP == rv) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s, + APLOGNO() "h2_workers: waiting for idle workers to close, " + "still seeing %d workers living", + apr_atomic_read32(&workers->worker_count)); + continue; + } + } + if (i >= n) { + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, workers->s, + APLOGNO() "h2_workers: cleanup, %d idle workers " + "did not exit after %d seconds.", + apr_atomic_read32(&workers->worker_count), i); } apr_thread_mutex_unlock(workers->lock); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s, + "h2_workers: cleanup all workers terminated"); join_zombies(workers); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s, + "h2_workers: cleanup zombie workers joined"); return APR_SUCCESS; } @@ -363,6 +392,10 @@ h2_workers *h2_workers_create(server_rec *s, apr_pool_t *pchild, workers->max_workers = max_workers; workers->max_idle_duration = apr_time_from_sec((idle_secs > 0)? idle_secs : 10); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s, + "h2_workers: created with min=%d max=%d idle_timeout=%d sec", + workers->min_workers, workers->max_workers, + (int)apr_time_sec(workers->max_idle_duration)); /* FIXME: the fifo set we use here has limited capacity. Once the * set is full, connections with new requests do a wait. Unfortunately, * we have optimizations in place there that makes such waiting "unfair"