From: Stefan Eissing <icing@apache.org>
Date: Thu, 15 Jul 2021 08:29:19 +0000 (+0000)
Subject:  * mod_http2:
X-Git-Tag: 2.5.0-alpha2-ci-test-only~894
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2aa1c1415a733a29bf7e501805fb4329c7752788;p=thirdparty%2Fapache%2Fhttpd.git

 * mod_http2:
   - Added a timeout to h2 worker cleanup to exit latest after 5 seconds of
     waiting on idle workers to terminate. This happens after all connections
     have been processed. a WARNING is logged in case workers lagged behind.


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1891566 13f79535-47bb-0310-9956-ffa450edef68
---

diff --git a/changes-entries/h2_workers_dynamic.txt b/changes-entries/h2_workers_dynamic.txt
index 1d3115c7249..2de102ea21a 100644
--- a/changes-entries/h2_workers_dynamic.txt
+++ b/changes-entries/h2_workers_dynamic.txt
@@ -6,6 +6,9 @@
      create H2MinWorkers threads and add up to H2MaxWorkers when needed. These
      additional workers time out when idle after H2MaxWorkerIdleSeconds and
      disappear again.
+   - Added a timeout to h2 worker cleanup to exit latest after 5 seconds of
+     waiting on idle workers to terminate. This happens after all connections
+     have been processed. a WARNING is logged in case workers lagged behind.
    - When the shutdown of a child is detected (e.g. graceful shutdown), the
      module will terminate all idle workers above H2MinWorkers right away.
      This detection currently only happens when a HTTP/2 connection is active.
diff --git a/modules/http2/h2_session.c b/modules/http2/h2_session.c
index 4cc2c96be30..453a98fd36c 100644
--- a/modules/http2/h2_session.c
+++ b/modules/http2/h2_session.c
@@ -774,7 +774,7 @@ static apr_status_t session_cleanup(h2_session *session, const char *trigger)
          * connection when sending the next request, this has the effect
          * that at least this one request will fail.
          */
-        ap_log_cerror(APLOG_MARK, APLOG_WARNING, 0, c,
+        ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c,
                       H2_SSSN_LOG(APLOGNO(03199), session, 
                       "connection disappeared without proper "
                       "goodbye, clients will be confused, should not happen"));
@@ -799,13 +799,17 @@ static apr_status_t session_pool_cleanup(void *data)
     h2_session *session;
     
     if ((session = h2_ctx_get_session(c))) {
+        int mpm_state = 0;
+        int level;
+
+        ap_mpm_query(AP_MPMQ_MPM_STATE, &mpm_state);
+        level = (AP_MPMQ_STOPPING == mpm_state)? APLOG_DEBUG : APLOG_WARNING;
         /* if the session is still there, now is the last chance
          * to perform cleanup. Normally, cleanup should have happened
-         * earlier in the connection pre_close. Main reason is that
-         * any ongoing requests on secondary connections might still access
-         * data which has, at this time, already been freed. An example
-         * is mod_ssl that uses request hooks. */
-        ap_log_cerror(APLOG_MARK, APLOG_WARNING, 0, c,
+         * earlier in the connection pre_close.
+         * However, when the server is stopping, it may shutdown connections
+         * without running the pre_close hooks. Do not want about that. */
+        ap_log_cerror(APLOG_MARK, level, 0, c,
                       H2_SSSN_LOG(APLOGNO(10020), session, 
                       "session cleanup triggered by pool cleanup. "
                       "this should have happened earlier already."));
diff --git a/modules/http2/h2_version.h b/modules/http2/h2_version.h
index 38a21904449..785a172386f 100644
--- a/modules/http2/h2_version.h
+++ b/modules/http2/h2_version.h
@@ -27,7 +27,7 @@
  * @macro
  * Version number of the http2 module as c string
  */
-#define MOD_HTTP2_VERSION "1.15.21"
+#define MOD_HTTP2_VERSION "1.15.22"
 
 /**
  * @macro
@@ -35,7 +35,7 @@
  * release. This is a 24 bit number with 8 bits for major number, 8 bits
  * for minor and 8 bits for patch. Version 1.2.3 becomes 0x010203.
  */
-#define MOD_HTTP2_VERSION_NUM 0x010f15
+#define MOD_HTTP2_VERSION_NUM 0x010f16
 
 
 #endif /* mod_h2_h2_version_h */
diff --git a/modules/http2/h2_workers.c b/modules/http2/h2_workers.c
index 7832ac0e383..601c8edb4c8 100644
--- a/modules/http2/h2_workers.c
+++ b/modules/http2/h2_workers.c
@@ -94,7 +94,7 @@ static apr_status_t activate_slot(h2_workers *workers, h2_slot *slot)
         if (rv != APR_SUCCESS) goto cleanup;
     }
     
-    ap_log_error(APLOG_MARK, APLOG_WARNING, 0, workers->s,
+    ap_log_error(APLOG_MARK, APLOG_TRACE3, 0, workers->s,
                  "h2_workers: new thread for slot %d", slot->id); 
 
     /* thread will either immediately start work or add itself
@@ -319,16 +319,45 @@ static void workers_abort_idle(h2_workers *workers)
 static apr_status_t workers_pool_cleanup(void *data)
 {
     h2_workers *workers = data;
+    apr_time_t timout = apr_time_from_sec(1);
+    apr_status_t rv;
+    int i, n = 5;
 
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s,
+                 "h2_workers: cleanup %d workers idling",
+                 (int)apr_atomic_read32(&workers->worker_count));
     workers_abort_idle(workers);
 
-    /* wait for all the workers to become zombies and join them */
+    /* wait for all the workers to become zombies and join them.
+     * this gets called after the mpm shuts down and all connections
+     * have either been handled (graceful) or we are forced exiting
+     * (ungrateful). Either way, we show limited patience. */
     apr_thread_mutex_lock(workers->lock);
-    if (apr_atomic_read32(&workers->worker_count)) {
-        apr_thread_cond_wait(workers->all_done, workers->lock);
+    for (i = 0; i < n; ++i) {
+        if (!apr_atomic_read32(&workers->worker_count)) {
+            break;
+        }
+        rv = apr_thread_cond_timedwait(workers->all_done, workers->lock, timout);
+        if (APR_TIMEUP == rv) {
+            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s,
+                         APLOGNO() "h2_workers: waiting for idle workers to close, "
+                         "still seeing %d workers living",
+                         apr_atomic_read32(&workers->worker_count));
+            continue;
+        }
+    }
+    if (i >= n) {
+        ap_log_error(APLOG_MARK, APLOG_WARNING, 0, workers->s,
+                     APLOGNO() "h2_workers: cleanup, %d idle workers "
+                     "did not exit after %d seconds.",
+                     apr_atomic_read32(&workers->worker_count), i);
     }
     apr_thread_mutex_unlock(workers->lock);
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s,
+                 "h2_workers: cleanup all workers terminated");
     join_zombies(workers);
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s,
+                 "h2_workers: cleanup zombie workers joined");
 
     return APR_SUCCESS;
 }
@@ -363,6 +392,10 @@ h2_workers *h2_workers_create(server_rec *s, apr_pool_t *pchild,
     workers->max_workers = max_workers;
     workers->max_idle_duration = apr_time_from_sec((idle_secs > 0)? idle_secs : 10);
 
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, workers->s,
+                 "h2_workers: created with min=%d max=%d idle_timeout=%d sec",
+                 workers->min_workers, workers->max_workers,
+                 (int)apr_time_sec(workers->max_idle_duration));
     /* FIXME: the fifo set we use here has limited capacity. Once the
      * set is full, connections with new requests do a wait. Unfortunately,
      * we have optimizations in place there that makes such waiting "unfair"