From 6082a0e2e301e4d4a731fe82948eb196585499b2 Mon Sep 17 00:00:00 2001
From: Eduard Bagdasaryan
Date: Sat, 13 Aug 2016 12:43:50 +1200
Subject: [PATCH] Make Squid death due to overloaded helpers optional.

Added on-persistent-overload=action option to helpers.

Helper overload is defined as running with an overflowing queue.
Persistent helper overload is [still] defined as being overloaded
for more than 3 minutes.

The default behavior is unchanged (*) -- the Squid worker dies with a
fatal error when attempting to submit a new request to a persistently
overloaded helper. This default behavior can also be configured
explicitly using on-persistent-overload=die.

With on-persistent-overload=ERR, when dealing with a persistently
overloaded helper, Squid immediately skips the helper request and
sends an ERR response to the caller. Squid informs the admin when it
starts and when it stops skipping helper requests due to persistent
overload.

The code had conflicting notions of an "overloaded helper". The
external ACL, URL rewriter, and store ID code used queueFull() to test
whether the new request would overflow the queue (and, hence, overload
the helper), but queueFull() itself did not check whether the queue was
full! It checked whether the queue was already overflowing. This
confusion resulted in that code scheduling one extra helper request
before enabling bypass. The code and its documentation are now more
consistent (and better match the "overload" terminology used by the new
configuration option, which also feels better than calling the helper
"full").

(*) Resolving the above confusion resulted in minor (one request)
differences in the number of helper requests queued by Squid for
external ACL, URL rewriting, and store ID helpers, with the adjusted
behavior [better] matching the documentation.
---
 doc/release-notes/release-4.sgml |  16 ++++
 src/cf.data.pre                  | 102 ++++++++++++++++------
 src/external_acl.cc              |   7 +-
 src/helper.cc                    | 145 +++++++++++++++++++++----------
 src/helper.h                     |  30 ++++---
 src/helper/ChildConfig.cc        |  13 +++
 src/helper/ChildConfig.h         |   8 ++
 src/redirect.cc                  |   6 +-
 8 files changed, 238 insertions(+), 89 deletions(-)

diff --git a/doc/release-notes/release-4.sgml b/doc/release-notes/release-4.sgml
index 430830054b..99261fabd6 100644
--- a/doc/release-notes/release-4.sgml
+++ b/doc/release-notes/release-4.sgml
@@ -238,6 +238,8 @@ This section gives a thorough account of those changes in three categories:

    auth_param

    New parameter queue-size= to set the maximum number
    of queued requests.
+   New parameter on-persistent-overload= to set the action taken
+   when the helper queue is overloaded.

    cache_peer
    New option auth-no-keytab to let GSSAPI implementation determine

@@ -257,6 +259,8 @@ This section gives a thorough account of those changes in three categories:

    external_acl_type
    New parameter queue-size= to set the maximum number
    of queued requests.
+   New parameter on-persistent-overload= to set the action taken
+   when the helper queue is overloaded.

    Format field updated to accept any logformat %macro code.

    http_port

@@ -334,14 +338,26 @@ This section gives a thorough account of those changes in three categories:

    sslcrtd_children
    New parameter queue-size= to set the maximum number
    of queued requests.
+   New parameter on-persistent-overload= to set the action taken
+   when the helper queue is overloaded.

    sslcrtvalidator_children
    New parameter queue-size= to set the maximum number
    of queued requests.
+   New parameter on-persistent-overload= to set the action taken
+   when the helper queue is overloaded.
+
+   store_id_children
+   New parameter queue-size= to set the maximum number
+   of queued requests.
+   New parameter on-persistent-overload= to set the action taken
+   when the helper queue is overloaded.

    url_rewrite_children
    New parameter queue-size= to set the maximum number
    of queued requests.
+   New parameter on-persistent-overload= to set the action taken
+   when the helper queue is overloaded.

diff --git a/src/cf.data.pre b/src/cf.data.pre
index a7604e387b..0209c2289b 100644
--- a/src/cf.data.pre
+++ b/src/cf.data.pre
@@ -566,7 +566,8 @@ DOC_START
    For Digest there is no default, this parameter is mandatory.
    For NTLM and Negotiate this parameter is ignored.

-   "children" numberofchildren [startup=N] [idle=N] [concurrency=N] [queue-size=N]
+   "children" numberofchildren [startup=N] [idle=N] [concurrency=N]
+       [queue-size=N] [on-persistent-overload=action]

    The maximum number of authenticator processes to spawn. If you
    start too few Squid will have to wait for them to process
@@ -591,10 +592,27 @@ DOC_START
    Concurrency must not be set unless it's known the helper
    supports the input format with channel-ID fields.

-   The queue-size= option sets the maximum number of queued
-   requests. If the queued requests exceed queue size for more
-   than 3 minutes then squid aborts its operation.
-   The default value is set to 2*numberofchildren/
+   The queue-size=N option sets the maximum number of queued
+   requests to N. The default maximum is 2*numberofchildren. Squid
+   is allowed to temporarily exceed the configured maximum, marking
+   the affected helper as "overloaded". If the helper overload
+   lasts more than 3 minutes, the action prescribed by the
+   on-persistent-overload option applies.
+
+   The on-persistent-overload=action option specifies Squid
+   reaction to a new helper request arriving when the helper
+   has been overloaded for more than 3 minutes already. The number
+   of queued requests determines whether the helper is overloaded
+   (see the queue-size option).
+
+   Two actions are supported:
+
+     die   Squid worker quits. This is the default behavior.
+
+     ERR   Squid treats the helper request as if it was
+           immediately submitted, and the helper immediately
+           replied with an ERR response. This action has no effect
+           on the already queued and in-progress helper requests.

    NOTE: NTLM and Negotiate schemes do not support concurrency
    in the Squid code module even though some helpers can.
@@ -5125,11 +5143,29 @@ DOC_START
    queue-size=N

-   Sets the maximum number of queued requests.
-   If the queued requests exceed queue size and redirector_bypass
-   configuration option is set, then redirector is bypassed. Otherwise, if
-   overloading persists squid may abort its operation.
-   The default value is set to 2*numberofchildren.
+   Sets the maximum number of queued requests to N. The default maximum
+   is 2*numberofchildren. If the queued requests exceed queue size and
+   redirector_bypass configuration option is set, then redirector is bypassed.
+   Otherwise, Squid is allowed to temporarily exceed the configured maximum,
+   marking the affected helper as "overloaded". If the helper overload lasts
+   more than 3 minutes, the action prescribed by the on-persistent-overload
+   option applies.
+
+   on-persistent-overload=action
+
+   Specifies Squid reaction to a new helper request arriving when the helper
+   has been overloaded for more than 3 minutes already. The number of queued
+   requests determines whether the helper is overloaded (see the queue-size
+   option).
+
+   Two actions are supported:
+
+     die   Squid worker quits. This is the default behavior.
+
+     ERR   Squid treats the helper request as if it was
+           immediately submitted, and the helper immediately
+           replied with an ERR response. This action has no effect
+           on the already queued and in-progress helper requests.
DOC_END

NAME: url_rewrite_host_header redirect_rewrites_host_header
@@ -5172,11 +5208,10 @@ LOC: Config.onoff.redirector_bypass
DEFAULT: off
DOC_START
    When this is 'on', a request will not go through the
-   redirector if all the helpers are busy. If this is 'off'
-   and the redirector queue grows too large, Squid will exit
-   with a FATAL error and ask you to increase the number of
-   redirectors. You should only enable this if the redirectors
-   are not critical to your caching system. If you use
+   redirector if all the helpers are busy. If this is 'off' and the
+   redirector queue grows too large, the action is prescribed by the
+   on-persistent-overload option. You should only enable this if the
+   redirectors are not critical to your caching system. If you use
    redirectors for access control, and you enable this option,
    users may have access to pages they should not be allowed
    to request.
@@ -5332,11 +5367,29 @@ DOC_START
    queue-size=N

-   Sets the maximum number of queued requests.
-   If the queued requests exceed queue size and store_id_bypass
-   configuration option is set, then storeID helper is bypassed. Otherwise,
-   if overloading persists squid may abort its operation.
-   The default value is set to 2*numberofchildren.
+   Sets the maximum number of queued requests to N. The default maximum
+   is 2*numberofchildren. If the queued requests exceed queue size and
+   store_id_bypass configuration option is set, then the store ID helper is
+   bypassed. Otherwise, Squid is allowed to temporarily exceed the configured
+   maximum, marking the affected helper as "overloaded". If the helper
+   overload lasts more than 3 minutes, the action prescribed by the
+   on-persistent-overload option applies.
+
+   on-persistent-overload=action
+
+   Specifies Squid reaction to a new helper request arriving when the helper
+   has been overloaded for more than 3 minutes already. The number of queued
+   requests determines whether the helper is overloaded (see the queue-size
+   option).
+
+   Two actions are supported:
+
+     die   Squid worker quits. This is the default behavior.
+
+     ERR   Squid treats the helper request as if it was
+           immediately submitted, and the helper immediately
+           replied with an ERR response. This action has no effect
+           on the already queued and in-progress helper requests.
DOC_END

NAME: store_id_access storeurl_rewrite_access
@@ -5359,11 +5412,10 @@ LOC: Config.onoff.store_id_bypass
DEFAULT: on
DOC_START
    When this is 'on', a request will not go through the
-   helper if all helpers are busy. If this is 'off'
-   and the helper queue grows too large, Squid will exit
-   with a FATAL error and ask you to increase the number of
-   helpers. You should only enable this if the helperss
-   are not critical to your caching system. If you use
+   helper if all helpers are busy. If this is 'off' and the helper
+   queue grows too large, the action is prescribed by the
+   on-persistent-overload option. You should only enable this if the
+   helpers are not critical to your caching system. If you use
    helpers for critical caching components, and you enable this option,
    users may not get objects from cache.
This options sets default queue-size option of the store_id_children diff --git a/src/external_acl.cc b/src/external_acl.cc index c34f566d6f..779c3b2c69 100644 --- a/src/external_acl.cc +++ b/src/external_acl.cc @@ -628,7 +628,8 @@ aclMatchExternal(external_acl_data *acl, ACLFilledChecklist *ch) if (!entry) { debugs(82, 2, HERE << acl->def->name << "(\"" << key << "\") = lookup needed"); - if (!acl->def->theHelper->queueFull()) { + // TODO: All other helpers allow temporary overload. Should not we? + if (!acl->def->theHelper->willOverload()) { debugs(82, 2, HERE << "\"" << key << "\": queueing a call."); if (!ch->goAsync(ExternalACLLookup::Instance())) debugs(82, 2, "\"" << key << "\": no async support!"); @@ -637,12 +638,12 @@ aclMatchExternal(external_acl_data *acl, ACLFilledChecklist *ch) } else { if (!staleEntry) { debugs(82, DBG_IMPORTANT, "WARNING: external ACL '" << acl->def->name << - "' queue overload. Request rejected '" << key << "'."); + "' queue full. Request rejected '" << key << "'."); external_acl_message = "SYSTEM TOO BUSY, TRY AGAIN LATER"; return ACCESS_DUNNO; } else { debugs(82, DBG_IMPORTANT, "WARNING: external ACL '" << acl->def->name << - "' queue overload. Using stale result. '" << key << "'."); + "' queue full. Using stale result. '" << key << "'."); entry = staleEntry; /* Fall thru to processing below */ } diff --git a/src/helper.cc b/src/helper.cc index fe52569d5b..715f260b7d 100644 --- a/src/helper.cc +++ b/src/helper.cc @@ -45,8 +45,8 @@ static IOCB helperStatefulHandleRead; static void helperServerFree(helper_server *srv); static void helperStatefulServerFree(helper_stateful_server *srv); static void Enqueue(helper * hlp, Helper::Xaction *); -static helper_server *GetFirstAvailable(helper * hlp); -static helper_stateful_server *StatefulGetFirstAvailable(statefulhelper * hlp); +static helper_server *GetFirstAvailable(const helper * hlp); +static helper_stateful_server *StatefulGetFirstAvailable(const statefulhelper * hlp); static void helperDispatch(helper_server * srv, Helper::Xaction * r); static void helperStatefulDispatch(helper_stateful_server * srv, Helper::Xaction * r); static void helperKickQueue(helper * hlp); @@ -379,54 +379,101 @@ helper::submitRequest(Helper::Xaction *r) else Enqueue(this, r); - if (!queueFull()) { - full_time = 0; - } else if (!full_time) { - debugs(84, 3, id_name << " queue became full"); - full_time = squid_curtime; + syncQueueStats(); +} + +/// handles helperSubmit() and helperStatefulSubmit() failures +static void +SubmissionFailure(helper *hlp, HLPCB *callback, void *data) +{ + auto result = Helper::Error; + if (!hlp) { + debugs(84, 3, "no helper"); + result = Helper::Unknown; } + // else pretend the helper has responded with ERR + + callback(data, Helper::Reply(result)); } void helperSubmit(helper * hlp, const char *buf, HLPCB * callback, void *data) { - if (hlp == NULL) { - debugs(84, 3, "helperSubmit: hlp == NULL"); - Helper::Reply const nilReply(Helper::Unknown); - callback(data, nilReply); - return; - } - hlp->prepSubmit(); - hlp->submit(buf, callback, data); + if (!hlp || !hlp->trySubmit(buf, callback, data)) + SubmissionFailure(hlp, callback, data); } +/// whether queuing an additional request would overload the helper bool helper::queueFull() const { + return stats.queue_size >= static_cast(childs.queue_size); +} + +bool +helper::overloaded() const { return stats.queue_size > static_cast(childs.queue_size); } -/// prepares the helper for request submission via trySubmit() or helperSubmit() -/// currently maintains 
full_time and kills Squid if the helper remains full for too long +/// synchronizes queue-dependent measurements with the current queue state void -helper::prepSubmit() +helper::syncQueueStats() { - if (!queueFull()) - full_time = 0; - else if (!full_time) // may happen here if reconfigure decreases capacity - full_time = squid_curtime; - else if (squid_curtime - full_time > 180) - fatalf("Too many queued %s requests", id_name); + if (overloaded()) { + if (overloadStart) { + debugs(84, 5, id_name << " still overloaded; dropped " << droppedRequests); + } else { + overloadStart = squid_curtime; + debugs(84, 3, id_name << " became overloaded"); + } + } else { + if (overloadStart) { + debugs(84, 5, id_name << " is no longer overloaded"); + if (droppedRequests) { + debugs(84, DBG_IMPORTANT, "helper " << id_name << + " is no longer overloaded after dropping " << droppedRequests << + " requests in " << (squid_curtime - overloadStart) << " seconds"); + droppedRequests = 0; + } + overloadStart = 0; + } + } } +/// prepares the helper for request submission +/// returns true if and only if the submission should proceed +/// may kill Squid if the helper remains overloaded for too long bool -helper::trySubmit(const char *buf, HLPCB * callback, void *data) +helper::prepSubmit() { - prepSubmit(); + // re-sync for the configuration may have changed since the last submission + syncQueueStats(); + + // Nothing special to do if the new request does not overload (i.e., the + // queue is not even full yet) or only _starts_ overloading this helper + // (i.e., the queue is currently at its limit). + if (!overloaded()) + return true; - if (queueFull()) { - debugs(84, DBG_IMPORTANT, id_name << " drops request due to a full queue"); - return false; // request was ignored + if (squid_curtime - overloadStart <= 180) + return true; // also OK: overload has not persisted long enough to panic + + if (childs.onPersistentOverload == Helper::ChildConfig::actDie) + fatalf("Too many queued %s requests; see on-persistent-overload.", id_name); + + if (!droppedRequests) { + debugs(84, DBG_IMPORTANT, "WARNING: dropping requests to overloaded " << + id_name << " helper configured with on-persistent-overload=err"); } + ++droppedRequests; + debugs(84, 3, "failed to send " << droppedRequests << " helper requests to " << id_name); + return false; +} + +bool +helper::trySubmit(const char *buf, HLPCB * callback, void *data) +{ + if (!prepSubmit()) + return false; // request was dropped submit(buf, callback, data); // will send or queue return true; // request submitted or queued @@ -445,14 +492,19 @@ helper::submit(const char *buf, HLPCB * callback, void *data) void helperStatefulSubmit(statefulhelper * hlp, const char *buf, HLPCB * callback, void *data, helper_stateful_server * lastserver) { - if (hlp == NULL) { - debugs(84, 3, "helperStatefulSubmit: hlp == NULL"); - Helper::Reply const nilReply(Helper::Unknown); - callback(data, nilReply); - return; - } - hlp->prepSubmit(); - hlp->submit(buf, callback, data, lastserver); + if (!hlp || !hlp->trySubmit(buf, callback, data, lastserver)) + SubmissionFailure(hlp, callback, data); +} + +/// If possible, submit request. Otherwise, either kill Squid or return false. 
+bool +statefulhelper::trySubmit(const char *buf, HLPCB * callback, void *data, helper_stateful_server *lastserver) +{ + if (!prepSubmit()) + return false; // request was dropped + + submit(buf, callback, data, lastserver); // will send or queue + return true; // request submitted or queued } void statefulhelper::submit(const char *buf, HLPCB * callback, void *data, helper_stateful_server * lastserver) @@ -477,12 +529,7 @@ void statefulhelper::submit(const char *buf, HLPCB * callback, void *data, helpe debugs(84, DBG_DATA, "placeholder: '" << r->request.placeholder << "', " << Raw("buf", buf, (!buf?0:strlen(buf)))); - if (!queueFull()) { - full_time = 0; - } else if (!full_time) { - debugs(84, 3, id_name << " queue became full"); - full_time = squid_curtime; - } + syncQueueStats(); } /** @@ -570,6 +617,11 @@ helper::packStatsInto(Packable *p, const char *label) const " P\tPLACEHOLDER\n", 101); } +bool +helper::willOverload() const { + return queueFull() && !(childs.needNew() || GetFirstAvailable(this)); +} + void helperShutdown(helper * hlp) { @@ -1186,7 +1238,7 @@ helper::nextRequest() } static helper_server * -GetFirstAvailable(helper * hlp) +GetFirstAvailable(const helper * hlp) { dlink_node *n; helper_server *srv; @@ -1217,14 +1269,13 @@ GetFirstAvailable(helper * hlp) selected = srv; } - /* Check for overload */ if (!selected) { debugs(84, 5, "GetFirstAvailable: None available."); return NULL; } if (selected->stats.pending >= (hlp->childs.concurrency ? hlp->childs.concurrency : 1)) { - debugs(84, 3, "GetFirstAvailable: Least-loaded helper is overloaded!"); + debugs(84, 3, "GetFirstAvailable: Least-loaded helper is fully loaded!"); return NULL; } @@ -1233,7 +1284,7 @@ GetFirstAvailable(helper * hlp) } static helper_stateful_server * -StatefulGetFirstAvailable(statefulhelper * hlp) +StatefulGetFirstAvailable(const statefulhelper * hlp) { dlink_node *n; helper_stateful_server *srv = NULL; diff --git a/src/helper.h b/src/helper.h index 04e79cde9b..e0821ae2b0 100644 --- a/src/helper.h +++ b/src/helper.h @@ -49,13 +49,13 @@ public: * idle: no processes are working on requests (and no requests are queued); * normal: some, but not all processes are working (and no requests are queued); * busy: all processes are working (and some requests are possibly queued); - * full: all processes are working and at least 2*#processes requests are queued. + * overloaded: a busy helper with more than queue-size requests in the queue. * - * A "busy" helper queues new requests and issues a WARNING every 10 minutes or so. - * A "full" helper either drops new requests or keeps queuing them, depending on + * A busy helper queues new requests and issues a WARNING every 10 minutes or so. + * An overloaded helper either drops new requests or keeps queuing them, depending on * whether the caller can handle dropped requests (trySubmit vs helperSubmit APIs). - * An attempt to use a "full" helper that has been "full" for 3+ minutes kills worker. - * Given enough load, all helpers except for external ACL will make such attempts. + * If an overloaded helper has been overloaded for 3+ minutes, an attempt to use + * it results in on-persistent-overload action, which may kill worker. 
*/ class helper { @@ -66,7 +66,8 @@ public: cmdline(NULL), id_name(name), ipc_type(0), - full_time(0), + droppedRequests(0), + overloadStart(0), last_queue_warn(0), last_restart(0), timeout(0), @@ -77,13 +78,10 @@ public: } ~helper(); - /// whether at least one more request can be successfully submitted - bool queueFull() const; - /// \returns next request in the queue, or nil. Helper::Xaction *nextRequest(); - ///< If not full, submit request. Otherwise, either kill Squid or return false. + /// If possible, submit request. Otherwise, either kill Squid or return false. bool trySubmit(const char *buf, HLPCB * callback, void *data); /// Submits a request to the helper or add it to the queue if none of @@ -92,6 +90,9 @@ public: /// Dump some stats about the helper state to a Packable object void packStatsInto(Packable *p, const char *label = NULL) const; + /// whether the helper will be in "overloaded" state after one more request + /// already overloaded helpers return true + bool willOverload() const; public: wordlist *cmdline; @@ -101,7 +102,8 @@ public: Helper::ChildConfig childs; ///< Configuration settings for number running. int ipc_type; Ip::Address addr; - time_t full_time; ///< when a full helper became full (zero for non-full helpers) + unsigned int droppedRequests; ///< requests not sent during helper overload + time_t overloadStart; ///< when the helper became overloaded (zero if it is not) time_t last_queue_warn; time_t last_restart; time_t timeout; ///< Requests timeout @@ -120,7 +122,10 @@ public: protected: friend void helperSubmit(helper * hlp, const char *buf, HLPCB * callback, void *data); - void prepSubmit(); + bool queueFull() const; + bool overloaded() const; + void syncQueueStats(); + bool prepSubmit(); void submit(const char *buf, HLPCB * callback, void *data); }; @@ -138,6 +143,7 @@ public: private: friend void helperStatefulSubmit(statefulhelper * hlp, const char *buf, HLPCB * callback, void *data, helper_stateful_server * lastserver); void submit(const char *buf, HLPCB * callback, void *data, helper_stateful_server *lastserver); + bool trySubmit(const char *buf, HLPCB * callback, void *data, helper_stateful_server *lastserver); }; /** diff --git a/src/helper/ChildConfig.cc b/src/helper/ChildConfig.cc index 12ff2c0196..e1ef0ef404 100644 --- a/src/helper/ChildConfig.cc +++ b/src/helper/ChildConfig.cc @@ -24,6 +24,7 @@ Helper::ChildConfig::ChildConfig(): n_running(0), n_active(0), queue_size(0), + onPersistentOverload(actDie), defaultQueueSize(true) {} @@ -35,6 +36,7 @@ Helper::ChildConfig::ChildConfig(const unsigned int m): n_running(0), n_active(0), queue_size(2 * m), + onPersistentOverload(actDie), defaultQueueSize(true) {} @@ -48,6 +50,7 @@ Helper::ChildConfig::updateLimits(const Helper::ChildConfig &rhs) n_idle = rhs.n_idle; concurrency = rhs.concurrency; queue_size = rhs.queue_size; + onPersistentOverload = rhs.onPersistentOverload; defaultQueueSize = rhs.defaultQueueSize; return *this; } @@ -96,6 +99,16 @@ Helper::ChildConfig::parseConfig() } else if (strncmp(token, "queue-size=", 11) == 0) { queue_size = xatoui(token + 11); defaultQueueSize = false; + } else if (strncmp(token, "on-persistent-overload=", 23) == 0) { + const SBuf action(token + 23); + if (action.cmp("ERR") == 0) + onPersistentOverload = actErr; + else if (action.cmp("die") == 0) + onPersistentOverload = actDie; + else { + debugs(0, DBG_CRITICAL, "ERROR: Unsupported on-persistent-overloaded action: " << action); + self_destruct(); + } } else { debugs(0, DBG_PARSE_NOTE(DBG_IMPORTANT), "ERROR: 
Undefined option: " << token << "."); self_destruct(); diff --git a/src/helper/ChildConfig.h b/src/helper/ChildConfig.h index 8e3de227a2..f03e692077 100644 --- a/src/helper/ChildConfig.h +++ b/src/helper/ChildConfig.h @@ -90,6 +90,14 @@ public: */ unsigned int queue_size; + /// how to handle a serious problem with a helper request submission + enum SubmissionErrorHandlingAction { + actDie, ///< kill the caller process (i.e., Squid worker) + actErr ///< drop the request and send an error to the caller + }; + /// how to handle a new request for helper that was overloaded for too long + SubmissionErrorHandlingAction onPersistentOverload; + /** * True if the default queue size is used. * Needed in the cases where we need to adjust default queue_size in diff --git a/src/redirect.cc b/src/redirect.cc index 945d58bff1..5098283528 100644 --- a/src/redirect.cc +++ b/src/redirect.cc @@ -295,7 +295,9 @@ redirectStart(ClientHttpRequest * http, HLPCB * handler, void *data) assert(handler); debugs(61, 5, "redirectStart: '" << http->uri << "'"); - if (Config.onoff.redirector_bypass && redirectors->queueFull()) { + // TODO: Deprecate Config.onoff.redirector_bypass in favor of either + // onPersistentOverload or a new onOverload option that applies to all helpers. + if (Config.onoff.redirector_bypass && redirectors->willOverload()) { /* Skip redirector if the queue is full */ ++redirectorBypassed; Helper::Reply bypassReply; @@ -319,7 +321,7 @@ storeIdStart(ClientHttpRequest * http, HLPCB * handler, void *data) assert(handler); debugs(61, 5, "storeIdStart: '" << http->uri << "'"); - if (Config.onoff.store_id_bypass && storeIds->queueFull()) { + if (Config.onoff.store_id_bypass && storeIds->willOverload()) { /* Skip StoreID Helper if the queue is full */ ++storeIdBypassed; Helper::Reply bypassReply; -- 2.39.5
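
For illustration only (not part of the patch), a minimal squid.conf sketch
showing how the new option combines with the existing children options
documented above. The helper path and the numbers are hypothetical; when
queue-size= or on-persistent-overload= is omitted, the defaults described in
the patch apply (2*numberofchildren and die, respectively).

    # Hypothetical example: keep the default "die" behavior for authentication,
    # but skip a persistently overloaded URL rewriter instead of killing the worker.
    auth_param basic children 20 startup=5 idle=2 queue-size=40 on-persistent-overload=die
    url_rewrite_program /usr/local/bin/rewrite-helper
    url_rewrite_children 10 startup=2 idle=1 queue-size=20 on-persistent-overload=ERR

With ERR, callers of a persistently overloaded rewriter receive an immediate
ERR reply, and cache.log records when Squid starts dropping requests (the
"WARNING: dropping requests to overloaded ... helper" message added by this
patch) and how many requests were dropped once the overload ends.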