Preserve caller context across ping timeout events (#635)
author     Eduard Bagdasaryan <eduard.bagdasaryan@measurement-factory.com>
           Thu, 4 Jun 2020 11:23:31 +0000 (11:23 +0000)
committer  Squid Anubis <squid-anubis@squid-cache.org>
           Fri, 5 Jun 2020 12:43:09 +0000 (12:43 +0000)
PeerSelector was losing its transaction context while handling an
event-driven timeout. Also, the old code that generated the
HandlePingTimeout event was problematic for two reasons:

* It used a raw 'this' pointer for a cbdata-protected child class. Sooner
  or later, code like this leads to "real" bugs similar to those fixed in
  ce9bb79, 38a9f55 / 9e64d84, and 4299f87.

* It (mis)used the event API for a transaction-specific purpose.

We created a new "ping timeout service" that keeps at most one
transaction-agnostic event.h timeout at any given time while maintaining
an internal list of transactions awaiting a ping response. A similar
architecture was used for the HappyConnOpener helper services.

We rejected the alternative idea of enhancing the event API to support
asynchronous calls because event.cc internals were never meant to handle
thousands of concurrently pending transaction-related timeouts on busy
proxies. While it would be possible to enhance those internals as well,
timeout services offer even better performance, are much easier to
enhance to support timeout reconfigurations, and offer better "Squid
state" debugging.

src/PeerSelectState.h
src/PingData.h
src/SquidTime.h
src/enums.h
src/neighbors.cc
src/peer_select.cc

diff --git a/src/PeerSelectState.h b/src/PeerSelectState.h
index 3dd4cb9570282afe2826c68fdb15ef589e20e496..2d3e744b5f1150d10679a8c8f54669fbd9c7d732 100644
@@ -85,6 +85,15 @@ public:
     /// a single selection loop iteration: attempts to add more destinations
     void selectMore();
 
+    /// switches into the PING_WAITING state (and associated timeout monitoring)
+    void startPingWaiting();
+
+    /// terminates ICP ping timeout monitoring
+    void cancelPingTimeoutMonitoring();
+
+    /// called when the given selector should stop expecting ICP ping responses
+    static void HandlePingTimeout(PeerSelector *);
+
     HttpRequest *request;
     AccessLogEntry::Pointer al; ///< info for the future access.log entry
     StoreEntry *entry;
@@ -122,7 +131,6 @@ protected:
     static IRCB HandlePingReply;
     static ACLCB CheckAlwaysDirectDone;
     static ACLCB CheckNeverDirectDone;
-    static EVH HandlePingTimeout;
 
 private:
     Acl::Answer always_direct;
diff --git a/src/PingData.h b/src/PingData.h
index 1f4a216477e567b233cf2e306e1ce45f1e417fc7..a1fc8501fd2a42b45b948356f13843cfe648bd97 100644
@@ -9,12 +9,29 @@
 #ifndef SQUID_PINGDATA_H
 #define SQUID_PINGDATA_H
 
+#include "mem/PoolingAllocator.h"
+
+#include <map>
+
+class PeerSelector;
+class PeerSelectorPingMonitor;
+
+typedef std::pair<timeval, PeerSelector *> WaitingPeerSelector;
+/// waiting PeerSelector objects, ordered by their absolute deadlines
+typedef std::multimap<timeval, PeerSelector *, std::less<timeval>, PoolingAllocator<WaitingPeerSelector> > WaitingPeerSelectors;
+typedef WaitingPeerSelectors::iterator WaitingPeerSelectorPosition;
+
+/// ICP probing of cache_peers during peer selection
 class ping_data
 {
 
 public:
     ping_data();
 
+    /// no ICP responses are expected beyond the returned absolute time
+    /// \returns start + timeout
+    timeval deadline() const;
+
     struct timeval start;
 
     struct timeval stop;
@@ -25,6 +42,11 @@ public:
     int timedout;
     int w_rtt;
     int p_rtt;
+
+private:
+    friend PeerSelectorPingMonitor;
+    /// maintained by PeerSelectorPingMonitor
+    WaitingPeerSelectorPosition monitorRegistration;
 };
 
 #endif /* SQUID_PINGDATA_H */
diff --git a/src/SquidTime.h b/src/SquidTime.h
index 2d77b951c9e5592c06a9c016178acb5f3668dce7..eda43890b8b4c9cbdd21220e83be6ce49447e5a9 100644
@@ -56,6 +56,47 @@ public:
     virtual void tick();
 };
 
+// TODO: Remove direct timercmp() calls in legacy code.
+
+inline bool
+operator <(const timeval &a, const timeval &b)
+{
+    return timercmp(&a, &b, <);
+}
+
+inline bool
+operator >(const timeval &a, const timeval &b)
+{
+    return timercmp(&a, &b, >);
+}
+
+inline bool
+operator !=(const timeval &a, const timeval &b)
+{
+    return timercmp(&a, &b, !=);
+}
+
+// Operators for timeval below avoid timercmp() because Linux timeradd(3) manual
+// page says that their timercmp() versions "do not work" on some platforms.
+
+inline bool
+operator <=(const timeval &a, const timeval &b)
+{
+    return !(a > b);
+}
+
+inline bool
+operator >=(const timeval &a, const timeval &b)
+{
+    return !(a < b);
+}
+
+inline bool
+operator ==(const timeval &a, const timeval &b)
+{
+    return !(a != b);
+}
+
 namespace Time
 {
 
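
As a quick illustration of the operators added above, a minimal sketch
with hypothetical values (it assumes Squid's SquidTime.h is on the
include path, so it only builds inside the Squid tree):

    #include "SquidTime.h"
    #include <cassert>

    static void checkTimevalOperators()
    {
        timeval a; a.tv_sec = 10; a.tv_usec = 500000; // 10.5 s
        timeval b; b.tv_sec = 10; b.tv_usec = 750000; // 10.75 s
        assert(a < b);            // timercmp()-based comparison
        assert(a <= b && a != b); // derived forms avoid timercmp()
        assert(!(a == b));
    }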
diff --git a/src/enums.h b/src/enums.h
index 2297d5c9b3e7fe25a1ef4b5115e6b8464c5c58ba..ea410ecbee91acca678bd201f75474b6a53f8a93 100644
@@ -37,8 +37,12 @@ typedef enum _mem_status_t {
 } mem_status_t;
 
 typedef enum {
+    /// Has not considered whether to send ICP queries to peers yet.
     PING_NONE,
+    /// Sent ICP queries to peers and still awaiting responses.
     PING_WAITING,
+    /// Not waiting for ICP responses now and will not send ICP queries later.
+    /// The ICP queries may (or may not) have been sent earlier.
     PING_DONE
 } ping_status_t;
 
diff --git a/src/neighbors.cc b/src/neighbors.cc
index d908e7f9ade5035e3e0a10206c25a014f6b4617b..c3dc609798ddfcab2a754f363e9f33e26bf92072 100644
@@ -1441,7 +1441,7 @@ peerCountMcastPeersCreateAndSend(CachePeer * const p)
     reqnum = icpSetCacheKey((const cache_key *)fake->key);
     icpCreateAndSend(ICP_QUERY, 0, url, reqnum, 0,
                      icpOutgoingConn->fd, p->in_addr, psstate->al);
-    fake->ping_status = PING_WAITING;
+    fake->ping_status = PING_WAITING; // TODO: refactor to use PeerSelector::startPingWaiting()
     eventAdd("peerCountMcastPeersDone",
              peerCountMcastPeersDone,
              psstate,
diff --git a/src/peer_select.cc b/src/peer_select.cc
index 0f0dd9ce6fd60c0ca256c36e8d05fc0b62265c3b..49a835ec13f67dfef0e2a56e96cf54d496c90171 100644
@@ -10,6 +10,7 @@
 
 #include "squid.h"
 #include "acl/FilledChecklist.h"
+#include "base/AsyncCbdataCalls.h"
 #include "base/InstanceId.h"
 #include "CachePeer.h"
 #include "carp.h"
@@ -34,6 +35,7 @@
 #include "SquidConfig.h"
 #include "SquidTime.h"
 #include "Store.h"
+#include "util.h" // for tvSubDsec() which should be in SquidTime.h
 
 /**
  * A CachePeer which has been selected as a possible destination.
@@ -99,6 +101,132 @@ operator <<(std::ostream &os, const PeerSelectionDumper &fsd)
     return os;
 }
 
+/// An ICP ping timeout service.
+/// Protects event.cc (which is designed to handle a few unrelated timeouts)
+/// from exposure to thousands of ping-related timeouts on busy proxies.
+class PeerSelectorPingMonitor
+{
+public:
+    /// registers the given selector to be notified about the ICP ping timeout
+    void monitor(PeerSelector *);
+
+    /// removes a PeerSelector from the waiting list
+    void forget(PeerSelector *);
+
+    /// \returns a (nil) registration of a non-waiting peer selector
+    WaitingPeerSelectorPosition npos() { return selectors.end(); }
+
+private:
+    static void NoteWaitOver(void *monitor);
+
+    void startWaiting();
+    void abortWaiting();
+    void noteWaitOver();
+
+    WaitingPeerSelectors selectors; ///< \see WaitingPeerSelectors
+};
+
+/// monitors all PeerSelector ICP ping timeouts
+static PeerSelectorPingMonitor &
+PingMonitor()
+{
+    static const auto Instance = new PeerSelectorPingMonitor();
+    return *Instance;
+}
+
+/* PeerSelectorPingMonitor */
+
+/// PeerSelectorPingMonitor::noteWaitOver() wrapper
+void
+PeerSelectorPingMonitor::NoteWaitOver(void *raw)
+{
+    assert(raw);
+    static_cast<PeerSelectorPingMonitor*>(raw)->noteWaitOver();
+}
+
+/// schedules a single event to represent all waiting selectors
+void
+PeerSelectorPingMonitor::startWaiting()
+{
+    assert(!selectors.empty());
+    const auto interval = tvSubDsec(current_time, selectors.begin()->first);
+    eventAdd("PeerSelectorPingMonitor::NoteWaitOver", &PeerSelectorPingMonitor::NoteWaitOver, this, interval, 0, false);
+}
+
+/// undoes an earlier startWaiting() call
+void
+PeerSelectorPingMonitor::abortWaiting()
+{
+    // our event may be already in the AsyncCallQueue but that is OK:
+    // such queued calls cannot accumulate, and we ignore any stale ones
+    eventDelete(&PeerSelectorPingMonitor::NoteWaitOver, nullptr);
+}
+
+/// calls back all ready PeerSelectors and continues to wait for others
+void
+PeerSelectorPingMonitor::noteWaitOver()
+{
+    while (!selectors.empty() && current_time >= selectors.begin()->first) {
+        const auto selector = selectors.begin()->second;
+        CallBack(selector->al, [selector,this] {
+            selector->ping.monitorRegistration = npos();
+            AsyncCall::Pointer callback = asyncCall(44, 4, "PeerSelector::HandlePingTimeout",
+                cbdataDialer(PeerSelector::HandlePingTimeout, selector));
+            ScheduleCallHere(callback);
+        });
+        selectors.erase(selectors.begin());
+    }
+
+    if (!selectors.empty()) {
+        // Since abortWaiting() is unreliable, we may have been awakened by a
+        // stale event A after event B has been scheduled. Now we are going to
+        // schedule event C. Prevent event accumulation by deleting B (if any).
+        abortWaiting();
+
+        startWaiting();
+    }
+}
+
+void
+PeerSelectorPingMonitor::monitor(PeerSelector *selector)
+{
+    assert(selector);
+
+    const auto deadline = selector->ping.deadline();
+    const auto position = selectors.emplace(deadline, selector);
+    selector->ping.monitorRegistration = position;
+
+    if (position == selectors.begin()) {
+        if (selectors.size() > 1)
+            abortWaiting(); // remove the previously scheduled earlier event
+        startWaiting();
+    } // else the already scheduled event is still the earliest one
+}
+
+void
+PeerSelectorPingMonitor::forget(PeerSelector *selector)
+{
+    assert(selector);
+
+    if (selector->ping.monitorRegistration == npos())
+        return; // already forgotten
+
+    const auto wasFirst = selector->ping.monitorRegistration == selectors.begin();
+    selectors.erase(selector->ping.monitorRegistration);
+    selector->ping.monitorRegistration = npos();
+
+    if (wasFirst) {
+        // do not reschedule if there are still elements with the same deadline
+        if (!selectors.empty() && selectors.begin()->first == selector->ping.deadline())
+            return;
+        abortWaiting();
+        if (!selectors.empty())
+            startWaiting();
+    } // else do nothing since the old scheduled event is still the earliest one
+}
+
+/* PeerSelector */
+
 PeerSelector::~PeerSelector()
 {
     while (servers) {
@@ -107,12 +235,10 @@ PeerSelector::~PeerSelector()
         servers = next;
     }
 
+    cancelPingTimeoutMonitoring();
+
     if (entry) {
         debugs(44, 3, entry->url());
-
-        if (entry->ping_status == PING_WAITING)
-            eventDelete(HandlePingTimeout, this);
-
         entry->ping_status = PING_DONE;
     }
 
@@ -132,6 +258,21 @@ PeerSelector::~PeerSelector()
     delete lastError;
 }
 
+void
+PeerSelector::startPingWaiting()
+{
+    assert(entry);
+    assert(entry->ping_status != PING_WAITING);
+    PingMonitor().monitor(this);
+    entry->ping_status = PING_WAITING;
+}
+
+void
+PeerSelector::cancelPingTimeoutMonitoring()
+{
+    PingMonitor().forget(this);
+}
+
 static int
 peerSelectIcpPing(PeerSelector *ps, int direct, StoreEntry * entry)
 {
@@ -520,6 +661,7 @@ PeerSelector::selectMore()
             return;
     } else if (entry->ping_status == PING_WAITING) {
         selectSomeNeighborReplies();
+        cancelPingTimeoutMonitoring();
         entry->ping_status = PING_DONE;
     }
 
@@ -614,6 +756,9 @@ PeerSelector::selectSomeNeighbor()
                                            this,
                                            &ping.n_replies_expected,
                                            &ping.timeout);
+            // TODO: Refactor neighborsUdpPing() to guarantee positive timeouts.
+            if (ping.timeout < 0)
+                ping.timeout = 0;
 
             if (ping.n_sent == 0)
                 debugs(44, DBG_CRITICAL, "WARNING: neighborsUdpPing returned 0");
@@ -622,12 +767,7 @@ PeerSelector::selectSomeNeighbor()
                    " msec");
 
             if (ping.n_replies_expected > 0) {
-                entry->ping_status = PING_WAITING;
-                eventAdd("PeerSelector::HandlePingTimeout",
-                         HandlePingTimeout,
-                         this,
-                         0.001 * ping.timeout,
-                         0);
+                startPingWaiting();
                 return;
             }
         }
@@ -754,8 +894,11 @@ PeerSelector::handlePingTimeout()
 {
     debugs(44, 3, url());
 
-    if (entry)
-        entry->ping_status = PING_DONE;
+    // do nothing if ping reply came while handlePingTimeout() was queued
+    if (!entry || entry->ping_status != PING_WAITING)
+        return;
+
+    entry->ping_status = PING_DONE;
 
     if (selectionAborted())
         return;
@@ -766,9 +909,9 @@ PeerSelector::handlePingTimeout()
 }
 
 void
-PeerSelector::HandlePingTimeout(void *data)
+PeerSelector::HandlePingTimeout(PeerSelector *selector)
 {
-    static_cast<PeerSelector*>(data)->handlePingTimeout();
+    selector->handlePingTimeout();
 }
 
 void
@@ -1046,7 +1189,8 @@ ping_data::ping_data() :
     timeout(0),
     timedout(0),
     w_rtt(0),
-    p_rtt(0)
+    p_rtt(0),
+    monitorRegistration(PingMonitor().npos())
 {
     start.tv_sec = 0;
     start.tv_usec = 0;
@@ -1054,3 +1198,15 @@ ping_data::ping_data() :
     stop.tv_usec = 0;
 }
 
+timeval
+ping_data::deadline() const
+{
+    timeval timeInterval;
+    timeInterval.tv_sec = timeout / 1000;
+    timeInterval.tv_usec = (timeout % 1000) * 1000;
+
+    timeval result;
+    tvAdd(result, start, timeInterval);
+    return result;
+}
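
As a quick check of the deadline() arithmetic above, a minimal sketch
with hypothetical values (it assumes the Squid build context, since
ping_data is declared in PingData.h and linked with peer_select.cc):

    #include "PingData.h"
    #include <cassert>

    static void checkDeadlineArithmetic()
    {
        ping_data p;
        p.start.tv_sec = 100;             // absolute start time: 100.25 s
        p.start.tv_usec = 250000;
        p.timeout = 1500;                 // timeout is in milliseconds
        const timeval d = p.deadline();   // start + 1.5 s
        assert(d.tv_sec == 101 && d.tv_usec == 750000);
    }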
+