From: Eduard Bagdasaryan
Date: Thu, 4 Jun 2020 11:23:31 +0000 (+0000)
Subject: Preserve caller context across ping timeout events (#635)
X-Git-Tag: 4.15-20210522-snapshot~104
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9865de7;p=thirdparty%2Fsquid.git

Preserve caller context across ping timeout events (#635)

PeerSelector was losing its transaction context while handling an
event-driven timeout. Also, the old code generating the HandlePingTimeout
event was problematic for two reasons:

* It used a raw 'this' for a cbdata-protected child class. Sooner or
  later, code like this leads to "real" bugs similar to those fixed in
  ce9bb79, 38a9f55 / 9e64d84, and 4299f87.

* It (mis)used the event API for a transaction-specific purpose.

We created a new "ping timeout service" that keeps at most one
transaction-agnostic event.h timeout at any given time while maintaining
an internal list of transactions awaiting a ping response. A similar
architecture was used for HappyConnOpener helper services.

We rejected the alternative idea of enhancing the event API to support
asynchronous calls because event.cc internals were never meant to handle
thousands of concurrently pending transaction-related timeouts on busy
proxies. While it would be possible to enhance those internals as well,
timeout services offer even better performance, are much easier to extend
with support for timeout reconfiguration, and offer better "Squid state"
debugging.
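
For illustration, here is a minimal standalone sketch of the pattern
described above: many waiters share a single pending timer, keyed by the
earliest absolute deadline. This is not Squid code; TimeoutService and all
other names below are invented for this example (the real implementation
is the PeerSelectorPingMonitor class added by this change):

    #include <chrono>
    #include <functional>
    #include <iostream>
    #include <map>
    #include <utility>

    using Clock = std::chrono::steady_clock;

    // One pending timer covers many waiters; the waiter with the earliest
    // absolute deadline determines when that single timer should fire.
    class TimeoutService {
    public:
        using Callback = std::function<void()>;
        using Registration = std::multimap<Clock::time_point, Callback>::iterator;

        // registers a waiter; re-arms the single timer if this deadline is now the earliest
        Registration monitor(Clock::time_point deadline, Callback cb) {
            const auto pos = waiters.emplace(deadline, std::move(cb));
            if (pos == waiters.begin())
                armTimerFor(pos->first); // replaces any previously armed timer
            return pos;
        }

        // removes a waiter that no longer needs a timeout notification
        void forget(Registration pos) {
            const bool wasFirst = (pos == waiters.begin());
            waiters.erase(pos);
            if (wasFirst && !waiters.empty())
                armTimerFor(waiters.begin()->first);
        }

        // what the single timer calls when it fires
        void noteWaitOver(Clock::time_point now) {
            while (!waiters.empty() && waiters.begin()->first <= now) {
                auto cb = std::move(waiters.begin()->second);
                waiters.erase(waiters.begin());
                cb(); // notify one expired waiter
            }
            if (!waiters.empty())
                armTimerFor(waiters.begin()->first); // keep waiting for the rest
        }

    private:
        void armTimerFor(Clock::time_point deadline) {
            // a real service would (re)schedule one event-loop timer here;
            // this sketch only records the intent
            using std::chrono::duration_cast;
            using std::chrono::milliseconds;
            std::cout << "timer armed, fires in "
                      << duration_cast<milliseconds>(deadline - Clock::now()).count()
                      << " ms\n";
        }

        std::multimap<Clock::time_point, Callback> waiters; // deadline -> waiter
    };

    int main() {
        TimeoutService service;
        const auto now = Clock::now();
        service.monitor(now + std::chrono::milliseconds(200), [] { std::cout << "B timed out\n"; });
        const auto a = service.monitor(now + std::chrono::milliseconds(100), [] { std::cout << "A timed out\n"; });
        service.forget(a); // A received its reply early; only B keeps waiting
        service.noteWaitOver(now + std::chrono::milliseconds(250)); // the timer fired
        return 0;
    }

The PeerSelectorPingMonitor below has the same shape, but it arms its
single timer through the event.h API and notifies each waiting
PeerSelector via a cbdata-protected asynchronous call, which is what
preserves the caller's transaction context.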
---

diff --git a/src/PeerSelectState.h b/src/PeerSelectState.h
index 3dd4cb9570..2d3e744b5f 100644
--- a/src/PeerSelectState.h
+++ b/src/PeerSelectState.h
@@ -85,6 +85,15 @@ public:
     /// a single selection loop iteration: attempts to add more destinations
     void selectMore();
 
+    /// switches into the PING_WAITING state (and associated timeout monitoring)
+    void startPingWaiting();
+
+    /// terminates ICP ping timeout monitoring
+    void cancelPingTimeoutMonitoring();
+
+    /// called when the given selector should stop expecting ICP ping responses
+    static void HandlePingTimeout(PeerSelector *);
+
     HttpRequest *request;
     AccessLogEntry::Pointer al; ///< info for the future access.log entry
     StoreEntry *entry;
@@ -122,7 +131,6 @@ protected:
     static IRCB HandlePingReply;
     static ACLCB CheckAlwaysDirectDone;
     static ACLCB CheckNeverDirectDone;
-    static EVH HandlePingTimeout;
 
 private:
     Acl::Answer always_direct;
diff --git a/src/PingData.h b/src/PingData.h
index 1f4a216477..a1fc8501fd 100644
--- a/src/PingData.h
+++ b/src/PingData.h
@@ -9,12 +9,29 @@
 #ifndef SQUID_PINGDATA_H
 #define SQUID_PINGDATA_H
 
+#include "mem/PoolingAllocator.h"
+
+#include <map>
+
+class PeerSelector;
+class PeerSelectorPingMonitor;
+
+typedef std::pair<timeval, PeerSelector *> WaitingPeerSelector;
+/// waiting PeerSelector objects, ordered by their absolute deadlines
+typedef std::multimap<timeval, PeerSelector *, std::less<timeval>, PoolingAllocator<WaitingPeerSelector> > WaitingPeerSelectors;
+typedef WaitingPeerSelectors::iterator WaitingPeerSelectorPosition;
+
+/// ICP probing of cache_peers during peer selection
 class ping_data
 {
 
 public:
     ping_data();
 
+    /// no ICP responses are expected beyond the returned absolute time
+    /// \returns start + timeout
+    timeval deadline() const;
+
     struct timeval start;
 
     struct timeval stop;
@@ -25,6 +42,11 @@ public:
     int timedout;
     int w_rtt;
     int p_rtt;
+
+private:
+    friend PeerSelectorPingMonitor;
+    /// maintained by PeerSelectorPingMonitor
+    WaitingPeerSelectorPosition monitorRegistration;
 };
 
 #endif /* SQUID_PINGDATA_H */
diff --git a/src/SquidTime.h b/src/SquidTime.h
index 2d77b951c9..eda43890b8 100644
--- a/src/SquidTime.h
+++ b/src/SquidTime.h
@@ -56,6 +56,47 @@ public:
     virtual void tick();
 };
 
+// TODO: Remove direct timercmp() calls in legacy code.
+
+inline bool
+operator <(const timeval &a, const timeval &b)
+{
+    return timercmp(&a, &b, <);
+}
+
+inline bool
+operator >(const timeval &a, const timeval &b)
+{
+    return timercmp(&a, &b, >);
+}
+
+inline bool
+operator !=(const timeval &a, const timeval &b)
+{
+    return timercmp(&a, &b, !=);
+}
+
+// Operators for timeval below avoid timercmp() because Linux timeradd(3) manual
+// page says that their timercmp() versions "do not work" on some platforms.
+
+inline bool
+operator <=(const timeval &a, const timeval &b)
+{
+    return !(a > b);
+}
+
+inline bool
+operator >=(const timeval &a, const timeval &b)
+{
+    return !(a < b);
+}
+
+inline bool
+operator ==(const timeval &a, const timeval &b)
+{
+    return !(a != b);
+}
+
 namespace Time
 {
 
diff --git a/src/enums.h b/src/enums.h
index 2297d5c9b3..ea410ecbee 100644
--- a/src/enums.h
+++ b/src/enums.h
@@ -37,8 +37,12 @@ typedef enum _mem_status_t {
 } mem_status_t;
 
 typedef enum {
+    /// Has not considered whether to send ICP queries to peers yet.
     PING_NONE,
+    /// Sent ICP queries to peers and still awaiting responses.
     PING_WAITING,
+    /// Not waiting for ICP responses now and will not send ICP queries later.
+    /// The ICP queries may (or may not) have been sent earlier.
     PING_DONE
 } ping_status_t;
 
diff --git a/src/neighbors.cc b/src/neighbors.cc
index d908e7f9ad..c3dc609798 100644
--- a/src/neighbors.cc
+++ b/src/neighbors.cc
@@ -1441,7 +1441,7 @@ peerCountMcastPeersCreateAndSend(CachePeer * const p)
     reqnum = icpSetCacheKey((const cache_key *)fake->key);
     icpCreateAndSend(ICP_QUERY, 0, url, reqnum, 0,
                      icpOutgoingConn->fd, p->in_addr, psstate->al);
-    fake->ping_status = PING_WAITING;
+    fake->ping_status = PING_WAITING; // TODO: refactor to use PeerSelector::startPingWaiting()
     eventAdd("peerCountMcastPeersDone",
              peerCountMcastPeersDone,
              psstate,
diff --git a/src/peer_select.cc b/src/peer_select.cc
index 0f0dd9ce6f..49a835ec13 100644
--- a/src/peer_select.cc
+++ b/src/peer_select.cc
@@ -10,6 +10,7 @@
 
 #include "squid.h"
 #include "acl/FilledChecklist.h"
+#include "base/AsyncCbdataCalls.h"
 #include "base/InstanceId.h"
 #include "CachePeer.h"
 #include "carp.h"
@@ -34,6 +35,7 @@
 #include "SquidConfig.h"
 #include "SquidTime.h"
 #include "Store.h"
+#include "util.h" // for tvSubDsec() which should be in SquidTime.h
 
 /**
  * A CachePeer which has been selected as a possible destination.
@@ -99,6 +101,132 @@ operator <<(std::ostream &os, const PeerSelectionDumper &fsd)
     return os;
 }
 
+/// An ICP ping timeout service.
+/// Protects event.cc (which is designed to handle a few unrelated timeouts)
+/// from exposure to thousands of ping-related timeouts on busy proxies.
+class PeerSelectorPingMonitor
+{
+public:
+    /// registers the given selector to be notified about the ICP ping timeout
+    void monitor(PeerSelector *);
+
+    /// removes a PeerSelector from the waiting list
+    void forget(PeerSelector *);
+
+    /// \returns a (nil) registration of a non-waiting peer selector
+    WaitingPeerSelectorPosition npos() { return selectors.end(); }
+
+private:
+    static void NoteWaitOver(void *monitor);
+
+    void startWaiting();
+    void abortWaiting();
+    void noteWaitOver();
+
+    WaitingPeerSelectors selectors; ///< \see WaitingPeerSelectors
+};
+
+/// monitors all PeerSelector ICP ping timeouts
+static PeerSelectorPingMonitor &
+PingMonitor()
+{
+    static const auto Instance = new PeerSelectorPingMonitor();
+    return *Instance;
+}
+
+/* PeerSelectorPingMonitor */
+
+/// PeerSelectorPingMonitor::noteWaitOver() wrapper
+void
+PeerSelectorPingMonitor::NoteWaitOver(void *raw)
+{
+    assert(raw);
+    static_cast<PeerSelectorPingMonitor*>(raw)->noteWaitOver();
+}
+
+/// schedules a single event to represent all waiting selectors
+void
+PeerSelectorPingMonitor::startWaiting()
+{
+    assert(!selectors.empty());
+    const auto interval = tvSubDsec(current_time, selectors.begin()->first);
+    eventAdd("PeerSelectorPingMonitor::NoteWaitOver", &PeerSelectorPingMonitor::NoteWaitOver, this, interval, 0, false);
+}
+
+/// undoes an earlier startWaiting() call
+void
+PeerSelectorPingMonitor::abortWaiting()
+{
+    // our event may be already in the AsyncCallQueue but that is OK:
+    // such queued calls cannot accumulate, and we ignore any stale ones
+    eventDelete(&PeerSelectorPingMonitor::NoteWaitOver, nullptr);
+}
+
+/// calls back all ready PeerSelectors and continues to wait for others
+void
+PeerSelectorPingMonitor::noteWaitOver()
+{
+    while (!selectors.empty() && current_time >= selectors.begin()->first) {
+        const auto selector = selectors.begin()->second;
+        CallBack(selector->al, [selector,this] {
+            selector->ping.monitorRegistration = npos();
+            AsyncCall::Pointer callback = asyncCall(44, 4, "PeerSelector::HandlePingTimeout",
+                                                    cbdataDialer(PeerSelector::HandlePingTimeout, selector));
+            ScheduleCallHere(callback);
+        });
+        selectors.erase(selectors.begin());
+    }
+
+    if (!selectors.empty()) {
+        // Since abortWaiting() is unreliable, we may have been awakened by a
+        // stale event A after event B has been scheduled. Now we are going to
+        // schedule event C. Prevent event accumulation by deleting B (if any).
+        abortWaiting();
+
+        startWaiting();
+    }
+}
+
+void
+PeerSelectorPingMonitor::monitor(PeerSelector *selector)
+{
+    assert(selector);
+
+    const auto deadline = selector->ping.deadline();
+    const auto position = selectors.emplace(deadline, selector);
+    selector->ping.monitorRegistration = position;
+
+    if (position == selectors.begin()) {
+        if (selectors.size() > 1)
+            abortWaiting(); // remove the previously scheduled earlier event
+        startWaiting();
+    } // else the already scheduled event is still the earliest one
+}
+
+void
+PeerSelectorPingMonitor::forget(PeerSelector *selector)
+{
+    assert(selector);
+
+    if (selector->ping.monitorRegistration == npos())
+        return; // already forgotten
+
+    const auto wasFirst = selector->ping.monitorRegistration == selectors.begin();
+    selectors.erase(selector->ping.monitorRegistration);
+    selector->ping.monitorRegistration = npos();
+
+    if (wasFirst) {
+        // do not reschedule if there are still elements with the same deadline
+        if (!selectors.empty() && selectors.begin()->first == selector->ping.deadline())
+            return;
+        abortWaiting();
+        if (!selectors.empty())
+            startWaiting();
+    } // else do nothing since the old scheduled event is still the earliest one
+}
+
+/* PeerSelector */
+
 PeerSelector::~PeerSelector()
 {
     while (servers) {
@@ -107,12 +235,10 @@ PeerSelector::~PeerSelector()
         servers = next;
     }
 
+    cancelPingTimeoutMonitoring();
+
     if (entry) {
         debugs(44, 3, entry->url());
-
-        if (entry->ping_status == PING_WAITING)
-            eventDelete(HandlePingTimeout, this);
-
         entry->ping_status = PING_DONE;
     }
 
@@ -132,6 +258,21 @@ PeerSelector::~PeerSelector()
     delete lastError;
 }
 
+void
+PeerSelector::startPingWaiting()
+{
+    assert(entry);
+    assert(entry->ping_status != PING_WAITING);
+    PingMonitor().monitor(this);
+    entry->ping_status = PING_WAITING;
+}
+
+void
+PeerSelector::cancelPingTimeoutMonitoring()
+{
+    PingMonitor().forget(this);
+}
+
 static int
 peerSelectIcpPing(PeerSelector *ps, int direct, StoreEntry * entry)
 {
@@ -520,6 +661,7 @@ PeerSelector::selectMore()
             return;
     } else if (entry->ping_status == PING_WAITING) {
         selectSomeNeighborReplies();
+        cancelPingTimeoutMonitoring();
         entry->ping_status = PING_DONE;
     }
 
@@ -614,6 +756,9 @@ PeerSelector::selectSomeNeighbor()
                                        this,
                                        &ping.n_replies_expected,
                                        &ping.timeout);
+        // TODO: Refactor neighborsUdpPing() to guarantee positive timeouts.
+        if (ping.timeout < 0)
+            ping.timeout = 0;
 
         if (ping.n_sent == 0)
             debugs(44, DBG_CRITICAL, "WARNING: neighborsUdpPing returned 0");
@@ -622,12 +767,7 @@ PeerSelector::selectSomeNeighbor()
                " msec");
 
         if (ping.n_replies_expected > 0) {
-            entry->ping_status = PING_WAITING;
-            eventAdd("PeerSelector::HandlePingTimeout",
-                     HandlePingTimeout,
-                     this,
-                     0.001 * ping.timeout,
-                     0);
+            startPingWaiting();
             return;
         }
     }
@@ -754,8 +894,11 @@ PeerSelector::handlePingTimeout()
 {
     debugs(44, 3, url());
 
-    if (entry)
-        entry->ping_status = PING_DONE;
+    // do nothing if ping reply came while handlePingTimeout() was queued
+    if (!entry || entry->ping_status != PING_WAITING)
+        return;
+
+    entry->ping_status = PING_DONE;
 
     if (selectionAborted())
         return;
@@ -766,9 +909,9 @@ PeerSelector::handlePingTimeout()
 }
 
 void
-PeerSelector::HandlePingTimeout(void *data)
+PeerSelector::HandlePingTimeout(PeerSelector *selector)
 {
-    static_cast<PeerSelector*>(data)->handlePingTimeout();
+    selector->handlePingTimeout();
 }
 
 void
@@ -1046,7 +1189,8 @@ ping_data::ping_data() :
     timeout(0),
     timedout(0),
     w_rtt(0),
-    p_rtt(0)
+    p_rtt(0),
+    monitorRegistration(PingMonitor().npos())
 {
     start.tv_sec = 0;
     start.tv_usec = 0;
@@ -1054,3 +1198,15 @@ ping_data::ping_data() :
     stop.tv_usec = 0;
 }
 
+timeval
+ping_data::deadline() const
+{
+    timeval timeInterval;
+    timeInterval.tv_sec = timeout / 1000;
+    timeInterval.tv_usec = (timeout % 1000) * 1000;
+
+    timeval result;
+    tvAdd(result, start, timeInterval);
+    return result;
+}
+