From: Alex Rousskov Date: Mon, 22 May 2017 16:49:36 +0000 (-0600) Subject: Do not unconditionally revive dead peers after a DNS refresh. X-Git-Tag: M-staged-PR71~171 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=883b9a4d1f62c6de1009d09119b0a6bbc100be8c;p=thirdparty%2Fsquid.git Do not unconditionally revive dead peers after a DNS refresh. Every hour, peerRefreshDNS() performs a DNS lookup of all cache_peer addresses. Before this patch, even if the lookup results did not change, the associated peerDNSConfigure() code silently cleared dead peer marking (CachePeer::tcp_up counter), if any. Forcefully reviving dead peers every hour can lead to transaction delays (and delays may lead to failures) due to connection timeouts when using a still dead peer. This patch starts standard TCP probing (instead of pointless dead peer reviving), correctly refreshing peer state. The primary goal is to cover a situation where a DNS refresh changes the peer address list. However, TCP probing may be useful for other situations as well and has low overhead (that is why it starts unconditionally). For example, probing may be useful when the DNS refresh changes the order of IP addresses. It also helps detecting dead idle peers. Also delay and later resume peer probing if peerDNSConfigure() is invoked when peers are being probed. Squid should re-probe because the current probes may use stale IP addresses and produce wrong results. --- diff --git a/src/CachePeer.h b/src/CachePeer.h index 60520a982e..7f28e7bf6c 100644 --- a/src/CachePeer.h +++ b/src/CachePeer.h @@ -145,6 +145,8 @@ public: #endif int tcp_up = 0; /* 0 if a connect() fails */ + /// whether to do another TCP probe after current TCP probes + bool reprobe = false; Ip::Address addresses[10]; int n_addresses = 0; diff --git a/src/neighbors.cc b/src/neighbors.cc index 891e23eb17..cb95fea621 100644 --- a/src/neighbors.cc +++ b/src/neighbors.cc @@ -59,7 +59,7 @@ static void neighborAliveHtcp(CachePeer *, const MemObject *, const HtcpReplyDat static void neighborCountIgnored(CachePeer *); static void peerRefreshDNS(void *); static IPH peerDNSConfigure; -static void peerProbeConnect(CachePeer *); +static void peerProbeConnect(CachePeer *, const bool reprobeIfBusy = false); static CNCB peerProbeConnectDone; static void peerCountMcastPeersDone(void *data); static void peerCountMcastPeersStart(void *data); @@ -1202,8 +1202,6 @@ peerDNSConfigure(const ipcache_addrs *ia, const Dns::LookupDetails &, void *data return; } - p->tcp_up = p->connect_fail_limit; - for (j = 0; j < (int) ia->count && j < PEER_MAX_ADDRESSES; ++j) { p->addresses[j] = ia->in_addrs[j]; debugs(15, 2, "--> IP address #" << j << ": " << p->addresses[j]); @@ -1214,6 +1212,8 @@ peerDNSConfigure(const ipcache_addrs *ia, const Dns::LookupDetails &, void *data p->in_addr = p->addresses[0]; p->in_addr.port(p->icp.port); + peerProbeConnect(p, true); // detect any died or revived peers ASAP + if (p->type == PEER_MULTICAST) peerCountMcastPeersSchedule(p, 10); @@ -1287,21 +1287,31 @@ peerConnectSucceded(CachePeer * p) p->tcp_up = p->connect_fail_limit; } +/// whether new TCP probes are currently banned +static bool +peerProbeIsBusy(const CachePeer *p) +{ + if (p->testing_now > 0) { + debugs(15, 8, "yes, probing " << p); + return true; + } + if (squid_curtime - p->stats.last_connect_probe == 0) { + debugs(15, 8, "yes, just probed " << p); + return true; + } + return false; +} /* * peerProbeConnect will be called on dead peers by neighborUp */ static void -peerProbeConnect(CachePeer * p) +peerProbeConnect(CachePeer *p, const bool reprobeIfBusy) { - if (p->testing_now > 0) { - debugs(15, 8, "already probing " << p); - return; - } - - if (squid_curtime - p->stats.last_connect_probe == 0) { - debugs(15, 8, "just probed " << p); + if (peerProbeIsBusy(p)) { + p->reprobe = reprobeIfBusy; return; } + p->reprobe = false; const time_t ctimeout = peerConnectTimeout(p); /* for each IP address of this CachePeer. find one that we can connect to and probe it. */ @@ -1337,6 +1347,9 @@ peerProbeConnectDone(const Comm::ConnectionPointer &conn, Comm::Flag status, int -- p->testing_now; conn->close(); // TODO: log this traffic. + + if (p->reprobe) + peerProbeConnect(p); } static void