From: Alex Rousskov Date: Tue, 2 Apr 2024 20:37:31 +0000 (+0000) Subject: Do not blame cache_peer for CONNECT errors (#1772) X-Git-Tag: SQUID_7_0_1~153 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2e7dea3cedd3ef2f071dee82867c4147f17376dd;p=thirdparty%2Fsquid.git Do not blame cache_peer for CONNECT errors (#1772) ERROR: Connection to [such-and-such-cache_peer] failed TCP_TUNNEL/503 CONNECT nxdomain.test:443 FIRSTUP_PARENT Squid does not alert an admin about (and decrease health level of) a cache_peer that responded with an error to a GET request. Just like GET responses from a cache_peer, CONNECT responses may (and often do!) reflect client or origin server failures. We should not penalize cache_peers (and alert admins) until we can distinguish these frequent client/origin failures from (relatively rare) cache_peer problems. This change absolves cache_peers of CONNECT problems, restoring parity with GETs and restoring v4 behavior changed (probably by accident) in v5. Also removed Http::StatusCode parameter from failure notification functions because it became essentially unused after the primary Http::Tunneler changes. Tunneler was the only source of status code information that (in some cases) used received HTTP response to compute that status code. All other cases extracted that status code from Squid-generated errors. Those errors were arguably never meant to supply status code information for "this failure is not our fault" decision, and they do not supply 4xx status codes driving that decision. ### Problem evolution 2019 commit f5e1794 effectively started blaming cache_peer for all FwdState CONNECT errors. That functionality change was probably accidental, likely influenced by the names of noteConnectFailure() and peerConnectFailed() functions that abbreviated "Connection", making the functions look as applicable to CONNECT failures. Prior to that commit, the functions were never used for CONNECT errors. After it, FwdState started calling peerConnectFailed() for all CONNECT failures. In 2020 commit 25b0ce4, TunnelStateData started blaming cache_peers as well (by moving that FwdState-only error handling code into Tunneler). The same "accidental functionality change" speculations apply here. In 2022 commit 022dbab, we made an exception for 4xx CONNECT errors as folks deploying newer code started complaining about cache_peers getting blamed for client-caused errors (e.g., HTTP 403 Forbidden replies). We did not realize that the blaming code itself was an unwanted accident. Now we are getting complaints about cache_peers getting blamed for 502 and 503 CONNECT errors caused by, for example, domain names without IPs: As these CONNECT error responses are propagated from parent to child caches, every child cache in the chain logs ERRORs and every cache_peer in the chain gets its health counter decreased! --- diff --git a/src/CachePeer.cc b/src/CachePeer.cc index 63c6fb60a9..98a750cb41 100644 --- a/src/CachePeer.cc +++ b/src/CachePeer.cc @@ -69,20 +69,11 @@ CachePeer::noteSuccess() } } -void -CachePeer::noteFailure(const Http::StatusCode code) -{ - if (Http::Is4xx(code)) - return; // this failure is not our fault - - countFailure(); -} - // TODO: Require callers to detail failures instead of using one (and often // misleading!) "connection failed" phrase for all of them. /// noteFailure() helper for handling failures attributed to this peer void -CachePeer::countFailure() +CachePeer::noteFailure() { stats.last_connect_failure = squid_curtime; if (tcp_up > 0) diff --git a/src/CachePeer.h b/src/CachePeer.h index 783c17eea0..f888a0cdde 100644 --- a/src/CachePeer.h +++ b/src/CachePeer.h @@ -38,9 +38,8 @@ public: /// reacts to a successful establishment of a connection to this cache_peer void noteSuccess(); - /// reacts to a failure on a connection to this cache_peer - /// \param code a received response status code, if any - void noteFailure(Http::StatusCode code); + /// reacts to a failed attempt to establish a connection to this cache_peer + void noteFailure(); /// (re)configure cache_peer name=value void rename(const char *); @@ -236,14 +235,13 @@ NoteOutgoingConnectionSuccess(CachePeer * const peer) peer->noteSuccess(); } -/// reacts to a failure on a connection to an origin server or cache_peer +/// reacts to a failed attempt to establish a connection to an origin server or cache_peer /// \param peer nil if the connection is to an origin server -/// \param code a received response status code, if any inline void -NoteOutgoingConnectionFailure(CachePeer * const peer, const Http::StatusCode code) +NoteOutgoingConnectionFailure(CachePeer * const peer) { if (peer) - peer->noteFailure(code); + peer->noteFailure(); } /// identify the given cache peer in cache.log messages and such diff --git a/src/HappyConnOpener.cc b/src/HappyConnOpener.cc index 981216030c..a2d3052583 100644 --- a/src/HappyConnOpener.cc +++ b/src/HappyConnOpener.cc @@ -638,7 +638,7 @@ HappyConnOpener::handleConnOpenerAnswer(Attempt &attempt, const CommConnectCbPar lastError = makeError(ERR_CONNECT_FAIL); lastError->xerrno = params.xerrno; - NoteOutgoingConnectionFailure(params.conn->getPeer(), lastError->httpStatus); + NoteOutgoingConnectionFailure(params.conn->getPeer()); if (spareWaiting) updateSpareWaitAfterPrimeFailure(); diff --git a/src/PeerPoolMgr.cc b/src/PeerPoolMgr.cc index 6b6b440b6b..a65e6bbdcb 100644 --- a/src/PeerPoolMgr.cc +++ b/src/PeerPoolMgr.cc @@ -87,7 +87,7 @@ PeerPoolMgr::handleOpenedConnection(const CommConnectCbParams ¶ms) } if (params.flag != Comm::OK) { - NoteOutgoingConnectionFailure(peer, Http::scNone); + NoteOutgoingConnectionFailure(peer); checkpoint("conn opening failure"); // may retry return; } diff --git a/src/clients/HttpTunneler.cc b/src/clients/HttpTunneler.cc index 1df7b15b0c..d52dd457e8 100644 --- a/src/clients/HttpTunneler.cc +++ b/src/clients/HttpTunneler.cc @@ -90,7 +90,7 @@ Http::Tunneler::handleConnectionClosure(const CommCloseCbParams &) { closer = nullptr; if (connection) { - countFailingConnection(nullptr); + countFailingConnection(); connection->noteClosure(); connection = nullptr; } @@ -355,7 +355,7 @@ Http::Tunneler::bailWith(ErrorState *error) if (const auto failingConnection = connection) { // TODO: Reuse to-peer connections after a CONNECT error response. - countFailingConnection(error); + countFailingConnection(); disconnect(); failingConnection->close(); } @@ -374,10 +374,12 @@ Http::Tunneler::sendSuccess() } void -Http::Tunneler::countFailingConnection(const ErrorState * const error) +Http::Tunneler::countFailingConnection() { assert(connection); - NoteOutgoingConnectionFailure(connection->getPeer(), error ? error->httpStatus : Http::scNone); + // No NoteOutgoingConnectionFailure(connection->getPeer()) call here because + // we do not blame cache_peer for CONNECT failures (on top of a successfully + // established connection to that cache_peer). if (noteFwdPconnUse && connection->isOpen()) fwdPconnPool->noteUses(fd_table[connection->fd].pconn.uses); } diff --git a/src/clients/HttpTunneler.h b/src/clients/HttpTunneler.h index eb8dcee685..d5090d3ac0 100644 --- a/src/clients/HttpTunneler.h +++ b/src/clients/HttpTunneler.h @@ -80,7 +80,7 @@ private: void disconnect(); /// updates connection usage history before the connection is closed - void countFailingConnection(const ErrorState *); + void countFailingConnection(); AsyncCall::Pointer writer; ///< called when the request has been written AsyncCall::Pointer reader; ///< called when the response should be read diff --git a/src/neighbors.cc b/src/neighbors.cc index a2719f628f..34a4383df9 100644 --- a/src/neighbors.cc +++ b/src/neighbors.cc @@ -1227,7 +1227,7 @@ peerProbeConnectDone(const Comm::ConnectionPointer &conn, Comm::Flag status, int if (status == Comm::OK) p->noteSuccess(); else - p->noteFailure(Http::scNone); + p->noteFailure(); -- p->testing_now; conn->close(); diff --git a/src/security/BlindPeerConnector.cc b/src/security/BlindPeerConnector.cc index 1811ae96f6..d57a4ea25e 100644 --- a/src/security/BlindPeerConnector.cc +++ b/src/security/BlindPeerConnector.cc @@ -76,7 +76,7 @@ Security::BlindPeerConnector::noteNegotiationDone(ErrorState *error) // based on TCP results, SSL results, or both. And the code is probably not // consistent in this aspect across tunnelling and forwarding modules. if (peer && peer->secure.encryptTransport) - peer->noteFailure(error->httpStatus); + peer->noteFailure(); return; } diff --git a/src/security/PeerConnector.cc b/src/security/PeerConnector.cc index f12229403e..96c2f0ed34 100644 --- a/src/security/PeerConnector.cc +++ b/src/security/PeerConnector.cc @@ -115,7 +115,7 @@ Security::PeerConnector::commCloseHandler(const CommCloseCbParams ¶ms) err->detailError(d); if (serverConn) { - countFailingConnection(err); + countFailingConnection(); serverConn->noteClosure(); serverConn = nullptr; } @@ -507,7 +507,7 @@ Security::PeerConnector::bail(ErrorState *error) answer().error = error; if (const auto failingConnection = serverConn) { - countFailingConnection(error); + countFailingConnection(); disconnect(); failingConnection->close(); } @@ -525,10 +525,10 @@ Security::PeerConnector::sendSuccess() } void -Security::PeerConnector::countFailingConnection(const ErrorState * const error) +Security::PeerConnector::countFailingConnection() { assert(serverConn); - NoteOutgoingConnectionFailure(serverConn->getPeer(), error ? error->httpStatus : Http::scNone); + NoteOutgoingConnectionFailure(serverConn->getPeer()); // TODO: Calling PconnPool::noteUses() should not be our responsibility. if (noteFwdPconnUse && serverConn->isOpen()) fwdPconnPool->noteUses(fd_table[serverConn->fd].pconn.uses); diff --git a/src/security/PeerConnector.h b/src/security/PeerConnector.h index 3c7c01b8dc..b00f3852a8 100644 --- a/src/security/PeerConnector.h +++ b/src/security/PeerConnector.h @@ -150,7 +150,7 @@ protected: void disconnect(); /// updates connection usage history before the connection is closed - void countFailingConnection(const ErrorState *); + void countFailingConnection(); /// If called the certificates validator will not used void bypassCertValidator() {useCertValidator_ = false;} diff --git a/src/tests/stub_libsecurity.cc b/src/tests/stub_libsecurity.cc index c4d3accab9..456f3122ae 100644 --- a/src/tests/stub_libsecurity.cc +++ b/src/tests/stub_libsecurity.cc @@ -105,7 +105,7 @@ void PeerConnector::bail(ErrorState *) STUB void PeerConnector::sendSuccess() STUB void PeerConnector::callBack() STUB void PeerConnector::disconnect() STUB -void PeerConnector::countFailingConnection(const ErrorState *) STUB +void PeerConnector::countFailingConnection() STUB void PeerConnector::recordNegotiationDetails() STUB EncryptorAnswer &PeerConnector::answer() STUB_RETREF(EncryptorAnswer) }