From 022dbabd89249f839d1861aa87c1ab9e1a008a47 Mon Sep 17 00:00:00 2001 From: Eduard Bagdasaryan Date: Sun, 13 Nov 2022 16:00:21 +0000 Subject: [PATCH] Do not blame cache_peer for 4xx CONNECT responses (#1166) To avoid using a problematic cache_peer, Squid penalizes cache_peers for a variety of problems. However, a problem like an HTTP 403 Forbidden response may be caused by the client or other factors unrelated to the cache_peer that sent the response to Squid. In those cases, the cache_peer is not at fault and should not be marked dead, even after many such responses. This change stops blaming cache_peers for HTTP 4xx outcomes of Squid CONNECT requests. Currently, such outcomes only happen when a cache_peer responds with a 4xx reply, but the new code also treats Squid-generated 4xx error responses (while trying to establish a connection to the cache_peer) the same way. This hard-coded logic covers known use cases. If different use cases surface, we can make Squid behavior configurable. Co-authored-by: Amos Jeffries --- src/CachePeer.cc | 49 ++++++++++++++++++++++++ src/CachePeer.h | 30 +++++++++++++++ src/FwdState.cc | 3 +- src/FwdState.h | 1 - src/HappyConnOpener.cc | 4 +- src/PeerPoolMgr.cc | 4 +- src/clients/HttpTunneler.cc | 9 ++--- src/clients/HttpTunneler.h | 2 +- src/error/forward.h | 1 + src/http/StatusCode.h | 2 + src/neighbors.cc | 60 +++++------------------------- src/neighbors.h | 8 +++- src/security/BlindPeerConnector.cc | 8 ++-- src/security/BlindPeerConnector.h | 4 +- src/security/PeerConnector.cc | 18 +++++---- src/security/PeerConnector.h | 3 +- src/tests/stub_libsecurity.cc | 2 +- 17 files changed, 125 insertions(+), 83 deletions(-) diff --git a/src/CachePeer.cc b/src/CachePeer.cc index 24bf56714f..834b142f74 100644 --- a/src/CachePeer.cc +++ b/src/CachePeer.cc @@ -10,6 +10,7 @@ #include "acl/Gadgets.h" #include "CachePeer.h" #include "defines.h" +#include "neighbors.h" #include "NeighborTypeDomainList.h" #include "pconn.h" #include "PeerPoolMgr.h" @@ -55,6 +56,54 @@ CachePeer::~CachePeer() xfree(domain); } +void +CachePeer::noteSuccess() +{ + if (!tcp_up) { + debugs(15, 2, "connection to " << *this << " succeeded"); + tcp_up = connect_fail_limit; // NP: so peerAlive() works properly. + peerAlive(this); + } else { + tcp_up = connect_fail_limit; + } +} + +void +CachePeer::noteFailure(const Http::StatusCode code) +{ + if (Http::Is4xx(code)) + return; // this failure is not our fault + + countFailure(); +} + +// TODO: Require callers to detail failures instead of using one (and often +// misleading!) "connection failed" phrase for all of them. +/// noteFailure() helper for handling failures attributed to this peer +void +CachePeer::countFailure() +{ + stats.last_connect_failure = squid_curtime; + if (tcp_up > 0) + --tcp_up; + + const auto consideredAliveByAdmin = (stats.logged_state == PEER_ALIVE); + const auto level = consideredAliveByAdmin ? DBG_IMPORTANT : 2; + debugs(15, level, "ERROR: Connection to " << *this << " failed"); + + if (consideredAliveByAdmin) { + if (!tcp_up) { + debugs(15, DBG_IMPORTANT, "Detected DEAD " << neighborTypeStr(this) << ": " << name); + stats.logged_state = PEER_DEAD; + } else { + debugs(15, 2, "additional failures needed to mark this cache_peer DEAD: " << tcp_up); + } + } else { + assert(!tcp_up); + debugs(15, 2, "cache_peer " << *this << " is still DEAD"); + } +} + void CachePeer::rename(const char * const newName) { diff --git a/src/CachePeer.h b/src/CachePeer.h index 2beb460982..b641294bfb 100644 --- a/src/CachePeer.h +++ b/src/CachePeer.h @@ -12,6 +12,7 @@ #include "acl/forward.h" #include "base/CbcPointer.h" #include "enums.h" +#include "http/StatusCode.h" #include "icp_opcode.h" #include "ip/Address.h" #include "security/PeerOptions.h" @@ -34,6 +35,13 @@ public: explicit CachePeer(const char *hostname); ~CachePeer(); + /// reacts to a successful establishment of a connection to this cache_peer + void noteSuccess(); + + /// reacts to a failure on a connection to this cache_peer + /// \param code a received response status code, if any + void noteFailure(Http::StatusCode code); + /// (re)configure cache_peer name=value void rename(const char *); @@ -216,8 +224,30 @@ public: int front_end_https = 0; ///< 0 - off, 1 - on, 2 - auto int connection_auth = 2; ///< 0 - off, 1 - on, 2 - auto + +private: + void countFailure(); }; +/// reacts to a successful establishment of a connection to an origin server or cache_peer +/// \param peer nil if Squid established a connection to an origin server +inline void +NoteOutgoingConnectionSuccess(CachePeer * const peer) +{ + if (peer) + peer->noteSuccess(); +} + +/// reacts to a failure on a connection to an origin server or cache_peer +/// \param peer nil if the connection is to an origin server +/// \param code a received response status code, if any +inline void +NoteOutgoingConnectionFailure(CachePeer * const peer, const Http::StatusCode code) +{ + if (peer) + peer->noteFailure(code); +} + /// identify the given cache peer in cache.log messages and such std::ostream &operator <<(std::ostream &, const CachePeer &); diff --git a/src/FwdState.cc b/src/FwdState.cc index e07bea3233..495666854b 100644 --- a/src/FwdState.cc +++ b/src/FwdState.cc @@ -1060,8 +1060,7 @@ FwdState::successfullyConnectedToPeer(const Comm::ConnectionPointer &conn) CallJobHere1(17, 4, request->clientConnectionManager, ConnStateData, ConnStateData::notePeerConnection, serverConnection()); - if (serverConnection()->getPeer()) - peerConnectSucceded(serverConnection()->getPeer()); + NoteOutgoingConnectionSuccess(serverConnection()->getPeer()); dispatch(); } diff --git a/src/FwdState.h b/src/FwdState.h index ad9e75cc3b..ca1ad75e3c 100644 --- a/src/FwdState.h +++ b/src/FwdState.h @@ -31,7 +31,6 @@ class AccessLogEntry; typedef RefCount AccessLogEntryPointer; -class ErrorState; class HttpRequest; class PconnPool; class ResolvedPeers; diff --git a/src/HappyConnOpener.cc b/src/HappyConnOpener.cc index e1eed3a24b..1beca7cc5e 100644 --- a/src/HappyConnOpener.cc +++ b/src/HappyConnOpener.cc @@ -630,8 +630,6 @@ HappyConnOpener::handleConnOpenerAnswer(Attempt &attempt, const CommConnectCbPar } debugs(17, 8, what << " failed: " << params.conn); - if (const auto peer = params.conn->getPeer()) - peerConnectFailed(peer); // remember the last failure (we forward it if we cannot connect anywhere) lastFailedConnection = handledPath; @@ -640,6 +638,8 @@ HappyConnOpener::handleConnOpenerAnswer(Attempt &attempt, const CommConnectCbPar lastError = makeError(ERR_CONNECT_FAIL); lastError->xerrno = params.xerrno; + NoteOutgoingConnectionFailure(params.conn->getPeer(), lastError->httpStatus); + if (spareWaiting) updateSpareWaitAfterPrimeFailure(); diff --git a/src/PeerPoolMgr.cc b/src/PeerPoolMgr.cc index a3502613bd..6ec043665c 100644 --- a/src/PeerPoolMgr.cc +++ b/src/PeerPoolMgr.cc @@ -86,7 +86,7 @@ PeerPoolMgr::handleOpenedConnection(const CommConnectCbParams ¶ms) } if (params.flag != Comm::OK) { - peerConnectFailed(peer); + NoteOutgoingConnectionFailure(peer, Http::scNone); checkpoint("conn opening failure"); // may retry return; } @@ -134,7 +134,7 @@ PeerPoolMgr::handleSecuredPeer(Security::EncryptorAnswer &answer) assert(!answer.tunneled); if (answer.error.get()) { assert(!answer.conn); - // PeerConnector calls peerConnectFailed() for us; + // PeerConnector calls NoteOutgoingConnectionFailure() for us checkpoint("conn securing failure"); // may retry return; } diff --git a/src/clients/HttpTunneler.cc b/src/clients/HttpTunneler.cc index 19e3f4efef..0162e21a6a 100644 --- a/src/clients/HttpTunneler.cc +++ b/src/clients/HttpTunneler.cc @@ -90,7 +90,7 @@ Http::Tunneler::handleConnectionClosure(const CommCloseCbParams &) { closer = nullptr; if (connection) { - countFailingConnection(); + countFailingConnection(nullptr); connection->noteClosure(); connection = nullptr; } @@ -355,7 +355,7 @@ Http::Tunneler::bailWith(ErrorState *error) if (const auto failingConnection = connection) { // TODO: Reuse to-peer connections after a CONNECT error response. - countFailingConnection(); + countFailingConnection(error); disconnect(); failingConnection->close(); } @@ -374,11 +374,10 @@ Http::Tunneler::sendSuccess() } void -Http::Tunneler::countFailingConnection() +Http::Tunneler::countFailingConnection(const ErrorState * const error) { assert(connection); - if (const auto p = connection->getPeer()) - peerConnectFailed(p); + NoteOutgoingConnectionFailure(connection->getPeer(), error ? error->httpStatus : Http::scNone); if (noteFwdPconnUse && connection->isOpen()) fwdPconnPool->noteUses(fd_table[connection->fd].pconn.uses); } diff --git a/src/clients/HttpTunneler.h b/src/clients/HttpTunneler.h index 21edc6250b..d6d8d18f26 100644 --- a/src/clients/HttpTunneler.h +++ b/src/clients/HttpTunneler.h @@ -80,7 +80,7 @@ private: void disconnect(); /// updates connection usage history before the connection is closed - void countFailingConnection(); + void countFailingConnection(const ErrorState *); AsyncCall::Pointer writer; ///< called when the request has been written AsyncCall::Pointer reader; ///< called when the response should be read diff --git a/src/error/forward.h b/src/error/forward.h index 798e853457..3a4d8ce955 100644 --- a/src/error/forward.h +++ b/src/error/forward.h @@ -90,6 +90,7 @@ typedef enum { class Error; class ErrorDetail; +class ErrorState; typedef RefCount ErrorDetailPointer; diff --git a/src/http/StatusCode.h b/src/http/StatusCode.h index 38ece6d9a1..1517826858 100644 --- a/src/http/StatusCode.h +++ b/src/http/StatusCode.h @@ -90,6 +90,8 @@ typedef enum { const char *StatusCodeString(const Http::StatusCode status); /// whether this is an informational 1xx response status code inline bool Is1xx(const int sc) { return scContinue <= sc && sc < scOkay; } +/// whether this is a client error 4xx response status code +inline bool Is4xx(const int sc) { return scBadRequest <= sc && sc < scInternalServerError; } /// whether this response status code prohibits sending Content-Length inline bool ProhibitsContentLength(const StatusCode sc) { return sc == scNoContent || Is1xx(sc); } /// whether to send the request to another peer based on the current response status code diff --git a/src/neighbors.cc b/src/neighbors.cc index 2f9b25bf17..ee0c318d84 100644 --- a/src/neighbors.cc +++ b/src/neighbors.cc @@ -453,10 +453,7 @@ peerClearRR() } } -/** - * Perform all actions when a CachePeer is detected revived. - */ -static void +void peerAlive(CachePeer *p) { if (p->stats.logged_state == PEER_DEAD && p->tcp_up) { @@ -469,6 +466,10 @@ peerAlive(CachePeer *p) p->stats.last_reply = squid_curtime; p->stats.probe_start = 0; + + // TODO: Remove or explain how we could detect an alive peer without IP addresses + if (!p->n_addresses) + ipcache_nbgethostbyname(p->host, peerDNSConfigure, p); } CachePeer * @@ -1256,48 +1257,6 @@ peerRefreshDNS(void *data) eventAddIsh("peerRefreshDNS", peerRefreshDNS, nullptr, 3600.0, 1); } -// TODO: Move to CachePeer::noteFailure() or similar. -// TODO: Require callers to detail failures instead of using one (and often -// misleading!) "TCP" label for all of them. -void -peerConnectFailed(CachePeer * const p) -{ - p->stats.last_connect_failure = squid_curtime; - if (p->tcp_up > 0) - --p->tcp_up; - - // TODO: Report peer name. Same-addresses peers often have different names. - - const auto consideredAliveByAdmin = p->stats.logged_state == PEER_ALIVE; - const auto level = consideredAliveByAdmin ? DBG_IMPORTANT : 2; - debugs(15, level, "ERROR: TCP connection to " << *p << " failed"); - - if (consideredAliveByAdmin) { - if (!p->tcp_up) { - debugs(15, DBG_IMPORTANT, "Detected DEAD " << neighborTypeStr(p) << ": " << *p); - p->stats.logged_state = PEER_DEAD; - } else { - debugs(15, 2, "additional failures needed to mark this cache_peer DEAD: " << p->tcp_up); - } - } else { - assert(!p->tcp_up); - debugs(15, 2, "cache_peer " << *p << " is still DEAD"); - } -} - -void -peerConnectSucceded(CachePeer * p) -{ - if (!p->tcp_up) { - debugs(15, 2, "TCP connection to " << *p << " succeeded"); - p->tcp_up = p->connect_fail_limit; // NP: so peerAlive(p) works properly. - peerAlive(p); - if (!p->n_addresses) - ipcache_nbgethostbyname(p->host, peerDNSConfigure, p); - } else - p->tcp_up = p->connect_fail_limit; -} - /// whether new TCP probes are currently banned static bool peerProbeIsBusy(const CachePeer *p) @@ -1349,11 +1308,10 @@ peerProbeConnectDone(const Comm::ConnectionPointer &conn, Comm::Flag status, int { CachePeer *p = (CachePeer*)data; - if (status == Comm::OK) { - peerConnectSucceded(p); - } else { - peerConnectFailed(p); - } + if (status == Comm::OK) + p->noteSuccess(); + else + p->noteFailure(Http::scNone); -- p->testing_now; conn->close(); diff --git a/src/neighbors.h b/src/neighbors.h index 2510f66c75..8229b8fd61 100644 --- a/src/neighbors.h +++ b/src/neighbors.h @@ -51,6 +51,12 @@ CachePeer *getRoundRobinParent(PeerSelector*); CachePeer *getWeightedRoundRobinParent(PeerSelector*); void peerClearRRStart(void); void peerClearRR(void); + +// TODO: Move, together with its many dependencies and callers, into CachePeer. +/// Updates protocol-agnostic CachePeer state after an indication of a +/// successful contact with the given cache_peer. +void peerAlive(CachePeer *); + lookup_t peerDigestLookup(CachePeer * p, PeerSelector *); CachePeer *neighborsDigestSelect(PeerSelector *); void peerNoteDigestLookup(HttpRequest * request, CachePeer * p, lookup_t lookup); @@ -58,8 +64,6 @@ void peerNoteDigestGone(CachePeer * p); int neighborUp(const CachePeer * e); const char *neighborTypeStr(const CachePeer * e); peer_t neighborType(const CachePeer *, const AnyP::Uri &); -void peerConnectFailed(CachePeer *); -void peerConnectSucceded(CachePeer *); void dump_peer_options(StoreEntry *, CachePeer *); int peerHTTPOkay(const CachePeer *, PeerSelector *); diff --git a/src/security/BlindPeerConnector.cc b/src/security/BlindPeerConnector.cc index d0d1c4a9eb..46be73a0a3 100644 --- a/src/security/BlindPeerConnector.cc +++ b/src/security/BlindPeerConnector.cc @@ -70,13 +70,13 @@ Security::BlindPeerConnector::noteNegotiationDone(ErrorState *error) if (error) { debugs(83, 5, "error=" << (void*)error); - // XXX: forward.cc calls peerConnectSucceeded() after an OK TCP connect but - // we call peerConnectFailed() if SSL failed afterwards. Is that OK? - // It is not clear whether we should call peerConnectSucceeded/Failed() + // XXX: FwdState calls NoteOutgoingConnectionSuccess() after an OK TCP connect, but + // we call noteFailure() if SSL failed afterwards. Is that OK? + // It is not clear whether we should call noteSuccess()/noteFailure()/etc. // based on TCP results, SSL results, or both. And the code is probably not // consistent in this aspect across tunnelling and forwarding modules. if (peer && peer->secure.encryptTransport) - peerConnectFailed(peer); + peer->noteFailure(error->httpStatus); return; } diff --git a/src/security/BlindPeerConnector.h b/src/security/BlindPeerConnector.h index 0fc406f84c..bea7fe6d57 100644 --- a/src/security/BlindPeerConnector.h +++ b/src/security/BlindPeerConnector.h @@ -42,8 +42,8 @@ public: /// Return the configured TLS context object virtual Security::ContextPointer getTlsContext(); - /// On error calls peerConnectFailed(). - /// On success store the used TLS session for later use. + /// On success, stores the used TLS session for later use. + /// On error, informs the peer. virtual void noteNegotiationDone(ErrorState *); }; diff --git a/src/security/PeerConnector.cc b/src/security/PeerConnector.cc index 3729b47100..3087cb8183 100644 --- a/src/security/PeerConnector.cc +++ b/src/security/PeerConnector.cc @@ -12,6 +12,7 @@ #include "acl/FilledChecklist.h" #include "base/AsyncCallbacks.h" #include "base/IoManip.h" +#include "CachePeer.h" #include "comm/Loops.h" #include "comm/Read.h" #include "Downloader.h" @@ -110,15 +111,17 @@ Security::PeerConnector::commCloseHandler(const CommCloseCbParams ¶ms) debugs(83, 5, "FD " << params.fd << ", Security::PeerConnector=" << params.data); closeHandler = nullptr; + + const auto err = new ErrorState(ERR_SECURE_CONNECT_FAIL, Http::scServiceUnavailable, request.getRaw(), al); + static const auto d = MakeNamedErrorDetail("TLS_CONNECT_CLOSE"); + err->detailError(d); + if (serverConn) { - countFailingConnection(); + countFailingConnection(err); serverConn->noteClosure(); serverConn = nullptr; } - const auto err = new ErrorState(ERR_SECURE_CONNECT_FAIL, Http::scServiceUnavailable, request.getRaw(), al); - static const auto d = MakeNamedErrorDetail("TLS_CONNECT_CLOSE"); - err->detailError(d); bail(err); } @@ -508,7 +511,7 @@ Security::PeerConnector::bail(ErrorState *error) answer().error = error; if (const auto failingConnection = serverConn) { - countFailingConnection(); + countFailingConnection(error); disconnect(); failingConnection->close(); } @@ -526,11 +529,10 @@ Security::PeerConnector::sendSuccess() } void -Security::PeerConnector::countFailingConnection() +Security::PeerConnector::countFailingConnection(const ErrorState * const error) { assert(serverConn); - if (const auto p = serverConn->getPeer()) - peerConnectFailed(p); + NoteOutgoingConnectionFailure(serverConn->getPeer(), error ? error->httpStatus : Http::scNone); // TODO: Calling PconnPool::noteUses() should not be our responsibility. if (noteFwdPconnUse && serverConn->isOpen()) fwdPconnPool->noteUses(fd_table[serverConn->fd].pconn.uses); diff --git a/src/security/PeerConnector.h b/src/security/PeerConnector.h index de242001b0..3703e28ecf 100644 --- a/src/security/PeerConnector.h +++ b/src/security/PeerConnector.h @@ -26,7 +26,6 @@ #include #include -class ErrorState; class Downloader; class DownloaderAnswer; class AccessLogEntry; @@ -151,7 +150,7 @@ protected: void disconnect(); /// updates connection usage history before the connection is closed - void countFailingConnection(); + void countFailingConnection(const ErrorState *); /// If called the certificates validator will not used void bypassCertValidator() {useCertValidator_ = false;} diff --git a/src/tests/stub_libsecurity.cc b/src/tests/stub_libsecurity.cc index e3053b7946..bf1d110855 100644 --- a/src/tests/stub_libsecurity.cc +++ b/src/tests/stub_libsecurity.cc @@ -98,7 +98,7 @@ void PeerConnector::bail(ErrorState *) STUB void PeerConnector::sendSuccess() STUB void PeerConnector::callBack() STUB void PeerConnector::disconnect() STUB -void PeerConnector::countFailingConnection() STUB +void PeerConnector::countFailingConnection(const ErrorState *) STUB void PeerConnector::recordNegotiationDetails() STUB EncryptorAnswer &PeerConnector::answer() STUB_RETREF(EncryptorAnswer) } -- 2.39.5