From: hno <> Date: Wed, 3 May 2000 00:32:40 +0000 (+0000) Subject: hno squid-2.3.DEVEL2.tcp_dead_detection_and_retry.patch X-Git-Tag: SQUID_3_0_PRE1~2032 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=eb406bb759d973af85e0de00a4c7199b9952a144;p=thirdparty%2Fsquid.git hno squid-2.3.DEVEL2.tcp_dead_detection_and_retry.patch Squid-2.3.DEVEL2: Improved TCP dead peer detection & failover A major adjustment of how Squid detects a failing TCP peer and how connections are retried when it fails to connect. --- diff --git a/src/cache_cf.cc b/src/cache_cf.cc index 1b2612066c..fc7c7b1bff 100644 --- a/src/cache_cf.cc +++ b/src/cache_cf.cc @@ -1,6 +1,6 @@ /* - * $Id: cache_cf.cc,v 1.337 2000/03/06 16:23:29 wessels Exp $ + * $Id: cache_cf.cc,v 1.338 2000/05/02 18:32:40 hno Exp $ * * DEBUG: section 3 Configuration File Parsing * AUTHOR: Harvest Derived @@ -1014,6 +1014,7 @@ parse_peer(peer ** head) p->weight = 1; p->icp.version = ICP_VERSION_CURRENT; p->tcp_up = PEER_TCP_MAGIC_COUNT; + p->test_fd = -1; #if USE_CARP if (p->carp.load_factor) { /* calculate this peers hash for use in CARP */ diff --git a/src/forward.cc b/src/forward.cc index 6de07fddd6..4dde99b4ec 100644 --- a/src/forward.cc +++ b/src/forward.cc @@ -1,6 +1,6 @@ /* - * $Id: forward.cc,v 1.69 2000/03/06 16:23:31 wessels Exp $ + * $Id: forward.cc,v 1.70 2000/05/02 18:32:41 hno Exp $ * * DEBUG: section 17 Request Forwarding * AUTHOR: Duane Wessels @@ -133,13 +133,20 @@ fwdServerClosed(int fd, void *data) fwdState->n_tries, (int) (squid_curtime - fwdState->start)); if (fwdState->servers->next) { - /* cycle */ + /* use next, or cycle if origin server isn't last */ FwdServer *fs = fwdState->servers; - FwdServer **T; + FwdServer **T, *T2 = NULL; fwdState->servers = fs->next; - for (T = &fwdState->servers; *T; T = &(*T)->next); - *T = fs; - fs->next = NULL; + for (T = &fwdState->servers; *T; T2=*T, T = &(*T)->next); + if (T2 && T2->peer) { + /* cycle */ + *T = fs; + fs->next = NULL; + } else { + /* Use next. 
The last "direct" entry is retried multiple times */ + fwdState->servers = fs->next; + fwdServerFree(fs); + } } /* use eventAdd to break potential call sequence loops */ eventAdd("fwdConnectStart", fwdConnectStart, fwdState, 0.0, 0); @@ -182,12 +189,14 @@ fwdConnectDone(int server_fd, int status, void *data) err->request = requestLink(request); fwdFail(fwdState, err); if (fs->peer) - peerCheckConnectStart(fs->peer); + peerConnectFailed(fs->peer); comm_close(server_fd); } else { debug(17, 3) ("fwdConnectDone: FD %d: '%s'\n", server_fd, storeUrl(fwdState->entry)); fd_note(server_fd, storeUrl(fwdState->entry)); fd_table[server_fd].uses++; + if (fs->peer) + peerConnectSucceded(fs->peer); fwdDispatch(fwdState); } current = NULL; @@ -211,7 +220,7 @@ fwdConnectTimeout(int fd, void *data) */ if (fwdState->servers) if (fwdState->servers->peer) - peerCheckConnectStart(fwdState->servers->peer); + peerConnectFailed(fwdState->servers->peer); } comm_close(fd); } diff --git a/src/neighbors.cc b/src/neighbors.cc index 9a7e9fda5a..306452b2ca 100644 --- a/src/neighbors.cc +++ b/src/neighbors.cc @@ -1,6 +1,6 @@ /* - * $Id: neighbors.cc,v 1.279 2000/05/02 18:23:48 hno Exp $ + * $Id: neighbors.cc,v 1.280 2000/05/02 18:32:41 hno Exp $ * * DEBUG: section 15 Neighbor Routines * AUTHOR: Harvest Derived @@ -48,9 +48,9 @@ static void neighborAliveHtcp(peer *, const MemObject *, const htcpReplyData *); static void neighborCountIgnored(peer *); static void peerRefreshDNS(void *); static IPH peerDNSConfigure; -static EVH peerCheckConnect; -static IPH peerCheckConnect2; -static CNCB peerCheckConnectDone; +static void peerProbeConnect(peer *); +static IPH peerProbeConnect2; +static CNCB peerProbeConnectDone; static void peerCountMcastPeersDone(void *data); static void peerCountMcastPeersStart(void *data); static void peerCountMcastPeersSchedule(peer * p, time_t when); @@ -866,8 +866,10 @@ peerFindByNameAndPort(const char *name, unsigned short port) int neighborUp(const peer * p) { - if 
(!p->tcp_up) + if (!p->tcp_up) { + peerProbeConnect((peer *)p); return 0; + } if (p->options.no_query) return 1; if (p->stats.probe_start != 0 && @@ -967,62 +969,82 @@ peerRefreshDNS(void *data) eventAddIsh("peerRefreshDNS", peerRefreshDNS, NULL, 3600.0, 1); } +void +peerConnectFailed(peer *p) +{ + p->stats.last_connect_failure = squid_curtime; + if (!p->tcp_up) { + debug(15, 2) ("TCP connection to %s/%d dead\n", p->host, p->http_port); + return; + } + debug(15, 1) ("TCP connection to %s/%d failed\n", p->host, p->http_port); + p->tcp_up--; + if (!p->tcp_up) { + debug(15, 1) ("Detected DEAD %s: %s/%d/%d\n", + neighborTypeStr(p), + p->host, p->http_port, p->icp.port); + p->stats.logged_state = PEER_DEAD; + } +} + +void +peerConnectSucceded(peer *p) +{ + if (!p->tcp_up) { + debug(15, 2) ("TCP connection to %s/%d succeded\n", p->host, p->http_port); + debug(15, 1) ("Detected REVIVED %s: %s/%d/%d\n", + neighborTypeStr(p), + p->host, p->http_port, p->icp.port); + p->stats.logged_state = PEER_ALIVE; + } + p->tcp_up = PEER_TCP_MAGIC_COUNT; +} + /* - * peerCheckConnect will NOT be called by eventRun if the peer/data - * pointer becomes invalid. 
+ * peerProbeConnect will be called on dead peers by neighborUp */ static void -peerCheckConnect(void *data) +peerProbeConnect(peer *p) { - peer *p = data; int fd; + if (p->test_fd != -1) + return; /* probe already running */ + if (squid_curtime - p->stats.last_connect_probe < Config.Timeout.connect) + return; /* don't probe to often */ fd = comm_open(SOCK_STREAM, 0, Config.Addrs.tcp_outgoing, 0, COMM_NONBLOCKING, p->host); if (fd < 0) return; p->test_fd = fd; - ipcache_nbgethostbyname(p->host, peerCheckConnect2, p); + p->stats.last_connect_probe = squid_curtime; + ipcache_nbgethostbyname(p->host, peerProbeConnect2, p); } static void -peerCheckConnect2(const ipcache_addrs * ianotused, void *data) +peerProbeConnect2(const ipcache_addrs * ianotused, void *data) { peer *p = data; commConnectStart(p->test_fd, p->host, p->http_port, - peerCheckConnectDone, + peerProbeConnectDone, p); } static void -peerCheckConnectDone(int fd, int status, void *data) +peerProbeConnectDone(int fd, int status, void *data) { peer *p = data; if (status == COMM_OK) { - p->tcp_up = PEER_TCP_MAGIC_COUNT; - debug(15, 1) ("TCP connection to %s/%d succeeded\n", - p->host, p->http_port); + peerConnectSucceded(p); } else { - eventAdd("peerCheckConnect", peerCheckConnect, p, 60.0, 1); + peerConnectFailed(p); } comm_close(fd); + p->test_fd = -1; return; } -void -peerCheckConnectStart(peer * p) -{ - if (!p->tcp_up) - return; - debug(15, 1) ("TCP connection to %s/%d failed\n", p->host, p->http_port); - p->tcp_up--; - if (p->tcp_up != (PEER_TCP_MAGIC_COUNT - 1)) - return; - p->last_fail_time = squid_curtime; - eventAdd("peerCheckConnect", peerCheckConnect, p, 30.0, 1); -} - static void peerCountMcastPeersSchedule(peer * p, time_t when) { @@ -1225,9 +1247,9 @@ dump_peers(StoreEntry * sentry, peer * peers) #if USE_HTCP } #endif - if (e->last_fail_time) { + if (e->stats.last_connect_failure) { storeAppendPrintf(sentry, "Last failed connect() at: %s\n", - mkhttpdlogtime(&(e->last_fail_time))); + 
mkhttpdlogtime(&(e->stats.last_connect_failure))); } if (e->peer_domain != NULL) { storeAppendPrintf(sentry, "DOMAIN LIST: "); diff --git a/src/peer_select.cc b/src/peer_select.cc index 435e60c22c..2ac9dcea42 100644 --- a/src/peer_select.cc +++ b/src/peer_select.cc @@ -1,6 +1,6 @@ /* - * $Id: peer_select.cc,v 1.105 2000/03/06 16:23:33 wessels Exp $ + * $Id: peer_select.cc,v 1.106 2000/05/02 18:32:41 hno Exp $ * * DEBUG: section 44 Peer Selection Algorithm * AUTHOR: Duane Wessels @@ -276,8 +276,8 @@ peerSelectFoo(ps_state * ps) if (Config.onoff.prefer_direct) peerGetSomeDirect(ps); peerGetSomeParent(ps); - if (!Config.onoff.prefer_direct) - peerGetSomeDirect(ps); + /* Have direct as a last resort if possible.. */ + peerGetSomeDirect(ps); peerSelectCallback(ps); } diff --git a/src/protos.h b/src/protos.h index 8d5f28053d..43074d8c92 100644 --- a/src/protos.h +++ b/src/protos.h @@ -1,6 +1,6 @@ /* - * $Id: protos.h,v 1.358 2000/03/25 04:58:40 wessels Exp $ + * $Id: protos.h,v 1.359 2000/05/02 18:32:41 hno Exp $ * * * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ @@ -641,7 +641,8 @@ extern int neighborUp(const peer * e); extern CBDUNL peerDestroy; extern char *neighborTypeStr(const peer * e); extern peer_t neighborType(const peer *, const request_t *); -extern void peerCheckConnectStart(peer *); +extern void peerConnectFailed(peer *); +extern void peerConnectSucceded(peer *); extern void dump_peer_options(StoreEntry *, peer *); extern int peerHTTPOkay(const peer *, request_t *); extern peer *whichPeer(const struct sockaddr_in *from); diff --git a/src/structs.h b/src/structs.h index 3bb006bce5..42c9b350e7 100644 --- a/src/structs.h +++ b/src/structs.h @@ -1,6 +1,6 @@ /* - * $Id: structs.h,v 1.317 2000/05/02 18:23:49 hno Exp $ + * $Id: structs.h,v 1.318 2000/05/02 18:32:41 hno Exp $ * * * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ @@ -1057,6 +1057,8 @@ struct _peer { time_t probe_start; time_t last_query; time_t last_reply; + time_t 
last_connect_failure; + time_t last_connect_probe; int logged_state; /* so we can print dead/revived msgs */ } stats; struct { @@ -1108,7 +1110,6 @@ struct _peer { char *digest_url; #endif int tcp_up; /* 0 if a connect() fails */ - time_t last_fail_time; struct in_addr addresses[10]; int n_addresses; int rr_count;