3 * $Id: peer_select.cc,v 1.131 2003/08/10 11:00:44 robertc Exp $
5 * DEBUG: section 44 Peer Selection Algorithm
6 * AUTHOR: Duane Wessels
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
39 #include "HttpRequest.h"
40 #include "ACLChecklist.h"
43 const char *hier_strings
[] =
53 "CLOSEST_PARENT_MISS",
77 static const char *DirectStr
[] =
85 static void peerSelectFoo(ps_state
*);
86 static void peerPingTimeout(void *data
);
87 static void peerSelectCallback(ps_state
* psstate
);
88 static IRCB peerHandlePingReply
;
89 static void peerSelectStateFree(ps_state
* psstate
);
90 static void peerIcpParentMiss(peer
*, icp_common_t
*, ps_state
*);
92 static void peerHtcpParentMiss(peer
*, htcpReplyData
*, ps_state
*);
93 static void peerHandleHtcpReply(peer
*, peer_t
, htcpReplyData
*, void *);
95 static int peerCheckNetdbDirect(ps_state
* psstate
);
96 static void peerGetSomeNeighbor(ps_state
*);
97 static void peerGetSomeNeighborReplies(ps_state
*);
98 static void peerGetSomeDirect(ps_state
*);
99 static void peerGetSomeParent(ps_state
*);
100 static void peerGetAllParents(ps_state
*);
101 static void peerAddFwdServer(FwdServer
**, peer
*, hier_code
);
104 peerSelectStateFree(ps_state
* psstate
)
106 if (psstate
->acl_checklist
) {
107 debug(44, 1) ("calling aclChecklistFree() from peerSelectStateFree\n");
108 delete (psstate
->acl_checklist
);
111 requestUnlink(psstate
->request
);
112 psstate
->request
= NULL
;
114 if (psstate
->entry
) {
115 assert(psstate
->entry
->ping_status
!= PING_WAITING
);
116 storeUnlockObject(psstate
->entry
);
117 psstate
->entry
= NULL
;
124 peerSelectIcpPing(HttpRequest
* request
, int direct
, StoreEntry
* entry
)
128 assert(entry
->ping_status
== PING_NONE
);
129 assert(direct
!= DIRECT_YES
);
130 debug(44, 3) ("peerSelectIcpPing: %s\n", storeUrl(entry
));
132 if (!request
->flags
.hierarchical
&& direct
!= DIRECT_NO
)
135 if (EBIT_TEST(entry
->flags
, KEY_PRIVATE
) && !neighbors_do_private_keys
)
136 if (direct
!= DIRECT_NO
)
139 n
= neighborsCount(request
);
141 debug(44, 3) ("peerSelectIcpPing: counted %d neighbors\n", n
);
148 peerSelect(HttpRequest
* request
,
156 debug(44, 3) ("peerSelect: %s\n", storeUrl(entry
));
158 debug(44, 3) ("peerSelect: %s\n", RequestMethodStr
[request
->method
]);
160 psstate
= cbdataAlloc(ps_state
);
162 psstate
->request
= requestLink(request
);
164 psstate
->entry
= entry
;
166 psstate
->callback
= callback
;
168 psstate
->callback_data
= cbdataReference(callback_data
);
170 psstate
->direct
= DIRECT_UNKNOWN
;
172 #if USE_CACHE_DIGESTS
174 request
->hier
.peer_select_start
= current_time
;
179 storeLockObject(psstate
->entry
);
181 peerSelectFoo(psstate
);
185 peerCheckNeverDirectDone(int answer
, void *data
)
187 ps_state
*psstate
= (ps_state
*) data
;
188 psstate
->acl_checklist
= NULL
;
189 debug(44, 3) ("peerCheckNeverDirectDone: %d\n", answer
);
190 psstate
->never_direct
= answer
? 1 : -1;
191 peerSelectFoo(psstate
);
195 peerCheckAlwaysDirectDone(int answer
, void *data
)
197 ps_state
*psstate
= (ps_state
*)data
;
198 psstate
->acl_checklist
= NULL
;
199 debug(44, 3) ("peerCheckAlwaysDirectDone: %d\n", answer
);
200 psstate
->always_direct
= answer
? 1 : -1;
201 peerSelectFoo(psstate
);
205 peerSelectCallback(ps_state
* psstate
)
207 StoreEntry
*entry
= psstate
->entry
;
208 FwdServer
*fs
= psstate
->servers
;
213 debug(44, 3) ("peerSelectCallback: %s\n", storeUrl(entry
));
215 if (entry
->ping_status
== PING_WAITING
)
216 eventDelete(peerPingTimeout
, psstate
);
218 entry
->ping_status
= PING_DONE
;
222 debug(44, 1) ("Failed to select source for '%s'\n", storeUrl(entry
));
223 debug(44, 1) (" always_direct = %d\n", psstate
->always_direct
);
224 debug(44, 1) (" never_direct = %d\n", psstate
->never_direct
);
225 debug(44, 1) (" timedout = %d\n", psstate
->ping
.timedout
);
228 psstate
->ping
.stop
= current_time
;
229 psstate
->request
->hier
.ping
= psstate
->ping
;
230 callback
= psstate
->callback
;
231 psstate
->callback
= NULL
;
233 if (cbdataReferenceValidDone(psstate
->callback_data
, &cbdata
)) {
234 psstate
->servers
= NULL
;
235 callback(fs
, cbdata
);
238 peerSelectStateFree(psstate
);
242 peerCheckNetdbDirect(ps_state
* psstate
)
248 if (psstate
->direct
== DIRECT_NO
)
251 myrtt
= netdbHostRtt(psstate
->request
->host
);
253 debug(44, 3) ("peerCheckNetdbDirect: MY RTT = %d msec\n", myrtt
);
255 debug(44, 3) ("peerCheckNetdbDirect: minimum_direct_rtt = %d msec\n",
256 Config
.minDirectRtt
);
258 if (myrtt
&& myrtt
<= Config
.minDirectRtt
)
261 myhops
= netdbHostHops(psstate
->request
->host
);
263 debug(44, 3) ("peerCheckNetdbDirect: MY hops = %d\n", myhops
);
265 debug(44, 3) ("peerCheckNetdbDirect: minimum_direct_hops = %d\n",
266 Config
.minDirectHops
);
268 if (myhops
&& myhops
<= Config
.minDirectHops
)
271 p
= whichPeer(&psstate
->closest_parent_miss
);
276 debug(44, 3) ("peerCheckNetdbDirect: closest_parent_miss RTT = %d msec\n",
277 psstate
->ping
.p_rtt
);
279 if (myrtt
&& myrtt
<= psstate
->ping
.p_rtt
)
286 peerSelectFoo(ps_state
* ps
)
288 StoreEntry
*entry
= ps
->entry
;
289 HttpRequest
*request
= ps
->request
;
290 debug(44, 3) ("peerSelectFoo: '%s %s'\n",
291 RequestMethodStr
[request
->method
],
294 if (ps
->direct
== DIRECT_UNKNOWN
) {
295 if (ps
->always_direct
== 0 && Config
.accessList
.AlwaysDirect
) {
296 ps
->acl_checklist
= aclChecklistCreate(
297 Config
.accessList
.AlwaysDirect
,
300 ps
->acl_checklist
->nonBlockingCheck(peerCheckAlwaysDirectDone
,
303 } else if (ps
->always_direct
> 0) {
304 ps
->direct
= DIRECT_YES
;
305 } else if (ps
->never_direct
== 0 && Config
.accessList
.NeverDirect
) {
306 ps
->acl_checklist
= aclChecklistCreate(
307 Config
.accessList
.NeverDirect
,
310 ps
->acl_checklist
->nonBlockingCheck(peerCheckNeverDirectDone
,
313 } else if (ps
->never_direct
> 0) {
314 ps
->direct
= DIRECT_NO
;
315 } else if (request
->flags
.accelerated
) {
316 ps
->direct
= DIRECT_NO
;
317 } else if (request
->flags
.loopdetect
) {
318 ps
->direct
= DIRECT_YES
;
319 } else if (peerCheckNetdbDirect(ps
)) {
320 ps
->direct
= DIRECT_YES
;
322 ps
->direct
= DIRECT_MAYBE
;
325 debug(44, 3) ("peerSelectFoo: direct = %s\n",
326 DirectStr
[ps
->direct
]);
331 } else if (entry
->ping_status
== PING_NONE
) {
332 peerGetSomeNeighbor(ps
);
334 if (entry
->ping_status
== PING_WAITING
)
336 } else if (entry
->ping_status
== PING_WAITING
) {
337 peerGetSomeNeighborReplies(ps
);
338 entry
->ping_status
= PING_DONE
;
341 switch (ps
->direct
) {
344 peerGetSomeDirect(ps
);
348 peerGetSomeParent(ps
);
349 peerGetAllParents(ps
);
354 if (Config
.onoff
.prefer_direct
)
355 peerGetSomeDirect(ps
);
357 if (request
->flags
.hierarchical
|| !Config
.onoff
.nonhierarchical_direct
)
358 peerGetSomeParent(ps
);
360 if (!Config
.onoff
.prefer_direct
)
361 peerGetSomeDirect(ps
);
366 peerSelectCallback(ps
);
370 * peerGetSomeNeighbor
372 * Selects a neighbor (parent or sibling) based on one of the
376 * Netdb RTT estimates
380 peerGetSomeNeighbor(ps_state
* ps
)
382 StoreEntry
*entry
= ps
->entry
;
383 HttpRequest
*request
= ps
->request
;
385 hier_code code
= HIER_NONE
;
386 assert(entry
->ping_status
== PING_NONE
);
388 if (ps
->direct
== DIRECT_YES
) {
389 entry
->ping_status
= PING_DONE
;
393 #if USE_CACHE_DIGESTS
394 if ((p
= neighborsDigestSelect(request
))) {
395 if (neighborType(p
, request
) == PEER_PARENT
)
396 code
= CD_PARENT_HIT
;
398 code
= CD_SIBLING_HIT
;
401 if ((p
= netdbClosestParent(request
))) {
402 code
= CLOSEST_PARENT
;
403 } else if (peerSelectIcpPing(request
, ps
->direct
, entry
)) {
404 debug(44, 3) ("peerSelect: Doing ICP pings\n");
405 ps
->ping
.start
= current_time
;
406 ps
->ping
.n_sent
= neighborsUdpPing(request
,
410 &ps
->ping
.n_replies_expected
,
413 if (ps
->ping
.n_sent
== 0)
414 debug(44, 0) ("WARNING: neighborsUdpPing returned 0\n");
416 debug(44, 3) ("peerSelect: %d ICP replies expected, RTT %d msec\n",
417 ps
->ping
.n_replies_expected
, ps
->ping
.timeout
);
419 if (ps
->ping
.n_replies_expected
> 0) {
420 entry
->ping_status
= PING_WAITING
;
421 eventAdd("peerPingTimeout",
424 0.001 * ps
->ping
.timeout
,
430 if (code
!= HIER_NONE
) {
432 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
433 peerAddFwdServer(&ps
->servers
, p
, code
);
436 entry
->ping_status
= PING_DONE
;
440 * peerGetSomeNeighborReplies
442 * Selects a neighbor (parent or sibling) based on ICP/HTCP replies.
445 peerGetSomeNeighborReplies(ps_state
* ps
)
447 HttpRequest
*request
= ps
->request
;
449 hier_code code
= HIER_NONE
;
450 assert(ps
->entry
->ping_status
== PING_WAITING
);
451 assert(ps
->direct
!= DIRECT_YES
);
453 if (peerCheckNetdbDirect(ps
)) {
454 code
= CLOSEST_DIRECT
;
455 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], request
->host
);
456 peerAddFwdServer(&ps
->servers
, NULL
, code
);
461 code
= ps
->hit_type
== PEER_PARENT
? PARENT_HIT
: SIBLING_HIT
;
463 #if ALLOW_SOURCE_PING
464 if ((p
= ps
->secho
)) {
465 code
= SOURCE_FASTEST
;
468 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
469 p
= whichPeer(&ps
->closest_parent_miss
);
470 code
= CLOSEST_PARENT_MISS
;
471 } else if (ps
->first_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
472 p
= whichPeer(&ps
->first_parent_miss
);
473 code
= FIRST_PARENT_MISS
;
476 if (p
&& code
!= HIER_NONE
) {
477 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
478 peerAddFwdServer(&ps
->servers
, p
, code
);
486 * Simply adds a 'direct' entry to the FwdServers list if this
487 * request can be forwarded directly to the origin server
490 peerGetSomeDirect(ps_state
* ps
)
492 if (ps
->direct
== DIRECT_NO
)
495 if (ps
->request
->protocol
== PROTO_WAIS
)
496 /* Its not really DIRECT, now is it? */
497 peerAddFwdServer(&ps
->servers
, Config
.Wais
._peer
, DIRECT
);
499 peerAddFwdServer(&ps
->servers
, NULL
, DIRECT
);
503 peerGetSomeParent(ps_state
* ps
)
506 HttpRequest
*request
= ps
->request
;
507 hier_code code
= HIER_NONE
;
508 debug(44, 3) ("peerGetSomeParent: %s %s\n",
509 RequestMethodStr
[request
->method
],
512 if (ps
->direct
== DIRECT_YES
)
515 if ((p
= getDefaultParent(request
))) {
516 code
= DEFAULT_PARENT
;
519 } else if ((p
= carpSelectParent(request
))) {
523 } else if ((p
= getRoundRobinParent(request
))) {
524 code
= ROUNDROBIN_PARENT
;
525 } else if ((p
= getWeightedRoundRobinParent(request
))) {
526 code
= ROUNDROBIN_PARENT
;
527 } else if ((p
= getFirstUpParent(request
))) {
528 code
= FIRSTUP_PARENT
;
529 } else if ((p
= getAnyParent(request
))) {
530 code
= ANY_OLD_PARENT
;
533 if (code
!= HIER_NONE
) {
534 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
535 peerAddFwdServer(&ps
->servers
, p
, code
);
539 /* Adds alive parents. Used as a last resort for never_direct.
542 peerGetAllParents(ps_state
* ps
)
545 HttpRequest
*request
= ps
->request
;
546 /* Add all alive parents */
548 for (p
= Config
.peers
; p
; p
= p
->next
) {
549 /* XXX: neighbors.c lacks a public interface for enumerating
550 * parents to a request so we have to dig some here..
553 if (neighborType(p
, request
) != PEER_PARENT
)
556 if (!peerHTTPOkay(p
, request
))
559 debug(15, 3) ("peerGetAllParents: adding alive parent %s\n", p
->host
);
561 peerAddFwdServer(&ps
->servers
, p
, ANY_OLD_PARENT
);
564 /* XXX: should add dead parents here, but it is currently
565 * not possible to find out which parents are dead or which
566 * simply are not configured to handle the request.
568 /* Add default parent as a last resort */
569 if ((p
= getDefaultParent(request
))) {
570 peerAddFwdServer(&ps
->servers
, p
, DEFAULT_PARENT
);
575 peerPingTimeout(void *data
)
577 ps_state
*psstate
= (ps_state
*)data
;
578 StoreEntry
*entry
= psstate
->entry
;
581 debug(44, 3) ("peerPingTimeout: '%s'\n", storeUrl(entry
));
583 if (!cbdataReferenceValid(psstate
->callback_data
)) {
584 /* request aborted */
585 entry
->ping_status
= PING_DONE
;
586 cbdataReferenceDone(psstate
->callback_data
);
587 peerSelectStateFree(psstate
);
591 PeerStats
.timeouts
++;
592 psstate
->ping
.timedout
= 1;
593 peerSelectFoo(psstate
);
599 memset(&PeerStats
, '\0', sizeof(PeerStats
));
600 assert(sizeof(hier_strings
) == (HIER_MAX
+ 1) * sizeof(char *));
604 peerIcpParentMiss(peer
* p
, icp_common_t
* header
, ps_state
* ps
)
609 if (Config
.onoff
.query_icmp
) {
610 if (header
->flags
& ICP_FLAG_SRC_RTT
) {
611 rtt
= header
->pad
& 0xFFFF;
612 hops
= (header
->pad
>> 16) & 0xFFFF;
614 if (rtt
> 0 && rtt
< 0xFFFF)
615 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
617 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
618 ps
->closest_parent_miss
= p
->in_addr
;
619 ps
->ping
.p_rtt
= rtt
;
624 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
625 if (p
->options
.closest_only
)
628 /* set FIRST_MISS if there is no CLOSEST parent */
629 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
632 rtt
= (tvSubMsec(ps
->ping
.start
, current_time
) - p
->basetime
) / p
->weight
;
637 if (ps
->first_parent_miss
.sin_addr
.s_addr
== any_addr
.s_addr
||
638 rtt
< ps
->ping
.w_rtt
) {
639 ps
->first_parent_miss
= p
->in_addr
;
640 ps
->ping
.w_rtt
= rtt
;
645 peerHandleIcpReply(peer
* p
, peer_t type
, icp_common_t
* header
, void *data
)
647 ps_state
*psstate
= (ps_state
*)data
;
648 icp_opcode op
= header
->getOpCode();
649 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
651 storeUrl(psstate
->entry
));
652 #if USE_CACHE_DIGESTS && 0
653 /* do cd lookup to count false misses */
656 peerNoteDigestLookup(request
, p
,
657 peerDigestLookup(p
, request
, psstate
->entry
));
661 psstate
->ping
.n_recv
++;
663 if (op
== ICP_MISS
|| op
== ICP_DECHO
) {
664 if (type
== PEER_PARENT
)
665 peerIcpParentMiss(p
, header
, psstate
);
666 } else if (op
== ICP_HIT
) {
668 psstate
->hit_type
= type
;
669 peerSelectFoo(psstate
);
673 #if ALLOW_SOURCE_PING
674 else if (op
== ICP_SECHO
) {
676 peerSelectFoo(psstate
);
681 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
684 peerSelectFoo(psstate
);
689 peerHandleHtcpReply(peer
* p
, peer_t type
, htcpReplyData
* htcp
, void *data
)
691 ps_state
*psstate
= (ps_state
*)data
;
692 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
693 htcp
->hit
? "HIT" : "MISS",
694 storeUrl(psstate
->entry
));
695 psstate
->ping
.n_recv
++;
699 psstate
->hit_type
= type
;
700 peerSelectFoo(psstate
);
704 if (type
== PEER_PARENT
)
705 peerHtcpParentMiss(p
, htcp
, psstate
);
707 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
710 peerSelectFoo(psstate
);
714 peerHtcpParentMiss(peer
* p
, htcpReplyData
* htcp
, ps_state
* ps
)
719 if (Config
.onoff
.query_icmp
) {
720 if (htcp
->cto
.rtt
> 0) {
721 rtt
= (int) htcp
->cto
.rtt
* 1000;
722 hops
= (int) htcp
->cto
.hops
* 1000;
723 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
725 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
726 ps
->closest_parent_miss
= p
->in_addr
;
727 ps
->ping
.p_rtt
= rtt
;
732 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
733 if (p
->options
.closest_only
)
736 /* set FIRST_MISS if there is no CLOSEST parent */
737 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
740 rtt
= (tvSubMsec(ps
->ping
.start
, current_time
) - p
->basetime
) / p
->weight
;
745 if (ps
->first_parent_miss
.sin_addr
.s_addr
== any_addr
.s_addr
||
746 rtt
< ps
->ping
.w_rtt
) {
747 ps
->first_parent_miss
= p
->in_addr
;
748 ps
->ping
.w_rtt
= rtt
;
755 peerHandlePingReply(peer
* p
, peer_t type
, protocol_t proto
, void *pingdata
, void *data
)
757 if (proto
== PROTO_ICP
)
758 peerHandleIcpReply(p
, type
, (icp_common_t
*)pingdata
, data
);
762 else if (proto
== PROTO_HTCP
)
763 peerHandleHtcpReply(p
, type
, (htcpReplyData
*)pingdata
, data
);
768 debug(44, 1) ("peerHandlePingReply: unknown protocol_t %d\n", (int) proto
);
772 peerAddFwdServer(FwdServer
** FS
, peer
* p
, hier_code code
)
774 FwdServer
*fs
= (FwdServer
*)memAllocate(MEM_FWD_SERVER
);
775 debug(44, 5) ("peerAddFwdServer: adding %s %s\n",
776 p
? p
->host
: "DIRECT",
778 fs
->_peer
= cbdataReference(p
);