3 * $Id: peer_select.cc,v 1.113 2001/02/07 18:56:52 hno Exp $
5 * DEBUG: section 44 Peer Selection Algorithm
6 * AUTHOR: Duane Wessels
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
38 const char *hier_strings
[] =
48 "CLOSEST_PARENT_MISS",
69 static char *DirectStr
[] =
77 static void peerSelectFoo(ps_state
*);
78 static void peerPingTimeout(void *data
);
79 static void peerSelectCallback(ps_state
* psstate
);
80 static IRCB peerHandlePingReply
;
81 static void peerSelectStateFree(ps_state
* psstate
);
82 static void peerIcpParentMiss(peer
*, icp_common_t
*, ps_state
*);
84 static void peerHtcpParentMiss(peer
*, htcpReplyData
*, ps_state
*);
85 static void peerHandleHtcpReply(peer
*, peer_t
, htcpReplyData
*, void *);
87 static int peerCheckNetdbDirect(ps_state
* psstate
);
88 static void peerGetSomeNeighbor(ps_state
*);
89 static void peerGetSomeNeighborReplies(ps_state
*);
90 static void peerGetSomeDirect(ps_state
*);
91 static void peerGetSomeParent(ps_state
*);
92 static void peerGetAllParents(ps_state
*);
93 static void peerAddFwdServer(FwdServer
**, peer
*, hier_code
);
96 peerSelectStateFree(ps_state
* psstate
)
98 if (psstate
->acl_checklist
) {
99 debug(44, 1) ("calling aclChecklistFree() from peerSelectStateFree\n");
100 aclChecklistFree(psstate
->acl_checklist
);
102 requestUnlink(psstate
->request
);
103 psstate
->request
= NULL
;
104 if (psstate
->entry
) {
105 assert(psstate
->entry
->ping_status
!= PING_WAITING
);
106 storeUnlockObject(psstate
->entry
);
107 psstate
->entry
= NULL
;
113 peerSelectIcpPing(request_t
* request
, int direct
, StoreEntry
* entry
)
117 assert(entry
->ping_status
== PING_NONE
);
118 assert(direct
!= DIRECT_YES
);
119 debug(44, 3) ("peerSelectIcpPing: %s\n", storeUrl(entry
));
120 if (!request
->flags
.hierarchical
&& direct
!= DIRECT_NO
)
122 if (EBIT_TEST(entry
->flags
, KEY_PRIVATE
) && !neighbors_do_private_keys
)
123 if (direct
!= DIRECT_NO
)
125 n
= neighborsCount(request
);
126 debug(44, 3) ("peerSelectIcpPing: counted %d neighbors\n", n
);
132 peerSelect(request_t
* request
,
139 debug(44, 3) ("peerSelect: %s\n", storeUrl(entry
));
141 debug(44, 3) ("peerSelect: %s\n", RequestMethodStr
[request
->method
]);
142 psstate
= CBDATA_ALLOC(ps_state
, NULL
);
143 psstate
->request
= requestLink(request
);
144 psstate
->entry
= entry
;
145 psstate
->callback
= callback
;
146 psstate
->callback_data
= callback_data
;
147 psstate
->direct
= DIRECT_UNKNOWN
;
148 #if USE_CACHE_DIGESTS
149 request
->hier
.peer_select_start
= current_time
;
152 storeLockObject(psstate
->entry
);
153 cbdataLock(callback_data
);
154 peerSelectFoo(psstate
);
158 peerCheckNeverDirectDone(int answer
, void *data
)
160 ps_state
*psstate
= data
;
161 psstate
->acl_checklist
= NULL
;
162 debug(44, 3) ("peerCheckNeverDirectDone: %d\n", answer
);
163 psstate
->never_direct
= answer
? 1 : -1;
164 peerSelectFoo(psstate
);
168 peerCheckAlwaysDirectDone(int answer
, void *data
)
170 ps_state
*psstate
= data
;
171 psstate
->acl_checklist
= NULL
;
172 debug(44, 3) ("peerCheckAlwaysDirectDone: %d\n", answer
);
173 psstate
->always_direct
= answer
? 1 : -1;
174 peerSelectFoo(psstate
);
178 peerSelectCallback(ps_state
* psstate
)
180 StoreEntry
*entry
= psstate
->entry
;
181 FwdServer
*fs
= psstate
->servers
;
182 void *data
= psstate
->callback_data
;
184 debug(44, 3) ("peerSelectCallback: %s\n", storeUrl(entry
));
185 if (entry
->ping_status
== PING_WAITING
)
186 eventDelete(peerPingTimeout
, psstate
);
187 entry
->ping_status
= PING_DONE
;
190 debug(44, 1) ("Failed to select source for '%s'\n", storeUrl(entry
));
191 debug(44, 1) (" always_direct = %d\n", psstate
->always_direct
);
192 debug(44, 1) (" never_direct = %d\n", psstate
->never_direct
);
193 debug(44, 1) (" timedout = %d\n", psstate
->ping
.timedout
);
195 psstate
->ping
.stop
= current_time
;
196 psstate
->request
->hier
.ping
= psstate
->ping
;
197 if (cbdataValid(data
)) {
198 psstate
->servers
= NULL
;
199 psstate
->callback(fs
, data
);
202 peerSelectStateFree(psstate
);
206 peerCheckNetdbDirect(ps_state
* psstate
)
211 if (psstate
->direct
== DIRECT_NO
)
213 myrtt
= netdbHostRtt(psstate
->request
->host
);
214 debug(44, 3) ("peerCheckNetdbDirect: MY RTT = %d msec\n", myrtt
);
215 debug(44, 3) ("peerCheckNetdbDirect: minimum_direct_rtt = %d msec\n",
216 Config
.minDirectRtt
);
217 if (myrtt
&& myrtt
<= Config
.minDirectRtt
)
219 myhops
= netdbHostHops(psstate
->request
->host
);
220 debug(44, 3) ("peerCheckNetdbDirect: MY hops = %d\n", myhops
);
221 debug(44, 3) ("peerCheckNetdbDirect: minimum_direct_hops = %d\n",
222 Config
.minDirectHops
);
223 if (myhops
&& myhops
<= Config
.minDirectHops
)
225 p
= whichPeer(&psstate
->closest_parent_miss
);
228 debug(44, 3) ("peerCheckNetdbDirect: closest_parent_miss RTT = %d msec\n",
229 psstate
->ping
.p_rtt
);
230 if (myrtt
&& myrtt
<= psstate
->ping
.p_rtt
)
236 peerSelectFoo(ps_state
* ps
)
238 StoreEntry
*entry
= ps
->entry
;
239 request_t
*request
= ps
->request
;
240 debug(44, 3) ("peerSelectFoo: '%s %s'\n",
241 RequestMethodStr
[request
->method
],
243 if (ps
->direct
== DIRECT_UNKNOWN
) {
244 if (ps
->always_direct
== 0 && Config
.accessList
.AlwaysDirect
) {
245 ps
->acl_checklist
= aclChecklistCreate(
246 Config
.accessList
.AlwaysDirect
,
249 aclNBCheck(ps
->acl_checklist
,
250 peerCheckAlwaysDirectDone
,
253 } else if (ps
->always_direct
> 0) {
254 ps
->direct
= DIRECT_YES
;
255 } else if (ps
->never_direct
== 0 && Config
.accessList
.NeverDirect
) {
256 ps
->acl_checklist
= aclChecklistCreate(
257 Config
.accessList
.NeverDirect
,
260 aclNBCheck(ps
->acl_checklist
,
261 peerCheckNeverDirectDone
,
264 } else if (ps
->never_direct
> 0) {
265 ps
->direct
= DIRECT_NO
;
266 } else if (request
->flags
.loopdetect
) {
267 ps
->direct
= DIRECT_YES
;
268 } else if (peerCheckNetdbDirect(ps
)) {
269 ps
->direct
= DIRECT_YES
;
271 ps
->direct
= DIRECT_MAYBE
;
273 debug(44, 3) ("peerSelectFoo: direct = %s\n",
274 DirectStr
[ps
->direct
]);
278 } else if (entry
->ping_status
== PING_NONE
) {
279 peerGetSomeNeighbor(ps
);
280 if (entry
->ping_status
== PING_WAITING
)
282 } else if (entry
->ping_status
== PING_WAITING
) {
283 peerGetSomeNeighborReplies(ps
);
284 entry
->ping_status
= PING_DONE
;
286 switch (ps
->direct
) {
288 peerGetSomeDirect(ps
);
291 peerGetSomeParent(ps
);
292 peerGetAllParents(ps
);
295 if (Config
.onoff
.prefer_direct
)
296 peerGetSomeDirect(ps
);
297 if (request
->flags
.hierarchical
|| !Config
.onoff
.nonhierarchical_direct
)
298 peerGetSomeParent(ps
);
299 if (!Config
.onoff
.prefer_direct
)
300 peerGetSomeDirect(ps
);
303 peerSelectCallback(ps
);
307 * peerGetSomeNeighbor
309 * Selects a neighbor (parent or sibling) based on one of the
313 * Netdb RTT estimates
317 peerGetSomeNeighbor(ps_state
* ps
)
319 StoreEntry
*entry
= ps
->entry
;
320 request_t
*request
= ps
->request
;
322 hier_code code
= HIER_NONE
;
323 assert(entry
->ping_status
== PING_NONE
);
324 if (ps
->direct
== DIRECT_YES
) {
325 entry
->ping_status
= PING_DONE
;
328 #if USE_CACHE_DIGESTS
329 if ((p
= neighborsDigestSelect(request
, entry
))) {
330 if (neighborType(p
, request
) == PEER_PARENT
)
331 code
= CD_PARENT_HIT
;
333 code
= CD_SIBLING_HIT
;
337 if ((p
= carpSelectParent(request
))) {
341 if ((p
= netdbClosestParent(request
))) {
342 code
= CLOSEST_PARENT
;
343 } else if (peerSelectIcpPing(request
, ps
->direct
, entry
)) {
344 debug(44, 3) ("peerSelect: Doing ICP pings\n");
345 ps
->ping
.start
= current_time
;
346 ps
->ping
.n_sent
= neighborsUdpPing(request
,
350 &ps
->ping
.n_replies_expected
,
352 if (ps
->ping
.n_sent
== 0)
353 debug(44, 0) ("WARNING: neighborsUdpPing returned 0\n");
354 debug(44, 3) ("peerSelect: %d ICP replies expected, RTT %d msec\n",
355 ps
->ping
.n_replies_expected
, ps
->ping
.timeout
);
356 if (ps
->ping
.n_replies_expected
> 0) {
357 entry
->ping_status
= PING_WAITING
;
358 eventAdd("peerPingTimeout",
361 0.001 * ps
->ping
.timeout
,
366 if (code
!= HIER_NONE
) {
368 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
369 peerAddFwdServer(&ps
->servers
, p
, code
);
371 entry
->ping_status
= PING_DONE
;
375 * peerGetSomeNeighborReplies
377 * Selects a neighbor (parent or sibling) based on ICP/HTCP replies.
380 peerGetSomeNeighborReplies(ps_state
* ps
)
382 request_t
*request
= ps
->request
;
384 hier_code code
= HIER_NONE
;
385 assert(ps
->entry
->ping_status
== PING_WAITING
);
386 assert(ps
->direct
!= DIRECT_YES
);
387 if (peerCheckNetdbDirect(ps
)) {
388 code
= CLOSEST_DIRECT
;
389 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], request
->host
);
390 peerAddFwdServer(&ps
->servers
, NULL
, code
);
394 code
= ps
->hit_type
== PEER_PARENT
? PARENT_HIT
: SIBLING_HIT
;
396 #if ALLOW_SOURCE_PING
397 if ((p
= ps
->secho
)) {
398 code
= SOURCE_FASTEST
;
401 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
402 p
= whichPeer(&ps
->closest_parent_miss
);
403 code
= CLOSEST_PARENT_MISS
;
404 } else if (ps
->first_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
405 p
= whichPeer(&ps
->first_parent_miss
);
406 code
= FIRST_PARENT_MISS
;
408 if (p
&& code
!= HIER_NONE
) {
409 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
410 peerAddFwdServer(&ps
->servers
, p
, code
);
418 * Simply adds a 'direct' entry to the FwdServers list if this
419 * request can be forwarded directly to the origin server
422 peerGetSomeDirect(ps_state
* ps
)
424 if (ps
->direct
== DIRECT_NO
)
426 if (ps
->request
->protocol
== PROTO_WAIS
)
427 /* Its not really DIRECT, now is it? */
428 peerAddFwdServer(&ps
->servers
, Config
.Wais
.peer
, DIRECT
);
430 peerAddFwdServer(&ps
->servers
, NULL
, DIRECT
);
434 peerGetSomeParent(ps_state
* ps
)
437 request_t
*request
= ps
->request
;
438 hier_code code
= HIER_NONE
;
439 debug(44, 3) ("peerGetSomeParent: %s %s\n",
440 RequestMethodStr
[request
->method
],
442 if (ps
->direct
== DIRECT_YES
)
444 if ((p
= getDefaultParent(request
))) {
445 code
= DEFAULT_PARENT
;
446 } else if ((p
= getRoundRobinParent(request
))) {
447 code
= ROUNDROBIN_PARENT
;
448 } else if ((p
= getFirstUpParent(request
))) {
449 code
= FIRSTUP_PARENT
;
450 } else if ((p
= getAnyParent(request
))) {
451 code
= ANY_OLD_PARENT
;
453 if (code
!= HIER_NONE
) {
454 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
455 peerAddFwdServer(&ps
->servers
, p
, code
);
459 /* Adds alive parents. Used as a last resort for never_direct.
462 peerGetAllParents(ps_state
* ps
)
465 request_t
*request
= ps
->request
;
466 /* Add all alive parents */
467 for (p
= Config
.peers
; p
; p
= p
->next
) {
468 /* XXX: neighbors.c lacks a public interface for enumerating
469 * parents to a request so we have to dig some here..
471 if (neighborType(p
, request
) != PEER_PARENT
)
473 if (!peerHTTPOkay(p
, request
))
475 debug(15, 3) ("peerGetAllParents: adding alive parent %s\n", p
->host
);
476 peerAddFwdServer(&ps
->servers
, p
, ANY_OLD_PARENT
);
478 /* XXX: should add dead parents here, but it is currently
479 * not possible to find out which parents are dead or which
480 * simply are not configured to handle the request.
482 /* Add default parent as a last resort */
483 if ((p
= getDefaultParent(request
))) {
484 peerAddFwdServer(&ps
->servers
, p
, DEFAULT_PARENT
);
489 peerPingTimeout(void *data
)
491 ps_state
*psstate
= data
;
492 StoreEntry
*entry
= psstate
->entry
;
494 debug(44, 3) ("peerPingTimeout: '%s'\n", storeUrl(entry
));
495 if (!cbdataValid(psstate
->callback_data
)) {
496 /* request aborted */
497 entry
->ping_status
= PING_DONE
;
498 cbdataUnlock(psstate
->callback_data
);
499 peerSelectStateFree(psstate
);
502 PeerStats
.timeouts
++;
503 psstate
->ping
.timedout
= 1;
504 peerSelectFoo(psstate
);
510 memset(&PeerStats
, '\0', sizeof(PeerStats
));
511 assert(sizeof(hier_strings
) == (HIER_MAX
+ 1) * sizeof(char *));
515 peerIcpParentMiss(peer
* p
, icp_common_t
* header
, ps_state
* ps
)
519 if (Config
.onoff
.query_icmp
) {
520 if (header
->flags
& ICP_FLAG_SRC_RTT
) {
521 rtt
= header
->pad
& 0xFFFF;
522 hops
= (header
->pad
>> 16) & 0xFFFF;
523 if (rtt
> 0 && rtt
< 0xFFFF)
524 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
525 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
526 ps
->closest_parent_miss
= p
->in_addr
;
527 ps
->ping
.p_rtt
= rtt
;
531 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
532 if (p
->options
.closest_only
)
534 /* set FIRST_MISS if there is no CLOSEST parent */
535 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
537 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
538 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
539 ps
->first_parent_miss
= p
->in_addr
;
540 ps
->ping
.w_rtt
= rtt
;
545 peerHandleIcpReply(peer
* p
, peer_t type
, icp_common_t
* header
, void *data
)
547 ps_state
*psstate
= data
;
548 icp_opcode op
= header
->opcode
;
549 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
551 storeUrl(psstate
->entry
));
552 #if USE_CACHE_DIGESTS && 0
553 /* do cd lookup to count false misses */
555 peerNoteDigestLookup(request
, p
,
556 peerDigestLookup(p
, request
, psstate
->entry
));
558 psstate
->ping
.n_recv
++;
559 if (op
== ICP_MISS
|| op
== ICP_DECHO
) {
560 if (type
== PEER_PARENT
)
561 peerIcpParentMiss(p
, header
, psstate
);
562 } else if (op
== ICP_HIT
) {
564 psstate
->hit_type
= type
;
565 peerSelectFoo(psstate
);
568 #if ALLOW_SOURCE_PING
569 else if (op
== ICP_SECHO
) {
571 peerSelectFoo(psstate
);
575 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
577 peerSelectFoo(psstate
);
582 peerHandleHtcpReply(peer
* p
, peer_t type
, htcpReplyData
* htcp
, void *data
)
584 ps_state
*psstate
= data
;
585 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
586 htcp
->hit
? "HIT" : "MISS",
587 storeUrl(psstate
->entry
));
588 psstate
->ping
.n_recv
++;
591 psstate
->hit_type
= type
;
592 peerSelectFoo(psstate
);
595 if (type
== PEER_PARENT
)
596 peerHtcpParentMiss(p
, htcp
, psstate
);
597 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
599 peerSelectFoo(psstate
);
603 peerHtcpParentMiss(peer
* p
, htcpReplyData
* htcp
, ps_state
* ps
)
607 if (Config
.onoff
.query_icmp
) {
608 if (htcp
->cto
.rtt
> 0) {
609 rtt
= (int) htcp
->cto
.rtt
* 1000;
610 hops
= (int) htcp
->cto
.hops
* 1000;
611 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
612 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
613 ps
->closest_parent_miss
= p
->in_addr
;
614 ps
->ping
.p_rtt
= rtt
;
618 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
619 if (p
->options
.closest_only
)
621 /* set FIRST_MISS if there is no CLOSEST parent */
622 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
624 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
625 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
626 ps
->first_parent_miss
= p
->in_addr
;
627 ps
->ping
.w_rtt
= rtt
;
633 peerHandlePingReply(peer
* p
, peer_t type
, protocol_t proto
, void *pingdata
, void *data
)
635 if (proto
== PROTO_ICP
)
636 peerHandleIcpReply(p
, type
, pingdata
, data
);
638 else if (proto
== PROTO_HTCP
)
639 peerHandleHtcpReply(p
, type
, pingdata
, data
);
642 debug(44, 1) ("peerHandlePingReply: unknown protocol_t %d\n", (int) proto
);
646 peerAddFwdServer(FwdServer
** FS
, peer
* p
, hier_code code
)
648 FwdServer
*fs
= memAllocate(MEM_FWD_SERVER
);
649 debug(44, 5) ("peerAddFwdServer: adding %s %s\n",
650 p
? p
->host
: "DIRECT",
654 cbdataLock(fs
->peer
);