3 * $Id: peer_select.cc,v 1.109 2000/10/17 08:06:04 adrian Exp $
5 * DEBUG: section 44 Peer Selection Algorithm
6 * AUTHOR: Duane Wessels
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by the
14 * National Science Foundation. Squid is Copyrighted (C) 1998 by
15 * the Regents of the University of California. Please see the
16 * COPYRIGHT file for full details. Squid incorporates software
17 * developed and/or copyrighted by other sources. Please see the
18 * CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
38 const char *hier_strings
[] =
48 "CLOSEST_PARENT_MISS",
69 static char *DirectStr
[] =
77 static void peerSelectFoo(ps_state
*);
78 static void peerPingTimeout(void *data
);
79 static void peerSelectCallback(ps_state
* psstate
);
80 static IRCB peerHandlePingReply
;
81 static void peerSelectStateFree(ps_state
* psstate
);
82 static void peerIcpParentMiss(peer
*, icp_common_t
*, ps_state
*);
84 static void peerHtcpParentMiss(peer
*, htcpReplyData
*, ps_state
*);
85 static void peerHandleHtcpReply(peer
*, peer_t
, htcpReplyData
*, void *);
87 static int peerCheckNetdbDirect(ps_state
* psstate
);
88 static void peerGetSomeNeighbor(ps_state
*);
89 static void peerGetSomeNeighborReplies(ps_state
*);
90 static void peerGetSomeDirect(ps_state
*);
91 static void peerGetSomeParent(ps_state
*);
92 static void peerGetAllParents(ps_state
*);
93 static void peerAddFwdServer(FwdServer
**, peer
*, hier_code
);
96 peerSelectStateFree(ps_state
* psstate
)
98 if (psstate
->acl_checklist
) {
99 debug(44, 1) ("calling aclChecklistFree() from peerSelectStateFree\n");
100 aclChecklistFree(psstate
->acl_checklist
);
102 requestUnlink(psstate
->request
);
103 psstate
->request
= NULL
;
104 if (psstate
->entry
) {
105 assert(psstate
->entry
->ping_status
!= PING_WAITING
);
106 storeUnlockObject(psstate
->entry
);
107 psstate
->entry
= NULL
;
113 peerSelectIcpPing(request_t
* request
, int direct
, StoreEntry
* entry
)
117 assert(entry
->ping_status
== PING_NONE
);
118 assert(direct
!= DIRECT_YES
);
119 debug(44, 3) ("peerSelectIcpPing: %s\n", storeUrl(entry
));
120 if (!request
->flags
.hierarchical
&& direct
!= DIRECT_NO
)
122 if (EBIT_TEST(entry
->flags
, KEY_PRIVATE
) && !neighbors_do_private_keys
)
123 if (direct
!= DIRECT_NO
)
125 n
= neighborsCount(request
);
126 debug(44, 3) ("peerSelectIcpPing: counted %d neighbors\n", n
);
132 peerSelect(request_t
* request
,
137 ps_state
*psstate
= memAllocate(MEM_PS_STATE
);
139 debug(44, 3) ("peerSelect: %s\n", storeUrl(entry
));
141 debug(44, 3) ("peerSelect: %s\n", RequestMethodStr
[request
->method
]);
142 cbdataAdd(psstate
, memFree
, MEM_PS_STATE
);
143 psstate
->request
= requestLink(request
);
144 psstate
->entry
= entry
;
145 psstate
->callback
= callback
;
146 psstate
->callback_data
= callback_data
;
147 psstate
->direct
= DIRECT_UNKNOWN
;
148 #if USE_CACHE_DIGESTS
149 request
->hier
.peer_select_start
= current_time
;
152 storeLockObject(psstate
->entry
);
153 cbdataLock(callback_data
);
154 peerSelectFoo(psstate
);
158 peerCheckNeverDirectDone(int answer
, void *data
)
160 ps_state
*psstate
= data
;
161 psstate
->acl_checklist
= NULL
;
162 debug(44, 3) ("peerCheckNeverDirectDone: %d\n", answer
);
163 psstate
->never_direct
= answer
? 1 : -1;
164 peerSelectFoo(psstate
);
168 peerCheckAlwaysDirectDone(int answer
, void *data
)
170 ps_state
*psstate
= data
;
171 psstate
->acl_checklist
= NULL
;
172 debug(44, 3) ("peerCheckAlwaysDirectDone: %d\n", answer
);
173 psstate
->always_direct
= answer
? 1 : -1;
174 peerSelectFoo(psstate
);
178 peerSelectCallback(ps_state
* psstate
)
180 StoreEntry
*entry
= psstate
->entry
;
181 FwdServer
*fs
= psstate
->servers
;
182 void *data
= psstate
->callback_data
;
184 debug(44, 3) ("peerSelectCallback: %s\n", storeUrl(entry
));
185 if (entry
->ping_status
== PING_WAITING
)
186 eventDelete(peerPingTimeout
, psstate
);
187 entry
->ping_status
= PING_DONE
;
190 debug(44, 1) ("Failed to select source for '%s'\n", storeUrl(entry
));
191 debug(44, 1) (" always_direct = %d\n", psstate
->always_direct
);
192 debug(44, 1) (" never_direct = %d\n", psstate
->never_direct
);
193 debug(44, 1) (" timedout = %d\n", psstate
->ping
.timedout
);
195 psstate
->ping
.stop
= current_time
;
196 psstate
->request
->hier
.ping
= psstate
->ping
;
197 if (cbdataValid(data
)) {
198 psstate
->servers
= NULL
;
199 psstate
->callback(fs
, data
);
202 peerSelectStateFree(psstate
);
206 peerCheckNetdbDirect(ps_state
* psstate
)
208 peer
*p
= whichPeer(&psstate
->closest_parent_miss
);
213 if (psstate
->direct
== DIRECT_NO
)
215 myrtt
= netdbHostRtt(psstate
->request
->host
);
216 debug(44, 3) ("peerCheckNetdbDirect: MY RTT = %d msec\n", myrtt
);
217 debug(44, 3) ("peerCheckNetdbDirect: closest_parent_miss RTT = %d msec\n",
218 psstate
->ping
.p_rtt
);
219 if (myrtt
&& myrtt
< psstate
->ping
.p_rtt
)
221 myhops
= netdbHostHops(psstate
->request
->host
);
222 debug(44, 3) ("peerCheckNetdbDirect: MY hops = %d\n", myhops
);
223 debug(44, 3) ("peerCheckNetdbDirect: minimum_direct_hops = %d\n",
224 Config
.minDirectHops
);
225 if (myhops
&& myhops
<= Config
.minDirectHops
)
231 peerSelectFoo(ps_state
* ps
)
233 StoreEntry
*entry
= ps
->entry
;
234 request_t
*request
= ps
->request
;
235 debug(44, 3) ("peerSelectFoo: '%s %s'\n",
236 RequestMethodStr
[request
->method
],
238 if (ps
->direct
== DIRECT_UNKNOWN
) {
239 if (ps
->always_direct
== 0 && Config
.accessList
.AlwaysDirect
) {
240 ps
->acl_checklist
= aclChecklistCreate(
241 Config
.accessList
.AlwaysDirect
,
244 aclNBCheck(ps
->acl_checklist
,
245 peerCheckAlwaysDirectDone
,
248 } else if (ps
->always_direct
> 0) {
249 ps
->direct
= DIRECT_YES
;
250 } else if (ps
->never_direct
== 0 && Config
.accessList
.NeverDirect
) {
251 ps
->acl_checklist
= aclChecklistCreate(
252 Config
.accessList
.NeverDirect
,
255 aclNBCheck(ps
->acl_checklist
,
256 peerCheckNeverDirectDone
,
259 } else if (ps
->never_direct
> 0) {
260 ps
->direct
= DIRECT_NO
;
261 } else if (request
->flags
.loopdetect
) {
262 ps
->direct
= DIRECT_YES
;
264 ps
->direct
= DIRECT_MAYBE
;
266 debug(44, 3) ("peerSelectFoo: direct = %s\n",
267 DirectStr
[ps
->direct
]);
271 } else if (entry
->ping_status
== PING_NONE
) {
272 peerGetSomeNeighbor(ps
);
273 if (entry
->ping_status
== PING_WAITING
)
275 } else if (entry
->ping_status
== PING_WAITING
) {
276 peerGetSomeNeighborReplies(ps
);
277 entry
->ping_status
= PING_DONE
;
279 switch (ps
->direct
) {
281 peerGetSomeDirect(ps
);
284 peerGetSomeParent(ps
);
285 peerGetAllParents(ps
);
288 if (Config
.onoff
.prefer_direct
)
289 peerGetSomeDirect(ps
);
290 if (request
->flags
.hierarchical
|| !Config
.onoff
.nonhierarchical_direct
)
291 peerGetSomeParent(ps
);
292 if (!Config
.onoff
.prefer_direct
)
293 peerGetSomeDirect(ps
);
296 peerSelectCallback(ps
);
300 * peerGetSomeNeighbor
302 * Selects a neighbor (parent or sibling) based on one of the
306 * Netdb RTT estimates
310 peerGetSomeNeighbor(ps_state
* ps
)
312 StoreEntry
*entry
= ps
->entry
;
313 request_t
*request
= ps
->request
;
315 hier_code code
= HIER_NONE
;
316 assert(entry
->ping_status
== PING_NONE
);
317 if (ps
->direct
== DIRECT_YES
) {
318 entry
->ping_status
= PING_DONE
;
321 #if USE_CACHE_DIGESTS
322 if ((p
= neighborsDigestSelect(request
, entry
))) {
323 if (neighborType(p
, request
) == PEER_PARENT
)
324 code
= CD_PARENT_HIT
;
326 code
= CD_SIBLING_HIT
;
330 if ((p
= carpSelectParent(request
))) {
334 if ((p
= netdbClosestParent(request
))) {
335 code
= CLOSEST_PARENT
;
336 } else if (peerSelectIcpPing(request
, ps
->direct
, entry
)) {
337 debug(44, 3) ("peerSelect: Doing ICP pings\n");
338 ps
->ping
.start
= current_time
;
339 ps
->ping
.n_sent
= neighborsUdpPing(request
,
343 &ps
->ping
.n_replies_expected
,
345 if (ps
->ping
.n_sent
== 0)
346 debug(44, 0) ("WARNING: neighborsUdpPing returned 0\n");
347 debug(44, 3) ("peerSelect: %d ICP replies expected, RTT %d msec\n",
348 ps
->ping
.n_replies_expected
, ps
->ping
.timeout
);
349 if (ps
->ping
.n_replies_expected
> 0) {
350 entry
->ping_status
= PING_WAITING
;
351 eventAdd("peerPingTimeout",
354 0.001 * ps
->ping
.timeout
,
359 if (code
!= HIER_NONE
) {
361 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
362 peerAddFwdServer(&ps
->servers
, p
, code
);
364 entry
->ping_status
= PING_DONE
;
368 * peerGetSomeNeighborReplies
370 * Selects a neighbor (parent or sibling) based on ICP/HTCP replies.
373 peerGetSomeNeighborReplies(ps_state
* ps
)
375 request_t
*request
= ps
->request
;
377 hier_code code
= HIER_NONE
;
378 assert(ps
->entry
->ping_status
== PING_WAITING
);
379 assert(ps
->direct
!= DIRECT_YES
);
380 if (peerCheckNetdbDirect(ps
)) {
381 code
= CLOSEST_DIRECT
;
382 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], request
->host
);
383 peerAddFwdServer(&ps
->servers
, NULL
, code
);
387 code
= ps
->hit_type
== PEER_PARENT
? PARENT_HIT
: SIBLING_HIT
;
389 #if ALLOW_SOURCE_PING
390 if ((p
= ps
->secho
)) {
391 code
= SOURCE_FASTEST
;
394 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
395 p
= whichPeer(&ps
->closest_parent_miss
);
396 code
= CLOSEST_PARENT_MISS
;
397 } else if (ps
->first_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
398 p
= whichPeer(&ps
->first_parent_miss
);
399 code
= FIRST_PARENT_MISS
;
401 if (p
&& code
!= HIER_NONE
) {
402 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
403 peerAddFwdServer(&ps
->servers
, p
, code
);
411 * Simply adds a 'direct' entry to the FwdServers list if this
412 * request can be forwarded directly to the origin server
415 peerGetSomeDirect(ps_state
* ps
)
417 if (ps
->direct
== DIRECT_NO
)
419 if (ps
->request
->protocol
== PROTO_WAIS
)
420 /* Its not really DIRECT, now is it? */
421 peerAddFwdServer(&ps
->servers
, Config
.Wais
.peer
, DIRECT
);
423 peerAddFwdServer(&ps
->servers
, NULL
, DIRECT
);
427 peerGetSomeParent(ps_state
* ps
)
430 request_t
*request
= ps
->request
;
431 hier_code code
= HIER_NONE
;
432 debug(44, 3) ("peerGetSomeParent: %s %s\n",
433 RequestMethodStr
[request
->method
],
435 if (ps
->direct
== DIRECT_YES
)
437 if ((p
= getDefaultParent(request
))) {
438 code
= DEFAULT_PARENT
;
439 } else if ((p
= getRoundRobinParent(request
))) {
440 code
= ROUNDROBIN_PARENT
;
441 } else if ((p
= getFirstUpParent(request
))) {
442 code
= FIRSTUP_PARENT
;
443 } else if ((p
= getAnyParent(request
))) {
444 code
= ANY_OLD_PARENT
;
446 if (code
!= HIER_NONE
) {
447 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
448 peerAddFwdServer(&ps
->servers
, p
, code
);
452 /* Adds alive parents. Used as a last resort for never_direct.
455 peerGetAllParents(ps_state
* ps
)
458 request_t
*request
= ps
->request
;
459 /* Add all alive parents */
460 for (p
= Config
.peers
; p
; p
= p
->next
) {
461 /* XXX: neighbors.c lacks a public interface for enumerating
462 * parents to a request so we have to dig some here..
464 if (neighborType(p
, request
) != PEER_PARENT
)
466 if (!peerHTTPOkay(p
, request
))
468 debug(15, 3) ("peerGetAllParents: adding alive parent %s\n", p
->host
);
469 peerAddFwdServer(&ps
->servers
, p
, ANY_OLD_PARENT
);
471 /* XXX: should add dead parents here, but it is currently
472 * not possible to find out which parents are dead or which
473 * simply are not configured to handle the request.
475 /* Add default parent as a last resort */
476 if ((p
= getDefaultParent(request
))) {
477 peerAddFwdServer(&ps
->servers
, p
, DEFAULT_PARENT
);
482 peerPingTimeout(void *data
)
484 ps_state
*psstate
= data
;
485 StoreEntry
*entry
= psstate
->entry
;
487 debug(44, 3) ("peerPingTimeout: '%s'\n", storeUrl(entry
));
488 if (!cbdataValid(psstate
->callback_data
)) {
489 /* request aborted */
490 entry
->ping_status
= PING_DONE
;
491 cbdataUnlock(psstate
->callback_data
);
492 peerSelectStateFree(psstate
);
495 PeerStats
.timeouts
++;
496 psstate
->ping
.timedout
= 1;
497 peerSelectFoo(psstate
);
503 memset(&PeerStats
, '\0', sizeof(PeerStats
));
504 assert(sizeof(hier_strings
) == (HIER_MAX
+ 1) * sizeof(char *));
508 peerIcpParentMiss(peer
* p
, icp_common_t
* header
, ps_state
* ps
)
512 if (Config
.onoff
.query_icmp
) {
513 if (header
->flags
& ICP_FLAG_SRC_RTT
) {
514 rtt
= header
->pad
& 0xFFFF;
515 hops
= (header
->pad
>> 16) & 0xFFFF;
516 if (rtt
> 0 && rtt
< 0xFFFF)
517 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
518 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
519 ps
->closest_parent_miss
= p
->in_addr
;
520 ps
->ping
.p_rtt
= rtt
;
524 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
525 if (p
->options
.closest_only
)
527 /* set FIRST_MISS if there is no CLOSEST parent */
528 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
530 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
531 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
532 ps
->first_parent_miss
= p
->in_addr
;
533 ps
->ping
.w_rtt
= rtt
;
538 peerHandleIcpReply(peer
* p
, peer_t type
, icp_common_t
* header
, void *data
)
540 ps_state
*psstate
= data
;
541 icp_opcode op
= header
->opcode
;
542 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
544 storeUrl(psstate
->entry
));
545 #if USE_CACHE_DIGESTS && 0
546 /* do cd lookup to count false misses */
548 peerNoteDigestLookup(request
, p
,
549 peerDigestLookup(p
, request
, psstate
->entry
));
551 psstate
->ping
.n_recv
++;
552 if (op
== ICP_MISS
|| op
== ICP_DECHO
) {
553 if (type
== PEER_PARENT
)
554 peerIcpParentMiss(p
, header
, psstate
);
555 } else if (op
== ICP_HIT
) {
557 psstate
->hit_type
= type
;
558 peerSelectFoo(psstate
);
561 #if ALLOW_SOURCE_PING
562 else if (op
== ICP_SECHO
) {
564 peerSelectFoo(psstate
);
568 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
570 peerSelectFoo(psstate
);
575 peerHandleHtcpReply(peer
* p
, peer_t type
, htcpReplyData
* htcp
, void *data
)
577 ps_state
*psstate
= data
;
578 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
579 htcp
->hit
? "HIT" : "MISS",
580 storeUrl(psstate
->entry
));
581 psstate
->ping
.n_recv
++;
584 psstate
->hit_type
= type
;
585 peerSelectFoo(psstate
);
588 if (type
== PEER_PARENT
)
589 peerHtcpParentMiss(p
, htcp
, psstate
);
590 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
592 peerSelectFoo(psstate
);
596 peerHtcpParentMiss(peer
* p
, htcpReplyData
* htcp
, ps_state
* ps
)
600 if (Config
.onoff
.query_icmp
) {
601 if (htcp
->cto
.rtt
> 0) {
602 rtt
= (int) htcp
->cto
.rtt
* 1000;
603 hops
= (int) htcp
->cto
.hops
* 1000;
604 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
605 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
606 ps
->closest_parent_miss
= p
->in_addr
;
607 ps
->ping
.p_rtt
= rtt
;
611 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
612 if (p
->options
.closest_only
)
614 /* set FIRST_MISS if there is no CLOSEST parent */
615 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
617 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
618 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
619 ps
->first_parent_miss
= p
->in_addr
;
620 ps
->ping
.w_rtt
= rtt
;
626 peerHandlePingReply(peer
* p
, peer_t type
, protocol_t proto
, void *pingdata
, void *data
)
628 if (proto
== PROTO_ICP
)
629 peerHandleIcpReply(p
, type
, pingdata
, data
);
631 else if (proto
== PROTO_HTCP
)
632 peerHandleHtcpReply(p
, type
, pingdata
, data
);
635 debug(44, 1) ("peerHandlePingReply: unknown protocol_t %d\n", (int) proto
);
639 peerAddFwdServer(FwdServer
** FS
, peer
* p
, hier_code code
)
641 FwdServer
*fs
= memAllocate(MEM_FWD_SERVER
);
642 debug(44, 5) ("peerAddFwdServer: adding %s %s\n",
643 p
? p
->host
: "DIRECT",
647 cbdataLock(fs
->peer
);