3 * $Id: peer_select.cc,v 1.103 2000/01/03 19:32:33 wessels Exp $
5 * DEBUG: section 44 Peer Selection Algorithm
6 * AUTHOR: Duane Wessels
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by the
14 * National Science Foundation. Squid is Copyrighted (C) 1998 by
15 * Duane Wessels and the University of California San Diego. Please
16 * see the COPYRIGHT file for full details. Squid incorporates
17 * software developed and/or copyrighted by other sources. Please see
18 * the CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
38 const char *hier_strings
[] =
48 "CLOSEST_PARENT_MISS",
69 static char *DirectStr
[] =
77 static void peerSelectFoo(ps_state
*);
78 static void peerPingTimeout(void *data
);
79 static void peerSelectCallback(ps_state
* psstate
);
80 static IRCB peerHandlePingReply
;
81 static void peerSelectStateFree(ps_state
* psstate
);
82 static void peerIcpParentMiss(peer
*, icp_common_t
*, ps_state
*);
84 static void peerHtcpParentMiss(peer
*, htcpReplyData
*, ps_state
*);
85 static void peerHandleHtcpReply(peer
*, peer_t
, htcpReplyData
*, void *);
87 static int peerCheckNetdbDirect(ps_state
* psstate
);
88 static void peerGetSomeNeighbor(ps_state
*);
89 static void peerGetSomeNeighborReplies(ps_state
*);
90 static void peerGetSomeDirect(ps_state
*);
91 static void peerGetSomeParent(ps_state
*);
92 static void peerAddFwdServer(FwdServer
**, peer
*, hier_code
);
95 peerSelectStateFree(ps_state
* psstate
)
97 if (psstate
->acl_checklist
) {
98 debug(44, 1) ("calling aclChecklistFree() from peerSelectStateFree\n");
99 aclChecklistFree(psstate
->acl_checklist
);
101 requestUnlink(psstate
->request
);
102 psstate
->request
= NULL
;
103 if (psstate
->entry
) {
104 assert(psstate
->entry
->ping_status
!= PING_WAITING
);
105 storeUnlockObject(psstate
->entry
);
106 psstate
->entry
= NULL
;
112 peerSelectIcpPing(request_t
* request
, int direct
, StoreEntry
* entry
)
116 assert(entry
->ping_status
== PING_NONE
);
117 assert(direct
!= DIRECT_YES
);
118 debug(44, 3) ("peerSelectIcpPing: %s\n", storeUrl(entry
));
119 if (!request
->flags
.hierarchical
&& direct
!= DIRECT_NO
)
121 if (EBIT_TEST(entry
->flags
, KEY_PRIVATE
) && !neighbors_do_private_keys
)
122 if (direct
!= DIRECT_NO
)
124 n
= neighborsCount(request
);
125 debug(44, 3) ("peerSelectIcpPing: counted %d neighbors\n", n
);
131 peerSelect(request_t
* request
,
136 ps_state
*psstate
= xcalloc(1, sizeof(ps_state
));
138 debug(44, 3) ("peerSelect: %s\n", storeUrl(entry
));
140 debug(44, 3) ("peerSelect: %s\n", RequestMethodStr
[request
->method
]);
141 cbdataAdd(psstate
, cbdataXfree
, 0);
142 psstate
->request
= requestLink(request
);
143 psstate
->entry
= entry
;
144 psstate
->callback
= callback
;
145 psstate
->callback_data
= callback_data
;
146 psstate
->direct
= DIRECT_UNKNOWN
;
147 #if USE_CACHE_DIGESTS
148 request
->hier
.peer_select_start
= current_time
;
151 storeLockObject(psstate
->entry
);
152 cbdataLock(callback_data
);
153 peerSelectFoo(psstate
);
157 peerCheckNeverDirectDone(int answer
, void *data
)
159 ps_state
*psstate
= data
;
160 psstate
->acl_checklist
= NULL
;
161 debug(44, 3) ("peerCheckNeverDirectDone: %d\n", answer
);
162 psstate
->never_direct
= answer
? 1 : -1;
163 peerSelectFoo(psstate
);
167 peerCheckAlwaysDirectDone(int answer
, void *data
)
169 ps_state
*psstate
= data
;
170 psstate
->acl_checklist
= NULL
;
171 debug(44, 3) ("peerCheckAlwaysDirectDone: %d\n", answer
);
172 psstate
->always_direct
= answer
? 1 : -1;
173 peerSelectFoo(psstate
);
177 peerSelectCallback(ps_state
* psstate
)
179 StoreEntry
*entry
= psstate
->entry
;
180 FwdServer
*fs
= psstate
->servers
;
181 void *data
= psstate
->callback_data
;
183 debug(44, 3) ("peerSelectCallback: %s\n", storeUrl(entry
));
184 if (entry
->ping_status
== PING_WAITING
)
185 eventDelete(peerPingTimeout
, psstate
);
186 entry
->ping_status
= PING_DONE
;
189 debug(44, 1) ("Failed to select source for '%s'\n", storeUrl(entry
));
190 debug(44, 1) (" always_direct = %d\n", psstate
->always_direct
);
191 debug(44, 1) (" never_direct = %d\n", psstate
->never_direct
);
192 debug(44, 1) (" timedout = %d\n", psstate
->ping
.timedout
);
194 psstate
->ping
.stop
= current_time
;
195 psstate
->request
->hier
.ping
= psstate
->ping
;
196 if (cbdataValid(data
)) {
197 psstate
->servers
= NULL
;
198 psstate
->callback(fs
, data
);
201 peerSelectStateFree(psstate
);
205 peerCheckNetdbDirect(ps_state
* psstate
)
207 peer
*p
= whichPeer(&psstate
->closest_parent_miss
);
212 myrtt
= netdbHostRtt(psstate
->request
->host
);
213 debug(44, 3) ("peerCheckNetdbDirect: MY RTT = %d msec\n", myrtt
);
214 debug(44, 3) ("peerCheckNetdbDirect: closest_parent_miss RTT = %d msec\n",
215 psstate
->ping
.p_rtt
);
216 if (myrtt
&& myrtt
< psstate
->ping
.p_rtt
)
218 myhops
= netdbHostHops(psstate
->request
->host
);
219 debug(44, 3) ("peerCheckNetdbDirect: MY hops = %d\n", myhops
);
220 debug(44, 3) ("peerCheckNetdbDirect: minimum_direct_hops = %d\n",
221 Config
.minDirectHops
);
222 if (myhops
&& myhops
<= Config
.minDirectHops
)
228 peerSelectFoo(ps_state
* ps
)
230 StoreEntry
*entry
= ps
->entry
;
231 request_t
*request
= ps
->request
;
232 debug(44, 3) ("peerSelectFoo: '%s %s'\n",
233 RequestMethodStr
[request
->method
],
235 if (ps
->direct
== DIRECT_UNKNOWN
) {
236 if (ps
->always_direct
== 0 && Config
.accessList
.AlwaysDirect
) {
237 ps
->acl_checklist
= aclChecklistCreate(
238 Config
.accessList
.AlwaysDirect
,
240 NULL
, /* user agent */
242 aclNBCheck(ps
->acl_checklist
,
243 peerCheckAlwaysDirectDone
,
246 } else if (ps
->always_direct
> 0) {
247 ps
->direct
= DIRECT_YES
;
248 } else if (ps
->never_direct
== 0 && Config
.accessList
.NeverDirect
) {
249 ps
->acl_checklist
= aclChecklistCreate(
250 Config
.accessList
.NeverDirect
,
252 NULL
, /* user agent */
254 aclNBCheck(ps
->acl_checklist
,
255 peerCheckNeverDirectDone
,
258 } else if (ps
->never_direct
> 0) {
259 ps
->direct
= DIRECT_NO
;
260 } else if (request
->flags
.loopdetect
) {
261 ps
->direct
= DIRECT_YES
;
263 ps
->direct
= DIRECT_MAYBE
;
265 debug(44, 3) ("peerSelectFoo: direct = %s\n",
266 DirectStr
[ps
->direct
]);
270 } else if (entry
->ping_status
== PING_NONE
) {
271 peerGetSomeNeighbor(ps
);
272 if (entry
->ping_status
== PING_WAITING
)
274 } else if (entry
->ping_status
== PING_WAITING
) {
275 peerGetSomeNeighborReplies(ps
);
276 entry
->ping_status
= PING_DONE
;
278 if (Config
.onoff
.prefer_direct
)
279 peerGetSomeDirect(ps
);
280 peerGetSomeParent(ps
);
281 if (!Config
.onoff
.prefer_direct
)
282 peerGetSomeDirect(ps
);
283 peerSelectCallback(ps
);
287 * peerGetSomeNeighbor
289 * Selects a neighbor (parent or sibling) based on one of the
293 * Netdb RTT estimates
297 peerGetSomeNeighbor(ps_state
* ps
)
299 StoreEntry
*entry
= ps
->entry
;
300 request_t
*request
= ps
->request
;
302 hier_code code
= HIER_NONE
;
303 assert(entry
->ping_status
== PING_NONE
);
304 if (ps
->direct
== DIRECT_YES
) {
305 entry
->ping_status
= PING_DONE
;
308 #if USE_CACHE_DIGESTS
309 if ((p
= neighborsDigestSelect(request
, entry
))) {
310 if (neighborType(p
, request
) == PEER_PARENT
)
311 code
= CD_PARENT_HIT
;
313 code
= CD_SIBLING_HIT
;
317 if ((p
= carpSelectParent(request
))) {
321 if ((p
= netdbClosestParent(request
))) {
322 code
= CLOSEST_PARENT
;
323 } else if (peerSelectIcpPing(request
, ps
->direct
, entry
)) {
324 debug(44, 3) ("peerSelect: Doing ICP pings\n");
325 ps
->ping
.start
= current_time
;
326 ps
->ping
.n_sent
= neighborsUdpPing(request
,
330 &ps
->ping
.n_replies_expected
,
332 if (ps
->ping
.n_sent
== 0)
333 debug(44, 0) ("WARNING: neighborsUdpPing returned 0\n");
334 debug(44, 3) ("peerSelect: %d ICP replies expected, RTT %d msec\n",
335 ps
->ping
.n_replies_expected
, ps
->ping
.timeout
);
336 if (ps
->ping
.n_replies_expected
> 0) {
337 entry
->ping_status
= PING_WAITING
;
338 eventAdd("peerPingTimeout",
341 0.001 * ps
->ping
.timeout
,
346 if (code
!= HIER_NONE
) {
348 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
349 peerAddFwdServer(&ps
->servers
, p
, code
);
351 entry
->ping_status
= PING_DONE
;
355 * peerGetSomeNeighborReplies
357 * Selects a neighbor (parent or sibling) based on ICP/HTCP replies.
360 peerGetSomeNeighborReplies(ps_state
* ps
)
362 request_t
*request
= ps
->request
;
364 hier_code code
= HIER_NONE
;
365 assert(ps
->entry
->ping_status
== PING_WAITING
);
366 assert(ps
->direct
!= DIRECT_YES
);
367 if (peerCheckNetdbDirect(ps
)) {
368 code
= CLOSEST_DIRECT
;
369 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], request
->host
);
370 peerAddFwdServer(&ps
->servers
, NULL
, code
);
374 code
= ps
->hit_type
== PEER_PARENT
? PARENT_HIT
: SIBLING_HIT
;
376 #if ALLOW_SOURCE_PING
377 if ((p
= ps
->secho
)) {
378 code
= SOURCE_FASTEST
;
381 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
382 p
= whichPeer(&ps
->closest_parent_miss
);
383 code
= CLOSEST_PARENT_MISS
;
384 } else if (ps
->first_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
385 p
= whichPeer(&ps
->first_parent_miss
);
386 code
= FIRST_PARENT_MISS
;
388 if (p
&& code
!= HIER_NONE
) {
389 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
390 peerAddFwdServer(&ps
->servers
, p
, code
);
398 * Simply adds a 'direct' entry to the FwdServers list if this
399 * request can be forwarded directly to the origin server
402 peerGetSomeDirect(ps_state
* ps
)
404 if (ps
->direct
== DIRECT_NO
)
406 if (ps
->request
->protocol
== PROTO_WAIS
)
407 /* Its not really DIRECT, now is it? */
408 peerAddFwdServer(&ps
->servers
, Config
.Wais
.peer
, DIRECT
);
410 peerAddFwdServer(&ps
->servers
, NULL
, DIRECT
);
414 peerGetSomeParent(ps_state
* ps
)
417 request_t
*request
= ps
->request
;
418 hier_code code
= HIER_NONE
;
419 debug(44, 3) ("peerGetSomeParent: %s %s\n",
420 RequestMethodStr
[request
->method
],
422 if (ps
->direct
== DIRECT_YES
)
424 if ((p
= getDefaultParent(request
))) {
425 code
= DEFAULT_PARENT
;
426 } else if ((p
= getRoundRobinParent(request
))) {
427 code
= ROUNDROBIN_PARENT
;
428 } else if ((p
= getFirstUpParent(request
))) {
429 code
= FIRSTUP_PARENT
;
430 } else if ((p
= getAnyParent(request
))) {
431 code
= ANY_OLD_PARENT
;
433 if (code
!= HIER_NONE
) {
434 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
435 peerAddFwdServer(&ps
->servers
, p
, code
);
440 peerPingTimeout(void *data
)
442 ps_state
*psstate
= data
;
443 StoreEntry
*entry
= psstate
->entry
;
445 debug(44, 3) ("peerPingTimeout: '%s'\n", storeUrl(entry
));
446 if (!cbdataValid(psstate
->callback_data
)) {
447 /* request aborted */
448 entry
->ping_status
= PING_DONE
;
449 cbdataUnlock(psstate
->callback_data
);
450 peerSelectStateFree(psstate
);
453 PeerStats
.timeouts
++;
454 psstate
->ping
.timedout
= 1;
455 peerSelectFoo(psstate
);
461 memset(&PeerStats
, '\0', sizeof(PeerStats
));
462 assert(sizeof(hier_strings
) == (HIER_MAX
+ 1) * sizeof(char *));
466 peerIcpParentMiss(peer
* p
, icp_common_t
* header
, ps_state
* ps
)
470 if (Config
.onoff
.query_icmp
) {
471 if (header
->flags
& ICP_FLAG_SRC_RTT
) {
472 rtt
= header
->pad
& 0xFFFF;
473 hops
= (header
->pad
>> 16) & 0xFFFF;
474 if (rtt
> 0 && rtt
< 0xFFFF)
475 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
476 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
477 ps
->closest_parent_miss
= p
->in_addr
;
478 ps
->ping
.p_rtt
= rtt
;
482 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
483 if (p
->options
.closest_only
)
485 /* set FIRST_MISS if there is no CLOSEST parent */
486 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
488 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
489 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
490 ps
->first_parent_miss
= p
->in_addr
;
491 ps
->ping
.w_rtt
= rtt
;
496 peerHandleIcpReply(peer
* p
, peer_t type
, icp_common_t
* header
, void *data
)
498 ps_state
*psstate
= data
;
499 icp_opcode op
= header
->opcode
;
500 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
502 storeUrl(psstate
->entry
));
503 #if USE_CACHE_DIGESTS && 0
504 /* do cd lookup to count false misses */
506 peerNoteDigestLookup(request
, p
,
507 peerDigestLookup(p
, request
, psstate
->entry
));
509 psstate
->ping
.n_recv
++;
510 if (op
== ICP_MISS
|| op
== ICP_DECHO
) {
511 if (type
== PEER_PARENT
)
512 peerIcpParentMiss(p
, header
, psstate
);
513 } else if (op
== ICP_HIT
) {
515 psstate
->hit_type
= type
;
516 peerSelectFoo(psstate
);
519 #if ALLOW_SOURCE_PING
520 else if (op
== ICP_SECHO
) {
522 peerSelectFoo(psstate
);
526 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
528 peerSelectFoo(psstate
);
533 peerHandleHtcpReply(peer
* p
, peer_t type
, htcpReplyData
* htcp
, void *data
)
535 ps_state
*psstate
= data
;
536 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
537 htcp
->hit
? "HIT" : "MISS",
538 storeUrl(psstate
->entry
));
539 psstate
->ping
.n_recv
++;
542 psstate
->hit_type
= type
;
543 peerSelectFoo(psstate
);
546 if (type
== PEER_PARENT
)
547 peerHtcpParentMiss(p
, htcp
, psstate
);
548 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
550 peerSelectFoo(psstate
);
554 peerHtcpParentMiss(peer
* p
, htcpReplyData
* htcp
, ps_state
* ps
)
558 if (Config
.onoff
.query_icmp
) {
559 if (htcp
->cto
.rtt
> 0) {
560 rtt
= (int) htcp
->cto
.rtt
* 1000;
561 hops
= (int) htcp
->cto
.hops
* 1000;
562 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
563 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
564 ps
->closest_parent_miss
= p
->in_addr
;
565 ps
->ping
.p_rtt
= rtt
;
569 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
570 if (p
->options
.closest_only
)
572 /* set FIRST_MISS if there is no CLOSEST parent */
573 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
575 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
576 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
577 ps
->first_parent_miss
= p
->in_addr
;
578 ps
->ping
.w_rtt
= rtt
;
584 peerHandlePingReply(peer
* p
, peer_t type
, protocol_t proto
, void *pingdata
, void *data
)
586 if (proto
== PROTO_ICP
)
587 peerHandleIcpReply(p
, type
, pingdata
, data
);
589 else if (proto
== PROTO_HTCP
)
590 peerHandleHtcpReply(p
, type
, pingdata
, data
);
593 debug(44, 1) ("peerHandlePingReply: unknown protocol_t %d\n", (int) proto
);
597 peerAddFwdServer(FwdServer
** FS
, peer
* p
, hier_code code
)
599 FwdServer
*fs
= memAllocate(MEM_FWD_SERVER
);
600 debug(44, 5) ("peerAddFwdServer: adding %s %s\n",
601 p
? p
->host
: "DIRECT",
605 cbdataLock(fs
->peer
);