3 * $Id: peer_select.cc,v 1.92 1998/12/05 00:54:35 wessels Exp $
5 * DEBUG: section 44 Peer Selection Algorithm
6 * AUTHOR: Duane Wessels
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by the
14 * National Science Foundation. Squid is Copyrighted (C) 1998 by
15 * Duane Wessels and the University of California San Diego. Please
16 * see the COPYRIGHT file for full details. Squid incorporates
17 * software developed and/or copyrighted by other sources. Please see
18 * the CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
38 const char *hier_strings
[] =
48 "CLOSEST_PARENT_MISS",
68 static char *DirectStr
[] =
76 static void peerSelectFoo(ps_state
*);
77 static void peerPingTimeout(void *data
);
78 static void peerSelectCallback(ps_state
* psstate
);
79 static IRCB peerHandlePingReply
;
80 static void peerSelectStateFree(ps_state
* psstate
);
81 static void peerIcpParentMiss(peer
*, icp_common_t
*, ps_state
*);
83 static void peerHtcpParentMiss(peer
*, htcpReplyData
*, ps_state
*);
84 static void peerHandleHtcpReply(peer
*, peer_t
, htcpReplyData
*, void *);
86 static int peerCheckNetdbDirect(ps_state
* psstate
);
87 static void peerGetSomeNeighbor(ps_state
*);
88 static void peerGetSomeNeighborReplies(ps_state
*);
89 static void peerGetSomeDirect(ps_state
*);
90 static void peerGetSomeParent(ps_state
*);
91 static void psstateFigureDirect(ps_state
*);
92 static void peerAddFwdServer(FwdServer
**, peer
*, hier_code
);
95 peerSelectStateFree(ps_state
* psstate
)
97 if (psstate
->acl_checklist
) {
98 debug(44, 1) ("calling aclChecklistFree() from peerSelectStateFree\n");
99 aclChecklistFree(psstate
->acl_checklist
);
101 requestUnlink(psstate
->request
);
102 psstate
->request
= NULL
;
103 if (psstate
->entry
) {
104 assert(psstate
->entry
->ping_status
!= PING_WAITING
);
105 storeUnlockObject(psstate
->entry
);
106 psstate
->entry
= NULL
;
112 peerSelectIcpPing(request_t
* request
, int direct
, StoreEntry
* entry
)
116 assert(entry
->ping_status
== PING_NONE
);
117 assert(direct
!= DIRECT_YES
);
118 debug(44, 3) ("peerSelectIcpPing: %s\n", storeUrl(entry
));
119 if (!request
->flags
.hierarchical
&& direct
!= DIRECT_NO
)
121 if (EBIT_TEST(entry
->flags
, KEY_PRIVATE
) && !neighbors_do_private_keys
)
122 if (direct
!= DIRECT_NO
)
124 n
= neighborsCount(request
);
125 debug(44, 3) ("peerSelectIcpPing: counted %d neighbors\n", n
);
131 peerSelect(request_t
* request
,
136 ps_state
*psstate
= xcalloc(1, sizeof(ps_state
));
138 debug(44, 3) ("peerSelect: %s\n", storeUrl(entry
));
140 debug(44, 3) ("peerSelect: %s\n", RequestMethodStr
[request
->method
]);
141 cbdataAdd(psstate
, cbdataXfree
, 0);
142 psstate
->request
= requestLink(request
);
143 psstate
->entry
= entry
;
144 psstate
->callback
= callback
;
145 psstate
->callback_data
= callback_data
;
146 psstate
->direct
= DIRECT_UNKNOWN
;
147 #if USE_CACHE_DIGESTS
148 request
->hier
.peer_select_start
= current_time
;
151 storeLockObject(psstate
->entry
);
152 cbdataLock(callback_data
);
153 peerSelectFoo(psstate
);
157 peerCheckNeverDirectDone(int answer
, void *data
)
159 ps_state
*psstate
= data
;
160 psstate
->acl_checklist
= NULL
;
161 debug(44, 3) ("peerCheckNeverDirectDone: %d\n", answer
);
162 psstate
->never_direct
= answer
? 1 : -1;
163 peerSelectFoo(psstate
);
167 peerCheckAlwaysDirectDone(int answer
, void *data
)
169 ps_state
*psstate
= data
;
170 psstate
->acl_checklist
= NULL
;
171 debug(44, 3) ("peerCheckAlwaysDirectDone: %d\n", answer
);
172 psstate
->always_direct
= answer
? 1 : -1;
173 peerSelectFoo(psstate
);
177 peerSelectCallback(ps_state
* psstate
)
179 StoreEntry
*entry
= psstate
->entry
;
180 FwdServer
*fs
= psstate
->servers
;
181 void *data
= psstate
->callback_data
;
183 debug(44, 3) ("peerSelectCallback: %s\n", storeUrl(entry
));
184 if (entry
->ping_status
== PING_WAITING
)
185 eventDelete(peerPingTimeout
, psstate
);
186 entry
->ping_status
= PING_DONE
;
189 debug(44, 1) ("Failed to select source for '%s'\n", storeUrl(entry
));
190 debug(44, 1) (" always_direct = %d\n", psstate
->always_direct
);
191 debug(44, 1) (" never_direct = %d\n", psstate
->never_direct
);
192 debug(44, 1) (" timedout = %d\n", psstate
->ping
.timedout
);
194 psstate
->ping
.stop
= current_time
;
195 if (cbdataValid(data
)) {
196 psstate
->servers
= NULL
;
197 psstate
->callback(fs
, data
);
200 peerSelectStateFree(psstate
);
204 peerCheckNetdbDirect(ps_state
* psstate
)
206 peer
*p
= whichPeer(&psstate
->closest_parent_miss
);
211 myrtt
= netdbHostRtt(psstate
->request
->host
);
212 debug(44, 3) ("peerCheckNetdbDirect: MY RTT = %d msec\n", myrtt
);
213 debug(44, 3) ("peerCheckNetdbDirect: closest_parent_miss RTT = %d msec\n",
214 psstate
->ping
.p_rtt
);
215 if (myrtt
&& myrtt
< psstate
->ping
.p_rtt
)
217 myhops
= netdbHostHops(psstate
->request
->host
);
218 debug(44, 3) ("peerCheckNetdbDirect: MY hops = %d\n", myhops
);
219 debug(44, 3) ("peerCheckNetdbDirect: minimum_direct_hops = %d\n",
220 Config
.minDirectHops
);
221 if (myhops
&& myhops
<= Config
.minDirectHops
)
227 peerSelectFoo(ps_state
* psstate
)
229 StoreEntry
*entry
= psstate
->entry
;
230 request_t
*request
= psstate
->request
;
231 debug(44, 3) ("peerSelectFoo: '%s %s'\n",
232 RequestMethodStr
[request
->method
],
234 if (psstate
->direct
== DIRECT_UNKNOWN
) {
235 psstateFigureDirect(psstate
);
236 if (psstate
->direct
== DIRECT_UNKNOWN
)
239 if (entry
->ping_status
== PING_NONE
) {
240 peerGetSomeNeighbor(psstate
);
241 if (entry
->ping_status
== PING_WAITING
)
243 } else if (entry
->ping_status
== PING_WAITING
) {
244 peerGetSomeNeighborReplies(psstate
);
245 entry
->ping_status
= PING_DONE
;
247 if (Config
.onoff
.prefer_direct
)
248 peerGetSomeDirect(psstate
);
249 peerGetSomeParent(psstate
);
250 if (!Config
.onoff
.prefer_direct
)
251 peerGetSomeDirect(psstate
);
252 peerSelectCallback(psstate
);
256 psstateFigureDirect(ps_state
* psstate
)
258 request_t
*request
= psstate
->request
;
259 if (psstate
->always_direct
== 0 && Config
.accessList
.AlwaysDirect
) {
260 psstate
->acl_checklist
= aclChecklistCreate(
261 Config
.accessList
.AlwaysDirect
,
263 request
->client_addr
,
264 NULL
, /* user agent */
266 aclNBCheck(psstate
->acl_checklist
,
267 peerCheckAlwaysDirectDone
,
270 } else if (psstate
->always_direct
> 0) {
271 psstate
->direct
= DIRECT_YES
;
272 } else if (psstate
->never_direct
== 0 && Config
.accessList
.NeverDirect
) {
273 psstate
->acl_checklist
= aclChecklistCreate(
274 Config
.accessList
.NeverDirect
,
276 request
->client_addr
,
277 NULL
, /* user agent */
279 aclNBCheck(psstate
->acl_checklist
,
280 peerCheckNeverDirectDone
,
283 } else if (psstate
->never_direct
> 0) {
284 psstate
->direct
= DIRECT_NO
;
285 } else if (request
->flags
.loopdetect
) {
286 psstate
->direct
= DIRECT_YES
;
288 psstate
->direct
= DIRECT_MAYBE
;
290 debug(44, 3) ("psstateFigureDirect: direct = %s\n",
291 DirectStr
[psstate
->direct
]);
295 * peerGetSomeNeighbor
297 * Selects a neighbor (parent or sibling) based on one of the
301 * Netdb RTT estimates
305 peerGetSomeNeighbor(ps_state
* ps
)
307 StoreEntry
*entry
= ps
->entry
;
308 request_t
*request
= ps
->request
;
310 hier_code code
= HIER_NONE
;
311 assert(entry
->ping_status
== PING_NONE
);
312 if (ps
->direct
== DIRECT_YES
) {
313 entry
->ping_status
= PING_DONE
;
316 #if USE_CACHE_DIGESTS
317 if ((p
= neighborsDigestSelect(request
, entry
))) {
318 code
= CACHE_DIGEST_HIT
;
322 if ((p
= carpSelectParent(request
))) {
326 if ((p
= netdbClosestParent(request
))) {
327 code
= CLOSEST_PARENT
;
328 } else if (peerSelectIcpPing(request
, ps
->direct
, entry
)) {
329 debug(44, 3) ("peerSelect: Doing ICP pings\n");
330 ps
->ping
.start
= current_time
;
331 ps
->ping
.n_sent
= neighborsUdpPing(request
,
335 &ps
->ping
.n_replies_expected
,
337 if (ps
->ping
.n_sent
== 0)
338 debug(44, 0) ("WARNING: neighborsUdpPing returned 0\n");
339 debug(44, 3) ("peerSelect: %d ICP replies expected, RTT %d msec\n",
340 ps
->ping
.n_replies_expected
, ps
->ping
.timeout
);
341 if (ps
->ping
.n_replies_expected
> 0) {
342 entry
->ping_status
= PING_WAITING
;
343 eventAdd("peerPingTimeout",
346 0.001 * ps
->ping
.timeout
,
351 if (code
!= HIER_NONE
) {
353 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
354 peerAddFwdServer(&ps
->servers
, p
, code
);
356 entry
->ping_status
= PING_DONE
;
360 * peerGetSomeNeighborReplies
362 * Selects a neighbor (parent or sibling) based on ICP/HTCP replies.
365 peerGetSomeNeighborReplies(ps_state
* ps
)
367 StoreEntry
*entry
= ps
->entry
;
368 request_t
*request
= ps
->request
;
370 hier_code code
= HIER_NONE
;
371 assert(entry
->ping_status
== PING_WAITING
);
372 assert(ps
->direct
!= DIRECT_YES
);
373 if (peerCheckNetdbDirect(ps
)) {
374 code
= CLOSEST_DIRECT
;
375 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], request
->host
);
376 peerAddFwdServer(&ps
->servers
, NULL
, code
);
380 code
= ps
->hit_type
== PEER_PARENT
? PARENT_HIT
: SIBLING_HIT
;
382 #if ALLOW_SOURCE_PING
383 if ((p
= ps
->secho
)) {
384 code
= SOURCE_FASTEST
;
387 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
388 p
= whichPeer(&ps
->closest_parent_miss
);
389 code
= CLOSEST_PARENT_MISS
;
390 } else if (ps
->first_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
) {
391 p
= whichPeer(&ps
->first_parent_miss
);
392 code
= FIRST_PARENT_MISS
;
394 if (p
&& code
!= HIER_NONE
) {
395 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
396 peerAddFwdServer(&ps
->servers
, p
, code
);
404 * Simply adds a 'direct' entry to the FwdServers list if this
405 * request can be forwarded directly to the origin server
408 peerGetSomeDirect(ps_state
* ps
)
410 if (ps
->direct
== DIRECT_NO
)
412 if (ps
->request
->protocol
== PROTO_WAIS
)
413 /* Its not really DIRECT, now is it? */
414 peerAddFwdServer(&ps
->servers
, Config
.Wais
.peer
, DIRECT
);
416 peerAddFwdServer(&ps
->servers
, NULL
, DIRECT
);
420 peerGetSomeParent(ps_state
* ps
)
423 request_t
*request
= ps
->request
;
424 hier_code code
= HIER_NONE
;
425 debug(44, 3) ("peerGetSomeParent: %s %s\n",
426 RequestMethodStr
[request
->method
],
428 if ((p
= getDefaultParent(request
))) {
429 code
= DEFAULT_PARENT
;
430 } else if ((p
= getRoundRobinParent(request
))) {
431 code
= ROUNDROBIN_PARENT
;
432 } else if ((p
= getFirstUpParent(request
))) {
433 code
= FIRSTUP_PARENT
;
434 } else if ((p
= getAnyParent(request
))) {
435 code
= ANY_OLD_PARENT
;
437 if (code
!= HIER_NONE
) {
438 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
439 peerAddFwdServer(&ps
->servers
, p
, code
);
444 peerPingTimeout(void *data
)
446 ps_state
*psstate
= data
;
447 StoreEntry
*entry
= psstate
->entry
;
449 debug(44, 3) ("peerPingTimeout: '%s'\n", storeUrl(entry
));
450 entry
->ping_status
= PING_TIMEOUT
;
451 if (!cbdataValid(psstate
->callback_data
)) {
452 /* request aborted */
453 cbdataUnlock(psstate
->callback_data
);
454 peerSelectStateFree(psstate
);
457 PeerStats
.timeouts
++;
458 psstate
->ping
.timedout
= 1;
459 peerSelectFoo(psstate
);
465 memset(&PeerStats
, '\0', sizeof(PeerStats
));
466 assert(sizeof(hier_strings
) == (HIER_MAX
+ 1) * sizeof(char *));
470 peerIcpParentMiss(peer
* p
, icp_common_t
* header
, ps_state
* ps
)
474 if (Config
.onoff
.query_icmp
) {
475 if (header
->flags
& ICP_FLAG_SRC_RTT
) {
476 rtt
= header
->pad
& 0xFFFF;
477 hops
= (header
->pad
>> 16) & 0xFFFF;
478 if (rtt
> 0 && rtt
< 0xFFFF)
479 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
480 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
481 ps
->closest_parent_miss
= p
->in_addr
;
482 ps
->ping
.p_rtt
= rtt
;
486 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
487 if (p
->options
.closest_only
)
489 /* set FIRST_MISS if there is no CLOSEST parent */
490 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
492 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
493 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
494 ps
->first_parent_miss
= p
->in_addr
;
495 ps
->ping
.w_rtt
= rtt
;
500 peerHandleIcpReply(peer
* p
, peer_t type
, icp_common_t
* header
, void *data
)
502 ps_state
*psstate
= data
;
503 icp_opcode op
= header
->opcode
;
504 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
506 storeUrl(psstate
->entry
));
507 #if USE_CACHE_DIGESTS && 0
508 /* do cd lookup to count false misses */
510 peerNoteDigestLookup(request
, p
,
511 peerDigestLookup(p
, request
, psstate
->entry
));
513 psstate
->ping
.n_recv
++;
514 if (op
== ICP_MISS
|| op
== ICP_DECHO
) {
515 if (type
== PEER_PARENT
)
516 peerIcpParentMiss(p
, header
, psstate
);
517 } else if (op
== ICP_HIT
) {
519 psstate
->hit_type
= type
;
520 peerSelectFoo(psstate
);
523 #if ALLOW_SOURCE_PING
524 else if (op
== ICP_SECHO
) {
526 peerSelectFoo(psstate
);
530 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
532 peerSelectFoo(psstate
);
537 peerHandleHtcpReply(peer
* p
, peer_t type
, htcpReplyData
* htcp
, void *data
)
539 ps_state
*psstate
= data
;
540 request_t
*request
= psstate
->request
;
541 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
542 htcp
->hit
? "HIT" : "MISS",
543 storeUrl(psstate
->entry
));
544 psstate
->ping
.n_recv
++;
547 psstate
->hit_type
= type
;
548 peerSelectFoo(psstate
);
551 if (type
== PEER_PARENT
)
552 peerHtcpParentMiss(p
, htcp
, psstate
);
553 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
555 peerSelectFoo(psstate
);
559 peerHtcpParentMiss(peer
* p
, htcpReplyData
* htcp
, ps_state
* ps
)
563 if (Config
.onoff
.query_icmp
) {
564 if (htcp
->cto
.rtt
> 0) {
565 rtt
= (int) htcp
->cto
.rtt
* 1000;
566 hops
= (int) htcp
->cto
.hops
* 1000;
567 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
568 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
569 ps
->closest_parent_miss
= p
->in_addr
;
570 ps
->ping
.p_rtt
= rtt
;
574 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
575 if (p
->options
.closest_only
)
577 /* set FIRST_MISS if there is no CLOSEST parent */
578 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
580 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
581 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
582 ps
->first_parent_miss
= p
->in_addr
;
583 ps
->ping
.w_rtt
= rtt
;
589 peerHandlePingReply(peer
* p
, peer_t type
, protocol_t proto
, void *pingdata
, void *data
)
591 if (proto
== PROTO_ICP
)
592 peerHandleIcpReply(p
, type
, pingdata
, data
);
594 else if (proto
== PROTO_HTCP
)
595 peerHandleHtcpReply(p
, type
, pingdata
, data
);
598 debug(44, 1) ("peerHandlePingReply: unknown protocol_t %d\n", (int) proto
);
602 peerAddFwdServer(FwdServer
** FS
, peer
* p
, hier_code code
)
604 FwdServer
*fs
= memAllocate(MEM_FWD_SERVER
);
605 debug(44, 5) ("peerAddFwdServer: adding %s %s\n",
606 p
? p
->host
: "DIRECT",
610 cbdataLock(fs
->peer
);