3 * $Id: peer_select.cc,v 1.88 1998/10/13 23:33:35 wessels Exp $
5 * DEBUG: section 44 Peer Selection Algorithm
6 * AUTHOR: Duane Wessels
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by the
14 * National Science Foundation. Squid is Copyrighted (C) 1998 by
15 * Duane Wessels and the University of California San Diego. Please
16 * see the COPYRIGHT file for full details. Squid incorporates
17 * software developed and/or copyrighted by other sources. Please see
18 * the CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
38 const char *hier_strings
[] =
49 "CLOSEST_PARENT_MISS",
57 "NO_CACHE_DIGEST_DIRECT",
70 static char *DirectStr
[] =
77 static void peerSelectFoo(ps_state
*);
78 static void peerPingTimeout(void *data
);
79 static void peerSelectCallbackFail(ps_state
* psstate
);
80 static IRCB peerHandlePingReply
;
81 static void peerSelectStateFree(ps_state
* psstate
);
82 static void peerIcpParentMiss(peer
*, icp_common_t
*, ps_state
*);
84 static void peerHtcpParentMiss(peer
*, htcpReplyData
*, ps_state
*);
85 static void peerHandleHtcpReply(peer
*, peer_t
, htcpReplyData
*, void *);
87 static int peerCheckNetdbDirect(ps_state
* psstate
);
90 peerSelectStateFree(ps_state
* psstate
)
92 if (psstate
->acl_checklist
) {
93 debug(44, 1) ("calling aclChecklistFree() from peerSelectStateFree\n");
94 aclChecklistFree(psstate
->acl_checklist
);
96 requestUnlink(psstate
->request
);
97 psstate
->request
= NULL
;
99 assert(psstate
->entry
->ping_status
!= PING_WAITING
);
100 storeUnlockObject(psstate
->entry
);
101 psstate
->entry
= NULL
;
107 peerSelectIcpPing(request_t
* request
, int direct
, StoreEntry
* entry
)
112 debug(44, 3) ("peerSelectIcpPing: %s\n", storeUrl(entry
));
113 if (entry
->ping_status
!= PING_NONE
)
115 assert(direct
!= DIRECT_YES
);
116 if (!request
->flags
.hierarchical
&& direct
!= DIRECT_NO
)
118 if (EBIT_TEST(entry
->flags
, KEY_PRIVATE
) && !neighbors_do_private_keys
)
119 if (direct
!= DIRECT_NO
)
121 n
= neighborsCount(request
);
122 debug(44, 3) ("peerSelectIcpPing: counted %d neighbors\n", n
);
128 peerGetSomeParent(request_t
* request
, hier_code
* code
)
131 debug(44, 3) ("peerGetSomeParent: %s %s\n",
132 RequestMethodStr
[request
->method
],
134 if ((p
= getDefaultParent(request
))) {
135 *code
= DEFAULT_PARENT
;
138 if ((p
= getRoundRobinParent(request
))) {
139 *code
= ROUNDROBIN_PARENT
;
142 if ((p
= getFirstUpParent(request
))) {
143 *code
= FIRSTUP_PARENT
;
146 if ((p
= getAnyParent(request
))) {
147 *code
= ANY_OLD_PARENT
;
154 peerSelect(request_t
* request
,
160 ps_state
*psstate
= xcalloc(1, sizeof(ps_state
));
162 debug(44, 3) ("peerSelect: %s\n", storeUrl(entry
));
164 debug(44, 3) ("peerSelect: %s\n", RequestMethodStr
[request
->method
]);
165 cbdataAdd(psstate
, MEM_NONE
);
166 psstate
->request
= requestLink(request
);
167 psstate
->entry
= entry
;
168 psstate
->callback
= callback
;
169 psstate
->fail_callback
= fail_callback
;
170 psstate
->callback_data
= callback_data
;
171 #if USE_CACHE_DIGESTS
172 request
->hier
.peer_select_start
= current_time
;
175 storeLockObject(psstate
->entry
);
176 cbdataLock(callback_data
);
177 peerSelectFoo(psstate
);
181 peerCheckNeverDirectDone(int answer
, void *data
)
183 ps_state
*psstate
= data
;
184 psstate
->acl_checklist
= NULL
;
185 debug(44, 3) ("peerCheckNeverDirectDone: %d\n", answer
);
186 psstate
->never_direct
= answer
? 1 : -1;
187 peerSelectFoo(psstate
);
191 peerCheckAlwaysDirectDone(int answer
, void *data
)
193 ps_state
*psstate
= data
;
194 psstate
->acl_checklist
= NULL
;
195 debug(44, 3) ("peerCheckAlwaysDirectDone: %d\n", answer
);
196 psstate
->always_direct
= answer
? 1 : -1;
197 peerSelectFoo(psstate
);
201 peerSelectCallback(ps_state
* psstate
, peer
* p
)
203 StoreEntry
*entry
= psstate
->entry
;
204 void *data
= psstate
->callback_data
;
206 debug(44, 3) ("peerSelectCallback: %s\n", storeUrl(entry
));
207 if (entry
->ping_status
== PING_WAITING
)
208 eventDelete(peerPingTimeout
, psstate
);
209 entry
->ping_status
= PING_DONE
;
211 psstate
->ping
.stop
= current_time
;
212 if (cbdataValid(data
))
213 psstate
->callback(p
, data
);
215 peerSelectStateFree(psstate
);
219 peerSelectCallbackFail(ps_state
* psstate
)
221 request_t
*request
= psstate
->request
;
222 void *data
= psstate
->callback_data
;
223 const char *url
= psstate
->entry
? storeUrl(psstate
->entry
) : urlCanonical(request
);
225 psstate
->entry
->ping_status
= PING_DONE
;
226 debug(44, 1) ("Failed to select source for '%s'\n", url
);
227 debug(44, 1) (" always_direct = %d\n", psstate
->always_direct
);
228 debug(44, 1) (" never_direct = %d\n", psstate
->never_direct
);
229 debug(44, 1) (" timedout = %d\n", psstate
->ping
.timedout
);
230 if (cbdataValid(data
))
231 psstate
->fail_callback(NULL
, data
);
233 peerSelectStateFree(psstate
);
237 peerCheckNetdbDirect(ps_state
* psstate
)
239 peer
*p
= whichPeer(&psstate
->closest_parent_miss
);
244 myrtt
= netdbHostRtt(psstate
->request
->host
);
245 debug(44, 3) ("peerCheckNetdbDirect: MY RTT = %d msec\n", myrtt
);
246 debug(44, 3) ("peerCheckNetdbDirect: closest_parent_miss RTT = %d msec\n",
247 psstate
->ping
.p_rtt
);
248 if (myrtt
&& myrtt
< psstate
->ping
.p_rtt
)
250 myhops
= netdbHostHops(psstate
->request
->host
);
251 debug(44, 3) ("peerCheckNetdbDirect: MY hops = %d\n", myhops
);
252 debug(44, 3) ("peerCheckNetdbDirect: minimum_direct_hops = %d\n",
253 Config
.minDirectHops
);
254 if (myhops
&& myhops
<= Config
.minDirectHops
)
260 peerSelectFoo(ps_state
* psstate
)
264 StoreEntry
*entry
= psstate
->entry
;
265 request_t
*request
= psstate
->request
;
267 debug(44, 3) ("peerSelectFoo: '%s %s'\n",
268 RequestMethodStr
[request
->method
],
270 if (psstate
->always_direct
== 0 && Config
.accessList
.AlwaysDirect
) {
271 psstate
->acl_checklist
= aclChecklistCreate(
272 Config
.accessList
.AlwaysDirect
,
274 request
->client_addr
,
275 NULL
, /* user agent */
277 aclNBCheck(psstate
->acl_checklist
,
278 peerCheckAlwaysDirectDone
,
281 } else if (psstate
->always_direct
> 0) {
283 } else if (psstate
->never_direct
== 0 && Config
.accessList
.NeverDirect
) {
284 psstate
->acl_checklist
= aclChecklistCreate(
285 Config
.accessList
.NeverDirect
,
287 request
->client_addr
,
288 NULL
, /* user agent */
290 aclNBCheck(psstate
->acl_checklist
,
291 peerCheckNeverDirectDone
,
294 } else if (psstate
->never_direct
> 0) {
296 } else if (request
->flags
.loopdetect
) {
299 direct
= DIRECT_MAYBE
;
301 debug(44, 3) ("peerSelectFoo: direct = %s\n", DirectStr
[direct
]);
302 if (direct
== DIRECT_YES
) {
303 debug(44, 3) ("peerSelectFoo: DIRECT\n");
304 hierarchyNote(&request
->hier
, DIRECT
, &psstate
->ping
, request
->host
);
305 peerSelectCallback(psstate
, NULL
);
308 if ((p
= getSingleParent(request
))) {
309 psstate
->single_parent
= p
->in_addr
;
310 debug(44, 3) ("peerSelect: found single parent, skipping ICP query\n");
312 if (!request
->flags
.hierarchical
&& direct
!= DIRECT_NO
) {
313 debug(44, 3) ("peerSelectFoo: DIRECT for non-hierarchical request\n");
314 hierarchyNote(&request
->hier
, DIRECT
, &psstate
->ping
, request
->host
);
315 peerSelectCallback(psstate
, NULL
);
318 #if USE_CACHE_DIGESTS
319 else if ((p
= neighborsDigestSelect(request
, entry
))) {
320 debug(44, 2) ("peerSelect: Using Cache Digest\n");
321 request
->hier
.alg
= PEER_SA_DIGEST
;
322 code
= CACHE_DIGEST_HIT
;
323 debug(44, 2) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
324 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, p
->host
);
325 peerSelectCallback(psstate
, p
);
330 else if ((p
= carpSelectParent(request
))) {
331 hierarchyNote(&request
->hier
, CARP
, &psstate
->ping
, p
->host
);
332 peerSelectCallback(psstate
, p
);
336 else if ((p
= netdbClosestParent(request
))) {
337 request
->hier
.alg
= PEER_SA_NETDB
;
338 code
= CLOSEST_PARENT
;
339 debug(44, 2) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
340 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, p
->host
);
341 peerSelectCallback(psstate
, p
);
343 } else if (peerSelectIcpPing(request
, direct
, entry
)) {
344 assert(entry
->ping_status
== PING_NONE
);
345 request
->hier
.alg
= PEER_SA_ICP
;
346 debug(44, 3) ("peerSelect: Doing ICP pings\n");
347 psstate
->ping
.start
= current_time
;
348 psstate
->ping
.n_sent
= neighborsUdpPing(request
,
352 &psstate
->ping
.n_replies_expected
,
353 &psstate
->ping
.timeout
);
354 if (psstate
->ping
.n_sent
== 0)
355 debug(44, 0) ("WARNING: neighborsUdpPing returned 0\n");
356 debug(44, 3) ("peerSelectFoo: %d ICP replies expected, RTT %d msec\n",
357 psstate
->ping
.n_replies_expected
, psstate
->ping
.timeout
);
358 if (psstate
->ping
.n_replies_expected
> 0) {
359 entry
->ping_status
= PING_WAITING
;
360 eventAdd("peerPingTimeout",
363 0.001 * psstate
->ping
.timeout
,
368 debug(44, 3) ("peerSelectFoo: After peerSelectIcpPing.\n");
369 if (peerCheckNetdbDirect(psstate
)) {
370 code
= CLOSEST_DIRECT
;
371 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], request
->host
);
372 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, request
->host
);
373 peerSelectCallback(psstate
, NULL
);
374 } else if ((p
= whichPeer(&psstate
->closest_parent_miss
))) {
375 code
= CLOSEST_PARENT_MISS
;
376 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
377 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, p
->host
);
378 peerSelectCallback(psstate
, p
);
379 } else if ((p
= whichPeer(&psstate
->first_parent_miss
))) {
380 code
= FIRST_PARENT_MISS
;
381 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
382 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, p
->host
);
383 peerSelectCallback(psstate
, p
);
384 } else if ((p
= whichPeer(&psstate
->single_parent
))) {
385 code
= SINGLE_PARENT
;
386 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
387 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, p
->host
);
388 peerSelectCallback(psstate
, p
);
389 } else if (direct
!= DIRECT_NO
) {
391 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], request
->host
);
392 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, request
->host
);
393 peerSelectCallback(psstate
, NULL
);
394 } else if ((p
= peerGetSomeParent(request
, &code
))) {
395 debug(44, 3) ("peerSelect: %s/%s\n", hier_strings
[code
], p
->host
);
396 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, p
->host
);
397 peerSelectCallback(psstate
, p
);
399 code
= NO_DIRECT_FAIL
;
400 hierarchyNote(&request
->hier
, code
, &psstate
->ping
, NULL
);
401 peerSelectCallbackFail(psstate
);
406 peerPingTimeout(void *data
)
408 ps_state
*psstate
= data
;
409 StoreEntry
*entry
= psstate
->entry
;
411 debug(44, 3) ("peerPingTimeout: '%s'\n", storeUrl(entry
));
412 entry
->ping_status
= PING_TIMEOUT
;
413 if (!cbdataValid(psstate
->callback_data
)) {
414 /* request aborted */
415 cbdataUnlock(psstate
->callback_data
);
416 peerSelectStateFree(psstate
);
419 PeerStats
.timeouts
++;
420 psstate
->ping
.timedout
= 1;
421 peerSelectFoo(psstate
);
427 memset(&PeerStats
, '\0', sizeof(PeerStats
));
428 assert(sizeof(hier_strings
) == (HIER_MAX
+ 1) * sizeof(char *));
432 peerIcpParentMiss(peer
* p
, icp_common_t
* header
, ps_state
* ps
)
436 if (Config
.onoff
.query_icmp
) {
437 if (header
->flags
& ICP_FLAG_SRC_RTT
) {
438 rtt
= header
->pad
& 0xFFFF;
439 hops
= (header
->pad
>> 16) & 0xFFFF;
440 if (rtt
> 0 && rtt
< 0xFFFF)
441 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
442 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
443 ps
->closest_parent_miss
= p
->in_addr
;
444 ps
->ping
.p_rtt
= rtt
;
448 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
449 if (p
->options
.closest_only
)
451 /* set FIRST_MISS if there is no CLOSEST parent */
452 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
454 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
455 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
456 ps
->first_parent_miss
= p
->in_addr
;
457 ps
->ping
.w_rtt
= rtt
;
462 peerHandleIcpReply(peer
* p
, peer_t type
, icp_common_t
* header
, void *data
)
464 ps_state
*psstate
= data
;
465 icp_opcode op
= header
->opcode
;
466 request_t
*request
= psstate
->request
;
467 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
469 storeUrl(psstate
->entry
));
470 #if USE_CACHE_DIGESTS && 0
471 /* do cd lookup to count false misses */
473 peerNoteDigestLookup(request
, p
,
474 peerDigestLookup(p
, request
, psstate
->entry
));
476 psstate
->ping
.n_recv
++;
477 if (op
== ICP_MISS
|| op
== ICP_DECHO
) {
478 if (type
== PEER_PARENT
)
479 peerIcpParentMiss(p
, header
, psstate
);
480 } else if (op
== ICP_HIT
) {
481 hierarchyNote(&request
->hier
,
482 type
== PEER_PARENT
? PARENT_HIT
: SIBLING_HIT
,
485 peerSelectCallback(psstate
, p
);
487 } else if (op
== ICP_SECHO
) {
488 hierarchyNote(&request
->hier
,
492 peerSelectCallback(psstate
, NULL
);
495 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
497 peerSelectFoo(psstate
);
502 peerHandleHtcpReply(peer
* p
, peer_t type
, htcpReplyData
* htcp
, void *data
)
504 ps_state
*psstate
= data
;
505 request_t
*request
= psstate
->request
;
506 debug(44, 3) ("peerHandleIcpReply: %s %s\n",
507 htcp
->hit
? "HIT" : "MISS",
508 storeUrl(psstate
->entry
));
509 psstate
->ping
.n_recv
++;
511 hierarchyNote(&request
->hier
,
512 type
== PEER_PARENT
? PARENT_HIT
: SIBLING_HIT
,
515 peerSelectCallback(psstate
, p
);
518 if (type
== PEER_PARENT
)
519 peerHtcpParentMiss(p
, htcp
, psstate
);
520 if (psstate
->ping
.n_recv
< psstate
->ping
.n_replies_expected
)
522 peerSelectFoo(psstate
);
526 peerHtcpParentMiss(peer
* p
, htcpReplyData
* htcp
, ps_state
* ps
)
530 if (Config
.onoff
.query_icmp
) {
531 if (htcp
->cto
.rtt
> 0) {
532 rtt
= (int) htcp
->cto
.rtt
* 1000;
533 hops
= (int) htcp
->cto
.hops
* 1000;
534 netdbUpdatePeer(ps
->request
, p
, rtt
, hops
);
535 if (rtt
&& (ps
->ping
.p_rtt
== 0 || rtt
< ps
->ping
.p_rtt
)) {
536 ps
->closest_parent_miss
= p
->in_addr
;
537 ps
->ping
.p_rtt
= rtt
;
541 /* if closest-only is set, then don't allow FIRST_PARENT_MISS */
542 if (p
->options
.closest_only
)
544 /* set FIRST_MISS if there is no CLOSEST parent */
545 if (ps
->closest_parent_miss
.sin_addr
.s_addr
!= any_addr
.s_addr
)
547 rtt
= tvSubMsec(ps
->ping
.start
, current_time
) / p
->weight
;
548 if (ps
->ping
.w_rtt
== 0 || rtt
< ps
->ping
.w_rtt
) {
549 ps
->first_parent_miss
= p
->in_addr
;
550 ps
->ping
.w_rtt
= rtt
;
556 peerHandlePingReply(peer
* p
, peer_t type
, protocol_t proto
, void *pingdata
, void *data
)
558 if (proto
== PROTO_ICP
)
559 peerHandleIcpReply(p
, type
, pingdata
, data
);
561 else if (proto
== PROTO_HTCP
)
562 peerHandleHtcpReply(p
, type
, pingdata
, data
);
565 debug(44, 1) ("peerHandlePingReply: unknown protocol_t %d\n", (int) proto
);