/*
 * $Id: comm_poll.cc,v 1.9 2003/01/23 00:37:19 robertc Exp $
 *
 * DEBUG: section 5     Socket Functions
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.  Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "squid.h"
#include "Store.h"
#include "fde.h"

#ifdef USE_POLL

static int MAX_POLL_TIME = 1000;        /* see also comm_quick_poll_required() */

#ifndef howmany
#define howmany(x, y)   (((x)+((y)-1))/(y))
#endif
#ifndef NBBY
#define NBBY 8
#endif
#define FD_MASK_BYTES sizeof(fd_mask)
#define FD_MASK_BITS (FD_MASK_BYTES*NBBY)

/* STATIC */
static int fdIsHttp(int fd);
static int fdIsIcp(int fd);
static int fdIsDns(int fd);
static OBJH commIncomingStats;
static int comm_check_incoming_poll_handlers(int nfds, int *fds);
static void comm_poll_dns_incoming(void);
static void commUpdateReadBits(int fd, PF * handler);
static void commUpdateWriteBits(int fd, PF * handler);

static fd_set global_readfds;
static fd_set global_writefds;
static int nreadfds;
static int nwritefds;
/*
 * Automatic tuning for incoming requests:
 *
 * INCOMING sockets are the ICP and HTTP ports.  We need to check these
 * fairly regularly, but how often?  When the load increases, we
 * want to check the incoming sockets more often.  If we have a lot
 * of incoming ICP, then we need to check these sockets more than
 * if we just have HTTP.
 *
 * The variables 'incoming_icp_interval' and 'incoming_http_interval'
 * determine how many normal I/O events to process before checking
 * incoming sockets again.  Note we store the incoming_interval
 * multiplied by a factor of (2^INCOMING_FACTOR) to have some
 * pseudo-floating point precision.
 *
 * The variables 'icp_io_events' and 'http_io_events' count how many normal
 * I/O events have been processed since the last check on the incoming
 * sockets.  When io_events > incoming_interval, it's time to check the
 * incoming sockets.
 *
 * Every time we check incoming sockets, we count how many new messages
 * or connections were processed.  This is used to adjust the
 * incoming_interval for the next iteration.  The new incoming_interval
 * is calculated as the current incoming_interval plus what we would
 * like to see as an average number of events minus the number of
 * events just processed.
 *
 *  incoming_interval = incoming_interval + target_average - number_of_events_processed
 *
 * There are separate incoming_interval counters for both HTTP and ICP events.
 *
 * You can see the current values of the incoming_intervals, as well as
 * a histogram of 'incoming_events', by asking the cache manager
 * for 'comm_incoming', e.g.:
 *
 *      % ./client mgr:comm_incoming
 *
 * Caveats:
 *
 *      - We have MAX_INCOMING_INTEGER as a magic upper limit on
 *        incoming_interval for both types of sockets.  At the
 *        largest value the cache will effectively be idling.
 *
 *      - The higher the INCOMING_FACTOR, the slower the algorithm will
 *        respond to load spikes/increases/decreases in demand.  A value
 *        between 3 and 8 is recommended.
 */
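
/*
 * Worked example (illustrative numbers only; the real target averages come
 * from Config.comm_incoming, and the defaults below may differ in a given
 * build): with INCOMING_FACTOR 5, an effective interval of 16 is any stored
 * value in the range 512..543.  Suppose the stored incoming_icp_interval is
 * 528, icp_average (the target) is 4, and a check of the ICP sockets just
 * handled nevents = 6.  Then
 *
 *      incoming_icp_interval = 528 + 4 - 6 = 526
 *
 * and the effective interval (526 >> INCOMING_FACTOR) is still 16; only
 * after several more equally busy checks does it drop to 15, at which point
 * the ICP sockets are checked more often.  This is the pseudo-floating
 * point smoothing described above.
 */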

#define MAX_INCOMING_INTEGER 256
#define INCOMING_FACTOR 5
#define MAX_INCOMING_INTERVAL (MAX_INCOMING_INTEGER << INCOMING_FACTOR)
static int icp_io_events = 0;
static int dns_io_events = 0;
static int http_io_events = 0;
static int incoming_icp_interval = 16 << INCOMING_FACTOR;
static int incoming_dns_interval = 16 << INCOMING_FACTOR;
static int incoming_http_interval = 16 << INCOMING_FACTOR;
#define commCheckICPIncoming (++icp_io_events > (incoming_icp_interval >> INCOMING_FACTOR))
#define commCheckDNSIncoming (++dns_io_events > (incoming_dns_interval >> INCOMING_FACTOR))
#define commCheckHTTPIncoming (++http_io_events > (incoming_http_interval >> INCOMING_FACTOR))


void
commSetSelect(int fd, unsigned int type, PF * handler, void *client_data,
    time_t timeout)
{
    fde *F = &fd_table[fd];
    assert(fd >= 0);
    assert(F->flags.open);
    debug(5, 5) ("commSetSelect: FD %d type %d\n", fd, type);
    if (type & COMM_SELECT_READ) {
        F->read_handler = handler;
        F->read_data = client_data;
        commUpdateReadBits(fd, handler);
    }
    if (type & COMM_SELECT_WRITE) {
        F->write_handler = handler;
        F->write_data = client_data;
        commUpdateWriteBits(fd, handler);
    }
    if (timeout)
        F->timeout = squid_curtime + timeout;
}

static int
fdIsIcp(int fd)
{
    if (fd == theInIcpConnection)
        return 1;
    if (fd == theOutIcpConnection)
        return 1;
    return 0;
}

static int
fdIsDns(int fd)
{
    if (fd == DnsSocket)
        return 1;
    return 0;
}

static int
fdIsHttp(int fd)
{
    int j;
    for (j = 0; j < NHttpSockets; j++) {
        if (fd == HttpSockets[j])
            return 1;
    }
    return 0;
}

#if DELAY_POOLS
static int slowfdcnt = 0;
static int slowfdarr[SQUID_MAXFD];

static void
commAddSlowFd(int fd)
{
    assert(slowfdcnt < SQUID_MAXFD);
    slowfdarr[slowfdcnt++] = fd;
}

static int
commGetSlowFd(void)
{
    int whichfd, retfd;

    if (!slowfdcnt)
        return -1;
    whichfd = squid_random() % slowfdcnt;
    retfd = slowfdarr[whichfd];
    slowfdarr[whichfd] = slowfdarr[--slowfdcnt];
    return retfd;
}
#endif

static int
comm_check_incoming_poll_handlers(int nfds, int *fds)
{
    int i;
    int fd;
    PF *hdl = NULL;
    int npfds;
    struct pollfd pfds[3 + MAXHTTPPORTS];
    PROF_start(comm_check_incoming);
    incoming_sockets_accepted = 0;
    for (i = npfds = 0; i < nfds; i++) {
        int events;
        fd = fds[i];
        events = 0;
        if (fd_table[fd].read_handler)
            events |= POLLRDNORM;
        if (fd_table[fd].write_handler)
            events |= POLLWRNORM;
        if (events) {
            pfds[npfds].fd = fd;
            pfds[npfds].events = events;
            pfds[npfds].revents = 0;
            npfds++;
        }
    }
    if (!nfds) {
        PROF_stop(comm_check_incoming);
        return -1;
    }
    getCurrentTime();
    statCounter.syscalls.polls++;
    if (poll(pfds, npfds, 0) < 1) {
        PROF_stop(comm_check_incoming);
        return incoming_sockets_accepted;
    }
    for (i = 0; i < npfds; i++) {
        int revents;
        if (((revents = pfds[i].revents) == 0) || ((fd = pfds[i].fd) == -1))
            continue;
        if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
            if ((hdl = fd_table[fd].read_handler)) {
                fd_table[fd].read_handler = NULL;
                hdl(fd, fd_table[fd].read_data);
            } else if (pfds[i].events & POLLRDNORM)
                debug(5, 1) ("comm_poll_incoming: FD %d NULL read handler\n",
                    fd);
        }
        if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
            if ((hdl = fd_table[fd].write_handler)) {
                fd_table[fd].write_handler = NULL;
                hdl(fd, fd_table[fd].write_data);
            } else if (pfds[i].events & POLLWRNORM)
                debug(5, 1) ("comm_poll_incoming: FD %d NULL write_handler\n",
                    fd);
        }
    }
    PROF_stop(comm_check_incoming);
    return incoming_sockets_accepted;
}

static void
comm_poll_icp_incoming(void)
{
    int nfds = 0;
    int fds[2];
    int nevents;
    icp_io_events = 0;
    if (theInIcpConnection >= 0)
        fds[nfds++] = theInIcpConnection;
    if (theInIcpConnection != theOutIcpConnection)
        if (theOutIcpConnection >= 0)
            fds[nfds++] = theOutIcpConnection;
    if (nfds == 0)
        return;
    nevents = comm_check_incoming_poll_handlers(nfds, fds);
    incoming_icp_interval += Config.comm_incoming.icp_average - nevents;
    if (incoming_icp_interval < Config.comm_incoming.icp_min_poll)
        incoming_icp_interval = Config.comm_incoming.icp_min_poll;
    if (incoming_icp_interval > MAX_INCOMING_INTERVAL)
        incoming_icp_interval = MAX_INCOMING_INTERVAL;
    if (nevents > INCOMING_ICP_MAX)
        nevents = INCOMING_ICP_MAX;
    statHistCount(&statCounter.comm_icp_incoming, nevents);
}

static void
comm_poll_http_incoming(void)
{
    int nfds = 0;
    int fds[MAXHTTPPORTS];
    int j;
    int nevents;
    http_io_events = 0;
    for (j = 0; j < NHttpSockets; j++) {
        if (HttpSockets[j] < 0)
            continue;
        if (commDeferRead(HttpSockets[j]))
            continue;
        fds[nfds++] = HttpSockets[j];
    }
    nevents = comm_check_incoming_poll_handlers(nfds, fds);
    incoming_http_interval = incoming_http_interval
        + Config.comm_incoming.http_average - nevents;
    if (incoming_http_interval < Config.comm_incoming.http_min_poll)
        incoming_http_interval = Config.comm_incoming.http_min_poll;
    if (incoming_http_interval > MAX_INCOMING_INTERVAL)
        incoming_http_interval = MAX_INCOMING_INTERVAL;
    if (nevents > INCOMING_HTTP_MAX)
        nevents = INCOMING_HTTP_MAX;
    statHistCount(&statCounter.comm_http_incoming, nevents);
}

/* poll all sockets; call handlers for those that are ready. */
comm_err_t
comm_select(int msec)
{
    struct pollfd pfds[SQUID_MAXFD];
#if DELAY_POOLS
    fd_set slowfds;
#endif
    PF *hdl = NULL;
    int fd;
    int maxfd;
    unsigned long nfds;
    unsigned long npending;
    int num;
    int callicp = 0, callhttp = 0;
    int calldns = 0;
    static time_t last_timeout = 0;
    double timeout = current_dtime + (msec / 1000.0);
    do {
        double start;
        getCurrentTime();
        start = current_dtime;
#if DELAY_POOLS
        FD_ZERO(&slowfds);
#endif
        if (commCheckICPIncoming)
            comm_poll_icp_incoming();
        if (commCheckDNSIncoming)
            comm_poll_dns_incoming();
        if (commCheckHTTPIncoming)
            comm_poll_http_incoming();
        PROF_start(comm_poll_prep_pfds);
        callicp = calldns = callhttp = 0;
        nfds = 0;
        npending = 0;
        maxfd = Biggest_FD + 1;
        for (int i = 0; i < maxfd; i++) {
            int events;
            events = 0;
            /* Check each open socket for a handler. */
            if (fd_table[i].read_handler) {
                switch (commDeferRead(i)) {
                case 0:
                    events |= POLLRDNORM;
                    break;
                case 1:
                    break;
#if DELAY_POOLS
                case -1:
                    events |= POLLRDNORM;
                    FD_SET(i, &slowfds);
                    break;
#endif
                default:
                    fatalf("bad return value from commDeferRead(FD %d)\n", i);
                }
            }
            if (fd_table[i].write_handler)
                events |= POLLWRNORM;
            if (events) {
                pfds[nfds].fd = i;
                pfds[nfds].events = events;
                pfds[nfds].revents = 0;
                nfds++;
                if ((events & POLLRDNORM) && fd_table[i].flags.read_pending)
                    npending++;
            }
        }
        PROF_stop(comm_poll_prep_pfds);
        if (nfds == 0) {
            assert(shutting_down);
            return COMM_SHUTDOWN;
        }
        if (npending)
            msec = 0;
        if (msec > MAX_POLL_TIME)
            msec = MAX_POLL_TIME;
        for (;;) {
            PROF_start(comm_poll_normal);
            statCounter.syscalls.polls++;
            num = poll(pfds, nfds, msec);
            statCounter.select_loops++;
            PROF_stop(comm_poll_normal);
            /* stop retrying once poll() has returned, or reads are already pending */
            if (num >= 0 || npending > 0)
                break;
            if (ignoreErrno(errno))
                continue;
            debug(5, 0) ("comm_poll: poll failure: %s\n", xstrerror());
            assert(errno != EINVAL);
            return COMM_ERROR;
            /* NOTREACHED */
        }
        debug(5, num ? 5 : 8) ("comm_poll: %d+%ld FDs ready\n", num, npending);
        statHistCount(&statCounter.select_fds_hist, num);
        /* Check timeout handlers ONCE each second. */
        if (squid_curtime > last_timeout) {
            last_timeout = squid_curtime;
            checkTimeouts();
        }
        if (num == 0 && npending == 0)
            continue;
        /* scan each socket but the accept socket.  Poll this
         * more frequently to minimize losses due to the 5 connect
         * limit in SunOS */
        PROF_start(comm_handle_ready_fd);
        for (size_t loopIndex = 0; loopIndex < nfds; loopIndex++) {
            fde *F;
            int revents = pfds[loopIndex].revents;
            fd = pfds[loopIndex].fd;
            if (fd == -1)
                continue;
            if (fd_table[fd].flags.read_pending)
                revents |= POLLIN;
            if (revents == 0)
                continue;
            if (fdIsIcp(fd)) {
                callicp = 1;
                continue;
            }
            if (fdIsDns(fd)) {
                calldns = 1;
                continue;
            }
            if (fdIsHttp(fd)) {
                callhttp = 1;
                continue;
            }
            F = &fd_table[fd];
            if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
                debug(5, 6) ("comm_poll: FD %d ready for reading\n", fd);
                if (NULL == (hdl = F->read_handler))
                    (void) 0;
#if DELAY_POOLS
                else if (FD_ISSET(fd, &slowfds))
                    commAddSlowFd(fd);
#endif
                else {
                    PROF_start(comm_read_handler);
                    F->read_handler = NULL;
                    hdl(fd, F->read_data);
                    PROF_stop(comm_read_handler);
                    statCounter.select_fds++;
                    if (commCheckICPIncoming)
                        comm_poll_icp_incoming();
                    if (commCheckDNSIncoming)
                        comm_poll_dns_incoming();
                    if (commCheckHTTPIncoming)
                        comm_poll_http_incoming();
                }
            }
            if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
                debug(5, 5) ("comm_poll: FD %d ready for writing\n", fd);
                if ((hdl = F->write_handler)) {
                    PROF_start(comm_write_handler);
                    F->write_handler = NULL;
                    hdl(fd, F->write_data);
                    PROF_stop(comm_write_handler);
                    statCounter.select_fds++;
                    if (commCheckICPIncoming)
                        comm_poll_icp_incoming();
                    if (commCheckDNSIncoming)
                        comm_poll_dns_incoming();
                    if (commCheckHTTPIncoming)
                        comm_poll_http_incoming();
                }
            }
            if (revents & POLLNVAL) {
                close_handler *ch;
                debug(5, 0) ("WARNING: FD %d has handlers, but it's invalid.\n", fd);
                debug(5, 0) ("FD %d is a %s\n", fd, fdTypeStr[F->type]);
                debug(5, 0) ("--> %s\n", F->desc);
                debug(5, 0) ("tmout:%p read:%p write:%p\n",
                    F->timeout_handler,
                    F->read_handler,
                    F->write_handler);
                for (ch = F->closeHandler; ch; ch = ch->next)
                    debug(5, 0) (" close handler: %p\n", ch->handler);
                if (F->closeHandler) {
                    commCallCloseHandlers(fd);
                } else if (F->timeout_handler) {
                    debug(5, 0) ("comm_poll: Calling Timeout Handler\n");
                    F->timeout_handler(fd, F->timeout_data);
                }
                F->closeHandler = NULL;
                F->timeout_handler = NULL;
                F->read_handler = NULL;
                F->write_handler = NULL;
                if (F->flags.open)
                    fd_close(fd);
            }
        }
        PROF_stop(comm_handle_ready_fd);
        if (callicp)
            comm_poll_icp_incoming();
        if (calldns)
            comm_poll_dns_incoming();
        if (callhttp)
            comm_poll_http_incoming();
#if DELAY_POOLS
        while ((fd = commGetSlowFd()) != -1) {
            fde *F = &fd_table[fd];
            debug(5, 6) ("comm_select: slow FD %d selected for reading\n", fd);
            if ((hdl = F->read_handler)) {
                F->read_handler = NULL;
                hdl(fd, F->read_data);
                statCounter.select_fds++;
                if (commCheckICPIncoming)
                    comm_poll_icp_incoming();
                if (commCheckDNSIncoming)
                    comm_poll_dns_incoming();
                if (commCheckHTTPIncoming)
                    comm_poll_http_incoming();
            }
        }
#endif
        getCurrentTime();
        statCounter.select_time += (current_dtime - start);
        return COMM_OK;
    } while (timeout > current_dtime);
    debug(5, 8) ("comm_poll: time out: %ld.\n", (long int) squid_curtime);
    return COMM_TIMEOUT;
}


static void
comm_poll_dns_incoming(void)
{
    int nfds = 0;
    int fds[2];
    int nevents;
    dns_io_events = 0;
    if (DnsSocket < 0)
        return;
    fds[nfds++] = DnsSocket;
    nevents = comm_check_incoming_poll_handlers(nfds, fds);
    if (nevents < 0)
        return;
    incoming_dns_interval += Config.comm_incoming.dns_average - nevents;
    if (incoming_dns_interval < Config.comm_incoming.dns_min_poll)
        incoming_dns_interval = Config.comm_incoming.dns_min_poll;
    if (incoming_dns_interval > MAX_INCOMING_INTERVAL)
        incoming_dns_interval = MAX_INCOMING_INTERVAL;
    if (nevents > INCOMING_DNS_MAX)
        nevents = INCOMING_DNS_MAX;
    statHistCount(&statCounter.comm_dns_incoming, nevents);
}

void
comm_select_init(void)
{
    cachemgrRegister("comm_incoming",
        "comm_incoming() stats",
        commIncomingStats, 0, 1);
    FD_ZERO(&global_readfds);
    FD_ZERO(&global_writefds);
    nreadfds = nwritefds = 0;
}


static void
commIncomingStats(StoreEntry * sentry)
{
    StatCounters *f = &statCounter;
    storeAppendPrintf(sentry, "Current incoming_icp_interval: %d\n",
        incoming_icp_interval >> INCOMING_FACTOR);
    storeAppendPrintf(sentry, "Current incoming_dns_interval: %d\n",
        incoming_dns_interval >> INCOMING_FACTOR);
    storeAppendPrintf(sentry, "Current incoming_http_interval: %d\n",
        incoming_http_interval >> INCOMING_FACTOR);
    storeAppendPrintf(sentry, "\n");
    storeAppendPrintf(sentry, "Histogram of events per incoming socket type\n");
    storeAppendPrintf(sentry, "ICP Messages handled per comm_poll_icp_incoming() call:\n");
    statHistDump(&f->comm_icp_incoming, sentry, statHistIntDumper);
    storeAppendPrintf(sentry, "DNS Messages handled per comm_poll_dns_incoming() call:\n");
    statHistDump(&f->comm_dns_incoming, sentry, statHistIntDumper);
    storeAppendPrintf(sentry, "HTTP Messages handled per comm_poll_http_incoming() call:\n");
    statHistDump(&f->comm_http_incoming, sentry, statHistIntDumper);
}

void
commUpdateReadBits(int fd, PF * handler)
{
    if (handler && !FD_ISSET(fd, &global_readfds)) {
        FD_SET(fd, &global_readfds);
        nreadfds++;
    } else if (!handler && FD_ISSET(fd, &global_readfds)) {
        FD_CLR(fd, &global_readfds);
        nreadfds--;
    }
}

void
commUpdateWriteBits(int fd, PF * handler)
{
    if (handler && !FD_ISSET(fd, &global_writefds)) {
        FD_SET(fd, &global_writefds);
        nwritefds++;
    } else if (!handler && FD_ISSET(fd, &global_writefds)) {
        FD_CLR(fd, &global_writefds);
        nwritefds--;
    }
}

/* Called by async-io or diskd to speed up the polling */
void
comm_quick_poll_required(void)
{
    MAX_POLL_TIME = 10;
}

#endif /* USE_POLL */