2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 /* DEBUG: section 05 Socket Functions */
15 #include "anyp/PortCfg.h"
16 #include "comm/Connection.h"
17 #include "comm/Loops.h"
21 #include "mgr/Registration.h"
22 #include "SquidConfig.h"
23 #include "SquidTime.h"
24 #include "StatCounters.h"
// Upper bound, in milliseconds, on a single select() wait.
33 static int MAX_POLL_TIME
= 1000; /* see also Comm::QuickPollRequired() */
// howmany(x, y): count of y-sized units needed to cover x (rounds up).
36 #define howmany(x, y) (((x)+((y)-1))/(y))
// Width of one fd_mask word of an fd_set, in bytes and in bits
// (NBBY = number of bits per byte).
41 #define FD_MASK_BYTES sizeof(fd_mask)
42 #define FD_MASK_BITS (FD_MASK_BYTES*NBBY)
/* Forward declarations of the file-local helpers defined below. */
45 static int examine_select(fd_set
*, fd_set
*);
46 static int fdIsTcpListener(int fd
);
47 static int fdIsUdpListener(int fd
);
48 static int fdIsDns(int fd
);
// Cache-manager report callback, registered in Comm::SelectLoopInit().
49 static OBJH commIncomingStats
;
50 static int comm_check_incoming_select_handlers(int nfds
, int *fds
);
51 static void comm_select_dns_incoming(void);
// Keep the global read/write fd_set bitmaps in sync with fd_table handlers.
52 static void commUpdateReadBits(int fd
, PF
* handler
);
53 static void commUpdateWriteBits(int fd
, PF
* handler
);
// Zero timeout used for the non-blocking "incoming" select() probes.
55 static struct timeval zero_tv
;
// Master interest sets; select() operates on per-call copies of these.
56 static fd_set global_readfds
;
57 static fd_set global_writefds
;
62 * Automatic tuning for incoming requests:
64 * INCOMING sockets are the ICP and HTTP ports. We need to check these
65 * fairly regularly, but how often? When the load increases, we
66 * want to check the incoming sockets more often. If we have a lot
67 * of incoming ICP, then we need to check these sockets more than
68 * if we just have HTTP.
70 * The variables 'incoming_udp_interval' and 'incoming_tcp_interval'
71 * determine how many normal I/O events to process before checking
72 * incoming sockets again. Note we store the incoming_interval
73 * multiplied by a factor of (2^INCOMING_FACTOR) to have some
74 * pseudo-floating point precision.
76 * The variables 'udp_io_events' and 'tcp_io_events' count how many normal
77 * I/O events have been processed since the last check on the incoming
78 * sockets. When io_events > incoming_interval, it's time to check incoming
81 * Every time we check incoming sockets, we count how many new messages
82 * or connections were processed. This is used to adjust the
83 * incoming_interval for the next iteration. The new incoming_interval
84 * is calculated as the current incoming_interval plus what we would
85 * like to see as an average number of events minus the number of
86 * events just processed.
88 * incoming_interval = incoming_interval + target_average - number_of_events_processed
90 * There are separate incoming_interval counters for DNS, UDP and TCP events
92 * You can see the current values of the incoming_interval's, as well as
93 * a histogram of 'incoming_events' by asking the cache manager
94 * for 'comm_incoming', e.g.:
96 * % ./client mgr:comm_incoming
100 * - We have MAX_INCOMING_INTEGER as a magic upper limit on
101 * incoming_interval for both types of sockets. At the
102 * largest value the cache will effectively be idling.
104 * - The higher the INCOMING_FACTOR, the slower the algorithm will
105 * respond to load spikes/increases/decreases in demand. A value
106 * between 3 and 8 is recommended.
// Hard ceiling on the (scaled) incoming-check interval; at this value the
// incoming sockets are polled very rarely.
109 #define MAX_INCOMING_INTEGER 256
// Fixed-point shift: intervals are stored multiplied by 2^INCOMING_FACTOR.
110 #define INCOMING_FACTOR 5
111 #define MAX_INCOMING_INTERVAL (MAX_INCOMING_INTEGER << INCOMING_FACTOR)
// Normal I/O events handled since the last check of each incoming class.
112 static int udp_io_events
= 0;
113 static int dns_io_events
= 0;
114 static int tcp_io_events
= 0;
// Adaptive intervals (fixed-point, see INCOMING_FACTOR); start at 16 events.
115 static int incoming_udp_interval
= 16 << INCOMING_FACTOR
;
116 static int incoming_dns_interval
= 16 << INCOMING_FACTOR
;
117 static int incoming_tcp_interval
= 16 << INCOMING_FACTOR
;
// True when enough normal I/O events have elapsed to warrant re-polling the
// corresponding incoming sockets. NOTE: each test increments its counter.
118 #define commCheckUdpIncoming (++udp_io_events > (incoming_udp_interval>> INCOMING_FACTOR))
119 #define commCheckDnsIncoming (++dns_io_events > (incoming_dns_interval>> INCOMING_FACTOR))
120 #define commCheckTcpIncoming (++tcp_io_events > (incoming_tcp_interval>> INCOMING_FACTOR))
/* Register (or, with a NULL handler, clear) interest in read and/or write
 * readiness on one open descriptor, and arm its inactivity timeout.
 *   fd          - descriptor; must be open in fd_table (asserted below)
 *   type        - bitmask of COMM_SELECT_READ / COMM_SELECT_WRITE
 *   handler     - callback invoked when fd becomes ready (NULL to clear)
 *   client_data - opaque pointer passed back to handler
 *   timeout     - seconds from squid_curtime until the fd times out
 * NOTE(review): this extraction is missing the original lines between the
 * statements below (braces, and any special handling of timeout == 0), so
 * only the visible assignments are documented here.
 */
123 Comm::SetSelect(int fd
, unsigned int type
, PF
* handler
, void *client_data
, time_t timeout
)
125 fde
*F
= &fd_table
[fd
];
127 assert(F
->flags
.open
);
128 debugs(5, 5, HERE
<< "FD " << fd
<< ", type=" << type
<<
129 ", handler=" << handler
<< ", client_data=" << client_data
<<
130 ", timeout=" << timeout
);
// Record the read handler and mirror it into global_readfds.
132 if (type
& COMM_SELECT_READ
) {
133 F
->read_handler
= handler
;
134 F
->read_data
= client_data
;
135 commUpdateReadBits(fd
, handler
);
// Record the write handler and mirror it into global_writefds.
138 if (type
& COMM_SELECT_WRITE
) {
139 F
->write_handler
= handler
;
140 F
->write_data
= client_data
;
141 commUpdateWriteBits(fd
, handler
);
// Re-arm the absolute timeout deadline for this descriptor.
145 F
->timeout
= squid_curtime
+ timeout
;
// NOTE(review): the bodies in this region are fragmentary; original lines
// 150-153, 157-167, and 169-177 (returns, braces, the fdIsDns() signature)
// are missing from this extraction.
149 Comm::ResetSelect(int fd
)
// fdIsUdpListener: true when fd is one of the ICP (UDP) sockets.
154 fdIsUdpListener(int fd
)
156 if (icpIncomingConn
!= NULL
&& fd
== icpIncomingConn
->fd
)
159 if (icpOutgoingConn
!= NULL
&& fd
== icpOutgoingConn
->fd
)
// Body fragment of fdIsDns(): true when fd is one of the DNS sockets.
168 if (fd
== DnsSocketA
)
171 if (fd
== DnsSocketB
)
// fdIsTcpListener: true when fd is one of the configured HTTP listening
// sockets. Scans the HttpPortList chain of port configurations.
// NOTE(review): the return statements are among the lines missing from this
// extraction.
178 fdIsTcpListener(int fd
)
180 for (AnyP::PortCfgPointer s
= HttpPortList
; s
!= NULL
; s
= s
->next
) {
181 if (s
->listenConn
!= NULL
&& s
->listenConn
->fd
== fd
)
/* Poll the given set of "incoming" descriptors (ICP/DNS/HTTP listeners)
 * with a zero-timeout select() and run any ready read/write handlers.
 *   nfds - number of descriptors in fds[]
 *   fds  - the descriptors to probe
 * Returns incoming_sockets_accepted, the number of new messages/connections
 * handled (presumably incremented by the invoked handlers - the counter's
 * declaration is outside this extraction; verify against the full file).
 */
189 comm_check_incoming_select_handlers(int nfds
, int *fds
)
198 FD_ZERO(&write_mask
);
199 incoming_sockets_accepted
= 0;
// Build read/write masks from the handlers currently registered in fd_table.
201 for (i
= 0; i
< nfds
; ++i
) {
204 if (fd_table
[fd
].read_handler
) {
205 FD_SET(fd
, &read_mask
);
211 if (fd_table
[fd
].write_handler
) {
212 FD_SET(fd
, &write_mask
);
224 ++ statCounter
.syscalls
.selects
;
// Non-blocking probe: zero_tv makes select() return immediately.
226 if (select(maxfd
, &read_mask
, &write_mask
, NULL
, &zero_tv
) < 1)
227 return incoming_sockets_accepted
;
// Dispatch handlers for every fd reported ready. Handlers are one-shot:
// cleared (and the global bitmaps updated) before being invoked.
229 for (i
= 0; i
< nfds
; ++i
) {
232 if (FD_ISSET(fd
, &read_mask
)) {
233 if ((hdl
= fd_table
[fd
].read_handler
) != NULL
) {
234 fd_table
[fd
].read_handler
= NULL
;
235 commUpdateReadBits(fd
, NULL
);
236 hdl(fd
, fd_table
[fd
].read_data
);
238 debugs(5, DBG_IMPORTANT
, "comm_select_incoming: FD " << fd
<< " NULL read handler");
242 if (FD_ISSET(fd
, &write_mask
)) {
243 if ((hdl
= fd_table
[fd
].write_handler
) != NULL
) {
244 fd_table
[fd
].write_handler
= NULL
;
245 commUpdateWriteBits(fd
, NULL
);
246 hdl(fd
, fd_table
[fd
].write_data
);
248 debugs(5, DBG_IMPORTANT
, "comm_select_incoming: FD " << fd
<< " NULL write handler");
253 return incoming_sockets_accepted
;
/* Poll the ICP (UDP) sockets for incoming messages and adapt the
 * incoming_udp_interval based on how many events were handled, per the
 * feedback algorithm described in the comment block near the top of this
 * file: interval += configured_average - events_handled, clamped to
 * [0, MAX_INCOMING_INTERVAL].
 */
257 comm_select_udp_incoming(void)
// Collect the open ICP socket(s); skip the outgoing one if it is the same
// connection as the incoming one.
264 if (Comm::IsConnOpen(icpIncomingConn
)) {
265 fds
[nfds
] = icpIncomingConn
->fd
;
269 if (Comm::IsConnOpen(icpOutgoingConn
) && icpIncomingConn
!= icpOutgoingConn
) {
270 fds
[nfds
] = icpOutgoingConn
->fd
;
277 nevents
= comm_check_incoming_select_handlers(nfds
, fds
);
// Adaptive feedback: busier sockets shrink the interval (checked sooner).
279 incoming_udp_interval
+= Config
.comm_incoming
.udp
.average
- nevents
;
281 if (incoming_udp_interval
< 0)
282 incoming_udp_interval
= 0;
284 if (incoming_udp_interval
> MAX_INCOMING_INTERVAL
)
285 incoming_udp_interval
= MAX_INCOMING_INTERVAL
;
// Clamp before recording in the histogram so its buckets stay bounded.
287 if (nevents
> INCOMING_UDP_MAX
)
288 nevents
= INCOMING_UDP_MAX
;
290 statCounter
.comm_udp_incoming
.count(nevents
);
/* Poll the HTTP (TCP) listening sockets for new connections and adapt
 * incoming_tcp_interval with the same feedback scheme as the UDP variant:
 * interval += configured_average - events_handled, clamped to
 * [0, MAX_INCOMING_INTERVAL].
 */
294 comm_select_tcp_incoming(void)
297 int fds
[MAXTCPLISTENPORTS
];
301 // XXX: only poll sockets that won't be deferred. But how do we identify them?
// Collect every open HTTP listening socket.
303 for (AnyP::PortCfgPointer s
= HttpPortList
; s
!= NULL
; s
= s
->next
) {
304 if (Comm::IsConnOpen(s
->listenConn
)) {
305 fds
[nfds
] = s
->listenConn
->fd
;
310 nevents
= comm_check_incoming_select_handlers(nfds
, fds
);
// Adaptive feedback, as in comm_select_udp_incoming().
311 incoming_tcp_interval
+= Config
.comm_incoming
.tcp
.average
- nevents
;
313 if (incoming_tcp_interval
< 0)
314 incoming_tcp_interval
= 0;
316 if (incoming_tcp_interval
> MAX_INCOMING_INTERVAL
)
317 incoming_tcp_interval
= MAX_INCOMING_INTERVAL
;
// Clamp before recording in the histogram.
319 if (nevents
> INCOMING_TCP_MAX
)
320 nevents
= INCOMING_TCP_MAX
;
322 statCounter
.comm_tcp_incoming
.count(nevents
);
// Compile-time switch for extra fd-bitmask consistency checks in DoSelect().
325 #define DEBUG_FDBITS 0
326 /* Select on all sockets; call handlers for those that are ready. */
/* Main event-loop iteration: select() on all registered descriptors and
 * dispatch their read/write handlers, interleaving periodic checks of the
 * "incoming" (ICP/DNS/HTTP) sockets.
 *   msec - maximum time to wait, milliseconds (capped at MAX_POLL_TIME)
 * Returns Comm::SHUTDOWN when no descriptors remain while shutting down,
 * Comm::COMM_ERROR on an unrecoverable select() failure, or Comm::TIMEOUT
 * when the deadline passes with no work.
 * NOTE(review): this extraction omits many original lines (locals, braces,
 * the DEBUG_FDBITS blocks, and parts of the dispatch paths); comments below
 * describe only the visible statements.
 */
328 Comm::DoSelect(int msec
)
339 int calldns
= 0, calludp
= 0, calltcp
= 0;
352 struct timeval poll_time
;
// Absolute deadline for this call, in fractional seconds.
353 double timeout
= current_dtime
+ (msec
/ 1000.0);
359 start
= current_dtime
;
// Service incoming sockets first if their adaptive counters say it is time.
361 if (commCheckUdpIncoming
)
362 comm_select_udp_incoming();
364 if (commCheckDnsIncoming
)
365 comm_select_dns_incoming();
367 if (commCheckTcpIncoming
)
368 comm_select_tcp_incoming();
370 calldns
= calludp
= calltcp
= 0;
372 maxfd
= Biggest_FD
+ 1;
// Copy only the live prefix of the master bitmaps; select() mutates its
// arguments, so the masters must be preserved across iterations.
374 memcpy(&readfds
, &global_readfds
,
375 howmany(maxfd
, FD_MASK_BITS
) * FD_MASK_BYTES
);
377 memcpy(&writefds
, &global_writefds
,
378 howmany(maxfd
, FD_MASK_BITS
) * FD_MASK_BYTES
);
380 /* remove stalled FDs, and deal with pending descriptors */
383 FD_ZERO(&pendingfds
);
385 maxindex
= howmany(maxfd
, FD_MASK_BITS
);
// Walk the read mask word-by-word, then bit-by-bit, collecting descriptors
// that already have buffered data (read_pending) into pendingfds so they
// are serviced even if select() does not report them.
387 fdsp
= (fd_mask
*) & readfds
;
389 for (j
= 0; j
< maxindex
; ++j
) {
390 if ((tmask
= fdsp
[j
]) == 0)
391 continue; /* no bits here */
393 for (k
= 0; k
< FD_MASK_BITS
; ++k
) {
394 if (!EBIT_TEST(tmask
, k
))
397 /* Found a set bit */
398 fd
= (j
* FD_MASK_BITS
) + k
;
400 if (FD_ISSET(fd
, &readfds
) && fd_table
[fd
].flags
.read_pending
) {
401 FD_SET(fd
, &pendingfds
);
// Sanity pass: every registered handler must have its bit set in the copy.
408 for (i
= 0; i
< maxfd
; ++i
) {
409 /* Check each open socket for a handler. */
411 if (fd_table
[i
].read_handler
) {
412 assert(FD_ISSET(i
, &readfds
));
415 if (fd_table
[i
].write_handler
) {
416 assert(FD_ISSET(i
, &writefds
));
// Nothing registered at all: only legitimate during shutdown.
421 if (nreadfds
+ nwritefds
== 0) {
422 assert(shutting_down
);
423 return Comm::SHUTDOWN
;
426 if (msec
> MAX_POLL_TIME
)
427 msec
= MAX_POLL_TIME
;
433 poll_time
.tv_sec
= msec
/ 1000;
434 poll_time
.tv_usec
= (msec
% 1000) * 1000;
435 ++ statCounter
.syscalls
.selects
;
436 num
= select(maxfd
, &readfds
, &writefds
, NULL
, &poll_time
);
437 ++ statCounter
.select_loops
;
// Success, or pending work to do regardless of select()'s verdict.
439 if (num
>= 0 || pending
> 0)
// Transient errors (e.g. EINTR) are retried; anything else is diagnosed
// via examine_select() and reported as a hard error.
442 if (ignoreErrno(errno
))
445 debugs(5, DBG_CRITICAL
, "comm_select: select failure: " << xstrerror());
447 examine_select(&readfds
, &writefds
);
449 return Comm::COMM_ERROR
;
454 if (num
< 0 && !pending
)
459 debugs(5, num
? 5 : 8, "comm_select: " << num
<< "+" << pending
<< " FDs ready");
461 statCounter
.select_fds_hist
.count(num
);
463 if (num
== 0 && pending
== 0)
466 /* Scan return fd masks for ready descriptors */
// Read dispatch: iterate the union of select()-ready and pending bits.
467 fdsp
= (fd_mask
*) & readfds
;
469 pfdsp
= (fd_mask
*) & pendingfds
;
471 maxindex
= howmany(maxfd
, FD_MASK_BITS
);
473 for (j
= 0; j
< maxindex
; ++j
) {
474 if ((tmask
= (fdsp
[j
] | pfdsp
[j
])) == 0)
475 continue; /* no bits here */
477 for (k
= 0; k
< FD_MASK_BITS
; ++k
) {
479 break; /* no more bits left */
481 if (!EBIT_TEST(tmask
, k
))
484 /* Found a set bit */
485 fd
= (j
* FD_MASK_BITS
) + k
;
487 EBIT_CLR(tmask
, k
); /* this will be done */
491 debugs(5, 9, "FD " << fd
<< " bit set for reading");
493 assert(FD_ISSET(fd
, &readfds
));
// Listener sockets are deferred to the batched incoming checks below.
497 if (fdIsUdpListener(fd
)) {
507 if (fdIsTcpListener(fd
)) {
513 debugs(5, 6, "comm_select: FD " << fd
<< " ready for reading");
515 if (NULL
== (hdl
= F
->read_handler
))
// One-shot dispatch: clear handler and bitmap bit before invoking.
518 F
->read_handler
= NULL
;
519 F
->flags
.read_pending
= 0;
520 commUpdateReadBits(fd
, NULL
);
521 hdl(fd
, F
->read_data
);
522 ++ statCounter
.select_fds
;
// Re-check the incoming sockets between individual dispatches so heavy
// normal I/O cannot starve them.
524 if (commCheckUdpIncoming
)
525 comm_select_udp_incoming();
527 if (commCheckDnsIncoming
)
528 comm_select_dns_incoming();
530 if (commCheckTcpIncoming
)
531 comm_select_tcp_incoming();
// Write dispatch: same word/bit scan over the write mask.
536 fdsp
= (fd_mask
*) & writefds
;
538 for (j
= 0; j
< maxindex
; ++j
) {
539 if ((tmask
= fdsp
[j
]) == 0)
540 continue; /* no bits here */
542 for (k
= 0; k
< FD_MASK_BITS
; ++k
) {
544 break; /* no more bits left */
546 if (!EBIT_TEST(tmask
, k
))
549 /* Found a set bit */
550 fd
= (j
* FD_MASK_BITS
) + k
;
552 EBIT_CLR(tmask
, k
); /* this will be done */
556 debugs(5, 9, "FD " << fd
<< " bit set for writing");
558 assert(FD_ISSET(fd
, &writefds
));
562 if (fdIsUdpListener(fd
)) {
572 if (fdIsTcpListener(fd
)) {
578 debugs(5, 6, "comm_select: FD " << fd
<< " ready for writing");
580 if ((hdl
= F
->write_handler
)) {
581 F
->write_handler
= NULL
;
582 commUpdateWriteBits(fd
, NULL
);
583 hdl(fd
, F
->write_data
);
584 ++ statCounter
.select_fds
;
586 if (commCheckUdpIncoming
)
587 comm_select_udp_incoming();
589 if (commCheckDnsIncoming
)
590 comm_select_dns_incoming();
592 if (commCheckTcpIncoming
)
593 comm_select_tcp_incoming();
// Deferred listener servicing (guards for calludp/calldns/calltcp are among
// the lines missing from this extraction).
599 comm_select_udp_incoming();
602 comm_select_dns_incoming();
605 comm_select_tcp_incoming();
609 statCounter
.select_time
+= (current_dtime
- start
);
// Loop until the deadline computed from msec passes.
612 } while (timeout
> current_dtime
);
613 debugs(5, 8, "comm_select: time out: " << squid_curtime
);
615 return Comm::TIMEOUT
;
/* Poll the DNS sockets for incoming replies and adapt incoming_dns_interval.
 * Unlike the UDP/TCP variants, the lower clamp here is the configured
 * dns.min_poll rather than zero.
 */
619 comm_select_dns_incoming(void)
// Nothing to do when neither DNS socket is open.
626 if (DnsSocketA
< 0 && DnsSocketB
< 0)
629 if (DnsSocketA
>= 0) {
630 fds
[nfds
] = DnsSocketA
;
634 if (DnsSocketB
>= 0) {
635 fds
[nfds
] = DnsSocketB
;
639 nevents
= comm_check_incoming_select_handlers(nfds
, fds
);
// Adaptive feedback, clamped to [dns.min_poll, MAX_INCOMING_INTERVAL].
644 incoming_dns_interval
+= Config
.comm_incoming
.dns
.average
- nevents
;
646 if (incoming_dns_interval
< Config
.comm_incoming
.dns
.min_poll
)
647 incoming_dns_interval
= Config
.comm_incoming
.dns
.min_poll
;
649 if (incoming_dns_interval
> MAX_INCOMING_INTERVAL
)
650 incoming_dns_interval
= MAX_INCOMING_INTERVAL
;
// Clamp before recording in the histogram.
652 if (nevents
> INCOMING_DNS_MAX
)
653 nevents
= INCOMING_DNS_MAX
;
655 statCounter
.comm_dns_incoming
.count(nevents
);
/* One-time initialization of this select() engine: clear the master
 * interest bitmaps and their counters, and register the cache-manager
 * "comm_select_incoming" report.
 */
659 Comm::SelectLoopInit(void)
663 FD_ZERO(&global_readfds
);
664 FD_ZERO(&global_writefds
);
665 nreadfds
= nwritefds
= 0;
667 Mgr::RegisterAction("comm_select_incoming",
668 "comm_incoming() stats",
669 commIncomingStats
, 0, 1);
673 * examine_select - debug routine.
675 * I spend the day chasing this core dump that occurs when both the client
676 * and the server side of a cache fetch simultaneously abort the
677 * connection. While I haven't really studied the code to figure out how
678 * it happens, the snippet below may prevent the cache from exiting:
680 * Call this from where the select loop fails.
/* Diagnostic routine invoked after a select() failure: probe each open
 * descriptor individually (single-fd select, then fstat) to find the bad
 * one, log its details, and tear it down - preferring its close handlers,
 * then its timeout handler, before clearing all its handlers outright.
 *   readfds/writefds - the masks that were passed to the failing select();
 *                      the bad fd is cleared from them before returning.
 * NOTE(review): several original lines (locals, the per-fd select() call,
 * returns) are missing from this extraction.
 */
683 examine_select(fd_set
* readfds
, fd_set
* writefds
)
690 AsyncCall::Pointer ch
= NULL
;
694 debugs(5, DBG_CRITICAL
, "examine_select: Examining open file descriptors...");
696 for (fd
= 0; fd
< Squid_MaxFD
; ++fd
) {
// Zero timeout: each probe is non-blocking.
699 tv
.tv_sec
= tv
.tv_usec
= 0;
701 if (FD_ISSET(fd
, readfds
))
703 else if (FD_ISSET(fd
, writefds
))
704 FD_SET(fd
, &write_x
);
708 ++ statCounter
.syscalls
.selects
;
// A descriptor that fstat()s cleanly is not the culprit.
711 if (!fstat(fd
, &sb
)) {
712 debugs(5, 5, "FD " << fd
<< " is valid.");
// Found an invalid descriptor: log everything we know about it.
717 debugs(5, DBG_CRITICAL
, "FD " << fd
<< ": " << xstrerror());
718 debugs(5, DBG_CRITICAL
, "WARNING: FD " << fd
<< " has handlers, but it's invalid.");
719 debugs(5, DBG_CRITICAL
, "FD " << fd
<< " is a " << fdTypeStr
[F
->type
] << " called '" << F
->desc
<< "'");
720 debugs(5, DBG_CRITICAL
, "tmout:" << F
->timeoutHandler
<< " read:" << F
->read_handler
<< " write:" << F
->write_handler
);
722 for (ch
= F
->closeHandler
; ch
!= NULL
; ch
= ch
->Next())
723 debugs(5, DBG_CRITICAL
, " close handler: " << ch
);
// Prefer orderly teardown: close handlers first, then timeout handler.
725 if (F
->closeHandler
!= NULL
) {
726 commCallCloseHandlers(fd
);
727 } else if (F
->timeoutHandler
!= NULL
) {
728 debugs(5, DBG_CRITICAL
, "examine_select: Calling Timeout Handler");
729 ScheduleCallHere(F
->timeoutHandler
);
// Drop every remaining handler so the dead fd cannot be selected again.
732 F
->closeHandler
= NULL
;
733 F
->timeoutHandler
= NULL
;
734 F
->read_handler
= NULL
;
735 F
->write_handler
= NULL
;
737 FD_CLR(fd
, writefds
);
/* Cache-manager report ("comm_incoming"): dump the current adaptive
 * intervals (unscaled, i.e. shifted back down by INCOMING_FACTOR) and the
 * per-call event histograms for each incoming socket type.
 *   sentry - store entry the report text is appended to
 */
744 commIncomingStats(StoreEntry
* sentry
)
746 storeAppendPrintf(sentry
, "Current incoming_udp_interval: %d\n",
747 incoming_udp_interval
>> INCOMING_FACTOR
);
748 storeAppendPrintf(sentry
, "Current incoming_dns_interval: %d\n",
749 incoming_dns_interval
>> INCOMING_FACTOR
);
750 storeAppendPrintf(sentry
, "Current incoming_tcp_interval: %d\n",
751 incoming_tcp_interval
>> INCOMING_FACTOR
);
752 storeAppendPrintf(sentry
, "\n");
753 storeAppendPrintf(sentry
, "Histogram of events per incoming socket type\n");
754 storeAppendPrintf(sentry
, "ICP Messages handled per comm_select_udp_incoming() call:\n");
755 statCounter
.comm_udp_incoming
.dump(sentry
, statHistIntDumper
);
756 storeAppendPrintf(sentry
, "DNS Messages handled per comm_select_dns_incoming() call:\n");
757 statCounter
.comm_dns_incoming
.dump(sentry
, statHistIntDumper
);
758 storeAppendPrintf(sentry
, "HTTP Messages handled per comm_select_tcp_incoming() call:\n");
759 statCounter
.comm_tcp_incoming
.dump(sentry
, statHistIntDumper
);
/* Keep global_readfds consistent with the read handler for fd: set the bit
 * when a handler is installed, clear it when the handler is removed.
 * NOTE(review): the companion counter updates (original lines 767/770,
 * presumably ++/--nreadfds) are missing from this extraction.
 */
763 commUpdateReadBits(int fd
, PF
* handler
)
765 if (handler
&& !FD_ISSET(fd
, &global_readfds
)) {
766 FD_SET(fd
, &global_readfds
);
768 } else if (!handler
&& FD_ISSET(fd
, &global_readfds
)) {
769 FD_CLR(fd
, &global_readfds
);
/* Keep global_writefds consistent with the write handler for fd; mirror of
 * commUpdateReadBits(). NOTE(review): the companion counter updates
 * (original lines 779/782, presumably ++/--nwritefds) are missing from this
 * extraction.
 */
775 commUpdateWriteBits(int fd
, PF
* handler
)
777 if (handler
&& !FD_ISSET(fd
, &global_writefds
)) {
778 FD_SET(fd
, &global_writefds
);
780 } else if (!handler
&& FD_ISSET(fd
, &global_writefds
)) {
781 FD_CLR(fd
, &global_writefds
);
786 /* Called by async-io or diskd to speed up the polling */
// NOTE(review): body missing from this extraction; per the comment at
// MAX_POLL_TIME above, this presumably lowers the effective poll delay.
788 Comm::QuickPollRequired(void)
793 #endif /* USE_SELECT */