]> git.ipfire.org Git - thirdparty/squid.git/blob - src/comm.cc
4b88933c0150f2b7e119f0d7daa0b37fba221714
[thirdparty/squid.git] / src / comm.cc
1 /*
2 * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 05 Socket Functions */
10
11 #include "squid.h"
12 #include "base/AsyncFunCalls.h"
13 #include "base/OnOff.h"
14 #include "ClientInfo.h"
15 #include "comm/AcceptLimiter.h"
16 #include "comm/comm_internal.h"
17 #include "comm/Connection.h"
18 #include "comm/IoCallback.h"
19 #include "comm/Loops.h"
20 #include "comm/Read.h"
21 #include "comm/TcpAcceptor.h"
22 #include "comm/Write.h"
23 #include "compat/cmsg.h"
24 #include "DescriptorSet.h"
25 #include "event.h"
26 #include "fd.h"
27 #include "fde.h"
28 #include "globals.h"
29 #include "icmp/net_db.h"
30 #include "ip/Intercept.h"
31 #include "ip/QosConfig.h"
32 #include "ip/tools.h"
33 #include "pconn.h"
34 #include "sbuf/SBuf.h"
35 #include "sbuf/Stream.h"
36 #include "SquidConfig.h"
37 #include "StatCounters.h"
38 #include "StoreIOBuffer.h"
39 #include "tools.h"
40
41 #if USE_OPENSSL
42 #include "ssl/support.h"
43 #endif
44
45 #include <cerrno>
46 #include <cmath>
47 #if _SQUID_CYGWIN_
48 #include <sys/ioctl.h>
49 #endif
50 #ifdef HAVE_NETINET_TCP_H
51 #include <netinet/tcp.h>
52 #endif
53 #if HAVE_SYS_UN_H
54 #include <sys/un.h>
55 #endif
56
/*
 * New C-like simple comm code. This stuff is a mess and doesn't really buy us anything.
 */

static IOCB commHalfClosedReader;
static int comm_openex(int sock_type, int proto, Ip::Address &, int flags, const char *note);
static void comm_init_opened(const Comm::ConnectionPointer &conn, const char *note, struct addrinfo *AI);
static int comm_apply_flags(int new_socket, Ip::Address &addr, int flags, struct addrinfo *AI);

#if USE_DELAY_POOLS
CBDATA_CLASS_INIT(CommQuotaQueue);

static void commHandleWriteHelper(void * data);
#endif

/* STATIC */

static DescriptorSet *TheHalfClosed = nullptr; ///< the set of half-closed FDs
static bool WillCheckHalfClosed = false; ///< whether a commHalfClosedCheck() event is already scheduled
static EVH commHalfClosedCheck;
static void commPlanHalfClosedCheck();

static Comm::Flag commBind(int s, struct addrinfo &);
static void commSetBindAddressNoPort(int);
static void commSetReuseAddr(int);
static void commConfigureLinger(int fd, OnOff);
#ifdef TCP_NODELAY
static void commSetTcpNoDelay(int);
#endif
static void commSetTcpRcvbuf(int, int);
88 bool
89 isOpen(const int fd)
90 {
91 return fd >= 0 && fd_table && fd_table[fd].flags.open != 0;
92 }
93
/**
 * Empty the read buffers
 *
 * This is a magical routine that empties the read buffers.
 * Under some platforms (Linux) if a buffer has data in it before
 * you call close(), the socket will hang and take quite a while
 * to timeout.
 */
static void
comm_empty_os_read_buffers(int fd)
{
#if _SQUID_LINUX_
#if USE_OPENSSL
    // Bug 4146: SSL-Bump BIO does not release sockets on close.
    if (fd_table[fd].ssl)
        return;
#endif

    /* prevent those nasty RST packets */
    char discardBuf[SQUID_TCP_SO_RCVBUF];
    if (fd_table[fd].flags.nonblocking && fd_table[fd].type != FD_MSGHDR) {
        // drain until the OS has nothing more buffered for us
        while (FD_READ_METHOD(fd, discardBuf, sizeof(discardBuf)) > 0) {}
    }
#else
    (void)fd;
#endif
}
121
122 /**
123 * synchronous wrapper around udp socket functions
124 */
125 int
126 comm_udp_recvfrom(int fd, void *buf, size_t len, int flags, Ip::Address &from)
127 {
128 ++ statCounter.syscalls.sock.recvfroms;
129 debugs(5,8, "comm_udp_recvfrom: FD " << fd << " from " << from);
130 struct addrinfo *AI = nullptr;
131 Ip::Address::InitAddr(AI);
132 int x = recvfrom(fd, buf, len, flags, AI->ai_addr, &AI->ai_addrlen);
133 from = *AI;
134 Ip::Address::FreeAddr(AI);
135 return x;
136 }
137
138 int
139 comm_udp_recv(int fd, void *buf, size_t len, int flags)
140 {
141 Ip::Address nul;
142 return comm_udp_recvfrom(fd, buf, len, flags, nul);
143 }
144
/// thin synchronous wrapper around send(2); no Squid-level accounting here
ssize_t
comm_udp_send(int s, const void *buf, size_t len, int flags)
{
    return send(s, buf, len, flags);
}
150
151 bool
152 comm_has_incomplete_write(int fd)
153 {
154 assert(isOpen(fd) && COMMIO_FD_WRITECB(fd) != nullptr);
155 return COMMIO_FD_WRITECB(fd)->active();
156 }
157
/**
 * Queue a write. handler/handler_data are called when the write fully
 * completes, on error, or on file descriptor close.
 */

/* Return the local port associated with fd. */
unsigned short
comm_local_port(int fd)
{
    Ip::Address temp;
    struct addrinfo *addr = nullptr;
    fde *F = &fd_table[fd];

    /* If the fd is closed already, just return */

    if (!F->flags.open) {
        debugs(5, 0, "comm_local_port: FD " << fd << " has been closed.");
        return 0;
    }

    // use the cached port, if one was recorded earlier
    if (F->local_addr.port())
        return F->local_addr.port();

    // match the socket family so the getsockname() result parses correctly
    if (F->sock_family == AF_INET)
        temp.setIPv4();

    Ip::Address::InitAddr(addr);

    if (getsockname(fd, addr->ai_addr, &(addr->ai_addrlen)) ) {
        int xerrno = errno;
        debugs(50, DBG_IMPORTANT, "ERROR: " << MYNAME << "Failed to retrieve TCP/UDP port number for socket: FD " << fd << ": " << xstrerr(xerrno));
        Ip::Address::FreeAddr(addr);
        return 0;
    }
    temp = *addr;

    Ip::Address::FreeAddr(addr);

    if (F->local_addr.isAnyAddr()) {
        /* save the whole local address, not just the port. */
        F->local_addr = temp;
    } else {
        // cache just the discovered port number
        F->local_addr.port(temp.port());
    }

    debugs(5, 6, "comm_local_port: FD " << fd << ": port " << F->local_addr.port() << "(family=" << F->sock_family << ")");
    return F->local_addr.port();
}
206
207 /// sets the IP_BIND_ADDRESS_NO_PORT socket option to optimize ephemeral port
208 /// reuse by outgoing TCP connections that must bind(2) to a source IP address
209 static void
210 commSetBindAddressNoPort(const int fd)
211 {
212 #if defined(IP_BIND_ADDRESS_NO_PORT)
213 int flag = 1;
214 if (setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, reinterpret_cast<char*>(&flag), sizeof(flag)) < 0) {
215 const auto savedErrno = errno;
216 debugs(50, DBG_IMPORTANT, "ERROR: setsockopt(IP_BIND_ADDRESS_NO_PORT) failure: " << xstrerr(savedErrno));
217 }
218 #else
219 (void)fd;
220 #endif
221 }
222
223 static Comm::Flag
224 commBind(int s, struct addrinfo &inaddr)
225 {
226 ++ statCounter.syscalls.sock.binds;
227
228 if (bind(s, inaddr.ai_addr, inaddr.ai_addrlen) == 0) {
229 debugs(50, 6, "bind socket FD " << s << " to " << fd_table[s].local_addr);
230 return Comm::OK;
231 }
232 int xerrno = errno;
233 debugs(50, DBG_CRITICAL, "ERROR: " << MYNAME << "Cannot bind socket FD " << s << " to " << fd_table[s].local_addr << ": " << xstrerr(xerrno));
234
235 return Comm::COMM_ERROR;
236 }
237
238 /**
239 * Create a socket. Default is blocking, stream (TCP) socket. IO_TYPE
240 * is OR of flags specified in comm.h. Defaults TOS
241 */
242 int
243 comm_open(int sock_type,
244 int proto,
245 Ip::Address &addr,
246 int flags,
247 const char *note)
248 {
249 // assume zero-port callers do not need to know the assigned port right away
250 if (sock_type == SOCK_STREAM && addr.port() == 0 && ((flags & COMM_DOBIND) || !addr.isAnyAddr()))
251 flags |= COMM_DOBIND_PORT_LATER;
252
253 return comm_openex(sock_type, proto, addr, flags, note);
254 }
255
/// opens a listener socket for the given (to-be-listened-on) connection,
/// storing the new FD (or -1 on failure) in conn->fd
void
comm_open_listener(int sock_type,
                   int proto,
                   Comm::ConnectionPointer &conn,
                   const char *note)
{
    /* all listener sockets require bind() */
    conn->flags |= COMM_DOBIND;

    /* attempt native enabled port. */
    conn->fd = comm_openex(sock_type, proto, conn->local, conn->flags, note);
}
268
269 int
270 comm_open_listener(int sock_type,
271 int proto,
272 Ip::Address &addr,
273 int flags,
274 const char *note)
275 {
276 int sock = -1;
277
278 /* all listener sockets require bind() */
279 flags |= COMM_DOBIND;
280
281 /* attempt native enabled port. */
282 sock = comm_openex(sock_type, proto, addr, flags, note);
283
284 return sock;
285 }
286
/// whether anErrno reports exhaustion of a file descriptor table
static bool
limitError(int const anErrno)
{
    switch (anErrno) {
    case ENFILE:
    case EMFILE:
        return true;
    default:
        return false;
    }
}
292
293 static void
294 comm_set_v6only(int fd, int tos)
295 {
296 #ifdef IPV6_V6ONLY
297 if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *) &tos, sizeof(int)) < 0) {
298 int xerrno = errno;
299 debugs(50, DBG_IMPORTANT, MYNAME << "setsockopt(IPV6_V6ONLY) " << (tos?"ON":"OFF") << " for FD " << fd << ": " << xstrerr(xerrno));
300 }
301 #else
302 debugs(50, DBG_CRITICAL, MYNAME << "WARNING: setsockopt(IPV6_V6ONLY) not supported on this platform");
303 #endif /* sockopt */
304 }
305
/**
 * Set the socket option required for TPROXY spoofing for:
 * - Linux TPROXY v4 support,
 * - OpenBSD divert-to support,
 * - FreeBSD IPFW TPROXY v4 support.
 */
static void
comm_set_transparent(int fd)
{
#if _SQUID_LINUX_ && defined(IP_TRANSPARENT) // Linux
# define soLevel SOL_IP
# define soFlag IP_TRANSPARENT
    // Linux branch does not raise privileges, so there is nothing to drop later
    bool doneSuid = false;

#elif defined(SO_BINDANY) // OpenBSD 4.7+ and NetBSD with PF
# define soLevel SOL_SOCKET
# define soFlag SO_BINDANY
    enter_suid();
    bool doneSuid = true;

#elif defined(IP_BINDANY) // FreeBSD with IPFW
# define soLevel IPPROTO_IP
# define soFlag IP_BINDANY
    enter_suid();
    bool doneSuid = true;

#else
    debugs(50, DBG_CRITICAL, "WARNING: comm_open: setsockopt(TPROXY) not supported on this platform");
    (void)fd;
#endif /* sockopt */

#if defined(soLevel) && defined(soFlag)
    int tos = 1;
    if (setsockopt(fd, soLevel, soFlag, (char *) &tos, sizeof(int)) < 0) {
        int xerrno = errno;
        debugs(50, DBG_IMPORTANT, MYNAME << "setsockopt(TPROXY) on FD " << fd << ": " << xstrerr(xerrno));
    } else {
        /* mark the socket as having transparent options */
        fd_table[fd].flags.transparent = true;
    }
    // drop privileges if the platform branch above raised them
    if (doneSuid)
        leave_suid();
#endif
}
350
/**
 * Create a socket. Default is blocking, stream (TCP) socket. IO_TYPE
 * is OR of flags specified in defines.h:COMM_*
 */
static int
comm_openex(int sock_type,
            int proto,
            Ip::Address &addr,
            int flags,
            const char *note)
{
    int new_socket;
    struct addrinfo *AI = nullptr;

    /* Create socket for accepting new connections. */
    ++ statCounter.syscalls.sock.sockets;

    if (!Ip::EnableIpv6 && addr.isIPv6()) {
        debugs(50, 2, "refusing to open an IPv6 socket when IPv6 support is disabled: " << addr);
        errno = ENOTSUP;
        return -1;
    }

    /* Setup the socket addrinfo details for use */
    addr.getAddrInfo(AI);
    AI->ai_socktype = sock_type;
    AI->ai_protocol = proto;

    debugs(50, 3, "comm_openex: Attempt open socket for: " << addr );

    new_socket = socket(AI->ai_family, AI->ai_socktype, AI->ai_protocol);
    // remember the first errno: the IPv4 fallback attempt below may clobber it
    const auto firstErrNo = errno;

    /* under IPv6 there is the possibility IPv6 is present but disabled. */
    /* try again as IPv4-native if possible */
    if ( new_socket < 0 && Ip::EnableIpv6 && addr.isIPv6() && addr.setIPv4() ) {
        /* attempt to open this IPv4-only. */
        Ip::Address::FreeAddr(AI);
        /* Setup the socket addrinfo details for use */
        addr.getAddrInfo(AI);
        AI->ai_socktype = sock_type;
        AI->ai_protocol = proto;
        debugs(50, 3, "Attempt fallback open socket for: " << addr );
        new_socket = socket(AI->ai_family, AI->ai_socktype, AI->ai_protocol);
        // TODO: Report failures of this second socket() call.
        // if both socket() calls fail, we use firstErrNo
        debugs(50, 2, "attempt open " << note << " socket on: " << addr);
    }

    if (new_socket < 0) {
        /* Increase the number of reserved fd's if calls to socket()
         * are failing because the open file table is full. This
         * limits the number of simultaneous clients */

        if (limitError(firstErrNo)) {
            debugs(50, DBG_IMPORTANT, MYNAME << "socket failure: " << xstrerr(firstErrNo));
            fdAdjustReserved();
        } else {
            debugs(50, DBG_CRITICAL, MYNAME << "socket failure: " << xstrerr(firstErrNo));
        }

        Ip::Address::FreeAddr(AI);

        errno = firstErrNo; // restore for caller
        return -1;
    }

    // XXX: temporary for the transition. comm_openex will eventually have a conn to play with.
    Comm::ConnectionPointer conn = new Comm::Connection;
    conn->local = addr;
    conn->fd = new_socket;

    debugs(50, 3, "comm_openex: Opened socket " << conn << " : family=" << AI->ai_family << ", type=" << AI->ai_socktype << ", protocol=" << AI->ai_protocol );

    // restrict IPv6 sockets to IPv6 traffic on split-stack systems
    if ( Ip::EnableIpv6&IPV6_SPECIAL_SPLITSTACK && addr.isIPv6() )
        comm_set_v6only(conn->fd, 1);

    /* Windows Vista supports Dual-Sockets. BUT defaults them to V6ONLY. Turn it OFF. */
    /* Other OS may have this administratively disabled for general use. Same deal. */
    if ( Ip::EnableIpv6&IPV6_SPECIAL_V4MAPPING && addr.isIPv6() )
        comm_set_v6only(conn->fd, 0);

    comm_init_opened(conn, note, AI);
    new_socket = comm_apply_flags(conn->fd, addr, flags, AI);

    Ip::Address::FreeAddr(AI);

    // XXX transition only. prevent conn from closing the new FD on function exit.
    conn->fd = -1;
    // XXX: firstErrNo is not applicable here -- socket() calls succeeded above!
    // TODO: Stop reporting error codes via errno.
    errno = firstErrNo;
    return new_socket;
}
445
446 /// update FD tables after a local or remote (IPC) comm_openex();
447 void
448 comm_init_opened(const Comm::ConnectionPointer &conn,
449 const char *note,
450 struct addrinfo *AI)
451 {
452 assert(Comm::IsConnOpen(conn));
453 assert(AI);
454
455 /* update fdstat */
456 debugs(5, 5, conn << " is a new socket");
457
458 assert(!isOpen(conn->fd)); // NP: global isOpen checks the fde entry for openness not the Comm::Connection
459 fd_open(conn->fd, FD_SOCKET, note);
460
461 fde *F = &fd_table[conn->fd];
462 F->local_addr = conn->local;
463
464 F->sock_family = AI->ai_family;
465 }
466
/// apply flags after a local comm_open*() call;
/// returns new_socket or -1 on error
static int
comm_apply_flags(int new_socket,
                 Ip::Address &addr,
                 int flags,
                 struct addrinfo *AI)
{
    assert(new_socket >= 0);
    assert(AI);
    const int sock_type = AI->ai_socktype;

    // close-on-exec is the default; COMM_NOCLOEXEC opts out
    if (!(flags & COMM_NOCLOEXEC))
        commSetCloseOnExec(new_socket);

    if ((flags & COMM_REUSEADDR))
        commSetReuseAddr(new_socket);

    if (addr.port() > (unsigned short) 0) {
#if _SQUID_WINDOWS_
        if (sock_type != SOCK_DGRAM)
#endif
            commConfigureLinger(new_socket, OnOff::off);

        if (opt_reuseaddr)
            commSetReuseAddr(new_socket);
    }

    /* MUST be done before binding or face OS Error: "(99) Cannot assign requested address"... */
    if ((flags & COMM_TRANSPARENT)) {
        comm_set_transparent(new_socket);
    }

    if ( (flags & COMM_DOBIND) || addr.port() > 0 || !addr.isAnyAddr() ) {
        if ( !(flags & COMM_DOBIND) && addr.isAnyAddr() )
            debugs(5, DBG_IMPORTANT,"WARNING: Squid is attempting to bind() port " << addr << " without being a listener.");
        if ( addr.isNoAddr() )
            debugs(5, DBG_CRITICAL, "ERROR: Squid is attempting to bind() port " << addr << "!!");

#if defined(SO_REUSEPORT)
        if (flags & COMM_REUSEPORT) {
            int on = 1;
            if (setsockopt(new_socket, SOL_SOCKET, SO_REUSEPORT, reinterpret_cast<char*>(&on), sizeof(on)) < 0) {
                const auto savedErrno = errno;
                const auto errorMessage = ToSBuf("cannot enable SO_REUSEPORT socket option when binding to ",
                                                 addr, ": ", xstrerr(savedErrno));
                // warn-and-continue while reconfiguring; fail hard otherwise
                if (reconfiguring)
                    debugs(5, DBG_IMPORTANT, "ERROR: " << errorMessage);
                else
                    throw TexcHere(errorMessage);
            }
        }
#endif

        // delay ephemeral port assignment until connect(2) where possible
        if ((flags & COMM_DOBIND_PORT_LATER))
            commSetBindAddressNoPort(new_socket);

        if (commBind(new_socket, *AI) != Comm::OK) {
            comm_close(new_socket);
            return -1;
        }
    }

    if (flags & COMM_NONBLOCKING)
        if (commSetNonBlocking(new_socket) == Comm::COMM_ERROR) {
            comm_close(new_socket);
            return -1;
        }

#ifdef TCP_NODELAY
    if (sock_type == SOCK_STREAM)
        commSetTcpNoDelay(new_socket);

#endif

    if (Config.tcpRcvBufsz > 0 && sock_type == SOCK_STREAM)
        commSetTcpRcvbuf(new_socket, Config.tcpRcvBufsz);

    return new_socket;
}
547
548 void
549 comm_import_opened(const Comm::ConnectionPointer &conn,
550 const char *note,
551 struct addrinfo *AI)
552 {
553 debugs(5, 2, conn);
554 assert(Comm::IsConnOpen(conn));
555 assert(AI);
556
557 comm_init_opened(conn, note, AI);
558
559 if ((conn->flags & COMM_TRANSPARENT))
560 fd_table[conn->fd].flags.transparent = true;
561
562 if (conn->flags & COMM_NONBLOCKING)
563 fd_table[conn->fd].flags.nonblocking = true;
564
565 #ifdef TCP_NODELAY
566 if (AI->ai_socktype == SOCK_STREAM)
567 fd_table[conn->fd].flags.nodelay = true;
568 #endif
569
570 /* no fd_table[fd].flags. updates needed for these conditions:
571 * if ((flags & COMM_REUSEADDR)) ...
572 * if ((flags & COMM_DOBIND) ...) ...
573 */
574 }
575
576 // XXX: now that raw-FD timeouts are only unset for pipes and files this SHOULD be a no-op.
577 // With handler already unset. Leaving this present until that can be verified for all code paths.
578 void
579 commUnsetFdTimeout(int fd)
580 {
581 debugs(5, 3, "Remove timeout for FD " << fd);
582 assert(fd >= 0);
583 assert(fd < Squid_MaxFD);
584 fde *F = &fd_table[fd];
585 assert(F->flags.open);
586
587 F->timeoutHandler = nullptr;
588 F->timeout = 0;
589 }
590
591 void
592 commSetConnTimeout(const Comm::ConnectionPointer &conn, time_t timeout, AsyncCall::Pointer &callback)
593 {
594 debugs(5, 3, conn << " timeout " << timeout);
595 assert(Comm::IsConnOpen(conn));
596 assert(conn->fd < Squid_MaxFD);
597 fde *F = &fd_table[conn->fd];
598 assert(F->flags.open);
599
600 if (timeout < 0) {
601 F->timeoutHandler = nullptr;
602 F->timeout = 0;
603 } else {
604 if (callback != nullptr) {
605 typedef CommTimeoutCbParams Params;
606 Params &params = GetCommParams<Params>(callback);
607 params.conn = conn;
608 F->timeoutHandler = callback;
609 }
610
611 F->timeout = squid_curtime + timeout;
612 }
613 }
614
615 void
616 commUnsetConnTimeout(const Comm::ConnectionPointer &conn)
617 {
618 debugs(5, 3, "Remove timeout for " << conn);
619 AsyncCall::Pointer nil;
620 commSetConnTimeout(conn, -1, nil);
621 }
622
/**
 * Connect socket FD to given remote address.
 * If return value is an error flag (COMM_ERROR, ERR_CONNECT, ERR_PROTOCOL, etc.),
 * then error code will also be returned in errno.
 */
int
comm_connect_addr(int sock, const Ip::Address &address)
{
    Comm::Flag status = Comm::OK;
    fde *F = &fd_table[sock];
    int x = 0;
    int err = 0;
    socklen_t errlen;
    struct addrinfo *AI = nullptr;

    assert(address.port() != 0);

    debugs(5, 9, "connecting socket FD " << sock << " to " << address << " (want family: " << F->sock_family << ")");

    /* Handle IPv6 over IPv4-only socket case.
     * this case must presently be handled here since the getAddrInfo asserts on bad mappings.
     * NP: because commResetFD is private to ConnStateData we have to return an error and
     * trust its handled properly.
     */
    if (F->sock_family == AF_INET && !address.isIPv4()) {
        errno = ENETUNREACH;
        return Comm::ERR_PROTOCOL;
    }

    /* Handle IPv4 over IPv6-only socket case.
     * This case is presently handled here as it's both a known case and it's
     * uncertain what error will be returned by the IPv6 stack in such case. It's
     * possible this will also be handled by the errno checks below after connect()
     * but needs careful cross-platform verification, and verifying the address
     * condition here is simple.
     */
    if (!F->local_addr.isIPv4() && address.isIPv4()) {
        errno = ENETUNREACH;
        return Comm::ERR_PROTOCOL;
    }

    address.getAddrInfo(AI, F->sock_family);

    /* Establish connection. */
    int xerrno = 0;

    if (!F->flags.called_connect) {
        // first attempt for this FD: issue the actual connect(2)
        F->flags.called_connect = true;
        ++ statCounter.syscalls.sock.connects;

        errno = 0;
        if ((x = connect(sock, AI->ai_addr, AI->ai_addrlen)) < 0) {
            xerrno = errno;
            debugs(5,5, "sock=" << sock << ", addrinfo(" <<
                   " flags=" << AI->ai_flags <<
                   ", family=" << AI->ai_family <<
                   ", socktype=" << AI->ai_socktype <<
                   ", protocol=" << AI->ai_protocol <<
                   ", &addr=" << AI->ai_addr <<
                   ", addrlen=" << AI->ai_addrlen << " )");
            debugs(5, 9, "connect FD " << sock << ": (" << x << ") " << xstrerr(xerrno));
            debugs(14,9, "connecting to: " << address);

        } else if (x == 0) {
            // XXX: ICAP code refuses callbacks during a pending comm_ call
            // Async calls development will fix this.
            x = -1;
            xerrno = EINPROGRESS;
        }

    } else {
        // follow-up attempt: a connect() is already pending on this FD;
        // fetch its outcome via SO_ERROR instead of calling connect() again
        errno = 0;
        errlen = sizeof(err);
        x = getsockopt(sock, SOL_SOCKET, SO_ERROR, &err, &errlen);
        if (x == 0)
            xerrno = err;

#if _SQUID_SOLARIS_
        /*
         * Solaris 2.4's socket emulation doesn't allow you
         * to determine the error from a failed non-blocking
         * connect and just returns EPIPE. Create a fake
         * error message for connect. -- fenner@parc.xerox.com
         */
        if (x < 0 && xerrno == EPIPE)
            xerrno = ENOTCONN;
        else
            xerrno = errno;
#endif
    }

    Ip::Address::FreeAddr(AI);

    // translate the saved errno into a Comm::Flag result
    errno = xerrno;
    if (xerrno == 0 || xerrno == EISCONN)
        status = Comm::OK;
    else if (ignoreErrno(xerrno))
        status = Comm::INPROGRESS;
    else if (xerrno == EAFNOSUPPORT || xerrno == EINVAL)
        return Comm::ERR_PROTOCOL;
    else
        return Comm::COMM_ERROR;

    // record the peer endpoint in the FD table entry
    address.toStr(F->ipaddr, MAX_IPSTRLEN);

    F->remote_port = address.port(); /* remote_port is HS */

    if (status == Comm::OK) {
        debugs(5, DBG_DATA, "comm_connect_addr: FD " << sock << " connected to " << address);
    } else if (status == Comm::INPROGRESS) {
        debugs(5, DBG_DATA, "comm_connect_addr: FD " << sock << " connection pending");
    }

    errno = xerrno; // restore for caller, per the contract documented above
    return status;
}
739
740 void
741 commCallCloseHandlers(int fd)
742 {
743 fde *F = &fd_table[fd];
744 debugs(5, 5, "commCallCloseHandlers: FD " << fd);
745
746 while (F->closeHandler != nullptr) {
747 AsyncCall::Pointer call = F->closeHandler;
748 F->closeHandler = call->Next();
749 call->setNext(nullptr);
750 // If call is not canceled schedule it for execution else ignore it
751 if (!call->canceled()) {
752 debugs(5, 5, "commCallCloseHandlers: ch->handler=" << call);
753 // XXX: Without the following code, callback fd may be -1.
754 // typedef CommCloseCbParams Params;
755 // auto &params = GetCommParams<Params>(call);
756 // params.fd = fd;
757 ScheduleCallHere(call);
758 }
759 }
760 }
761
762 /// sets SO_LINGER socket(7) option
763 /// \param enabled -- whether linger will be active (sets linger::l_onoff)
764 static void
765 commConfigureLinger(const int fd, const OnOff enabled)
766 {
767 struct linger l = {};
768 l.l_onoff = (enabled == OnOff::on ? 1 : 0);
769 l.l_linger = 0; // how long to linger for, in seconds
770
771 fd_table[fd].flags.harshClosureRequested = (l.l_onoff && !l.l_linger); // close(2) sends TCP RST if true
772
773 if (setsockopt(fd, SOL_SOCKET, SO_LINGER, reinterpret_cast<char*>(&l), sizeof(l)) < 0) {
774 const auto xerrno = errno;
775 debugs(50, DBG_CRITICAL, "ERROR: Failed to set closure behavior (SO_LINGER) for FD " << fd << ": " << xstrerr(xerrno));
776 }
777 }
778
779 /**
780 * enable linger with time of 0 so that when the socket is
781 * closed, TCP generates a RESET
782 */
783 void
784 comm_reset_close(const Comm::ConnectionPointer &conn)
785 {
786 if (Comm::IsConnOpen(conn)) {
787 commConfigureLinger(conn->fd, OnOff::on);
788 debugs(5, 7, conn->id);
789 conn->close();
790 }
791 }
792
793 // Legacy close function.
794 void
795 old_comm_reset_close(int fd)
796 {
797 if (fd >= 0) {
798 commConfigureLinger(fd, OnOff::on);
799 comm_close(fd);
800 }
801 }
802
/// starts an orderly TLS connection teardown for the given FD by sending
/// the TLS close notification
static void
commStartTlsClose(const int fd)
{
    Security::SessionSendGoodbye(fd_table[fd].ssl);
}
808
809 static void
810 comm_close_complete(const int fd)
811 {
812 auto F = &fd_table[fd];
813 F->ssl.reset();
814 F->dynamicTlsContext.reset();
815 fd_close(fd); /* update fdstat */
816 close(fd);
817
818 ++ statCounter.syscalls.sock.closes;
819
820 /* When one connection closes, give accept() a chance, if need be */
821 CodeContext::Reset(); // exit FD-specific context
822 Comm::AcceptLimiter::Instance().kick();
823 }
824
/*
 * Close the socket fd.
 *
 * + call write handlers with ERR_CLOSING
 * + call read handlers with ERR_CLOSING
 * + call closing handlers
 *
 * A deferred reader has no Comm read handler mentioned above. To stay in sync,
 * such a reader must register a Comm closing handler.
 */
void
_comm_close(int fd, char const *file, int line)
{
    debugs(5, 3, "start closing FD " << fd << " by " << file << ":" << line);
    assert(fd >= 0);
    assert(fd < Squid_MaxFD);

    fde *F = &fd_table[fd];

    // a closure sequence is already in progress; do not restart it
    if (F->closing())
        return;

    /* XXX: is this obsolete behind F->closing() ? */
    if ( (shutting_down || reconfiguring) && (!F->flags.open || F->type == FD_FILE))
        return;

    /* The following fails because ipc.c is doing calls to pipe() to create sockets! */
    if (!isOpen(fd)) {
        debugs(50, DBG_IMPORTANT, "ERROR: Squid BUG #3556: FD " << fd << " is not an open socket.");
        // XXX: do we need to run close(fd) or fd_close(fd) here?
        return;
    }

    assert(F->type != FD_FILE);

    F->flags.close_request = true;

    // We have caller's context and fde::codeContext. In the unlikely event they
    // differ, it is not clear which context is more applicable to this closure.
    // For simplicity sake, we remain in the caller's context while still
    // allowing individual advanced callbacks to overwrite it.

    // start an orderly TLS shutdown, unless an abrupt (RST) closure was requested
    if (F->ssl && !F->flags.harshClosureRequested) {
        const auto startCall = asyncCall(5, 4, "commStartTlsClose",
                                         callDialer(commStartTlsClose, fd));
        ScheduleCallHere(startCall);
    }

    // a half-closed fd may lack a reader, so we stop monitoring explicitly
    if (commHasHalfClosedMonitor(fd))
        commStopHalfClosedMonitor(fd);
    commUnsetFdTimeout(fd);

    // notify read/write handlers after canceling select reservations, if any
    if (COMMIO_FD_WRITECB(fd)->active()) {
        Comm::SetSelect(fd, COMM_SELECT_WRITE, nullptr, nullptr, 0);
        COMMIO_FD_WRITECB(fd)->finish(Comm::ERR_CLOSING, 0);
    }
    if (COMMIO_FD_READCB(fd)->active()) {
        Comm::SetSelect(fd, COMM_SELECT_READ, nullptr, nullptr, 0);
        COMMIO_FD_READCB(fd)->finish(Comm::ERR_CLOSING, 0);
    }

#if USE_DELAY_POOLS
    // detach from any delay-pool bucket still waiting on this FD
    if (BandwidthBucket *bucket = BandwidthBucket::SelectBucket(F)) {
        if (bucket->selectWaiting)
            bucket->onFdClosed();
    }
#endif

    commCallCloseHandlers(fd);

    comm_empty_os_read_buffers(fd);

    // must use async call to wait for all callbacks
    // scheduled before comm_close() to finish
    const auto completeCall = asyncCall(5, 4, "comm_close_complete",
                                        callDialer(comm_close_complete, fd));
    ScheduleCallHere(completeCall);
}
905
906 /* Send a udp datagram to specified TO_ADDR. */
907 int
908 comm_udp_sendto(int fd,
909 const Ip::Address &to_addr,
910 const void *buf,
911 int len)
912 {
913 ++ statCounter.syscalls.sock.sendtos;
914
915 debugs(50, 3, "comm_udp_sendto: Attempt to send UDP packet to " << to_addr <<
916 " using FD " << fd << " using Port " << comm_local_port(fd) );
917
918 struct addrinfo *AI = nullptr;
919 to_addr.getAddrInfo(AI, fd_table[fd].sock_family);
920 int x = sendto(fd, buf, len, 0, AI->ai_addr, AI->ai_addrlen);
921 int xerrno = errno;
922 Ip::Address::FreeAddr(AI);
923
924 if (x >= 0) {
925 errno = xerrno; // restore for caller to use
926 return x;
927 }
928
929 #if _SQUID_LINUX_
930 if (ECONNREFUSED != xerrno)
931 #endif
932 debugs(50, DBG_IMPORTANT, MYNAME << "FD " << fd << ", (family=" << fd_table[fd].sock_family << ") " << to_addr << ": " << xstrerr(xerrno));
933
934 errno = xerrno; // restore for caller to use
935 return Comm::COMM_ERROR;
936 }
937
938 AsyncCall::Pointer
939 comm_add_close_handler(int fd, CLCB * handler, void *data)
940 {
941 debugs(5, 5, "comm_add_close_handler: FD " << fd << ", handler=" <<
942 handler << ", data=" << data);
943
944 AsyncCall::Pointer call=commCbCall(5,4, "SomeCloseHandler",
945 CommCloseCbPtrFun(handler, data));
946 comm_add_close_handler(fd, call);
947 return call;
948 }
949
950 void
951 comm_add_close_handler(int fd, AsyncCall::Pointer &call)
952 {
953 debugs(5, 5, "comm_add_close_handler: FD " << fd << ", AsyncCall=" << call);
954
955 /*TODO:Check for a similar scheduled AsyncCall*/
956 // for (c = fd_table[fd].closeHandler; c; c = c->next)
957 // assert(c->handler != handler || c->data != data);
958
959 // TODO: Consider enhancing AsyncCallList to support random-access close
960 // handlers, perhaps after upgrading the remaining legacy CLCB handlers.
961 call->setNext(fd_table[fd].closeHandler);
962
963 fd_table[fd].closeHandler = call;
964 }
965
// remove function-based close handler
void
comm_remove_close_handler(int fd, CLCB * handler, void *data)
{
    assert(isOpen(fd));
    /* Find handler in list */
    debugs(5, 5, "comm_remove_close_handler: FD " << fd << ", handler=" <<
           handler << ", data=" << data);

    // walk the singly-linked list, tracking the previous node for dequeue()
    AsyncCall::Pointer p, prev = nullptr;
    for (p = fd_table[fd].closeHandler; p != nullptr; prev = p, p = p->Next()) {
        typedef CommCbFunPtrCallT<CommCloseCbPtrFun> Call;
        const Call *call = dynamic_cast<const Call*>(p.getRaw());
        if (!call) // method callbacks have their own comm_remove_close_handler
            continue;

        // match on both the function pointer and its callback data
        typedef CommCloseCbParams Params;
        const Params &params = GetCommParams<Params>(p);
        if (call->dialer.handler == handler && params.data == data)
            break; /* This is our handler */
    }

    // comm_close removes all close handlers so our handler may be gone
    if (p != nullptr) {
        p->dequeue(fd_table[fd].closeHandler, prev);
        p->cancel("comm_remove_close_handler");
    }
}
994
995 // remove method-based close handler
996 void
997 comm_remove_close_handler(int fd, AsyncCall::Pointer &call)
998 {
999 assert(isOpen(fd));
1000 debugs(5, 5, "comm_remove_close_handler: FD " << fd << ", AsyncCall=" << call);
1001
1002 // comm_close removes all close handlers so our handler may be gone
1003 AsyncCall::Pointer p, prev = nullptr;
1004 for (p = fd_table[fd].closeHandler; p != nullptr && p != call; prev = p, p = p->Next());
1005
1006 if (p != nullptr)
1007 p->dequeue(fd_table[fd].closeHandler, prev);
1008 call->cancel("comm_remove_close_handler");
1009 }
1010
1011 static void
1012 commSetReuseAddr(int fd)
1013 {
1014 int on = 1;
1015 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on)) < 0) {
1016 int xerrno = errno;
1017 debugs(50, DBG_IMPORTANT, MYNAME << "FD " << fd << ": " << xstrerr(xerrno));
1018 }
1019 }
1020
/// attempts to set the socket receive and send buffer sizes (and, where
/// available, clamp the advertised TCP window) to the given size;
/// failures are logged but otherwise ignored
static void
commSetTcpRcvbuf(int fd, int size)
{
    if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (char *) &size, sizeof(size)) < 0) {
        int xerrno = errno;
        debugs(50, DBG_IMPORTANT, MYNAME << "FD " << fd << ", SIZE " << size << ": " << xstrerr(xerrno));
    }
    if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (char *) &size, sizeof(size)) < 0) {
        int xerrno = errno;
        debugs(50, DBG_IMPORTANT, MYNAME << "FD " << fd << ", SIZE " << size << ": " << xstrerr(xerrno));
    }
#ifdef TCP_WINDOW_CLAMP
    if (setsockopt(fd, SOL_TCP, TCP_WINDOW_CLAMP, (char *) &size, sizeof(size)) < 0) {
        int xerrno = errno;
        debugs(50, DBG_IMPORTANT, MYNAME << "FD " << fd << ", SIZE " << size << ": " << xstrerr(xerrno));
    }
#endif
}
1039
1040 int
1041 commSetNonBlocking(int fd)
1042 {
1043 #if _SQUID_WINDOWS_
1044 int nonblocking = TRUE;
1045
1046 if (ioctl(fd, FIONBIO, &nonblocking) < 0) {
1047 int xerrno = errno;
1048 debugs(50, DBG_CRITICAL, MYNAME << "FD " << fd << ": " << xstrerr(xerrno) << " " << fd_table[fd].type);
1049 return Comm::COMM_ERROR;
1050 }
1051
1052 #else
1053 int flags;
1054 int dummy = 0;
1055
1056 if ((flags = fcntl(fd, F_GETFL, dummy)) < 0) {
1057 int xerrno = errno;
1058 debugs(50, DBG_CRITICAL, MYNAME << "FD " << fd << ": fcntl F_GETFL: " << xstrerr(xerrno));
1059 return Comm::COMM_ERROR;
1060 }
1061
1062 if (fcntl(fd, F_SETFL, flags | SQUID_NONBLOCK) < 0) {
1063 int xerrno = errno;
1064 debugs(50, DBG_CRITICAL, MYNAME << "FD " << fd << ": " << xstrerr(xerrno));
1065 return Comm::COMM_ERROR;
1066 }
1067 #endif
1068
1069 fd_table[fd].flags.nonblocking = true;
1070 return 0;
1071 }
1072
/// Restores blocking mode on the given descriptor and records that fact
/// in fd_table.
/// \returns 0 on success or Comm::COMM_ERROR (after logging the failure)
int
commUnsetNonBlocking(int fd)
{
#if _SQUID_WINDOWS_
    int nonblocking = FALSE;

    // NOTE: the error-handling block after #endif completes this `if`
    if (ioctlsocket(fd, FIONBIO, (unsigned long *) &nonblocking) < 0) {
#else
    int flags;
    int dummy = 0;

    if ((flags = fcntl(fd, F_GETFL, dummy)) < 0) {
        int xerrno = errno;
        debugs(50, DBG_CRITICAL, MYNAME << "FD " << fd << ": fcntl F_GETFL: " << xstrerr(xerrno));
        return Comm::COMM_ERROR;
    }

    // clear the non-blocking bit while preserving all other F_GETFL flags;
    // the shared error-handling block after #endif completes this `if`
    if (fcntl(fd, F_SETFL, flags & (~SQUID_NONBLOCK)) < 0) {
#endif
        int xerrno = errno;
        debugs(50, DBG_CRITICAL, MYNAME << "FD " << fd << ": " << xstrerr(xerrno));
        return Comm::COMM_ERROR;
    }

    fd_table[fd].flags.nonblocking = false;
    return 0;
}
1100
1101 void
1102 commSetCloseOnExec(int fd)
1103 {
1104 #ifdef FD_CLOEXEC
1105 int flags;
1106 int dummy = 0;
1107
1108 if ((flags = fcntl(fd, F_GETFD, dummy)) < 0) {
1109 int xerrno = errno;
1110 debugs(50, DBG_CRITICAL, MYNAME << "FD " << fd << ": fcntl F_GETFD: " << xstrerr(xerrno));
1111 return;
1112 }
1113
1114 if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) < 0) {
1115 int xerrno = errno;
1116 debugs(50, DBG_CRITICAL, "ERROR: " << MYNAME << "FD " << fd << ": set close-on-exec failed: " << xstrerr(xerrno));
1117 }
1118 #endif
1119 }
1120
1121 #ifdef TCP_NODELAY
1122 static void
1123 commSetTcpNoDelay(int fd)
1124 {
1125 int on = 1;
1126
1127 if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &on, sizeof(on)) < 0) {
1128 int xerrno = errno;
1129 debugs(50, DBG_IMPORTANT, MYNAME << "FD " << fd << ": " << xstrerr(xerrno));
1130 }
1131
1132 fd_table[fd].flags.nodelay = true;
1133 }
1134
1135 #endif
1136
1137 void
1138 comm_init(void)
1139 {
1140 assert(fd_table);
1141
1142 /* XXX account fd_table */
1143 /* Keep a few file descriptors free so that we don't run out of FD's
1144 * after accepting a client but before it opens a socket or a file.
1145 * Since Squid_MaxFD can be as high as several thousand, don't waste them */
1146 RESERVED_FD = min(100, Squid_MaxFD / 4);
1147
1148 TheHalfClosed = new DescriptorSet;
1149
1150 /* setup the select loop module */
1151 Comm::SelectLoopInit();
1152 }
1153
1154 void
1155 comm_exit(void)
1156 {
1157 delete TheHalfClosed;
1158 TheHalfClosed = nullptr;
1159 }
1160
1161 #if USE_DELAY_POOLS
1162 // called when the queue is done waiting for the client bucket to fill
1163 void
1164 commHandleWriteHelper(void * data)
1165 {
1166 CommQuotaQueue *queue = static_cast<CommQuotaQueue*>(data);
1167 assert(queue);
1168
1169 ClientInfo *clientInfo = queue->clientInfo;
1170 // ClientInfo invalidates queue if freed, so if we got here through,
1171 // evenAdd cbdata protections, everything should be valid and consistent
1172 assert(clientInfo);
1173 assert(clientInfo->hasQueue());
1174 assert(clientInfo->hasQueue(queue));
1175 assert(clientInfo->eventWaiting);
1176 clientInfo->eventWaiting = false;
1177
1178 do {
1179 clientInfo->writeOrDequeue();
1180 if (clientInfo->selectWaiting)
1181 return;
1182 } while (clientInfo->hasQueue());
1183
1184 debugs(77, 3, "emptied queue");
1185 }
1186
/// Serves the descriptor at the head of the quota queue: if the head is
/// still relevant, starts waiting for it to become writeable (setting
/// selectWaiting); otherwise dequeues it. Runs inside the code context
/// that originally queued the descriptor.
void
ClientInfo::writeOrDequeue()
{
    assert(!selectWaiting);
    const auto head = quotaPeekFd();
    const auto &headFde = fd_table[head];
    CallBack(headFde.codeContext, [&] {
        const auto ccb = COMMIO_FD_WRITECB(head);
        // check that the head descriptor is still relevant
        // (the FD may have been closed and reused since it was queued)
        if (headFde.clientInfo == this &&
                quotaPeekReserv() == ccb->quotaQueueReserv &&
                !headFde.closing()) {

            // wait for the head descriptor to become ready for writing
            Comm::SetSelect(head, COMM_SELECT_WRITE, Comm::HandleWrite, ccb, 0);
            selectWaiting = true;
        } else {
            quotaDequeue(); // remove the no longer relevant descriptor
        }
    });
}
1208
/// whether any clients are queued, waiting for their write quota
bool
ClientInfo::hasQueue() const
{
    assert(quotaQueue);
    return !quotaQueue->empty();
}
1215
/// whether the given queue is (still) our quota queue; used to validate
/// queue pointers delivered via event callbacks
bool
ClientInfo::hasQueue(const CommQuotaQueue *q) const
{
    assert(quotaQueue);
    return quotaQueue == q;
}
1222
/// returns the first descriptor to be dequeued (without dequeuing it)
int
ClientInfo::quotaPeekFd() const
{
    assert(quotaQueue);
    return quotaQueue->front();
}
1230
/// returns the reservation ID of the first descriptor to be dequeued
unsigned int
ClientInfo::quotaPeekReserv() const
{
    assert(quotaQueue);
    // reservation IDs are issued sequentially by enqueue(), so the head's
    // ID is one past the number of completed dequeues
    return quotaQueue->outs + 1;
}
1238
/// queues a given fd, creating the queue if necessary; returns reservation ID
unsigned int
ClientInfo::quotaEnqueue(int fd)
{
    assert(quotaQueue);
    return quotaQueue->enqueue(fd);
}
1246
/// removes queue head
void
ClientInfo::quotaDequeue()
{
    assert(quotaQueue);
    quotaQueue->dequeue();
}
1254
1255 void
1256 ClientInfo::kickQuotaQueue()
1257 {
1258 if (!eventWaiting && !selectWaiting && hasQueue()) {
1259 // wait at least a second if the bucket is empty
1260 const double delay = (bucketLevel < 1.0) ? 1.0 : 0.0;
1261 eventAdd("commHandleWriteHelper", &commHandleWriteHelper,
1262 quotaQueue, delay, 0, true);
1263 eventWaiting = true;
1264 }
1265 }
1266
/// calculates how much to write for a single dequeued client
int
ClientInfo::quota()
{
    /* If we have multiple clients and give full bucketSize to each client then
     * clt1 may often get a lot more because clt1->clt2 time distance in the
     * select(2) callback order may be a lot smaller than cltN->clt1 distance.
     * We divide quota evenly to be more fair. */

    if (!rationedCount) {
        // start a new rationing round; +1 covers the client being served
        // now, who is no longer counted in the queue
        rationedCount = quotaQueue->size() + 1;

        // The delay in ration recalculation _temporary_ deprives clients from
        // bytes that should have trickled in while rationedCount was positive.
        refillBucket();

        // Rounding errors do not accumulate here, but we round down to avoid
        // negative bucket sizes after write with rationedCount=1.
        rationedQuota = static_cast<int>(floor(bucketLevel/rationedCount));
        debugs(77,5, "new rationedQuota: " << rationedQuota <<
               '*' << rationedCount);
    }

    --rationedCount;
    debugs(77,7, "rationedQuota: " << rationedQuota <<
           " rations remaining: " << rationedCount);

    // update 'last seen' time to prevent clientdb GC from dropping us
    last_seen = squid_curtime;
    return rationedQuota;
}
1298
1299 bool
1300 ClientInfo::applyQuota(int &nleft, Comm::IoCallback *state)
1301 {
1302 assert(hasQueue());
1303 assert(quotaPeekFd() == state->conn->fd);
1304 quotaDequeue(); // we will write or requeue below
1305 if (nleft > 0 && !BandwidthBucket::applyQuota(nleft, state)) {
1306 state->quotaQueueReserv = quotaEnqueue(state->conn->fd);
1307 kickQuotaQueue();
1308 return false;
1309 }
1310 return true;
1311 }
1312
1313 void
1314 ClientInfo::scheduleWrite(Comm::IoCallback *state)
1315 {
1316 if (writeLimitingActive) {
1317 state->quotaQueueReserv = quotaEnqueue(state->conn->fd);
1318 kickQuotaQueue();
1319 }
1320 }
1321
/// BandwidthBucket API: reacts to the traffic-shaped descriptor closing
void
ClientInfo::onFdClosed()
{
    BandwidthBucket::onFdClosed();
    // kick queue or it will get stuck as commWriteHandle is not called
    kickQuotaQueue();
}
1329
1330 void
1331 ClientInfo::reduceBucket(const int len)
1332 {
1333 if (len > 0)
1334 BandwidthBucket::reduceBucket(len);
1335 // even if we wrote nothing, we were served; give others a chance
1336 kickQuotaQueue();
1337 }
1338
1339 void
1340 ClientInfo::setWriteLimiter(const int aWriteSpeedLimit, const double anInitialBurst, const double aHighWatermark)
1341 {
1342 debugs(77,5, "Write limits for " << (const char*)key <<
1343 " speed=" << aWriteSpeedLimit << " burst=" << anInitialBurst <<
1344 " highwatermark=" << aHighWatermark);
1345
1346 // set or possibly update traffic shaping parameters
1347 writeLimitingActive = true;
1348 writeSpeedLimit = aWriteSpeedLimit;
1349 bucketSizeLimit = aHighWatermark;
1350
1351 // but some members should only be set once for a newly activated bucket
1352 if (firstTimeConnection) {
1353 firstTimeConnection = false;
1354
1355 assert(!selectWaiting);
1356 assert(!quotaQueue);
1357 quotaQueue = new CommQuotaQueue(this);
1358
1359 bucketLevel = anInitialBurst;
1360 prevTime = current_dtime;
1361 }
1362 }
1363
/// creates an empty queue tied to the ClientInfo that owns it
CommQuotaQueue::CommQuotaQueue(ClientInfo *info): clientInfo(info),
    ins(0), outs(0)
{
    assert(clientInfo);
}
1369
/// may only be destroyed after the owning ClientInfo detaches itself
CommQuotaQueue::~CommQuotaQueue()
{
    assert(!clientInfo); // ClientInfo should clear this before destroying us
}
1374
1375 /// places the given fd at the end of the queue; returns reservation ID
1376 unsigned int
1377 CommQuotaQueue::enqueue(int fd)
1378 {
1379 debugs(77,5, "clt" << (const char*)clientInfo->key <<
1380 ": FD " << fd << " with qqid" << (ins+1) << ' ' << fds.size());
1381 fds.push_back(fd);
1382 fd_table[fd].codeContext = CodeContext::Current();
1383 return ++ins;
1384 }
1385
1386 /// removes queue head
1387 void
1388 CommQuotaQueue::dequeue()
1389 {
1390 assert(!fds.empty());
1391 debugs(77,5, "clt" << (const char*)clientInfo->key <<
1392 ": FD " << fds.front() << " with qqid" << (outs+1) << ' ' <<
1393 fds.size());
1394 fds.pop_front();
1395 ++outs;
1396 }
1397 #endif /* USE_DELAY_POOLS */
1398
1399 /*
1400 * hm, this might be too general-purpose for all the places we'd
1401 * like to use it.
1402 */
1403 int
1404 ignoreErrno(int ierrno)
1405 {
1406 switch (ierrno) {
1407
1408 case EINPROGRESS:
1409
1410 case EWOULDBLOCK:
1411 #if EAGAIN != EWOULDBLOCK
1412
1413 case EAGAIN:
1414 #endif
1415
1416 case EALREADY:
1417
1418 case EINTR:
1419 #ifdef ERESTART
1420
1421 case ERESTART:
1422 #endif
1423
1424 return 1;
1425
1426 default:
1427 return 0;
1428 }
1429
1430 /* NOTREACHED */
1431 }
1432
1433 void
1434 commCloseAllSockets(void)
1435 {
1436 int fd;
1437 fde *F = nullptr;
1438
1439 for (fd = 0; fd <= Biggest_FD; ++fd) {
1440 F = &fd_table[fd];
1441
1442 if (!F->flags.open)
1443 continue;
1444
1445 if (F->type != FD_SOCKET)
1446 continue;
1447
1448 if (F->flags.ipc) /* don't close inter-process sockets */
1449 continue;
1450
1451 if (F->timeoutHandler != nullptr) {
1452 AsyncCall::Pointer callback = F->timeoutHandler;
1453 F->timeoutHandler = nullptr;
1454 debugs(5, 5, "commCloseAllSockets: FD " << fd << ": Calling timeout handler");
1455 ScheduleCallHere(callback);
1456 } else {
1457 debugs(5, 5, "commCloseAllSockets: FD " << fd << ": calling comm_reset_close()");
1458 old_comm_reset_close(fd);
1459 }
1460 }
1461 }
1462
1463 static bool
1464 AlreadyTimedOut(fde *F)
1465 {
1466 if (!F->flags.open)
1467 return true;
1468
1469 if (F->timeout == 0)
1470 return true;
1471
1472 if (F->timeout > squid_curtime)
1473 return true;
1474
1475 return false;
1476 }
1477
1478 static bool
1479 writeTimedOut(int fd)
1480 {
1481 if (!COMMIO_FD_WRITECB(fd)->active())
1482 return false;
1483
1484 if ((squid_curtime - fd_table[fd].writeStart) < Config.Timeout.write)
1485 return false;
1486
1487 return true;
1488 }
1489
/// Periodic scan of all descriptors: fails writes that exceeded the write
/// timeout, restarts quota-limited writes that earned quota, and fires (or
/// forces) timeout handling for descriptors whose deadline has passed.
void
checkTimeouts(void)
{
    int fd;
    fde *F = nullptr;
    AsyncCall::Pointer callback;

    for (fd = 0; fd <= Biggest_FD; ++fd) {
        F = &fd_table[fd];

        if (writeTimedOut(fd)) {
            // We have an active write callback and we are timed out
            CodeContext::Reset(F->codeContext);
            debugs(5, 5, "checkTimeouts: FD " << fd << " auto write timeout");
            Comm::SetSelect(fd, COMM_SELECT_WRITE, nullptr, nullptr, 0);
            COMMIO_FD_WRITECB(fd)->finish(Comm::COMM_ERROR, ETIMEDOUT);
            CodeContext::Reset();
            continue;
#if USE_DELAY_POOLS
        } else if (F->writeQuotaHandler != nullptr && COMMIO_FD_WRITECB(fd)->conn != nullptr) {
            // a quota-limited write is parked on this FD; resume it when the
            // bucket has quota and nobody is already waiting on select(2)
            // TODO: Move and extract quota() call to place it inside F->codeContext.
            if (!F->writeQuotaHandler->selectWaiting && F->writeQuotaHandler->quota() && !F->closing()) {
                CodeContext::Reset(F->codeContext);
                F->writeQuotaHandler->selectWaiting = true;
                Comm::SetSelect(fd, COMM_SELECT_WRITE, Comm::HandleWrite, COMMIO_FD_WRITECB(fd), 0);
                CodeContext::Reset();
            }
            continue;
#endif
        }
        else if (AlreadyTimedOut(F))
            continue;

        // this descriptor's timeout deadline has passed
        CodeContext::Reset(F->codeContext);
        debugs(5, 5, "checkTimeouts: FD " << fd << " Expired");

        if (F->timeoutHandler != nullptr) {
            debugs(5, 5, "checkTimeouts: FD " << fd << ": Call timeout handler");
            callback = F->timeoutHandler;
            F->timeoutHandler = nullptr;
            ScheduleCallHere(callback);
        } else {
            // no handler registered: close the descriptor outright
            debugs(5, 5, "checkTimeouts: FD " << fd << ": Forcing comm_close()");
            comm_close(fd);
        }

        CodeContext::Reset();
    }
}
1539
1540 /// Start waiting for a possibly half-closed connection to close
1541 // by scheduling a read callback to a monitoring handler that
1542 // will close the connection on read errors.
1543 void
1544 commStartHalfClosedMonitor(int fd)
1545 {
1546 debugs(5, 5, "adding FD " << fd << " to " << *TheHalfClosed);
1547 assert(isOpen(fd) && !commHasHalfClosedMonitor(fd));
1548 (void)TheHalfClosed->add(fd); // could also assert the result
1549 fd_table[fd].codeContext = CodeContext::Current();
1550 commPlanHalfClosedCheck(); // may schedule check if we added the first FD
1551 }
1552
1553 static
1554 void
1555 commPlanHalfClosedCheck()
1556 {
1557 if (!WillCheckHalfClosed && !TheHalfClosed->empty()) {
1558 eventAdd("commHalfClosedCheck", &commHalfClosedCheck, nullptr, 1.0, 1);
1559 WillCheckHalfClosed = true;
1560 }
1561 }
1562
1563 /// iterates over all descriptors that may need half-closed tests and
1564 /// calls comm_read for those that do; re-schedules the check if needed
1565 static
1566 void
1567 commHalfClosedCheck(void *)
1568 {
1569 debugs(5, 5, "checking " << *TheHalfClosed);
1570
1571 typedef DescriptorSet::const_iterator DSCI;
1572 const DSCI end = TheHalfClosed->end();
1573 for (DSCI i = TheHalfClosed->begin(); i != end; ++i) {
1574 Comm::ConnectionPointer c = new Comm::Connection; // XXX: temporary. make HalfClosed a list of these.
1575 c->fd = *i;
1576 if (!fd_table[c->fd].halfClosedReader) { // not reading already
1577 CallBack(fd_table[c->fd].codeContext, [&c] {
1578 AsyncCall::Pointer call = commCbCall(5,4, "commHalfClosedReader",
1579 CommIoCbPtrFun(&commHalfClosedReader, nullptr));
1580 Comm::Read(c, call);
1581 fd_table[c->fd].halfClosedReader = call;
1582 });
1583 } else
1584 c->fd = -1; // XXX: temporary. prevent c replacement erase closing listed FD
1585 }
1586
1587 WillCheckHalfClosed = false; // as far as we know
1588 commPlanHalfClosedCheck(); // may need to check again
1589 }
1590
/// checks whether we are waiting for possibly half-closed connection to close
// We are monitoring if the read handler for the fd is the monitoring handler.
bool
commHasHalfClosedMonitor(int fd)
{
    return TheHalfClosed->has(fd);
}
1598
1599 /// stop waiting for possibly half-closed connection to close
1600 void
1601 commStopHalfClosedMonitor(int const fd)
1602 {
1603 debugs(5, 5, "removing FD " << fd << " from " << *TheHalfClosed);
1604
1605 // cancel the read if one was scheduled
1606 AsyncCall::Pointer reader = fd_table[fd].halfClosedReader;
1607 if (reader != nullptr)
1608 Comm::ReadCancel(fd, reader);
1609 fd_table[fd].halfClosedReader = nullptr;
1610
1611 TheHalfClosed->del(fd);
1612 }
1613
1614 /// I/O handler for the possibly half-closed connection monitoring code
1615 static void
1616 commHalfClosedReader(const Comm::ConnectionPointer &conn, char *, size_t size, Comm::Flag flag, int, void *)
1617 {
1618 // there cannot be more data coming in on half-closed connections
1619 assert(size == 0);
1620 assert(conn != nullptr);
1621 assert(commHasHalfClosedMonitor(conn->fd)); // or we would have canceled the read
1622
1623 fd_table[conn->fd].halfClosedReader = nullptr; // done reading, for now
1624
1625 // nothing to do if fd is being closed
1626 if (flag == Comm::ERR_CLOSING)
1627 return;
1628
1629 // if read failed, close the connection
1630 if (flag != Comm::OK) {
1631 debugs(5, 3, "closing " << conn);
1632 conn->close();
1633 return;
1634 }
1635
1636 // continue waiting for close or error
1637 commPlanHalfClosedCheck(); // make sure this fd will be checked again
1638 }
1639
1640 int
1641 CommSelectEngine::checkEvents(int timeout)
1642 {
1643 static time_t last_timeout = 0;
1644
1645 /* No, this shouldn't be here. But it shouldn't be in each comm handler. -adrian */
1646 if (squid_curtime > last_timeout) {
1647 last_timeout = squid_curtime;
1648 checkTimeouts();
1649 }
1650
1651 switch (Comm::DoSelect(timeout)) {
1652
1653 case Comm::OK:
1654
1655 case Comm::TIMEOUT:
1656 return 0;
1657
1658 case Comm::IDLE:
1659
1660 case Comm::SHUTDOWN:
1661 return EVENT_IDLE;
1662
1663 case Comm::COMM_ERROR:
1664 return EVENT_ERROR;
1665
1666 default:
1667 fatal_dump("comm.cc: Internal error -- this should never happen.");
1668 return EVENT_ERROR;
1669 };
1670 }
1671
/// Create a unix-domain socket (UDS) that only supports FD_MSGHDR I/O.
/// \param sock_type socket(2) type, e.g., SOCK_STREAM or SOCK_DGRAM
/// \param proto socket(2) protocol, usually zero
/// \param addr filesystem address to open (also used to label the FD)
/// \param flags COMM_* option bits (COMM_NONBLOCKING, COMM_DOBIND, ...)
/// \returns the new socket descriptor, or -1 on failure
int
comm_open_uds(int sock_type,
              int proto,
              struct sockaddr_un* addr,
              int flags)
{
    // TODO: merge with comm_openex() when Ip::Address becomes NetAddress

    int new_socket;

    /* Create socket for accepting new connections. */
    ++ statCounter.syscalls.sock.sockets;

    /* Setup the socket addrinfo details for use */
    struct addrinfo AI;
    AI.ai_flags = 0;
    AI.ai_family = PF_UNIX;
    AI.ai_socktype = sock_type;
    AI.ai_protocol = proto;
    AI.ai_addrlen = SUN_LEN(addr);
    AI.ai_addr = (sockaddr*)addr;
    AI.ai_canonname = nullptr;
    AI.ai_next = nullptr;

    debugs(50, 3, "Attempt open socket for: " << addr->sun_path);

    if ((new_socket = socket(AI.ai_family, AI.ai_socktype, AI.ai_protocol)) < 0) {
        int xerrno = errno;
        /* Increase the number of reserved fd's if calls to socket()
         * are failing because the open file table is full. This
         * limits the number of simultaneous clients */

        if (limitError(xerrno)) {
            debugs(50, DBG_IMPORTANT, MYNAME << "socket failure: " << xstrerr(xerrno));
            fdAdjustReserved();
        } else {
            debugs(50, DBG_CRITICAL, MYNAME << "socket failure: " << xstrerr(xerrno));
        }
        return -1;
    }

    debugs(50, 3, "Opened UDS FD " << new_socket << " : family=" << AI.ai_family << ", type=" << AI.ai_socktype << ", protocol=" << AI.ai_protocol);

    /* update fdstat */
    debugs(50, 5, "FD " << new_socket << " is a new socket");

    assert(!isOpen(new_socket));
    fd_open(new_socket, FD_MSGHDR, addr->sun_path); // label the FD with its path

    fd_table[new_socket].sock_family = AI.ai_family;

    // close-on-exec is the default; COMM_NOCLOEXEC opts out
    if (!(flags & COMM_NOCLOEXEC))
        commSetCloseOnExec(new_socket);

    if (flags & COMM_REUSEADDR)
        commSetReuseAddr(new_socket);

    if (flags & COMM_NONBLOCKING) {
        if (commSetNonBlocking(new_socket) != Comm::OK) {
            comm_close(new_socket);
            return -1;
        }
    }

    if (flags & COMM_DOBIND) {
        if (commBind(new_socket, AI) != Comm::OK) {
            comm_close(new_socket);
            return -1;
        }
    }

#ifdef TCP_NODELAY
    if (sock_type == SOCK_STREAM)
        commSetTcpNoDelay(new_socket);

#endif

    if (Config.tcpRcvBufsz > 0 && sock_type == SOCK_STREAM)
        commSetTcpRcvbuf(new_socket, Config.tcpRcvBufsz);

    return new_socket;
}
1755