2 * BIRD Internet Routing Daemon -- Unix I/O
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
7 * Can be freely distributed and used under the terms of the GNU GPL.
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
20 #include <sys/types.h>
21 #include <sys/socket.h>
29 #include <netinet/in.h>
30 #include <netinet/tcp.h>
31 #include <netinet/udp.h>
32 #include <netinet/icmp6.h>
34 #include "nest/bird.h"
35 #include "lib/lists.h"
36 #include "lib/resource.h"
37 #include "lib/socket.h"
38 #include "lib/event.h"
39 #include "lib/timer.h"
40 #include "lib/string.h"
41 #include "nest/iface.h"
42 #include "conf/conf.h"
44 #include "sysdep/unix/unix.h"
45 #include CONFIG_INCLUDE_SYSIO_H
47 /* Maximum number of calls of tx handler for one socket in one
48 * poll iteration. Should be small enough to not monopolize CPU by
49 * one protocol instance.
53 /* Maximum number of calls of rx handler for all sockets in one poll
54 iteration. RX callbacks are often much more costly so we limit
55 this to get small latencies */
56 #define MAX_RX_STEPS 4
71 struct rfile
*a
= (struct rfile
*) r
;
79 struct rfile
*a
= (struct rfile
*) r
;
81 debug("(FILE *%p)\n", a
->f
);
84 static struct resclass rf_class
= {
94 rf_open(pool
*p
, char *name
, char *mode
)
96 FILE *f
= fopen(name
, mode
);
101 struct rfile
*r
= ralloc(p
, &rf_class
);
107 rf_file(struct rfile
*f
)
113 rf_fileno(struct rfile
*f
)
126 times_init(struct timeloop
*loop
)
131 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
133 die("Monotonic clock is missing");
135 if ((ts
.tv_sec
< 0) || (((u64
) ts
.tv_sec
) > ((u64
) 1 << 40)))
136 log(L_WARN
"Monotonic clock is crazy");
138 loop
->last_time
= ts
.tv_sec S
+ ts
.tv_nsec NS
;
143 times_update(struct timeloop
*loop
)
148 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
150 die("clock_gettime: %m");
152 btime new_time
= ts
.tv_sec S
+ ts
.tv_nsec NS
;
154 if (new_time
< loop
->last_time
)
155 log(L_ERR
"Monotonic clock is broken");
157 loop
->last_time
= new_time
;
162 times_update_real_time(struct timeloop
*loop
)
167 rv
= clock_gettime(CLOCK_REALTIME
, &ts
);
169 die("clock_gettime: %m");
171 loop
->real_time
= ts
.tv_sec S
+ ts
.tv_nsec NS
;
178 * Socket resources represent network connections. Their data structure (&socket)
179 * contains a lot of fields defining the exact type of the socket, the local and
180 * remote addresses and ports, pointers to socket buffers and finally pointers to
181 * hook functions to be called when new data have arrived to the receive buffer
182 * (@rx_hook), when the contents of the transmit buffer have been transmitted
183 * (@tx_hook) and when an error or connection close occurs (@err_hook).
185 * Freeing of sockets from inside socket hooks is perfectly safe.
189 #define SOL_IP IPPROTO_IP
193 #define SOL_IPV6 IPPROTO_IPV6
197 #define SOL_ICMPV6 IPPROTO_ICMPV6
202 * Sockaddr helper functions
/* Return the size in bytes of the socket address structure for address
   family @af: sizeof(struct sockaddr_in) when @af is AF_INET, and
   sizeof(struct sockaddr_in6) for every other value. */
205 static inline int UNUSED
sockaddr_length(int af
)
206 { return (af
== AF_INET
) ? sizeof(struct sockaddr_in
) : sizeof(struct sockaddr_in6
); }
209 sockaddr_fill4(struct sockaddr_in
*sa
, ip_addr a
, uint port
)
211 memset(sa
, 0, sizeof(struct sockaddr_in
));
212 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
213 sa
->sin_len
= sizeof(struct sockaddr_in
);
215 sa
->sin_family
= AF_INET
;
216 sa
->sin_port
= htons(port
);
217 sa
->sin_addr
= ipa_to_in4(a
);
221 sockaddr_fill6(struct sockaddr_in6
*sa
, ip_addr a
, struct iface
*ifa
, uint port
)
223 memset(sa
, 0, sizeof(struct sockaddr_in6
));
225 sa
->sin6_len
= sizeof(struct sockaddr_in6
);
227 sa
->sin6_family
= AF_INET6
;
228 sa
->sin6_port
= htons(port
);
229 sa
->sin6_flowinfo
= 0;
230 sa
->sin6_addr
= ipa_to_in6(a
);
232 if (ifa
&& ipa_is_link_local(a
))
233 sa
->sin6_scope_id
= ifa
->index
;
237 sockaddr_fill(sockaddr
*sa
, int af
, ip_addr a
, struct iface
*ifa
, uint port
)
240 sockaddr_fill4((struct sockaddr_in
*) sa
, a
, port
);
241 else if (af
== AF_INET6
)
242 sockaddr_fill6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
248 sockaddr_read4(struct sockaddr_in
*sa
, ip_addr
*a
, uint
*port
)
250 *port
= ntohs(sa
->sin_port
);
251 *a
= ipa_from_in4(sa
->sin_addr
);
255 sockaddr_read6(struct sockaddr_in6
*sa
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
257 *port
= ntohs(sa
->sin6_port
);
258 *a
= ipa_from_in6(sa
->sin6_addr
);
260 if (ifa
&& ipa_is_link_local(*a
))
261 *ifa
= if_find_by_index(sa
->sin6_scope_id
);
265 sockaddr_read(sockaddr
*sa
, int af
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
267 if (sa
->sa
.sa_family
!= af
)
271 sockaddr_read4((struct sockaddr_in
*) sa
, a
, port
);
272 else if (af
== AF_INET6
)
273 sockaddr_read6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
287 * IPv6 multicast syscalls
290 /* Fortunately standardized in RFC 3493 */
/* Brace initializer for a struct ipv6_mreq: the group address is @maddr
   converted by ipa_to_in6(), the interface is taken from @ifa->index.
   Note that @ifa is expanded without parentheses. */
292 #define INIT_MREQ6(maddr,ifa) \
293 { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
296 sk_setup_multicast6(sock
*s
)
298 int index
= s
->iface
->index
;
302 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_IF
, &index
, sizeof(index
)) < 0)
303 ERR("IPV6_MULTICAST_IF");
305 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
306 ERR("IPV6_MULTICAST_HOPS");
308 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_LOOP
, &n
, sizeof(n
)) < 0)
309 ERR("IPV6_MULTICAST_LOOP");
315 sk_join_group6(sock
*s
, ip_addr maddr
)
317 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
319 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_JOIN_GROUP
, &mr
, sizeof(mr
)) < 0)
320 ERR("IPV6_JOIN_GROUP");
326 sk_leave_group6(sock
*s
, ip_addr maddr
)
328 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
330 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_LEAVE_GROUP
, &mr
, sizeof(mr
)) < 0)
331 ERR("IPV6_LEAVE_GROUP");
338 * IPv6 packet control messages
341 /* Also standardized, in RFC 3542 */
344 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
345 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
346 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
347 * RFC and we use IPV6_PKTINFO.
349 #ifndef IPV6_RECVPKTINFO
350 #define IPV6_RECVPKTINFO IPV6_PKTINFO
353 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
355 #ifndef IPV6_RECVHOPLIMIT
356 #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
360 #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
361 #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
364 sk_request_cmsg6_pktinfo(sock
*s
)
368 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVPKTINFO
, &y
, sizeof(y
)) < 0)
369 ERR("IPV6_RECVPKTINFO");
375 sk_request_cmsg6_ttl(sock
*s
)
379 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVHOPLIMIT
, &y
, sizeof(y
)) < 0)
380 ERR("IPV6_RECVHOPLIMIT");
386 sk_process_cmsg6_pktinfo(sock
*s
, struct cmsghdr
*cm
)
388 if (cm
->cmsg_type
== IPV6_PKTINFO
)
390 struct in6_pktinfo
*pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
391 s
->laddr
= ipa_from_in6(pi
->ipi6_addr
);
392 s
->lifindex
= pi
->ipi6_ifindex
;
397 sk_process_cmsg6_ttl(sock
*s
, struct cmsghdr
*cm
)
399 if (cm
->cmsg_type
== IPV6_HOPLIMIT
)
400 s
->rcv_ttl
= * (int *) CMSG_DATA(cm
);
404 sk_prepare_cmsgs6(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
407 struct in6_pktinfo
*pi
;
410 msg
->msg_control
= cbuf
;
411 msg
->msg_controllen
= cbuflen
;
413 cm
= CMSG_FIRSTHDR(msg
);
414 cm
->cmsg_level
= SOL_IPV6
;
415 cm
->cmsg_type
= IPV6_PKTINFO
;
416 cm
->cmsg_len
= CMSG_LEN(sizeof(*pi
));
417 controllen
+= CMSG_SPACE(sizeof(*pi
));
419 pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
420 pi
->ipi6_ifindex
= s
->iface
? s
->iface
->index
: 0;
421 pi
->ipi6_addr
= ipa_to_in6(s
->saddr
);
423 msg
->msg_controllen
= controllen
;
428 * Miscellaneous socket syscalls
432 sk_set_ttl4(sock
*s
, int ttl
)
434 if (setsockopt(s
->fd
, SOL_IP
, IP_TTL
, &ttl
, sizeof(ttl
)) < 0)
441 sk_set_ttl6(sock
*s
, int ttl
)
443 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_UNICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
444 ERR("IPV6_UNICAST_HOPS");
450 sk_set_tos4(sock
*s
, int tos
)
452 if (setsockopt(s
->fd
, SOL_IP
, IP_TOS
, &tos
, sizeof(tos
)) < 0)
459 sk_set_tos6(sock
*s
, int tos
)
461 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_TCLASS
, &tos
, sizeof(tos
)) < 0)
468 sk_set_high_port(sock
*s UNUSED
)
470 /* Port range setting is optional, ignore it if not supported */
475 int range
= IP_PORTRANGE_HIGH
;
476 if (setsockopt(s
->fd
, SOL_IP
, IP_PORTRANGE
, &range
, sizeof(range
)) < 0)
481 #ifdef IPV6_PORTRANGE
484 int range
= IPV6_PORTRANGE_HIGH
;
485 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_PORTRANGE
, &range
, sizeof(range
)) < 0)
486 ERR("IPV6_PORTRANGE");
494 sk_skip_ip_header(byte
*pkt
, int *len
)
496 if ((*len
< 20) || ((*pkt
& 0xf0) != 0x40))
499 int hlen
= (*pkt
& 0x0f) * 4;
500 if ((hlen
< 20) || (hlen
> *len
))
508 sk_rx_buffer(sock
*s
, int *len
)
510 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
))
511 return sk_skip_ip_header(s
->rbuf
, len
);
518 * Public socket functions
522 * sk_setup_multicast - enable multicast for given socket
525 * Prepare transmission of multicast packets for given datagram socket.
526 * The socket must have defined @iface.
528 * Result: 0 for success, -1 for an error.
532 sk_setup_multicast(sock
*s
)
537 return sk_setup_multicast4(s
);
539 return sk_setup_multicast6(s
);
543 * sk_join_group - join multicast group for given socket
545 * @maddr: multicast address
547 * Join multicast group for given datagram socket and associated interface.
548 * The socket must have defined @iface.
550 * Result: 0 for success, -1 for an error.
554 sk_join_group(sock
*s
, ip_addr maddr
)
557 return sk_join_group4(s
, maddr
);
559 return sk_join_group6(s
, maddr
);
563 * sk_leave_group - leave multicast group for given socket
565 * @maddr: multicast address
567 * Leave multicast group for given datagram socket and associated interface.
568 * The socket must have defined @iface.
570 * Result: 0 for success, -1 for an error.
574 sk_leave_group(sock
*s
, ip_addr maddr
)
577 return sk_leave_group4(s
, maddr
);
579 return sk_leave_group6(s
, maddr
);
583 * sk_setup_broadcast - enable broadcast for given socket
586 * Allow reception and transmission of broadcast packets for given datagram
587 * socket. The socket must have defined @iface. For transmission, packets should
588 * be sent to @brd address of @iface.
590 * Result: 0 for success, -1 for an error.
594 sk_setup_broadcast(sock
*s
)
598 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BROADCAST
, &y
, sizeof(y
)) < 0)
605 * sk_set_ttl - set transmit TTL for given socket
609 * Set TTL for already opened connections when TTL was not set before. Useful
610 * for accepted connections when different ones should have different TTL.
612 * Result: 0 for success, -1 for an error.
616 sk_set_ttl(sock
*s
, int ttl
)
621 return sk_set_ttl4(s
, ttl
);
623 return sk_set_ttl6(s
, ttl
);
627 * sk_set_min_ttl - set minimal accepted TTL for given socket
631 * Set minimal accepted TTL for given socket. Can be used for TTL security.
634 * Result: 0 for success, -1 for an error.
638 sk_set_min_ttl(sock
*s
, int ttl
)
641 return sk_set_min_ttl4(s
, ttl
);
643 return sk_set_min_ttl6(s
, ttl
);
648 * sk_set_md5_auth - add / remove MD5 security association for given socket
650 * @local: IP address of local side
651 * @remote: IP address of remote side
652 * @ifa: Interface for link-local IP address
653 * @passwd: Password used for MD5 authentication
654 * @setkey: Update also system SA/SP database
656 * In TCP MD5 handling code in kernel, there is a set of security associations
657 * used for choosing password and other authentication parameters according to
658 * the local and remote address. This function is useful for listening socket,
659 * for active sockets it may be enough to set s->password field.
661 * When called with passwd != NULL, the new pair is added,
662 * When called with passwd == NULL, the existing pair is removed.
664 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
665 * stored in global SA/SP database (but the behavior also must be enabled on
666 * per-socket basis). In case of multiple sockets to the same neighbor, the
667 * socket-specific state must be configured for each socket while global state
668 * just once per src-dst pair. The @setkey argument controls whether the global
669 * state (SA/SP database) is also updated.
671 * Result: 0 for success, -1 for an error.
675 sk_set_md5_auth(sock
*s
, ip_addr local
, ip_addr remote
, struct iface
*ifa
, char *passwd
, int setkey
)
680 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
684 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
685 * kernel will automatically fill it for outgoing packets and check it for
686 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
687 * known to the kernel.
689 * Result: 0 for success, -1 for an error.
693 sk_set_ipv6_checksum(sock
*s
, int offset
)
695 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_CHECKSUM
, &offset
, sizeof(offset
)) < 0)
696 ERR("IPV6_CHECKSUM");
702 sk_set_icmp6_filter(sock
*s
, int p1
, int p2
)
704 /* a bit of lame interface, but it is here only for Radv */
705 struct icmp6_filter f
;
707 ICMP6_FILTER_SETBLOCKALL(&f
);
708 ICMP6_FILTER_SETPASS(p1
, &f
);
709 ICMP6_FILTER_SETPASS(p2
, &f
);
711 if (setsockopt(s
->fd
, SOL_ICMPV6
, ICMP6_FILTER
, &f
, sizeof(f
)) < 0)
718 sk_log_error(sock
*s
, const char *p
)
720 log(L_ERR
"%s: Socket error: %s%#m", p
, s
->err
);
725 * Actual struct birdsock code
728 static list sock_list
;
729 static struct birdsock
*current_sock
;
730 static struct birdsock
*stored_sock
;
735 if (!s
->n
.next
->next
)
738 return SKIP_BACK(sock
, n
, s
->n
.next
);
742 sk_alloc_bufs(sock
*s
)
744 if (!s
->rbuf
&& s
->rbsize
)
745 s
->rbuf
= s
->rbuf_alloc
= xmalloc(s
->rbsize
);
747 if (!s
->tbuf
&& s
->tbsize
)
748 s
->tbuf
= s
->tbuf_alloc
= xmalloc(s
->tbsize
);
749 s
->tpos
= s
->ttx
= s
->tbuf
;
753 sk_free_bufs(sock
*s
)
757 xfree(s
->rbuf_alloc
);
758 s
->rbuf
= s
->rbuf_alloc
= NULL
;
762 xfree(s
->tbuf_alloc
);
763 s
->tbuf
= s
->tbuf_alloc
= NULL
;
771 struct ssh_sock
*ssh
= s
->ssh
;
780 if (ssh_channel_is_open(ssh
->channel
))
781 ssh_channel_close(ssh
->channel
);
782 ssh_channel_free(ssh
->channel
);
788 ssh_disconnect(ssh
->session
);
789 ssh_free(ssh
->session
);
798 sock
*s
= (sock
*) r
;
803 if (s
->type
== SK_SSH
|| s
->type
== SK_SSH_ACTIVE
)
810 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
811 if (!(s
->flags
& SKF_THREAD
))
813 if (s
== current_sock
)
814 current_sock
= sk_next(s
);
815 if (s
== stored_sock
)
816 stored_sock
= sk_next(s
);
820 if (s
->type
!= SK_SSH
&& s
->type
!= SK_SSH_ACTIVE
)
827 sk_set_rbsize(sock
*s
, uint val
)
829 ASSERT(s
->rbuf_alloc
== s
->rbuf
);
831 if (s
->rbsize
== val
)
835 xfree(s
->rbuf_alloc
);
836 s
->rbuf_alloc
= xmalloc(val
);
837 s
->rpos
= s
->rbuf
= s
->rbuf_alloc
;
841 sk_set_tbsize(sock
*s
, uint val
)
843 ASSERT(s
->tbuf_alloc
== s
->tbuf
);
845 if (s
->tbsize
== val
)
848 byte
*old_tbuf
= s
->tbuf
;
851 s
->tbuf
= s
->tbuf_alloc
= xrealloc(s
->tbuf_alloc
, val
);
852 s
->tpos
= s
->tbuf
+ (s
->tpos
- old_tbuf
);
853 s
->ttx
= s
->tbuf
+ (s
->ttx
- old_tbuf
);
857 sk_set_tbuf(sock
*s
, void *tbuf
)
859 s
->tbuf
= tbuf
?: s
->tbuf_alloc
;
860 s
->ttx
= s
->tpos
= s
->tbuf
;
864 sk_reallocate(sock
*s
)
873 sock
*s
= (sock
*) r
;
874 static char *sk_type_names
[] = { "TCP<", "TCP>", "TCP", "UDP", NULL
, "IP", NULL
, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
876 debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
877 sk_type_names
[s
->type
],
885 s
->iface
? s
->iface
->name
: "none");
888 static struct resclass sk_class
= {
898 * sk_new - create a socket
901 * This function creates a new socket resource. If you want to use it,
902 * you need to fill in all the required fields of the structure and
903 * call sk_open() to do the actual opening of the socket.
905 * The real function name is sock_new(), sk_new() is a macro wrapper
906 * to avoid collision with OpenSSL.
911 sock
*s
= ralloc(p
, &sk_class
);
913 // s->saddr = s->daddr = IPA_NONE;
914 s
->tos
= s
->priority
= s
->ttl
= -1;
925 if (s
->type
== SK_SSH_ACTIVE
)
928 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
934 if (ipa_nonzero(s
->saddr
) && !(s
->flags
& SKF_BIND
))
935 s
->flags
|= SKF_PKTINFO
;
937 #ifdef CONFIG_USE_HDRINCL
938 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
) && (s
->flags
& SKF_PKTINFO
))
940 s
->flags
&= ~SKF_PKTINFO
;
941 s
->flags
|= SKF_HDRINCL
;
942 if (setsockopt(fd
, SOL_IP
, IP_HDRINCL
, &y
, sizeof(y
)) < 0)
947 if (s
->vrf
&& !s
->iface
)
949 /* Bind socket to associated VRF interface.
950 This is Linux-specific, but so is SO_BINDTODEVICE. */
951 #ifdef SO_BINDTODEVICE
952 struct ifreq ifr
= {};
953 strcpy(ifr
.ifr_name
, s
->vrf
->name
);
954 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
955 ERR("SO_BINDTODEVICE");
961 #ifdef SO_BINDTODEVICE
962 struct ifreq ifr
= {};
963 strcpy(ifr
.ifr_name
, s
->iface
->name
);
964 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
965 ERR("SO_BINDTODEVICE");
968 #ifdef CONFIG_UNIX_DONTROUTE
969 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_DONTROUTE
, &y
, sizeof(y
)) < 0)
976 if (s
->flags
& SKF_LADDR_RX
)
977 if (sk_request_cmsg4_pktinfo(s
) < 0)
980 if (s
->flags
& SKF_TTL_RX
)
981 if (sk_request_cmsg4_ttl(s
) < 0)
984 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
985 if (sk_disable_mtu_disc4(s
) < 0)
989 if (sk_set_ttl4(s
, s
->ttl
) < 0)
993 if (sk_set_tos4(s
, s
->tos
) < 0)
999 if ((s
->type
== SK_TCP_PASSIVE
) || (s
->type
== SK_TCP_ACTIVE
) || (s
->type
== SK_UDP
))
1000 if (setsockopt(fd
, SOL_IPV6
, IPV6_V6ONLY
, &y
, sizeof(y
)) < 0)
1003 if (s
->flags
& SKF_LADDR_RX
)
1004 if (sk_request_cmsg6_pktinfo(s
) < 0)
1007 if (s
->flags
& SKF_TTL_RX
)
1008 if (sk_request_cmsg6_ttl(s
) < 0)
1011 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1012 if (sk_disable_mtu_disc6(s
) < 0)
1016 if (sk_set_ttl6(s
, s
->ttl
) < 0)
1020 if (sk_set_tos6(s
, s
->tos
) < 0)
1024 /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
1025 if (s
->priority
>= 0)
1026 if (sk_set_priority(s
, s
->priority
) < 0)
1035 add_tail(&sock_list
, &s
->n
);
1039 sk_tcp_connected(sock
*s
)
1042 int sa_len
= sizeof(sa
);
1044 if ((getsockname(s
->fd
, &sa
.sa
, &sa_len
) < 0) ||
1045 (sockaddr_read(&sa
, s
->af
, &s
->saddr
, &s
->iface
, &s
->sport
) < 0))
1046 log(L_WARN
"SOCK: Cannot get local IP address for TCP>");
1055 sk_ssh_connected(sock
*s
)
1064 sk_passive_connected(sock
*s
, int type
)
1066 sockaddr loc_sa
, rem_sa
;
1067 int loc_sa_len
= sizeof(loc_sa
);
1068 int rem_sa_len
= sizeof(rem_sa
);
1070 int fd
= accept(s
->fd
, ((type
== SK_TCP
) ? &rem_sa
.sa
: NULL
), &rem_sa_len
);
1073 if ((errno
!= EINTR
) && (errno
!= EAGAIN
))
1074 s
->err_hook(s
, errno
);
1078 sock
*t
= sk_new(s
->pool
);
1086 t
->rbsize
= s
->rbsize
;
1087 t
->tbsize
= s
->tbsize
;
1091 if ((getsockname(fd
, &loc_sa
.sa
, &loc_sa_len
) < 0) ||
1092 (sockaddr_read(&loc_sa
, s
->af
, &t
->saddr
, &t
->iface
, &t
->sport
) < 0))
1093 log(L_WARN
"SOCK: Cannot get local IP address for TCP<");
1095 if (sockaddr_read(&rem_sa
, s
->af
, &t
->daddr
, &t
->iface
, &t
->dport
) < 0)
1096 log(L_WARN
"SOCK: Cannot get remote IP address for TCP<");
1099 if (sk_setup(t
) < 0)
1101 /* FIXME: Call err_hook instead ? */
1102 log(L_ERR
"SOCK: Incoming connection: %s%#m", t
->err
);
1104 /* FIXME: handle it better in rfree() */
1119 * Return SSH_OK or SSH_AGAIN or SSH_ERROR
1122 sk_ssh_connect(sock
*s
)
1124 s
->fd
= ssh_get_fd(s
->ssh
->session
);
1126 /* Big fall thru automata */
1127 switch (s
->ssh
->state
)
1129 case SK_SSH_CONNECT
:
1131 switch (ssh_connect(s
->ssh
->session
))
1134 /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
1135 * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
1136 * documented but our code relies on that.
1148 case SK_SSH_SERVER_KNOWN
:
1150 s
->ssh
->state
= SK_SSH_SERVER_KNOWN
;
1152 if (s
->ssh
->server_hostkey_path
)
1154 int server_identity_is_ok
= 1;
1156 /* Check server identity */
1157 switch (ssh_is_server_known(s
->ssh
->session
))
1159 #define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
1160 case SSH_SERVER_KNOWN_OK
:
1161 /* The server is known and has not changed. */
1164 case SSH_SERVER_NOT_KNOWN
:
1165 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server is unknown, its public key was not found in the known host file %s", s
->ssh
->server_hostkey_path
);
1168 case SSH_SERVER_KNOWN_CHANGED
:
1169 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server key has changed. Either you are under attack or the administrator changed the key.");
1170 server_identity_is_ok
= 0;
1173 case SSH_SERVER_FILE_NOT_FOUND
:
1174 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The known host file %s does not exist", s
->ssh
->server_hostkey_path
);
1175 server_identity_is_ok
= 0;
1178 case SSH_SERVER_ERROR
:
1179 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "Some error happened");
1180 server_identity_is_ok
= 0;
1183 case SSH_SERVER_FOUND_OTHER
:
1184 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server gave use a key of a type while we had an other type recorded. " \
1185 "It is a possible attack.");
1186 server_identity_is_ok
= 0;
1190 if (!server_identity_is_ok
)
1195 case SK_SSH_USERAUTH
:
1197 s
->ssh
->state
= SK_SSH_USERAUTH
;
1198 switch (ssh_userauth_publickey_auto(s
->ssh
->session
, NULL
, NULL
))
1200 case SSH_AUTH_AGAIN
:
1203 case SSH_AUTH_SUCCESS
:
1211 case SK_SSH_CHANNEL
:
1213 s
->ssh
->state
= SK_SSH_CHANNEL
;
1214 s
->ssh
->channel
= ssh_channel_new(s
->ssh
->session
);
1215 if (s
->ssh
->channel
== NULL
)
1219 case SK_SSH_SESSION
:
1221 s
->ssh
->state
= SK_SSH_SESSION
;
1222 switch (ssh_channel_open_session(s
->ssh
->channel
))
1235 case SK_SSH_SUBSYSTEM
:
1237 s
->ssh
->state
= SK_SSH_SUBSYSTEM
;
1238 if (s
->ssh
->subsystem
)
1240 switch (ssh_channel_request_subsystem(s
->ssh
->channel
, s
->ssh
->subsystem
))
1254 case SK_SSH_ESTABLISHED
:
1255 s
->ssh
->state
= SK_SSH_ESTABLISHED
;
1262 * Return file descriptor number if success
1263 * Return -1 if failed
1266 sk_open_ssh(sock
*s
)
1269 bug("sk_open() sock->ssh is not allocated");
1271 ssh_session sess
= ssh_new();
1273 ERR2("Cannot create a ssh session");
1274 s
->ssh
->session
= sess
;
1276 const int verbosity
= SSH_LOG_NOLOG
;
1277 ssh_options_set(sess
, SSH_OPTIONS_LOG_VERBOSITY
, &verbosity
);
1278 ssh_options_set(sess
, SSH_OPTIONS_HOST
, s
->host
);
1279 ssh_options_set(sess
, SSH_OPTIONS_PORT
, &(s
->dport
));
1280 /* TODO: Add SSH_OPTIONS_BINDADDR */
1281 ssh_options_set(sess
, SSH_OPTIONS_USER
, s
->ssh
->username
);
1283 if (s
->ssh
->server_hostkey_path
)
1284 ssh_options_set(sess
, SSH_OPTIONS_KNOWNHOSTS
, s
->ssh
->server_hostkey_path
);
1286 if (s
->ssh
->client_privkey_path
)
1287 ssh_options_set(sess
, SSH_OPTIONS_IDENTITY
, s
->ssh
->client_privkey_path
);
1289 ssh_set_blocking(sess
, 0);
1291 switch (sk_ssh_connect(s
))
1297 sk_ssh_connected(s
);
1301 ERR2(ssh_get_error(sess
));
1305 return ssh_get_fd(sess
);
1313 * sk_open - open a socket
1316 * This function takes a socket resource created by sk_new() and
1317 * initialized by the user and binds a corresponding network connection
1320 * Result: 0 for success, -1 for an error.
1329 ip_addr bind_addr
= IPA_NONE
;
1332 if (s
->type
<= SK_IP
)
1335 * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
1336 * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
1337 * But the specifications have to be consistent.
1343 ASSERT(ipa_zero(s
->saddr
) || ipa_zero(s
->daddr
) ||
1344 (ipa_is_ip4(s
->saddr
) == ipa_is_ip4(s
->daddr
)));
1345 af
= (ipa_is_ip4(s
->saddr
) || ipa_is_ip4(s
->daddr
)) ? AF_INET
: AF_INET6
;
1349 ASSERT(ipa_zero(s
->saddr
) || ipa_is_ip4(s
->saddr
));
1350 ASSERT(ipa_zero(s
->daddr
) || ipa_is_ip4(s
->daddr
));
1355 ASSERT(ipa_zero(s
->saddr
) || !ipa_is_ip4(s
->saddr
));
1356 ASSERT(ipa_zero(s
->daddr
) || !ipa_is_ip4(s
->daddr
));
1361 bug("Invalid subtype %d", s
->subtype
);
1368 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1370 case SK_TCP_PASSIVE
:
1371 fd
= socket(af
, SOCK_STREAM
, IPPROTO_TCP
);
1372 bind_port
= s
->sport
;
1373 bind_addr
= s
->saddr
;
1374 do_bind
= bind_port
|| ipa_nonzero(bind_addr
);
1379 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1380 fd
= sk_open_ssh(s
);
1385 fd
= socket(af
, SOCK_DGRAM
, IPPROTO_UDP
);
1386 bind_port
= s
->sport
;
1387 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1392 fd
= socket(af
, SOCK_RAW
, s
->dport
);
1394 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1395 do_bind
= ipa_nonzero(bind_addr
);
1404 bug("sk_open() called for invalid sock type %d", s
->type
);
1413 if (sk_setup(s
) < 0)
1422 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &y
, sizeof(y
)) < 0)
1423 ERR2("SO_REUSEADDR");
1425 #ifdef CONFIG_NO_IFACE_BIND
1426 /* Workaround missing ability to bind to an iface */
1427 if ((s
->type
== SK_UDP
) && s
->iface
&& ipa_zero(bind_addr
))
1429 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &y
, sizeof(y
)) < 0)
1430 ERR2("SO_REUSEPORT");
1435 if (s
->flags
& SKF_HIGH_PORT
)
1436 if (sk_set_high_port(s
) < 0)
1437 log(L_WARN
"Socket error: %s%#m", s
->err
);
1439 sockaddr_fill(&sa
, s
->af
, bind_addr
, s
->iface
, bind_port
);
1440 if (bind(fd
, &sa
.sa
, SA_LEN(sa
)) < 0)
1445 if (sk_set_md5_auth(s
, s
->saddr
, s
->daddr
, s
->iface
, s
->password
, 0) < 0)
1451 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1452 if (connect(fd
, &sa
.sa
, SA_LEN(sa
)) >= 0)
1453 sk_tcp_connected(s
);
1454 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
&&
1455 errno
!= ECONNREFUSED
&& errno
!= EHOSTUNREACH
&& errno
!= ENETUNREACH
)
1459 case SK_TCP_PASSIVE
:
1460 if (listen(fd
, 8) < 0)
1472 if (!(s
->flags
& SKF_THREAD
))
1484 sk_open_unix(sock
*s
, char *name
)
1486 struct sockaddr_un sa
;
1489 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1491 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1495 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1498 /* Path length checked in test_old_bird() */
1499 sa
.sun_family
= AF_UNIX
;
1500 strcpy(sa
.sun_path
, name
);
1502 if (bind(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) < 0)
1505 if (listen(fd
, 8) < 0)
/* Sizes of the on-stack control-message buffers used for receive and
   transmit. RX takes the larger of the IPv4 and IPv6 (pktinfo + TTL)
   space; TX takes the larger of the IPv4 and IPv6 pktinfo space. */
1514 #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1515 CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1516 #define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1519 sk_prepare_cmsgs(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
1522 sk_prepare_cmsgs4(s
, msg
, cbuf
, cbuflen
);
1524 sk_prepare_cmsgs6(s
, msg
, cbuf
, cbuflen
);
1528 sk_process_cmsgs(sock
*s
, struct msghdr
*msg
)
1532 s
->laddr
= IPA_NONE
;
1536 for (cm
= CMSG_FIRSTHDR(msg
); cm
!= NULL
; cm
= CMSG_NXTHDR(msg
, cm
))
1538 if ((cm
->cmsg_level
== SOL_IP
) && sk_is_ipv4(s
))
1540 sk_process_cmsg4_pktinfo(s
, cm
);
1541 sk_process_cmsg4_ttl(s
, cm
);
1544 if ((cm
->cmsg_level
== SOL_IPV6
) && sk_is_ipv6(s
))
1546 sk_process_cmsg6_pktinfo(s
, cm
);
1547 sk_process_cmsg6_ttl(s
, cm
);
1556 struct iovec iov
= {s
->tbuf
, s
->tpos
- s
->tbuf
};
1557 byte cmsg_buf
[CMSG_TX_SPACE
];
1561 sockaddr_fill(&dst
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1563 struct msghdr msg
= {
1564 .msg_name
= &dst
.sa
,
1565 .msg_namelen
= SA_LEN(dst
),
1570 #ifdef CONFIG_DONTROUTE_UNICAST
1571 /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
1572 cannot use it for other cases (e.g. when TTL security is used). */
1573 if (ipa_is_ip4(s
->daddr
) && ip4_is_unicast(ipa_to_ip4(s
->daddr
)) && (s
->ttl
== 1))
1574 flags
= MSG_DONTROUTE
;
1577 #ifdef CONFIG_USE_HDRINCL
1579 struct iovec iov2
[2] = { {hdr
, 20}, iov
};
1581 if (s
->flags
& SKF_HDRINCL
)
1583 sk_prepare_ip_header(s
, hdr
, iov
.iov_len
);
1589 if (s
->flags
& SKF_PKTINFO
)
1590 sk_prepare_cmsgs(s
, &msg
, cmsg_buf
, sizeof(cmsg_buf
));
1592 return sendmsg(s
->fd
, &msg
, flags
);
1598 struct iovec iov
= {s
->rbuf
, s
->rbsize
};
1599 byte cmsg_buf
[CMSG_RX_SPACE
];
1602 struct msghdr msg
= {
1603 .msg_name
= &src
.sa
,
1604 .msg_namelen
= sizeof(src
), // XXXX ??
1607 .msg_control
= cmsg_buf
,
1608 .msg_controllen
= sizeof(cmsg_buf
),
1612 int rv
= recvmsg(s
->fd
, &msg
, 0);
1617 // if (cf_type == SK_IP)
1618 // rv = ipv4_skip_header(pbuf, rv);
1621 sockaddr_read(&src
, s
->af
, &s
->faddr
, NULL
, &s
->fport
);
1622 sk_process_cmsgs(s
, &msg
);
1624 if (msg
.msg_flags
& MSG_TRUNC
)
1625 s
->flags
|= SKF_TRUNCATED
;
1627 s
->flags
&= ~SKF_TRUNCATED
;
/* Rewind both transmit cursors of @s (ttx and tpos) to the start of its
   transmit buffer tbuf. */
1633 static inline void reset_tx_buffer(sock
*s
) { s
->ttx
= s
->tpos
= s
->tbuf
; }
1636 sk_maybe_write(sock
*s
)
1645 while (s
->ttx
!= s
->tpos
)
1647 e
= write(s
->fd
, s
->ttx
, s
->tpos
- s
->ttx
);
1651 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1654 /* EPIPE is just a connection close notification during TX */
1655 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1667 while (s
->ttx
!= s
->tpos
)
1669 e
= ssh_channel_write(s
->ssh
->channel
, s
->ttx
, s
->tpos
- s
->ttx
);
1673 s
->err
= ssh_get_error(s
->ssh
->session
);
1674 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
1677 /* EPIPE is just a connection close notification during TX */
1678 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1690 if (s
->tbuf
== s
->tpos
)
1697 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1700 s
->err_hook(s
, errno
);
1713 bug("sk_maybe_write: unknown socket type %d", s
->type
);
1718 sk_rx_ready(sock
*s
)
1721 struct pollfd pfd
= { .fd
= s
->fd
};
1722 pfd
.events
|= POLLIN
;
1725 rv
= poll(&pfd
, 1, 0);
1727 if ((rv
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
1734 * sk_send - send data to a socket
1736 * @len: number of bytes to send
1738 * This function sends @len bytes of data prepared in the
1739 * transmit buffer of the socket @s to the network connection.
1740 * If the packet can be sent immediately, it does so and returns
1741 * 1, else it queues the packet for later processing, returns 0
1742 * and calls the @tx_hook of the socket when the transmission
1746 sk_send(sock
*s
, unsigned len
)
1749 s
->tpos
= s
->tbuf
+ len
;
1750 return sk_maybe_write(s
);
1754 * sk_send_to - send data to a specific destination
1756 * @len: number of bytes to send
1757 * @addr: IP address to send the packet to
1758 * @port: port to send the packet to
1760 * This is a sk_send() replacement for connection-less packet sockets
1761 * which allows destination of the packet to be chosen dynamically.
1762 * Raw IP sockets should use 0 for @port.
1765 sk_send_to(sock
*s
, unsigned len
, ip_addr addr
, unsigned port
)
1772 s
->tpos
= s
->tbuf
+ len
;
1773 return sk_maybe_write(s
);
1778 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1779 ip_addr saddr, ip_addr daddr, unsigned dport)
1786 s->tpos = s->tbuf + len;
1787 return sk_maybe_write(s);
1792 call_rx_hook(sock
*s
, int size
)
1794 if (s
->rx_hook(s
, size
))
1796 /* We need to be careful since the socket could have been deleted by the hook */
1797 if (current_sock
== s
)
/* sk_read_ssh: read pending data from the SSH channel of @s into its receive buffer. */
1804 sk_read_ssh(sock
*s
)
/* Two-slot array for ssh_channel_select(): the socket's channel followed by NULL. */
1806 ssh_channel rchans
[2] = { s
->ssh
->channel
, NULL
};
/* Select timeout: 1 second, 0 microseconds. */
1807 struct timeval timev
= { 1, 0 };
/* An interrupted select is reported to the caller as 1 so the read is retried. */
1809 if (ssh_channel_select(rchans
, NULL
, NULL
, &timev
) == SSH_EINTR
)
1810 return 1; /* Try again */
1812 if (ssh_channel_is_eof(s
->ssh
->channel
) != 0)
1814 /* The remote side is closing the connection */
/* After the select, a NULL first slot is taken to mean nothing is readable. */
1819 if (rchans
[0] == NULL
)
1820 return 0; /* No data is available on the socket */
/* Number of bytes already stored in the receive buffer. */
1822 const uint used_bytes
= s
->rpos
- s
->rbuf
;
/* Non-blocking read into the free space that remains after the stored bytes. */
1823 const int read_bytes
= ssh_channel_read_nonblocking(s
->ssh
->channel
, s
->rpos
, s
->rbsize
- used_bytes
, 0);
/* Account for the new bytes and hand the total buffered size to the rx hook. */
1827 s
->rpos
+= read_bytes
;
1828 call_rx_hook(s
, used_bytes
+ read_bytes
);
/* Nothing was read: check whether the channel reached end-of-file. */
1831 else if (read_bytes
== 0)
1833 if (ssh_channel_is_eof(s
->ssh
->channel
) != 0)
1835 /* The remote side is closing the connection */
/* Error path: store the libssh error text in s->err and pass the error code to err_hook. */
1841 s
->err
= ssh_get_error(s
->ssh
->session
);
1842 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
1845 return 0; /* No data is available on the socket */
1849 /* sk_read() and sk_write() are called from BFD's event loop */
/* sk_read: perform the type-specific read for socket @s; @revents holds the poll() event bits reported for it. */
1852 sk_read(sock
*s
, int revents
)
/* Passive TCP sockets are handled by sk_passive_connected() with type SK_TCP. */
1856 case SK_TCP_PASSIVE
:
1857 return sk_passive_connected(s
, SK_TCP
);
/* Passive UNIX sockets are handled the same way with type SK_UNIX. */
1859 case SK_UNIX_PASSIVE
:
1860 return sk_passive_connected(s
, SK_UNIX
);
/* Read into the free part of the receive buffer: from rpos up to rbuf + rbsize. */
1865 int c
= read(s
->fd
, s
->rpos
, s
->rbuf
+ s
->rbsize
- s
->rpos
);
/* Errors other than EINTR and EAGAIN are reported through err_hook. */
1869 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1870 s
->err_hook(s
, errno
);
/* EAGAIN although poll() did not report POLLIN is logged as an error. */
1871 else if (errno
== EAGAIN
&& !(revents
& POLLIN
))
1873 log(L_ERR
"Got EAGAIN from read when revents=%x (without POLLIN)", revents
);
/* The rx hook receives the number of bytes between rbuf and rpos. */
1882 call_rx_hook(s
, s
->rpos
- s
->rbuf
);
/* This case delegates to sk_read_ssh(). */
1890 return sk_read_ssh(s
);
/* This case calls the rx hook directly with size 0 and returns its result. */
1894 return s
->rx_hook(s
, 0);
/* Message-based read; the result of sk_recvmsg() is kept in e. */
1898 int e
= sk_recvmsg(s
);
/* Errors other than EINTR and EAGAIN are reported through err_hook. */
1902 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1903 s
->err_hook(s
, errno
);
/* rpos is placed e bytes past the start of the receive buffer. */
1907 s
->rpos
= s
->rbuf
+ e
;
/* NOTE(review): the enclosing function header and switch labels are not visible in this excerpt. */
/* Fill sa from the socket's address family, destination address, interface and destination port. */
1922 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
/* A successful connect() or EISCONN counts as connected. */
1924 if (connect(s
->fd
, &sa
.sa
, SA_LEN(sa
)) >= 0 || errno
== EISCONN
)
1925 sk_tcp_connected(s
);
/* EINTR, EAGAIN and EINPROGRESS are not reported; any other errno goes to err_hook. */
1926 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
)
1927 s
->err_hook(s
, errno
);
/* Dispatch on the result of sk_ssh_connect(). */
1934 switch (sk_ssh_connect(s
))
1937 sk_ssh_connected(s
);
/* Error path: store the libssh error text in s->err and pass the error code to err_hook. */
1944 s
->err
= ssh_get_error(s
->ssh
->session
);
1945 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
/* Only attempt a write when ttx and tpos differ, i.e. data is still pending. */
1953 if (s
->ttx
!= s
->tpos
&& sk_maybe_write(s
) > 0)
1963 int sk_is_ipv4(sock
*s
)
1964 { return s
->af
== AF_INET
; }
1966 int sk_is_ipv6(sock
*s
)
1967 { return s
->af
== AF_INET6
; }
/* sk_err: error handling for socket @s; @revents holds the poll() event bits reported for it. */
1970 sk_err(sock
*s
, int revents
)
/* se receives the pending socket error; sse is the option length passed to getsockopt(). */
/* NOTE(review): sse is an int, while getsockopt() takes a socklen_t * for the length - confirm this is intended. */
1972 int se
= 0, sse
= sizeof(se
);
/* SO_ERROR is only queried for non-SK_MAGIC sockets that reported POLLERR. */
1973 if ((s
->type
!= SK_MAGIC
) && (revents
& POLLERR
))
1974 if (getsockopt(s
->fd
, SOL_SOCKET
, SO_ERROR
, &se
, &sse
) < 0)
/* A failing getsockopt() is logged; %m is presumably expanded by log() to the errno text. */
1976 log(L_ERR
"IO: Socket error: SO_ERROR: %m");
/* NOTE(review): the enclosing function header and the loop body are not visible in this excerpt. */
/* Print a heading, then walk every node of sock_list. */
1989 debug("Open sockets:\n");
1990 WALK_LIST(n
, sock_list
)
/* Recover the sock that embeds list node n. */
1992 s
= SKIP_BACK(sock
, n
, n
);
2001 * Internal event log and watchdog
/* Number of entries in the circular event log. */
2004 #define EVENT_LOG_LENGTH 32
/* One event log entry; the code below uses its hook, data, timestamp and duration members. */
2006 struct event_log_entry
/* Storage for the circular event log. */
2014 static struct event_log_entry event_log
[EVENT_LOG_LENGTH
];
/* Entry whose duration is still to be filled in, or NULL. */
2015 static struct event_log_entry
*event_open
;
/* event_log_pos indexes the next slot to be written; watchdog_active is set while an alarm is armed. */
2016 static int event_log_pos
, event_log_num
, watchdog_active
;
/* Time of the most recent io_update_time() call. */
2017 static btime last_time
;
/* Value of last_time saved when the watchdog was started. */
2018 static btime loop_time
;
2021 io_update_time(void)
2027 * This is the third time-tracking procedure (after update_times() above and
2028 * times_update() in BFD), dedicated to the internal event log and latency
2029 * tracking. Hopefully, we will consolidate these sometime.
/* Body of io_update_time(): refresh last_time and close the accounting of the open event, if any. */
/* Read the monotonic clock into ts. */
2032 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
/* Failure to read the clock is fatal. */
2034 die("clock_gettime: %m");
/* Combine seconds and nanoseconds into one btime value using the S and NS unit suffixes. */
2036 last_time
= ts
.tv_sec S
+ ts
.tv_nsec NS
;
/* Duration of the open event: time elapsed since its timestamp. */
2040 event_open
->duration
= last_time
- event_open
->timestamp
;
/* Warn when the event ran longer than the configured latency limit. */
2042 if (event_open
->duration
> config
->latency_limit
)
2043 log(L_WARN
"Event 0x%p 0x%p took %d ms",
2044 event_open
->hook
, event_open
->data
, (int) (event_open
->duration TO_MS
));
2051 * io_log_event - mark approaching event into event log
2052 * @hook: event hook address
2053 * @data: event data address
2055 * Store info (hook, data, timestamp) about the following internal event into
2056 * a circular event log (@event_log). When latency tracking is enabled, the log
2057 * entry is kept open (in @event_open) so the duration can be filled later.
/* io_log_event: record an upcoming event, identified by @hook and @data, in the circular event log. */
2060 io_log_event(void *hook
, void *data
)
/* Extra work is done only when latency debugging is enabled in the configuration. */
2062 if (config
->latency_debug
)
/* en is the slot at the current write position. */
2065 struct event_log_entry
*en
= event_log
+ event_log_pos
;
/* The entry is stamped with the most recently sampled time. */
2069 en
->timestamp
= last_time
;
/* Wrap the write position back into the array. */
2074 event_log_pos
%= EVENT_LOG_LENGTH
;
/* Keep the entry open only with latency debugging enabled, so its duration can be filled in later. */
2076 event_open
= config
->latency_debug
? en
: NULL
;
2080 io_close_event(void)
/* NOTE(review): the enclosing function header is not visible in this excerpt. */
/* Dump the event log to the debug log. */
2091 log(L_DEBUG
"Event log:");
/* Visit all slots once, starting at the current write position and wrapping around. */
2092 for (i
= 0; i
< EVENT_LOG_LENGTH
; i
++)
2094 struct event_log_entry
*en
= event_log
+ (event_log_pos
+ i
) % EVENT_LOG_LENGTH
;
/* Print hook, data, the entry's age relative to last_time and its duration, both in ms. */
2096 log(L_DEBUG
" Event 0x%p 0x%p at %8d for %d ms", en
->hook
, en
->data
,
2097 (int) ((last_time
- en
->timestamp
) TO_MS
), (int) (en
->duration TO_MS
));
/* watchdog_sigalrm: SIGALRM handler of the watchdog; the signal number is unused. */
2102 watchdog_sigalrm(int sig UNUSED
)
2104 /* Update last_time and duration, but skip latency check */
/* Raising the limit to 0xffffffff keeps the latency warning from firing. */
2105 config
->latency_limit
= 0xffffffff;
2108 /* We want core dump */
/* watchdog_start1: save the current last_time into loop_time. */
2113 watchdog_start1(void)
2117 loop_time
= last_time
;
/* watchdog_start: save the loop start time and arm the watchdog alarm when a timeout is configured. */
2121 watchdog_start(void)
2125 loop_time
= last_time
;
/* A zero watchdog_timeout leaves the watchdog disarmed. */
2128 if (config
->watchdog_timeout
)
/* Schedule SIGALRM after watchdog_timeout seconds and note that the alarm is armed. */
2130 alarm(config
->watchdog_timeout
);
2131 watchdog_active
= 1;
/* NOTE(review): the enclosing function header is not visible in this excerpt. */
/* If the watchdog is armed, mark it as no longer active. */
2140 if (watchdog_active
)
2143 watchdog_active
= 0;
/* Length of the finished cycle: time elapsed since loop_time was saved. */
2146 btime duration
= last_time
- loop_time
;
/* Warn when the cycle exceeded the configured watchdog_warning threshold. */
2147 if (duration
> config
->watchdog_warning
)
2148 log(L_WARN
"I/O loop cycle took %d ms for %d events",
2149 (int) (duration TO_MS
), event_log_num
);
/* Request flags tested and cleared in the main loop before it enters poll(). */
2157 volatile int async_config_flag
; /* Asynchronous reconfiguration/dump scheduled */
2158 volatile int async_dump_flag
;
2159 volatile int async_shutdown_flag
;
/* NOTE(review): the enclosing function header is not visible in this excerpt. */
/* Start with an empty socket list and an empty global event list. */
2164 init_list(&sock_list
);
2165 init_list(&global_event_list
);
2167 // XXX init_times();
2168 // XXX update_times();
/* Remember the time at which the daemon started. */
2169 boot_time
= current_time();
/* Seed random() from the real time, XOR-ing its upper 32 bits into the lower ones. */
2171 u64 now
= (u64
) current_real_time();
2172 srandom((uint
) (now
^ (now
>> 32)));
/* short_loops is compared against SHORT_LOOP_MAX in the main loop. */
2175 static int short_loops
= 0;
/* Upper bound used in that comparison. */
2176 #define SHORT_LOOP_MAX 10
/* NOTE(review): the enclosing function header, braces and several guards are not visible in this excerpt. */
/* Main I/O loop: run events and timers, poll all sockets, then dispatch their rx/tx/error handlers. */
2181 int poll_tout
, timeout
;
2182 int nfds
, events
, pout
;
/* Array of poll descriptors with room for fdmax entries. */
2187 struct pollfd
*pfd
= xmalloc(fdmax
* sizeof(struct pollfd
));
/* Refresh the loop time, run the global event list and fire timers. */
2192 times_update(&main_timeloop
);
2193 events
= ev_run_list(&global_event_list
);
2194 timers_fire(&main_timeloop
);
/* Do not block in poll() when ev_run_list() returned non-zero; otherwise wait at most 3000 ms. */
2198 poll_tout
= (events
? 0 : 3000); /* Time in milliseconds */
/* If a timer is pending, cap the poll timeout at its remaining time in ms plus one. */
2199 if (t
= timers_first(&main_timeloop
))
2201 times_update(&main_timeloop
);
2202 timeout
= (tm_remains(t
) TO_MS
) + 1;
2203 poll_tout
= MIN(poll_tout
, timeout
);
/* Build one poll entry per socket in sock_list. */
2207 WALK_LIST(n
, sock_list
)
2209 pfd
[nfds
] = (struct pollfd
) { .fd
= -1 }; /* all other fields are zeroed by this initializer */
2210 s
= SKIP_BACK(sock
, n
, n
);
/* Ask for read readiness. NOTE(review): the guard for this registration is not visible here. */
2213 pfd
[nfds
].fd
= s
->fd
;
2214 pfd
[nfds
].events
|= POLLIN
;
/* Ask for write readiness only when a tx_hook is set and ttx differs from tpos. */
2216 if (s
->tx_hook
&& s
->ttx
!= s
->tpos
)
2218 pfd
[nfds
].fd
= s
->fd
;
2219 pfd
[nfds
].events
|= POLLOUT
;
/* The entry is in use only if a descriptor was stored in it above. */
2221 if (pfd
[nfds
].fd
!= -1)
/* Resize the poll array to the current fdmax. */
2232 pfd
= xrealloc(pfd
, fdmax
* sizeof(struct pollfd
));
2237 * Yes, this is racy. But even if the signal comes before this test
2238 * and entering poll(), it gets caught on the next timer tick.
/* Each pending asynchronous request is recorded in the event log and its flag cleared. */
2241 if (async_config_flag
)
2243 io_log_event(async_config
, NULL
);
2245 async_config_flag
= 0;
2248 if (async_dump_flag
)
2250 io_log_event(async_dump
, NULL
);
2252 async_dump_flag
= 0;
2255 if (async_shutdown_flag
)
2257 io_log_event(async_shutdown
, NULL
);
2259 async_shutdown_flag
= 0;
2263 /* And finally enter poll() to find active sockets */
2265 pout
= poll(pfd
, nfds
, poll_tout
);
/* EINTR and EAGAIN from poll() get their own branch. */
2270 if (errno
== EINTR
|| errno
== EAGAIN
)
/* Refresh the loop time after poll() returns. */
2276 times_update(&main_timeloop
);
2278 /* guaranteed to be non-empty */
/* First pass: start at the head of the socket list. */
2279 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
2281 while (current_sock
)
2283 sock
*s
= current_sock
;
2286 current_sock
= sk_next(s
);
/* fast_rx sockets with readable data and an rx_hook are read in this pass. */
2294 if (s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2298 io_log_event(s
->rx_hook
, s
->data
);
2299 e
= sk_read(s
, pfd
[s
->index
].revents
);
/* current_sock no longer equal to s presumably means a hook changed it (e.g. deleted s). */
2300 if (s
!= current_sock
)
/* Keep reading while sk_read() returns non-zero, the rx_hook is still set and steps is non-zero. */
2303 while (e
&& s
->rx_hook
&& steps
);
/* Sockets reported writable get their tx side processed. */
2306 if (pfd
[s
->index
].revents
& POLLOUT
)
2310 io_log_event(s
->tx_hook
, s
->data
);
2312 if (s
!= current_sock
)
2317 current_sock
= sk_next(s
);
/* Branch taken while events is non-zero and fewer than SHORT_LOOP_MAX short loops were counted. */
2322 if (events
&& (short_loops
< SHORT_LOOP_MAX
))
/* Second pass: resume at the stored socket, or at the list head when none is stored. */
2327 current_sock
= stored_sock
;
2328 if (current_sock
== NULL
)
2329 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
/* The pass stops once count reaches MAX_RX_STEPS. */
2331 while (current_sock
&& count
< MAX_RX_STEPS
)
2333 sock
*s
= current_sock
;
2336 current_sock
= sk_next(s
);
/* Only sockets without fast_rx are read in this pass. */
2340 if (!s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2343 io_log_event(s
->rx_hook
, s
->data
);
2344 sk_read(s
, pfd
[s
->index
].revents
);
2345 if (s
!= current_sock
)
/* Hang-up and error conditions are passed to sk_err(). */
2349 if (pfd
[s
->index
].revents
& (POLLHUP
| POLLERR
))
2351 sk_err(s
, pfd
[s
->index
].revents
);
2352 if (s
!= current_sock
)
2356 current_sock
= sk_next(s
);
/* Remember where this pass stopped so that the next one can resume there. */
2361 stored_sock
= current_sock
;
2367 test_old_bird(char *path
)
2370 struct sockaddr_un sa
;
2372 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
2374 die("Cannot create socket: %m");
2375 if (strlen(path
) >= sizeof(sa
.sun_path
))
2376 die("Socket path too long");
2377 bzero(&sa
, sizeof(sa
));
2378 sa
.sun_family
= AF_UNIX
;
2379 strcpy(sa
.sun_path
, path
);
2380 if (connect(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) == 0)
2381 die("I found another BIRD running.");