2 * BIRD Internet Routing Daemon -- Unix I/O
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
7 * Can be freely distributed and used under the terms of the GNU GPL.
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
20 #include <sys/types.h>
21 #include <sys/socket.h>
29 #include <netinet/in.h>
30 #include <netinet/tcp.h>
31 #include <netinet/udp.h>
32 #include <netinet/icmp6.h>
34 #include "nest/bird.h"
35 #include "lib/lists.h"
36 #include "lib/resource.h"
37 #include "lib/socket.h"
38 #include "lib/event.h"
39 #include "lib/timer.h"
40 #include "lib/string.h"
41 #include "nest/iface.h"
42 #include "conf/conf.h"
44 #include "sysdep/unix/unix.h"
45 #include CONFIG_INCLUDE_SYSIO_H
47 /* Maximum number of calls of tx handler for one socket in one
48 * poll iteration. Should be small enough to not monopolize CPU by
49 * one protocol instance.
53 /* Maximum number of calls of rx handler for all sockets in one poll
54 iteration. RX callbacks are often much more costly so we limit
55 this to get small latencies */
56 #define MAX_RX_STEPS 4
70 struct rfile
*a
= (struct rfile
*) r
;
78 struct rfile
*a
= (struct rfile
*) r
;
80 debug("(FILE *%p)\n", a
->f
);
83 static struct resclass rf_class
= {
93 tracked_fopen(pool
*p
, char *name
, char *mode
)
95 FILE *f
= fopen(name
, mode
);
99 struct rfile
*r
= ralloc(p
, &rf_class
);
113 times_init(struct timeloop
*loop
)
118 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
120 die("Monotonic clock is missing");
122 if ((ts
.tv_sec
< 0) || (((s64
) ts
.tv_sec
) > ((s64
) 1 << 40)))
123 log(L_WARN
"Monotonic clock is crazy");
125 loop
->last_time
= ts
.tv_sec S
+ ts
.tv_nsec NS
;
130 times_update(struct timeloop
*loop
)
135 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
137 die("clock_gettime: %m");
139 btime new_time
= ts
.tv_sec S
+ ts
.tv_nsec NS
;
141 if (new_time
< loop
->last_time
)
142 log(L_ERR
"Monotonic clock is broken");
144 loop
->last_time
= new_time
;
149 times_update_real_time(struct timeloop
*loop
)
154 rv
= clock_gettime(CLOCK_REALTIME
, &ts
);
156 die("clock_gettime: %m");
158 loop
->real_time
= ts
.tv_sec S
+ ts
.tv_nsec NS
;
165 * Socket resources represent network connections. Their data structure (&socket)
166 * contains a lot of fields defining the exact type of the socket, the local and
167 * remote addresses and ports, pointers to socket buffers and finally pointers to
168 * hook functions to be called when new data have arrived to the receive buffer
169 * (@rx_hook), when the contents of the transmit buffer have been transmitted
170 * (@tx_hook) and when an error or connection close occurs (@err_hook).
172 * Freeing of sockets from inside socket hooks is perfectly safe.
176 #define SOL_IP IPPROTO_IP
180 #define SOL_IPV6 IPPROTO_IPV6
184 #define SOL_ICMPV6 IPPROTO_ICMPV6
189 * Sockaddr helper functions
192 static inline int UNUSED
sockaddr_length(int af
)
193 { return (af
== AF_INET
) ? sizeof(struct sockaddr_in
) : sizeof(struct sockaddr_in6
); }
196 sockaddr_fill4(struct sockaddr_in
*sa
, ip_addr a
, uint port
)
198 memset(sa
, 0, sizeof(struct sockaddr_in
));
199 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
200 sa
->sin_len
= sizeof(struct sockaddr_in
);
202 sa
->sin_family
= AF_INET
;
203 sa
->sin_port
= htons(port
);
204 sa
->sin_addr
= ipa_to_in4(a
);
208 sockaddr_fill6(struct sockaddr_in6
*sa
, ip_addr a
, struct iface
*ifa
, uint port
)
210 memset(sa
, 0, sizeof(struct sockaddr_in6
));
212 sa
->sin6_len
= sizeof(struct sockaddr_in6
);
214 sa
->sin6_family
= AF_INET6
;
215 sa
->sin6_port
= htons(port
);
216 sa
->sin6_flowinfo
= 0;
217 sa
->sin6_addr
= ipa_to_in6(a
);
219 if (ifa
&& ipa_is_link_local(a
))
220 sa
->sin6_scope_id
= ifa
->index
;
224 sockaddr_fill(sockaddr
*sa
, int af
, ip_addr a
, struct iface
*ifa
, uint port
)
227 sockaddr_fill4((struct sockaddr_in
*) sa
, a
, port
);
228 else if (af
== AF_INET6
)
229 sockaddr_fill6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
235 sockaddr_read4(struct sockaddr_in
*sa
, ip_addr
*a
, uint
*port
)
237 *port
= ntohs(sa
->sin_port
);
238 *a
= ipa_from_in4(sa
->sin_addr
);
242 sockaddr_read6(struct sockaddr_in6
*sa
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
244 *port
= ntohs(sa
->sin6_port
);
245 *a
= ipa_from_in6(sa
->sin6_addr
);
247 if (ifa
&& ipa_is_link_local(*a
))
248 *ifa
= if_find_by_index(sa
->sin6_scope_id
);
252 sockaddr_read(sockaddr
*sa
, int af
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
254 if (sa
->sa
.sa_family
!= af
)
258 sockaddr_read4((struct sockaddr_in
*) sa
, a
, port
);
259 else if (af
== AF_INET6
)
260 sockaddr_read6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
274 * IPv6 multicast syscalls
277 /* Fortunately standardized in RFC 3493 */
279 #define INIT_MREQ6(maddr,ifa) \
280 { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
283 sk_setup_multicast6(sock
*s
)
285 int index
= s
->iface
->index
;
289 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_IF
, &index
, sizeof(index
)) < 0)
290 ERR("IPV6_MULTICAST_IF");
292 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
293 ERR("IPV6_MULTICAST_HOPS");
295 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_LOOP
, &n
, sizeof(n
)) < 0)
296 ERR("IPV6_MULTICAST_LOOP");
302 sk_join_group6(sock
*s
, ip_addr maddr
)
304 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
306 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_JOIN_GROUP
, &mr
, sizeof(mr
)) < 0)
307 ERR("IPV6_JOIN_GROUP");
313 sk_leave_group6(sock
*s
, ip_addr maddr
)
315 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
317 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_LEAVE_GROUP
, &mr
, sizeof(mr
)) < 0)
318 ERR("IPV6_LEAVE_GROUP");
325 * IPv6 packet control messages
328 /* Also standardized, in RFC 3542 */
331 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
332 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
333 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
334 * RFC and we use IPV6_PKTINFO.
336 #ifndef IPV6_RECVPKTINFO
337 #define IPV6_RECVPKTINFO IPV6_PKTINFO
340 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
342 #ifndef IPV6_RECVHOPLIMIT
343 #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
347 #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
348 #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
351 sk_request_cmsg6_pktinfo(sock
*s
)
355 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVPKTINFO
, &y
, sizeof(y
)) < 0)
356 ERR("IPV6_RECVPKTINFO");
362 sk_request_cmsg6_ttl(sock
*s
)
366 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVHOPLIMIT
, &y
, sizeof(y
)) < 0)
367 ERR("IPV6_RECVHOPLIMIT");
373 sk_process_cmsg6_pktinfo(sock
*s
, struct cmsghdr
*cm
)
375 if (cm
->cmsg_type
== IPV6_PKTINFO
)
377 struct in6_pktinfo
*pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
378 s
->laddr
= ipa_from_in6(pi
->ipi6_addr
);
379 s
->lifindex
= pi
->ipi6_ifindex
;
384 sk_process_cmsg6_ttl(sock
*s
, struct cmsghdr
*cm
)
386 if (cm
->cmsg_type
== IPV6_HOPLIMIT
)
387 s
->rcv_ttl
= * (int *) CMSG_DATA(cm
);
391 sk_prepare_cmsgs6(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
394 struct in6_pktinfo
*pi
;
397 msg
->msg_control
= cbuf
;
398 msg
->msg_controllen
= cbuflen
;
400 cm
= CMSG_FIRSTHDR(msg
);
401 cm
->cmsg_level
= SOL_IPV6
;
402 cm
->cmsg_type
= IPV6_PKTINFO
;
403 cm
->cmsg_len
= CMSG_LEN(sizeof(*pi
));
404 controllen
+= CMSG_SPACE(sizeof(*pi
));
406 pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
407 pi
->ipi6_ifindex
= s
->iface
? s
->iface
->index
: 0;
408 pi
->ipi6_addr
= ipa_to_in6(s
->saddr
);
410 msg
->msg_controllen
= controllen
;
415 * Miscellaneous socket syscalls
419 sk_set_ttl4(sock
*s
, int ttl
)
421 if (setsockopt(s
->fd
, SOL_IP
, IP_TTL
, &ttl
, sizeof(ttl
)) < 0)
428 sk_set_ttl6(sock
*s
, int ttl
)
430 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_UNICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
431 ERR("IPV6_UNICAST_HOPS");
437 sk_set_tos4(sock
*s
, int tos
)
439 if (setsockopt(s
->fd
, SOL_IP
, IP_TOS
, &tos
, sizeof(tos
)) < 0)
446 sk_set_tos6(sock
*s
, int tos
)
448 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_TCLASS
, &tos
, sizeof(tos
)) < 0)
455 sk_set_high_port(sock
*s UNUSED
)
457 /* Port range setting is optional, ignore it if not supported */
462 int range
= IP_PORTRANGE_HIGH
;
463 if (setsockopt(s
->fd
, SOL_IP
, IP_PORTRANGE
, &range
, sizeof(range
)) < 0)
468 #ifdef IPV6_PORTRANGE
471 int range
= IPV6_PORTRANGE_HIGH
;
472 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_PORTRANGE
, &range
, sizeof(range
)) < 0)
473 ERR("IPV6_PORTRANGE");
481 sk_skip_ip_header(byte
*pkt
, int *len
)
483 if ((*len
< 20) || ((*pkt
& 0xf0) != 0x40))
486 int hlen
= (*pkt
& 0x0f) * 4;
487 if ((hlen
< 20) || (hlen
> *len
))
495 sk_rx_buffer(sock
*s
, int *len
)
497 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
))
498 return sk_skip_ip_header(s
->rbuf
, len
);
505 * Public socket functions
509 * sk_setup_multicast - enable multicast for given socket
512 * Prepare transmission of multicast packets for given datagram socket.
513 * The socket must have defined @iface.
515 * Result: 0 for success, -1 for an error.
519 sk_setup_multicast(sock
*s
)
524 return sk_setup_multicast4(s
);
526 return sk_setup_multicast6(s
);
530 * sk_join_group - join multicast group for given socket
532 * @maddr: multicast address
534 * Join multicast group for given datagram socket and associated interface.
535 * The socket must have defined @iface.
537 * Result: 0 for success, -1 for an error.
541 sk_join_group(sock
*s
, ip_addr maddr
)
544 return sk_join_group4(s
, maddr
);
546 return sk_join_group6(s
, maddr
);
550 * sk_leave_group - leave multicast group for given socket
552 * @maddr: multicast address
554 * Leave multicast group for given datagram socket and associated interface.
555 * The socket must have defined @iface.
557 * Result: 0 for success, -1 for an error.
561 sk_leave_group(sock
*s
, ip_addr maddr
)
564 return sk_leave_group4(s
, maddr
);
566 return sk_leave_group6(s
, maddr
);
570 * sk_setup_broadcast - enable broadcast for given socket
573 * Allow reception and transmission of broadcast packets for given datagram
574 * socket. The socket must have defined @iface. For transmission, packets should
575 * be sent to @brd address of @iface.
577 * Result: 0 for success, -1 for an error.
581 sk_setup_broadcast(sock
*s
)
585 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BROADCAST
, &y
, sizeof(y
)) < 0)
592 * sk_set_ttl - set transmit TTL for given socket
596 * Set TTL for already opened connections when TTL was not set before. Useful
597 * for accepted connections when different ones should have different TTL.
599 * Result: 0 for success, -1 for an error.
603 sk_set_ttl(sock
*s
, int ttl
)
608 return sk_set_ttl4(s
, ttl
);
610 return sk_set_ttl6(s
, ttl
);
614 * sk_set_min_ttl - set minimal accepted TTL for given socket
618 * Set minimal accepted TTL for given socket. Can be used for TTL security.
621 * Result: 0 for success, -1 for an error.
625 sk_set_min_ttl(sock
*s
, int ttl
)
628 return sk_set_min_ttl4(s
, ttl
);
630 return sk_set_min_ttl6(s
, ttl
);
635 * sk_set_md5_auth - add / remove MD5 security association for given socket
637 * @local: IP address of local side
638 * @remote: IP address of remote side
639 * @ifa: Interface for link-local IP address
640 * @passwd: Password used for MD5 authentication
641 * @setkey: Update also system SA/SP database
643 * In TCP MD5 handling code in kernel, there is a set of security associations
644 * used for choosing password and other authentication parameters according to
645 * the local and remote address. This function is useful for listening socket,
646 * for active sockets it may be enough to set s->password field.
648 * When called with passwd != NULL, the new pair is added,
649 * When called with passwd == NULL, the existing pair is removed.
651 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
652 * stored in global SA/SP database (but the behavior also must be enabled on
653 * per-socket basis). In case of multiple sockets to the same neighbor, the
654 * socket-specific state must be configured for each socket while global state
655 * just once per src-dst pair. The @setkey argument controls whether the global
656 * state (SA/SP database) is also updated.
658 * Result: 0 for success, -1 for an error.
662 sk_set_md5_auth(sock
*s
, ip_addr local
, ip_addr remote
, struct iface
*ifa
, char *passwd
, int setkey
)
667 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
671 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
672 * kernel will automatically fill it for outgoing packets and check it for
673 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
674 * known to the kernel.
676 * Result: 0 for success, -1 for an error.
680 sk_set_ipv6_checksum(sock
*s
, int offset
)
682 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_CHECKSUM
, &offset
, sizeof(offset
)) < 0)
683 ERR("IPV6_CHECKSUM");
689 sk_set_icmp6_filter(sock
*s
, int p1
, int p2
)
691 /* a bit of lame interface, but it is here only for Radv */
692 struct icmp6_filter f
;
694 ICMP6_FILTER_SETBLOCKALL(&f
);
695 ICMP6_FILTER_SETPASS(p1
, &f
);
696 ICMP6_FILTER_SETPASS(p2
, &f
);
698 if (setsockopt(s
->fd
, SOL_ICMPV6
, ICMP6_FILTER
, &f
, sizeof(f
)) < 0)
705 sk_log_error(sock
*s
, const char *p
)
707 log(L_ERR
"%s: Socket error: %s%#m", p
, s
->err
);
712 * Actual struct birdsock code
715 static list sock_list
;
716 static struct birdsock
*current_sock
;
717 static struct birdsock
*stored_sock
;
722 if (!s
->n
.next
->next
)
725 return SKIP_BACK(sock
, n
, s
->n
.next
);
729 sk_alloc_bufs(sock
*s
)
731 if (!s
->rbuf
&& s
->rbsize
)
732 s
->rbuf
= s
->rbuf_alloc
= xmalloc(s
->rbsize
);
734 if (!s
->tbuf
&& s
->tbsize
)
735 s
->tbuf
= s
->tbuf_alloc
= xmalloc(s
->tbsize
);
736 s
->tpos
= s
->ttx
= s
->tbuf
;
740 sk_free_bufs(sock
*s
)
744 xfree(s
->rbuf_alloc
);
745 s
->rbuf
= s
->rbuf_alloc
= NULL
;
749 xfree(s
->tbuf_alloc
);
750 s
->tbuf
= s
->tbuf_alloc
= NULL
;
758 struct ssh_sock
*ssh
= s
->ssh
;
767 if (ssh_channel_is_open(ssh
->channel
))
768 ssh_channel_close(ssh
->channel
);
769 ssh_channel_free(ssh
->channel
);
775 ssh_disconnect(ssh
->session
);
776 ssh_free(ssh
->session
);
785 sock
*s
= (sock
*) r
;
790 if (s
->type
== SK_SSH
|| s
->type
== SK_SSH_ACTIVE
)
797 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
798 if (!(s
->flags
& SKF_THREAD
))
800 if (s
== current_sock
)
801 current_sock
= sk_next(s
);
802 if (s
== stored_sock
)
803 stored_sock
= sk_next(s
);
807 if (s
->type
!= SK_SSH
&& s
->type
!= SK_SSH_ACTIVE
)
814 sk_set_rbsize(sock
*s
, uint val
)
816 ASSERT(s
->rbuf_alloc
== s
->rbuf
);
818 if (s
->rbsize
== val
)
822 xfree(s
->rbuf_alloc
);
823 s
->rbuf_alloc
= xmalloc(val
);
824 s
->rpos
= s
->rbuf
= s
->rbuf_alloc
;
828 sk_set_tbsize(sock
*s
, uint val
)
830 ASSERT(s
->tbuf_alloc
== s
->tbuf
);
832 if (s
->tbsize
== val
)
835 byte
*old_tbuf
= s
->tbuf
;
838 s
->tbuf
= s
->tbuf_alloc
= xrealloc(s
->tbuf_alloc
, val
);
839 s
->tpos
= s
->tbuf
+ (s
->tpos
- old_tbuf
);
840 s
->ttx
= s
->tbuf
+ (s
->ttx
- old_tbuf
);
844 sk_set_tbuf(sock
*s
, void *tbuf
)
846 s
->tbuf
= tbuf
?: s
->tbuf_alloc
;
847 s
->ttx
= s
->tpos
= s
->tbuf
;
851 sk_reallocate(sock
*s
)
860 sock
*s
= (sock
*) r
;
861 static char *sk_type_names
[] = { "TCP<", "TCP>", "TCP", "UDP", NULL
, "IP", NULL
, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
863 debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
864 sk_type_names
[s
->type
],
872 s
->iface
? s
->iface
->name
: "none");
875 static struct resclass sk_class
= {
885 * sk_new - create a socket
888 * This function creates a new socket resource. If you want to use it,
889 * you need to fill in all the required fields of the structure and
890 * call sk_open() to do the actual opening of the socket.
892 * The real function name is sock_new(), sk_new() is a macro wrapper
893 * to avoid collision with OpenSSL.
898 sock
*s
= ralloc(p
, &sk_class
);
900 // s->saddr = s->daddr = IPA_NONE;
901 s
->tos
= s
->priority
= s
->ttl
= -1;
912 if (s
->type
== SK_SSH_ACTIVE
)
915 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
921 if (ipa_nonzero(s
->saddr
) && !(s
->flags
& SKF_BIND
))
922 s
->flags
|= SKF_PKTINFO
;
924 #ifdef CONFIG_USE_HDRINCL
925 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
) && (s
->flags
& SKF_PKTINFO
))
927 s
->flags
&= ~SKF_PKTINFO
;
928 s
->flags
|= SKF_HDRINCL
;
929 if (setsockopt(fd
, SOL_IP
, IP_HDRINCL
, &y
, sizeof(y
)) < 0)
934 if (s
->vrf
&& !s
->iface
)
936 /* Bind socket to associated VRF interface.
937 This is Linux-specific, but so is SO_BINDTODEVICE. */
938 #ifdef SO_BINDTODEVICE
939 struct ifreq ifr
= {};
940 strcpy(ifr
.ifr_name
, s
->vrf
->name
);
941 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
942 ERR("SO_BINDTODEVICE");
948 #ifdef SO_BINDTODEVICE
949 struct ifreq ifr
= {};
950 strcpy(ifr
.ifr_name
, s
->iface
->name
);
951 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
952 ERR("SO_BINDTODEVICE");
955 #ifdef CONFIG_UNIX_DONTROUTE
956 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_DONTROUTE
, &y
, sizeof(y
)) < 0)
963 if (s
->flags
& SKF_LADDR_RX
)
964 if (sk_request_cmsg4_pktinfo(s
) < 0)
967 if (s
->flags
& SKF_TTL_RX
)
968 if (sk_request_cmsg4_ttl(s
) < 0)
971 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
972 if (sk_disable_mtu_disc4(s
) < 0)
976 if (sk_set_ttl4(s
, s
->ttl
) < 0)
980 if (sk_set_tos4(s
, s
->tos
) < 0)
986 if ((s
->type
== SK_TCP_PASSIVE
) || (s
->type
== SK_TCP_ACTIVE
) || (s
->type
== SK_UDP
))
987 if (setsockopt(fd
, SOL_IPV6
, IPV6_V6ONLY
, &y
, sizeof(y
)) < 0)
990 if (s
->flags
& SKF_LADDR_RX
)
991 if (sk_request_cmsg6_pktinfo(s
) < 0)
994 if (s
->flags
& SKF_TTL_RX
)
995 if (sk_request_cmsg6_ttl(s
) < 0)
998 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
999 if (sk_disable_mtu_disc6(s
) < 0)
1003 if (sk_set_ttl6(s
, s
->ttl
) < 0)
1007 if (sk_set_tos6(s
, s
->tos
) < 0)
1011 /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
1012 if (s
->priority
>= 0)
1013 if (sk_set_priority(s
, s
->priority
) < 0)
1022 add_tail(&sock_list
, &s
->n
);
1026 sk_tcp_connected(sock
*s
)
1029 int sa_len
= sizeof(sa
);
1031 if ((getsockname(s
->fd
, &sa
.sa
, &sa_len
) < 0) ||
1032 (sockaddr_read(&sa
, s
->af
, &s
->saddr
, &s
->iface
, &s
->sport
) < 0))
1033 log(L_WARN
"SOCK: Cannot get local IP address for TCP>");
1042 sk_ssh_connected(sock
*s
)
1051 sk_passive_connected(sock
*s
, int type
)
1053 sockaddr loc_sa
, rem_sa
;
1054 int loc_sa_len
= sizeof(loc_sa
);
1055 int rem_sa_len
= sizeof(rem_sa
);
1057 int fd
= accept(s
->fd
, ((type
== SK_TCP
) ? &rem_sa
.sa
: NULL
), &rem_sa_len
);
1060 if ((errno
!= EINTR
) && (errno
!= EAGAIN
))
1061 s
->err_hook(s
, errno
);
1065 sock
*t
= sk_new(s
->pool
);
1071 t
->rbsize
= s
->rbsize
;
1072 t
->tbsize
= s
->tbsize
;
1076 if ((getsockname(fd
, &loc_sa
.sa
, &loc_sa_len
) < 0) ||
1077 (sockaddr_read(&loc_sa
, s
->af
, &t
->saddr
, &t
->iface
, &t
->sport
) < 0))
1078 log(L_WARN
"SOCK: Cannot get local IP address for TCP<");
1080 if (sockaddr_read(&rem_sa
, s
->af
, &t
->daddr
, &t
->iface
, &t
->dport
) < 0)
1081 log(L_WARN
"SOCK: Cannot get remote IP address for TCP<");
1084 if (sk_setup(t
) < 0)
1086 /* FIXME: Call err_hook instead ? */
1087 log(L_ERR
"SOCK: Incoming connection: %s%#m", t
->err
);
1089 /* FIXME: handle it better in rfree() */
1104 * Return SSH_OK or SSH_AGAIN or SSH_ERROR
1107 sk_ssh_connect(sock
*s
)
1109 s
->fd
= ssh_get_fd(s
->ssh
->session
);
1111 /* Big fall thru automata */
1112 switch (s
->ssh
->state
)
1114 case SK_SSH_CONNECT
:
1116 switch (ssh_connect(s
->ssh
->session
))
1119 /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
1120 * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
1121 * documented but our code relies on that.
1133 case SK_SSH_SERVER_KNOWN
:
1135 s
->ssh
->state
= SK_SSH_SERVER_KNOWN
;
1137 if (s
->ssh
->server_hostkey_path
)
1139 int server_identity_is_ok
= 1;
1141 /* Check server identity */
1142 switch (ssh_is_server_known(s
->ssh
->session
))
1144 #define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
1145 case SSH_SERVER_KNOWN_OK
:
1146 /* The server is known and has not changed. */
1149 case SSH_SERVER_NOT_KNOWN
:
1150 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server is unknown, its public key was not found in the known host file %s", s
->ssh
->server_hostkey_path
);
1153 case SSH_SERVER_KNOWN_CHANGED
:
1154 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server key has changed. Either you are under attack or the administrator changed the key.");
1155 server_identity_is_ok
= 0;
1158 case SSH_SERVER_FILE_NOT_FOUND
:
1159 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The known host file %s does not exist", s
->ssh
->server_hostkey_path
);
1160 server_identity_is_ok
= 0;
1163 case SSH_SERVER_ERROR
:
1164 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "Some error happened");
1165 server_identity_is_ok
= 0;
1168 case SSH_SERVER_FOUND_OTHER
:
1169 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server gave use a key of a type while we had an other type recorded. " \
1170 "It is a possible attack.");
1171 server_identity_is_ok
= 0;
1175 if (!server_identity_is_ok
)
1180 case SK_SSH_USERAUTH
:
1182 s
->ssh
->state
= SK_SSH_USERAUTH
;
1183 switch (ssh_userauth_publickey_auto(s
->ssh
->session
, NULL
, NULL
))
1185 case SSH_AUTH_AGAIN
:
1188 case SSH_AUTH_SUCCESS
:
1196 case SK_SSH_CHANNEL
:
1198 s
->ssh
->state
= SK_SSH_CHANNEL
;
1199 s
->ssh
->channel
= ssh_channel_new(s
->ssh
->session
);
1200 if (s
->ssh
->channel
== NULL
)
1204 case SK_SSH_SESSION
:
1206 s
->ssh
->state
= SK_SSH_SESSION
;
1207 switch (ssh_channel_open_session(s
->ssh
->channel
))
1220 case SK_SSH_SUBSYSTEM
:
1222 s
->ssh
->state
= SK_SSH_SUBSYSTEM
;
1223 if (s
->ssh
->subsystem
)
1225 switch (ssh_channel_request_subsystem(s
->ssh
->channel
, s
->ssh
->subsystem
))
1239 case SK_SSH_ESTABLISHED
:
1240 s
->ssh
->state
= SK_SSH_ESTABLISHED
;
1247 * Return file descriptor number if success
1248 * Return -1 if failed
1251 sk_open_ssh(sock
*s
)
1254 bug("sk_open() sock->ssh is not allocated");
1256 ssh_session sess
= ssh_new();
1258 ERR2("Cannot create a ssh session");
1259 s
->ssh
->session
= sess
;
1261 const int verbosity
= SSH_LOG_NOLOG
;
1262 ssh_options_set(sess
, SSH_OPTIONS_LOG_VERBOSITY
, &verbosity
);
1263 ssh_options_set(sess
, SSH_OPTIONS_HOST
, s
->host
);
1264 ssh_options_set(sess
, SSH_OPTIONS_PORT
, &(s
->dport
));
1265 /* TODO: Add SSH_OPTIONS_BINDADDR */
1266 ssh_options_set(sess
, SSH_OPTIONS_USER
, s
->ssh
->username
);
1268 if (s
->ssh
->server_hostkey_path
)
1269 ssh_options_set(sess
, SSH_OPTIONS_KNOWNHOSTS
, s
->ssh
->server_hostkey_path
);
1271 if (s
->ssh
->client_privkey_path
)
1272 ssh_options_set(sess
, SSH_OPTIONS_IDENTITY
, s
->ssh
->client_privkey_path
);
1274 ssh_set_blocking(sess
, 0);
1276 switch (sk_ssh_connect(s
))
1282 sk_ssh_connected(s
);
1286 ERR2(ssh_get_error(sess
));
1290 return ssh_get_fd(sess
);
1298 * sk_open - open a socket
1301 * This function takes a socket resource created by sk_new() and
1302 * initialized by the user and binds a corresponding network connection
1305 * Result: 0 for success, -1 for an error.
1314 ip_addr bind_addr
= IPA_NONE
;
1317 if (s
->type
<= SK_IP
)
1320 * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
1321 * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
1322 * But the specifications have to be consistent.
1328 ASSERT(ipa_zero(s
->saddr
) || ipa_zero(s
->daddr
) ||
1329 (ipa_is_ip4(s
->saddr
) == ipa_is_ip4(s
->daddr
)));
1330 af
= (ipa_is_ip4(s
->saddr
) || ipa_is_ip4(s
->daddr
)) ? AF_INET
: AF_INET6
;
1334 ASSERT(ipa_zero(s
->saddr
) || ipa_is_ip4(s
->saddr
));
1335 ASSERT(ipa_zero(s
->daddr
) || ipa_is_ip4(s
->daddr
));
1340 ASSERT(ipa_zero(s
->saddr
) || !ipa_is_ip4(s
->saddr
));
1341 ASSERT(ipa_zero(s
->daddr
) || !ipa_is_ip4(s
->daddr
));
1346 bug("Invalid subtype %d", s
->subtype
);
1353 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1355 case SK_TCP_PASSIVE
:
1356 fd
= socket(af
, SOCK_STREAM
, IPPROTO_TCP
);
1357 bind_port
= s
->sport
;
1358 bind_addr
= s
->saddr
;
1359 do_bind
= bind_port
|| ipa_nonzero(bind_addr
);
1364 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1365 fd
= sk_open_ssh(s
);
1370 fd
= socket(af
, SOCK_DGRAM
, IPPROTO_UDP
);
1371 bind_port
= s
->sport
;
1372 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1377 fd
= socket(af
, SOCK_RAW
, s
->dport
);
1379 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1380 do_bind
= ipa_nonzero(bind_addr
);
1389 bug("sk_open() called for invalid sock type %d", s
->type
);
1398 if (sk_setup(s
) < 0)
1407 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &y
, sizeof(y
)) < 0)
1408 ERR2("SO_REUSEADDR");
1410 #ifdef CONFIG_NO_IFACE_BIND
1411 /* Workaround missing ability to bind to an iface */
1412 if ((s
->type
== SK_UDP
) && s
->iface
&& ipa_zero(bind_addr
))
1414 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &y
, sizeof(y
)) < 0)
1415 ERR2("SO_REUSEPORT");
1420 if (s
->flags
& SKF_HIGH_PORT
)
1421 if (sk_set_high_port(s
) < 0)
1422 log(L_WARN
"Socket error: %s%#m", s
->err
);
1424 sockaddr_fill(&sa
, s
->af
, bind_addr
, s
->iface
, bind_port
);
1425 if (bind(fd
, &sa
.sa
, SA_LEN(sa
)) < 0)
1430 if (sk_set_md5_auth(s
, s
->saddr
, s
->daddr
, s
->iface
, s
->password
, 0) < 0)
1436 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1437 if (connect(fd
, &sa
.sa
, SA_LEN(sa
)) >= 0)
1438 sk_tcp_connected(s
);
1439 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
&&
1440 errno
!= ECONNREFUSED
&& errno
!= EHOSTUNREACH
&& errno
!= ENETUNREACH
)
1444 case SK_TCP_PASSIVE
:
1445 if (listen(fd
, 8) < 0)
1457 if (!(s
->flags
& SKF_THREAD
))
1469 sk_open_unix(sock
*s
, char *name
)
1471 struct sockaddr_un sa
;
1474 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1476 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1480 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1483 /* Path length checked in test_old_bird() */
1484 sa
.sun_family
= AF_UNIX
;
1485 strcpy(sa
.sun_path
, name
);
1487 if (bind(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) < 0)
1490 if (listen(fd
, 8) < 0)
1499 #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1500 CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1501 #define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1504 sk_prepare_cmsgs(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
1507 sk_prepare_cmsgs4(s
, msg
, cbuf
, cbuflen
);
1509 sk_prepare_cmsgs6(s
, msg
, cbuf
, cbuflen
);
1513 sk_process_cmsgs(sock
*s
, struct msghdr
*msg
)
1517 s
->laddr
= IPA_NONE
;
1521 for (cm
= CMSG_FIRSTHDR(msg
); cm
!= NULL
; cm
= CMSG_NXTHDR(msg
, cm
))
1523 if ((cm
->cmsg_level
== SOL_IP
) && sk_is_ipv4(s
))
1525 sk_process_cmsg4_pktinfo(s
, cm
);
1526 sk_process_cmsg4_ttl(s
, cm
);
1529 if ((cm
->cmsg_level
== SOL_IPV6
) && sk_is_ipv6(s
))
1531 sk_process_cmsg6_pktinfo(s
, cm
);
1532 sk_process_cmsg6_ttl(s
, cm
);
1541 struct iovec iov
= {s
->tbuf
, s
->tpos
- s
->tbuf
};
1542 byte cmsg_buf
[CMSG_TX_SPACE
];
1545 sockaddr_fill(&dst
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1547 struct msghdr msg
= {
1548 .msg_name
= &dst
.sa
,
1549 .msg_namelen
= SA_LEN(dst
),
1554 #ifdef CONFIG_USE_HDRINCL
1556 struct iovec iov2
[2] = { {hdr
, 20}, iov
};
1558 if (s
->flags
& SKF_HDRINCL
)
1560 sk_prepare_ip_header(s
, hdr
, iov
.iov_len
);
1566 if (s
->flags
& SKF_PKTINFO
)
1567 sk_prepare_cmsgs(s
, &msg
, cmsg_buf
, sizeof(cmsg_buf
));
1569 return sendmsg(s
->fd
, &msg
, 0);
1575 struct iovec iov
= {s
->rbuf
, s
->rbsize
};
1576 byte cmsg_buf
[CMSG_RX_SPACE
];
1579 struct msghdr msg
= {
1580 .msg_name
= &src
.sa
,
1581 .msg_namelen
= sizeof(src
), // XXXX ??
1584 .msg_control
= cmsg_buf
,
1585 .msg_controllen
= sizeof(cmsg_buf
),
1589 int rv
= recvmsg(s
->fd
, &msg
, 0);
1594 // if (cf_type == SK_IP)
1595 // rv = ipv4_skip_header(pbuf, rv);
1598 sockaddr_read(&src
, s
->af
, &s
->faddr
, NULL
, &s
->fport
);
1599 sk_process_cmsgs(s
, &msg
);
1601 if (msg
.msg_flags
& MSG_TRUNC
)
1602 s
->flags
|= SKF_TRUNCATED
;
1604 s
->flags
&= ~SKF_TRUNCATED
;
1610 static inline void reset_tx_buffer(sock
*s
) { s
->ttx
= s
->tpos
= s
->tbuf
; }
1613 sk_maybe_write(sock
*s
)
1622 while (s
->ttx
!= s
->tpos
)
1624 e
= write(s
->fd
, s
->ttx
, s
->tpos
- s
->ttx
);
1628 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1631 /* EPIPE is just a connection close notification during TX */
1632 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1644 while (s
->ttx
!= s
->tpos
)
1646 e
= ssh_channel_write(s
->ssh
->channel
, s
->ttx
, s
->tpos
- s
->ttx
);
1650 s
->err
= ssh_get_error(s
->ssh
->session
);
1651 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
1654 /* EPIPE is just a connection close notification during TX */
1655 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1667 if (s
->tbuf
== s
->tpos
)
1674 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1677 s
->err_hook(s
, errno
);
1690 bug("sk_maybe_write: unknown socket type %d", s
->type
);
1695 sk_rx_ready(sock
*s
)
1698 struct pollfd pfd
= { .fd
= s
->fd
};
1699 pfd
.events
|= POLLIN
;
1702 rv
= poll(&pfd
, 1, 0);
1704 if ((rv
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
1711 * sk_send - send data to a socket
1713 * @len: number of bytes to send
1715 * This function sends @len bytes of data prepared in the
1716 * transmit buffer of the socket @s to the network connection.
1717 * If the packet can be sent immediately, it does so and returns
1718 * 1, else it queues the packet for later processing, returns 0
1719 * and calls the @tx_hook of the socket when the transmission
1723 sk_send(sock
*s
, unsigned len
)
1726 s
->tpos
= s
->tbuf
+ len
;
1727 return sk_maybe_write(s
);
1731 * sk_send_to - send data to a specific destination
1733 * @len: number of bytes to send
1734 * @addr: IP address to send the packet to
1735 * @port: port to send the packet to
1737 * This is a sk_send() replacement for connection-less packet sockets
1738 * which allows destination of the packet to be chosen dynamically.
1739 * Raw IP sockets should use 0 for @port.
1742 sk_send_to(sock
*s
, unsigned len
, ip_addr addr
, unsigned port
)
1749 s
->tpos
= s
->tbuf
+ len
;
1750 return sk_maybe_write(s
);
1755 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1756 ip_addr saddr, ip_addr daddr, unsigned dport)
1763 s->tpos = s->tbuf + len;
1764 return sk_maybe_write(s);
1769 call_rx_hook(sock
*s
, int size
)
1771 if (s
->rx_hook(s
, size
))
1773 /* We need to be careful since the socket could have been deleted by the hook */
1774 if (current_sock
== s
)
1781 sk_read_ssh(sock
*s
)
1783 ssh_channel rchans
[2] = { s
->ssh
->channel
, NULL
};
1784 struct timeval timev
= { 1, 0 };
1786 if (ssh_channel_select(rchans
, NULL
, NULL
, &timev
) == SSH_EINTR
)
1787 return 1; /* Try again */
1789 if (ssh_channel_is_eof(s
->ssh
->channel
) != 0)
1791 /* The remote side is closing the connection */
1796 if (rchans
[0] == NULL
)
1797 return 0; /* No data is available on the socket */
1799 const uint used_bytes
= s
->rpos
- s
->rbuf
;
1800 const int read_bytes
= ssh_channel_read_nonblocking(s
->ssh
->channel
, s
->rpos
, s
->rbsize
- used_bytes
, 0);
1804 s
->rpos
+= read_bytes
;
1805 call_rx_hook(s
, used_bytes
+ read_bytes
);
1808 else if (read_bytes
== 0)
1810 if (ssh_channel_is_eof(s
->ssh
->channel
) != 0)
1812 /* The remote side is closing the connection */
1818 s
->err
= ssh_get_error(s
->ssh
->session
);
1819 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
1822 return 0; /* No data is available on the socket */
1826 /* sk_read() and sk_write() are called from BFD's event loop */
1829 sk_read(sock
*s
, int revents
)
/* Passive (listening) socket types are handled by sk_passive_connected(),
   called with SK_TCP or SK_UNIX respectively. */
1833 case SK_TCP_PASSIVE
:
1834 return sk_passive_connected(s
, SK_TCP
);
1836 case SK_UNIX_PASSIVE
:
1837 return sk_passive_connected(s
, SK_UNIX
);
/* Read into the unused tail of the receive buffer: from rpos up to rbuf + rbsize. */
1842 int c
= read(s
->fd
, s
->rpos
, s
->rbuf
+ s
->rbsize
- s
->rpos
);
/* EINTR and EAGAIN are not passed to err_hook; every other errno is. */
1846 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1847 s
->err_hook(s
, errno
);
/* EAGAIN although @revents lacks POLLIN is logged as an error. */
1848 else if (errno
== EAGAIN
&& !(revents
& POLLIN
))
1850 log(L_ERR
"Got EAGAIN from read when revents=%x (without POLLIN)", revents
);
/* The hook receives the number of bytes currently buffered (rpos - rbuf). */
1859 call_rx_hook(s
, s
->rpos
- s
->rbuf
);
1867 return sk_read_ssh(s
);
/* NOTE(review): the case labels selecting the branches below are not visible
   in this excerpt. */
1871 return s
->rx_hook(s
, 0);
/* Receive through sk_recvmsg(); its result e is used as the data length below. */
1875 int e
= sk_recvmsg(s
);
1879 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1880 s
->err_hook(s
, errno
);
1884 s
->rpos
= s
->rbuf
+ e
;
/* NOTE(review): the function header and the case labels are not visible in
   this excerpt; presumably this is the body of sk_write() -- confirm. */
/* Build the destination socket address from af, daddr, iface and dport. */
1899 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
/* A successful connect() or EISCONN completes the connection; EINTR, EAGAIN
   and EINPROGRESS are not reported, any other errno goes to err_hook. */
1901 if (connect(s
->fd
, &sa
.sa
, SA_LEN(sa
)) >= 0 || errno
== EISCONN
)
1902 sk_tcp_connected(s
);
1903 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
)
1904 s
->err_hook(s
, errno
);
/* SSH connection attempt; the outcome of sk_ssh_connect() selects the branch. */
1911 switch (sk_ssh_connect(s
))
1914 sk_ssh_connected(s
);
/* Error reporting: the ssh_get_error() value is kept in s->err and the
   ssh_get_error_code() value is passed to err_hook. */
1921 s
->err
= ssh_get_error(s
->ssh
->session
);
1922 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
/* Only when unsent data is pending (ttx != tpos) is sk_maybe_write() called;
   the statement guarded by its positive result is not visible here. */
1930 if (s
->ttx
!= s
->tpos
&& sk_maybe_write(s
) > 0)
1940 int sk_is_ipv4(sock
*s
)
1941 { return s
->af
== AF_INET
; }
1943 int sk_is_ipv6(sock
*s
)
1944 { return s
->af
== AF_INET6
; }
1947 sk_err(sock
*s
, int revents
)
/* se receives the pending socket error, sse is the buffer size passed to
   getsockopt().
   NOTE(review): sse is declared int, while POSIX getsockopt() takes a
   socklen_t * for the length argument -- confirm this is safe on all targets. */
1949 int se
= 0, sse
= sizeof(se
);
/* SO_ERROR is queried only for sockets that are not SK_MAGIC and for which
   POLLERR was reported in @revents. */
1950 if ((s
->type
!= SK_MAGIC
) && (revents
& POLLERR
))
1951 if (getsockopt(s
->fd
, SOL_SOCKET
, SO_ERROR
, &se
, &sse
) < 0)
/* A failing getsockopt() is itself logged. */
1953 log(L_ERR
"IO: Socket error: SO_ERROR: %m");
/* NOTE(review): the function header is not visible in this excerpt;
   presumably a debug dump of all sockets -- confirm. */
1966 debug("Open sockets:\n");
/* Walk every node of sock_list. */
1967 WALK_LIST(n
, sock_list
)
/* Obtain the sock that contains list node n (its member is also named n). */
1969 s
= SKIP_BACK(sock
, n
, n
);
1978 * Internal event log and watchdog
/* Number of slots in the event log array below. */
1981 #define EVENT_LOG_LENGTH 32
/* One event log record; code in this file accesses its hook, data, timestamp
   and duration members (the member list is not visible in this excerpt). */
1983 struct event_log_entry
/* The log storage itself; indexed modulo EVENT_LOG_LENGTH. */
1991 static struct event_log_entry event_log
[EVENT_LOG_LENGTH
];
/* Entry whose duration is still to be filled in, or NULL. */
1992 static struct event_log_entry
*event_open
;
/* event_log_pos: slot index used by io_log_event(); event_log_num: event count
   printed by the loop-cycle warning; watchdog_active: set to 1 when alarm() is
   armed in watchdog_start(). */
1993 static int event_log_pos
, event_log_num
, watchdog_active
;
/* Latest CLOCK_MONOTONIC sample taken by io_update_time(). */
1994 static btime last_time
;
/* Copy of last_time taken in watchdog_start1()/watchdog_start(). */
1995 static btime loop_time
;
1998 io_update_time(void)
2004 * This is the third time-tracking procedure (after update_times() above and
2005 * times_update() in BFD), dedicated to the internal event log and latency
2006 * tracking. Hopefully, we will consolidate these sometime.
/* Sample the monotonic clock; the die() below reports a clock_gettime() failure. */
2009 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
2011 die("clock_gettime: %m");
/* Combine seconds and nanoseconds into one btime value (S and NS are
   unit-suffix macros defined elsewhere) and remember it in last_time. */
2013 last_time
= ts
.tv_sec S
+ ts
.tv_nsec NS
;
/* Time elapsed since the open event was timestamped. */
2017 event_open
->duration
= last_time
- event_open
->timestamp
;
/* Warn when that duration exceeds the configured latency limit. */
2019 if (event_open
->duration
> config
->latency_limit
)
2020 log(L_WARN
"Event 0x%p 0x%p took %d ms",
2021 event_open
->hook
, event_open
->data
, (int) (event_open
->duration TO_MS
));
2028 * io_log_event - mark approaching event into event log
2029 * @hook: event hook address
2030 * @data: event data address
2032 * Store info (hook, data, timestamp) about the following internal event into
2033 * a circular event log (@event_log). When latency tracking is enabled, the log
2034 * entry is kept open (in @event_open) so the duration can be filled later.
2037 io_log_event(void *hook
, void *data
)
/* NOTE(review): the statement guarded by this latency_debug test is not
   visible in this excerpt. */
2039 if (config
->latency_debug
)
/* Log slot at the current position. */
2042 struct event_log_entry
*en
= event_log
+ event_log_pos
;
/* Stamp the entry with the most recent time sample. */
2046 en
->timestamp
= last_time
;
/* Keep the position inside the array bounds (wrap-around). */
2051 event_log_pos
%= EVENT_LOG_LENGTH
;
/* The entry stays open only when latency debugging is configured. */
2053 event_open
= config
->latency_debug
? en
: NULL
;
2057 io_close_event(void)
/* NOTE(review): the function header is not visible in this excerpt;
   presumably io_log_dump() -- confirm. */
2068 log(L_DEBUG
"Event log:");
/* Visit all EVENT_LOG_LENGTH slots, starting at event_log_pos and wrapping
   around the end of the array. */
2069 for (i
= 0; i
< EVENT_LOG_LENGTH
; i
++)
2071 struct event_log_entry
*en
= event_log
+ (event_log_pos
+ i
) % EVENT_LOG_LENGTH
;
/* Printed per entry: hook, data, time elapsed since its timestamp (relative
   to last_time) and its duration, both converted with TO_MS. */
2073 log(L_DEBUG
" Event 0x%p 0x%p at %8d for %d ms", en
->hook
, en
->data
,
2074 (int) ((last_time
- en
->timestamp
) TO_MS
), (int) (en
->duration TO_MS
));
2079 watchdog_sigalrm(int sig UNUSED
)
2081 /* Update last_time and duration, but skip latency check */
/* Raising the limit to 0xffffffff is what suppresses the latency warning in
   io_update_time().
   NOTE(review): this writes to the shared config, presumably from signal
   context -- confirm that is acceptable here. */
2082 config
->latency_limit
= 0xffffffff;
2085 /* We want core dump */
2090 watchdog_start1(void)
/* Remember the current time sample as the start of the loop cycle.
   NOTE(review): any statement preceding this one is not visible in this excerpt. */
2094 loop_time
= last_time
;
2098 watchdog_start(void)
/* Remember the current time sample as the start of the loop cycle. */
2102 loop_time
= last_time
;
/* Arm alarm() only when a watchdog timeout is configured (nonzero), and note
   that it is armed. */
2105 if (config
->watchdog_timeout
)
2107 alarm(config
->watchdog_timeout
);
2108 watchdog_active
= 1;
/* NOTE(review): the function header is not visible in this excerpt;
   presumably watchdog_stop() -- confirm. */
/* Clear the "armed" marker; the statement that disarms the alarm is not
   visible here. */
2117 if (watchdog_active
)
2120 watchdog_active
= 0;
/* Length of the loop cycle, measured from the loop_time stored by
   watchdog_start()/watchdog_start1(). */
2123 btime duration
= last_time
- loop_time
;
/* Warn when the cycle took longer than the configured warning threshold. */
2124 if (duration
> config
->watchdog_warning
)
2125 log(L_WARN
"I/O loop cycle took %d ms for %d events",
2126 (int) (duration TO_MS
), event_log_num
);
/* Request flags examined by the main loop before it enters poll(): each set
   flag is logged through io_log_event() and reset to 0.
   NOTE(review): presumably set from signal handlers; the setters are not
   visible in this excerpt. */
2134 volatile int async_config_flag
; /* Asynchronous reconfiguration/dump scheduled */
2135 volatile int async_dump_flag
;
2136 volatile int async_shutdown_flag
;
/* NOTE(review): the function header is not visible in this excerpt;
   presumably the I/O subsystem initialization -- confirm. */
/* Start with empty socket and global event lists. */
2141 init_list(&sock_list
);
2142 init_list(&global_event_list
);
2144 // XXX init_times();
2145 // XXX update_times();
/* Record the start-up time. */
2146 boot_time
= current_time();
/* Seed random() with current_real_time() converted by TO_S. */
2147 srandom((uint
) (current_real_time() TO_S
));
/* Counter compared against SHORT_LOOP_MAX in the main loop: the branch taken
   when events were processed is entered only while short_loops is below the
   limit. The statements that change the counter are not visible in this excerpt. */
2150 static int short_loops
= 0;
2151 #define SHORT_LOOP_MAX 10
/* NOTE(review): the function header, braces and several short statements are
   not visible in this excerpt; presumably this is the body of io_loop(). */
2156 int poll_tout
, timeout
;
2157 int nfds
, events
, pout
;
/* Poll descriptor array with room for fdmax entries; resized further below. */
2162 struct pollfd
*pfd
= xmalloc(fdmax
* sizeof(struct pollfd
));
/* Refresh the loop time, run the global event list and fire due timers. */
2167 times_update(&main_timeloop
);
2168 events
= ev_run_list(&global_event_list
);
2169 timers_fire(&main_timeloop
);
/* Do not block in poll() when events were run; otherwise wait at most 3 s. */
2173 poll_tout
= (events
? 0 : 3000); /* Time in milliseconds */
/* If a timer is pending, shorten the poll timeout to its remaining time
   (converted with TO_MS, plus one). */
2174 if (t
= timers_first(&main_timeloop
))
2176 times_update(&main_timeloop
);
2177 timeout
= (tm_remains(t
) TO_MS
) + 1;
2178 poll_tout
= MIN(poll_tout
, timeout
);
/* Build the pollfd array from the socket list. */
2182 WALK_LIST(n
, sock_list
)
2184 pfd
[nfds
] = (struct pollfd
) { .fd
= -1 }; /* all other fields are zeroed by this */
2185 s
= SKIP_BACK(sock
, n
, n
);
/* Request POLLIN; the condition guarding these two statements is not visible
   in this excerpt. */
2188 pfd
[nfds
].fd
= s
->fd
;
2189 pfd
[nfds
].events
|= POLLIN
;
/* Request POLLOUT only for sockets with a tx_hook and unsent data (ttx != tpos). */
2191 if (s
->tx_hook
&& s
->ttx
!= s
->tpos
)
2193 pfd
[nfds
].fd
= s
->fd
;
2194 pfd
[nfds
].events
|= POLLOUT
;
/* Entries whose fd is still -1 requested no events. */
2196 if (pfd
[nfds
].fd
!= -1)
/* Resize the array to the current fdmax; the code that changes fdmax is not
   visible in this excerpt. */
2207 pfd
= xrealloc(pfd
, fdmax
* sizeof(struct pollfd
));
2212 * Yes, this is racy. But even if the signal comes before this test
2213 * and entering poll(), it gets caught on the next timer tick.
/* Each pending asynchronous request is logged as an event and its flag is
   cleared; the handler calls themselves are not visible in this excerpt. */
2216 if (async_config_flag
)
2218 io_log_event(async_config
, NULL
);
2220 async_config_flag
= 0;
2223 if (async_dump_flag
)
2225 io_log_event(async_dump
, NULL
);
2227 async_dump_flag
= 0;
2230 if (async_shutdown_flag
)
2232 io_log_event(async_shutdown
, NULL
);
2234 async_shutdown_flag
= 0;
2238 /* And finally enter poll() to find active sockets */
2240 pout
= poll(pfd
, nfds
, poll_tout
);
/* EINTR and EAGAIN from poll() get separate treatment; the statements for
   either outcome are not visible in this excerpt. */
2245 if (errno
== EINTR
|| errno
== EAGAIN
)
/* Refresh the loop time after returning from poll(). */
2251 times_update(&main_timeloop
);
/* First pass over the sockets: receive on fast_rx sockets and handle POLLOUT. */
2253 /* guaranteed to be non-empty */
2254 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
2256 while (current_sock
)
2258 sock
*s
= current_sock
;
2261 current_sock
= sk_next(s
);
/* Receive only for fast_rx sockets with POLLIN reported and an rx_hook set. */
2269 if (s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2273 io_log_event(s
->rx_hook
, s
->data
);
2274 e
= sk_read(s
, pfd
[s
->index
].revents
);
/* s != current_sock is re-checked after each hook-invoking call; the
   guarded statement is not visible in this excerpt. */
2275 if (s
!= current_sock
)
/* Keep reading while sk_read() returned nonzero, an rx_hook is still set and
   the steps counter is nonzero. */
2278 while (e
&& s
->rx_hook
&& steps
);
/* Transmit side: handled when POLLOUT was reported for the socket. */
2281 if (pfd
[s
->index
].revents
& POLLOUT
)
2285 io_log_event(s
->tx_hook
, s
->data
);
2287 if (s
!= current_sock
)
2292 current_sock
= sk_next(s
);
/* After a round that processed events, this branch is taken while
   short_loops is below SHORT_LOOP_MAX; its body is not visible in this excerpt. */
2297 if (events
&& (short_loops
< SHORT_LOOP_MAX
))
/* Second pass: resume at stored_sock, or at the list head when it is NULL. */
2302 current_sock
= stored_sock
;
2303 if (current_sock
== NULL
)
2304 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
/* Stop after MAX_RX_STEPS counted steps or at the end of the list. */
2306 while (current_sock
&& count
< MAX_RX_STEPS
)
2308 sock
*s
= current_sock
;
2311 current_sock
= sk_next(s
);
/* Receive for sockets that are not fast_rx, have POLLIN reported and an rx_hook set. */
2315 if (!s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2318 io_log_event(s
->rx_hook
, s
->data
);
2319 sk_read(s
, pfd
[s
->index
].revents
);
2320 if (s
!= current_sock
)
/* POLLHUP or POLLERR on the socket is passed to sk_err(). */
2324 if (pfd
[s
->index
].revents
& (POLLHUP
| POLLERR
))
2326 sk_err(s
, pfd
[s
->index
].revents
);
2327 if (s
!= current_sock
)
2331 current_sock
= sk_next(s
);
/* Remember where the second pass stopped so the next round continues there. */
2336 stored_sock
= current_sock
;
2342 test_old_bird(char *path
)
2345 struct sockaddr_un sa
;
2347 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
2349 die("Cannot create socket: %m");
2350 if (strlen(path
) >= sizeof(sa
.sun_path
))
2351 die("Socket path too long");
2352 bzero(&sa
, sizeof(sa
));
2353 sa
.sun_family
= AF_UNIX
;
2354 strcpy(sa
.sun_path
, path
);
2355 if (connect(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) == 0)
2356 die("I found another BIRD running.");