2 * BIRD Internet Routing Daemon -- Unix I/O
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
7 * Can be freely distributed and used under the terms of the GNU GPL.
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
20 #include <sys/types.h>
21 #include <sys/socket.h>
29 #include <netinet/in.h>
30 #include <netinet/tcp.h>
31 #include <netinet/udp.h>
32 #include <netinet/icmp6.h>
34 #include "nest/bird.h"
35 #include "lib/lists.h"
36 #include "lib/resource.h"
37 #include "sysdep/unix/timer.h"
38 #include "lib/socket.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "nest/iface.h"
43 #include "sysdep/unix/unix.h"
44 #include CONFIG_INCLUDE_SYSIO_H
46 /* Maximum number of calls of tx handler for one socket in one
47 * poll iteration. Should be small enough to not monopolize CPU by
48 * one protocol instance.
52 /* Maximum number of calls of rx handler for all sockets in one poll
53 iteration. RX callbacks are often much more costly so we limit
54 this to get small latencies */
55 #define MAX_RX_STEPS 4
69 struct rfile
*a
= (struct rfile
*) r
;
77 struct rfile
*a
= (struct rfile
*) r
;
79 debug("(FILE *%p)\n", a
->f
);
82 static struct resclass rf_class
= {
92 tracked_fopen(pool
*p
, char *name
, char *mode
)
94 FILE *f
= fopen(name
, mode
);
98 struct rfile
*r
= ralloc(p
, &rf_class
);
107 * Timers are resources which represent a wish of a module to call
108 * a function at the specified time. The platform dependent code
109 * doesn't guarantee exact timing, only that a timer function
110 * won't be called before the requested time.
112 * In BIRD, time is represented by values of the &bird_clock_t type
113 * which are integral numbers interpreted as a relative number of seconds since
114 * some fixed time point in the past. The current time can be read
115 * from variable @now with reasonable accuracy and is monotonic. There is also
116 * a current 'absolute' time in variable @now_real reported by OS.
118 * Each timer is described by a &timer structure containing a pointer
119 * to the handler function (@hook), data private to this function (@data),
120 * time the function should be called at (@expires, 0 for inactive timers),
121 * for the other fields see |timer.h|.
124 #define NEAR_TIMER_LIMIT 4
126 static list near_timers
, far_timers
;
127 static bird_clock_t first_far_timer
= TIME_INFINITY
;
129 /* now must be different from 0, because 0 is a special value in timer->expires */
130 bird_clock_t now
= 1, now_real
, boot_time
;
133 update_times_plain(void)
135 bird_clock_t new_time
= time(NULL
);
136 int delta
= new_time
- now_real
;
138 if ((delta
>= 0) && (delta
< 60))
140 else if (now_real
!= 0)
141 log(L_WARN
"Time jump, delta %d s", delta
);
147 update_times_gettime(void)
152 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
154 die("clock_gettime: %m");
156 if (ts
.tv_sec
!= now
) {
158 log(L_ERR
"Monotonic timer is broken");
161 now_real
= time(NULL
);
165 static int clock_monotonic_available
;
170 if (clock_monotonic_available
)
171 update_times_gettime();
173 update_times_plain();
180 clock_monotonic_available
= (clock_gettime(CLOCK_MONOTONIC
, &ts
) == 0);
181 if (!clock_monotonic_available
)
182 log(L_WARN
"Monotonic timer is missing");
189 timer
*t
= (timer
*) r
;
197 timer
*t
= (timer
*) r
;
199 debug("(code %p, data %p, ", t
->hook
, t
->data
);
201 debug("rand %d, ", t
->randomize
);
203 debug("recur %d, ", t
->recurrent
);
205 debug("expires in %d sec)\n", t
->expires
- now
);
207 debug("inactive)\n");
210 static struct resclass tm_class
= {
220 * tm_new - create a timer
223 * This function creates a new timer resource and returns
224 * a pointer to it. To use the timer, you need to fill in
225 * the structure fields and call tm_start() to start timing.
230 timer
*t
= ralloc(p
, &tm_class
);
235 tm_insert_near(timer
*t
)
237 node
*n
= HEAD(near_timers
);
239 while (n
->next
&& (SKIP_BACK(timer
, n
, n
)->expires
< t
->expires
))
241 insert_node(&t
->n
, n
->prev
);
245 * tm_start - start a timer
247 * @after: number of seconds the timer should be run after
249 * This function schedules the hook function of the timer to
250 * be called after @after seconds. If the timer has been already
251 * started, its @expires time is replaced by the new value.
253 * If you have set the @randomize field of @t, the timeout
254 * will be increased by a random number of seconds chosen
255 * uniformly from range 0 .. @randomize.
257 * You can call tm_start() from the handler function of the timer
258 * to request another run of the timer. Also, you can set the @recurrent
259 * field to have the timer re-added automatically with the same timeout.
262 tm_start(timer
*t
, unsigned after
)
267 after
+= random() % (t
->randomize
+ 1);
269 if (t
->expires
== when
)
274 if (after
<= NEAR_TIMER_LIMIT
)
278 if (!first_far_timer
|| first_far_timer
> when
)
279 first_far_timer
= when
;
280 add_tail(&far_timers
, &t
->n
);
285 * tm_stop - stop a timer
288 * This function stops a timer. If the timer is already stopped,
302 tm_dump_them(char *name
, list
*l
)
307 debug("%s timers:\n", name
);
310 t
= SKIP_BACK(timer
, n
, n
);
320 tm_dump_them("Near", &near_timers
);
321 tm_dump_them("Far", &far_timers
);
327 time_t x
= first_far_timer
;
329 if (!EMPTY_LIST(near_timers
))
331 timer
*t
= SKIP_BACK(timer
, n
, HEAD(near_timers
));
338 void io_log_event(void *hook
, void *data
);
346 if (first_far_timer
<= now
)
348 bird_clock_t limit
= now
+ NEAR_TIMER_LIMIT
;
349 first_far_timer
= TIME_INFINITY
;
350 n
= HEAD(far_timers
);
353 t
= SKIP_BACK(timer
, n
, n
);
354 if (t
->expires
<= limit
)
359 else if (t
->expires
< first_far_timer
)
360 first_far_timer
= t
->expires
;
364 while ((n
= HEAD(near_timers
)) -> next
)
367 t
= SKIP_BACK(timer
, n
, n
);
368 if (t
->expires
> now
)
371 delay
= t
->expires
- now
;
375 int i
= t
->recurrent
- delay
;
380 io_log_event(t
->hook
, t
->data
);
386 * tm_parse_datetime - parse a date and time
387 * @x: datetime string
389 * tm_parse_datetime() takes a textual representation of
390 * a date and time (dd-mm-yyyy hh:mm:ss)
391 * and converts it to the corresponding value of type &bird_clock_t.
394 tm_parse_datetime(char *x
)
400 if (sscanf(x
, "%d-%d-%d %d:%d:%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &tm
.tm_hour
, &tm
.tm_min
, &tm
.tm_sec
, &n
) != 6 || x
[n
])
401 return tm_parse_date(x
);
405 if (t
== (time_t) -1)
410 * tm_parse_date - parse a date
413 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
414 * and converts it to the corresponding value of type &bird_clock_t.
417 tm_parse_date(char *x
)
423 if (sscanf(x
, "%d-%d-%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &n
) != 3 || x
[n
])
427 tm
.tm_hour
= tm
.tm_min
= tm
.tm_sec
= 0;
429 if (t
== (time_t) -1)
435 tm_format_reltime(char *x
, struct tm
*tm
, bird_clock_t delta
)
437 static char *month_names
[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
438 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
441 bsprintf(x
, "%02d:%02d", tm
->tm_hour
, tm
->tm_min
);
442 else if (delta
< 360*86400)
443 bsprintf(x
, "%s%02d", month_names
[tm
->tm_mon
], tm
->tm_mday
);
445 bsprintf(x
, "%d", tm
->tm_year
+1900);
448 #include "conf/conf.h"
451 * tm_format_datetime - convert date and time to textual representation
452 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
453 * @fmt_spec: specification of resulting textual representation of the time
456 * This function formats the given relative time value @t to a textual
457 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
460 tm_format_datetime(char *x
, struct timeformat
*fmt_spec
, bird_clock_t t
)
462 const char *fmt_used
;
464 bird_clock_t delta
= now
- t
;
465 t
= now_real
- delta
;
468 if (fmt_spec
->fmt1
== NULL
)
469 return tm_format_reltime(x
, tm
, delta
);
471 if ((fmt_spec
->limit
== 0) || (delta
< fmt_spec
->limit
))
472 fmt_used
= fmt_spec
->fmt1
;
474 fmt_used
= fmt_spec
->fmt2
;
476 int rv
= strftime(x
, TM_DATETIME_BUFFER_SIZE
, fmt_used
, tm
);
477 if (((rv
== 0) && fmt_used
[0]) || (rv
== TM_DATETIME_BUFFER_SIZE
))
478 strcpy(x
, "<too-long>");
485 * Socket resources represent network connections. Their data structure (&socket)
486 * contains a lot of fields defining the exact type of the socket, the local and
487 * remote addresses and ports, pointers to socket buffers and finally pointers to
488 * hook functions to be called when new data have arrived to the receive buffer
489 * (@rx_hook), when the contents of the transmit buffer have been transmitted
490 * (@tx_hook) and when an error or connection close occurs (@err_hook).
492 * Freeing of sockets from inside socket hooks is perfectly safe.
496 #define SOL_IP IPPROTO_IP
500 #define SOL_IPV6 IPPROTO_IPV6
504 #define SOL_ICMPV6 IPPROTO_ICMPV6
509 * Sockaddr helper functions
512 static inline int UNUSED
sockaddr_length(int af
)
513 { return (af
== AF_INET
) ? sizeof(struct sockaddr_in
) : sizeof(struct sockaddr_in6
); }
516 sockaddr_fill4(struct sockaddr_in
*sa
, ip_addr a
, uint port
)
518 memset(sa
, 0, sizeof(struct sockaddr_in
));
520 sa
->sin_len
= sizeof(struct sockaddr_in
);
522 sa
->sin_family
= AF_INET
;
523 sa
->sin_port
= htons(port
);
524 sa
->sin_addr
= ipa_to_in4(a
);
528 sockaddr_fill6(struct sockaddr_in6
*sa
, ip_addr a
, struct iface
*ifa
, uint port
)
530 memset(sa
, 0, sizeof(struct sockaddr_in6
));
532 sa
->sin6_len
= sizeof(struct sockaddr_in6
);
534 sa
->sin6_family
= AF_INET6
;
535 sa
->sin6_port
= htons(port
);
536 sa
->sin6_flowinfo
= 0;
537 sa
->sin6_addr
= ipa_to_in6(a
);
539 if (ifa
&& ipa_is_link_local(a
))
540 sa
->sin6_scope_id
= ifa
->index
;
544 sockaddr_fill(sockaddr
*sa
, int af
, ip_addr a
, struct iface
*ifa
, uint port
)
547 sockaddr_fill4((struct sockaddr_in
*) sa
, a
, port
);
548 else if (af
== AF_INET6
)
549 sockaddr_fill6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
555 sockaddr_read4(struct sockaddr_in
*sa
, ip_addr
*a
, uint
*port
)
557 *port
= ntohs(sa
->sin_port
);
558 *a
= ipa_from_in4(sa
->sin_addr
);
562 sockaddr_read6(struct sockaddr_in6
*sa
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
564 *port
= ntohs(sa
->sin6_port
);
565 *a
= ipa_from_in6(sa
->sin6_addr
);
567 if (ifa
&& ipa_is_link_local(*a
))
568 *ifa
= if_find_by_index(sa
->sin6_scope_id
);
572 sockaddr_read(sockaddr
*sa
, int af
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
574 if (sa
->sa
.sa_family
!= af
)
578 sockaddr_read4((struct sockaddr_in
*) sa
, a
, port
);
579 else if (af
== AF_INET6
)
580 sockaddr_read6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
594 * IPv6 multicast syscalls
597 /* Fortunately standardized in RFC 3493 */
599 #define INIT_MREQ6(maddr,ifa) \
600 { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
603 sk_setup_multicast6(sock
*s
)
605 int index
= s
->iface
->index
;
609 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_IF
, &index
, sizeof(index
)) < 0)
610 ERR("IPV6_MULTICAST_IF");
612 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
613 ERR("IPV6_MULTICAST_HOPS");
615 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_LOOP
, &n
, sizeof(n
)) < 0)
616 ERR("IPV6_MULTICAST_LOOP");
622 sk_join_group6(sock
*s
, ip_addr maddr
)
624 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
626 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_JOIN_GROUP
, &mr
, sizeof(mr
)) < 0)
627 ERR("IPV6_JOIN_GROUP");
633 sk_leave_group6(sock
*s
, ip_addr maddr
)
635 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
637 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_LEAVE_GROUP
, &mr
, sizeof(mr
)) < 0)
638 ERR("IPV6_LEAVE_GROUP");
645 * IPv6 packet control messages
648 /* Also standardized, in RFC 3542 */
651 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
652 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
653 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
654 * RFC and we use IPV6_PKTINFO.
656 #ifndef IPV6_RECVPKTINFO
657 #define IPV6_RECVPKTINFO IPV6_PKTINFO
660 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
662 #ifndef IPV6_RECVHOPLIMIT
663 #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
667 #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
668 #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
671 sk_request_cmsg6_pktinfo(sock
*s
)
675 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVPKTINFO
, &y
, sizeof(y
)) < 0)
676 ERR("IPV6_RECVPKTINFO");
682 sk_request_cmsg6_ttl(sock
*s
)
686 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVHOPLIMIT
, &y
, sizeof(y
)) < 0)
687 ERR("IPV6_RECVHOPLIMIT");
693 sk_process_cmsg6_pktinfo(sock
*s
, struct cmsghdr
*cm
)
695 if (cm
->cmsg_type
== IPV6_PKTINFO
)
697 struct in6_pktinfo
*pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
698 s
->laddr
= ipa_from_in6(pi
->ipi6_addr
);
699 s
->lifindex
= pi
->ipi6_ifindex
;
704 sk_process_cmsg6_ttl(sock
*s
, struct cmsghdr
*cm
)
706 if (cm
->cmsg_type
== IPV6_HOPLIMIT
)
707 s
->rcv_ttl
= * (int *) CMSG_DATA(cm
);
711 sk_prepare_cmsgs6(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
714 struct in6_pktinfo
*pi
;
717 msg
->msg_control
= cbuf
;
718 msg
->msg_controllen
= cbuflen
;
720 cm
= CMSG_FIRSTHDR(msg
);
721 cm
->cmsg_level
= SOL_IPV6
;
722 cm
->cmsg_type
= IPV6_PKTINFO
;
723 cm
->cmsg_len
= CMSG_LEN(sizeof(*pi
));
724 controllen
+= CMSG_SPACE(sizeof(*pi
));
726 pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
727 pi
->ipi6_ifindex
= s
->iface
? s
->iface
->index
: 0;
728 pi
->ipi6_addr
= ipa_to_in6(s
->saddr
);
730 msg
->msg_controllen
= controllen
;
735 * Miscellaneous socket syscalls
739 sk_set_ttl4(sock
*s
, int ttl
)
741 if (setsockopt(s
->fd
, SOL_IP
, IP_TTL
, &ttl
, sizeof(ttl
)) < 0)
748 sk_set_ttl6(sock
*s
, int ttl
)
750 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_UNICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
751 ERR("IPV6_UNICAST_HOPS");
757 sk_set_tos4(sock
*s
, int tos
)
759 if (setsockopt(s
->fd
, SOL_IP
, IP_TOS
, &tos
, sizeof(tos
)) < 0)
766 sk_set_tos6(sock
*s
, int tos
)
768 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_TCLASS
, &tos
, sizeof(tos
)) < 0)
775 sk_set_high_port(sock
*s UNUSED
)
777 /* Port range setting is optional, ignore it if not supported */
782 int range
= IP_PORTRANGE_HIGH
;
783 if (setsockopt(s
->fd
, SOL_IP
, IP_PORTRANGE
, &range
, sizeof(range
)) < 0)
788 #ifdef IPV6_PORTRANGE
791 int range
= IPV6_PORTRANGE_HIGH
;
792 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_PORTRANGE
, &range
, sizeof(range
)) < 0)
793 ERR("IPV6_PORTRANGE");
801 sk_skip_ip_header(byte
*pkt
, int *len
)
803 if ((*len
< 20) || ((*pkt
& 0xf0) != 0x40))
806 int hlen
= (*pkt
& 0x0f) * 4;
807 if ((hlen
< 20) || (hlen
> *len
))
815 sk_rx_buffer(sock
*s
, int *len
)
817 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
))
818 return sk_skip_ip_header(s
->rbuf
, len
);
825 * Public socket functions
829 * sk_setup_multicast - enable multicast for given socket
832 * Prepare transmission of multicast packets for given datagram socket.
833 * The socket must have defined @iface.
835 * Result: 0 for success, -1 for an error.
839 sk_setup_multicast(sock
*s
)
844 return sk_setup_multicast4(s
);
846 return sk_setup_multicast6(s
);
850 * sk_join_group - join multicast group for given socket
852 * @maddr: multicast address
854 * Join multicast group for given datagram socket and associated interface.
855 * The socket must have defined @iface.
857 * Result: 0 for success, -1 for an error.
861 sk_join_group(sock
*s
, ip_addr maddr
)
864 return sk_join_group4(s
, maddr
);
866 return sk_join_group6(s
, maddr
);
870 * sk_leave_group - leave multicast group for given socket
872 * @maddr: multicast address
874 * Leave multicast group for given datagram socket and associated interface.
875 * The socket must have defined @iface.
877 * Result: 0 for success, -1 for an error.
881 sk_leave_group(sock
*s
, ip_addr maddr
)
884 return sk_leave_group4(s
, maddr
);
886 return sk_leave_group6(s
, maddr
);
890 * sk_setup_broadcast - enable broadcast for given socket
893 * Allow reception and transmission of broadcast packets for given datagram
894 * socket. The socket must have defined @iface. For transmission, packets should
895 * be sent to the @brd address of @iface.
897 * Result: 0 for success, -1 for an error.
901 sk_setup_broadcast(sock
*s
)
905 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BROADCAST
, &y
, sizeof(y
)) < 0)
912 * sk_set_ttl - set transmit TTL for given socket
916 * Set TTL for already opened connections when TTL was not set before. Useful
917 * for accepted connections when different ones should have different TTL.
919 * Result: 0 for success, -1 for an error.
923 sk_set_ttl(sock
*s
, int ttl
)
928 return sk_set_ttl4(s
, ttl
);
930 return sk_set_ttl6(s
, ttl
);
934 * sk_set_min_ttl - set minimal accepted TTL for given socket
938 * Set minimal accepted TTL for given socket. Can be used for TTL security.
941 * Result: 0 for success, -1 for an error.
945 sk_set_min_ttl(sock
*s
, int ttl
)
948 return sk_set_min_ttl4(s
, ttl
);
950 return sk_set_min_ttl6(s
, ttl
);
955 * sk_set_md5_auth - add / remove MD5 security association for given socket
957 * @local: IP address of local side
958 * @remote: IP address of remote side
959 * @ifa: Interface for link-local IP address
960 * @passwd: Password used for MD5 authentication
961 * @setkey: Update also system SA/SP database
963 * In TCP MD5 handling code in kernel, there is a set of security associations
964 * used for choosing password and other authentication parameters according to
965 * the local and remote address. This function is useful for listening socket,
966 * for active sockets it may be enough to set s->password field.
968 * When called with passwd != NULL, the new pair is added,
969 * When called with passwd == NULL, the existing pair is removed.
971 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
972 * stored in global SA/SP database (but the behavior also must be enabled on
973 * per-socket basis). In case of multiple sockets to the same neighbor, the
974 * socket-specific state must be configured for each socket while global state
975 * just once per src-dst pair. The @setkey argument controls whether the global
976 * state (SA/SP database) is also updated.
978 * Result: 0 for success, -1 for an error.
982 sk_set_md5_auth(sock
*s
, ip_addr local
, ip_addr remote
, struct iface
*ifa
, char *passwd
, int setkey
)
987 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
991 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
992 * kernel will automatically fill it for outgoing packets and check it for
993 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
994 * known to the kernel.
996 * Result: 0 for success, -1 for an error.
1000 sk_set_ipv6_checksum(sock
*s
, int offset
)
1002 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_CHECKSUM
, &offset
, sizeof(offset
)) < 0)
1003 ERR("IPV6_CHECKSUM");
1009 sk_set_icmp6_filter(sock
*s
, int p1
, int p2
)
1011 /* a bit of lame interface, but it is here only for Radv */
1012 struct icmp6_filter f
;
1014 ICMP6_FILTER_SETBLOCKALL(&f
);
1015 ICMP6_FILTER_SETPASS(p1
, &f
);
1016 ICMP6_FILTER_SETPASS(p2
, &f
);
1018 if (setsockopt(s
->fd
, SOL_ICMPV6
, ICMP6_FILTER
, &f
, sizeof(f
)) < 0)
1019 ERR("ICMP6_FILTER");
1025 sk_log_error(sock
*s
, const char *p
)
1027 log(L_ERR
"%s: Socket error: %s%#m", p
, s
->err
);
1032 * Actual struct birdsock code
1035 static list sock_list
;
1036 static struct birdsock
*current_sock
;
1037 static struct birdsock
*stored_sock
;
1039 static inline sock
*
1042 if (!s
->n
.next
->next
)
1045 return SKIP_BACK(sock
, n
, s
->n
.next
);
1049 sk_alloc_bufs(sock
*s
)
1051 if (!s
->rbuf
&& s
->rbsize
)
1052 s
->rbuf
= s
->rbuf_alloc
= xmalloc(s
->rbsize
);
1054 if (!s
->tbuf
&& s
->tbsize
)
1055 s
->tbuf
= s
->tbuf_alloc
= xmalloc(s
->tbsize
);
1056 s
->tpos
= s
->ttx
= s
->tbuf
;
1060 sk_free_bufs(sock
*s
)
1064 xfree(s
->rbuf_alloc
);
1065 s
->rbuf
= s
->rbuf_alloc
= NULL
;
1069 xfree(s
->tbuf_alloc
);
1070 s
->tbuf
= s
->tbuf_alloc
= NULL
;
1076 sk_ssh_free(sock
*s
)
1078 struct ssh_sock
*ssh
= s
->ssh
;
1087 if (ssh_channel_is_open(ssh
->channel
))
1088 ssh_channel_close(ssh
->channel
);
1089 ssh_channel_free(ssh
->channel
);
1090 ssh
->channel
= NULL
;
1095 ssh_disconnect(ssh
->session
);
1096 ssh_free(ssh
->session
);
1097 ssh
->session
= NULL
;
1103 sk_free(resource
*r
)
1105 sock
*s
= (sock
*) r
;
1110 if (s
->type
== SK_SSH
|| s
->type
== SK_SSH_ACTIVE
)
1117 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1118 if (!(s
->flags
& SKF_THREAD
))
1120 if (s
== current_sock
)
1121 current_sock
= sk_next(s
);
1122 if (s
== stored_sock
)
1123 stored_sock
= sk_next(s
);
1127 if (s
->type
!= SK_SSH
&& s
->type
!= SK_SSH_ACTIVE
)
1134 sk_set_rbsize(sock
*s
, uint val
)
1136 ASSERT(s
->rbuf_alloc
== s
->rbuf
);
1138 if (s
->rbsize
== val
)
1142 xfree(s
->rbuf_alloc
);
1143 s
->rbuf_alloc
= xmalloc(val
);
1144 s
->rpos
= s
->rbuf
= s
->rbuf_alloc
;
1148 sk_set_tbsize(sock
*s
, uint val
)
1150 ASSERT(s
->tbuf_alloc
== s
->tbuf
);
1152 if (s
->tbsize
== val
)
1155 byte
*old_tbuf
= s
->tbuf
;
1158 s
->tbuf
= s
->tbuf_alloc
= xrealloc(s
->tbuf_alloc
, val
);
1159 s
->tpos
= s
->tbuf
+ (s
->tpos
- old_tbuf
);
1160 s
->ttx
= s
->tbuf
+ (s
->ttx
- old_tbuf
);
1164 sk_set_tbuf(sock
*s
, void *tbuf
)
1166 s
->tbuf
= tbuf
?: s
->tbuf_alloc
;
1167 s
->ttx
= s
->tpos
= s
->tbuf
;
1171 sk_reallocate(sock
*s
)
1178 sk_dump(resource
*r
)
1180 sock
*s
= (sock
*) r
;
1181 static char *sk_type_names
[] = { "TCP<", "TCP>", "TCP", "UDP", NULL
, "IP", NULL
, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
1183 debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1184 sk_type_names
[s
->type
],
1192 s
->iface
? s
->iface
->name
: "none");
1195 static struct resclass sk_class
= {
1205 * sk_new - create a socket
1208 * This function creates a new socket resource. If you want to use it,
1209 * you need to fill in all the required fields of the structure and
1210 * call sk_open() to do the actual opening of the socket.
1212 * The real function name is sock_new(), sk_new() is a macro wrapper
1213 * to avoid collision with OpenSSL.
1218 sock
*s
= ralloc(p
, &sk_class
);
1220 // s->saddr = s->daddr = IPA_NONE;
1221 s
->tos
= s
->priority
= s
->ttl
= -1;
1232 if (s
->type
== SK_SSH_ACTIVE
)
1235 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1241 if (ipa_nonzero(s
->saddr
) && !(s
->flags
& SKF_BIND
))
1242 s
->flags
|= SKF_PKTINFO
;
1244 #ifdef CONFIG_USE_HDRINCL
1245 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
) && (s
->flags
& SKF_PKTINFO
))
1247 s
->flags
&= ~SKF_PKTINFO
;
1248 s
->flags
|= SKF_HDRINCL
;
1249 if (setsockopt(fd
, SOL_IP
, IP_HDRINCL
, &y
, sizeof(y
)) < 0)
1256 #ifdef SO_BINDTODEVICE
1257 struct ifreq ifr
= {};
1258 strcpy(ifr
.ifr_name
, s
->iface
->name
);
1259 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
1260 ERR("SO_BINDTODEVICE");
1263 #ifdef CONFIG_UNIX_DONTROUTE
1264 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_DONTROUTE
, &y
, sizeof(y
)) < 0)
1265 ERR("SO_DONTROUTE");
1269 if (s
->priority
>= 0)
1270 if (sk_set_priority(s
, s
->priority
) < 0)
1275 if (s
->flags
& SKF_LADDR_RX
)
1276 if (sk_request_cmsg4_pktinfo(s
) < 0)
1279 if (s
->flags
& SKF_TTL_RX
)
1280 if (sk_request_cmsg4_ttl(s
) < 0)
1283 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1284 if (sk_disable_mtu_disc4(s
) < 0)
1288 if (sk_set_ttl4(s
, s
->ttl
) < 0)
1292 if (sk_set_tos4(s
, s
->tos
) < 0)
1298 if ((s
->type
== SK_TCP_PASSIVE
) || (s
->type
== SK_TCP_ACTIVE
) || (s
->type
== SK_UDP
))
1299 if (setsockopt(fd
, SOL_IPV6
, IPV6_V6ONLY
, &y
, sizeof(y
)) < 0)
1302 if (s
->flags
& SKF_LADDR_RX
)
1303 if (sk_request_cmsg6_pktinfo(s
) < 0)
1306 if (s
->flags
& SKF_TTL_RX
)
1307 if (sk_request_cmsg6_ttl(s
) < 0)
1310 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1311 if (sk_disable_mtu_disc6(s
) < 0)
1315 if (sk_set_ttl6(s
, s
->ttl
) < 0)
1319 if (sk_set_tos6(s
, s
->tos
) < 0)
1329 add_tail(&sock_list
, &s
->n
);
1333 sk_tcp_connected(sock
*s
)
1336 int sa_len
= sizeof(sa
);
1338 if ((getsockname(s
->fd
, &sa
.sa
, &sa_len
) < 0) ||
1339 (sockaddr_read(&sa
, s
->af
, &s
->saddr
, &s
->iface
, &s
->sport
) < 0))
1340 log(L_WARN
"SOCK: Cannot get local IP address for TCP>");
1348 sk_ssh_connected(sock
*s
)
1356 sk_passive_connected(sock
*s
, int type
)
1358 sockaddr loc_sa
, rem_sa
;
1359 int loc_sa_len
= sizeof(loc_sa
);
1360 int rem_sa_len
= sizeof(rem_sa
);
1362 int fd
= accept(s
->fd
, ((type
== SK_TCP
) ? &rem_sa
.sa
: NULL
), &rem_sa_len
);
1365 if ((errno
!= EINTR
) && (errno
!= EAGAIN
))
1366 s
->err_hook(s
, errno
);
1370 sock
*t
= sk_new(s
->pool
);
1376 t
->rbsize
= s
->rbsize
;
1377 t
->tbsize
= s
->tbsize
;
1381 if ((getsockname(fd
, &loc_sa
.sa
, &loc_sa_len
) < 0) ||
1382 (sockaddr_read(&loc_sa
, s
->af
, &t
->saddr
, &t
->iface
, &t
->sport
) < 0))
1383 log(L_WARN
"SOCK: Cannot get local IP address for TCP<");
1385 if (sockaddr_read(&rem_sa
, s
->af
, &t
->daddr
, &t
->iface
, &t
->dport
) < 0)
1386 log(L_WARN
"SOCK: Cannot get remote IP address for TCP<");
1389 if (sk_setup(t
) < 0)
1391 /* FIXME: Call err_hook instead ? */
1392 log(L_ERR
"SOCK: Incoming connection: %s%#m", t
->err
);
1394 /* FIXME: handle it better in rfree() */
1409 * Return SSH_OK or SSH_AGAIN or SSH_ERROR
1412 sk_ssh_connect(sock
*s
)
1414 s
->fd
= ssh_get_fd(s
->ssh
->session
);
1416 /* Big fall thru automata */
1417 switch (s
->ssh
->state
)
1419 case SK_SSH_CONNECT
:
1421 switch (ssh_connect(s
->ssh
->session
))
1424 /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
1425 * after SSH_AGAIN is returned by ssh_connect(). This is, however, not
1426 * documented anywhere, but our code relies on it.
1438 case SK_SSH_SERVER_KNOWN
:
1440 s
->ssh
->state
= SK_SSH_SERVER_KNOWN
;
1442 if (s
->ssh
->server_hostkey_path
)
1444 int server_identity_is_ok
= 1;
1446 /* Check server identity */
1447 switch (ssh_is_server_known(s
->ssh
->session
))
1449 #define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
1450 case SSH_SERVER_KNOWN_OK
:
1451 /* The server is known and has not changed. */
1454 case SSH_SERVER_NOT_KNOWN
:
1455 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server is unknown, its public key was not found in the known host file %s", s
->ssh
->server_hostkey_path
);
1458 case SSH_SERVER_KNOWN_CHANGED
:
1459 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server key has changed. Either you are under attack or the administrator changed the key.");
1460 server_identity_is_ok
= 0;
1463 case SSH_SERVER_FILE_NOT_FOUND
:
1464 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The known host file %s does not exist", s
->ssh
->server_hostkey_path
);
1465 server_identity_is_ok
= 0;
1468 case SSH_SERVER_ERROR
:
1469 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "Some error happened");
1470 server_identity_is_ok
= 0;
1473 case SSH_SERVER_FOUND_OTHER
:
1474 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server gave use a key of a type while we had an other type recorded. " \
1475 "It is a possible attack.");
1476 server_identity_is_ok
= 0;
1480 if (!server_identity_is_ok
)
1485 case SK_SSH_USERAUTH
:
1487 s
->ssh
->state
= SK_SSH_USERAUTH
;
1488 switch (ssh_userauth_publickey_auto(s
->ssh
->session
, NULL
, NULL
))
1490 case SSH_AUTH_AGAIN
:
1493 case SSH_AUTH_SUCCESS
:
1501 case SK_SSH_CHANNEL
:
1503 s
->ssh
->state
= SK_SSH_CHANNEL
;
1504 s
->ssh
->channel
= ssh_channel_new(s
->ssh
->session
);
1505 if (s
->ssh
->channel
== NULL
)
1509 case SK_SSH_SESSION
:
1511 s
->ssh
->state
= SK_SSH_SESSION
;
1512 switch (ssh_channel_open_session(s
->ssh
->channel
))
1525 case SK_SSH_SUBSYSTEM
:
1527 s
->ssh
->state
= SK_SSH_SUBSYSTEM
;
1528 if (s
->ssh
->subsystem
)
1530 switch (ssh_channel_request_subsystem(s
->ssh
->channel
, s
->ssh
->subsystem
))
1544 case SK_SSH_ESTABLISHED
:
1545 s
->ssh
->state
= SK_SSH_ESTABLISHED
;
1552 * Return file descriptor number if success
1553 * Return -1 if failed
1556 sk_open_ssh(sock
*s
)
1559 bug("sk_open() sock->ssh is not allocated");
1561 ssh_session sess
= ssh_new();
1563 ERR2("Cannot create a ssh session");
1564 s
->ssh
->session
= sess
;
1566 const int verbosity
= SSH_LOG_NOLOG
;
1567 ssh_options_set(sess
, SSH_OPTIONS_LOG_VERBOSITY
, &verbosity
);
1568 ssh_options_set(sess
, SSH_OPTIONS_HOST
, s
->host
);
1569 ssh_options_set(sess
, SSH_OPTIONS_PORT
, &(s
->dport
));
1570 /* TODO: Add SSH_OPTIONS_BINDADDR */
1571 ssh_options_set(sess
, SSH_OPTIONS_USER
, s
->ssh
->username
);
1573 if (s
->ssh
->server_hostkey_path
)
1574 ssh_options_set(sess
, SSH_OPTIONS_KNOWNHOSTS
, s
->ssh
->server_hostkey_path
);
1576 if (s
->ssh
->client_privkey_path
)
1577 ssh_options_set(sess
, SSH_OPTIONS_IDENTITY
, s
->ssh
->client_privkey_path
);
1579 ssh_set_blocking(sess
, 0);
1581 switch (sk_ssh_connect(s
))
1587 sk_ssh_connected(s
);
1591 ERR2(ssh_get_error(sess
));
1595 return ssh_get_fd(sess
);
1603 * sk_open - open a socket
1606 * This function takes a socket resource created by sk_new() and
1607 * initialized by the user and binds a corresponding network connection
1610 * Result: 0 for success, -1 for an error.
1619 ip_addr bind_addr
= IPA_NONE
;
1622 if (s
->type
<= SK_IP
)
1625 * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
1626 * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
1627 * But the specifications have to be consistent.
1633 ASSERT(ipa_zero(s
->saddr
) || ipa_zero(s
->daddr
) ||
1634 (ipa_is_ip4(s
->saddr
) == ipa_is_ip4(s
->daddr
)));
1635 af
= (ipa_is_ip4(s
->saddr
) || ipa_is_ip4(s
->daddr
)) ? AF_INET
: AF_INET6
;
1639 ASSERT(ipa_zero(s
->saddr
) || ipa_is_ip4(s
->saddr
));
1640 ASSERT(ipa_zero(s
->daddr
) || ipa_is_ip4(s
->daddr
));
1645 ASSERT(ipa_zero(s
->saddr
) || !ipa_is_ip4(s
->saddr
));
1646 ASSERT(ipa_zero(s
->daddr
) || !ipa_is_ip4(s
->daddr
));
1651 bug("Invalid subtype %d", s
->subtype
);
1658 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1660 case SK_TCP_PASSIVE
:
1661 fd
= socket(af
, SOCK_STREAM
, IPPROTO_TCP
);
1662 bind_port
= s
->sport
;
1663 bind_addr
= s
->saddr
;
1664 do_bind
= bind_port
|| ipa_nonzero(bind_addr
);
1669 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1670 fd
= sk_open_ssh(s
);
1675 fd
= socket(af
, SOCK_DGRAM
, IPPROTO_UDP
);
1676 bind_port
= s
->sport
;
1677 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1682 fd
= socket(af
, SOCK_RAW
, s
->dport
);
1684 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1685 do_bind
= ipa_nonzero(bind_addr
);
1694 bug("sk_open() called for invalid sock type %d", s
->type
);
1703 if (sk_setup(s
) < 0)
1712 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &y
, sizeof(y
)) < 0)
1713 ERR2("SO_REUSEADDR");
1715 #ifdef CONFIG_NO_IFACE_BIND
1716 /* Workaround missing ability to bind to an iface */
1717 if ((s
->type
== SK_UDP
) && s
->iface
&& ipa_zero(bind_addr
))
1719 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &y
, sizeof(y
)) < 0)
1720 ERR2("SO_REUSEPORT");
1725 if (s
->flags
& SKF_HIGH_PORT
)
1726 if (sk_set_high_port(s
) < 0)
1727 log(L_WARN
"Socket error: %s%#m", s
->err
);
1729 sockaddr_fill(&sa
, s
->af
, bind_addr
, s
->iface
, bind_port
);
1730 if (bind(fd
, &sa
.sa
, SA_LEN(sa
)) < 0)
1735 if (sk_set_md5_auth(s
, s
->saddr
, s
->daddr
, s
->iface
, s
->password
, 0) < 0)
1741 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1742 if (connect(fd
, &sa
.sa
, SA_LEN(sa
)) >= 0)
1743 sk_tcp_connected(s
);
1744 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
&&
1745 errno
!= ECONNREFUSED
&& errno
!= EHOSTUNREACH
&& errno
!= ENETUNREACH
)
1749 case SK_TCP_PASSIVE
:
1750 if (listen(fd
, 8) < 0)
1762 if (!(s
->flags
& SKF_THREAD
))
1774 sk_open_unix(sock
*s
, char *name
)
1776 struct sockaddr_un sa
;
1779 /* We are sloppy on errors (we leak the fd and do not set s->err), but we die anyway */
1781 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1785 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1788 /* Path length checked in test_old_bird() */
1789 sa
.sun_family
= AF_UNIX
;
1790 strcpy(sa
.sun_path
, name
);
1792 if (bind(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) < 0)
1795 if (listen(fd
, 8) < 0)
/*
 * Sizes of the cmsg_buf control-message buffers. Each one is the larger
 * of the IPv4 and the IPv6 requirement, so one buffer fits either family:
 * the RX size covers pktinfo plus TTL, the TX size covers pktinfo only.
 */
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO + CMSG4_SPACE_TTL, \
			  CMSG6_SPACE_PKTINFO + CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO, CMSG6_SPACE_PKTINFO)
1809 sk_prepare_cmsgs(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
1812 sk_prepare_cmsgs4(s
, msg
, cbuf
, cbuflen
);
1814 sk_prepare_cmsgs6(s
, msg
, cbuf
, cbuflen
);
1818 sk_process_cmsgs(sock
*s
, struct msghdr
*msg
)
1822 s
->laddr
= IPA_NONE
;
1826 for (cm
= CMSG_FIRSTHDR(msg
); cm
!= NULL
; cm
= CMSG_NXTHDR(msg
, cm
))
1828 if ((cm
->cmsg_level
== SOL_IP
) && sk_is_ipv4(s
))
1830 sk_process_cmsg4_pktinfo(s
, cm
);
1831 sk_process_cmsg4_ttl(s
, cm
);
1834 if ((cm
->cmsg_level
== SOL_IPV6
) && sk_is_ipv6(s
))
1836 sk_process_cmsg6_pktinfo(s
, cm
);
1837 sk_process_cmsg6_ttl(s
, cm
);
1846 struct iovec iov
= {s
->tbuf
, s
->tpos
- s
->tbuf
};
1847 byte cmsg_buf
[CMSG_TX_SPACE
];
1850 sockaddr_fill(&dst
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1852 struct msghdr msg
= {
1853 .msg_name
= &dst
.sa
,
1854 .msg_namelen
= SA_LEN(dst
),
1859 #ifdef CONFIG_USE_HDRINCL
1861 struct iovec iov2
[2] = { {hdr
, 20}, iov
};
1863 if (s
->flags
& SKF_HDRINCL
)
1865 sk_prepare_ip_header(s
, hdr
, iov
.iov_len
);
1871 if (s
->flags
& SKF_PKTINFO
)
1872 sk_prepare_cmsgs(s
, &msg
, cmsg_buf
, sizeof(cmsg_buf
));
1874 return sendmsg(s
->fd
, &msg
, 0);
1880 struct iovec iov
= {s
->rbuf
, s
->rbsize
};
1881 byte cmsg_buf
[CMSG_RX_SPACE
];
1884 struct msghdr msg
= {
1885 .msg_name
= &src
.sa
,
1886 .msg_namelen
= sizeof(src
), // XXXX ??
1889 .msg_control
= cmsg_buf
,
1890 .msg_controllen
= sizeof(cmsg_buf
),
1894 int rv
= recvmsg(s
->fd
, &msg
, 0);
1899 // if (cf_type == SK_IP)
1900 // rv = ipv4_skip_header(pbuf, rv);
1903 sockaddr_read(&src
, s
->af
, &s
->faddr
, NULL
, &s
->fport
);
1904 sk_process_cmsgs(s
, &msg
);
1906 if (msg
.msg_flags
& MSG_TRUNC
)
1907 s
->flags
|= SKF_TRUNCATED
;
1909 s
->flags
&= ~SKF_TRUNCATED
;
1915 static inline void reset_tx_buffer(sock
*s
) { s
->ttx
= s
->tpos
= s
->tbuf
; }
1918 sk_maybe_write(sock
*s
)
1927 while (s
->ttx
!= s
->tpos
)
1929 e
= write(s
->fd
, s
->ttx
, s
->tpos
- s
->ttx
);
1933 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1936 /* EPIPE is just a connection close notification during TX */
1937 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1949 while (s
->ttx
!= s
->tpos
)
1951 e
= ssh_channel_write(s
->ssh
->channel
, s
->ttx
, s
->tpos
- s
->ttx
);
1955 s
->err
= ssh_get_error(s
->ssh
->session
);
1956 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
1959 /* EPIPE is just a connection close notification during TX */
1960 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1972 if (s
->tbuf
== s
->tpos
)
1979 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1982 s
->err_hook(s
, errno
);
1995 bug("sk_maybe_write: unknown socket type %d", s
->type
);
2000 sk_rx_ready(sock
*s
)
2003 struct pollfd pfd
= { .fd
= s
->fd
};
2004 pfd
.events
|= POLLIN
;
2007 rv
= poll(&pfd
, 1, 0);
2009 if ((rv
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
2016 * sk_send - send data to a socket
2018 * @len: number of bytes to send
2020 * This function sends @len bytes of data prepared in the
2021 * transmit buffer of the socket @s to the network connection.
2022 * If the packet can be sent immediately, it does so and returns
2023 * 1, else it queues the packet for later processing, returns 0
2024 * and calls the @tx_hook of the socket when the transmission
2028 sk_send(sock
*s
, unsigned len
)
2031 s
->tpos
= s
->tbuf
+ len
;
2032 return sk_maybe_write(s
);
2036 * sk_send_to - send data to a specific destination
2038 * @len: number of bytes to send
2039 * @addr: IP address to send the packet to
2040 * @port: port to send the packet to
2042 * This is a sk_send() replacement for connection-less packet sockets
2043 * which allows destination of the packet to be chosen dynamically.
2044 * Raw IP sockets should use 0 for @port.
2047 sk_send_to(sock
*s
, unsigned len
, ip_addr addr
, unsigned port
)
2054 s
->tpos
= s
->tbuf
+ len
;
2055 return sk_maybe_write(s
);
2060 sk_send_full(sock *s, unsigned len, struct iface *ifa,
2061 ip_addr saddr, ip_addr daddr, unsigned dport)
2068 s->tpos = s->tbuf + len;
2069 return sk_maybe_write(s);
2074 call_rx_hook(sock
*s
, int size
)
2076 if (s
->rx_hook(s
, size
))
2078 /* We need to be careful since the socket could have been deleted by the hook */
2079 if (current_sock
== s
)
2086 sk_read_ssh(sock
*s
)
2088 ssh_channel rchans
[2] = { s
->ssh
->channel
, NULL
};
2089 struct timeval timev
= { 1, 0 };
2091 if (ssh_channel_select(rchans
, NULL
, NULL
, &timev
) == SSH_EINTR
)
2092 return 1; /* Try again */
2094 if (ssh_channel_is_eof(s
->ssh
->channel
) != 0)
2096 /* The remote side is closing the connection */
2101 if (rchans
[0] == NULL
)
2102 return 0; /* No data is available on the socket */
2104 const uint used_bytes
= s
->rpos
- s
->rbuf
;
2105 const int read_bytes
= ssh_channel_read_nonblocking(s
->ssh
->channel
, s
->rpos
, s
->rbsize
- used_bytes
, 0);
2109 s
->rpos
+= read_bytes
;
2110 call_rx_hook(s
, used_bytes
+ read_bytes
);
2113 else if (read_bytes
== 0)
2115 if (ssh_channel_is_eof(s
->ssh
->channel
) != 0)
2117 /* The remote side is closing the connection */
2123 s
->err
= ssh_get_error(s
->ssh
->session
);
2124 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
2127 return 0; /* No data is available on the socket */
2131 /* sk_read() and sk_write() are called from BFD's event loop */
2134 sk_read(sock
*s
, int revents
)
2138 case SK_TCP_PASSIVE
:
2139 return sk_passive_connected(s
, SK_TCP
);
2141 case SK_UNIX_PASSIVE
:
2142 return sk_passive_connected(s
, SK_UNIX
);
2147 int c
= read(s
->fd
, s
->rpos
, s
->rbuf
+ s
->rbsize
- s
->rpos
);
2151 if (errno
!= EINTR
&& errno
!= EAGAIN
)
2152 s
->err_hook(s
, errno
);
2153 else if (errno
== EAGAIN
&& !(revents
& POLLIN
))
2155 log(L_ERR
"Got EAGAIN from read when revents=%x (without POLLIN)", revents
);
2164 call_rx_hook(s
, s
->rpos
- s
->rbuf
);
2172 return sk_read_ssh(s
);
2176 return s
->rx_hook(s
, 0);
2180 int e
= sk_recvmsg(s
);
2184 if (errno
!= EINTR
&& errno
!= EAGAIN
)
2185 s
->err_hook(s
, errno
);
2189 s
->rpos
= s
->rbuf
+ e
;
2204 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
2206 if (connect(s
->fd
, &sa
.sa
, SA_LEN(sa
)) >= 0 || errno
== EISCONN
)
2207 sk_tcp_connected(s
);
2208 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
)
2209 s
->err_hook(s
, errno
);
2216 switch (sk_ssh_connect(s
))
2219 sk_ssh_connected(s
);
2226 s
->err
= ssh_get_error(s
->ssh
->session
);
2227 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
2235 if (s
->ttx
!= s
->tpos
&& sk_maybe_write(s
) > 0)
2245 int sk_is_ipv4(sock
*s
)
2246 { return s
->af
== AF_INET
; }
2248 int sk_is_ipv6(sock
*s
)
2249 { return s
->af
== AF_INET6
; }
2252 sk_err(sock
*s
, int revents
)
2254 int se
= 0, sse
= sizeof(se
);
2255 if ((s
->type
!= SK_MAGIC
) && (revents
& POLLERR
))
2256 if (getsockopt(s
->fd
, SOL_SOCKET
, SO_ERROR
, &se
, &sse
) < 0)
2258 log(L_ERR
"IO: Socket error: SO_ERROR: %m");
2271 debug("Open sockets:\n");
2272 WALK_LIST(n
, sock_list
)
2274 s
= SKIP_BACK(sock
, n
, n
);
2283 * Internal event log and watchdog
2286 #define EVENT_LOG_LENGTH 32
2288 struct event_log_entry
2296 static struct event_log_entry event_log
[EVENT_LOG_LENGTH
];
2297 static struct event_log_entry
*event_open
;
2298 static int event_log_pos
, event_log_num
, watchdog_active
;
2299 static btime last_time
;
2300 static btime loop_time
;
2303 io_update_time(void)
2308 if (!clock_monotonic_available
)
2312 * This is the third time-tracking procedure (after update_times() above and
2313 * times_update() in BFD), dedicated to internal event log and latency
2314 * tracking. Hopefully, we will consolidate these sometime.
2317 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
2319 die("clock_gettime: %m");
2321 last_time
= ((s64
) ts
.tv_sec S
) + (ts
.tv_nsec
/ 1000);
2325 event_open
->duration
= last_time
- event_open
->timestamp
;
2327 if (event_open
->duration
> config
->latency_limit
)
2328 log(L_WARN
"Event 0x%p 0x%p took %d ms",
2329 event_open
->hook
, event_open
->data
, (int) (event_open
->duration TO_MS
));
2336 * io_log_event - mark approaching event into event log
2337 * @hook: event hook address
2338 * @data: event data address
2340 * Store info (hook, data, timestamp) about the following internal event into
2341 * a circular event log (@event_log). When latency tracking is enabled, the log
2342 * entry is kept open (in @event_open) so the duration can be filled later.
2345 io_log_event(void *hook
, void *data
)
2347 if (config
->latency_debug
)
2350 struct event_log_entry
*en
= event_log
+ event_log_pos
;
2354 en
->timestamp
= last_time
;
2359 event_log_pos
%= EVENT_LOG_LENGTH
;
2361 event_open
= config
->latency_debug
? en
: NULL
;
2365 io_close_event(void)
2376 log(L_DEBUG
"Event log:");
2377 for (i
= 0; i
< EVENT_LOG_LENGTH
; i
++)
2379 struct event_log_entry
*en
= event_log
+ (event_log_pos
+ i
) % EVENT_LOG_LENGTH
;
2381 log(L_DEBUG
" Event 0x%p 0x%p at %8d for %d ms", en
->hook
, en
->data
,
2382 (int) ((last_time
- en
->timestamp
) TO_MS
), (int) (en
->duration TO_MS
));
2387 watchdog_sigalrm(int sig UNUSED
)
2389 /* Update last_time and duration, but skip latency check */
2390 config
->latency_limit
= 0xffffffff;
2393 /* We want core dump */
2398 watchdog_start1(void)
2402 loop_time
= last_time
;
2406 watchdog_start(void)
2410 loop_time
= last_time
;
2413 if (config
->watchdog_timeout
)
2415 alarm(config
->watchdog_timeout
);
2416 watchdog_active
= 1;
2425 if (watchdog_active
)
2428 watchdog_active
= 0;
2431 btime duration
= last_time
- loop_time
;
2432 if (duration
> config
->watchdog_warning
)
2433 log(L_WARN
"I/O loop cycle took %d ms for %d events",
2434 (int) (duration TO_MS
), event_log_num
);
2442 volatile int async_config_flag
; /* Asynchronous reconfiguration/dump scheduled */
2443 volatile int async_dump_flag
;
2444 volatile int async_shutdown_flag
;
2449 init_list(&near_timers
);
2450 init_list(&far_timers
);
2451 init_list(&sock_list
);
2452 init_list(&global_event_list
);
2457 srandom((int) now_real
);
2460 static int short_loops
= 0;
2461 #define SHORT_LOOP_MAX 10
2468 int nfds
, events
, pout
;
2472 struct pollfd
*pfd
= xmalloc(fdmax
* sizeof(struct pollfd
));
2477 events
= ev_run_list(&global_event_list
);
2480 tout
= tm_first_shot();
2486 poll_tout
= (events
? 0 : MIN(tout
- now
, 3)) * 1000; /* Time in milliseconds */
2491 WALK_LIST(n
, sock_list
)
2493 pfd
[nfds
] = (struct pollfd
) { .fd
= -1 }; /* everything other set to 0 by this */
2494 s
= SKIP_BACK(sock
, n
, n
);
2497 pfd
[nfds
].fd
= s
->fd
;
2498 pfd
[nfds
].events
|= POLLIN
;
2500 if (s
->tx_hook
&& s
->ttx
!= s
->tpos
)
2502 pfd
[nfds
].fd
= s
->fd
;
2503 pfd
[nfds
].events
|= POLLOUT
;
2505 if (pfd
[nfds
].fd
!= -1)
2516 pfd
= xrealloc(pfd
, fdmax
* sizeof(struct pollfd
));
2521 * Yes, this is racy. But even if the signal comes before this test
2522 * and entering poll(), it gets caught on the next timer tick.
2525 if (async_config_flag
)
2527 io_log_event(async_config
, NULL
);
2529 async_config_flag
= 0;
2532 if (async_dump_flag
)
2534 io_log_event(async_dump
, NULL
);
2536 async_dump_flag
= 0;
2539 if (async_shutdown_flag
)
2541 io_log_event(async_shutdown
, NULL
);
2543 async_shutdown_flag
= 0;
2547 /* And finally enter poll() to find active sockets */
2549 pout
= poll(pfd
, nfds
, poll_tout
);
2554 if (errno
== EINTR
|| errno
== EAGAIN
)
2560 /* guaranteed to be non-empty */
2561 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
2563 while (current_sock
)
2565 sock
*s
= current_sock
;
2568 current_sock
= sk_next(s
);
2576 if (s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2580 io_log_event(s
->rx_hook
, s
->data
);
2581 e
= sk_read(s
, pfd
[s
->index
].revents
);
2582 if (s
!= current_sock
)
2585 while (e
&& s
->rx_hook
&& steps
);
2588 if (pfd
[s
->index
].revents
& POLLOUT
)
2592 io_log_event(s
->tx_hook
, s
->data
);
2594 if (s
!= current_sock
)
2599 current_sock
= sk_next(s
);
2604 if (events
&& (short_loops
< SHORT_LOOP_MAX
))
2609 current_sock
= stored_sock
;
2610 if (current_sock
== NULL
)
2611 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
2613 while (current_sock
&& count
< MAX_RX_STEPS
)
2615 sock
*s
= current_sock
;
2618 current_sock
= sk_next(s
);
2622 if (!s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2625 io_log_event(s
->rx_hook
, s
->data
);
2626 sk_read(s
, pfd
[s
->index
].revents
);
2627 if (s
!= current_sock
)
2631 if (pfd
[s
->index
].revents
& (POLLHUP
| POLLERR
))
2633 sk_err(s
, pfd
[s
->index
].revents
);
2637 current_sock
= sk_next(s
);
2642 stored_sock
= current_sock
;
2648 test_old_bird(char *path
)
2651 struct sockaddr_un sa
;
2653 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
2655 die("Cannot create socket: %m");
2656 if (strlen(path
) >= sizeof(sa
.sun_path
))
2657 die("Socket path too long");
2658 bzero(&sa
, sizeof(sa
));
2659 sa
.sun_family
= AF_UNIX
;
2660 strcpy(sa
.sun_path
, path
);
2661 if (connect(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) == 0)
2662 die("I found another BIRD running.");