2 * BIRD Internet Routing Daemon -- Unix I/O
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
7 * Can be freely distributed and used under the terms of the GNU GPL.
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
20 #include <sys/types.h>
21 #include <sys/socket.h>
29 #include <netinet/in.h>
30 #include <netinet/tcp.h>
31 #include <netinet/udp.h>
32 #include <netinet/icmp6.h>
34 #include "nest/bird.h"
35 #include "lib/lists.h"
36 #include "lib/resource.h"
37 #include "sysdep/unix/timer.h"
38 #include "lib/socket.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "nest/iface.h"
43 #include "sysdep/unix/unix.h"
44 #include CONFIG_INCLUDE_SYSIO_H
46 /* Maximum number of calls of tx handler for one socket in one
47 * poll iteration. Should be small enough to not monopolize CPU by
48 * one protocol instance.
52 /* Maximum number of calls of rx handler for all sockets in one poll
53 iteration. RX callbacks are often much more costly so we limit
54 this to get small latencies */
55 #define MAX_RX_STEPS 4
69 struct rfile
*a
= (struct rfile
*) r
;
77 struct rfile
*a
= (struct rfile
*) r
;
79 debug("(FILE *%p)\n", a
->f
);
82 static struct resclass rf_class
= {
92 tracked_fopen(pool
*p
, char *name
, char *mode
)
94 FILE *f
= fopen(name
, mode
);
98 struct rfile
*r
= ralloc(p
, &rf_class
);
107 * Timers are resources which represent a wish of a module to call
108 * a function at the specified time. The platform dependent code
109 * doesn't guarantee exact timing, only that a timer function
110 * won't be called before the requested time.
112 * In BIRD, time is represented by values of the &bird_clock_t type
113 * which are integral numbers interpreted as a relative number of seconds since
114 * some fixed time point in the past. The current time can be read
115 * from variable @now with reasonable accuracy and is monotonic. There is also
116 * a current 'absolute' time in variable @now_real reported by OS.
118 * Each timer is described by a &timer structure containing a pointer
119 * to the handler function (@hook), data private to this function (@data),
120 * time the function should be called at (@expires, 0 for inactive timers),
121 * for the other fields see |timer.h|.
124 #define NEAR_TIMER_LIMIT 4
126 static list near_timers
, far_timers
;
127 static bird_clock_t first_far_timer
= TIME_INFINITY
;
129 /* now must be different from 0, because 0 is a special value in timer->expires */
130 bird_clock_t now
= 1, now_real
, boot_time
;
133 update_times_plain(void)
135 bird_clock_t new_time
= time(NULL
);
136 int delta
= new_time
- now_real
;
138 if ((delta
>= 0) && (delta
< 60))
140 else if (now_real
!= 0)
141 log(L_WARN
"Time jump, delta %d s", delta
);
147 update_times_gettime(void)
152 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
154 die("clock_gettime: %m");
156 if (ts
.tv_sec
!= now
) {
158 log(L_ERR
"Monotonic timer is broken");
161 now_real
= time(NULL
);
165 static int clock_monotonic_available
;
170 if (clock_monotonic_available
)
171 update_times_gettime();
173 update_times_plain();
180 clock_monotonic_available
= (clock_gettime(CLOCK_MONOTONIC
, &ts
) == 0);
181 if (!clock_monotonic_available
)
182 log(L_WARN
"Monotonic timer is missing");
189 timer
*t
= (timer
*) r
;
197 timer
*t
= (timer
*) r
;
199 debug("(code %p, data %p, ", t
->hook
, t
->data
);
201 debug("rand %d, ", t
->randomize
);
203 debug("recur %d, ", t
->recurrent
);
205 debug("expires in %d sec)\n", t
->expires
- now
);
207 debug("inactive)\n");
210 static struct resclass tm_class
= {
220 * tm_new - create a timer
223 * This function creates a new timer resource and returns
224 * a pointer to it. To use the timer, you need to fill in
225 * the structure fields and call tm_start() to start timing.
230 timer
*t
= ralloc(p
, &tm_class
);
235 tm_insert_near(timer
*t
)
237 node
*n
= HEAD(near_timers
);
239 while (n
->next
&& (SKIP_BACK(timer
, n
, n
)->expires
< t
->expires
))
241 insert_node(&t
->n
, n
->prev
);
245 * tm_start - start a timer
247 * @after: number of seconds the timer should be run after
249 * This function schedules the hook function of the timer to
250 * be called after @after seconds. If the timer has been already
251 * started, its @expires time is replaced by the new value.
253 * If you have set the @randomize field of @t, the timeout
254 * will be increased by a random number of seconds chosen
255 * uniformly from range 0 .. @randomize.
257 * You can call tm_start() from the handler function of the timer
258 * to request another run of the timer. Also, you can set the @recurrent
259 * field to have the timer re-added automatically with the same timeout.
262 tm_start(timer
*t
, unsigned after
)
267 after
+= random() % (t
->randomize
+ 1);
269 if (t
->expires
== when
)
274 if (after
<= NEAR_TIMER_LIMIT
)
278 if (!first_far_timer
|| first_far_timer
> when
)
279 first_far_timer
= when
;
280 add_tail(&far_timers
, &t
->n
);
285 * tm_stop - stop a timer
288 * This function stops a timer. If the timer is already stopped,
302 tm_dump_them(char *name
, list
*l
)
307 debug("%s timers:\n", name
);
310 t
= SKIP_BACK(timer
, n
, n
);
320 tm_dump_them("Near", &near_timers
);
321 tm_dump_them("Far", &far_timers
);
327 time_t x
= first_far_timer
;
329 if (!EMPTY_LIST(near_timers
))
331 timer
*t
= SKIP_BACK(timer
, n
, HEAD(near_timers
));
338 void io_log_event(void *hook
, void *data
);
346 if (first_far_timer
<= now
)
348 bird_clock_t limit
= now
+ NEAR_TIMER_LIMIT
;
349 first_far_timer
= TIME_INFINITY
;
350 n
= HEAD(far_timers
);
353 t
= SKIP_BACK(timer
, n
, n
);
354 if (t
->expires
<= limit
)
359 else if (t
->expires
< first_far_timer
)
360 first_far_timer
= t
->expires
;
364 while ((n
= HEAD(near_timers
)) -> next
)
367 t
= SKIP_BACK(timer
, n
, n
);
368 if (t
->expires
> now
)
371 delay
= t
->expires
- now
;
375 int i
= t
->recurrent
- delay
;
380 io_log_event(t
->hook
, t
->data
);
386 * tm_parse_datetime - parse a date and time
387 * @x: datetime string
389 * tm_parse_datetime() takes a textual representation of
390 * a date and time (dd-mm-yyyy hh:mm:ss)
391 * and converts it to the corresponding value of type &bird_clock_t.
394 tm_parse_datetime(char *x
)
400 if (sscanf(x
, "%d-%d-%d %d:%d:%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &tm
.tm_hour
, &tm
.tm_min
, &tm
.tm_sec
, &n
) != 6 || x
[n
])
401 return tm_parse_date(x
);
405 if (t
== (time_t) -1)
410 * tm_parse_date - parse a date
413 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
414 * and converts it to the corresponding value of type &bird_clock_t.
417 tm_parse_date(char *x
)
423 if (sscanf(x
, "%d-%d-%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &n
) != 3 || x
[n
])
427 tm
.tm_hour
= tm
.tm_min
= tm
.tm_sec
= 0;
429 if (t
== (time_t) -1)
435 tm_format_reltime(char *x
, struct tm
*tm
, bird_clock_t delta
)
437 static char *month_names
[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
438 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
441 bsprintf(x
, "%02d:%02d", tm
->tm_hour
, tm
->tm_min
);
442 else if (delta
< 360*86400)
443 bsprintf(x
, "%s%02d", month_names
[tm
->tm_mon
], tm
->tm_mday
);
445 bsprintf(x
, "%d", tm
->tm_year
+1900);
448 #include "conf/conf.h"
451 * tm_format_datetime - convert date and time to textual representation
452 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
453 * @fmt_spec: specification of resulting textual representation of the time
456 * This function formats the given relative time value @t to a textual
457 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
460 tm_format_datetime(char *x
, struct timeformat
*fmt_spec
, bird_clock_t t
)
462 const char *fmt_used
;
464 bird_clock_t delta
= now
- t
;
465 t
= now_real
- delta
;
468 if (fmt_spec
->fmt1
== NULL
)
469 return tm_format_reltime(x
, tm
, delta
);
471 if ((fmt_spec
->limit
== 0) || (delta
< fmt_spec
->limit
))
472 fmt_used
= fmt_spec
->fmt1
;
474 fmt_used
= fmt_spec
->fmt2
;
476 int rv
= strftime(x
, TM_DATETIME_BUFFER_SIZE
, fmt_used
, tm
);
477 if (((rv
== 0) && fmt_used
[0]) || (rv
== TM_DATETIME_BUFFER_SIZE
))
478 strcpy(x
, "<too-long>");
485 * Socket resources represent network connections. Their data structure (&socket)
486 * contains a lot of fields defining the exact type of the socket, the local and
487 * remote addresses and ports, pointers to socket buffers and finally pointers to
488 * hook functions to be called when new data have arrived to the receive buffer
489 * (@rx_hook), when the contents of the transmit buffer have been transmitted
490 * (@tx_hook) and when an error or connection close occurs (@err_hook).
492 * Freeing of sockets from inside socket hooks is perfectly safe.
496 #define SOL_IP IPPROTO_IP
500 #define SOL_IPV6 IPPROTO_IPV6
504 #define SOL_ICMPV6 IPPROTO_ICMPV6
509 * Sockaddr helper functions
512 static inline int UNUSED
sockaddr_length(int af
)
513 { return (af
== AF_INET
) ? sizeof(struct sockaddr_in
) : sizeof(struct sockaddr_in6
); }
516 sockaddr_fill4(struct sockaddr_in
*sa
, ip_addr a
, uint port
)
518 memset(sa
, 0, sizeof(struct sockaddr_in
));
519 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
520 sa
->sin_len
= sizeof(struct sockaddr_in
);
522 sa
->sin_family
= AF_INET
;
523 sa
->sin_port
= htons(port
);
524 sa
->sin_addr
= ipa_to_in4(a
);
528 sockaddr_fill6(struct sockaddr_in6
*sa
, ip_addr a
, struct iface
*ifa
, uint port
)
530 memset(sa
, 0, sizeof(struct sockaddr_in6
));
532 sa
->sin6_len
= sizeof(struct sockaddr_in6
);
534 sa
->sin6_family
= AF_INET6
;
535 sa
->sin6_port
= htons(port
);
536 sa
->sin6_flowinfo
= 0;
537 sa
->sin6_addr
= ipa_to_in6(a
);
539 if (ifa
&& ipa_is_link_local(a
))
540 sa
->sin6_scope_id
= ifa
->index
;
544 sockaddr_fill(sockaddr
*sa
, int af
, ip_addr a
, struct iface
*ifa
, uint port
)
547 sockaddr_fill4((struct sockaddr_in
*) sa
, a
, port
);
548 else if (af
== AF_INET6
)
549 sockaddr_fill6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
555 sockaddr_read4(struct sockaddr_in
*sa
, ip_addr
*a
, uint
*port
)
557 *port
= ntohs(sa
->sin_port
);
558 *a
= ipa_from_in4(sa
->sin_addr
);
562 sockaddr_read6(struct sockaddr_in6
*sa
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
564 *port
= ntohs(sa
->sin6_port
);
565 *a
= ipa_from_in6(sa
->sin6_addr
);
567 if (ifa
&& ipa_is_link_local(*a
))
568 *ifa
= if_find_by_index(sa
->sin6_scope_id
);
572 sockaddr_read(sockaddr
*sa
, int af
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
574 if (sa
->sa
.sa_family
!= af
)
578 sockaddr_read4((struct sockaddr_in
*) sa
, a
, port
);
579 else if (af
== AF_INET6
)
580 sockaddr_read6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
594 * IPv6 multicast syscalls
597 /* Fortunately standardized in RFC 3493 */
599 #define INIT_MREQ6(maddr,ifa) \
600 { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
603 sk_setup_multicast6(sock
*s
)
605 int index
= s
->iface
->index
;
609 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_IF
, &index
, sizeof(index
)) < 0)
610 ERR("IPV6_MULTICAST_IF");
612 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
613 ERR("IPV6_MULTICAST_HOPS");
615 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_LOOP
, &n
, sizeof(n
)) < 0)
616 ERR("IPV6_MULTICAST_LOOP");
622 sk_join_group6(sock
*s
, ip_addr maddr
)
624 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
626 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_JOIN_GROUP
, &mr
, sizeof(mr
)) < 0)
627 ERR("IPV6_JOIN_GROUP");
633 sk_leave_group6(sock
*s
, ip_addr maddr
)
635 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
637 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_LEAVE_GROUP
, &mr
, sizeof(mr
)) < 0)
638 ERR("IPV6_LEAVE_GROUP");
645 * IPv6 packet control messages
648 /* Also standardized, in RFC 3542 */
651 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
652 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
653 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
654 * RFC and we use IPV6_PKTINFO.
656 #ifndef IPV6_RECVPKTINFO
657 #define IPV6_RECVPKTINFO IPV6_PKTINFO
660 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
662 #ifndef IPV6_RECVHOPLIMIT
663 #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
667 #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
668 #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
671 sk_request_cmsg6_pktinfo(sock
*s
)
675 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVPKTINFO
, &y
, sizeof(y
)) < 0)
676 ERR("IPV6_RECVPKTINFO");
682 sk_request_cmsg6_ttl(sock
*s
)
686 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVHOPLIMIT
, &y
, sizeof(y
)) < 0)
687 ERR("IPV6_RECVHOPLIMIT");
693 sk_process_cmsg6_pktinfo(sock
*s
, struct cmsghdr
*cm
)
695 if (cm
->cmsg_type
== IPV6_PKTINFO
)
697 struct in6_pktinfo
*pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
698 s
->laddr
= ipa_from_in6(pi
->ipi6_addr
);
699 s
->lifindex
= pi
->ipi6_ifindex
;
704 sk_process_cmsg6_ttl(sock
*s
, struct cmsghdr
*cm
)
706 if (cm
->cmsg_type
== IPV6_HOPLIMIT
)
707 s
->rcv_ttl
= * (int *) CMSG_DATA(cm
);
711 sk_prepare_cmsgs6(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
714 struct in6_pktinfo
*pi
;
717 msg
->msg_control
= cbuf
;
718 msg
->msg_controllen
= cbuflen
;
720 cm
= CMSG_FIRSTHDR(msg
);
721 cm
->cmsg_level
= SOL_IPV6
;
722 cm
->cmsg_type
= IPV6_PKTINFO
;
723 cm
->cmsg_len
= CMSG_LEN(sizeof(*pi
));
724 controllen
+= CMSG_SPACE(sizeof(*pi
));
726 pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
727 pi
->ipi6_ifindex
= s
->iface
? s
->iface
->index
: 0;
728 pi
->ipi6_addr
= ipa_to_in6(s
->saddr
);
730 msg
->msg_controllen
= controllen
;
735 * Miscellaneous socket syscalls
739 sk_set_ttl4(sock
*s
, int ttl
)
741 if (setsockopt(s
->fd
, SOL_IP
, IP_TTL
, &ttl
, sizeof(ttl
)) < 0)
748 sk_set_ttl6(sock
*s
, int ttl
)
750 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_UNICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
751 ERR("IPV6_UNICAST_HOPS");
757 sk_set_tos4(sock
*s
, int tos
)
759 if (setsockopt(s
->fd
, SOL_IP
, IP_TOS
, &tos
, sizeof(tos
)) < 0)
766 sk_set_tos6(sock
*s
, int tos
)
768 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_TCLASS
, &tos
, sizeof(tos
)) < 0)
775 sk_set_high_port(sock
*s UNUSED
)
777 /* Port range setting is optional, ignore it if not supported */
782 int range
= IP_PORTRANGE_HIGH
;
783 if (setsockopt(s
->fd
, SOL_IP
, IP_PORTRANGE
, &range
, sizeof(range
)) < 0)
788 #ifdef IPV6_PORTRANGE
791 int range
= IPV6_PORTRANGE_HIGH
;
792 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_PORTRANGE
, &range
, sizeof(range
)) < 0)
793 ERR("IPV6_PORTRANGE");
801 sk_skip_ip_header(byte
*pkt
, int *len
)
803 if ((*len
< 20) || ((*pkt
& 0xf0) != 0x40))
806 int hlen
= (*pkt
& 0x0f) * 4;
807 if ((hlen
< 20) || (hlen
> *len
))
815 sk_rx_buffer(sock
*s
, int *len
)
817 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
))
818 return sk_skip_ip_header(s
->rbuf
, len
);
825 * Public socket functions
829 * sk_setup_multicast - enable multicast for given socket
832 * Prepare transmission of multicast packets for given datagram socket.
833 * The socket must have defined @iface.
835 * Result: 0 for success, -1 for an error.
839 sk_setup_multicast(sock
*s
)
844 return sk_setup_multicast4(s
);
846 return sk_setup_multicast6(s
);
850 * sk_join_group - join multicast group for given socket
852 * @maddr: multicast address
854 * Join multicast group for given datagram socket and associated interface.
855 * The socket must have defined @iface.
857 * Result: 0 for success, -1 for an error.
861 sk_join_group(sock
*s
, ip_addr maddr
)
864 return sk_join_group4(s
, maddr
);
866 return sk_join_group6(s
, maddr
);
870 * sk_leave_group - leave multicast group for given socket
872 * @maddr: multicast address
874 * Leave multicast group for given datagram socket and associated interface.
875 * The socket must have defined @iface.
877 * Result: 0 for success, -1 for an error.
881 sk_leave_group(sock
*s
, ip_addr maddr
)
884 return sk_leave_group4(s
, maddr
);
886 return sk_leave_group6(s
, maddr
);
890 * sk_setup_broadcast - enable broadcast for given socket
893 * Allow reception and transmission of broadcast packets for given datagram
894 * socket. The socket must have defined @iface. For transmission, packets should
895 * be sent to @brd address of @iface.
897 * Result: 0 for success, -1 for an error.
901 sk_setup_broadcast(sock
*s
)
905 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BROADCAST
, &y
, sizeof(y
)) < 0)
912 * sk_set_ttl - set transmit TTL for given socket
916 * Set TTL for already opened connections when TTL was not set before. Useful
917 * for accepted connections when different ones should have different TTL.
919 * Result: 0 for success, -1 for an error.
923 sk_set_ttl(sock
*s
, int ttl
)
928 return sk_set_ttl4(s
, ttl
);
930 return sk_set_ttl6(s
, ttl
);
934 * sk_set_min_ttl - set minimal accepted TTL for given socket
938 * Set minimal accepted TTL for given socket. Can be used for TTL security.
941 * Result: 0 for success, -1 for an error.
945 sk_set_min_ttl(sock
*s
, int ttl
)
948 return sk_set_min_ttl4(s
, ttl
);
950 return sk_set_min_ttl6(s
, ttl
);
955 * sk_set_md5_auth - add / remove MD5 security association for given socket
957 * @local: IP address of local side
958 * @remote: IP address of remote side
959 * @ifa: Interface for link-local IP address
960 * @passwd: Password used for MD5 authentication
961 * @setkey: Update also system SA/SP database
963 * In TCP MD5 handling code in kernel, there is a set of security associations
964 * used for choosing password and other authentication parameters according to
965 * the local and remote address. This function is useful for listening socket,
966 * for active sockets it may be enough to set s->password field.
968 * When called with passwd != NULL, the new pair is added,
969 * When called with passwd == NULL, the existing pair is removed.
971 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
972 * stored in global SA/SP database (but the behavior also must be enabled on
973 * per-socket basis). In case of multiple sockets to the same neighbor, the
974 * socket-specific state must be configured for each socket while global state
975 * just once per src-dst pair. The @setkey argument controls whether the global
976 * state (SA/SP database) is also updated.
978 * Result: 0 for success, -1 for an error.
982 sk_set_md5_auth(sock
*s
, ip_addr local
, ip_addr remote
, struct iface
*ifa
, char *passwd
, int setkey
)
987 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
991 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
992 * kernel will automatically fill it for outgoing packets and check it for
993 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
994 * known to the kernel.
996 * Result: 0 for success, -1 for an error.
1000 sk_set_ipv6_checksum(sock
*s
, int offset
)
1002 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_CHECKSUM
, &offset
, sizeof(offset
)) < 0)
1003 ERR("IPV6_CHECKSUM");
1009 sk_set_icmp6_filter(sock
*s
, int p1
, int p2
)
1011 /* a bit of lame interface, but it is here only for Radv */
1012 struct icmp6_filter f
;
1014 ICMP6_FILTER_SETBLOCKALL(&f
);
1015 ICMP6_FILTER_SETPASS(p1
, &f
);
1016 ICMP6_FILTER_SETPASS(p2
, &f
);
1018 if (setsockopt(s
->fd
, SOL_ICMPV6
, ICMP6_FILTER
, &f
, sizeof(f
)) < 0)
1019 ERR("ICMP6_FILTER");
1025 sk_log_error(sock
*s
, const char *p
)
1027 log(L_ERR
"%s: Socket error: %s%#m", p
, s
->err
);
1032 * Actual struct birdsock code
1035 static list sock_list
;
1036 static struct birdsock
*current_sock
;
1037 static struct birdsock
*stored_sock
;
1039 static inline sock
*
1042 if (!s
->n
.next
->next
)
1045 return SKIP_BACK(sock
, n
, s
->n
.next
);
1049 sk_alloc_bufs(sock
*s
)
1051 if (!s
->rbuf
&& s
->rbsize
)
1052 s
->rbuf
= s
->rbuf_alloc
= xmalloc(s
->rbsize
);
1054 if (!s
->tbuf
&& s
->tbsize
)
1055 s
->tbuf
= s
->tbuf_alloc
= xmalloc(s
->tbsize
);
1056 s
->tpos
= s
->ttx
= s
->tbuf
;
1060 sk_free_bufs(sock
*s
)
1064 xfree(s
->rbuf_alloc
);
1065 s
->rbuf
= s
->rbuf_alloc
= NULL
;
1069 xfree(s
->tbuf_alloc
);
1070 s
->tbuf
= s
->tbuf_alloc
= NULL
;
1076 sk_ssh_free(sock
*s
)
1078 struct ssh_sock
*ssh
= s
->ssh
;
1087 if (ssh_channel_is_open(ssh
->channel
))
1088 ssh_channel_close(ssh
->channel
);
1089 ssh_channel_free(ssh
->channel
);
1090 ssh
->channel
= NULL
;
1095 ssh_disconnect(ssh
->session
);
1096 ssh_free(ssh
->session
);
1097 ssh
->session
= NULL
;
1103 sk_free(resource
*r
)
1105 sock
*s
= (sock
*) r
;
1110 if (s
->type
== SK_SSH
|| s
->type
== SK_SSH_ACTIVE
)
1117 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1118 if (!(s
->flags
& SKF_THREAD
))
1120 if (s
== current_sock
)
1121 current_sock
= sk_next(s
);
1122 if (s
== stored_sock
)
1123 stored_sock
= sk_next(s
);
1127 if (s
->type
!= SK_SSH
&& s
->type
!= SK_SSH_ACTIVE
)
1134 sk_set_rbsize(sock
*s
, uint val
)
1136 ASSERT(s
->rbuf_alloc
== s
->rbuf
);
1138 if (s
->rbsize
== val
)
1142 xfree(s
->rbuf_alloc
);
1143 s
->rbuf_alloc
= xmalloc(val
);
1144 s
->rpos
= s
->rbuf
= s
->rbuf_alloc
;
1148 sk_set_tbsize(sock
*s
, uint val
)
1150 ASSERT(s
->tbuf_alloc
== s
->tbuf
);
1152 if (s
->tbsize
== val
)
1155 byte
*old_tbuf
= s
->tbuf
;
1158 s
->tbuf
= s
->tbuf_alloc
= xrealloc(s
->tbuf_alloc
, val
);
1159 s
->tpos
= s
->tbuf
+ (s
->tpos
- old_tbuf
);
1160 s
->ttx
= s
->tbuf
+ (s
->ttx
- old_tbuf
);
1164 sk_set_tbuf(sock
*s
, void *tbuf
)
1166 s
->tbuf
= tbuf
?: s
->tbuf_alloc
;
1167 s
->ttx
= s
->tpos
= s
->tbuf
;
1171 sk_reallocate(sock
*s
)
1178 sk_dump(resource
*r
)
1180 sock
*s
= (sock
*) r
;
1181 static char *sk_type_names
[] = { "TCP<", "TCP>", "TCP", "UDP", NULL
, "IP", NULL
, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
1183 debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1184 sk_type_names
[s
->type
],
1192 s
->iface
? s
->iface
->name
: "none");
1195 static struct resclass sk_class
= {
1205 * sk_new - create a socket
1208 * This function creates a new socket resource. If you want to use it,
1209 * you need to fill in all the required fields of the structure and
1210 * call sk_open() to do the actual opening of the socket.
1212 * The real function name is sock_new(), sk_new() is a macro wrapper
1213 * to avoid collision with OpenSSL.
1218 sock
*s
= ralloc(p
, &sk_class
);
1220 // s->saddr = s->daddr = IPA_NONE;
1221 s
->tos
= s
->priority
= s
->ttl
= -1;
1232 if (s
->type
== SK_SSH_ACTIVE
)
1235 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1241 if (ipa_nonzero(s
->saddr
) && !(s
->flags
& SKF_BIND
))
1242 s
->flags
|= SKF_PKTINFO
;
1244 #ifdef CONFIG_USE_HDRINCL
1245 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
) && (s
->flags
& SKF_PKTINFO
))
1247 s
->flags
&= ~SKF_PKTINFO
;
1248 s
->flags
|= SKF_HDRINCL
;
1249 if (setsockopt(fd
, SOL_IP
, IP_HDRINCL
, &y
, sizeof(y
)) < 0)
1256 #ifdef SO_BINDTODEVICE
1257 struct ifreq ifr
= {};
1258 strcpy(ifr
.ifr_name
, s
->iface
->name
);
1259 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
1260 ERR("SO_BINDTODEVICE");
1263 #ifdef CONFIG_UNIX_DONTROUTE
1264 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_DONTROUTE
, &y
, sizeof(y
)) < 0)
1265 ERR("SO_DONTROUTE");
1269 if (s
->priority
>= 0)
1270 if (sk_set_priority(s
, s
->priority
) < 0)
1275 if (s
->flags
& SKF_LADDR_RX
)
1276 if (sk_request_cmsg4_pktinfo(s
) < 0)
1279 if (s
->flags
& SKF_TTL_RX
)
1280 if (sk_request_cmsg4_ttl(s
) < 0)
1283 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1284 if (sk_disable_mtu_disc4(s
) < 0)
1288 if (sk_set_ttl4(s
, s
->ttl
) < 0)
1292 if (sk_set_tos4(s
, s
->tos
) < 0)
1298 if ((s
->type
== SK_TCP_PASSIVE
) || (s
->type
== SK_TCP_ACTIVE
) || (s
->type
== SK_UDP
))
1299 if (setsockopt(fd
, SOL_IPV6
, IPV6_V6ONLY
, &y
, sizeof(y
)) < 0)
1302 if (s
->flags
& SKF_LADDR_RX
)
1303 if (sk_request_cmsg6_pktinfo(s
) < 0)
1306 if (s
->flags
& SKF_TTL_RX
)
1307 if (sk_request_cmsg6_ttl(s
) < 0)
1310 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1311 if (sk_disable_mtu_disc6(s
) < 0)
1315 if (sk_set_ttl6(s
, s
->ttl
) < 0)
1319 if (sk_set_tos6(s
, s
->tos
) < 0)
1329 add_tail(&sock_list
, &s
->n
);
1333 sk_tcp_connected(sock
*s
)
1336 int sa_len
= sizeof(sa
);
1338 if ((getsockname(s
->fd
, &sa
.sa
, &sa_len
) < 0) ||
1339 (sockaddr_read(&sa
, s
->af
, &s
->saddr
, &s
->iface
, &s
->sport
) < 0))
1340 log(L_WARN
"SOCK: Cannot get local IP address for TCP>");
1349 sk_ssh_connected(sock
*s
)
1358 sk_passive_connected(sock
*s
, int type
)
1360 sockaddr loc_sa
, rem_sa
;
1361 int loc_sa_len
= sizeof(loc_sa
);
1362 int rem_sa_len
= sizeof(rem_sa
);
1364 int fd
= accept(s
->fd
, ((type
== SK_TCP
) ? &rem_sa
.sa
: NULL
), &rem_sa_len
);
1367 if ((errno
!= EINTR
) && (errno
!= EAGAIN
))
1368 s
->err_hook(s
, errno
);
1372 sock
*t
= sk_new(s
->pool
);
1378 t
->rbsize
= s
->rbsize
;
1379 t
->tbsize
= s
->tbsize
;
1383 if ((getsockname(fd
, &loc_sa
.sa
, &loc_sa_len
) < 0) ||
1384 (sockaddr_read(&loc_sa
, s
->af
, &t
->saddr
, &t
->iface
, &t
->sport
) < 0))
1385 log(L_WARN
"SOCK: Cannot get local IP address for TCP<");
1387 if (sockaddr_read(&rem_sa
, s
->af
, &t
->daddr
, &t
->iface
, &t
->dport
) < 0)
1388 log(L_WARN
"SOCK: Cannot get remote IP address for TCP<");
1391 if (sk_setup(t
) < 0)
1393 /* FIXME: Call err_hook instead ? */
1394 log(L_ERR
"SOCK: Incoming connection: %s%#m", t
->err
);
1396 /* FIXME: handle it better in rfree() */
1411 * Return SSH_OK or SSH_AGAIN or SSH_ERROR
1414 sk_ssh_connect(sock
*s
)
1416 s
->fd
= ssh_get_fd(s
->ssh
->session
);
1418 /* Big fall thru automata */
1419 switch (s
->ssh
->state
)
1421 case SK_SSH_CONNECT
:
1423 switch (ssh_connect(s
->ssh
->session
))
1426 /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
1427 * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
1428 * documented but our code relies on that.
1440 case SK_SSH_SERVER_KNOWN
:
1442 s
->ssh
->state
= SK_SSH_SERVER_KNOWN
;
1444 if (s
->ssh
->server_hostkey_path
)
1446 int server_identity_is_ok
= 1;
1448 /* Check server identity */
1449 switch (ssh_is_server_known(s
->ssh
->session
))
1451 #define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
1452 case SSH_SERVER_KNOWN_OK
:
1453 /* The server is known and has not changed. */
1456 case SSH_SERVER_NOT_KNOWN
:
1457 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server is unknown, its public key was not found in the known host file %s", s
->ssh
->server_hostkey_path
);
1460 case SSH_SERVER_KNOWN_CHANGED
:
1461 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server key has changed. Either you are under attack or the administrator changed the key.");
1462 server_identity_is_ok
= 0;
1465 case SSH_SERVER_FILE_NOT_FOUND
:
1466 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The known host file %s does not exist", s
->ssh
->server_hostkey_path
);
1467 server_identity_is_ok
= 0;
1470 case SSH_SERVER_ERROR
:
1471 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "Some error happened");
1472 server_identity_is_ok
= 0;
1475 case SSH_SERVER_FOUND_OTHER
:
1476 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s
, "The server gave use a key of a type while we had an other type recorded. " \
1477 "It is a possible attack.");
1478 server_identity_is_ok
= 0;
1482 if (!server_identity_is_ok
)
1487 case SK_SSH_USERAUTH
:
1489 s
->ssh
->state
= SK_SSH_USERAUTH
;
1490 switch (ssh_userauth_publickey_auto(s
->ssh
->session
, NULL
, NULL
))
1492 case SSH_AUTH_AGAIN
:
1495 case SSH_AUTH_SUCCESS
:
1503 case SK_SSH_CHANNEL
:
1505 s
->ssh
->state
= SK_SSH_CHANNEL
;
1506 s
->ssh
->channel
= ssh_channel_new(s
->ssh
->session
);
1507 if (s
->ssh
->channel
== NULL
)
1511 case SK_SSH_SESSION
:
1513 s
->ssh
->state
= SK_SSH_SESSION
;
1514 switch (ssh_channel_open_session(s
->ssh
->channel
))
1527 case SK_SSH_SUBSYSTEM
:
1529 s
->ssh
->state
= SK_SSH_SUBSYSTEM
;
1530 if (s
->ssh
->subsystem
)
1532 switch (ssh_channel_request_subsystem(s
->ssh
->channel
, s
->ssh
->subsystem
))
1546 case SK_SSH_ESTABLISHED
:
1547 s
->ssh
->state
= SK_SSH_ESTABLISHED
;
1554 * Return file descriptor number if success
1555 * Return -1 if failed
1558 sk_open_ssh(sock
*s
)
1561 bug("sk_open() sock->ssh is not allocated");
1563 ssh_session sess
= ssh_new();
1565 ERR2("Cannot create a ssh session");
1566 s
->ssh
->session
= sess
;
1568 const int verbosity
= SSH_LOG_NOLOG
;
1569 ssh_options_set(sess
, SSH_OPTIONS_LOG_VERBOSITY
, &verbosity
);
1570 ssh_options_set(sess
, SSH_OPTIONS_HOST
, s
->host
);
1571 ssh_options_set(sess
, SSH_OPTIONS_PORT
, &(s
->dport
));
1572 /* TODO: Add SSH_OPTIONS_BINDADDR */
1573 ssh_options_set(sess
, SSH_OPTIONS_USER
, s
->ssh
->username
);
1575 if (s
->ssh
->server_hostkey_path
)
1576 ssh_options_set(sess
, SSH_OPTIONS_KNOWNHOSTS
, s
->ssh
->server_hostkey_path
);
1578 if (s
->ssh
->client_privkey_path
)
1579 ssh_options_set(sess
, SSH_OPTIONS_IDENTITY
, s
->ssh
->client_privkey_path
);
1581 ssh_set_blocking(sess
, 0);
1583 switch (sk_ssh_connect(s
))
1589 sk_ssh_connected(s
);
1593 ERR2(ssh_get_error(sess
));
1597 return ssh_get_fd(sess
);
1605 * sk_open - open a socket
1608 * This function takes a socket resource created by sk_new() and
1609 * initialized by the user and binds a corresponding network connection
1612 * Result: 0 for success, -1 for an error.
1621 ip_addr bind_addr
= IPA_NONE
;
1624 if (s
->type
<= SK_IP
)
1627 * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
1628 * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
1629 * But the specifications have to be consistent.
1635 ASSERT(ipa_zero(s
->saddr
) || ipa_zero(s
->daddr
) ||
1636 (ipa_is_ip4(s
->saddr
) == ipa_is_ip4(s
->daddr
)));
1637 af
= (ipa_is_ip4(s
->saddr
) || ipa_is_ip4(s
->daddr
)) ? AF_INET
: AF_INET6
;
1641 ASSERT(ipa_zero(s
->saddr
) || ipa_is_ip4(s
->saddr
));
1642 ASSERT(ipa_zero(s
->daddr
) || ipa_is_ip4(s
->daddr
));
1647 ASSERT(ipa_zero(s
->saddr
) || !ipa_is_ip4(s
->saddr
));
1648 ASSERT(ipa_zero(s
->daddr
) || !ipa_is_ip4(s
->daddr
));
1653 bug("Invalid subtype %d", s
->subtype
);
1660 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1662 case SK_TCP_PASSIVE
:
1663 fd
= socket(af
, SOCK_STREAM
, IPPROTO_TCP
);
1664 bind_port
= s
->sport
;
1665 bind_addr
= s
->saddr
;
1666 do_bind
= bind_port
|| ipa_nonzero(bind_addr
);
1671 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1672 fd
= sk_open_ssh(s
);
1677 fd
= socket(af
, SOCK_DGRAM
, IPPROTO_UDP
);
1678 bind_port
= s
->sport
;
1679 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1684 fd
= socket(af
, SOCK_RAW
, s
->dport
);
1686 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1687 do_bind
= ipa_nonzero(bind_addr
);
1696 bug("sk_open() called for invalid sock type %d", s
->type
);
1705 if (sk_setup(s
) < 0)
1714 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &y
, sizeof(y
)) < 0)
1715 ERR2("SO_REUSEADDR");
1717 #ifdef CONFIG_NO_IFACE_BIND
1718 /* Workaround missing ability to bind to an iface */
1719 if ((s
->type
== SK_UDP
) && s
->iface
&& ipa_zero(bind_addr
))
1721 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &y
, sizeof(y
)) < 0)
1722 ERR2("SO_REUSEPORT");
1727 if (s
->flags
& SKF_HIGH_PORT
)
1728 if (sk_set_high_port(s
) < 0)
1729 log(L_WARN
"Socket error: %s%#m", s
->err
);
1731 sockaddr_fill(&sa
, s
->af
, bind_addr
, s
->iface
, bind_port
);
1732 if (bind(fd
, &sa
.sa
, SA_LEN(sa
)) < 0)
1737 if (sk_set_md5_auth(s
, s
->saddr
, s
->daddr
, s
->iface
, s
->password
, 0) < 0)
1743 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1744 if (connect(fd
, &sa
.sa
, SA_LEN(sa
)) >= 0)
1745 sk_tcp_connected(s
);
1746 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
&&
1747 errno
!= ECONNREFUSED
&& errno
!= EHOSTUNREACH
&& errno
!= ENETUNREACH
)
1751 case SK_TCP_PASSIVE
:
1752 if (listen(fd
, 8) < 0)
1764 if (!(s
->flags
& SKF_THREAD
))
1776 sk_open_unix(sock
*s
, char *name
)
1778 struct sockaddr_un sa
;
1781 /* We are sloppy on error (we leak the fd and do not set s->err), but we die anyway */
1783 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1787 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1790 /* Path length checked in test_old_bird() */
1791 sa
.sun_family
= AF_UNIX
;
1792 strcpy(sa
.sun_path
, name
);
1794 if (bind(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) < 0)
1797 if (listen(fd
, 8) < 0)
1806 #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1807 CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1808 #define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1811 sk_prepare_cmsgs(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
1814 sk_prepare_cmsgs4(s
, msg
, cbuf
, cbuflen
);
1816 sk_prepare_cmsgs6(s
, msg
, cbuf
, cbuflen
);
1820 sk_process_cmsgs(sock
*s
, struct msghdr
*msg
)
1824 s
->laddr
= IPA_NONE
;
1828 for (cm
= CMSG_FIRSTHDR(msg
); cm
!= NULL
; cm
= CMSG_NXTHDR(msg
, cm
))
1830 if ((cm
->cmsg_level
== SOL_IP
) && sk_is_ipv4(s
))
1832 sk_process_cmsg4_pktinfo(s
, cm
);
1833 sk_process_cmsg4_ttl(s
, cm
);
1836 if ((cm
->cmsg_level
== SOL_IPV6
) && sk_is_ipv6(s
))
1838 sk_process_cmsg6_pktinfo(s
, cm
);
1839 sk_process_cmsg6_ttl(s
, cm
);
1848 struct iovec iov
= {s
->tbuf
, s
->tpos
- s
->tbuf
};
1849 byte cmsg_buf
[CMSG_TX_SPACE
];
1852 sockaddr_fill(&dst
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1854 struct msghdr msg
= {
1855 .msg_name
= &dst
.sa
,
1856 .msg_namelen
= SA_LEN(dst
),
1861 #ifdef CONFIG_USE_HDRINCL
1863 struct iovec iov2
[2] = { {hdr
, 20}, iov
};
1865 if (s
->flags
& SKF_HDRINCL
)
1867 sk_prepare_ip_header(s
, hdr
, iov
.iov_len
);
1873 if (s
->flags
& SKF_PKTINFO
)
1874 sk_prepare_cmsgs(s
, &msg
, cmsg_buf
, sizeof(cmsg_buf
));
1876 return sendmsg(s
->fd
, &msg
, 0);
1882 struct iovec iov
= {s
->rbuf
, s
->rbsize
};
1883 byte cmsg_buf
[CMSG_RX_SPACE
];
1886 struct msghdr msg
= {
1887 .msg_name
= &src
.sa
,
1888 .msg_namelen
= sizeof(src
), // XXXX ??
1891 .msg_control
= cmsg_buf
,
1892 .msg_controllen
= sizeof(cmsg_buf
),
1896 int rv
= recvmsg(s
->fd
, &msg
, 0);
1901 // if (cf_type == SK_IP)
1902 // rv = ipv4_skip_header(pbuf, rv);
1905 sockaddr_read(&src
, s
->af
, &s
->faddr
, NULL
, &s
->fport
);
1906 sk_process_cmsgs(s
, &msg
);
1908 if (msg
.msg_flags
& MSG_TRUNC
)
1909 s
->flags
|= SKF_TRUNCATED
;
1911 s
->flags
&= ~SKF_TRUNCATED
;
1917 static inline void reset_tx_buffer(sock
*s
) { s
->ttx
= s
->tpos
= s
->tbuf
; }
1920 sk_maybe_write(sock
*s
)
1929 while (s
->ttx
!= s
->tpos
)
1931 e
= write(s
->fd
, s
->ttx
, s
->tpos
- s
->ttx
);
1935 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1938 /* EPIPE is just a connection close notification during TX */
1939 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1951 while (s
->ttx
!= s
->tpos
)
1953 e
= ssh_channel_write(s
->ssh
->channel
, s
->ttx
, s
->tpos
- s
->ttx
);
1957 s
->err
= ssh_get_error(s
->ssh
->session
);
1958 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
1961 /* EPIPE is just a connection close notification during TX */
1962 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1974 if (s
->tbuf
== s
->tpos
)
1981 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1984 s
->err_hook(s
, errno
);
1997 bug("sk_maybe_write: unknown socket type %d", s
->type
);
2002 sk_rx_ready(sock
*s
)
2005 struct pollfd pfd
= { .fd
= s
->fd
};
2006 pfd
.events
|= POLLIN
;
2009 rv
= poll(&pfd
, 1, 0);
2011 if ((rv
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
2018 * sk_send - send data to a socket
2020 * @len: number of bytes to send
2022 * This function sends @len bytes of data prepared in the
2023 * transmit buffer of the socket @s to the network connection.
2024 * If the packet can be sent immediately, it does so and returns
2025 * 1, else it queues the packet for later processing, returns 0
2026 * and calls the @tx_hook of the socket when the transmission
2030 sk_send(sock
*s
, unsigned len
)
2033 s
->tpos
= s
->tbuf
+ len
;
2034 return sk_maybe_write(s
);
2038 * sk_send_to - send data to a specific destination
2040 * @len: number of bytes to send
2041 * @addr: IP address to send the packet to
2042 * @port: port to send the packet to
2044 * This is a sk_send() replacement for connection-less packet sockets
2045 * which allows destination of the packet to be chosen dynamically.
2046 * Raw IP sockets should use 0 for @port.
2049 sk_send_to(sock
*s
, unsigned len
, ip_addr addr
, unsigned port
)
2056 s
->tpos
= s
->tbuf
+ len
;
2057 return sk_maybe_write(s
);
2062 sk_send_full(sock *s, unsigned len, struct iface *ifa,
2063 ip_addr saddr, ip_addr daddr, unsigned dport)
2070 s->tpos = s->tbuf + len;
2071 return sk_maybe_write(s);
2076 call_rx_hook(sock
*s
, int size
)
2078 if (s
->rx_hook(s
, size
))
2080 /* We need to be careful since the socket could have been deleted by the hook */
2081 if (current_sock
== s
)
2088 sk_read_ssh(sock
*s
)
2090 ssh_channel rchans
[2] = { s
->ssh
->channel
, NULL
};
2091 struct timeval timev
= { 1, 0 };
2093 if (ssh_channel_select(rchans
, NULL
, NULL
, &timev
) == SSH_EINTR
)
2094 return 1; /* Try again */
2096 if (ssh_channel_is_eof(s
->ssh
->channel
) != 0)
2098 /* The remote side is closing the connection */
2103 if (rchans
[0] == NULL
)
2104 return 0; /* No data is available on the socket */
2106 const uint used_bytes
= s
->rpos
- s
->rbuf
;
2107 const int read_bytes
= ssh_channel_read_nonblocking(s
->ssh
->channel
, s
->rpos
, s
->rbsize
- used_bytes
, 0);
2111 s
->rpos
+= read_bytes
;
2112 call_rx_hook(s
, used_bytes
+ read_bytes
);
2115 else if (read_bytes
== 0)
2117 if (ssh_channel_is_eof(s
->ssh
->channel
) != 0)
2119 /* The remote side is closing the connection */
2125 s
->err
= ssh_get_error(s
->ssh
->session
);
2126 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
2129 return 0; /* No data is available on the socket */
2133 /* sk_read() and sk_write() are called from BFD's event loop */
2136 sk_read(sock
*s
, int revents
)
2140 case SK_TCP_PASSIVE
:
2141 return sk_passive_connected(s
, SK_TCP
);
2143 case SK_UNIX_PASSIVE
:
2144 return sk_passive_connected(s
, SK_UNIX
);
2149 int c
= read(s
->fd
, s
->rpos
, s
->rbuf
+ s
->rbsize
- s
->rpos
);
2153 if (errno
!= EINTR
&& errno
!= EAGAIN
)
2154 s
->err_hook(s
, errno
);
2155 else if (errno
== EAGAIN
&& !(revents
& POLLIN
))
2157 log(L_ERR
"Got EAGAIN from read when revents=%x (without POLLIN)", revents
);
2166 call_rx_hook(s
, s
->rpos
- s
->rbuf
);
2174 return sk_read_ssh(s
);
2178 return s
->rx_hook(s
, 0);
2182 int e
= sk_recvmsg(s
);
2186 if (errno
!= EINTR
&& errno
!= EAGAIN
)
2187 s
->err_hook(s
, errno
);
2191 s
->rpos
= s
->rbuf
+ e
;
2206 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
2208 if (connect(s
->fd
, &sa
.sa
, SA_LEN(sa
)) >= 0 || errno
== EISCONN
)
2209 sk_tcp_connected(s
);
2210 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
)
2211 s
->err_hook(s
, errno
);
2218 switch (sk_ssh_connect(s
))
2221 sk_ssh_connected(s
);
2228 s
->err
= ssh_get_error(s
->ssh
->session
);
2229 s
->err_hook(s
, ssh_get_error_code(s
->ssh
->session
));
2237 if (s
->ttx
!= s
->tpos
&& sk_maybe_write(s
) > 0)
2247 int sk_is_ipv4(sock
*s
)
2248 { return s
->af
== AF_INET
; }
2250 int sk_is_ipv6(sock
*s
)
2251 { return s
->af
== AF_INET6
; }
2254 sk_err(sock
*s
, int revents
)
2256 int se
= 0, sse
= sizeof(se
);
2257 if ((s
->type
!= SK_MAGIC
) && (revents
& POLLERR
))
2258 if (getsockopt(s
->fd
, SOL_SOCKET
, SO_ERROR
, &se
, &sse
) < 0)
2260 log(L_ERR
"IO: Socket error: SO_ERROR: %m");
2273 debug("Open sockets:\n");
2274 WALK_LIST(n
, sock_list
)
2276 s
= SKIP_BACK(sock
, n
, n
);
2285 * Internal event log and watchdog
2288 #define EVENT_LOG_LENGTH 32
2290 struct event_log_entry
2298 static struct event_log_entry event_log
[EVENT_LOG_LENGTH
];
2299 static struct event_log_entry
*event_open
;
2300 static int event_log_pos
, event_log_num
, watchdog_active
;
2301 static btime last_time
;
2302 static btime loop_time
;
2305 io_update_time(void)
2310 if (!clock_monotonic_available
)
2314 * This is the third time-tracking procedure (after update_times() above and
2315 * times_update() in BFD), dedicated to the internal event log and latency
2316 * tracking. Hopefully, we will consolidate these someday.
2319 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
2321 die("clock_gettime: %m");
2323 last_time
= ((s64
) ts
.tv_sec S
) + (ts
.tv_nsec
/ 1000);
2327 event_open
->duration
= last_time
- event_open
->timestamp
;
2329 if (event_open
->duration
> config
->latency_limit
)
2330 log(L_WARN
"Event 0x%p 0x%p took %d ms",
2331 event_open
->hook
, event_open
->data
, (int) (event_open
->duration TO_MS
));
2338 * io_log_event - mark approaching event into event log
2339 * @hook: event hook address
2340 * @data: event data address
2342 * Store info (hook, data, timestamp) about the following internal event into
2343 * a circular event log (@event_log). When latency tracking is enabled, the log
2344 * entry is kept open (in @event_open) so the duration can be filled later.
2347 io_log_event(void *hook
, void *data
)
2349 if (config
->latency_debug
)
2352 struct event_log_entry
*en
= event_log
+ event_log_pos
;
2356 en
->timestamp
= last_time
;
2361 event_log_pos
%= EVENT_LOG_LENGTH
;
2363 event_open
= config
->latency_debug
? en
: NULL
;
2367 io_close_event(void)
2378 log(L_DEBUG
"Event log:");
2379 for (i
= 0; i
< EVENT_LOG_LENGTH
; i
++)
2381 struct event_log_entry
*en
= event_log
+ (event_log_pos
+ i
) % EVENT_LOG_LENGTH
;
2383 log(L_DEBUG
" Event 0x%p 0x%p at %8d for %d ms", en
->hook
, en
->data
,
2384 (int) ((last_time
- en
->timestamp
) TO_MS
), (int) (en
->duration TO_MS
));
2389 watchdog_sigalrm(int sig UNUSED
)
2391 /* Update last_time and duration, but skip latency check */
2392 config
->latency_limit
= 0xffffffff;
2395 /* We want core dump */
2400 watchdog_start1(void)
2404 loop_time
= last_time
;
2408 watchdog_start(void)
2412 loop_time
= last_time
;
2415 if (config
->watchdog_timeout
)
2417 alarm(config
->watchdog_timeout
);
2418 watchdog_active
= 1;
2427 if (watchdog_active
)
2430 watchdog_active
= 0;
2433 btime duration
= last_time
- loop_time
;
2434 if (duration
> config
->watchdog_warning
)
2435 log(L_WARN
"I/O loop cycle took %d ms for %d events",
2436 (int) (duration TO_MS
), event_log_num
);
2444 volatile int async_config_flag
; /* Asynchronous reconfiguration/dump scheduled */
2445 volatile int async_dump_flag
;
2446 volatile int async_shutdown_flag
;
2451 init_list(&near_timers
);
2452 init_list(&far_timers
);
2453 init_list(&sock_list
);
2454 init_list(&global_event_list
);
2459 srandom((int) now_real
);
2462 static int short_loops
= 0;
2463 #define SHORT_LOOP_MAX 10
2470 int nfds
, events
, pout
;
2474 struct pollfd
*pfd
= xmalloc(fdmax
* sizeof(struct pollfd
));
2479 events
= ev_run_list(&global_event_list
);
2482 tout
= tm_first_shot();
2488 poll_tout
= (events
? 0 : MIN(tout
- now
, 3)) * 1000; /* Time in milliseconds */
2493 WALK_LIST(n
, sock_list
)
2495 pfd
[nfds
] = (struct pollfd
) { .fd
= -1 }; /* everything other set to 0 by this */
2496 s
= SKIP_BACK(sock
, n
, n
);
2499 pfd
[nfds
].fd
= s
->fd
;
2500 pfd
[nfds
].events
|= POLLIN
;
2502 if (s
->tx_hook
&& s
->ttx
!= s
->tpos
)
2504 pfd
[nfds
].fd
= s
->fd
;
2505 pfd
[nfds
].events
|= POLLOUT
;
2507 if (pfd
[nfds
].fd
!= -1)
2518 pfd
= xrealloc(pfd
, fdmax
* sizeof(struct pollfd
));
2523 * Yes, this is racy. But even if the signal comes before this test
2524 * and entering poll(), it gets caught on the next timer tick.
2527 if (async_config_flag
)
2529 io_log_event(async_config
, NULL
);
2531 async_config_flag
= 0;
2534 if (async_dump_flag
)
2536 io_log_event(async_dump
, NULL
);
2538 async_dump_flag
= 0;
2541 if (async_shutdown_flag
)
2543 io_log_event(async_shutdown
, NULL
);
2545 async_shutdown_flag
= 0;
2549 /* And finally enter poll() to find active sockets */
2551 pout
= poll(pfd
, nfds
, poll_tout
);
2556 if (errno
== EINTR
|| errno
== EAGAIN
)
2562 /* guaranteed to be non-empty */
2563 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
2565 while (current_sock
)
2567 sock
*s
= current_sock
;
2570 current_sock
= sk_next(s
);
2578 if (s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2582 io_log_event(s
->rx_hook
, s
->data
);
2583 e
= sk_read(s
, pfd
[s
->index
].revents
);
2584 if (s
!= current_sock
)
2587 while (e
&& s
->rx_hook
&& steps
);
2590 if (pfd
[s
->index
].revents
& POLLOUT
)
2594 io_log_event(s
->tx_hook
, s
->data
);
2596 if (s
!= current_sock
)
2601 current_sock
= sk_next(s
);
2606 if (events
&& (short_loops
< SHORT_LOOP_MAX
))
2611 current_sock
= stored_sock
;
2612 if (current_sock
== NULL
)
2613 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
2615 while (current_sock
&& count
< MAX_RX_STEPS
)
2617 sock
*s
= current_sock
;
2620 current_sock
= sk_next(s
);
2624 if (!s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2627 io_log_event(s
->rx_hook
, s
->data
);
2628 sk_read(s
, pfd
[s
->index
].revents
);
2629 if (s
!= current_sock
)
2633 if (pfd
[s
->index
].revents
& (POLLHUP
| POLLERR
))
2635 sk_err(s
, pfd
[s
->index
].revents
);
2636 if (s
!= current_sock
)
2640 current_sock
= sk_next(s
);
2645 stored_sock
= current_sock
;
2651 test_old_bird(char *path
)
2654 struct sockaddr_un sa
;
2656 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
2658 die("Cannot create socket: %m");
2659 if (strlen(path
) >= sizeof(sa
.sun_path
))
2660 die("Socket path too long");
2661 bzero(&sa
, sizeof(sa
));
2662 sa
.sun_family
= AF_UNIX
;
2663 strcpy(sa
.sun_path
, path
);
2664 if (connect(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) == 0)
2665 die("I found another BIRD running.");