2 * BIRD Internet Routing Daemon -- Unix I/O
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
7 * Can be freely distributed and used under the terms of the GNU GPL.
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
18 #include <sys/types.h>
19 #include <sys/socket.h>
26 #include <netinet/in.h>
27 #include <netinet/tcp.h>
28 #include <netinet/udp.h>
29 #include <netinet/icmp6.h>
31 #include "nest/bird.h"
32 #include "lib/lists.h"
33 #include "lib/resource.h"
34 #include "lib/timer.h"
35 #include "lib/socket.h"
36 #include "lib/event.h"
37 #include "lib/string.h"
38 #include "nest/iface.h"
41 #include "lib/sysio.h"
43 /* Maximum number of calls of tx handler for one socket in one
44 * select iteration. Should be small enough to not monopolize CPU by
45 * one protocol instance.
49 /* Maximum number of calls of rx handler for all sockets in one select
50 iteration. RX callbacks are often much more costly so we limit
51 this to get small latencies */
52 #define MAX_RX_STEPS 4
66 struct rfile
*a
= (struct rfile
*) r
;
74 struct rfile
*a
= (struct rfile
*) r
;
76 debug("(FILE *%p)\n", a
->f
);
79 static struct resclass rf_class
= {
89 tracked_fopen(pool
*p
, char *name
, char *mode
)
91 FILE *f
= fopen(name
, mode
);
95 struct rfile
*r
= ralloc(p
, &rf_class
);
104 * Timers are resources which represent a wish of a module to call
105 * a function at the specified time. The platform dependent code
106 * doesn't guarantee exact timing, only that a timer function
107 * won't be called before the requested time.
109 * In BIRD, time is represented by values of the &bird_clock_t type
110 * which are integral numbers interpreted as a relative number of seconds since
111 * some fixed time point in the past. The current time can be read
112 * from variable @now with reasonable accuracy and is monotonic. There is also
113 * a current 'absolute' time in variable @now_real reported by OS.
115 * Each timer is described by a &timer structure containing a pointer
116 * to the handler function (@hook), data private to this function (@data),
117 * time the function should be called at (@expires, 0 for inactive timers),
118 * for the other fields see |timer.h|.
121 #define NEAR_TIMER_LIMIT 4
123 static list near_timers
, far_timers
;
124 static bird_clock_t first_far_timer
= TIME_INFINITY
;
126 /* now must be different from 0, because 0 is a special value in timer->expires */
127 bird_clock_t now
= 1, now_real
, boot_time
;
130 update_times_plain(void)
132 bird_clock_t new_time
= time(NULL
);
133 int delta
= new_time
- now_real
;
135 if ((delta
>= 0) && (delta
< 60))
137 else if (now_real
!= 0)
138 log(L_WARN
"Time jump, delta %d s", delta
);
144 update_times_gettime(void)
149 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
151 die("clock_gettime: %m");
153 if (ts
.tv_sec
!= now
) {
155 log(L_ERR
"Monotonic timer is broken");
158 now_real
= time(NULL
);
162 static int clock_monotonic_available
;
167 if (clock_monotonic_available
)
168 update_times_gettime();
170 update_times_plain();
177 clock_monotonic_available
= (clock_gettime(CLOCK_MONOTONIC
, &ts
) == 0);
178 if (!clock_monotonic_available
)
179 log(L_WARN
"Monotonic timer is missing");
186 timer
*t
= (timer
*) r
;
194 timer
*t
= (timer
*) r
;
196 debug("(code %p, data %p, ", t
->hook
, t
->data
);
198 debug("rand %d, ", t
->randomize
);
200 debug("recur %d, ", t
->recurrent
);
202 debug("expires in %d sec)\n", t
->expires
- now
);
204 debug("inactive)\n");
207 static struct resclass tm_class
= {
217 * tm_new - create a timer
220 * This function creates a new timer resource and returns
221 * a pointer to it. To use the timer, you need to fill in
222 * the structure fields and call tm_start() to start timing.
227 timer
*t
= ralloc(p
, &tm_class
);
232 tm_insert_near(timer
*t
)
234 node
*n
= HEAD(near_timers
);
236 while (n
->next
&& (SKIP_BACK(timer
, n
, n
)->expires
< t
->expires
))
238 insert_node(&t
->n
, n
->prev
);
242 * tm_start - start a timer
244 * @after: number of seconds the timer should be run after
246 * This function schedules the hook function of the timer to
247 * be called after @after seconds. If the timer has been already
248 * started, its @expires time is replaced by the new value.
250 * If you have set the @randomize field of @t, the timeout
251 * will be increased by a random number of seconds chosen
252 * uniformly from range 0 .. @randomize.
254 * You can call tm_start() from the handler function of the timer
255 * to request another run of the timer. Also, you can set the @recurrent
256 * field to have the timer re-added automatically with the same timeout.
259 tm_start(timer
*t
, unsigned after
)
264 after
+= random() % (t
->randomize
+ 1);
266 if (t
->expires
== when
)
271 if (after
<= NEAR_TIMER_LIMIT
)
275 if (!first_far_timer
|| first_far_timer
> when
)
276 first_far_timer
= when
;
277 add_tail(&far_timers
, &t
->n
);
282 * tm_stop - stop a timer
285 * This function stops a timer. If the timer is already stopped,
299 tm_dump_them(char *name
, list
*l
)
304 debug("%s timers:\n", name
);
307 t
= SKIP_BACK(timer
, n
, n
);
317 tm_dump_them("Near", &near_timers
);
318 tm_dump_them("Far", &far_timers
);
324 time_t x
= first_far_timer
;
326 if (!EMPTY_LIST(near_timers
))
328 timer
*t
= SKIP_BACK(timer
, n
, HEAD(near_timers
));
341 if (first_far_timer
<= now
)
343 bird_clock_t limit
= now
+ NEAR_TIMER_LIMIT
;
344 first_far_timer
= TIME_INFINITY
;
345 n
= HEAD(far_timers
);
348 t
= SKIP_BACK(timer
, n
, n
);
349 if (t
->expires
<= limit
)
354 else if (t
->expires
< first_far_timer
)
355 first_far_timer
= t
->expires
;
359 while ((n
= HEAD(near_timers
)) -> next
)
362 t
= SKIP_BACK(timer
, n
, n
);
363 if (t
->expires
> now
)
366 delay
= t
->expires
- now
;
370 int i
= t
->recurrent
- delay
;
380 * tm_parse_datetime - parse a date and time
381 * @x: datetime string
383 * tm_parse_datetime() takes a textual representation of
384 * a date and time (dd-mm-yyyy hh:mm:ss)
385 * and converts it to the corresponding value of type &bird_clock_t.
388 tm_parse_datetime(char *x
)
394 if (sscanf(x
, "%d-%d-%d %d:%d:%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &tm
.tm_hour
, &tm
.tm_min
, &tm
.tm_sec
, &n
) != 6 || x
[n
])
395 return tm_parse_date(x
);
399 if (t
== (time_t) -1)
404 * tm_parse_date - parse a date
407 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
408 * and converts it to the corresponding value of type &bird_clock_t.
411 tm_parse_date(char *x
)
417 if (sscanf(x
, "%d-%d-%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &n
) != 3 || x
[n
])
421 tm
.tm_hour
= tm
.tm_min
= tm
.tm_sec
= 0;
423 if (t
== (time_t) -1)
429 tm_format_reltime(char *x
, struct tm
*tm
, bird_clock_t delta
)
431 static char *month_names
[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
432 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
435 bsprintf(x
, "%02d:%02d", tm
->tm_hour
, tm
->tm_min
);
436 else if (delta
< 360*86400)
437 bsprintf(x
, "%s%02d", month_names
[tm
->tm_mon
], tm
->tm_mday
);
439 bsprintf(x
, "%d", tm
->tm_year
+1900);
442 #include "conf/conf.h"
445 * tm_format_datetime - convert date and time to textual representation
446 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
449 * This function formats the given relative time value @t to a textual
450 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
453 tm_format_datetime(char *x
, struct timeformat
*fmt_spec
, bird_clock_t t
)
455 const char *fmt_used
;
457 bird_clock_t delta
= now
- t
;
458 t
= now_real
- delta
;
461 if (fmt_spec
->fmt1
== NULL
)
462 return tm_format_reltime(x
, tm
, delta
);
464 if ((fmt_spec
->limit
== 0) || (delta
< fmt_spec
->limit
))
465 fmt_used
= fmt_spec
->fmt1
;
467 fmt_used
= fmt_spec
->fmt2
;
469 int rv
= strftime(x
, TM_DATETIME_BUFFER_SIZE
, fmt_used
, tm
);
470 if (((rv
== 0) && fmt_used
[0]) || (rv
== TM_DATETIME_BUFFER_SIZE
))
471 strcpy(x
, "<too-long>");
478 * Socket resources represent network connections. Their data structure (&socket)
479 * contains a lot of fields defining the exact type of the socket, the local and
480 * remote addresses and ports, pointers to socket buffers and finally pointers to
481 * hook functions to be called when new data have arrived to the receive buffer
482 * (@rx_hook), when the contents of the transmit buffer have been transmitted
483 * (@tx_hook) and when an error or connection close occurs (@err_hook).
485 * Freeing of sockets from inside socket hooks is perfectly safe.
489 #define SOL_IP IPPROTO_IP
493 #define SOL_IPV6 IPPROTO_IPV6
497 #define SOL_ICMPV6 IPPROTO_ICMPV6
502 * Sockaddr helper functions
/*
 * sockaddr_length - size of the socket address structure for an address family
 * @af: address family
 *
 * Returns sizeof(struct sockaddr_in) when @af is AF_INET. Every other value
 * is treated as IPv6 and yields sizeof(struct sockaddr_in6).
 */
static inline int
sockaddr_length(int af)
{
  return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
}
509 sockaddr_fill4(struct sockaddr_in
*sa
, ip_addr a
, struct iface
*ifa
, uint port
)
511 memset(sa
, 0, sizeof(struct sockaddr_in
));
513 sa
->sin_len
= sizeof(struct sockaddr_in
);
515 sa
->sin_family
= AF_INET
;
516 sa
->sin_port
= htons(port
);
517 sa
->sin_addr
= ipa_to_in4(a
);
521 sockaddr_fill6(struct sockaddr_in6
*sa
, ip_addr a
, struct iface
*ifa
, uint port
)
523 memset(sa
, 0, sizeof(struct sockaddr_in6
));
525 sa
->sin6_len
= sizeof(struct sockaddr_in6
);
527 sa
->sin6_family
= AF_INET6
;
528 sa
->sin6_port
= htons(port
);
529 sa
->sin6_flowinfo
= 0;
530 sa
->sin6_addr
= ipa_to_in6(a
);
532 if (ifa
&& ipa_is_link_local(a
))
533 sa
->sin6_scope_id
= ifa
->index
;
537 sockaddr_fill(sockaddr
*sa
, int af
, ip_addr a
, struct iface
*ifa
, uint port
)
540 sockaddr_fill4((struct sockaddr_in
*) sa
, a
, ifa
, port
);
541 else if (af
== AF_INET6
)
542 sockaddr_fill6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
548 sockaddr_read4(struct sockaddr_in
*sa
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
550 *port
= ntohs(sa
->sin_port
);
551 *a
= ipa_from_in4(sa
->sin_addr
);
555 sockaddr_read6(struct sockaddr_in6
*sa
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
557 *port
= ntohs(sa
->sin6_port
);
558 *a
= ipa_from_in6(sa
->sin6_addr
);
560 if (ifa
&& ipa_is_link_local(*a
))
561 *ifa
= if_find_by_index(sa
->sin6_scope_id
);
565 sockaddr_read(sockaddr
*sa
, int af
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
567 if (sa
->sa
.sa_family
!= af
)
571 sockaddr_read4((struct sockaddr_in
*) sa
, a
, ifa
, port
);
572 else if (af
== AF_INET6
)
573 sockaddr_read6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
587 * IPv6 multicast syscalls
590 /* Fortunately standardized in RFC 3493 */
/*
 * Designated initializer for struct ipv6_mreq: multicast group @maddr on the
 * interface @ifa (a pointer to an object with an ->index member, which is
 * stored as the interface index). @ifa is parenthesized so that any pointer
 * expression (e.g. `p + 1`) expands correctly.
 */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
596 sk_setup_multicast6(sock
*s
)
598 int index
= s
->iface
->index
;
602 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_IF
, &index
, sizeof(index
)) < 0)
603 ERR("IPV6_MULTICAST_IF");
605 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
606 ERR("IPV6_MULTICAST_HOPS");
608 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_LOOP
, &n
, sizeof(n
)) < 0)
609 ERR("IPV6_MULTICAST_LOOP");
615 sk_join_group6(sock
*s
, ip_addr maddr
)
617 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
619 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_JOIN_GROUP
, &mr
, sizeof(mr
)) < 0)
620 ERR("IPV6_JOIN_GROUP");
626 sk_leave_group6(sock
*s
, ip_addr maddr
)
628 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
630 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_LEAVE_GROUP
, &mr
, sizeof(mr
)) < 0)
631 ERR("IPV6_LEAVE_GROUP");
638 * IPv6 packet control messages
641 /* Also standardized, in RFC 3542 */
644 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
645 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
646 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
647 * RFC and we use IPV6_PKTINFO.
649 #ifndef IPV6_RECVPKTINFO
650 #define IPV6_RECVPKTINFO IPV6_PKTINFO
653 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
655 #ifndef IPV6_RECVHOPLIMIT
656 #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
660 #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
661 #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
664 sk_request_cmsg6_pktinfo(sock
*s
)
668 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVPKTINFO
, &y
, sizeof(y
)) < 0)
669 ERR("IPV6_RECVPKTINFO");
675 sk_request_cmsg6_ttl(sock
*s
)
679 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVHOPLIMIT
, &y
, sizeof(y
)) < 0)
680 ERR("IPV6_RECVHOPLIMIT");
686 sk_process_cmsg6_pktinfo(sock
*s
, struct cmsghdr
*cm
)
688 if (cm
->cmsg_type
== IPV6_PKTINFO
)
690 struct in6_pktinfo
*pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
691 s
->laddr
= ipa_from_in6(pi
->ipi6_addr
);
692 s
->lifindex
= pi
->ipi6_ifindex
;
697 sk_process_cmsg6_ttl(sock
*s
, struct cmsghdr
*cm
)
699 if (cm
->cmsg_type
== IPV6_HOPLIMIT
)
700 s
->rcv_ttl
= * (int *) CMSG_DATA(cm
);
704 sk_prepare_cmsgs6(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
707 struct in6_pktinfo
*pi
;
710 msg
->msg_control
= cbuf
;
711 msg
->msg_controllen
= cbuflen
;
713 cm
= CMSG_FIRSTHDR(msg
);
714 cm
->cmsg_level
= SOL_IPV6
;
715 cm
->cmsg_type
= IPV6_PKTINFO
;
716 cm
->cmsg_len
= CMSG_LEN(sizeof(*pi
));
717 controllen
+= CMSG_SPACE(sizeof(*pi
));
719 pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
720 pi
->ipi6_ifindex
= s
->iface
? s
->iface
->index
: 0;
721 pi
->ipi6_addr
= ipa_to_in6(s
->saddr
);
723 msg
->msg_controllen
= controllen
;
728 * Miscellaneous socket syscalls
732 sk_set_ttl4(sock
*s
, int ttl
)
734 if (setsockopt(s
->fd
, SOL_IP
, IP_TTL
, &ttl
, sizeof(ttl
)) < 0)
741 sk_set_ttl6(sock
*s
, int ttl
)
743 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_UNICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
744 ERR("IPV6_UNICAST_HOPS");
750 sk_set_tos4(sock
*s
, int tos
)
752 if (setsockopt(s
->fd
, SOL_IP
, IP_TOS
, &tos
, sizeof(tos
)) < 0)
759 sk_set_tos6(sock
*s
, int tos
)
761 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_TCLASS
, &tos
, sizeof(tos
)) < 0)
769 * Public socket functions
773 * sk_setup_multicast - enable multicast for given socket
776 * Prepare transmission of multicast packets for given datagram socket.
777 * The socket must have defined @iface.
779 * Result: 0 for success, -1 for an error.
783 sk_setup_multicast(sock
*s
)
788 return sk_setup_multicast4(s
);
790 return sk_setup_multicast6(s
);
794 * sk_join_group - join multicast group for given socket
796 * @maddr: multicast address
798 * Join multicast group for given datagram socket and associated interface.
799 * The socket must have defined @iface.
801 * Result: 0 for success, -1 for an error.
805 sk_join_group(sock
*s
, ip_addr maddr
)
808 return sk_join_group4(s
, maddr
);
810 return sk_join_group6(s
, maddr
);
814 * sk_leave_group - leave multicast group for given socket
816 * @maddr: multicast address
818 * Leave multicast group for given datagram socket and associated interface.
819 * The socket must have defined @iface.
821 * Result: 0 for success, -1 for an error.
825 sk_leave_group(sock
*s
, ip_addr maddr
)
828 return sk_leave_group4(s
, maddr
);
830 return sk_leave_group6(s
, maddr
);
834 * sk_setup_broadcast - enable broadcast for given socket
837 * Allow reception and transmission of broadcast packets for given datagram
838 * socket. The socket must have defined @iface. For transmission, packets should
839 * be sent to the @brd address of @iface.
841 * Result: 0 for success, -1 for an error.
845 sk_setup_broadcast(sock
*s
)
849 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BROADCAST
, &y
, sizeof(y
)) < 0)
856 * sk_set_ttl - set transmit TTL for given socket
860 * Set TTL for already opened connections when TTL was not set before. Useful
861 * for accepted connections when different ones should have different TTL.
863 * Result: 0 for success, -1 for an error.
867 sk_set_ttl(sock
*s
, int ttl
)
872 return sk_set_ttl4(s
, ttl
);
874 return sk_set_ttl6(s
, ttl
);
878 * sk_set_min_ttl - set minimal accepted TTL for given socket
882 * Set minimal accepted TTL for given socket. Can be used for TTL security.
885 * Result: 0 for success, -1 for an error.
889 sk_set_min_ttl(sock
*s
, int ttl
)
892 return sk_set_min_ttl4(s
, ttl
);
894 return sk_set_min_ttl6(s
, ttl
);
899 * sk_set_md5_auth - add / remove MD5 security association for given socket
901 * @a: IP address of the other side
902 * @ifa: Interface for link-local IP address
903 * @passwd: password used for MD5 authentication
905 * In TCP MD5 handling code in kernel, there is a set of pairs (address,
906 * password) used to choose password according to address of the other side.
907 * This function is useful for listening socket, for active sockets it is enough
908 * to set s->password field.
910 * When called with passwd != NULL, the new pair is added,
911 * When called with passwd == NULL, the existing pair is removed.
913 * Result: 0 for success, -1 for an error.
917 sk_set_md5_auth(sock
*s
, ip_addr a
, struct iface
*ifa
, char *passwd
)
922 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
926 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
927 * kernel will automatically fill it for outgoing packets and check it for
928 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
929 * known to the kernel.
931 * Result: 0 for success, -1 for an error.
935 sk_set_ipv6_checksum(sock
*s
, int offset
)
937 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_CHECKSUM
, &offset
, sizeof(offset
)) < 0)
938 ERR("IPV6_CHECKSUM");
944 sk_set_icmp6_filter(sock
*s
, int p1
, int p2
)
946 /* a bit of lame interface, but it is here only for Radv */
947 struct icmp6_filter f
;
949 ICMP6_FILTER_SETBLOCKALL(&f
);
950 ICMP6_FILTER_SETPASS(p1
, &f
);
951 ICMP6_FILTER_SETPASS(p2
, &f
);
953 if (setsockopt(s
->fd
, SOL_ICMPV6
, ICMP6_FILTER
, &f
, sizeof(f
)) < 0)
960 sk_log_error(sock
*s
, const char *p
)
962 log(L_ERR
"%s: Socket error: %s%#m", p
, s
->err
);
967 * Actual struct birdsock code
970 static list sock_list
;
971 static struct birdsock
*current_sock
;
972 static struct birdsock
*stored_sock
;
973 static int sock_recalc_fdsets_p
;
978 if (!s
->n
.next
->next
)
981 return SKIP_BACK(sock
, n
, s
->n
.next
);
985 sk_alloc_bufs(sock
*s
)
987 if (!s
->rbuf
&& s
->rbsize
)
988 s
->rbuf
= s
->rbuf_alloc
= xmalloc(s
->rbsize
);
990 if (!s
->tbuf
&& s
->tbsize
)
991 s
->tbuf
= s
->tbuf_alloc
= xmalloc(s
->tbsize
);
992 s
->tpos
= s
->ttx
= s
->tbuf
;
996 sk_free_bufs(sock
*s
)
1000 xfree(s
->rbuf_alloc
);
1001 s
->rbuf
= s
->rbuf_alloc
= NULL
;
1005 xfree(s
->tbuf_alloc
);
1006 s
->tbuf
= s
->tbuf_alloc
= NULL
;
1011 sk_free(resource
*r
)
1013 sock
*s
= (sock
*) r
;
1020 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1021 if (s
->flags
& SKF_THREAD
)
1024 if (s
== current_sock
)
1025 current_sock
= sk_next(s
);
1026 if (s
== stored_sock
)
1027 stored_sock
= sk_next(s
);
1029 sock_recalc_fdsets_p
= 1;
1034 sk_set_rbsize(sock
*s
, uint val
)
1036 ASSERT(s
->rbuf_alloc
== s
->rbuf
);
1038 if (s
->rbsize
== val
)
1042 xfree(s
->rbuf_alloc
);
1043 s
->rbuf_alloc
= xmalloc(val
);
1044 s
->rpos
= s
->rbuf
= s
->rbuf_alloc
;
1048 sk_set_tbsize(sock
*s
, uint val
)
1050 ASSERT(s
->tbuf_alloc
== s
->tbuf
);
1052 if (s
->tbsize
== val
)
1055 byte
*old_tbuf
= s
->tbuf
;
1058 s
->tbuf
= s
->tbuf_alloc
= xrealloc(s
->tbuf_alloc
, val
);
1059 s
->tpos
= s
->tbuf
+ (s
->tpos
- old_tbuf
);
1060 s
->ttx
= s
->tbuf
+ (s
->ttx
- old_tbuf
);
1064 sk_set_tbuf(sock
*s
, void *tbuf
)
1066 s
->tbuf
= tbuf
?: s
->tbuf_alloc
;
1067 s
->ttx
= s
->tpos
= s
->tbuf
;
1071 sk_reallocate(sock
*s
)
1078 sk_dump(resource
*r
)
1080 sock
*s
= (sock
*) r
;
1081 static char *sk_type_names
[] = { "TCP<", "TCP>", "TCP", "UDP", NULL
, "IP", NULL
, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1083 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1084 sk_type_names
[s
->type
],
1092 s
->iface
? s
->iface
->name
: "none");
1095 static struct resclass sk_class
= {
1105 * sk_new - create a socket
1108 * This function creates a new socket resource. If you want to use it,
1109 * you need to fill in all the required fields of the structure and
1110 * call sk_open() to do the actual opening of the socket.
1112 * The real function name is sock_new(), sk_new() is a macro wrapper
1113 * to avoid collision with OpenSSL.
1118 sock
*s
= ralloc(p
, &sk_class
);
1120 // s->saddr = s->daddr = IPA_NONE;
1121 s
->tos
= s
->priority
= s
->ttl
= -1;
1132 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1138 if (ipa_nonzero(s
->saddr
) && !(s
->flags
& SKF_BIND
))
1139 s
->flags
|= SKF_PKTINFO
;
1141 #ifdef CONFIG_USE_HDRINCL
1142 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
) && (s
->flags
& SKF_PKTINFO
))
1144 s
->flags
&= ~SKF_PKTINFO
;
1145 s
->flags
|= SKF_HDRINCL
;
1146 if (setsockopt(fd
, SOL_IP
, IP_HDRINCL
, &y
, sizeof(y
)) < 0)
1153 #ifdef SO_BINDTODEVICE
1155 strcpy(ifr
.ifr_name
, s
->iface
->name
);
1156 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
1157 ERR("SO_BINDTODEVICE");
1160 #ifdef CONFIG_UNIX_DONTROUTE
1161 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_DONTROUTE
, &y
, sizeof(y
)) < 0)
1162 ERR("SO_DONTROUTE");
1166 if (s
->priority
>= 0)
1167 if (sk_set_priority(s
, s
->priority
) < 0)
1172 if (s
->flags
& SKF_LADDR_RX
)
1173 if (sk_request_cmsg4_pktinfo(s
) < 0)
1176 if (s
->flags
& SKF_TTL_RX
)
1177 if (sk_request_cmsg4_ttl(s
) < 0)
1180 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1181 if (sk_disable_mtu_disc4(s
) < 0)
1185 if (sk_set_ttl4(s
, s
->ttl
) < 0)
1189 if (sk_set_tos4(s
, s
->tos
) < 0)
1195 if (s
->flags
& SKF_V6ONLY
)
1196 if (setsockopt(fd
, SOL_IPV6
, IPV6_V6ONLY
, &y
, sizeof(y
)) < 0)
1199 if (s
->flags
& SKF_LADDR_RX
)
1200 if (sk_request_cmsg6_pktinfo(s
) < 0)
1203 if (s
->flags
& SKF_TTL_RX
)
1204 if (sk_request_cmsg6_ttl(s
) < 0)
1207 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1208 if (sk_disable_mtu_disc6(s
) < 0)
1212 if (sk_set_ttl6(s
, s
->ttl
) < 0)
1216 if (sk_set_tos6(s
, s
->tos
) < 0)
1226 add_tail(&sock_list
, &s
->n
);
1227 sock_recalc_fdsets_p
= 1;
1231 sk_tcp_connected(sock
*s
)
1234 int sa_len
= sizeof(sa
);
1236 if ((getsockname(s
->fd
, &sa
.sa
, &sa_len
) < 0) ||
1237 (sockaddr_read(&sa
, s
->af
, &s
->saddr
, &s
->iface
, &s
->sport
) < 0))
1238 log(L_WARN
"SOCK: Cannot get local IP address for TCP>");
1246 sk_passive_connected(sock
*s
, int type
)
1248 sockaddr loc_sa
, rem_sa
;
1249 int loc_sa_len
= sizeof(loc_sa
);
1250 int rem_sa_len
= sizeof(rem_sa
);
1252 int fd
= accept(s
->fd
, ((type
== SK_TCP
) ? &rem_sa
.sa
: NULL
), &rem_sa_len
);
1255 if ((errno
!= EINTR
) && (errno
!= EAGAIN
))
1256 s
->err_hook(s
, errno
);
1260 sock
*t
= sk_new(s
->pool
);
1266 t
->rbsize
= s
->rbsize
;
1267 t
->tbsize
= s
->tbsize
;
1271 if ((getsockname(fd
, &loc_sa
.sa
, &loc_sa_len
) < 0) ||
1272 (sockaddr_read(&loc_sa
, s
->af
, &t
->saddr
, &t
->iface
, &t
->sport
) < 0))
1273 log(L_WARN
"SOCK: Cannot get local IP address for TCP<");
1275 if (sockaddr_read(&rem_sa
, s
->af
, &t
->daddr
, &t
->iface
, &t
->dport
) < 0)
1276 log(L_WARN
"SOCK: Cannot get remote IP address for TCP<");
1279 if (sk_setup(t
) < 0)
1281 /* FIXME: Call err_hook instead ? */
1282 log(L_ERR
"SOCK: Incoming connection: %s%#m", t
->err
);
1284 /* FIXME: handle it better in rfree() */
1298 * sk_open - open a socket
1301 * This function takes a socket resource created by sk_new() and
1302 * initialized by the user and binds a corresponding network connection
1305 * Result: 0 for success, -1 for an error.
1314 ip_addr bind_addr
= IPA_NONE
;
1320 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1322 case SK_TCP_PASSIVE
:
1323 fd
= socket(af
, SOCK_STREAM
, IPPROTO_TCP
);
1324 bind_port
= s
->sport
;
1325 bind_addr
= s
->saddr
;
1326 do_bind
= bind_port
|| ipa_nonzero(bind_addr
);
1330 fd
= socket(af
, SOCK_DGRAM
, IPPROTO_UDP
);
1331 bind_port
= s
->sport
;
1332 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1337 fd
= socket(af
, SOCK_RAW
, s
->dport
);
1339 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1340 do_bind
= ipa_nonzero(bind_addr
);
1349 bug("sk_open() called for invalid sock type %d", s
->type
);
1358 if (sk_setup(s
) < 0)
1367 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &y
, sizeof(y
)) < 0)
1368 ERR2("SO_REUSEADDR");
1370 #ifdef CONFIG_NO_IFACE_BIND
1371 /* Workaround missing ability to bind to an iface */
1372 if ((s
->type
== SK_UDP
) && s
->iface
&& ipa_zero(bind_addr
))
1374 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &y
, sizeof(y
)) < 0)
1375 ERR2("SO_REUSEPORT");
1380 sockaddr_fill(&sa
, af
, bind_addr
, s
->iface
, bind_port
);
1381 if (bind(fd
, &sa
.sa
, SA_LEN(sa
)) < 0)
1386 if (sk_set_md5_auth(s
, s
->daddr
, s
->iface
, s
->password
) < 0)
1392 sockaddr_fill(&sa
, af
, s
->daddr
, s
->iface
, s
->dport
);
1393 if (connect(fd
, &sa
.sa
, SA_LEN(sa
)) >= 0)
1394 sk_tcp_connected(s
);
1395 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
&&
1396 errno
!= ECONNREFUSED
&& errno
!= EHOSTUNREACH
&& errno
!= ENETUNREACH
)
1400 case SK_TCP_PASSIVE
:
1401 if (listen(fd
, 8) < 0)
1412 if (!(s
->flags
& SKF_THREAD
))
1423 sk_open_unix(sock
*s
, char *name
)
1425 struct sockaddr_un sa
;
1428 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1430 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1434 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1437 /* Path length checked in test_old_bird() */
1438 sa
.sun_family
= AF_UNIX
;
1439 strcpy(sa
.sun_path
, name
);
1441 if (bind(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) < 0)
1444 if (listen(fd
, 8) < 0)
1453 #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1454 CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1455 #define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1458 sk_prepare_cmsgs(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
1461 sk_prepare_cmsgs4(s
, msg
, cbuf
, cbuflen
);
1463 sk_prepare_cmsgs6(s
, msg
, cbuf
, cbuflen
);
1467 sk_process_cmsgs(sock
*s
, struct msghdr
*msg
)
1471 s
->laddr
= IPA_NONE
;
1475 for (cm
= CMSG_FIRSTHDR(msg
); cm
!= NULL
; cm
= CMSG_NXTHDR(msg
, cm
))
1477 if ((cm
->cmsg_level
== SOL_IP
) && sk_is_ipv4(s
))
1479 sk_process_cmsg4_pktinfo(s
, cm
);
1480 sk_process_cmsg4_ttl(s
, cm
);
1483 if ((cm
->cmsg_level
== SOL_IPV6
) && sk_is_ipv6(s
))
1485 sk_process_cmsg6_pktinfo(s
, cm
);
1486 sk_process_cmsg6_ttl(s
, cm
);
1495 struct iovec iov
= {s
->tbuf
, s
->tpos
- s
->tbuf
};
1496 byte cmsg_buf
[CMSG_TX_SPACE
];
1499 sockaddr_fill(&dst
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1501 struct msghdr msg
= {
1502 .msg_name
= &dst
.sa
,
1503 .msg_namelen
= SA_LEN(dst
),
1508 #ifdef CONFIG_USE_HDRINCL
1510 struct iovec iov2
[2] = { {hdr
, 20}, iov
};
1512 if (s
->flags
& SKF_HDRINCL
)
1514 sk_prepare_ip_header(s
, hdr
, iov
.iov_len
);
1520 if (s
->flags
& SKF_PKTINFO
)
1521 sk_prepare_cmsgs(s
, &msg
, cmsg_buf
, sizeof(cmsg_buf
));
1523 return sendmsg(s
->fd
, &msg
, 0);
1529 struct iovec iov
= {s
->rbuf
, s
->rbsize
};
1530 byte cmsg_buf
[CMSG_RX_SPACE
];
1533 struct msghdr msg
= {
1534 .msg_name
= &src
.sa
,
1535 .msg_namelen
= sizeof(src
), // XXXX ??
1538 .msg_control
= cmsg_buf
,
1539 .msg_controllen
= sizeof(cmsg_buf
),
1543 int rv
= recvmsg(s
->fd
, &msg
, 0);
1548 // if (cf_type == SK_IP)
1549 // rv = ipv4_skip_header(pbuf, rv);
1552 sockaddr_read(&src
, s
->af
, &s
->faddr
, NULL
, &s
->fport
);
1553 sk_process_cmsgs(s
, &msg
);
1555 if (msg
.msg_flags
& MSG_TRUNC
)
1556 s
->flags
|= SKF_TRUNCATED
;
1558 s
->flags
&= ~SKF_TRUNCATED
;
1564 static inline void reset_tx_buffer(sock
*s
) { s
->ttx
= s
->tpos
= s
->tbuf
; }
1567 sk_maybe_write(sock
*s
)
1576 while (s
->ttx
!= s
->tpos
)
1578 e
= write(s
->fd
, s
->ttx
, s
->tpos
- s
->ttx
);
1582 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1585 /* EPIPE is just a connection close notification during TX */
1586 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1599 if (s
->tbuf
== s
->tpos
)
1606 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1609 s
->err_hook(s
, errno
);
1621 bug("sk_maybe_write: unknown socket type %d", s
->type
);
1626 sk_rx_ready(sock
*s
)
1629 struct timeval timo
;
1640 rv
= select(s
->fd
+1, &rd
, &wr
, NULL
, &timo
);
1642 if ((rv
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
1649 * sk_send - send data to a socket
1651 * @len: number of bytes to send
1653 * This function sends @len bytes of data prepared in the
1654 * transmit buffer of the socket @s to the network connection.
1655 * If the packet can be sent immediately, it does so and returns
1656 * 1, else it queues the packet for later processing, returns 0
1657 * and calls the @tx_hook of the socket when the transmission
1661 sk_send(sock
*s
, unsigned len
)
1664 s
->tpos
= s
->tbuf
+ len
;
1665 return sk_maybe_write(s
);
1669 * sk_send_to - send data to a specific destination
1671 * @len: number of bytes to send
1672 * @addr: IP address to send the packet to
1673 * @port: port to send the packet to
1675 * This is a sk_send() replacement for connection-less packet sockets
1676 * which allows destination of the packet to be chosen dynamically.
1677 * Raw IP sockets should use 0 for @port.
1680 sk_send_to(sock
*s
, unsigned len
, ip_addr addr
, unsigned port
)
1687 s
->tpos
= s
->tbuf
+ len
;
1688 return sk_maybe_write(s
);
1693 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1694 ip_addr saddr, ip_addr daddr, unsigned dport)
1701 s->tpos = s->tbuf + len;
1702 return sk_maybe_write(s);
1706 /* sk_read() and sk_write() are called from BFD's event loop */
1713 case SK_TCP_PASSIVE
:
1714 return sk_passive_connected(s
, SK_TCP
);
1716 case SK_UNIX_PASSIVE
:
1717 return sk_passive_connected(s
, SK_UNIX
);
1722 int c
= read(s
->fd
, s
->rpos
, s
->rbuf
+ s
->rbsize
- s
->rpos
);
1726 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1727 s
->err_hook(s
, errno
);
1734 if (s
->rx_hook(s
, s
->rpos
- s
->rbuf
))
1736 /* We need to be careful since the socket could have been deleted by the hook */
1737 if (current_sock
== s
)
1746 return s
->rx_hook(s
, 0);
1750 int e
= sk_recvmsg(s
);
1754 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1755 s
->err_hook(s
, errno
);
1759 s
->rpos
= s
->rbuf
+ e
;
1774 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1776 if (connect(s
->fd
, &sa
.sa
, SA_LEN(sa
)) >= 0 || errno
== EISCONN
)
1777 sk_tcp_connected(s
);
1778 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
)
1779 s
->err_hook(s
, errno
);
1784 if (s
->ttx
!= s
->tpos
&& sk_maybe_write(s
) > 0)
1800 debug("Open sockets:\n");
1801 WALK_LIST(n
, sock_list
)
1803 s
= SKIP_BACK(sock
, n
, n
);
1815 volatile int async_config_flag
; /* Asynchronous reconfiguration/dump scheduled */
1816 volatile int async_dump_flag
;
1821 init_list(&near_timers
);
1822 init_list(&far_timers
);
1823 init_list(&sock_list
);
1824 init_list(&global_event_list
);
1829 srandom((int) now_real
);
1832 static int short_loops
= 0;
1833 #define SHORT_LOOP_MAX 10
1839 struct timeval timo
;
1845 sock_recalc_fdsets_p
= 1;
1848 events
= ev_run_list(&global_event_list
);
1850 tout
= tm_first_shot();
1856 timo
.tv_sec
= events
? 0 : MIN(tout
- now
, 3);
1859 if (sock_recalc_fdsets_p
)
1861 sock_recalc_fdsets_p
= 0;
1867 WALK_LIST(n
, sock_list
)
1869 s
= SKIP_BACK(sock
, n
, n
);
1878 if (s
->tx_hook
&& s
->ttx
!= s
->tpos
)
1889 * Yes, this is racy. But even if the signal comes before this test
1890 * and entering select(), it gets caught on the next timer tick.
1893 if (async_config_flag
)
1896 async_config_flag
= 0;
1899 if (async_dump_flag
)
1902 async_dump_flag
= 0;
1905 if (async_shutdown_flag
)
1908 async_shutdown_flag
= 0;
1912 /* And finally enter select() to find active sockets */
1913 hi
= select(hi
+1, &rd
, &wr
, NULL
, &timo
);
1917 if (errno
== EINTR
|| errno
== EAGAIN
)
1923 /* guaranteed to be non-empty */
1924 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
1926 while (current_sock
)
1928 sock
*s
= current_sock
;
1933 if ((s
->type
>= SK_MAGIC
) && FD_ISSET(s
->fd
, &rd
) && s
->rx_hook
)
1938 if (s
!= current_sock
)
1941 while (e
&& s
->rx_hook
&& steps
);
1944 if (FD_ISSET(s
->fd
, &wr
))
1949 if (s
!= current_sock
)
1953 current_sock
= sk_next(s
);
1958 if (events
&& (short_loops
< SHORT_LOOP_MAX
))
1963 current_sock
= stored_sock
;
1964 if (current_sock
== NULL
)
1965 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
1967 while (current_sock
&& count
< MAX_RX_STEPS
)
1969 sock
*s
= current_sock
;
1972 if ((s
->type
< SK_MAGIC
) && FD_ISSET(s
->fd
, &rd
) && s
->rx_hook
)
1976 if (s
!= current_sock
)
1979 current_sock
= sk_next(s
);
1983 stored_sock
= current_sock
;
1989 test_old_bird(char *path
)
1992 struct sockaddr_un sa
;
1994 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1996 die("Cannot create socket: %m");
1997 if (strlen(path
) >= sizeof(sa
.sun_path
))
1998 die("Socket path too long");
1999 bzero(&sa
, sizeof(sa
));
2000 sa
.sun_family
= AF_UNIX
;
2001 strcpy(sa
.sun_path
, path
);
2002 if (connect(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) == 0)
2003 die("I found another BIRD running.");