2 * BIRD Internet Routing Daemon -- Unix I/O
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
7 * Can be freely distributed and used under the terms of the GNU GPL.
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
18 #include <sys/types.h>
19 #include <sys/socket.h>
20 #include <sys/fcntl.h>
25 #include <netinet/in.h>
26 #include <netinet/icmp6.h>
28 #include "nest/bird.h"
29 #include "lib/lists.h"
30 #include "lib/resource.h"
31 #include "lib/timer.h"
32 #include "lib/socket.h"
33 #include "lib/event.h"
34 #include "lib/string.h"
35 #include "nest/iface.h"
38 #include "lib/sysio.h"
40 /* Maximum number of calls of tx handler for one socket in one
41 * select iteration. Should be small enough to not monopolize CPU by
42 * one protocol instance.
46 /* Maximum number of calls of rx handler for all sockets in one select
47 iteration. RX callbacks are often much more costly so we limit
48 this to get small latencies */
49 #define MAX_RX_STEPS 4
63 struct rfile
*a
= (struct rfile
*) r
;
71 struct rfile
*a
= (struct rfile
*) r
;
73 debug("(FILE *%p)\n", a
->f
);
76 static struct resclass rf_class
= {
86 tracked_fopen(pool
*p
, char *name
, char *mode
)
88 FILE *f
= fopen(name
, mode
);
92 struct rfile
*r
= ralloc(p
, &rf_class
);
101 * Timers are resources which represent a wish of a module to call
102 * a function at the specified time. The platform dependent code
103 * doesn't guarantee exact timing, only that a timer function
104 * won't be called before the requested time.
106 * In BIRD, time is represented by values of the &bird_clock_t type
107 * which are integral numbers interpreted as a relative number of seconds since
108 * some fixed time point in the past. The current time can be read
109 * from variable @now with reasonable accuracy and is monotonic. There is also
110 * a current 'absolute' time in variable @now_real reported by OS.
112 * Each timer is described by a &timer structure containing a pointer
113 * to the handler function (@hook), data private to this function (@data),
114 * time the function should be called at (@expires, 0 for inactive timers),
115 * for the other fields see |timer.h|.
118 #define NEAR_TIMER_LIMIT 4
120 static list near_timers
, far_timers
;
121 static bird_clock_t first_far_timer
= TIME_INFINITY
;
123 /* now must be different from 0, because 0 is a special value in timer->expires */
124 bird_clock_t now
= 1, now_real
;
127 update_times_plain(void)
129 bird_clock_t new_time
= time(NULL
);
130 int delta
= new_time
- now_real
;
132 if ((delta
>= 0) && (delta
< 60))
134 else if (now_real
!= 0)
135 log(L_WARN
"Time jump, delta %d s", delta
);
141 update_times_gettime(void)
146 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
148 die("clock_gettime: %m");
150 if (ts
.tv_sec
!= now
) {
152 log(L_ERR
"Monotonic timer is broken");
155 now_real
= time(NULL
);
159 static int clock_monotonic_available
;
164 if (clock_monotonic_available
)
165 update_times_gettime();
167 update_times_plain();
174 clock_monotonic_available
= (clock_gettime(CLOCK_MONOTONIC
, &ts
) == 0);
175 if (!clock_monotonic_available
)
176 log(L_WARN
"Monotonic timer is missing");
183 timer
*t
= (timer
*) r
;
191 timer
*t
= (timer
*) r
;
193 debug("(code %p, data %p, ", t
->hook
, t
->data
);
195 debug("rand %d, ", t
->randomize
);
197 debug("recur %d, ", t
->recurrent
);
199 debug("expires in %d sec)\n", t
->expires
- now
);
201 debug("inactive)\n");
204 static struct resclass tm_class
= {
214 * tm_new - create a timer
217 * This function creates a new timer resource and returns
218 * a pointer to it. To use the timer, you need to fill in
219 * the structure fields and call tm_start() to start timing.
224 timer
*t
= ralloc(p
, &tm_class
);
229 tm_insert_near(timer
*t
)
231 node
*n
= HEAD(near_timers
);
233 while (n
->next
&& (SKIP_BACK(timer
, n
, n
)->expires
< t
->expires
))
235 insert_node(&t
->n
, n
->prev
);
239 * tm_start - start a timer
241 * @after: number of seconds the timer should be run after
243 * This function schedules the hook function of the timer to
244 * be called after @after seconds. If the timer has been already
245 * started, its @expires time is replaced by the new value.
247 * If you have set the @randomize field of @t, the timeout
248 * will be increased by a random number of seconds chosen
249 * uniformly from range 0 .. @randomize.
251 * You can call tm_start() from the handler function of the timer
252 * to request another run of the timer. Also, you can set the @recurrent
253 * field to have the timer re-added automatically with the same timeout.
256 tm_start(timer
*t
, unsigned after
)
261 after
+= random() % (t
->randomize
+ 1);
263 if (t
->expires
== when
)
268 if (after
<= NEAR_TIMER_LIMIT
)
272 if (!first_far_timer
|| first_far_timer
> when
)
273 first_far_timer
= when
;
274 add_tail(&far_timers
, &t
->n
);
279 * tm_stop - stop a timer
282 * This function stops a timer. If the timer is already stopped,
296 tm_dump_them(char *name
, list
*l
)
301 debug("%s timers:\n", name
);
304 t
= SKIP_BACK(timer
, n
, n
);
314 tm_dump_them("Near", &near_timers
);
315 tm_dump_them("Far", &far_timers
);
321 time_t x
= first_far_timer
;
323 if (!EMPTY_LIST(near_timers
))
325 timer
*t
= SKIP_BACK(timer
, n
, HEAD(near_timers
));
338 if (first_far_timer
<= now
)
340 bird_clock_t limit
= now
+ NEAR_TIMER_LIMIT
;
341 first_far_timer
= TIME_INFINITY
;
342 n
= HEAD(far_timers
);
345 t
= SKIP_BACK(timer
, n
, n
);
346 if (t
->expires
<= limit
)
351 else if (t
->expires
< first_far_timer
)
352 first_far_timer
= t
->expires
;
356 while ((n
= HEAD(near_timers
)) -> next
)
359 t
= SKIP_BACK(timer
, n
, n
);
360 if (t
->expires
> now
)
363 delay
= t
->expires
- now
;
367 int i
= t
->recurrent
- delay
;
377 * tm_parse_datetime - parse a date and time
378 * @x: datetime string
380 * tm_parse_datetime() takes a textual representation of
381 * a date and time (dd-mm-yyyy hh:mm:ss)
382 * and converts it to the corresponding value of type &bird_clock_t.
385 tm_parse_datetime(char *x
)
391 if (sscanf(x
, "%d-%d-%d %d:%d:%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &tm
.tm_hour
, &tm
.tm_min
, &tm
.tm_sec
, &n
) != 6 || x
[n
])
392 return tm_parse_date(x
);
396 if (t
== (time_t) -1)
401 * tm_parse_date - parse a date
404 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
405 * and converts it to the corresponding value of type &bird_clock_t.
408 tm_parse_date(char *x
)
414 if (sscanf(x
, "%d-%d-%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &n
) != 3 || x
[n
])
418 tm
.tm_hour
= tm
.tm_min
= tm
.tm_sec
= 0;
420 if (t
== (time_t) -1)
426 tm_format_reltime(char *x
, struct tm
*tm
, bird_clock_t delta
)
428 static char *month_names
[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
429 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
432 bsprintf(x
, "%02d:%02d", tm
->tm_hour
, tm
->tm_min
);
433 else if (delta
< 360*86400)
434 bsprintf(x
, "%s%02d", month_names
[tm
->tm_mon
], tm
->tm_mday
);
436 bsprintf(x
, "%d", tm
->tm_year
+1900);
439 #include "conf/conf.h"
442 * tm_format_datetime - convert date and time to textual representation
443 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
446 * This function formats the given relative time value @t to a textual
447 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
450 tm_format_datetime(char *x
, struct timeformat
*fmt_spec
, bird_clock_t t
)
452 const char *fmt_used
;
454 bird_clock_t delta
= now
- t
;
455 t
= now_real
- delta
;
458 if (fmt_spec
->fmt1
== NULL
)
459 return tm_format_reltime(x
, tm
, delta
);
461 if ((fmt_spec
->limit
== 0) || (delta
< fmt_spec
->limit
))
462 fmt_used
= fmt_spec
->fmt1
;
464 fmt_used
= fmt_spec
->fmt2
;
466 int rv
= strftime(x
, TM_DATETIME_BUFFER_SIZE
, fmt_used
, tm
);
467 if (((rv
== 0) && fmt_used
[0]) || (rv
== TM_DATETIME_BUFFER_SIZE
))
468 strcpy(x
, "<too-long>");
474 * Socket resources represent network connections. Their data structure (&socket)
475 * contains a lot of fields defining the exact type of the socket, the local and
476 * remote addresses and ports, pointers to socket buffers and finally pointers to
477 * hook functions to be called when new data have arrived to the receive buffer
478 * (@rx_hook), when the contents of the transmit buffer have been transmitted
479 * (@tx_hook) and when an error or connection close occurs (@err_hook).
481 * Freeing of sockets from inside socket hooks is perfectly safe.
485 #define SOL_IP IPPROTO_IP
489 #define SOL_IPV6 IPPROTO_IPV6
492 static list sock_list
;
493 static struct birdsock
*current_sock
;
494 static struct birdsock
*stored_sock
;
495 static int sock_recalc_fdsets_p
;
500 if (!s
->n
.next
->next
)
503 return SKIP_BACK(sock
, n
, s
->n
.next
);
507 sk_alloc_bufs(sock
*s
)
509 if (!s
->rbuf
&& s
->rbsize
)
510 s
->rbuf
= s
->rbuf_alloc
= xmalloc(s
->rbsize
);
512 if (!s
->tbuf
&& s
->tbsize
)
513 s
->tbuf
= s
->tbuf_alloc
= xmalloc(s
->tbsize
);
514 s
->tpos
= s
->ttx
= s
->tbuf
;
518 sk_free_bufs(sock
*s
)
522 xfree(s
->rbuf_alloc
);
523 s
->rbuf
= s
->rbuf_alloc
= NULL
;
527 xfree(s
->tbuf_alloc
);
528 s
->tbuf
= s
->tbuf_alloc
= NULL
;
535 sock
*s
= (sock
*) r
;
541 if (s
== current_sock
)
542 current_sock
= sk_next(s
);
543 if (s
== stored_sock
)
544 stored_sock
= sk_next(s
);
546 sock_recalc_fdsets_p
= 1;
551 sk_reallocate(sock
*s
)
560 sock
*s
= (sock
*) r
;
561 static char *sk_type_names
[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
563 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
564 sk_type_names
[s
->type
],
572 s
->iface
? s
->iface
->name
: "none");
575 static struct resclass sk_class
= {
585 * sk_new - create a socket
588 * This function creates a new socket resource. If you want to use it,
589 * you need to fill in all the required fields of the structure and
590 * call sk_open() to do the actual opening of the socket.
595 sock
*s
= ralloc(p
, &sk_class
);
597 // s->saddr = s->daddr = IPA_NONE;
598 s
->tos
= s
->ttl
= -1;
606 add_tail(&sock_list
, &s
->n
);
607 sock_recalc_fdsets_p
= 1;
613 fill_in_sockaddr(struct sockaddr_in6
*sa
, ip_addr a
, struct iface
*ifa
, unsigned port
)
615 memset(sa
, 0, sizeof (struct sockaddr_in6
));
616 sa
->sin6_family
= AF_INET6
;
617 sa
->sin6_port
= htons(port
);
618 sa
->sin6_flowinfo
= 0;
620 sa
->sin6_len
= sizeof(struct sockaddr_in6
);
622 set_inaddr(&sa
->sin6_addr
, a
);
624 if (ifa
&& ipa_has_link_scope(a
))
625 sa
->sin6_scope_id
= ifa
->index
;
629 get_sockaddr(struct sockaddr_in6
*sa
, ip_addr
*a
, struct iface
**ifa
, unsigned *port
, int check
)
631 if (check
&& sa
->sin6_family
!= AF_INET6
)
632 bug("get_sockaddr called for wrong address family (%d)", sa
->sin6_family
);
634 *port
= ntohs(sa
->sin6_port
);
635 memcpy(a
, &sa
->sin6_addr
, sizeof(*a
));
638 if (ifa
&& ipa_has_link_scope(*a
))
639 *ifa
= if_find_by_index(sa
->sin6_scope_id
);
645 fill_in_sockaddr(struct sockaddr_in
*sa
, ip_addr a
, struct iface
*ifa
, unsigned port
)
647 memset (sa
, 0, sizeof (struct sockaddr_in
));
648 sa
->sin_family
= AF_INET
;
649 sa
->sin_port
= htons(port
);
651 sa
->sin_len
= sizeof(struct sockaddr_in
);
653 set_inaddr(&sa
->sin_addr
, a
);
657 get_sockaddr(struct sockaddr_in
*sa
, ip_addr
*a
, struct iface
**ifa
, unsigned *port
, int check
)
659 if (check
&& sa
->sin_family
!= AF_INET
)
660 bug("get_sockaddr called for wrong address family (%d)", sa
->sin_family
);
662 *port
= ntohs(sa
->sin_port
);
663 memcpy(a
, &sa
->sin_addr
.s_addr
, sizeof(*a
));
672 /* PKTINFO handling is also standardized in IPv6 */
673 #define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
674 #define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
677 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
678 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
679 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
680 * RFC and we use IPV6_PKTINFO.
682 #ifndef IPV6_RECVPKTINFO
683 #define IPV6_RECVPKTINFO IPV6_PKTINFO
687 sysio_register_cmsgs(sock
*s
)
690 if ((s
->flags
& SKF_LADDR_RX
) &&
691 setsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, &ok
, sizeof(ok
)) < 0)
692 return "IPV6_RECVPKTINFO";
698 sysio_process_rx_cmsgs(sock
*s
, struct msghdr
*msg
)
701 struct in6_pktinfo
*pi
= NULL
;
703 if (!(s
->flags
& SKF_LADDR_RX
))
706 for (cm
= CMSG_FIRSTHDR(msg
); cm
!= NULL
; cm
= CMSG_NXTHDR(msg
, cm
))
708 if (cm
->cmsg_level
== IPPROTO_IPV6
&& cm
->cmsg_type
== IPV6_PKTINFO
)
709 pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
719 get_inaddr(&s
->laddr
, &pi
->ipi6_addr
);
720 s
->lifindex
= pi
->ipi6_ifindex
;
726 sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
729 struct in6_pktinfo *pi;
731 if (!(s->flags & SKF_LADDR_TX))
734 msg->msg_control = cbuf;
735 msg->msg_controllen = cbuflen;
737 cm = CMSG_FIRSTHDR(msg);
738 cm->cmsg_level = IPPROTO_IPV6;
739 cm->cmsg_type = IPV6_PKTINFO;
740 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
742 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
743 set_inaddr(&pi->ipi6_addr, s->saddr);
744 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
746 msg->msg_controllen = cm->cmsg_len;
753 sk_set_ttl_int(sock
*s
)
756 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_UNICAST_HOPS
, &s
->ttl
, sizeof(s
->ttl
)) < 0)
757 return "IPV6_UNICAST_HOPS";
759 if (setsockopt(s
->fd
, SOL_IP
, IP_TTL
, &s
->ttl
, sizeof(s
->ttl
)) < 0)
761 #ifdef CONFIG_UNIX_DONTROUTE
763 if (s
->ttl
== 1 && setsockopt(s
->fd
, SOL_SOCKET
, SO_DONTROUTE
, &one
, sizeof(one
)) < 0)
764 return "SO_DONTROUTE";
770 #define ERR(x) do { err = x; goto bad; } while(0)
771 #define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
779 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
780 ERR("fcntl(O_NONBLOCK)");
781 if (s
->type
== SK_UNIX
)
784 if ((s
->tos
>= 0) && setsockopt(fd
, SOL_IP
, IP_TOS
, &s
->tos
, sizeof(s
->tos
)) < 0)
790 if ((s
->flags
& SKF_V6ONLY
) && setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &v
, sizeof(v
)) < 0)
795 err
= sk_set_ttl_int(s
);
797 sysio_register_cmsgs(s
);
803 * sk_set_ttl - set transmit TTL for given socket.
807 * Set TTL for already opened connections when TTL was not set before.
808 * Useful for accepted connections when different ones should have
811 * Result: 0 for success, -1 for an error.
815 sk_set_ttl(sock
*s
, int ttl
)
820 if (err
= sk_set_ttl_int(s
))
821 log(L_ERR
"sk_set_ttl: %s: %m", err
);
823 return (err
? -1 : 0);
827 * sk_set_min_ttl - set minimal accepted TTL for given socket.
831 * Can be used in TTL security implementation
833 * Result: 0 for success, -1 for an error.
837 sk_set_min_ttl(sock
*s
, int ttl
)
841 err
= sk_set_min_ttl6(s
, ttl
);
843 err
= sk_set_min_ttl4(s
, ttl
);
850 * sk_set_md5_auth - add / remove MD5 security association for given socket.
852 * @a: IP address of the other side
853 * @ifa: Interface for link-local IP address
854 * @passwd: password used for MD5 authentication
856 * In TCP MD5 handling code in kernel, there is a set of pairs
857 * (address, password) used to choose password according to
858 * address of the other side. This function is useful for
859 * listening socket, for active sockets it is enough to set
862 * When called with passwd != NULL, the new pair is added,
863 * When called with passwd == NULL, the existing pair is removed.
865 * Result: 0 for success, -1 for an error.
869 sk_set_md5_auth(sock
*s
, ip_addr a
, struct iface
*ifa
, char *passwd
)
872 fill_in_sockaddr(&sa
, a
, ifa
, 0);
873 return sk_set_md5_auth_int(s
, &sa
, passwd
);
877 sk_set_broadcast(sock
*s
, int enable
)
879 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BROADCAST
, &enable
, sizeof(enable
)) < 0)
881 log(L_ERR
"sk_set_broadcast: SO_BROADCAST: %m");
892 sk_set_ipv6_checksum(sock
*s
, int offset
)
894 if (setsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_CHECKSUM
, &offset
, sizeof(offset
)) < 0)
896 log(L_ERR
"sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
904 sk_set_icmp_filter(sock
*s
, int p1
, int p2
)
906 /* a bit of lame interface, but it is here only for Radv */
907 struct icmp6_filter f
;
909 ICMP6_FILTER_SETBLOCKALL(&f
);
910 ICMP6_FILTER_SETPASS(p1
, &f
);
911 ICMP6_FILTER_SETPASS(p2
, &f
);
913 if (setsockopt(s
->fd
, IPPROTO_ICMPV6
, ICMP6_FILTER
, &f
, sizeof(f
)) < 0)
915 log(L_ERR
"sk_setup_icmp_filter: ICMP6_FILTER: %m");
923 sk_setup_multicast(sock
*s
)
929 ASSERT(s
->iface
&& s
->iface
->addr
);
931 index
= s
->iface
->index
;
932 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_HOPS
, &s
->ttl
, sizeof(s
->ttl
)) < 0)
933 ERR("IPV6_MULTICAST_HOPS");
934 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_LOOP
, &zero
, sizeof(zero
)) < 0)
935 ERR("IPV6_MULTICAST_LOOP");
936 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_IF
, &index
, sizeof(index
)) < 0)
937 ERR("IPV6_MULTICAST_IF");
939 if (err
= sysio_bind_to_iface(s
))
945 log(L_ERR
"sk_setup_multicast: %s: %m", err
);
950 sk_join_group(sock
*s
, ip_addr maddr
)
952 struct ipv6_mreq mreq
;
954 set_inaddr(&mreq
.ipv6mr_multiaddr
, maddr
);
956 #ifdef CONFIG_IPV6_GLIBC_20
957 mreq
.ipv6mr_ifindex
= s
->iface
->index
;
959 mreq
.ipv6mr_interface
= s
->iface
->index
;
962 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_JOIN_GROUP
, &mreq
, sizeof(mreq
)) < 0)
964 log(L_ERR
"sk_join_group: IPV6_JOIN_GROUP: %m");
972 sk_leave_group(sock
*s
, ip_addr maddr
)
974 struct ipv6_mreq mreq
;
976 set_inaddr(&mreq
.ipv6mr_multiaddr
, maddr
);
978 #ifdef CONFIG_IPV6_GLIBC_20
979 mreq
.ipv6mr_ifindex
= s
->iface
->index
;
981 mreq
.ipv6mr_interface
= s
->iface
->index
;
984 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_LEAVE_GROUP
, &mreq
, sizeof(mreq
)) < 0)
986 log(L_ERR
"sk_leave_group: IPV6_LEAVE_GROUP: %m");
996 sk_setup_multicast(sock
*s
)
1000 ASSERT(s
->iface
&& s
->iface
->addr
);
1002 if (err
= sysio_setup_multicast(s
))
1004 log(L_ERR
"sk_setup_multicast: %s: %m", err
);
1012 sk_join_group(sock
*s
, ip_addr maddr
)
1016 if (err
= sysio_join_group(s
, maddr
))
1018 log(L_ERR
"sk_join_group: %s: %m", err
);
1026 sk_leave_group(sock
*s
, ip_addr maddr
)
1030 if (err
= sysio_leave_group(s
, maddr
))
1032 log(L_ERR
"sk_leave_group: %s: %m", err
);
1043 sk_tcp_connected(sock
*s
)
1046 int lsa_len
= sizeof(lsa
);
1047 if (getsockname(s
->fd
, (struct sockaddr
*) &lsa
, &lsa_len
) == 0)
1048 get_sockaddr(&lsa
, &s
->saddr
, &s
->iface
, &s
->sport
, 1);
1056 sk_passive_connected(sock
*s
, struct sockaddr
*sa
, int al
, int type
)
1058 int fd
= accept(s
->fd
, sa
, &al
);
1061 sock
*t
= sk_new(s
->pool
);
1067 t
->rbsize
= s
->rbsize
;
1068 t
->tbsize
= s
->tbsize
;
1072 int lsa_len
= sizeof(lsa
);
1073 if (getsockname(fd
, (struct sockaddr
*) &lsa
, &lsa_len
) == 0)
1074 get_sockaddr(&lsa
, &t
->saddr
, &t
->iface
, &t
->sport
, 1);
1076 get_sockaddr((sockaddr
*) sa
, &t
->daddr
, &t
->iface
, &t
->dport
, 1);
1079 if (err
= sk_setup(t
))
1081 log(L_ERR
"Incoming connection: %s: %m", err
);
1089 else if (errno
!= EINTR
&& errno
!= EAGAIN
)
1091 s
->err_hook(s
, errno
);
1097 * sk_open - open a socket
1100 * This function takes a socket resource created by sk_new() and
1101 * initialized by the user and binds a corresponding network connection
1104 * Result: 0 for success, -1 for an error.
1113 int has_src
= ipa_nonzero(s
->saddr
) || s
->sport
;
1119 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1121 case SK_TCP_PASSIVE
:
1122 fd
= socket(BIRD_PF
, SOCK_STREAM
, IPPROTO_TCP
);
1125 fd
= socket(BIRD_PF
, SOCK_DGRAM
, IPPROTO_UDP
);
1128 fd
= socket(BIRD_PF
, SOCK_RAW
, s
->dport
);
1134 bug("sk_open() called for invalid sock type %d", type
);
1137 die("sk_open: socket: %m");
1140 if (err
= sk_setup(s
))
1152 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &one
, sizeof(one
)) < 0)
1153 ERR("SO_REUSEADDR");
1155 fill_in_sockaddr(&sa
, s
->saddr
, s
->iface
, port
);
1156 if (bind(fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) < 0)
1159 fill_in_sockaddr(&sa
, s
->daddr
, s
->iface
, s
->dport
);
1163 int rv
= sk_set_md5_auth_int(s
, &sa
, s
->password
);
1171 if (connect(fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) >= 0)
1172 sk_tcp_connected(s
);
1173 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
&&
1174 errno
!= ECONNREFUSED
&& errno
!= EHOSTUNREACH
&& errno
!= ENETUNREACH
)
1177 case SK_TCP_PASSIVE
:
1186 #ifdef IPV6_MTU_DISCOVER
1188 int dont
= IPV6_PMTUDISC_DONT
;
1189 if (setsockopt(fd
, SOL_IPV6
, IPV6_MTU_DISCOVER
, &dont
, sizeof(dont
)) < 0)
1190 ERR("IPV6_MTU_DISCOVER");
1196 int dont
= IP_PMTUDISC_DONT
;
1197 if (setsockopt(fd
, SOL_IP
, IP_PMTUDISC
, &dont
, sizeof(dont
)) < 0)
1208 log(L_ERR
"sk_open: %s: %m", err
);
1216 sk_open_unix(sock
*s
, char *name
)
1219 struct sockaddr_un sa
;
1222 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1226 if (err
= sk_setup(s
))
1230 /* Path length checked in test_old_bird() */
1231 sa
.sun_family
= AF_UNIX
;
1232 strcpy(sa
.sun_path
, name
);
1233 if (bind(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) < 0)
1241 log(L_ERR
"sk_open_unix: %s: %m", err
);
1242 die("Unable to create control socket %s", name
);
1245 static inline void reset_tx_buffer(sock
*s
) { s
->ttx
= s
->tpos
= s
->tbuf
; }
1248 sk_maybe_write(sock
*s
)
1257 while (s
->ttx
!= s
->tpos
)
1259 e
= write(s
->fd
, s
->ttx
, s
->tpos
- s
->ttx
);
1262 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1265 /* EPIPE is just a connection close notification during TX */
1266 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1278 if (s
->tbuf
== s
->tpos
)
1282 fill_in_sockaddr(&sa
, s
->daddr
, s
->iface
, s
->dport
);
1284 struct iovec iov
= {s
->tbuf
, s
->tpos
- s
->tbuf
};
1285 // byte cmsg_buf[CMSG_TX_SPACE];
1287 struct msghdr msg
= {
1289 .msg_namelen
= sizeof(sa
),
1293 // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1294 e
= sendmsg(s
->fd
, &msg
, 0);
1298 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1301 s
->err_hook(s
, errno
);
1310 bug("sk_maybe_write: unknown socket type %d", s
->type
);
1315 sk_rx_ready(sock
*s
)
1318 struct timeval timo
;
1329 rv
= select(s
->fd
+1, &rd
, &wr
, NULL
, &timo
);
1331 if ((rv
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
1338 * sk_send - send data to a socket
1340 * @len: number of bytes to send
1342 * This function sends @len bytes of data prepared in the
1343 * transmit buffer of the socket @s to the network connection.
1344 * If the packet can be sent immediately, it does so and returns
1345 * 1, else it queues the packet for later processing, returns 0
1346 * and calls the @tx_hook of the socket when the transmission
1350 sk_send(sock
*s
, unsigned len
)
1353 s
->tpos
= s
->tbuf
+ len
;
1354 return sk_maybe_write(s
);
1358 * sk_send_to - send data to a specific destination
1360 * @len: number of bytes to send
1361 * @addr: IP address to send the packet to
1362 * @port: port to send the packet to
1364 * This is a sk_send() replacement for connection-less packet sockets
1365 * which allows destination of the packet to be chosen dynamically.
1368 sk_send_to(sock
*s
, unsigned len
, ip_addr addr
, unsigned port
)
1373 s
->tpos
= s
->tbuf
+ len
;
1374 return sk_maybe_write(s
);
1379 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1380 ip_addr saddr, ip_addr daddr, unsigned dport)
1387 s->tpos = s->tbuf + len;
1388 return sk_maybe_write(s);
1397 case SK_TCP_PASSIVE
:
1400 return sk_passive_connected(s
, (struct sockaddr
*) &sa
, sizeof(sa
), SK_TCP
);
1402 case SK_UNIX_PASSIVE
:
1404 struct sockaddr_un sa
;
1405 return sk_passive_connected(s
, (struct sockaddr
*) &sa
, sizeof(sa
), SK_UNIX
);
1410 int c
= read(s
->fd
, s
->rpos
, s
->rbuf
+ s
->rbsize
- s
->rpos
);
1414 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1415 s
->err_hook(s
, errno
);
1422 if (s
->rx_hook(s
, s
->rpos
- s
->rbuf
))
1424 /* We need to be careful since the socket could have been deleted by the hook */
1425 if (current_sock
== s
)
1433 return s
->rx_hook(s
, 0);
1439 struct iovec iov
= {s
->rbuf
, s
->rbsize
};
1440 byte cmsg_buf
[CMSG_RX_SPACE
];
1442 struct msghdr msg
= {
1444 .msg_namelen
= sizeof(sa
),
1447 .msg_control
= cmsg_buf
,
1448 .msg_controllen
= sizeof(cmsg_buf
),
1451 e
= recvmsg(s
->fd
, &msg
, 0);
1455 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1456 s
->err_hook(s
, errno
);
1459 s
->rpos
= s
->rbuf
+ e
;
1460 get_sockaddr(&sa
, &s
->faddr
, NULL
, &s
->fport
, 1);
1461 sysio_process_rx_cmsgs(s
, &msg
);
1477 fill_in_sockaddr(&sa
, s
->daddr
, s
->iface
, s
->dport
);
1478 if (connect(s
->fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) >= 0 || errno
== EISCONN
)
1479 sk_tcp_connected(s
);
1480 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
)
1481 s
->err_hook(s
, errno
);
1485 if (s
->ttx
!= s
->tpos
&& sk_maybe_write(s
) > 0)
1500 debug("Open sockets:\n");
1501 WALK_LIST(n
, sock_list
)
1503 s
= SKIP_BACK(sock
, n
, n
);
1517 volatile int async_config_flag
; /* Asynchronous reconfiguration/dump scheduled */
1518 volatile int async_dump_flag
;
1523 init_list(&near_timers
);
1524 init_list(&far_timers
);
1525 init_list(&sock_list
);
1526 init_list(&global_event_list
);
1530 srandom((int) now_real
);
1533 static int short_loops
= 0;
1534 #define SHORT_LOOP_MAX 10
1540 struct timeval timo
;
1546 sock_recalc_fdsets_p
= 1;
1549 events
= ev_run_list(&global_event_list
);
1551 tout
= tm_first_shot();
1557 timo
.tv_sec
= events
? 0 : tout
- now
;
1560 if (sock_recalc_fdsets_p
)
1562 sock_recalc_fdsets_p
= 0;
1568 WALK_LIST(n
, sock_list
)
1570 s
= SKIP_BACK(sock
, n
, n
);
1579 if (s
->tx_hook
&& s
->ttx
!= s
->tpos
)
1590 * Yes, this is racy. But even if the signal comes before this test
1591 * and entering select(), it gets caught on the next timer tick.
1594 if (async_config_flag
)
1597 async_config_flag
= 0;
1600 if (async_dump_flag
)
1603 async_dump_flag
= 0;
1606 if (async_shutdown_flag
)
1609 async_shutdown_flag
= 0;
1613 /* And finally enter select() to find active sockets */
1614 hi
= select(hi
+1, &rd
, &wr
, NULL
, &timo
);
1618 if (errno
== EINTR
|| errno
== EAGAIN
)
1624 /* guaranteed to be non-empty */
1625 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
1627 while (current_sock
)
1629 sock
*s
= current_sock
;
1634 if ((s
->type
>= SK_MAGIC
) && FD_ISSET(s
->fd
, &rd
) && s
->rx_hook
)
1639 if (s
!= current_sock
)
1642 while (e
&& s
->rx_hook
&& steps
);
1645 if (FD_ISSET(s
->fd
, &wr
))
1650 if (s
!= current_sock
)
1654 current_sock
= sk_next(s
);
1659 if (events
&& (short_loops
< SHORT_LOOP_MAX
))
1664 current_sock
= stored_sock
;
1665 if (current_sock
== NULL
)
1666 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
1668 while (current_sock
&& count
< MAX_RX_STEPS
)
1670 sock
*s
= current_sock
;
1673 if ((s
->type
< SK_MAGIC
) && FD_ISSET(s
->fd
, &rd
) && s
->rx_hook
)
1677 if (s
!= current_sock
)
1680 current_sock
= sk_next(s
);
1684 stored_sock
= current_sock
;
1690 test_old_bird(char *path
)
1693 struct sockaddr_un sa
;
1695 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1697 die("Cannot create socket: %m");
1698 if (strlen(path
) >= sizeof(sa
.sun_path
))
1699 die("Socket path too long");
1700 bzero(&sa
, sizeof(sa
));
1701 sa
.sun_family
= AF_UNIX
;
1702 strcpy(sa
.sun_path
, path
);
1703 if (connect(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) == 0)
1704 die("I found another BIRD running.");