2 * BIRD Internet Routing Daemon -- Unix I/O
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
7 * Can be freely distributed and used under the terms of the GNU GPL.
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
18 #include <sys/types.h>
19 #include <sys/socket.h>
20 #include <sys/fcntl.h>
25 #include <netinet/in.h>
26 #include <netinet/icmp6.h>
28 #include "nest/bird.h"
29 #include "lib/lists.h"
30 #include "lib/resource.h"
31 #include "lib/timer.h"
32 #include "lib/socket.h"
33 #include "lib/event.h"
34 #include "lib/string.h"
35 #include "nest/iface.h"
38 #include "lib/sysio.h"
40 /* Maximum number of calls of tx handler for one socket in one
41 * select iteration. Should be small enough to not monopolize CPU by
42 * one protocol instance.
46 /* Maximum number of calls of rx handler for all sockets in one select
47 iteration. RX callbacks are often much more costly so we limit
48 this to get small latencies */
49 #define MAX_RX_STEPS 4
63 struct rfile
*a
= (struct rfile
*) r
;
71 struct rfile
*a
= (struct rfile
*) r
;
73 debug("(FILE *%p)\n", a
->f
);
76 static struct resclass rf_class
= {
86 tracked_fopen(pool
*p
, char *name
, char *mode
)
88 FILE *f
= fopen(name
, mode
);
92 struct rfile
*r
= ralloc(p
, &rf_class
);
101 * Timers are resources which represent a wish of a module to call
102 * a function at the specified time. The platform dependent code
103 * doesn't guarantee exact timing, only that a timer function
104 * won't be called before the requested time.
106 * In BIRD, time is represented by values of the &bird_clock_t type
107 * which are integral numbers interpreted as a relative number of seconds since
108 * some fixed time point in the past. The current time can be read
109 * from variable @now with reasonable accuracy and is monotonic. There is also
110 * a current 'absolute' time in variable @now_real reported by OS.
112 * Each timer is described by a &timer structure containing a pointer
113 * to the handler function (@hook), data private to this function (@data),
114 * time the function should be called at (@expires, 0 for inactive timers),
115 * for the other fields see |timer.h|.
118 #define NEAR_TIMER_LIMIT 4
120 static list near_timers
, far_timers
;
121 static bird_clock_t first_far_timer
= TIME_INFINITY
;
123 /* now must be different from 0, because 0 is a special value in timer->expires */
124 bird_clock_t now
= 1, now_real
, boot_time
;
127 update_times_plain(void)
129 bird_clock_t new_time
= time(NULL
);
130 int delta
= new_time
- now_real
;
132 if ((delta
>= 0) && (delta
< 60))
134 else if (now_real
!= 0)
135 log(L_WARN
"Time jump, delta %d s", delta
);
141 update_times_gettime(void)
146 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
148 die("clock_gettime: %m");
150 if (ts
.tv_sec
!= now
) {
152 log(L_ERR
"Monotonic timer is broken");
155 now_real
= time(NULL
);
159 static int clock_monotonic_available
;
164 if (clock_monotonic_available
)
165 update_times_gettime();
167 update_times_plain();
174 clock_monotonic_available
= (clock_gettime(CLOCK_MONOTONIC
, &ts
) == 0);
175 if (!clock_monotonic_available
)
176 log(L_WARN
"Monotonic timer is missing");
183 timer
*t
= (timer
*) r
;
191 timer
*t
= (timer
*) r
;
193 debug("(code %p, data %p, ", t
->hook
, t
->data
);
195 debug("rand %d, ", t
->randomize
);
197 debug("recur %d, ", t
->recurrent
);
199 debug("expires in %d sec)\n", t
->expires
- now
);
201 debug("inactive)\n");
204 static struct resclass tm_class
= {
214 * tm_new - create a timer
217 * This function creates a new timer resource and returns
218 * a pointer to it. To use the timer, you need to fill in
219 * the structure fields and call tm_start() to start timing.
224 timer
*t
= ralloc(p
, &tm_class
);
229 tm_insert_near(timer
*t
)
231 node
*n
= HEAD(near_timers
);
233 while (n
->next
&& (SKIP_BACK(timer
, n
, n
)->expires
< t
->expires
))
235 insert_node(&t
->n
, n
->prev
);
239 * tm_start - start a timer
241 * @after: number of seconds the timer should be run after
243 * This function schedules the hook function of the timer to
244 * be called after @after seconds. If the timer has already been
245 * started, its @expires time is replaced by the new value.
247 * If you have set the @randomize field of @t, the timeout
248 * will be increased by a random number of seconds chosen
249 * uniformly from range 0 .. @randomize.
251 * You can call tm_start() from the handler function of the timer
252 * to request another run of the timer. Also, you can set the @recurrent
253 * field to have the timer re-added automatically with the same timeout.
256 tm_start(timer
*t
, unsigned after
)
261 after
+= random() % (t
->randomize
+ 1);
263 if (t
->expires
== when
)
268 if (after
<= NEAR_TIMER_LIMIT
)
272 if (!first_far_timer
|| first_far_timer
> when
)
273 first_far_timer
= when
;
274 add_tail(&far_timers
, &t
->n
);
279 * tm_stop - stop a timer
282 * This function stops a timer. If the timer is already stopped,
296 tm_dump_them(char *name
, list
*l
)
301 debug("%s timers:\n", name
);
304 t
= SKIP_BACK(timer
, n
, n
);
314 tm_dump_them("Near", &near_timers
);
315 tm_dump_them("Far", &far_timers
);
321 time_t x
= first_far_timer
;
323 if (!EMPTY_LIST(near_timers
))
325 timer
*t
= SKIP_BACK(timer
, n
, HEAD(near_timers
));
338 if (first_far_timer
<= now
)
340 bird_clock_t limit
= now
+ NEAR_TIMER_LIMIT
;
341 first_far_timer
= TIME_INFINITY
;
342 n
= HEAD(far_timers
);
345 t
= SKIP_BACK(timer
, n
, n
);
346 if (t
->expires
<= limit
)
351 else if (t
->expires
< first_far_timer
)
352 first_far_timer
= t
->expires
;
356 while ((n
= HEAD(near_timers
)) -> next
)
359 t
= SKIP_BACK(timer
, n
, n
);
360 if (t
->expires
> now
)
363 delay
= t
->expires
- now
;
367 int i
= t
->recurrent
- delay
;
377 * tm_parse_datetime - parse a date and time
378 * @x: datetime string
380 * tm_parse_datetime() takes a textual representation of
381 * a date and time (dd-mm-yyyy hh:mm:ss)
382 * and converts it to the corresponding value of type &bird_clock_t.
385 tm_parse_datetime(char *x
)
391 if (sscanf(x
, "%d-%d-%d %d:%d:%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &tm
.tm_hour
, &tm
.tm_min
, &tm
.tm_sec
, &n
) != 6 || x
[n
])
392 return tm_parse_date(x
);
396 if (t
== (time_t) -1)
401 * tm_parse_date - parse a date
404 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
405 * and converts it to the corresponding value of type &bird_clock_t.
408 tm_parse_date(char *x
)
414 if (sscanf(x
, "%d-%d-%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &n
) != 3 || x
[n
])
418 tm
.tm_hour
= tm
.tm_min
= tm
.tm_sec
= 0;
420 if (t
== (time_t) -1)
426 tm_format_reltime(char *x
, struct tm
*tm
, bird_clock_t delta
)
428 static char *month_names
[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
429 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
432 bsprintf(x
, "%02d:%02d", tm
->tm_hour
, tm
->tm_min
);
433 else if (delta
< 360*86400)
434 bsprintf(x
, "%s%02d", month_names
[tm
->tm_mon
], tm
->tm_mday
);
436 bsprintf(x
, "%d", tm
->tm_year
+1900);
439 #include "conf/conf.h"
442 * tm_format_datetime - convert date and time to textual representation
443 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
446 * This function formats the given relative time value @t to a textual
447 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
450 tm_format_datetime(char *x
, struct timeformat
*fmt_spec
, bird_clock_t t
)
452 const char *fmt_used
;
454 bird_clock_t delta
= now
- t
;
455 t
= now_real
- delta
;
458 if (fmt_spec
->fmt1
== NULL
)
459 return tm_format_reltime(x
, tm
, delta
);
461 if ((fmt_spec
->limit
== 0) || (delta
< fmt_spec
->limit
))
462 fmt_used
= fmt_spec
->fmt1
;
464 fmt_used
= fmt_spec
->fmt2
;
466 int rv
= strftime(x
, TM_DATETIME_BUFFER_SIZE
, fmt_used
, tm
);
467 if (((rv
== 0) && fmt_used
[0]) || (rv
== TM_DATETIME_BUFFER_SIZE
))
468 strcpy(x
, "<too-long>");
474 * Socket resources represent network connections. Their data structure (&socket)
475 * contains a lot of fields defining the exact type of the socket, the local and
476 * remote addresses and ports, pointers to socket buffers and finally pointers to
477 * hook functions to be called when new data have arrived to the receive buffer
478 * (@rx_hook), when the contents of the transmit buffer have been transmitted
479 * (@tx_hook) and when an error or connection close occurs (@err_hook).
481 * Freeing of sockets from inside socket hooks is perfectly safe.
485 #define SOL_IP IPPROTO_IP
489 #define SOL_IPV6 IPPROTO_IPV6
492 static list sock_list
;
493 static struct birdsock
*current_sock
;
494 static struct birdsock
*stored_sock
;
495 static int sock_recalc_fdsets_p
;
500 if (!s
->n
.next
->next
)
503 return SKIP_BACK(sock
, n
, s
->n
.next
);
507 sk_alloc_bufs(sock
*s
)
509 if (!s
->rbuf
&& s
->rbsize
)
510 s
->rbuf
= s
->rbuf_alloc
= xmalloc(s
->rbsize
);
512 if (!s
->tbuf
&& s
->tbsize
)
513 s
->tbuf
= s
->tbuf_alloc
= xmalloc(s
->tbsize
);
514 s
->tpos
= s
->ttx
= s
->tbuf
;
518 sk_free_bufs(sock
*s
)
522 xfree(s
->rbuf_alloc
);
523 s
->rbuf
= s
->rbuf_alloc
= NULL
;
527 xfree(s
->tbuf_alloc
);
528 s
->tbuf
= s
->tbuf_alloc
= NULL
;
535 sock
*s
= (sock
*) r
;
541 if (s
== current_sock
)
542 current_sock
= sk_next(s
);
543 if (s
== stored_sock
)
544 stored_sock
= sk_next(s
);
546 sock_recalc_fdsets_p
= 1;
551 sk_reallocate(sock
*s
)
560 sock
*s
= (sock
*) r
;
561 static char *sk_type_names
[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
563 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
564 sk_type_names
[s
->type
],
572 s
->iface
? s
->iface
->name
: "none");
575 static struct resclass sk_class
= {
585 * sk_new - create a socket
588 * This function creates a new socket resource. If you want to use it,
589 * you need to fill in all the required fields of the structure and
590 * call sk_open() to do the actual opening of the socket.
592 * The real function name is sock_new(), sk_new() is a macro wrapper
593 * to avoid collision with OpenSSL.
598 sock
*s
= ralloc(p
, &sk_class
);
600 // s->saddr = s->daddr = IPA_NONE;
601 s
->tos
= s
->priority
= s
->ttl
= -1;
609 add_tail(&sock_list
, &s
->n
);
610 sock_recalc_fdsets_p
= 1;
616 fill_in_sockaddr(struct sockaddr_in6
*sa
, ip_addr a
, struct iface
*ifa
, unsigned port
)
618 memset(sa
, 0, sizeof (struct sockaddr_in6
));
619 sa
->sin6_family
= AF_INET6
;
620 sa
->sin6_port
= htons(port
);
621 sa
->sin6_flowinfo
= 0;
623 sa
->sin6_len
= sizeof(struct sockaddr_in6
);
625 set_inaddr(&sa
->sin6_addr
, a
);
627 if (ifa
&& ipa_has_link_scope(a
))
628 sa
->sin6_scope_id
= ifa
->index
;
632 get_sockaddr(struct sockaddr_in6
*sa
, ip_addr
*a
, struct iface
**ifa
, unsigned *port
, int check
)
634 if (check
&& sa
->sin6_family
!= AF_INET6
)
635 bug("get_sockaddr called for wrong address family (%d)", sa
->sin6_family
);
637 *port
= ntohs(sa
->sin6_port
);
638 memcpy(a
, &sa
->sin6_addr
, sizeof(*a
));
641 if (ifa
&& ipa_has_link_scope(*a
))
642 *ifa
= if_find_by_index(sa
->sin6_scope_id
);
648 fill_in_sockaddr(struct sockaddr_in
*sa
, ip_addr a
, struct iface
*ifa
, unsigned port
)
650 memset (sa
, 0, sizeof (struct sockaddr_in
));
651 sa
->sin_family
= AF_INET
;
652 sa
->sin_port
= htons(port
);
654 sa
->sin_len
= sizeof(struct sockaddr_in
);
656 set_inaddr(&sa
->sin_addr
, a
);
660 get_sockaddr(struct sockaddr_in
*sa
, ip_addr
*a
, struct iface
**ifa
, unsigned *port
, int check
)
662 if (check
&& sa
->sin_family
!= AF_INET
)
663 bug("get_sockaddr called for wrong address family (%d)", sa
->sin_family
);
665 *port
= ntohs(sa
->sin_port
);
666 memcpy(a
, &sa
->sin_addr
.s_addr
, sizeof(*a
));
675 /* PKTINFO handling is also standardized in IPv6 */
676 #define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
677 #define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
680 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
681 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
682 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
683 * RFC and we use IPV6_PKTINFO.
685 #ifndef IPV6_RECVPKTINFO
686 #define IPV6_RECVPKTINFO IPV6_PKTINFO
690 sysio_register_cmsgs(sock
*s
)
693 if ((s
->flags
& SKF_LADDR_RX
) &&
694 setsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, &ok
, sizeof(ok
)) < 0)
695 return "IPV6_RECVPKTINFO";
701 sysio_process_rx_cmsgs(sock
*s
, struct msghdr
*msg
)
704 struct in6_pktinfo
*pi
= NULL
;
706 if (!(s
->flags
& SKF_LADDR_RX
))
709 for (cm
= CMSG_FIRSTHDR(msg
); cm
!= NULL
; cm
= CMSG_NXTHDR(msg
, cm
))
711 if (cm
->cmsg_level
== IPPROTO_IPV6
&& cm
->cmsg_type
== IPV6_PKTINFO
)
712 pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
722 get_inaddr(&s
->laddr
, &pi
->ipi6_addr
);
723 s
->lifindex
= pi
->ipi6_ifindex
;
729 sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
732 struct in6_pktinfo *pi;
734 if (!(s->flags & SKF_LADDR_TX))
737 msg->msg_control = cbuf;
738 msg->msg_controllen = cbuflen;
740 cm = CMSG_FIRSTHDR(msg);
741 cm->cmsg_level = IPPROTO_IPV6;
742 cm->cmsg_type = IPV6_PKTINFO;
743 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
745 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
746 set_inaddr(&pi->ipi6_addr, s->saddr);
747 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
749 msg->msg_controllen = cm->cmsg_len;
756 sk_set_ttl_int(sock
*s
)
759 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_UNICAST_HOPS
, &s
->ttl
, sizeof(s
->ttl
)) < 0)
760 return "IPV6_UNICAST_HOPS";
762 if (setsockopt(s
->fd
, SOL_IP
, IP_TTL
, &s
->ttl
, sizeof(s
->ttl
)) < 0)
764 #ifdef CONFIG_UNIX_DONTROUTE
766 if (s
->ttl
== 1 && setsockopt(s
->fd
, SOL_SOCKET
, SO_DONTROUTE
, &one
, sizeof(one
)) < 0)
767 return "SO_DONTROUTE";
773 #define ERR(x) do { err = x; goto bad; } while(0)
774 #define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
782 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
783 ERR("fcntl(O_NONBLOCK)");
784 if (s
->type
== SK_UNIX
)
788 if ((s
->tos
>= 0) && setsockopt(fd
, SOL_IPV6
, IPV6_TCLASS
, &s
->tos
, sizeof(s
->tos
)) < 0)
791 if ((s
->tos
>= 0) && setsockopt(fd
, SOL_IP
, IP_TOS
, &s
->tos
, sizeof(s
->tos
)) < 0)
795 if (s
->priority
>= 0)
796 sk_set_priority(s
, s
->priority
);
800 if ((s
->flags
& SKF_V6ONLY
) && setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &v
, sizeof(v
)) < 0)
805 err
= sk_set_ttl_int(s
);
807 sysio_register_cmsgs(s
);
813 * sk_set_ttl - set transmit TTL for given socket.
817 * Set TTL for already opened connections when TTL was not set before.
818 * Useful for accepted connections when different ones should have
821 * Result: 0 for success, -1 for an error.
825 sk_set_ttl(sock
*s
, int ttl
)
830 if (err
= sk_set_ttl_int(s
))
831 log(L_ERR
"sk_set_ttl: %s: %m", err
);
833 return (err
? -1 : 0);
837 * sk_set_min_ttl - set minimal accepted TTL for given socket.
841 * Can be used in TTL security implementation
843 * Result: 0 for success, -1 for an error.
847 sk_set_min_ttl(sock
*s
, int ttl
)
851 err
= sk_set_min_ttl6(s
, ttl
);
853 err
= sk_set_min_ttl4(s
, ttl
);
860 * sk_set_md5_auth - add / remove MD5 security association for given socket.
862 * @a: IP address of the other side
863 * @ifa: Interface for link-local IP address
864 * @passwd: password used for MD5 authentication
866 * In TCP MD5 handling code in kernel, there is a set of pairs
867 * (address, password) used to choose password according to
868 * address of the other side. This function is useful for
869 * listening socket, for active sockets it is enough to set
872 * When called with passwd != NULL, the new pair is added,
873 * When called with passwd == NULL, the existing pair is removed.
875 * Result: 0 for success, -1 for an error.
879 sk_set_md5_auth(sock
*s
, ip_addr a
, struct iface
*ifa
, char *passwd
)
882 fill_in_sockaddr(&sa
, a
, ifa
, 0);
883 return sk_set_md5_auth_int(s
, &sa
, passwd
);
887 sk_set_broadcast(sock
*s
, int enable
)
889 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BROADCAST
, &enable
, sizeof(enable
)) < 0)
891 log(L_ERR
"sk_set_broadcast: SO_BROADCAST: %m");
902 sk_set_ipv6_checksum(sock
*s
, int offset
)
904 if (setsockopt(s
->fd
, IPPROTO_IPV6
, IPV6_CHECKSUM
, &offset
, sizeof(offset
)) < 0)
906 log(L_ERR
"sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
914 sk_set_icmp_filter(sock
*s
, int p1
, int p2
)
916 /* a bit of lame interface, but it is here only for Radv */
917 struct icmp6_filter f
;
919 ICMP6_FILTER_SETBLOCKALL(&f
);
920 ICMP6_FILTER_SETPASS(p1
, &f
);
921 ICMP6_FILTER_SETPASS(p2
, &f
);
923 if (setsockopt(s
->fd
, IPPROTO_ICMPV6
, ICMP6_FILTER
, &f
, sizeof(f
)) < 0)
925 log(L_ERR
"sk_setup_icmp_filter: ICMP6_FILTER: %m");
933 sk_setup_multicast(sock
*s
)
939 ASSERT(s
->iface
&& s
->iface
->addr
);
941 index
= s
->iface
->index
;
942 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_HOPS
, &s
->ttl
, sizeof(s
->ttl
)) < 0)
943 ERR("IPV6_MULTICAST_HOPS");
944 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_LOOP
, &zero
, sizeof(zero
)) < 0)
945 ERR("IPV6_MULTICAST_LOOP");
946 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_IF
, &index
, sizeof(index
)) < 0)
947 ERR("IPV6_MULTICAST_IF");
949 if (err
= sysio_bind_to_iface(s
))
955 log(L_ERR
"sk_setup_multicast: %s: %m", err
);
960 sk_join_group(sock
*s
, ip_addr maddr
)
962 struct ipv6_mreq mreq
;
964 set_inaddr(&mreq
.ipv6mr_multiaddr
, maddr
);
966 #ifdef CONFIG_IPV6_GLIBC_20
967 mreq
.ipv6mr_ifindex
= s
->iface
->index
;
969 mreq
.ipv6mr_interface
= s
->iface
->index
;
972 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_JOIN_GROUP
, &mreq
, sizeof(mreq
)) < 0)
974 log(L_ERR
"sk_join_group: IPV6_JOIN_GROUP: %m");
982 sk_leave_group(sock
*s
, ip_addr maddr
)
984 struct ipv6_mreq mreq
;
986 set_inaddr(&mreq
.ipv6mr_multiaddr
, maddr
);
988 #ifdef CONFIG_IPV6_GLIBC_20
989 mreq
.ipv6mr_ifindex
= s
->iface
->index
;
991 mreq
.ipv6mr_interface
= s
->iface
->index
;
994 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_LEAVE_GROUP
, &mreq
, sizeof(mreq
)) < 0)
996 log(L_ERR
"sk_leave_group: IPV6_LEAVE_GROUP: %m");
1006 sk_setup_multicast(sock
*s
)
1010 ASSERT(s
->iface
&& s
->iface
->addr
);
1012 if (err
= sysio_setup_multicast(s
))
1014 log(L_ERR
"sk_setup_multicast: %s: %m", err
);
1022 sk_join_group(sock
*s
, ip_addr maddr
)
1026 if (err
= sysio_join_group(s
, maddr
))
1028 log(L_ERR
"sk_join_group: %s: %m", err
);
1036 sk_leave_group(sock
*s
, ip_addr maddr
)
1040 if (err
= sysio_leave_group(s
, maddr
))
1042 log(L_ERR
"sk_leave_group: %s: %m", err
);
1053 sk_tcp_connected(sock
*s
)
1056 int lsa_len
= sizeof(lsa
);
1057 if (getsockname(s
->fd
, (struct sockaddr
*) &lsa
, &lsa_len
) == 0)
1058 get_sockaddr(&lsa
, &s
->saddr
, &s
->iface
, &s
->sport
, 1);
1066 sk_passive_connected(sock
*s
, struct sockaddr
*sa
, int al
, int type
)
1068 int fd
= accept(s
->fd
, sa
, &al
);
1071 sock
*t
= sk_new(s
->pool
);
1077 t
->rbsize
= s
->rbsize
;
1078 t
->tbsize
= s
->tbsize
;
1082 int lsa_len
= sizeof(lsa
);
1083 if (getsockname(fd
, (struct sockaddr
*) &lsa
, &lsa_len
) == 0)
1084 get_sockaddr(&lsa
, &t
->saddr
, &t
->iface
, &t
->sport
, 1);
1086 get_sockaddr((sockaddr
*) sa
, &t
->daddr
, &t
->iface
, &t
->dport
, 1);
1089 if (err
= sk_setup(t
))
1091 log(L_ERR
"Incoming connection: %s: %m", err
);
1099 else if (errno
!= EINTR
&& errno
!= EAGAIN
)
1101 s
->err_hook(s
, errno
);
1107 * sk_open - open a socket
1110 * This function takes a socket resource created by sk_new() and
1111 * initialized by the user and binds a corresponding network connection
1114 * Result: 0 for success, -1 for an error.
1123 int has_src
= ipa_nonzero(s
->saddr
) || s
->sport
;
1129 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1131 case SK_TCP_PASSIVE
:
1132 fd
= socket(BIRD_PF
, SOCK_STREAM
, IPPROTO_TCP
);
1135 fd
= socket(BIRD_PF
, SOCK_DGRAM
, IPPROTO_UDP
);
1138 fd
= socket(BIRD_PF
, SOCK_RAW
, s
->dport
);
1144 bug("sk_open() called for invalid sock type %d", type
);
1147 die("sk_open: socket: %m");
1150 if (err
= sk_setup(s
))
1162 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &one
, sizeof(one
)) < 0)
1163 ERR("SO_REUSEADDR");
1165 fill_in_sockaddr(&sa
, s
->saddr
, s
->iface
, port
);
1166 if (bind(fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) < 0)
1169 fill_in_sockaddr(&sa
, s
->daddr
, s
->iface
, s
->dport
);
1173 int rv
= sk_set_md5_auth_int(s
, &sa
, s
->password
);
1181 if (connect(fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) >= 0)
1182 sk_tcp_connected(s
);
1183 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
&&
1184 errno
!= ECONNREFUSED
&& errno
!= EHOSTUNREACH
&& errno
!= ENETUNREACH
)
1187 case SK_TCP_PASSIVE
:
1196 #ifdef IPV6_MTU_DISCOVER
1198 int dont
= IPV6_PMTUDISC_DONT
;
1199 if (setsockopt(fd
, SOL_IPV6
, IPV6_MTU_DISCOVER
, &dont
, sizeof(dont
)) < 0)
1200 ERR("IPV6_MTU_DISCOVER");
1206 int dont
= IP_PMTUDISC_DONT
;
1207 if (setsockopt(fd
, SOL_IP
, IP_PMTUDISC
, &dont
, sizeof(dont
)) < 0)
1218 log(L_ERR
"sk_open: %s: %m", err
);
1226 sk_open_unix(sock
*s
, char *name
)
1229 struct sockaddr_un sa
;
1232 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1236 if (err
= sk_setup(s
))
1240 /* Path length checked in test_old_bird() */
1241 sa
.sun_family
= AF_UNIX
;
1242 strcpy(sa
.sun_path
, name
);
1243 if (bind(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) < 0)
1251 log(L_ERR
"sk_open_unix: %s: %m", err
);
1252 die("Unable to create control socket %s", name
);
1255 static inline void reset_tx_buffer(sock
*s
) { s
->ttx
= s
->tpos
= s
->tbuf
; }
1258 sk_maybe_write(sock
*s
)
1267 while (s
->ttx
!= s
->tpos
)
1269 e
= write(s
->fd
, s
->ttx
, s
->tpos
- s
->ttx
);
1272 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1275 /* EPIPE is just a connection close notification during TX */
1276 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1288 if (s
->tbuf
== s
->tpos
)
1292 fill_in_sockaddr(&sa
, s
->daddr
, s
->iface
, s
->dport
);
1294 struct iovec iov
= {s
->tbuf
, s
->tpos
- s
->tbuf
};
1295 // byte cmsg_buf[CMSG_TX_SPACE];
1297 struct msghdr msg
= {
1299 .msg_namelen
= sizeof(sa
),
1303 // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1304 e
= sendmsg(s
->fd
, &msg
, 0);
1308 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1311 s
->err_hook(s
, errno
);
1320 bug("sk_maybe_write: unknown socket type %d", s
->type
);
1325 sk_rx_ready(sock
*s
)
1328 struct timeval timo
;
1339 rv
= select(s
->fd
+1, &rd
, &wr
, NULL
, &timo
);
1341 if ((rv
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
1348 * sk_send - send data to a socket
1350 * @len: number of bytes to send
1352 * This function sends @len bytes of data prepared in the
1353 * transmit buffer of the socket @s to the network connection.
1354 * If the packet can be sent immediately, it does so and returns
1355 * 1, else it queues the packet for later processing, returns 0
1356 * and calls the @tx_hook of the socket when the transmission
1360 sk_send(sock
*s
, unsigned len
)
1363 s
->tpos
= s
->tbuf
+ len
;
1364 return sk_maybe_write(s
);
1368 * sk_send_to - send data to a specific destination
1370 * @len: number of bytes to send
1371 * @addr: IP address to send the packet to
1372 * @port: port to send the packet to
1374 * This is a sk_send() replacement for connection-less packet sockets
1375 * which allows destination of the packet to be chosen dynamically.
1378 sk_send_to(sock
*s
, unsigned len
, ip_addr addr
, unsigned port
)
1383 s
->tpos
= s
->tbuf
+ len
;
1384 return sk_maybe_write(s
);
1389 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1390 ip_addr saddr, ip_addr daddr, unsigned dport)
1397 s->tpos = s->tbuf + len;
1398 return sk_maybe_write(s);
1407 case SK_TCP_PASSIVE
:
1410 return sk_passive_connected(s
, (struct sockaddr
*) &sa
, sizeof(sa
), SK_TCP
);
1412 case SK_UNIX_PASSIVE
:
1414 struct sockaddr_un sa
;
1415 return sk_passive_connected(s
, (struct sockaddr
*) &sa
, sizeof(sa
), SK_UNIX
);
1420 int c
= read(s
->fd
, s
->rpos
, s
->rbuf
+ s
->rbsize
- s
->rpos
);
1424 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1425 s
->err_hook(s
, errno
);
1432 if (s
->rx_hook(s
, s
->rpos
- s
->rbuf
))
1434 /* We need to be careful since the socket could have been deleted by the hook */
1435 if (current_sock
== s
)
1443 return s
->rx_hook(s
, 0);
1449 struct iovec iov
= {s
->rbuf
, s
->rbsize
};
1450 byte cmsg_buf
[CMSG_RX_SPACE
];
1452 struct msghdr msg
= {
1454 .msg_namelen
= sizeof(sa
),
1457 .msg_control
= cmsg_buf
,
1458 .msg_controllen
= sizeof(cmsg_buf
),
1461 e
= recvmsg(s
->fd
, &msg
, 0);
1465 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1466 s
->err_hook(s
, errno
);
1469 s
->rpos
= s
->rbuf
+ e
;
1470 get_sockaddr(&sa
, &s
->faddr
, NULL
, &s
->fport
, 1);
1471 sysio_process_rx_cmsgs(s
, &msg
);
1487 fill_in_sockaddr(&sa
, s
->daddr
, s
->iface
, s
->dport
);
1488 if (connect(s
->fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) >= 0 || errno
== EISCONN
)
1489 sk_tcp_connected(s
);
1490 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
)
1491 s
->err_hook(s
, errno
);
1495 if (s
->ttx
!= s
->tpos
&& sk_maybe_write(s
) > 0)
1510 debug("Open sockets:\n");
1511 WALK_LIST(n
, sock_list
)
1513 s
= SKIP_BACK(sock
, n
, n
);
1527 volatile int async_config_flag
; /* Asynchronous reconfiguration/dump scheduled */
1528 volatile int async_dump_flag
;
1533 init_list(&near_timers
);
1534 init_list(&far_timers
);
1535 init_list(&sock_list
);
1536 init_list(&global_event_list
);
1541 srandom((int) now_real
);
1544 static int short_loops
= 0;
1545 #define SHORT_LOOP_MAX 10
1551 struct timeval timo
;
1557 sock_recalc_fdsets_p
= 1;
1560 events
= ev_run_list(&global_event_list
);
1562 tout
= tm_first_shot();
1568 timo
.tv_sec
= events
? 0 : MIN(tout
- now
, 3);
1571 if (sock_recalc_fdsets_p
)
1573 sock_recalc_fdsets_p
= 0;
1579 WALK_LIST(n
, sock_list
)
1581 s
= SKIP_BACK(sock
, n
, n
);
1590 if (s
->tx_hook
&& s
->ttx
!= s
->tpos
)
1601 * Yes, this is racy. But even if the signal comes before this test
1602 * and entering select(), it gets caught on the next timer tick.
1605 if (async_config_flag
)
1608 async_config_flag
= 0;
1611 if (async_dump_flag
)
1614 async_dump_flag
= 0;
1617 if (async_shutdown_flag
)
1620 async_shutdown_flag
= 0;
1624 /* And finally enter select() to find active sockets */
1625 hi
= select(hi
+1, &rd
, &wr
, NULL
, &timo
);
1629 if (errno
== EINTR
|| errno
== EAGAIN
)
1635 /* guaranteed to be non-empty */
1636 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
1638 while (current_sock
)
1640 sock
*s
= current_sock
;
1645 if ((s
->type
>= SK_MAGIC
) && FD_ISSET(s
->fd
, &rd
) && s
->rx_hook
)
1650 if (s
!= current_sock
)
1653 while (e
&& s
->rx_hook
&& steps
);
1656 if (FD_ISSET(s
->fd
, &wr
))
1661 if (s
!= current_sock
)
1665 current_sock
= sk_next(s
);
1670 if (events
&& (short_loops
< SHORT_LOOP_MAX
))
1675 current_sock
= stored_sock
;
1676 if (current_sock
== NULL
)
1677 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
1679 while (current_sock
&& count
< MAX_RX_STEPS
)
1681 sock
*s
= current_sock
;
1684 if ((s
->type
< SK_MAGIC
) && FD_ISSET(s
->fd
, &rd
) && s
->rx_hook
)
1688 if (s
!= current_sock
)
1691 current_sock
= sk_next(s
);
1695 stored_sock
= current_sock
;
1701 test_old_bird(char *path
)
1704 struct sockaddr_un sa
;
1706 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1708 die("Cannot create socket: %m");
1709 if (strlen(path
) >= sizeof(sa
.sun_path
))
1710 die("Socket path too long");
1711 bzero(&sa
, sizeof(sa
));
1712 sa
.sun_family
= AF_UNIX
;
1713 strcpy(sa
.sun_path
, path
);
1714 if (connect(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) == 0)
1715 die("I found another BIRD running.");