2 * BIRD Internet Routing Daemon -- Unix I/O
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
7 * Can be freely distributed and used under the terms of the GNU GPL.
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
20 #include <sys/types.h>
21 #include <sys/socket.h>
29 #include <netinet/in.h>
30 #include <netinet/tcp.h>
31 #include <netinet/udp.h>
32 #include <netinet/icmp6.h>
34 #include "nest/bird.h"
35 #include "lib/lists.h"
36 #include "lib/resource.h"
37 #include "lib/timer.h"
38 #include "lib/socket.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "nest/iface.h"
44 #include "lib/sysio.h"
46 /* Maximum number of calls of tx handler for one socket in one
47 * poll iteration. Should be small enough to not monopolize CPU by
48 * one protocol instance.
52 /* Maximum number of calls of rx handler for all sockets in one poll
53 iteration. RX callbacks are often much more costly so we limit
54 this to get small latencies */
55 #define MAX_RX_STEPS 4
69 struct rfile
*a
= (struct rfile
*) r
;
77 struct rfile
*a
= (struct rfile
*) r
;
79 debug("(FILE *%p)\n", a
->f
);
82 static struct resclass rf_class
= {
92 tracked_fopen(pool
*p
, char *name
, char *mode
)
94 FILE *f
= fopen(name
, mode
);
98 struct rfile
*r
= ralloc(p
, &rf_class
);
107 * Timers are resources which represent a wish of a module to call
108 * a function at the specified time. The platform dependent code
109 * doesn't guarantee exact timing, only that a timer function
110 * won't be called before the requested time.
112 * In BIRD, time is represented by values of the &bird_clock_t type
113 * which are integral numbers interpreted as a relative number of seconds since
114 * some fixed time point in the past. The current time can be read
115 * from variable @now with reasonable accuracy and is monotonic. There is also
116 * a current 'absolute' time in variable @now_real reported by OS.
118 * Each timer is described by a &timer structure containing a pointer
119 * to the handler function (@hook), data private to this function (@data),
120 * time the function should be called at (@expires, 0 for inactive timers),
121 * for the other fields see |timer.h|.
124 #define NEAR_TIMER_LIMIT 4
126 static list near_timers
, far_timers
;
127 static bird_clock_t first_far_timer
= TIME_INFINITY
;
129 /* now must be different from 0, because 0 is a special value in timer->expires */
130 bird_clock_t now
= 1, now_real
, boot_time
;
133 update_times_plain(void)
135 bird_clock_t new_time
= time(NULL
);
136 int delta
= new_time
- now_real
;
138 if ((delta
>= 0) && (delta
< 60))
140 else if (now_real
!= 0)
141 log(L_WARN
"Time jump, delta %d s", delta
);
147 update_times_gettime(void)
152 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
154 die("clock_gettime: %m");
156 if (ts
.tv_sec
!= now
) {
158 log(L_ERR
"Monotonic timer is broken");
161 now_real
= time(NULL
);
165 static int clock_monotonic_available
;
170 if (clock_monotonic_available
)
171 update_times_gettime();
173 update_times_plain();
180 clock_monotonic_available
= (clock_gettime(CLOCK_MONOTONIC
, &ts
) == 0);
181 if (!clock_monotonic_available
)
182 log(L_WARN
"Monotonic timer is missing");
189 timer
*t
= (timer
*) r
;
197 timer
*t
= (timer
*) r
;
199 debug("(code %p, data %p, ", t
->hook
, t
->data
);
201 debug("rand %d, ", t
->randomize
);
203 debug("recur %d, ", t
->recurrent
);
205 debug("expires in %d sec)\n", t
->expires
- now
);
207 debug("inactive)\n");
210 static struct resclass tm_class
= {
220 * tm_new - create a timer
223 * This function creates a new timer resource and returns
224 * a pointer to it. To use the timer, you need to fill in
225 * the structure fields and call tm_start() to start timing.
230 timer
*t
= ralloc(p
, &tm_class
);
235 tm_insert_near(timer
*t
)
237 node
*n
= HEAD(near_timers
);
239 while (n
->next
&& (SKIP_BACK(timer
, n
, n
)->expires
< t
->expires
))
241 insert_node(&t
->n
, n
->prev
);
245 * tm_start - start a timer
247 * @after: number of seconds the timer should be run after
249 * This function schedules the hook function of the timer to
250 * be called after @after seconds. If the timer has been already
251 * started, its @expires time is replaced by the new value.
253 * If you have set the @randomize field of @t, the timeout
254 * will be increased by a random number of seconds chosen
255 * uniformly from range 0 .. @randomize.
257 * You can call tm_start() from the handler function of the timer
258 * to request another run of the timer. Also, you can set the @recurrent
259 * field to have the timer re-added automatically with the same timeout.
262 tm_start(timer
*t
, unsigned after
)
267 after
+= random() % (t
->randomize
+ 1);
269 if (t
->expires
== when
)
274 if (after
<= NEAR_TIMER_LIMIT
)
278 if (!first_far_timer
|| first_far_timer
> when
)
279 first_far_timer
= when
;
280 add_tail(&far_timers
, &t
->n
);
285 * tm_stop - stop a timer
288 * This function stops a timer. If the timer is already stopped,
302 tm_dump_them(char *name
, list
*l
)
307 debug("%s timers:\n", name
);
310 t
= SKIP_BACK(timer
, n
, n
);
320 tm_dump_them("Near", &near_timers
);
321 tm_dump_them("Far", &far_timers
);
327 time_t x
= first_far_timer
;
329 if (!EMPTY_LIST(near_timers
))
331 timer
*t
= SKIP_BACK(timer
, n
, HEAD(near_timers
));
338 void io_log_event(void *hook
, void *data
);
346 if (first_far_timer
<= now
)
348 bird_clock_t limit
= now
+ NEAR_TIMER_LIMIT
;
349 first_far_timer
= TIME_INFINITY
;
350 n
= HEAD(far_timers
);
353 t
= SKIP_BACK(timer
, n
, n
);
354 if (t
->expires
<= limit
)
359 else if (t
->expires
< first_far_timer
)
360 first_far_timer
= t
->expires
;
364 while ((n
= HEAD(near_timers
)) -> next
)
367 t
= SKIP_BACK(timer
, n
, n
);
368 if (t
->expires
> now
)
371 delay
= t
->expires
- now
;
375 int i
= t
->recurrent
- delay
;
380 io_log_event(t
->hook
, t
->data
);
386 * tm_parse_datetime - parse a date and time
387 * @x: datetime string
389 * tm_parse_datetime() takes a textual representation of
390 * a date and time (dd-mm-yyyy hh:mm:ss)
391 * and converts it to the corresponding value of type &bird_clock_t.
394 tm_parse_datetime(char *x
)
400 if (sscanf(x
, "%d-%d-%d %d:%d:%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &tm
.tm_hour
, &tm
.tm_min
, &tm
.tm_sec
, &n
) != 6 || x
[n
])
401 return tm_parse_date(x
);
405 if (t
== (time_t) -1)
410 * tm_parse_date - parse a date
413 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
414 * and converts it to the corresponding value of type &bird_clock_t.
417 tm_parse_date(char *x
)
423 if (sscanf(x
, "%d-%d-%d%n", &tm
.tm_mday
, &tm
.tm_mon
, &tm
.tm_year
, &n
) != 3 || x
[n
])
427 tm
.tm_hour
= tm
.tm_min
= tm
.tm_sec
= 0;
429 if (t
== (time_t) -1)
435 tm_format_reltime(char *x
, struct tm
*tm
, bird_clock_t delta
)
437 static char *month_names
[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
438 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
441 bsprintf(x
, "%02d:%02d", tm
->tm_hour
, tm
->tm_min
);
442 else if (delta
< 360*86400)
443 bsprintf(x
, "%s%02d", month_names
[tm
->tm_mon
], tm
->tm_mday
);
445 bsprintf(x
, "%d", tm
->tm_year
+1900);
448 #include "conf/conf.h"
451 * tm_format_datetime - convert date and time to textual representation
452 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
453 * @fmt_spec: specification of resulting textual representation of the time
456 * This function formats the given relative time value @t to a textual
457 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
460 tm_format_datetime(char *x
, struct timeformat
*fmt_spec
, bird_clock_t t
)
462 const char *fmt_used
;
464 bird_clock_t delta
= now
- t
;
465 t
= now_real
- delta
;
468 if (fmt_spec
->fmt1
== NULL
)
469 return tm_format_reltime(x
, tm
, delta
);
471 if ((fmt_spec
->limit
== 0) || (delta
< fmt_spec
->limit
))
472 fmt_used
= fmt_spec
->fmt1
;
474 fmt_used
= fmt_spec
->fmt2
;
476 int rv
= strftime(x
, TM_DATETIME_BUFFER_SIZE
, fmt_used
, tm
);
477 if (((rv
== 0) && fmt_used
[0]) || (rv
== TM_DATETIME_BUFFER_SIZE
))
478 strcpy(x
, "<too-long>");
485 * Socket resources represent network connections. Their data structure (&socket)
486 * contains a lot of fields defining the exact type of the socket, the local and
487 * remote addresses and ports, pointers to socket buffers and finally pointers to
488 * hook functions to be called when new data have arrived to the receive buffer
489 * (@rx_hook), when the contents of the transmit buffer have been transmitted
490 * (@tx_hook) and when an error or connection close occurs (@err_hook).
492 * Freeing of sockets from inside socket hooks is perfectly safe.
496 #define SOL_IP IPPROTO_IP
500 #define SOL_IPV6 IPPROTO_IPV6
504 #define SOL_ICMPV6 IPPROTO_ICMPV6
509 * Sockaddr helper functions
512 static inline int UNUSED
sockaddr_length(int af
)
513 { return (af
== AF_INET
) ? sizeof(struct sockaddr_in
) : sizeof(struct sockaddr_in6
); }
516 sockaddr_fill4(struct sockaddr_in
*sa
, ip_addr a
, uint port
)
518 memset(sa
, 0, sizeof(struct sockaddr_in
));
519 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
520 sa
->sin_len
= sizeof(struct sockaddr_in
);
522 sa
->sin_family
= AF_INET
;
523 sa
->sin_port
= htons(port
);
524 sa
->sin_addr
= ipa_to_in4(a
);
528 sockaddr_fill6(struct sockaddr_in6
*sa
, ip_addr a
, struct iface
*ifa
, uint port
)
530 memset(sa
, 0, sizeof(struct sockaddr_in6
));
532 sa
->sin6_len
= sizeof(struct sockaddr_in6
);
534 sa
->sin6_family
= AF_INET6
;
535 sa
->sin6_port
= htons(port
);
536 sa
->sin6_flowinfo
= 0;
537 sa
->sin6_addr
= ipa_to_in6(a
);
539 if (ifa
&& ipa_is_link_local(a
))
540 sa
->sin6_scope_id
= ifa
->index
;
544 sockaddr_fill(sockaddr
*sa
, int af
, ip_addr a
, struct iface
*ifa
, uint port
)
547 sockaddr_fill4((struct sockaddr_in
*) sa
, a
, port
);
548 else if (af
== AF_INET6
)
549 sockaddr_fill6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
555 sockaddr_read4(struct sockaddr_in
*sa
, ip_addr
*a
, uint
*port
)
557 *port
= ntohs(sa
->sin_port
);
558 *a
= ipa_from_in4(sa
->sin_addr
);
562 sockaddr_read6(struct sockaddr_in6
*sa
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
564 *port
= ntohs(sa
->sin6_port
);
565 *a
= ipa_from_in6(sa
->sin6_addr
);
567 if (ifa
&& ipa_is_link_local(*a
))
568 *ifa
= if_find_by_index(sa
->sin6_scope_id
);
572 sockaddr_read(sockaddr
*sa
, int af
, ip_addr
*a
, struct iface
**ifa
, uint
*port
)
574 if (sa
->sa
.sa_family
!= af
)
578 sockaddr_read4((struct sockaddr_in
*) sa
, a
, port
);
579 else if (af
== AF_INET6
)
580 sockaddr_read6((struct sockaddr_in6
*) sa
, a
, ifa
, port
);
594 * IPv6 multicast syscalls
597 /* Fortunately standardized in RFC 3493 */
/*
 * Brace initializer for a struct ipv6_mreq describing multicast group
 * @maddr on interface @ifa (a pointer to a structure with an @index field).
 * The @ifa argument is parenthesized so that any pointer-valued expression
 * can be passed, not only a plain postfix expression.
 */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
603 sk_setup_multicast6(sock
*s
)
605 int index
= s
->iface
->index
;
609 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_IF
, &index
, sizeof(index
)) < 0)
610 ERR("IPV6_MULTICAST_IF");
612 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
613 ERR("IPV6_MULTICAST_HOPS");
615 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_MULTICAST_LOOP
, &n
, sizeof(n
)) < 0)
616 ERR("IPV6_MULTICAST_LOOP");
622 sk_join_group6(sock
*s
, ip_addr maddr
)
624 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
626 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_JOIN_GROUP
, &mr
, sizeof(mr
)) < 0)
627 ERR("IPV6_JOIN_GROUP");
633 sk_leave_group6(sock
*s
, ip_addr maddr
)
635 struct ipv6_mreq mr
= INIT_MREQ6(maddr
, s
->iface
);
637 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_LEAVE_GROUP
, &mr
, sizeof(mr
)) < 0)
638 ERR("IPV6_LEAVE_GROUP");
645 * IPv6 packet control messages
648 /* Also standardized, in RFC 3542 */
651 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
652 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
653 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
654 * RFC and we use IPV6_PKTINFO.
656 #ifndef IPV6_RECVPKTINFO
657 #define IPV6_RECVPKTINFO IPV6_PKTINFO
660 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
662 #ifndef IPV6_RECVHOPLIMIT
663 #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
667 #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
668 #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
671 sk_request_cmsg6_pktinfo(sock
*s
)
675 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVPKTINFO
, &y
, sizeof(y
)) < 0)
676 ERR("IPV6_RECVPKTINFO");
682 sk_request_cmsg6_ttl(sock
*s
)
686 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_RECVHOPLIMIT
, &y
, sizeof(y
)) < 0)
687 ERR("IPV6_RECVHOPLIMIT");
693 sk_process_cmsg6_pktinfo(sock
*s
, struct cmsghdr
*cm
)
695 if (cm
->cmsg_type
== IPV6_PKTINFO
)
697 struct in6_pktinfo
*pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
698 s
->laddr
= ipa_from_in6(pi
->ipi6_addr
);
699 s
->lifindex
= pi
->ipi6_ifindex
;
704 sk_process_cmsg6_ttl(sock
*s
, struct cmsghdr
*cm
)
706 if (cm
->cmsg_type
== IPV6_HOPLIMIT
)
707 s
->rcv_ttl
= * (int *) CMSG_DATA(cm
);
711 sk_prepare_cmsgs6(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
714 struct in6_pktinfo
*pi
;
717 msg
->msg_control
= cbuf
;
718 msg
->msg_controllen
= cbuflen
;
720 cm
= CMSG_FIRSTHDR(msg
);
721 cm
->cmsg_level
= SOL_IPV6
;
722 cm
->cmsg_type
= IPV6_PKTINFO
;
723 cm
->cmsg_len
= CMSG_LEN(sizeof(*pi
));
724 controllen
+= CMSG_SPACE(sizeof(*pi
));
726 pi
= (struct in6_pktinfo
*) CMSG_DATA(cm
);
727 pi
->ipi6_ifindex
= s
->iface
? s
->iface
->index
: 0;
728 pi
->ipi6_addr
= ipa_to_in6(s
->saddr
);
730 msg
->msg_controllen
= controllen
;
735 * Miscellaneous socket syscalls
739 sk_set_ttl4(sock
*s
, int ttl
)
741 if (setsockopt(s
->fd
, SOL_IP
, IP_TTL
, &ttl
, sizeof(ttl
)) < 0)
748 sk_set_ttl6(sock
*s
, int ttl
)
750 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_UNICAST_HOPS
, &ttl
, sizeof(ttl
)) < 0)
751 ERR("IPV6_UNICAST_HOPS");
757 sk_set_tos4(sock
*s
, int tos
)
759 if (setsockopt(s
->fd
, SOL_IP
, IP_TOS
, &tos
, sizeof(tos
)) < 0)
766 sk_set_tos6(sock
*s
, int tos
)
768 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_TCLASS
, &tos
, sizeof(tos
)) < 0)
775 sk_set_high_port(sock
*s UNUSED
)
777 /* Port range setting is optional, ignore it if not supported */
782 int range
= IP_PORTRANGE_HIGH
;
783 if (setsockopt(s
->fd
, SOL_IP
, IP_PORTRANGE
, &range
, sizeof(range
)) < 0)
788 #ifdef IPV6_PORTRANGE
791 int range
= IPV6_PORTRANGE_HIGH
;
792 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_PORTRANGE
, &range
, sizeof(range
)) < 0)
793 ERR("IPV6_PORTRANGE");
801 sk_skip_ip_header(byte
*pkt
, int *len
)
803 if ((*len
< 20) || ((*pkt
& 0xf0) != 0x40))
806 int hlen
= (*pkt
& 0x0f) * 4;
807 if ((hlen
< 20) || (hlen
> *len
))
815 sk_rx_buffer(sock
*s
, int *len
)
817 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
))
818 return sk_skip_ip_header(s
->rbuf
, len
);
825 * Public socket functions
829 * sk_setup_multicast - enable multicast for given socket
832 * Prepare transmission of multicast packets for given datagram socket.
833 * The socket must have defined @iface.
835 * Result: 0 for success, -1 for an error.
839 sk_setup_multicast(sock
*s
)
844 return sk_setup_multicast4(s
);
846 return sk_setup_multicast6(s
);
850 * sk_join_group - join multicast group for given socket
852 * @maddr: multicast address
854 * Join multicast group for given datagram socket and associated interface.
855 * The socket must have defined @iface.
857 * Result: 0 for success, -1 for an error.
861 sk_join_group(sock
*s
, ip_addr maddr
)
864 return sk_join_group4(s
, maddr
);
866 return sk_join_group6(s
, maddr
);
870 * sk_leave_group - leave multicast group for given socket
872 * @maddr: multicast address
874 * Leave multicast group for given datagram socket and associated interface.
875 * The socket must have defined @iface.
877 * Result: 0 for success, -1 for an error.
881 sk_leave_group(sock
*s
, ip_addr maddr
)
884 return sk_leave_group4(s
, maddr
);
886 return sk_leave_group6(s
, maddr
);
890 * sk_setup_broadcast - enable broadcast for given socket
893 * Allow reception and transmission of broadcast packets for given datagram
894 * socket. The socket must have defined @iface. For transmission, packets should
895 * be sent to @brd address of @iface.
897 * Result: 0 for success, -1 for an error.
901 sk_setup_broadcast(sock
*s
)
905 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BROADCAST
, &y
, sizeof(y
)) < 0)
912 * sk_set_ttl - set transmit TTL for given socket
916 * Set TTL for already opened connections when TTL was not set before. Useful
917 * for accepted connections when different ones should have different TTL.
919 * Result: 0 for success, -1 for an error.
923 sk_set_ttl(sock
*s
, int ttl
)
928 return sk_set_ttl4(s
, ttl
);
930 return sk_set_ttl6(s
, ttl
);
934 * sk_set_min_ttl - set minimal accepted TTL for given socket
938 * Set minimal accepted TTL for given socket. Can be used for TTL security.
941 * Result: 0 for success, -1 for an error.
945 sk_set_min_ttl(sock
*s
, int ttl
)
948 return sk_set_min_ttl4(s
, ttl
);
950 return sk_set_min_ttl6(s
, ttl
);
955 * sk_set_md5_auth - add / remove MD5 security association for given socket
957 * @local: IP address of local side
958 * @remote: IP address of remote side
959 * @ifa: Interface for link-local IP address
960 * @passwd: Password used for MD5 authentication
961 * @setkey: Update also system SA/SP database
963 * In TCP MD5 handling code in kernel, there is a set of security associations
964 * used for choosing password and other authentication parameters according to
965 * the local and remote address. This function is useful for listening socket,
966 * for active sockets it may be enough to set s->password field.
968 * When called with passwd != NULL, the new pair is added,
969 * When called with passwd == NULL, the existing pair is removed.
971 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
972 * stored in global SA/SP database (but the behavior also must be enabled on
973 * per-socket basis). In case of multiple sockets to the same neighbor, the
974 * socket-specific state must be configured for each socket while global state
975 * just once per src-dst pair. The @setkey argument controls whether the global
976 * state (SA/SP database) is also updated.
978 * Result: 0 for success, -1 for an error.
982 sk_set_md5_auth(sock
*s
, ip_addr local
, ip_addr remote
, struct iface
*ifa
, char *passwd
, int setkey
)
987 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
991 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
992 * kernel will automatically fill it for outgoing packets and check it for
993 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
994 * known to the kernel.
996 * Result: 0 for success, -1 for an error.
1000 sk_set_ipv6_checksum(sock
*s
, int offset
)
1002 if (setsockopt(s
->fd
, SOL_IPV6
, IPV6_CHECKSUM
, &offset
, sizeof(offset
)) < 0)
1003 ERR("IPV6_CHECKSUM");
1009 sk_set_icmp6_filter(sock
*s
, int p1
, int p2
)
1011 /* a bit of lame interface, but it is here only for Radv */
1012 struct icmp6_filter f
;
1014 ICMP6_FILTER_SETBLOCKALL(&f
);
1015 ICMP6_FILTER_SETPASS(p1
, &f
);
1016 ICMP6_FILTER_SETPASS(p2
, &f
);
1018 if (setsockopt(s
->fd
, SOL_ICMPV6
, ICMP6_FILTER
, &f
, sizeof(f
)) < 0)
1019 ERR("ICMP6_FILTER");
1025 sk_log_error(sock
*s
, const char *p
)
1027 log(L_ERR
"%s: Socket error: %s%#m", p
, s
->err
);
1032 * Actual struct birdsock code
1035 static list sock_list
;
1036 static struct birdsock
*current_sock
;
1037 static struct birdsock
*stored_sock
;
1039 static inline sock
*
1042 if (!s
->n
.next
->next
)
1045 return SKIP_BACK(sock
, n
, s
->n
.next
);
1049 sk_alloc_bufs(sock
*s
)
1051 if (!s
->rbuf
&& s
->rbsize
)
1052 s
->rbuf
= s
->rbuf_alloc
= xmalloc(s
->rbsize
);
1054 if (!s
->tbuf
&& s
->tbsize
)
1055 s
->tbuf
= s
->tbuf_alloc
= xmalloc(s
->tbsize
);
1056 s
->tpos
= s
->ttx
= s
->tbuf
;
1060 sk_free_bufs(sock
*s
)
1064 xfree(s
->rbuf_alloc
);
1065 s
->rbuf
= s
->rbuf_alloc
= NULL
;
1069 xfree(s
->tbuf_alloc
);
1070 s
->tbuf
= s
->tbuf_alloc
= NULL
;
1075 sk_free(resource
*r
)
1077 sock
*s
= (sock
*) r
;
1084 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1085 if (s
->flags
& SKF_THREAD
)
1088 if (s
== current_sock
)
1089 current_sock
= sk_next(s
);
1090 if (s
== stored_sock
)
1091 stored_sock
= sk_next(s
);
1097 sk_set_rbsize(sock
*s
, uint val
)
1099 ASSERT(s
->rbuf_alloc
== s
->rbuf
);
1101 if (s
->rbsize
== val
)
1105 xfree(s
->rbuf_alloc
);
1106 s
->rbuf_alloc
= xmalloc(val
);
1107 s
->rpos
= s
->rbuf
= s
->rbuf_alloc
;
1111 sk_set_tbsize(sock
*s
, uint val
)
1113 ASSERT(s
->tbuf_alloc
== s
->tbuf
);
1115 if (s
->tbsize
== val
)
1118 byte
*old_tbuf
= s
->tbuf
;
1121 s
->tbuf
= s
->tbuf_alloc
= xrealloc(s
->tbuf_alloc
, val
);
1122 s
->tpos
= s
->tbuf
+ (s
->tpos
- old_tbuf
);
1123 s
->ttx
= s
->tbuf
+ (s
->ttx
- old_tbuf
);
1127 sk_set_tbuf(sock
*s
, void *tbuf
)
1129 s
->tbuf
= tbuf
?: s
->tbuf_alloc
;
1130 s
->ttx
= s
->tpos
= s
->tbuf
;
1134 sk_reallocate(sock
*s
)
1141 sk_dump(resource
*r
)
1143 sock
*s
= (sock
*) r
;
1144 static char *sk_type_names
[] = { "TCP<", "TCP>", "TCP", "UDP", NULL
, "IP", NULL
, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1146 debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1147 sk_type_names
[s
->type
],
1155 s
->iface
? s
->iface
->name
: "none");
1158 static struct resclass sk_class
= {
1168 * sk_new - create a socket
1171 * This function creates a new socket resource. If you want to use it,
1172 * you need to fill in all the required fields of the structure and
1173 * call sk_open() to do the actual opening of the socket.
1175 * The real function name is sock_new(), sk_new() is a macro wrapper
1176 * to avoid collision with OpenSSL.
1181 sock
*s
= ralloc(p
, &sk_class
);
1183 // s->saddr = s->daddr = IPA_NONE;
1184 s
->tos
= s
->priority
= s
->ttl
= -1;
1195 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1201 if (ipa_nonzero(s
->saddr
) && !(s
->flags
& SKF_BIND
))
1202 s
->flags
|= SKF_PKTINFO
;
1204 #ifdef CONFIG_USE_HDRINCL
1205 if (sk_is_ipv4(s
) && (s
->type
== SK_IP
) && (s
->flags
& SKF_PKTINFO
))
1207 s
->flags
&= ~SKF_PKTINFO
;
1208 s
->flags
|= SKF_HDRINCL
;
1209 if (setsockopt(fd
, SOL_IP
, IP_HDRINCL
, &y
, sizeof(y
)) < 0)
1214 if (s
->vrf
&& !s
->iface
)
1216 /* Bind socket to associated VRF interface.
1217 This is Linux-specific, but so is SO_BINDTODEVICE. */
1218 #ifdef SO_BINDTODEVICE
1219 struct ifreq ifr
= {};
1220 strcpy(ifr
.ifr_name
, s
->vrf
->name
);
1221 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
1222 ERR("SO_BINDTODEVICE");
1228 #ifdef SO_BINDTODEVICE
1229 struct ifreq ifr
= {};
1230 strcpy(ifr
.ifr_name
, s
->iface
->name
);
1231 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
, &ifr
, sizeof(ifr
)) < 0)
1232 ERR("SO_BINDTODEVICE");
1235 #ifdef CONFIG_UNIX_DONTROUTE
1236 if (setsockopt(s
->fd
, SOL_SOCKET
, SO_DONTROUTE
, &y
, sizeof(y
)) < 0)
1237 ERR("SO_DONTROUTE");
1243 if (s
->flags
& SKF_LADDR_RX
)
1244 if (sk_request_cmsg4_pktinfo(s
) < 0)
1247 if (s
->flags
& SKF_TTL_RX
)
1248 if (sk_request_cmsg4_ttl(s
) < 0)
1251 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1252 if (sk_disable_mtu_disc4(s
) < 0)
1256 if (sk_set_ttl4(s
, s
->ttl
) < 0)
1260 if (sk_set_tos4(s
, s
->tos
) < 0)
1266 if (s
->flags
& SKF_V6ONLY
)
1267 if (setsockopt(fd
, SOL_IPV6
, IPV6_V6ONLY
, &y
, sizeof(y
)) < 0)
1270 if (s
->flags
& SKF_LADDR_RX
)
1271 if (sk_request_cmsg6_pktinfo(s
) < 0)
1274 if (s
->flags
& SKF_TTL_RX
)
1275 if (sk_request_cmsg6_ttl(s
) < 0)
1278 if ((s
->type
== SK_UDP
) || (s
->type
== SK_IP
))
1279 if (sk_disable_mtu_disc6(s
) < 0)
1283 if (sk_set_ttl6(s
, s
->ttl
) < 0)
1287 if (sk_set_tos6(s
, s
->tos
) < 0)
1291 /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
1292 if (s
->priority
>= 0)
1293 if (sk_set_priority(s
, s
->priority
) < 0)
1302 add_tail(&sock_list
, &s
->n
);
1306 sk_tcp_connected(sock
*s
)
1309 int sa_len
= sizeof(sa
);
1311 if ((getsockname(s
->fd
, &sa
.sa
, &sa_len
) < 0) ||
1312 (sockaddr_read(&sa
, s
->af
, &s
->saddr
, &s
->iface
, &s
->sport
) < 0))
1313 log(L_WARN
"SOCK: Cannot get local IP address for TCP>");
1321 sk_passive_connected(sock
*s
, int type
)
1323 sockaddr loc_sa
, rem_sa
;
1324 int loc_sa_len
= sizeof(loc_sa
);
1325 int rem_sa_len
= sizeof(rem_sa
);
1327 int fd
= accept(s
->fd
, ((type
== SK_TCP
) ? &rem_sa
.sa
: NULL
), &rem_sa_len
);
1330 if ((errno
!= EINTR
) && (errno
!= EAGAIN
))
1331 s
->err_hook(s
, errno
);
1335 sock
*t
= sk_new(s
->pool
);
1341 t
->rbsize
= s
->rbsize
;
1342 t
->tbsize
= s
->tbsize
;
1346 if ((getsockname(fd
, &loc_sa
.sa
, &loc_sa_len
) < 0) ||
1347 (sockaddr_read(&loc_sa
, s
->af
, &t
->saddr
, &t
->iface
, &t
->sport
) < 0))
1348 log(L_WARN
"SOCK: Cannot get local IP address for TCP<");
1350 if (sockaddr_read(&rem_sa
, s
->af
, &t
->daddr
, &t
->iface
, &t
->dport
) < 0)
1351 log(L_WARN
"SOCK: Cannot get remote IP address for TCP<");
1354 if (sk_setup(t
) < 0)
1356 /* FIXME: Call err_hook instead ? */
1357 log(L_ERR
"SOCK: Incoming connection: %s%#m", t
->err
);
1359 /* FIXME: handle it better in rfree() */
1373 * sk_open - open a socket
1376 * This function takes a socket resource created by sk_new() and
1377 * initialized by the user and binds a corresponding network connection
1380 * Result: 0 for success, -1 for an error.
1389 ip_addr bind_addr
= IPA_NONE
;
1395 s
->ttx
= ""; /* Force s->ttx != s->tpos */
1397 case SK_TCP_PASSIVE
:
1398 fd
= socket(af
, SOCK_STREAM
, IPPROTO_TCP
);
1399 bind_port
= s
->sport
;
1400 bind_addr
= s
->saddr
;
1401 do_bind
= bind_port
|| ipa_nonzero(bind_addr
);
1405 fd
= socket(af
, SOCK_DGRAM
, IPPROTO_UDP
);
1406 bind_port
= s
->sport
;
1407 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1412 fd
= socket(af
, SOCK_RAW
, s
->dport
);
1414 bind_addr
= (s
->flags
& SKF_BIND
) ? s
->saddr
: IPA_NONE
;
1415 do_bind
= ipa_nonzero(bind_addr
);
1424 bug("sk_open() called for invalid sock type %d", s
->type
);
1433 if (sk_setup(s
) < 0)
1442 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &y
, sizeof(y
)) < 0)
1443 ERR2("SO_REUSEADDR");
1445 #ifdef CONFIG_NO_IFACE_BIND
1446 /* Workaround missing ability to bind to an iface */
1447 if ((s
->type
== SK_UDP
) && s
->iface
&& ipa_zero(bind_addr
))
1449 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &y
, sizeof(y
)) < 0)
1450 ERR2("SO_REUSEPORT");
1455 if (s
->flags
& SKF_HIGH_PORT
)
1456 if (sk_set_high_port(s
) < 0)
1457 log(L_WARN
"Socket error: %s%#m", s
->err
);
1459 sockaddr_fill(&sa
, af
, bind_addr
, s
->iface
, bind_port
);
1460 if (bind(fd
, &sa
.sa
, SA_LEN(sa
)) < 0)
1465 if (sk_set_md5_auth(s
, s
->saddr
, s
->daddr
, s
->iface
, s
->password
, 0) < 0)
1471 sockaddr_fill(&sa
, af
, s
->daddr
, s
->iface
, s
->dport
);
1472 if (connect(fd
, &sa
.sa
, SA_LEN(sa
)) >= 0)
1473 sk_tcp_connected(s
);
1474 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
&&
1475 errno
!= ECONNREFUSED
&& errno
!= EHOSTUNREACH
&& errno
!= ENETUNREACH
)
1479 case SK_TCP_PASSIVE
:
1480 if (listen(fd
, 8) < 0)
1491 if (!(s
->flags
& SKF_THREAD
))
1502 sk_open_unix(sock
*s
, char *name
)
1504 struct sockaddr_un sa
;
1507 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1509 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1513 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) < 0)
1516 /* Path length checked in test_old_bird() */
1517 sa
.sun_family
= AF_UNIX
;
1518 strcpy(sa
.sun_path
, name
);
1520 if (bind(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) < 0)
1523 if (listen(fd
, 8) < 0)
1532 #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1533 CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1534 #define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1537 sk_prepare_cmsgs(sock
*s
, struct msghdr
*msg
, void *cbuf
, size_t cbuflen
)
1540 sk_prepare_cmsgs4(s
, msg
, cbuf
, cbuflen
);
1542 sk_prepare_cmsgs6(s
, msg
, cbuf
, cbuflen
);
1546 sk_process_cmsgs(sock
*s
, struct msghdr
*msg
)
1550 s
->laddr
= IPA_NONE
;
1554 for (cm
= CMSG_FIRSTHDR(msg
); cm
!= NULL
; cm
= CMSG_NXTHDR(msg
, cm
))
1556 if ((cm
->cmsg_level
== SOL_IP
) && sk_is_ipv4(s
))
1558 sk_process_cmsg4_pktinfo(s
, cm
);
1559 sk_process_cmsg4_ttl(s
, cm
);
1562 if ((cm
->cmsg_level
== SOL_IPV6
) && sk_is_ipv6(s
))
1564 sk_process_cmsg6_pktinfo(s
, cm
);
1565 sk_process_cmsg6_ttl(s
, cm
);
1574 struct iovec iov
= {s
->tbuf
, s
->tpos
- s
->tbuf
};
1575 byte cmsg_buf
[CMSG_TX_SPACE
];
1578 sockaddr_fill(&dst
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1580 struct msghdr msg
= {
1581 .msg_name
= &dst
.sa
,
1582 .msg_namelen
= SA_LEN(dst
),
1587 #ifdef CONFIG_USE_HDRINCL
1589 struct iovec iov2
[2] = { {hdr
, 20}, iov
};
1591 if (s
->flags
& SKF_HDRINCL
)
1593 sk_prepare_ip_header(s
, hdr
, iov
.iov_len
);
1599 if (s
->flags
& SKF_PKTINFO
)
1600 sk_prepare_cmsgs(s
, &msg
, cmsg_buf
, sizeof(cmsg_buf
));
1602 return sendmsg(s
->fd
, &msg
, 0);
1608 struct iovec iov
= {s
->rbuf
, s
->rbsize
};
1609 byte cmsg_buf
[CMSG_RX_SPACE
];
1612 struct msghdr msg
= {
1613 .msg_name
= &src
.sa
,
1614 .msg_namelen
= sizeof(src
), // XXXX ??
1617 .msg_control
= cmsg_buf
,
1618 .msg_controllen
= sizeof(cmsg_buf
),
1622 int rv
= recvmsg(s
->fd
, &msg
, 0);
1627 // if (cf_type == SK_IP)
1628 // rv = ipv4_skip_header(pbuf, rv);
1631 sockaddr_read(&src
, s
->af
, &s
->faddr
, NULL
, &s
->fport
);
1632 sk_process_cmsgs(s
, &msg
);
1634 if (msg
.msg_flags
& MSG_TRUNC
)
1635 s
->flags
|= SKF_TRUNCATED
;
1637 s
->flags
&= ~SKF_TRUNCATED
;
1643 static inline void reset_tx_buffer(sock
*s
) { s
->ttx
= s
->tpos
= s
->tbuf
; }
1646 sk_maybe_write(sock
*s
)
1655 while (s
->ttx
!= s
->tpos
)
1657 e
= write(s
->fd
, s
->ttx
, s
->tpos
- s
->ttx
);
1661 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1664 /* EPIPE is just a connection close notification during TX */
1665 s
->err_hook(s
, (errno
!= EPIPE
) ? errno
: 0);
1678 if (s
->tbuf
== s
->tpos
)
1685 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1688 s
->err_hook(s
, errno
);
1700 bug("sk_maybe_write: unknown socket type %d", s
->type
);
1705 sk_rx_ready(sock
*s
)
1708 struct pollfd pfd
= { .fd
= s
->fd
};
1709 pfd
.events
|= POLLIN
;
1712 rv
= poll(&pfd
, 1, 0);
1714 if ((rv
< 0) && (errno
== EINTR
|| errno
== EAGAIN
))
1721 * sk_send - send data to a socket
1723 * @len: number of bytes to send
1725 * This function sends @len bytes of data prepared in the
1726 * transmit buffer of the socket @s to the network connection.
1727 * If the packet can be sent immediately, it does so and returns
1728 * 1, else it queues the packet for later processing, returns 0
1729 * and calls the @tx_hook of the socket when the transmission
1733 sk_send(sock
*s
, unsigned len
)
1736 s
->tpos
= s
->tbuf
+ len
;
1737 return sk_maybe_write(s
);
1741 * sk_send_to - send data to a specific destination
1743 * @len: number of bytes to send
1744 * @addr: IP address to send the packet to
1745 * @port: port to send the packet to
1747 * This is a sk_send() replacement for connection-less packet sockets
1748 * which allows destination of the packet to be chosen dynamically.
1749 * Raw IP sockets should use 0 for @port.
1752 sk_send_to(sock
*s
, unsigned len
, ip_addr addr
, unsigned port
)
1759 s
->tpos
= s
->tbuf
+ len
;
1760 return sk_maybe_write(s
);
1765 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1766 ip_addr saddr, ip_addr daddr, unsigned dport)
1773 s->tpos = s->tbuf + len;
1774 return sk_maybe_write(s);
1778 /* sk_read() and sk_write() are called from BFD's event loop */
1781 sk_read(sock
*s
, int revents
)
1785 case SK_TCP_PASSIVE
:
1786 return sk_passive_connected(s
, SK_TCP
);
1788 case SK_UNIX_PASSIVE
:
1789 return sk_passive_connected(s
, SK_UNIX
);
1794 int c
= read(s
->fd
, s
->rpos
, s
->rbuf
+ s
->rbsize
- s
->rpos
);
1798 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1799 s
->err_hook(s
, errno
);
1800 else if (errno
== EAGAIN
&& !(revents
& POLLIN
))
1802 log(L_ERR
"Got EAGAIN from read when revents=%x (without POLLIN)", revents
);
1811 if (s
->rx_hook(s
, s
->rpos
- s
->rbuf
))
1813 /* We need to be careful since the socket could have been deleted by the hook */
1814 if (current_sock
== s
)
1823 return s
->rx_hook(s
, 0);
1827 int e
= sk_recvmsg(s
);
1831 if (errno
!= EINTR
&& errno
!= EAGAIN
)
1832 s
->err_hook(s
, errno
);
1836 s
->rpos
= s
->rbuf
+ e
;
1851 sockaddr_fill(&sa
, s
->af
, s
->daddr
, s
->iface
, s
->dport
);
1853 if (connect(s
->fd
, &sa
.sa
, SA_LEN(sa
)) >= 0 || errno
== EISCONN
)
1854 sk_tcp_connected(s
);
1855 else if (errno
!= EINTR
&& errno
!= EAGAIN
&& errno
!= EINPROGRESS
)
1856 s
->err_hook(s
, errno
);
1861 if (s
->ttx
!= s
->tpos
&& sk_maybe_write(s
) > 0)
1872 sk_err(sock
*s
, int revents
)
1874 int se
= 0, sse
= sizeof(se
);
1875 if ((s
->type
!= SK_MAGIC
) && (revents
& POLLERR
))
1876 if (getsockopt(s
->fd
, SOL_SOCKET
, SO_ERROR
, &se
, &sse
) < 0)
1878 log(L_ERR
"IO: Socket error: SO_ERROR: %m");
1891 debug("Open sockets:\n");
1892 WALK_LIST(n
, sock_list
)
1894 s
= SKIP_BACK(sock
, n
, n
);
1903 * Internal event log and watchdog
1906 #define EVENT_LOG_LENGTH 32
1908 struct event_log_entry
1916 static struct event_log_entry event_log
[EVENT_LOG_LENGTH
];
1917 static struct event_log_entry
*event_open
;
1918 static int event_log_pos
, event_log_num
, watchdog_active
;
1919 static btime last_time
;
1920 static btime loop_time
;
1923 io_update_time(void)
1928 if (!clock_monotonic_available
)
1932 * This is the third time-tracking procedure (after update_times() above and
1933 * times_update() in BFD), dedicated to the internal event log and latency
1934 * tracking. Hopefully, we will consolidate these sometime.
1937 rv
= clock_gettime(CLOCK_MONOTONIC
, &ts
);
1939 die("clock_gettime: %m");
1941 last_time
= ((s64
) ts
.tv_sec S
) + (ts
.tv_nsec
/ 1000);
1945 event_open
->duration
= last_time
- event_open
->timestamp
;
1947 if (event_open
->duration
> config
->latency_limit
)
1948 log(L_WARN
"Event 0x%p 0x%p took %d ms",
1949 event_open
->hook
, event_open
->data
, (int) (event_open
->duration TO_MS
));
1956 * io_log_event - mark approaching event into event log
1957 * @hook: event hook address
1958 * @data: event data address
1960 * Store info (hook, data, timestamp) about the following internal event into
1961 * a circular event log (@event_log). When latency tracking is enabled, the log
1962 * entry is kept open (in @event_open) so the duration can be filled later.
1965 io_log_event(void *hook
, void *data
)
1967 if (config
->latency_debug
)
1970 struct event_log_entry
*en
= event_log
+ event_log_pos
;
1974 en
->timestamp
= last_time
;
1979 event_log_pos
%= EVENT_LOG_LENGTH
;
1981 event_open
= config
->latency_debug
? en
: NULL
;
1985 io_close_event(void)
1996 log(L_DEBUG
"Event log:");
1997 for (i
= 0; i
< EVENT_LOG_LENGTH
; i
++)
1999 struct event_log_entry
*en
= event_log
+ (event_log_pos
+ i
) % EVENT_LOG_LENGTH
;
2001 log(L_DEBUG
" Event 0x%p 0x%p at %8d for %d ms", en
->hook
, en
->data
,
2002 (int) ((last_time
- en
->timestamp
) TO_MS
), (int) (en
->duration TO_MS
));
2007 watchdog_sigalrm(int sig UNUSED
)
2009 /* Update last_time and duration, but skip latency check */
2010 config
->latency_limit
= 0xffffffff;
2013 /* We want core dump */
2018 watchdog_start1(void)
2022 loop_time
= last_time
;
2026 watchdog_start(void)
2030 loop_time
= last_time
;
2033 if (config
->watchdog_timeout
)
2035 alarm(config
->watchdog_timeout
);
2036 watchdog_active
= 1;
2045 if (watchdog_active
)
2048 watchdog_active
= 0;
2051 btime duration
= last_time
- loop_time
;
2052 if (duration
> config
->watchdog_warning
)
2053 log(L_WARN
"I/O loop cycle took %d ms for %d events",
2054 (int) (duration TO_MS
), event_log_num
);
2062 volatile int async_config_flag
; /* Asynchronous reconfiguration/dump scheduled */
2063 volatile int async_dump_flag
;
2064 volatile int async_shutdown_flag
;
2069 init_list(&near_timers
);
2070 init_list(&far_timers
);
2071 init_list(&sock_list
);
2072 init_list(&global_event_list
);
2077 srandom((int) now_real
);
2080 static int short_loops
= 0;
2081 #define SHORT_LOOP_MAX 10
2088 int nfds
, events
, pout
;
2092 struct pollfd
*pfd
= xmalloc(fdmax
* sizeof(struct pollfd
));
2097 events
= ev_run_list(&global_event_list
);
2100 tout
= tm_first_shot();
2106 poll_tout
= (events
? 0 : MIN(tout
- now
, 3)) * 1000; /* Time in milliseconds */
2111 WALK_LIST(n
, sock_list
)
2113 pfd
[nfds
] = (struct pollfd
) { .fd
= -1 }; /* everything other set to 0 by this */
2114 s
= SKIP_BACK(sock
, n
, n
);
2117 pfd
[nfds
].fd
= s
->fd
;
2118 pfd
[nfds
].events
|= POLLIN
;
2120 if (s
->tx_hook
&& s
->ttx
!= s
->tpos
)
2122 pfd
[nfds
].fd
= s
->fd
;
2123 pfd
[nfds
].events
|= POLLOUT
;
2125 if (pfd
[nfds
].fd
!= -1)
2136 pfd
= xrealloc(pfd
, fdmax
* sizeof(struct pollfd
));
2141 * Yes, this is racy. But even if the signal comes before this test
2142 * and entering poll(), it gets caught on the next timer tick.
2145 if (async_config_flag
)
2147 io_log_event(async_config
, NULL
);
2149 async_config_flag
= 0;
2152 if (async_dump_flag
)
2154 io_log_event(async_dump
, NULL
);
2156 async_dump_flag
= 0;
2159 if (async_shutdown_flag
)
2161 io_log_event(async_shutdown
, NULL
);
2163 async_shutdown_flag
= 0;
2167 /* And finally enter poll() to find active sockets */
2169 pout
= poll(pfd
, nfds
, poll_tout
);
2174 if (errno
== EINTR
|| errno
== EAGAIN
)
2180 /* guaranteed to be non-empty */
2181 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
2183 while (current_sock
)
2185 sock
*s
= current_sock
;
2188 current_sock
= sk_next(s
);
2196 if (s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2200 io_log_event(s
->rx_hook
, s
->data
);
2201 e
= sk_read(s
, pfd
[s
->index
].revents
);
2202 if (s
!= current_sock
)
2205 while (e
&& s
->rx_hook
&& steps
);
2208 if (pfd
[s
->index
].revents
& POLLOUT
)
2212 io_log_event(s
->tx_hook
, s
->data
);
2214 if (s
!= current_sock
)
2219 current_sock
= sk_next(s
);
2224 if (events
&& (short_loops
< SHORT_LOOP_MAX
))
2229 current_sock
= stored_sock
;
2230 if (current_sock
== NULL
)
2231 current_sock
= SKIP_BACK(sock
, n
, HEAD(sock_list
));
2233 while (current_sock
&& count
< MAX_RX_STEPS
)
2235 sock
*s
= current_sock
;
2238 current_sock
= sk_next(s
);
2242 if (!s
->fast_rx
&& (pfd
[s
->index
].revents
& POLLIN
) && s
->rx_hook
)
2245 io_log_event(s
->rx_hook
, s
->data
);
2246 sk_read(s
, pfd
[s
->index
].revents
);
2247 if (s
!= current_sock
)
2251 if (pfd
[s
->index
].revents
& (POLLHUP
| POLLERR
))
2253 sk_err(s
, pfd
[s
->index
].revents
);
2254 if (s
!= current_sock
)
2258 current_sock
= sk_next(s
);
2263 stored_sock
= current_sock
;
2269 test_old_bird(char *path
)
2272 struct sockaddr_un sa
;
2274 fd
= socket(AF_UNIX
, SOCK_STREAM
, 0);
2276 die("Cannot create socket: %m");
2277 if (strlen(path
) >= sizeof(sa
.sun_path
))
2278 die("Socket path too long");
2279 bzero(&sa
, sizeof(sa
));
2280 sa
.sun_family
= AF_UNIX
;
2281 strcpy(sa
.sun_path
, path
);
2282 if (connect(fd
, (struct sockaddr
*) &sa
, SUN_LEN(&sa
)) == 0)
2283 die("I found another BIRD running.");