]> git.ipfire.org Git - thirdparty/bird.git/blob - sysdep/unix/io.c
Merge branch 'master' into mq-filter-stack
[thirdparty/bird.git] / sysdep / unix / io.c
1 /*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
12 #ifndef _GNU_SOURCE
13 #define _GNU_SOURCE
14 #endif
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <time.h>
19 #include <sys/time.h>
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <sys/uio.h>
23 #include <sys/un.h>
24 #include <poll.h>
25 #include <unistd.h>
26 #include <fcntl.h>
27 #include <errno.h>
28 #include <net/if.h>
29 #include <netinet/in.h>
30 #include <netinet/tcp.h>
31 #include <netinet/udp.h>
32 #include <netinet/icmp6.h>
33
34 #include "nest/bird.h"
35 #include "lib/lists.h"
36 #include "lib/resource.h"
37 #include "lib/socket.h"
38 #include "lib/event.h"
39 #include "lib/timer.h"
40 #include "lib/string.h"
41 #include "nest/iface.h"
42 #include "conf/conf.h"
43
44 #include "sysdep/unix/unix.h"
45 #include CONFIG_INCLUDE_SYSIO_H
46
/*
 * Upper bound on how many times the tx handler of a single socket may be
 * called during one poll iteration. Kept small so that one protocol
 * instance cannot monopolize the CPU.
 */
#define MAX_STEPS 4

/*
 * Upper bound on the number of rx handler calls, counted over all sockets,
 * in one poll iteration. RX callbacks are often much more costly, so this
 * is limited to get small latencies.
 */
#define MAX_RX_STEPS 4
57
58
59 /*
60 * Tracked Files
61 */
62
63 struct rfile {
64 resource r;
65 FILE *f;
66 };
67
68 static void
69 rf_free(resource *r)
70 {
71 struct rfile *a = (struct rfile *) r;
72
73 fclose(a->f);
74 }
75
76 static void
77 rf_dump(resource *r)
78 {
79 struct rfile *a = (struct rfile *) r;
80
81 debug("(FILE *%p)\n", a->f);
82 }
83
84 static struct resclass rf_class = {
85 "FILE",
86 sizeof(struct rfile),
87 rf_free,
88 rf_dump,
89 NULL,
90 NULL
91 };
92
93 struct rfile *
94 rf_open(pool *p, char *name, char *mode)
95 {
96 FILE *f = fopen(name, mode);
97
98 if (!f)
99 return NULL;
100
101 struct rfile *r = ralloc(p, &rf_class);
102 r->f = f;
103 return r;
104 }
105
106 void *
107 rf_file(struct rfile *f)
108 {
109 return f->f;
110 }
111
112 int
113 rf_fileno(struct rfile *f)
114 {
115 return fileno(f->f);
116 }
117
118
119 /*
120 * Time clock
121 */
122
123 btime boot_time;
124
125 void
126 times_init(struct timeloop *loop)
127 {
128 struct timespec ts;
129 int rv;
130
131 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
132 if (rv < 0)
133 die("Monotonic clock is missing");
134
135 if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40)))
136 log(L_WARN "Monotonic clock is crazy");
137
138 loop->last_time = ts.tv_sec S + ts.tv_nsec NS;
139 loop->real_time = 0;
140 }
141
142 void
143 times_update(struct timeloop *loop)
144 {
145 struct timespec ts;
146 int rv;
147
148 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
149 if (rv < 0)
150 die("clock_gettime: %m");
151
152 btime new_time = ts.tv_sec S + ts.tv_nsec NS;
153
154 if (new_time < loop->last_time)
155 log(L_ERR "Monotonic clock is broken");
156
157 loop->last_time = new_time;
158 loop->real_time = 0;
159 }
160
161 void
162 times_update_real_time(struct timeloop *loop)
163 {
164 struct timespec ts;
165 int rv;
166
167 rv = clock_gettime(CLOCK_REALTIME, &ts);
168 if (rv < 0)
169 die("clock_gettime: %m");
170
171 loop->real_time = ts.tv_sec S + ts.tv_nsec NS;
172 }
173
174
175 /**
176 * DOC: Sockets
177 *
178 * Socket resources represent network connections. Their data structure (&socket)
179 * contains a lot of fields defining the exact type of the socket, the local and
180 * remote addresses and ports, pointers to socket buffers and finally pointers to
181 * hook functions to be called when new data have arrived to the receive buffer
182 * (@rx_hook), when the contents of the transmit buffer have been transmitted
183 * (@tx_hook) and when an error or connection close occurs (@err_hook).
184 *
185 * Freeing of sockets from inside socket hooks is perfectly safe.
186 */
187
/* Where the SOL_* option levels are not provided, use the IPPROTO_* values */
#if !defined(SOL_IP)
#define SOL_IP IPPROTO_IP
#endif

#if !defined(SOL_IPV6)
#define SOL_IPV6 IPPROTO_IPV6
#endif

#if !defined(SOL_ICMPV6)
#define SOL_ICMPV6 IPPROTO_ICMPV6
#endif
199
200
201 /*
202 * Sockaddr helper functions
203 */
204
205 static inline int UNUSED sockaddr_length(int af)
206 { return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
207
208 static inline void
209 sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
210 {
211 memset(sa, 0, sizeof(struct sockaddr_in));
212 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
213 sa->sin_len = sizeof(struct sockaddr_in);
214 #endif
215 sa->sin_family = AF_INET;
216 sa->sin_port = htons(port);
217 sa->sin_addr = ipa_to_in4(a);
218 }
219
220 static inline void
221 sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
222 {
223 memset(sa, 0, sizeof(struct sockaddr_in6));
224 #ifdef SIN6_LEN
225 sa->sin6_len = sizeof(struct sockaddr_in6);
226 #endif
227 sa->sin6_family = AF_INET6;
228 sa->sin6_port = htons(port);
229 sa->sin6_flowinfo = 0;
230 sa->sin6_addr = ipa_to_in6(a);
231
232 if (ifa && ipa_is_link_local(a))
233 sa->sin6_scope_id = ifa->index;
234 }
235
236 void
237 sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
238 {
239 if (af == AF_INET)
240 sockaddr_fill4((struct sockaddr_in *) sa, a, port);
241 else if (af == AF_INET6)
242 sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
243 else
244 bug("Unknown AF");
245 }
246
247 static inline void
248 sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
249 {
250 *port = ntohs(sa->sin_port);
251 *a = ipa_from_in4(sa->sin_addr);
252 }
253
254 static inline void
255 sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
256 {
257 *port = ntohs(sa->sin6_port);
258 *a = ipa_from_in6(sa->sin6_addr);
259
260 if (ifa && ipa_is_link_local(*a))
261 *ifa = if_find_by_index(sa->sin6_scope_id);
262 }
263
264 int
265 sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
266 {
267 if (sa->sa.sa_family != af)
268 goto fail;
269
270 if (af == AF_INET)
271 sockaddr_read4((struct sockaddr_in *) sa, a, port);
272 else if (af == AF_INET6)
273 sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
274 else
275 goto fail;
276
277 return 0;
278
279 fail:
280 *a = IPA_NONE;
281 *port = 0;
282 return -1;
283 }
284
285
286 /*
287 * IPv6 multicast syscalls
288 */
289
290 /* Fortunately standardized in RFC 3493 */
291
/*
 * Initializer for struct ipv6_mreq: group address @maddr on interface @ifa.
 * The @ifa argument is parenthesized so that any pointer expression can be
 * passed safely.
 */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
294
295 static inline int
296 sk_setup_multicast6(sock *s)
297 {
298 int index = s->iface->index;
299 int ttl = s->ttl;
300 int n = 0;
301
302 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
303 ERR("IPV6_MULTICAST_IF");
304
305 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
306 ERR("IPV6_MULTICAST_HOPS");
307
308 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
309 ERR("IPV6_MULTICAST_LOOP");
310
311 return 0;
312 }
313
314 static inline int
315 sk_join_group6(sock *s, ip_addr maddr)
316 {
317 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
318
319 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
320 ERR("IPV6_JOIN_GROUP");
321
322 return 0;
323 }
324
325 static inline int
326 sk_leave_group6(sock *s, ip_addr maddr)
327 {
328 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
329
330 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
331 ERR("IPV6_LEAVE_GROUP");
332
333 return 0;
334 }
335
336
337 /*
338 * IPv6 packet control messages
339 */
340
341 /* Also standardized, in RFC 3542 */
342
/*
 * RFC 2292 uses IPV6_PKTINFO both as the socket option and as the cmsg
 * type; RFC 3542 renamed the socket option to IPV6_RECVPKTINFO. When
 * IPV6_RECVPKTINFO is missing, we suppose the OS implements the older RFC
 * and use IPV6_PKTINFO instead.
 */
#if !defined(IPV6_RECVPKTINFO)
#define IPV6_RECVPKTINFO IPV6_PKTINFO
#endif

/*
 * The same applies to IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
 */
#if !defined(IPV6_RECVHOPLIMIT)
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
#endif
358
359
/* Control buffer space needed for one IPv6 packet info message */
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
/* Control buffer space needed for one hop limit message (an int) */
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
362
363 static inline int
364 sk_request_cmsg6_pktinfo(sock *s)
365 {
366 int y = 1;
367
368 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
369 ERR("IPV6_RECVPKTINFO");
370
371 return 0;
372 }
373
374 static inline int
375 sk_request_cmsg6_ttl(sock *s)
376 {
377 int y = 1;
378
379 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
380 ERR("IPV6_RECVHOPLIMIT");
381
382 return 0;
383 }
384
385 static inline void
386 sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
387 {
388 if (cm->cmsg_type == IPV6_PKTINFO)
389 {
390 struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
391 s->laddr = ipa_from_in6(pi->ipi6_addr);
392 s->lifindex = pi->ipi6_ifindex;
393 }
394 }
395
396 static inline void
397 sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
398 {
399 if (cm->cmsg_type == IPV6_HOPLIMIT)
400 s->rcv_ttl = * (int *) CMSG_DATA(cm);
401 }
402
403 static inline void
404 sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
405 {
406 struct cmsghdr *cm;
407 struct in6_pktinfo *pi;
408 int controllen = 0;
409
410 msg->msg_control = cbuf;
411 msg->msg_controllen = cbuflen;
412
413 cm = CMSG_FIRSTHDR(msg);
414 cm->cmsg_level = SOL_IPV6;
415 cm->cmsg_type = IPV6_PKTINFO;
416 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
417 controllen += CMSG_SPACE(sizeof(*pi));
418
419 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
420 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
421 pi->ipi6_addr = ipa_to_in6(s->saddr);
422
423 msg->msg_controllen = controllen;
424 }
425
426
427 /*
428 * Miscellaneous socket syscalls
429 */
430
431 static inline int
432 sk_set_ttl4(sock *s, int ttl)
433 {
434 if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
435 ERR("IP_TTL");
436
437 return 0;
438 }
439
440 static inline int
441 sk_set_ttl6(sock *s, int ttl)
442 {
443 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
444 ERR("IPV6_UNICAST_HOPS");
445
446 return 0;
447 }
448
449 static inline int
450 sk_set_tos4(sock *s, int tos)
451 {
452 if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
453 ERR("IP_TOS");
454
455 return 0;
456 }
457
458 static inline int
459 sk_set_tos6(sock *s, int tos)
460 {
461 if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
462 ERR("IPV6_TCLASS");
463
464 return 0;
465 }
466
467 static inline int
468 sk_set_high_port(sock *s UNUSED)
469 {
470 /* Port range setting is optional, ignore it if not supported */
471
472 #ifdef IP_PORTRANGE
473 if (sk_is_ipv4(s))
474 {
475 int range = IP_PORTRANGE_HIGH;
476 if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
477 ERR("IP_PORTRANGE");
478 }
479 #endif
480
481 #ifdef IPV6_PORTRANGE
482 if (sk_is_ipv6(s))
483 {
484 int range = IPV6_PORTRANGE_HIGH;
485 if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
486 ERR("IPV6_PORTRANGE");
487 }
488 #endif
489
490 return 0;
491 }
492
493 static inline byte *
494 sk_skip_ip_header(byte *pkt, int *len)
495 {
496 if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
497 return NULL;
498
499 int hlen = (*pkt & 0x0f) * 4;
500 if ((hlen < 20) || (hlen > *len))
501 return NULL;
502
503 *len -= hlen;
504 return pkt + hlen;
505 }
506
507 byte *
508 sk_rx_buffer(sock *s, int *len)
509 {
510 if (sk_is_ipv4(s) && (s->type == SK_IP))
511 return sk_skip_ip_header(s->rbuf, len);
512 else
513 return s->rbuf;
514 }
515
516
517 /*
518 * Public socket functions
519 */
520
521 /**
522 * sk_setup_multicast - enable multicast for given socket
523 * @s: socket
524 *
525 * Prepare transmission of multicast packets for given datagram socket.
526 * The socket must have defined @iface.
527 *
528 * Result: 0 for success, -1 for an error.
529 */
530
531 int
532 sk_setup_multicast(sock *s)
533 {
534 ASSERT(s->iface);
535
536 if (sk_is_ipv4(s))
537 return sk_setup_multicast4(s);
538 else
539 return sk_setup_multicast6(s);
540 }
541
542 /**
543 * sk_join_group - join multicast group for given socket
544 * @s: socket
545 * @maddr: multicast address
546 *
547 * Join multicast group for given datagram socket and associated interface.
548 * The socket must have defined @iface.
549 *
550 * Result: 0 for success, -1 for an error.
551 */
552
553 int
554 sk_join_group(sock *s, ip_addr maddr)
555 {
556 if (sk_is_ipv4(s))
557 return sk_join_group4(s, maddr);
558 else
559 return sk_join_group6(s, maddr);
560 }
561
562 /**
563 * sk_leave_group - leave multicast group for given socket
564 * @s: socket
565 * @maddr: multicast address
566 *
567 * Leave multicast group for given datagram socket and associated interface.
568 * The socket must have defined @iface.
569 *
570 * Result: 0 for success, -1 for an error.
571 */
572
573 int
574 sk_leave_group(sock *s, ip_addr maddr)
575 {
576 if (sk_is_ipv4(s))
577 return sk_leave_group4(s, maddr);
578 else
579 return sk_leave_group6(s, maddr);
580 }
581
582 /**
583 * sk_setup_broadcast - enable broadcast for given socket
584 * @s: socket
585 *
586 * Allow reception and transmission of broadcast packets for given datagram
587 * socket. The socket must have defined @iface. For transmission, packets should
588 * be send to @brd address of @iface.
589 *
590 * Result: 0 for success, -1 for an error.
591 */
592
593 int
594 sk_setup_broadcast(sock *s)
595 {
596 int y = 1;
597
598 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
599 ERR("SO_BROADCAST");
600
601 return 0;
602 }
603
604 /**
605 * sk_set_ttl - set transmit TTL for given socket
606 * @s: socket
607 * @ttl: TTL value
608 *
609 * Set TTL for already opened connections when TTL was not set before. Useful
610 * for accepted connections when different ones should have different TTL.
611 *
612 * Result: 0 for success, -1 for an error.
613 */
614
615 int
616 sk_set_ttl(sock *s, int ttl)
617 {
618 s->ttl = ttl;
619
620 if (sk_is_ipv4(s))
621 return sk_set_ttl4(s, ttl);
622 else
623 return sk_set_ttl6(s, ttl);
624 }
625
626 /**
627 * sk_set_min_ttl - set minimal accepted TTL for given socket
628 * @s: socket
629 * @ttl: TTL value
630 *
631 * Set minimal accepted TTL for given socket. Can be used for TTL security.
632 * implementations.
633 *
634 * Result: 0 for success, -1 for an error.
635 */
636
637 int
638 sk_set_min_ttl(sock *s, int ttl)
639 {
640 if (sk_is_ipv4(s))
641 return sk_set_min_ttl4(s, ttl);
642 else
643 return sk_set_min_ttl6(s, ttl);
644 }
645
646 #if 0
647 /**
648 * sk_set_md5_auth - add / remove MD5 security association for given socket
649 * @s: socket
650 * @local: IP address of local side
651 * @remote: IP address of remote side
652 * @ifa: Interface for link-local IP address
653 * @passwd: Password used for MD5 authentication
654 * @setkey: Update also system SA/SP database
655 *
656 * In TCP MD5 handling code in kernel, there is a set of security associations
657 * used for choosing password and other authentication parameters according to
658 * the local and remote address. This function is useful for listening socket,
659 * for active sockets it may be enough to set s->password field.
660 *
661 * When called with passwd != NULL, the new pair is added,
662 * When called with passwd == NULL, the existing pair is removed.
663 *
664 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
665 * stored in global SA/SP database (but the behavior also must be enabled on
666 * per-socket basis). In case of multiple sockets to the same neighbor, the
667 * socket-specific state must be configured for each socket while global state
668 * just once per src-dst pair. The @setkey argument controls whether the global
669 * state (SA/SP database) is also updated.
670 *
671 * Result: 0 for success, -1 for an error.
672 */
673
674 int
675 sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
676 { DUMMY; }
677 #endif
678
679 /**
680 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
681 * @s: socket
682 * @offset: offset
683 *
684 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
685 * kernel will automatically fill it for outgoing packets and check it for
686 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
687 * known to the kernel.
688 *
689 * Result: 0 for success, -1 for an error.
690 */
691
692 int
693 sk_set_ipv6_checksum(sock *s, int offset)
694 {
695 if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
696 ERR("IPV6_CHECKSUM");
697
698 return 0;
699 }
700
701 int
702 sk_set_icmp6_filter(sock *s, int p1, int p2)
703 {
704 /* a bit of lame interface, but it is here only for Radv */
705 struct icmp6_filter f;
706
707 ICMP6_FILTER_SETBLOCKALL(&f);
708 ICMP6_FILTER_SETPASS(p1, &f);
709 ICMP6_FILTER_SETPASS(p2, &f);
710
711 if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
712 ERR("ICMP6_FILTER");
713
714 return 0;
715 }
716
717 void
718 sk_log_error(sock *s, const char *p)
719 {
720 log(L_ERR "%s: Socket error: %s%#m", p, s->err);
721 }
722
723
724 /*
725 * Actual struct birdsock code
726 */
727
728 static list sock_list;
729 static struct birdsock *current_sock;
730 static struct birdsock *stored_sock;
731
732 static inline sock *
733 sk_next(sock *s)
734 {
735 if (!s->n.next->next)
736 return NULL;
737 else
738 return SKIP_BACK(sock, n, s->n.next);
739 }
740
741 static void
742 sk_alloc_bufs(sock *s)
743 {
744 if (!s->rbuf && s->rbsize)
745 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
746 s->rpos = s->rbuf;
747 if (!s->tbuf && s->tbsize)
748 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
749 s->tpos = s->ttx = s->tbuf;
750 }
751
752 static void
753 sk_free_bufs(sock *s)
754 {
755 if (s->rbuf_alloc)
756 {
757 xfree(s->rbuf_alloc);
758 s->rbuf = s->rbuf_alloc = NULL;
759 }
760 if (s->tbuf_alloc)
761 {
762 xfree(s->tbuf_alloc);
763 s->tbuf = s->tbuf_alloc = NULL;
764 }
765 }
766
#ifdef HAVE_LIBSSH
/*
 * Tear down the SSH state of @s: detach it from the socket, close (if open)
 * and free the channel, then disconnect and free the session. Does nothing
 * when the socket has no SSH state.
 */
static void
sk_ssh_free(sock *s)
{
  struct ssh_sock *ssh = s->ssh;

  if (!ssh)
    return;

  s->ssh = NULL;

  if (ssh->channel != NULL)
  {
    if (ssh_channel_is_open(ssh->channel))
      ssh_channel_close(ssh->channel);

    ssh_channel_free(ssh->channel);
    ssh->channel = NULL;
  }

  if (ssh->session != NULL)
  {
    ssh_disconnect(ssh->session);
    ssh_free(ssh->session);
    ssh->session = NULL;
  }
}
#endif
794
795 static void
796 sk_free(resource *r)
797 {
798 sock *s = (sock *) r;
799
800 sk_free_bufs(s);
801
802 #ifdef HAVE_LIBSSH
803 if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
804 sk_ssh_free(s);
805 #endif
806
807 if (s->fd < 0)
808 return;
809
810 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
811 if (!(s->flags & SKF_THREAD))
812 {
813 if (s == current_sock)
814 current_sock = sk_next(s);
815 if (s == stored_sock)
816 stored_sock = sk_next(s);
817 rem_node(&s->n);
818 }
819
820 if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
821 close(s->fd);
822
823 s->fd = -1;
824 }
825
826 void
827 sk_set_rbsize(sock *s, uint val)
828 {
829 ASSERT(s->rbuf_alloc == s->rbuf);
830
831 if (s->rbsize == val)
832 return;
833
834 s->rbsize = val;
835 xfree(s->rbuf_alloc);
836 s->rbuf_alloc = xmalloc(val);
837 s->rpos = s->rbuf = s->rbuf_alloc;
838 }
839
840 void
841 sk_set_tbsize(sock *s, uint val)
842 {
843 ASSERT(s->tbuf_alloc == s->tbuf);
844
845 if (s->tbsize == val)
846 return;
847
848 byte *old_tbuf = s->tbuf;
849
850 s->tbsize = val;
851 s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
852 s->tpos = s->tbuf + (s->tpos - old_tbuf);
853 s->ttx = s->tbuf + (s->ttx - old_tbuf);
854 }
855
856 void
857 sk_set_tbuf(sock *s, void *tbuf)
858 {
859 s->tbuf = tbuf ?: s->tbuf_alloc;
860 s->ttx = s->tpos = s->tbuf;
861 }
862
863 void
864 sk_reallocate(sock *s)
865 {
866 sk_free_bufs(s);
867 sk_alloc_bufs(s);
868 }
869
870 static void
871 sk_dump(resource *r)
872 {
873 sock *s = (sock *) r;
874 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
875
876 debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
877 sk_type_names[s->type],
878 s->data,
879 s->saddr,
880 s->sport,
881 s->daddr,
882 s->dport,
883 s->tos,
884 s->ttl,
885 s->iface ? s->iface->name : "none");
886 }
887
888 static struct resclass sk_class = {
889 "Socket",
890 sizeof(sock),
891 sk_free,
892 sk_dump,
893 NULL,
894 NULL
895 };
896
897 /**
898 * sk_new - create a socket
899 * @p: pool
900 *
901 * This function creates a new socket resource. If you want to use it,
902 * you need to fill in all the required fields of the structure and
903 * call sk_open() to do the actual opening of the socket.
904 *
905 * The real function name is sock_new(), sk_new() is a macro wrapper
906 * to avoid collision with OpenSSL.
907 */
908 sock *
909 sock_new(pool *p)
910 {
911 sock *s = ralloc(p, &sk_class);
912 s->pool = p;
913 // s->saddr = s->daddr = IPA_NONE;
914 s->tos = s->priority = s->ttl = -1;
915 s->fd = -1;
916 return s;
917 }
918
919 static int
920 sk_setup(sock *s)
921 {
922 int y = 1;
923 int fd = s->fd;
924
925 if (s->type == SK_SSH_ACTIVE)
926 return 0;
927
928 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
929 ERR("O_NONBLOCK");
930
931 if (!s->af)
932 return 0;
933
934 if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
935 s->flags |= SKF_PKTINFO;
936
937 #ifdef CONFIG_USE_HDRINCL
938 if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
939 {
940 s->flags &= ~SKF_PKTINFO;
941 s->flags |= SKF_HDRINCL;
942 if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
943 ERR("IP_HDRINCL");
944 }
945 #endif
946
947 if (s->vrf && !s->iface)
948 {
949 /* Bind socket to associated VRF interface.
950 This is Linux-specific, but so is SO_BINDTODEVICE. */
951 #ifdef SO_BINDTODEVICE
952 struct ifreq ifr = {};
953 strcpy(ifr.ifr_name, s->vrf->name);
954 if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
955 ERR("SO_BINDTODEVICE");
956 #endif
957 }
958
959 if (s->iface)
960 {
961 #ifdef SO_BINDTODEVICE
962 struct ifreq ifr = {};
963 strcpy(ifr.ifr_name, s->iface->name);
964 if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
965 ERR("SO_BINDTODEVICE");
966 #endif
967
968 #ifdef CONFIG_UNIX_DONTROUTE
969 if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
970 ERR("SO_DONTROUTE");
971 #endif
972 }
973
974 if (sk_is_ipv4(s))
975 {
976 if (s->flags & SKF_LADDR_RX)
977 if (sk_request_cmsg4_pktinfo(s) < 0)
978 return -1;
979
980 if (s->flags & SKF_TTL_RX)
981 if (sk_request_cmsg4_ttl(s) < 0)
982 return -1;
983
984 if ((s->type == SK_UDP) || (s->type == SK_IP))
985 if (sk_disable_mtu_disc4(s) < 0)
986 return -1;
987
988 if (s->ttl >= 0)
989 if (sk_set_ttl4(s, s->ttl) < 0)
990 return -1;
991
992 if (s->tos >= 0)
993 if (sk_set_tos4(s, s->tos) < 0)
994 return -1;
995 }
996
997 if (sk_is_ipv6(s))
998 {
999 if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
1000 if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1001 ERR("IPV6_V6ONLY");
1002
1003 if (s->flags & SKF_LADDR_RX)
1004 if (sk_request_cmsg6_pktinfo(s) < 0)
1005 return -1;
1006
1007 if (s->flags & SKF_TTL_RX)
1008 if (sk_request_cmsg6_ttl(s) < 0)
1009 return -1;
1010
1011 if ((s->type == SK_UDP) || (s->type == SK_IP))
1012 if (sk_disable_mtu_disc6(s) < 0)
1013 return -1;
1014
1015 if (s->ttl >= 0)
1016 if (sk_set_ttl6(s, s->ttl) < 0)
1017 return -1;
1018
1019 if (s->tos >= 0)
1020 if (sk_set_tos6(s, s->tos) < 0)
1021 return -1;
1022 }
1023
1024 /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
1025 if (s->priority >= 0)
1026 if (sk_set_priority(s, s->priority) < 0)
1027 return -1;
1028
1029 return 0;
1030 }
1031
1032 static void
1033 sk_insert(sock *s)
1034 {
1035 add_tail(&sock_list, &s->n);
1036 }
1037
1038 static void
1039 sk_tcp_connected(sock *s)
1040 {
1041 sockaddr sa;
1042 int sa_len = sizeof(sa);
1043
1044 if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1045 (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1046 log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1047
1048 s->type = SK_TCP;
1049 sk_alloc_bufs(s);
1050 s->tx_hook(s);
1051 }
1052
#ifdef HAVE_LIBSSH
/*
 * Finish an active SSH connection: allocate the buffers, turn the socket
 * into SK_SSH and call the tx hook.
 */
static void
sk_ssh_connected(sock *s)
{
  sk_alloc_bufs(s);

  s->type = SK_SSH;
  s->tx_hook(s);
}
#endif
1062
1063 static int
1064 sk_passive_connected(sock *s, int type)
1065 {
1066 sockaddr loc_sa, rem_sa;
1067 int loc_sa_len = sizeof(loc_sa);
1068 int rem_sa_len = sizeof(rem_sa);
1069
1070 int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1071 if (fd < 0)
1072 {
1073 if ((errno != EINTR) && (errno != EAGAIN))
1074 s->err_hook(s, errno);
1075 return 0;
1076 }
1077
1078 sock *t = sk_new(s->pool);
1079 t->type = type;
1080 t->data = s->data;
1081 t->af = s->af;
1082 t->fd = fd;
1083 t->ttl = s->ttl;
1084 t->tos = s->tos;
1085 t->vrf = s->vrf;
1086 t->rbsize = s->rbsize;
1087 t->tbsize = s->tbsize;
1088
1089 if (type == SK_TCP)
1090 {
1091 if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1092 (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1093 log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1094
1095 if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1096 log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1097 }
1098
1099 if (sk_setup(t) < 0)
1100 {
1101 /* FIXME: Call err_hook instead ? */
1102 log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1103
1104 /* FIXME: handle it better in rfree() */
1105 close(t->fd);
1106 t->fd = -1;
1107 rfree(t);
1108 return 1;
1109 }
1110
1111 sk_insert(t);
1112 sk_alloc_bufs(t);
1113 s->rx_hook(t, 0);
1114 return 1;
1115 }
1116
1117 #ifdef HAVE_LIBSSH
1118 /*
1119 * Return SSH_OK or SSH_AGAIN or SSH_ERROR
1120 */
1121 static int
1122 sk_ssh_connect(sock *s)
1123 {
1124 s->fd = ssh_get_fd(s->ssh->session);
1125
1126 /* Big fall thru automata */
1127 switch (s->ssh->state)
1128 {
1129 case SK_SSH_CONNECT:
1130 {
1131 switch (ssh_connect(s->ssh->session))
1132 {
1133 case SSH_AGAIN:
1134 /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
1135 * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
1136 * documented but our code relies on that.
1137 */
1138 return SSH_AGAIN;
1139
1140 case SSH_OK:
1141 break;
1142
1143 default:
1144 return SSH_ERROR;
1145 }
1146 } /* fallthrough */
1147
1148 case SK_SSH_SERVER_KNOWN:
1149 {
1150 s->ssh->state = SK_SSH_SERVER_KNOWN;
1151
1152 if (s->ssh->server_hostkey_path)
1153 {
1154 int server_identity_is_ok = 1;
1155
1156 /* Check server identity */
1157 switch (ssh_is_server_known(s->ssh->session))
1158 {
1159 #define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
1160 case SSH_SERVER_KNOWN_OK:
1161 /* The server is known and has not changed. */
1162 break;
1163
1164 case SSH_SERVER_NOT_KNOWN:
1165 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
1166 break;
1167
1168 case SSH_SERVER_KNOWN_CHANGED:
1169 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
1170 server_identity_is_ok = 0;
1171 break;
1172
1173 case SSH_SERVER_FILE_NOT_FOUND:
1174 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
1175 server_identity_is_ok = 0;
1176 break;
1177
1178 case SSH_SERVER_ERROR:
1179 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
1180 server_identity_is_ok = 0;
1181 break;
1182
1183 case SSH_SERVER_FOUND_OTHER:
1184 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
1185 "It is a possible attack.");
1186 server_identity_is_ok = 0;
1187 break;
1188 }
1189
1190 if (!server_identity_is_ok)
1191 return SSH_ERROR;
1192 }
1193 } /* fallthrough */
1194
1195 case SK_SSH_USERAUTH:
1196 {
1197 s->ssh->state = SK_SSH_USERAUTH;
1198 switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
1199 {
1200 case SSH_AUTH_AGAIN:
1201 return SSH_AGAIN;
1202
1203 case SSH_AUTH_SUCCESS:
1204 break;
1205
1206 default:
1207 return SSH_ERROR;
1208 }
1209 } /* fallthrough */
1210
1211 case SK_SSH_CHANNEL:
1212 {
1213 s->ssh->state = SK_SSH_CHANNEL;
1214 s->ssh->channel = ssh_channel_new(s->ssh->session);
1215 if (s->ssh->channel == NULL)
1216 return SSH_ERROR;
1217 } /* fallthrough */
1218
1219 case SK_SSH_SESSION:
1220 {
1221 s->ssh->state = SK_SSH_SESSION;
1222 switch (ssh_channel_open_session(s->ssh->channel))
1223 {
1224 case SSH_AGAIN:
1225 return SSH_AGAIN;
1226
1227 case SSH_OK:
1228 break;
1229
1230 default:
1231 return SSH_ERROR;
1232 }
1233 } /* fallthrough */
1234
1235 case SK_SSH_SUBSYSTEM:
1236 {
1237 s->ssh->state = SK_SSH_SUBSYSTEM;
1238 if (s->ssh->subsystem)
1239 {
1240 switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
1241 {
1242 case SSH_AGAIN:
1243 return SSH_AGAIN;
1244
1245 case SSH_OK:
1246 break;
1247
1248 default:
1249 return SSH_ERROR;
1250 }
1251 }
1252 } /* fallthrough */
1253
1254 case SK_SSH_ESTABLISHED:
1255 s->ssh->state = SK_SSH_ESTABLISHED;
1256 }
1257
1258 return SSH_OK;
1259 }
1260
1261 /*
1262 * Return file descriptor number if success
1263 * Return -1 if failed
1264 */
1265 static int
1266 sk_open_ssh(sock *s)
1267 {
1268 if (!s->ssh)
1269 bug("sk_open() sock->ssh is not allocated");
1270
1271 ssh_session sess = ssh_new();
1272 if (sess == NULL)
1273 ERR2("Cannot create a ssh session");
1274 s->ssh->session = sess;
1275
1276 const int verbosity = SSH_LOG_NOLOG;
1277 ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
1278 ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
1279 ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
1280 /* TODO: Add SSH_OPTIONS_BINDADDR */
1281 ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
1282
1283 if (s->ssh->server_hostkey_path)
1284 ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
1285
1286 if (s->ssh->client_privkey_path)
1287 ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
1288
1289 ssh_set_blocking(sess, 0);
1290
1291 switch (sk_ssh_connect(s))
1292 {
1293 case SSH_AGAIN:
1294 break;
1295
1296 case SSH_OK:
1297 sk_ssh_connected(s);
1298 break;
1299
1300 case SSH_ERROR:
1301 ERR2(ssh_get_error(sess));
1302 break;
1303 }
1304
1305 return ssh_get_fd(sess);
1306
1307 err:
1308 return -1;
1309 }
1310 #endif
1311
1312 /**
1313 * sk_open - open a socket
1314 * @s: socket
1315 *
1316 * This function takes a socket resource created by sk_new() and
1317 * initialized by the user and binds a corresponding network connection
1318 * to it.
1319 *
1320 * Result: 0 for success, -1 for an error.
1321 */
1322 int
1323 sk_open(sock *s)
1324 {
1325 int af = AF_UNSPEC;
1326 int fd = -1;
1327 int do_bind = 0;
1328 int bind_port = 0;
1329 ip_addr bind_addr = IPA_NONE;
1330 sockaddr sa;
1331
1332 if (s->type <= SK_IP)
1333 {
1334 /*
1335 * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
1336 * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
1337 * But the specifications have to be consistent.
1338 */
1339
1340 switch (s->subtype)
1341 {
1342 case 0:
1343 ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
1344 (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
1345 af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
1346 break;
1347
1348 case SK_IPV4:
1349 ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
1350 ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
1351 af = AF_INET;
1352 break;
1353
1354 case SK_IPV6:
1355 ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
1356 ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
1357 af = AF_INET6;
1358 break;
1359
1360 default:
1361 bug("Invalid subtype %d", s->subtype);
1362 }
1363 }
1364
1365 switch (s->type)
1366 {
1367 case SK_TCP_ACTIVE:
1368 s->ttx = ""; /* Force s->ttx != s->tpos */
1369 /* Fall thru */
1370 case SK_TCP_PASSIVE:
1371 fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1372 bind_port = s->sport;
1373 bind_addr = s->saddr;
1374 do_bind = bind_port || ipa_nonzero(bind_addr);
1375 break;
1376
1377 #ifdef HAVE_LIBSSH
1378 case SK_SSH_ACTIVE:
1379 s->ttx = ""; /* Force s->ttx != s->tpos */
1380 fd = sk_open_ssh(s);
1381 break;
1382 #endif
1383
1384 case SK_UDP:
1385 fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1386 bind_port = s->sport;
1387 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1388 do_bind = 1;
1389 break;
1390
1391 case SK_IP:
1392 fd = socket(af, SOCK_RAW, s->dport);
1393 bind_port = 0;
1394 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1395 do_bind = ipa_nonzero(bind_addr);
1396 break;
1397
1398 case SK_MAGIC:
1399 af = 0;
1400 fd = s->fd;
1401 break;
1402
1403 default:
1404 bug("sk_open() called for invalid sock type %d", s->type);
1405 }
1406
1407 if (fd < 0)
1408 ERR("socket");
1409
1410 s->af = af;
1411 s->fd = fd;
1412
1413 if (sk_setup(s) < 0)
1414 goto err;
1415
1416 if (do_bind)
1417 {
1418 if (bind_port)
1419 {
1420 int y = 1;
1421
1422 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1423 ERR2("SO_REUSEADDR");
1424
1425 #ifdef CONFIG_NO_IFACE_BIND
1426 /* Workaround missing ability to bind to an iface */
1427 if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1428 {
1429 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1430 ERR2("SO_REUSEPORT");
1431 }
1432 #endif
1433 }
1434 else
1435 if (s->flags & SKF_HIGH_PORT)
1436 if (sk_set_high_port(s) < 0)
1437 log(L_WARN "Socket error: %s%#m", s->err);
1438
1439 sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
1440 if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1441 ERR2("bind");
1442 }
1443
1444 if (s->password)
1445 if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
1446 goto err;
1447
1448 switch (s->type)
1449 {
1450 case SK_TCP_ACTIVE:
1451 sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1452 if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1453 sk_tcp_connected(s);
1454 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1455 errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1456 ERR2("connect");
1457 break;
1458
1459 case SK_TCP_PASSIVE:
1460 if (listen(fd, 8) < 0)
1461 ERR2("listen");
1462 break;
1463
1464 case SK_SSH_ACTIVE:
1465 case SK_MAGIC:
1466 break;
1467
1468 default:
1469 sk_alloc_bufs(s);
1470 }
1471
1472 if (!(s->flags & SKF_THREAD))
1473 sk_insert(s);
1474
1475 return 0;
1476
1477 err:
1478 close(fd);
1479 s->fd = -1;
1480 return -1;
1481 }
1482
1483 int
1484 sk_open_unix(sock *s, char *name)
1485 {
1486 struct sockaddr_un sa;
1487 int fd;
1488
1489 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1490
1491 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1492 if (fd < 0)
1493 return -1;
1494
1495 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1496 return -1;
1497
1498 /* Path length checked in test_old_bird() */
1499 sa.sun_family = AF_UNIX;
1500 strcpy(sa.sun_path, name);
1501
1502 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1503 return -1;
1504
1505 if (listen(fd, 8) < 0)
1506 return -1;
1507
1508 s->fd = fd;
1509 sk_insert(s);
1510 return 0;
1511 }
1512
1513
/* Control-message buffer sizes: the larger of the IPv4 and IPv6 requirement */
#define CMSG_RX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO, CMSG6_SPACE_PKTINFO)
1517
1518 static void
1519 sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1520 {
1521 if (sk_is_ipv4(s))
1522 sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1523 else
1524 sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1525 }
1526
1527 static void
1528 sk_process_cmsgs(sock *s, struct msghdr *msg)
1529 {
1530 struct cmsghdr *cm;
1531
1532 s->laddr = IPA_NONE;
1533 s->lifindex = 0;
1534 s->rcv_ttl = -1;
1535
1536 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1537 {
1538 if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1539 {
1540 sk_process_cmsg4_pktinfo(s, cm);
1541 sk_process_cmsg4_ttl(s, cm);
1542 }
1543
1544 if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1545 {
1546 sk_process_cmsg6_pktinfo(s, cm);
1547 sk_process_cmsg6_ttl(s, cm);
1548 }
1549 }
1550 }
1551
1552
1553 static inline int
1554 sk_sendmsg(sock *s)
1555 {
1556 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1557 byte cmsg_buf[CMSG_TX_SPACE];
1558 sockaddr dst;
1559 int flags = 0;
1560
1561 sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
1562
1563 struct msghdr msg = {
1564 .msg_name = &dst.sa,
1565 .msg_namelen = SA_LEN(dst),
1566 .msg_iov = &iov,
1567 .msg_iovlen = 1
1568 };
1569
1570 #ifdef CONFIG_DONTROUTE_UNICAST
1571 /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
1572 cannot use it for other cases (e.g. when TTL security is used). */
1573 if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1))
1574 flags = MSG_DONTROUTE;
1575 #endif
1576
1577 #ifdef CONFIG_USE_HDRINCL
1578 byte hdr[20];
1579 struct iovec iov2[2] = { {hdr, 20}, iov };
1580
1581 if (s->flags & SKF_HDRINCL)
1582 {
1583 sk_prepare_ip_header(s, hdr, iov.iov_len);
1584 msg.msg_iov = iov2;
1585 msg.msg_iovlen = 2;
1586 }
1587 #endif
1588
1589 if (s->flags & SKF_PKTINFO)
1590 sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1591
1592 return sendmsg(s->fd, &msg, flags);
1593 }
1594
1595 static inline int
1596 sk_recvmsg(sock *s)
1597 {
1598 struct iovec iov = {s->rbuf, s->rbsize};
1599 byte cmsg_buf[CMSG_RX_SPACE];
1600 sockaddr src;
1601
1602 struct msghdr msg = {
1603 .msg_name = &src.sa,
1604 .msg_namelen = sizeof(src), // XXXX ??
1605 .msg_iov = &iov,
1606 .msg_iovlen = 1,
1607 .msg_control = cmsg_buf,
1608 .msg_controllen = sizeof(cmsg_buf),
1609 .msg_flags = 0
1610 };
1611
1612 int rv = recvmsg(s->fd, &msg, 0);
1613 if (rv < 0)
1614 return rv;
1615
1616 //ifdef IPV4
1617 // if (cf_type == SK_IP)
1618 // rv = ipv4_skip_header(pbuf, rv);
1619 //endif
1620
1621 sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1622 sk_process_cmsgs(s, &msg);
1623
1624 if (msg.msg_flags & MSG_TRUNC)
1625 s->flags |= SKF_TRUNCATED;
1626 else
1627 s->flags &= ~SKF_TRUNCATED;
1628
1629 return rv;
1630 }
1631
1632
1633 static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1634
1635 static int
1636 sk_maybe_write(sock *s)
1637 {
1638 int e;
1639
1640 switch (s->type)
1641 {
1642 case SK_TCP:
1643 case SK_MAGIC:
1644 case SK_UNIX:
1645 while (s->ttx != s->tpos)
1646 {
1647 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1648
1649 if (e < 0)
1650 {
1651 if (errno != EINTR && errno != EAGAIN)
1652 {
1653 reset_tx_buffer(s);
1654 /* EPIPE is just a connection close notification during TX */
1655 s->err_hook(s, (errno != EPIPE) ? errno : 0);
1656 return -1;
1657 }
1658 return 0;
1659 }
1660 s->ttx += e;
1661 }
1662 reset_tx_buffer(s);
1663 return 1;
1664
1665 #ifdef HAVE_LIBSSH
1666 case SK_SSH:
1667 while (s->ttx != s->tpos)
1668 {
1669 e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
1670
1671 if (e < 0)
1672 {
1673 s->err = ssh_get_error(s->ssh->session);
1674 s->err_hook(s, ssh_get_error_code(s->ssh->session));
1675
1676 reset_tx_buffer(s);
1677 /* EPIPE is just a connection close notification during TX */
1678 s->err_hook(s, (errno != EPIPE) ? errno : 0);
1679 return -1;
1680 }
1681 s->ttx += e;
1682 }
1683 reset_tx_buffer(s);
1684 return 1;
1685 #endif
1686
1687 case SK_UDP:
1688 case SK_IP:
1689 {
1690 if (s->tbuf == s->tpos)
1691 return 1;
1692
1693 e = sk_sendmsg(s);
1694
1695 if (e < 0)
1696 {
1697 if (errno != EINTR && errno != EAGAIN)
1698 {
1699 reset_tx_buffer(s);
1700 s->err_hook(s, errno);
1701 return -1;
1702 }
1703
1704 if (!s->tx_hook)
1705 reset_tx_buffer(s);
1706 return 0;
1707 }
1708 reset_tx_buffer(s);
1709 return 1;
1710 }
1711
1712 default:
1713 bug("sk_maybe_write: unknown socket type %d", s->type);
1714 }
1715 }
1716
1717 int
1718 sk_rx_ready(sock *s)
1719 {
1720 int rv;
1721 struct pollfd pfd = { .fd = s->fd };
1722 pfd.events |= POLLIN;
1723
1724 redo:
1725 rv = poll(&pfd, 1, 0);
1726
1727 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1728 goto redo;
1729
1730 return rv;
1731 }
1732
1733 /**
1734 * sk_send - send data to a socket
1735 * @s: socket
1736 * @len: number of bytes to send
1737 *
1738 * This function sends @len bytes of data prepared in the
1739 * transmit buffer of the socket @s to the network connection.
1740 * If the packet can be sent immediately, it does so and returns
1741 * 1, else it queues the packet for later processing, returns 0
1742 * and calls the @tx_hook of the socket when the tranmission
1743 * takes place.
1744 */
1745 int
1746 sk_send(sock *s, unsigned len)
1747 {
1748 s->ttx = s->tbuf;
1749 s->tpos = s->tbuf + len;
1750 return sk_maybe_write(s);
1751 }
1752
1753 /**
1754 * sk_send_to - send data to a specific destination
1755 * @s: socket
1756 * @len: number of bytes to send
1757 * @addr: IP address to send the packet to
1758 * @port: port to send the packet to
1759 *
1760 * This is a sk_send() replacement for connection-less packet sockets
1761 * which allows destination of the packet to be chosen dynamically.
1762 * Raw IP sockets should use 0 for @port.
1763 */
1764 int
1765 sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1766 {
1767 s->daddr = addr;
1768 if (port)
1769 s->dport = port;
1770
1771 s->ttx = s->tbuf;
1772 s->tpos = s->tbuf + len;
1773 return sk_maybe_write(s);
1774 }
1775
1776 /*
1777 int
1778 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1779 ip_addr saddr, ip_addr daddr, unsigned dport)
1780 {
1781 s->iface = ifa;
1782 s->saddr = saddr;
1783 s->daddr = daddr;
1784 s->dport = dport;
1785 s->ttx = s->tbuf;
1786 s->tpos = s->tbuf + len;
1787 return sk_maybe_write(s);
1788 }
1789 */
1790
1791 static void
1792 call_rx_hook(sock *s, int size)
1793 {
1794 if (s->rx_hook(s, size))
1795 {
1796 /* We need to be careful since the socket could have been deleted by the hook */
1797 if (current_sock == s)
1798 s->rpos = s->rbuf;
1799 }
1800 }
1801
1802 #ifdef HAVE_LIBSSH
1803 static int
1804 sk_read_ssh(sock *s)
1805 {
1806 ssh_channel rchans[2] = { s->ssh->channel, NULL };
1807 struct timeval timev = { 1, 0 };
1808
1809 if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
1810 return 1; /* Try again */
1811
1812 if (ssh_channel_is_eof(s->ssh->channel) != 0)
1813 {
1814 /* The remote side is closing the connection */
1815 s->err_hook(s, 0);
1816 return 0;
1817 }
1818
1819 if (rchans[0] == NULL)
1820 return 0; /* No data is available on the socket */
1821
1822 const uint used_bytes = s->rpos - s->rbuf;
1823 const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
1824 if (read_bytes > 0)
1825 {
1826 /* Received data */
1827 s->rpos += read_bytes;
1828 call_rx_hook(s, used_bytes + read_bytes);
1829 return 1;
1830 }
1831 else if (read_bytes == 0)
1832 {
1833 if (ssh_channel_is_eof(s->ssh->channel) != 0)
1834 {
1835 /* The remote side is closing the connection */
1836 s->err_hook(s, 0);
1837 }
1838 }
1839 else
1840 {
1841 s->err = ssh_get_error(s->ssh->session);
1842 s->err_hook(s, ssh_get_error_code(s->ssh->session));
1843 }
1844
1845 return 0; /* No data is available on the socket */
1846 }
1847 #endif
1848
1849 /* sk_read() and sk_write() are called from BFD's event loop */
1850
1851 int
1852 sk_read(sock *s, int revents)
1853 {
1854 switch (s->type)
1855 {
1856 case SK_TCP_PASSIVE:
1857 return sk_passive_connected(s, SK_TCP);
1858
1859 case SK_UNIX_PASSIVE:
1860 return sk_passive_connected(s, SK_UNIX);
1861
1862 case SK_TCP:
1863 case SK_UNIX:
1864 {
1865 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1866
1867 if (c < 0)
1868 {
1869 if (errno != EINTR && errno != EAGAIN)
1870 s->err_hook(s, errno);
1871 else if (errno == EAGAIN && !(revents & POLLIN))
1872 {
1873 log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
1874 s->err_hook(s, 0);
1875 }
1876 }
1877 else if (!c)
1878 s->err_hook(s, 0);
1879 else
1880 {
1881 s->rpos += c;
1882 call_rx_hook(s, s->rpos - s->rbuf);
1883 return 1;
1884 }
1885 return 0;
1886 }
1887
1888 #ifdef HAVE_LIBSSH
1889 case SK_SSH:
1890 return sk_read_ssh(s);
1891 #endif
1892
1893 case SK_MAGIC:
1894 return s->rx_hook(s, 0);
1895
1896 default:
1897 {
1898 int e = sk_recvmsg(s);
1899
1900 if (e < 0)
1901 {
1902 if (errno != EINTR && errno != EAGAIN)
1903 s->err_hook(s, errno);
1904 return 0;
1905 }
1906
1907 s->rpos = s->rbuf + e;
1908 s->rx_hook(s, e);
1909 return 1;
1910 }
1911 }
1912 }
1913
1914 int
1915 sk_write(sock *s)
1916 {
1917 switch (s->type)
1918 {
1919 case SK_TCP_ACTIVE:
1920 {
1921 sockaddr sa;
1922 sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1923
1924 if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1925 sk_tcp_connected(s);
1926 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1927 s->err_hook(s, errno);
1928 return 0;
1929 }
1930
1931 #ifdef HAVE_LIBSSH
1932 case SK_SSH_ACTIVE:
1933 {
1934 switch (sk_ssh_connect(s))
1935 {
1936 case SSH_OK:
1937 sk_ssh_connected(s);
1938 break;
1939
1940 case SSH_AGAIN:
1941 return 1;
1942
1943 case SSH_ERROR:
1944 s->err = ssh_get_error(s->ssh->session);
1945 s->err_hook(s, ssh_get_error_code(s->ssh->session));
1946 break;
1947 }
1948 return 0;
1949 }
1950 #endif
1951
1952 default:
1953 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1954 {
1955 if (s->tx_hook)
1956 s->tx_hook(s);
1957 return 1;
1958 }
1959 return 0;
1960 }
1961 }
1962
1963 int sk_is_ipv4(sock *s)
1964 { return s->af == AF_INET; }
1965
1966 int sk_is_ipv6(sock *s)
1967 { return s->af == AF_INET6; }
1968
1969 void
1970 sk_err(sock *s, int revents)
1971 {
1972 int se = 0, sse = sizeof(se);
1973 if ((s->type != SK_MAGIC) && (revents & POLLERR))
1974 if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
1975 {
1976 log(L_ERR "IO: Socket error: SO_ERROR: %m");
1977 se = 0;
1978 }
1979
1980 s->err_hook(s, se);
1981 }
1982
1983 void
1984 sk_dump_all(void)
1985 {
1986 node *n;
1987 sock *s;
1988
1989 debug("Open sockets:\n");
1990 WALK_LIST(n, sock_list)
1991 {
1992 s = SKIP_BACK(sock, n, n);
1993 debug("%p ", s);
1994 sk_dump(&s->r);
1995 }
1996 debug("\n");
1997 }
1998
1999
2000 /*
2001 * Internal event log and watchdog
2002 */
2003
2004 #define EVENT_LOG_LENGTH 32
2005
2006 struct event_log_entry
2007 {
2008 void *hook;
2009 void *data;
2010 btime timestamp;
2011 btime duration;
2012 };
2013
2014 static struct event_log_entry event_log[EVENT_LOG_LENGTH];
2015 static struct event_log_entry *event_open;
2016 static int event_log_pos, event_log_num, watchdog_active;
2017 static btime last_time;
2018 static btime loop_time;
2019
2020 static void
2021 io_update_time(void)
2022 {
2023 struct timespec ts;
2024 int rv;
2025
2026 /*
2027 * This is third time-tracking procedure (after update_times() above and
2028 * times_update() in BFD), dedicated to internal event log and latency
2029 * tracking. Hopefully, we consolidate these sometimes.
2030 */
2031
2032 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
2033 if (rv < 0)
2034 die("clock_gettime: %m");
2035
2036 last_time = ts.tv_sec S + ts.tv_nsec NS;
2037
2038 if (event_open)
2039 {
2040 event_open->duration = last_time - event_open->timestamp;
2041
2042 if (event_open->duration > config->latency_limit)
2043 log(L_WARN "Event 0x%p 0x%p took %d ms",
2044 event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
2045
2046 event_open = NULL;
2047 }
2048 }
2049
2050 /**
2051 * io_log_event - mark approaching event into event log
2052 * @hook: event hook address
2053 * @data: event data address
2054 *
2055 * Store info (hook, data, timestamp) about the following internal event into
2056 * a circular event log (@event_log). When latency tracking is enabled, the log
2057 * entry is kept open (in @event_open) so the duration can be filled later.
2058 */
2059 void
2060 io_log_event(void *hook, void *data)
2061 {
2062 if (config->latency_debug)
2063 io_update_time();
2064
2065 struct event_log_entry *en = event_log + event_log_pos;
2066
2067 en->hook = hook;
2068 en->data = data;
2069 en->timestamp = last_time;
2070 en->duration = 0;
2071
2072 event_log_num++;
2073 event_log_pos++;
2074 event_log_pos %= EVENT_LOG_LENGTH;
2075
2076 event_open = config->latency_debug ? en : NULL;
2077 }
2078
2079 static inline void
2080 io_close_event(void)
2081 {
2082 if (event_open)
2083 io_update_time();
2084 }
2085
2086 void
2087 io_log_dump(void)
2088 {
2089 int i;
2090
2091 log(L_DEBUG "Event log:");
2092 for (i = 0; i < EVENT_LOG_LENGTH; i++)
2093 {
2094 struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
2095 if (en->hook)
2096 log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
2097 (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
2098 }
2099 }
2100
2101 void
2102 watchdog_sigalrm(int sig UNUSED)
2103 {
2104 /* Update last_time and duration, but skip latency check */
2105 config->latency_limit = 0xffffffff;
2106 io_update_time();
2107
2108 /* We want core dump */
2109 abort();
2110 }
2111
2112 static inline void
2113 watchdog_start1(void)
2114 {
2115 io_update_time();
2116
2117 loop_time = last_time;
2118 }
2119
2120 static inline void
2121 watchdog_start(void)
2122 {
2123 io_update_time();
2124
2125 loop_time = last_time;
2126 event_log_num = 0;
2127
2128 if (config->watchdog_timeout)
2129 {
2130 alarm(config->watchdog_timeout);
2131 watchdog_active = 1;
2132 }
2133 }
2134
2135 static inline void
2136 watchdog_stop(void)
2137 {
2138 io_update_time();
2139
2140 if (watchdog_active)
2141 {
2142 alarm(0);
2143 watchdog_active = 0;
2144 }
2145
2146 btime duration = last_time - loop_time;
2147 if (duration > config->watchdog_warning)
2148 log(L_WARN "I/O loop cycle took %d ms for %d events",
2149 (int) (duration TO_MS), event_log_num);
2150 }
2151
2152
2153 /*
2154 * Main I/O Loop
2155 */
2156
2157 volatile int async_config_flag; /* Asynchronous reconfiguration/dump scheduled */
2158 volatile int async_dump_flag;
2159 volatile int async_shutdown_flag;
2160
2161 void
2162 io_init(void)
2163 {
2164 init_list(&sock_list);
2165 init_list(&global_event_list);
2166 krt_io_init();
2167 // XXX init_times();
2168 // XXX update_times();
2169 boot_time = current_time();
2170
2171 u64 now = (u64) current_real_time();
2172 srandom((uint) (now ^ (now >> 32)));
2173 }
2174
2175 static int short_loops = 0;
2176 #define SHORT_LOOP_MAX 10
2177
2178 void
2179 io_loop(void)
2180 {
2181 int poll_tout, timeout;
2182 int nfds, events, pout;
2183 timer *t;
2184 sock *s;
2185 node *n;
2186 int fdmax = 256;
2187 struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
2188
2189 watchdog_start1();
2190 for(;;)
2191 {
2192 times_update(&main_timeloop);
2193 events = ev_run_list(&global_event_list);
2194 timers_fire(&main_timeloop);
2195 io_close_event();
2196
2197 // FIXME
2198 poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
2199 if (t = timers_first(&main_timeloop))
2200 {
2201 times_update(&main_timeloop);
2202 timeout = (tm_remains(t) TO_MS) + 1;
2203 poll_tout = MIN(poll_tout, timeout);
2204 }
2205
2206 nfds = 0;
2207 WALK_LIST(n, sock_list)
2208 {
2209 pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
2210 s = SKIP_BACK(sock, n, n);
2211 if (s->rx_hook)
2212 {
2213 pfd[nfds].fd = s->fd;
2214 pfd[nfds].events |= POLLIN;
2215 }
2216 if (s->tx_hook && s->ttx != s->tpos)
2217 {
2218 pfd[nfds].fd = s->fd;
2219 pfd[nfds].events |= POLLOUT;
2220 }
2221 if (pfd[nfds].fd != -1)
2222 {
2223 s->index = nfds;
2224 nfds++;
2225 }
2226 else
2227 s->index = -1;
2228
2229 if (nfds >= fdmax)
2230 {
2231 fdmax *= 2;
2232 pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
2233 }
2234 }
2235
2236 /*
2237 * Yes, this is racy. But even if the signal comes before this test
2238 * and entering poll(), it gets caught on the next timer tick.
2239 */
2240
2241 if (async_config_flag)
2242 {
2243 io_log_event(async_config, NULL);
2244 async_config();
2245 async_config_flag = 0;
2246 continue;
2247 }
2248 if (async_dump_flag)
2249 {
2250 io_log_event(async_dump, NULL);
2251 async_dump();
2252 async_dump_flag = 0;
2253 continue;
2254 }
2255 if (async_shutdown_flag)
2256 {
2257 io_log_event(async_shutdown, NULL);
2258 async_shutdown();
2259 async_shutdown_flag = 0;
2260 continue;
2261 }
2262
2263 /* And finally enter poll() to find active sockets */
2264 watchdog_stop();
2265 pout = poll(pfd, nfds, poll_tout);
2266 watchdog_start();
2267
2268 if (pout < 0)
2269 {
2270 if (errno == EINTR || errno == EAGAIN)
2271 continue;
2272 die("poll: %m");
2273 }
2274 if (pout)
2275 {
2276 times_update(&main_timeloop);
2277
2278 /* guaranteed to be non-empty */
2279 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2280
2281 while (current_sock)
2282 {
2283 sock *s = current_sock;
2284 if (s->index == -1)
2285 {
2286 current_sock = sk_next(s);
2287 goto next;
2288 }
2289
2290 int e;
2291 int steps;
2292
2293 steps = MAX_STEPS;
2294 if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2295 do
2296 {
2297 steps--;
2298 io_log_event(s->rx_hook, s->data);
2299 e = sk_read(s, pfd[s->index].revents);
2300 if (s != current_sock)
2301 goto next;
2302 }
2303 while (e && s->rx_hook && steps);
2304
2305 steps = MAX_STEPS;
2306 if (pfd[s->index].revents & POLLOUT)
2307 do
2308 {
2309 steps--;
2310 io_log_event(s->tx_hook, s->data);
2311 e = sk_write(s);
2312 if (s != current_sock)
2313 goto next;
2314 }
2315 while (e && steps);
2316
2317 current_sock = sk_next(s);
2318 next: ;
2319 }
2320
2321 short_loops++;
2322 if (events && (short_loops < SHORT_LOOP_MAX))
2323 continue;
2324 short_loops = 0;
2325
2326 int count = 0;
2327 current_sock = stored_sock;
2328 if (current_sock == NULL)
2329 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2330
2331 while (current_sock && count < MAX_RX_STEPS)
2332 {
2333 sock *s = current_sock;
2334 if (s->index == -1)
2335 {
2336 current_sock = sk_next(s);
2337 goto next2;
2338 }
2339
2340 if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2341 {
2342 count++;
2343 io_log_event(s->rx_hook, s->data);
2344 sk_read(s, pfd[s->index].revents);
2345 if (s != current_sock)
2346 goto next2;
2347 }
2348
2349 if (pfd[s->index].revents & (POLLHUP | POLLERR))
2350 {
2351 sk_err(s, pfd[s->index].revents);
2352 if (s != current_sock)
2353 goto next2;
2354 }
2355
2356 current_sock = sk_next(s);
2357 next2: ;
2358 }
2359
2360
2361 stored_sock = current_sock;
2362 }
2363 }
2364 }
2365
/*
 * Check whether something already accepts connections on the UNIX socket
 * @path; die if it does, or if @path does not fit into a sockaddr_un.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  const int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");

  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  /* A successful connect means a live listener owns the socket */
  if (!connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)))
    die("I found another BIRD running.");

  close(fd);
}