]> git.ipfire.org Git - thirdparty/bird.git/blob - sysdep/unix/io.c
Better packet priority and traffic class handling.
[thirdparty/bird.git] / sysdep / unix / io.c
1 /*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
12 #define _GNU_SOURCE 1
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <time.h>
17 #include <sys/time.h>
18 #include <sys/types.h>
19 #include <sys/socket.h>
20 #include <sys/fcntl.h>
21 #include <sys/uio.h>
22 #include <sys/un.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <netinet/in.h>
26 #include <netinet/icmp6.h>
27
28 #include "nest/bird.h"
29 #include "lib/lists.h"
30 #include "lib/resource.h"
31 #include "lib/timer.h"
32 #include "lib/socket.h"
33 #include "lib/event.h"
34 #include "lib/string.h"
35 #include "nest/iface.h"
36
37 #include "lib/unix.h"
38 #include "lib/sysio.h"
39
/*
 * Upper bound on the number of tx handler calls for a single socket
 * during one select iteration. It should be small enough that one
 * protocol instance cannot monopolize the CPU.
 */
#define MAX_STEPS 4

/*
 * Upper bound on the number of rx handler calls, summed over all
 * sockets, during one select iteration. RX callbacks are often much
 * more costly, so this is limited to get small latencies.
 */
#define MAX_RX_STEPS 4
50
51 /*
52 * Tracked Files
53 */
54
55 struct rfile {
56 resource r;
57 FILE *f;
58 };
59
60 static void
61 rf_free(resource *r)
62 {
63 struct rfile *a = (struct rfile *) r;
64
65 fclose(a->f);
66 }
67
68 static void
69 rf_dump(resource *r)
70 {
71 struct rfile *a = (struct rfile *) r;
72
73 debug("(FILE *%p)\n", a->f);
74 }
75
76 static struct resclass rf_class = {
77 "FILE",
78 sizeof(struct rfile),
79 rf_free,
80 rf_dump,
81 NULL,
82 NULL
83 };
84
85 void *
86 tracked_fopen(pool *p, char *name, char *mode)
87 {
88 FILE *f = fopen(name, mode);
89
90 if (f)
91 {
92 struct rfile *r = ralloc(p, &rf_class);
93 r->f = f;
94 }
95 return f;
96 }
97
98 /**
99 * DOC: Timers
100 *
101 * Timers are resources which represent a wish of a module to call
102 * a function at the specified time. The platform dependent code
103 * doesn't guarantee exact timing, only that a timer function
104 * won't be called before the requested time.
105 *
106 * In BIRD, time is represented by values of the &bird_clock_t type
107 * which are integral numbers interpreted as a relative number of seconds since
108 * some fixed time point in past. The current time can be read
109 * from variable @now with reasonable accuracy and is monotonic. There is also
110 * a current 'absolute' time in variable @now_real reported by OS.
111 *
112 * Each timer is described by a &timer structure containing a pointer
113 * to the handler function (@hook), data private to this function (@data),
114 * time the function should be called at (@expires, 0 for inactive timers),
115 * for the other fields see |timer.h|.
116 */
117
118 #define NEAR_TIMER_LIMIT 4
119
120 static list near_timers, far_timers;
121 static bird_clock_t first_far_timer = TIME_INFINITY;
122
123 /* now must be different from 0, because 0 is a special value in timer->expires */
124 bird_clock_t now = 1, now_real, boot_time;
125
126 static void
127 update_times_plain(void)
128 {
129 bird_clock_t new_time = time(NULL);
130 int delta = new_time - now_real;
131
132 if ((delta >= 0) && (delta < 60))
133 now += delta;
134 else if (now_real != 0)
135 log(L_WARN "Time jump, delta %d s", delta);
136
137 now_real = new_time;
138 }
139
140 static void
141 update_times_gettime(void)
142 {
143 struct timespec ts;
144 int rv;
145
146 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
147 if (rv != 0)
148 die("clock_gettime: %m");
149
150 if (ts.tv_sec != now) {
151 if (ts.tv_sec < now)
152 log(L_ERR "Monotonic timer is broken");
153
154 now = ts.tv_sec;
155 now_real = time(NULL);
156 }
157 }
158
/* Nonzero when CLOCK_MONOTONIC works; set by init_times() */
static int clock_monotonic_available;

/* Refresh @now / @now_real using the best clock source available. */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
    update_times_plain();
  else
    update_times_gettime();
}
169
170 static inline void
171 init_times(void)
172 {
173 struct timespec ts;
174 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
175 if (!clock_monotonic_available)
176 log(L_WARN "Monotonic timer is missing");
177 }
178
179
180 static void
181 tm_free(resource *r)
182 {
183 timer *t = (timer *) r;
184
185 tm_stop(t);
186 }
187
188 static void
189 tm_dump(resource *r)
190 {
191 timer *t = (timer *) r;
192
193 debug("(code %p, data %p, ", t->hook, t->data);
194 if (t->randomize)
195 debug("rand %d, ", t->randomize);
196 if (t->recurrent)
197 debug("recur %d, ", t->recurrent);
198 if (t->expires)
199 debug("expires in %d sec)\n", t->expires - now);
200 else
201 debug("inactive)\n");
202 }
203
204 static struct resclass tm_class = {
205 "Timer",
206 sizeof(timer),
207 tm_free,
208 tm_dump,
209 NULL,
210 NULL
211 };
212
213 /**
214 * tm_new - create a timer
215 * @p: pool
216 *
217 * This function creates a new timer resource and returns
218 * a pointer to it. To use the timer, you need to fill in
219 * the structure fields and call tm_start() to start timing.
220 */
221 timer *
222 tm_new(pool *p)
223 {
224 timer *t = ralloc(p, &tm_class);
225 return t;
226 }
227
228 static inline void
229 tm_insert_near(timer *t)
230 {
231 node *n = HEAD(near_timers);
232
233 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
234 n = n->next;
235 insert_node(&t->n, n->prev);
236 }
237
238 /**
239 * tm_start - start a timer
240 * @t: timer
241 * @after: number of seconds the timer should be run after
242 *
243 * This function schedules the hook function of the timer to
244 * be called after @after seconds. If the timer has been already
245 * started, it's @expire time is replaced by the new value.
246 *
247 * You can have set the @randomize field of @t, the timeout
248 * will be increased by a random number of seconds chosen
249 * uniformly from range 0 .. @randomize.
250 *
251 * You can call tm_start() from the handler function of the timer
252 * to request another run of the timer. Also, you can set the @recurrent
253 * field to have the timer re-added automatically with the same timeout.
254 */
255 void
256 tm_start(timer *t, unsigned after)
257 {
258 bird_clock_t when;
259
260 if (t->randomize)
261 after += random() % (t->randomize + 1);
262 when = now + after;
263 if (t->expires == when)
264 return;
265 if (t->expires)
266 rem_node(&t->n);
267 t->expires = when;
268 if (after <= NEAR_TIMER_LIMIT)
269 tm_insert_near(t);
270 else
271 {
272 if (!first_far_timer || first_far_timer > when)
273 first_far_timer = when;
274 add_tail(&far_timers, &t->n);
275 }
276 }
277
278 /**
279 * tm_stop - stop a timer
280 * @t: timer
281 *
282 * This function stops a timer. If the timer is already stopped,
283 * nothing happens.
284 */
285 void
286 tm_stop(timer *t)
287 {
288 if (t->expires)
289 {
290 rem_node(&t->n);
291 t->expires = 0;
292 }
293 }
294
295 static void
296 tm_dump_them(char *name, list *l)
297 {
298 node *n;
299 timer *t;
300
301 debug("%s timers:\n", name);
302 WALK_LIST(n, *l)
303 {
304 t = SKIP_BACK(timer, n, n);
305 debug("%p ", t);
306 tm_dump(&t->r);
307 }
308 debug("\n");
309 }
310
311 void
312 tm_dump_all(void)
313 {
314 tm_dump_them("Near", &near_timers);
315 tm_dump_them("Far", &far_timers);
316 }
317
318 static inline time_t
319 tm_first_shot(void)
320 {
321 time_t x = first_far_timer;
322
323 if (!EMPTY_LIST(near_timers))
324 {
325 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
326 if (t->expires < x)
327 x = t->expires;
328 }
329 return x;
330 }
331
332 static void
333 tm_shot(void)
334 {
335 timer *t;
336 node *n, *m;
337
338 if (first_far_timer <= now)
339 {
340 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
341 first_far_timer = TIME_INFINITY;
342 n = HEAD(far_timers);
343 while (m = n->next)
344 {
345 t = SKIP_BACK(timer, n, n);
346 if (t->expires <= limit)
347 {
348 rem_node(n);
349 tm_insert_near(t);
350 }
351 else if (t->expires < first_far_timer)
352 first_far_timer = t->expires;
353 n = m;
354 }
355 }
356 while ((n = HEAD(near_timers)) -> next)
357 {
358 int delay;
359 t = SKIP_BACK(timer, n, n);
360 if (t->expires > now)
361 break;
362 rem_node(n);
363 delay = t->expires - now;
364 t->expires = 0;
365 if (t->recurrent)
366 {
367 int i = t->recurrent - delay;
368 if (i < 0)
369 i = 0;
370 tm_start(t, i);
371 }
372 t->hook(t);
373 }
374 }
375
376 /**
377 * tm_parse_datetime - parse a date and time
378 * @x: datetime string
379 *
380 * tm_parse_datetime() takes a textual representation of
381 * a date and time (dd-mm-yyyy hh:mm:ss)
382 * and converts it to the corresponding value of type &bird_clock_t.
383 */
384 bird_clock_t
385 tm_parse_datetime(char *x)
386 {
387 struct tm tm;
388 int n;
389 time_t t;
390
391 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
392 return tm_parse_date(x);
393 tm.tm_mon--;
394 tm.tm_year -= 1900;
395 t = mktime(&tm);
396 if (t == (time_t) -1)
397 return 0;
398 return t;
399 }
400 /**
401 * tm_parse_date - parse a date
402 * @x: date string
403 *
404 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
405 * and converts it to the corresponding value of type &bird_clock_t.
406 */
407 bird_clock_t
408 tm_parse_date(char *x)
409 {
410 struct tm tm;
411 int n;
412 time_t t;
413
414 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
415 return 0;
416 tm.tm_mon--;
417 tm.tm_year -= 1900;
418 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
419 t = mktime(&tm);
420 if (t == (time_t) -1)
421 return 0;
422 return t;
423 }
424
425 static void
426 tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
427 {
428 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
429 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
430
431 if (delta < 20*3600)
432 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
433 else if (delta < 360*86400)
434 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
435 else
436 bsprintf(x, "%d", tm->tm_year+1900);
437 }
438
439 #include "conf/conf.h"
440
441 /**
442 * tm_format_datetime - convert date and time to textual representation
443 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
444 * @t: time
445 *
446 * This function formats the given relative time value @t to a textual
447 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
448 */
449 void
450 tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
451 {
452 const char *fmt_used;
453 struct tm *tm;
454 bird_clock_t delta = now - t;
455 t = now_real - delta;
456 tm = localtime(&t);
457
458 if (fmt_spec->fmt1 == NULL)
459 return tm_format_reltime(x, tm, delta);
460
461 if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
462 fmt_used = fmt_spec->fmt1;
463 else
464 fmt_used = fmt_spec->fmt2;
465
466 int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
467 if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
468 strcpy(x, "<too-long>");
469 }
470
471 /**
472 * DOC: Sockets
473 *
474 * Socket resources represent network connections. Their data structure (&socket)
475 * contains a lot of fields defining the exact type of the socket, the local and
476 * remote addresses and ports, pointers to socket buffers and finally pointers to
477 * hook functions to be called when new data have arrived to the receive buffer
478 * (@rx_hook), when the contents of the transmit buffer have been transmitted
479 * (@tx_hook) and when an error or connection close occurs (@err_hook).
480 *
481 * Freeing of sockets from inside socket hooks is perfectly safe.
482 */
483
484 #ifndef SOL_IP
485 #define SOL_IP IPPROTO_IP
486 #endif
487
488 #ifndef SOL_IPV6
489 #define SOL_IPV6 IPPROTO_IPV6
490 #endif
491
492 static list sock_list;
493 static struct birdsock *current_sock;
494 static struct birdsock *stored_sock;
495 static int sock_recalc_fdsets_p;
496
497 static inline sock *
498 sk_next(sock *s)
499 {
500 if (!s->n.next->next)
501 return NULL;
502 else
503 return SKIP_BACK(sock, n, s->n.next);
504 }
505
506 static void
507 sk_alloc_bufs(sock *s)
508 {
509 if (!s->rbuf && s->rbsize)
510 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
511 s->rpos = s->rbuf;
512 if (!s->tbuf && s->tbsize)
513 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
514 s->tpos = s->ttx = s->tbuf;
515 }
516
517 static void
518 sk_free_bufs(sock *s)
519 {
520 if (s->rbuf_alloc)
521 {
522 xfree(s->rbuf_alloc);
523 s->rbuf = s->rbuf_alloc = NULL;
524 }
525 if (s->tbuf_alloc)
526 {
527 xfree(s->tbuf_alloc);
528 s->tbuf = s->tbuf_alloc = NULL;
529 }
530 }
531
532 static void
533 sk_free(resource *r)
534 {
535 sock *s = (sock *) r;
536
537 sk_free_bufs(s);
538 if (s->fd >= 0)
539 {
540 close(s->fd);
541 if (s == current_sock)
542 current_sock = sk_next(s);
543 if (s == stored_sock)
544 stored_sock = sk_next(s);
545 rem_node(&s->n);
546 sock_recalc_fdsets_p = 1;
547 }
548 }
549
550 void
551 sk_reallocate(sock *s)
552 {
553 sk_free_bufs(s);
554 sk_alloc_bufs(s);
555 }
556
557 static void
558 sk_dump(resource *r)
559 {
560 sock *s = (sock *) r;
561 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
562
563 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
564 sk_type_names[s->type],
565 s->data,
566 s->saddr,
567 s->sport,
568 s->daddr,
569 s->dport,
570 s->tos,
571 s->ttl,
572 s->iface ? s->iface->name : "none");
573 }
574
575 static struct resclass sk_class = {
576 "Socket",
577 sizeof(sock),
578 sk_free,
579 sk_dump,
580 NULL,
581 NULL
582 };
583
584 /**
585 * sk_new - create a socket
586 * @p: pool
587 *
588 * This function creates a new socket resource. If you want to use it,
589 * you need to fill in all the required fields of the structure and
590 * call sk_open() to do the actual opening of the socket.
591 *
592 * The real function name is sock_new(), sk_new() is a macro wrapper
593 * to avoid collision with OpenSSL.
594 */
595 sock *
596 sock_new(pool *p)
597 {
598 sock *s = ralloc(p, &sk_class);
599 s->pool = p;
600 // s->saddr = s->daddr = IPA_NONE;
601 s->tos = s->priority = s->ttl = -1;
602 s->fd = -1;
603 return s;
604 }
605
606 static void
607 sk_insert(sock *s)
608 {
609 add_tail(&sock_list, &s->n);
610 sock_recalc_fdsets_p = 1;
611 }
612
613 #ifdef IPV6
614
615 void
616 fill_in_sockaddr(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, unsigned port)
617 {
618 memset(sa, 0, sizeof (struct sockaddr_in6));
619 sa->sin6_family = AF_INET6;
620 sa->sin6_port = htons(port);
621 sa->sin6_flowinfo = 0;
622 #ifdef HAVE_SIN_LEN
623 sa->sin6_len = sizeof(struct sockaddr_in6);
624 #endif
625 set_inaddr(&sa->sin6_addr, a);
626
627 if (ifa && ipa_has_link_scope(a))
628 sa->sin6_scope_id = ifa->index;
629 }
630
631 void
632 get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, unsigned *port, int check)
633 {
634 if (check && sa->sin6_family != AF_INET6)
635 bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
636 if (port)
637 *port = ntohs(sa->sin6_port);
638 memcpy(a, &sa->sin6_addr, sizeof(*a));
639 ipa_ntoh(*a);
640
641 if (ifa && ipa_has_link_scope(*a))
642 *ifa = if_find_by_index(sa->sin6_scope_id);
643 }
644
645 #else
646
647 void
648 fill_in_sockaddr(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, unsigned port)
649 {
650 memset (sa, 0, sizeof (struct sockaddr_in));
651 sa->sin_family = AF_INET;
652 sa->sin_port = htons(port);
653 #ifdef HAVE_SIN_LEN
654 sa->sin_len = sizeof(struct sockaddr_in);
655 #endif
656 set_inaddr(&sa->sin_addr, a);
657 }
658
659 void
660 get_sockaddr(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *port, int check)
661 {
662 if (check && sa->sin_family != AF_INET)
663 bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
664 if (port)
665 *port = ntohs(sa->sin_port);
666 memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
667 ipa_ntoh(*a);
668 }
669
670 #endif
671
672
673 #ifdef IPV6
674
/*
 * PKTINFO handling is also standardized in IPv6; both directions need
 * room for one struct in6_pktinfo control message.
 */
#define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))

/*
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
 * type; RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If
 * IPV6_RECVPKTINFO is not available, we suppose the OS implements the
 * older RFC and use IPV6_PKTINFO instead.
 */
#ifndef IPV6_RECVPKTINFO
#define IPV6_RECVPKTINFO IPV6_PKTINFO
#endif
688
689 static char *
690 sysio_register_cmsgs(sock *s)
691 {
692 int ok = 1;
693 if ((s->flags & SKF_LADDR_RX) &&
694 setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)
695 return "IPV6_RECVPKTINFO";
696
697 return NULL;
698 }
699
700 static void
701 sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
702 {
703 struct cmsghdr *cm;
704 struct in6_pktinfo *pi = NULL;
705
706 if (!(s->flags & SKF_LADDR_RX))
707 return;
708
709 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
710 {
711 if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
712 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
713 }
714
715 if (!pi)
716 {
717 s->laddr = IPA_NONE;
718 s->lifindex = 0;
719 return;
720 }
721
722 get_inaddr(&s->laddr, &pi->ipi6_addr);
723 s->lifindex = pi->ipi6_ifindex;
724 return;
725 }
726
727 /*
728 static void
729 sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
730 {
731 struct cmsghdr *cm;
732 struct in6_pktinfo *pi;
733
734 if (!(s->flags & SKF_LADDR_TX))
735 return;
736
737 msg->msg_control = cbuf;
738 msg->msg_controllen = cbuflen;
739
740 cm = CMSG_FIRSTHDR(msg);
741 cm->cmsg_level = IPPROTO_IPV6;
742 cm->cmsg_type = IPV6_PKTINFO;
743 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
744
745 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
746 set_inaddr(&pi->ipi6_addr, s->saddr);
747 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
748
749 msg->msg_controllen = cm->cmsg_len;
750 return;
751 }
752 */
753 #endif
754
755 static char *
756 sk_set_ttl_int(sock *s)
757 {
758 #ifdef IPV6
759 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
760 return "IPV6_UNICAST_HOPS";
761 #else
762 if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
763 return "IP_TTL";
764 #ifdef CONFIG_UNIX_DONTROUTE
765 int one = 1;
766 if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
767 return "SO_DONTROUTE";
768 #endif
769 #endif
770 return NULL;
771 }
772
773 #define ERR(x) do { err = x; goto bad; } while(0)
774 #define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
775
776 static char *
777 sk_setup(sock *s)
778 {
779 int fd = s->fd;
780 char *err = NULL;
781
782 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
783 ERR("fcntl(O_NONBLOCK)");
784 if (s->type == SK_UNIX)
785 return NULL;
786
787 #ifdef IPV6
788 if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0)
789 WARN("IPV6_TCLASS");
790 #else
791 if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
792 WARN("IP_TOS");
793 #endif
794
795 if (s->priority >= 0)
796 sk_set_priority(s, s->priority);
797
798 #ifdef IPV6
799 int v = 1;
800 if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
801 WARN("IPV6_V6ONLY");
802 #endif
803
804 if (s->ttl >= 0)
805 err = sk_set_ttl_int(s);
806
807 sysio_register_cmsgs(s);
808 bad:
809 return err;
810 }
811
812 /**
813 * sk_set_ttl - set transmit TTL for given socket.
814 * @s: socket
815 * @ttl: TTL value
816 *
817 * Set TTL for already opened connections when TTL was not set before.
818 * Useful for accepted connections when different ones should have
819 * different TTL.
820 *
821 * Result: 0 for success, -1 for an error.
822 */
823
824 int
825 sk_set_ttl(sock *s, int ttl)
826 {
827 char *err;
828
829 s->ttl = ttl;
830 if (err = sk_set_ttl_int(s))
831 log(L_ERR "sk_set_ttl: %s: %m", err);
832
833 return (err ? -1 : 0);
834 }
835
836 /**
837 * sk_set_min_ttl - set minimal accepted TTL for given socket.
838 * @s: socket
839 * @ttl: TTL value
840 *
841 * Can be used in TTL security implementation
842 *
843 * Result: 0 for success, -1 for an error.
844 */
845
846 int
847 sk_set_min_ttl(sock *s, int ttl)
848 {
849 int err;
850 #ifdef IPV6
851 err = sk_set_min_ttl6(s, ttl);
852 #else
853 err = sk_set_min_ttl4(s, ttl);
854 #endif
855
856 return err;
857 }
858
859 /**
860 * sk_set_md5_auth - add / remove MD5 security association for given socket.
861 * @s: socket
862 * @a: IP address of the other side
863 * @ifa: Interface for link-local IP address
864 * @passwd: password used for MD5 authentication
865 *
866 * In TCP MD5 handling code in kernel, there is a set of pairs
867 * (address, password) used to choose password according to
868 * address of the other side. This function is useful for
869 * listening socket, for active sockets it is enough to set
870 * s->password field.
871 *
872 * When called with passwd != NULL, the new pair is added,
873 * When called with passwd == NULL, the existing pair is removed.
874 *
875 * Result: 0 for success, -1 for an error.
876 */
877
878 int
879 sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
880 {
881 sockaddr sa;
882 fill_in_sockaddr(&sa, a, ifa, 0);
883 return sk_set_md5_auth_int(s, &sa, passwd);
884 }
885
886 int
887 sk_set_broadcast(sock *s, int enable)
888 {
889 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)) < 0)
890 {
891 log(L_ERR "sk_set_broadcast: SO_BROADCAST: %m");
892 return -1;
893 }
894
895 return 0;
896 }
897
898
899 #ifdef IPV6
900
901 int
902 sk_set_ipv6_checksum(sock *s, int offset)
903 {
904 if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
905 {
906 log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
907 return -1;
908 }
909
910 return 0;
911 }
912
913 int
914 sk_set_icmp_filter(sock *s, int p1, int p2)
915 {
916 /* a bit of lame interface, but it is here only for Radv */
917 struct icmp6_filter f;
918
919 ICMP6_FILTER_SETBLOCKALL(&f);
920 ICMP6_FILTER_SETPASS(p1, &f);
921 ICMP6_FILTER_SETPASS(p2, &f);
922
923 if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
924 {
925 log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m");
926 return -1;
927 }
928
929 return 0;
930 }
931
932 int
933 sk_setup_multicast(sock *s)
934 {
935 char *err;
936 int zero = 0;
937 int index;
938
939 ASSERT(s->iface && s->iface->addr);
940
941 index = s->iface->index;
942 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
943 ERR("IPV6_MULTICAST_HOPS");
944 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
945 ERR("IPV6_MULTICAST_LOOP");
946 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
947 ERR("IPV6_MULTICAST_IF");
948
949 if (err = sysio_bind_to_iface(s))
950 goto bad;
951
952 return 0;
953
954 bad:
955 log(L_ERR "sk_setup_multicast: %s: %m", err);
956 return -1;
957 }
958
959 int
960 sk_join_group(sock *s, ip_addr maddr)
961 {
962 struct ipv6_mreq mreq;
963
964 set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
965
966 #ifdef CONFIG_IPV6_GLIBC_20
967 mreq.ipv6mr_ifindex = s->iface->index;
968 #else
969 mreq.ipv6mr_interface = s->iface->index;
970 #endif
971
972 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0)
973 {
974 log(L_ERR "sk_join_group: IPV6_JOIN_GROUP: %m");
975 return -1;
976 }
977
978 return 0;
979 }
980
981 int
982 sk_leave_group(sock *s, ip_addr maddr)
983 {
984 struct ipv6_mreq mreq;
985
986 set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
987
988 #ifdef CONFIG_IPV6_GLIBC_20
989 mreq.ipv6mr_ifindex = s->iface->index;
990 #else
991 mreq.ipv6mr_interface = s->iface->index;
992 #endif
993
994 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0)
995 {
996 log(L_ERR "sk_leave_group: IPV6_LEAVE_GROUP: %m");
997 return -1;
998 }
999
1000 return 0;
1001 }
1002
1003 #else /* IPV4 */
1004
1005 int
1006 sk_setup_multicast(sock *s)
1007 {
1008 char *err;
1009
1010 ASSERT(s->iface && s->iface->addr);
1011
1012 if (err = sysio_setup_multicast(s))
1013 {
1014 log(L_ERR "sk_setup_multicast: %s: %m", err);
1015 return -1;
1016 }
1017
1018 return 0;
1019 }
1020
1021 int
1022 sk_join_group(sock *s, ip_addr maddr)
1023 {
1024 char *err;
1025
1026 if (err = sysio_join_group(s, maddr))
1027 {
1028 log(L_ERR "sk_join_group: %s: %m", err);
1029 return -1;
1030 }
1031
1032 return 0;
1033 }
1034
1035 int
1036 sk_leave_group(sock *s, ip_addr maddr)
1037 {
1038 char *err;
1039
1040 if (err = sysio_leave_group(s, maddr))
1041 {
1042 log(L_ERR "sk_leave_group: %s: %m", err);
1043 return -1;
1044 }
1045
1046 return 0;
1047 }
1048
1049 #endif
1050
1051
1052 static void
1053 sk_tcp_connected(sock *s)
1054 {
1055 sockaddr lsa;
1056 int lsa_len = sizeof(lsa);
1057 if (getsockname(s->fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
1058 get_sockaddr(&lsa, &s->saddr, &s->iface, &s->sport, 1);
1059
1060 s->type = SK_TCP;
1061 sk_alloc_bufs(s);
1062 s->tx_hook(s);
1063 }
1064
1065 static int
1066 sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
1067 {
1068 int fd = accept(s->fd, sa, &al);
1069 if (fd >= 0)
1070 {
1071 sock *t = sk_new(s->pool);
1072 char *err;
1073 t->type = type;
1074 t->fd = fd;
1075 t->ttl = s->ttl;
1076 t->tos = s->tos;
1077 t->rbsize = s->rbsize;
1078 t->tbsize = s->tbsize;
1079 if (type == SK_TCP)
1080 {
1081 sockaddr lsa;
1082 int lsa_len = sizeof(lsa);
1083 if (getsockname(fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
1084 get_sockaddr(&lsa, &t->saddr, &t->iface, &t->sport, 1);
1085
1086 get_sockaddr((sockaddr *) sa, &t->daddr, &t->iface, &t->dport, 1);
1087 }
1088 sk_insert(t);
1089 if (err = sk_setup(t))
1090 {
1091 log(L_ERR "Incoming connection: %s: %m", err);
1092 rfree(t);
1093 return 1;
1094 }
1095 sk_alloc_bufs(t);
1096 s->rx_hook(t, 0);
1097 return 1;
1098 }
1099 else if (errno != EINTR && errno != EAGAIN)
1100 {
1101 s->err_hook(s, errno);
1102 }
1103 return 0;
1104 }
1105
1106 /**
1107 * sk_open - open a socket
1108 * @s: socket
1109 *
1110 * This function takes a socket resource created by sk_new() and
1111 * initialized by the user and binds a corresponding network connection
1112 * to it.
1113 *
1114 * Result: 0 for success, -1 for an error.
1115 */
1116 int
1117 sk_open(sock *s)
1118 {
1119 int fd;
1120 sockaddr sa;
1121 int one = 1;
1122 int type = s->type;
1123 int has_src = ipa_nonzero(s->saddr) || s->sport;
1124 char *err;
1125
1126 switch (type)
1127 {
1128 case SK_TCP_ACTIVE:
1129 s->ttx = ""; /* Force s->ttx != s->tpos */
1130 /* Fall thru */
1131 case SK_TCP_PASSIVE:
1132 fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
1133 break;
1134 case SK_UDP:
1135 fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
1136 break;
1137 case SK_IP:
1138 fd = socket(BIRD_PF, SOCK_RAW, s->dport);
1139 break;
1140 case SK_MAGIC:
1141 fd = s->fd;
1142 break;
1143 default:
1144 bug("sk_open() called for invalid sock type %d", type);
1145 }
1146 if (fd < 0)
1147 die("sk_open: socket: %m");
1148 s->fd = fd;
1149
1150 if (err = sk_setup(s))
1151 goto bad;
1152
1153 if (has_src)
1154 {
1155 int port;
1156
1157 if (type == SK_IP)
1158 port = 0;
1159 else
1160 {
1161 port = s->sport;
1162 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
1163 ERR("SO_REUSEADDR");
1164 }
1165 fill_in_sockaddr(&sa, s->saddr, s->iface, port);
1166 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1167 ERR("bind");
1168 }
1169 fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
1170
1171 if (s->password)
1172 {
1173 int rv = sk_set_md5_auth_int(s, &sa, s->password);
1174 if (rv < 0)
1175 goto bad_no_log;
1176 }
1177
1178 switch (type)
1179 {
1180 case SK_TCP_ACTIVE:
1181 if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
1182 sk_tcp_connected(s);
1183 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1184 errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1185 ERR("connect");
1186 break;
1187 case SK_TCP_PASSIVE:
1188 if (listen(fd, 8))
1189 ERR("listen");
1190 break;
1191 case SK_MAGIC:
1192 break;
1193 default:
1194 sk_alloc_bufs(s);
1195 #ifdef IPV6
1196 #ifdef IPV6_MTU_DISCOVER
1197 {
1198 int dont = IPV6_PMTUDISC_DONT;
1199 if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
1200 ERR("IPV6_MTU_DISCOVER");
1201 }
1202 #endif
1203 #else
1204 #ifdef IP_PMTUDISC
1205 {
1206 int dont = IP_PMTUDISC_DONT;
1207 if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
1208 ERR("IP_PMTUDISC");
1209 }
1210 #endif
1211 #endif
1212 }
1213
1214 sk_insert(s);
1215 return 0;
1216
1217 bad:
1218 log(L_ERR "sk_open: %s: %m", err);
1219 bad_no_log:
1220 close(fd);
1221 s->fd = -1;
1222 return -1;
1223 }
1224
1225 void
1226 sk_open_unix(sock *s, char *name)
1227 {
1228 int fd;
1229 struct sockaddr_un sa;
1230 char *err;
1231
1232 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1233 if (fd < 0)
1234 ERR("socket");
1235 s->fd = fd;
1236 if (err = sk_setup(s))
1237 goto bad;
1238 unlink(name);
1239
1240 /* Path length checked in test_old_bird() */
1241 sa.sun_family = AF_UNIX;
1242 strcpy(sa.sun_path, name);
1243 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1244 ERR("bind");
1245 if (listen(fd, 8))
1246 ERR("listen");
1247 sk_insert(s);
1248 return;
1249
1250 bad:
1251 log(L_ERR "sk_open_unix: %s: %m", err);
1252 die("Unable to create control socket %s", name);
1253 }
1254
1255 static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1256
1257 static int
1258 sk_maybe_write(sock *s)
1259 {
1260 int e;
1261
1262 switch (s->type)
1263 {
1264 case SK_TCP:
1265 case SK_MAGIC:
1266 case SK_UNIX:
1267 while (s->ttx != s->tpos)
1268 {
1269 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1270 if (e < 0)
1271 {
1272 if (errno != EINTR && errno != EAGAIN)
1273 {
1274 reset_tx_buffer(s);
1275 /* EPIPE is just a connection close notification during TX */
1276 s->err_hook(s, (errno != EPIPE) ? errno : 0);
1277 return -1;
1278 }
1279 return 0;
1280 }
1281 s->ttx += e;
1282 }
1283 reset_tx_buffer(s);
1284 return 1;
1285 case SK_UDP:
1286 case SK_IP:
1287 {
1288 if (s->tbuf == s->tpos)
1289 return 1;
1290
1291 sockaddr sa;
1292 fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
1293
1294 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1295 // byte cmsg_buf[CMSG_TX_SPACE];
1296
1297 struct msghdr msg = {
1298 .msg_name = &sa,
1299 .msg_namelen = sizeof(sa),
1300 .msg_iov = &iov,
1301 .msg_iovlen = 1};
1302
1303 // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1304 e = sendmsg(s->fd, &msg, 0);
1305
1306 if (e < 0)
1307 {
1308 if (errno != EINTR && errno != EAGAIN)
1309 {
1310 reset_tx_buffer(s);
1311 s->err_hook(s, errno);
1312 return -1;
1313 }
1314 return 0;
1315 }
1316 reset_tx_buffer(s);
1317 return 1;
1318 }
1319 default:
1320 bug("sk_maybe_write: unknown socket type %d", s->type);
1321 }
1322 }
1323
1324 int
1325 sk_rx_ready(sock *s)
1326 {
1327 fd_set rd, wr;
1328 struct timeval timo;
1329 int rv;
1330
1331 FD_ZERO(&rd);
1332 FD_ZERO(&wr);
1333 FD_SET(s->fd, &rd);
1334
1335 timo.tv_sec = 0;
1336 timo.tv_usec = 0;
1337
1338 redo:
1339 rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1340
1341 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1342 goto redo;
1343
1344 return rv;
1345 }
1346
1347 /**
1348 * sk_send - send data to a socket
1349 * @s: socket
1350 * @len: number of bytes to send
1351 *
1352 * This function sends @len bytes of data prepared in the
1353 * transmit buffer of the socket @s to the network connection.
1354 * If the packet can be sent immediately, it does so and returns
1355 * 1, else it queues the packet for later processing, returns 0
1356 * and calls the @tx_hook of the socket when the tranmission
1357 * takes place.
1358 */
1359 int
1360 sk_send(sock *s, unsigned len)
1361 {
1362 s->ttx = s->tbuf;
1363 s->tpos = s->tbuf + len;
1364 return sk_maybe_write(s);
1365 }
1366
1367 /**
1368 * sk_send_to - send data to a specific destination
1369 * @s: socket
1370 * @len: number of bytes to send
1371 * @addr: IP address to send the packet to
1372 * @port: port to send the packet to
1373 *
1374 * This is a sk_send() replacement for connection-less packet sockets
1375 * which allows destination of the packet to be chosen dynamically.
1376 */
1377 int
1378 sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1379 {
1380 s->daddr = addr;
1381 s->dport = port;
1382 s->ttx = s->tbuf;
1383 s->tpos = s->tbuf + len;
1384 return sk_maybe_write(s);
1385 }
1386
1387 /*
1388 int
1389 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1390 ip_addr saddr, ip_addr daddr, unsigned dport)
1391 {
1392 s->iface = ifa;
1393 s->saddr = saddr;
1394 s->daddr = daddr;
1395 s->dport = dport;
1396 s->ttx = s->tbuf;
1397 s->tpos = s->tbuf + len;
1398 return sk_maybe_write(s);
1399 }
1400 */
1401
1402 static int
1403 sk_read(sock *s)
1404 {
1405 switch (s->type)
1406 {
1407 case SK_TCP_PASSIVE:
1408 {
1409 sockaddr sa;
1410 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1411 }
1412 case SK_UNIX_PASSIVE:
1413 {
1414 struct sockaddr_un sa;
1415 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
1416 }
1417 case SK_TCP:
1418 case SK_UNIX:
1419 {
1420 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1421
1422 if (c < 0)
1423 {
1424 if (errno != EINTR && errno != EAGAIN)
1425 s->err_hook(s, errno);
1426 }
1427 else if (!c)
1428 s->err_hook(s, 0);
1429 else
1430 {
1431 s->rpos += c;
1432 if (s->rx_hook(s, s->rpos - s->rbuf))
1433 {
1434 /* We need to be careful since the socket could have been deleted by the hook */
1435 if (current_sock == s)
1436 s->rpos = s->rbuf;
1437 }
1438 return 1;
1439 }
1440 return 0;
1441 }
1442 case SK_MAGIC:
1443 return s->rx_hook(s, 0);
1444 default:
1445 {
1446 sockaddr sa;
1447 int e;
1448
1449 struct iovec iov = {s->rbuf, s->rbsize};
1450 byte cmsg_buf[CMSG_RX_SPACE];
1451
1452 struct msghdr msg = {
1453 .msg_name = &sa,
1454 .msg_namelen = sizeof(sa),
1455 .msg_iov = &iov,
1456 .msg_iovlen = 1,
1457 .msg_control = cmsg_buf,
1458 .msg_controllen = sizeof(cmsg_buf),
1459 .msg_flags = 0};
1460
1461 e = recvmsg(s->fd, &msg, 0);
1462
1463 if (e < 0)
1464 {
1465 if (errno != EINTR && errno != EAGAIN)
1466 s->err_hook(s, errno);
1467 return 0;
1468 }
1469 s->rpos = s->rbuf + e;
1470 get_sockaddr(&sa, &s->faddr, NULL, &s->fport, 1);
1471 sysio_process_rx_cmsgs(s, &msg);
1472
1473 s->rx_hook(s, e);
1474 return 1;
1475 }
1476 }
1477 }
1478
1479 static int
1480 sk_write(sock *s)
1481 {
1482 switch (s->type)
1483 {
1484 case SK_TCP_ACTIVE:
1485 {
1486 sockaddr sa;
1487 fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
1488 if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
1489 sk_tcp_connected(s);
1490 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1491 s->err_hook(s, errno);
1492 return 0;
1493 }
1494 default:
1495 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1496 {
1497 s->tx_hook(s);
1498 return 1;
1499 }
1500 return 0;
1501 }
1502 }
1503
1504 void
1505 sk_dump_all(void)
1506 {
1507 node *n;
1508 sock *s;
1509
1510 debug("Open sockets:\n");
1511 WALK_LIST(n, sock_list)
1512 {
1513 s = SKIP_BACK(sock, n, n);
1514 debug("%p ", s);
1515 sk_dump(&s->r);
1516 }
1517 debug("\n");
1518 }
1519
1520 #undef ERR
1521 #undef WARN
1522
1523 /*
1524 * Main I/O Loop
1525 */
1526
/*
 * Requests for asynchronous actions, polled by io_loop() before it
 * enters select(): a nonzero async_config_flag makes it call
 * async_config() (reconfiguration), a nonzero async_dump_flag makes it
 * call async_dump(). io_loop() clears the flag after the call.
 */
volatile int async_config_flag, async_dump_flag;
1529
1530 void
1531 io_init(void)
1532 {
1533 init_list(&near_timers);
1534 init_list(&far_timers);
1535 init_list(&sock_list);
1536 init_list(&global_event_list);
1537 krt_io_init();
1538 init_times();
1539 update_times();
1540 boot_time = now;
1541 srandom((int) now_real);
1542 }
1543
/*
 * io_loop() counts its socket-servicing rounds in short_loops. While
 * events keep arriving and the counter is below SHORT_LOOP_MAX, it
 * skips its second receive pass; otherwise the counter is reset to zero
 * and the second pass runs.
 */
#define SHORT_LOOP_MAX 10
static int short_loops;
1546
1547 void
1548 io_loop(void)
1549 {
1550 fd_set rd, wr;
1551 struct timeval timo;
1552 time_t tout;
1553 int hi, events;
1554 sock *s;
1555 node *n;
1556
1557 sock_recalc_fdsets_p = 1;
1558 for(;;)
1559 {
1560 events = ev_run_list(&global_event_list);
1561 update_times();
1562 tout = tm_first_shot();
1563 if (tout <= now)
1564 {
1565 tm_shot();
1566 continue;
1567 }
1568 timo.tv_sec = events ? 0 : MIN(tout - now, 3);
1569 timo.tv_usec = 0;
1570
1571 if (sock_recalc_fdsets_p)
1572 {
1573 sock_recalc_fdsets_p = 0;
1574 FD_ZERO(&rd);
1575 FD_ZERO(&wr);
1576 }
1577
1578 hi = 0;
1579 WALK_LIST(n, sock_list)
1580 {
1581 s = SKIP_BACK(sock, n, n);
1582 if (s->rx_hook)
1583 {
1584 FD_SET(s->fd, &rd);
1585 if (s->fd > hi)
1586 hi = s->fd;
1587 }
1588 else
1589 FD_CLR(s->fd, &rd);
1590 if (s->tx_hook && s->ttx != s->tpos)
1591 {
1592 FD_SET(s->fd, &wr);
1593 if (s->fd > hi)
1594 hi = s->fd;
1595 }
1596 else
1597 FD_CLR(s->fd, &wr);
1598 }
1599
1600 /*
1601 * Yes, this is racy. But even if the signal comes before this test
1602 * and entering select(), it gets caught on the next timer tick.
1603 */
1604
1605 if (async_config_flag)
1606 {
1607 async_config();
1608 async_config_flag = 0;
1609 continue;
1610 }
1611 if (async_dump_flag)
1612 {
1613 async_dump();
1614 async_dump_flag = 0;
1615 continue;
1616 }
1617 if (async_shutdown_flag)
1618 {
1619 async_shutdown();
1620 async_shutdown_flag = 0;
1621 continue;
1622 }
1623
1624 /* And finally enter select() to find active sockets */
1625 hi = select(hi+1, &rd, &wr, NULL, &timo);
1626
1627 if (hi < 0)
1628 {
1629 if (errno == EINTR || errno == EAGAIN)
1630 continue;
1631 die("select: %m");
1632 }
1633 if (hi)
1634 {
1635 /* guaranteed to be non-empty */
1636 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1637
1638 while (current_sock)
1639 {
1640 sock *s = current_sock;
1641 int e;
1642 int steps;
1643
1644 steps = MAX_STEPS;
1645 if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1646 do
1647 {
1648 steps--;
1649 e = sk_read(s);
1650 if (s != current_sock)
1651 goto next;
1652 }
1653 while (e && s->rx_hook && steps);
1654
1655 steps = MAX_STEPS;
1656 if (FD_ISSET(s->fd, &wr))
1657 do
1658 {
1659 steps--;
1660 e = sk_write(s);
1661 if (s != current_sock)
1662 goto next;
1663 }
1664 while (e && steps);
1665 current_sock = sk_next(s);
1666 next: ;
1667 }
1668
1669 short_loops++;
1670 if (events && (short_loops < SHORT_LOOP_MAX))
1671 continue;
1672 short_loops = 0;
1673
1674 int count = 0;
1675 current_sock = stored_sock;
1676 if (current_sock == NULL)
1677 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1678
1679 while (current_sock && count < MAX_RX_STEPS)
1680 {
1681 sock *s = current_sock;
1682 int e;
1683
1684 if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1685 {
1686 count++;
1687 e = sk_read(s);
1688 if (s != current_sock)
1689 goto next2;
1690 }
1691 current_sock = sk_next(s);
1692 next2: ;
1693 }
1694
1695 stored_sock = current_sock;
1696 }
1697 }
1698 }
1699
/*
 * test_old_bird - refuse to start when the socket at @path is in use.
 *
 * Creates a UNIX stream socket and tries to connect it to @path. A
 * successful connect leads to die() with "I found another BIRD
 * running."; otherwise the probe socket is closed and the function
 * returns. die() is also called when the socket cannot be created or
 * when @path does not fit into sun_path.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");

  /* The length check makes the strcpy() below safe */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  memset(&sa, 0, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");

  close(fd);
}
1718
1719