]> git.ipfire.org Git - thirdparty/bird.git/blob - sysdep/unix/io.c
Merge remote-tracking branch 'origin/master' into soft-int
[thirdparty/bird.git] / sysdep / unix / io.c
1 /*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Ondrej Filip <feela@network.cz>
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
10 /* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
12 #define _GNU_SOURCE 1
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <time.h>
17 #include <sys/time.h>
18 #include <sys/types.h>
19 #include <sys/socket.h>
20 #include <sys/uio.h>
21 #include <sys/un.h>
22 #include <unistd.h>
23 #include <fcntl.h>
24 #include <errno.h>
25 #include <net/if.h>
26 #include <netinet/in.h>
27 #include <netinet/tcp.h>
28 #include <netinet/udp.h>
29 #include <netinet/icmp6.h>
30
31 #include "nest/bird.h"
32 #include "lib/lists.h"
33 #include "lib/resource.h"
34 #include "lib/timer.h"
35 #include "lib/socket.h"
36 #include "lib/event.h"
37 #include "lib/string.h"
38 #include "nest/iface.h"
39
40 #include "lib/unix.h"
41 #include "lib/sysio.h"
42
/*
 * Upper bound on how many times the tx handler of a single socket is
 * called in one select iteration. Kept small so that one protocol
 * instance cannot monopolize the CPU.
 */
#define MAX_STEPS 4

/*
 * Upper bound on how many times rx handlers are called, summed over all
 * sockets, in one select iteration. RX callbacks are often much more
 * costly, so this is limited to get small latencies.
 */
#define MAX_RX_STEPS 4
53
54 /*
55 * Tracked Files
56 */
57
58 struct rfile {
59 resource r;
60 FILE *f;
61 };
62
63 static void
64 rf_free(resource *r)
65 {
66 struct rfile *a = (struct rfile *) r;
67
68 fclose(a->f);
69 }
70
71 static void
72 rf_dump(resource *r)
73 {
74 struct rfile *a = (struct rfile *) r;
75
76 debug("(FILE *%p)\n", a->f);
77 }
78
79 static struct resclass rf_class = {
80 "FILE",
81 sizeof(struct rfile),
82 rf_free,
83 rf_dump,
84 NULL,
85 NULL
86 };
87
88 void *
89 tracked_fopen(pool *p, char *name, char *mode)
90 {
91 FILE *f = fopen(name, mode);
92
93 if (f)
94 {
95 struct rfile *r = ralloc(p, &rf_class);
96 r->f = f;
97 }
98 return f;
99 }
100
101 /**
102 * DOC: Timers
103 *
104 * Timers are resources which represent a wish of a module to call
105 * a function at the specified time. The platform dependent code
106 * doesn't guarantee exact timing, only that a timer function
107 * won't be called before the requested time.
108 *
109 * In BIRD, time is represented by values of the &bird_clock_t type
110 * which are integral numbers interpreted as a relative number of seconds since
111 * some fixed time point in past. The current time can be read
112 * from variable @now with reasonable accuracy and is monotonic. There is also
113 * a current 'absolute' time in variable @now_real reported by OS.
114 *
115 * Each timer is described by a &timer structure containing a pointer
116 * to the handler function (@hook), data private to this function (@data),
117 * time the function should be called at (@expires, 0 for inactive timers),
118 * for the other fields see |timer.h|.
119 */
120
121 #define NEAR_TIMER_LIMIT 4
122
123 static list near_timers, far_timers;
124 static bird_clock_t first_far_timer = TIME_INFINITY;
125
126 /* now must be different from 0, because 0 is a special value in timer->expires */
127 bird_clock_t now = 1, now_real, boot_time;
128
129 static void
130 update_times_plain(void)
131 {
132 bird_clock_t new_time = time(NULL);
133 int delta = new_time - now_real;
134
135 if ((delta >= 0) && (delta < 60))
136 now += delta;
137 else if (now_real != 0)
138 log(L_WARN "Time jump, delta %d s", delta);
139
140 now_real = new_time;
141 }
142
143 static void
144 update_times_gettime(void)
145 {
146 struct timespec ts;
147 int rv;
148
149 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
150 if (rv != 0)
151 die("clock_gettime: %m");
152
153 if (ts.tv_sec != now) {
154 if (ts.tv_sec < now)
155 log(L_ERR "Monotonic timer is broken");
156
157 now = ts.tv_sec;
158 now_real = time(NULL);
159 }
160 }
161
/* Set by init_times(): nonzero when clock_gettime(CLOCK_MONOTONIC) succeeded */
static int clock_monotonic_available;

/* Refresh @now and @now_real using whichever clock source is available. */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
    {
      update_times_plain();
      return;
    }

  update_times_gettime();
}
172
173 static inline void
174 init_times(void)
175 {
176 struct timespec ts;
177 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
178 if (!clock_monotonic_available)
179 log(L_WARN "Monotonic timer is missing");
180 }
181
182
183 static void
184 tm_free(resource *r)
185 {
186 timer *t = (timer *) r;
187
188 tm_stop(t);
189 }
190
191 static void
192 tm_dump(resource *r)
193 {
194 timer *t = (timer *) r;
195
196 debug("(code %p, data %p, ", t->hook, t->data);
197 if (t->randomize)
198 debug("rand %d, ", t->randomize);
199 if (t->recurrent)
200 debug("recur %d, ", t->recurrent);
201 if (t->expires)
202 debug("expires in %d sec)\n", t->expires - now);
203 else
204 debug("inactive)\n");
205 }
206
207 static struct resclass tm_class = {
208 "Timer",
209 sizeof(timer),
210 tm_free,
211 tm_dump,
212 NULL,
213 NULL
214 };
215
216 /**
217 * tm_new - create a timer
218 * @p: pool
219 *
220 * This function creates a new timer resource and returns
221 * a pointer to it. To use the timer, you need to fill in
222 * the structure fields and call tm_start() to start timing.
223 */
224 timer *
225 tm_new(pool *p)
226 {
227 timer *t = ralloc(p, &tm_class);
228 return t;
229 }
230
231 static inline void
232 tm_insert_near(timer *t)
233 {
234 node *n = HEAD(near_timers);
235
236 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
237 n = n->next;
238 insert_node(&t->n, n->prev);
239 }
240
241 /**
242 * tm_start - start a timer
243 * @t: timer
244 * @after: number of seconds the timer should be run after
245 *
246 * This function schedules the hook function of the timer to
247 * be called after @after seconds. If the timer has been already
248 * started, it's @expire time is replaced by the new value.
249 *
250 * You can have set the @randomize field of @t, the timeout
251 * will be increased by a random number of seconds chosen
252 * uniformly from range 0 .. @randomize.
253 *
254 * You can call tm_start() from the handler function of the timer
255 * to request another run of the timer. Also, you can set the @recurrent
256 * field to have the timer re-added automatically with the same timeout.
257 */
258 void
259 tm_start(timer *t, unsigned after)
260 {
261 bird_clock_t when;
262
263 if (t->randomize)
264 after += random() % (t->randomize + 1);
265 when = now + after;
266 if (t->expires == when)
267 return;
268 if (t->expires)
269 rem_node(&t->n);
270 t->expires = when;
271 if (after <= NEAR_TIMER_LIMIT)
272 tm_insert_near(t);
273 else
274 {
275 if (!first_far_timer || first_far_timer > when)
276 first_far_timer = when;
277 add_tail(&far_timers, &t->n);
278 }
279 }
280
281 /**
282 * tm_stop - stop a timer
283 * @t: timer
284 *
285 * This function stops a timer. If the timer is already stopped,
286 * nothing happens.
287 */
288 void
289 tm_stop(timer *t)
290 {
291 if (t->expires)
292 {
293 rem_node(&t->n);
294 t->expires = 0;
295 }
296 }
297
298 static void
299 tm_dump_them(char *name, list *l)
300 {
301 node *n;
302 timer *t;
303
304 debug("%s timers:\n", name);
305 WALK_LIST(n, *l)
306 {
307 t = SKIP_BACK(timer, n, n);
308 debug("%p ", t);
309 tm_dump(&t->r);
310 }
311 debug("\n");
312 }
313
314 void
315 tm_dump_all(void)
316 {
317 tm_dump_them("Near", &near_timers);
318 tm_dump_them("Far", &far_timers);
319 }
320
321 static inline time_t
322 tm_first_shot(void)
323 {
324 time_t x = first_far_timer;
325
326 if (!EMPTY_LIST(near_timers))
327 {
328 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
329 if (t->expires < x)
330 x = t->expires;
331 }
332 return x;
333 }
334
335 static void
336 tm_shot(void)
337 {
338 timer *t;
339 node *n, *m;
340
341 if (first_far_timer <= now)
342 {
343 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
344 first_far_timer = TIME_INFINITY;
345 n = HEAD(far_timers);
346 while (m = n->next)
347 {
348 t = SKIP_BACK(timer, n, n);
349 if (t->expires <= limit)
350 {
351 rem_node(n);
352 tm_insert_near(t);
353 }
354 else if (t->expires < first_far_timer)
355 first_far_timer = t->expires;
356 n = m;
357 }
358 }
359 while ((n = HEAD(near_timers)) -> next)
360 {
361 int delay;
362 t = SKIP_BACK(timer, n, n);
363 if (t->expires > now)
364 break;
365 rem_node(n);
366 delay = t->expires - now;
367 t->expires = 0;
368 if (t->recurrent)
369 {
370 int i = t->recurrent - delay;
371 if (i < 0)
372 i = 0;
373 tm_start(t, i);
374 }
375 t->hook(t);
376 }
377 }
378
379 /**
380 * tm_parse_datetime - parse a date and time
381 * @x: datetime string
382 *
383 * tm_parse_datetime() takes a textual representation of
384 * a date and time (dd-mm-yyyy hh:mm:ss)
385 * and converts it to the corresponding value of type &bird_clock_t.
386 */
387 bird_clock_t
388 tm_parse_datetime(char *x)
389 {
390 struct tm tm;
391 int n;
392 time_t t;
393
394 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
395 return tm_parse_date(x);
396 tm.tm_mon--;
397 tm.tm_year -= 1900;
398 t = mktime(&tm);
399 if (t == (time_t) -1)
400 return 0;
401 return t;
402 }
403 /**
404 * tm_parse_date - parse a date
405 * @x: date string
406 *
407 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
408 * and converts it to the corresponding value of type &bird_clock_t.
409 */
410 bird_clock_t
411 tm_parse_date(char *x)
412 {
413 struct tm tm;
414 int n;
415 time_t t;
416
417 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
418 return 0;
419 tm.tm_mon--;
420 tm.tm_year -= 1900;
421 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
422 t = mktime(&tm);
423 if (t == (time_t) -1)
424 return 0;
425 return t;
426 }
427
428 static void
429 tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
430 {
431 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
432 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
433
434 if (delta < 20*3600)
435 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
436 else if (delta < 360*86400)
437 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
438 else
439 bsprintf(x, "%d", tm->tm_year+1900);
440 }
441
442 #include "conf/conf.h"
443
444 /**
445 * tm_format_datetime - convert date and time to textual representation
446 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
447 * @t: time
448 *
449 * This function formats the given relative time value @t to a textual
450 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
451 */
452 void
453 tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
454 {
455 const char *fmt_used;
456 struct tm *tm;
457 bird_clock_t delta = now - t;
458 t = now_real - delta;
459 tm = localtime(&t);
460
461 if (fmt_spec->fmt1 == NULL)
462 return tm_format_reltime(x, tm, delta);
463
464 if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
465 fmt_used = fmt_spec->fmt1;
466 else
467 fmt_used = fmt_spec->fmt2;
468
469 int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
470 if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
471 strcpy(x, "<too-long>");
472 }
473
474
475 /**
476 * DOC: Sockets
477 *
478 * Socket resources represent network connections. Their data structure (&socket)
479 * contains a lot of fields defining the exact type of the socket, the local and
480 * remote addresses and ports, pointers to socket buffers and finally pointers to
481 * hook functions to be called when new data have arrived to the receive buffer
482 * (@rx_hook), when the contents of the transmit buffer have been transmitted
483 * (@tx_hook) and when an error or connection close occurs (@err_hook).
484 *
485 * Freeing of sockets from inside socket hooks is perfectly safe.
486 */
487
/* Fall back to the IPPROTO_* values where the SOL_* level names are not defined */
#if !defined(SOL_IP)
#define SOL_IP IPPROTO_IP
#endif

#if !defined(SOL_IPV6)
#define SOL_IPV6 IPPROTO_IPV6
#endif

#if !defined(SOL_ICMPV6)
#define SOL_ICMPV6 IPPROTO_ICMPV6
#endif
499
500
501 /*
502 * Sockaddr helper functions
503 */
504
/*
 * Size of the socket address structure for address family @af:
 * struct sockaddr_in for AF_INET, struct sockaddr_in6 for anything else.
 */
static inline int
sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
507
508 static inline void
509 sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port)
510 {
511 memset(sa, 0, sizeof(struct sockaddr_in));
512 #ifdef HAVE_SIN_LEN
513 sa->sin_len = sizeof(struct sockaddr_in);
514 #endif
515 sa->sin_family = AF_INET;
516 sa->sin_port = htons(port);
517 sa->sin_addr = ipa_to_in4(a);
518 }
519
520 static inline void
521 sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
522 {
523 memset(sa, 0, sizeof(struct sockaddr_in6));
524 #ifdef SIN6_LEN
525 sa->sin6_len = sizeof(struct sockaddr_in6);
526 #endif
527 sa->sin6_family = AF_INET6;
528 sa->sin6_port = htons(port);
529 sa->sin6_flowinfo = 0;
530 sa->sin6_addr = ipa_to_in6(a);
531
532 if (ifa && ipa_is_link_local(a))
533 sa->sin6_scope_id = ifa->index;
534 }
535
536 void
537 sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
538 {
539 if (af == AF_INET)
540 sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
541 else if (af == AF_INET6)
542 sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
543 else
544 bug("Unknown AF");
545 }
546
547 static inline void
548 sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port)
549 {
550 *port = ntohs(sa->sin_port);
551 *a = ipa_from_in4(sa->sin_addr);
552 }
553
554 static inline void
555 sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
556 {
557 *port = ntohs(sa->sin6_port);
558 *a = ipa_from_in6(sa->sin6_addr);
559
560 if (ifa && ipa_is_link_local(*a))
561 *ifa = if_find_by_index(sa->sin6_scope_id);
562 }
563
564 int
565 sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
566 {
567 if (sa->sa.sa_family != af)
568 goto fail;
569
570 if (af == AF_INET)
571 sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
572 else if (af == AF_INET6)
573 sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
574 else
575 goto fail;
576
577 return 0;
578
579 fail:
580 *a = IPA_NONE;
581 *port = 0;
582 return -1;
583 }
584
585
586 /*
587 * IPv6 multicast syscalls
588 */
589
590 /* Fortunately standardized in RFC 3493 */
591
/*
 * Initializer for struct ipv6_mreq: multicast group @maddr on interface
 * @ifa. The @ifa argument is parenthesized so that any pointer expression
 * can be passed safely.
 */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
594
595 static inline int
596 sk_setup_multicast6(sock *s)
597 {
598 int index = s->iface->index;
599 int ttl = s->ttl;
600 int n = 0;
601
602 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
603 ERR("IPV6_MULTICAST_IF");
604
605 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
606 ERR("IPV6_MULTICAST_HOPS");
607
608 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
609 ERR("IPV6_MULTICAST_LOOP");
610
611 return 0;
612 }
613
614 static inline int
615 sk_join_group6(sock *s, ip_addr maddr)
616 {
617 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
618
619 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
620 ERR("IPV6_JOIN_GROUP");
621
622 return 0;
623 }
624
625 static inline int
626 sk_leave_group6(sock *s, ip_addr maddr)
627 {
628 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
629
630 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
631 ERR("IPV6_LEAVE_GROUP");
632
633 return 0;
634 }
635
636
637 /*
638 * IPv6 packet control messages
639 */
640
641 /* Also standardized, in RFC 3542 */
642
/*
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
 * type; RFC 3542 changed the socket option to IPV6_RECVPKTINFO. When
 * IPV6_RECVPKTINFO is missing, we suppose the OS implements the older
 * RFC and use IPV6_PKTINFO.
 */
#if !defined(IPV6_RECVPKTINFO)
#define IPV6_RECVPKTINFO IPV6_PKTINFO
#endif

/* The same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT. */
#if !defined(IPV6_RECVHOPLIMIT)
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
#endif


/* Control buffer space for one struct in6_pktinfo and for one int, respectively */
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
662
663 static inline int
664 sk_request_cmsg6_pktinfo(sock *s)
665 {
666 int y = 1;
667
668 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
669 ERR("IPV6_RECVPKTINFO");
670
671 return 0;
672 }
673
674 static inline int
675 sk_request_cmsg6_ttl(sock *s)
676 {
677 int y = 1;
678
679 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
680 ERR("IPV6_RECVHOPLIMIT");
681
682 return 0;
683 }
684
685 static inline void
686 sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
687 {
688 if (cm->cmsg_type == IPV6_PKTINFO)
689 {
690 struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
691 s->laddr = ipa_from_in6(pi->ipi6_addr);
692 s->lifindex = pi->ipi6_ifindex;
693 }
694 }
695
696 static inline void
697 sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
698 {
699 if (cm->cmsg_type == IPV6_HOPLIMIT)
700 s->rcv_ttl = * (int *) CMSG_DATA(cm);
701 }
702
703 static inline void
704 sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
705 {
706 struct cmsghdr *cm;
707 struct in6_pktinfo *pi;
708 int controllen = 0;
709
710 msg->msg_control = cbuf;
711 msg->msg_controllen = cbuflen;
712
713 cm = CMSG_FIRSTHDR(msg);
714 cm->cmsg_level = SOL_IPV6;
715 cm->cmsg_type = IPV6_PKTINFO;
716 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
717 controllen += CMSG_SPACE(sizeof(*pi));
718
719 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
720 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
721 pi->ipi6_addr = ipa_to_in6(s->saddr);
722
723 msg->msg_controllen = controllen;
724 }
725
726
727 /*
728 * Miscellaneous socket syscalls
729 */
730
731 static inline int
732 sk_set_ttl4(sock *s, int ttl)
733 {
734 if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
735 ERR("IP_TTL");
736
737 return 0;
738 }
739
740 static inline int
741 sk_set_ttl6(sock *s, int ttl)
742 {
743 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
744 ERR("IPV6_UNICAST_HOPS");
745
746 return 0;
747 }
748
749 static inline int
750 sk_set_tos4(sock *s, int tos)
751 {
752 if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
753 ERR("IP_TOS");
754
755 return 0;
756 }
757
758 static inline int
759 sk_set_tos6(sock *s, int tos)
760 {
761 if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
762 ERR("IPV6_TCLASS");
763
764 return 0;
765 }
766
767
768 /*
769 * Public socket functions
770 */
771
772 /**
773 * sk_setup_multicast - enable multicast for given socket
774 * @s: socket
775 *
776 * Prepare transmission of multicast packets for given datagram socket.
777 * The socket must have defined @iface.
778 *
779 * Result: 0 for success, -1 for an error.
780 */
781
782 int
783 sk_setup_multicast(sock *s)
784 {
785 ASSERT(s->iface);
786
787 if (sk_is_ipv4(s))
788 return sk_setup_multicast4(s);
789 else
790 return sk_setup_multicast6(s);
791 }
792
793 /**
794 * sk_join_group - join multicast group for given socket
795 * @s: socket
796 * @maddr: multicast address
797 *
798 * Join multicast group for given datagram socket and associated interface.
799 * The socket must have defined @iface.
800 *
801 * Result: 0 for success, -1 for an error.
802 */
803
804 int
805 sk_join_group(sock *s, ip_addr maddr)
806 {
807 if (sk_is_ipv4(s))
808 return sk_join_group4(s, maddr);
809 else
810 return sk_join_group6(s, maddr);
811 }
812
813 /**
814 * sk_leave_group - leave multicast group for given socket
815 * @s: socket
816 * @maddr: multicast address
817 *
818 * Leave multicast group for given datagram socket and associated interface.
819 * The socket must have defined @iface.
820 *
821 * Result: 0 for success, -1 for an error.
822 */
823
824 int
825 sk_leave_group(sock *s, ip_addr maddr)
826 {
827 if (sk_is_ipv4(s))
828 return sk_leave_group4(s, maddr);
829 else
830 return sk_leave_group6(s, maddr);
831 }
832
833 /**
834 * sk_setup_broadcast - enable broadcast for given socket
835 * @s: socket
836 *
837 * Allow reception and transmission of broadcast packets for given datagram
838 * socket. The socket must have defined @iface. For transmission, packets should
839 * be send to @brd address of @iface.
840 *
841 * Result: 0 for success, -1 for an error.
842 */
843
844 int
845 sk_setup_broadcast(sock *s)
846 {
847 int y = 1;
848
849 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
850 ERR("SO_BROADCAST");
851
852 return 0;
853 }
854
855 /**
856 * sk_set_ttl - set transmit TTL for given socket
857 * @s: socket
858 * @ttl: TTL value
859 *
860 * Set TTL for already opened connections when TTL was not set before. Useful
861 * for accepted connections when different ones should have different TTL.
862 *
863 * Result: 0 for success, -1 for an error.
864 */
865
866 int
867 sk_set_ttl(sock *s, int ttl)
868 {
869 s->ttl = ttl;
870
871 if (sk_is_ipv4(s))
872 return sk_set_ttl4(s, ttl);
873 else
874 return sk_set_ttl6(s, ttl);
875 }
876
877 /**
878 * sk_set_min_ttl - set minimal accepted TTL for given socket
879 * @s: socket
880 * @ttl: TTL value
881 *
882 * Set minimal accepted TTL for given socket. Can be used for TTL security.
883 * implementations.
884 *
885 * Result: 0 for success, -1 for an error.
886 */
887
888 int
889 sk_set_min_ttl(sock *s, int ttl)
890 {
891 if (sk_is_ipv4(s))
892 return sk_set_min_ttl4(s, ttl);
893 else
894 return sk_set_min_ttl6(s, ttl);
895 }
896
897 #if 0
898 /**
899 * sk_set_md5_auth - add / remove MD5 security association for given socket
900 * @s: socket
901 * @a: IP address of the other side
902 * @ifa: Interface for link-local IP address
903 * @passwd: password used for MD5 authentication
904 *
905 * In TCP MD5 handling code in kernel, there is a set of pairs (address,
906 * password) used to choose password according to address of the other side.
907 * This function is useful for listening socket, for active sockets it is enough
908 * to set s->password field.
909 *
910 * When called with passwd != NULL, the new pair is added,
911 * When called with passwd == NULL, the existing pair is removed.
912 *
913 * Result: 0 for success, -1 for an error.
914 */
915
916 int
917 sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
918 { DUMMY; }
919 #endif
920
921 /**
922 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
923 * @s: socket
924 * @offset: offset
925 *
926 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
927 * kernel will automatically fill it for outgoing packets and check it for
928 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
929 * known to the kernel.
930 *
931 * Result: 0 for success, -1 for an error.
932 */
933
934 int
935 sk_set_ipv6_checksum(sock *s, int offset)
936 {
937 if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
938 ERR("IPV6_CHECKSUM");
939
940 return 0;
941 }
942
943 int
944 sk_set_icmp6_filter(sock *s, int p1, int p2)
945 {
946 /* a bit of lame interface, but it is here only for Radv */
947 struct icmp6_filter f;
948
949 ICMP6_FILTER_SETBLOCKALL(&f);
950 ICMP6_FILTER_SETPASS(p1, &f);
951 ICMP6_FILTER_SETPASS(p2, &f);
952
953 if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
954 ERR("ICMP6_FILTER");
955
956 return 0;
957 }
958
959 void
960 sk_log_error(sock *s, const char *p)
961 {
962 log(L_ERR "%s: Socket error: %s%#m", p, s->err);
963 }
964
965
966 /*
967 * Actual struct birdsock code
968 */
969
970 static list sock_list;
971 static struct birdsock *current_sock;
972 static struct birdsock *stored_sock;
973 static int sock_recalc_fdsets_p;
974
975 static inline sock *
976 sk_next(sock *s)
977 {
978 if (!s->n.next->next)
979 return NULL;
980 else
981 return SKIP_BACK(sock, n, s->n.next);
982 }
983
984 static void
985 sk_alloc_bufs(sock *s)
986 {
987 if (!s->rbuf && s->rbsize)
988 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
989 s->rpos = s->rbuf;
990 if (!s->tbuf && s->tbsize)
991 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
992 s->tpos = s->ttx = s->tbuf;
993 }
994
995 static void
996 sk_free_bufs(sock *s)
997 {
998 if (s->rbuf_alloc)
999 {
1000 xfree(s->rbuf_alloc);
1001 s->rbuf = s->rbuf_alloc = NULL;
1002 }
1003 if (s->tbuf_alloc)
1004 {
1005 xfree(s->tbuf_alloc);
1006 s->tbuf = s->tbuf_alloc = NULL;
1007 }
1008 }
1009
1010 static void
1011 sk_free(resource *r)
1012 {
1013 sock *s = (sock *) r;
1014
1015 sk_free_bufs(s);
1016 if (s->fd >= 0)
1017 {
1018 close(s->fd);
1019
1020 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1021 if (s->flags & SKF_THREAD)
1022 return;
1023
1024 if (s == current_sock)
1025 current_sock = sk_next(s);
1026 if (s == stored_sock)
1027 stored_sock = sk_next(s);
1028 rem_node(&s->n);
1029 sock_recalc_fdsets_p = 1;
1030 }
1031 }
1032
1033 void
1034 sk_set_rbsize(sock *s, uint val)
1035 {
1036 ASSERT(s->rbuf_alloc == s->rbuf);
1037
1038 if (s->rbsize == val)
1039 return;
1040
1041 s->rbsize = val;
1042 xfree(s->rbuf_alloc);
1043 s->rbuf_alloc = xmalloc(val);
1044 s->rpos = s->rbuf = s->rbuf_alloc;
1045 }
1046
1047 void
1048 sk_set_tbsize(sock *s, uint val)
1049 {
1050 ASSERT(s->tbuf_alloc == s->tbuf);
1051
1052 if (s->tbsize == val)
1053 return;
1054
1055 byte *old_tbuf = s->tbuf;
1056
1057 s->tbsize = val;
1058 s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1059 s->tpos = s->tbuf + (s->tpos - old_tbuf);
1060 s->ttx = s->tbuf + (s->ttx - old_tbuf);
1061 }
1062
1063 void
1064 sk_set_tbuf(sock *s, void *tbuf)
1065 {
1066 s->tbuf = tbuf ?: s->tbuf_alloc;
1067 s->ttx = s->tpos = s->tbuf;
1068 }
1069
1070 void
1071 sk_reallocate(sock *s)
1072 {
1073 sk_free_bufs(s);
1074 sk_alloc_bufs(s);
1075 }
1076
1077 static void
1078 sk_dump(resource *r)
1079 {
1080 sock *s = (sock *) r;
1081 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1082
1083 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1084 sk_type_names[s->type],
1085 s->data,
1086 s->saddr,
1087 s->sport,
1088 s->daddr,
1089 s->dport,
1090 s->tos,
1091 s->ttl,
1092 s->iface ? s->iface->name : "none");
1093 }
1094
1095 static struct resclass sk_class = {
1096 "Socket",
1097 sizeof(sock),
1098 sk_free,
1099 sk_dump,
1100 NULL,
1101 NULL
1102 };
1103
1104 /**
1105 * sk_new - create a socket
1106 * @p: pool
1107 *
1108 * This function creates a new socket resource. If you want to use it,
1109 * you need to fill in all the required fields of the structure and
1110 * call sk_open() to do the actual opening of the socket.
1111 *
1112 * The real function name is sock_new(), sk_new() is a macro wrapper
1113 * to avoid collision with OpenSSL.
1114 */
1115 sock *
1116 sock_new(pool *p)
1117 {
1118 sock *s = ralloc(p, &sk_class);
1119 s->pool = p;
1120 // s->saddr = s->daddr = IPA_NONE;
1121 s->tos = s->priority = s->ttl = -1;
1122 s->fd = -1;
1123 return s;
1124 }
1125
1126 static int
1127 sk_setup(sock *s)
1128 {
1129 int y = 1;
1130 int fd = s->fd;
1131
1132 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1133 ERR("O_NONBLOCK");
1134
1135 if (!s->af)
1136 return 0;
1137
1138 if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1139 s->flags |= SKF_PKTINFO;
1140
1141 #ifdef CONFIG_USE_HDRINCL
1142 if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1143 {
1144 s->flags &= ~SKF_PKTINFO;
1145 s->flags |= SKF_HDRINCL;
1146 if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1147 ERR("IP_HDRINCL");
1148 }
1149 #endif
1150
1151 if (s->iface)
1152 {
1153 #ifdef SO_BINDTODEVICE
1154 struct ifreq ifr;
1155 strcpy(ifr.ifr_name, s->iface->name);
1156 if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1157 ERR("SO_BINDTODEVICE");
1158 #endif
1159
1160 #ifdef CONFIG_UNIX_DONTROUTE
1161 if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1162 ERR("SO_DONTROUTE");
1163 #endif
1164 }
1165
1166 if (s->priority >= 0)
1167 if (sk_set_priority(s, s->priority) < 0)
1168 return -1;
1169
1170 if (sk_is_ipv4(s))
1171 {
1172 if (s->flags & SKF_LADDR_RX)
1173 if (sk_request_cmsg4_pktinfo(s) < 0)
1174 return -1;
1175
1176 if (s->flags & SKF_TTL_RX)
1177 if (sk_request_cmsg4_ttl(s) < 0)
1178 return -1;
1179
1180 if ((s->type == SK_UDP) || (s->type == SK_IP))
1181 if (sk_disable_mtu_disc4(s) < 0)
1182 return -1;
1183
1184 if (s->ttl >= 0)
1185 if (sk_set_ttl4(s, s->ttl) < 0)
1186 return -1;
1187
1188 if (s->tos >= 0)
1189 if (sk_set_tos4(s, s->tos) < 0)
1190 return -1;
1191 }
1192
1193 if (sk_is_ipv6(s))
1194 {
1195 if (s->flags & SKF_V6ONLY)
1196 if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1197 ERR("IPV6_V6ONLY");
1198
1199 if (s->flags & SKF_LADDR_RX)
1200 if (sk_request_cmsg6_pktinfo(s) < 0)
1201 return -1;
1202
1203 if (s->flags & SKF_TTL_RX)
1204 if (sk_request_cmsg6_ttl(s) < 0)
1205 return -1;
1206
1207 if ((s->type == SK_UDP) || (s->type == SK_IP))
1208 if (sk_disable_mtu_disc6(s) < 0)
1209 return -1;
1210
1211 if (s->ttl >= 0)
1212 if (sk_set_ttl6(s, s->ttl) < 0)
1213 return -1;
1214
1215 if (s->tos >= 0)
1216 if (sk_set_tos6(s, s->tos) < 0)
1217 return -1;
1218 }
1219
1220 return 0;
1221 }
1222
1223 static void
1224 sk_insert(sock *s)
1225 {
1226 add_tail(&sock_list, &s->n);
1227 sock_recalc_fdsets_p = 1;
1228 }
1229
1230 static void
1231 sk_tcp_connected(sock *s)
1232 {
1233 sockaddr sa;
1234 int sa_len = sizeof(sa);
1235
1236 if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1237 (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1238 log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1239
1240 s->type = SK_TCP;
1241 sk_alloc_bufs(s);
1242 s->tx_hook(s);
1243 }
1244
1245 static int
1246 sk_passive_connected(sock *s, int type)
1247 {
1248 sockaddr loc_sa, rem_sa;
1249 int loc_sa_len = sizeof(loc_sa);
1250 int rem_sa_len = sizeof(rem_sa);
1251
1252 int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1253 if (fd < 0)
1254 {
1255 if ((errno != EINTR) && (errno != EAGAIN))
1256 s->err_hook(s, errno);
1257 return 0;
1258 }
1259
1260 sock *t = sk_new(s->pool);
1261 t->type = type;
1262 t->fd = fd;
1263 t->af = s->af;
1264 t->ttl = s->ttl;
1265 t->tos = s->tos;
1266 t->rbsize = s->rbsize;
1267 t->tbsize = s->tbsize;
1268
1269 if (type == SK_TCP)
1270 {
1271 if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1272 (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1273 log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1274
1275 if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1276 log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1277 }
1278
1279 if (sk_setup(t) < 0)
1280 {
1281 /* FIXME: Call err_hook instead ? */
1282 log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1283
1284 /* FIXME: handle it better in rfree() */
1285 close(t->fd);
1286 t->fd = -1;
1287 rfree(t);
1288 return 1;
1289 }
1290
1291 sk_insert(t);
1292 sk_alloc_bufs(t);
1293 s->rx_hook(t, 0);
1294 return 1;
1295 }
1296
1297 /**
1298 * sk_open - open a socket
1299 * @s: socket
1300 *
1301 * This function takes a socket resource created by sk_new() and
1302 * initialized by the user and binds a corresponding network connection
1303 * to it.
1304 *
1305 * Result: 0 for success, -1 for an error.
1306 */
1307 int
1308 sk_open(sock *s)
1309 {
1310 int af = BIRD_AF;
1311 int fd = -1;
1312 int do_bind = 0;
1313 int bind_port = 0;
1314 ip_addr bind_addr = IPA_NONE;
1315 sockaddr sa;
1316
1317 switch (s->type)
1318 {
1319 case SK_TCP_ACTIVE:
1320 s->ttx = ""; /* Force s->ttx != s->tpos */
1321 /* Fall thru */
1322 case SK_TCP_PASSIVE:
1323 fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1324 bind_port = s->sport;
1325 bind_addr = s->saddr;
1326 do_bind = bind_port || ipa_nonzero(bind_addr);
1327 break;
1328
1329 case SK_UDP:
1330 fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1331 bind_port = s->sport;
1332 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1333 do_bind = 1;
1334 break;
1335
1336 case SK_IP:
1337 fd = socket(af, SOCK_RAW, s->dport);
1338 bind_port = 0;
1339 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1340 do_bind = ipa_nonzero(bind_addr);
1341 break;
1342
1343 case SK_MAGIC:
1344 af = 0;
1345 fd = s->fd;
1346 break;
1347
1348 default:
1349 bug("sk_open() called for invalid sock type %d", s->type);
1350 }
1351
1352 if (fd < 0)
1353 ERR("socket");
1354
1355 s->af = af;
1356 s->fd = fd;
1357
1358 if (sk_setup(s) < 0)
1359 goto err;
1360
1361 if (do_bind)
1362 {
1363 if (bind_port)
1364 {
1365 int y = 1;
1366
1367 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1368 ERR2("SO_REUSEADDR");
1369
1370 #ifdef CONFIG_NO_IFACE_BIND
1371 /* Workaround missing ability to bind to an iface */
1372 if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1373 {
1374 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1375 ERR2("SO_REUSEPORT");
1376 }
1377 #endif
1378 }
1379
1380 sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
1381 if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1382 ERR2("bind");
1383 }
1384
1385 if (s->password)
1386 if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0)
1387 goto err;
1388
1389 switch (s->type)
1390 {
1391 case SK_TCP_ACTIVE:
1392 sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
1393 if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1394 sk_tcp_connected(s);
1395 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1396 errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1397 ERR2("connect");
1398 break;
1399
1400 case SK_TCP_PASSIVE:
1401 if (listen(fd, 8) < 0)
1402 ERR2("listen");
1403 break;
1404
1405 case SK_MAGIC:
1406 break;
1407
1408 default:
1409 sk_alloc_bufs(s);
1410 }
1411
1412 if (!(s->flags & SKF_THREAD))
1413 sk_insert(s);
1414 return 0;
1415
1416 err:
1417 close(fd);
1418 s->fd = -1;
1419 return -1;
1420 }
1421
1422 int
1423 sk_open_unix(sock *s, char *name)
1424 {
1425 struct sockaddr_un sa;
1426 int fd;
1427
1428 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1429
1430 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1431 if (fd < 0)
1432 return -1;
1433
1434 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1435 return -1;
1436
1437 /* Path length checked in test_old_bird() */
1438 sa.sun_family = AF_UNIX;
1439 strcpy(sa.sun_path, name);
1440
1441 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1442 return -1;
1443
1444 if (listen(fd, 8) < 0)
1445 return -1;
1446
1447 s->fd = fd;
1448 sk_insert(s);
1449 return 0;
1450 }
1451
1452
/*
 * Sizes of the control-message buffers used by sk_recvmsg() and
 * sk_sendmsg(); each is the larger of the IPv4 and IPv6 requirement.
 */
#define CMSG_RX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO, CMSG6_SPACE_PKTINFO)
1456
1457 static void
1458 sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1459 {
1460 if (sk_is_ipv4(s))
1461 sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1462 else
1463 sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1464 }
1465
1466 static void
1467 sk_process_cmsgs(sock *s, struct msghdr *msg)
1468 {
1469 struct cmsghdr *cm;
1470
1471 s->laddr = IPA_NONE;
1472 s->lifindex = 0;
1473 s->rcv_ttl = -1;
1474
1475 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1476 {
1477 if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1478 {
1479 sk_process_cmsg4_pktinfo(s, cm);
1480 sk_process_cmsg4_ttl(s, cm);
1481 }
1482
1483 if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1484 {
1485 sk_process_cmsg6_pktinfo(s, cm);
1486 sk_process_cmsg6_ttl(s, cm);
1487 }
1488 }
1489 }
1490
1491
1492 static inline int
1493 sk_sendmsg(sock *s)
1494 {
1495 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1496 byte cmsg_buf[CMSG_TX_SPACE];
1497 sockaddr dst;
1498
1499 sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
1500
1501 struct msghdr msg = {
1502 .msg_name = &dst.sa,
1503 .msg_namelen = SA_LEN(dst),
1504 .msg_iov = &iov,
1505 .msg_iovlen = 1
1506 };
1507
1508 #ifdef CONFIG_USE_HDRINCL
1509 byte hdr[20];
1510 struct iovec iov2[2] = { {hdr, 20}, iov };
1511
1512 if (s->flags & SKF_HDRINCL)
1513 {
1514 sk_prepare_ip_header(s, hdr, iov.iov_len);
1515 msg.msg_iov = iov2;
1516 msg.msg_iovlen = 2;
1517 }
1518 #endif
1519
1520 if (s->flags & SKF_PKTINFO)
1521 sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1522
1523 return sendmsg(s->fd, &msg, 0);
1524 }
1525
1526 static inline int
1527 sk_recvmsg(sock *s)
1528 {
1529 struct iovec iov = {s->rbuf, s->rbsize};
1530 byte cmsg_buf[CMSG_RX_SPACE];
1531 sockaddr src;
1532
1533 struct msghdr msg = {
1534 .msg_name = &src.sa,
1535 .msg_namelen = sizeof(src), // XXXX ??
1536 .msg_iov = &iov,
1537 .msg_iovlen = 1,
1538 .msg_control = cmsg_buf,
1539 .msg_controllen = sizeof(cmsg_buf),
1540 .msg_flags = 0
1541 };
1542
1543 int rv = recvmsg(s->fd, &msg, 0);
1544 if (rv < 0)
1545 return rv;
1546
1547 //ifdef IPV4
1548 // if (cf_type == SK_IP)
1549 // rv = ipv4_skip_header(pbuf, rv);
1550 //endif
1551
1552 sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1553 sk_process_cmsgs(s, &msg);
1554
1555 if (msg.msg_flags & MSG_TRUNC)
1556 s->flags |= SKF_TRUNCATED;
1557 else
1558 s->flags &= ~SKF_TRUNCATED;
1559
1560 return rv;
1561 }
1562
1563
1564 static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1565
1566 static int
1567 sk_maybe_write(sock *s)
1568 {
1569 int e;
1570
1571 switch (s->type)
1572 {
1573 case SK_TCP:
1574 case SK_MAGIC:
1575 case SK_UNIX:
1576 while (s->ttx != s->tpos)
1577 {
1578 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1579
1580 if (e < 0)
1581 {
1582 if (errno != EINTR && errno != EAGAIN)
1583 {
1584 reset_tx_buffer(s);
1585 /* EPIPE is just a connection close notification during TX */
1586 s->err_hook(s, (errno != EPIPE) ? errno : 0);
1587 return -1;
1588 }
1589 return 0;
1590 }
1591 s->ttx += e;
1592 }
1593 reset_tx_buffer(s);
1594 return 1;
1595
1596 case SK_UDP:
1597 case SK_IP:
1598 {
1599 if (s->tbuf == s->tpos)
1600 return 1;
1601
1602 e = sk_sendmsg(s);
1603
1604 if (e < 0)
1605 {
1606 if (errno != EINTR && errno != EAGAIN)
1607 {
1608 reset_tx_buffer(s);
1609 s->err_hook(s, errno);
1610 return -1;
1611 }
1612
1613 if (!s->tx_hook)
1614 reset_tx_buffer(s);
1615 return 0;
1616 }
1617 reset_tx_buffer(s);
1618 return 1;
1619 }
1620 default:
1621 bug("sk_maybe_write: unknown socket type %d", s->type);
1622 }
1623 }
1624
1625 int
1626 sk_rx_ready(sock *s)
1627 {
1628 fd_set rd, wr;
1629 struct timeval timo;
1630 int rv;
1631
1632 FD_ZERO(&rd);
1633 FD_ZERO(&wr);
1634 FD_SET(s->fd, &rd);
1635
1636 timo.tv_sec = 0;
1637 timo.tv_usec = 0;
1638
1639 redo:
1640 rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1641
1642 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1643 goto redo;
1644
1645 return rv;
1646 }
1647
1648 /**
1649 * sk_send - send data to a socket
1650 * @s: socket
1651 * @len: number of bytes to send
1652 *
1653 * This function sends @len bytes of data prepared in the
1654 * transmit buffer of the socket @s to the network connection.
1655 * If the packet can be sent immediately, it does so and returns
1656 * 1, else it queues the packet for later processing, returns 0
1657 * and calls the @tx_hook of the socket when the tranmission
1658 * takes place.
1659 */
1660 int
1661 sk_send(sock *s, unsigned len)
1662 {
1663 s->ttx = s->tbuf;
1664 s->tpos = s->tbuf + len;
1665 return sk_maybe_write(s);
1666 }
1667
1668 /**
1669 * sk_send_to - send data to a specific destination
1670 * @s: socket
1671 * @len: number of bytes to send
1672 * @addr: IP address to send the packet to
1673 * @port: port to send the packet to
1674 *
1675 * This is a sk_send() replacement for connection-less packet sockets
1676 * which allows destination of the packet to be chosen dynamically.
1677 * Raw IP sockets should use 0 for @port.
1678 */
1679 int
1680 sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1681 {
1682 s->daddr = addr;
1683 if (port)
1684 s->dport = port;
1685
1686 s->ttx = s->tbuf;
1687 s->tpos = s->tbuf + len;
1688 return sk_maybe_write(s);
1689 }
1690
1691 /*
1692 int
1693 sk_send_full(sock *s, unsigned len, struct iface *ifa,
1694 ip_addr saddr, ip_addr daddr, unsigned dport)
1695 {
1696 s->iface = ifa;
1697 s->saddr = saddr;
1698 s->daddr = daddr;
1699 s->dport = dport;
1700 s->ttx = s->tbuf;
1701 s->tpos = s->tbuf + len;
1702 return sk_maybe_write(s);
1703 }
1704 */
1705
1706 /* sk_read() and sk_write() are called from BFD's event loop */
1707
1708 int
1709 sk_read(sock *s)
1710 {
1711 switch (s->type)
1712 {
1713 case SK_TCP_PASSIVE:
1714 return sk_passive_connected(s, SK_TCP);
1715
1716 case SK_UNIX_PASSIVE:
1717 return sk_passive_connected(s, SK_UNIX);
1718
1719 case SK_TCP:
1720 case SK_UNIX:
1721 {
1722 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1723
1724 if (c < 0)
1725 {
1726 if (errno != EINTR && errno != EAGAIN)
1727 s->err_hook(s, errno);
1728 }
1729 else if (!c)
1730 s->err_hook(s, 0);
1731 else
1732 {
1733 s->rpos += c;
1734 if (s->rx_hook(s, s->rpos - s->rbuf))
1735 {
1736 /* We need to be careful since the socket could have been deleted by the hook */
1737 if (current_sock == s)
1738 s->rpos = s->rbuf;
1739 }
1740 return 1;
1741 }
1742 return 0;
1743 }
1744
1745 case SK_MAGIC:
1746 return s->rx_hook(s, 0);
1747
1748 default:
1749 {
1750 int e = sk_recvmsg(s);
1751
1752 if (e < 0)
1753 {
1754 if (errno != EINTR && errno != EAGAIN)
1755 s->err_hook(s, errno);
1756 return 0;
1757 }
1758
1759 s->rpos = s->rbuf + e;
1760 s->rx_hook(s, e);
1761 return 1;
1762 }
1763 }
1764 }
1765
1766 int
1767 sk_write(sock *s)
1768 {
1769 switch (s->type)
1770 {
1771 case SK_TCP_ACTIVE:
1772 {
1773 sockaddr sa;
1774 sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1775
1776 if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1777 sk_tcp_connected(s);
1778 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1779 s->err_hook(s, errno);
1780 return 0;
1781 }
1782
1783 default:
1784 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1785 {
1786 if (s->tx_hook)
1787 s->tx_hook(s);
1788 return 1;
1789 }
1790 return 0;
1791 }
1792 }
1793
1794 void
1795 sk_dump_all(void)
1796 {
1797 node *n;
1798 sock *s;
1799
1800 debug("Open sockets:\n");
1801 WALK_LIST(n, sock_list)
1802 {
1803 s = SKIP_BACK(sock, n, n);
1804 debug("%p ", s);
1805 sk_dump(&s->r);
1806 }
1807 debug("\n");
1808 }
1809
1810
1811 /*
1812 * Main I/O Loop
1813 */
1814
/*
 * Requests scheduled asynchronously; io_loop() checks each flag before
 * entering select(), runs the matching handler and clears the flag.
 */
volatile int async_config_flag;		/* Reconfiguration scheduled */
volatile int async_dump_flag;		/* Dump scheduled */
1817
1818 void
1819 io_init(void)
1820 {
1821 init_list(&near_timers);
1822 init_list(&far_timers);
1823 init_list(&sock_list);
1824 init_list(&global_event_list);
1825 krt_io_init();
1826 init_times();
1827 update_times();
1828 boot_time = now;
1829 srandom((int) now_real);
1830 }
1831
1832 static int short_loops = 0;
1833 #define SHORT_LOOP_MAX 10
1834
1835 void
1836 io_loop(void)
1837 {
1838 fd_set rd, wr;
1839 struct timeval timo;
1840 time_t tout;
1841 int hi, events;
1842 sock *s;
1843 node *n;
1844
1845 sock_recalc_fdsets_p = 1;
1846 for(;;)
1847 {
1848 events = ev_run_list(&global_event_list);
1849 update_times();
1850 tout = tm_first_shot();
1851 if (tout <= now)
1852 {
1853 tm_shot();
1854 continue;
1855 }
1856 timo.tv_sec = events ? 0 : MIN(tout - now, 3);
1857 timo.tv_usec = 0;
1858
1859 if (sock_recalc_fdsets_p)
1860 {
1861 sock_recalc_fdsets_p = 0;
1862 FD_ZERO(&rd);
1863 FD_ZERO(&wr);
1864 }
1865
1866 hi = 0;
1867 WALK_LIST(n, sock_list)
1868 {
1869 s = SKIP_BACK(sock, n, n);
1870 if (s->rx_hook)
1871 {
1872 FD_SET(s->fd, &rd);
1873 if (s->fd > hi)
1874 hi = s->fd;
1875 }
1876 else
1877 FD_CLR(s->fd, &rd);
1878 if (s->tx_hook && s->ttx != s->tpos)
1879 {
1880 FD_SET(s->fd, &wr);
1881 if (s->fd > hi)
1882 hi = s->fd;
1883 }
1884 else
1885 FD_CLR(s->fd, &wr);
1886 }
1887
1888 /*
1889 * Yes, this is racy. But even if the signal comes before this test
1890 * and entering select(), it gets caught on the next timer tick.
1891 */
1892
1893 if (async_config_flag)
1894 {
1895 async_config();
1896 async_config_flag = 0;
1897 continue;
1898 }
1899 if (async_dump_flag)
1900 {
1901 async_dump();
1902 async_dump_flag = 0;
1903 continue;
1904 }
1905 if (async_shutdown_flag)
1906 {
1907 async_shutdown();
1908 async_shutdown_flag = 0;
1909 continue;
1910 }
1911
1912 /* And finally enter select() to find active sockets */
1913 hi = select(hi+1, &rd, &wr, NULL, &timo);
1914
1915 if (hi < 0)
1916 {
1917 if (errno == EINTR || errno == EAGAIN)
1918 continue;
1919 die("select: %m");
1920 }
1921 if (hi)
1922 {
1923 /* guaranteed to be non-empty */
1924 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1925
1926 while (current_sock)
1927 {
1928 sock *s = current_sock;
1929 int e;
1930 int steps;
1931
1932 steps = MAX_STEPS;
1933 if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1934 do
1935 {
1936 steps--;
1937 e = sk_read(s);
1938 if (s != current_sock)
1939 goto next;
1940 }
1941 while (e && s->rx_hook && steps);
1942
1943 steps = MAX_STEPS;
1944 if (FD_ISSET(s->fd, &wr))
1945 do
1946 {
1947 steps--;
1948 e = sk_write(s);
1949 if (s != current_sock)
1950 goto next;
1951 }
1952 while (e && steps);
1953 current_sock = sk_next(s);
1954 next: ;
1955 }
1956
1957 short_loops++;
1958 if (events && (short_loops < SHORT_LOOP_MAX))
1959 continue;
1960 short_loops = 0;
1961
1962 int count = 0;
1963 current_sock = stored_sock;
1964 if (current_sock == NULL)
1965 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1966
1967 while (current_sock && count < MAX_RX_STEPS)
1968 {
1969 sock *s = current_sock;
1970 int e UNUSED;
1971
1972 if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1973 {
1974 count++;
1975 e = sk_read(s);
1976 if (s != current_sock)
1977 goto next2;
1978 }
1979 current_sock = sk_next(s);
1980 next2: ;
1981 }
1982
1983 stored_sock = current_sock;
1984 }
1985 }
1986 }
1987
/*
 * Check whether another instance is already listening on the control
 * socket @path: if a connection to it succeeds, the program dies with
 * an error message. It also dies when the probe socket cannot be
 * created or when @path does not fit into sun_path.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un addr;
  int probe = socket(AF_UNIX, SOCK_STREAM, 0);

  if (probe < 0)
    die("Cannot create socket: %m");

  if (strlen(path) >= sizeof(addr.sun_path))
    die("Socket path too long");

  memset(&addr, 0, sizeof(addr));
  addr.sun_family = AF_UNIX;
  strcpy(addr.sun_path, path);

  /* A successful connect means somebody is serving that socket */
  if (connect(probe, (struct sockaddr *) &addr, SUN_LEN(&addr)) == 0)
    die("I found another BIRD running.");

  close(probe);
}
2006
2007