]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/unix/io.c
Fixes IPv6 build on older systems.
[thirdparty/bird.git] / sysdep / unix / io.c
CommitLineData
b5d9ee5c
MM
1/*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
38a608c5 4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
b1a1faba 5 * (c) 2004 Ondrej Filip <feela@network.cz>
b5d9ee5c
MM
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
10#include <stdio.h>
11#include <stdlib.h>
01b776e1 12#include <time.h>
b5d9ee5c
MM
13#include <sys/time.h>
14#include <sys/types.h>
15#include <sys/socket.h>
16#include <sys/fcntl.h>
46a82e9c 17#include <sys/uio.h>
b93abffa 18#include <sys/un.h>
b5d9ee5c
MM
19#include <unistd.h>
20#include <errno.h>
21
22#include "nest/bird.h"
23#include "lib/lists.h"
24#include "lib/resource.h"
25#include "lib/timer.h"
26#include "lib/socket.h"
e8f73195 27#include "lib/event.h"
afa8937a 28#include "lib/string.h"
b5d9ee5c
MM
29#include "nest/iface.h"
30
31#include "lib/unix.h"
a2867cd9 32#include "lib/sysio.h"
b5d9ee5c 33
ea89da38 34/* Maximum number of calls of tx handler for one socket in one
4323099d
OZ
35 * select iteration. Should be small enough to not monopolize CPU by
36 * one protocol instance.
37 */
38#define MAX_STEPS 4
39
ea89da38
OZ
/* Maximum number of calls of rx handler for all sockets in one select
   iteration. RX callbacks are often much more costly so we limit
   this to get small latencies */
43#define MAX_RX_STEPS 4
44
a9c986f9
MM
45/*
46 * Tracked Files
47 */
48
49struct rfile {
50 resource r;
51 FILE *f;
52};
53
54static void
55rf_free(resource *r)
56{
57 struct rfile *a = (struct rfile *) r;
58
59 fclose(a->f);
60}
61
62static void
63rf_dump(resource *r)
64{
65 struct rfile *a = (struct rfile *) r;
66
67 debug("(FILE *%p)\n", a->f);
68}
69
70static struct resclass rf_class = {
71 "FILE",
72 sizeof(struct rfile),
73 rf_free,
e81b440f
OZ
74 rf_dump,
75 NULL
a9c986f9
MM
76};
77
78void *
f78056fb 79tracked_fopen(pool *p, char *name, char *mode)
a9c986f9
MM
80{
81 FILE *f = fopen(name, mode);
82
83 if (f)
84 {
85 struct rfile *r = ralloc(p, &rf_class);
86 r->f = f;
87 }
88 return f;
89}
90
525fa2c1
MM
91/**
92 * DOC: Timers
93 *
94 * Timers are resources which represent a wish of a module to call
95 * a function at the specified time. The platform dependent code
58f7d004 96 * doesn't guarantee exact timing, only that a timer function
525fa2c1
MM
97 * won't be called before the requested time.
98 *
fd91ae33
OZ
99 * In BIRD, time is represented by values of the &bird_clock_t type
100 * which are integral numbers interpreted as a relative number of seconds since
101 * some fixed time point in past. The current time can be read
102 * from variable @now with reasonable accuracy and is monotonic. There is also
103 * a current 'absolute' time in variable @now_real reported by OS.
525fa2c1
MM
104 *
105 * Each timer is described by a &timer structure containing a pointer
106 * to the handler function (@hook), data private to this function (@data),
107 * time the function should be called at (@expires, 0 for inactive timers),
108 * for the other fields see |timer.h|.
b5d9ee5c
MM
109 */
110
111#define NEAR_TIMER_LIMIT 4
112
b5d9ee5c
MM
113static list near_timers, far_timers;
114static bird_clock_t first_far_timer = TIME_INFINITY;
115
fd91ae33
OZ
116bird_clock_t now, now_real;
117
118static void
119update_times_plain(void)
120{
121 bird_clock_t new_time = time(NULL);
122 int delta = new_time - now_real;
123
124 if ((delta >= 0) && (delta < 60))
125 now += delta;
126 else if (now_real != 0)
127 log(L_WARN "Time jump, delta %d s", delta);
128
129 now_real = new_time;
130}
131
132static void
133update_times_gettime(void)
134{
135 struct timespec ts;
136 int rv;
137
138 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
139 if (rv != 0)
140 die("clock_gettime: %m");
141
142 if (ts.tv_sec != now) {
143 if (ts.tv_sec < now)
144 log(L_ERR "Monotonic timer is broken");
145
146 now = ts.tv_sec;
147 now_real = time(NULL);
148 }
149}
150
151static int clock_monotonic_available;
152
153static inline void
154update_times(void)
155{
156 if (clock_monotonic_available)
157 update_times_gettime();
158 else
159 update_times_plain();
160}
161
162static inline void
163init_times(void)
164{
165 struct timespec ts;
166 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
167 if (!clock_monotonic_available)
168 log(L_WARN "Monotonic timer is missing");
169}
170
b5d9ee5c
MM
171
172static void
173tm_free(resource *r)
174{
175 timer *t = (timer *) r;
176
177 tm_stop(t);
178}
179
180static void
181tm_dump(resource *r)
182{
183 timer *t = (timer *) r;
184
e8f73195 185 debug("(code %p, data %p, ", t->hook, t->data);
af847acc
MM
186 if (t->randomize)
187 debug("rand %d, ", t->randomize);
188 if (t->recurrent)
189 debug("recur %d, ", t->recurrent);
b5d9ee5c
MM
190 if (t->expires)
191 debug("expires in %d sec)\n", t->expires - now);
192 else
193 debug("inactive)\n");
194}
195
196static struct resclass tm_class = {
197 "Timer",
198 sizeof(timer),
199 tm_free,
e81b440f
OZ
200 tm_dump,
201 NULL
b5d9ee5c
MM
202};
203
525fa2c1
MM
204/**
205 * tm_new - create a timer
206 * @p: pool
207 *
208 * This function creates a new timer resource and returns
209 * a pointer to it. To use the timer, you need to fill in
210 * the structure fields and call tm_start() to start timing.
211 */
b5d9ee5c
MM
212timer *
213tm_new(pool *p)
214{
215 timer *t = ralloc(p, &tm_class);
b5d9ee5c
MM
216 return t;
217}
218
219static inline void
220tm_insert_near(timer *t)
221{
222 node *n = HEAD(near_timers);
223
224 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
225 n = n->next;
226 insert_node(&t->n, n->prev);
227}
228
525fa2c1
MM
229/**
230 * tm_start - start a timer
231 * @t: timer
232 * @after: number of seconds the timer should be run after
233 *
234 * This function schedules the hook function of the timer to
235 * be called after @after seconds. If the timer has been already
236 * started, it's @expire time is replaced by the new value.
237 *
238 * You can have set the @randomize field of @t, the timeout
239 * will be increased by a random number of seconds chosen
240 * uniformly from range 0 .. @randomize.
241 *
242 * You can call tm_start() from the handler function of the timer
243 * to request another run of the timer. Also, you can set the @recurrent
244 * field to have the timer re-added automatically with the same timeout.
245 */
b5d9ee5c
MM
246void
247tm_start(timer *t, unsigned after)
248{
249 bird_clock_t when;
250
251 if (t->randomize)
af847acc 252 after += random() % (t->randomize + 1);
b5d9ee5c
MM
253 when = now + after;
254 if (t->expires == when)
255 return;
256 if (t->expires)
257 rem_node(&t->n);
258 t->expires = when;
259 if (after <= NEAR_TIMER_LIMIT)
260 tm_insert_near(t);
261 else
262 {
263 if (!first_far_timer || first_far_timer > when)
264 first_far_timer = when;
265 add_tail(&far_timers, &t->n);
266 }
267}
268
525fa2c1
MM
269/**
270 * tm_stop - stop a timer
271 * @t: timer
272 *
273 * This function stops a timer. If the timer is already stopped,
274 * nothing happens.
275 */
b5d9ee5c
MM
276void
277tm_stop(timer *t)
278{
279 if (t->expires)
280 {
281 rem_node(&t->n);
282 t->expires = 0;
283 }
284}
285
286static void
287tm_dump_them(char *name, list *l)
288{
289 node *n;
290 timer *t;
291
292 debug("%s timers:\n", name);
293 WALK_LIST(n, *l)
294 {
295 t = SKIP_BACK(timer, n, n);
296 debug("%p ", t);
297 tm_dump(&t->r);
298 }
299 debug("\n");
300}
301
302void
303tm_dump_all(void)
304{
305 tm_dump_them("Near", &near_timers);
306 tm_dump_them("Far", &far_timers);
307}
308
309static inline time_t
310tm_first_shot(void)
311{
312 time_t x = first_far_timer;
313
314 if (!EMPTY_LIST(near_timers))
315 {
316 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
317 if (t->expires < x)
318 x = t->expires;
319 }
320 return x;
321}
322
323static void
324tm_shot(void)
325{
326 timer *t;
327 node *n, *m;
328
329 if (first_far_timer <= now)
330 {
28a9a189 331 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
b5d9ee5c
MM
332 first_far_timer = TIME_INFINITY;
333 n = HEAD(far_timers);
334 while (m = n->next)
335 {
336 t = SKIP_BACK(timer, n, n);
337 if (t->expires <= limit)
338 {
339 rem_node(n);
340 tm_insert_near(t);
341 }
342 else if (t->expires < first_far_timer)
343 first_far_timer = t->expires;
344 n = m;
345 }
346 }
347 while ((n = HEAD(near_timers)) -> next)
348 {
af847acc 349 int delay;
b5d9ee5c
MM
350 t = SKIP_BACK(timer, n, n);
351 if (t->expires > now)
352 break;
353 rem_node(n);
af847acc 354 delay = t->expires - now;
b5d9ee5c 355 t->expires = 0;
af847acc
MM
356 if (t->recurrent)
357 {
358 int i = t->recurrent - delay;
359 if (i < 0)
360 i = 0;
361 tm_start(t, i);
362 }
b5d9ee5c
MM
363 t->hook(t);
364 }
365}
366
0d3effcf
OF
367/**
368 * tm_parse_datetime - parse a date and time
369 * @x: datetime string
370 *
371 * tm_parse_datetime() takes a textual representation of
372 * a date and time (dd-mm-yyyy hh:mm:ss)
373 * and converts it to the corresponding value of type &bird_clock_t.
374 */
375bird_clock_t
376tm_parse_datetime(char *x)
377{
378 struct tm tm;
379 int n;
380 time_t t;
381
382 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
383 return tm_parse_date(x);
384 tm.tm_mon--;
385 tm.tm_year -= 1900;
386 t = mktime(&tm);
387 if (t == (time_t) -1)
388 return 0;
389 return t;
390}
525fa2c1
MM
391/**
392 * tm_parse_date - parse a date
393 * @x: date string
394 *
395 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
396 * and converts it to the corresponding value of type &bird_clock_t.
397 */
913f7dc9
MM
398bird_clock_t
399tm_parse_date(char *x)
400{
401 struct tm tm;
402 int n;
403 time_t t;
404
405 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
406 return 0;
407 tm.tm_mon--;
408 tm.tm_year -= 1900;
409 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
410 t = mktime(&tm);
411 if (t == (time_t) -1)
412 return 0;
413 return t;
414}
415
c37e7851
OZ
416static void
417tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
913f7dc9 418{
c37e7851
OZ
419 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
420 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
913f7dc9 421
c37e7851
OZ
422 if (delta < 20*3600)
423 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
424 else if (delta < 360*86400)
425 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
426 else
427 bsprintf(x, "%d", tm->tm_year+1900);
913f7dc9
MM
428}
429
c37e7851
OZ
430#include "conf/conf.h"
431
525fa2c1
MM
432/**
433 * tm_format_datetime - convert date and time to textual representation
434 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
435 * @t: time
436 *
fd91ae33
OZ
437 * This function formats the given relative time value @t to a textual
438 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
525fa2c1 439 */
7a88832e 440void
c37e7851 441tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
7a88832e 442{
c37e7851 443 const char *fmt_used;
7a88832e 444 struct tm *tm;
fd91ae33
OZ
445 bird_clock_t delta = now - t;
446 t = now_real - delta;
7a88832e 447 tm = localtime(&t);
7a88832e 448
c37e7851
OZ
449 if (fmt_spec->fmt1 == NULL)
450 return tm_format_reltime(x, tm, delta);
afa8937a 451
c37e7851
OZ
452 if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
453 fmt_used = fmt_spec->fmt1;
afa8937a 454 else
c37e7851
OZ
455 fmt_used = fmt_spec->fmt2;
456
457 int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
458 if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
459 strcpy(x, "<too-long>");
afa8937a
MM
460}
461
525fa2c1
MM
462/**
463 * DOC: Sockets
464 *
465 * Socket resources represent network connections. Their data structure (&socket)
466 * contains a lot of fields defining the exact type of the socket, the local and
467 * remote addresses and ports, pointers to socket buffers and finally pointers to
468 * hook functions to be called when new data have arrived to the receive buffer
469 * (@rx_hook), when the contents of the transmit buffer have been transmitted
470 * (@tx_hook) and when an error or connection close occurs (@err_hook).
471 *
38a608c5 472 * Freeing of sockets from inside socket hooks is perfectly safe.
b5d9ee5c
MM
473 */
474
abae6e9c
MM
475#ifndef SOL_IP
476#define SOL_IP IPPROTO_IP
477#endif
478
b1a1faba
OF
479#ifndef SOL_IPV6
480#define SOL_IPV6 IPPROTO_IPV6
481#endif
482
b5d9ee5c 483static list sock_list;
38a608c5 484static struct birdsock *current_sock;
ea89da38 485static struct birdsock *stored_sock;
38a608c5
MM
486static int sock_recalc_fdsets_p;
487
488static inline sock *
489sk_next(sock *s)
490{
491 if (!s->n.next->next)
492 return NULL;
493 else
494 return SKIP_BACK(sock, n, s->n.next);
495}
b5d9ee5c
MM
496
497static void
4da25acb 498sk_alloc_bufs(sock *s)
b5d9ee5c 499{
4da25acb
MM
500 if (!s->rbuf && s->rbsize)
501 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
502 s->rpos = s->rbuf;
503 if (!s->tbuf && s->tbsize)
504 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
505 s->tpos = s->ttx = s->tbuf;
506}
b5d9ee5c 507
4da25acb
MM
508static void
509sk_free_bufs(sock *s)
510{
38a608c5 511 if (s->rbuf_alloc)
4da25acb
MM
512 {
513 xfree(s->rbuf_alloc);
514 s->rbuf = s->rbuf_alloc = NULL;
515 }
38a608c5 516 if (s->tbuf_alloc)
4da25acb
MM
517 {
518 xfree(s->tbuf_alloc);
519 s->tbuf = s->tbuf_alloc = NULL;
520 }
521}
522
523static void
524sk_free(resource *r)
525{
526 sock *s = (sock *) r;
527
528 sk_free_bufs(s);
b5d9ee5c 529 if (s->fd >= 0)
320f4173
MM
530 {
531 close(s->fd);
38a608c5
MM
532 if (s == current_sock)
533 current_sock = sk_next(s);
ea89da38
OZ
534 if (s == stored_sock)
535 stored_sock = sk_next(s);
320f4173 536 rem_node(&s->n);
38a608c5 537 sock_recalc_fdsets_p = 1;
320f4173 538 }
b5d9ee5c
MM
539}
540
4da25acb
MM
541void
542sk_reallocate(sock *s)
543{
544 sk_free_bufs(s);
545 sk_alloc_bufs(s);
546}
547
b5d9ee5c
MM
548static void
549sk_dump(resource *r)
550{
551 sock *s = (sock *) r;
b93abffa 552 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
b5d9ee5c
MM
553
554 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
555 sk_type_names[s->type],
556 s->data,
557 s->saddr,
558 s->sport,
559 s->daddr,
560 s->dport,
561 s->tos,
562 s->ttl,
563 s->iface ? s->iface->name : "none");
564}
565
566static struct resclass sk_class = {
567 "Socket",
568 sizeof(sock),
569 sk_free,
e81b440f
OZ
570 sk_dump,
571 NULL
b5d9ee5c
MM
572};
573
525fa2c1
MM
574/**
575 * sk_new - create a socket
576 * @p: pool
577 *
578 * This function creates a new socket resource. If you want to use it,
579 * you need to fill in all the required fields of the structure and
580 * call sk_open() to do the actual opening of the socket.
581 */
b5d9ee5c
MM
582sock *
583sk_new(pool *p)
584{
585 sock *s = ralloc(p, &sk_class);
586 s->pool = p;
daeeb8e9 587 // s->saddr = s->daddr = IPA_NONE;
b5d9ee5c 588 s->tos = s->ttl = -1;
b5d9ee5c
MM
589 s->fd = -1;
590 return s;
591}
592
38a608c5
MM
593static void
594sk_insert(sock *s)
595{
596 add_tail(&sock_list, &s->n);
597 sock_recalc_fdsets_p = 1;
598}
b5d9ee5c 599
4f22c981
MM
600#ifdef IPV6
601
4f22c981
MM
602void
603fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
604{
b1a1faba 605 memset (sa, 0, sizeof (struct sockaddr_in6));
4f22c981
MM
606 sa->sin6_family = AF_INET6;
607 sa->sin6_port = htons(port);
608 sa->sin6_flowinfo = 0;
b1a1faba
OF
609#ifdef HAVE_SIN_LEN
610 sa->sin6_len = sizeof(struct sockaddr_in6);
611#endif
4f22c981
MM
612 set_inaddr(&sa->sin6_addr, a);
613}
614
061ab802
OZ
615static inline void
616fill_in_sockifa(sockaddr *sa, struct iface *ifa)
617{
618 sa->sin6_scope_id = ifa ? ifa->index : 0;
619}
620
4f22c981 621void
b1a1faba 622get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check)
4f22c981 623{
b1a1faba
OF
624 if (check && sa->sin6_family != AF_INET6)
625 bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
4f22c981
MM
626 if (port)
627 *port = ntohs(sa->sin6_port);
628 memcpy(a, &sa->sin6_addr, sizeof(*a));
629 ipa_ntoh(*a);
630}
631
632#else
633
4cf45766 634void
4f22c981 635fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
b5d9ee5c 636{
b1a1faba 637 memset (sa, 0, sizeof (struct sockaddr_in));
b5d9ee5c
MM
638 sa->sin_family = AF_INET;
639 sa->sin_port = htons(port);
b1a1faba
OF
640#ifdef HAVE_SIN_LEN
641 sa->sin_len = sizeof(struct sockaddr_in);
642#endif
b5d9ee5c
MM
643 set_inaddr(&sa->sin_addr, a);
644}
645
061ab802 646static inline void
e81b440f 647fill_in_sockifa(sockaddr *sa UNUSED, struct iface *ifa UNUSED)
061ab802
OZ
648{
649}
650
af847acc 651void
b1a1faba 652get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check)
b5d9ee5c 653{
b1a1faba
OF
654 if (check && sa->sin_family != AF_INET)
655 bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
af847acc
MM
656 if (port)
657 *port = ntohs(sa->sin_port);
b5d9ee5c 658 memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
dce26783 659 ipa_ntoh(*a);
b5d9ee5c
MM
660}
661
4f22c981
MM
662#endif
663
bed41728
OZ
664
665#ifdef IPV6
666
667/* PKTINFO handling is also standardized in IPv6 */
668#define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
669#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
670
dcc60494
OZ
671/*
672 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
673 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
674 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
675 * RFC and we use IPV6_PKTINFO.
676 */
677#ifndef IPV6_RECVPKTINFO
678#define IPV6_RECVPKTINFO IPV6_PKTINFO
679#endif
680
bed41728
OZ
681static char *
682sysio_register_cmsgs(sock *s)
683{
684 int ok = 1;
685 if ((s->flags & SKF_LADDR_RX) &&
686 setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)
687 return "IPV6_RECVPKTINFO";
688
689 return NULL;
690}
691
692static void
693sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
694{
695 struct cmsghdr *cm;
696 struct in6_pktinfo *pi = NULL;
697
698 if (!(s->flags & SKF_LADDR_RX))
699 return;
700
701 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
702 {
703 if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
704 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
705 }
706
707 if (!pi)
708 {
709 s->laddr = IPA_NONE;
710 s->lifindex = 0;
711 return;
712 }
713
714 get_inaddr(&s->laddr, &pi->ipi6_addr);
715 s->lifindex = pi->ipi6_ifindex;
716 return;
717}
718
646b24d9 719/*
bed41728
OZ
720static void
721sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
722{
723 struct cmsghdr *cm;
724 struct in6_pktinfo *pi;
725
726 if (!(s->flags & SKF_LADDR_TX))
727 return;
728
729 msg->msg_control = cbuf;
730 msg->msg_controllen = cbuflen;
731
732 cm = CMSG_FIRSTHDR(msg);
733 cm->cmsg_level = IPPROTO_IPV6;
734 cm->cmsg_type = IPV6_PKTINFO;
735 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
736
737 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
738 set_inaddr(&pi->ipi6_addr, s->saddr);
739 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
740
741 msg->msg_controllen = cm->cmsg_len;
742 return;
743}
646b24d9 744*/
bed41728
OZ
745#endif
746
a39b165e
OZ
747static char *
748sk_set_ttl_int(sock *s)
749{
a39b165e 750#ifdef IPV6
f9c799a0 751 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
a39b165e
OZ
752 return "IPV6_UNICAST_HOPS";
753#else
754 if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
755 return "IP_TTL";
756#ifdef CONFIG_UNIX_DONTROUTE
ff2857b0 757 int one = 1;
a39b165e
OZ
758 if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
759 return "SO_DONTROUTE";
760#endif
761#endif
762 return NULL;
763}
764
38a608c5
MM
765#define ERR(x) do { err = x; goto bad; } while(0)
766#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
767
b5d9ee5c
MM
768static char *
769sk_setup(sock *s)
770{
771 int fd = s->fd;
353729f5 772 char *err = NULL;
b5d9ee5c
MM
773
774 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
775 ERR("fcntl(O_NONBLOCK)");
b93abffa
MM
776 if (s->type == SK_UNIX)
777 return NULL;
a39b165e 778#ifndef IPV6
b5d9ee5c 779 if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
f782b72c 780 WARN("IP_TOS");
b5d9ee5c 781#endif
789772ed
OZ
782
783#ifdef IPV6
784 int v = 1;
785 if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
786 WARN("IPV6_V6ONLY");
787#endif
788
a39b165e
OZ
789 if (s->ttl >= 0)
790 err = sk_set_ttl_int(s);
a39b165e 791
353729f5 792 sysio_register_cmsgs(s);
b5d9ee5c
MM
793bad:
794 return err;
795}
796
a39b165e
OZ
797/**
798 * sk_set_ttl - set TTL for given socket.
799 * @s: socket
800 * @ttl: TTL value
801 *
802 * Set TTL for already opened connections when TTL was not set before.
803 * Useful for accepted connections when different ones should have
804 * different TTL.
805 *
806 * Result: 0 for success, -1 for an error.
807 */
808
809int
810sk_set_ttl(sock *s, int ttl)
811{
812 char *err;
813
814 s->ttl = ttl;
815 if (err = sk_set_ttl_int(s))
816 log(L_ERR "sk_set_ttl: %s: %m", err);
817
818 return (err ? -1 : 0);
819}
820
d51aa281 821
d51aa281
OZ
822/**
823 * sk_set_md5_auth - add / remove MD5 security association for given socket.
824 * @s: socket
825 * @a: IP address of the other side
826 * @passwd: password used for MD5 authentication
827 *
828 * In TCP MD5 handling code in kernel, there is a set of pairs
829 * (address, password) used to choose password according to
830 * address of the other side. This function is useful for
831 * listening socket, for active sockets it is enough to set
832 * s->password field.
833 *
834 * When called with passwd != NULL, the new pair is added,
835 * When called with passwd == NULL, the existing pair is removed.
836 *
837 * Result: 0 for success, -1 for an error.
838 */
839
840int
841sk_set_md5_auth(sock *s, ip_addr a, char *passwd)
842{
843 sockaddr sa;
844 fill_in_sockaddr(&sa, a, 0);
845 return sk_set_md5_auth_int(s, &sa, passwd);
846}
847
f9c799a0
OZ
848int
849sk_set_broadcast(sock *s, int enable)
850{
851 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)) < 0)
4ac7c834
OZ
852 {
853 log(L_ERR "sk_set_broadcast: SO_BROADCAST: %m");
854 return -1;
855 }
856
857 return 0;
f9c799a0
OZ
858}
859
860
861#ifdef IPV6
862
4ac7c834
OZ
863int
864sk_set_ipv6_checksum(sock *s, int offset)
865{
866 if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
867 {
868 log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
869 return -1;
870 }
871
872 return 0;
873}
874
f9c799a0
OZ
875int
876sk_setup_multicast(sock *s)
877{
878 char *err;
879 int zero = 0;
880 int index;
881
882 ASSERT(s->iface && s->iface->addr);
883
884 index = s->iface->index;
885 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
886 ERR("IPV6_MULTICAST_HOPS");
887 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
888 ERR("IPV6_MULTICAST_LOOP");
889 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
890 ERR("IPV6_MULTICAST_IF");
891
e7b09e4a
OZ
892 if (err = sysio_bind_to_iface(s))
893 goto bad;
894
f9c799a0
OZ
895 return 0;
896
897bad:
898 log(L_ERR "sk_setup_multicast: %s: %m", err);
899 return -1;
900}
901
902int
903sk_join_group(sock *s, ip_addr maddr)
904{
905 struct ipv6_mreq mreq;
906
907 set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
908
909#ifdef CONFIG_IPV6_GLIBC_20
910 mreq.ipv6mr_ifindex = s->iface->index;
911#else
912 mreq.ipv6mr_interface = s->iface->index;
913#endif
914
861f223a 915 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0)
f9c799a0 916 {
861f223a 917 log(L_ERR "sk_join_group: IPV6_JOIN_GROUP: %m");
f9c799a0
OZ
918 return -1;
919 }
920
921 return 0;
922}
923
924int
925sk_leave_group(sock *s, ip_addr maddr)
926{
927 struct ipv6_mreq mreq;
928
929 set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
930
931#ifdef CONFIG_IPV6_GLIBC_20
932 mreq.ipv6mr_ifindex = s->iface->index;
933#else
934 mreq.ipv6mr_interface = s->iface->index;
935#endif
936
861f223a 937 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0)
f9c799a0 938 {
861f223a 939 log(L_ERR "sk_leave_group: IPV6_LEAVE_GROUP: %m");
f9c799a0
OZ
940 return -1;
941 }
942
943 return 0;
944}
945
353729f5 946
f9c799a0
OZ
947#else /* IPV4 */
948
949int
950sk_setup_multicast(sock *s)
951{
952 char *err;
953
954 ASSERT(s->iface && s->iface->addr);
955
956 if (err = sysio_setup_multicast(s))
957 {
958 log(L_ERR "sk_setup_multicast: %s: %m", err);
959 return -1;
960 }
961
962 return 0;
963}
964
965int
966sk_join_group(sock *s, ip_addr maddr)
967{
968 char *err;
969
970 if (err = sysio_join_group(s, maddr))
971 {
972 log(L_ERR "sk_join_group: %s: %m", err);
973 return -1;
974 }
975
976 return 0;
977}
978
979int
980sk_leave_group(sock *s, ip_addr maddr)
981{
982 char *err;
983
984 if (err = sysio_leave_group(s, maddr))
985 {
986 log(L_ERR "sk_leave_group: %s: %m", err);
987 return -1;
988 }
989
990 return 0;
991}
992
993#endif
994
d51aa281 995
b93abffa 996static void
b5d9ee5c
MM
997sk_tcp_connected(sock *s)
998{
b5d9ee5c
MM
999 s->type = SK_TCP;
1000 sk_alloc_bufs(s);
320f4173 1001 s->tx_hook(s);
b5d9ee5c
MM
1002}
1003
b93abffa
MM
1004static int
1005sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
1006{
1007 int fd = accept(s->fd, sa, &al);
1008 if (fd >= 0)
1009 {
1010 sock *t = sk_new(s->pool);
1011 char *err;
1012 t->type = type;
1013 t->fd = fd;
e1ddd993
MM
1014 t->ttl = s->ttl;
1015 t->tos = s->tos;
1016 t->rbsize = s->rbsize;
1017 t->tbsize = s->tbsize;
1018 if (type == SK_TCP)
cf31112f
OZ
1019 {
1020 sockaddr lsa;
1021 int lsa_len = sizeof(lsa);
1022 if (getsockname(fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
1023 get_sockaddr(&lsa, &t->saddr, &t->sport, 1);
1024
1025 get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1);
1026 }
38a608c5 1027 sk_insert(t);
b93abffa
MM
1028 if (err = sk_setup(t))
1029 {
1030 log(L_ERR "Incoming connection: %s: %m", err);
e1ddd993
MM
1031 rfree(t);
1032 return 1;
b93abffa
MM
1033 }
1034 sk_alloc_bufs(t);
e1ddd993 1035 s->rx_hook(t, 0);
b93abffa
MM
1036 return 1;
1037 }
1038 else if (errno != EINTR && errno != EAGAIN)
1039 {
c025b852 1040 s->err_hook(s, errno);
b93abffa
MM
1041 }
1042 return 0;
1043}
1044
525fa2c1
MM
1045/**
1046 * sk_open - open a socket
1047 * @s: socket
1048 *
1049 * This function takes a socket resource created by sk_new() and
1050 * initialized by the user and binds a corresponding network connection
1051 * to it.
1052 *
1053 * Result: 0 for success, -1 for an error.
1054 */
b5d9ee5c
MM
1055int
1056sk_open(sock *s)
1057{
93a786cb 1058 int fd;
4f22c981 1059 sockaddr sa;
b5d9ee5c
MM
1060 int one = 1;
1061 int type = s->type;
1062 int has_src = ipa_nonzero(s->saddr) || s->sport;
b5d9ee5c
MM
1063 char *err;
1064
1065 switch (type)
1066 {
1067 case SK_TCP_ACTIVE:
320f4173
MM
1068 s->ttx = ""; /* Force s->ttx != s->tpos */
1069 /* Fall thru */
b5d9ee5c 1070 case SK_TCP_PASSIVE:
4f22c981 1071 fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
b5d9ee5c
MM
1072 break;
1073 case SK_UDP:
4f22c981 1074 fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
b5d9ee5c
MM
1075 break;
1076 case SK_IP:
4f22c981 1077 fd = socket(BIRD_PF, SOCK_RAW, s->dport);
b5d9ee5c 1078 break;
b4b3b39e
MM
1079 case SK_MAGIC:
1080 fd = s->fd;
1081 break;
b5d9ee5c 1082 default:
b4b3b39e 1083 bug("sk_open() called for invalid sock type %d", type);
b5d9ee5c
MM
1084 }
1085 if (fd < 0)
1086 die("sk_open: socket: %m");
1087 s->fd = fd;
1088
1089 if (err = sk_setup(s))
1090 goto bad;
38a608c5 1091
b5d9ee5c
MM
1092 if (has_src)
1093 {
1094 int port;
1095
f9c799a0 1096 if (type == SK_IP)
b5d9ee5c
MM
1097 port = 0;
1098 else
1099 {
1100 port = s->sport;
1101 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
1102 ERR("SO_REUSEADDR");
1103 }
1104 fill_in_sockaddr(&sa, s->saddr, port);
061ab802 1105 fill_in_sockifa(&sa, s->iface);
b5d9ee5c
MM
1106 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1107 ERR("bind");
1108 }
1109 fill_in_sockaddr(&sa, s->daddr, s->dport);
d51aa281
OZ
1110
1111 if (s->password)
1112 {
1113 int rv = sk_set_md5_auth_int(s, &sa, s->password);
1114 if (rv < 0)
1115 goto bad_no_log;
1116 }
1117
b5d9ee5c
MM
1118 switch (type)
1119 {
1120 case SK_TCP_ACTIVE:
1121 if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
1122 sk_tcp_connected(s);
9cbf43eb
MM
1123 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1124 errno != ECONNREFUSED && errno != EHOSTUNREACH)
b5d9ee5c
MM
1125 ERR("connect");
1126 break;
1127 case SK_TCP_PASSIVE:
1128 if (listen(fd, 8))
1129 ERR("listen");
1130 break;
4f22c981
MM
1131 case SK_MAGIC:
1132 break;
1133 default:
320f4173 1134 sk_alloc_bufs(s);
4f22c981
MM
1135#ifdef IPV6
1136#ifdef IPV6_MTU_DISCOVER
1137 {
1138 int dont = IPV6_PMTUDISC_DONT;
1139 if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
1140 ERR("IPV6_MTU_DISCOVER");
1141 }
1142#endif
1143#else
1144#ifdef IP_PMTUDISC
1145 {
1146 int dont = IP_PMTUDISC_DONT;
1147 if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
1148 ERR("IP_PMTUDISC");
1149 }
1150#endif
1151#endif
b5d9ee5c
MM
1152 }
1153
38a608c5 1154 sk_insert(s);
b5d9ee5c
MM
1155 return 0;
1156
1157bad:
1158 log(L_ERR "sk_open: %s: %m", err);
d51aa281 1159bad_no_log:
b5d9ee5c
MM
1160 close(fd);
1161 s->fd = -1;
1162 return -1;
1163}
1164
97e46d28 1165void
b93abffa
MM
1166sk_open_unix(sock *s, char *name)
1167{
1168 int fd;
1169 struct sockaddr_un sa;
1170 char *err;
1171
1172 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1173 if (fd < 0)
97e46d28 1174 ERR("socket");
b93abffa
MM
1175 s->fd = fd;
1176 if (err = sk_setup(s))
1177 goto bad;
1178 unlink(name);
68fa95cf 1179
97e46d28 1180 /* Path length checked in test_old_bird() */
b93abffa 1181 sa.sun_family = AF_UNIX;
97c6fa02 1182 strcpy(sa.sun_path, name);
0b3bf4b1 1183 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
b93abffa
MM
1184 ERR("bind");
1185 if (listen(fd, 8))
1186 ERR("listen");
38a608c5 1187 sk_insert(s);
97e46d28 1188 return;
b93abffa 1189
97e46d28 1190 bad:
b93abffa 1191 log(L_ERR "sk_open_unix: %s: %m", err);
97e46d28 1192 die("Unable to create control socket %s", name);
b93abffa
MM
1193}
1194
353729f5
OZ
1195static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1196
b5d9ee5c
MM
1197static int
1198sk_maybe_write(sock *s)
1199{
1200 int e;
1201
1202 switch (s->type)
1203 {
1204 case SK_TCP:
b4b3b39e 1205 case SK_MAGIC:
b93abffa 1206 case SK_UNIX:
b5d9ee5c
MM
1207 while (s->ttx != s->tpos)
1208 {
1209 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1210 if (e < 0)
1211 {
1212 if (errno != EINTR && errno != EAGAIN)
1213 {
353729f5 1214 reset_tx_buffer(s);
c025b852 1215 s->err_hook(s, errno);
b5d9ee5c
MM
1216 return -1;
1217 }
1218 return 0;
1219 }
1220 s->ttx += e;
1221 }
353729f5 1222 reset_tx_buffer(s);
b5d9ee5c
MM
1223 return 1;
1224 case SK_UDP:
b5d9ee5c 1225 case SK_IP:
b5d9ee5c 1226 {
b5d9ee5c
MM
1227 if (s->tbuf == s->tpos)
1228 return 1;
b1a1faba 1229
353729f5
OZ
1230 sockaddr sa;
1231 fill_in_sockaddr(&sa, s->daddr, s->dport);
061ab802 1232 fill_in_sockifa(&sa, s->iface);
353729f5
OZ
1233
1234 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
646b24d9 1235 // byte cmsg_buf[CMSG_TX_SPACE];
353729f5
OZ
1236
1237 struct msghdr msg = {
1238 .msg_name = &sa,
1239 .msg_namelen = sizeof(sa),
1240 .msg_iov = &iov,
bed41728 1241 .msg_iovlen = 1};
353729f5 1242
646b24d9 1243 // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
353729f5
OZ
1244 e = sendmsg(s->fd, &msg, 0);
1245
b5d9ee5c
MM
1246 if (e < 0)
1247 {
1248 if (errno != EINTR && errno != EAGAIN)
1249 {
353729f5 1250 reset_tx_buffer(s);
c025b852 1251 s->err_hook(s, errno);
b5d9ee5c
MM
1252 return -1;
1253 }
1254 return 0;
1255 }
353729f5 1256 reset_tx_buffer(s);
b5d9ee5c
MM
1257 return 1;
1258 }
1259 default:
08c69a77 1260 bug("sk_maybe_write: unknown socket type %d", s->type);
b5d9ee5c
MM
1261 }
1262}
1263
ea89da38
OZ
1264int
1265sk_rx_ready(sock *s)
1266{
1267 fd_set rd, wr;
1268 struct timeval timo;
1269 int rv;
1270
1271 FD_ZERO(&rd);
1272 FD_ZERO(&wr);
1273 FD_SET(s->fd, &rd);
1274
1275 timo.tv_sec = 0;
1276 timo.tv_usec = 0;
1277
1278 redo:
1279 rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1280
1281 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1282 goto redo;
1283
1284 return rv;
1285}
1286
525fa2c1
MM
1287/**
1288 * sk_send - send data to a socket
1289 * @s: socket
1290 * @len: number of bytes to send
1291 *
1292 * This function sends @len bytes of data prepared in the
1293 * transmit buffer of the socket @s to the network connection.
1294 * If the packet can be sent immediately, it does so and returns
1295 * 1, else it queues the packet for later processing, returns 0
1296 * and calls the @tx_hook of the socket when the tranmission
1297 * takes place.
1298 */
b5d9ee5c
MM
1299int
1300sk_send(sock *s, unsigned len)
1301{
b5d9ee5c
MM
1302 s->ttx = s->tbuf;
1303 s->tpos = s->tbuf + len;
1304 return sk_maybe_write(s);
1305}
1306
525fa2c1
MM
1307/**
1308 * sk_send_to - send data to a specific destination
1309 * @s: socket
1310 * @len: number of bytes to send
1311 * @addr: IP address to send the packet to
1312 * @port: port to send the packet to
1313 *
2e9b2421 1314 * This is a sk_send() replacement for connection-less packet sockets
525fa2c1
MM
1315 * which allows destination of the packet to be chosen dynamically.
1316 */
b5d9ee5c
MM
1317int
1318sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1319{
353729f5
OZ
1320 s->daddr = addr;
1321 s->dport = port;
b5d9ee5c
MM
1322 s->ttx = s->tbuf;
1323 s->tpos = s->tbuf + len;
1324 return sk_maybe_write(s);
1325}
1326
353729f5
OZ
1327/*
1328int
1329sk_send_full(sock *s, unsigned len, struct iface *ifa,
1330 ip_addr saddr, ip_addr daddr, unsigned dport)
1331{
1332 s->iface = ifa;
1333 s->saddr = saddr;
1334 s->daddr = daddr;
1335 s->dport = dport;
1336 s->ttx = s->tbuf;
1337 s->tpos = s->tbuf + len;
1338 return sk_maybe_write(s);
1339}
1340*/
1341
b5d9ee5c
MM
1342static int
1343sk_read(sock *s)
1344{
1345 switch (s->type)
1346 {
b5d9ee5c
MM
1347 case SK_TCP_PASSIVE:
1348 {
4f22c981 1349 sockaddr sa;
b93abffa
MM
1350 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1351 }
1352 case SK_UNIX_PASSIVE:
1353 {
1354 struct sockaddr_un sa;
1355 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
b5d9ee5c
MM
1356 }
1357 case SK_TCP:
b93abffa 1358 case SK_UNIX:
b5d9ee5c
MM
1359 {
1360 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1361
1362 if (c < 0)
1363 {
1364 if (errno != EINTR && errno != EAGAIN)
c025b852 1365 s->err_hook(s, errno);
b5d9ee5c
MM
1366 }
1367 else if (!c)
c025b852 1368 s->err_hook(s, 0);
b5d9ee5c
MM
1369 else
1370 {
1371 s->rpos += c;
1372 if (s->rx_hook(s, s->rpos - s->rbuf))
38a608c5
MM
1373 {
1374 /* We need to be careful since the socket could have been deleted by the hook */
1375 if (current_sock == s)
1376 s->rpos = s->rbuf;
1377 }
b5d9ee5c
MM
1378 return 1;
1379 }
1380 return 0;
1381 }
b4b3b39e
MM
1382 case SK_MAGIC:
1383 return s->rx_hook(s, 0);
b5d9ee5c
MM
1384 default:
1385 {
4f22c981 1386 sockaddr sa;
353729f5
OZ
1387 int e;
1388
1389 struct iovec iov = {s->rbuf, s->rbsize};
1390 byte cmsg_buf[CMSG_RX_SPACE];
1391
1392 struct msghdr msg = {
1393 .msg_name = &sa,
1394 .msg_namelen = sizeof(sa),
1395 .msg_iov = &iov,
1396 .msg_iovlen = 1,
1397 .msg_control = cmsg_buf,
1398 .msg_controllen = sizeof(cmsg_buf),
1399 .msg_flags = 0};
1400
1401 e = recvmsg(s->fd, &msg, 0);
b5d9ee5c
MM
1402
1403 if (e < 0)
1404 {
1405 if (errno != EINTR && errno != EAGAIN)
c025b852 1406 s->err_hook(s, errno);
b5d9ee5c
MM
1407 return 0;
1408 }
1409 s->rpos = s->rbuf + e;
b1a1faba 1410 get_sockaddr(&sa, &s->faddr, &s->fport, 1);
353729f5
OZ
1411 sysio_process_rx_cmsgs(s, &msg);
1412
b5d9ee5c
MM
1413 s->rx_hook(s, e);
1414 return 1;
1415 }
1416 }
1417}
1418
38a608c5 1419static int
b5d9ee5c
MM
1420sk_write(sock *s)
1421{
320f4173
MM
1422 switch (s->type)
1423 {
1424 case SK_TCP_ACTIVE:
1425 {
1426 sockaddr sa;
1427 fill_in_sockaddr(&sa, s->daddr, s->dport);
09e4117c 1428 if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
320f4173
MM
1429 sk_tcp_connected(s);
1430 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
c025b852 1431 s->err_hook(s, errno);
38a608c5 1432 return 0;
320f4173 1433 }
320f4173 1434 default:
38a608c5
MM
1435 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1436 {
1437 s->tx_hook(s);
1438 return 1;
1439 }
1440 return 0;
320f4173 1441 }
b5d9ee5c
MM
1442}
1443
1444void
1445sk_dump_all(void)
1446{
1447 node *n;
1448 sock *s;
1449
1450 debug("Open sockets:\n");
1451 WALK_LIST(n, sock_list)
1452 {
1453 s = SKIP_BACK(sock, n, n);
1454 debug("%p ", s);
1455 sk_dump(&s->r);
1456 }
1457 debug("\n");
1458}
1459
1460#undef ERR
f782b72c 1461#undef WARN
b5d9ee5c
MM
1462
1463/*
1464 * Main I/O Loop
1465 */
1466
4c9dd1e4
MM
/*
 * Requests for asynchronous actions. io_loop() checks these flags before
 * entering select(), runs the matching handler and clears the flag again.
 * (Presumably they are raised from signal handlers -- confirm at the setters.)
 */
volatile int async_config_flag;         /* Asynchronous reconfiguration scheduled */
volatile int async_dump_flag;           /* Asynchronous dump scheduled */
1469
b5d9ee5c
MM
1470void
1471io_init(void)
1472{
1473 init_list(&near_timers);
1474 init_list(&far_timers);
1475 init_list(&sock_list);
e8f73195 1476 init_list(&global_event_list);
7e5f5ffd 1477 krt_io_init();
fd91ae33
OZ
1478 init_times();
1479 update_times();
1480 srandom((int) now_real);
b5d9ee5c
MM
1481}
1482
ea89da38
OZ
/*
 * Number of io_loop() iterations with socket activity since the costly RX
 * pass was last allowed to run. While events keep being processed, the RX
 * pass is skipped until this counter reaches SHORT_LOOP_MAX.
 */
static int short_loops = 0;
#define SHORT_LOOP_MAX 10
1485
b5d9ee5c
MM
1486void
1487io_loop(void)
1488{
1489 fd_set rd, wr;
1490 struct timeval timo;
1491 time_t tout;
30770df2 1492 int hi, events;
b5d9ee5c 1493 sock *s;
38a608c5 1494 node *n;
b5d9ee5c 1495
38a608c5 1496 sock_recalc_fdsets_p = 1;
b5d9ee5c
MM
1497 for(;;)
1498 {
30770df2 1499 events = ev_run_list(&global_event_list);
fd91ae33 1500 update_times();
b5d9ee5c
MM
1501 tout = tm_first_shot();
1502 if (tout <= now)
1503 {
1504 tm_shot();
1505 continue;
1506 }
30770df2
MM
1507 timo.tv_sec = events ? 0 : tout - now;
1508 timo.tv_usec = 0;
b5d9ee5c 1509
38a608c5
MM
1510 if (sock_recalc_fdsets_p)
1511 {
1512 sock_recalc_fdsets_p = 0;
1513 FD_ZERO(&rd);
1514 FD_ZERO(&wr);
1515 }
1516
b5d9ee5c
MM
1517 hi = 0;
1518 WALK_LIST(n, sock_list)
1519 {
1520 s = SKIP_BACK(sock, n, n);
1521 if (s->rx_hook)
1522 {
1523 FD_SET(s->fd, &rd);
1524 if (s->fd > hi)
1525 hi = s->fd;
1526 }
38a608c5
MM
1527 else
1528 FD_CLR(s->fd, &rd);
b5d9ee5c
MM
1529 if (s->tx_hook && s->ttx != s->tpos)
1530 {
1531 FD_SET(s->fd, &wr);
1532 if (s->fd > hi)
1533 hi = s->fd;
1534 }
38a608c5
MM
1535 else
1536 FD_CLR(s->fd, &wr);
b5d9ee5c
MM
1537 }
1538
4c9dd1e4
MM
1539 /*
1540 * Yes, this is racy. But even if the signal comes before this test
1541 * and entering select(), it gets caught on the next timer tick.
1542 */
1543
1544 if (async_config_flag)
1545 {
1546 async_config();
1547 async_config_flag = 0;
f4aabcee 1548 continue;
4c9dd1e4
MM
1549 }
1550 if (async_dump_flag)
1551 {
1552 async_dump();
1553 async_dump_flag = 0;
f4aabcee
MM
1554 continue;
1555 }
1556 if (async_shutdown_flag)
1557 {
1558 async_shutdown();
1559 async_shutdown_flag = 0;
1560 continue;
4c9dd1e4
MM
1561 }
1562
1563 /* And finally enter select() to find active sockets */
b5d9ee5c 1564 hi = select(hi+1, &rd, &wr, NULL, &timo);
ea89da38 1565
b5d9ee5c
MM
1566 if (hi < 0)
1567 {
1568 if (errno == EINTR || errno == EAGAIN)
1569 continue;
1570 die("select: %m");
1571 }
1572 if (hi)
1573 {
ea89da38
OZ
1574 /* guaranteed to be non-empty */
1575 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1576
38a608c5 1577 while (current_sock)
b5d9ee5c 1578 {
38a608c5
MM
1579 sock *s = current_sock;
1580 int e;
ea89da38
OZ
1581 int steps;
1582
1583 steps = MAX_STEPS;
1584 if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
38a608c5
MM
1585 do
1586 {
4323099d 1587 steps--;
38a608c5
MM
1588 e = sk_read(s);
1589 if (s != current_sock)
1590 goto next;
1591 }
4323099d
OZ
1592 while (e && s->rx_hook && steps);
1593
1594 steps = MAX_STEPS;
38a608c5
MM
1595 if (FD_ISSET(s->fd, &wr))
1596 do
1597 {
4323099d 1598 steps--;
38a608c5
MM
1599 e = sk_write(s);
1600 if (s != current_sock)
1601 goto next;
1602 }
4323099d 1603 while (e && steps);
38a608c5
MM
1604 current_sock = sk_next(s);
1605 next: ;
b5d9ee5c 1606 }
ea89da38
OZ
1607
1608 short_loops++;
1609 if (events && (short_loops < SHORT_LOOP_MAX))
1610 continue;
1611 short_loops = 0;
1612
1613 int count = 0;
1614 current_sock = stored_sock;
1615 if (current_sock == NULL)
1616 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1617
1618 while (current_sock && count < MAX_RX_STEPS)
1619 {
1620 sock *s = current_sock;
1621 int e;
ea89da38
OZ
1622
1623 if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1624 {
1625 count++;
1626 e = sk_read(s);
1627 if (s != current_sock)
1628 goto next2;
1629 }
1630 current_sock = sk_next(s);
1631 next2: ;
1632 }
1633
1634 stored_sock = current_sock;
b5d9ee5c
MM
1635 }
1636 }
1637}
41c8976e
OF
1638
/*
 * Check whether something is already listening on the UNIX-domain socket
 * @path. If a connection to it succeeds, the daemon dies with a message
 * that another BIRD is running; it also dies when the probe socket cannot
 * be created or when @path does not fit into a socket address.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un addr;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");

  /* sun_path must hold the path including its terminating zero */
  if (strlen(path) >= sizeof(addr.sun_path))
    die("Socket path too long");

  bzero(&addr, sizeof(addr));
  addr.sun_family = AF_UNIX;
  strcpy(addr.sun_path, path);

  int rv = connect(fd, (struct sockaddr *) &addr, SUN_LEN(&addr));
  if (rv == 0)
    die("I found another BIRD running.");

  close(fd);
}
1657
1658