]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/unix/io.c
Changes static route targets drop/reject to blackhole/unreachable.
[thirdparty/bird.git] / sysdep / unix / io.c
CommitLineData
b5d9ee5c
MM
1/*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
38a608c5 4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
b1a1faba 5 * (c) 2004 Ondrej Filip <feela@network.cz>
b5d9ee5c
MM
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
607d9914
OZ
10/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
12#define _GNU_SOURCE 1
13
b5d9ee5c
MM
14#include <stdio.h>
15#include <stdlib.h>
01b776e1 16#include <time.h>
b5d9ee5c
MM
17#include <sys/time.h>
18#include <sys/types.h>
19#include <sys/socket.h>
20#include <sys/fcntl.h>
46a82e9c 21#include <sys/uio.h>
b93abffa 22#include <sys/un.h>
b5d9ee5c
MM
23#include <unistd.h>
24#include <errno.h>
d0e9b36d 25#include <netinet/in.h>
93e868c7 26#include <netinet/icmp6.h>
b5d9ee5c
MM
27
28#include "nest/bird.h"
29#include "lib/lists.h"
30#include "lib/resource.h"
31#include "lib/timer.h"
32#include "lib/socket.h"
e8f73195 33#include "lib/event.h"
afa8937a 34#include "lib/string.h"
b5d9ee5c
MM
35#include "nest/iface.h"
36
37#include "lib/unix.h"
a2867cd9 38#include "lib/sysio.h"
b5d9ee5c 39
ea89da38 40/* Maximum number of calls of tx handler for one socket in one
4323099d
OZ
41 * select iteration. Should be small enough to not monopolize CPU by
42 * one protocol instance.
43 */
44#define MAX_STEPS 4
45
ea89da38
OZ
46/* Maximum number of calls of rx handler for all sockets in one select
47 iteration. RX callbacks are often much more costly so we limit
48 this to get small latencies */
49#define MAX_RX_STEPS 4
50
a9c986f9
MM
51/*
52 * Tracked Files
53 */
54
55struct rfile {
56 resource r;
57 FILE *f;
58};
59
60static void
61rf_free(resource *r)
62{
63 struct rfile *a = (struct rfile *) r;
64
65 fclose(a->f);
66}
67
68static void
69rf_dump(resource *r)
70{
71 struct rfile *a = (struct rfile *) r;
72
73 debug("(FILE *%p)\n", a->f);
74}
75
76static struct resclass rf_class = {
77 "FILE",
78 sizeof(struct rfile),
79 rf_free,
e81b440f 80 rf_dump,
acb60628 81 NULL,
e81b440f 82 NULL
a9c986f9
MM
83};
84
85void *
f78056fb 86tracked_fopen(pool *p, char *name, char *mode)
a9c986f9
MM
87{
88 FILE *f = fopen(name, mode);
89
90 if (f)
91 {
92 struct rfile *r = ralloc(p, &rf_class);
93 r->f = f;
94 }
95 return f;
96}
97
525fa2c1
MM
98/**
99 * DOC: Timers
100 *
101 * Timers are resources which represent a wish of a module to call
102 * a function at the specified time. The platform dependent code
58f7d004 103 * doesn't guarantee exact timing, only that a timer function
525fa2c1
MM
104 * won't be called before the requested time.
105 *
fd91ae33
OZ
106 * In BIRD, time is represented by values of the &bird_clock_t type
107 * which are integral numbers interpreted as a relative number of seconds since
108 * some fixed time point in past. The current time can be read
109 * from variable @now with reasonable accuracy and is monotonic. There is also
110 * a current 'absolute' time in variable @now_real reported by OS.
525fa2c1
MM
111 *
112 * Each timer is described by a &timer structure containing a pointer
113 * to the handler function (@hook), data private to this function (@data),
114 * time the function should be called at (@expires, 0 for inactive timers),
115 * for the other fields see |timer.h|.
b5d9ee5c
MM
116 */
117
118#define NEAR_TIMER_LIMIT 4
119
b5d9ee5c
MM
120static list near_timers, far_timers;
121static bird_clock_t first_far_timer = TIME_INFINITY;
122
002b6423
OZ
123/* now must be different from 0, because 0 is a special value in timer->expires */
124bird_clock_t now = 1, now_real;
fd91ae33
OZ
125
126static void
127update_times_plain(void)
128{
129 bird_clock_t new_time = time(NULL);
130 int delta = new_time - now_real;
131
132 if ((delta >= 0) && (delta < 60))
133 now += delta;
134 else if (now_real != 0)
135 log(L_WARN "Time jump, delta %d s", delta);
136
137 now_real = new_time;
138}
139
140static void
141update_times_gettime(void)
142{
143 struct timespec ts;
144 int rv;
145
146 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
147 if (rv != 0)
148 die("clock_gettime: %m");
149
150 if (ts.tv_sec != now) {
151 if (ts.tv_sec < now)
152 log(L_ERR "Monotonic timer is broken");
153
154 now = ts.tv_sec;
155 now_real = time(NULL);
156 }
157}
158
159static int clock_monotonic_available;
160
161static inline void
162update_times(void)
163{
164 if (clock_monotonic_available)
165 update_times_gettime();
166 else
167 update_times_plain();
168}
169
170static inline void
171init_times(void)
172{
173 struct timespec ts;
174 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
175 if (!clock_monotonic_available)
176 log(L_WARN "Monotonic timer is missing");
177}
178
b5d9ee5c
MM
179
180static void
181tm_free(resource *r)
182{
183 timer *t = (timer *) r;
184
185 tm_stop(t);
186}
187
188static void
189tm_dump(resource *r)
190{
191 timer *t = (timer *) r;
192
e8f73195 193 debug("(code %p, data %p, ", t->hook, t->data);
af847acc
MM
194 if (t->randomize)
195 debug("rand %d, ", t->randomize);
196 if (t->recurrent)
197 debug("recur %d, ", t->recurrent);
b5d9ee5c
MM
198 if (t->expires)
199 debug("expires in %d sec)\n", t->expires - now);
200 else
201 debug("inactive)\n");
202}
203
204static struct resclass tm_class = {
205 "Timer",
206 sizeof(timer),
207 tm_free,
e81b440f 208 tm_dump,
acb60628 209 NULL,
e81b440f 210 NULL
b5d9ee5c
MM
211};
212
525fa2c1
MM
213/**
214 * tm_new - create a timer
215 * @p: pool
216 *
217 * This function creates a new timer resource and returns
218 * a pointer to it. To use the timer, you need to fill in
219 * the structure fields and call tm_start() to start timing.
220 */
b5d9ee5c
MM
221timer *
222tm_new(pool *p)
223{
224 timer *t = ralloc(p, &tm_class);
b5d9ee5c
MM
225 return t;
226}
227
228static inline void
229tm_insert_near(timer *t)
230{
231 node *n = HEAD(near_timers);
232
233 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
234 n = n->next;
235 insert_node(&t->n, n->prev);
236}
237
525fa2c1
MM
238/**
239 * tm_start - start a timer
240 * @t: timer
241 * @after: number of seconds the timer should be run after
242 *
243 * This function schedules the hook function of the timer to
244 * be called after @after seconds. If the timer has been already
245 * started, it's @expire time is replaced by the new value.
246 *
247 * You can have set the @randomize field of @t, the timeout
248 * will be increased by a random number of seconds chosen
249 * uniformly from range 0 .. @randomize.
250 *
251 * You can call tm_start() from the handler function of the timer
252 * to request another run of the timer. Also, you can set the @recurrent
253 * field to have the timer re-added automatically with the same timeout.
254 */
b5d9ee5c
MM
255void
256tm_start(timer *t, unsigned after)
257{
258 bird_clock_t when;
259
260 if (t->randomize)
af847acc 261 after += random() % (t->randomize + 1);
b5d9ee5c
MM
262 when = now + after;
263 if (t->expires == when)
264 return;
265 if (t->expires)
266 rem_node(&t->n);
267 t->expires = when;
268 if (after <= NEAR_TIMER_LIMIT)
269 tm_insert_near(t);
270 else
271 {
272 if (!first_far_timer || first_far_timer > when)
273 first_far_timer = when;
274 add_tail(&far_timers, &t->n);
275 }
276}
277
525fa2c1
MM
278/**
279 * tm_stop - stop a timer
280 * @t: timer
281 *
282 * This function stops a timer. If the timer is already stopped,
283 * nothing happens.
284 */
b5d9ee5c
MM
285void
286tm_stop(timer *t)
287{
288 if (t->expires)
289 {
290 rem_node(&t->n);
291 t->expires = 0;
292 }
293}
294
295static void
296tm_dump_them(char *name, list *l)
297{
298 node *n;
299 timer *t;
300
301 debug("%s timers:\n", name);
302 WALK_LIST(n, *l)
303 {
304 t = SKIP_BACK(timer, n, n);
305 debug("%p ", t);
306 tm_dump(&t->r);
307 }
308 debug("\n");
309}
310
311void
312tm_dump_all(void)
313{
314 tm_dump_them("Near", &near_timers);
315 tm_dump_them("Far", &far_timers);
316}
317
318static inline time_t
319tm_first_shot(void)
320{
321 time_t x = first_far_timer;
322
323 if (!EMPTY_LIST(near_timers))
324 {
325 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
326 if (t->expires < x)
327 x = t->expires;
328 }
329 return x;
330}
331
332static void
333tm_shot(void)
334{
335 timer *t;
336 node *n, *m;
337
338 if (first_far_timer <= now)
339 {
28a9a189 340 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
b5d9ee5c
MM
341 first_far_timer = TIME_INFINITY;
342 n = HEAD(far_timers);
343 while (m = n->next)
344 {
345 t = SKIP_BACK(timer, n, n);
346 if (t->expires <= limit)
347 {
348 rem_node(n);
349 tm_insert_near(t);
350 }
351 else if (t->expires < first_far_timer)
352 first_far_timer = t->expires;
353 n = m;
354 }
355 }
356 while ((n = HEAD(near_timers)) -> next)
357 {
af847acc 358 int delay;
b5d9ee5c
MM
359 t = SKIP_BACK(timer, n, n);
360 if (t->expires > now)
361 break;
362 rem_node(n);
af847acc 363 delay = t->expires - now;
b5d9ee5c 364 t->expires = 0;
af847acc
MM
365 if (t->recurrent)
366 {
367 int i = t->recurrent - delay;
368 if (i < 0)
369 i = 0;
370 tm_start(t, i);
371 }
b5d9ee5c
MM
372 t->hook(t);
373 }
374}
375
0d3effcf
OF
376/**
377 * tm_parse_datetime - parse a date and time
378 * @x: datetime string
379 *
380 * tm_parse_datetime() takes a textual representation of
381 * a date and time (dd-mm-yyyy hh:mm:ss)
382 * and converts it to the corresponding value of type &bird_clock_t.
383 */
384bird_clock_t
385tm_parse_datetime(char *x)
386{
387 struct tm tm;
388 int n;
389 time_t t;
390
391 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
392 return tm_parse_date(x);
393 tm.tm_mon--;
394 tm.tm_year -= 1900;
395 t = mktime(&tm);
396 if (t == (time_t) -1)
397 return 0;
398 return t;
399}
525fa2c1
MM
400/**
401 * tm_parse_date - parse a date
402 * @x: date string
403 *
404 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
405 * and converts it to the corresponding value of type &bird_clock_t.
406 */
913f7dc9
MM
407bird_clock_t
408tm_parse_date(char *x)
409{
410 struct tm tm;
411 int n;
412 time_t t;
413
414 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
415 return 0;
416 tm.tm_mon--;
417 tm.tm_year -= 1900;
418 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
419 t = mktime(&tm);
420 if (t == (time_t) -1)
421 return 0;
422 return t;
423}
424
c37e7851
OZ
425static void
426tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
913f7dc9 427{
c37e7851
OZ
428 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
429 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
913f7dc9 430
c37e7851
OZ
431 if (delta < 20*3600)
432 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
433 else if (delta < 360*86400)
434 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
435 else
436 bsprintf(x, "%d", tm->tm_year+1900);
913f7dc9
MM
437}
438
c37e7851
OZ
439#include "conf/conf.h"
440
525fa2c1
MM
441/**
442 * tm_format_datetime - convert date and time to textual representation
443 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
444 * @t: time
445 *
fd91ae33
OZ
446 * This function formats the given relative time value @t to a textual
447 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
525fa2c1 448 */
7a88832e 449void
c37e7851 450tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
7a88832e 451{
c37e7851 452 const char *fmt_used;
7a88832e 453 struct tm *tm;
fd91ae33
OZ
454 bird_clock_t delta = now - t;
455 t = now_real - delta;
7a88832e 456 tm = localtime(&t);
7a88832e 457
c37e7851
OZ
458 if (fmt_spec->fmt1 == NULL)
459 return tm_format_reltime(x, tm, delta);
afa8937a 460
c37e7851
OZ
461 if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
462 fmt_used = fmt_spec->fmt1;
afa8937a 463 else
c37e7851
OZ
464 fmt_used = fmt_spec->fmt2;
465
466 int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
467 if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
468 strcpy(x, "<too-long>");
afa8937a
MM
469}
470
525fa2c1
MM
471/**
472 * DOC: Sockets
473 *
474 * Socket resources represent network connections. Their data structure (&socket)
475 * contains a lot of fields defining the exact type of the socket, the local and
476 * remote addresses and ports, pointers to socket buffers and finally pointers to
477 * hook functions to be called when new data have arrived to the receive buffer
478 * (@rx_hook), when the contents of the transmit buffer have been transmitted
479 * (@tx_hook) and when an error or connection close occurs (@err_hook).
480 *
38a608c5 481 * Freeing of sockets from inside socket hooks is perfectly safe.
b5d9ee5c
MM
482 */
483
abae6e9c
MM
484#ifndef SOL_IP
485#define SOL_IP IPPROTO_IP
486#endif
487
b1a1faba
OF
488#ifndef SOL_IPV6
489#define SOL_IPV6 IPPROTO_IPV6
490#endif
491
b5d9ee5c 492static list sock_list;
38a608c5 493static struct birdsock *current_sock;
ea89da38 494static struct birdsock *stored_sock;
38a608c5
MM
495static int sock_recalc_fdsets_p;
496
497static inline sock *
498sk_next(sock *s)
499{
500 if (!s->n.next->next)
501 return NULL;
502 else
503 return SKIP_BACK(sock, n, s->n.next);
504}
b5d9ee5c
MM
505
506static void
4da25acb 507sk_alloc_bufs(sock *s)
b5d9ee5c 508{
4da25acb
MM
509 if (!s->rbuf && s->rbsize)
510 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
511 s->rpos = s->rbuf;
512 if (!s->tbuf && s->tbsize)
513 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
514 s->tpos = s->ttx = s->tbuf;
515}
b5d9ee5c 516
4da25acb
MM
517static void
518sk_free_bufs(sock *s)
519{
38a608c5 520 if (s->rbuf_alloc)
4da25acb
MM
521 {
522 xfree(s->rbuf_alloc);
523 s->rbuf = s->rbuf_alloc = NULL;
524 }
38a608c5 525 if (s->tbuf_alloc)
4da25acb
MM
526 {
527 xfree(s->tbuf_alloc);
528 s->tbuf = s->tbuf_alloc = NULL;
529 }
530}
531
532static void
533sk_free(resource *r)
534{
535 sock *s = (sock *) r;
536
537 sk_free_bufs(s);
b5d9ee5c 538 if (s->fd >= 0)
320f4173
MM
539 {
540 close(s->fd);
38a608c5
MM
541 if (s == current_sock)
542 current_sock = sk_next(s);
ea89da38
OZ
543 if (s == stored_sock)
544 stored_sock = sk_next(s);
320f4173 545 rem_node(&s->n);
38a608c5 546 sock_recalc_fdsets_p = 1;
320f4173 547 }
b5d9ee5c
MM
548}
549
4da25acb
MM
550void
551sk_reallocate(sock *s)
552{
553 sk_free_bufs(s);
554 sk_alloc_bufs(s);
555}
556
b5d9ee5c
MM
557static void
558sk_dump(resource *r)
559{
560 sock *s = (sock *) r;
b93abffa 561 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
b5d9ee5c
MM
562
563 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
564 sk_type_names[s->type],
565 s->data,
566 s->saddr,
567 s->sport,
568 s->daddr,
569 s->dport,
570 s->tos,
571 s->ttl,
572 s->iface ? s->iface->name : "none");
573}
574
575static struct resclass sk_class = {
576 "Socket",
577 sizeof(sock),
578 sk_free,
e81b440f 579 sk_dump,
acb60628 580 NULL,
e81b440f 581 NULL
b5d9ee5c
MM
582};
583
525fa2c1
MM
584/**
585 * sk_new - create a socket
586 * @p: pool
587 *
588 * This function creates a new socket resource. If you want to use it,
589 * you need to fill in all the required fields of the structure and
590 * call sk_open() to do the actual opening of the socket.
c4b76d7b
OZ
591 *
592 * The real function name is sock_new(), sk_new() is a macro wrapper
593 * to avoid collision with OpenSSL.
525fa2c1 594 */
b5d9ee5c 595sock *
c4b76d7b 596sock_new(pool *p)
b5d9ee5c
MM
597{
598 sock *s = ralloc(p, &sk_class);
599 s->pool = p;
daeeb8e9 600 // s->saddr = s->daddr = IPA_NONE;
b5d9ee5c 601 s->tos = s->ttl = -1;
b5d9ee5c
MM
602 s->fd = -1;
603 return s;
604}
605
38a608c5
MM
606static void
607sk_insert(sock *s)
608{
609 add_tail(&sock_list, &s->n);
610 sock_recalc_fdsets_p = 1;
611}
b5d9ee5c 612
4f22c981
MM
613#ifdef IPV6
614
d7f469c1
OZ
615void
616fill_in_sockaddr(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, unsigned port)
4f22c981 617{
eb1451a3 618 memset(sa, 0, sizeof (struct sockaddr_in6));
4f22c981
MM
619 sa->sin6_family = AF_INET6;
620 sa->sin6_port = htons(port);
621 sa->sin6_flowinfo = 0;
b1a1faba
OF
622#ifdef HAVE_SIN_LEN
623 sa->sin6_len = sizeof(struct sockaddr_in6);
624#endif
4f22c981 625 set_inaddr(&sa->sin6_addr, a);
4f22c981 626
eb1451a3
OZ
627 if (ifa && ipa_has_link_scope(a))
628 sa->sin6_scope_id = ifa->index;
061ab802
OZ
629}
630
d7f469c1 631void
eb1451a3 632get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, unsigned *port, int check)
4f22c981 633{
b1a1faba
OF
634 if (check && sa->sin6_family != AF_INET6)
635 bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
4f22c981
MM
636 if (port)
637 *port = ntohs(sa->sin6_port);
638 memcpy(a, &sa->sin6_addr, sizeof(*a));
639 ipa_ntoh(*a);
eb1451a3
OZ
640
641 if (ifa && ipa_has_link_scope(*a))
642 *ifa = if_find_by_index(sa->sin6_scope_id);
4f22c981
MM
643}
644
645#else
646
d7f469c1
OZ
647void
648fill_in_sockaddr(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, unsigned port)
b5d9ee5c 649{
b1a1faba 650 memset (sa, 0, sizeof (struct sockaddr_in));
b5d9ee5c
MM
651 sa->sin_family = AF_INET;
652 sa->sin_port = htons(port);
b1a1faba
OF
653#ifdef HAVE_SIN_LEN
654 sa->sin_len = sizeof(struct sockaddr_in);
655#endif
b5d9ee5c
MM
656 set_inaddr(&sa->sin_addr, a);
657}
658
d7f469c1 659void
eb1451a3 660get_sockaddr(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, unsigned *port, int check)
b5d9ee5c 661{
b1a1faba
OF
662 if (check && sa->sin_family != AF_INET)
663 bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
af847acc
MM
664 if (port)
665 *port = ntohs(sa->sin_port);
b5d9ee5c 666 memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
dce26783 667 ipa_ntoh(*a);
b5d9ee5c
MM
668}
669
4f22c981
MM
670#endif
671
bed41728
OZ
672
673#ifdef IPV6
674
675/* PKTINFO handling is also standardized in IPv6 */
676#define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
677#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
678
dcc60494
OZ
679/*
680 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
681 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
682 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
683 * RFC and we use IPV6_PKTINFO.
684 */
685#ifndef IPV6_RECVPKTINFO
686#define IPV6_RECVPKTINFO IPV6_PKTINFO
687#endif
688
bed41728
OZ
689static char *
690sysio_register_cmsgs(sock *s)
691{
692 int ok = 1;
693 if ((s->flags & SKF_LADDR_RX) &&
694 setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)
695 return "IPV6_RECVPKTINFO";
696
697 return NULL;
698}
699
700static void
701sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
702{
703 struct cmsghdr *cm;
704 struct in6_pktinfo *pi = NULL;
705
706 if (!(s->flags & SKF_LADDR_RX))
707 return;
708
709 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
710 {
711 if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
712 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
713 }
714
715 if (!pi)
716 {
717 s->laddr = IPA_NONE;
718 s->lifindex = 0;
719 return;
720 }
721
722 get_inaddr(&s->laddr, &pi->ipi6_addr);
723 s->lifindex = pi->ipi6_ifindex;
724 return;
725}
726
646b24d9 727/*
bed41728
OZ
728static void
729sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
730{
731 struct cmsghdr *cm;
732 struct in6_pktinfo *pi;
733
734 if (!(s->flags & SKF_LADDR_TX))
735 return;
736
737 msg->msg_control = cbuf;
738 msg->msg_controllen = cbuflen;
739
740 cm = CMSG_FIRSTHDR(msg);
741 cm->cmsg_level = IPPROTO_IPV6;
742 cm->cmsg_type = IPV6_PKTINFO;
743 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
744
745 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
746 set_inaddr(&pi->ipi6_addr, s->saddr);
747 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
748
749 msg->msg_controllen = cm->cmsg_len;
750 return;
751}
646b24d9 752*/
bed41728
OZ
753#endif
754
a39b165e
OZ
755static char *
756sk_set_ttl_int(sock *s)
757{
a39b165e 758#ifdef IPV6
f9c799a0 759 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
a39b165e
OZ
760 return "IPV6_UNICAST_HOPS";
761#else
762 if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
763 return "IP_TTL";
764#ifdef CONFIG_UNIX_DONTROUTE
ff2857b0 765 int one = 1;
a39b165e
OZ
766 if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
767 return "SO_DONTROUTE";
768#endif
769#endif
770 return NULL;
771}
772
38a608c5
MM
773#define ERR(x) do { err = x; goto bad; } while(0)
774#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
775
b5d9ee5c
MM
776static char *
777sk_setup(sock *s)
778{
779 int fd = s->fd;
353729f5 780 char *err = NULL;
b5d9ee5c
MM
781
782 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
783 ERR("fcntl(O_NONBLOCK)");
b93abffa
MM
784 if (s->type == SK_UNIX)
785 return NULL;
a39b165e 786#ifndef IPV6
b5d9ee5c 787 if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
f782b72c 788 WARN("IP_TOS");
b5d9ee5c 789#endif
789772ed
OZ
790
791#ifdef IPV6
792 int v = 1;
793 if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
794 WARN("IPV6_V6ONLY");
795#endif
796
a39b165e
OZ
797 if (s->ttl >= 0)
798 err = sk_set_ttl_int(s);
a39b165e 799
353729f5 800 sysio_register_cmsgs(s);
b5d9ee5c
MM
801bad:
802 return err;
803}
804
a39b165e 805/**
b1b19433 806 * sk_set_ttl - set transmit TTL for given socket.
a39b165e
OZ
807 * @s: socket
808 * @ttl: TTL value
809 *
810 * Set TTL for already opened connections when TTL was not set before.
811 * Useful for accepted connections when different ones should have
812 * different TTL.
813 *
814 * Result: 0 for success, -1 for an error.
815 */
816
817int
818sk_set_ttl(sock *s, int ttl)
819{
820 char *err;
821
822 s->ttl = ttl;
823 if (err = sk_set_ttl_int(s))
824 log(L_ERR "sk_set_ttl: %s: %m", err);
825
826 return (err ? -1 : 0);
827}
828
b1b19433
OZ
829/**
830 * sk_set_min_ttl - set minimal accepted TTL for given socket.
831 * @s: socket
832 * @ttl: TTL value
833 *
834 * Can be used in TTL security implementation
835 *
836 * Result: 0 for success, -1 for an error.
837 */
838
839int
840sk_set_min_ttl(sock *s, int ttl)
841{
842 int err;
843#ifdef IPV6
844 err = sk_set_min_ttl6(s, ttl);
845#else
846 err = sk_set_min_ttl4(s, ttl);
847#endif
848
849 return err;
850}
d51aa281 851
d51aa281
OZ
852/**
853 * sk_set_md5_auth - add / remove MD5 security association for given socket.
854 * @s: socket
855 * @a: IP address of the other side
eb1451a3 856 * @ifa: Interface for link-local IP address
d51aa281
OZ
857 * @passwd: password used for MD5 authentication
858 *
859 * In TCP MD5 handling code in kernel, there is a set of pairs
860 * (address, password) used to choose password according to
861 * address of the other side. This function is useful for
862 * listening socket, for active sockets it is enough to set
863 * s->password field.
864 *
865 * When called with passwd != NULL, the new pair is added,
866 * When called with passwd == NULL, the existing pair is removed.
867 *
868 * Result: 0 for success, -1 for an error.
869 */
870
871int
eb1451a3 872sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
d51aa281
OZ
873{
874 sockaddr sa;
eb1451a3 875 fill_in_sockaddr(&sa, a, ifa, 0);
d51aa281
OZ
876 return sk_set_md5_auth_int(s, &sa, passwd);
877}
878
f9c799a0
OZ
879int
880sk_set_broadcast(sock *s, int enable)
881{
882 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)) < 0)
4ac7c834
OZ
883 {
884 log(L_ERR "sk_set_broadcast: SO_BROADCAST: %m");
885 return -1;
886 }
887
888 return 0;
f9c799a0
OZ
889}
890
891
892#ifdef IPV6
893
4ac7c834
OZ
894int
895sk_set_ipv6_checksum(sock *s, int offset)
896{
897 if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
898 {
899 log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
900 return -1;
901 }
902
903 return 0;
904}
905
93e868c7
OZ
906int
907sk_set_icmp_filter(sock *s, int p1, int p2)
908{
909 /* a bit of lame interface, but it is here only for Radv */
910 struct icmp6_filter f;
911
912 ICMP6_FILTER_SETBLOCKALL(&f);
913 ICMP6_FILTER_SETPASS(p1, &f);
914 ICMP6_FILTER_SETPASS(p2, &f);
915
916 if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
917 {
918 log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m");
919 return -1;
920 }
921
922 return 0;
923}
924
f9c799a0
OZ
925int
926sk_setup_multicast(sock *s)
927{
928 char *err;
929 int zero = 0;
930 int index;
931
932 ASSERT(s->iface && s->iface->addr);
933
934 index = s->iface->index;
935 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
936 ERR("IPV6_MULTICAST_HOPS");
937 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
938 ERR("IPV6_MULTICAST_LOOP");
939 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
940 ERR("IPV6_MULTICAST_IF");
941
e7b09e4a
OZ
942 if (err = sysio_bind_to_iface(s))
943 goto bad;
944
f9c799a0
OZ
945 return 0;
946
947bad:
948 log(L_ERR "sk_setup_multicast: %s: %m", err);
949 return -1;
950}
951
952int
953sk_join_group(sock *s, ip_addr maddr)
954{
955 struct ipv6_mreq mreq;
f1aceff5 956
f9c799a0
OZ
957 set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
958
959#ifdef CONFIG_IPV6_GLIBC_20
960 mreq.ipv6mr_ifindex = s->iface->index;
961#else
962 mreq.ipv6mr_interface = s->iface->index;
963#endif
964
861f223a 965 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0)
f9c799a0 966 {
861f223a 967 log(L_ERR "sk_join_group: IPV6_JOIN_GROUP: %m");
f9c799a0
OZ
968 return -1;
969 }
970
971 return 0;
972}
973
974int
975sk_leave_group(sock *s, ip_addr maddr)
976{
977 struct ipv6_mreq mreq;
978
979 set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
980
981#ifdef CONFIG_IPV6_GLIBC_20
982 mreq.ipv6mr_ifindex = s->iface->index;
983#else
984 mreq.ipv6mr_interface = s->iface->index;
985#endif
986
861f223a 987 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0)
f9c799a0 988 {
861f223a 989 log(L_ERR "sk_leave_group: IPV6_LEAVE_GROUP: %m");
f9c799a0
OZ
990 return -1;
991 }
992
993 return 0;
994}
995
996#else /* IPV4 */
997
998int
999sk_setup_multicast(sock *s)
1000{
1001 char *err;
1002
1003 ASSERT(s->iface && s->iface->addr);
1004
1005 if (err = sysio_setup_multicast(s))
1006 {
1007 log(L_ERR "sk_setup_multicast: %s: %m", err);
1008 return -1;
1009 }
1010
1011 return 0;
1012}
1013
1014int
1015sk_join_group(sock *s, ip_addr maddr)
1016{
1017 char *err;
1018
1019 if (err = sysio_join_group(s, maddr))
1020 {
1021 log(L_ERR "sk_join_group: %s: %m", err);
1022 return -1;
1023 }
1024
1025 return 0;
1026}
1027
1028int
1029sk_leave_group(sock *s, ip_addr maddr)
1030{
1031 char *err;
1032
1033 if (err = sysio_leave_group(s, maddr))
1034 {
1035 log(L_ERR "sk_leave_group: %s: %m", err);
1036 return -1;
1037 }
1038
1039 return 0;
1040}
1041
1042#endif
1043
d51aa281 1044
b93abffa 1045static void
b5d9ee5c
MM
1046sk_tcp_connected(sock *s)
1047{
9be9a264
OZ
1048 sockaddr lsa;
1049 int lsa_len = sizeof(lsa);
1050 if (getsockname(s->fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
eb1451a3 1051 get_sockaddr(&lsa, &s->saddr, &s->iface, &s->sport, 1);
9be9a264 1052
b5d9ee5c
MM
1053 s->type = SK_TCP;
1054 sk_alloc_bufs(s);
320f4173 1055 s->tx_hook(s);
b5d9ee5c
MM
1056}
1057
b93abffa
MM
1058static int
1059sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
1060{
1061 int fd = accept(s->fd, sa, &al);
1062 if (fd >= 0)
1063 {
1064 sock *t = sk_new(s->pool);
1065 char *err;
1066 t->type = type;
1067 t->fd = fd;
e1ddd993
MM
1068 t->ttl = s->ttl;
1069 t->tos = s->tos;
1070 t->rbsize = s->rbsize;
1071 t->tbsize = s->tbsize;
1072 if (type == SK_TCP)
cf31112f
OZ
1073 {
1074 sockaddr lsa;
1075 int lsa_len = sizeof(lsa);
1076 if (getsockname(fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
eb1451a3 1077 get_sockaddr(&lsa, &t->saddr, &t->iface, &t->sport, 1);
cf31112f 1078
eb1451a3 1079 get_sockaddr((sockaddr *) sa, &t->daddr, &t->iface, &t->dport, 1);
cf31112f 1080 }
38a608c5 1081 sk_insert(t);
b93abffa
MM
1082 if (err = sk_setup(t))
1083 {
1084 log(L_ERR "Incoming connection: %s: %m", err);
e1ddd993
MM
1085 rfree(t);
1086 return 1;
b93abffa
MM
1087 }
1088 sk_alloc_bufs(t);
e1ddd993 1089 s->rx_hook(t, 0);
b93abffa
MM
1090 return 1;
1091 }
1092 else if (errno != EINTR && errno != EAGAIN)
1093 {
c025b852 1094 s->err_hook(s, errno);
b93abffa
MM
1095 }
1096 return 0;
1097}
1098
525fa2c1
MM
1099/**
1100 * sk_open - open a socket
1101 * @s: socket
1102 *
1103 * This function takes a socket resource created by sk_new() and
1104 * initialized by the user and binds a corresponding network connection
1105 * to it.
1106 *
1107 * Result: 0 for success, -1 for an error.
1108 */
b5d9ee5c
MM
1109int
1110sk_open(sock *s)
1111{
93a786cb 1112 int fd;
4f22c981 1113 sockaddr sa;
b5d9ee5c
MM
1114 int one = 1;
1115 int type = s->type;
1116 int has_src = ipa_nonzero(s->saddr) || s->sport;
b5d9ee5c
MM
1117 char *err;
1118
1119 switch (type)
1120 {
1121 case SK_TCP_ACTIVE:
320f4173
MM
1122 s->ttx = ""; /* Force s->ttx != s->tpos */
1123 /* Fall thru */
b5d9ee5c 1124 case SK_TCP_PASSIVE:
4f22c981 1125 fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
b5d9ee5c
MM
1126 break;
1127 case SK_UDP:
4f22c981 1128 fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
b5d9ee5c
MM
1129 break;
1130 case SK_IP:
4f22c981 1131 fd = socket(BIRD_PF, SOCK_RAW, s->dport);
b5d9ee5c 1132 break;
b4b3b39e
MM
1133 case SK_MAGIC:
1134 fd = s->fd;
1135 break;
b5d9ee5c 1136 default:
b4b3b39e 1137 bug("sk_open() called for invalid sock type %d", type);
b5d9ee5c
MM
1138 }
1139 if (fd < 0)
1140 die("sk_open: socket: %m");
1141 s->fd = fd;
1142
1143 if (err = sk_setup(s))
1144 goto bad;
38a608c5 1145
b5d9ee5c
MM
1146 if (has_src)
1147 {
1148 int port;
1149
f9c799a0 1150 if (type == SK_IP)
b5d9ee5c
MM
1151 port = 0;
1152 else
1153 {
1154 port = s->sport;
1155 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
1156 ERR("SO_REUSEADDR");
1157 }
eb1451a3 1158 fill_in_sockaddr(&sa, s->saddr, s->iface, port);
b5d9ee5c
MM
1159 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1160 ERR("bind");
1161 }
eb1451a3 1162 fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
d51aa281
OZ
1163
1164 if (s->password)
1165 {
1166 int rv = sk_set_md5_auth_int(s, &sa, s->password);
1167 if (rv < 0)
1168 goto bad_no_log;
1169 }
1170
b5d9ee5c
MM
1171 switch (type)
1172 {
1173 case SK_TCP_ACTIVE:
1174 if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
1175 sk_tcp_connected(s);
9cbf43eb 1176 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
f428631c 1177 errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
b5d9ee5c
MM
1178 ERR("connect");
1179 break;
1180 case SK_TCP_PASSIVE:
1181 if (listen(fd, 8))
1182 ERR("listen");
1183 break;
4f22c981
MM
1184 case SK_MAGIC:
1185 break;
1186 default:
320f4173 1187 sk_alloc_bufs(s);
4f22c981
MM
1188#ifdef IPV6
1189#ifdef IPV6_MTU_DISCOVER
1190 {
1191 int dont = IPV6_PMTUDISC_DONT;
1192 if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
1193 ERR("IPV6_MTU_DISCOVER");
1194 }
1195#endif
1196#else
1197#ifdef IP_PMTUDISC
1198 {
1199 int dont = IP_PMTUDISC_DONT;
1200 if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
1201 ERR("IP_PMTUDISC");
1202 }
1203#endif
1204#endif
b5d9ee5c
MM
1205 }
1206
38a608c5 1207 sk_insert(s);
b5d9ee5c
MM
1208 return 0;
1209
1210bad:
1211 log(L_ERR "sk_open: %s: %m", err);
d51aa281 1212bad_no_log:
b5d9ee5c
MM
1213 close(fd);
1214 s->fd = -1;
1215 return -1;
1216}
1217
97e46d28 1218void
b93abffa
MM
1219sk_open_unix(sock *s, char *name)
1220{
1221 int fd;
1222 struct sockaddr_un sa;
1223 char *err;
1224
1225 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1226 if (fd < 0)
97e46d28 1227 ERR("socket");
b93abffa
MM
1228 s->fd = fd;
1229 if (err = sk_setup(s))
1230 goto bad;
1231 unlink(name);
68fa95cf 1232
97e46d28 1233 /* Path length checked in test_old_bird() */
b93abffa 1234 sa.sun_family = AF_UNIX;
97c6fa02 1235 strcpy(sa.sun_path, name);
0b3bf4b1 1236 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
b93abffa
MM
1237 ERR("bind");
1238 if (listen(fd, 8))
1239 ERR("listen");
38a608c5 1240 sk_insert(s);
97e46d28 1241 return;
b93abffa 1242
97e46d28 1243 bad:
b93abffa 1244 log(L_ERR "sk_open_unix: %s: %m", err);
97e46d28 1245 die("Unable to create control socket %s", name);
b93abffa
MM
1246}
1247
353729f5
OZ
1248static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1249
b5d9ee5c
MM
1250static int
1251sk_maybe_write(sock *s)
1252{
1253 int e;
1254
1255 switch (s->type)
1256 {
1257 case SK_TCP:
b4b3b39e 1258 case SK_MAGIC:
b93abffa 1259 case SK_UNIX:
b5d9ee5c
MM
1260 while (s->ttx != s->tpos)
1261 {
1262 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1263 if (e < 0)
1264 {
1265 if (errno != EINTR && errno != EAGAIN)
1266 {
353729f5 1267 reset_tx_buffer(s);
47597724
OZ
1268 /* EPIPE is just a connection close notification during TX */
1269 s->err_hook(s, (errno != EPIPE) ? errno : 0);
b5d9ee5c
MM
1270 return -1;
1271 }
1272 return 0;
1273 }
1274 s->ttx += e;
1275 }
353729f5 1276 reset_tx_buffer(s);
b5d9ee5c
MM
1277 return 1;
1278 case SK_UDP:
b5d9ee5c 1279 case SK_IP:
b5d9ee5c 1280 {
b5d9ee5c
MM
1281 if (s->tbuf == s->tpos)
1282 return 1;
b1a1faba 1283
353729f5 1284 sockaddr sa;
eb1451a3 1285 fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
353729f5
OZ
1286
1287 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
646b24d9 1288 // byte cmsg_buf[CMSG_TX_SPACE];
353729f5
OZ
1289
1290 struct msghdr msg = {
1291 .msg_name = &sa,
1292 .msg_namelen = sizeof(sa),
1293 .msg_iov = &iov,
bed41728 1294 .msg_iovlen = 1};
353729f5 1295
646b24d9 1296 // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
353729f5
OZ
1297 e = sendmsg(s->fd, &msg, 0);
1298
b5d9ee5c
MM
1299 if (e < 0)
1300 {
1301 if (errno != EINTR && errno != EAGAIN)
1302 {
353729f5 1303 reset_tx_buffer(s);
c025b852 1304 s->err_hook(s, errno);
b5d9ee5c
MM
1305 return -1;
1306 }
1307 return 0;
1308 }
353729f5 1309 reset_tx_buffer(s);
b5d9ee5c
MM
1310 return 1;
1311 }
1312 default:
08c69a77 1313 bug("sk_maybe_write: unknown socket type %d", s->type);
b5d9ee5c
MM
1314 }
1315}
1316
ea89da38
OZ
1317int
1318sk_rx_ready(sock *s)
1319{
1320 fd_set rd, wr;
1321 struct timeval timo;
1322 int rv;
1323
1324 FD_ZERO(&rd);
1325 FD_ZERO(&wr);
1326 FD_SET(s->fd, &rd);
1327
1328 timo.tv_sec = 0;
1329 timo.tv_usec = 0;
1330
1331 redo:
1332 rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1333
1334 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1335 goto redo;
1336
1337 return rv;
1338}
1339
525fa2c1
MM
1340/**
1341 * sk_send - send data to a socket
1342 * @s: socket
1343 * @len: number of bytes to send
1344 *
1345 * This function sends @len bytes of data prepared in the
1346 * transmit buffer of the socket @s to the network connection.
1347 * If the packet can be sent immediately, it does so and returns
1348 * 1, else it queues the packet for later processing, returns 0
1349 * and calls the @tx_hook of the socket when the tranmission
1350 * takes place.
1351 */
b5d9ee5c
MM
1352int
1353sk_send(sock *s, unsigned len)
1354{
b5d9ee5c
MM
1355 s->ttx = s->tbuf;
1356 s->tpos = s->tbuf + len;
1357 return sk_maybe_write(s);
1358}
1359
525fa2c1
MM
1360/**
1361 * sk_send_to - send data to a specific destination
1362 * @s: socket
1363 * @len: number of bytes to send
1364 * @addr: IP address to send the packet to
1365 * @port: port to send the packet to
1366 *
2e9b2421 1367 * This is a sk_send() replacement for connection-less packet sockets
525fa2c1
MM
1368 * which allows destination of the packet to be chosen dynamically.
1369 */
b5d9ee5c
MM
1370int
1371sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1372{
353729f5
OZ
1373 s->daddr = addr;
1374 s->dport = port;
b5d9ee5c
MM
1375 s->ttx = s->tbuf;
1376 s->tpos = s->tbuf + len;
1377 return sk_maybe_write(s);
1378}
1379
353729f5
OZ
1380/*
1381int
1382sk_send_full(sock *s, unsigned len, struct iface *ifa,
1383 ip_addr saddr, ip_addr daddr, unsigned dport)
1384{
1385 s->iface = ifa;
1386 s->saddr = saddr;
1387 s->daddr = daddr;
1388 s->dport = dport;
1389 s->ttx = s->tbuf;
1390 s->tpos = s->tbuf + len;
1391 return sk_maybe_write(s);
1392}
1393*/
1394
b5d9ee5c
MM
1395static int
1396sk_read(sock *s)
1397{
1398 switch (s->type)
1399 {
b5d9ee5c
MM
1400 case SK_TCP_PASSIVE:
1401 {
4f22c981 1402 sockaddr sa;
b93abffa
MM
1403 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1404 }
1405 case SK_UNIX_PASSIVE:
1406 {
1407 struct sockaddr_un sa;
1408 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
b5d9ee5c
MM
1409 }
1410 case SK_TCP:
b93abffa 1411 case SK_UNIX:
b5d9ee5c
MM
1412 {
1413 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1414
1415 if (c < 0)
1416 {
1417 if (errno != EINTR && errno != EAGAIN)
c025b852 1418 s->err_hook(s, errno);
b5d9ee5c
MM
1419 }
1420 else if (!c)
c025b852 1421 s->err_hook(s, 0);
b5d9ee5c
MM
1422 else
1423 {
1424 s->rpos += c;
1425 if (s->rx_hook(s, s->rpos - s->rbuf))
38a608c5
MM
1426 {
1427 /* We need to be careful since the socket could have been deleted by the hook */
1428 if (current_sock == s)
1429 s->rpos = s->rbuf;
1430 }
b5d9ee5c
MM
1431 return 1;
1432 }
1433 return 0;
1434 }
b4b3b39e
MM
1435 case SK_MAGIC:
1436 return s->rx_hook(s, 0);
b5d9ee5c
MM
1437 default:
1438 {
4f22c981 1439 sockaddr sa;
353729f5
OZ
1440 int e;
1441
1442 struct iovec iov = {s->rbuf, s->rbsize};
1443 byte cmsg_buf[CMSG_RX_SPACE];
1444
1445 struct msghdr msg = {
1446 .msg_name = &sa,
1447 .msg_namelen = sizeof(sa),
1448 .msg_iov = &iov,
1449 .msg_iovlen = 1,
1450 .msg_control = cmsg_buf,
1451 .msg_controllen = sizeof(cmsg_buf),
1452 .msg_flags = 0};
1453
1454 e = recvmsg(s->fd, &msg, 0);
b5d9ee5c
MM
1455
1456 if (e < 0)
1457 {
1458 if (errno != EINTR && errno != EAGAIN)
c025b852 1459 s->err_hook(s, errno);
b5d9ee5c
MM
1460 return 0;
1461 }
1462 s->rpos = s->rbuf + e;
eb1451a3 1463 get_sockaddr(&sa, &s->faddr, NULL, &s->fport, 1);
353729f5
OZ
1464 sysio_process_rx_cmsgs(s, &msg);
1465
b5d9ee5c
MM
1466 s->rx_hook(s, e);
1467 return 1;
1468 }
1469 }
1470}
1471
38a608c5 1472static int
b5d9ee5c
MM
1473sk_write(sock *s)
1474{
320f4173
MM
1475 switch (s->type)
1476 {
1477 case SK_TCP_ACTIVE:
1478 {
1479 sockaddr sa;
eb1451a3 1480 fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
09e4117c 1481 if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
320f4173
MM
1482 sk_tcp_connected(s);
1483 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
c025b852 1484 s->err_hook(s, errno);
38a608c5 1485 return 0;
320f4173 1486 }
320f4173 1487 default:
38a608c5
MM
1488 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1489 {
1490 s->tx_hook(s);
1491 return 1;
1492 }
1493 return 0;
320f4173 1494 }
b5d9ee5c
MM
1495}
1496
1497void
1498sk_dump_all(void)
1499{
1500 node *n;
1501 sock *s;
1502
1503 debug("Open sockets:\n");
1504 WALK_LIST(n, sock_list)
1505 {
1506 s = SKIP_BACK(sock, n, n);
1507 debug("%p ", s);
1508 sk_dump(&s->r);
1509 }
1510 debug("\n");
1511}
1512
1513#undef ERR
f782b72c 1514#undef WARN
b5d9ee5c
MM
1515
1516/*
1517 * Main I/O Loop
1518 */
1519
4c9dd1e4
MM
/* Asynchronous reconfiguration scheduled; io_loop() runs async_config() and clears it */
volatile int async_config_flag;
/* Asynchronous dump scheduled; io_loop() runs async_dump() and clears it */
volatile int async_dump_flag;
1522
b5d9ee5c
MM
1523void
1524io_init(void)
1525{
1526 init_list(&near_timers);
1527 init_list(&far_timers);
1528 init_list(&sock_list);
e8f73195 1529 init_list(&global_event_list);
7e5f5ffd 1530 krt_io_init();
fd91ae33
OZ
1531 init_times();
1532 update_times();
1533 srandom((int) now_real);
b5d9ee5c
MM
1534}
1535
ea89da38
OZ
1536static int short_loops = 0;
1537#define SHORT_LOOP_MAX 10
1538
b5d9ee5c
MM
1539void
1540io_loop(void)
1541{
1542 fd_set rd, wr;
1543 struct timeval timo;
1544 time_t tout;
30770df2 1545 int hi, events;
b5d9ee5c 1546 sock *s;
38a608c5 1547 node *n;
b5d9ee5c 1548
38a608c5 1549 sock_recalc_fdsets_p = 1;
b5d9ee5c
MM
1550 for(;;)
1551 {
30770df2 1552 events = ev_run_list(&global_event_list);
fd91ae33 1553 update_times();
b5d9ee5c
MM
1554 tout = tm_first_shot();
1555 if (tout <= now)
1556 {
1557 tm_shot();
1558 continue;
1559 }
30770df2
MM
1560 timo.tv_sec = events ? 0 : tout - now;
1561 timo.tv_usec = 0;
b5d9ee5c 1562
38a608c5
MM
1563 if (sock_recalc_fdsets_p)
1564 {
1565 sock_recalc_fdsets_p = 0;
1566 FD_ZERO(&rd);
1567 FD_ZERO(&wr);
1568 }
1569
b5d9ee5c
MM
1570 hi = 0;
1571 WALK_LIST(n, sock_list)
1572 {
1573 s = SKIP_BACK(sock, n, n);
1574 if (s->rx_hook)
1575 {
1576 FD_SET(s->fd, &rd);
1577 if (s->fd > hi)
1578 hi = s->fd;
1579 }
38a608c5
MM
1580 else
1581 FD_CLR(s->fd, &rd);
b5d9ee5c
MM
1582 if (s->tx_hook && s->ttx != s->tpos)
1583 {
1584 FD_SET(s->fd, &wr);
1585 if (s->fd > hi)
1586 hi = s->fd;
1587 }
38a608c5
MM
1588 else
1589 FD_CLR(s->fd, &wr);
b5d9ee5c
MM
1590 }
1591
4c9dd1e4
MM
1592 /*
1593 * Yes, this is racy. But even if the signal comes before this test
1594 * and entering select(), it gets caught on the next timer tick.
1595 */
1596
1597 if (async_config_flag)
1598 {
1599 async_config();
1600 async_config_flag = 0;
f4aabcee 1601 continue;
4c9dd1e4
MM
1602 }
1603 if (async_dump_flag)
1604 {
1605 async_dump();
1606 async_dump_flag = 0;
f4aabcee
MM
1607 continue;
1608 }
1609 if (async_shutdown_flag)
1610 {
1611 async_shutdown();
1612 async_shutdown_flag = 0;
1613 continue;
4c9dd1e4
MM
1614 }
1615
1616 /* And finally enter select() to find active sockets */
b5d9ee5c 1617 hi = select(hi+1, &rd, &wr, NULL, &timo);
ea89da38 1618
b5d9ee5c
MM
1619 if (hi < 0)
1620 {
1621 if (errno == EINTR || errno == EAGAIN)
1622 continue;
1623 die("select: %m");
1624 }
1625 if (hi)
1626 {
ea89da38
OZ
1627 /* guaranteed to be non-empty */
1628 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1629
38a608c5 1630 while (current_sock)
b5d9ee5c 1631 {
38a608c5
MM
1632 sock *s = current_sock;
1633 int e;
ea89da38
OZ
1634 int steps;
1635
1636 steps = MAX_STEPS;
1637 if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
38a608c5
MM
1638 do
1639 {
4323099d 1640 steps--;
38a608c5
MM
1641 e = sk_read(s);
1642 if (s != current_sock)
1643 goto next;
1644 }
4323099d
OZ
1645 while (e && s->rx_hook && steps);
1646
1647 steps = MAX_STEPS;
38a608c5
MM
1648 if (FD_ISSET(s->fd, &wr))
1649 do
1650 {
4323099d 1651 steps--;
38a608c5
MM
1652 e = sk_write(s);
1653 if (s != current_sock)
1654 goto next;
1655 }
4323099d 1656 while (e && steps);
38a608c5
MM
1657 current_sock = sk_next(s);
1658 next: ;
b5d9ee5c 1659 }
ea89da38
OZ
1660
1661 short_loops++;
1662 if (events && (short_loops < SHORT_LOOP_MAX))
1663 continue;
1664 short_loops = 0;
1665
1666 int count = 0;
1667 current_sock = stored_sock;
1668 if (current_sock == NULL)
1669 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1670
1671 while (current_sock && count < MAX_RX_STEPS)
1672 {
1673 sock *s = current_sock;
1674 int e;
ea89da38
OZ
1675
1676 if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1677 {
1678 count++;
1679 e = sk_read(s);
1680 if (s != current_sock)
1681 goto next2;
1682 }
1683 current_sock = sk_next(s);
1684 next2: ;
1685 }
1686
1687 stored_sock = current_sock;
b5d9ee5c
MM
1688 }
1689 }
1690}
41c8976e
OF
1691
/*
 * test_old_bird - refuse to start when the control socket is already served
 * @path: path of the UNIX-domain control socket
 *
 * Tries to connect to @path. A successful connection means that
 * another instance is listening there, which is reported through
 * die(). A path that does not fit into sun_path, or a failure to
 * create the probing socket, is reported through die() as well.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");
  if (sizeof(sa.sun_path) <= strlen(path))
    die("Socket path too long");

  memset(&sa, 0, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  /* Only a successful connect matters; any failure is taken as "nobody is listening" */
  if (!connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)))
    die("I found another BIRD running.");
  close(fd);
}
1710
1711