]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/unix/io.c
Fixes some missing tabs.
[thirdparty/bird.git] / sysdep / unix / io.c
CommitLineData
b5d9ee5c
MM
1/*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
38a608c5 4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
b1a1faba 5 * (c) 2004 Ondrej Filip <feela@network.cz>
b5d9ee5c
MM
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
607d9914
OZ
10/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
12#define _GNU_SOURCE 1
13
b5d9ee5c
MM
14#include <stdio.h>
15#include <stdlib.h>
01b776e1 16#include <time.h>
b5d9ee5c
MM
17#include <sys/time.h>
18#include <sys/types.h>
19#include <sys/socket.h>
20#include <sys/fcntl.h>
46a82e9c 21#include <sys/uio.h>
b93abffa 22#include <sys/un.h>
b5d9ee5c
MM
23#include <unistd.h>
24#include <errno.h>
d0e9b36d 25#include <netinet/in.h>
93e868c7 26#include <netinet/icmp6.h>
b5d9ee5c
MM
27
28#include "nest/bird.h"
29#include "lib/lists.h"
30#include "lib/resource.h"
31#include "lib/timer.h"
32#include "lib/socket.h"
e8f73195 33#include "lib/event.h"
afa8937a 34#include "lib/string.h"
b5d9ee5c
MM
35#include "nest/iface.h"
36
37#include "lib/unix.h"
a2867cd9 38#include "lib/sysio.h"
b5d9ee5c 39
ea89da38 40/* Maximum number of calls of tx handler for one socket in one
4323099d
OZ
41 * select iteration. Should be small enough to not monopolize CPU by
42 * one protocol instance.
43 */
44#define MAX_STEPS 4
45
ea89da38
OZ
/* Maximum number of calls of rx handler for all sockets in one select
   iteration. RX callbacks are often much more costly so we limit
   this to get small latencies */
49#define MAX_RX_STEPS 4
50
a9c986f9
MM
51/*
52 * Tracked Files
53 */
54
55struct rfile {
56 resource r;
57 FILE *f;
58};
59
60static void
61rf_free(resource *r)
62{
63 struct rfile *a = (struct rfile *) r;
64
65 fclose(a->f);
66}
67
68static void
69rf_dump(resource *r)
70{
71 struct rfile *a = (struct rfile *) r;
72
73 debug("(FILE *%p)\n", a->f);
74}
75
76static struct resclass rf_class = {
77 "FILE",
78 sizeof(struct rfile),
79 rf_free,
e81b440f 80 rf_dump,
acb60628 81 NULL,
e81b440f 82 NULL
a9c986f9
MM
83};
84
85void *
f78056fb 86tracked_fopen(pool *p, char *name, char *mode)
a9c986f9
MM
87{
88 FILE *f = fopen(name, mode);
89
90 if (f)
91 {
92 struct rfile *r = ralloc(p, &rf_class);
93 r->f = f;
94 }
95 return f;
96}
97
525fa2c1
MM
98/**
99 * DOC: Timers
100 *
101 * Timers are resources which represent a wish of a module to call
102 * a function at the specified time. The platform dependent code
58f7d004 103 * doesn't guarantee exact timing, only that a timer function
525fa2c1
MM
104 * won't be called before the requested time.
105 *
fd91ae33
OZ
106 * In BIRD, time is represented by values of the &bird_clock_t type
107 * which are integral numbers interpreted as a relative number of seconds since
108 * some fixed time point in past. The current time can be read
109 * from variable @now with reasonable accuracy and is monotonic. There is also
110 * a current 'absolute' time in variable @now_real reported by OS.
525fa2c1
MM
111 *
112 * Each timer is described by a &timer structure containing a pointer
113 * to the handler function (@hook), data private to this function (@data),
114 * time the function should be called at (@expires, 0 for inactive timers),
115 * for the other fields see |timer.h|.
b5d9ee5c
MM
116 */
117
118#define NEAR_TIMER_LIMIT 4
119
b5d9ee5c
MM
120static list near_timers, far_timers;
121static bird_clock_t first_far_timer = TIME_INFINITY;
122
002b6423
OZ
123/* now must be different from 0, because 0 is a special value in timer->expires */
124bird_clock_t now = 1, now_real;
fd91ae33
OZ
125
126static void
127update_times_plain(void)
128{
129 bird_clock_t new_time = time(NULL);
130 int delta = new_time - now_real;
131
132 if ((delta >= 0) && (delta < 60))
133 now += delta;
134 else if (now_real != 0)
135 log(L_WARN "Time jump, delta %d s", delta);
136
137 now_real = new_time;
138}
139
140static void
141update_times_gettime(void)
142{
143 struct timespec ts;
144 int rv;
145
146 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
147 if (rv != 0)
148 die("clock_gettime: %m");
149
150 if (ts.tv_sec != now) {
151 if (ts.tv_sec < now)
152 log(L_ERR "Monotonic timer is broken");
153
154 now = ts.tv_sec;
155 now_real = time(NULL);
156 }
157}
158
/* Nonzero when clock_gettime(CLOCK_MONOTONIC) succeeded in init_times() */
static int clock_monotonic_available;

/* Refresh @now and @now_real using the best available clock source. */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
    update_times_plain();
  else
    update_times_gettime();
}
169
170static inline void
171init_times(void)
172{
173 struct timespec ts;
174 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
175 if (!clock_monotonic_available)
176 log(L_WARN "Monotonic timer is missing");
177}
178
b5d9ee5c
MM
179
180static void
181tm_free(resource *r)
182{
183 timer *t = (timer *) r;
184
185 tm_stop(t);
186}
187
188static void
189tm_dump(resource *r)
190{
191 timer *t = (timer *) r;
192
e8f73195 193 debug("(code %p, data %p, ", t->hook, t->data);
af847acc
MM
194 if (t->randomize)
195 debug("rand %d, ", t->randomize);
196 if (t->recurrent)
197 debug("recur %d, ", t->recurrent);
b5d9ee5c
MM
198 if (t->expires)
199 debug("expires in %d sec)\n", t->expires - now);
200 else
201 debug("inactive)\n");
202}
203
204static struct resclass tm_class = {
205 "Timer",
206 sizeof(timer),
207 tm_free,
e81b440f 208 tm_dump,
acb60628 209 NULL,
e81b440f 210 NULL
b5d9ee5c
MM
211};
212
525fa2c1
MM
213/**
214 * tm_new - create a timer
215 * @p: pool
216 *
217 * This function creates a new timer resource and returns
218 * a pointer to it. To use the timer, you need to fill in
219 * the structure fields and call tm_start() to start timing.
220 */
b5d9ee5c
MM
221timer *
222tm_new(pool *p)
223{
224 timer *t = ralloc(p, &tm_class);
b5d9ee5c
MM
225 return t;
226}
227
228static inline void
229tm_insert_near(timer *t)
230{
231 node *n = HEAD(near_timers);
232
233 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
234 n = n->next;
235 insert_node(&t->n, n->prev);
236}
237
525fa2c1
MM
/**
 * tm_start - start a timer
 * @t: timer
 * @after: number of seconds the timer should be run after
 *
 * This function schedules the hook function of the timer to
 * be called after @after seconds. If the timer has already been
 * started, its @expires time is replaced by the new value.
 *
 * You can also set the @randomize field of @t; the timeout
 * will then be increased by a random number of seconds chosen
 * uniformly from the range 0 .. @randomize.
 *
 * You can call tm_start() from the handler function of the timer
 * to request another run of the timer. Also, you can set the @recurrent
 * field to have the timer re-added automatically with the same timeout.
 */
b5d9ee5c
MM
255void
256tm_start(timer *t, unsigned after)
257{
258 bird_clock_t when;
259
260 if (t->randomize)
af847acc 261 after += random() % (t->randomize + 1);
b5d9ee5c
MM
262 when = now + after;
263 if (t->expires == when)
264 return;
265 if (t->expires)
266 rem_node(&t->n);
267 t->expires = when;
268 if (after <= NEAR_TIMER_LIMIT)
269 tm_insert_near(t);
270 else
271 {
272 if (!first_far_timer || first_far_timer > when)
273 first_far_timer = when;
274 add_tail(&far_timers, &t->n);
275 }
276}
277
525fa2c1
MM
278/**
279 * tm_stop - stop a timer
280 * @t: timer
281 *
282 * This function stops a timer. If the timer is already stopped,
283 * nothing happens.
284 */
b5d9ee5c
MM
285void
286tm_stop(timer *t)
287{
288 if (t->expires)
289 {
290 rem_node(&t->n);
291 t->expires = 0;
292 }
293}
294
295static void
296tm_dump_them(char *name, list *l)
297{
298 node *n;
299 timer *t;
300
301 debug("%s timers:\n", name);
302 WALK_LIST(n, *l)
303 {
304 t = SKIP_BACK(timer, n, n);
305 debug("%p ", t);
306 tm_dump(&t->r);
307 }
308 debug("\n");
309}
310
311void
312tm_dump_all(void)
313{
314 tm_dump_them("Near", &near_timers);
315 tm_dump_them("Far", &far_timers);
316}
317
318static inline time_t
319tm_first_shot(void)
320{
321 time_t x = first_far_timer;
322
323 if (!EMPTY_LIST(near_timers))
324 {
325 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
326 if (t->expires < x)
327 x = t->expires;
328 }
329 return x;
330}
331
332static void
333tm_shot(void)
334{
335 timer *t;
336 node *n, *m;
337
338 if (first_far_timer <= now)
339 {
28a9a189 340 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
b5d9ee5c
MM
341 first_far_timer = TIME_INFINITY;
342 n = HEAD(far_timers);
343 while (m = n->next)
344 {
345 t = SKIP_BACK(timer, n, n);
346 if (t->expires <= limit)
347 {
348 rem_node(n);
349 tm_insert_near(t);
350 }
351 else if (t->expires < first_far_timer)
352 first_far_timer = t->expires;
353 n = m;
354 }
355 }
356 while ((n = HEAD(near_timers)) -> next)
357 {
af847acc 358 int delay;
b5d9ee5c
MM
359 t = SKIP_BACK(timer, n, n);
360 if (t->expires > now)
361 break;
362 rem_node(n);
af847acc 363 delay = t->expires - now;
b5d9ee5c 364 t->expires = 0;
af847acc
MM
365 if (t->recurrent)
366 {
367 int i = t->recurrent - delay;
368 if (i < 0)
369 i = 0;
370 tm_start(t, i);
371 }
b5d9ee5c
MM
372 t->hook(t);
373 }
374}
375
0d3effcf
OF
376/**
377 * tm_parse_datetime - parse a date and time
378 * @x: datetime string
379 *
380 * tm_parse_datetime() takes a textual representation of
381 * a date and time (dd-mm-yyyy hh:mm:ss)
382 * and converts it to the corresponding value of type &bird_clock_t.
383 */
384bird_clock_t
385tm_parse_datetime(char *x)
386{
387 struct tm tm;
388 int n;
389 time_t t;
390
391 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
392 return tm_parse_date(x);
393 tm.tm_mon--;
394 tm.tm_year -= 1900;
395 t = mktime(&tm);
396 if (t == (time_t) -1)
397 return 0;
398 return t;
399}
525fa2c1
MM
400/**
401 * tm_parse_date - parse a date
402 * @x: date string
403 *
404 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
405 * and converts it to the corresponding value of type &bird_clock_t.
406 */
913f7dc9
MM
407bird_clock_t
408tm_parse_date(char *x)
409{
410 struct tm tm;
411 int n;
412 time_t t;
413
414 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
415 return 0;
416 tm.tm_mon--;
417 tm.tm_year -= 1900;
418 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
419 t = mktime(&tm);
420 if (t == (time_t) -1)
421 return 0;
422 return t;
423}
424
c37e7851
OZ
425static void
426tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
913f7dc9 427{
c37e7851
OZ
428 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
429 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
913f7dc9 430
c37e7851
OZ
431 if (delta < 20*3600)
432 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
433 else if (delta < 360*86400)
434 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
435 else
436 bsprintf(x, "%d", tm->tm_year+1900);
913f7dc9
MM
437}
438
c37e7851
OZ
439#include "conf/conf.h"
440
525fa2c1
MM
441/**
442 * tm_format_datetime - convert date and time to textual representation
443 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
444 * @t: time
445 *
fd91ae33
OZ
446 * This function formats the given relative time value @t to a textual
447 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
525fa2c1 448 */
7a88832e 449void
c37e7851 450tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
7a88832e 451{
c37e7851 452 const char *fmt_used;
7a88832e 453 struct tm *tm;
fd91ae33
OZ
454 bird_clock_t delta = now - t;
455 t = now_real - delta;
7a88832e 456 tm = localtime(&t);
7a88832e 457
c37e7851
OZ
458 if (fmt_spec->fmt1 == NULL)
459 return tm_format_reltime(x, tm, delta);
afa8937a 460
c37e7851
OZ
461 if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
462 fmt_used = fmt_spec->fmt1;
afa8937a 463 else
c37e7851
OZ
464 fmt_used = fmt_spec->fmt2;
465
466 int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
467 if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
468 strcpy(x, "<too-long>");
afa8937a
MM
469}
470
525fa2c1
MM
471/**
472 * DOC: Sockets
473 *
474 * Socket resources represent network connections. Their data structure (&socket)
475 * contains a lot of fields defining the exact type of the socket, the local and
476 * remote addresses and ports, pointers to socket buffers and finally pointers to
477 * hook functions to be called when new data have arrived to the receive buffer
478 * (@rx_hook), when the contents of the transmit buffer have been transmitted
479 * (@tx_hook) and when an error or connection close occurs (@err_hook).
480 *
38a608c5 481 * Freeing of sockets from inside socket hooks is perfectly safe.
b5d9ee5c
MM
482 */
483
abae6e9c
MM
484#ifndef SOL_IP
485#define SOL_IP IPPROTO_IP
486#endif
487
b1a1faba
OF
488#ifndef SOL_IPV6
489#define SOL_IPV6 IPPROTO_IPV6
490#endif
491
b5d9ee5c 492static list sock_list;
38a608c5 493static struct birdsock *current_sock;
ea89da38 494static struct birdsock *stored_sock;
38a608c5
MM
495static int sock_recalc_fdsets_p;
496
497static inline sock *
498sk_next(sock *s)
499{
500 if (!s->n.next->next)
501 return NULL;
502 else
503 return SKIP_BACK(sock, n, s->n.next);
504}
b5d9ee5c
MM
505
506static void
4da25acb 507sk_alloc_bufs(sock *s)
b5d9ee5c 508{
4da25acb
MM
509 if (!s->rbuf && s->rbsize)
510 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
511 s->rpos = s->rbuf;
512 if (!s->tbuf && s->tbsize)
513 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
514 s->tpos = s->ttx = s->tbuf;
515}
b5d9ee5c 516
4da25acb
MM
517static void
518sk_free_bufs(sock *s)
519{
38a608c5 520 if (s->rbuf_alloc)
4da25acb
MM
521 {
522 xfree(s->rbuf_alloc);
523 s->rbuf = s->rbuf_alloc = NULL;
524 }
38a608c5 525 if (s->tbuf_alloc)
4da25acb
MM
526 {
527 xfree(s->tbuf_alloc);
528 s->tbuf = s->tbuf_alloc = NULL;
529 }
530}
531
532static void
533sk_free(resource *r)
534{
535 sock *s = (sock *) r;
536
537 sk_free_bufs(s);
b5d9ee5c 538 if (s->fd >= 0)
320f4173
MM
539 {
540 close(s->fd);
38a608c5
MM
541 if (s == current_sock)
542 current_sock = sk_next(s);
ea89da38
OZ
543 if (s == stored_sock)
544 stored_sock = sk_next(s);
320f4173 545 rem_node(&s->n);
38a608c5 546 sock_recalc_fdsets_p = 1;
320f4173 547 }
b5d9ee5c
MM
548}
549
4da25acb
MM
550void
551sk_reallocate(sock *s)
552{
553 sk_free_bufs(s);
554 sk_alloc_bufs(s);
555}
556
b5d9ee5c
MM
557static void
558sk_dump(resource *r)
559{
560 sock *s = (sock *) r;
b93abffa 561 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
b5d9ee5c
MM
562
563 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
564 sk_type_names[s->type],
565 s->data,
566 s->saddr,
567 s->sport,
568 s->daddr,
569 s->dport,
570 s->tos,
571 s->ttl,
572 s->iface ? s->iface->name : "none");
573}
574
575static struct resclass sk_class = {
576 "Socket",
577 sizeof(sock),
578 sk_free,
e81b440f 579 sk_dump,
acb60628 580 NULL,
e81b440f 581 NULL
b5d9ee5c
MM
582};
583
525fa2c1
MM
584/**
585 * sk_new - create a socket
586 * @p: pool
587 *
588 * This function creates a new socket resource. If you want to use it,
589 * you need to fill in all the required fields of the structure and
590 * call sk_open() to do the actual opening of the socket.
591 */
b5d9ee5c
MM
592sock *
593sk_new(pool *p)
594{
595 sock *s = ralloc(p, &sk_class);
596 s->pool = p;
daeeb8e9 597 // s->saddr = s->daddr = IPA_NONE;
b5d9ee5c 598 s->tos = s->ttl = -1;
b5d9ee5c
MM
599 s->fd = -1;
600 return s;
601}
602
38a608c5
MM
603static void
604sk_insert(sock *s)
605{
606 add_tail(&sock_list, &s->n);
607 sock_recalc_fdsets_p = 1;
608}
b5d9ee5c 609
4f22c981
MM
610#ifdef IPV6
611
4f22c981
MM
612void
613fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
614{
b1a1faba 615 memset (sa, 0, sizeof (struct sockaddr_in6));
4f22c981
MM
616 sa->sin6_family = AF_INET6;
617 sa->sin6_port = htons(port);
618 sa->sin6_flowinfo = 0;
b1a1faba
OF
619#ifdef HAVE_SIN_LEN
620 sa->sin6_len = sizeof(struct sockaddr_in6);
621#endif
4f22c981
MM
622 set_inaddr(&sa->sin6_addr, a);
623}
624
061ab802
OZ
625static inline void
626fill_in_sockifa(sockaddr *sa, struct iface *ifa)
627{
628 sa->sin6_scope_id = ifa ? ifa->index : 0;
629}
630
4f22c981 631void
b1a1faba 632get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check)
4f22c981 633{
b1a1faba
OF
634 if (check && sa->sin6_family != AF_INET6)
635 bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
4f22c981
MM
636 if (port)
637 *port = ntohs(sa->sin6_port);
638 memcpy(a, &sa->sin6_addr, sizeof(*a));
639 ipa_ntoh(*a);
640}
641
642#else
643
4cf45766 644void
4f22c981 645fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
b5d9ee5c 646{
b1a1faba 647 memset (sa, 0, sizeof (struct sockaddr_in));
b5d9ee5c
MM
648 sa->sin_family = AF_INET;
649 sa->sin_port = htons(port);
b1a1faba
OF
650#ifdef HAVE_SIN_LEN
651 sa->sin_len = sizeof(struct sockaddr_in);
652#endif
b5d9ee5c
MM
653 set_inaddr(&sa->sin_addr, a);
654}
655
061ab802 656static inline void
e81b440f 657fill_in_sockifa(sockaddr *sa UNUSED, struct iface *ifa UNUSED)
061ab802
OZ
658{
659}
660
af847acc 661void
b1a1faba 662get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check)
b5d9ee5c 663{
b1a1faba
OF
664 if (check && sa->sin_family != AF_INET)
665 bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
af847acc
MM
666 if (port)
667 *port = ntohs(sa->sin_port);
b5d9ee5c 668 memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
dce26783 669 ipa_ntoh(*a);
b5d9ee5c
MM
670}
671
4f22c981
MM
672#endif
673
bed41728
OZ
674
675#ifdef IPV6
676
677/* PKTINFO handling is also standardized in IPv6 */
678#define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
679#define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo))
680
dcc60494
OZ
681/*
682 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
683 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
684 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
685 * RFC and we use IPV6_PKTINFO.
686 */
687#ifndef IPV6_RECVPKTINFO
688#define IPV6_RECVPKTINFO IPV6_PKTINFO
689#endif
690
bed41728
OZ
691static char *
692sysio_register_cmsgs(sock *s)
693{
694 int ok = 1;
695 if ((s->flags & SKF_LADDR_RX) &&
696 setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)
697 return "IPV6_RECVPKTINFO";
698
699 return NULL;
700}
701
702static void
703sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
704{
705 struct cmsghdr *cm;
706 struct in6_pktinfo *pi = NULL;
707
708 if (!(s->flags & SKF_LADDR_RX))
709 return;
710
711 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
712 {
713 if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
714 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
715 }
716
717 if (!pi)
718 {
719 s->laddr = IPA_NONE;
720 s->lifindex = 0;
721 return;
722 }
723
724 get_inaddr(&s->laddr, &pi->ipi6_addr);
725 s->lifindex = pi->ipi6_ifindex;
726 return;
727}
728
646b24d9 729/*
bed41728
OZ
730static void
731sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
732{
733 struct cmsghdr *cm;
734 struct in6_pktinfo *pi;
735
736 if (!(s->flags & SKF_LADDR_TX))
737 return;
738
739 msg->msg_control = cbuf;
740 msg->msg_controllen = cbuflen;
741
742 cm = CMSG_FIRSTHDR(msg);
743 cm->cmsg_level = IPPROTO_IPV6;
744 cm->cmsg_type = IPV6_PKTINFO;
745 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
746
747 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
748 set_inaddr(&pi->ipi6_addr, s->saddr);
749 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
750
751 msg->msg_controllen = cm->cmsg_len;
752 return;
753}
646b24d9 754*/
bed41728
OZ
755#endif
756
a39b165e
OZ
757static char *
758sk_set_ttl_int(sock *s)
759{
a39b165e 760#ifdef IPV6
f9c799a0 761 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
a39b165e
OZ
762 return "IPV6_UNICAST_HOPS";
763#else
764 if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
765 return "IP_TTL";
766#ifdef CONFIG_UNIX_DONTROUTE
ff2857b0 767 int one = 1;
a39b165e
OZ
768 if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
769 return "SO_DONTROUTE";
770#endif
771#endif
772 return NULL;
773}
774
38a608c5
MM
/* ERR(x): record x in the local variable `err' and jump to the local label
   `bad'; usable only in functions that provide both. */
#define ERR(x) do { err = x; goto bad; } while(0)
/* WARN(x): log a non-fatal sk_setup problem named x together with errno. */
#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
777
b5d9ee5c
MM
778static char *
779sk_setup(sock *s)
780{
781 int fd = s->fd;
353729f5 782 char *err = NULL;
b5d9ee5c
MM
783
784 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
785 ERR("fcntl(O_NONBLOCK)");
b93abffa
MM
786 if (s->type == SK_UNIX)
787 return NULL;
a39b165e 788#ifndef IPV6
b5d9ee5c 789 if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
f782b72c 790 WARN("IP_TOS");
b5d9ee5c 791#endif
789772ed
OZ
792
793#ifdef IPV6
794 int v = 1;
795 if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
796 WARN("IPV6_V6ONLY");
797#endif
798
a39b165e
OZ
799 if (s->ttl >= 0)
800 err = sk_set_ttl_int(s);
a39b165e 801
353729f5 802 sysio_register_cmsgs(s);
b5d9ee5c
MM
803bad:
804 return err;
805}
806
a39b165e
OZ
807/**
808 * sk_set_ttl - set TTL for given socket.
809 * @s: socket
810 * @ttl: TTL value
811 *
812 * Set TTL for already opened connections when TTL was not set before.
813 * Useful for accepted connections when different ones should have
814 * different TTL.
815 *
816 * Result: 0 for success, -1 for an error.
817 */
818
819int
820sk_set_ttl(sock *s, int ttl)
821{
822 char *err;
823
824 s->ttl = ttl;
825 if (err = sk_set_ttl_int(s))
826 log(L_ERR "sk_set_ttl: %s: %m", err);
827
828 return (err ? -1 : 0);
829}
830
d51aa281 831
d51aa281
OZ
832/**
833 * sk_set_md5_auth - add / remove MD5 security association for given socket.
834 * @s: socket
835 * @a: IP address of the other side
836 * @passwd: password used for MD5 authentication
837 *
838 * In TCP MD5 handling code in kernel, there is a set of pairs
839 * (address, password) used to choose password according to
840 * address of the other side. This function is useful for
841 * listening socket, for active sockets it is enough to set
842 * s->password field.
843 *
844 * When called with passwd != NULL, the new pair is added,
845 * When called with passwd == NULL, the existing pair is removed.
846 *
847 * Result: 0 for success, -1 for an error.
848 */
849
850int
851sk_set_md5_auth(sock *s, ip_addr a, char *passwd)
852{
853 sockaddr sa;
854 fill_in_sockaddr(&sa, a, 0);
855 return sk_set_md5_auth_int(s, &sa, passwd);
856}
857
f9c799a0
OZ
858int
859sk_set_broadcast(sock *s, int enable)
860{
861 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)) < 0)
4ac7c834
OZ
862 {
863 log(L_ERR "sk_set_broadcast: SO_BROADCAST: %m");
864 return -1;
865 }
866
867 return 0;
f9c799a0
OZ
868}
869
870
871#ifdef IPV6
872
4ac7c834
OZ
873int
874sk_set_ipv6_checksum(sock *s, int offset)
875{
876 if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
877 {
878 log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
879 return -1;
880 }
881
882 return 0;
883}
884
93e868c7
OZ
885int
886sk_set_icmp_filter(sock *s, int p1, int p2)
887{
888 /* a bit of lame interface, but it is here only for Radv */
889 struct icmp6_filter f;
890
891 ICMP6_FILTER_SETBLOCKALL(&f);
892 ICMP6_FILTER_SETPASS(p1, &f);
893 ICMP6_FILTER_SETPASS(p2, &f);
894
895 if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
896 {
897 log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m");
898 return -1;
899 }
900
901 return 0;
902}
903
f9c799a0
OZ
904int
905sk_setup_multicast(sock *s)
906{
907 char *err;
908 int zero = 0;
909 int index;
910
911 ASSERT(s->iface && s->iface->addr);
912
913 index = s->iface->index;
914 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
915 ERR("IPV6_MULTICAST_HOPS");
916 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
917 ERR("IPV6_MULTICAST_LOOP");
918 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
919 ERR("IPV6_MULTICAST_IF");
920
e7b09e4a
OZ
921 if (err = sysio_bind_to_iface(s))
922 goto bad;
923
f9c799a0
OZ
924 return 0;
925
926bad:
927 log(L_ERR "sk_setup_multicast: %s: %m", err);
928 return -1;
929}
930
931int
932sk_join_group(sock *s, ip_addr maddr)
933{
934 struct ipv6_mreq mreq;
935
936 set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
937
938#ifdef CONFIG_IPV6_GLIBC_20
939 mreq.ipv6mr_ifindex = s->iface->index;
940#else
941 mreq.ipv6mr_interface = s->iface->index;
942#endif
943
861f223a 944 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0)
f9c799a0 945 {
861f223a 946 log(L_ERR "sk_join_group: IPV6_JOIN_GROUP: %m");
f9c799a0
OZ
947 return -1;
948 }
949
950 return 0;
951}
952
953int
954sk_leave_group(sock *s, ip_addr maddr)
955{
956 struct ipv6_mreq mreq;
957
958 set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
959
960#ifdef CONFIG_IPV6_GLIBC_20
961 mreq.ipv6mr_ifindex = s->iface->index;
962#else
963 mreq.ipv6mr_interface = s->iface->index;
964#endif
965
861f223a 966 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0)
f9c799a0 967 {
861f223a 968 log(L_ERR "sk_leave_group: IPV6_LEAVE_GROUP: %m");
f9c799a0
OZ
969 return -1;
970 }
971
972 return 0;
973}
974
975#else /* IPV4 */
976
977int
978sk_setup_multicast(sock *s)
979{
980 char *err;
981
982 ASSERT(s->iface && s->iface->addr);
983
984 if (err = sysio_setup_multicast(s))
985 {
986 log(L_ERR "sk_setup_multicast: %s: %m", err);
987 return -1;
988 }
989
990 return 0;
991}
992
993int
994sk_join_group(sock *s, ip_addr maddr)
995{
996 char *err;
997
998 if (err = sysio_join_group(s, maddr))
999 {
1000 log(L_ERR "sk_join_group: %s: %m", err);
1001 return -1;
1002 }
1003
1004 return 0;
1005}
1006
1007int
1008sk_leave_group(sock *s, ip_addr maddr)
1009{
1010 char *err;
1011
1012 if (err = sysio_leave_group(s, maddr))
1013 {
1014 log(L_ERR "sk_leave_group: %s: %m", err);
1015 return -1;
1016 }
1017
1018 return 0;
1019}
1020
1021#endif
1022
d51aa281 1023
b93abffa 1024static void
b5d9ee5c
MM
1025sk_tcp_connected(sock *s)
1026{
9be9a264
OZ
1027 sockaddr lsa;
1028 int lsa_len = sizeof(lsa);
1029 if (getsockname(s->fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
1030 get_sockaddr(&lsa, &s->saddr, &s->sport, 1);
1031
b5d9ee5c
MM
1032 s->type = SK_TCP;
1033 sk_alloc_bufs(s);
320f4173 1034 s->tx_hook(s);
b5d9ee5c
MM
1035}
1036
b93abffa
MM
1037static int
1038sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
1039{
1040 int fd = accept(s->fd, sa, &al);
1041 if (fd >= 0)
1042 {
1043 sock *t = sk_new(s->pool);
1044 char *err;
1045 t->type = type;
1046 t->fd = fd;
e1ddd993
MM
1047 t->ttl = s->ttl;
1048 t->tos = s->tos;
1049 t->rbsize = s->rbsize;
1050 t->tbsize = s->tbsize;
1051 if (type == SK_TCP)
cf31112f
OZ
1052 {
1053 sockaddr lsa;
1054 int lsa_len = sizeof(lsa);
1055 if (getsockname(fd, (struct sockaddr *) &lsa, &lsa_len) == 0)
1056 get_sockaddr(&lsa, &t->saddr, &t->sport, 1);
1057
1058 get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1);
1059 }
38a608c5 1060 sk_insert(t);
b93abffa
MM
1061 if (err = sk_setup(t))
1062 {
1063 log(L_ERR "Incoming connection: %s: %m", err);
e1ddd993
MM
1064 rfree(t);
1065 return 1;
b93abffa
MM
1066 }
1067 sk_alloc_bufs(t);
e1ddd993 1068 s->rx_hook(t, 0);
b93abffa
MM
1069 return 1;
1070 }
1071 else if (errno != EINTR && errno != EAGAIN)
1072 {
c025b852 1073 s->err_hook(s, errno);
b93abffa
MM
1074 }
1075 return 0;
1076}
1077
525fa2c1
MM
1078/**
1079 * sk_open - open a socket
1080 * @s: socket
1081 *
1082 * This function takes a socket resource created by sk_new() and
1083 * initialized by the user and binds a corresponding network connection
1084 * to it.
1085 *
1086 * Result: 0 for success, -1 for an error.
1087 */
b5d9ee5c
MM
1088int
1089sk_open(sock *s)
1090{
93a786cb 1091 int fd;
4f22c981 1092 sockaddr sa;
b5d9ee5c
MM
1093 int one = 1;
1094 int type = s->type;
1095 int has_src = ipa_nonzero(s->saddr) || s->sport;
b5d9ee5c
MM
1096 char *err;
1097
1098 switch (type)
1099 {
1100 case SK_TCP_ACTIVE:
320f4173
MM
1101 s->ttx = ""; /* Force s->ttx != s->tpos */
1102 /* Fall thru */
b5d9ee5c 1103 case SK_TCP_PASSIVE:
4f22c981 1104 fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
b5d9ee5c
MM
1105 break;
1106 case SK_UDP:
4f22c981 1107 fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
b5d9ee5c
MM
1108 break;
1109 case SK_IP:
4f22c981 1110 fd = socket(BIRD_PF, SOCK_RAW, s->dport);
b5d9ee5c 1111 break;
b4b3b39e
MM
1112 case SK_MAGIC:
1113 fd = s->fd;
1114 break;
b5d9ee5c 1115 default:
b4b3b39e 1116 bug("sk_open() called for invalid sock type %d", type);
b5d9ee5c
MM
1117 }
1118 if (fd < 0)
1119 die("sk_open: socket: %m");
1120 s->fd = fd;
1121
1122 if (err = sk_setup(s))
1123 goto bad;
38a608c5 1124
b5d9ee5c
MM
1125 if (has_src)
1126 {
1127 int port;
1128
f9c799a0 1129 if (type == SK_IP)
b5d9ee5c
MM
1130 port = 0;
1131 else
1132 {
1133 port = s->sport;
1134 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
1135 ERR("SO_REUSEADDR");
1136 }
1137 fill_in_sockaddr(&sa, s->saddr, port);
061ab802 1138 fill_in_sockifa(&sa, s->iface);
b5d9ee5c
MM
1139 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1140 ERR("bind");
1141 }
1142 fill_in_sockaddr(&sa, s->daddr, s->dport);
d51aa281
OZ
1143
1144 if (s->password)
1145 {
1146 int rv = sk_set_md5_auth_int(s, &sa, s->password);
1147 if (rv < 0)
1148 goto bad_no_log;
1149 }
1150
b5d9ee5c
MM
1151 switch (type)
1152 {
1153 case SK_TCP_ACTIVE:
1154 if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
1155 sk_tcp_connected(s);
9cbf43eb 1156 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
f428631c 1157 errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
b5d9ee5c
MM
1158 ERR("connect");
1159 break;
1160 case SK_TCP_PASSIVE:
1161 if (listen(fd, 8))
1162 ERR("listen");
1163 break;
4f22c981
MM
1164 case SK_MAGIC:
1165 break;
1166 default:
320f4173 1167 sk_alloc_bufs(s);
4f22c981
MM
1168#ifdef IPV6
1169#ifdef IPV6_MTU_DISCOVER
1170 {
1171 int dont = IPV6_PMTUDISC_DONT;
1172 if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
1173 ERR("IPV6_MTU_DISCOVER");
1174 }
1175#endif
1176#else
1177#ifdef IP_PMTUDISC
1178 {
1179 int dont = IP_PMTUDISC_DONT;
1180 if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
1181 ERR("IP_PMTUDISC");
1182 }
1183#endif
1184#endif
b5d9ee5c
MM
1185 }
1186
38a608c5 1187 sk_insert(s);
b5d9ee5c
MM
1188 return 0;
1189
1190bad:
1191 log(L_ERR "sk_open: %s: %m", err);
d51aa281 1192bad_no_log:
b5d9ee5c
MM
1193 close(fd);
1194 s->fd = -1;
1195 return -1;
1196}
1197
97e46d28 1198void
b93abffa
MM
1199sk_open_unix(sock *s, char *name)
1200{
1201 int fd;
1202 struct sockaddr_un sa;
1203 char *err;
1204
1205 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1206 if (fd < 0)
97e46d28 1207 ERR("socket");
b93abffa
MM
1208 s->fd = fd;
1209 if (err = sk_setup(s))
1210 goto bad;
1211 unlink(name);
68fa95cf 1212
97e46d28 1213 /* Path length checked in test_old_bird() */
b93abffa 1214 sa.sun_family = AF_UNIX;
97c6fa02 1215 strcpy(sa.sun_path, name);
0b3bf4b1 1216 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
b93abffa
MM
1217 ERR("bind");
1218 if (listen(fd, 8))
1219 ERR("listen");
38a608c5 1220 sk_insert(s);
97e46d28 1221 return;
b93abffa 1222
97e46d28 1223 bad:
b93abffa 1224 log(L_ERR "sk_open_unix: %s: %m", err);
97e46d28 1225 die("Unable to create control socket %s", name);
b93abffa
MM
1226}
1227
353729f5
OZ
1228static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1229
b5d9ee5c
MM
1230static int
1231sk_maybe_write(sock *s)
1232{
1233 int e;
1234
1235 switch (s->type)
1236 {
1237 case SK_TCP:
b4b3b39e 1238 case SK_MAGIC:
b93abffa 1239 case SK_UNIX:
b5d9ee5c
MM
1240 while (s->ttx != s->tpos)
1241 {
1242 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1243 if (e < 0)
1244 {
1245 if (errno != EINTR && errno != EAGAIN)
1246 {
353729f5 1247 reset_tx_buffer(s);
47597724
OZ
1248 /* EPIPE is just a connection close notification during TX */
1249 s->err_hook(s, (errno != EPIPE) ? errno : 0);
b5d9ee5c
MM
1250 return -1;
1251 }
1252 return 0;
1253 }
1254 s->ttx += e;
1255 }
353729f5 1256 reset_tx_buffer(s);
b5d9ee5c
MM
1257 return 1;
1258 case SK_UDP:
b5d9ee5c 1259 case SK_IP:
b5d9ee5c 1260 {
b5d9ee5c
MM
1261 if (s->tbuf == s->tpos)
1262 return 1;
b1a1faba 1263
353729f5
OZ
1264 sockaddr sa;
1265 fill_in_sockaddr(&sa, s->daddr, s->dport);
061ab802 1266 fill_in_sockifa(&sa, s->iface);
353729f5
OZ
1267
1268 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
646b24d9 1269 // byte cmsg_buf[CMSG_TX_SPACE];
353729f5
OZ
1270
1271 struct msghdr msg = {
1272 .msg_name = &sa,
1273 .msg_namelen = sizeof(sa),
1274 .msg_iov = &iov,
bed41728 1275 .msg_iovlen = 1};
353729f5 1276
646b24d9 1277 // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
353729f5
OZ
1278 e = sendmsg(s->fd, &msg, 0);
1279
b5d9ee5c
MM
1280 if (e < 0)
1281 {
1282 if (errno != EINTR && errno != EAGAIN)
1283 {
353729f5 1284 reset_tx_buffer(s);
c025b852 1285 s->err_hook(s, errno);
b5d9ee5c
MM
1286 return -1;
1287 }
1288 return 0;
1289 }
353729f5 1290 reset_tx_buffer(s);
b5d9ee5c
MM
1291 return 1;
1292 }
1293 default:
08c69a77 1294 bug("sk_maybe_write: unknown socket type %d", s->type);
b5d9ee5c
MM
1295 }
1296}
1297
ea89da38
OZ
1298int
1299sk_rx_ready(sock *s)
1300{
1301 fd_set rd, wr;
1302 struct timeval timo;
1303 int rv;
1304
1305 FD_ZERO(&rd);
1306 FD_ZERO(&wr);
1307 FD_SET(s->fd, &rd);
1308
1309 timo.tv_sec = 0;
1310 timo.tv_usec = 0;
1311
1312 redo:
1313 rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1314
1315 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1316 goto redo;
1317
1318 return rv;
1319}
1320
525fa2c1
MM
1321/**
1322 * sk_send - send data to a socket
1323 * @s: socket
1324 * @len: number of bytes to send
1325 *
1326 * This function sends @len bytes of data prepared in the
1327 * transmit buffer of the socket @s to the network connection.
1328 * If the packet can be sent immediately, it does so and returns
1329 * 1, else it queues the packet for later processing, returns 0
1330 * and calls the @tx_hook of the socket when the tranmission
1331 * takes place.
1332 */
b5d9ee5c
MM
1333int
1334sk_send(sock *s, unsigned len)
1335{
b5d9ee5c
MM
1336 s->ttx = s->tbuf;
1337 s->tpos = s->tbuf + len;
1338 return sk_maybe_write(s);
1339}
1340
525fa2c1
MM
1341/**
1342 * sk_send_to - send data to a specific destination
1343 * @s: socket
1344 * @len: number of bytes to send
1345 * @addr: IP address to send the packet to
1346 * @port: port to send the packet to
1347 *
2e9b2421 1348 * This is a sk_send() replacement for connection-less packet sockets
525fa2c1
MM
1349 * which allows destination of the packet to be chosen dynamically.
1350 */
b5d9ee5c
MM
1351int
1352sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1353{
353729f5
OZ
1354 s->daddr = addr;
1355 s->dport = port;
b5d9ee5c
MM
1356 s->ttx = s->tbuf;
1357 s->tpos = s->tbuf + len;
1358 return sk_maybe_write(s);
1359}
1360
353729f5
OZ
1361/*
1362int
1363sk_send_full(sock *s, unsigned len, struct iface *ifa,
1364 ip_addr saddr, ip_addr daddr, unsigned dport)
1365{
1366 s->iface = ifa;
1367 s->saddr = saddr;
1368 s->daddr = daddr;
1369 s->dport = dport;
1370 s->ttx = s->tbuf;
1371 s->tpos = s->tbuf + len;
1372 return sk_maybe_write(s);
1373}
1374*/
1375
b5d9ee5c
MM
1376static int
1377sk_read(sock *s)
1378{
1379 switch (s->type)
1380 {
b5d9ee5c
MM
1381 case SK_TCP_PASSIVE:
1382 {
4f22c981 1383 sockaddr sa;
b93abffa
MM
1384 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1385 }
1386 case SK_UNIX_PASSIVE:
1387 {
1388 struct sockaddr_un sa;
1389 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
b5d9ee5c
MM
1390 }
1391 case SK_TCP:
b93abffa 1392 case SK_UNIX:
b5d9ee5c
MM
1393 {
1394 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1395
1396 if (c < 0)
1397 {
1398 if (errno != EINTR && errno != EAGAIN)
c025b852 1399 s->err_hook(s, errno);
b5d9ee5c
MM
1400 }
1401 else if (!c)
c025b852 1402 s->err_hook(s, 0);
b5d9ee5c
MM
1403 else
1404 {
1405 s->rpos += c;
1406 if (s->rx_hook(s, s->rpos - s->rbuf))
38a608c5
MM
1407 {
1408 /* We need to be careful since the socket could have been deleted by the hook */
1409 if (current_sock == s)
1410 s->rpos = s->rbuf;
1411 }
b5d9ee5c
MM
1412 return 1;
1413 }
1414 return 0;
1415 }
b4b3b39e
MM
1416 case SK_MAGIC:
1417 return s->rx_hook(s, 0);
b5d9ee5c
MM
1418 default:
1419 {
4f22c981 1420 sockaddr sa;
353729f5
OZ
1421 int e;
1422
1423 struct iovec iov = {s->rbuf, s->rbsize};
1424 byte cmsg_buf[CMSG_RX_SPACE];
1425
1426 struct msghdr msg = {
1427 .msg_name = &sa,
1428 .msg_namelen = sizeof(sa),
1429 .msg_iov = &iov,
1430 .msg_iovlen = 1,
1431 .msg_control = cmsg_buf,
1432 .msg_controllen = sizeof(cmsg_buf),
1433 .msg_flags = 0};
1434
1435 e = recvmsg(s->fd, &msg, 0);
b5d9ee5c
MM
1436
1437 if (e < 0)
1438 {
1439 if (errno != EINTR && errno != EAGAIN)
c025b852 1440 s->err_hook(s, errno);
b5d9ee5c
MM
1441 return 0;
1442 }
1443 s->rpos = s->rbuf + e;
b1a1faba 1444 get_sockaddr(&sa, &s->faddr, &s->fport, 1);
353729f5
OZ
1445 sysio_process_rx_cmsgs(s, &msg);
1446
b5d9ee5c
MM
1447 s->rx_hook(s, e);
1448 return 1;
1449 }
1450 }
1451}
1452
38a608c5 1453static int
b5d9ee5c
MM
1454sk_write(sock *s)
1455{
320f4173
MM
1456 switch (s->type)
1457 {
1458 case SK_TCP_ACTIVE:
1459 {
1460 sockaddr sa;
1461 fill_in_sockaddr(&sa, s->daddr, s->dport);
09e4117c 1462 if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
320f4173
MM
1463 sk_tcp_connected(s);
1464 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
c025b852 1465 s->err_hook(s, errno);
38a608c5 1466 return 0;
320f4173 1467 }
320f4173 1468 default:
38a608c5
MM
1469 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1470 {
1471 s->tx_hook(s);
1472 return 1;
1473 }
1474 return 0;
320f4173 1475 }
b5d9ee5c
MM
1476}
1477
1478void
1479sk_dump_all(void)
1480{
1481 node *n;
1482 sock *s;
1483
1484 debug("Open sockets:\n");
1485 WALK_LIST(n, sock_list)
1486 {
1487 s = SKIP_BACK(sock, n, n);
1488 debug("%p ", s);
1489 sk_dump(&s->r);
1490 }
1491 debug("\n");
1492}
1493
1494#undef ERR
f782b72c 1495#undef WARN
b5d9ee5c
MM
1496
1497/*
1498 * Main I/O Loop
1499 */
1500
4c9dd1e4
MM
/* Set when an asynchronous reconfiguration is scheduled; io_loop() calls async_config() and clears it */
volatile int async_config_flag;
/* Set when an asynchronous dump is scheduled; io_loop() calls async_dump() and clears it */
volatile int async_dump_flag;
1503
b5d9ee5c
MM
1504void
1505io_init(void)
1506{
1507 init_list(&near_timers);
1508 init_list(&far_timers);
1509 init_list(&sock_list);
e8f73195 1510 init_list(&global_event_list);
7e5f5ffd 1511 krt_io_init();
fd91ae33
OZ
1512 init_times();
1513 update_times();
1514 srandom((int) now_real);
b5d9ee5c
MM
1515}
1516
ea89da38
OZ
1517static int short_loops = 0;
1518#define SHORT_LOOP_MAX 10
1519
b5d9ee5c
MM
1520void
1521io_loop(void)
1522{
1523 fd_set rd, wr;
1524 struct timeval timo;
1525 time_t tout;
30770df2 1526 int hi, events;
b5d9ee5c 1527 sock *s;
38a608c5 1528 node *n;
b5d9ee5c 1529
38a608c5 1530 sock_recalc_fdsets_p = 1;
b5d9ee5c
MM
1531 for(;;)
1532 {
30770df2 1533 events = ev_run_list(&global_event_list);
fd91ae33 1534 update_times();
b5d9ee5c
MM
1535 tout = tm_first_shot();
1536 if (tout <= now)
1537 {
1538 tm_shot();
1539 continue;
1540 }
30770df2
MM
1541 timo.tv_sec = events ? 0 : tout - now;
1542 timo.tv_usec = 0;
b5d9ee5c 1543
38a608c5
MM
1544 if (sock_recalc_fdsets_p)
1545 {
1546 sock_recalc_fdsets_p = 0;
1547 FD_ZERO(&rd);
1548 FD_ZERO(&wr);
1549 }
1550
b5d9ee5c
MM
1551 hi = 0;
1552 WALK_LIST(n, sock_list)
1553 {
1554 s = SKIP_BACK(sock, n, n);
1555 if (s->rx_hook)
1556 {
1557 FD_SET(s->fd, &rd);
1558 if (s->fd > hi)
1559 hi = s->fd;
1560 }
38a608c5
MM
1561 else
1562 FD_CLR(s->fd, &rd);
b5d9ee5c
MM
1563 if (s->tx_hook && s->ttx != s->tpos)
1564 {
1565 FD_SET(s->fd, &wr);
1566 if (s->fd > hi)
1567 hi = s->fd;
1568 }
38a608c5
MM
1569 else
1570 FD_CLR(s->fd, &wr);
b5d9ee5c
MM
1571 }
1572
4c9dd1e4
MM
1573 /*
1574 * Yes, this is racy. But even if the signal comes before this test
1575 * and entering select(), it gets caught on the next timer tick.
1576 */
1577
1578 if (async_config_flag)
1579 {
1580 async_config();
1581 async_config_flag = 0;
f4aabcee 1582 continue;
4c9dd1e4
MM
1583 }
1584 if (async_dump_flag)
1585 {
1586 async_dump();
1587 async_dump_flag = 0;
f4aabcee
MM
1588 continue;
1589 }
1590 if (async_shutdown_flag)
1591 {
1592 async_shutdown();
1593 async_shutdown_flag = 0;
1594 continue;
4c9dd1e4
MM
1595 }
1596
1597 /* And finally enter select() to find active sockets */
b5d9ee5c 1598 hi = select(hi+1, &rd, &wr, NULL, &timo);
ea89da38 1599
b5d9ee5c
MM
1600 if (hi < 0)
1601 {
1602 if (errno == EINTR || errno == EAGAIN)
1603 continue;
1604 die("select: %m");
1605 }
1606 if (hi)
1607 {
ea89da38
OZ
1608 /* guaranteed to be non-empty */
1609 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1610
38a608c5 1611 while (current_sock)
b5d9ee5c 1612 {
38a608c5
MM
1613 sock *s = current_sock;
1614 int e;
ea89da38
OZ
1615 int steps;
1616
1617 steps = MAX_STEPS;
1618 if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
38a608c5
MM
1619 do
1620 {
4323099d 1621 steps--;
38a608c5
MM
1622 e = sk_read(s);
1623 if (s != current_sock)
1624 goto next;
1625 }
4323099d
OZ
1626 while (e && s->rx_hook && steps);
1627
1628 steps = MAX_STEPS;
38a608c5
MM
1629 if (FD_ISSET(s->fd, &wr))
1630 do
1631 {
4323099d 1632 steps--;
38a608c5
MM
1633 e = sk_write(s);
1634 if (s != current_sock)
1635 goto next;
1636 }
4323099d 1637 while (e && steps);
38a608c5
MM
1638 current_sock = sk_next(s);
1639 next: ;
b5d9ee5c 1640 }
ea89da38
OZ
1641
1642 short_loops++;
1643 if (events && (short_loops < SHORT_LOOP_MAX))
1644 continue;
1645 short_loops = 0;
1646
1647 int count = 0;
1648 current_sock = stored_sock;
1649 if (current_sock == NULL)
1650 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1651
1652 while (current_sock && count < MAX_RX_STEPS)
1653 {
1654 sock *s = current_sock;
1655 int e;
ea89da38
OZ
1656
1657 if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1658 {
1659 count++;
1660 e = sk_read(s);
1661 if (s != current_sock)
1662 goto next2;
1663 }
1664 current_sock = sk_next(s);
1665 next2: ;
1666 }
1667
1668 stored_sock = current_sock;
b5d9ee5c
MM
1669 }
1670 }
1671}
41c8976e
OF
1672
/*
 * test_old_bird - make sure no other instance listens on the control socket
 * @path: filesystem path of the control socket
 *
 * Tries to connect to the UNIX-domain socket at @path and calls die()
 * when the connection succeeds. It also rejects paths which do not fit
 * into sun_path; sk_open_unix() relies on this check.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");

  /* The path has to fit including its terminating zero */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  /* A successful connect means somebody is listening there */
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");

  close(fd);
}
1691
1692