]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/unix/io.c
Documentation update
[thirdparty/bird.git] / sysdep / unix / io.c
CommitLineData
b5d9ee5c
MM
1/*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
38a608c5 4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
b1a1faba 5 * (c) 2004 Ondrej Filip <feela@network.cz>
b5d9ee5c
MM
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
10#include <stdio.h>
11#include <stdlib.h>
01b776e1 12#include <time.h>
b5d9ee5c
MM
13#include <sys/time.h>
14#include <sys/types.h>
15#include <sys/socket.h>
16#include <sys/fcntl.h>
b93abffa 17#include <sys/un.h>
b5d9ee5c
MM
18#include <unistd.h>
19#include <errno.h>
20
21#include "nest/bird.h"
22#include "lib/lists.h"
23#include "lib/resource.h"
24#include "lib/timer.h"
25#include "lib/socket.h"
e8f73195 26#include "lib/event.h"
afa8937a 27#include "lib/string.h"
b5d9ee5c
MM
28#include "nest/iface.h"
29
30#include "lib/unix.h"
a2867cd9 31#include "lib/sysio.h"
b5d9ee5c 32
a9c986f9
MM
33/*
34 * Tracked Files
35 */
36
37struct rfile {
38 resource r;
39 FILE *f;
40};
41
42static void
43rf_free(resource *r)
44{
45 struct rfile *a = (struct rfile *) r;
46
47 fclose(a->f);
48}
49
50static void
51rf_dump(resource *r)
52{
53 struct rfile *a = (struct rfile *) r;
54
55 debug("(FILE *%p)\n", a->f);
56}
57
58static struct resclass rf_class = {
59 "FILE",
60 sizeof(struct rfile),
61 rf_free,
62 rf_dump
63};
64
65void *
f78056fb 66tracked_fopen(pool *p, char *name, char *mode)
a9c986f9
MM
67{
68 FILE *f = fopen(name, mode);
69
70 if (f)
71 {
72 struct rfile *r = ralloc(p, &rf_class);
73 r->f = f;
74 }
75 return f;
76}
77
525fa2c1
MM
78/**
79 * DOC: Timers
80 *
81 * Timers are resources which represent a wish of a module to call
82 * a function at the specified time. The platform dependent code
58f7d004 83 * doesn't guarantee exact timing, only that a timer function
525fa2c1
MM
84 * won't be called before the requested time.
85 *
fd91ae33
OZ
86 * In BIRD, time is represented by values of the &bird_clock_t type
87 * which are integral numbers interpreted as a relative number of seconds since
88 * some fixed time point in past. The current time can be read
89 * from variable @now with reasonable accuracy and is monotonic. There is also
90 * a current 'absolute' time in variable @now_real reported by OS.
525fa2c1
MM
91 *
92 * Each timer is described by a &timer structure containing a pointer
93 * to the handler function (@hook), data private to this function (@data),
94 * time the function should be called at (@expires, 0 for inactive timers),
95 * for the other fields see |timer.h|.
b5d9ee5c
MM
96 */
97
98#define NEAR_TIMER_LIMIT 4
99
b5d9ee5c
MM
100static list near_timers, far_timers;
101static bird_clock_t first_far_timer = TIME_INFINITY;
102
fd91ae33
OZ
103bird_clock_t now, now_real;
104
105static void
106update_times_plain(void)
107{
108 bird_clock_t new_time = time(NULL);
109 int delta = new_time - now_real;
110
111 if ((delta >= 0) && (delta < 60))
112 now += delta;
113 else if (now_real != 0)
114 log(L_WARN "Time jump, delta %d s", delta);
115
116 now_real = new_time;
117}
118
119static void
120update_times_gettime(void)
121{
122 struct timespec ts;
123 int rv;
124
125 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
126 if (rv != 0)
127 die("clock_gettime: %m");
128
129 if (ts.tv_sec != now) {
130 if (ts.tv_sec < now)
131 log(L_ERR "Monotonic timer is broken");
132
133 now = ts.tv_sec;
134 now_real = time(NULL);
135 }
136}
137
/* Set by init_times(): non-zero when clock_gettime(CLOCK_MONOTONIC) works. */
static int clock_monotonic_available;

/* Refresh @now / @now_real using whichever clock source is available. */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
    update_times_plain();
  else
    update_times_gettime();
}
148
149static inline void
150init_times(void)
151{
152 struct timespec ts;
153 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
154 if (!clock_monotonic_available)
155 log(L_WARN "Monotonic timer is missing");
156}
157
b5d9ee5c
MM
158
159static void
160tm_free(resource *r)
161{
162 timer *t = (timer *) r;
163
164 tm_stop(t);
165}
166
167static void
168tm_dump(resource *r)
169{
170 timer *t = (timer *) r;
171
e8f73195 172 debug("(code %p, data %p, ", t->hook, t->data);
af847acc
MM
173 if (t->randomize)
174 debug("rand %d, ", t->randomize);
175 if (t->recurrent)
176 debug("recur %d, ", t->recurrent);
b5d9ee5c
MM
177 if (t->expires)
178 debug("expires in %d sec)\n", t->expires - now);
179 else
180 debug("inactive)\n");
181}
182
183static struct resclass tm_class = {
184 "Timer",
185 sizeof(timer),
186 tm_free,
187 tm_dump
188};
189
525fa2c1
MM
190/**
191 * tm_new - create a timer
192 * @p: pool
193 *
194 * This function creates a new timer resource and returns
195 * a pointer to it. To use the timer, you need to fill in
196 * the structure fields and call tm_start() to start timing.
197 */
b5d9ee5c
MM
198timer *
199tm_new(pool *p)
200{
201 timer *t = ralloc(p, &tm_class);
202 t->hook = NULL;
203 t->data = NULL;
204 t->randomize = 0;
205 t->expires = 0;
206 return t;
207}
208
209static inline void
210tm_insert_near(timer *t)
211{
212 node *n = HEAD(near_timers);
213
214 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
215 n = n->next;
216 insert_node(&t->n, n->prev);
217}
218
525fa2c1
MM
219/**
220 * tm_start - start a timer
221 * @t: timer
222 * @after: number of seconds the timer should be run after
223 *
224 * This function schedules the hook function of the timer to
225 * be called after @after seconds. If the timer has been already
226 * started, its @expires time is replaced by the new value.
227 *
228 * If you have set the @randomize field of @t, the timeout
229 * will be increased by a random number of seconds chosen
230 * uniformly from range 0 .. @randomize.
231 *
232 * You can call tm_start() from the handler function of the timer
233 * to request another run of the timer. Also, you can set the @recurrent
234 * field to have the timer re-added automatically with the same timeout.
235 */
b5d9ee5c
MM
236void
237tm_start(timer *t, unsigned after)
238{
239 bird_clock_t when;
240
241 if (t->randomize)
af847acc 242 after += random() % (t->randomize + 1);
b5d9ee5c
MM
243 when = now + after;
244 if (t->expires == when)
245 return;
246 if (t->expires)
247 rem_node(&t->n);
248 t->expires = when;
249 if (after <= NEAR_TIMER_LIMIT)
250 tm_insert_near(t);
251 else
252 {
253 if (!first_far_timer || first_far_timer > when)
254 first_far_timer = when;
255 add_tail(&far_timers, &t->n);
256 }
257}
258
525fa2c1
MM
259/**
260 * tm_stop - stop a timer
261 * @t: timer
262 *
263 * This function stops a timer. If the timer is already stopped,
264 * nothing happens.
265 */
b5d9ee5c
MM
266void
267tm_stop(timer *t)
268{
269 if (t->expires)
270 {
271 rem_node(&t->n);
272 t->expires = 0;
273 }
274}
275
276static void
277tm_dump_them(char *name, list *l)
278{
279 node *n;
280 timer *t;
281
282 debug("%s timers:\n", name);
283 WALK_LIST(n, *l)
284 {
285 t = SKIP_BACK(timer, n, n);
286 debug("%p ", t);
287 tm_dump(&t->r);
288 }
289 debug("\n");
290}
291
292void
293tm_dump_all(void)
294{
295 tm_dump_them("Near", &near_timers);
296 tm_dump_them("Far", &far_timers);
297}
298
299static inline time_t
300tm_first_shot(void)
301{
302 time_t x = first_far_timer;
303
304 if (!EMPTY_LIST(near_timers))
305 {
306 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
307 if (t->expires < x)
308 x = t->expires;
309 }
310 return x;
311}
312
313static void
314tm_shot(void)
315{
316 timer *t;
317 node *n, *m;
318
319 if (first_far_timer <= now)
320 {
28a9a189 321 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
b5d9ee5c
MM
322 first_far_timer = TIME_INFINITY;
323 n = HEAD(far_timers);
324 while (m = n->next)
325 {
326 t = SKIP_BACK(timer, n, n);
327 if (t->expires <= limit)
328 {
329 rem_node(n);
330 tm_insert_near(t);
331 }
332 else if (t->expires < first_far_timer)
333 first_far_timer = t->expires;
334 n = m;
335 }
336 }
337 while ((n = HEAD(near_timers)) -> next)
338 {
af847acc 339 int delay;
b5d9ee5c
MM
340 t = SKIP_BACK(timer, n, n);
341 if (t->expires > now)
342 break;
343 rem_node(n);
af847acc 344 delay = t->expires - now;
b5d9ee5c 345 t->expires = 0;
af847acc
MM
346 if (t->recurrent)
347 {
348 int i = t->recurrent - delay;
349 if (i < 0)
350 i = 0;
351 tm_start(t, i);
352 }
b5d9ee5c
MM
353 t->hook(t);
354 }
355}
356
0d3effcf
OF
357/**
358 * tm_parse_datetime - parse a date and time
359 * @x: datetime string
360 *
361 * tm_parse_datetime() takes a textual representation of
362 * a date and time (dd-mm-yyyy hh:mm:ss)
363 * and converts it to the corresponding value of type &bird_clock_t.
364 */
365bird_clock_t
366tm_parse_datetime(char *x)
367{
368 struct tm tm;
369 int n;
370 time_t t;
371
372 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
373 return tm_parse_date(x);
374 tm.tm_mon--;
375 tm.tm_year -= 1900;
376 t = mktime(&tm);
377 if (t == (time_t) -1)
378 return 0;
379 return t;
380}
525fa2c1
MM
381/**
382 * tm_parse_date - parse a date
383 * @x: date string
384 *
385 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
386 * and converts it to the corresponding value of type &bird_clock_t.
387 */
913f7dc9
MM
388bird_clock_t
389tm_parse_date(char *x)
390{
391 struct tm tm;
392 int n;
393 time_t t;
394
395 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
396 return 0;
397 tm.tm_mon--;
398 tm.tm_year -= 1900;
399 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
400 t = mktime(&tm);
401 if (t == (time_t) -1)
402 return 0;
403 return t;
404}
405
525fa2c1
MM
406/**
407 * tm_format_date - convert date to textual representation
408 * @x: destination buffer of size %TM_DATE_BUFFER_SIZE
409 * @t: time
410 *
fd91ae33
OZ
411 * This function formats the given relative time value @t to a textual
412 * date representation (dd-mm-yyyy) in real time.
525fa2c1 413 */
913f7dc9
MM
414void
415tm_format_date(char *x, bird_clock_t t)
416{
417 struct tm *tm;
418
419 tm = localtime(&t);
a37410cb 420 bsprintf(x, "%02d-%02d-%04d", tm->tm_mday, tm->tm_mon+1, tm->tm_year+1900);
913f7dc9
MM
421}
422
525fa2c1
MM
423/**
424 * tm_format_datetime - convert date and time to textual representation
425 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
426 * @t: time
427 *
fd91ae33
OZ
428 * This function formats the given relative time value @t to a textual
429 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
525fa2c1 430 */
7a88832e
MM
431void
432tm_format_datetime(char *x, bird_clock_t t)
433{
434 struct tm *tm;
fd91ae33
OZ
435 bird_clock_t delta = now - t;
436 t = now_real - delta;
7a88832e
MM
437 tm = localtime(&t);
438 if (strftime(x, TM_DATETIME_BUFFER_SIZE, "%d-%m-%Y %H:%M:%S", tm) == TM_DATETIME_BUFFER_SIZE)
439 strcpy(x, "<too-long>");
440}
441
525fa2c1
MM
442/**
443 * tm_format_reltime - convert date and time to relative textual representation
444 * @x: destination buffer of size %TM_RELTIME_BUFFER_SIZE
445 * @t: time
446 *
fd91ae33
OZ
447 * This function formats the given relative time value @t to a short
448 * textual representation in real time, relative to the current time.
525fa2c1 449 */
afa8937a
MM
450void
451tm_format_reltime(char *x, bird_clock_t t)
452{
453 struct tm *tm;
afa8937a
MM
454 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
455
fd91ae33
OZ
456 bird_clock_t delta = now - t;
457 t = now_real - delta;
afa8937a 458 tm = localtime(&t);
b594ad23 459 if (delta < 20*3600)
afa8937a
MM
460 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
461 else if (delta < 360*86400)
462 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
463 else
464 bsprintf(x, "%d", tm->tm_year+1900);
465}
466
525fa2c1
MM
467/**
468 * DOC: Sockets
469 *
470 * Socket resources represent network connections. Their data structure (&socket)
471 * contains a lot of fields defining the exact type of the socket, the local and
472 * remote addresses and ports, pointers to socket buffers and finally pointers to
473 * hook functions to be called when new data have arrived to the receive buffer
474 * (@rx_hook), when the contents of the transmit buffer have been transmitted
475 * (@tx_hook) and when an error or connection close occurs (@err_hook).
476 *
38a608c5 477 * Freeing of sockets from inside socket hooks is perfectly safe.
b5d9ee5c
MM
478 */
479
abae6e9c
MM
/* Fallbacks for systems that lack the SOL_* option-level names. */
#ifndef SOL_IP
#define SOL_IP IPPROTO_IP
#endif

#ifndef SOL_IPV6
#define SOL_IPV6 IPPROTO_IPV6
#endif

/*
 * IPV6_ADD_MEMBERSHIP is a non-standard alias; the portable name (RFC 3493)
 * is IPV6_JOIN_GROUP.  The former fallback to IP_ADD_MEMBERSHIP named an
 * IPv4 option, which is wrong at the IPv6 level and with struct ipv6_mreq.
 */
#ifndef IPV6_ADD_MEMBERSHIP
#define IPV6_ADD_MEMBERSHIP IPV6_JOIN_GROUP
#endif
491
b5d9ee5c 492static list sock_list;
38a608c5
MM
493static struct birdsock *current_sock;
494static int sock_recalc_fdsets_p;
495
496static inline sock *
497sk_next(sock *s)
498{
499 if (!s->n.next->next)
500 return NULL;
501 else
502 return SKIP_BACK(sock, n, s->n.next);
503}
b5d9ee5c
MM
504
505static void
4da25acb 506sk_alloc_bufs(sock *s)
b5d9ee5c 507{
4da25acb
MM
508 if (!s->rbuf && s->rbsize)
509 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
510 s->rpos = s->rbuf;
511 if (!s->tbuf && s->tbsize)
512 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
513 s->tpos = s->ttx = s->tbuf;
514}
b5d9ee5c 515
4da25acb
MM
516static void
517sk_free_bufs(sock *s)
518{
38a608c5 519 if (s->rbuf_alloc)
4da25acb
MM
520 {
521 xfree(s->rbuf_alloc);
522 s->rbuf = s->rbuf_alloc = NULL;
523 }
38a608c5 524 if (s->tbuf_alloc)
4da25acb
MM
525 {
526 xfree(s->tbuf_alloc);
527 s->tbuf = s->tbuf_alloc = NULL;
528 }
529}
530
531static void
532sk_free(resource *r)
533{
534 sock *s = (sock *) r;
535
536 sk_free_bufs(s);
b5d9ee5c 537 if (s->fd >= 0)
320f4173
MM
538 {
539 close(s->fd);
38a608c5
MM
540 if (s == current_sock)
541 current_sock = sk_next(s);
320f4173 542 rem_node(&s->n);
38a608c5 543 sock_recalc_fdsets_p = 1;
320f4173 544 }
b5d9ee5c
MM
545}
546
4da25acb
MM
547void
548sk_reallocate(sock *s)
549{
550 sk_free_bufs(s);
551 sk_alloc_bufs(s);
552}
553
b5d9ee5c
MM
554static void
555sk_dump(resource *r)
556{
557 sock *s = (sock *) r;
b93abffa 558 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" };
b5d9ee5c
MM
559
560 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
561 sk_type_names[s->type],
562 s->data,
563 s->saddr,
564 s->sport,
565 s->daddr,
566 s->dport,
567 s->tos,
568 s->ttl,
569 s->iface ? s->iface->name : "none");
570}
571
572static struct resclass sk_class = {
573 "Socket",
574 sizeof(sock),
575 sk_free,
576 sk_dump
577};
578
525fa2c1
MM
579/**
580 * sk_new - create a socket
581 * @p: pool
582 *
583 * This function creates a new socket resource. If you want to use it,
584 * you need to fill in all the required fields of the structure and
585 * call sk_open() to do the actual opening of the socket.
586 */
b5d9ee5c
MM
587sock *
588sk_new(pool *p)
589{
590 sock *s = ralloc(p, &sk_class);
591 s->pool = p;
592 s->data = NULL;
593 s->saddr = s->daddr = IPA_NONE;
594 s->sport = s->dport = 0;
595 s->tos = s->ttl = -1;
596 s->iface = NULL;
597 s->rbuf = NULL;
598 s->rx_hook = NULL;
599 s->rbsize = 0;
600 s->tbuf = NULL;
601 s->tx_hook = NULL;
602 s->tbsize = 0;
603 s->err_hook = NULL;
604 s->fd = -1;
38a608c5 605 s->rbuf_alloc = s->tbuf_alloc = NULL;
d51aa281 606 s->password = NULL;
b5d9ee5c
MM
607 return s;
608}
609
38a608c5
MM
610static void
611sk_insert(sock *s)
612{
613 add_tail(&sock_list, &s->n);
614 sock_recalc_fdsets_p = 1;
615}
b5d9ee5c 616
4f22c981
MM
617#ifdef IPV6
618
4f22c981
MM
619void
620fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
621{
b1a1faba 622 memset (sa, 0, sizeof (struct sockaddr_in6));
4f22c981
MM
623 sa->sin6_family = AF_INET6;
624 sa->sin6_port = htons(port);
625 sa->sin6_flowinfo = 0;
b1a1faba
OF
626#ifdef HAVE_SIN_LEN
627 sa->sin6_len = sizeof(struct sockaddr_in6);
628#endif
4f22c981
MM
629 set_inaddr(&sa->sin6_addr, a);
630}
631
632void
b1a1faba 633get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check)
4f22c981 634{
b1a1faba
OF
635 if (check && sa->sin6_family != AF_INET6)
636 bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family);
4f22c981
MM
637 if (port)
638 *port = ntohs(sa->sin6_port);
639 memcpy(a, &sa->sin6_addr, sizeof(*a));
640 ipa_ntoh(*a);
641}
642
643#else
644
4cf45766 645void
4f22c981 646fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port)
b5d9ee5c 647{
b1a1faba 648 memset (sa, 0, sizeof (struct sockaddr_in));
b5d9ee5c
MM
649 sa->sin_family = AF_INET;
650 sa->sin_port = htons(port);
b1a1faba
OF
651#ifdef HAVE_SIN_LEN
652 sa->sin_len = sizeof(struct sockaddr_in);
653#endif
b5d9ee5c
MM
654 set_inaddr(&sa->sin_addr, a);
655}
656
af847acc 657void
b1a1faba 658get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check)
b5d9ee5c 659{
b1a1faba
OF
660 if (check && sa->sin_family != AF_INET)
661 bug("get_sockaddr called for wrong address family (%d)", sa->sin_family);
af847acc
MM
662 if (port)
663 *port = ntohs(sa->sin_port);
b5d9ee5c 664 memcpy(a, &sa->sin_addr.s_addr, sizeof(*a));
dce26783 665 ipa_ntoh(*a);
b5d9ee5c
MM
666}
667
4f22c981
MM
668#endif
669
a39b165e
OZ
670static char *
671sk_set_ttl_int(sock *s)
672{
673 int one = 1;
674#ifdef IPV6
675 if (s->type != SK_UDP_MC && s->type != SK_IP_MC &&
1389f369 676 setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
a39b165e
OZ
677 return "IPV6_UNICAST_HOPS";
678#else
679 if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
680 return "IP_TTL";
681#ifdef CONFIG_UNIX_DONTROUTE
682 if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
683 return "SO_DONTROUTE";
684#endif
685#endif
686 return NULL;
687}
688
38a608c5
MM
689#define ERR(x) do { err = x; goto bad; } while(0)
690#define WARN(x) log(L_WARN "sk_setup: %s: %m", x)
691
b5d9ee5c
MM
692static char *
693sk_setup(sock *s)
694{
695 int fd = s->fd;
b5d9ee5c
MM
696 char *err;
697
698 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
699 ERR("fcntl(O_NONBLOCK)");
b93abffa
MM
700 if (s->type == SK_UNIX)
701 return NULL;
a39b165e 702#ifndef IPV6
b5d9ee5c 703 if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
f782b72c 704 WARN("IP_TOS");
b5d9ee5c 705#endif
789772ed
OZ
706
707#ifdef IPV6
708 int v = 1;
709 if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
710 WARN("IPV6_V6ONLY");
711#endif
712
a39b165e
OZ
713 if (s->ttl >= 0)
714 err = sk_set_ttl_int(s);
715 else
716 err = NULL;
717
b5d9ee5c
MM
718bad:
719 return err;
720}
721
a39b165e
OZ
722/**
723 * sk_set_ttl - set TTL for given socket.
724 * @s: socket
725 * @ttl: TTL value
726 *
727 * Set TTL for already opened connections when TTL was not set before.
728 * Useful for accepted connections when different ones should have
729 * different TTL.
730 *
731 * Result: 0 for success, -1 for an error.
732 */
733
734int
735sk_set_ttl(sock *s, int ttl)
736{
737 char *err;
738
739 s->ttl = ttl;
740 if (err = sk_set_ttl_int(s))
741 log(L_ERR "sk_set_ttl: %s: %m", err);
742
743 return (err ? -1 : 0);
744}
745
d51aa281 746
d51aa281
OZ
747/**
748 * sk_set_md5_auth - add / remove MD5 security association for given socket.
749 * @s: socket
750 * @a: IP address of the other side
751 * @passwd: password used for MD5 authentication
752 *
753 * In TCP MD5 handling code in kernel, there is a set of pairs
754 * (address, password) used to choose password according to
755 * address of the other side. This function is useful for
756 * listening socket, for active sockets it is enough to set
757 * s->password field.
758 *
759 * When called with passwd != NULL, the new pair is added,
760 * When called with passwd == NULL, the existing pair is removed.
761 *
762 * Result: 0 for success, -1 for an error.
763 */
764
765int
766sk_set_md5_auth(sock *s, ip_addr a, char *passwd)
767{
768 sockaddr sa;
769 fill_in_sockaddr(&sa, a, 0);
770 return sk_set_md5_auth_int(s, &sa, passwd);
771}
772
773
b93abffa 774static void
b5d9ee5c
MM
775sk_tcp_connected(sock *s)
776{
b5d9ee5c
MM
777 s->type = SK_TCP;
778 sk_alloc_bufs(s);
320f4173 779 s->tx_hook(s);
b5d9ee5c
MM
780}
781
b93abffa
MM
782static int
783sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type)
784{
785 int fd = accept(s->fd, sa, &al);
786 if (fd >= 0)
787 {
788 sock *t = sk_new(s->pool);
789 char *err;
790 t->type = type;
791 t->fd = fd;
e1ddd993
MM
792 t->ttl = s->ttl;
793 t->tos = s->tos;
794 t->rbsize = s->rbsize;
795 t->tbsize = s->tbsize;
796 if (type == SK_TCP)
b1a1faba 797 get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1);
38a608c5 798 sk_insert(t);
b93abffa
MM
799 if (err = sk_setup(t))
800 {
801 log(L_ERR "Incoming connection: %s: %m", err);
e1ddd993
MM
802 rfree(t);
803 return 1;
b93abffa
MM
804 }
805 sk_alloc_bufs(t);
e1ddd993 806 s->rx_hook(t, 0);
b93abffa
MM
807 return 1;
808 }
809 else if (errno != EINTR && errno != EAGAIN)
810 {
811 log(L_ERR "accept: %m");
c025b852 812 s->err_hook(s, errno);
b93abffa
MM
813 }
814 return 0;
815}
816
525fa2c1
MM
817/**
818 * sk_open - open a socket
819 * @s: socket
820 *
821 * This function takes a socket resource created by sk_new() and
822 * initialized by the user and binds a corresponding network connection
823 * to it.
824 *
825 * Result: 0 for success, -1 for an error.
826 */
b5d9ee5c
MM
827int
828sk_open(sock *s)
829{
93a786cb 830 int fd;
4f22c981 831 sockaddr sa;
b5d9ee5c
MM
832 int one = 1;
833 int type = s->type;
834 int has_src = ipa_nonzero(s->saddr) || s->sport;
b5d9ee5c
MM
835 char *err;
836
837 switch (type)
838 {
839 case SK_TCP_ACTIVE:
320f4173
MM
840 s->ttx = ""; /* Force s->ttx != s->tpos */
841 /* Fall thru */
b5d9ee5c 842 case SK_TCP_PASSIVE:
4f22c981 843 fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
b5d9ee5c
MM
844 break;
845 case SK_UDP:
846 case SK_UDP_MC:
4f22c981 847 fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
b5d9ee5c
MM
848 break;
849 case SK_IP:
850 case SK_IP_MC:
4f22c981 851 fd = socket(BIRD_PF, SOCK_RAW, s->dport);
b5d9ee5c 852 break;
b4b3b39e
MM
853 case SK_MAGIC:
854 fd = s->fd;
855 break;
b5d9ee5c 856 default:
b4b3b39e 857 bug("sk_open() called for invalid sock type %d", type);
b5d9ee5c
MM
858 }
859 if (fd < 0)
860 die("sk_open: socket: %m");
861 s->fd = fd;
862
863 if (err = sk_setup(s))
864 goto bad;
38a608c5 865
b5d9ee5c
MM
866 switch (type)
867 {
868 case SK_UDP:
869 case SK_IP:
870 if (s->iface) /* It's a broadcast socket */
4f22c981
MM
871#ifdef IPV6
872 bug("IPv6 has no broadcasts");
873#else
b5d9ee5c
MM
874 if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) < 0)
875 ERR("SO_BROADCAST");
4f22c981 876#endif
b5d9ee5c
MM
877 break;
878 case SK_UDP_MC:
879 case SK_IP_MC:
880 {
4f22c981
MM
881#ifdef IPV6
882 /* Fortunately, IPv6 socket interface is recent enough and therefore standardized */
883 ASSERT(s->iface && s->iface->addr);
67ece6df 884 if (ipa_nonzero(s->daddr))
4f22c981
MM
885 {
886 int t = s->iface->index;
67ece6df 887 int zero = 0;
4f22c981
MM
888 if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
889 ERR("IPV6_MULTICAST_HOPS");
890 if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0)
891 ERR("IPV6_MULTICAST_LOOP");
892 if (setsockopt(fd, SOL_IPV6, IPV6_MULTICAST_IF, &t, sizeof(t)) < 0)
893 ERR("IPV6_MULTICAST_IF");
894 }
895 if (has_src)
896 {
897 struct ipv6_mreq mreq;
898 set_inaddr(&mreq.ipv6mr_multiaddr, s->daddr);
f380aa60 899#ifdef CONFIG_IPV6_GLIBC_20
4f22c981 900 mreq.ipv6mr_ifindex = s->iface->index;
f380aa60
MM
901#else
902 mreq.ipv6mr_interface = s->iface->index;
b1a1faba 903#endif /* CONFIG_IPV6_GLIBC_20 */
4f22c981
MM
904 if (setsockopt(fd, SOL_IPV6, IPV6_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
905 ERR("IPV6_ADD_MEMBERSHIP");
906 }
907#else
908 /* With IPv4 there are zillions of different socket interface variants. Ugh. */
9a158361 909 ASSERT(s->iface && s->iface->addr);
67ece6df
MM
910 if (err = sysio_mcast_join(s))
911 goto bad;
b1a1faba 912#endif /* IPV6 */
b5d9ee5c
MM
913 break;
914 }
915 }
916 if (has_src)
917 {
918 int port;
919
920 if (type == SK_IP || type == SK_IP_MC)
921 port = 0;
922 else
923 {
924 port = s->sport;
925 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
926 ERR("SO_REUSEADDR");
927 }
928 fill_in_sockaddr(&sa, s->saddr, port);
7d72aadb
OF
929#ifdef CONFIG_SKIP_MC_BIND
930 if (type == SK_IP && bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
931#else
b5d9ee5c 932 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
7d72aadb 933#endif
b5d9ee5c
MM
934 ERR("bind");
935 }
936 fill_in_sockaddr(&sa, s->daddr, s->dport);
d51aa281
OZ
937
938 if (s->password)
939 {
940 int rv = sk_set_md5_auth_int(s, &sa, s->password);
941 if (rv < 0)
942 goto bad_no_log;
943 }
944
b5d9ee5c
MM
945 switch (type)
946 {
947 case SK_TCP_ACTIVE:
948 if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
949 sk_tcp_connected(s);
9cbf43eb
MM
950 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
951 errno != ECONNREFUSED && errno != EHOSTUNREACH)
b5d9ee5c
MM
952 ERR("connect");
953 break;
954 case SK_TCP_PASSIVE:
955 if (listen(fd, 8))
956 ERR("listen");
957 break;
4f22c981
MM
958 case SK_MAGIC:
959 break;
960 default:
320f4173 961 sk_alloc_bufs(s);
4f22c981
MM
962#ifdef IPV6
963#ifdef IPV6_MTU_DISCOVER
964 {
965 int dont = IPV6_PMTUDISC_DONT;
966 if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0)
967 ERR("IPV6_MTU_DISCOVER");
968 }
969#endif
970#else
971#ifdef IP_PMTUDISC
972 {
973 int dont = IP_PMTUDISC_DONT;
974 if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0)
975 ERR("IP_PMTUDISC");
976 }
977#endif
978#endif
b5d9ee5c
MM
979 }
980
38a608c5 981 sk_insert(s);
b5d9ee5c
MM
982 return 0;
983
984bad:
985 log(L_ERR "sk_open: %s: %m", err);
d51aa281 986bad_no_log:
b5d9ee5c
MM
987 close(fd);
988 s->fd = -1;
989 return -1;
990}
991
b93abffa
MM
992int
993sk_open_unix(sock *s, char *name)
994{
995 int fd;
996 struct sockaddr_un sa;
997 char *err;
998
999 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1000 if (fd < 0)
1001 die("sk_open_unix: socket: %m");
1002 s->fd = fd;
1003 if (err = sk_setup(s))
1004 goto bad;
1005 unlink(name);
68fa95cf
OZ
1006
1007 if (strlen(name) >= sizeof(sa.sun_path))
1008 die("sk_open_unix: path too long");
1009
b93abffa 1010 sa.sun_family = AF_UNIX;
97c6fa02 1011 strcpy(sa.sun_path, name);
0b3bf4b1 1012 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
b93abffa
MM
1013 ERR("bind");
1014 if (listen(fd, 8))
1015 ERR("listen");
38a608c5 1016 sk_insert(s);
b93abffa
MM
1017 return 0;
1018
1019bad:
1020 log(L_ERR "sk_open_unix: %s: %m", err);
1021 close(fd);
1022 s->fd = -1;
1023 return -1;
1024}
1025
b5d9ee5c
MM
1026static int
1027sk_maybe_write(sock *s)
1028{
1029 int e;
1030
1031 switch (s->type)
1032 {
1033 case SK_TCP:
b4b3b39e 1034 case SK_MAGIC:
b93abffa 1035 case SK_UNIX:
b5d9ee5c
MM
1036 while (s->ttx != s->tpos)
1037 {
1038 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1039 if (e < 0)
1040 {
1041 if (errno != EINTR && errno != EAGAIN)
1042 {
c025b852
OF
1043 s->ttx = s->tpos; /* empty tx buffer */
1044 s->err_hook(s, errno);
b5d9ee5c
MM
1045 return -1;
1046 }
1047 return 0;
1048 }
1049 s->ttx += e;
1050 }
1051 s->ttx = s->tpos = s->tbuf;
1052 return 1;
1053 case SK_UDP:
1054 case SK_UDP_MC:
1055 case SK_IP:
1056 case SK_IP_MC:
1057 {
4f22c981 1058 sockaddr sa;
b5d9ee5c
MM
1059
1060 if (s->tbuf == s->tpos)
1061 return 1;
1062 fill_in_sockaddr(&sa, s->faddr, s->fport);
b1a1faba 1063
b5d9ee5c
MM
1064 e = sendto(s->fd, s->tbuf, s->tpos - s->tbuf, 0, (struct sockaddr *) &sa, sizeof(sa));
1065 if (e < 0)
1066 {
1067 if (errno != EINTR && errno != EAGAIN)
1068 {
c025b852
OF
1069 s->ttx = s->tpos; /* empty tx buffer */
1070 s->err_hook(s, errno);
b5d9ee5c
MM
1071 return -1;
1072 }
1073 return 0;
1074 }
1075 s->tpos = s->tbuf;
1076 return 1;
1077 }
1078 default:
08c69a77 1079 bug("sk_maybe_write: unknown socket type %d", s->type);
b5d9ee5c
MM
1080 }
1081}
1082
525fa2c1
MM
1083/**
1084 * sk_send - send data to a socket
1085 * @s: socket
1086 * @len: number of bytes to send
1087 *
1088 * This function sends @len bytes of data prepared in the
1089 * transmit buffer of the socket @s to the network connection.
1090 * If the packet can be sent immediately, it does so and returns
1091 * 1, else it queues the packet for later processing, returns 0
1092 * and calls the @tx_hook of the socket when the transmission
1093 * takes place.
1094 */
b5d9ee5c
MM
1095int
1096sk_send(sock *s, unsigned len)
1097{
1098 s->faddr = s->daddr;
1099 s->fport = s->dport;
1100 s->ttx = s->tbuf;
1101 s->tpos = s->tbuf + len;
1102 return sk_maybe_write(s);
1103}
1104
525fa2c1
MM
1105/**
1106 * sk_send_to - send data to a specific destination
1107 * @s: socket
1108 * @len: number of bytes to send
1109 * @addr: IP address to send the packet to
1110 * @port: port to send the packet to
1111 *
2e9b2421 1112 * This is a sk_send() replacement for connection-less packet sockets
525fa2c1
MM
1113 * which allows destination of the packet to be chosen dynamically.
1114 */
b5d9ee5c
MM
1115int
1116sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1117{
1118 s->faddr = addr;
1119 s->fport = port;
1120 s->ttx = s->tbuf;
1121 s->tpos = s->tbuf + len;
1122 return sk_maybe_write(s);
1123}
1124
1125static int
1126sk_read(sock *s)
1127{
1128 switch (s->type)
1129 {
b5d9ee5c
MM
1130 case SK_TCP_PASSIVE:
1131 {
4f22c981 1132 sockaddr sa;
b93abffa
MM
1133 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP);
1134 }
1135 case SK_UNIX_PASSIVE:
1136 {
1137 struct sockaddr_un sa;
1138 return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX);
b5d9ee5c
MM
1139 }
1140 case SK_TCP:
b93abffa 1141 case SK_UNIX:
b5d9ee5c
MM
1142 {
1143 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1144
1145 if (c < 0)
1146 {
1147 if (errno != EINTR && errno != EAGAIN)
c025b852 1148 s->err_hook(s, errno);
b5d9ee5c
MM
1149 }
1150 else if (!c)
c025b852 1151 s->err_hook(s, 0);
b5d9ee5c
MM
1152 else
1153 {
1154 s->rpos += c;
1155 if (s->rx_hook(s, s->rpos - s->rbuf))
38a608c5
MM
1156 {
1157 /* We need to be careful since the socket could have been deleted by the hook */
1158 if (current_sock == s)
1159 s->rpos = s->rbuf;
1160 }
b5d9ee5c
MM
1161 return 1;
1162 }
1163 return 0;
1164 }
b4b3b39e
MM
1165 case SK_MAGIC:
1166 return s->rx_hook(s, 0);
b5d9ee5c
MM
1167 default:
1168 {
4f22c981 1169 sockaddr sa;
b5d9ee5c
MM
1170 int al = sizeof(sa);
1171 int e = recvfrom(s->fd, s->rbuf, s->rbsize, 0, (struct sockaddr *) &sa, &al);
1172
1173 if (e < 0)
1174 {
1175 if (errno != EINTR && errno != EAGAIN)
c025b852 1176 s->err_hook(s, errno);
b5d9ee5c
MM
1177 return 0;
1178 }
1179 s->rpos = s->rbuf + e;
b1a1faba 1180 get_sockaddr(&sa, &s->faddr, &s->fport, 1);
b5d9ee5c
MM
1181 s->rx_hook(s, e);
1182 return 1;
1183 }
1184 }
1185}
1186
38a608c5 1187static int
b5d9ee5c
MM
1188sk_write(sock *s)
1189{
320f4173
MM
1190 switch (s->type)
1191 {
1192 case SK_TCP_ACTIVE:
1193 {
1194 sockaddr sa;
1195 fill_in_sockaddr(&sa, s->daddr, s->dport);
09e4117c 1196 if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN)
320f4173
MM
1197 sk_tcp_connected(s);
1198 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
c025b852 1199 s->err_hook(s, errno);
38a608c5 1200 return 0;
320f4173 1201 }
320f4173 1202 default:
38a608c5
MM
1203 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1204 {
1205 s->tx_hook(s);
1206 return 1;
1207 }
1208 return 0;
320f4173 1209 }
b5d9ee5c
MM
1210}
1211
1212void
1213sk_dump_all(void)
1214{
1215 node *n;
1216 sock *s;
1217
1218 debug("Open sockets:\n");
1219 WALK_LIST(n, sock_list)
1220 {
1221 s = SKIP_BACK(sock, n, n);
1222 debug("%p ", s);
1223 sk_dump(&s->r);
1224 }
1225 debug("\n");
1226}
1227
1228#undef ERR
f782b72c 1229#undef WARN
b5d9ee5c
MM
1230
1231/*
1232 * Main I/O Loop
1233 */
1234
4c9dd1e4
MM
/* Polled by io_loop() before select(); each is cleared after its handler ran. */
volatile int async_config_flag;         /* Asynchronous reconfiguration scheduled */
volatile int async_dump_flag;           /* Asynchronous dump scheduled */
1237
b5d9ee5c
MM
1238void
1239io_init(void)
1240{
1241 init_list(&near_timers);
1242 init_list(&far_timers);
1243 init_list(&sock_list);
e8f73195 1244 init_list(&global_event_list);
7e5f5ffd 1245 krt_io_init();
fd91ae33
OZ
1246 init_times();
1247 update_times();
1248 srandom((int) now_real);
b5d9ee5c
MM
1249}
1250
1251void
1252io_loop(void)
1253{
1254 fd_set rd, wr;
1255 struct timeval timo;
1256 time_t tout;
30770df2 1257 int hi, events;
b5d9ee5c 1258 sock *s;
38a608c5 1259 node *n;
b5d9ee5c 1260
38a608c5 1261 sock_recalc_fdsets_p = 1;
b5d9ee5c
MM
1262 for(;;)
1263 {
30770df2 1264 events = ev_run_list(&global_event_list);
fd91ae33 1265 update_times();
b5d9ee5c
MM
1266 tout = tm_first_shot();
1267 if (tout <= now)
1268 {
1269 tm_shot();
1270 continue;
1271 }
30770df2
MM
1272 timo.tv_sec = events ? 0 : tout - now;
1273 timo.tv_usec = 0;
b5d9ee5c 1274
38a608c5
MM
1275 if (sock_recalc_fdsets_p)
1276 {
1277 sock_recalc_fdsets_p = 0;
1278 FD_ZERO(&rd);
1279 FD_ZERO(&wr);
1280 }
1281
b5d9ee5c
MM
1282 hi = 0;
1283 WALK_LIST(n, sock_list)
1284 {
1285 s = SKIP_BACK(sock, n, n);
1286 if (s->rx_hook)
1287 {
1288 FD_SET(s->fd, &rd);
1289 if (s->fd > hi)
1290 hi = s->fd;
1291 }
38a608c5
MM
1292 else
1293 FD_CLR(s->fd, &rd);
b5d9ee5c
MM
1294 if (s->tx_hook && s->ttx != s->tpos)
1295 {
1296 FD_SET(s->fd, &wr);
1297 if (s->fd > hi)
1298 hi = s->fd;
1299 }
38a608c5
MM
1300 else
1301 FD_CLR(s->fd, &wr);
b5d9ee5c
MM
1302 }
1303
4c9dd1e4
MM
1304 /*
1305 * Yes, this is racy. But even if the signal comes before this test
1306 * and entering select(), it gets caught on the next timer tick.
1307 */
1308
1309 if (async_config_flag)
1310 {
1311 async_config();
1312 async_config_flag = 0;
f4aabcee 1313 continue;
4c9dd1e4
MM
1314 }
1315 if (async_dump_flag)
1316 {
1317 async_dump();
1318 async_dump_flag = 0;
f4aabcee
MM
1319 continue;
1320 }
1321 if (async_shutdown_flag)
1322 {
1323 async_shutdown();
1324 async_shutdown_flag = 0;
1325 continue;
4c9dd1e4
MM
1326 }
1327
1328 /* And finally enter select() to find active sockets */
1329
b5d9ee5c
MM
1330 hi = select(hi+1, &rd, &wr, NULL, &timo);
1331 if (hi < 0)
1332 {
1333 if (errno == EINTR || errno == EAGAIN)
1334 continue;
1335 die("select: %m");
1336 }
1337 if (hi)
1338 {
38a608c5
MM
1339 current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); /* guaranteed to be non-empty */
1340 while (current_sock)
b5d9ee5c 1341 {
38a608c5
MM
1342 sock *s = current_sock;
1343 int e;
35164c50 1344 if (FD_ISSET(s->fd, &rd) && s->rx_hook)
38a608c5
MM
1345 do
1346 {
1347 e = sk_read(s);
1348 if (s != current_sock)
1349 goto next;
1350 }
35164c50 1351 while (e && s->rx_hook);
38a608c5
MM
1352 if (FD_ISSET(s->fd, &wr))
1353 do
1354 {
1355 e = sk_write(s);
1356 if (s != current_sock)
1357 goto next;
1358 }
1359 while (e);
1360 current_sock = sk_next(s);
1361 next: ;
b5d9ee5c
MM
1362 }
1363 }
1364 }
1365}
41c8976e
OF
1366
/*
 * Check whether another BIRD instance is already listening on the control
 * socket @path: connect to it and die when the connection succeeds.
 */
void
test_old_bird(char *path)
{
  int fd;
  struct sockaddr_un sa;

  fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");

  /* Same guard as in sk_open_unix(): the path plus NUL must fit into sun_path */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("test_old_bird: path too long");

  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");
  close(fd);
}
1384
1385