]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/unix/io.c
Refactoring of OSPF messages.
[thirdparty/bird.git] / sysdep / unix / io.c
CommitLineData
b5d9ee5c
MM
1/*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
38a608c5 4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
b1a1faba 5 * (c) 2004 Ondrej Filip <feela@network.cz>
b5d9ee5c
MM
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
607d9914
OZ
10/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
12#define _GNU_SOURCE 1
13
b5d9ee5c
MM
14#include <stdio.h>
15#include <stdlib.h>
01b776e1 16#include <time.h>
b5d9ee5c
MM
17#include <sys/time.h>
18#include <sys/types.h>
19#include <sys/socket.h>
46a82e9c 20#include <sys/uio.h>
b93abffa 21#include <sys/un.h>
b5d9ee5c 22#include <unistd.h>
a0b176e3 23#include <fcntl.h>
b5d9ee5c 24#include <errno.h>
05476c4d 25#include <net/if.h>
d0e9b36d 26#include <netinet/in.h>
48e5f32d
OZ
27#include <netinet/tcp.h>
28#include <netinet/udp.h>
93e868c7 29#include <netinet/icmp6.h>
b5d9ee5c
MM
30
31#include "nest/bird.h"
32#include "lib/lists.h"
33#include "lib/resource.h"
34#include "lib/timer.h"
35#include "lib/socket.h"
e8f73195 36#include "lib/event.h"
afa8937a 37#include "lib/string.h"
b5d9ee5c
MM
38#include "nest/iface.h"
39
40#include "lib/unix.h"
a2867cd9 41#include "lib/sysio.h"
b5d9ee5c 42
ea89da38 43/* Maximum number of calls of tx handler for one socket in one
4323099d
OZ
44 * select iteration. Should be small enough to not monopolize CPU by
45 * one protocol instance.
46 */
47#define MAX_STEPS 4
48
ea89da38
OZ
49/* Maximum number of calls of rx handler for all sockets in one select
50 iteration. RX callbacks are often much more costly so we limit
51 this to get small latencies */
52#define MAX_RX_STEPS 4
53
a9c986f9
MM
54/*
55 * Tracked Files
56 */
57
58struct rfile {
59 resource r;
60 FILE *f;
61};
62
63static void
64rf_free(resource *r)
65{
66 struct rfile *a = (struct rfile *) r;
67
68 fclose(a->f);
69}
70
71static void
72rf_dump(resource *r)
73{
74 struct rfile *a = (struct rfile *) r;
75
76 debug("(FILE *%p)\n", a->f);
77}
78
79static struct resclass rf_class = {
80 "FILE",
81 sizeof(struct rfile),
82 rf_free,
e81b440f 83 rf_dump,
acb60628 84 NULL,
e81b440f 85 NULL
a9c986f9
MM
86};
87
88void *
f78056fb 89tracked_fopen(pool *p, char *name, char *mode)
a9c986f9
MM
90{
91 FILE *f = fopen(name, mode);
92
93 if (f)
94 {
95 struct rfile *r = ralloc(p, &rf_class);
96 r->f = f;
97 }
98 return f;
99}
100
525fa2c1
MM
101/**
102 * DOC: Timers
103 *
104 * Timers are resources which represent a wish of a module to call
105 * a function at the specified time. The platform dependent code
58f7d004 106 * doesn't guarantee exact timing, only that a timer function
525fa2c1
MM
107 * won't be called before the requested time.
108 *
fd91ae33
OZ
109 * In BIRD, time is represented by values of the &bird_clock_t type
110 * which are integral numbers interpreted as a relative number of seconds since
111 * some fixed time point in past. The current time can be read
112 * from variable @now with reasonable accuracy and is monotonic. There is also
113 * a current 'absolute' time in variable @now_real reported by OS.
525fa2c1
MM
114 *
115 * Each timer is described by a &timer structure containing a pointer
116 * to the handler function (@hook), data private to this function (@data),
117 * time the function should be called at (@expires, 0 for inactive timers),
118 * for the other fields see |timer.h|.
b5d9ee5c
MM
119 */
120
121#define NEAR_TIMER_LIMIT 4
122
b5d9ee5c
MM
123static list near_timers, far_timers;
124static bird_clock_t first_far_timer = TIME_INFINITY;
125
002b6423 126/* now must be different from 0, because 0 is a special value in timer->expires */
a92cf57d 127bird_clock_t now = 1, now_real, boot_time;
fd91ae33
OZ
128
129static void
130update_times_plain(void)
131{
132 bird_clock_t new_time = time(NULL);
133 int delta = new_time - now_real;
134
135 if ((delta >= 0) && (delta < 60))
136 now += delta;
137 else if (now_real != 0)
138 log(L_WARN "Time jump, delta %d s", delta);
139
140 now_real = new_time;
141}
142
143static void
144update_times_gettime(void)
145{
146 struct timespec ts;
147 int rv;
148
149 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
150 if (rv != 0)
151 die("clock_gettime: %m");
152
153 if (ts.tv_sec != now) {
154 if (ts.tv_sec < now)
155 log(L_ERR "Monotonic timer is broken");
156
157 now = ts.tv_sec;
158 now_real = time(NULL);
159 }
160}
161
162static int clock_monotonic_available;
163
164static inline void
165update_times(void)
166{
167 if (clock_monotonic_available)
168 update_times_gettime();
169 else
170 update_times_plain();
171}
172
173static inline void
174init_times(void)
175{
176 struct timespec ts;
177 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
178 if (!clock_monotonic_available)
179 log(L_WARN "Monotonic timer is missing");
180}
181
b5d9ee5c
MM
182
183static void
184tm_free(resource *r)
185{
186 timer *t = (timer *) r;
187
188 tm_stop(t);
189}
190
191static void
192tm_dump(resource *r)
193{
194 timer *t = (timer *) r;
195
e8f73195 196 debug("(code %p, data %p, ", t->hook, t->data);
af847acc
MM
197 if (t->randomize)
198 debug("rand %d, ", t->randomize);
199 if (t->recurrent)
200 debug("recur %d, ", t->recurrent);
b5d9ee5c
MM
201 if (t->expires)
202 debug("expires in %d sec)\n", t->expires - now);
203 else
204 debug("inactive)\n");
205}
206
207static struct resclass tm_class = {
208 "Timer",
209 sizeof(timer),
210 tm_free,
e81b440f 211 tm_dump,
acb60628 212 NULL,
e81b440f 213 NULL
b5d9ee5c
MM
214};
215
525fa2c1
MM
216/**
217 * tm_new - create a timer
218 * @p: pool
219 *
220 * This function creates a new timer resource and returns
221 * a pointer to it. To use the timer, you need to fill in
222 * the structure fields and call tm_start() to start timing.
223 */
b5d9ee5c
MM
224timer *
225tm_new(pool *p)
226{
227 timer *t = ralloc(p, &tm_class);
b5d9ee5c
MM
228 return t;
229}
230
231static inline void
232tm_insert_near(timer *t)
233{
234 node *n = HEAD(near_timers);
235
236 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
237 n = n->next;
238 insert_node(&t->n, n->prev);
239}
240
525fa2c1
MM
241/**
242 * tm_start - start a timer
243 * @t: timer
244 * @after: number of seconds the timer should be run after
245 *
246 * This function schedules the hook function of the timer to
247 * be called after @after seconds. If the timer has been already
248 * started, it's @expire time is replaced by the new value.
249 *
250 * You can have set the @randomize field of @t, the timeout
251 * will be increased by a random number of seconds chosen
252 * uniformly from range 0 .. @randomize.
253 *
254 * You can call tm_start() from the handler function of the timer
255 * to request another run of the timer. Also, you can set the @recurrent
256 * field to have the timer re-added automatically with the same timeout.
257 */
b5d9ee5c
MM
258void
259tm_start(timer *t, unsigned after)
260{
261 bird_clock_t when;
262
263 if (t->randomize)
af847acc 264 after += random() % (t->randomize + 1);
b5d9ee5c
MM
265 when = now + after;
266 if (t->expires == when)
267 return;
268 if (t->expires)
269 rem_node(&t->n);
270 t->expires = when;
271 if (after <= NEAR_TIMER_LIMIT)
272 tm_insert_near(t);
273 else
274 {
275 if (!first_far_timer || first_far_timer > when)
276 first_far_timer = when;
277 add_tail(&far_timers, &t->n);
278 }
279}
280
525fa2c1
MM
281/**
282 * tm_stop - stop a timer
283 * @t: timer
284 *
285 * This function stops a timer. If the timer is already stopped,
286 * nothing happens.
287 */
b5d9ee5c
MM
288void
289tm_stop(timer *t)
290{
291 if (t->expires)
292 {
293 rem_node(&t->n);
294 t->expires = 0;
295 }
296}
297
298static void
299tm_dump_them(char *name, list *l)
300{
301 node *n;
302 timer *t;
303
304 debug("%s timers:\n", name);
305 WALK_LIST(n, *l)
306 {
307 t = SKIP_BACK(timer, n, n);
308 debug("%p ", t);
309 tm_dump(&t->r);
310 }
311 debug("\n");
312}
313
314void
315tm_dump_all(void)
316{
317 tm_dump_them("Near", &near_timers);
318 tm_dump_them("Far", &far_timers);
319}
320
321static inline time_t
322tm_first_shot(void)
323{
324 time_t x = first_far_timer;
325
326 if (!EMPTY_LIST(near_timers))
327 {
328 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
329 if (t->expires < x)
330 x = t->expires;
331 }
332 return x;
333}
334
335static void
336tm_shot(void)
337{
338 timer *t;
339 node *n, *m;
340
341 if (first_far_timer <= now)
342 {
28a9a189 343 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
b5d9ee5c
MM
344 first_far_timer = TIME_INFINITY;
345 n = HEAD(far_timers);
346 while (m = n->next)
347 {
348 t = SKIP_BACK(timer, n, n);
349 if (t->expires <= limit)
350 {
351 rem_node(n);
352 tm_insert_near(t);
353 }
354 else if (t->expires < first_far_timer)
355 first_far_timer = t->expires;
356 n = m;
357 }
358 }
359 while ((n = HEAD(near_timers)) -> next)
360 {
af847acc 361 int delay;
b5d9ee5c
MM
362 t = SKIP_BACK(timer, n, n);
363 if (t->expires > now)
364 break;
365 rem_node(n);
af847acc 366 delay = t->expires - now;
b5d9ee5c 367 t->expires = 0;
af847acc
MM
368 if (t->recurrent)
369 {
370 int i = t->recurrent - delay;
371 if (i < 0)
372 i = 0;
373 tm_start(t, i);
374 }
b5d9ee5c
MM
375 t->hook(t);
376 }
377}
378
0d3effcf
OF
379/**
380 * tm_parse_datetime - parse a date and time
381 * @x: datetime string
382 *
383 * tm_parse_datetime() takes a textual representation of
384 * a date and time (dd-mm-yyyy hh:mm:ss)
385 * and converts it to the corresponding value of type &bird_clock_t.
386 */
387bird_clock_t
388tm_parse_datetime(char *x)
389{
390 struct tm tm;
391 int n;
392 time_t t;
393
394 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
395 return tm_parse_date(x);
396 tm.tm_mon--;
397 tm.tm_year -= 1900;
398 t = mktime(&tm);
399 if (t == (time_t) -1)
400 return 0;
401 return t;
402}
525fa2c1
MM
403/**
404 * tm_parse_date - parse a date
405 * @x: date string
406 *
407 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
408 * and converts it to the corresponding value of type &bird_clock_t.
409 */
913f7dc9
MM
410bird_clock_t
411tm_parse_date(char *x)
412{
413 struct tm tm;
414 int n;
415 time_t t;
416
417 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
418 return 0;
419 tm.tm_mon--;
420 tm.tm_year -= 1900;
421 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
422 t = mktime(&tm);
423 if (t == (time_t) -1)
424 return 0;
425 return t;
426}
427
c37e7851
OZ
428static void
429tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
913f7dc9 430{
c37e7851
OZ
431 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
432 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
913f7dc9 433
c37e7851
OZ
434 if (delta < 20*3600)
435 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
436 else if (delta < 360*86400)
437 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
438 else
439 bsprintf(x, "%d", tm->tm_year+1900);
913f7dc9
MM
440}
441
c37e7851
OZ
442#include "conf/conf.h"
443
525fa2c1
MM
444/**
445 * tm_format_datetime - convert date and time to textual representation
446 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
447 * @t: time
448 *
fd91ae33
OZ
449 * This function formats the given relative time value @t to a textual
450 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
525fa2c1 451 */
7a88832e 452void
c37e7851 453tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
7a88832e 454{
c37e7851 455 const char *fmt_used;
7a88832e 456 struct tm *tm;
fd91ae33
OZ
457 bird_clock_t delta = now - t;
458 t = now_real - delta;
7a88832e 459 tm = localtime(&t);
7a88832e 460
c37e7851
OZ
461 if (fmt_spec->fmt1 == NULL)
462 return tm_format_reltime(x, tm, delta);
afa8937a 463
c37e7851
OZ
464 if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
465 fmt_used = fmt_spec->fmt1;
afa8937a 466 else
c37e7851
OZ
467 fmt_used = fmt_spec->fmt2;
468
469 int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
470 if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
471 strcpy(x, "<too-long>");
afa8937a
MM
472}
473
05476c4d 474
525fa2c1
MM
475/**
476 * DOC: Sockets
477 *
478 * Socket resources represent network connections. Their data structure (&socket)
479 * contains a lot of fields defining the exact type of the socket, the local and
480 * remote addresses and ports, pointers to socket buffers and finally pointers to
481 * hook functions to be called when new data have arrived to the receive buffer
482 * (@rx_hook), when the contents of the transmit buffer have been transmitted
483 * (@tx_hook) and when an error or connection close occurs (@err_hook).
484 *
38a608c5 485 * Freeing of sockets from inside socket hooks is perfectly safe.
b5d9ee5c
MM
486 */
487
abae6e9c
MM
488#ifndef SOL_IP
489#define SOL_IP IPPROTO_IP
490#endif
491
b1a1faba
OF
492#ifndef SOL_IPV6
493#define SOL_IPV6 IPPROTO_IPV6
494#endif
495
48e5f32d
OZ
496#ifndef SOL_ICMPV6
497#define SOL_ICMPV6 IPPROTO_ICMPV6
498#endif
499
500
05476c4d
OZ
501/*
502 * Sockaddr helper functions
503 */
38a608c5 504
05476c4d
OZ
/* Size of the socket address structure for @af; anything but AF_INET is treated as IPv6. */
static inline int sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
507
508static inline void
509sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port)
38a608c5 510{
05476c4d
OZ
511 memset(sa, 0, sizeof(struct sockaddr_in));
512#ifdef HAVE_SIN_LEN
513 sa->sin_len = sizeof(struct sockaddr_in);
514#endif
515 sa->sin_family = AF_INET;
516 sa->sin_port = htons(port);
517 sa->sin_addr = ipa_to_in4(a);
38a608c5 518}
b5d9ee5c 519
05476c4d
OZ
520static inline void
521sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
b5d9ee5c 522{
05476c4d
OZ
523 memset(sa, 0, sizeof(struct sockaddr_in6));
524#ifdef SIN6_LEN
525 sa->sin6_len = sizeof(struct sockaddr_in6);
526#endif
527 sa->sin6_family = AF_INET6;
528 sa->sin6_port = htons(port);
529 sa->sin6_flowinfo = 0;
530 sa->sin6_addr = ipa_to_in6(a);
531
532 if (ifa && ipa_is_link_local(a))
533 sa->sin6_scope_id = ifa->index;
4da25acb 534}
b5d9ee5c 535
05476c4d
OZ
536void
537sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
4da25acb 538{
05476c4d
OZ
539 if (af == AF_INET)
540 sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
541 else if (af == AF_INET6)
542 sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
543 else
544 bug("Unknown AF");
4da25acb
MM
545}
546
05476c4d
OZ
547static inline void
548sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port)
4da25acb 549{
05476c4d
OZ
550 *port = ntohs(sa->sin_port);
551 *a = ipa_from_in4(sa->sin_addr);
b5d9ee5c
MM
552}
553
05476c4d
OZ
554static inline void
555sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
48e5f32d 556{
05476c4d
OZ
557 *port = ntohs(sa->sin6_port);
558 *a = ipa_from_in6(sa->sin6_addr);
48e5f32d 559
05476c4d
OZ
560 if (ifa && ipa_is_link_local(*a))
561 *ifa = if_find_by_index(sa->sin6_scope_id);
48e5f32d
OZ
562}
563
05476c4d
OZ
564int
565sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
48e5f32d 566{
05476c4d
OZ
567 if (sa->sa.sa_family != af)
568 goto fail;
48e5f32d 569
05476c4d
OZ
570 if (af == AF_INET)
571 sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
572 else if (af == AF_INET6)
573 sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
574 else
575 goto fail;
48e5f32d 576
05476c4d 577 return 0;
48e5f32d 578
05476c4d
OZ
579 fail:
580 *a = IPA_NONE;
581 *port = 0;
582 return -1;
48e5f32d
OZ
583}
584
48e5f32d 585
05476c4d
OZ
586/*
587 * IPv6 multicast syscalls
588 */
4da25acb 589
05476c4d 590/* Fortunately standardized in RFC 3493 */
b5d9ee5c 591
05476c4d
OZ
/* Initializer of struct ipv6_mreq for group @maddr on interface @ifa.
   The @ifa argument is parenthesized so any pointer expression works. */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
b5d9ee5c 594
05476c4d
OZ
595static inline int
596sk_setup_multicast6(sock *s)
b5d9ee5c 597{
05476c4d
OZ
598 int index = s->iface->index;
599 int ttl = s->ttl;
600 int n = 0;
b5d9ee5c 601
05476c4d
OZ
602 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
603 ERR("IPV6_MULTICAST_IF");
b5d9ee5c 604
05476c4d
OZ
605 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
606 ERR("IPV6_MULTICAST_HOPS");
4f22c981 607
05476c4d
OZ
608 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
609 ERR("IPV6_MULTICAST_LOOP");
4f22c981 610
05476c4d 611 return 0;
061ab802
OZ
612}
613
05476c4d
OZ
614static inline int
615sk_join_group6(sock *s, ip_addr maddr)
4f22c981 616{
05476c4d 617 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
eb1451a3 618
05476c4d
OZ
619 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
620 ERR("IPV6_JOIN_GROUP");
4f22c981 621
05476c4d 622 return 0;
b5d9ee5c
MM
623}
624
05476c4d
OZ
625static inline int
626sk_leave_group6(sock *s, ip_addr maddr)
b5d9ee5c 627{
05476c4d 628 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
b5d9ee5c 629
05476c4d
OZ
630 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
631 ERR("IPV6_LEAVE_GROUP");
632
633 return 0;
634}
4f22c981 635
bed41728 636
05476c4d
OZ
637/*
638 * IPv6 packet control messages
639 */
bed41728 640
05476c4d 641/* Also standardized, in RFC 3542 */
bed41728 642
dcc60494
OZ
643/*
644 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
645 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
646 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
647 * RFC and we use IPV6_PKTINFO.
648 */
649#ifndef IPV6_RECVPKTINFO
650#define IPV6_RECVPKTINFO IPV6_PKTINFO
651#endif
70e212f9
OZ
652/*
653 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
654 */
655#ifndef IPV6_RECVHOPLIMIT
656#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
657#endif
dcc60494 658
70e212f9 659
05476c4d
OZ
660#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
661#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
bed41728 662
05476c4d
OZ
663static inline int
664sk_request_cmsg6_pktinfo(sock *s)
665{
666 int y = 1;
70e212f9 667
05476c4d
OZ
668 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
669 ERR("IPV6_RECVPKTINFO");
670
671 return 0;
bed41728
OZ
672}
673
05476c4d
OZ
674static inline int
675sk_request_cmsg6_ttl(sock *s)
bed41728 676{
05476c4d 677 int y = 1;
bed41728 678
05476c4d
OZ
679 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
680 ERR("IPV6_RECVHOPLIMIT");
70e212f9 681
05476c4d
OZ
682 return 0;
683}
70e212f9 684
05476c4d
OZ
685static inline void
686sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
687{
688 if (cm->cmsg_type == IPV6_PKTINFO)
70e212f9 689 {
05476c4d
OZ
690 struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
691 s->laddr = ipa_from_in6(pi->ipi6_addr);
692 s->lifindex = pi->ipi6_ifindex;
70e212f9 693 }
05476c4d 694}
70e212f9 695
05476c4d
OZ
696static inline void
697sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
698{
699 if (cm->cmsg_type == IPV6_HOPLIMIT)
700 s->rcv_ttl = * (int *) CMSG_DATA(cm);
bed41728
OZ
701}
702
05476c4d
OZ
703static inline void
704sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
bed41728
OZ
705{
706 struct cmsghdr *cm;
707 struct in6_pktinfo *pi;
8945f73d 708 int controllen = 0;
bed41728 709
bed41728
OZ
710 msg->msg_control = cbuf;
711 msg->msg_controllen = cbuflen;
712
713 cm = CMSG_FIRSTHDR(msg);
48e5f32d 714 cm->cmsg_level = SOL_IPV6;
bed41728
OZ
715 cm->cmsg_type = IPV6_PKTINFO;
716 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
8945f73d 717 controllen += CMSG_SPACE(sizeof(*pi));
bed41728
OZ
718
719 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
bed41728 720 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
05476c4d 721 pi->ipi6_addr = ipa_to_in6(s->saddr);
bed41728 722
8945f73d 723 msg->msg_controllen = controllen;
bed41728 724}
48e5f32d 725
bed41728 726
05476c4d
OZ
727/*
728 * Miscellaneous socket syscalls
729 */
730
731static inline int
732sk_set_ttl4(sock *s, int ttl)
a39b165e 733{
05476c4d
OZ
734 if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
735 ERR("IP_TTL");
736
737 return 0;
a39b165e
OZ
738}
739
05476c4d
OZ
740static inline int
741sk_set_ttl6(sock *s, int ttl)
742{
743 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
744 ERR("IPV6_UNICAST_HOPS");
38a608c5 745
05476c4d
OZ
746 return 0;
747}
748
749static inline int
750sk_set_tos4(sock *s, int tos)
b5d9ee5c 751{
05476c4d
OZ
752 if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
753 ERR("IP_TOS");
b5d9ee5c 754
05476c4d
OZ
755 return 0;
756}
ef4a50be 757
05476c4d
OZ
758static inline int
759sk_set_tos6(sock *s, int tos)
760{
761 if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
762 ERR("IPV6_TCLASS");
48e5f32d 763
05476c4d
OZ
764 return 0;
765}
48e5f32d 766
48e5f32d 767
05476c4d
OZ
768/*
769 * Public socket functions
770 */
48e5f32d 771
05476c4d
OZ
772/**
773 * sk_setup_multicast - enable multicast for given socket
774 * @s: socket
775 *
776 * Prepare transmission of multicast packets for given datagram socket.
777 * The socket must have defined @iface.
778 *
779 * Result: 0 for success, -1 for an error.
780 */
48e5f32d 781
05476c4d
OZ
782int
783sk_setup_multicast(sock *s)
784{
785 ASSERT(s->iface);
48e5f32d 786
05476c4d
OZ
787 if (sk_is_ipv4(s))
788 return sk_setup_multicast4(s);
789 else
790 return sk_setup_multicast6(s);
791}
48e5f32d 792
05476c4d
OZ
793/**
794 * sk_join_group - join multicast group for given socket
795 * @s: socket
796 * @maddr: multicast address
797 *
798 * Join multicast group for given datagram socket and associated interface.
799 * The socket must have defined @iface.
800 *
801 * Result: 0 for success, -1 for an error.
802 */
789772ed 803
05476c4d
OZ
804int
805sk_join_group(sock *s, ip_addr maddr)
806{
807 if (sk_is_ipv4(s))
808 return sk_join_group4(s, maddr);
809 else
810 return sk_join_group6(s, maddr);
811}
ef4a50be 812
05476c4d
OZ
813/**
814 * sk_leave_group - leave multicast group for given socket
815 * @s: socket
816 * @maddr: multicast address
817 *
818 * Leave multicast group for given datagram socket and associated interface.
819 * The socket must have defined @iface.
820 *
821 * Result: 0 for success, -1 for an error.
822 */
789772ed 823
05476c4d
OZ
824int
825sk_leave_group(sock *s, ip_addr maddr)
826{
827 if (sk_is_ipv4(s))
828 return sk_leave_group4(s, maddr);
829 else
830 return sk_leave_group6(s, maddr);
b5d9ee5c
MM
831}
832
a39b165e 833/**
05476c4d
OZ
834 * sk_setup_broadcast - enable broadcast for given socket
835 * @s: socket
836 *
837 * Allow reception and transmission of broadcast packets for given datagram
838 * socket. The socket must have defined @iface. For transmission, packets should
839 * be send to @brd address of @iface.
840 *
841 * Result: 0 for success, -1 for an error.
842 */
843
844int
845sk_setup_broadcast(sock *s)
846{
847 int y = 1;
848
849 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
850 ERR("SO_BROADCAST");
851
852 return 0;
853}
854
855/**
856 * sk_set_ttl - set transmit TTL for given socket
a39b165e
OZ
857 * @s: socket
858 * @ttl: TTL value
859 *
05476c4d
OZ
860 * Set TTL for already opened connections when TTL was not set before. Useful
861 * for accepted connections when different ones should have different TTL.
a39b165e
OZ
862 *
863 * Result: 0 for success, -1 for an error.
864 */
865
866int
867sk_set_ttl(sock *s, int ttl)
868{
a39b165e 869 s->ttl = ttl;
a39b165e 870
05476c4d
OZ
871 if (sk_is_ipv4(s))
872 return sk_set_ttl4(s, ttl);
873 else
874 return sk_set_ttl6(s, ttl);
a39b165e
OZ
875}
876
b1b19433 877/**
05476c4d 878 * sk_set_min_ttl - set minimal accepted TTL for given socket
b1b19433
OZ
879 * @s: socket
880 * @ttl: TTL value
881 *
05476c4d
OZ
882 * Set minimal accepted TTL for given socket. Can be used for TTL security.
883 * implementations.
b1b19433
OZ
884 *
885 * Result: 0 for success, -1 for an error.
886 */
887
888int
889sk_set_min_ttl(sock *s, int ttl)
890{
05476c4d
OZ
891 if (sk_is_ipv4(s))
892 return sk_set_min_ttl4(s, ttl);
893 else
894 return sk_set_min_ttl6(s, ttl);
b1b19433 895}
d51aa281 896
05476c4d 897#if 0
d51aa281 898/**
05476c4d 899 * sk_set_md5_auth - add / remove MD5 security association for given socket
d51aa281
OZ
900 * @s: socket
901 * @a: IP address of the other side
eb1451a3 902 * @ifa: Interface for link-local IP address
d51aa281
OZ
903 * @passwd: password used for MD5 authentication
904 *
05476c4d
OZ
905 * In TCP MD5 handling code in kernel, there is a set of pairs (address,
906 * password) used to choose password according to address of the other side.
907 * This function is useful for listening socket, for active sockets it is enough
908 * to set s->password field.
d51aa281
OZ
909 *
910 * When called with passwd != NULL, the new pair is added,
911 * When called with passwd == NULL, the existing pair is removed.
912 *
913 * Result: 0 for success, -1 for an error.
914 */
915
916int
eb1451a3 917sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
05476c4d
OZ
918{ DUMMY; }
919#endif
f9c799a0 920
05476c4d
OZ
921/**
922 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
923 * @s: socket
924 * @offset: offset
925 *
926 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
927 * kernel will automatically fill it for outgoing packets and check it for
928 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
929 * known to the kernel.
930 *
931 * Result: 0 for success, -1 for an error.
932 */
f9c799a0 933
4ac7c834
OZ
934int
935sk_set_ipv6_checksum(sock *s, int offset)
936{
48e5f32d 937 if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
05476c4d 938 ERR("IPV6_CHECKSUM");
4ac7c834
OZ
939
940 return 0;
941}
942
93e868c7 943int
05476c4d 944sk_set_icmp6_filter(sock *s, int p1, int p2)
93e868c7
OZ
945{
946 /* a bit of lame interface, but it is here only for Radv */
947 struct icmp6_filter f;
948
949 ICMP6_FILTER_SETBLOCKALL(&f);
950 ICMP6_FILTER_SETPASS(p1, &f);
951 ICMP6_FILTER_SETPASS(p2, &f);
952
48e5f32d 953 if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
05476c4d 954 ERR("ICMP6_FILTER");
93e868c7
OZ
955
956 return 0;
957}
958
05476c4d
OZ
959void
960sk_log_error(sock *s, const char *p)
961{
962 log(L_ERR "%s: Socket error: %s%#m", p, s->err);
963}
964
965
966/*
967 * Actual struct birdsock code
968 */
969
970static list sock_list;
971static struct birdsock *current_sock;
972static struct birdsock *stored_sock;
973static int sock_recalc_fdsets_p;
974
975static inline sock *
976sk_next(sock *s)
977{
978 if (!s->n.next->next)
979 return NULL;
980 else
981 return SKIP_BACK(sock, n, s->n.next);
982}
983
984static void
985sk_alloc_bufs(sock *s)
986{
987 if (!s->rbuf && s->rbsize)
988 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
989 s->rpos = s->rbuf;
990 if (!s->tbuf && s->tbsize)
991 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
992 s->tpos = s->ttx = s->tbuf;
993}
994
995static void
996sk_free_bufs(sock *s)
997{
998 if (s->rbuf_alloc)
999 {
1000 xfree(s->rbuf_alloc);
1001 s->rbuf = s->rbuf_alloc = NULL;
1002 }
1003 if (s->tbuf_alloc)
1004 {
1005 xfree(s->tbuf_alloc);
1006 s->tbuf = s->tbuf_alloc = NULL;
1007 }
1008}
1009
1010static void
1011sk_free(resource *r)
1012{
1013 sock *s = (sock *) r;
1014
1015 sk_free_bufs(s);
1016 if (s->fd >= 0)
1017 {
1018 close(s->fd);
1019
1020 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1021 if (s->flags & SKF_THREAD)
1022 return;
1023
1024 if (s == current_sock)
1025 current_sock = sk_next(s);
1026 if (s == stored_sock)
1027 stored_sock = sk_next(s);
1028 rem_node(&s->n);
1029 sock_recalc_fdsets_p = 1;
1030 }
1031}
1032
1033void
1034sk_set_rbsize(sock *s, uint val)
1035{
1036 ASSERT(s->rbuf_alloc == s->rbuf);
1037
1038 if (s->rbsize == val)
1039 return;
1040
1041 s->rbsize = val;
1042 xfree(s->rbuf_alloc);
1043 s->rbuf_alloc = xmalloc(val);
1044 s->rpos = s->rbuf = s->rbuf_alloc;
1045}
1046
1047void
1048sk_set_tbsize(sock *s, uint val)
1049{
1050 ASSERT(s->tbuf_alloc == s->tbuf);
1051
1052 if (s->tbsize == val)
1053 return;
1054
1055 byte *old_tbuf = s->tbuf;
1056
1057 s->tbsize = val;
1058 s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1059 s->tpos = s->tbuf + (s->tpos - old_tbuf);
1060 s->ttx = s->tbuf + (s->ttx - old_tbuf);
1061}
1062
1063void
1064sk_set_tbuf(sock *s, void *tbuf)
1065{
1066 s->tbuf = tbuf ?: s->tbuf_alloc;
1067 s->ttx = s->tpos = s->tbuf;
1068}
1069
1070void
1071sk_reallocate(sock *s)
1072{
1073 sk_free_bufs(s);
1074 sk_alloc_bufs(s);
1075}
1076
1077static void
1078sk_dump(resource *r)
1079{
1080 sock *s = (sock *) r;
1081 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1082
1083 debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1084 sk_type_names[s->type],
1085 s->data,
1086 s->saddr,
1087 s->sport,
1088 s->daddr,
1089 s->dport,
1090 s->tos,
1091 s->ttl,
1092 s->iface ? s->iface->name : "none");
1093}
1094
1095static struct resclass sk_class = {
1096 "Socket",
1097 sizeof(sock),
1098 sk_free,
1099 sk_dump,
1100 NULL,
1101 NULL
1102};
1103
1104/**
1105 * sk_new - create a socket
1106 * @p: pool
1107 *
1108 * This function creates a new socket resource. If you want to use it,
1109 * you need to fill in all the required fields of the structure and
1110 * call sk_open() to do the actual opening of the socket.
1111 *
1112 * The real function name is sock_new(), sk_new() is a macro wrapper
1113 * to avoid collision with OpenSSL.
1114 */
1115sock *
1116sock_new(pool *p)
1117{
1118 sock *s = ralloc(p, &sk_class);
1119 s->pool = p;
1120 // s->saddr = s->daddr = IPA_NONE;
1121 s->tos = s->priority = s->ttl = -1;
1122 s->fd = -1;
1123 return s;
1124}
1125
1126static int
1127sk_setup(sock *s)
f9c799a0 1128{
05476c4d
OZ
1129 int y = 1;
1130 int fd = s->fd;
f9c799a0 1131
05476c4d
OZ
1132 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1133 ERR("O_NONBLOCK");
f9c799a0 1134
05476c4d
OZ
1135 if (!s->af)
1136 return 0;
f9c799a0 1137
05476c4d
OZ
1138 if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1139 s->flags |= SKF_PKTINFO;
f9c799a0 1140
05476c4d
OZ
1141#ifdef CONFIG_USE_HDRINCL
1142 if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1143 {
1144 s->flags &= ~SKF_PKTINFO;
1145 s->flags |= SKF_HDRINCL;
1146 if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1147 ERR("IP_HDRINCL");
1148 }
48e5f32d
OZ
1149#endif
1150
05476c4d
OZ
1151 if (s->iface)
1152 {
1153#ifdef SO_BINDTODEVICE
1154 struct ifreq ifr;
1155 strcpy(ifr.ifr_name, s->iface->name);
1156 if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1157 ERR("SO_BINDTODEVICE");
1158#endif
f1aceff5 1159
05476c4d
OZ
1160#ifdef CONFIG_UNIX_DONTROUTE
1161 if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1162 ERR("SO_DONTROUTE");
1163#endif
1164 }
f9c799a0 1165
05476c4d
OZ
1166 if (s->priority >= 0)
1167 if (sk_set_priority(s, s->priority) < 0)
f9c799a0 1168 return -1;
f9c799a0 1169
05476c4d
OZ
1170 if (sk_is_ipv4(s))
1171 {
1172 if (s->flags & SKF_LADDR_RX)
1173 if (sk_request_cmsg4_pktinfo(s) < 0)
1174 return -1;
f9c799a0 1175
05476c4d
OZ
1176 if (s->flags & SKF_TTL_RX)
1177 if (sk_request_cmsg4_ttl(s) < 0)
1178 return -1;
f9c799a0 1179
05476c4d
OZ
1180 if ((s->type == SK_UDP) || (s->type == SK_IP))
1181 if (sk_disable_mtu_disc4(s) < 0)
1182 return -1;
f9c799a0 1183
05476c4d
OZ
1184 if (s->ttl >= 0)
1185 if (sk_set_ttl4(s, s->ttl) < 0)
1186 return -1;
f9c799a0 1187
05476c4d
OZ
1188 if (s->tos >= 0)
1189 if (sk_set_tos4(s, s->tos) < 0)
1190 return -1;
1191 }
f9c799a0 1192
05476c4d
OZ
1193 if (sk_is_ipv6(s))
1194 {
1195 if (s->flags & SKF_V6ONLY)
1196 if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1197 ERR("IPV6_V6ONLY");
f9c799a0 1198
05476c4d
OZ
1199 if (s->flags & SKF_LADDR_RX)
1200 if (sk_request_cmsg6_pktinfo(s) < 0)
1201 return -1;
f9c799a0 1202
05476c4d
OZ
1203 if (s->flags & SKF_TTL_RX)
1204 if (sk_request_cmsg6_ttl(s) < 0)
1205 return -1;
f9c799a0 1206
05476c4d
OZ
1207 if ((s->type == SK_UDP) || (s->type == SK_IP))
1208 if (sk_disable_mtu_disc6(s) < 0)
1209 return -1;
f9c799a0 1210
05476c4d
OZ
1211 if (s->ttl >= 0)
1212 if (sk_set_ttl6(s, s->ttl) < 0)
1213 return -1;
f9c799a0 1214
05476c4d
OZ
1215 if (s->tos >= 0)
1216 if (sk_set_tos6(s, s->tos) < 0)
1217 return -1;
1218 }
f9c799a0
OZ
1219
1220 return 0;
1221}
1222
05476c4d
OZ
1223static void
1224sk_insert(sock *s)
f9c799a0 1225{
05476c4d
OZ
1226 add_tail(&sock_list, &s->n);
1227 sock_recalc_fdsets_p = 1;
f9c799a0
OZ
1228}
1229
b93abffa 1230static void
b5d9ee5c
MM
1231sk_tcp_connected(sock *s)
1232{
05476c4d
OZ
1233 sockaddr sa;
1234 int sa_len = sizeof(sa);
1235
1236 if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1237 (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1238 log(L_WARN "SOCK: Cannot get local IP address for TCP>");
9be9a264 1239
b5d9ee5c
MM
1240 s->type = SK_TCP;
1241 sk_alloc_bufs(s);
320f4173 1242 s->tx_hook(s);
b5d9ee5c
MM
1243}
1244
b93abffa 1245static int
05476c4d 1246sk_passive_connected(sock *s, int type)
b93abffa 1247{
05476c4d
OZ
1248 sockaddr loc_sa, rem_sa;
1249 int loc_sa_len = sizeof(loc_sa);
1250 int rem_sa_len = sizeof(rem_sa);
cf31112f 1251
05476c4d
OZ
1252 int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1253 if (fd < 0)
1254 {
1255 if ((errno != EINTR) && (errno != EAGAIN))
c025b852 1256 s->err_hook(s, errno);
05476c4d
OZ
1257 return 0;
1258 }
1259
1260 sock *t = sk_new(s->pool);
1261 t->type = type;
1262 t->fd = fd;
1263 t->af = s->af;
1264 t->ttl = s->ttl;
1265 t->tos = s->tos;
1266 t->rbsize = s->rbsize;
1267 t->tbsize = s->tbsize;
1268
1269 if (type == SK_TCP)
1270 {
1271 if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1272 (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1273 log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1274
1275 if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1276 log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1277 }
1278
1279 if (sk_setup(t) < 0)
1280 {
1281 /* FIXME: Call err_hook instead ? */
1282 log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1283
1284 /* FIXME: handle it better in rfree() */
1285 close(t->fd);
1286 t->fd = -1;
1287 rfree(t);
1288 return 1;
1289 }
1290
1291 sk_insert(t);
1292 sk_alloc_bufs(t);
1293 s->rx_hook(t, 0);
1294 return 1;
b93abffa
MM
1295}
1296
525fa2c1
MM
1297/**
1298 * sk_open - open a socket
1299 * @s: socket
1300 *
1301 * This function takes a socket resource created by sk_new() and
1302 * initialized by the user and binds a corresponding network connection
1303 * to it.
1304 *
1305 * Result: 0 for success, -1 for an error.
1306 */
b5d9ee5c
MM
1307int
1308sk_open(sock *s)
1309{
05476c4d
OZ
1310 int af = BIRD_AF;
1311 int fd = -1;
48e5f32d
OZ
1312 int do_bind = 0;
1313 int bind_port = 0;
1314 ip_addr bind_addr = IPA_NONE;
1315 sockaddr sa;
b5d9ee5c 1316
48e5f32d 1317 switch (s->type)
05476c4d
OZ
1318 {
1319 case SK_TCP_ACTIVE:
1320 s->ttx = ""; /* Force s->ttx != s->tpos */
1321 /* Fall thru */
1322 case SK_TCP_PASSIVE:
1323 fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1324 bind_port = s->sport;
1325 bind_addr = s->saddr;
1326 do_bind = bind_port || ipa_nonzero(bind_addr);
1327 break;
1328
1329 case SK_UDP:
1330 fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1331 bind_port = s->sport;
1332 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1333 do_bind = 1;
1334 break;
1335
1336 case SK_IP:
1337 fd = socket(af, SOCK_RAW, s->dport);
1338 bind_port = 0;
1339 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1340 do_bind = ipa_nonzero(bind_addr);
1341 break;
1342
1343 case SK_MAGIC:
1344 af = 0;
1345 fd = s->fd;
1346 break;
1347
1348 default:
1349 bug("sk_open() called for invalid sock type %d", s->type);
1350 }
1351
b5d9ee5c 1352 if (fd < 0)
05476c4d
OZ
1353 ERR("socket");
1354
1355 s->af = af;
b5d9ee5c
MM
1356 s->fd = fd;
1357
05476c4d
OZ
1358 if (sk_setup(s) < 0)
1359 goto err;
38a608c5 1360
48e5f32d 1361 if (do_bind)
05476c4d
OZ
1362 {
1363 if (bind_port)
b5d9ee5c 1364 {
05476c4d
OZ
1365 int y = 1;
1366
1367 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1368 ERR2("SO_REUSEADDR");
48e5f32d 1369
8931425d 1370#ifdef CONFIG_NO_IFACE_BIND
05476c4d
OZ
1371 /* Workaround missing ability to bind to an iface */
1372 if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1373 {
1374 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1375 ERR2("SO_REUSEPORT");
1376 }
8931425d 1377#endif
b5d9ee5c 1378 }
48e5f32d 1379
05476c4d
OZ
1380 sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
1381 if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1382 ERR2("bind");
1383 }
d51aa281
OZ
1384
1385 if (s->password)
05476c4d
OZ
1386 if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0)
1387 goto err;
d51aa281 1388
48e5f32d 1389 switch (s->type)
05476c4d
OZ
1390 {
1391 case SK_TCP_ACTIVE:
1392 sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
1393 if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1394 sk_tcp_connected(s);
1395 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1396 errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1397 ERR2("connect");
1398 break;
1399
1400 case SK_TCP_PASSIVE:
1401 if (listen(fd, 8) < 0)
1402 ERR2("listen");
1403 break;
1404
1405 case SK_MAGIC:
1406 break;
1407
1408 default:
1409 sk_alloc_bufs(s);
1410 }
b5d9ee5c 1411
bf139664
OZ
1412 if (!(s->flags & SKF_THREAD))
1413 sk_insert(s);
b5d9ee5c
MM
1414 return 0;
1415
05476c4d 1416err:
b5d9ee5c
MM
1417 close(fd);
1418 s->fd = -1;
1419 return -1;
1420}
1421
05476c4d 1422int
b93abffa
MM
1423sk_open_unix(sock *s, char *name)
1424{
b93abffa 1425 struct sockaddr_un sa;
05476c4d
OZ
1426 int fd;
1427
1428 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
b93abffa
MM
1429
1430 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1431 if (fd < 0)
05476c4d
OZ
1432 return -1;
1433
1434 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1435 return -1;
68fa95cf 1436
97e46d28 1437 /* Path length checked in test_old_bird() */
b93abffa 1438 sa.sun_family = AF_UNIX;
97c6fa02 1439 strcpy(sa.sun_path, name);
05476c4d 1440
0b3bf4b1 1441 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
05476c4d
OZ
1442 return -1;
1443
1444 if (listen(fd, 8) < 0)
1445 return -1;
1446
1447 s->fd = fd;
38a608c5 1448 sk_insert(s);
05476c4d
OZ
1449 return 0;
1450}
1451
1452
/* Control-message buffer size for receiving: packet info plus TTL, IPv4 or IPv6, whichever is larger */
#define CMSG_RX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO + CMSG4_SPACE_TTL, CMSG6_SPACE_PKTINFO + CMSG6_SPACE_TTL)

/* Control-message buffer size for sending: packet info only, IPv4 or IPv6, whichever is larger */
#define CMSG_TX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO, CMSG6_SPACE_PKTINFO)
1456
1457static void
1458sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1459{
1460 if (sk_is_ipv4(s))
1461 sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1462 else
1463 sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1464}
1465
1466static void
1467sk_process_cmsgs(sock *s, struct msghdr *msg)
1468{
1469 struct cmsghdr *cm;
1470
1471 s->laddr = IPA_NONE;
1472 s->lifindex = 0;
1473 s->rcv_ttl = -1;
1474
1475 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1476 {
1477 if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1478 {
1479 sk_process_cmsg4_pktinfo(s, cm);
1480 sk_process_cmsg4_ttl(s, cm);
1481 }
b93abffa 1482
05476c4d
OZ
1483 if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1484 {
1485 sk_process_cmsg6_pktinfo(s, cm);
1486 sk_process_cmsg6_ttl(s, cm);
1487 }
1488 }
b93abffa
MM
1489}
1490
48e5f32d
OZ
1491
1492static inline int
1493sk_sendmsg(sock *s)
1494{
1495 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1496 byte cmsg_buf[CMSG_TX_SPACE];
1497 sockaddr dst;
1498
05476c4d 1499 sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
48e5f32d
OZ
1500
1501 struct msghdr msg = {
05476c4d
OZ
1502 .msg_name = &dst.sa,
1503 .msg_namelen = SA_LEN(dst),
48e5f32d
OZ
1504 .msg_iov = &iov,
1505 .msg_iovlen = 1
1506 };
1507
1508#ifdef CONFIG_USE_HDRINCL
1509 byte hdr[20];
1510 struct iovec iov2[2] = { {hdr, 20}, iov };
1511
1512 if (s->flags & SKF_HDRINCL)
1513 {
05476c4d 1514 sk_prepare_ip_header(s, hdr, iov.iov_len);
48e5f32d
OZ
1515 msg.msg_iov = iov2;
1516 msg.msg_iovlen = 2;
1517 }
1518#endif
1519
1520 if (s->flags & SKF_PKTINFO)
05476c4d 1521 sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
48e5f32d
OZ
1522
1523 return sendmsg(s->fd, &msg, 0);
1524}
1525
1526static inline int
1527sk_recvmsg(sock *s)
1528{
1529 struct iovec iov = {s->rbuf, s->rbsize};
1530 byte cmsg_buf[CMSG_RX_SPACE];
1531 sockaddr src;
1532
1533 struct msghdr msg = {
05476c4d
OZ
1534 .msg_name = &src.sa,
1535 .msg_namelen = sizeof(src), // XXXX ??
48e5f32d
OZ
1536 .msg_iov = &iov,
1537 .msg_iovlen = 1,
1538 .msg_control = cmsg_buf,
1539 .msg_controllen = sizeof(cmsg_buf),
1540 .msg_flags = 0
1541 };
1542
1543 int rv = recvmsg(s->fd, &msg, 0);
1544 if (rv < 0)
1545 return rv;
1546
1547 //ifdef IPV4
1548 // if (cf_type == SK_IP)
1549 // rv = ipv4_skip_header(pbuf, rv);
1550 //endif
1551
05476c4d
OZ
1552 sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1553 sk_process_cmsgs(s, &msg);
48e5f32d
OZ
1554
1555 if (msg.msg_flags & MSG_TRUNC)
1556 s->flags |= SKF_TRUNCATED;
1557 else
1558 s->flags &= ~SKF_TRUNCATED;
1559
1560 return rv;
1561}
1562
1563
353729f5
OZ
1564static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1565
b5d9ee5c
MM
1566static int
1567sk_maybe_write(sock *s)
1568{
1569 int e;
1570
1571 switch (s->type)
05476c4d
OZ
1572 {
1573 case SK_TCP:
1574 case SK_MAGIC:
1575 case SK_UNIX:
1576 while (s->ttx != s->tpos)
b5d9ee5c 1577 {
05476c4d
OZ
1578 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1579
1580 if (e < 0)
1581 {
1582 if (errno != EINTR && errno != EAGAIN)
b5d9ee5c 1583 {
05476c4d
OZ
1584 reset_tx_buffer(s);
1585 /* EPIPE is just a connection close notification during TX */
1586 s->err_hook(s, (errno != EPIPE) ? errno : 0);
1587 return -1;
b5d9ee5c 1588 }
05476c4d
OZ
1589 return 0;
1590 }
1591 s->ttx += e;
1592 }
1593 reset_tx_buffer(s);
1594 return 1;
1595
1596 case SK_UDP:
1597 case SK_IP:
1598 {
1599 if (s->tbuf == s->tpos)
b5d9ee5c 1600 return 1;
05476c4d
OZ
1601
1602 e = sk_sendmsg(s);
1603
1604 if (e < 0)
1605 {
1606 if (errno != EINTR && errno != EAGAIN)
1607 {
1608 reset_tx_buffer(s);
1609 s->err_hook(s, errno);
1610 return -1;
1611 }
1612
1613 if (!s->tx_hook)
1614 reset_tx_buffer(s);
1615 return 0;
b5d9ee5c 1616 }
05476c4d
OZ
1617 reset_tx_buffer(s);
1618 return 1;
b5d9ee5c 1619 }
05476c4d
OZ
1620 default:
1621 bug("sk_maybe_write: unknown socket type %d", s->type);
1622 }
b5d9ee5c
MM
1623}
1624
ea89da38
OZ
1625int
1626sk_rx_ready(sock *s)
1627{
1628 fd_set rd, wr;
1629 struct timeval timo;
1630 int rv;
1631
1632 FD_ZERO(&rd);
1633 FD_ZERO(&wr);
1634 FD_SET(s->fd, &rd);
1635
1636 timo.tv_sec = 0;
1637 timo.tv_usec = 0;
1638
1639 redo:
1640 rv = select(s->fd+1, &rd, &wr, NULL, &timo);
1641
1642 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1643 goto redo;
1644
1645 return rv;
1646}
1647
525fa2c1
MM
1648/**
1649 * sk_send - send data to a socket
1650 * @s: socket
1651 * @len: number of bytes to send
1652 *
1653 * This function sends @len bytes of data prepared in the
1654 * transmit buffer of the socket @s to the network connection.
1655 * If the packet can be sent immediately, it does so and returns
1656 * 1, else it queues the packet for later processing, returns 0
1657 * and calls the @tx_hook of the socket when the tranmission
1658 * takes place.
1659 */
b5d9ee5c
MM
1660int
1661sk_send(sock *s, unsigned len)
1662{
b5d9ee5c
MM
1663 s->ttx = s->tbuf;
1664 s->tpos = s->tbuf + len;
1665 return sk_maybe_write(s);
1666}
1667
525fa2c1
MM
1668/**
1669 * sk_send_to - send data to a specific destination
1670 * @s: socket
1671 * @len: number of bytes to send
1672 * @addr: IP address to send the packet to
1673 * @port: port to send the packet to
1674 *
2e9b2421 1675 * This is a sk_send() replacement for connection-less packet sockets
525fa2c1 1676 * which allows destination of the packet to be chosen dynamically.
48e5f32d 1677 * Raw IP sockets should use 0 for @port.
525fa2c1 1678 */
b5d9ee5c
MM
1679int
1680sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1681{
353729f5 1682 s->daddr = addr;
48e5f32d
OZ
1683 if (port)
1684 s->dport = port;
1685
b5d9ee5c
MM
1686 s->ttx = s->tbuf;
1687 s->tpos = s->tbuf + len;
1688 return sk_maybe_write(s);
1689}
1690
353729f5
OZ
1691/*
1692int
1693sk_send_full(sock *s, unsigned len, struct iface *ifa,
1694 ip_addr saddr, ip_addr daddr, unsigned dport)
1695{
1696 s->iface = ifa;
1697 s->saddr = saddr;
1698 s->daddr = daddr;
1699 s->dport = dport;
1700 s->ttx = s->tbuf;
1701 s->tpos = s->tbuf + len;
1702 return sk_maybe_write(s);
1703}
1704*/
1705
6a8d3f1c
OZ
1706 /* sk_read() and sk_write() are called from BFD's event loop */
1707
1708int
b5d9ee5c
MM
1709sk_read(sock *s)
1710{
1711 switch (s->type)
05476c4d
OZ
1712 {
1713 case SK_TCP_PASSIVE:
1714 return sk_passive_connected(s, SK_TCP);
1715
1716 case SK_UNIX_PASSIVE:
1717 return sk_passive_connected(s, SK_UNIX);
1718
1719 case SK_TCP:
1720 case SK_UNIX:
b5d9ee5c 1721 {
05476c4d
OZ
1722 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1723
1724 if (c < 0)
b93abffa 1725 {
05476c4d
OZ
1726 if (errno != EINTR && errno != EAGAIN)
1727 s->err_hook(s, errno);
b5d9ee5c 1728 }
05476c4d
OZ
1729 else if (!c)
1730 s->err_hook(s, 0);
1731 else
b5d9ee5c 1732 {
05476c4d
OZ
1733 s->rpos += c;
1734 if (s->rx_hook(s, s->rpos - s->rbuf))
1735 {
1736 /* We need to be careful since the socket could have been deleted by the hook */
1737 if (current_sock == s)
1738 s->rpos = s->rbuf;
1739 }
1740 return 1;
b5d9ee5c 1741 }
05476c4d
OZ
1742 return 0;
1743 }
353729f5 1744
05476c4d
OZ
1745 case SK_MAGIC:
1746 return s->rx_hook(s, 0);
b5d9ee5c 1747
05476c4d
OZ
1748 default:
1749 {
1750 int e = sk_recvmsg(s);
353729f5 1751
05476c4d
OZ
1752 if (e < 0)
1753 {
1754 if (errno != EINTR && errno != EAGAIN)
1755 s->err_hook(s, errno);
1756 return 0;
b5d9ee5c 1757 }
05476c4d
OZ
1758
1759 s->rpos = s->rbuf + e;
1760 s->rx_hook(s, e);
1761 return 1;
b5d9ee5c 1762 }
05476c4d 1763 }
b5d9ee5c
MM
1764}
1765
6a8d3f1c 1766int
b5d9ee5c
MM
1767sk_write(sock *s)
1768{
320f4173 1769 switch (s->type)
05476c4d
OZ
1770 {
1771 case SK_TCP_ACTIVE:
320f4173 1772 {
05476c4d
OZ
1773 sockaddr sa;
1774 sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1775
1776 if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1777 sk_tcp_connected(s);
1778 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1779 s->err_hook(s, errno);
38a608c5 1780 return 0;
320f4173 1781 }
05476c4d
OZ
1782
1783 default:
1784 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1785 {
1786 if (s->tx_hook)
1787 s->tx_hook(s);
1788 return 1;
1789 }
1790 return 0;
1791 }
b5d9ee5c
MM
1792}
1793
1794void
1795sk_dump_all(void)
1796{
1797 node *n;
1798 sock *s;
1799
1800 debug("Open sockets:\n");
1801 WALK_LIST(n, sock_list)
05476c4d
OZ
1802 {
1803 s = SKIP_BACK(sock, n, n);
1804 debug("%p ", s);
1805 sk_dump(&s->r);
1806 }
b5d9ee5c
MM
1807 debug("\n");
1808}
1809
b5d9ee5c
MM
1810
1811/*
1812 * Main I/O Loop
1813 */
1814
4c9dd1e4
MM
/* Asynchronous reconfiguration scheduled; io_loop() calls async_config() and clears it */
volatile int async_config_flag;

/* Asynchronous dump scheduled; io_loop() calls async_dump() and clears it */
volatile int async_dump_flag;
1817
b5d9ee5c
MM
1818void
1819io_init(void)
1820{
1821 init_list(&near_timers);
1822 init_list(&far_timers);
1823 init_list(&sock_list);
e8f73195 1824 init_list(&global_event_list);
7e5f5ffd 1825 krt_io_init();
fd91ae33
OZ
1826 init_times();
1827 update_times();
a92cf57d 1828 boot_time = now;
fd91ae33 1829 srandom((int) now_real);
b5d9ee5c
MM
1830}
1831
ea89da38
OZ
1832static int short_loops = 0;
1833#define SHORT_LOOP_MAX 10
1834
b5d9ee5c
MM
1835void
1836io_loop(void)
1837{
1838 fd_set rd, wr;
1839 struct timeval timo;
1840 time_t tout;
30770df2 1841 int hi, events;
b5d9ee5c 1842 sock *s;
38a608c5 1843 node *n;
b5d9ee5c 1844
38a608c5 1845 sock_recalc_fdsets_p = 1;
b5d9ee5c
MM
1846 for(;;)
1847 {
30770df2 1848 events = ev_run_list(&global_event_list);
fd91ae33 1849 update_times();
b5d9ee5c
MM
1850 tout = tm_first_shot();
1851 if (tout <= now)
1852 {
1853 tm_shot();
1854 continue;
1855 }
a92cf57d 1856 timo.tv_sec = events ? 0 : MIN(tout - now, 3);
30770df2 1857 timo.tv_usec = 0;
b5d9ee5c 1858
38a608c5
MM
1859 if (sock_recalc_fdsets_p)
1860 {
1861 sock_recalc_fdsets_p = 0;
1862 FD_ZERO(&rd);
1863 FD_ZERO(&wr);
1864 }
1865
b5d9ee5c
MM
1866 hi = 0;
1867 WALK_LIST(n, sock_list)
1868 {
1869 s = SKIP_BACK(sock, n, n);
1870 if (s->rx_hook)
1871 {
1872 FD_SET(s->fd, &rd);
1873 if (s->fd > hi)
1874 hi = s->fd;
1875 }
38a608c5
MM
1876 else
1877 FD_CLR(s->fd, &rd);
b5d9ee5c
MM
1878 if (s->tx_hook && s->ttx != s->tpos)
1879 {
1880 FD_SET(s->fd, &wr);
1881 if (s->fd > hi)
1882 hi = s->fd;
1883 }
38a608c5
MM
1884 else
1885 FD_CLR(s->fd, &wr);
b5d9ee5c
MM
1886 }
1887
4c9dd1e4
MM
1888 /*
1889 * Yes, this is racy. But even if the signal comes before this test
1890 * and entering select(), it gets caught on the next timer tick.
1891 */
1892
1893 if (async_config_flag)
1894 {
1895 async_config();
1896 async_config_flag = 0;
f4aabcee 1897 continue;
4c9dd1e4
MM
1898 }
1899 if (async_dump_flag)
1900 {
1901 async_dump();
1902 async_dump_flag = 0;
f4aabcee
MM
1903 continue;
1904 }
1905 if (async_shutdown_flag)
1906 {
1907 async_shutdown();
1908 async_shutdown_flag = 0;
1909 continue;
4c9dd1e4
MM
1910 }
1911
1912 /* And finally enter select() to find active sockets */
b5d9ee5c 1913 hi = select(hi+1, &rd, &wr, NULL, &timo);
ea89da38 1914
b5d9ee5c
MM
1915 if (hi < 0)
1916 {
1917 if (errno == EINTR || errno == EAGAIN)
1918 continue;
1919 die("select: %m");
1920 }
1921 if (hi)
1922 {
ea89da38
OZ
1923 /* guaranteed to be non-empty */
1924 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1925
38a608c5 1926 while (current_sock)
b5d9ee5c 1927 {
38a608c5
MM
1928 sock *s = current_sock;
1929 int e;
ea89da38
OZ
1930 int steps;
1931
1932 steps = MAX_STEPS;
1933 if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
38a608c5
MM
1934 do
1935 {
4323099d 1936 steps--;
38a608c5
MM
1937 e = sk_read(s);
1938 if (s != current_sock)
1939 goto next;
1940 }
4323099d
OZ
1941 while (e && s->rx_hook && steps);
1942
1943 steps = MAX_STEPS;
38a608c5
MM
1944 if (FD_ISSET(s->fd, &wr))
1945 do
1946 {
4323099d 1947 steps--;
38a608c5
MM
1948 e = sk_write(s);
1949 if (s != current_sock)
1950 goto next;
1951 }
4323099d 1952 while (e && steps);
38a608c5
MM
1953 current_sock = sk_next(s);
1954 next: ;
b5d9ee5c 1955 }
ea89da38
OZ
1956
1957 short_loops++;
1958 if (events && (short_loops < SHORT_LOOP_MAX))
1959 continue;
1960 short_loops = 0;
1961
1962 int count = 0;
1963 current_sock = stored_sock;
1964 if (current_sock == NULL)
1965 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
1966
1967 while (current_sock && count < MAX_RX_STEPS)
1968 {
1969 sock *s = current_sock;
0479b443 1970 int e UNUSED;
ea89da38
OZ
1971
1972 if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
1973 {
1974 count++;
1975 e = sk_read(s);
1976 if (s != current_sock)
1977 goto next2;
1978 }
1979 current_sock = sk_next(s);
1980 next2: ;
1981 }
1982
1983 stored_sock = current_sock;
b5d9ee5c
MM
1984 }
1985 }
1986}
41c8976e
OF
1987
/*
 * Check whether something already accepts connections on the UNIX
 * socket @path. Terminates the program via die() when the probe socket
 * cannot be created, when @path does not fit into sun_path, or when
 * the connection attempt succeeds; otherwise it just returns.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un addr;
  int probe = socket(AF_UNIX, SOCK_STREAM, 0);

  if (probe < 0)
    die("Cannot create socket: %m");

  if (strlen(path) >= sizeof(addr.sun_path))
    die("Socket path too long");

  memset(&addr, 0, sizeof(addr));
  addr.sun_family = AF_UNIX;
  strcpy(addr.sun_path, path);

  /* A successful connect means that somebody is listening there */
  if (connect(probe, (struct sockaddr *) &addr, SUN_LEN(&addr)) == 0)
    die("I found another BIRD running.");

  close(probe);
}
2006
2007