]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/unix/io.c
OSPF: Fixes handling of external routes with immediate gw
[thirdparty/bird.git] / sysdep / unix / io.c
CommitLineData
b5d9ee5c
MM
1/*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
38a608c5 4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
b1a1faba 5 * (c) 2004 Ondrej Filip <feela@network.cz>
b5d9ee5c
MM
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
607d9914
OZ
10/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
12#define _GNU_SOURCE 1
13
b5d9ee5c
MM
14#include <stdio.h>
15#include <stdlib.h>
01b776e1 16#include <time.h>
b5d9ee5c
MM
17#include <sys/time.h>
18#include <sys/types.h>
19#include <sys/socket.h>
46a82e9c 20#include <sys/uio.h>
b93abffa 21#include <sys/un.h>
b5d9ee5c 22#include <unistd.h>
a0b176e3 23#include <fcntl.h>
b5d9ee5c 24#include <errno.h>
05476c4d 25#include <net/if.h>
d0e9b36d 26#include <netinet/in.h>
48e5f32d
OZ
27#include <netinet/tcp.h>
28#include <netinet/udp.h>
93e868c7 29#include <netinet/icmp6.h>
b5d9ee5c
MM
30
31#include "nest/bird.h"
32#include "lib/lists.h"
33#include "lib/resource.h"
34#include "lib/timer.h"
35#include "lib/socket.h"
e8f73195 36#include "lib/event.h"
afa8937a 37#include "lib/string.h"
b5d9ee5c
MM
38#include "nest/iface.h"
39
40#include "lib/unix.h"
a2867cd9 41#include "lib/sysio.h"
b5d9ee5c 42
ea89da38 43/* Maximum number of calls of tx handler for one socket in one
4323099d
OZ
44 * select iteration. Should be small enough to not monopolize CPU by
45 * one protocol instance.
46 */
47#define MAX_STEPS 4
48
ea89da38
OZ
49/* Maximum number of calls of rx handler for all sockets in one select
50 iteration. RX callbacks are often much more costly so we limit
51 this to get small latencies */
52#define MAX_RX_STEPS 4
53
a9c986f9
MM
54/*
55 * Tracked Files
56 */
57
58struct rfile {
59 resource r;
60 FILE *f;
61};
62
63static void
64rf_free(resource *r)
65{
66 struct rfile *a = (struct rfile *) r;
67
68 fclose(a->f);
69}
70
71static void
72rf_dump(resource *r)
73{
74 struct rfile *a = (struct rfile *) r;
75
76 debug("(FILE *%p)\n", a->f);
77}
78
79static struct resclass rf_class = {
80 "FILE",
81 sizeof(struct rfile),
82 rf_free,
e81b440f 83 rf_dump,
acb60628 84 NULL,
e81b440f 85 NULL
a9c986f9
MM
86};
87
88void *
f78056fb 89tracked_fopen(pool *p, char *name, char *mode)
a9c986f9
MM
90{
91 FILE *f = fopen(name, mode);
92
93 if (f)
94 {
95 struct rfile *r = ralloc(p, &rf_class);
96 r->f = f;
97 }
98 return f;
99}
100
525fa2c1
MM
101/**
102 * DOC: Timers
103 *
104 * Timers are resources which represent a wish of a module to call
105 * a function at the specified time. The platform dependent code
58f7d004 106 * doesn't guarantee exact timing, only that a timer function
525fa2c1
MM
107 * won't be called before the requested time.
108 *
fd91ae33
OZ
109 * In BIRD, time is represented by values of the &bird_clock_t type
110 * which are integral numbers interpreted as a relative number of seconds since
111 * some fixed time point in past. The current time can be read
112 * from variable @now with reasonable accuracy and is monotonic. There is also
113 * a current 'absolute' time in variable @now_real reported by OS.
525fa2c1
MM
114 *
115 * Each timer is described by a &timer structure containing a pointer
116 * to the handler function (@hook), data private to this function (@data),
117 * time the function should be called at (@expires, 0 for inactive timers),
118 * for the other fields see |timer.h|.
b5d9ee5c
MM
119 */
120
121#define NEAR_TIMER_LIMIT 4
122
b5d9ee5c
MM
123static list near_timers, far_timers;
124static bird_clock_t first_far_timer = TIME_INFINITY;
125
002b6423 126/* now must be different from 0, because 0 is a special value in timer->expires */
a92cf57d 127bird_clock_t now = 1, now_real, boot_time;
fd91ae33
OZ
128
129static void
130update_times_plain(void)
131{
132 bird_clock_t new_time = time(NULL);
133 int delta = new_time - now_real;
134
135 if ((delta >= 0) && (delta < 60))
136 now += delta;
137 else if (now_real != 0)
138 log(L_WARN "Time jump, delta %d s", delta);
139
140 now_real = new_time;
141}
142
143static void
144update_times_gettime(void)
145{
146 struct timespec ts;
147 int rv;
148
149 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
150 if (rv != 0)
151 die("clock_gettime: %m");
152
153 if (ts.tv_sec != now) {
154 if (ts.tv_sec < now)
155 log(L_ERR "Monotonic timer is broken");
156
157 now = ts.tv_sec;
158 now_real = time(NULL);
159 }
160}
161
162static int clock_monotonic_available;
163
164static inline void
165update_times(void)
166{
167 if (clock_monotonic_available)
168 update_times_gettime();
169 else
170 update_times_plain();
171}
172
173static inline void
174init_times(void)
175{
176 struct timespec ts;
177 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
178 if (!clock_monotonic_available)
179 log(L_WARN "Monotonic timer is missing");
180}
181
b5d9ee5c
MM
182
183static void
184tm_free(resource *r)
185{
186 timer *t = (timer *) r;
187
188 tm_stop(t);
189}
190
191static void
192tm_dump(resource *r)
193{
194 timer *t = (timer *) r;
195
e8f73195 196 debug("(code %p, data %p, ", t->hook, t->data);
af847acc
MM
197 if (t->randomize)
198 debug("rand %d, ", t->randomize);
199 if (t->recurrent)
200 debug("recur %d, ", t->recurrent);
b5d9ee5c
MM
201 if (t->expires)
202 debug("expires in %d sec)\n", t->expires - now);
203 else
204 debug("inactive)\n");
205}
206
207static struct resclass tm_class = {
208 "Timer",
209 sizeof(timer),
210 tm_free,
e81b440f 211 tm_dump,
acb60628 212 NULL,
e81b440f 213 NULL
b5d9ee5c
MM
214};
215
525fa2c1
MM
216/**
217 * tm_new - create a timer
218 * @p: pool
219 *
220 * This function creates a new timer resource and returns
221 * a pointer to it. To use the timer, you need to fill in
222 * the structure fields and call tm_start() to start timing.
223 */
b5d9ee5c
MM
224timer *
225tm_new(pool *p)
226{
227 timer *t = ralloc(p, &tm_class);
b5d9ee5c
MM
228 return t;
229}
230
231static inline void
232tm_insert_near(timer *t)
233{
234 node *n = HEAD(near_timers);
235
236 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
237 n = n->next;
238 insert_node(&t->n, n->prev);
239}
240
525fa2c1
MM
241/**
242 * tm_start - start a timer
243 * @t: timer
244 * @after: number of seconds the timer should be run after
245 *
246 * This function schedules the hook function of the timer to
247 * be called after @after seconds. If the timer has been already
248 * started, its @expires time is replaced by the new value.
249 *
250 * If you have set the @randomize field of @t, the timeout
251 * will be increased by a random number of seconds chosen
252 * uniformly from range 0 .. @randomize.
253 *
254 * You can call tm_start() from the handler function of the timer
255 * to request another run of the timer. Also, you can set the @recurrent
256 * field to have the timer re-added automatically with the same timeout.
257 */
b5d9ee5c
MM
258void
259tm_start(timer *t, unsigned after)
260{
261 bird_clock_t when;
262
263 if (t->randomize)
af847acc 264 after += random() % (t->randomize + 1);
b5d9ee5c
MM
265 when = now + after;
266 if (t->expires == when)
267 return;
268 if (t->expires)
269 rem_node(&t->n);
270 t->expires = when;
271 if (after <= NEAR_TIMER_LIMIT)
272 tm_insert_near(t);
273 else
274 {
275 if (!first_far_timer || first_far_timer > when)
276 first_far_timer = when;
277 add_tail(&far_timers, &t->n);
278 }
279}
280
525fa2c1
MM
281/**
282 * tm_stop - stop a timer
283 * @t: timer
284 *
285 * This function stops a timer. If the timer is already stopped,
286 * nothing happens.
287 */
b5d9ee5c
MM
288void
289tm_stop(timer *t)
290{
291 if (t->expires)
292 {
293 rem_node(&t->n);
294 t->expires = 0;
295 }
296}
297
298static void
299tm_dump_them(char *name, list *l)
300{
301 node *n;
302 timer *t;
303
304 debug("%s timers:\n", name);
305 WALK_LIST(n, *l)
306 {
307 t = SKIP_BACK(timer, n, n);
308 debug("%p ", t);
309 tm_dump(&t->r);
310 }
311 debug("\n");
312}
313
314void
315tm_dump_all(void)
316{
317 tm_dump_them("Near", &near_timers);
318 tm_dump_them("Far", &far_timers);
319}
320
321static inline time_t
322tm_first_shot(void)
323{
324 time_t x = first_far_timer;
325
326 if (!EMPTY_LIST(near_timers))
327 {
328 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
329 if (t->expires < x)
330 x = t->expires;
331 }
332 return x;
333}
334
8bcb5fb1
OZ
335void io_log_event(void *hook, void *data);
336
b5d9ee5c
MM
337static void
338tm_shot(void)
339{
340 timer *t;
341 node *n, *m;
342
343 if (first_far_timer <= now)
344 {
28a9a189 345 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
b5d9ee5c
MM
346 first_far_timer = TIME_INFINITY;
347 n = HEAD(far_timers);
348 while (m = n->next)
349 {
350 t = SKIP_BACK(timer, n, n);
351 if (t->expires <= limit)
352 {
353 rem_node(n);
354 tm_insert_near(t);
355 }
356 else if (t->expires < first_far_timer)
357 first_far_timer = t->expires;
358 n = m;
359 }
360 }
361 while ((n = HEAD(near_timers)) -> next)
362 {
af847acc 363 int delay;
b5d9ee5c
MM
364 t = SKIP_BACK(timer, n, n);
365 if (t->expires > now)
366 break;
367 rem_node(n);
af847acc 368 delay = t->expires - now;
b5d9ee5c 369 t->expires = 0;
af847acc
MM
370 if (t->recurrent)
371 {
372 int i = t->recurrent - delay;
373 if (i < 0)
374 i = 0;
375 tm_start(t, i);
376 }
8bcb5fb1 377 io_log_event(t->hook, t->data);
b5d9ee5c
MM
378 t->hook(t);
379 }
380}
381
0d3effcf
OF
382/**
383 * tm_parse_datetime - parse a date and time
384 * @x: datetime string
385 *
386 * tm_parse_datetime() takes a textual representation of
387 * a date and time (dd-mm-yyyy hh:mm:ss)
388 * and converts it to the corresponding value of type &bird_clock_t.
389 */
390bird_clock_t
391tm_parse_datetime(char *x)
392{
393 struct tm tm;
394 int n;
395 time_t t;
396
397 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
398 return tm_parse_date(x);
399 tm.tm_mon--;
400 tm.tm_year -= 1900;
401 t = mktime(&tm);
402 if (t == (time_t) -1)
403 return 0;
404 return t;
405}
525fa2c1
MM
406/**
407 * tm_parse_date - parse a date
408 * @x: date string
409 *
410 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
411 * and converts it to the corresponding value of type &bird_clock_t.
412 */
913f7dc9
MM
413bird_clock_t
414tm_parse_date(char *x)
415{
416 struct tm tm;
417 int n;
418 time_t t;
419
420 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
421 return 0;
422 tm.tm_mon--;
423 tm.tm_year -= 1900;
424 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
425 t = mktime(&tm);
426 if (t == (time_t) -1)
427 return 0;
428 return t;
429}
430
c37e7851
OZ
431static void
432tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
913f7dc9 433{
c37e7851
OZ
434 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
435 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
913f7dc9 436
c37e7851
OZ
437 if (delta < 20*3600)
438 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
439 else if (delta < 360*86400)
440 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
441 else
442 bsprintf(x, "%d", tm->tm_year+1900);
913f7dc9
MM
443}
444
c37e7851
OZ
445#include "conf/conf.h"
446
525fa2c1
MM
447/**
448 * tm_format_datetime - convert date and time to textual representation
449 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
450 * @t: time
451 *
fd91ae33
OZ
452 * This function formats the given relative time value @t to a textual
453 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
525fa2c1 454 */
7a88832e 455void
c37e7851 456tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
7a88832e 457{
c37e7851 458 const char *fmt_used;
7a88832e 459 struct tm *tm;
fd91ae33
OZ
460 bird_clock_t delta = now - t;
461 t = now_real - delta;
7a88832e 462 tm = localtime(&t);
7a88832e 463
c37e7851
OZ
464 if (fmt_spec->fmt1 == NULL)
465 return tm_format_reltime(x, tm, delta);
afa8937a 466
c37e7851
OZ
467 if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
468 fmt_used = fmt_spec->fmt1;
afa8937a 469 else
c37e7851
OZ
470 fmt_used = fmt_spec->fmt2;
471
472 int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
473 if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
474 strcpy(x, "<too-long>");
afa8937a
MM
475}
476
05476c4d 477
525fa2c1
MM
478/**
479 * DOC: Sockets
480 *
481 * Socket resources represent network connections. Their data structure (&socket)
482 * contains a lot of fields defining the exact type of the socket, the local and
483 * remote addresses and ports, pointers to socket buffers and finally pointers to
484 * hook functions to be called when new data have arrived to the receive buffer
485 * (@rx_hook), when the contents of the transmit buffer have been transmitted
486 * (@tx_hook) and when an error or connection close occurs (@err_hook).
487 *
38a608c5 488 * Freeing of sockets from inside socket hooks is perfectly safe.
b5d9ee5c
MM
489 */
490
abae6e9c
MM
491#ifndef SOL_IP
492#define SOL_IP IPPROTO_IP
493#endif
494
b1a1faba
OF
495#ifndef SOL_IPV6
496#define SOL_IPV6 IPPROTO_IPV6
497#endif
498
48e5f32d
OZ
499#ifndef SOL_ICMPV6
500#define SOL_ICMPV6 IPPROTO_ICMPV6
501#endif
502
503
05476c4d
OZ
504/*
505 * Sockaddr helper functions
506 */
38a608c5 507
05476c4d
OZ
/* Size of the socket address structure for @af; anything but AF_INET is treated as IPv6. */
static inline int sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
510
511static inline void
512sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port)
38a608c5 513{
05476c4d
OZ
514 memset(sa, 0, sizeof(struct sockaddr_in));
515#ifdef HAVE_SIN_LEN
516 sa->sin_len = sizeof(struct sockaddr_in);
517#endif
518 sa->sin_family = AF_INET;
519 sa->sin_port = htons(port);
520 sa->sin_addr = ipa_to_in4(a);
38a608c5 521}
b5d9ee5c 522
05476c4d
OZ
523static inline void
524sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
b5d9ee5c 525{
05476c4d
OZ
526 memset(sa, 0, sizeof(struct sockaddr_in6));
527#ifdef SIN6_LEN
528 sa->sin6_len = sizeof(struct sockaddr_in6);
529#endif
530 sa->sin6_family = AF_INET6;
531 sa->sin6_port = htons(port);
532 sa->sin6_flowinfo = 0;
533 sa->sin6_addr = ipa_to_in6(a);
534
535 if (ifa && ipa_is_link_local(a))
536 sa->sin6_scope_id = ifa->index;
4da25acb 537}
b5d9ee5c 538
05476c4d
OZ
539void
540sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
4da25acb 541{
05476c4d
OZ
542 if (af == AF_INET)
543 sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
544 else if (af == AF_INET6)
545 sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
546 else
547 bug("Unknown AF");
4da25acb
MM
548}
549
05476c4d
OZ
550static inline void
551sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port)
4da25acb 552{
05476c4d
OZ
553 *port = ntohs(sa->sin_port);
554 *a = ipa_from_in4(sa->sin_addr);
b5d9ee5c
MM
555}
556
05476c4d
OZ
557static inline void
558sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
48e5f32d 559{
05476c4d
OZ
560 *port = ntohs(sa->sin6_port);
561 *a = ipa_from_in6(sa->sin6_addr);
48e5f32d 562
05476c4d
OZ
563 if (ifa && ipa_is_link_local(*a))
564 *ifa = if_find_by_index(sa->sin6_scope_id);
48e5f32d
OZ
565}
566
05476c4d
OZ
567int
568sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
48e5f32d 569{
05476c4d
OZ
570 if (sa->sa.sa_family != af)
571 goto fail;
48e5f32d 572
05476c4d
OZ
573 if (af == AF_INET)
574 sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
575 else if (af == AF_INET6)
576 sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
577 else
578 goto fail;
48e5f32d 579
05476c4d 580 return 0;
48e5f32d 581
05476c4d
OZ
582 fail:
583 *a = IPA_NONE;
584 *port = 0;
585 return -1;
48e5f32d
OZ
586}
587
48e5f32d 588
05476c4d
OZ
589/*
590 * IPv6 multicast syscalls
591 */
4da25acb 592
05476c4d 593/* Fortunately standardized in RFC 3493 */
b5d9ee5c 594
05476c4d
OZ
/*
 * Initializer for struct ipv6_mreq: multicast group @maddr on interface
 * @ifa (a struct iface pointer). The @ifa argument is parenthesized so
 * that any pointer-valued expression can be passed safely.
 */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
b5d9ee5c 597
05476c4d
OZ
598static inline int
599sk_setup_multicast6(sock *s)
b5d9ee5c 600{
05476c4d
OZ
601 int index = s->iface->index;
602 int ttl = s->ttl;
603 int n = 0;
b5d9ee5c 604
05476c4d
OZ
605 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
606 ERR("IPV6_MULTICAST_IF");
b5d9ee5c 607
05476c4d
OZ
608 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
609 ERR("IPV6_MULTICAST_HOPS");
4f22c981 610
05476c4d
OZ
611 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
612 ERR("IPV6_MULTICAST_LOOP");
4f22c981 613
05476c4d 614 return 0;
061ab802
OZ
615}
616
05476c4d
OZ
617static inline int
618sk_join_group6(sock *s, ip_addr maddr)
4f22c981 619{
05476c4d 620 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
eb1451a3 621
05476c4d
OZ
622 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
623 ERR("IPV6_JOIN_GROUP");
4f22c981 624
05476c4d 625 return 0;
b5d9ee5c
MM
626}
627
05476c4d
OZ
628static inline int
629sk_leave_group6(sock *s, ip_addr maddr)
b5d9ee5c 630{
05476c4d 631 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
b5d9ee5c 632
05476c4d
OZ
633 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
634 ERR("IPV6_LEAVE_GROUP");
635
636 return 0;
637}
4f22c981 638
bed41728 639
05476c4d
OZ
640/*
641 * IPv6 packet control messages
642 */
bed41728 643
05476c4d 644/* Also standardized, in RFC 3542 */
bed41728 645
dcc60494
OZ
646/*
647 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
648 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
649 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
650 * RFC and we use IPV6_PKTINFO.
651 */
652#ifndef IPV6_RECVPKTINFO
653#define IPV6_RECVPKTINFO IPV6_PKTINFO
654#endif
70e212f9
OZ
655/*
656 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
657 */
658#ifndef IPV6_RECVHOPLIMIT
659#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
660#endif
dcc60494 661
70e212f9 662
05476c4d
OZ
663#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
664#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
bed41728 665
05476c4d
OZ
666static inline int
667sk_request_cmsg6_pktinfo(sock *s)
668{
669 int y = 1;
70e212f9 670
05476c4d
OZ
671 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
672 ERR("IPV6_RECVPKTINFO");
673
674 return 0;
bed41728
OZ
675}
676
05476c4d
OZ
677static inline int
678sk_request_cmsg6_ttl(sock *s)
bed41728 679{
05476c4d 680 int y = 1;
bed41728 681
05476c4d
OZ
682 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
683 ERR("IPV6_RECVHOPLIMIT");
70e212f9 684
05476c4d
OZ
685 return 0;
686}
70e212f9 687
05476c4d
OZ
688static inline void
689sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
690{
691 if (cm->cmsg_type == IPV6_PKTINFO)
70e212f9 692 {
05476c4d
OZ
693 struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
694 s->laddr = ipa_from_in6(pi->ipi6_addr);
695 s->lifindex = pi->ipi6_ifindex;
70e212f9 696 }
05476c4d 697}
70e212f9 698
05476c4d
OZ
699static inline void
700sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
701{
702 if (cm->cmsg_type == IPV6_HOPLIMIT)
703 s->rcv_ttl = * (int *) CMSG_DATA(cm);
bed41728
OZ
704}
705
05476c4d
OZ
706static inline void
707sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
bed41728
OZ
708{
709 struct cmsghdr *cm;
710 struct in6_pktinfo *pi;
8945f73d 711 int controllen = 0;
bed41728 712
bed41728
OZ
713 msg->msg_control = cbuf;
714 msg->msg_controllen = cbuflen;
715
716 cm = CMSG_FIRSTHDR(msg);
48e5f32d 717 cm->cmsg_level = SOL_IPV6;
bed41728
OZ
718 cm->cmsg_type = IPV6_PKTINFO;
719 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
8945f73d 720 controllen += CMSG_SPACE(sizeof(*pi));
bed41728
OZ
721
722 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
bed41728 723 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
05476c4d 724 pi->ipi6_addr = ipa_to_in6(s->saddr);
bed41728 725
8945f73d 726 msg->msg_controllen = controllen;
bed41728 727}
48e5f32d 728
bed41728 729
05476c4d
OZ
730/*
731 * Miscellaneous socket syscalls
732 */
733
734static inline int
735sk_set_ttl4(sock *s, int ttl)
a39b165e 736{
05476c4d
OZ
737 if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
738 ERR("IP_TTL");
739
740 return 0;
a39b165e
OZ
741}
742
05476c4d
OZ
743static inline int
744sk_set_ttl6(sock *s, int ttl)
745{
746 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
747 ERR("IPV6_UNICAST_HOPS");
38a608c5 748
05476c4d
OZ
749 return 0;
750}
751
752static inline int
753sk_set_tos4(sock *s, int tos)
b5d9ee5c 754{
05476c4d
OZ
755 if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
756 ERR("IP_TOS");
b5d9ee5c 757
05476c4d
OZ
758 return 0;
759}
ef4a50be 760
05476c4d
OZ
761static inline int
762sk_set_tos6(sock *s, int tos)
763{
764 if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
765 ERR("IPV6_TCLASS");
48e5f32d 766
05476c4d
OZ
767 return 0;
768}
48e5f32d 769
88a183c6
OZ
770static inline byte *
771sk_skip_ip_header(byte *pkt, int *len)
772{
773 if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
774 return NULL;
775
776 int hlen = (*pkt & 0x0f) * 4;
777 if ((hlen < 20) || (hlen > *len))
778 return NULL;
779
780 *len -= hlen;
781 return pkt + hlen;
782}
783
784byte *
785sk_rx_buffer(sock *s, int *len)
786{
787 if (sk_is_ipv4(s) && (s->type == SK_IP))
788 return sk_skip_ip_header(s->rbuf, len);
789 else
790 return s->rbuf;
791}
792
48e5f32d 793
05476c4d
OZ
794/*
795 * Public socket functions
796 */
48e5f32d 797
05476c4d
OZ
798/**
799 * sk_setup_multicast - enable multicast for given socket
800 * @s: socket
801 *
802 * Prepare transmission of multicast packets for given datagram socket.
803 * The socket must have defined @iface.
804 *
805 * Result: 0 for success, -1 for an error.
806 */
48e5f32d 807
05476c4d
OZ
808int
809sk_setup_multicast(sock *s)
810{
811 ASSERT(s->iface);
48e5f32d 812
05476c4d
OZ
813 if (sk_is_ipv4(s))
814 return sk_setup_multicast4(s);
815 else
816 return sk_setup_multicast6(s);
817}
48e5f32d 818
05476c4d
OZ
819/**
820 * sk_join_group - join multicast group for given socket
821 * @s: socket
822 * @maddr: multicast address
823 *
824 * Join multicast group for given datagram socket and associated interface.
825 * The socket must have defined @iface.
826 *
827 * Result: 0 for success, -1 for an error.
828 */
789772ed 829
05476c4d
OZ
830int
831sk_join_group(sock *s, ip_addr maddr)
832{
833 if (sk_is_ipv4(s))
834 return sk_join_group4(s, maddr);
835 else
836 return sk_join_group6(s, maddr);
837}
ef4a50be 838
05476c4d
OZ
839/**
840 * sk_leave_group - leave multicast group for given socket
841 * @s: socket
842 * @maddr: multicast address
843 *
844 * Leave multicast group for given datagram socket and associated interface.
845 * The socket must have defined @iface.
846 *
847 * Result: 0 for success, -1 for an error.
848 */
789772ed 849
05476c4d
OZ
850int
851sk_leave_group(sock *s, ip_addr maddr)
852{
853 if (sk_is_ipv4(s))
854 return sk_leave_group4(s, maddr);
855 else
856 return sk_leave_group6(s, maddr);
b5d9ee5c
MM
857}
858
a39b165e 859/**
05476c4d
OZ
860 * sk_setup_broadcast - enable broadcast for given socket
861 * @s: socket
862 *
863 * Allow reception and transmission of broadcast packets for given datagram
864 * socket. The socket must have defined @iface. For transmission, packets should
865 * be sent to @brd address of @iface.
866 *
867 * Result: 0 for success, -1 for an error.
868 */
869
870int
871sk_setup_broadcast(sock *s)
872{
873 int y = 1;
874
875 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
876 ERR("SO_BROADCAST");
877
878 return 0;
879}
880
881/**
882 * sk_set_ttl - set transmit TTL for given socket
a39b165e
OZ
883 * @s: socket
884 * @ttl: TTL value
885 *
05476c4d
OZ
886 * Set TTL for already opened connections when TTL was not set before. Useful
887 * for accepted connections when different ones should have different TTL.
a39b165e
OZ
888 *
889 * Result: 0 for success, -1 for an error.
890 */
891
892int
893sk_set_ttl(sock *s, int ttl)
894{
a39b165e 895 s->ttl = ttl;
a39b165e 896
05476c4d
OZ
897 if (sk_is_ipv4(s))
898 return sk_set_ttl4(s, ttl);
899 else
900 return sk_set_ttl6(s, ttl);
a39b165e
OZ
901}
902
b1b19433 903/**
05476c4d 904 * sk_set_min_ttl - set minimal accepted TTL for given socket
b1b19433
OZ
905 * @s: socket
906 * @ttl: TTL value
907 *
05476c4d
OZ
908 * Set minimal accepted TTL for given socket. Can be used for TTL security
909 * implementations.
b1b19433
OZ
910 *
911 * Result: 0 for success, -1 for an error.
912 */
913
914int
915sk_set_min_ttl(sock *s, int ttl)
916{
05476c4d
OZ
917 if (sk_is_ipv4(s))
918 return sk_set_min_ttl4(s, ttl);
919 else
920 return sk_set_min_ttl6(s, ttl);
b1b19433 921}
d51aa281 922
05476c4d 923#if 0
d51aa281 924/**
05476c4d 925 * sk_set_md5_auth - add / remove MD5 security association for given socket
d51aa281
OZ
926 * @s: socket
927 * @a: IP address of the other side
eb1451a3 928 * @ifa: Interface for link-local IP address
d51aa281
OZ
929 * @passwd: password used for MD5 authentication
930 *
05476c4d
OZ
931 * In TCP MD5 handling code in kernel, there is a set of pairs (address,
932 * password) used to choose password according to address of the other side.
933 * This function is useful for listening socket, for active sockets it is enough
934 * to set s->password field.
d51aa281
OZ
935 *
936 * When called with passwd != NULL, the new pair is added,
937 * When called with passwd == NULL, the existing pair is removed.
938 *
939 * Result: 0 for success, -1 for an error.
940 */
941
942int
eb1451a3 943sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd)
05476c4d
OZ
944{ DUMMY; }
945#endif
f9c799a0 946
05476c4d
OZ
947/**
948 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
949 * @s: socket
950 * @offset: offset
951 *
952 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
953 * kernel will automatically fill it for outgoing packets and check it for
954 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
955 * known to the kernel.
956 *
957 * Result: 0 for success, -1 for an error.
958 */
f9c799a0 959
4ac7c834
OZ
960int
961sk_set_ipv6_checksum(sock *s, int offset)
962{
48e5f32d 963 if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
05476c4d 964 ERR("IPV6_CHECKSUM");
4ac7c834
OZ
965
966 return 0;
967}
968
93e868c7 969int
05476c4d 970sk_set_icmp6_filter(sock *s, int p1, int p2)
93e868c7
OZ
971{
972 /* a bit of lame interface, but it is here only for Radv */
973 struct icmp6_filter f;
974
975 ICMP6_FILTER_SETBLOCKALL(&f);
976 ICMP6_FILTER_SETPASS(p1, &f);
977 ICMP6_FILTER_SETPASS(p2, &f);
978
48e5f32d 979 if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
05476c4d 980 ERR("ICMP6_FILTER");
93e868c7
OZ
981
982 return 0;
983}
984
05476c4d
OZ
985void
986sk_log_error(sock *s, const char *p)
987{
988 log(L_ERR "%s: Socket error: %s%#m", p, s->err);
989}
990
991
992/*
993 * Actual struct birdsock code
994 */
995
996static list sock_list;
997static struct birdsock *current_sock;
998static struct birdsock *stored_sock;
999static int sock_recalc_fdsets_p;
1000
1001static inline sock *
1002sk_next(sock *s)
1003{
1004 if (!s->n.next->next)
1005 return NULL;
1006 else
1007 return SKIP_BACK(sock, n, s->n.next);
1008}
1009
1010static void
1011sk_alloc_bufs(sock *s)
1012{
1013 if (!s->rbuf && s->rbsize)
1014 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
1015 s->rpos = s->rbuf;
1016 if (!s->tbuf && s->tbsize)
1017 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
1018 s->tpos = s->ttx = s->tbuf;
1019}
1020
1021static void
1022sk_free_bufs(sock *s)
1023{
1024 if (s->rbuf_alloc)
1025 {
1026 xfree(s->rbuf_alloc);
1027 s->rbuf = s->rbuf_alloc = NULL;
1028 }
1029 if (s->tbuf_alloc)
1030 {
1031 xfree(s->tbuf_alloc);
1032 s->tbuf = s->tbuf_alloc = NULL;
1033 }
1034}
1035
1036static void
1037sk_free(resource *r)
1038{
1039 sock *s = (sock *) r;
1040
1041 sk_free_bufs(s);
1042 if (s->fd >= 0)
1043 {
1044 close(s->fd);
1045
1046 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1047 if (s->flags & SKF_THREAD)
1048 return;
1049
1050 if (s == current_sock)
1051 current_sock = sk_next(s);
1052 if (s == stored_sock)
1053 stored_sock = sk_next(s);
1054 rem_node(&s->n);
1055 sock_recalc_fdsets_p = 1;
1056 }
1057}
1058
1059void
1060sk_set_rbsize(sock *s, uint val)
1061{
1062 ASSERT(s->rbuf_alloc == s->rbuf);
1063
1064 if (s->rbsize == val)
1065 return;
1066
1067 s->rbsize = val;
1068 xfree(s->rbuf_alloc);
1069 s->rbuf_alloc = xmalloc(val);
1070 s->rpos = s->rbuf = s->rbuf_alloc;
1071}
1072
1073void
1074sk_set_tbsize(sock *s, uint val)
1075{
1076 ASSERT(s->tbuf_alloc == s->tbuf);
1077
1078 if (s->tbsize == val)
1079 return;
1080
1081 byte *old_tbuf = s->tbuf;
1082
1083 s->tbsize = val;
1084 s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1085 s->tpos = s->tbuf + (s->tpos - old_tbuf);
1086 s->ttx = s->tbuf + (s->ttx - old_tbuf);
1087}
1088
1089void
1090sk_set_tbuf(sock *s, void *tbuf)
1091{
1092 s->tbuf = tbuf ?: s->tbuf_alloc;
1093 s->ttx = s->tpos = s->tbuf;
1094}
1095
1096void
1097sk_reallocate(sock *s)
1098{
1099 sk_free_bufs(s);
1100 sk_alloc_bufs(s);
1101}
1102
1103static void
1104sk_dump(resource *r)
1105{
1106 sock *s = (sock *) r;
1107 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1108
af454f9b 1109 debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
05476c4d
OZ
1110 sk_type_names[s->type],
1111 s->data,
1112 s->saddr,
1113 s->sport,
1114 s->daddr,
1115 s->dport,
1116 s->tos,
1117 s->ttl,
1118 s->iface ? s->iface->name : "none");
1119}
1120
1121static struct resclass sk_class = {
1122 "Socket",
1123 sizeof(sock),
1124 sk_free,
1125 sk_dump,
1126 NULL,
1127 NULL
1128};
1129
1130/**
1131 * sk_new - create a socket
1132 * @p: pool
1133 *
1134 * This function creates a new socket resource. If you want to use it,
1135 * you need to fill in all the required fields of the structure and
1136 * call sk_open() to do the actual opening of the socket.
1137 *
1138 * The real function name is sock_new(), sk_new() is a macro wrapper
1139 * to avoid collision with OpenSSL.
1140 */
1141sock *
1142sock_new(pool *p)
1143{
1144 sock *s = ralloc(p, &sk_class);
1145 s->pool = p;
1146 // s->saddr = s->daddr = IPA_NONE;
1147 s->tos = s->priority = s->ttl = -1;
1148 s->fd = -1;
1149 return s;
1150}
1151
1152static int
1153sk_setup(sock *s)
f9c799a0 1154{
05476c4d
OZ
1155 int y = 1;
1156 int fd = s->fd;
f9c799a0 1157
05476c4d
OZ
1158 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1159 ERR("O_NONBLOCK");
f9c799a0 1160
05476c4d
OZ
1161 if (!s->af)
1162 return 0;
f9c799a0 1163
05476c4d
OZ
1164 if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1165 s->flags |= SKF_PKTINFO;
f9c799a0 1166
05476c4d
OZ
1167#ifdef CONFIG_USE_HDRINCL
1168 if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1169 {
1170 s->flags &= ~SKF_PKTINFO;
1171 s->flags |= SKF_HDRINCL;
1172 if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1173 ERR("IP_HDRINCL");
1174 }
48e5f32d
OZ
1175#endif
1176
05476c4d
OZ
1177 if (s->iface)
1178 {
1179#ifdef SO_BINDTODEVICE
1180 struct ifreq ifr;
1181 strcpy(ifr.ifr_name, s->iface->name);
1182 if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1183 ERR("SO_BINDTODEVICE");
1184#endif
f1aceff5 1185
05476c4d
OZ
1186#ifdef CONFIG_UNIX_DONTROUTE
1187 if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1188 ERR("SO_DONTROUTE");
1189#endif
1190 }
f9c799a0 1191
05476c4d
OZ
1192 if (s->priority >= 0)
1193 if (sk_set_priority(s, s->priority) < 0)
f9c799a0 1194 return -1;
f9c799a0 1195
05476c4d
OZ
1196 if (sk_is_ipv4(s))
1197 {
1198 if (s->flags & SKF_LADDR_RX)
1199 if (sk_request_cmsg4_pktinfo(s) < 0)
1200 return -1;
f9c799a0 1201
05476c4d
OZ
1202 if (s->flags & SKF_TTL_RX)
1203 if (sk_request_cmsg4_ttl(s) < 0)
1204 return -1;
f9c799a0 1205
05476c4d
OZ
1206 if ((s->type == SK_UDP) || (s->type == SK_IP))
1207 if (sk_disable_mtu_disc4(s) < 0)
1208 return -1;
f9c799a0 1209
05476c4d
OZ
1210 if (s->ttl >= 0)
1211 if (sk_set_ttl4(s, s->ttl) < 0)
1212 return -1;
f9c799a0 1213
05476c4d
OZ
1214 if (s->tos >= 0)
1215 if (sk_set_tos4(s, s->tos) < 0)
1216 return -1;
1217 }
f9c799a0 1218
05476c4d
OZ
1219 if (sk_is_ipv6(s))
1220 {
1221 if (s->flags & SKF_V6ONLY)
1222 if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1223 ERR("IPV6_V6ONLY");
f9c799a0 1224
05476c4d
OZ
1225 if (s->flags & SKF_LADDR_RX)
1226 if (sk_request_cmsg6_pktinfo(s) < 0)
1227 return -1;
f9c799a0 1228
05476c4d
OZ
1229 if (s->flags & SKF_TTL_RX)
1230 if (sk_request_cmsg6_ttl(s) < 0)
1231 return -1;
f9c799a0 1232
05476c4d
OZ
1233 if ((s->type == SK_UDP) || (s->type == SK_IP))
1234 if (sk_disable_mtu_disc6(s) < 0)
1235 return -1;
f9c799a0 1236
05476c4d
OZ
1237 if (s->ttl >= 0)
1238 if (sk_set_ttl6(s, s->ttl) < 0)
1239 return -1;
f9c799a0 1240
05476c4d
OZ
1241 if (s->tos >= 0)
1242 if (sk_set_tos6(s, s->tos) < 0)
1243 return -1;
1244 }
f9c799a0
OZ
1245
1246 return 0;
1247}
1248
05476c4d
OZ
1249static void
1250sk_insert(sock *s)
f9c799a0 1251{
05476c4d
OZ
1252 add_tail(&sock_list, &s->n);
1253 sock_recalc_fdsets_p = 1;
f9c799a0
OZ
1254}
1255
b93abffa 1256static void
b5d9ee5c
MM
1257sk_tcp_connected(sock *s)
1258{
05476c4d
OZ
1259 sockaddr sa;
1260 int sa_len = sizeof(sa);
1261
1262 if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1263 (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1264 log(L_WARN "SOCK: Cannot get local IP address for TCP>");
9be9a264 1265
b5d9ee5c
MM
1266 s->type = SK_TCP;
1267 sk_alloc_bufs(s);
320f4173 1268 s->tx_hook(s);
b5d9ee5c
MM
1269}
1270
b93abffa 1271static int
05476c4d 1272sk_passive_connected(sock *s, int type)
b93abffa 1273{
05476c4d
OZ
1274 sockaddr loc_sa, rem_sa;
1275 int loc_sa_len = sizeof(loc_sa);
1276 int rem_sa_len = sizeof(rem_sa);
cf31112f 1277
05476c4d
OZ
1278 int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1279 if (fd < 0)
1280 {
1281 if ((errno != EINTR) && (errno != EAGAIN))
c025b852 1282 s->err_hook(s, errno);
05476c4d
OZ
1283 return 0;
1284 }
1285
1286 sock *t = sk_new(s->pool);
1287 t->type = type;
1288 t->fd = fd;
1289 t->af = s->af;
1290 t->ttl = s->ttl;
1291 t->tos = s->tos;
1292 t->rbsize = s->rbsize;
1293 t->tbsize = s->tbsize;
1294
1295 if (type == SK_TCP)
1296 {
1297 if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1298 (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1299 log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1300
1301 if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1302 log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1303 }
1304
1305 if (sk_setup(t) < 0)
1306 {
1307 /* FIXME: Call err_hook instead ? */
1308 log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1309
1310 /* FIXME: handle it better in rfree() */
9c89560e 1311 close(t->fd);
05476c4d
OZ
1312 t->fd = -1;
1313 rfree(t);
1314 return 1;
1315 }
1316
1317 sk_insert(t);
1318 sk_alloc_bufs(t);
1319 s->rx_hook(t, 0);
1320 return 1;
b93abffa
MM
1321}
1322
525fa2c1
MM
1323/**
1324 * sk_open - open a socket
1325 * @s: socket
1326 *
1327 * This function takes a socket resource created by sk_new() and
1328 * initialized by the user and binds a corresponding network connection
1329 * to it.
1330 *
1331 * Result: 0 for success, -1 for an error.
1332 */
b5d9ee5c
MM
1333int
1334sk_open(sock *s)
1335{
05476c4d
OZ
1336 int af = BIRD_AF;
1337 int fd = -1;
48e5f32d
OZ
1338 int do_bind = 0;
1339 int bind_port = 0;
1340 ip_addr bind_addr = IPA_NONE;
1341 sockaddr sa;
b5d9ee5c 1342
48e5f32d 1343 switch (s->type)
05476c4d
OZ
1344 {
1345 case SK_TCP_ACTIVE:
1346 s->ttx = ""; /* Force s->ttx != s->tpos */
1347 /* Fall thru */
1348 case SK_TCP_PASSIVE:
1349 fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1350 bind_port = s->sport;
1351 bind_addr = s->saddr;
1352 do_bind = bind_port || ipa_nonzero(bind_addr);
1353 break;
9c89560e 1354
05476c4d
OZ
1355 case SK_UDP:
1356 fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1357 bind_port = s->sport;
1358 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1359 do_bind = 1;
1360 break;
1361
1362 case SK_IP:
1363 fd = socket(af, SOCK_RAW, s->dport);
1364 bind_port = 0;
1365 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1366 do_bind = ipa_nonzero(bind_addr);
1367 break;
1368
1369 case SK_MAGIC:
1370 af = 0;
1371 fd = s->fd;
1372 break;
1373
1374 default:
1375 bug("sk_open() called for invalid sock type %d", s->type);
1376 }
1377
b5d9ee5c 1378 if (fd < 0)
05476c4d
OZ
1379 ERR("socket");
1380
1381 s->af = af;
b5d9ee5c
MM
1382 s->fd = fd;
1383
05476c4d
OZ
1384 if (sk_setup(s) < 0)
1385 goto err;
38a608c5 1386
48e5f32d 1387 if (do_bind)
05476c4d
OZ
1388 {
1389 if (bind_port)
b5d9ee5c 1390 {
05476c4d
OZ
1391 int y = 1;
1392
1393 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1394 ERR2("SO_REUSEADDR");
48e5f32d 1395
8931425d 1396#ifdef CONFIG_NO_IFACE_BIND
05476c4d
OZ
1397 /* Workaround missing ability to bind to an iface */
1398 if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1399 {
1400 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1401 ERR2("SO_REUSEPORT");
1402 }
8931425d 1403#endif
b5d9ee5c 1404 }
9c89560e
OZ
1405#ifdef IP_PORTRANGE
1406 else if (s->flags & SKF_HIGH_PORT)
1407 {
1408 int range = IP_PORTRANGE_HIGH;
1409 if (setsockopt(fd, IPPROTO_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
1410 log(L_WARN "Socket error: %s%#m", "IP_PORTRANGE");
1411 }
1412#endif
48e5f32d 1413
05476c4d
OZ
1414 sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
1415 if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1416 ERR2("bind");
1417 }
d51aa281
OZ
1418
1419 if (s->password)
05476c4d
OZ
1420 if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0)
1421 goto err;
d51aa281 1422
48e5f32d 1423 switch (s->type)
05476c4d
OZ
1424 {
1425 case SK_TCP_ACTIVE:
1426 sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
1427 if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1428 sk_tcp_connected(s);
1429 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1430 errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1431 ERR2("connect");
1432 break;
1433
1434 case SK_TCP_PASSIVE:
1435 if (listen(fd, 8) < 0)
1436 ERR2("listen");
1437 break;
1438
1439 case SK_MAGIC:
1440 break;
1441
1442 default:
1443 sk_alloc_bufs(s);
1444 }
b5d9ee5c 1445
bf139664
OZ
1446 if (!(s->flags & SKF_THREAD))
1447 sk_insert(s);
b5d9ee5c
MM
1448 return 0;
1449
05476c4d 1450err:
b5d9ee5c
MM
1451 close(fd);
1452 s->fd = -1;
1453 return -1;
1454}
1455
05476c4d 1456int
b93abffa
MM
1457sk_open_unix(sock *s, char *name)
1458{
b93abffa 1459 struct sockaddr_un sa;
05476c4d
OZ
1460 int fd;
1461
1462 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
b93abffa
MM
1463
1464 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1465 if (fd < 0)
05476c4d
OZ
1466 return -1;
1467
1468 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1469 return -1;
68fa95cf 1470
97e46d28 1471 /* Path length checked in test_old_bird() */
b93abffa 1472 sa.sun_family = AF_UNIX;
97c6fa02 1473 strcpy(sa.sun_path, name);
05476c4d 1474
0b3bf4b1 1475 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
05476c4d
OZ
1476 return -1;
1477
1478 if (listen(fd, 8) < 0)
1479 return -1;
1480
1481 s->fd = fd;
38a608c5 1482 sk_insert(s);
05476c4d
OZ
1483 return 0;
1484}
1485
1486
/* Receive side: room for packet info plus TTL, whichever family needs more */
#define CMSG_RX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)

/* Transmit side: room for packet info only, whichever family needs more */
#define CMSG_TX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1490
1491static void
1492sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1493{
1494 if (sk_is_ipv4(s))
1495 sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1496 else
1497 sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1498}
1499
1500static void
1501sk_process_cmsgs(sock *s, struct msghdr *msg)
1502{
1503 struct cmsghdr *cm;
1504
1505 s->laddr = IPA_NONE;
1506 s->lifindex = 0;
1507 s->rcv_ttl = -1;
1508
1509 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1510 {
1511 if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1512 {
1513 sk_process_cmsg4_pktinfo(s, cm);
1514 sk_process_cmsg4_ttl(s, cm);
1515 }
b93abffa 1516
05476c4d
OZ
1517 if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1518 {
1519 sk_process_cmsg6_pktinfo(s, cm);
1520 sk_process_cmsg6_ttl(s, cm);
1521 }
1522 }
b93abffa
MM
1523}
1524
48e5f32d
OZ
1525
1526static inline int
1527sk_sendmsg(sock *s)
1528{
1529 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1530 byte cmsg_buf[CMSG_TX_SPACE];
1531 sockaddr dst;
1532
05476c4d 1533 sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
48e5f32d
OZ
1534
1535 struct msghdr msg = {
05476c4d
OZ
1536 .msg_name = &dst.sa,
1537 .msg_namelen = SA_LEN(dst),
48e5f32d
OZ
1538 .msg_iov = &iov,
1539 .msg_iovlen = 1
1540 };
1541
1542#ifdef CONFIG_USE_HDRINCL
1543 byte hdr[20];
1544 struct iovec iov2[2] = { {hdr, 20}, iov };
1545
1546 if (s->flags & SKF_HDRINCL)
1547 {
05476c4d 1548 sk_prepare_ip_header(s, hdr, iov.iov_len);
48e5f32d
OZ
1549 msg.msg_iov = iov2;
1550 msg.msg_iovlen = 2;
1551 }
1552#endif
1553
1554 if (s->flags & SKF_PKTINFO)
05476c4d 1555 sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
48e5f32d
OZ
1556
1557 return sendmsg(s->fd, &msg, 0);
1558}
1559
1560static inline int
1561sk_recvmsg(sock *s)
1562{
1563 struct iovec iov = {s->rbuf, s->rbsize};
1564 byte cmsg_buf[CMSG_RX_SPACE];
1565 sockaddr src;
1566
1567 struct msghdr msg = {
05476c4d
OZ
1568 .msg_name = &src.sa,
1569 .msg_namelen = sizeof(src), // XXXX ??
48e5f32d
OZ
1570 .msg_iov = &iov,
1571 .msg_iovlen = 1,
1572 .msg_control = cmsg_buf,
1573 .msg_controllen = sizeof(cmsg_buf),
1574 .msg_flags = 0
1575 };
1576
1577 int rv = recvmsg(s->fd, &msg, 0);
1578 if (rv < 0)
1579 return rv;
1580
1581 //ifdef IPV4
1582 // if (cf_type == SK_IP)
1583 // rv = ipv4_skip_header(pbuf, rv);
1584 //endif
1585
05476c4d
OZ
1586 sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1587 sk_process_cmsgs(s, &msg);
48e5f32d
OZ
1588
1589 if (msg.msg_flags & MSG_TRUNC)
1590 s->flags |= SKF_TRUNCATED;
1591 else
1592 s->flags &= ~SKF_TRUNCATED;
1593
1594 return rv;
1595}
1596
1597
353729f5
OZ
1598static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1599
b5d9ee5c
MM
1600static int
1601sk_maybe_write(sock *s)
1602{
1603 int e;
1604
1605 switch (s->type)
05476c4d
OZ
1606 {
1607 case SK_TCP:
1608 case SK_MAGIC:
1609 case SK_UNIX:
1610 while (s->ttx != s->tpos)
b5d9ee5c 1611 {
05476c4d
OZ
1612 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1613
1614 if (e < 0)
1615 {
1616 if (errno != EINTR && errno != EAGAIN)
b5d9ee5c 1617 {
05476c4d
OZ
1618 reset_tx_buffer(s);
1619 /* EPIPE is just a connection close notification during TX */
1620 s->err_hook(s, (errno != EPIPE) ? errno : 0);
1621 return -1;
b5d9ee5c 1622 }
05476c4d
OZ
1623 return 0;
1624 }
1625 s->ttx += e;
1626 }
1627 reset_tx_buffer(s);
1628 return 1;
1629
1630 case SK_UDP:
1631 case SK_IP:
1632 {
1633 if (s->tbuf == s->tpos)
b5d9ee5c 1634 return 1;
05476c4d
OZ
1635
1636 e = sk_sendmsg(s);
1637
1638 if (e < 0)
1639 {
1640 if (errno != EINTR && errno != EAGAIN)
1641 {
1642 reset_tx_buffer(s);
1643 s->err_hook(s, errno);
1644 return -1;
1645 }
1646
1647 if (!s->tx_hook)
1648 reset_tx_buffer(s);
1649 return 0;
b5d9ee5c 1650 }
05476c4d
OZ
1651 reset_tx_buffer(s);
1652 return 1;
b5d9ee5c 1653 }
05476c4d
OZ
1654 default:
1655 bug("sk_maybe_write: unknown socket type %d", s->type);
1656 }
b5d9ee5c
MM
1657}
1658
ea89da38
OZ
1659int
1660sk_rx_ready(sock *s)
1661{
1662 fd_set rd, wr;
1663 struct timeval timo;
1664 int rv;
1665
1666 FD_ZERO(&rd);
1667 FD_ZERO(&wr);
1668 FD_SET(s->fd, &rd);
1669
1670 timo.tv_sec = 0;
1671 timo.tv_usec = 0;
1672
1673 redo:
1674 rv = select(s->fd+1, &rd, &wr, NULL, &timo);
9c89560e 1675
ea89da38
OZ
1676 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1677 goto redo;
1678
1679 return rv;
1680}
1681
525fa2c1
MM
1682/**
1683 * sk_send - send data to a socket
1684 * @s: socket
1685 * @len: number of bytes to send
1686 *
1687 * This function sends @len bytes of data prepared in the
1688 * transmit buffer of the socket @s to the network connection.
1689 * If the packet can be sent immediately, it does so and returns
1690 * 1, else it queues the packet for later processing, returns 0
1691 * and calls the @tx_hook of the socket when the tranmission
1692 * takes place.
1693 */
b5d9ee5c
MM
1694int
1695sk_send(sock *s, unsigned len)
1696{
b5d9ee5c
MM
1697 s->ttx = s->tbuf;
1698 s->tpos = s->tbuf + len;
1699 return sk_maybe_write(s);
1700}
1701
525fa2c1
MM
1702/**
1703 * sk_send_to - send data to a specific destination
1704 * @s: socket
1705 * @len: number of bytes to send
1706 * @addr: IP address to send the packet to
1707 * @port: port to send the packet to
1708 *
2e9b2421 1709 * This is a sk_send() replacement for connection-less packet sockets
525fa2c1 1710 * which allows destination of the packet to be chosen dynamically.
48e5f32d 1711 * Raw IP sockets should use 0 for @port.
525fa2c1 1712 */
b5d9ee5c
MM
1713int
1714sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1715{
353729f5 1716 s->daddr = addr;
48e5f32d
OZ
1717 if (port)
1718 s->dport = port;
1719
b5d9ee5c
MM
1720 s->ttx = s->tbuf;
1721 s->tpos = s->tbuf + len;
1722 return sk_maybe_write(s);
1723}
1724
353729f5
OZ
1725/*
1726int
1727sk_send_full(sock *s, unsigned len, struct iface *ifa,
1728 ip_addr saddr, ip_addr daddr, unsigned dport)
1729{
1730 s->iface = ifa;
1731 s->saddr = saddr;
1732 s->daddr = daddr;
1733 s->dport = dport;
1734 s->ttx = s->tbuf;
1735 s->tpos = s->tbuf + len;
1736 return sk_maybe_write(s);
1737}
1738*/
1739
6a8d3f1c
OZ
1740 /* sk_read() and sk_write() are called from BFD's event loop */
1741
1742int
b5d9ee5c
MM
1743sk_read(sock *s)
1744{
1745 switch (s->type)
05476c4d
OZ
1746 {
1747 case SK_TCP_PASSIVE:
1748 return sk_passive_connected(s, SK_TCP);
1749
1750 case SK_UNIX_PASSIVE:
1751 return sk_passive_connected(s, SK_UNIX);
1752
1753 case SK_TCP:
1754 case SK_UNIX:
b5d9ee5c 1755 {
05476c4d
OZ
1756 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1757
1758 if (c < 0)
b93abffa 1759 {
05476c4d
OZ
1760 if (errno != EINTR && errno != EAGAIN)
1761 s->err_hook(s, errno);
b5d9ee5c 1762 }
05476c4d
OZ
1763 else if (!c)
1764 s->err_hook(s, 0);
1765 else
b5d9ee5c 1766 {
05476c4d
OZ
1767 s->rpos += c;
1768 if (s->rx_hook(s, s->rpos - s->rbuf))
1769 {
1770 /* We need to be careful since the socket could have been deleted by the hook */
1771 if (current_sock == s)
1772 s->rpos = s->rbuf;
1773 }
1774 return 1;
b5d9ee5c 1775 }
05476c4d
OZ
1776 return 0;
1777 }
353729f5 1778
05476c4d
OZ
1779 case SK_MAGIC:
1780 return s->rx_hook(s, 0);
b5d9ee5c 1781
05476c4d
OZ
1782 default:
1783 {
1784 int e = sk_recvmsg(s);
353729f5 1785
05476c4d
OZ
1786 if (e < 0)
1787 {
1788 if (errno != EINTR && errno != EAGAIN)
1789 s->err_hook(s, errno);
1790 return 0;
b5d9ee5c 1791 }
05476c4d
OZ
1792
1793 s->rpos = s->rbuf + e;
1794 s->rx_hook(s, e);
1795 return 1;
b5d9ee5c 1796 }
05476c4d 1797 }
b5d9ee5c
MM
1798}
1799
6a8d3f1c 1800int
b5d9ee5c
MM
1801sk_write(sock *s)
1802{
320f4173 1803 switch (s->type)
05476c4d
OZ
1804 {
1805 case SK_TCP_ACTIVE:
320f4173 1806 {
05476c4d
OZ
1807 sockaddr sa;
1808 sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1809
1810 if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1811 sk_tcp_connected(s);
1812 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1813 s->err_hook(s, errno);
38a608c5 1814 return 0;
320f4173 1815 }
05476c4d
OZ
1816
1817 default:
1818 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1819 {
1820 if (s->tx_hook)
1821 s->tx_hook(s);
1822 return 1;
1823 }
1824 return 0;
1825 }
b5d9ee5c
MM
1826}
1827
1828void
1829sk_dump_all(void)
1830{
1831 node *n;
1832 sock *s;
1833
1834 debug("Open sockets:\n");
1835 WALK_LIST(n, sock_list)
05476c4d
OZ
1836 {
1837 s = SKIP_BACK(sock, n, n);
1838 debug("%p ", s);
1839 sk_dump(&s->r);
1840 }
b5d9ee5c
MM
1841 debug("\n");
1842}
1843
b5d9ee5c 1844
8bcb5fb1
OZ
1845/*
1846 * Internal event log and watchdog
1847 */
1848
1849#define EVENT_LOG_LENGTH 32
1850
1851struct event_log_entry
1852{
1853 void *hook;
1854 void *data;
1855 btime timestamp;
1856 btime duration;
1857};
1858
1859static struct event_log_entry event_log[EVENT_LOG_LENGTH];
1860static struct event_log_entry *event_open;
1861static int event_log_pos, event_log_num, watchdog_active;
1862static btime last_time;
1863static btime loop_time;
1864
1865static void
1866io_update_time(void)
1867{
1868 struct timespec ts;
1869 int rv;
1870
1871 if (!clock_monotonic_available)
1872 return;
1873
1874 /*
1875 * This is third time-tracking procedure (after update_times() above and
1876 * times_update() in BFD), dedicated to internal event log and latency
1877 * tracking. Hopefully, we consolidate these sometimes.
1878 */
1879
1880 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
1881 if (rv < 0)
1882 die("clock_gettime: %m");
1883
1884 last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
1885
1886 if (event_open)
1887 {
1888 event_open->duration = last_time - event_open->timestamp;
1889
1890 if (event_open->duration > config->latency_limit)
1891 log(L_WARN "Event 0x%p 0x%p took %d ms",
1892 event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
1893
1894 event_open = NULL;
1895 }
1896}
1897
1898/**
1899 * io_log_event - mark approaching event into event log
1900 * @hook: event hook address
1901 * @data: event data address
1902 *
1903 * Store info (hook, data, timestamp) about the following internal event into
1904 * a circular event log (@event_log). When latency tracking is enabled, the log
1905 * entry is kept open (in @event_open) so the duration can be filled later.
1906 */
1907void
1908io_log_event(void *hook, void *data)
1909{
1910 if (config->latency_debug)
1911 io_update_time();
1912
1913 struct event_log_entry *en = event_log + event_log_pos;
1914
1915 en->hook = hook;
1916 en->data = data;
1917 en->timestamp = last_time;
1918 en->duration = 0;
1919
1920 event_log_num++;
1921 event_log_pos++;
1922 event_log_pos %= EVENT_LOG_LENGTH;
1923
1924 event_open = config->latency_debug ? en : NULL;
1925}
1926
1927static inline void
1928io_close_event(void)
1929{
1930 if (event_open)
1931 io_update_time();
1932}
1933
1934void
1935io_log_dump(void)
1936{
1937 int i;
1938
1939 log(L_DEBUG "Event log:");
1940 for (i = 0; i < EVENT_LOG_LENGTH; i++)
1941 {
1942 struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
1943 if (en->hook)
1944 log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
1945 (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
1946 }
1947}
1948
1949void
1950watchdog_sigalrm(int sig UNUSED)
1951{
1952 /* Update last_time and duration, but skip latency check */
1953 config->latency_limit = 0xffffffff;
1954 io_update_time();
1955
1956 /* We want core dump */
1957 abort();
1958}
1959
1960static inline void
1961watchdog_start1(void)
1962{
1963 io_update_time();
1964
1965 loop_time = last_time;
1966}
1967
1968static inline void
1969watchdog_start(void)
1970{
1971 io_update_time();
1972
1973 loop_time = last_time;
1974 event_log_num = 0;
1975
1976 if (config->watchdog_timeout)
1977 {
1978 alarm(config->watchdog_timeout);
1979 watchdog_active = 1;
1980 }
1981}
1982
1983static inline void
1984watchdog_stop(void)
1985{
1986 io_update_time();
1987
1988 if (watchdog_active)
1989 {
1990 alarm(0);
1991 watchdog_active = 0;
1992 }
1993
1994 btime duration = last_time - loop_time;
1995 if (duration > config->watchdog_warning)
1996 log(L_WARN "I/O loop cycle took %d ms for %d events",
1997 (int) (duration TO_MS), event_log_num);
1998}
1999
2000
b5d9ee5c
MM
2001/*
2002 * Main I/O Loop
2003 */
2004
4c9dd1e4
MM
/* Requests tested by io_loop() before it enters select() */
volatile int async_config_flag;		/* Asynchronous reconfiguration scheduled */
volatile int async_dump_flag;		/* Asynchronous dump scheduled */
2007
b5d9ee5c
MM
2008void
2009io_init(void)
2010{
2011 init_list(&near_timers);
2012 init_list(&far_timers);
2013 init_list(&sock_list);
e8f73195 2014 init_list(&global_event_list);
7e5f5ffd 2015 krt_io_init();
fd91ae33
OZ
2016 init_times();
2017 update_times();
a92cf57d 2018 boot_time = now;
fd91ae33 2019 srandom((int) now_real);
b5d9ee5c
MM
2020}
2021
ea89da38
OZ
/* Upper bound for short_loops before io_loop() runs its second RX pass anyway */
#define SHORT_LOOP_MAX 10

/* Consecutive io_loop() iterations that skipped the second RX pass */
static int short_loops = 0;
2024
b5d9ee5c
MM
2025void
2026io_loop(void)
2027{
2028 fd_set rd, wr;
2029 struct timeval timo;
2030 time_t tout;
30770df2 2031 int hi, events;
b5d9ee5c 2032 sock *s;
38a608c5 2033 node *n;
b5d9ee5c 2034
8bcb5fb1 2035 watchdog_start1();
38a608c5 2036 sock_recalc_fdsets_p = 1;
b5d9ee5c
MM
2037 for(;;)
2038 {
30770df2 2039 events = ev_run_list(&global_event_list);
fd91ae33 2040 update_times();
b5d9ee5c
MM
2041 tout = tm_first_shot();
2042 if (tout <= now)
2043 {
2044 tm_shot();
2045 continue;
2046 }
a92cf57d 2047 timo.tv_sec = events ? 0 : MIN(tout - now, 3);
30770df2 2048 timo.tv_usec = 0;
b5d9ee5c 2049
8bcb5fb1
OZ
2050 io_close_event();
2051
38a608c5
MM
2052 if (sock_recalc_fdsets_p)
2053 {
2054 sock_recalc_fdsets_p = 0;
2055 FD_ZERO(&rd);
2056 FD_ZERO(&wr);
2057 }
2058
b5d9ee5c
MM
2059 hi = 0;
2060 WALK_LIST(n, sock_list)
2061 {
2062 s = SKIP_BACK(sock, n, n);
2063 if (s->rx_hook)
2064 {
2065 FD_SET(s->fd, &rd);
2066 if (s->fd > hi)
2067 hi = s->fd;
2068 }
38a608c5
MM
2069 else
2070 FD_CLR(s->fd, &rd);
b5d9ee5c
MM
2071 if (s->tx_hook && s->ttx != s->tpos)
2072 {
2073 FD_SET(s->fd, &wr);
2074 if (s->fd > hi)
2075 hi = s->fd;
2076 }
38a608c5
MM
2077 else
2078 FD_CLR(s->fd, &wr);
b5d9ee5c
MM
2079 }
2080
4c9dd1e4
MM
2081 /*
2082 * Yes, this is racy. But even if the signal comes before this test
2083 * and entering select(), it gets caught on the next timer tick.
2084 */
2085
2086 if (async_config_flag)
2087 {
8bcb5fb1 2088 io_log_event(async_config, NULL);
4c9dd1e4
MM
2089 async_config();
2090 async_config_flag = 0;
f4aabcee 2091 continue;
4c9dd1e4
MM
2092 }
2093 if (async_dump_flag)
2094 {
8bcb5fb1 2095 io_log_event(async_dump, NULL);
4c9dd1e4
MM
2096 async_dump();
2097 async_dump_flag = 0;
f4aabcee
MM
2098 continue;
2099 }
2100 if (async_shutdown_flag)
2101 {
8bcb5fb1 2102 io_log_event(async_shutdown, NULL);
f4aabcee
MM
2103 async_shutdown();
2104 async_shutdown_flag = 0;
2105 continue;
4c9dd1e4
MM
2106 }
2107
2108 /* And finally enter select() to find active sockets */
8bcb5fb1 2109 watchdog_stop();
b5d9ee5c 2110 hi = select(hi+1, &rd, &wr, NULL, &timo);
8bcb5fb1 2111 watchdog_start();
ea89da38 2112
b5d9ee5c
MM
2113 if (hi < 0)
2114 {
2115 if (errno == EINTR || errno == EAGAIN)
2116 continue;
2117 die("select: %m");
2118 }
2119 if (hi)
2120 {
ea89da38
OZ
2121 /* guaranteed to be non-empty */
2122 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2123
38a608c5 2124 while (current_sock)
b5d9ee5c 2125 {
38a608c5
MM
2126 sock *s = current_sock;
2127 int e;
ea89da38
OZ
2128 int steps;
2129
2130 steps = MAX_STEPS;
2131 if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
38a608c5
MM
2132 do
2133 {
4323099d 2134 steps--;
8bcb5fb1 2135 io_log_event(s->rx_hook, s->data);
38a608c5
MM
2136 e = sk_read(s);
2137 if (s != current_sock)
2138 goto next;
2139 }
4323099d
OZ
2140 while (e && s->rx_hook && steps);
2141
2142 steps = MAX_STEPS;
38a608c5
MM
2143 if (FD_ISSET(s->fd, &wr))
2144 do
2145 {
4323099d 2146 steps--;
8bcb5fb1 2147 io_log_event(s->tx_hook, s->data);
38a608c5
MM
2148 e = sk_write(s);
2149 if (s != current_sock)
2150 goto next;
2151 }
4323099d 2152 while (e && steps);
38a608c5
MM
2153 current_sock = sk_next(s);
2154 next: ;
b5d9ee5c 2155 }
ea89da38
OZ
2156
2157 short_loops++;
2158 if (events && (short_loops < SHORT_LOOP_MAX))
2159 continue;
2160 short_loops = 0;
2161
2162 int count = 0;
2163 current_sock = stored_sock;
2164 if (current_sock == NULL)
2165 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2166
2167 while (current_sock && count < MAX_RX_STEPS)
2168 {
2169 sock *s = current_sock;
0479b443 2170 int e UNUSED;
ea89da38
OZ
2171
2172 if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
2173 {
2174 count++;
8bcb5fb1 2175 io_log_event(s->rx_hook, s->data);
ea89da38
OZ
2176 e = sk_read(s);
2177 if (s != current_sock)
2178 goto next2;
2179 }
2180 current_sock = sk_next(s);
2181 next2: ;
2182 }
2183
2184 stored_sock = current_sock;
b5d9ee5c
MM
2185 }
2186 }
2187}
41c8976e
OF
2188
/*
 * Check whether something already listens on the control socket @path.
 * Dies when a connection succeeds (another BIRD is running), when the
 * socket cannot be created or when @path does not fit into sun_path.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int probe = socket(AF_UNIX, SOCK_STREAM, 0);

  if (probe < 0)
    die("Cannot create socket: %m");

  /* The path plus its terminator must fit into sun_path */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  bzero(&sa, sizeof(sa));
  strcpy(sa.sun_path, path);
  sa.sun_family = AF_UNIX;

  /* A successful connect means somebody is serving this socket */
  if (connect(probe, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");

  close(probe);
}