]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/unix/io.c
Minor cleanups and fixes
[thirdparty/bird.git] / sysdep / unix / io.c
CommitLineData
b5d9ee5c
MM
1/*
2 * BIRD Internet Routing Daemon -- Unix I/O
3 *
38a608c5 4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
b1a1faba 5 * (c) 2004 Ondrej Filip <feela@network.cz>
b5d9ee5c
MM
6 *
7 * Can be freely distributed and used under the terms of the GNU GPL.
8 */
9
607d9914
OZ
10/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11 if _GNU_SOURCE is not defined. */
c8cafc8e
OZ
12#ifndef _GNU_SOURCE
13#define _GNU_SOURCE
14#endif
607d9914 15
b5d9ee5c
MM
16#include <stdio.h>
17#include <stdlib.h>
01b776e1 18#include <time.h>
b5d9ee5c
MM
19#include <sys/time.h>
20#include <sys/types.h>
21#include <sys/socket.h>
46a82e9c 22#include <sys/uio.h>
b93abffa 23#include <sys/un.h>
e1c13a5a 24#include <poll.h>
b5d9ee5c 25#include <unistd.h>
a0b176e3 26#include <fcntl.h>
b5d9ee5c 27#include <errno.h>
05476c4d 28#include <net/if.h>
d0e9b36d 29#include <netinet/in.h>
48e5f32d
OZ
30#include <netinet/tcp.h>
31#include <netinet/udp.h>
93e868c7 32#include <netinet/icmp6.h>
b5d9ee5c
MM
33
34#include "nest/bird.h"
35#include "lib/lists.h"
36#include "lib/resource.h"
7152e5ef 37#include "sysdep/unix/timer.h"
b5d9ee5c 38#include "lib/socket.h"
e8f73195 39#include "lib/event.h"
afa8937a 40#include "lib/string.h"
b5d9ee5c
MM
41#include "nest/iface.h"
42
7152e5ef
JMM
43#include "sysdep/unix/unix.h"
44#include CONFIG_INCLUDE_SYSIO_H
b5d9ee5c 45
ea89da38 46/* Maximum number of calls of tx handler for one socket in one
e1c13a5a 47 * poll iteration. Should be small enough to not monopolize CPU by
4323099d
OZ
48 * one protocol instance.
49 */
50#define MAX_STEPS 4
51
e1c13a5a 52/* Maximum number of calls of rx handler for all sockets in one poll
ea89da38
OZ
53 iteration. RX callbacks are often much more costly so we limit
54 this to get small latencies */
55#define MAX_RX_STEPS 4
56
a9c986f9
MM
57/*
58 * Tracked Files
59 */
60
61struct rfile {
62 resource r;
63 FILE *f;
64};
65
66static void
67rf_free(resource *r)
68{
69 struct rfile *a = (struct rfile *) r;
70
71 fclose(a->f);
72}
73
74static void
75rf_dump(resource *r)
76{
77 struct rfile *a = (struct rfile *) r;
78
79 debug("(FILE *%p)\n", a->f);
80}
81
82static struct resclass rf_class = {
83 "FILE",
84 sizeof(struct rfile),
85 rf_free,
e81b440f 86 rf_dump,
acb60628 87 NULL,
e81b440f 88 NULL
a9c986f9
MM
89};
90
91void *
f78056fb 92tracked_fopen(pool *p, char *name, char *mode)
a9c986f9
MM
93{
94 FILE *f = fopen(name, mode);
95
96 if (f)
97 {
98 struct rfile *r = ralloc(p, &rf_class);
99 r->f = f;
100 }
101 return f;
102}
103
525fa2c1
MM
104/**
105 * DOC: Timers
106 *
107 * Timers are resources which represent a wish of a module to call
108 * a function at the specified time. The platform dependent code
58f7d004 109 * doesn't guarantee exact timing, only that a timer function
525fa2c1
MM
110 * won't be called before the requested time.
111 *
fd91ae33
OZ
112 * In BIRD, time is represented by values of the &bird_clock_t type
113 * which are integral numbers interpreted as a relative number of seconds since
114 * some fixed time point in past. The current time can be read
115 * from variable @now with reasonable accuracy and is monotonic. There is also
116 * a current 'absolute' time in variable @now_real reported by OS.
525fa2c1
MM
117 *
118 * Each timer is described by a &timer structure containing a pointer
119 * to the handler function (@hook), data private to this function (@data),
120 * time the function should be called at (@expires, 0 for inactive timers),
121 * for the other fields see |timer.h|.
b5d9ee5c
MM
122 */
123
124#define NEAR_TIMER_LIMIT 4
125
b5d9ee5c
MM
126static list near_timers, far_timers;
127static bird_clock_t first_far_timer = TIME_INFINITY;
128
002b6423 129/* now must be different from 0, because 0 is a special value in timer->expires */
a92cf57d 130bird_clock_t now = 1, now_real, boot_time;
fd91ae33
OZ
131
132static void
133update_times_plain(void)
134{
135 bird_clock_t new_time = time(NULL);
136 int delta = new_time - now_real;
137
138 if ((delta >= 0) && (delta < 60))
139 now += delta;
140 else if (now_real != 0)
141 log(L_WARN "Time jump, delta %d s", delta);
142
143 now_real = new_time;
144}
145
146static void
147update_times_gettime(void)
148{
149 struct timespec ts;
150 int rv;
151
152 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
153 if (rv != 0)
154 die("clock_gettime: %m");
155
156 if (ts.tv_sec != now) {
157 if (ts.tv_sec < now)
158 log(L_ERR "Monotonic timer is broken");
159
160 now = ts.tv_sec;
161 now_real = time(NULL);
162 }
163}
164
165static int clock_monotonic_available;
166
167static inline void
168update_times(void)
169{
170 if (clock_monotonic_available)
171 update_times_gettime();
172 else
173 update_times_plain();
174}
175
176static inline void
177init_times(void)
178{
179 struct timespec ts;
180 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
181 if (!clock_monotonic_available)
182 log(L_WARN "Monotonic timer is missing");
183}
184
b5d9ee5c
MM
185
186static void
187tm_free(resource *r)
188{
189 timer *t = (timer *) r;
190
191 tm_stop(t);
192}
193
194static void
195tm_dump(resource *r)
196{
197 timer *t = (timer *) r;
198
e8f73195 199 debug("(code %p, data %p, ", t->hook, t->data);
af847acc
MM
200 if (t->randomize)
201 debug("rand %d, ", t->randomize);
202 if (t->recurrent)
203 debug("recur %d, ", t->recurrent);
b5d9ee5c
MM
204 if (t->expires)
205 debug("expires in %d sec)\n", t->expires - now);
206 else
207 debug("inactive)\n");
208}
209
210static struct resclass tm_class = {
211 "Timer",
212 sizeof(timer),
213 tm_free,
e81b440f 214 tm_dump,
acb60628 215 NULL,
e81b440f 216 NULL
b5d9ee5c
MM
217};
218
525fa2c1
MM
219/**
220 * tm_new - create a timer
221 * @p: pool
222 *
223 * This function creates a new timer resource and returns
224 * a pointer to it. To use the timer, you need to fill in
225 * the structure fields and call tm_start() to start timing.
226 */
b5d9ee5c
MM
227timer *
228tm_new(pool *p)
229{
230 timer *t = ralloc(p, &tm_class);
b5d9ee5c
MM
231 return t;
232}
233
234static inline void
235tm_insert_near(timer *t)
236{
237 node *n = HEAD(near_timers);
238
239 while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
240 n = n->next;
241 insert_node(&t->n, n->prev);
242}
243
525fa2c1
MM
244/**
245 * tm_start - start a timer
246 * @t: timer
247 * @after: number of seconds the timer should be run after
248 *
249 * This function schedules the hook function of the timer to
250 * be called after @after seconds. If the timer has been already
251 * started, it's @expire time is replaced by the new value.
252 *
253 * You can have set the @randomize field of @t, the timeout
254 * will be increased by a random number of seconds chosen
255 * uniformly from range 0 .. @randomize.
256 *
257 * You can call tm_start() from the handler function of the timer
258 * to request another run of the timer. Also, you can set the @recurrent
259 * field to have the timer re-added automatically with the same timeout.
260 */
b5d9ee5c
MM
261void
262tm_start(timer *t, unsigned after)
263{
264 bird_clock_t when;
265
266 if (t->randomize)
af847acc 267 after += random() % (t->randomize + 1);
b5d9ee5c
MM
268 when = now + after;
269 if (t->expires == when)
270 return;
271 if (t->expires)
272 rem_node(&t->n);
273 t->expires = when;
274 if (after <= NEAR_TIMER_LIMIT)
275 tm_insert_near(t);
276 else
277 {
278 if (!first_far_timer || first_far_timer > when)
279 first_far_timer = when;
280 add_tail(&far_timers, &t->n);
281 }
282}
283
525fa2c1
MM
284/**
285 * tm_stop - stop a timer
286 * @t: timer
287 *
288 * This function stops a timer. If the timer is already stopped,
289 * nothing happens.
290 */
b5d9ee5c
MM
291void
292tm_stop(timer *t)
293{
294 if (t->expires)
295 {
296 rem_node(&t->n);
297 t->expires = 0;
298 }
299}
300
301static void
302tm_dump_them(char *name, list *l)
303{
304 node *n;
305 timer *t;
306
307 debug("%s timers:\n", name);
308 WALK_LIST(n, *l)
309 {
310 t = SKIP_BACK(timer, n, n);
311 debug("%p ", t);
312 tm_dump(&t->r);
313 }
314 debug("\n");
315}
316
317void
318tm_dump_all(void)
319{
320 tm_dump_them("Near", &near_timers);
321 tm_dump_them("Far", &far_timers);
322}
323
324static inline time_t
325tm_first_shot(void)
326{
327 time_t x = first_far_timer;
328
329 if (!EMPTY_LIST(near_timers))
330 {
331 timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
332 if (t->expires < x)
333 x = t->expires;
334 }
335 return x;
336}
337
8bcb5fb1
OZ
338void io_log_event(void *hook, void *data);
339
b5d9ee5c
MM
340static void
341tm_shot(void)
342{
343 timer *t;
344 node *n, *m;
345
346 if (first_far_timer <= now)
347 {
28a9a189 348 bird_clock_t limit = now + NEAR_TIMER_LIMIT;
b5d9ee5c
MM
349 first_far_timer = TIME_INFINITY;
350 n = HEAD(far_timers);
351 while (m = n->next)
352 {
353 t = SKIP_BACK(timer, n, n);
354 if (t->expires <= limit)
355 {
356 rem_node(n);
357 tm_insert_near(t);
358 }
359 else if (t->expires < first_far_timer)
360 first_far_timer = t->expires;
361 n = m;
362 }
363 }
364 while ((n = HEAD(near_timers)) -> next)
365 {
af847acc 366 int delay;
b5d9ee5c
MM
367 t = SKIP_BACK(timer, n, n);
368 if (t->expires > now)
369 break;
370 rem_node(n);
af847acc 371 delay = t->expires - now;
b5d9ee5c 372 t->expires = 0;
af847acc
MM
373 if (t->recurrent)
374 {
375 int i = t->recurrent - delay;
376 if (i < 0)
377 i = 0;
378 tm_start(t, i);
379 }
8bcb5fb1 380 io_log_event(t->hook, t->data);
b5d9ee5c
MM
381 t->hook(t);
382 }
383}
384
0d3effcf
OF
385/**
386 * tm_parse_datetime - parse a date and time
387 * @x: datetime string
388 *
389 * tm_parse_datetime() takes a textual representation of
390 * a date and time (dd-mm-yyyy hh:mm:ss)
391 * and converts it to the corresponding value of type &bird_clock_t.
392 */
393bird_clock_t
394tm_parse_datetime(char *x)
395{
396 struct tm tm;
397 int n;
398 time_t t;
399
400 if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
401 return tm_parse_date(x);
402 tm.tm_mon--;
403 tm.tm_year -= 1900;
404 t = mktime(&tm);
405 if (t == (time_t) -1)
406 return 0;
407 return t;
408}
525fa2c1
MM
409/**
410 * tm_parse_date - parse a date
411 * @x: date string
412 *
413 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
414 * and converts it to the corresponding value of type &bird_clock_t.
415 */
913f7dc9
MM
416bird_clock_t
417tm_parse_date(char *x)
418{
419 struct tm tm;
420 int n;
421 time_t t;
422
423 if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
424 return 0;
425 tm.tm_mon--;
426 tm.tm_year -= 1900;
427 tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
428 t = mktime(&tm);
429 if (t == (time_t) -1)
430 return 0;
431 return t;
432}
433
c37e7851
OZ
434static void
435tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
913f7dc9 436{
c37e7851
OZ
437 static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
438 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
913f7dc9 439
c37e7851
OZ
440 if (delta < 20*3600)
441 bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
442 else if (delta < 360*86400)
443 bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
444 else
445 bsprintf(x, "%d", tm->tm_year+1900);
913f7dc9
MM
446}
447
c37e7851
OZ
448#include "conf/conf.h"
449
525fa2c1
MM
450/**
451 * tm_format_datetime - convert date and time to textual representation
452 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
8e433d6a 453 * @fmt_spec: specification of resulting textual representation of the time
525fa2c1
MM
454 * @t: time
455 *
fd91ae33
OZ
456 * This function formats the given relative time value @t to a textual
457 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
525fa2c1 458 */
7a88832e 459void
c37e7851 460tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
7a88832e 461{
c37e7851 462 const char *fmt_used;
7a88832e 463 struct tm *tm;
fd91ae33
OZ
464 bird_clock_t delta = now - t;
465 t = now_real - delta;
7a88832e 466 tm = localtime(&t);
7a88832e 467
c37e7851
OZ
468 if (fmt_spec->fmt1 == NULL)
469 return tm_format_reltime(x, tm, delta);
afa8937a 470
c37e7851
OZ
471 if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
472 fmt_used = fmt_spec->fmt1;
afa8937a 473 else
c37e7851
OZ
474 fmt_used = fmt_spec->fmt2;
475
476 int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
477 if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
478 strcpy(x, "<too-long>");
afa8937a
MM
479}
480
05476c4d 481
525fa2c1
MM
482/**
483 * DOC: Sockets
484 *
485 * Socket resources represent network connections. Their data structure (&socket)
486 * contains a lot of fields defining the exact type of the socket, the local and
487 * remote addresses and ports, pointers to socket buffers and finally pointers to
488 * hook functions to be called when new data have arrived to the receive buffer
489 * (@rx_hook), when the contents of the transmit buffer have been transmitted
490 * (@tx_hook) and when an error or connection close occurs (@err_hook).
491 *
38a608c5 492 * Freeing of sockets from inside socket hooks is perfectly safe.
b5d9ee5c
MM
493 */
494
abae6e9c
MM
495#ifndef SOL_IP
496#define SOL_IP IPPROTO_IP
497#endif
498
b1a1faba
OF
499#ifndef SOL_IPV6
500#define SOL_IPV6 IPPROTO_IPV6
501#endif
502
48e5f32d
OZ
503#ifndef SOL_ICMPV6
504#define SOL_ICMPV6 IPPROTO_ICMPV6
505#endif
506
507
05476c4d
OZ
508/*
509 * Sockaddr helper functions
510 */
38a608c5 511
3e236955 512static inline int UNUSED sockaddr_length(int af)
05476c4d
OZ
513{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
514
515static inline void
3e236955 516sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
38a608c5 517{
05476c4d 518 memset(sa, 0, sizeof(struct sockaddr_in));
71652572 519#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
05476c4d
OZ
520 sa->sin_len = sizeof(struct sockaddr_in);
521#endif
522 sa->sin_family = AF_INET;
523 sa->sin_port = htons(port);
524 sa->sin_addr = ipa_to_in4(a);
38a608c5 525}
b5d9ee5c 526
05476c4d
OZ
527static inline void
528sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
b5d9ee5c 529{
05476c4d
OZ
530 memset(sa, 0, sizeof(struct sockaddr_in6));
531#ifdef SIN6_LEN
532 sa->sin6_len = sizeof(struct sockaddr_in6);
533#endif
534 sa->sin6_family = AF_INET6;
535 sa->sin6_port = htons(port);
536 sa->sin6_flowinfo = 0;
537 sa->sin6_addr = ipa_to_in6(a);
538
539 if (ifa && ipa_is_link_local(a))
540 sa->sin6_scope_id = ifa->index;
4da25acb 541}
b5d9ee5c 542
05476c4d
OZ
543void
544sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
4da25acb 545{
05476c4d 546 if (af == AF_INET)
3e236955 547 sockaddr_fill4((struct sockaddr_in *) sa, a, port);
05476c4d
OZ
548 else if (af == AF_INET6)
549 sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
550 else
551 bug("Unknown AF");
4da25acb
MM
552}
553
05476c4d 554static inline void
3e236955 555sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
4da25acb 556{
05476c4d
OZ
557 *port = ntohs(sa->sin_port);
558 *a = ipa_from_in4(sa->sin_addr);
b5d9ee5c
MM
559}
560
05476c4d
OZ
561static inline void
562sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
48e5f32d 563{
05476c4d
OZ
564 *port = ntohs(sa->sin6_port);
565 *a = ipa_from_in6(sa->sin6_addr);
48e5f32d 566
05476c4d
OZ
567 if (ifa && ipa_is_link_local(*a))
568 *ifa = if_find_by_index(sa->sin6_scope_id);
48e5f32d
OZ
569}
570
05476c4d
OZ
571int
572sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
48e5f32d 573{
05476c4d
OZ
574 if (sa->sa.sa_family != af)
575 goto fail;
48e5f32d 576
05476c4d 577 if (af == AF_INET)
3e236955 578 sockaddr_read4((struct sockaddr_in *) sa, a, port);
05476c4d
OZ
579 else if (af == AF_INET6)
580 sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
581 else
582 goto fail;
48e5f32d 583
05476c4d 584 return 0;
48e5f32d 585
05476c4d
OZ
586 fail:
587 *a = IPA_NONE;
588 *port = 0;
589 return -1;
48e5f32d
OZ
590}
591
48e5f32d 592
05476c4d
OZ
593/*
594 * IPv6 multicast syscalls
595 */
4da25acb 596
05476c4d 597/* Fortunately standardized in RFC 3493 */
b5d9ee5c 598
05476c4d
OZ
/* Initializer for struct ipv6_mreq: group @maddr on the interface @ifa (a struct iface pointer expression) */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
b5d9ee5c 601
05476c4d
OZ
602static inline int
603sk_setup_multicast6(sock *s)
b5d9ee5c 604{
05476c4d
OZ
605 int index = s->iface->index;
606 int ttl = s->ttl;
607 int n = 0;
b5d9ee5c 608
05476c4d
OZ
609 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
610 ERR("IPV6_MULTICAST_IF");
b5d9ee5c 611
05476c4d
OZ
612 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
613 ERR("IPV6_MULTICAST_HOPS");
4f22c981 614
05476c4d
OZ
615 if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
616 ERR("IPV6_MULTICAST_LOOP");
4f22c981 617
05476c4d 618 return 0;
061ab802
OZ
619}
620
05476c4d
OZ
621static inline int
622sk_join_group6(sock *s, ip_addr maddr)
4f22c981 623{
05476c4d 624 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
eb1451a3 625
05476c4d
OZ
626 if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
627 ERR("IPV6_JOIN_GROUP");
4f22c981 628
05476c4d 629 return 0;
b5d9ee5c
MM
630}
631
05476c4d
OZ
632static inline int
633sk_leave_group6(sock *s, ip_addr maddr)
b5d9ee5c 634{
05476c4d 635 struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
b5d9ee5c 636
05476c4d
OZ
637 if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
638 ERR("IPV6_LEAVE_GROUP");
639
640 return 0;
641}
4f22c981 642
bed41728 643
05476c4d
OZ
644/*
645 * IPv6 packet control messages
646 */
bed41728 647
05476c4d 648/* Also standardized, in RFC 3542 */
bed41728 649
dcc60494
OZ
650/*
651 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
652 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
653 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
654 * RFC and we use IPV6_PKTINFO.
655 */
656#ifndef IPV6_RECVPKTINFO
657#define IPV6_RECVPKTINFO IPV6_PKTINFO
658#endif
70e212f9
OZ
659/*
660 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
661 */
662#ifndef IPV6_RECVHOPLIMIT
663#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
664#endif
dcc60494 665
70e212f9 666
05476c4d
OZ
667#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
668#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
bed41728 669
05476c4d
OZ
670static inline int
671sk_request_cmsg6_pktinfo(sock *s)
672{
673 int y = 1;
70e212f9 674
05476c4d
OZ
675 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
676 ERR("IPV6_RECVPKTINFO");
677
678 return 0;
bed41728
OZ
679}
680
05476c4d
OZ
681static inline int
682sk_request_cmsg6_ttl(sock *s)
bed41728 683{
05476c4d 684 int y = 1;
bed41728 685
05476c4d
OZ
686 if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
687 ERR("IPV6_RECVHOPLIMIT");
70e212f9 688
05476c4d
OZ
689 return 0;
690}
70e212f9 691
05476c4d
OZ
692static inline void
693sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
694{
695 if (cm->cmsg_type == IPV6_PKTINFO)
70e212f9 696 {
05476c4d
OZ
697 struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
698 s->laddr = ipa_from_in6(pi->ipi6_addr);
699 s->lifindex = pi->ipi6_ifindex;
70e212f9 700 }
05476c4d 701}
70e212f9 702
05476c4d
OZ
703static inline void
704sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
705{
706 if (cm->cmsg_type == IPV6_HOPLIMIT)
707 s->rcv_ttl = * (int *) CMSG_DATA(cm);
bed41728
OZ
708}
709
05476c4d
OZ
710static inline void
711sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
bed41728
OZ
712{
713 struct cmsghdr *cm;
714 struct in6_pktinfo *pi;
8945f73d 715 int controllen = 0;
bed41728 716
bed41728
OZ
717 msg->msg_control = cbuf;
718 msg->msg_controllen = cbuflen;
719
720 cm = CMSG_FIRSTHDR(msg);
48e5f32d 721 cm->cmsg_level = SOL_IPV6;
bed41728
OZ
722 cm->cmsg_type = IPV6_PKTINFO;
723 cm->cmsg_len = CMSG_LEN(sizeof(*pi));
8945f73d 724 controllen += CMSG_SPACE(sizeof(*pi));
bed41728
OZ
725
726 pi = (struct in6_pktinfo *) CMSG_DATA(cm);
bed41728 727 pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
05476c4d 728 pi->ipi6_addr = ipa_to_in6(s->saddr);
bed41728 729
8945f73d 730 msg->msg_controllen = controllen;
bed41728 731}
48e5f32d 732
bed41728 733
05476c4d
OZ
734/*
735 * Miscellaneous socket syscalls
736 */
737
738static inline int
739sk_set_ttl4(sock *s, int ttl)
a39b165e 740{
05476c4d
OZ
741 if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
742 ERR("IP_TTL");
743
744 return 0;
a39b165e
OZ
745}
746
05476c4d
OZ
747static inline int
748sk_set_ttl6(sock *s, int ttl)
749{
750 if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
751 ERR("IPV6_UNICAST_HOPS");
38a608c5 752
05476c4d
OZ
753 return 0;
754}
755
756static inline int
757sk_set_tos4(sock *s, int tos)
b5d9ee5c 758{
05476c4d
OZ
759 if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
760 ERR("IP_TOS");
b5d9ee5c 761
05476c4d
OZ
762 return 0;
763}
ef4a50be 764
05476c4d
OZ
765static inline int
766sk_set_tos6(sock *s, int tos)
767{
768 if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
769 ERR("IPV6_TCLASS");
48e5f32d 770
05476c4d
OZ
771 return 0;
772}
48e5f32d 773
b867a87c 774static inline int
3e236955 775sk_set_high_port(sock *s UNUSED)
b867a87c
OZ
776{
777 /* Port range setting is optional, ignore it if not supported */
778
779#ifdef IP_PORTRANGE
780 if (sk_is_ipv4(s))
781 {
782 int range = IP_PORTRANGE_HIGH;
783 if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
784 ERR("IP_PORTRANGE");
785 }
786#endif
787
788#ifdef IPV6_PORTRANGE
789 if (sk_is_ipv6(s))
790 {
791 int range = IPV6_PORTRANGE_HIGH;
792 if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
793 ERR("IPV6_PORTRANGE");
794 }
795#endif
796
797 return 0;
798}
799
88a183c6
OZ
800static inline byte *
801sk_skip_ip_header(byte *pkt, int *len)
802{
803 if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
804 return NULL;
805
806 int hlen = (*pkt & 0x0f) * 4;
807 if ((hlen < 20) || (hlen > *len))
808 return NULL;
809
810 *len -= hlen;
811 return pkt + hlen;
812}
813
814byte *
815sk_rx_buffer(sock *s, int *len)
816{
817 if (sk_is_ipv4(s) && (s->type == SK_IP))
818 return sk_skip_ip_header(s->rbuf, len);
819 else
820 return s->rbuf;
821}
822
48e5f32d 823
05476c4d
OZ
824/*
825 * Public socket functions
826 */
48e5f32d 827
05476c4d
OZ
828/**
829 * sk_setup_multicast - enable multicast for given socket
830 * @s: socket
831 *
832 * Prepare transmission of multicast packets for given datagram socket.
833 * The socket must have defined @iface.
834 *
835 * Result: 0 for success, -1 for an error.
836 */
48e5f32d 837
05476c4d
OZ
838int
839sk_setup_multicast(sock *s)
840{
841 ASSERT(s->iface);
48e5f32d 842
05476c4d
OZ
843 if (sk_is_ipv4(s))
844 return sk_setup_multicast4(s);
845 else
846 return sk_setup_multicast6(s);
847}
48e5f32d 848
05476c4d
OZ
849/**
850 * sk_join_group - join multicast group for given socket
851 * @s: socket
852 * @maddr: multicast address
853 *
854 * Join multicast group for given datagram socket and associated interface.
855 * The socket must have defined @iface.
856 *
857 * Result: 0 for success, -1 for an error.
858 */
789772ed 859
05476c4d
OZ
860int
861sk_join_group(sock *s, ip_addr maddr)
862{
863 if (sk_is_ipv4(s))
864 return sk_join_group4(s, maddr);
865 else
866 return sk_join_group6(s, maddr);
867}
ef4a50be 868
05476c4d
OZ
869/**
870 * sk_leave_group - leave multicast group for given socket
871 * @s: socket
872 * @maddr: multicast address
873 *
874 * Leave multicast group for given datagram socket and associated interface.
875 * The socket must have defined @iface.
876 *
877 * Result: 0 for success, -1 for an error.
878 */
789772ed 879
05476c4d
OZ
880int
881sk_leave_group(sock *s, ip_addr maddr)
882{
883 if (sk_is_ipv4(s))
884 return sk_leave_group4(s, maddr);
885 else
886 return sk_leave_group6(s, maddr);
b5d9ee5c
MM
887}
888
a39b165e 889/**
05476c4d
OZ
890 * sk_setup_broadcast - enable broadcast for given socket
891 * @s: socket
892 *
893 * Allow reception and transmission of broadcast packets for given datagram
894 * socket. The socket must have defined @iface. For transmission, packets should
895 * be send to @brd address of @iface.
896 *
897 * Result: 0 for success, -1 for an error.
898 */
899
900int
901sk_setup_broadcast(sock *s)
902{
903 int y = 1;
904
905 if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
906 ERR("SO_BROADCAST");
907
908 return 0;
909}
910
911/**
912 * sk_set_ttl - set transmit TTL for given socket
a39b165e
OZ
913 * @s: socket
914 * @ttl: TTL value
915 *
05476c4d
OZ
916 * Set TTL for already opened connections when TTL was not set before. Useful
917 * for accepted connections when different ones should have different TTL.
a39b165e
OZ
918 *
919 * Result: 0 for success, -1 for an error.
920 */
921
922int
923sk_set_ttl(sock *s, int ttl)
924{
a39b165e 925 s->ttl = ttl;
a39b165e 926
05476c4d
OZ
927 if (sk_is_ipv4(s))
928 return sk_set_ttl4(s, ttl);
929 else
930 return sk_set_ttl6(s, ttl);
a39b165e
OZ
931}
932
b1b19433 933/**
05476c4d 934 * sk_set_min_ttl - set minimal accepted TTL for given socket
b1b19433
OZ
935 * @s: socket
936 * @ttl: TTL value
937 *
05476c4d
OZ
938 * Set minimal accepted TTL for given socket. Can be used for TTL security.
939 * implementations.
b1b19433
OZ
940 *
941 * Result: 0 for success, -1 for an error.
942 */
943
944int
945sk_set_min_ttl(sock *s, int ttl)
946{
05476c4d
OZ
947 if (sk_is_ipv4(s))
948 return sk_set_min_ttl4(s, ttl);
949 else
950 return sk_set_min_ttl6(s, ttl);
b1b19433 951}
d51aa281 952
05476c4d 953#if 0
d51aa281 954/**
05476c4d 955 * sk_set_md5_auth - add / remove MD5 security association for given socket
d51aa281 956 * @s: socket
a7baa098
OZ
957 * @local: IP address of local side
958 * @remote: IP address of remote side
eb1451a3 959 * @ifa: Interface for link-local IP address
a7baa098
OZ
960 * @passwd: Password used for MD5 authentication
961 * @setkey: Update also system SA/SP database
d51aa281 962 *
a7baa098
OZ
963 * In TCP MD5 handling code in kernel, there is a set of security associations
964 * used for choosing password and other authentication parameters according to
965 * the local and remote address. This function is useful for listening socket,
966 * for active sockets it may be enough to set s->password field.
d51aa281
OZ
967 *
968 * When called with passwd != NULL, the new pair is added,
969 * When called with passwd == NULL, the existing pair is removed.
970 *
a7baa098
OZ
971 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
972 * stored in global SA/SP database (but the behavior also must be enabled on
973 * per-socket basis). In case of multiple sockets to the same neighbor, the
974 * socket-specific state must be configured for each socket while global state
975 * just once per src-dst pair. The @setkey argument controls whether the global
976 * state (SA/SP database) is also updated.
977 *
d51aa281
OZ
978 * Result: 0 for success, -1 for an error.
979 */
980
981int
a7baa098 982sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
05476c4d
OZ
983{ DUMMY; }
984#endif
f9c799a0 985
05476c4d
OZ
986/**
987 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
988 * @s: socket
989 * @offset: offset
990 *
991 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
992 * kernel will automatically fill it for outgoing packets and check it for
993 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
994 * known to the kernel.
995 *
996 * Result: 0 for success, -1 for an error.
997 */
f9c799a0 998
4ac7c834
OZ
999int
1000sk_set_ipv6_checksum(sock *s, int offset)
1001{
48e5f32d 1002 if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
05476c4d 1003 ERR("IPV6_CHECKSUM");
4ac7c834
OZ
1004
1005 return 0;
1006}
1007
93e868c7 1008int
05476c4d 1009sk_set_icmp6_filter(sock *s, int p1, int p2)
93e868c7
OZ
1010{
1011 /* a bit of lame interface, but it is here only for Radv */
1012 struct icmp6_filter f;
1013
1014 ICMP6_FILTER_SETBLOCKALL(&f);
1015 ICMP6_FILTER_SETPASS(p1, &f);
1016 ICMP6_FILTER_SETPASS(p2, &f);
1017
48e5f32d 1018 if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
05476c4d 1019 ERR("ICMP6_FILTER");
93e868c7
OZ
1020
1021 return 0;
1022}
1023
05476c4d
OZ
1024void
1025sk_log_error(sock *s, const char *p)
1026{
1027 log(L_ERR "%s: Socket error: %s%#m", p, s->err);
1028}
1029
1030
1031/*
1032 * Actual struct birdsock code
1033 */
1034
1035static list sock_list;
1036static struct birdsock *current_sock;
1037static struct birdsock *stored_sock;
05476c4d
OZ
1038
1039static inline sock *
1040sk_next(sock *s)
1041{
1042 if (!s->n.next->next)
1043 return NULL;
1044 else
1045 return SKIP_BACK(sock, n, s->n.next);
1046}
1047
1048static void
1049sk_alloc_bufs(sock *s)
1050{
1051 if (!s->rbuf && s->rbsize)
1052 s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
1053 s->rpos = s->rbuf;
1054 if (!s->tbuf && s->tbsize)
1055 s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
1056 s->tpos = s->ttx = s->tbuf;
1057}
1058
1059static void
1060sk_free_bufs(sock *s)
1061{
1062 if (s->rbuf_alloc)
1063 {
1064 xfree(s->rbuf_alloc);
1065 s->rbuf = s->rbuf_alloc = NULL;
1066 }
1067 if (s->tbuf_alloc)
1068 {
1069 xfree(s->tbuf_alloc);
1070 s->tbuf = s->tbuf_alloc = NULL;
1071 }
1072}
1073
af62c0f9 1074#ifdef HAVE_LIBSSH
65d2a88d
PT
1075static void
1076sk_ssh_free(sock *s)
1077{
1078 struct ssh_sock *ssh = s->ssh;
1079
1080 if (s->ssh == NULL)
1081 return;
1082
1083 s->ssh = NULL;
1084
1085 if (ssh->channel)
1086 {
1087 if (ssh_channel_is_open(ssh->channel))
1088 ssh_channel_close(ssh->channel);
1089 ssh_channel_free(ssh->channel);
1090 ssh->channel = NULL;
1091 }
1092
1093 if (ssh->session)
1094 {
1095 ssh_disconnect(ssh->session);
1096 ssh_free(ssh->session);
1097 ssh->session = NULL;
1098 }
1099}
af62c0f9 1100#endif
65d2a88d 1101
05476c4d
OZ
1102static void
1103sk_free(resource *r)
1104{
1105 sock *s = (sock *) r;
1106
1107 sk_free_bufs(s);
05476c4d 1108
af62c0f9 1109#ifdef HAVE_LIBSSH
65d2a88d
PT
1110 if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
1111 sk_ssh_free(s);
af62c0f9 1112#endif
05476c4d 1113
65d2a88d
PT
1114 if (s->fd < 0)
1115 return;
05476c4d 1116
65d2a88d
PT
1117 /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1118 if (!(s->flags & SKF_THREAD))
1119 {
05476c4d
OZ
1120 if (s == current_sock)
1121 current_sock = sk_next(s);
1122 if (s == stored_sock)
1123 stored_sock = sk_next(s);
1124 rem_node(&s->n);
05476c4d 1125 }
65d2a88d
PT
1126
1127 if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
1128 close(s->fd);
1129
1130 s->fd = -1;
05476c4d
OZ
1131}
1132
1133void
1134sk_set_rbsize(sock *s, uint val)
1135{
1136 ASSERT(s->rbuf_alloc == s->rbuf);
1137
1138 if (s->rbsize == val)
1139 return;
1140
1141 s->rbsize = val;
1142 xfree(s->rbuf_alloc);
1143 s->rbuf_alloc = xmalloc(val);
1144 s->rpos = s->rbuf = s->rbuf_alloc;
1145}
1146
1147void
1148sk_set_tbsize(sock *s, uint val)
1149{
1150 ASSERT(s->tbuf_alloc == s->tbuf);
1151
1152 if (s->tbsize == val)
1153 return;
1154
1155 byte *old_tbuf = s->tbuf;
1156
1157 s->tbsize = val;
1158 s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1159 s->tpos = s->tbuf + (s->tpos - old_tbuf);
1160 s->ttx = s->tbuf + (s->ttx - old_tbuf);
1161}
1162
1163void
1164sk_set_tbuf(sock *s, void *tbuf)
1165{
1166 s->tbuf = tbuf ?: s->tbuf_alloc;
1167 s->ttx = s->tpos = s->tbuf;
1168}
1169
1170void
1171sk_reallocate(sock *s)
1172{
1173 sk_free_bufs(s);
1174 sk_alloc_bufs(s);
1175}
1176
1177static void
1178sk_dump(resource *r)
1179{
1180 sock *s = (sock *) r;
65d2a88d 1181 static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
05476c4d 1182
af454f9b 1183 debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
05476c4d
OZ
1184 sk_type_names[s->type],
1185 s->data,
1186 s->saddr,
1187 s->sport,
1188 s->daddr,
1189 s->dport,
1190 s->tos,
1191 s->ttl,
1192 s->iface ? s->iface->name : "none");
1193}
1194
1195static struct resclass sk_class = {
1196 "Socket",
1197 sizeof(sock),
1198 sk_free,
1199 sk_dump,
1200 NULL,
1201 NULL
1202};
1203
1204/**
1205 * sk_new - create a socket
1206 * @p: pool
1207 *
1208 * This function creates a new socket resource. If you want to use it,
1209 * you need to fill in all the required fields of the structure and
1210 * call sk_open() to do the actual opening of the socket.
1211 *
1212 * The real function name is sock_new(), sk_new() is a macro wrapper
1213 * to avoid collision with OpenSSL.
1214 */
1215sock *
1216sock_new(pool *p)
1217{
1218 sock *s = ralloc(p, &sk_class);
1219 s->pool = p;
1220 // s->saddr = s->daddr = IPA_NONE;
1221 s->tos = s->priority = s->ttl = -1;
1222 s->fd = -1;
1223 return s;
1224}
1225
1226static int
1227sk_setup(sock *s)
f9c799a0 1228{
05476c4d
OZ
1229 int y = 1;
1230 int fd = s->fd;
f9c799a0 1231
65d2a88d
PT
1232 if (s->type == SK_SSH_ACTIVE)
1233 return 0;
1234
05476c4d
OZ
1235 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1236 ERR("O_NONBLOCK");
f9c799a0 1237
08b3a24d 1238 if (!s->af)
05476c4d 1239 return 0;
f9c799a0 1240
05476c4d
OZ
1241 if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1242 s->flags |= SKF_PKTINFO;
f9c799a0 1243
05476c4d
OZ
1244#ifdef CONFIG_USE_HDRINCL
1245 if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1246 {
1247 s->flags &= ~SKF_PKTINFO;
1248 s->flags |= SKF_HDRINCL;
1249 if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1250 ERR("IP_HDRINCL");
1251 }
48e5f32d
OZ
1252#endif
1253
05476c4d
OZ
1254 if (s->iface)
1255 {
1256#ifdef SO_BINDTODEVICE
966ca614 1257 struct ifreq ifr = {};
05476c4d
OZ
1258 strcpy(ifr.ifr_name, s->iface->name);
1259 if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1260 ERR("SO_BINDTODEVICE");
1261#endif
f1aceff5 1262
05476c4d
OZ
1263#ifdef CONFIG_UNIX_DONTROUTE
1264 if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1265 ERR("SO_DONTROUTE");
1266#endif
1267 }
f9c799a0 1268
05476c4d
OZ
1269 if (s->priority >= 0)
1270 if (sk_set_priority(s, s->priority) < 0)
f9c799a0 1271 return -1;
f9c799a0 1272
05476c4d
OZ
1273 if (sk_is_ipv4(s))
1274 {
1275 if (s->flags & SKF_LADDR_RX)
1276 if (sk_request_cmsg4_pktinfo(s) < 0)
1277 return -1;
f9c799a0 1278
05476c4d
OZ
1279 if (s->flags & SKF_TTL_RX)
1280 if (sk_request_cmsg4_ttl(s) < 0)
1281 return -1;
f9c799a0 1282
05476c4d
OZ
1283 if ((s->type == SK_UDP) || (s->type == SK_IP))
1284 if (sk_disable_mtu_disc4(s) < 0)
1285 return -1;
f9c799a0 1286
05476c4d
OZ
1287 if (s->ttl >= 0)
1288 if (sk_set_ttl4(s, s->ttl) < 0)
1289 return -1;
f9c799a0 1290
05476c4d
OZ
1291 if (s->tos >= 0)
1292 if (sk_set_tos4(s, s->tos) < 0)
1293 return -1;
1294 }
f9c799a0 1295
05476c4d
OZ
1296 if (sk_is_ipv6(s))
1297 {
d15b0b0a 1298 if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
925aa149
JMM
1299 if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1300 ERR("IPV6_V6ONLY");
f9c799a0 1301
05476c4d
OZ
1302 if (s->flags & SKF_LADDR_RX)
1303 if (sk_request_cmsg6_pktinfo(s) < 0)
1304 return -1;
f9c799a0 1305
05476c4d
OZ
1306 if (s->flags & SKF_TTL_RX)
1307 if (sk_request_cmsg6_ttl(s) < 0)
1308 return -1;
f9c799a0 1309
05476c4d
OZ
1310 if ((s->type == SK_UDP) || (s->type == SK_IP))
1311 if (sk_disable_mtu_disc6(s) < 0)
1312 return -1;
f9c799a0 1313
05476c4d
OZ
1314 if (s->ttl >= 0)
1315 if (sk_set_ttl6(s, s->ttl) < 0)
1316 return -1;
f9c799a0 1317
05476c4d
OZ
1318 if (s->tos >= 0)
1319 if (sk_set_tos6(s, s->tos) < 0)
1320 return -1;
1321 }
f9c799a0
OZ
1322
1323 return 0;
1324}
1325
05476c4d
OZ
1326static void
1327sk_insert(sock *s)
f9c799a0 1328{
05476c4d 1329 add_tail(&sock_list, &s->n);
f9c799a0
OZ
1330}
1331
b93abffa 1332static void
b5d9ee5c
MM
1333sk_tcp_connected(sock *s)
1334{
05476c4d
OZ
1335 sockaddr sa;
1336 int sa_len = sizeof(sa);
1337
1338 if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
08b3a24d 1339 (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
05476c4d 1340 log(L_WARN "SOCK: Cannot get local IP address for TCP>");
9be9a264 1341
b5d9ee5c
MM
1342 s->type = SK_TCP;
1343 sk_alloc_bufs(s);
320f4173 1344 s->tx_hook(s);
b5d9ee5c
MM
1345}
1346
734e9fb8 1347#ifdef HAVE_LIBSSH
65d2a88d
PT
1348static void
1349sk_ssh_connected(sock *s)
1350{
1351 sk_alloc_bufs(s);
1352 s->type = SK_SSH;
1353 s->tx_hook(s);
1354}
734e9fb8 1355#endif
65d2a88d 1356
b93abffa 1357static int
05476c4d 1358sk_passive_connected(sock *s, int type)
b93abffa 1359{
05476c4d
OZ
1360 sockaddr loc_sa, rem_sa;
1361 int loc_sa_len = sizeof(loc_sa);
1362 int rem_sa_len = sizeof(rem_sa);
cf31112f 1363
05476c4d
OZ
1364 int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1365 if (fd < 0)
1366 {
1367 if ((errno != EINTR) && (errno != EAGAIN))
c025b852 1368 s->err_hook(s, errno);
05476c4d
OZ
1369 return 0;
1370 }
1371
1372 sock *t = sk_new(s->pool);
1373 t->type = type;
08b3a24d 1374 t->af = s->af;
d7661fbe 1375 t->fd = fd;
05476c4d
OZ
1376 t->ttl = s->ttl;
1377 t->tos = s->tos;
1378 t->rbsize = s->rbsize;
1379 t->tbsize = s->tbsize;
1380
1381 if (type == SK_TCP)
1382 {
1383 if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
08b3a24d 1384 (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
05476c4d
OZ
1385 log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1386
08b3a24d 1387 if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
05476c4d
OZ
1388 log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1389 }
1390
1391 if (sk_setup(t) < 0)
1392 {
1393 /* FIXME: Call err_hook instead ? */
1394 log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1395
1396 /* FIXME: handle it better in rfree() */
9c89560e 1397 close(t->fd);
05476c4d
OZ
1398 t->fd = -1;
1399 rfree(t);
1400 return 1;
1401 }
1402
1403 sk_insert(t);
1404 sk_alloc_bufs(t);
1405 s->rx_hook(t, 0);
1406 return 1;
b93abffa
MM
1407}
1408
af62c0f9 1409#ifdef HAVE_LIBSSH
65d2a88d
PT
1410/*
1411 * Return SSH_OK or SSH_AGAIN or SSH_ERROR
1412 */
1413static int
1414sk_ssh_connect(sock *s)
1415{
1416 s->fd = ssh_get_fd(s->ssh->session);
1417
1418 /* Big fall thru automata */
1419 switch (s->ssh->state)
1420 {
1421 case SK_SSH_CONNECT:
1422 {
1423 switch (ssh_connect(s->ssh->session))
1424 {
1425 case SSH_AGAIN:
cdbe1def
JMM
1426 /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
1427 * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
1428 * documented but our code relies on that.
1429 */
65d2a88d
PT
1430 return SSH_AGAIN;
1431
1432 case SSH_OK:
1433 break;
1434
1435 default:
1436 return SSH_ERROR;
1437 }
1438 }
1439
1440 case SK_SSH_SERVER_KNOWN:
1441 {
1442 s->ssh->state = SK_SSH_SERVER_KNOWN;
1443
1444 if (s->ssh->server_hostkey_path)
1445 {
1446 int server_identity_is_ok = 1;
1447
1448 /* Check server identity */
1449 switch (ssh_is_server_known(s->ssh->session))
1450 {
1451#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
1452 case SSH_SERVER_KNOWN_OK:
1453 /* The server is known and has not changed. */
1454 break;
1455
1456 case SSH_SERVER_NOT_KNOWN:
1457 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
1458 break;
1459
1460 case SSH_SERVER_KNOWN_CHANGED:
1461 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
1462 server_identity_is_ok = 0;
1463 break;
1464
1465 case SSH_SERVER_FILE_NOT_FOUND:
1466 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
1467 server_identity_is_ok = 0;
1468 break;
1469
1470 case SSH_SERVER_ERROR:
1471 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
1472 server_identity_is_ok = 0;
1473 break;
1474
1475 case SSH_SERVER_FOUND_OTHER:
1476 LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
1477 "It is a possible attack.");
1478 server_identity_is_ok = 0;
1479 break;
1480 }
1481
1482 if (!server_identity_is_ok)
1483 return SSH_ERROR;
1484 }
1485 }
1486
1487 case SK_SSH_USERAUTH:
1488 {
1489 s->ssh->state = SK_SSH_USERAUTH;
1490 switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
1491 {
1492 case SSH_AUTH_AGAIN:
1493 return SSH_AGAIN;
1494
1495 case SSH_AUTH_SUCCESS:
1496 break;
1497
1498 default:
1499 return SSH_ERROR;
1500 }
1501 }
1502
1503 case SK_SSH_CHANNEL:
1504 {
1505 s->ssh->state = SK_SSH_CHANNEL;
1506 s->ssh->channel = ssh_channel_new(s->ssh->session);
1507 if (s->ssh->channel == NULL)
1508 return SSH_ERROR;
1509 }
1510
1511 case SK_SSH_SESSION:
1512 {
1513 s->ssh->state = SK_SSH_SESSION;
1514 switch (ssh_channel_open_session(s->ssh->channel))
1515 {
1516 case SSH_AGAIN:
1517 return SSH_AGAIN;
1518
1519 case SSH_OK:
1520 break;
1521
1522 default:
1523 return SSH_ERROR;
1524 }
1525 }
1526
1527 case SK_SSH_SUBSYSTEM:
1528 {
1529 s->ssh->state = SK_SSH_SUBSYSTEM;
1530 if (s->ssh->subsystem)
1531 {
1532 switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
1533 {
1534 case SSH_AGAIN:
1535 return SSH_AGAIN;
1536
1537 case SSH_OK:
1538 break;
1539
1540 default:
1541 return SSH_ERROR;
1542 }
1543 }
1544 }
1545
1546 case SK_SSH_ESTABLISHED:
1547 s->ssh->state = SK_SSH_ESTABLISHED;
1548 }
1549
1550 return SSH_OK;
1551}
1552
1553/*
1554 * Return file descriptor number if success
1555 * Return -1 if failed
1556 */
1557static int
1558sk_open_ssh(sock *s)
1559{
1560 if (!s->ssh)
1561 bug("sk_open() sock->ssh is not allocated");
1562
1563 ssh_session sess = ssh_new();
1564 if (sess == NULL)
1565 ERR2("Cannot create a ssh session");
1566 s->ssh->session = sess;
1567
1568 const int verbosity = SSH_LOG_NOLOG;
1569 ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
1570 ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
1571 ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
1572 /* TODO: Add SSH_OPTIONS_BINDADDR */
1573 ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
1574
1575 if (s->ssh->server_hostkey_path)
1576 ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
1577
1578 if (s->ssh->client_privkey_path)
1579 ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
1580
1581 ssh_set_blocking(sess, 0);
1582
1583 switch (sk_ssh_connect(s))
1584 {
1585 case SSH_AGAIN:
1586 break;
1587
1588 case SSH_OK:
1589 sk_ssh_connected(s);
1590 break;
1591
1592 case SSH_ERROR:
1593 ERR2(ssh_get_error(sess));
1594 break;
1595 }
1596
1597 return ssh_get_fd(sess);
1598
1599 err:
1600 return -1;
1601}
af62c0f9 1602#endif
65d2a88d 1603
525fa2c1
MM
1604/**
1605 * sk_open - open a socket
1606 * @s: socket
1607 *
1608 * This function takes a socket resource created by sk_new() and
1609 * initialized by the user and binds a corresponding network connection
1610 * to it.
1611 *
1612 * Result: 0 for success, -1 for an error.
1613 */
b5d9ee5c
MM
1614int
1615sk_open(sock *s)
1616{
08b3a24d 1617 int af = AF_UNSPEC;
05476c4d 1618 int fd = -1;
48e5f32d
OZ
1619 int do_bind = 0;
1620 int bind_port = 0;
1621 ip_addr bind_addr = IPA_NONE;
1622 sockaddr sa;
b5d9ee5c 1623
08b3a24d
OZ
1624 if (s->type <= SK_IP)
1625 {
1626 /*
1627 * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
1628 * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
1629 * But the specifications have to be consistent.
1630 */
1631
1632 switch (s->subtype)
1633 {
1634 case 0:
1635 ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
1636 (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
1637 af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
1638 break;
1639
1640 case SK_IPV4:
1641 ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
1642 ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
1643 af = AF_INET;
1644 break;
1645
1646 case SK_IPV6:
1647 ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
1648 ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
1649 af = AF_INET6;
1650 break;
1651
1652 default:
1653 bug("Invalid subtype %d", s->subtype);
1654 }
1655 }
1656
48e5f32d 1657 switch (s->type)
05476c4d
OZ
1658 {
1659 case SK_TCP_ACTIVE:
1660 s->ttx = ""; /* Force s->ttx != s->tpos */
1661 /* Fall thru */
1662 case SK_TCP_PASSIVE:
08b3a24d 1663 fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
05476c4d
OZ
1664 bind_port = s->sport;
1665 bind_addr = s->saddr;
1666 do_bind = bind_port || ipa_nonzero(bind_addr);
1667 break;
9c89560e 1668
af62c0f9 1669#ifdef HAVE_LIBSSH
65d2a88d
PT
1670 case SK_SSH_ACTIVE:
1671 s->ttx = ""; /* Force s->ttx != s->tpos */
1672 fd = sk_open_ssh(s);
1673 break;
af62c0f9 1674#endif
65d2a88d 1675
05476c4d 1676 case SK_UDP:
08b3a24d 1677 fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
05476c4d
OZ
1678 bind_port = s->sport;
1679 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1680 do_bind = 1;
1681 break;
1682
1683 case SK_IP:
08b3a24d 1684 fd = socket(af, SOCK_RAW, s->dport);
05476c4d
OZ
1685 bind_port = 0;
1686 bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1687 do_bind = ipa_nonzero(bind_addr);
1688 break;
1689
1690 case SK_MAGIC:
08b3a24d 1691 af = 0;
05476c4d
OZ
1692 fd = s->fd;
1693 break;
1694
1695 default:
1696 bug("sk_open() called for invalid sock type %d", s->type);
1697 }
1698
b5d9ee5c 1699 if (fd < 0)
05476c4d
OZ
1700 ERR("socket");
1701
08b3a24d 1702 s->af = af;
b5d9ee5c
MM
1703 s->fd = fd;
1704
05476c4d
OZ
1705 if (sk_setup(s) < 0)
1706 goto err;
38a608c5 1707
48e5f32d 1708 if (do_bind)
05476c4d
OZ
1709 {
1710 if (bind_port)
b5d9ee5c 1711 {
05476c4d
OZ
1712 int y = 1;
1713
1714 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1715 ERR2("SO_REUSEADDR");
48e5f32d 1716
8931425d 1717#ifdef CONFIG_NO_IFACE_BIND
05476c4d
OZ
1718 /* Workaround missing ability to bind to an iface */
1719 if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1720 {
1721 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1722 ERR2("SO_REUSEPORT");
1723 }
8931425d 1724#endif
b5d9ee5c 1725 }
b867a87c
OZ
1726 else
1727 if (s->flags & SKF_HIGH_PORT)
1728 if (sk_set_high_port(s) < 0)
1729 log(L_WARN "Socket error: %s%#m", s->err);
48e5f32d 1730
08b3a24d 1731 sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
05476c4d
OZ
1732 if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1733 ERR2("bind");
1734 }
d51aa281
OZ
1735
1736 if (s->password)
a7baa098 1737 if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
05476c4d 1738 goto err;
d51aa281 1739
48e5f32d 1740 switch (s->type)
05476c4d
OZ
1741 {
1742 case SK_TCP_ACTIVE:
08b3a24d 1743 sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
05476c4d
OZ
1744 if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1745 sk_tcp_connected(s);
1746 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1747 errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1748 ERR2("connect");
1749 break;
1750
1751 case SK_TCP_PASSIVE:
1752 if (listen(fd, 8) < 0)
1753 ERR2("listen");
1754 break;
1755
65d2a88d 1756 case SK_SSH_ACTIVE:
05476c4d
OZ
1757 case SK_MAGIC:
1758 break;
1759
1760 default:
1761 sk_alloc_bufs(s);
1762 }
b5d9ee5c 1763
bf139664
OZ
1764 if (!(s->flags & SKF_THREAD))
1765 sk_insert(s);
65d2a88d 1766
b5d9ee5c
MM
1767 return 0;
1768
05476c4d 1769err:
b5d9ee5c
MM
1770 close(fd);
1771 s->fd = -1;
1772 return -1;
1773}
1774
05476c4d 1775int
b93abffa
MM
1776sk_open_unix(sock *s, char *name)
1777{
b93abffa 1778 struct sockaddr_un sa;
05476c4d
OZ
1779 int fd;
1780
1781 /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
b93abffa
MM
1782
1783 fd = socket(AF_UNIX, SOCK_STREAM, 0);
1784 if (fd < 0)
05476c4d
OZ
1785 return -1;
1786
1787 if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1788 return -1;
68fa95cf 1789
97e46d28 1790 /* Path length checked in test_old_bird() */
b93abffa 1791 sa.sun_family = AF_UNIX;
97c6fa02 1792 strcpy(sa.sun_path, name);
05476c4d 1793
0b3bf4b1 1794 if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
05476c4d
OZ
1795 return -1;
1796
1797 if (listen(fd, 8) < 0)
1798 return -1;
1799
1800 s->fd = fd;
38a608c5 1801 sk_insert(s);
05476c4d
OZ
1802 return 0;
1803}
1804
1805
/* Control-message buffer sizes: the larger of the IPv4 and IPv6 requirement */
#define CMSG_RX_SPACE	MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
			    CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE	MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1809
1810static void
1811sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1812{
1813 if (sk_is_ipv4(s))
1814 sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1815 else
1816 sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1817}
1818
1819static void
1820sk_process_cmsgs(sock *s, struct msghdr *msg)
1821{
1822 struct cmsghdr *cm;
1823
1824 s->laddr = IPA_NONE;
1825 s->lifindex = 0;
1826 s->rcv_ttl = -1;
1827
1828 for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1829 {
1830 if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1831 {
1832 sk_process_cmsg4_pktinfo(s, cm);
1833 sk_process_cmsg4_ttl(s, cm);
1834 }
b93abffa 1835
05476c4d
OZ
1836 if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1837 {
1838 sk_process_cmsg6_pktinfo(s, cm);
1839 sk_process_cmsg6_ttl(s, cm);
1840 }
1841 }
b93abffa
MM
1842}
1843
48e5f32d
OZ
1844
1845static inline int
1846sk_sendmsg(sock *s)
1847{
1848 struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1849 byte cmsg_buf[CMSG_TX_SPACE];
286e2011 1850 sockaddr dst;
48e5f32d 1851
08b3a24d 1852 sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
48e5f32d
OZ
1853
1854 struct msghdr msg = {
05476c4d
OZ
1855 .msg_name = &dst.sa,
1856 .msg_namelen = SA_LEN(dst),
48e5f32d
OZ
1857 .msg_iov = &iov,
1858 .msg_iovlen = 1
1859 };
1860
1861#ifdef CONFIG_USE_HDRINCL
1862 byte hdr[20];
1863 struct iovec iov2[2] = { {hdr, 20}, iov };
1864
1865 if (s->flags & SKF_HDRINCL)
1866 {
05476c4d 1867 sk_prepare_ip_header(s, hdr, iov.iov_len);
48e5f32d
OZ
1868 msg.msg_iov = iov2;
1869 msg.msg_iovlen = 2;
1870 }
1871#endif
1872
1873 if (s->flags & SKF_PKTINFO)
05476c4d 1874 sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
48e5f32d
OZ
1875
1876 return sendmsg(s->fd, &msg, 0);
1877}
1878
1879static inline int
1880sk_recvmsg(sock *s)
1881{
1882 struct iovec iov = {s->rbuf, s->rbsize};
1883 byte cmsg_buf[CMSG_RX_SPACE];
1884 sockaddr src;
1885
1886 struct msghdr msg = {
05476c4d
OZ
1887 .msg_name = &src.sa,
1888 .msg_namelen = sizeof(src), // XXXX ??
48e5f32d
OZ
1889 .msg_iov = &iov,
1890 .msg_iovlen = 1,
1891 .msg_control = cmsg_buf,
1892 .msg_controllen = sizeof(cmsg_buf),
1893 .msg_flags = 0
1894 };
1895
1896 int rv = recvmsg(s->fd, &msg, 0);
1897 if (rv < 0)
1898 return rv;
1899
1900 //ifdef IPV4
1901 // if (cf_type == SK_IP)
1902 // rv = ipv4_skip_header(pbuf, rv);
1903 //endif
1904
08b3a24d 1905 sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
05476c4d 1906 sk_process_cmsgs(s, &msg);
48e5f32d
OZ
1907
1908 if (msg.msg_flags & MSG_TRUNC)
1909 s->flags |= SKF_TRUNCATED;
1910 else
1911 s->flags &= ~SKF_TRUNCATED;
1912
1913 return rv;
1914}
1915
1916
353729f5
OZ
1917static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1918
b5d9ee5c
MM
1919static int
1920sk_maybe_write(sock *s)
1921{
1922 int e;
1923
1924 switch (s->type)
05476c4d
OZ
1925 {
1926 case SK_TCP:
1927 case SK_MAGIC:
1928 case SK_UNIX:
1929 while (s->ttx != s->tpos)
b5d9ee5c 1930 {
05476c4d
OZ
1931 e = write(s->fd, s->ttx, s->tpos - s->ttx);
1932
1933 if (e < 0)
1934 {
1935 if (errno != EINTR && errno != EAGAIN)
b5d9ee5c 1936 {
05476c4d
OZ
1937 reset_tx_buffer(s);
1938 /* EPIPE is just a connection close notification during TX */
1939 s->err_hook(s, (errno != EPIPE) ? errno : 0);
1940 return -1;
b5d9ee5c 1941 }
05476c4d
OZ
1942 return 0;
1943 }
1944 s->ttx += e;
1945 }
1946 reset_tx_buffer(s);
1947 return 1;
1948
af62c0f9 1949#ifdef HAVE_LIBSSH
65d2a88d
PT
1950 case SK_SSH:
1951 while (s->ttx != s->tpos)
1952 {
1953 e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
1954
1955 if (e < 0)
1956 {
1957 s->err = ssh_get_error(s->ssh->session);
1958 s->err_hook(s, ssh_get_error_code(s->ssh->session));
1959
1960 reset_tx_buffer(s);
1961 /* EPIPE is just a connection close notification during TX */
1962 s->err_hook(s, (errno != EPIPE) ? errno : 0);
1963 return -1;
1964 }
1965 s->ttx += e;
1966 }
1967 reset_tx_buffer(s);
1968 return 1;
af62c0f9 1969#endif
65d2a88d 1970
05476c4d
OZ
1971 case SK_UDP:
1972 case SK_IP:
1973 {
1974 if (s->tbuf == s->tpos)
b5d9ee5c 1975 return 1;
05476c4d
OZ
1976
1977 e = sk_sendmsg(s);
1978
1979 if (e < 0)
1980 {
1981 if (errno != EINTR && errno != EAGAIN)
1982 {
1983 reset_tx_buffer(s);
1984 s->err_hook(s, errno);
1985 return -1;
1986 }
1987
1988 if (!s->tx_hook)
1989 reset_tx_buffer(s);
1990 return 0;
b5d9ee5c 1991 }
05476c4d
OZ
1992 reset_tx_buffer(s);
1993 return 1;
b5d9ee5c 1994 }
65d2a88d 1995
05476c4d
OZ
1996 default:
1997 bug("sk_maybe_write: unknown socket type %d", s->type);
1998 }
b5d9ee5c
MM
1999}
2000
ea89da38
OZ
2001int
2002sk_rx_ready(sock *s)
2003{
ea89da38 2004 int rv;
9c92f692
JMM
2005 struct pollfd pfd = { .fd = s->fd };
2006 pfd.events |= POLLIN;
ea89da38
OZ
2007
2008 redo:
9c92f692 2009 rv = poll(&pfd, 1, 0);
9c89560e 2010
ea89da38
OZ
2011 if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
2012 goto redo;
2013
2014 return rv;
2015}
2016
525fa2c1
MM
2017/**
2018 * sk_send - send data to a socket
2019 * @s: socket
2020 * @len: number of bytes to send
2021 *
2022 * This function sends @len bytes of data prepared in the
2023 * transmit buffer of the socket @s to the network connection.
2024 * If the packet can be sent immediately, it does so and returns
2025 * 1, else it queues the packet for later processing, returns 0
2026 * and calls the @tx_hook of the socket when the tranmission
2027 * takes place.
2028 */
b5d9ee5c
MM
2029int
2030sk_send(sock *s, unsigned len)
2031{
b5d9ee5c
MM
2032 s->ttx = s->tbuf;
2033 s->tpos = s->tbuf + len;
2034 return sk_maybe_write(s);
2035}
2036
525fa2c1
MM
2037/**
2038 * sk_send_to - send data to a specific destination
2039 * @s: socket
2040 * @len: number of bytes to send
2041 * @addr: IP address to send the packet to
2042 * @port: port to send the packet to
2043 *
2e9b2421 2044 * This is a sk_send() replacement for connection-less packet sockets
525fa2c1 2045 * which allows destination of the packet to be chosen dynamically.
48e5f32d 2046 * Raw IP sockets should use 0 for @port.
525fa2c1 2047 */
b5d9ee5c
MM
2048int
2049sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
2050{
353729f5 2051 s->daddr = addr;
48e5f32d
OZ
2052 if (port)
2053 s->dport = port;
2054
b5d9ee5c
MM
2055 s->ttx = s->tbuf;
2056 s->tpos = s->tbuf + len;
2057 return sk_maybe_write(s);
2058}
2059
353729f5
OZ
2060/*
2061int
2062sk_send_full(sock *s, unsigned len, struct iface *ifa,
2063 ip_addr saddr, ip_addr daddr, unsigned dport)
2064{
2065 s->iface = ifa;
2066 s->saddr = saddr;
2067 s->daddr = daddr;
2068 s->dport = dport;
2069 s->ttx = s->tbuf;
2070 s->tpos = s->tbuf + len;
2071 return sk_maybe_write(s);
2072}
2073*/
2074
65d2a88d
PT
2075static void
2076call_rx_hook(sock *s, int size)
2077{
2078 if (s->rx_hook(s, size))
2079 {
2080 /* We need to be careful since the socket could have been deleted by the hook */
2081 if (current_sock == s)
2082 s->rpos = s->rbuf;
2083 }
2084}
2085
af62c0f9 2086#ifdef HAVE_LIBSSH
65d2a88d
PT
2087static int
2088sk_read_ssh(sock *s)
2089{
2090 ssh_channel rchans[2] = { s->ssh->channel, NULL };
2091 struct timeval timev = { 1, 0 };
2092
2093 if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
2094 return 1; /* Try again */
2095
2096 if (ssh_channel_is_eof(s->ssh->channel) != 0)
2097 {
2098 /* The remote side is closing the connection */
2099 s->err_hook(s, 0);
2100 return 0;
2101 }
2102
2103 if (rchans[0] == NULL)
2104 return 0; /* No data is available on the socket */
2105
2106 const uint used_bytes = s->rpos - s->rbuf;
2107 const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
2108 if (read_bytes > 0)
2109 {
2110 /* Received data */
2111 s->rpos += read_bytes;
2112 call_rx_hook(s, used_bytes + read_bytes);
2113 return 1;
2114 }
2115 else if (read_bytes == 0)
2116 {
2117 if (ssh_channel_is_eof(s->ssh->channel) != 0)
2118 {
2119 /* The remote side is closing the connection */
2120 s->err_hook(s, 0);
2121 }
2122 }
2123 else
2124 {
2125 s->err = ssh_get_error(s->ssh->session);
2126 s->err_hook(s, ssh_get_error_code(s->ssh->session));
2127 }
2128
2129 return 0; /* No data is available on the socket */
2130}
af62c0f9 2131#endif
65d2a88d 2132
6a8d3f1c
OZ
2133 /* sk_read() and sk_write() are called from BFD's event loop */
2134
2135int
fd926ed4 2136sk_read(sock *s, int revents)
b5d9ee5c
MM
2137{
2138 switch (s->type)
05476c4d
OZ
2139 {
2140 case SK_TCP_PASSIVE:
2141 return sk_passive_connected(s, SK_TCP);
2142
2143 case SK_UNIX_PASSIVE:
2144 return sk_passive_connected(s, SK_UNIX);
2145
2146 case SK_TCP:
2147 case SK_UNIX:
b5d9ee5c 2148 {
05476c4d
OZ
2149 int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
2150
2151 if (c < 0)
b93abffa 2152 {
05476c4d
OZ
2153 if (errno != EINTR && errno != EAGAIN)
2154 s->err_hook(s, errno);
fd926ed4
JMM
2155 else if (errno == EAGAIN && !(revents & POLLIN))
2156 {
2157 log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
2158 s->err_hook(s, 0);
2159 }
b5d9ee5c 2160 }
05476c4d
OZ
2161 else if (!c)
2162 s->err_hook(s, 0);
2163 else
b5d9ee5c 2164 {
05476c4d 2165 s->rpos += c;
65d2a88d 2166 call_rx_hook(s, s->rpos - s->rbuf);
05476c4d 2167 return 1;
b5d9ee5c 2168 }
05476c4d
OZ
2169 return 0;
2170 }
353729f5 2171
af62c0f9 2172#ifdef HAVE_LIBSSH
65d2a88d
PT
2173 case SK_SSH:
2174 return sk_read_ssh(s);
af62c0f9 2175#endif
65d2a88d 2176
05476c4d
OZ
2177 case SK_MAGIC:
2178 return s->rx_hook(s, 0);
b5d9ee5c 2179
05476c4d
OZ
2180 default:
2181 {
2182 int e = sk_recvmsg(s);
353729f5 2183
05476c4d
OZ
2184 if (e < 0)
2185 {
2186 if (errno != EINTR && errno != EAGAIN)
2187 s->err_hook(s, errno);
2188 return 0;
b5d9ee5c 2189 }
05476c4d
OZ
2190
2191 s->rpos = s->rbuf + e;
2192 s->rx_hook(s, e);
2193 return 1;
b5d9ee5c 2194 }
05476c4d 2195 }
b5d9ee5c
MM
2196}
2197
6a8d3f1c 2198int
b5d9ee5c
MM
2199sk_write(sock *s)
2200{
320f4173 2201 switch (s->type)
05476c4d
OZ
2202 {
2203 case SK_TCP_ACTIVE:
320f4173 2204 {
05476c4d 2205 sockaddr sa;
08b3a24d 2206 sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
05476c4d
OZ
2207
2208 if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
2209 sk_tcp_connected(s);
2210 else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
2211 s->err_hook(s, errno);
38a608c5 2212 return 0;
320f4173 2213 }
05476c4d 2214
af62c0f9 2215#ifdef HAVE_LIBSSH
65d2a88d
PT
2216 case SK_SSH_ACTIVE:
2217 {
2218 switch (sk_ssh_connect(s))
2219 {
2220 case SSH_OK:
2221 sk_ssh_connected(s);
2222 break;
2223
2224 case SSH_AGAIN:
2225 return 1;
2226
2227 case SSH_ERROR:
2228 s->err = ssh_get_error(s->ssh->session);
2229 s->err_hook(s, ssh_get_error_code(s->ssh->session));
2230 break;
2231 }
2232 return 0;
2233 }
af62c0f9 2234#endif
65d2a88d 2235
05476c4d
OZ
2236 default:
2237 if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
2238 {
2239 if (s->tx_hook)
2240 s->tx_hook(s);
2241 return 1;
2242 }
2243 return 0;
2244 }
b5d9ee5c
MM
2245}
2246
70b90dde 2247int sk_is_ipv4(sock *s)
08b3a24d 2248{ return s->af == AF_INET; }
70b90dde
JMM
2249
2250int sk_is_ipv6(sock *s)
08b3a24d 2251{ return s->af == AF_INET6; }
70b90dde 2252
9dbcb11c
JMM
2253void
2254sk_err(sock *s, int revents)
2255{
2256 int se = 0, sse = sizeof(se);
ccd2a3ed 2257 if ((s->type != SK_MAGIC) && (revents & POLLERR))
9dbcb11c
JMM
2258 if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
2259 {
2260 log(L_ERR "IO: Socket error: SO_ERROR: %m");
2261 se = 0;
2262 }
2263
2264 s->err_hook(s, se);
2265}
2266
b5d9ee5c
MM
2267void
2268sk_dump_all(void)
2269{
2270 node *n;
2271 sock *s;
2272
2273 debug("Open sockets:\n");
2274 WALK_LIST(n, sock_list)
05476c4d
OZ
2275 {
2276 s = SKIP_BACK(sock, n, n);
2277 debug("%p ", s);
2278 sk_dump(&s->r);
2279 }
b5d9ee5c
MM
2280 debug("\n");
2281}
2282
b5d9ee5c 2283
8bcb5fb1
OZ
2284/*
2285 * Internal event log and watchdog
2286 */
2287
2288#define EVENT_LOG_LENGTH 32
2289
2290struct event_log_entry
2291{
2292 void *hook;
2293 void *data;
2294 btime timestamp;
2295 btime duration;
2296};
2297
2298static struct event_log_entry event_log[EVENT_LOG_LENGTH];
2299static struct event_log_entry *event_open;
2300static int event_log_pos, event_log_num, watchdog_active;
2301static btime last_time;
2302static btime loop_time;
2303
2304static void
2305io_update_time(void)
2306{
2307 struct timespec ts;
2308 int rv;
2309
2310 if (!clock_monotonic_available)
2311 return;
2312
2313 /*
2314 * This is third time-tracking procedure (after update_times() above and
2315 * times_update() in BFD), dedicated to internal event log and latency
2316 * tracking. Hopefully, we consolidate these sometimes.
2317 */
2318
2319 rv = clock_gettime(CLOCK_MONOTONIC, &ts);
2320 if (rv < 0)
2321 die("clock_gettime: %m");
2322
2323 last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
2324
2325 if (event_open)
2326 {
2327 event_open->duration = last_time - event_open->timestamp;
2328
2329 if (event_open->duration > config->latency_limit)
2330 log(L_WARN "Event 0x%p 0x%p took %d ms",
2331 event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
2332
2333 event_open = NULL;
2334 }
2335}
2336
2337/**
2338 * io_log_event - mark approaching event into event log
2339 * @hook: event hook address
2340 * @data: event data address
2341 *
2342 * Store info (hook, data, timestamp) about the following internal event into
2343 * a circular event log (@event_log). When latency tracking is enabled, the log
2344 * entry is kept open (in @event_open) so the duration can be filled later.
2345 */
2346void
2347io_log_event(void *hook, void *data)
2348{
2349 if (config->latency_debug)
2350 io_update_time();
2351
2352 struct event_log_entry *en = event_log + event_log_pos;
2353
2354 en->hook = hook;
2355 en->data = data;
2356 en->timestamp = last_time;
2357 en->duration = 0;
2358
2359 event_log_num++;
2360 event_log_pos++;
2361 event_log_pos %= EVENT_LOG_LENGTH;
2362
2363 event_open = config->latency_debug ? en : NULL;
2364}
2365
2366static inline void
2367io_close_event(void)
2368{
2369 if (event_open)
2370 io_update_time();
2371}
2372
2373void
2374io_log_dump(void)
2375{
2376 int i;
2377
2378 log(L_DEBUG "Event log:");
2379 for (i = 0; i < EVENT_LOG_LENGTH; i++)
2380 {
2381 struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
2382 if (en->hook)
2383 log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
2384 (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
2385 }
2386}
2387
2388void
2389watchdog_sigalrm(int sig UNUSED)
2390{
2391 /* Update last_time and duration, but skip latency check */
2392 config->latency_limit = 0xffffffff;
2393 io_update_time();
2394
2395 /* We want core dump */
2396 abort();
2397}
2398
2399static inline void
2400watchdog_start1(void)
2401{
2402 io_update_time();
2403
2404 loop_time = last_time;
2405}
2406
2407static inline void
2408watchdog_start(void)
2409{
2410 io_update_time();
2411
2412 loop_time = last_time;
2413 event_log_num = 0;
2414
2415 if (config->watchdog_timeout)
2416 {
2417 alarm(config->watchdog_timeout);
2418 watchdog_active = 1;
2419 }
2420}
2421
2422static inline void
2423watchdog_stop(void)
2424{
2425 io_update_time();
2426
2427 if (watchdog_active)
2428 {
2429 alarm(0);
2430 watchdog_active = 0;
2431 }
2432
2433 btime duration = last_time - loop_time;
2434 if (duration > config->watchdog_warning)
2435 log(L_WARN "I/O loop cycle took %d ms for %d events",
2436 (int) (duration TO_MS), event_log_num);
2437}
2438
2439
b5d9ee5c
MM
2440/*
2441 * Main I/O Loop
2442 */
2443
4c9dd1e4
MM
/*
 * Request flags for asynchronous actions. io_loop() tests them once per
 * iteration, just before it would enter poll(): for a set flag it logs the
 * event, calls the matching handler (async_config(), async_dump() or
 * async_shutdown()), clears the flag and restarts the iteration.
 * NOTE(review): the flags are presumably set from signal handlers, which
 * are not visible in this part of the file -- confirm.
 */
2444volatile int async_config_flag; /* Asynchronous reconfiguration/dump scheduled */
2445volatile int async_dump_flag;
c8cafc8e 2446volatile int async_shutdown_flag;
4c9dd1e4 2447
b5d9ee5c
MM
2448void
2449io_init(void)
2450{
2451 init_list(&near_timers);
2452 init_list(&far_timers);
2453 init_list(&sock_list);
e8f73195 2454 init_list(&global_event_list);
7e5f5ffd 2455 krt_io_init();
fd91ae33
OZ
2456 init_times();
2457 update_times();
a92cf57d 2458 boot_time = now;
fd91ae33 2459 srandom((int) now_real);
b5d9ee5c
MM
2460}
2461
ea89da38
OZ
/*
 * short_loops counts consecutive io_loop() iterations that handled socket
 * activity while events were still pending. In such an iteration the loop
 * restarts right after the first socket pass and skips the second pass
 * (the one that reads non-fast_rx sockets and handles POLLHUP/POLLERR).
 * Once the counter reaches SHORT_LOOP_MAX the second pass is run anyway
 * and the counter is reset to zero.
 */
2462static int short_loops = 0;
2463#define SHORT_LOOP_MAX 10
2464
b5d9ee5c
MM
2465void
2466io_loop(void)
2467{
e1c13a5a 2468 int poll_tout;
b5d9ee5c 2469 time_t tout;
ea0a8be2 2470 int nfds, events, pout;
b5d9ee5c 2471 sock *s;
38a608c5 2472 node *n;
e1c13a5a
JMM
2473 int fdmax = 256;
2474 struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
b5d9ee5c 2475
8bcb5fb1 2476 watchdog_start1();
b5d9ee5c
MM
2477 for(;;)
2478 {
30770df2 2479 events = ev_run_list(&global_event_list);
bd22d7f4 2480 timers:
fd91ae33 2481 update_times();
b5d9ee5c
MM
2482 tout = tm_first_shot();
2483 if (tout <= now)
2484 {
2485 tm_shot();
bd22d7f4 2486 goto timers;
b5d9ee5c 2487 }
e1c13a5a 2488 poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */
b5d9ee5c 2489
8bcb5fb1
OZ
2490 io_close_event();
2491
e1c13a5a 2492 nfds = 0;
b5d9ee5c
MM
2493 WALK_LIST(n, sock_list)
2494 {
e1c13a5a 2495 pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
b5d9ee5c
MM
2496 s = SKIP_BACK(sock, n, n);
2497 if (s->rx_hook)
2498 {
e1c13a5a
JMM
2499 pfd[nfds].fd = s->fd;
2500 pfd[nfds].events |= POLLIN;
b5d9ee5c
MM
2501 }
2502 if (s->tx_hook && s->ttx != s->tpos)
2503 {
e1c13a5a
JMM
2504 pfd[nfds].fd = s->fd;
2505 pfd[nfds].events |= POLLOUT;
2506 }
2507 if (pfd[nfds].fd != -1)
2508 {
2509 s->index = nfds;
2510 nfds++;
b5d9ee5c 2511 }
38a608c5 2512 else
e1c13a5a
JMM
2513 s->index = -1;
2514
2515 if (nfds >= fdmax)
2516 {
2517 fdmax *= 2;
2518 pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
2519 }
b5d9ee5c
MM
2520 }
2521
4c9dd1e4
MM
2522 /*
2523 * Yes, this is racy. But even if the signal comes before this test
e1c13a5a 2524 * and entering poll(), it gets caught on the next timer tick.
4c9dd1e4
MM
2525 */
2526
2527 if (async_config_flag)
2528 {
8bcb5fb1 2529 io_log_event(async_config, NULL);
4c9dd1e4
MM
2530 async_config();
2531 async_config_flag = 0;
f4aabcee 2532 continue;
4c9dd1e4
MM
2533 }
2534 if (async_dump_flag)
2535 {
8bcb5fb1 2536 io_log_event(async_dump, NULL);
4c9dd1e4
MM
2537 async_dump();
2538 async_dump_flag = 0;
f4aabcee
MM
2539 continue;
2540 }
2541 if (async_shutdown_flag)
2542 {
8bcb5fb1 2543 io_log_event(async_shutdown, NULL);
f4aabcee
MM
2544 async_shutdown();
2545 async_shutdown_flag = 0;
2546 continue;
4c9dd1e4
MM
2547 }
2548
e1c13a5a 2549 /* And finally enter poll() to find active sockets */
8bcb5fb1 2550 watchdog_stop();
ea0a8be2 2551 pout = poll(pfd, nfds, poll_tout);
8bcb5fb1 2552 watchdog_start();
ea89da38 2553
ea0a8be2 2554 if (pout < 0)
b5d9ee5c
MM
2555 {
2556 if (errno == EINTR || errno == EAGAIN)
2557 continue;
e1c13a5a 2558 die("poll: %m");
b5d9ee5c 2559 }
ea0a8be2 2560 if (pout)
b5d9ee5c 2561 {
ea89da38
OZ
2562 /* guaranteed to be non-empty */
2563 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2564
38a608c5 2565 while (current_sock)
b5d9ee5c 2566 {
38a608c5 2567 sock *s = current_sock;
e1c13a5a
JMM
2568 if (s->index == -1)
2569 {
2570 current_sock = sk_next(s);
2571 goto next;
2572 }
2573
38a608c5 2574 int e;
ea89da38
OZ
2575 int steps;
2576
2577 steps = MAX_STEPS;
9dbcb11c 2578 if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
38a608c5
MM
2579 do
2580 {
4323099d 2581 steps--;
8bcb5fb1 2582 io_log_event(s->rx_hook, s->data);
fd926ed4 2583 e = sk_read(s, pfd[s->index].revents);
38a608c5
MM
2584 if (s != current_sock)
2585 goto next;
2586 }
4323099d
OZ
2587 while (e && s->rx_hook && steps);
2588
2589 steps = MAX_STEPS;
e1c13a5a 2590 if (pfd[s->index].revents & POLLOUT)
38a608c5
MM
2591 do
2592 {
4323099d 2593 steps--;
8bcb5fb1 2594 io_log_event(s->tx_hook, s->data);
38a608c5
MM
2595 e = sk_write(s);
2596 if (s != current_sock)
2597 goto next;
2598 }
4323099d 2599 while (e && steps);
9dbcb11c 2600
38a608c5
MM
2601 current_sock = sk_next(s);
2602 next: ;
b5d9ee5c 2603 }
ea89da38
OZ
2604
2605 short_loops++;
2606 if (events && (short_loops < SHORT_LOOP_MAX))
2607 continue;
2608 short_loops = 0;
2609
2610 int count = 0;
2611 current_sock = stored_sock;
2612 if (current_sock == NULL)
2613 current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2614
2615 while (current_sock && count < MAX_RX_STEPS)
2616 {
2617 sock *s = current_sock;
e1c13a5a
JMM
2618 if (s->index == -1)
2619 {
2620 current_sock = sk_next(s);
2621 goto next2;
2622 }
ea89da38 2623
9dbcb11c 2624 if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
ea89da38
OZ
2625 {
2626 count++;
8bcb5fb1 2627 io_log_event(s->rx_hook, s->data);
fd926ed4 2628 sk_read(s, pfd[s->index].revents);
ea89da38 2629 if (s != current_sock)
9dbcb11c 2630 goto next2;
ea89da38 2631 }
9dbcb11c
JMM
2632
2633 if (pfd[s->index].revents & (POLLHUP | POLLERR))
2634 {
2635 sk_err(s, pfd[s->index].revents);
2c33da50
JMM
2636 if (s != current_sock)
2637 goto next2;
ea89da38 2638 }
9dbcb11c 2639
ea89da38
OZ
2640 current_sock = sk_next(s);
2641 next2: ;
2642 }
2643
9dbcb11c 2644
ea89da38 2645 stored_sock = current_sock;
b5d9ee5c
MM
2646 }
2647 }
2648}
41c8976e
OF
2649
/*
 * Check whether another BIRD instance is already running by trying to
 * connect to the UNIX-domain stream socket at @path. The function dies when
 * the connection succeeds, when the socket cannot be created or when @path
 * does not fit into sun_path. Otherwise it closes the probe socket and
 * returns.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");

  /* The path including its terminating zero must fit into sun_path */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  memset(&sa, 0, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  /* A successful connect means somebody is listening on the socket */
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");

  close(fd);
}
2667}