]>
Commit | Line | Data |
---|---|---|
b5d9ee5c MM |
1 | /* |
2 | * BIRD Internet Routing Daemon -- Unix I/O | |
3 | * | |
38a608c5 | 4 | * (c) 1998--2004 Martin Mares <mj@ucw.cz> |
b1a1faba | 5 | * (c) 2004 Ondrej Filip <feela@network.cz> |
b5d9ee5c MM |
6 | * |
7 | * Can be freely distributed and used under the terms of the GNU GPL. | |
8 | */ | |
9 | ||
607d9914 OZ |
10 | /* Unfortunately, some glibc versions hide parts of RFC 3542 API |
11 | if _GNU_SOURCE is not defined. */ | |
12 | #define _GNU_SOURCE 1 | |
13 | ||
b5d9ee5c MM |
14 | #include <stdio.h> |
15 | #include <stdlib.h> | |
01b776e1 | 16 | #include <time.h> |
b5d9ee5c MM |
17 | #include <sys/time.h> |
18 | #include <sys/types.h> | |
19 | #include <sys/socket.h> | |
20 | #include <sys/fcntl.h> | |
46a82e9c | 21 | #include <sys/uio.h> |
b93abffa | 22 | #include <sys/un.h> |
b5d9ee5c MM |
23 | #include <unistd.h> |
24 | #include <errno.h> | |
d0e9b36d | 25 | #include <netinet/in.h> |
93e868c7 | 26 | #include <netinet/icmp6.h> |
b5d9ee5c MM |
27 | |
28 | #include "nest/bird.h" | |
29 | #include "lib/lists.h" | |
30 | #include "lib/resource.h" | |
31 | #include "lib/timer.h" | |
32 | #include "lib/socket.h" | |
e8f73195 | 33 | #include "lib/event.h" |
afa8937a | 34 | #include "lib/string.h" |
b5d9ee5c MM |
35 | #include "nest/iface.h" |
36 | ||
37 | #include "lib/unix.h" | |
a2867cd9 | 38 | #include "lib/sysio.h" |
b5d9ee5c | 39 | |
ea89da38 | 40 | /* Maximum number of calls of tx handler for one socket in one |
4323099d OZ |
41 | * select iteration. Should be small enough to not monopolize CPU by |
42 | * one protocol instance. | |
43 | */ | |
44 | #define MAX_STEPS 4 | |
45 | ||
ea89da38 OZ |
46 | /* Maximum number of calls of rx handler for all sockets in one select |
47 | iteration. RX callbacks are often much more costly so we limit | |
48 | this to get small latencies */ | |
49 | #define MAX_RX_STEPS 4 | |
50 | ||
a9c986f9 MM |
51 | /* |
52 | * Tracked Files | |
53 | */ | |
54 | ||
/* A tracked stdio stream: a resource header followed by the FILE it owns. */
struct rfile {
  resource r;   /* resource header; rf_free() and rf_dump() cast a resource pointer to struct rfile */
  FILE *f;      /* the stream; closed by rf_free() */
};
59 | ||
60 | static void | |
61 | rf_free(resource *r) | |
62 | { | |
63 | struct rfile *a = (struct rfile *) r; | |
64 | ||
65 | fclose(a->f); | |
66 | } | |
67 | ||
68 | static void | |
69 | rf_dump(resource *r) | |
70 | { | |
71 | struct rfile *a = (struct rfile *) r; | |
72 | ||
73 | debug("(FILE *%p)\n", a->f); | |
74 | } | |
75 | ||
/*
 * Resource class for tracked files, initialised positionally: a name
 * string, the size of struct rfile, then rf_free and rf_dump.
 * NOTE(review): the two trailing NULL slots are presumably optional hooks
 * of struct resclass -- confirm against lib/resource.h.
 */
static struct resclass rf_class = {
  "FILE",
  sizeof(struct rfile),
  rf_free,
  rf_dump,
  NULL,
  NULL
};
84 | ||
85 | void * | |
f78056fb | 86 | tracked_fopen(pool *p, char *name, char *mode) |
a9c986f9 MM |
87 | { |
88 | FILE *f = fopen(name, mode); | |
89 | ||
90 | if (f) | |
91 | { | |
92 | struct rfile *r = ralloc(p, &rf_class); | |
93 | r->f = f; | |
94 | } | |
95 | return f; | |
96 | } | |
97 | ||
525fa2c1 MM |
98 | /** |
99 | * DOC: Timers | |
100 | * | |
101 | * Timers are resources which represent a wish of a module to call | |
102 | * a function at the specified time. The platform dependent code | |
58f7d004 | 103 | * doesn't guarantee exact timing, only that a timer function |
525fa2c1 MM |
104 | * won't be called before the requested time. |
105 | * | |
fd91ae33 OZ |
106 | * In BIRD, time is represented by values of the &bird_clock_t type |
107 | * which are integral numbers interpreted as a relative number of seconds since | |
108 | * some fixed time point in the past. The current time can be read | |
109 | * from variable @now with reasonable accuracy and is monotonic. There is also | |
110 | * a current 'absolute' time in variable @now_real reported by OS. | |
525fa2c1 MM |
111 | * |
112 | * Each timer is described by a &timer structure containing a pointer | |
113 | * to the handler function (@hook), data private to this function (@data), | |
114 | * time the function should be called at (@expires, 0 for inactive timers), | |
115 | * for the other fields see |timer.h|. | |
b5d9ee5c MM |
116 | */ |
117 | ||
118 | #define NEAR_TIMER_LIMIT 4 | |
119 | ||
b5d9ee5c MM |
120 | static list near_timers, far_timers; |
121 | static bird_clock_t first_far_timer = TIME_INFINITY; | |
122 | ||
002b6423 OZ |
123 | /* now must be different from 0, because 0 is a special value in timer->expires */ |
124 | bird_clock_t now = 1, now_real; | |
fd91ae33 OZ |
125 | |
126 | static void | |
127 | update_times_plain(void) | |
128 | { | |
129 | bird_clock_t new_time = time(NULL); | |
130 | int delta = new_time - now_real; | |
131 | ||
132 | if ((delta >= 0) && (delta < 60)) | |
133 | now += delta; | |
134 | else if (now_real != 0) | |
135 | log(L_WARN "Time jump, delta %d s", delta); | |
136 | ||
137 | now_real = new_time; | |
138 | } | |
139 | ||
140 | static void | |
141 | update_times_gettime(void) | |
142 | { | |
143 | struct timespec ts; | |
144 | int rv; | |
145 | ||
146 | rv = clock_gettime(CLOCK_MONOTONIC, &ts); | |
147 | if (rv != 0) | |
148 | die("clock_gettime: %m"); | |
149 | ||
150 | if (ts.tv_sec != now) { | |
151 | if (ts.tv_sec < now) | |
152 | log(L_ERR "Monotonic timer is broken"); | |
153 | ||
154 | now = ts.tv_sec; | |
155 | now_real = time(NULL); | |
156 | } | |
157 | } | |
158 | ||
159 | static int clock_monotonic_available; | |
160 | ||
161 | static inline void | |
162 | update_times(void) | |
163 | { | |
164 | if (clock_monotonic_available) | |
165 | update_times_gettime(); | |
166 | else | |
167 | update_times_plain(); | |
168 | } | |
169 | ||
170 | static inline void | |
171 | init_times(void) | |
172 | { | |
173 | struct timespec ts; | |
174 | clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0); | |
175 | if (!clock_monotonic_available) | |
176 | log(L_WARN "Monotonic timer is missing"); | |
177 | } | |
178 | ||
b5d9ee5c MM |
179 | |
180 | static void | |
181 | tm_free(resource *r) | |
182 | { | |
183 | timer *t = (timer *) r; | |
184 | ||
185 | tm_stop(t); | |
186 | } | |
187 | ||
188 | static void | |
189 | tm_dump(resource *r) | |
190 | { | |
191 | timer *t = (timer *) r; | |
192 | ||
e8f73195 | 193 | debug("(code %p, data %p, ", t->hook, t->data); |
af847acc MM |
194 | if (t->randomize) |
195 | debug("rand %d, ", t->randomize); | |
196 | if (t->recurrent) | |
197 | debug("recur %d, ", t->recurrent); | |
b5d9ee5c MM |
198 | if (t->expires) |
199 | debug("expires in %d sec)\n", t->expires - now); | |
200 | else | |
201 | debug("inactive)\n"); | |
202 | } | |
203 | ||
/*
 * Resource class for timers, initialised positionally: a name string, the
 * size of a timer, then tm_free and tm_dump.
 * NOTE(review): the two trailing NULL slots are presumably optional hooks
 * of struct resclass -- confirm against lib/resource.h.
 */
static struct resclass tm_class = {
  "Timer",
  sizeof(timer),
  tm_free,
  tm_dump,
  NULL,
  NULL
};
212 | ||
525fa2c1 MM |
213 | /** |
214 | * tm_new - create a timer | |
215 | * @p: pool | |
216 | * | |
217 | * This function creates a new timer resource and returns | |
218 | * a pointer to it. To use the timer, you need to fill in | |
219 | * the structure fields and call tm_start() to start timing. | |
220 | */ | |
b5d9ee5c MM |
221 | timer * |
222 | tm_new(pool *p) | |
223 | { | |
224 | timer *t = ralloc(p, &tm_class); | |
b5d9ee5c MM |
225 | return t; |
226 | } | |
227 | ||
228 | static inline void | |
229 | tm_insert_near(timer *t) | |
230 | { | |
231 | node *n = HEAD(near_timers); | |
232 | ||
233 | while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires)) | |
234 | n = n->next; | |
235 | insert_node(&t->n, n->prev); | |
236 | } | |
237 | ||
525fa2c1 MM |
238 | /** |
239 | * tm_start - start a timer | |
240 | * @t: timer | |
241 | * @after: number of seconds the timer should be run after | |
242 | * | |
243 | * This function schedules the hook function of the timer to | |
244 | * be called after @after seconds. If the timer has been already | |
245 | * started, its @expires time is replaced by the new value. | |
246 | * | |
247 | * If you have set the @randomize field of @t, the timeout | |
248 | * will be increased by a random number of seconds chosen | |
249 | * uniformly from range 0 .. @randomize. | |
250 | * | |
251 | * You can call tm_start() from the handler function of the timer | |
252 | * to request another run of the timer. Also, you can set the @recurrent | |
253 | * field to have the timer re-added automatically with the same timeout. | |
254 | */ | |
b5d9ee5c MM |
255 | void |
256 | tm_start(timer *t, unsigned after) | |
257 | { | |
258 | bird_clock_t when; | |
259 | ||
260 | if (t->randomize) | |
af847acc | 261 | after += random() % (t->randomize + 1); |
b5d9ee5c MM |
262 | when = now + after; |
263 | if (t->expires == when) | |
264 | return; | |
265 | if (t->expires) | |
266 | rem_node(&t->n); | |
267 | t->expires = when; | |
268 | if (after <= NEAR_TIMER_LIMIT) | |
269 | tm_insert_near(t); | |
270 | else | |
271 | { | |
272 | if (!first_far_timer || first_far_timer > when) | |
273 | first_far_timer = when; | |
274 | add_tail(&far_timers, &t->n); | |
275 | } | |
276 | } | |
277 | ||
525fa2c1 MM |
278 | /** |
279 | * tm_stop - stop a timer | |
280 | * @t: timer | |
281 | * | |
282 | * This function stops a timer. If the timer is already stopped, | |
283 | * nothing happens. | |
284 | */ | |
b5d9ee5c MM |
285 | void |
286 | tm_stop(timer *t) | |
287 | { | |
288 | if (t->expires) | |
289 | { | |
290 | rem_node(&t->n); | |
291 | t->expires = 0; | |
292 | } | |
293 | } | |
294 | ||
295 | static void | |
296 | tm_dump_them(char *name, list *l) | |
297 | { | |
298 | node *n; | |
299 | timer *t; | |
300 | ||
301 | debug("%s timers:\n", name); | |
302 | WALK_LIST(n, *l) | |
303 | { | |
304 | t = SKIP_BACK(timer, n, n); | |
305 | debug("%p ", t); | |
306 | tm_dump(&t->r); | |
307 | } | |
308 | debug("\n"); | |
309 | } | |
310 | ||
/* Dump both timer queues to the debug output: near timers first, then far. */
void
tm_dump_all(void)
{
  tm_dump_them("Near", &near_timers);
  tm_dump_them("Far", &far_timers);
}
317 | ||
318 | static inline time_t | |
319 | tm_first_shot(void) | |
320 | { | |
321 | time_t x = first_far_timer; | |
322 | ||
323 | if (!EMPTY_LIST(near_timers)) | |
324 | { | |
325 | timer *t = SKIP_BACK(timer, n, HEAD(near_timers)); | |
326 | if (t->expires < x) | |
327 | x = t->expires; | |
328 | } | |
329 | return x; | |
330 | } | |
331 | ||
332 | static void | |
333 | tm_shot(void) | |
334 | { | |
335 | timer *t; | |
336 | node *n, *m; | |
337 | ||
338 | if (first_far_timer <= now) | |
339 | { | |
28a9a189 | 340 | bird_clock_t limit = now + NEAR_TIMER_LIMIT; |
b5d9ee5c MM |
341 | first_far_timer = TIME_INFINITY; |
342 | n = HEAD(far_timers); | |
343 | while (m = n->next) | |
344 | { | |
345 | t = SKIP_BACK(timer, n, n); | |
346 | if (t->expires <= limit) | |
347 | { | |
348 | rem_node(n); | |
349 | tm_insert_near(t); | |
350 | } | |
351 | else if (t->expires < first_far_timer) | |
352 | first_far_timer = t->expires; | |
353 | n = m; | |
354 | } | |
355 | } | |
356 | while ((n = HEAD(near_timers)) -> next) | |
357 | { | |
af847acc | 358 | int delay; |
b5d9ee5c MM |
359 | t = SKIP_BACK(timer, n, n); |
360 | if (t->expires > now) | |
361 | break; | |
362 | rem_node(n); | |
af847acc | 363 | delay = t->expires - now; |
b5d9ee5c | 364 | t->expires = 0; |
af847acc MM |
365 | if (t->recurrent) |
366 | { | |
367 | int i = t->recurrent - delay; | |
368 | if (i < 0) | |
369 | i = 0; | |
370 | tm_start(t, i); | |
371 | } | |
b5d9ee5c MM |
372 | t->hook(t); |
373 | } | |
374 | } | |
375 | ||
0d3effcf OF |
376 | /** |
377 | * tm_parse_datetime - parse a date and time | |
378 | * @x: datetime string | |
379 | * | |
380 | * tm_parse_datetime() takes a textual representation of | |
381 | * a date and time (dd-mm-yyyy hh:mm:ss) | |
382 | * and converts it to the corresponding value of type &bird_clock_t. | |
383 | */ | |
384 | bird_clock_t | |
385 | tm_parse_datetime(char *x) | |
386 | { | |
387 | struct tm tm; | |
388 | int n; | |
389 | time_t t; | |
390 | ||
391 | if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n]) | |
392 | return tm_parse_date(x); | |
393 | tm.tm_mon--; | |
394 | tm.tm_year -= 1900; | |
395 | t = mktime(&tm); | |
396 | if (t == (time_t) -1) | |
397 | return 0; | |
398 | return t; | |
399 | } | |
525fa2c1 MM |
400 | /** |
401 | * tm_parse_date - parse a date | |
402 | * @x: date string | |
403 | * | |
404 | * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy) | |
405 | * and converts it to the corresponding value of type &bird_clock_t. | |
406 | */ | |
913f7dc9 MM |
407 | bird_clock_t |
408 | tm_parse_date(char *x) | |
409 | { | |
410 | struct tm tm; | |
411 | int n; | |
412 | time_t t; | |
413 | ||
414 | if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n]) | |
415 | return 0; | |
416 | tm.tm_mon--; | |
417 | tm.tm_year -= 1900; | |
418 | tm.tm_hour = tm.tm_min = tm.tm_sec = 0; | |
419 | t = mktime(&tm); | |
420 | if (t == (time_t) -1) | |
421 | return 0; | |
422 | return t; | |
423 | } | |
424 | ||
c37e7851 OZ |
425 | static void |
426 | tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta) | |
913f7dc9 | 427 | { |
c37e7851 OZ |
428 | static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", |
429 | "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; | |
913f7dc9 | 430 | |
c37e7851 OZ |
431 | if (delta < 20*3600) |
432 | bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min); | |
433 | else if (delta < 360*86400) | |
434 | bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday); | |
435 | else | |
436 | bsprintf(x, "%d", tm->tm_year+1900); | |
913f7dc9 MM |
437 | } |
438 | ||
c37e7851 OZ |
439 | #include "conf/conf.h" |
440 | ||
525fa2c1 MM |
441 | /** |
442 | * tm_format_datetime - convert date and time to textual representation | |
443 | * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE | |
444 | * @t: time | |
445 | * | |
fd91ae33 OZ |
446 | * This function formats the given relative time value @t to a textual |
447 | * date/time representation (dd-mm-yyyy hh:mm:ss) in real time. | |
525fa2c1 | 448 | */ |
7a88832e | 449 | void |
c37e7851 | 450 | tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) |
7a88832e | 451 | { |
c37e7851 | 452 | const char *fmt_used; |
7a88832e | 453 | struct tm *tm; |
fd91ae33 OZ |
454 | bird_clock_t delta = now - t; |
455 | t = now_real - delta; | |
7a88832e | 456 | tm = localtime(&t); |
7a88832e | 457 | |
c37e7851 OZ |
458 | if (fmt_spec->fmt1 == NULL) |
459 | return tm_format_reltime(x, tm, delta); | |
afa8937a | 460 | |
c37e7851 OZ |
461 | if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit)) |
462 | fmt_used = fmt_spec->fmt1; | |
afa8937a | 463 | else |
c37e7851 OZ |
464 | fmt_used = fmt_spec->fmt2; |
465 | ||
466 | int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm); | |
467 | if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE)) | |
468 | strcpy(x, "<too-long>"); | |
afa8937a MM |
469 | } |
470 | ||
525fa2c1 MM |
471 | /** |
472 | * DOC: Sockets | |
473 | * | |
474 | * Socket resources represent network connections. Their data structure (&socket) | |
475 | * contains a lot of fields defining the exact type of the socket, the local and | |
476 | * remote addresses and ports, pointers to socket buffers and finally pointers to | |
477 | * hook functions to be called when new data have arrived to the receive buffer | |
478 | * (@rx_hook), when the contents of the transmit buffer have been transmitted | |
479 | * (@tx_hook) and when an error or connection close occurs (@err_hook). | |
480 | * | |
38a608c5 | 481 | * Freeing of sockets from inside socket hooks is perfectly safe. |
b5d9ee5c MM |
482 | */ |
483 | ||
abae6e9c MM |
484 | #ifndef SOL_IP |
485 | #define SOL_IP IPPROTO_IP | |
486 | #endif | |
487 | ||
b1a1faba OF |
488 | #ifndef SOL_IPV6 |
489 | #define SOL_IPV6 IPPROTO_IPV6 | |
490 | #endif | |
491 | ||
b5d9ee5c | 492 | static list sock_list; |
38a608c5 | 493 | static struct birdsock *current_sock; |
ea89da38 | 494 | static struct birdsock *stored_sock; |
38a608c5 MM |
495 | static int sock_recalc_fdsets_p; |
496 | ||
497 | static inline sock * | |
498 | sk_next(sock *s) | |
499 | { | |
500 | if (!s->n.next->next) | |
501 | return NULL; | |
502 | else | |
503 | return SKIP_BACK(sock, n, s->n.next); | |
504 | } | |
b5d9ee5c MM |
505 | |
506 | static void | |
4da25acb | 507 | sk_alloc_bufs(sock *s) |
b5d9ee5c | 508 | { |
4da25acb MM |
509 | if (!s->rbuf && s->rbsize) |
510 | s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize); | |
511 | s->rpos = s->rbuf; | |
512 | if (!s->tbuf && s->tbsize) | |
513 | s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize); | |
514 | s->tpos = s->ttx = s->tbuf; | |
515 | } | |
b5d9ee5c | 516 | |
4da25acb MM |
517 | static void |
518 | sk_free_bufs(sock *s) | |
519 | { | |
38a608c5 | 520 | if (s->rbuf_alloc) |
4da25acb MM |
521 | { |
522 | xfree(s->rbuf_alloc); | |
523 | s->rbuf = s->rbuf_alloc = NULL; | |
524 | } | |
38a608c5 | 525 | if (s->tbuf_alloc) |
4da25acb MM |
526 | { |
527 | xfree(s->tbuf_alloc); | |
528 | s->tbuf = s->tbuf_alloc = NULL; | |
529 | } | |
530 | } | |
531 | ||
532 | static void | |
533 | sk_free(resource *r) | |
534 | { | |
535 | sock *s = (sock *) r; | |
536 | ||
537 | sk_free_bufs(s); | |
b5d9ee5c | 538 | if (s->fd >= 0) |
320f4173 MM |
539 | { |
540 | close(s->fd); | |
38a608c5 MM |
541 | if (s == current_sock) |
542 | current_sock = sk_next(s); | |
ea89da38 OZ |
543 | if (s == stored_sock) |
544 | stored_sock = sk_next(s); | |
320f4173 | 545 | rem_node(&s->n); |
38a608c5 | 546 | sock_recalc_fdsets_p = 1; |
320f4173 | 547 | } |
b5d9ee5c MM |
548 | } |
549 | ||
4da25acb MM |
/*
 * Drop the socket's owned buffers and allocate them again according to the
 * current rbsize/tbsize; sk_alloc_bufs() also resets the buffer positions.
 */
void
sk_reallocate(sock *s)
{
  sk_free_bufs(s);
  sk_alloc_bufs(s);
}
556 | ||
b5d9ee5c MM |
557 | static void |
558 | sk_dump(resource *r) | |
559 | { | |
560 | sock *s = (sock *) r; | |
b93abffa | 561 | static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", "UDP/MC", "IP", "IP/MC", "MAGIC", "UNIX<", "UNIX", "DEL!" }; |
b5d9ee5c MM |
562 | |
563 | debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n", | |
564 | sk_type_names[s->type], | |
565 | s->data, | |
566 | s->saddr, | |
567 | s->sport, | |
568 | s->daddr, | |
569 | s->dport, | |
570 | s->tos, | |
571 | s->ttl, | |
572 | s->iface ? s->iface->name : "none"); | |
573 | } | |
574 | ||
/*
 * Resource class for sockets, initialised positionally: a name string, the
 * size of a sock, then sk_free and sk_dump.
 * NOTE(review): the two trailing NULL slots are presumably optional hooks
 * of struct resclass -- confirm against lib/resource.h.
 */
static struct resclass sk_class = {
  "Socket",
  sizeof(sock),
  sk_free,
  sk_dump,
  NULL,
  NULL
};
583 | ||
525fa2c1 MM |
584 | /** |
585 | * sk_new - create a socket | |
586 | * @p: pool | |
587 | * | |
588 | * This function creates a new socket resource. If you want to use it, | |
589 | * you need to fill in all the required fields of the structure and | |
590 | * call sk_open() to do the actual opening of the socket. | |
591 | */ | |
b5d9ee5c MM |
592 | sock * |
593 | sk_new(pool *p) | |
594 | { | |
595 | sock *s = ralloc(p, &sk_class); | |
596 | s->pool = p; | |
daeeb8e9 | 597 | // s->saddr = s->daddr = IPA_NONE; |
b5d9ee5c | 598 | s->tos = s->ttl = -1; |
b5d9ee5c MM |
599 | s->fd = -1; |
600 | return s; | |
601 | } | |
602 | ||
38a608c5 MM |
/* Append @s to sock_list and flag that the fd sets must be recalculated. */
static void
sk_insert(sock *s)
{
  add_tail(&sock_list, &s->n);
  sock_recalc_fdsets_p = 1;
}
b5d9ee5c | 609 | |
4f22c981 MM |
610 | #ifdef IPV6 |
611 | ||
4f22c981 MM |
612 | void |
613 | fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port) | |
614 | { | |
b1a1faba | 615 | memset (sa, 0, sizeof (struct sockaddr_in6)); |
4f22c981 MM |
616 | sa->sin6_family = AF_INET6; |
617 | sa->sin6_port = htons(port); | |
618 | sa->sin6_flowinfo = 0; | |
b1a1faba OF |
619 | #ifdef HAVE_SIN_LEN |
620 | sa->sin6_len = sizeof(struct sockaddr_in6); | |
621 | #endif | |
4f22c981 MM |
622 | set_inaddr(&sa->sin6_addr, a); |
623 | } | |
624 | ||
061ab802 OZ |
625 | static inline void |
626 | fill_in_sockifa(sockaddr *sa, struct iface *ifa) | |
627 | { | |
628 | sa->sin6_scope_id = ifa ? ifa->index : 0; | |
629 | } | |
630 | ||
4f22c981 | 631 | void |
b1a1faba | 632 | get_sockaddr(struct sockaddr_in6 *sa, ip_addr *a, unsigned *port, int check) |
4f22c981 | 633 | { |
b1a1faba OF |
634 | if (check && sa->sin6_family != AF_INET6) |
635 | bug("get_sockaddr called for wrong address family (%d)", sa->sin6_family); | |
4f22c981 MM |
636 | if (port) |
637 | *port = ntohs(sa->sin6_port); | |
638 | memcpy(a, &sa->sin6_addr, sizeof(*a)); | |
639 | ipa_ntoh(*a); | |
640 | } | |
641 | ||
642 | #else | |
643 | ||
4cf45766 | 644 | void |
4f22c981 | 645 | fill_in_sockaddr(sockaddr *sa, ip_addr a, unsigned port) |
b5d9ee5c | 646 | { |
b1a1faba | 647 | memset (sa, 0, sizeof (struct sockaddr_in)); |
b5d9ee5c MM |
648 | sa->sin_family = AF_INET; |
649 | sa->sin_port = htons(port); | |
b1a1faba OF |
650 | #ifdef HAVE_SIN_LEN |
651 | sa->sin_len = sizeof(struct sockaddr_in); | |
652 | #endif | |
b5d9ee5c MM |
653 | set_inaddr(&sa->sin_addr, a); |
654 | } | |
655 | ||
/* IPv4 build: intentionally empty; both arguments are unused here. */
static inline void
fill_in_sockifa(sockaddr *sa UNUSED, struct iface *ifa UNUSED)
{
}
660 | ||
af847acc | 661 | void |
b1a1faba | 662 | get_sockaddr(struct sockaddr_in *sa, ip_addr *a, unsigned *port, int check) |
b5d9ee5c | 663 | { |
b1a1faba OF |
664 | if (check && sa->sin_family != AF_INET) |
665 | bug("get_sockaddr called for wrong address family (%d)", sa->sin_family); | |
af847acc MM |
666 | if (port) |
667 | *port = ntohs(sa->sin_port); | |
b5d9ee5c | 668 | memcpy(a, &sa->sin_addr.s_addr, sizeof(*a)); |
dce26783 | 669 | ipa_ntoh(*a); |
b5d9ee5c MM |
670 | } |
671 | ||
4f22c981 MM |
672 | #endif |
673 | ||
bed41728 OZ |
674 | |
675 | #ifdef IPV6 | |
676 | ||
677 | /* PKTINFO handling is also standardized in IPv6 */ | |
678 | #define CMSG_RX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo)) | |
679 | #define CMSG_TX_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo)) | |
680 | ||
dcc60494 OZ |
681 | /* |
682 | * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg | |
683 | * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we | |
684 | * don't have IPV6_RECVPKTINFO we suppose the OS implements the older | |
685 | * RFC and we use IPV6_PKTINFO. | |
686 | */ | |
687 | #ifndef IPV6_RECVPKTINFO | |
688 | #define IPV6_RECVPKTINFO IPV6_PKTINFO | |
689 | #endif | |
690 | ||
bed41728 OZ |
691 | static char * |
692 | sysio_register_cmsgs(sock *s) | |
693 | { | |
694 | int ok = 1; | |
695 | if ((s->flags & SKF_LADDR_RX) && | |
696 | setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0) | |
697 | return "IPV6_RECVPKTINFO"; | |
698 | ||
699 | return NULL; | |
700 | } | |
701 | ||
702 | static void | |
703 | sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) | |
704 | { | |
705 | struct cmsghdr *cm; | |
706 | struct in6_pktinfo *pi = NULL; | |
707 | ||
708 | if (!(s->flags & SKF_LADDR_RX)) | |
709 | return; | |
710 | ||
711 | for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) | |
712 | { | |
713 | if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO) | |
714 | pi = (struct in6_pktinfo *) CMSG_DATA(cm); | |
715 | } | |
716 | ||
717 | if (!pi) | |
718 | { | |
719 | s->laddr = IPA_NONE; | |
720 | s->lifindex = 0; | |
721 | return; | |
722 | } | |
723 | ||
724 | get_inaddr(&s->laddr, &pi->ipi6_addr); | |
725 | s->lifindex = pi->ipi6_ifindex; | |
726 | return; | |
727 | } | |
728 | ||
646b24d9 | 729 | /* |
bed41728 OZ |
730 | static void |
731 | sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) | |
732 | { | |
733 | struct cmsghdr *cm; | |
734 | struct in6_pktinfo *pi; | |
735 | ||
736 | if (!(s->flags & SKF_LADDR_TX)) | |
737 | return; | |
738 | ||
739 | msg->msg_control = cbuf; | |
740 | msg->msg_controllen = cbuflen; | |
741 | ||
742 | cm = CMSG_FIRSTHDR(msg); | |
743 | cm->cmsg_level = IPPROTO_IPV6; | |
744 | cm->cmsg_type = IPV6_PKTINFO; | |
745 | cm->cmsg_len = CMSG_LEN(sizeof(*pi)); | |
746 | ||
747 | pi = (struct in6_pktinfo *) CMSG_DATA(cm); | |
748 | set_inaddr(&pi->ipi6_addr, s->saddr); | |
749 | pi->ipi6_ifindex = s->iface ? s->iface->index : 0; | |
750 | ||
751 | msg->msg_controllen = cm->cmsg_len; | |
752 | return; | |
753 | } | |
646b24d9 | 754 | */ |
bed41728 OZ |
755 | #endif |
756 | ||
a39b165e OZ |
757 | static char * |
758 | sk_set_ttl_int(sock *s) | |
759 | { | |
a39b165e | 760 | #ifdef IPV6 |
f9c799a0 | 761 | if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0) |
a39b165e OZ |
762 | return "IPV6_UNICAST_HOPS"; |
763 | #else | |
764 | if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0) | |
765 | return "IP_TTL"; | |
766 | #ifdef CONFIG_UNIX_DONTROUTE | |
ff2857b0 | 767 | int one = 1; |
a39b165e OZ |
768 | if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) |
769 | return "SO_DONTROUTE"; | |
770 | #endif | |
771 | #endif | |
772 | return NULL; | |
773 | } | |
774 | ||
38a608c5 MM |
775 | #define ERR(x) do { err = x; goto bad; } while(0) |
776 | #define WARN(x) log(L_WARN "sk_setup: %s: %m", x) | |
777 | ||
b5d9ee5c MM |
778 | static char * |
779 | sk_setup(sock *s) | |
780 | { | |
781 | int fd = s->fd; | |
353729f5 | 782 | char *err = NULL; |
b5d9ee5c MM |
783 | |
784 | if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) | |
785 | ERR("fcntl(O_NONBLOCK)"); | |
b93abffa MM |
786 | if (s->type == SK_UNIX) |
787 | return NULL; | |
a39b165e | 788 | #ifndef IPV6 |
b5d9ee5c | 789 | if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0) |
f782b72c | 790 | WARN("IP_TOS"); |
b5d9ee5c | 791 | #endif |
789772ed OZ |
792 | |
793 | #ifdef IPV6 | |
794 | int v = 1; | |
795 | if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0) | |
796 | WARN("IPV6_V6ONLY"); | |
797 | #endif | |
798 | ||
a39b165e OZ |
799 | if (s->ttl >= 0) |
800 | err = sk_set_ttl_int(s); | |
a39b165e | 801 | |
353729f5 | 802 | sysio_register_cmsgs(s); |
b5d9ee5c MM |
803 | bad: |
804 | return err; | |
805 | } | |
806 | ||
a39b165e OZ |
807 | /** |
808 | * sk_set_ttl - set TTL for given socket. | |
809 | * @s: socket | |
810 | * @ttl: TTL value | |
811 | * | |
812 | * Set TTL for already opened connections when TTL was not set before. | |
813 | * Useful for accepted connections when different ones should have | |
814 | * different TTL. | |
815 | * | |
816 | * Result: 0 for success, -1 for an error. | |
817 | */ | |
818 | ||
819 | int | |
820 | sk_set_ttl(sock *s, int ttl) | |
821 | { | |
822 | char *err; | |
823 | ||
824 | s->ttl = ttl; | |
825 | if (err = sk_set_ttl_int(s)) | |
826 | log(L_ERR "sk_set_ttl: %s: %m", err); | |
827 | ||
828 | return (err ? -1 : 0); | |
829 | } | |
830 | ||
d51aa281 | 831 | |
d51aa281 OZ |
832 | /** |
833 | * sk_set_md5_auth - add / remove MD5 security association for given socket. | |
834 | * @s: socket | |
835 | * @a: IP address of the other side | |
836 | * @passwd: password used for MD5 authentication | |
837 | * | |
838 | * In TCP MD5 handling code in kernel, there is a set of pairs | |
839 | * (address, password) used to choose password according to | |
840 | * address of the other side. This function is useful for | |
841 | * listening socket, for active sockets it is enough to set | |
842 | * s->password field. | |
843 | * | |
844 | * When called with passwd != NULL, the new pair is added, | |
845 | * When called with passwd == NULL, the existing pair is removed. | |
846 | * | |
847 | * Result: 0 for success, -1 for an error. | |
848 | */ | |
849 | ||
850 | int | |
851 | sk_set_md5_auth(sock *s, ip_addr a, char *passwd) | |
852 | { | |
853 | sockaddr sa; | |
854 | fill_in_sockaddr(&sa, a, 0); | |
855 | return sk_set_md5_auth_int(s, &sa, passwd); | |
856 | } | |
857 | ||
f9c799a0 OZ |
858 | int |
859 | sk_set_broadcast(sock *s, int enable) | |
860 | { | |
861 | if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &enable, sizeof(enable)) < 0) | |
4ac7c834 OZ |
862 | { |
863 | log(L_ERR "sk_set_broadcast: SO_BROADCAST: %m"); | |
864 | return -1; | |
865 | } | |
866 | ||
867 | return 0; | |
f9c799a0 OZ |
868 | } |
869 | ||
870 | ||
871 | #ifdef IPV6 | |
872 | ||
4ac7c834 OZ |
873 | int |
874 | sk_set_ipv6_checksum(sock *s, int offset) | |
875 | { | |
876 | if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) | |
877 | { | |
878 | log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m"); | |
879 | return -1; | |
880 | } | |
881 | ||
882 | return 0; | |
883 | } | |
884 | ||
93e868c7 OZ |
885 | int |
886 | sk_set_icmp_filter(sock *s, int p1, int p2) | |
887 | { | |
888 | /* a bit of lame interface, but it is here only for Radv */ | |
889 | struct icmp6_filter f; | |
890 | ||
891 | ICMP6_FILTER_SETBLOCKALL(&f); | |
892 | ICMP6_FILTER_SETPASS(p1, &f); | |
893 | ICMP6_FILTER_SETPASS(p2, &f); | |
894 | ||
895 | if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) | |
896 | { | |
897 | log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m"); | |
898 | return -1; | |
899 | } | |
900 | ||
901 | return 0; | |
902 | } | |
903 | ||
f9c799a0 OZ |
904 | int |
905 | sk_setup_multicast(sock *s) | |
906 | { | |
907 | char *err; | |
908 | int zero = 0; | |
909 | int index; | |
910 | ||
911 | ASSERT(s->iface && s->iface->addr); | |
912 | ||
913 | index = s->iface->index; | |
914 | if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0) | |
915 | ERR("IPV6_MULTICAST_HOPS"); | |
916 | if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &zero, sizeof(zero)) < 0) | |
917 | ERR("IPV6_MULTICAST_LOOP"); | |
918 | if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) | |
919 | ERR("IPV6_MULTICAST_IF"); | |
920 | ||
e7b09e4a OZ |
921 | if (err = sysio_bind_to_iface(s)) |
922 | goto bad; | |
923 | ||
f9c799a0 OZ |
924 | return 0; |
925 | ||
926 | bad: | |
927 | log(L_ERR "sk_setup_multicast: %s: %m", err); | |
928 | return -1; | |
929 | } | |
930 | ||
931 | int | |
932 | sk_join_group(sock *s, ip_addr maddr) | |
933 | { | |
934 | struct ipv6_mreq mreq; | |
935 | ||
936 | set_inaddr(&mreq.ipv6mr_multiaddr, maddr); | |
937 | ||
938 | #ifdef CONFIG_IPV6_GLIBC_20 | |
939 | mreq.ipv6mr_ifindex = s->iface->index; | |
940 | #else | |
941 | mreq.ipv6mr_interface = s->iface->index; | |
942 | #endif | |
943 | ||
861f223a | 944 | if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) |
f9c799a0 | 945 | { |
861f223a | 946 | log(L_ERR "sk_join_group: IPV6_JOIN_GROUP: %m"); |
f9c799a0 OZ |
947 | return -1; |
948 | } | |
949 | ||
950 | return 0; | |
951 | } | |
952 | ||
953 | int | |
954 | sk_leave_group(sock *s, ip_addr maddr) | |
955 | { | |
956 | struct ipv6_mreq mreq; | |
957 | ||
958 | set_inaddr(&mreq.ipv6mr_multiaddr, maddr); | |
959 | ||
960 | #ifdef CONFIG_IPV6_GLIBC_20 | |
961 | mreq.ipv6mr_ifindex = s->iface->index; | |
962 | #else | |
963 | mreq.ipv6mr_interface = s->iface->index; | |
964 | #endif | |
965 | ||
861f223a | 966 | if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0) |
f9c799a0 | 967 | { |
861f223a | 968 | log(L_ERR "sk_leave_group: IPV6_LEAVE_GROUP: %m"); |
f9c799a0 OZ |
969 | return -1; |
970 | } | |
971 | ||
972 | return 0; | |
973 | } | |
974 | ||
975 | #else /* IPV4 */ | |
976 | ||
977 | int | |
978 | sk_setup_multicast(sock *s) | |
979 | { | |
980 | char *err; | |
981 | ||
982 | ASSERT(s->iface && s->iface->addr); | |
983 | ||
984 | if (err = sysio_setup_multicast(s)) | |
985 | { | |
986 | log(L_ERR "sk_setup_multicast: %s: %m", err); | |
987 | return -1; | |
988 | } | |
989 | ||
990 | return 0; | |
991 | } | |
992 | ||
993 | int | |
994 | sk_join_group(sock *s, ip_addr maddr) | |
995 | { | |
996 | char *err; | |
997 | ||
998 | if (err = sysio_join_group(s, maddr)) | |
999 | { | |
1000 | log(L_ERR "sk_join_group: %s: %m", err); | |
1001 | return -1; | |
1002 | } | |
1003 | ||
1004 | return 0; | |
1005 | } | |
1006 | ||
1007 | int | |
1008 | sk_leave_group(sock *s, ip_addr maddr) | |
1009 | { | |
1010 | char *err; | |
1011 | ||
1012 | if (err = sysio_leave_group(s, maddr)) | |
1013 | { | |
1014 | log(L_ERR "sk_leave_group: %s: %m", err); | |
1015 | return -1; | |
1016 | } | |
1017 | ||
1018 | return 0; | |
1019 | } | |
1020 | ||
1021 | #endif | |
1022 | ||
d51aa281 | 1023 | |
b93abffa | 1024 | static void |
b5d9ee5c MM |
1025 | sk_tcp_connected(sock *s) |
1026 | { | |
9be9a264 OZ |
1027 | sockaddr lsa; |
1028 | int lsa_len = sizeof(lsa); | |
1029 | if (getsockname(s->fd, (struct sockaddr *) &lsa, &lsa_len) == 0) | |
1030 | get_sockaddr(&lsa, &s->saddr, &s->sport, 1); | |
1031 | ||
b5d9ee5c MM |
1032 | s->type = SK_TCP; |
1033 | sk_alloc_bufs(s); | |
320f4173 | 1034 | s->tx_hook(s); |
b5d9ee5c MM |
1035 | } |
1036 | ||
b93abffa MM |
1037 | static int |
1038 | sk_passive_connected(sock *s, struct sockaddr *sa, int al, int type) | |
1039 | { | |
1040 | int fd = accept(s->fd, sa, &al); | |
1041 | if (fd >= 0) | |
1042 | { | |
1043 | sock *t = sk_new(s->pool); | |
1044 | char *err; | |
1045 | t->type = type; | |
1046 | t->fd = fd; | |
e1ddd993 MM |
1047 | t->ttl = s->ttl; |
1048 | t->tos = s->tos; | |
1049 | t->rbsize = s->rbsize; | |
1050 | t->tbsize = s->tbsize; | |
1051 | if (type == SK_TCP) | |
cf31112f OZ |
1052 | { |
1053 | sockaddr lsa; | |
1054 | int lsa_len = sizeof(lsa); | |
1055 | if (getsockname(fd, (struct sockaddr *) &lsa, &lsa_len) == 0) | |
1056 | get_sockaddr(&lsa, &t->saddr, &t->sport, 1); | |
1057 | ||
1058 | get_sockaddr((sockaddr *) sa, &t->daddr, &t->dport, 1); | |
1059 | } | |
38a608c5 | 1060 | sk_insert(t); |
b93abffa MM |
1061 | if (err = sk_setup(t)) |
1062 | { | |
1063 | log(L_ERR "Incoming connection: %s: %m", err); | |
e1ddd993 MM |
1064 | rfree(t); |
1065 | return 1; | |
b93abffa MM |
1066 | } |
1067 | sk_alloc_bufs(t); | |
e1ddd993 | 1068 | s->rx_hook(t, 0); |
b93abffa MM |
1069 | return 1; |
1070 | } | |
1071 | else if (errno != EINTR && errno != EAGAIN) | |
1072 | { | |
c025b852 | 1073 | s->err_hook(s, errno); |
b93abffa MM |
1074 | } |
1075 | return 0; | |
1076 | } | |
1077 | ||
525fa2c1 MM |
1078 | /** |
1079 | * sk_open - open a socket | |
1080 | * @s: socket | |
1081 | * | |
1082 | * This function takes a socket resource created by sk_new() and | |
1083 | * initialized by the user and binds a corresponding network connection | |
1084 | * to it. | |
1085 | * | |
1086 | * Result: 0 for success, -1 for an error. | |
1087 | */ | |
b5d9ee5c MM |
1088 | int |
1089 | sk_open(sock *s) | |
1090 | { | |
93a786cb | 1091 | int fd; |
4f22c981 | 1092 | sockaddr sa; |
b5d9ee5c MM |
1093 | int one = 1; |
1094 | int type = s->type; | |
1095 | int has_src = ipa_nonzero(s->saddr) || s->sport; | |
b5d9ee5c MM |
1096 | char *err; |
1097 | ||
1098 | switch (type) | |
1099 | { | |
1100 | case SK_TCP_ACTIVE: | |
320f4173 MM |
1101 | s->ttx = ""; /* Force s->ttx != s->tpos */ |
1102 | /* Fall thru */ | |
b5d9ee5c | 1103 | case SK_TCP_PASSIVE: |
4f22c981 | 1104 | fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP); |
b5d9ee5c MM |
1105 | break; |
1106 | case SK_UDP: | |
4f22c981 | 1107 | fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP); |
b5d9ee5c MM |
1108 | break; |
1109 | case SK_IP: | |
4f22c981 | 1110 | fd = socket(BIRD_PF, SOCK_RAW, s->dport); |
b5d9ee5c | 1111 | break; |
b4b3b39e MM |
1112 | case SK_MAGIC: |
1113 | fd = s->fd; | |
1114 | break; | |
b5d9ee5c | 1115 | default: |
b4b3b39e | 1116 | bug("sk_open() called for invalid sock type %d", type); |
b5d9ee5c MM |
1117 | } |
1118 | if (fd < 0) | |
1119 | die("sk_open: socket: %m"); | |
1120 | s->fd = fd; | |
1121 | ||
1122 | if (err = sk_setup(s)) | |
1123 | goto bad; | |
38a608c5 | 1124 | |
b5d9ee5c MM |
1125 | if (has_src) |
1126 | { | |
1127 | int port; | |
1128 | ||
f9c799a0 | 1129 | if (type == SK_IP) |
b5d9ee5c MM |
1130 | port = 0; |
1131 | else | |
1132 | { | |
1133 | port = s->sport; | |
1134 | if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0) | |
1135 | ERR("SO_REUSEADDR"); | |
1136 | } | |
1137 | fill_in_sockaddr(&sa, s->saddr, port); | |
061ab802 | 1138 | fill_in_sockifa(&sa, s->iface); |
b5d9ee5c MM |
1139 | if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) |
1140 | ERR("bind"); | |
1141 | } | |
1142 | fill_in_sockaddr(&sa, s->daddr, s->dport); | |
d51aa281 OZ |
1143 | |
1144 | if (s->password) | |
1145 | { | |
1146 | int rv = sk_set_md5_auth_int(s, &sa, s->password); | |
1147 | if (rv < 0) | |
1148 | goto bad_no_log; | |
1149 | } | |
1150 | ||
b5d9ee5c MM |
1151 | switch (type) |
1152 | { | |
1153 | case SK_TCP_ACTIVE: | |
1154 | if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0) | |
1155 | sk_tcp_connected(s); | |
9cbf43eb | 1156 | else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && |
f428631c | 1157 | errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH) |
b5d9ee5c MM |
1158 | ERR("connect"); |
1159 | break; | |
1160 | case SK_TCP_PASSIVE: | |
1161 | if (listen(fd, 8)) | |
1162 | ERR("listen"); | |
1163 | break; | |
4f22c981 MM |
1164 | case SK_MAGIC: |
1165 | break; | |
1166 | default: | |
320f4173 | 1167 | sk_alloc_bufs(s); |
4f22c981 MM |
1168 | #ifdef IPV6 |
1169 | #ifdef IPV6_MTU_DISCOVER | |
1170 | { | |
1171 | int dont = IPV6_PMTUDISC_DONT; | |
1172 | if (setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &dont, sizeof(dont)) < 0) | |
1173 | ERR("IPV6_MTU_DISCOVER"); | |
1174 | } | |
1175 | #endif | |
1176 | #else | |
1177 | #ifdef IP_PMTUDISC | |
1178 | { | |
1179 | int dont = IP_PMTUDISC_DONT; | |
1180 | if (setsockopt(fd, SOL_IP, IP_PMTUDISC, &dont, sizeof(dont)) < 0) | |
1181 | ERR("IP_PMTUDISC"); | |
1182 | } | |
1183 | #endif | |
1184 | #endif | |
b5d9ee5c MM |
1185 | } |
1186 | ||
38a608c5 | 1187 | sk_insert(s); |
b5d9ee5c MM |
1188 | return 0; |
1189 | ||
1190 | bad: | |
1191 | log(L_ERR "sk_open: %s: %m", err); | |
d51aa281 | 1192 | bad_no_log: |
b5d9ee5c MM |
1193 | close(fd); |
1194 | s->fd = -1; | |
1195 | return -1; | |
1196 | } | |
1197 | ||
97e46d28 | 1198 | void |
b93abffa MM |
1199 | sk_open_unix(sock *s, char *name) |
1200 | { | |
1201 | int fd; | |
1202 | struct sockaddr_un sa; | |
1203 | char *err; | |
1204 | ||
1205 | fd = socket(AF_UNIX, SOCK_STREAM, 0); | |
1206 | if (fd < 0) | |
97e46d28 | 1207 | ERR("socket"); |
b93abffa MM |
1208 | s->fd = fd; |
1209 | if (err = sk_setup(s)) | |
1210 | goto bad; | |
1211 | unlink(name); | |
68fa95cf | 1212 | |
97e46d28 | 1213 | /* Path length checked in test_old_bird() */ |
b93abffa | 1214 | sa.sun_family = AF_UNIX; |
97c6fa02 | 1215 | strcpy(sa.sun_path, name); |
0b3bf4b1 | 1216 | if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) |
b93abffa MM |
1217 | ERR("bind"); |
1218 | if (listen(fd, 8)) | |
1219 | ERR("listen"); | |
38a608c5 | 1220 | sk_insert(s); |
97e46d28 | 1221 | return; |
b93abffa | 1222 | |
97e46d28 | 1223 | bad: |
b93abffa | 1224 | log(L_ERR "sk_open_unix: %s: %m", err); |
97e46d28 | 1225 | die("Unable to create control socket %s", name); |
b93abffa MM |
1226 | } |
1227 | ||
353729f5 OZ |
1228 | static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; } |
1229 | ||
b5d9ee5c MM |
1230 | static int |
1231 | sk_maybe_write(sock *s) | |
1232 | { | |
1233 | int e; | |
1234 | ||
1235 | switch (s->type) | |
1236 | { | |
1237 | case SK_TCP: | |
b4b3b39e | 1238 | case SK_MAGIC: |
b93abffa | 1239 | case SK_UNIX: |
b5d9ee5c MM |
1240 | while (s->ttx != s->tpos) |
1241 | { | |
1242 | e = write(s->fd, s->ttx, s->tpos - s->ttx); | |
1243 | if (e < 0) | |
1244 | { | |
1245 | if (errno != EINTR && errno != EAGAIN) | |
1246 | { | |
353729f5 | 1247 | reset_tx_buffer(s); |
47597724 OZ |
1248 | /* EPIPE is just a connection close notification during TX */ |
1249 | s->err_hook(s, (errno != EPIPE) ? errno : 0); | |
b5d9ee5c MM |
1250 | return -1; |
1251 | } | |
1252 | return 0; | |
1253 | } | |
1254 | s->ttx += e; | |
1255 | } | |
353729f5 | 1256 | reset_tx_buffer(s); |
b5d9ee5c MM |
1257 | return 1; |
1258 | case SK_UDP: | |
b5d9ee5c | 1259 | case SK_IP: |
b5d9ee5c | 1260 | { |
b5d9ee5c MM |
1261 | if (s->tbuf == s->tpos) |
1262 | return 1; | |
b1a1faba | 1263 | |
353729f5 OZ |
1264 | sockaddr sa; |
1265 | fill_in_sockaddr(&sa, s->daddr, s->dport); | |
061ab802 | 1266 | fill_in_sockifa(&sa, s->iface); |
353729f5 OZ |
1267 | |
1268 | struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; | |
646b24d9 | 1269 | // byte cmsg_buf[CMSG_TX_SPACE]; |
353729f5 OZ |
1270 | |
1271 | struct msghdr msg = { | |
1272 | .msg_name = &sa, | |
1273 | .msg_namelen = sizeof(sa), | |
1274 | .msg_iov = &iov, | |
bed41728 | 1275 | .msg_iovlen = 1}; |
353729f5 | 1276 | |
646b24d9 | 1277 | // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); |
353729f5 OZ |
1278 | e = sendmsg(s->fd, &msg, 0); |
1279 | ||
b5d9ee5c MM |
1280 | if (e < 0) |
1281 | { | |
1282 | if (errno != EINTR && errno != EAGAIN) | |
1283 | { | |
353729f5 | 1284 | reset_tx_buffer(s); |
c025b852 | 1285 | s->err_hook(s, errno); |
b5d9ee5c MM |
1286 | return -1; |
1287 | } | |
1288 | return 0; | |
1289 | } | |
353729f5 | 1290 | reset_tx_buffer(s); |
b5d9ee5c MM |
1291 | return 1; |
1292 | } | |
1293 | default: | |
08c69a77 | 1294 | bug("sk_maybe_write: unknown socket type %d", s->type); |
b5d9ee5c MM |
1295 | } |
1296 | } | |
1297 | ||
ea89da38 OZ |
1298 | int |
1299 | sk_rx_ready(sock *s) | |
1300 | { | |
1301 | fd_set rd, wr; | |
1302 | struct timeval timo; | |
1303 | int rv; | |
1304 | ||
1305 | FD_ZERO(&rd); | |
1306 | FD_ZERO(&wr); | |
1307 | FD_SET(s->fd, &rd); | |
1308 | ||
1309 | timo.tv_sec = 0; | |
1310 | timo.tv_usec = 0; | |
1311 | ||
1312 | redo: | |
1313 | rv = select(s->fd+1, &rd, &wr, NULL, &timo); | |
1314 | ||
1315 | if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) | |
1316 | goto redo; | |
1317 | ||
1318 | return rv; | |
1319 | } | |
1320 | ||
525fa2c1 MM |
1321 | /** |
1322 | * sk_send - send data to a socket | |
1323 | * @s: socket | |
1324 | * @len: number of bytes to send | |
1325 | * | |
1326 | * This function sends @len bytes of data prepared in the | |
1327 | * transmit buffer of the socket @s to the network connection. | |
1328 | * If the packet can be sent immediately, it does so and returns | |
1329 | * 1, else it queues the packet for later processing, returns 0 | |
1330 | * and calls the @tx_hook of the socket when the transmission | |
1331 | * takes place. | |
1332 | */ | |
b5d9ee5c MM |
1333 | int |
1334 | sk_send(sock *s, unsigned len) | |
1335 | { | |
b5d9ee5c MM |
1336 | s->ttx = s->tbuf; |
1337 | s->tpos = s->tbuf + len; | |
1338 | return sk_maybe_write(s); | |
1339 | } | |
1340 | ||
525fa2c1 MM |
1341 | /** |
1342 | * sk_send_to - send data to a specific destination | |
1343 | * @s: socket | |
1344 | * @len: number of bytes to send | |
1345 | * @addr: IP address to send the packet to | |
1346 | * @port: port to send the packet to | |
1347 | * | |
2e9b2421 | 1348 | * This is a sk_send() replacement for connection-less packet sockets |
525fa2c1 MM |
1349 | * which allows destination of the packet to be chosen dynamically. |
1350 | */ | |
b5d9ee5c MM |
1351 | int |
1352 | sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) | |
1353 | { | |
353729f5 OZ |
1354 | s->daddr = addr; |
1355 | s->dport = port; | |
b5d9ee5c MM |
1356 | s->ttx = s->tbuf; |
1357 | s->tpos = s->tbuf + len; | |
1358 | return sk_maybe_write(s); | |
1359 | } | |
1360 | ||
353729f5 OZ |
1361 | /* |
1362 | int | |
1363 | sk_send_full(sock *s, unsigned len, struct iface *ifa, | |
1364 | ip_addr saddr, ip_addr daddr, unsigned dport) | |
1365 | { | |
1366 | s->iface = ifa; | |
1367 | s->saddr = saddr; | |
1368 | s->daddr = daddr; | |
1369 | s->dport = dport; | |
1370 | s->ttx = s->tbuf; | |
1371 | s->tpos = s->tbuf + len; | |
1372 | return sk_maybe_write(s); | |
1373 | } | |
1374 | */ | |
1375 | ||
b5d9ee5c MM |
1376 | static int |
1377 | sk_read(sock *s) | |
1378 | { | |
1379 | switch (s->type) | |
1380 | { | |
b5d9ee5c MM |
1381 | case SK_TCP_PASSIVE: |
1382 | { | |
4f22c981 | 1383 | sockaddr sa; |
b93abffa MM |
1384 | return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_TCP); |
1385 | } | |
1386 | case SK_UNIX_PASSIVE: | |
1387 | { | |
1388 | struct sockaddr_un sa; | |
1389 | return sk_passive_connected(s, (struct sockaddr *) &sa, sizeof(sa), SK_UNIX); | |
b5d9ee5c MM |
1390 | } |
1391 | case SK_TCP: | |
b93abffa | 1392 | case SK_UNIX: |
b5d9ee5c MM |
1393 | { |
1394 | int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos); | |
1395 | ||
1396 | if (c < 0) | |
1397 | { | |
1398 | if (errno != EINTR && errno != EAGAIN) | |
c025b852 | 1399 | s->err_hook(s, errno); |
b5d9ee5c MM |
1400 | } |
1401 | else if (!c) | |
c025b852 | 1402 | s->err_hook(s, 0); |
b5d9ee5c MM |
1403 | else |
1404 | { | |
1405 | s->rpos += c; | |
1406 | if (s->rx_hook(s, s->rpos - s->rbuf)) | |
38a608c5 MM |
1407 | { |
1408 | /* We need to be careful since the socket could have been deleted by the hook */ | |
1409 | if (current_sock == s) | |
1410 | s->rpos = s->rbuf; | |
1411 | } | |
b5d9ee5c MM |
1412 | return 1; |
1413 | } | |
1414 | return 0; | |
1415 | } | |
b4b3b39e MM |
1416 | case SK_MAGIC: |
1417 | return s->rx_hook(s, 0); | |
b5d9ee5c MM |
1418 | default: |
1419 | { | |
4f22c981 | 1420 | sockaddr sa; |
353729f5 OZ |
1421 | int e; |
1422 | ||
1423 | struct iovec iov = {s->rbuf, s->rbsize}; | |
1424 | byte cmsg_buf[CMSG_RX_SPACE]; | |
1425 | ||
1426 | struct msghdr msg = { | |
1427 | .msg_name = &sa, | |
1428 | .msg_namelen = sizeof(sa), | |
1429 | .msg_iov = &iov, | |
1430 | .msg_iovlen = 1, | |
1431 | .msg_control = cmsg_buf, | |
1432 | .msg_controllen = sizeof(cmsg_buf), | |
1433 | .msg_flags = 0}; | |
1434 | ||
1435 | e = recvmsg(s->fd, &msg, 0); | |
b5d9ee5c MM |
1436 | |
1437 | if (e < 0) | |
1438 | { | |
1439 | if (errno != EINTR && errno != EAGAIN) | |
c025b852 | 1440 | s->err_hook(s, errno); |
b5d9ee5c MM |
1441 | return 0; |
1442 | } | |
1443 | s->rpos = s->rbuf + e; | |
b1a1faba | 1444 | get_sockaddr(&sa, &s->faddr, &s->fport, 1); |
353729f5 OZ |
1445 | sysio_process_rx_cmsgs(s, &msg); |
1446 | ||
b5d9ee5c MM |
1447 | s->rx_hook(s, e); |
1448 | return 1; | |
1449 | } | |
1450 | } | |
1451 | } | |
1452 | ||
38a608c5 | 1453 | static int |
b5d9ee5c MM |
1454 | sk_write(sock *s) |
1455 | { | |
320f4173 MM |
1456 | switch (s->type) |
1457 | { | |
1458 | case SK_TCP_ACTIVE: | |
1459 | { | |
1460 | sockaddr sa; | |
1461 | fill_in_sockaddr(&sa, s->daddr, s->dport); | |
09e4117c | 1462 | if (connect(s->fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0 || errno == EISCONN) |
320f4173 MM |
1463 | sk_tcp_connected(s); |
1464 | else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS) | |
c025b852 | 1465 | s->err_hook(s, errno); |
38a608c5 | 1466 | return 0; |
320f4173 | 1467 | } |
320f4173 | 1468 | default: |
38a608c5 MM |
1469 | if (s->ttx != s->tpos && sk_maybe_write(s) > 0) |
1470 | { | |
1471 | s->tx_hook(s); | |
1472 | return 1; | |
1473 | } | |
1474 | return 0; | |
320f4173 | 1475 | } |
b5d9ee5c MM |
1476 | } |
1477 | ||
1478 | void | |
1479 | sk_dump_all(void) | |
1480 | { | |
1481 | node *n; | |
1482 | sock *s; | |
1483 | ||
1484 | debug("Open sockets:\n"); | |
1485 | WALK_LIST(n, sock_list) | |
1486 | { | |
1487 | s = SKIP_BACK(sock, n, n); | |
1488 | debug("%p ", s); | |
1489 | sk_dump(&s->r); | |
1490 | } | |
1491 | debug("\n"); | |
1492 | } | |
1493 | ||
1494 | #undef ERR | |
f782b72c | 1495 | #undef WARN |
b5d9ee5c MM |
1496 | |
1497 | /* | |
1498 | * Main I/O Loop | |
1499 | */ | |
1500 | ||
4c9dd1e4 MM |
1501 | volatile int async_config_flag; /* Asynchronous reconfiguration/dump scheduled */ |
1502 | volatile int async_dump_flag; | |
1503 | ||
b5d9ee5c MM |
1504 | void |
1505 | io_init(void) | |
1506 | { | |
1507 | init_list(&near_timers); | |
1508 | init_list(&far_timers); | |
1509 | init_list(&sock_list); | |
e8f73195 | 1510 | init_list(&global_event_list); |
7e5f5ffd | 1511 | krt_io_init(); |
fd91ae33 OZ |
1512 | init_times(); |
1513 | update_times(); | |
1514 | srandom((int) now_real); | |
b5d9ee5c MM |
1515 | } |
1516 | ||
ea89da38 OZ |
1517 | static int short_loops = 0; |
1518 | #define SHORT_LOOP_MAX 10 | |
1519 | ||
b5d9ee5c MM |
1520 | void |
1521 | io_loop(void) | |
1522 | { | |
1523 | fd_set rd, wr; | |
1524 | struct timeval timo; | |
1525 | time_t tout; | |
30770df2 | 1526 | int hi, events; |
b5d9ee5c | 1527 | sock *s; |
38a608c5 | 1528 | node *n; |
b5d9ee5c | 1529 | |
38a608c5 | 1530 | sock_recalc_fdsets_p = 1; |
b5d9ee5c MM |
1531 | for(;;) |
1532 | { | |
30770df2 | 1533 | events = ev_run_list(&global_event_list); |
fd91ae33 | 1534 | update_times(); |
b5d9ee5c MM |
1535 | tout = tm_first_shot(); |
1536 | if (tout <= now) | |
1537 | { | |
1538 | tm_shot(); | |
1539 | continue; | |
1540 | } | |
30770df2 MM |
1541 | timo.tv_sec = events ? 0 : tout - now; |
1542 | timo.tv_usec = 0; | |
b5d9ee5c | 1543 | |
38a608c5 MM |
1544 | if (sock_recalc_fdsets_p) |
1545 | { | |
1546 | sock_recalc_fdsets_p = 0; | |
1547 | FD_ZERO(&rd); | |
1548 | FD_ZERO(&wr); | |
1549 | } | |
1550 | ||
b5d9ee5c MM |
1551 | hi = 0; |
1552 | WALK_LIST(n, sock_list) | |
1553 | { | |
1554 | s = SKIP_BACK(sock, n, n); | |
1555 | if (s->rx_hook) | |
1556 | { | |
1557 | FD_SET(s->fd, &rd); | |
1558 | if (s->fd > hi) | |
1559 | hi = s->fd; | |
1560 | } | |
38a608c5 MM |
1561 | else |
1562 | FD_CLR(s->fd, &rd); | |
b5d9ee5c MM |
1563 | if (s->tx_hook && s->ttx != s->tpos) |
1564 | { | |
1565 | FD_SET(s->fd, &wr); | |
1566 | if (s->fd > hi) | |
1567 | hi = s->fd; | |
1568 | } | |
38a608c5 MM |
1569 | else |
1570 | FD_CLR(s->fd, &wr); | |
b5d9ee5c MM |
1571 | } |
1572 | ||
4c9dd1e4 MM |
1573 | /* |
1574 | * Yes, this is racy. But even if the signal comes before this test | |
1575 | * and entering select(), it gets caught on the next timer tick. | |
1576 | */ | |
1577 | ||
1578 | if (async_config_flag) | |
1579 | { | |
1580 | async_config(); | |
1581 | async_config_flag = 0; | |
f4aabcee | 1582 | continue; |
4c9dd1e4 MM |
1583 | } |
1584 | if (async_dump_flag) | |
1585 | { | |
1586 | async_dump(); | |
1587 | async_dump_flag = 0; | |
f4aabcee MM |
1588 | continue; |
1589 | } | |
1590 | if (async_shutdown_flag) | |
1591 | { | |
1592 | async_shutdown(); | |
1593 | async_shutdown_flag = 0; | |
1594 | continue; | |
4c9dd1e4 MM |
1595 | } |
1596 | ||
1597 | /* And finally enter select() to find active sockets */ | |
b5d9ee5c | 1598 | hi = select(hi+1, &rd, &wr, NULL, &timo); |
ea89da38 | 1599 | |
b5d9ee5c MM |
1600 | if (hi < 0) |
1601 | { | |
1602 | if (errno == EINTR || errno == EAGAIN) | |
1603 | continue; | |
1604 | die("select: %m"); | |
1605 | } | |
1606 | if (hi) | |
1607 | { | |
ea89da38 OZ |
1608 | /* guaranteed to be non-empty */ |
1609 | current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); | |
1610 | ||
38a608c5 | 1611 | while (current_sock) |
b5d9ee5c | 1612 | { |
38a608c5 MM |
1613 | sock *s = current_sock; |
1614 | int e; | |
ea89da38 OZ |
1615 | int steps; |
1616 | ||
1617 | steps = MAX_STEPS; | |
1618 | if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) | |
38a608c5 MM |
1619 | do |
1620 | { | |
4323099d | 1621 | steps--; |
38a608c5 MM |
1622 | e = sk_read(s); |
1623 | if (s != current_sock) | |
1624 | goto next; | |
1625 | } | |
4323099d OZ |
1626 | while (e && s->rx_hook && steps); |
1627 | ||
1628 | steps = MAX_STEPS; | |
38a608c5 MM |
1629 | if (FD_ISSET(s->fd, &wr)) |
1630 | do | |
1631 | { | |
4323099d | 1632 | steps--; |
38a608c5 MM |
1633 | e = sk_write(s); |
1634 | if (s != current_sock) | |
1635 | goto next; | |
1636 | } | |
4323099d | 1637 | while (e && steps); |
38a608c5 MM |
1638 | current_sock = sk_next(s); |
1639 | next: ; | |
b5d9ee5c | 1640 | } |
ea89da38 OZ |
1641 | |
1642 | short_loops++; | |
1643 | if (events && (short_loops < SHORT_LOOP_MAX)) | |
1644 | continue; | |
1645 | short_loops = 0; | |
1646 | ||
1647 | int count = 0; | |
1648 | current_sock = stored_sock; | |
1649 | if (current_sock == NULL) | |
1650 | current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); | |
1651 | ||
1652 | while (current_sock && count < MAX_RX_STEPS) | |
1653 | { | |
1654 | sock *s = current_sock; | |
1655 | int e; | |
ea89da38 OZ |
1656 | |
1657 | if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) | |
1658 | { | |
1659 | count++; | |
1660 | e = sk_read(s); | |
1661 | if (s != current_sock) | |
1662 | goto next2; | |
1663 | } | |
1664 | current_sock = sk_next(s); | |
1665 | next2: ; | |
1666 | } | |
1667 | ||
1668 | stored_sock = current_sock; | |
b5d9ee5c MM |
1669 | } |
1670 | } | |
1671 | } | |
41c8976e OF |
1672 | |
/*
 * test_old_bird - refuse to start when another instance already
 * listens on the control socket at @path.
 *
 * Dies when the probe socket cannot be created, when @path does not
 * fit into sun_path, or when connecting to @path succeeds.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  /* A successful connect means somebody is listening on that path */
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");

  close(fd);
}
1691 | ||
1692 |