]>
Commit | Line | Data |
---|---|---|
b5d9ee5c MM |
1 | /* |
2 | * BIRD Internet Routing Daemon -- Unix I/O | |
3 | * | |
38a608c5 | 4 | * (c) 1998--2004 Martin Mares <mj@ucw.cz> |
b1a1faba | 5 | * (c) 2004 Ondrej Filip <feela@network.cz> |
b5d9ee5c MM |
6 | * |
7 | * Can be freely distributed and used under the terms of the GNU GPL. | |
8 | */ | |
9 | ||
607d9914 OZ |
10 | /* Unfortunately, some glibc versions hide parts of RFC 3542 API |
11 | if _GNU_SOURCE is not defined. */ | |
12 | #define _GNU_SOURCE 1 | |
13 | ||
b5d9ee5c MM |
14 | #include <stdio.h> |
15 | #include <stdlib.h> | |
01b776e1 | 16 | #include <time.h> |
b5d9ee5c MM |
17 | #include <sys/time.h> |
18 | #include <sys/types.h> | |
19 | #include <sys/socket.h> | |
46a82e9c | 20 | #include <sys/uio.h> |
b93abffa | 21 | #include <sys/un.h> |
b5d9ee5c | 22 | #include <unistd.h> |
a0b176e3 | 23 | #include <fcntl.h> |
b5d9ee5c | 24 | #include <errno.h> |
05476c4d | 25 | #include <net/if.h> |
d0e9b36d | 26 | #include <netinet/in.h> |
48e5f32d OZ |
27 | #include <netinet/tcp.h> |
28 | #include <netinet/udp.h> | |
93e868c7 | 29 | #include <netinet/icmp6.h> |
b5d9ee5c MM |
30 | |
31 | #include "nest/bird.h" | |
32 | #include "lib/lists.h" | |
33 | #include "lib/resource.h" | |
34 | #include "lib/timer.h" | |
35 | #include "lib/socket.h" | |
e8f73195 | 36 | #include "lib/event.h" |
afa8937a | 37 | #include "lib/string.h" |
b5d9ee5c MM |
38 | #include "nest/iface.h" |
39 | ||
40 | #include "lib/unix.h" | |
a2867cd9 | 41 | #include "lib/sysio.h" |
b5d9ee5c | 42 | |
ea89da38 | 43 | /* Maximum number of calls of tx handler for one socket in one |
4323099d OZ |
44 | * select iteration. Should be small enough to not monopolize CPU by |
45 | * one protocol instance. | |
46 | */ | |
47 | #define MAX_STEPS 4 | |
48 | ||
ea89da38 OZ |
49 | /* Maximum number of calls of rx handler for all sockets in one select |
50 | iteration. RX callbacks are often much more costly so we limit | |
51 | this to get small latencies */ | |
52 | #define MAX_RX_STEPS 4 | |
53 | ||
a9c986f9 MM |
54 | /* |
55 | * Tracked Files | |
56 | */ | |
57 | ||
58 | struct rfile { | |
59 | resource r; | |
60 | FILE *f; | |
61 | }; | |
62 | ||
63 | static void | |
64 | rf_free(resource *r) | |
65 | { | |
66 | struct rfile *a = (struct rfile *) r; | |
67 | ||
68 | fclose(a->f); | |
69 | } | |
70 | ||
71 | static void | |
72 | rf_dump(resource *r) | |
73 | { | |
74 | struct rfile *a = (struct rfile *) r; | |
75 | ||
76 | debug("(FILE *%p)\n", a->f); | |
77 | } | |
78 | ||
79 | static struct resclass rf_class = { | |
80 | "FILE", | |
81 | sizeof(struct rfile), | |
82 | rf_free, | |
e81b440f | 83 | rf_dump, |
acb60628 | 84 | NULL, |
e81b440f | 85 | NULL |
a9c986f9 MM |
86 | }; |
87 | ||
88 | void * | |
f78056fb | 89 | tracked_fopen(pool *p, char *name, char *mode) |
a9c986f9 MM |
90 | { |
91 | FILE *f = fopen(name, mode); | |
92 | ||
93 | if (f) | |
94 | { | |
95 | struct rfile *r = ralloc(p, &rf_class); | |
96 | r->f = f; | |
97 | } | |
98 | return f; | |
99 | } | |
100 | ||
525fa2c1 MM |
101 | /** |
102 | * DOC: Timers | |
103 | * | |
104 | * Timers are resources which represent a wish of a module to call | |
105 | * a function at the specified time. The platform dependent code | |
58f7d004 | 106 | * doesn't guarantee exact timing, only that a timer function |
525fa2c1 MM |
107 | * won't be called before the requested time. |
108 | * | |
fd91ae33 OZ |
109 | * In BIRD, time is represented by values of the &bird_clock_t type |
110 | * which are integral numbers interpreted as a relative number of seconds since | |
111 | * some fixed time point in the past. The current time can be read | |
112 | * from variable @now with reasonable accuracy and is monotonic. There is also | |
113 | * a current 'absolute' time in variable @now_real reported by OS. | |
525fa2c1 MM |
114 | * |
115 | * Each timer is described by a &timer structure containing a pointer | |
116 | * to the handler function (@hook), data private to this function (@data), | |
117 | * time the function should be called at (@expires, 0 for inactive timers), | |
118 | * for the other fields see |timer.h|. | |
b5d9ee5c MM |
119 | */ |
120 | ||
121 | #define NEAR_TIMER_LIMIT 4 | |
122 | ||
b5d9ee5c MM |
123 | static list near_timers, far_timers; |
124 | static bird_clock_t first_far_timer = TIME_INFINITY; | |
125 | ||
002b6423 | 126 | /* now must be different from 0, because 0 is a special value in timer->expires */ |
a92cf57d | 127 | bird_clock_t now = 1, now_real, boot_time; |
fd91ae33 OZ |
128 | |
129 | static void | |
130 | update_times_plain(void) | |
131 | { | |
132 | bird_clock_t new_time = time(NULL); | |
133 | int delta = new_time - now_real; | |
134 | ||
135 | if ((delta >= 0) && (delta < 60)) | |
136 | now += delta; | |
137 | else if (now_real != 0) | |
138 | log(L_WARN "Time jump, delta %d s", delta); | |
139 | ||
140 | now_real = new_time; | |
141 | } | |
142 | ||
143 | static void | |
144 | update_times_gettime(void) | |
145 | { | |
146 | struct timespec ts; | |
147 | int rv; | |
148 | ||
149 | rv = clock_gettime(CLOCK_MONOTONIC, &ts); | |
150 | if (rv != 0) | |
151 | die("clock_gettime: %m"); | |
152 | ||
153 | if (ts.tv_sec != now) { | |
154 | if (ts.tv_sec < now) | |
155 | log(L_ERR "Monotonic timer is broken"); | |
156 | ||
157 | now = ts.tv_sec; | |
158 | now_real = time(NULL); | |
159 | } | |
160 | } | |
161 | ||
162 | static int clock_monotonic_available; | |
163 | ||
164 | static inline void | |
165 | update_times(void) | |
166 | { | |
167 | if (clock_monotonic_available) | |
168 | update_times_gettime(); | |
169 | else | |
170 | update_times_plain(); | |
171 | } | |
172 | ||
173 | static inline void | |
174 | init_times(void) | |
175 | { | |
176 | struct timespec ts; | |
177 | clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0); | |
178 | if (!clock_monotonic_available) | |
179 | log(L_WARN "Monotonic timer is missing"); | |
180 | } | |
181 | ||
b5d9ee5c MM |
182 | |
183 | static void | |
184 | tm_free(resource *r) | |
185 | { | |
186 | timer *t = (timer *) r; | |
187 | ||
188 | tm_stop(t); | |
189 | } | |
190 | ||
191 | static void | |
192 | tm_dump(resource *r) | |
193 | { | |
194 | timer *t = (timer *) r; | |
195 | ||
e8f73195 | 196 | debug("(code %p, data %p, ", t->hook, t->data); |
af847acc MM |
197 | if (t->randomize) |
198 | debug("rand %d, ", t->randomize); | |
199 | if (t->recurrent) | |
200 | debug("recur %d, ", t->recurrent); | |
b5d9ee5c MM |
201 | if (t->expires) |
202 | debug("expires in %d sec)\n", t->expires - now); | |
203 | else | |
204 | debug("inactive)\n"); | |
205 | } | |
206 | ||
207 | static struct resclass tm_class = { | |
208 | "Timer", | |
209 | sizeof(timer), | |
210 | tm_free, | |
e81b440f | 211 | tm_dump, |
acb60628 | 212 | NULL, |
e81b440f | 213 | NULL |
b5d9ee5c MM |
214 | }; |
215 | ||
525fa2c1 MM |
216 | /** |
217 | * tm_new - create a timer | |
218 | * @p: pool | |
219 | * | |
220 | * This function creates a new timer resource and returns | |
221 | * a pointer to it. To use the timer, you need to fill in | |
222 | * the structure fields and call tm_start() to start timing. | |
223 | */ | |
b5d9ee5c MM |
224 | timer * |
225 | tm_new(pool *p) | |
226 | { | |
227 | timer *t = ralloc(p, &tm_class); | |
b5d9ee5c MM |
228 | return t; |
229 | } | |
230 | ||
231 | static inline void | |
232 | tm_insert_near(timer *t) | |
233 | { | |
234 | node *n = HEAD(near_timers); | |
235 | ||
236 | while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires)) | |
237 | n = n->next; | |
238 | insert_node(&t->n, n->prev); | |
239 | } | |
240 | ||
525fa2c1 MM |
241 | /** |
242 | * tm_start - start a timer | |
243 | * @t: timer | |
244 | * @after: number of seconds the timer should be run after | |
245 | * | |
246 | * This function schedules the hook function of the timer to | |
247 | * be called after @after seconds. If the timer has been already | |
248 | * started, its @expires time is replaced by the new value. | |
249 | * | |
250 | * If you have set the @randomize field of @t, the timeout | |
251 | * will be increased by a random number of seconds chosen | |
252 | * uniformly from range 0 .. @randomize. | |
253 | * | |
254 | * You can call tm_start() from the handler function of the timer | |
255 | * to request another run of the timer. Also, you can set the @recurrent | |
256 | * field to have the timer re-added automatically with the same timeout. | |
257 | */ | |
b5d9ee5c MM |
258 | void |
259 | tm_start(timer *t, unsigned after) | |
260 | { | |
261 | bird_clock_t when; | |
262 | ||
263 | if (t->randomize) | |
af847acc | 264 | after += random() % (t->randomize + 1); |
b5d9ee5c MM |
265 | when = now + after; |
266 | if (t->expires == when) | |
267 | return; | |
268 | if (t->expires) | |
269 | rem_node(&t->n); | |
270 | t->expires = when; | |
271 | if (after <= NEAR_TIMER_LIMIT) | |
272 | tm_insert_near(t); | |
273 | else | |
274 | { | |
275 | if (!first_far_timer || first_far_timer > when) | |
276 | first_far_timer = when; | |
277 | add_tail(&far_timers, &t->n); | |
278 | } | |
279 | } | |
280 | ||
525fa2c1 MM |
281 | /** |
282 | * tm_stop - stop a timer | |
283 | * @t: timer | |
284 | * | |
285 | * This function stops a timer. If the timer is already stopped, | |
286 | * nothing happens. | |
287 | */ | |
b5d9ee5c MM |
288 | void |
289 | tm_stop(timer *t) | |
290 | { | |
291 | if (t->expires) | |
292 | { | |
293 | rem_node(&t->n); | |
294 | t->expires = 0; | |
295 | } | |
296 | } | |
297 | ||
298 | static void | |
299 | tm_dump_them(char *name, list *l) | |
300 | { | |
301 | node *n; | |
302 | timer *t; | |
303 | ||
304 | debug("%s timers:\n", name); | |
305 | WALK_LIST(n, *l) | |
306 | { | |
307 | t = SKIP_BACK(timer, n, n); | |
308 | debug("%p ", t); | |
309 | tm_dump(&t->r); | |
310 | } | |
311 | debug("\n"); | |
312 | } | |
313 | ||
314 | void | |
315 | tm_dump_all(void) | |
316 | { | |
317 | tm_dump_them("Near", &near_timers); | |
318 | tm_dump_them("Far", &far_timers); | |
319 | } | |
320 | ||
321 | static inline time_t | |
322 | tm_first_shot(void) | |
323 | { | |
324 | time_t x = first_far_timer; | |
325 | ||
326 | if (!EMPTY_LIST(near_timers)) | |
327 | { | |
328 | timer *t = SKIP_BACK(timer, n, HEAD(near_timers)); | |
329 | if (t->expires < x) | |
330 | x = t->expires; | |
331 | } | |
332 | return x; | |
333 | } | |
334 | ||
335 | static void | |
336 | tm_shot(void) | |
337 | { | |
338 | timer *t; | |
339 | node *n, *m; | |
340 | ||
341 | if (first_far_timer <= now) | |
342 | { | |
28a9a189 | 343 | bird_clock_t limit = now + NEAR_TIMER_LIMIT; |
b5d9ee5c MM |
344 | first_far_timer = TIME_INFINITY; |
345 | n = HEAD(far_timers); | |
346 | while (m = n->next) | |
347 | { | |
348 | t = SKIP_BACK(timer, n, n); | |
349 | if (t->expires <= limit) | |
350 | { | |
351 | rem_node(n); | |
352 | tm_insert_near(t); | |
353 | } | |
354 | else if (t->expires < first_far_timer) | |
355 | first_far_timer = t->expires; | |
356 | n = m; | |
357 | } | |
358 | } | |
359 | while ((n = HEAD(near_timers)) -> next) | |
360 | { | |
af847acc | 361 | int delay; |
b5d9ee5c MM |
362 | t = SKIP_BACK(timer, n, n); |
363 | if (t->expires > now) | |
364 | break; | |
365 | rem_node(n); | |
af847acc | 366 | delay = t->expires - now; |
b5d9ee5c | 367 | t->expires = 0; |
af847acc MM |
368 | if (t->recurrent) |
369 | { | |
370 | int i = t->recurrent - delay; | |
371 | if (i < 0) | |
372 | i = 0; | |
373 | tm_start(t, i); | |
374 | } | |
b5d9ee5c MM |
375 | t->hook(t); |
376 | } | |
377 | } | |
378 | ||
0d3effcf OF |
379 | /** |
380 | * tm_parse_datetime - parse a date and time | |
381 | * @x: datetime string | |
382 | * | |
383 | * tm_parse_datetime() takes a textual representation of | |
384 | * a date and time (dd-mm-yyyy hh:mm:ss) | |
385 | * and converts it to the corresponding value of type &bird_clock_t. | |
386 | */ | |
387 | bird_clock_t | |
388 | tm_parse_datetime(char *x) | |
389 | { | |
390 | struct tm tm; | |
391 | int n; | |
392 | time_t t; | |
393 | ||
394 | if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n]) | |
395 | return tm_parse_date(x); | |
396 | tm.tm_mon--; | |
397 | tm.tm_year -= 1900; | |
398 | t = mktime(&tm); | |
399 | if (t == (time_t) -1) | |
400 | return 0; | |
401 | return t; | |
402 | } | |
525fa2c1 MM |
403 | /** |
404 | * tm_parse_date - parse a date | |
405 | * @x: date string | |
406 | * | |
407 | * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy) | |
408 | * and converts it to the corresponding value of type &bird_clock_t. | |
409 | */ | |
913f7dc9 MM |
410 | bird_clock_t |
411 | tm_parse_date(char *x) | |
412 | { | |
413 | struct tm tm; | |
414 | int n; | |
415 | time_t t; | |
416 | ||
417 | if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n]) | |
418 | return 0; | |
419 | tm.tm_mon--; | |
420 | tm.tm_year -= 1900; | |
421 | tm.tm_hour = tm.tm_min = tm.tm_sec = 0; | |
422 | t = mktime(&tm); | |
423 | if (t == (time_t) -1) | |
424 | return 0; | |
425 | return t; | |
426 | } | |
427 | ||
c37e7851 OZ |
428 | static void |
429 | tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta) | |
913f7dc9 | 430 | { |
c37e7851 OZ |
431 | static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", |
432 | "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; | |
913f7dc9 | 433 | |
c37e7851 OZ |
434 | if (delta < 20*3600) |
435 | bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min); | |
436 | else if (delta < 360*86400) | |
437 | bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday); | |
438 | else | |
439 | bsprintf(x, "%d", tm->tm_year+1900); | |
913f7dc9 MM |
440 | } |
441 | ||
c37e7851 OZ |
442 | #include "conf/conf.h" |
443 | ||
525fa2c1 MM |
444 | /** |
445 | * tm_format_datetime - convert date and time to textual representation | |
446 | * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE | |
447 | * @t: time | |
448 | * | |
fd91ae33 OZ |
449 | * This function formats the given relative time value @t to a textual |
450 | * date/time representation (dd-mm-yyyy hh:mm:ss) in real time. | |
525fa2c1 | 451 | */ |
7a88832e | 452 | void |
c37e7851 | 453 | tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) |
7a88832e | 454 | { |
c37e7851 | 455 | const char *fmt_used; |
7a88832e | 456 | struct tm *tm; |
fd91ae33 OZ |
457 | bird_clock_t delta = now - t; |
458 | t = now_real - delta; | |
7a88832e | 459 | tm = localtime(&t); |
7a88832e | 460 | |
c37e7851 OZ |
461 | if (fmt_spec->fmt1 == NULL) |
462 | return tm_format_reltime(x, tm, delta); | |
afa8937a | 463 | |
c37e7851 OZ |
464 | if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit)) |
465 | fmt_used = fmt_spec->fmt1; | |
afa8937a | 466 | else |
c37e7851 OZ |
467 | fmt_used = fmt_spec->fmt2; |
468 | ||
469 | int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm); | |
470 | if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE)) | |
471 | strcpy(x, "<too-long>"); | |
afa8937a MM |
472 | } |
473 | ||
05476c4d | 474 | |
525fa2c1 MM |
475 | /** |
476 | * DOC: Sockets | |
477 | * | |
478 | * Socket resources represent network connections. Their data structure (&socket) | |
479 | * contains a lot of fields defining the exact type of the socket, the local and | |
480 | * remote addresses and ports, pointers to socket buffers and finally pointers to | |
481 | * hook functions to be called when new data have arrived to the receive buffer | |
482 | * (@rx_hook), when the contents of the transmit buffer have been transmitted | |
483 | * (@tx_hook) and when an error or connection close occurs (@err_hook). | |
484 | * | |
38a608c5 | 485 | * Freeing of sockets from inside socket hooks is perfectly safe. |
b5d9ee5c MM |
486 | */ |
487 | ||
abae6e9c MM |
488 | #ifndef SOL_IP |
489 | #define SOL_IP IPPROTO_IP | |
490 | #endif | |
491 | ||
b1a1faba OF |
492 | #ifndef SOL_IPV6 |
493 | #define SOL_IPV6 IPPROTO_IPV6 | |
494 | #endif | |
495 | ||
48e5f32d OZ |
496 | #ifndef SOL_ICMPV6 |
497 | #define SOL_ICMPV6 IPPROTO_ICMPV6 | |
498 | #endif | |
499 | ||
500 | ||
05476c4d OZ |
501 | /* |
502 | * Sockaddr helper functions | |
503 | */ | |
38a608c5 | 504 | |
05476c4d OZ |
505 | static inline int sockaddr_length(int af) |
506 | { return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); } | |
507 | ||
508 | static inline void | |
509 | sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port) | |
38a608c5 | 510 | { |
05476c4d OZ |
511 | memset(sa, 0, sizeof(struct sockaddr_in)); |
512 | #ifdef HAVE_SIN_LEN | |
513 | sa->sin_len = sizeof(struct sockaddr_in); | |
514 | #endif | |
515 | sa->sin_family = AF_INET; | |
516 | sa->sin_port = htons(port); | |
517 | sa->sin_addr = ipa_to_in4(a); | |
38a608c5 | 518 | } |
b5d9ee5c | 519 | |
05476c4d OZ |
520 | static inline void |
521 | sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port) | |
b5d9ee5c | 522 | { |
05476c4d OZ |
523 | memset(sa, 0, sizeof(struct sockaddr_in6)); |
524 | #ifdef SIN6_LEN | |
525 | sa->sin6_len = sizeof(struct sockaddr_in6); | |
526 | #endif | |
527 | sa->sin6_family = AF_INET6; | |
528 | sa->sin6_port = htons(port); | |
529 | sa->sin6_flowinfo = 0; | |
530 | sa->sin6_addr = ipa_to_in6(a); | |
531 | ||
532 | if (ifa && ipa_is_link_local(a)) | |
533 | sa->sin6_scope_id = ifa->index; | |
4da25acb | 534 | } |
b5d9ee5c | 535 | |
05476c4d OZ |
536 | void |
537 | sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port) | |
4da25acb | 538 | { |
05476c4d OZ |
539 | if (af == AF_INET) |
540 | sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port); | |
541 | else if (af == AF_INET6) | |
542 | sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port); | |
543 | else | |
544 | bug("Unknown AF"); | |
4da25acb MM |
545 | } |
546 | ||
05476c4d OZ |
547 | static inline void |
548 | sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port) | |
4da25acb | 549 | { |
05476c4d OZ |
550 | *port = ntohs(sa->sin_port); |
551 | *a = ipa_from_in4(sa->sin_addr); | |
b5d9ee5c MM |
552 | } |
553 | ||
05476c4d OZ |
554 | static inline void |
555 | sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port) | |
48e5f32d | 556 | { |
05476c4d OZ |
557 | *port = ntohs(sa->sin6_port); |
558 | *a = ipa_from_in6(sa->sin6_addr); | |
48e5f32d | 559 | |
05476c4d OZ |
560 | if (ifa && ipa_is_link_local(*a)) |
561 | *ifa = if_find_by_index(sa->sin6_scope_id); | |
48e5f32d OZ |
562 | } |
563 | ||
05476c4d OZ |
564 | int |
565 | sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port) | |
48e5f32d | 566 | { |
05476c4d OZ |
567 | if (sa->sa.sa_family != af) |
568 | goto fail; | |
48e5f32d | 569 | |
05476c4d OZ |
570 | if (af == AF_INET) |
571 | sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port); | |
572 | else if (af == AF_INET6) | |
573 | sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port); | |
574 | else | |
575 | goto fail; | |
48e5f32d | 576 | |
05476c4d | 577 | return 0; |
48e5f32d | 578 | |
05476c4d OZ |
579 | fail: |
580 | *a = IPA_NONE; | |
581 | *port = 0; | |
582 | return -1; | |
48e5f32d OZ |
583 | } |
584 | ||
48e5f32d | 585 | |
05476c4d OZ |
586 | /* |
587 | * IPv6 multicast syscalls | |
588 | */ | |
4da25acb | 589 | |
05476c4d | 590 | /* Fortunately standardized in RFC 3493 */ |
b5d9ee5c | 591 | |
05476c4d OZ |
592 | #define INIT_MREQ6(maddr,ifa) \ |
593 | { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index } | |
b5d9ee5c | 594 | |
05476c4d OZ |
595 | static inline int |
596 | sk_setup_multicast6(sock *s) | |
b5d9ee5c | 597 | { |
05476c4d OZ |
598 | int index = s->iface->index; |
599 | int ttl = s->ttl; | |
600 | int n = 0; | |
b5d9ee5c | 601 | |
05476c4d OZ |
602 | if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) |
603 | ERR("IPV6_MULTICAST_IF"); | |
b5d9ee5c | 604 | |
05476c4d OZ |
605 | if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0) |
606 | ERR("IPV6_MULTICAST_HOPS"); | |
4f22c981 | 607 | |
05476c4d OZ |
608 | if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0) |
609 | ERR("IPV6_MULTICAST_LOOP"); | |
4f22c981 | 610 | |
05476c4d | 611 | return 0; |
061ab802 OZ |
612 | } |
613 | ||
05476c4d OZ |
614 | static inline int |
615 | sk_join_group6(sock *s, ip_addr maddr) | |
4f22c981 | 616 | { |
05476c4d | 617 | struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); |
eb1451a3 | 618 | |
05476c4d OZ |
619 | if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0) |
620 | ERR("IPV6_JOIN_GROUP"); | |
4f22c981 | 621 | |
05476c4d | 622 | return 0; |
b5d9ee5c MM |
623 | } |
624 | ||
05476c4d OZ |
625 | static inline int |
626 | sk_leave_group6(sock *s, ip_addr maddr) | |
b5d9ee5c | 627 | { |
05476c4d | 628 | struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); |
b5d9ee5c | 629 | |
05476c4d OZ |
630 | if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0) |
631 | ERR("IPV6_LEAVE_GROUP"); | |
632 | ||
633 | return 0; | |
634 | } | |
4f22c981 | 635 | |
bed41728 | 636 | |
05476c4d OZ |
637 | /* |
638 | * IPv6 packet control messages | |
639 | */ | |
bed41728 | 640 | |
05476c4d | 641 | /* Also standardized, in RFC 3542 */ |
bed41728 | 642 | |
dcc60494 OZ |
643 | /* |
644 | * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg | |
645 | * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we | |
646 | * don't have IPV6_RECVPKTINFO we suppose the OS implements the older | |
647 | * RFC and we use IPV6_PKTINFO. | |
648 | */ | |
649 | #ifndef IPV6_RECVPKTINFO | |
650 | #define IPV6_RECVPKTINFO IPV6_PKTINFO | |
651 | #endif | |
70e212f9 OZ |
652 | /* |
653 | * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT. | |
654 | */ | |
655 | #ifndef IPV6_RECVHOPLIMIT | |
656 | #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT | |
657 | #endif | |
dcc60494 | 658 | |
70e212f9 | 659 | |
05476c4d OZ |
660 | #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo)) |
661 | #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int)) | |
bed41728 | 662 | |
05476c4d OZ |
663 | static inline int |
664 | sk_request_cmsg6_pktinfo(sock *s) | |
665 | { | |
666 | int y = 1; | |
70e212f9 | 667 | |
05476c4d OZ |
668 | if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0) |
669 | ERR("IPV6_RECVPKTINFO"); | |
670 | ||
671 | return 0; | |
bed41728 OZ |
672 | } |
673 | ||
05476c4d OZ |
674 | static inline int |
675 | sk_request_cmsg6_ttl(sock *s) | |
bed41728 | 676 | { |
05476c4d | 677 | int y = 1; |
bed41728 | 678 | |
05476c4d OZ |
679 | if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0) |
680 | ERR("IPV6_RECVHOPLIMIT"); | |
70e212f9 | 681 | |
05476c4d OZ |
682 | return 0; |
683 | } | |
70e212f9 | 684 | |
05476c4d OZ |
685 | static inline void |
686 | sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm) | |
687 | { | |
688 | if (cm->cmsg_type == IPV6_PKTINFO) | |
70e212f9 | 689 | { |
05476c4d OZ |
690 | struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm); |
691 | s->laddr = ipa_from_in6(pi->ipi6_addr); | |
692 | s->lifindex = pi->ipi6_ifindex; | |
70e212f9 | 693 | } |
05476c4d | 694 | } |
70e212f9 | 695 | |
05476c4d OZ |
696 | static inline void |
697 | sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm) | |
698 | { | |
699 | if (cm->cmsg_type == IPV6_HOPLIMIT) | |
700 | s->rcv_ttl = * (int *) CMSG_DATA(cm); | |
bed41728 OZ |
701 | } |
702 | ||
05476c4d OZ |
703 | static inline void |
704 | sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) | |
bed41728 OZ |
705 | { |
706 | struct cmsghdr *cm; | |
707 | struct in6_pktinfo *pi; | |
8945f73d | 708 | int controllen = 0; |
bed41728 | 709 | |
bed41728 OZ |
710 | msg->msg_control = cbuf; |
711 | msg->msg_controllen = cbuflen; | |
712 | ||
713 | cm = CMSG_FIRSTHDR(msg); | |
48e5f32d | 714 | cm->cmsg_level = SOL_IPV6; |
bed41728 OZ |
715 | cm->cmsg_type = IPV6_PKTINFO; |
716 | cm->cmsg_len = CMSG_LEN(sizeof(*pi)); | |
8945f73d | 717 | controllen += CMSG_SPACE(sizeof(*pi)); |
bed41728 OZ |
718 | |
719 | pi = (struct in6_pktinfo *) CMSG_DATA(cm); | |
bed41728 | 720 | pi->ipi6_ifindex = s->iface ? s->iface->index : 0; |
05476c4d | 721 | pi->ipi6_addr = ipa_to_in6(s->saddr); |
bed41728 | 722 | |
8945f73d | 723 | msg->msg_controllen = controllen; |
bed41728 | 724 | } |
48e5f32d | 725 | |
bed41728 | 726 | |
05476c4d OZ |
727 | /* |
728 | * Miscellaneous socket syscalls | |
729 | */ | |
730 | ||
731 | static inline int | |
732 | sk_set_ttl4(sock *s, int ttl) | |
a39b165e | 733 | { |
05476c4d OZ |
734 | if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0) |
735 | ERR("IP_TTL"); | |
736 | ||
737 | return 0; | |
a39b165e OZ |
738 | } |
739 | ||
05476c4d OZ |
740 | static inline int |
741 | sk_set_ttl6(sock *s, int ttl) | |
742 | { | |
743 | if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0) | |
744 | ERR("IPV6_UNICAST_HOPS"); | |
38a608c5 | 745 | |
05476c4d OZ |
746 | return 0; |
747 | } | |
748 | ||
749 | static inline int | |
750 | sk_set_tos4(sock *s, int tos) | |
b5d9ee5c | 751 | { |
05476c4d OZ |
752 | if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0) |
753 | ERR("IP_TOS"); | |
b5d9ee5c | 754 | |
05476c4d OZ |
755 | return 0; |
756 | } | |
ef4a50be | 757 | |
05476c4d OZ |
758 | static inline int |
759 | sk_set_tos6(sock *s, int tos) | |
760 | { | |
761 | if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0) | |
762 | ERR("IPV6_TCLASS"); | |
48e5f32d | 763 | |
05476c4d OZ |
764 | return 0; |
765 | } | |
48e5f32d | 766 | |
48e5f32d | 767 | |
05476c4d OZ |
768 | /* |
769 | * Public socket functions | |
770 | */ | |
48e5f32d | 771 | |
05476c4d OZ |
772 | /** |
773 | * sk_setup_multicast - enable multicast for given socket | |
774 | * @s: socket | |
775 | * | |
776 | * Prepare transmission of multicast packets for given datagram socket. | |
777 | * The socket must have defined @iface. | |
778 | * | |
779 | * Result: 0 for success, -1 for an error. | |
780 | */ | |
48e5f32d | 781 | |
05476c4d OZ |
782 | int |
783 | sk_setup_multicast(sock *s) | |
784 | { | |
785 | ASSERT(s->iface); | |
48e5f32d | 786 | |
05476c4d OZ |
787 | if (sk_is_ipv4(s)) |
788 | return sk_setup_multicast4(s); | |
789 | else | |
790 | return sk_setup_multicast6(s); | |
791 | } | |
48e5f32d | 792 | |
05476c4d OZ |
793 | /** |
794 | * sk_join_group - join multicast group for given socket | |
795 | * @s: socket | |
796 | * @maddr: multicast address | |
797 | * | |
798 | * Join multicast group for given datagram socket and associated interface. | |
799 | * The socket must have defined @iface. | |
800 | * | |
801 | * Result: 0 for success, -1 for an error. | |
802 | */ | |
789772ed | 803 | |
05476c4d OZ |
804 | int |
805 | sk_join_group(sock *s, ip_addr maddr) | |
806 | { | |
807 | if (sk_is_ipv4(s)) | |
808 | return sk_join_group4(s, maddr); | |
809 | else | |
810 | return sk_join_group6(s, maddr); | |
811 | } | |
ef4a50be | 812 | |
05476c4d OZ |
813 | /** |
814 | * sk_leave_group - leave multicast group for given socket | |
815 | * @s: socket | |
816 | * @maddr: multicast address | |
817 | * | |
818 | * Leave multicast group for given datagram socket and associated interface. | |
819 | * The socket must have defined @iface. | |
820 | * | |
821 | * Result: 0 for success, -1 for an error. | |
822 | */ | |
789772ed | 823 | |
05476c4d OZ |
824 | int |
825 | sk_leave_group(sock *s, ip_addr maddr) | |
826 | { | |
827 | if (sk_is_ipv4(s)) | |
828 | return sk_leave_group4(s, maddr); | |
829 | else | |
830 | return sk_leave_group6(s, maddr); | |
b5d9ee5c MM |
831 | } |
832 | ||
a39b165e | 833 | /** |
05476c4d OZ |
834 | * sk_setup_broadcast - enable broadcast for given socket |
835 | * @s: socket | |
836 | * | |
837 | * Allow reception and transmission of broadcast packets for given datagram | |
838 | * socket. The socket must have defined @iface. For transmission, packets should | |
839 | * be sent to the @brd address of @iface. | |
840 | * | |
841 | * Result: 0 for success, -1 for an error. | |
842 | */ | |
843 | ||
844 | int | |
845 | sk_setup_broadcast(sock *s) | |
846 | { | |
847 | int y = 1; | |
848 | ||
849 | if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0) | |
850 | ERR("SO_BROADCAST"); | |
851 | ||
852 | return 0; | |
853 | } | |
854 | ||
855 | /** | |
856 | * sk_set_ttl - set transmit TTL for given socket | |
a39b165e OZ |
857 | * @s: socket |
858 | * @ttl: TTL value | |
859 | * | |
05476c4d OZ |
860 | * Set TTL for already opened connections when TTL was not set before. Useful |
861 | * for accepted connections when different ones should have different TTL. | |
a39b165e OZ |
862 | * |
863 | * Result: 0 for success, -1 for an error. | |
864 | */ | |
865 | ||
866 | int | |
867 | sk_set_ttl(sock *s, int ttl) | |
868 | { | |
a39b165e | 869 | s->ttl = ttl; |
a39b165e | 870 | |
05476c4d OZ |
871 | if (sk_is_ipv4(s)) |
872 | return sk_set_ttl4(s, ttl); | |
873 | else | |
874 | return sk_set_ttl6(s, ttl); | |
a39b165e OZ |
875 | } |
876 | ||
b1b19433 | 877 | /** |
05476c4d | 878 | * sk_set_min_ttl - set minimal accepted TTL for given socket |
b1b19433 OZ |
879 | * @s: socket |
880 | * @ttl: TTL value | |
881 | * | |
05476c4d OZ |
882 | * Set minimal accepted TTL for given socket. Can be used for TTL security |
883 | * implementations. | |
b1b19433 OZ |
884 | * |
885 | * Result: 0 for success, -1 for an error. | |
886 | */ | |
887 | ||
888 | int | |
889 | sk_set_min_ttl(sock *s, int ttl) | |
890 | { | |
05476c4d OZ |
891 | if (sk_is_ipv4(s)) |
892 | return sk_set_min_ttl4(s, ttl); | |
893 | else | |
894 | return sk_set_min_ttl6(s, ttl); | |
b1b19433 | 895 | } |
d51aa281 | 896 | |
05476c4d | 897 | #if 0 |
d51aa281 | 898 | /** |
05476c4d | 899 | * sk_set_md5_auth - add / remove MD5 security association for given socket |
d51aa281 OZ |
900 | * @s: socket |
901 | * @a: IP address of the other side | |
eb1451a3 | 902 | * @ifa: Interface for link-local IP address |
d51aa281 OZ |
903 | * @passwd: password used for MD5 authentication |
904 | * | |
05476c4d OZ |
905 | * In TCP MD5 handling code in kernel, there is a set of pairs (address, |
906 | * password) used to choose password according to address of the other side. | |
907 | * This function is useful for listening socket, for active sockets it is enough | |
908 | * to set s->password field. | |
d51aa281 OZ |
909 | * |
910 | * When called with passwd != NULL, the new pair is added. | |
911 | * When called with passwd == NULL, the existing pair is removed. | |
912 | * | |
913 | * Result: 0 for success, -1 for an error. | |
914 | */ | |
915 | ||
916 | int | |
eb1451a3 | 917 | sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd) |
05476c4d OZ |
918 | { DUMMY; } |
919 | #endif | |
f9c799a0 | 920 | |
05476c4d OZ |
921 | /** |
922 | * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket | |
923 | * @s: socket | |
924 | * @offset: offset | |
925 | * | |
926 | * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the | |
927 | * kernel will automatically fill it for outgoing packets and check it for | |
928 | * incoming packets. Should not be used on ICMPv6 sockets, where the position is | |
929 | * known to the kernel. | |
930 | * | |
931 | * Result: 0 for success, -1 for an error. | |
932 | */ | |
f9c799a0 | 933 | |
4ac7c834 OZ |
934 | int |
935 | sk_set_ipv6_checksum(sock *s, int offset) | |
936 | { | |
48e5f32d | 937 | if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) |
05476c4d | 938 | ERR("IPV6_CHECKSUM"); |
4ac7c834 OZ |
939 | |
940 | return 0; | |
941 | } | |
942 | ||
93e868c7 | 943 | int |
05476c4d | 944 | sk_set_icmp6_filter(sock *s, int p1, int p2) |
93e868c7 OZ |
945 | { |
946 | /* a bit of lame interface, but it is here only for Radv */ | |
947 | struct icmp6_filter f; | |
948 | ||
949 | ICMP6_FILTER_SETBLOCKALL(&f); | |
950 | ICMP6_FILTER_SETPASS(p1, &f); | |
951 | ICMP6_FILTER_SETPASS(p2, &f); | |
952 | ||
48e5f32d | 953 | if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) |
05476c4d | 954 | ERR("ICMP6_FILTER"); |
93e868c7 OZ |
955 | |
956 | return 0; | |
957 | } | |
958 | ||
05476c4d OZ |
959 | void |
960 | sk_log_error(sock *s, const char *p) | |
961 | { | |
962 | log(L_ERR "%s: Socket error: %s%#m", p, s->err); | |
963 | } | |
964 | ||
965 | ||
966 | /* | |
967 | * Actual struct birdsock code | |
968 | */ | |
969 | ||
970 | static list sock_list; | |
971 | static struct birdsock *current_sock; | |
972 | static struct birdsock *stored_sock; | |
973 | static int sock_recalc_fdsets_p; | |
974 | ||
975 | static inline sock * | |
976 | sk_next(sock *s) | |
977 | { | |
978 | if (!s->n.next->next) | |
979 | return NULL; | |
980 | else | |
981 | return SKIP_BACK(sock, n, s->n.next); | |
982 | } | |
983 | ||
984 | static void | |
985 | sk_alloc_bufs(sock *s) | |
986 | { | |
987 | if (!s->rbuf && s->rbsize) | |
988 | s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize); | |
989 | s->rpos = s->rbuf; | |
990 | if (!s->tbuf && s->tbsize) | |
991 | s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize); | |
992 | s->tpos = s->ttx = s->tbuf; | |
993 | } | |
994 | ||
995 | static void | |
996 | sk_free_bufs(sock *s) | |
997 | { | |
998 | if (s->rbuf_alloc) | |
999 | { | |
1000 | xfree(s->rbuf_alloc); | |
1001 | s->rbuf = s->rbuf_alloc = NULL; | |
1002 | } | |
1003 | if (s->tbuf_alloc) | |
1004 | { | |
1005 | xfree(s->tbuf_alloc); | |
1006 | s->tbuf = s->tbuf_alloc = NULL; | |
1007 | } | |
1008 | } | |
1009 | ||
1010 | static void | |
1011 | sk_free(resource *r) | |
1012 | { | |
1013 | sock *s = (sock *) r; | |
1014 | ||
1015 | sk_free_bufs(s); | |
1016 | if (s->fd >= 0) | |
1017 | { | |
1018 | close(s->fd); | |
1019 | ||
1020 | /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ | |
1021 | if (s->flags & SKF_THREAD) | |
1022 | return; | |
1023 | ||
1024 | if (s == current_sock) | |
1025 | current_sock = sk_next(s); | |
1026 | if (s == stored_sock) | |
1027 | stored_sock = sk_next(s); | |
1028 | rem_node(&s->n); | |
1029 | sock_recalc_fdsets_p = 1; | |
1030 | } | |
1031 | } | |
1032 | ||
1033 | void | |
1034 | sk_set_rbsize(sock *s, uint val) | |
1035 | { | |
1036 | ASSERT(s->rbuf_alloc == s->rbuf); | |
1037 | ||
1038 | if (s->rbsize == val) | |
1039 | return; | |
1040 | ||
1041 | s->rbsize = val; | |
1042 | xfree(s->rbuf_alloc); | |
1043 | s->rbuf_alloc = xmalloc(val); | |
1044 | s->rpos = s->rbuf = s->rbuf_alloc; | |
1045 | } | |
1046 | ||
1047 | void | |
1048 | sk_set_tbsize(sock *s, uint val) | |
1049 | { | |
1050 | ASSERT(s->tbuf_alloc == s->tbuf); | |
1051 | ||
1052 | if (s->tbsize == val) | |
1053 | return; | |
1054 | ||
1055 | byte *old_tbuf = s->tbuf; | |
1056 | ||
1057 | s->tbsize = val; | |
1058 | s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); | |
1059 | s->tpos = s->tbuf + (s->tpos - old_tbuf); | |
1060 | s->ttx = s->tbuf + (s->ttx - old_tbuf); | |
1061 | } | |
1062 | ||
1063 | void | |
1064 | sk_set_tbuf(sock *s, void *tbuf) | |
1065 | { | |
1066 | s->tbuf = tbuf ?: s->tbuf_alloc; | |
1067 | s->ttx = s->tpos = s->tbuf; | |
1068 | } | |
1069 | ||
1070 | void | |
1071 | sk_reallocate(sock *s) | |
1072 | { | |
1073 | sk_free_bufs(s); | |
1074 | sk_alloc_bufs(s); | |
1075 | } | |
1076 | ||
1077 | static void | |
1078 | sk_dump(resource *r) | |
1079 | { | |
1080 | sock *s = (sock *) r; | |
1081 | static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" }; | |
1082 | ||
1083 | debug("(%s, ud=%p, sa=%08x, sp=%d, da=%08x, dp=%d, tos=%d, ttl=%d, if=%s)\n", | |
1084 | sk_type_names[s->type], | |
1085 | s->data, | |
1086 | s->saddr, | |
1087 | s->sport, | |
1088 | s->daddr, | |
1089 | s->dport, | |
1090 | s->tos, | |
1091 | s->ttl, | |
1092 | s->iface ? s->iface->name : "none"); | |
1093 | } | |
1094 | ||
1095 | static struct resclass sk_class = { | |
1096 | "Socket", | |
1097 | sizeof(sock), | |
1098 | sk_free, | |
1099 | sk_dump, | |
1100 | NULL, | |
1101 | NULL | |
1102 | }; | |
1103 | ||
1104 | /** | |
1105 | * sk_new - create a socket | |
1106 | * @p: pool | |
1107 | * | |
1108 | * This function creates a new socket resource. If you want to use it, | |
1109 | * you need to fill in all the required fields of the structure and | |
1110 | * call sk_open() to do the actual opening of the socket. | |
1111 | * | |
1112 | * The real function name is sock_new(), sk_new() is a macro wrapper | |
1113 | * to avoid collision with OpenSSL. | |
1114 | */ | |
1115 | sock * | |
1116 | sock_new(pool *p) | |
1117 | { | |
1118 | sock *s = ralloc(p, &sk_class); | |
1119 | s->pool = p; | |
1120 | // s->saddr = s->daddr = IPA_NONE; | |
1121 | s->tos = s->priority = s->ttl = -1; | |
1122 | s->fd = -1; | |
1123 | return s; | |
1124 | } | |
1125 | ||
1126 | static int | |
1127 | sk_setup(sock *s) | |
f9c799a0 | 1128 | { |
05476c4d OZ |
1129 | int y = 1; |
1130 | int fd = s->fd; | |
f9c799a0 | 1131 | |
05476c4d OZ |
1132 | if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) |
1133 | ERR("O_NONBLOCK"); | |
f9c799a0 | 1134 | |
05476c4d OZ |
1135 | if (!s->af) |
1136 | return 0; | |
f9c799a0 | 1137 | |
05476c4d OZ |
1138 | if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND)) |
1139 | s->flags |= SKF_PKTINFO; | |
f9c799a0 | 1140 | |
05476c4d OZ |
1141 | #ifdef CONFIG_USE_HDRINCL |
1142 | if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO)) | |
1143 | { | |
1144 | s->flags &= ~SKF_PKTINFO; | |
1145 | s->flags |= SKF_HDRINCL; | |
1146 | if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0) | |
1147 | ERR("IP_HDRINCL"); | |
1148 | } | |
48e5f32d OZ |
1149 | #endif |
1150 | ||
05476c4d OZ |
1151 | if (s->iface) |
1152 | { | |
1153 | #ifdef SO_BINDTODEVICE | |
1154 | struct ifreq ifr; | |
1155 | strcpy(ifr.ifr_name, s->iface->name); | |
1156 | if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) | |
1157 | ERR("SO_BINDTODEVICE"); | |
1158 | #endif | |
f1aceff5 | 1159 | |
05476c4d OZ |
1160 | #ifdef CONFIG_UNIX_DONTROUTE |
1161 | if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0) | |
1162 | ERR("SO_DONTROUTE"); | |
1163 | #endif | |
1164 | } | |
f9c799a0 | 1165 | |
05476c4d OZ |
1166 | if (s->priority >= 0) |
1167 | if (sk_set_priority(s, s->priority) < 0) | |
f9c799a0 | 1168 | return -1; |
f9c799a0 | 1169 | |
05476c4d OZ |
1170 | if (sk_is_ipv4(s)) |
1171 | { | |
1172 | if (s->flags & SKF_LADDR_RX) | |
1173 | if (sk_request_cmsg4_pktinfo(s) < 0) | |
1174 | return -1; | |
f9c799a0 | 1175 | |
05476c4d OZ |
1176 | if (s->flags & SKF_TTL_RX) |
1177 | if (sk_request_cmsg4_ttl(s) < 0) | |
1178 | return -1; | |
f9c799a0 | 1179 | |
05476c4d OZ |
1180 | if ((s->type == SK_UDP) || (s->type == SK_IP)) |
1181 | if (sk_disable_mtu_disc4(s) < 0) | |
1182 | return -1; | |
f9c799a0 | 1183 | |
05476c4d OZ |
1184 | if (s->ttl >= 0) |
1185 | if (sk_set_ttl4(s, s->ttl) < 0) | |
1186 | return -1; | |
f9c799a0 | 1187 | |
05476c4d OZ |
1188 | if (s->tos >= 0) |
1189 | if (sk_set_tos4(s, s->tos) < 0) | |
1190 | return -1; | |
1191 | } | |
f9c799a0 | 1192 | |
05476c4d OZ |
1193 | if (sk_is_ipv6(s)) |
1194 | { | |
1195 | if (s->flags & SKF_V6ONLY) | |
1196 | if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0) | |
1197 | ERR("IPV6_V6ONLY"); | |
f9c799a0 | 1198 | |
05476c4d OZ |
1199 | if (s->flags & SKF_LADDR_RX) |
1200 | if (sk_request_cmsg6_pktinfo(s) < 0) | |
1201 | return -1; | |
f9c799a0 | 1202 | |
05476c4d OZ |
1203 | if (s->flags & SKF_TTL_RX) |
1204 | if (sk_request_cmsg6_ttl(s) < 0) | |
1205 | return -1; | |
f9c799a0 | 1206 | |
05476c4d OZ |
1207 | if ((s->type == SK_UDP) || (s->type == SK_IP)) |
1208 | if (sk_disable_mtu_disc6(s) < 0) | |
1209 | return -1; | |
f9c799a0 | 1210 | |
05476c4d OZ |
1211 | if (s->ttl >= 0) |
1212 | if (sk_set_ttl6(s, s->ttl) < 0) | |
1213 | return -1; | |
f9c799a0 | 1214 | |
05476c4d OZ |
1215 | if (s->tos >= 0) |
1216 | if (sk_set_tos6(s, s->tos) < 0) | |
1217 | return -1; | |
1218 | } | |
f9c799a0 OZ |
1219 | |
1220 | return 0; | |
1221 | } | |
1222 | ||
05476c4d OZ |
1223 | static void |
1224 | sk_insert(sock *s) | |
f9c799a0 | 1225 | { |
05476c4d OZ |
1226 | add_tail(&sock_list, &s->n); |
1227 | sock_recalc_fdsets_p = 1; | |
f9c799a0 OZ |
1228 | } |
1229 | ||
b93abffa | 1230 | static void |
b5d9ee5c MM |
1231 | sk_tcp_connected(sock *s) |
1232 | { | |
05476c4d OZ |
1233 | sockaddr sa; |
1234 | int sa_len = sizeof(sa); | |
1235 | ||
1236 | if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) || | |
1237 | (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0)) | |
1238 | log(L_WARN "SOCK: Cannot get local IP address for TCP>"); | |
9be9a264 | 1239 | |
b5d9ee5c MM |
1240 | s->type = SK_TCP; |
1241 | sk_alloc_bufs(s); | |
320f4173 | 1242 | s->tx_hook(s); |
b5d9ee5c MM |
1243 | } |
1244 | ||
b93abffa | 1245 | static int |
05476c4d | 1246 | sk_passive_connected(sock *s, int type) |
b93abffa | 1247 | { |
05476c4d OZ |
1248 | sockaddr loc_sa, rem_sa; |
1249 | int loc_sa_len = sizeof(loc_sa); | |
1250 | int rem_sa_len = sizeof(rem_sa); | |
cf31112f | 1251 | |
05476c4d OZ |
1252 | int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len); |
1253 | if (fd < 0) | |
1254 | { | |
1255 | if ((errno != EINTR) && (errno != EAGAIN)) | |
c025b852 | 1256 | s->err_hook(s, errno); |
05476c4d OZ |
1257 | return 0; |
1258 | } | |
1259 | ||
1260 | sock *t = sk_new(s->pool); | |
1261 | t->type = type; | |
1262 | t->fd = fd; | |
1263 | t->af = s->af; | |
1264 | t->ttl = s->ttl; | |
1265 | t->tos = s->tos; | |
1266 | t->rbsize = s->rbsize; | |
1267 | t->tbsize = s->tbsize; | |
1268 | ||
1269 | if (type == SK_TCP) | |
1270 | { | |
1271 | if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) || | |
1272 | (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0)) | |
1273 | log(L_WARN "SOCK: Cannot get local IP address for TCP<"); | |
1274 | ||
1275 | if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0) | |
1276 | log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); | |
1277 | } | |
1278 | ||
1279 | if (sk_setup(t) < 0) | |
1280 | { | |
1281 | /* FIXME: Call err_hook instead ? */ | |
1282 | log(L_ERR "SOCK: Incoming connection: %s%#m", t->err); | |
1283 | ||
1284 | /* FIXME: handle it better in rfree() */ | |
1285 | close(t->fd); | |
1286 | t->fd = -1; | |
1287 | rfree(t); | |
1288 | return 1; | |
1289 | } | |
1290 | ||
1291 | sk_insert(t); | |
1292 | sk_alloc_bufs(t); | |
1293 | s->rx_hook(t, 0); | |
1294 | return 1; | |
b93abffa MM |
1295 | } |
1296 | ||
525fa2c1 MM |
1297 | /** |
1298 | * sk_open - open a socket | |
1299 | * @s: socket | |
1300 | * | |
1301 | * This function takes a socket resource created by sk_new() and | |
1302 | * initialized by the user and binds a corresponding network connection | |
1303 | * to it. | |
1304 | * | |
1305 | * Result: 0 for success, -1 for an error. | |
1306 | */ | |
b5d9ee5c MM |
1307 | int |
1308 | sk_open(sock *s) | |
1309 | { | |
05476c4d OZ |
1310 | int af = BIRD_AF; |
1311 | int fd = -1; | |
48e5f32d OZ |
1312 | int do_bind = 0; |
1313 | int bind_port = 0; | |
1314 | ip_addr bind_addr = IPA_NONE; | |
1315 | sockaddr sa; | |
b5d9ee5c | 1316 | |
48e5f32d | 1317 | switch (s->type) |
05476c4d OZ |
1318 | { |
1319 | case SK_TCP_ACTIVE: | |
1320 | s->ttx = ""; /* Force s->ttx != s->tpos */ | |
1321 | /* Fall thru */ | |
1322 | case SK_TCP_PASSIVE: | |
1323 | fd = socket(af, SOCK_STREAM, IPPROTO_TCP); | |
1324 | bind_port = s->sport; | |
1325 | bind_addr = s->saddr; | |
1326 | do_bind = bind_port || ipa_nonzero(bind_addr); | |
1327 | break; | |
1328 | ||
1329 | case SK_UDP: | |
1330 | fd = socket(af, SOCK_DGRAM, IPPROTO_UDP); | |
1331 | bind_port = s->sport; | |
1332 | bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; | |
1333 | do_bind = 1; | |
1334 | break; | |
1335 | ||
1336 | case SK_IP: | |
1337 | fd = socket(af, SOCK_RAW, s->dport); | |
1338 | bind_port = 0; | |
1339 | bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; | |
1340 | do_bind = ipa_nonzero(bind_addr); | |
1341 | break; | |
1342 | ||
1343 | case SK_MAGIC: | |
1344 | af = 0; | |
1345 | fd = s->fd; | |
1346 | break; | |
1347 | ||
1348 | default: | |
1349 | bug("sk_open() called for invalid sock type %d", s->type); | |
1350 | } | |
1351 | ||
b5d9ee5c | 1352 | if (fd < 0) |
05476c4d OZ |
1353 | ERR("socket"); |
1354 | ||
1355 | s->af = af; | |
b5d9ee5c MM |
1356 | s->fd = fd; |
1357 | ||
05476c4d OZ |
1358 | if (sk_setup(s) < 0) |
1359 | goto err; | |
38a608c5 | 1360 | |
48e5f32d | 1361 | if (do_bind) |
05476c4d OZ |
1362 | { |
1363 | if (bind_port) | |
b5d9ee5c | 1364 | { |
05476c4d OZ |
1365 | int y = 1; |
1366 | ||
1367 | if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0) | |
1368 | ERR2("SO_REUSEADDR"); | |
48e5f32d | 1369 | |
8931425d | 1370 | #ifdef CONFIG_NO_IFACE_BIND |
05476c4d OZ |
1371 | /* Workaround missing ability to bind to an iface */ |
1372 | if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr)) | |
1373 | { | |
1374 | if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0) | |
1375 | ERR2("SO_REUSEPORT"); | |
1376 | } | |
8931425d | 1377 | #endif |
b5d9ee5c | 1378 | } |
48e5f32d | 1379 | |
05476c4d OZ |
1380 | sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port); |
1381 | if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) | |
1382 | ERR2("bind"); | |
1383 | } | |
d51aa281 OZ |
1384 | |
1385 | if (s->password) | |
05476c4d OZ |
1386 | if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0) |
1387 | goto err; | |
d51aa281 | 1388 | |
48e5f32d | 1389 | switch (s->type) |
05476c4d OZ |
1390 | { |
1391 | case SK_TCP_ACTIVE: | |
1392 | sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport); | |
1393 | if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0) | |
1394 | sk_tcp_connected(s); | |
1395 | else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && | |
1396 | errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH) | |
1397 | ERR2("connect"); | |
1398 | break; | |
1399 | ||
1400 | case SK_TCP_PASSIVE: | |
1401 | if (listen(fd, 8) < 0) | |
1402 | ERR2("listen"); | |
1403 | break; | |
1404 | ||
1405 | case SK_MAGIC: | |
1406 | break; | |
1407 | ||
1408 | default: | |
1409 | sk_alloc_bufs(s); | |
1410 | } | |
b5d9ee5c | 1411 | |
bf139664 OZ |
1412 | if (!(s->flags & SKF_THREAD)) |
1413 | sk_insert(s); | |
b5d9ee5c MM |
1414 | return 0; |
1415 | ||
05476c4d | 1416 | err: |
b5d9ee5c MM |
1417 | close(fd); |
1418 | s->fd = -1; | |
1419 | return -1; | |
1420 | } | |
1421 | ||
05476c4d | 1422 | int |
b93abffa MM |
1423 | sk_open_unix(sock *s, char *name) |
1424 | { | |
b93abffa | 1425 | struct sockaddr_un sa; |
05476c4d OZ |
1426 | int fd; |
1427 | ||
1428 | /* We are sloppy during error (leak fd and not set s->err), but we die anyway */ | |
b93abffa MM |
1429 | |
1430 | fd = socket(AF_UNIX, SOCK_STREAM, 0); | |
1431 | if (fd < 0) | |
05476c4d OZ |
1432 | return -1; |
1433 | ||
1434 | if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) | |
1435 | return -1; | |
68fa95cf | 1436 | |
97e46d28 | 1437 | /* Path length checked in test_old_bird() */ |
b93abffa | 1438 | sa.sun_family = AF_UNIX; |
97c6fa02 | 1439 | strcpy(sa.sun_path, name); |
05476c4d | 1440 | |
0b3bf4b1 | 1441 | if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) |
05476c4d OZ |
1442 | return -1; |
1443 | ||
1444 | if (listen(fd, 8) < 0) | |
1445 | return -1; | |
1446 | ||
1447 | s->fd = fd; | |
38a608c5 | 1448 | sk_insert(s); |
05476c4d OZ |
1449 | return 0; |
1450 | } | |
1451 | ||
1452 | ||
/* Control-message buffer sizes, large enough for either address family */

/* Receive side: packet info plus TTL */
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
			  CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)

/* Transmit side: packet info only */
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1456 | ||
1457 | static void | |
1458 | sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) | |
1459 | { | |
1460 | if (sk_is_ipv4(s)) | |
1461 | sk_prepare_cmsgs4(s, msg, cbuf, cbuflen); | |
1462 | else | |
1463 | sk_prepare_cmsgs6(s, msg, cbuf, cbuflen); | |
1464 | } | |
1465 | ||
1466 | static void | |
1467 | sk_process_cmsgs(sock *s, struct msghdr *msg) | |
1468 | { | |
1469 | struct cmsghdr *cm; | |
1470 | ||
1471 | s->laddr = IPA_NONE; | |
1472 | s->lifindex = 0; | |
1473 | s->rcv_ttl = -1; | |
1474 | ||
1475 | for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) | |
1476 | { | |
1477 | if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s)) | |
1478 | { | |
1479 | sk_process_cmsg4_pktinfo(s, cm); | |
1480 | sk_process_cmsg4_ttl(s, cm); | |
1481 | } | |
b93abffa | 1482 | |
05476c4d OZ |
1483 | if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s)) |
1484 | { | |
1485 | sk_process_cmsg6_pktinfo(s, cm); | |
1486 | sk_process_cmsg6_ttl(s, cm); | |
1487 | } | |
1488 | } | |
b93abffa MM |
1489 | } |
1490 | ||
48e5f32d OZ |
1491 | |
1492 | static inline int | |
1493 | sk_sendmsg(sock *s) | |
1494 | { | |
1495 | struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; | |
1496 | byte cmsg_buf[CMSG_TX_SPACE]; | |
1497 | sockaddr dst; | |
1498 | ||
05476c4d | 1499 | sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport); |
48e5f32d OZ |
1500 | |
1501 | struct msghdr msg = { | |
05476c4d OZ |
1502 | .msg_name = &dst.sa, |
1503 | .msg_namelen = SA_LEN(dst), | |
48e5f32d OZ |
1504 | .msg_iov = &iov, |
1505 | .msg_iovlen = 1 | |
1506 | }; | |
1507 | ||
1508 | #ifdef CONFIG_USE_HDRINCL | |
1509 | byte hdr[20]; | |
1510 | struct iovec iov2[2] = { {hdr, 20}, iov }; | |
1511 | ||
1512 | if (s->flags & SKF_HDRINCL) | |
1513 | { | |
05476c4d | 1514 | sk_prepare_ip_header(s, hdr, iov.iov_len); |
48e5f32d OZ |
1515 | msg.msg_iov = iov2; |
1516 | msg.msg_iovlen = 2; | |
1517 | } | |
1518 | #endif | |
1519 | ||
1520 | if (s->flags & SKF_PKTINFO) | |
05476c4d | 1521 | sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); |
48e5f32d OZ |
1522 | |
1523 | return sendmsg(s->fd, &msg, 0); | |
1524 | } | |
1525 | ||
1526 | static inline int | |
1527 | sk_recvmsg(sock *s) | |
1528 | { | |
1529 | struct iovec iov = {s->rbuf, s->rbsize}; | |
1530 | byte cmsg_buf[CMSG_RX_SPACE]; | |
1531 | sockaddr src; | |
1532 | ||
1533 | struct msghdr msg = { | |
05476c4d OZ |
1534 | .msg_name = &src.sa, |
1535 | .msg_namelen = sizeof(src), // XXXX ?? | |
48e5f32d OZ |
1536 | .msg_iov = &iov, |
1537 | .msg_iovlen = 1, | |
1538 | .msg_control = cmsg_buf, | |
1539 | .msg_controllen = sizeof(cmsg_buf), | |
1540 | .msg_flags = 0 | |
1541 | }; | |
1542 | ||
1543 | int rv = recvmsg(s->fd, &msg, 0); | |
1544 | if (rv < 0) | |
1545 | return rv; | |
1546 | ||
1547 | //ifdef IPV4 | |
1548 | // if (cf_type == SK_IP) | |
1549 | // rv = ipv4_skip_header(pbuf, rv); | |
1550 | //endif | |
1551 | ||
05476c4d OZ |
1552 | sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport); |
1553 | sk_process_cmsgs(s, &msg); | |
48e5f32d OZ |
1554 | |
1555 | if (msg.msg_flags & MSG_TRUNC) | |
1556 | s->flags |= SKF_TRUNCATED; | |
1557 | else | |
1558 | s->flags &= ~SKF_TRUNCATED; | |
1559 | ||
1560 | return rv; | |
1561 | } | |
1562 | ||
1563 | ||
353729f5 OZ |
1564 | static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; } |
1565 | ||
b5d9ee5c MM |
1566 | static int |
1567 | sk_maybe_write(sock *s) | |
1568 | { | |
1569 | int e; | |
1570 | ||
1571 | switch (s->type) | |
05476c4d OZ |
1572 | { |
1573 | case SK_TCP: | |
1574 | case SK_MAGIC: | |
1575 | case SK_UNIX: | |
1576 | while (s->ttx != s->tpos) | |
b5d9ee5c | 1577 | { |
05476c4d OZ |
1578 | e = write(s->fd, s->ttx, s->tpos - s->ttx); |
1579 | ||
1580 | if (e < 0) | |
1581 | { | |
1582 | if (errno != EINTR && errno != EAGAIN) | |
b5d9ee5c | 1583 | { |
05476c4d OZ |
1584 | reset_tx_buffer(s); |
1585 | /* EPIPE is just a connection close notification during TX */ | |
1586 | s->err_hook(s, (errno != EPIPE) ? errno : 0); | |
1587 | return -1; | |
b5d9ee5c | 1588 | } |
05476c4d OZ |
1589 | return 0; |
1590 | } | |
1591 | s->ttx += e; | |
1592 | } | |
1593 | reset_tx_buffer(s); | |
1594 | return 1; | |
1595 | ||
1596 | case SK_UDP: | |
1597 | case SK_IP: | |
1598 | { | |
1599 | if (s->tbuf == s->tpos) | |
b5d9ee5c | 1600 | return 1; |
05476c4d OZ |
1601 | |
1602 | e = sk_sendmsg(s); | |
1603 | ||
1604 | if (e < 0) | |
1605 | { | |
1606 | if (errno != EINTR && errno != EAGAIN) | |
1607 | { | |
1608 | reset_tx_buffer(s); | |
1609 | s->err_hook(s, errno); | |
1610 | return -1; | |
1611 | } | |
1612 | ||
1613 | if (!s->tx_hook) | |
1614 | reset_tx_buffer(s); | |
1615 | return 0; | |
b5d9ee5c | 1616 | } |
05476c4d OZ |
1617 | reset_tx_buffer(s); |
1618 | return 1; | |
b5d9ee5c | 1619 | } |
05476c4d OZ |
1620 | default: |
1621 | bug("sk_maybe_write: unknown socket type %d", s->type); | |
1622 | } | |
b5d9ee5c MM |
1623 | } |
1624 | ||
ea89da38 OZ |
1625 | int |
1626 | sk_rx_ready(sock *s) | |
1627 | { | |
1628 | fd_set rd, wr; | |
1629 | struct timeval timo; | |
1630 | int rv; | |
1631 | ||
1632 | FD_ZERO(&rd); | |
1633 | FD_ZERO(&wr); | |
1634 | FD_SET(s->fd, &rd); | |
1635 | ||
1636 | timo.tv_sec = 0; | |
1637 | timo.tv_usec = 0; | |
1638 | ||
1639 | redo: | |
1640 | rv = select(s->fd+1, &rd, &wr, NULL, &timo); | |
1641 | ||
1642 | if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) | |
1643 | goto redo; | |
1644 | ||
1645 | return rv; | |
1646 | } | |
1647 | ||
525fa2c1 MM |
1648 | /** |
1649 | * sk_send - send data to a socket | |
1650 | * @s: socket | |
1651 | * @len: number of bytes to send | |
1652 | * | |
1653 | * This function sends @len bytes of data prepared in the | |
1654 | * transmit buffer of the socket @s to the network connection. | |
1655 | * If the packet can be sent immediately, it does so and returns | |
1656 | * 1, else it queues the packet for later processing, returns 0 | |
1657 | * and calls the @tx_hook of the socket when the tranmission | |
1658 | * takes place. | |
1659 | */ | |
b5d9ee5c MM |
1660 | int |
1661 | sk_send(sock *s, unsigned len) | |
1662 | { | |
b5d9ee5c MM |
1663 | s->ttx = s->tbuf; |
1664 | s->tpos = s->tbuf + len; | |
1665 | return sk_maybe_write(s); | |
1666 | } | |
1667 | ||
525fa2c1 MM |
1668 | /** |
1669 | * sk_send_to - send data to a specific destination | |
1670 | * @s: socket | |
1671 | * @len: number of bytes to send | |
1672 | * @addr: IP address to send the packet to | |
1673 | * @port: port to send the packet to | |
1674 | * | |
2e9b2421 | 1675 | * This is a sk_send() replacement for connection-less packet sockets |
525fa2c1 | 1676 | * which allows destination of the packet to be chosen dynamically. |
48e5f32d | 1677 | * Raw IP sockets should use 0 for @port. |
525fa2c1 | 1678 | */ |
b5d9ee5c MM |
1679 | int |
1680 | sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) | |
1681 | { | |
353729f5 | 1682 | s->daddr = addr; |
48e5f32d OZ |
1683 | if (port) |
1684 | s->dport = port; | |
1685 | ||
b5d9ee5c MM |
1686 | s->ttx = s->tbuf; |
1687 | s->tpos = s->tbuf + len; | |
1688 | return sk_maybe_write(s); | |
1689 | } | |
1690 | ||
353729f5 OZ |
1691 | /* |
1692 | int | |
1693 | sk_send_full(sock *s, unsigned len, struct iface *ifa, | |
1694 | ip_addr saddr, ip_addr daddr, unsigned dport) | |
1695 | { | |
1696 | s->iface = ifa; | |
1697 | s->saddr = saddr; | |
1698 | s->daddr = daddr; | |
1699 | s->dport = dport; | |
1700 | s->ttx = s->tbuf; | |
1701 | s->tpos = s->tbuf + len; | |
1702 | return sk_maybe_write(s); | |
1703 | } | |
1704 | */ | |
1705 | ||
6a8d3f1c OZ |
1706 | /* sk_read() and sk_write() are called from BFD's event loop */ |
1707 | ||
1708 | int | |
b5d9ee5c MM |
1709 | sk_read(sock *s) |
1710 | { | |
1711 | switch (s->type) | |
05476c4d OZ |
1712 | { |
1713 | case SK_TCP_PASSIVE: | |
1714 | return sk_passive_connected(s, SK_TCP); | |
1715 | ||
1716 | case SK_UNIX_PASSIVE: | |
1717 | return sk_passive_connected(s, SK_UNIX); | |
1718 | ||
1719 | case SK_TCP: | |
1720 | case SK_UNIX: | |
b5d9ee5c | 1721 | { |
05476c4d OZ |
1722 | int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos); |
1723 | ||
1724 | if (c < 0) | |
b93abffa | 1725 | { |
05476c4d OZ |
1726 | if (errno != EINTR && errno != EAGAIN) |
1727 | s->err_hook(s, errno); | |
b5d9ee5c | 1728 | } |
05476c4d OZ |
1729 | else if (!c) |
1730 | s->err_hook(s, 0); | |
1731 | else | |
b5d9ee5c | 1732 | { |
05476c4d OZ |
1733 | s->rpos += c; |
1734 | if (s->rx_hook(s, s->rpos - s->rbuf)) | |
1735 | { | |
1736 | /* We need to be careful since the socket could have been deleted by the hook */ | |
1737 | if (current_sock == s) | |
1738 | s->rpos = s->rbuf; | |
1739 | } | |
1740 | return 1; | |
b5d9ee5c | 1741 | } |
05476c4d OZ |
1742 | return 0; |
1743 | } | |
353729f5 | 1744 | |
05476c4d OZ |
1745 | case SK_MAGIC: |
1746 | return s->rx_hook(s, 0); | |
b5d9ee5c | 1747 | |
05476c4d OZ |
1748 | default: |
1749 | { | |
1750 | int e = sk_recvmsg(s); | |
353729f5 | 1751 | |
05476c4d OZ |
1752 | if (e < 0) |
1753 | { | |
1754 | if (errno != EINTR && errno != EAGAIN) | |
1755 | s->err_hook(s, errno); | |
1756 | return 0; | |
b5d9ee5c | 1757 | } |
05476c4d OZ |
1758 | |
1759 | s->rpos = s->rbuf + e; | |
1760 | s->rx_hook(s, e); | |
1761 | return 1; | |
b5d9ee5c | 1762 | } |
05476c4d | 1763 | } |
b5d9ee5c MM |
1764 | } |
1765 | ||
6a8d3f1c | 1766 | int |
b5d9ee5c MM |
1767 | sk_write(sock *s) |
1768 | { | |
320f4173 | 1769 | switch (s->type) |
05476c4d OZ |
1770 | { |
1771 | case SK_TCP_ACTIVE: | |
320f4173 | 1772 | { |
05476c4d OZ |
1773 | sockaddr sa; |
1774 | sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); | |
1775 | ||
1776 | if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN) | |
1777 | sk_tcp_connected(s); | |
1778 | else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS) | |
1779 | s->err_hook(s, errno); | |
38a608c5 | 1780 | return 0; |
320f4173 | 1781 | } |
05476c4d OZ |
1782 | |
1783 | default: | |
1784 | if (s->ttx != s->tpos && sk_maybe_write(s) > 0) | |
1785 | { | |
1786 | if (s->tx_hook) | |
1787 | s->tx_hook(s); | |
1788 | return 1; | |
1789 | } | |
1790 | return 0; | |
1791 | } | |
b5d9ee5c MM |
1792 | } |
1793 | ||
1794 | void | |
1795 | sk_dump_all(void) | |
1796 | { | |
1797 | node *n; | |
1798 | sock *s; | |
1799 | ||
1800 | debug("Open sockets:\n"); | |
1801 | WALK_LIST(n, sock_list) | |
05476c4d OZ |
1802 | { |
1803 | s = SKIP_BACK(sock, n, n); | |
1804 | debug("%p ", s); | |
1805 | sk_dump(&s->r); | |
1806 | } | |
b5d9ee5c MM |
1807 | debug("\n"); |
1808 | } | |
1809 | ||
b5d9ee5c MM |
1810 | |
1811 | /* | |
1812 | * Main I/O Loop | |
1813 | */ | |
1814 | ||
/*
 * Requests for asynchronous actions, polled by io_loop() right before it
 * enters select(). Presumably raised from signal handlers defined elsewhere.
 */
volatile int async_config_flag;		/* Asynchronous reconfiguration/dump scheduled */
volatile int async_dump_flag;
1817 | ||
b5d9ee5c MM |
1818 | void |
1819 | io_init(void) | |
1820 | { | |
1821 | init_list(&near_timers); | |
1822 | init_list(&far_timers); | |
1823 | init_list(&sock_list); | |
e8f73195 | 1824 | init_list(&global_event_list); |
7e5f5ffd | 1825 | krt_io_init(); |
fd91ae33 OZ |
1826 | init_times(); |
1827 | update_times(); | |
a92cf57d | 1828 | boot_time = now; |
fd91ae33 | 1829 | srandom((int) now_real); |
b5d9ee5c MM |
1830 | } |
1831 | ||
/* Number of consecutive io_loop() rounds that skipped the rate-limited RX pass */
static int short_loops = 0;

/* Upper bound on such consecutive rounds while events keep being processed */
#define SHORT_LOOP_MAX 10
1834 | ||
b5d9ee5c MM |
1835 | void |
1836 | io_loop(void) | |
1837 | { | |
1838 | fd_set rd, wr; | |
1839 | struct timeval timo; | |
1840 | time_t tout; | |
30770df2 | 1841 | int hi, events; |
b5d9ee5c | 1842 | sock *s; |
38a608c5 | 1843 | node *n; |
b5d9ee5c | 1844 | |
38a608c5 | 1845 | sock_recalc_fdsets_p = 1; |
b5d9ee5c MM |
1846 | for(;;) |
1847 | { | |
30770df2 | 1848 | events = ev_run_list(&global_event_list); |
fd91ae33 | 1849 | update_times(); |
b5d9ee5c MM |
1850 | tout = tm_first_shot(); |
1851 | if (tout <= now) | |
1852 | { | |
1853 | tm_shot(); | |
1854 | continue; | |
1855 | } | |
a92cf57d | 1856 | timo.tv_sec = events ? 0 : MIN(tout - now, 3); |
30770df2 | 1857 | timo.tv_usec = 0; |
b5d9ee5c | 1858 | |
38a608c5 MM |
1859 | if (sock_recalc_fdsets_p) |
1860 | { | |
1861 | sock_recalc_fdsets_p = 0; | |
1862 | FD_ZERO(&rd); | |
1863 | FD_ZERO(&wr); | |
1864 | } | |
1865 | ||
b5d9ee5c MM |
1866 | hi = 0; |
1867 | WALK_LIST(n, sock_list) | |
1868 | { | |
1869 | s = SKIP_BACK(sock, n, n); | |
1870 | if (s->rx_hook) | |
1871 | { | |
1872 | FD_SET(s->fd, &rd); | |
1873 | if (s->fd > hi) | |
1874 | hi = s->fd; | |
1875 | } | |
38a608c5 MM |
1876 | else |
1877 | FD_CLR(s->fd, &rd); | |
b5d9ee5c MM |
1878 | if (s->tx_hook && s->ttx != s->tpos) |
1879 | { | |
1880 | FD_SET(s->fd, &wr); | |
1881 | if (s->fd > hi) | |
1882 | hi = s->fd; | |
1883 | } | |
38a608c5 MM |
1884 | else |
1885 | FD_CLR(s->fd, &wr); | |
b5d9ee5c MM |
1886 | } |
1887 | ||
4c9dd1e4 MM |
1888 | /* |
1889 | * Yes, this is racy. But even if the signal comes before this test | |
1890 | * and entering select(), it gets caught on the next timer tick. | |
1891 | */ | |
1892 | ||
1893 | if (async_config_flag) | |
1894 | { | |
1895 | async_config(); | |
1896 | async_config_flag = 0; | |
f4aabcee | 1897 | continue; |
4c9dd1e4 MM |
1898 | } |
1899 | if (async_dump_flag) | |
1900 | { | |
1901 | async_dump(); | |
1902 | async_dump_flag = 0; | |
f4aabcee MM |
1903 | continue; |
1904 | } | |
1905 | if (async_shutdown_flag) | |
1906 | { | |
1907 | async_shutdown(); | |
1908 | async_shutdown_flag = 0; | |
1909 | continue; | |
4c9dd1e4 MM |
1910 | } |
1911 | ||
1912 | /* And finally enter select() to find active sockets */ | |
b5d9ee5c | 1913 | hi = select(hi+1, &rd, &wr, NULL, &timo); |
ea89da38 | 1914 | |
b5d9ee5c MM |
1915 | if (hi < 0) |
1916 | { | |
1917 | if (errno == EINTR || errno == EAGAIN) | |
1918 | continue; | |
1919 | die("select: %m"); | |
1920 | } | |
1921 | if (hi) | |
1922 | { | |
ea89da38 OZ |
1923 | /* guaranteed to be non-empty */ |
1924 | current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); | |
1925 | ||
38a608c5 | 1926 | while (current_sock) |
b5d9ee5c | 1927 | { |
38a608c5 MM |
1928 | sock *s = current_sock; |
1929 | int e; | |
ea89da38 OZ |
1930 | int steps; |
1931 | ||
1932 | steps = MAX_STEPS; | |
1933 | if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) | |
38a608c5 MM |
1934 | do |
1935 | { | |
4323099d | 1936 | steps--; |
38a608c5 MM |
1937 | e = sk_read(s); |
1938 | if (s != current_sock) | |
1939 | goto next; | |
1940 | } | |
4323099d OZ |
1941 | while (e && s->rx_hook && steps); |
1942 | ||
1943 | steps = MAX_STEPS; | |
38a608c5 MM |
1944 | if (FD_ISSET(s->fd, &wr)) |
1945 | do | |
1946 | { | |
4323099d | 1947 | steps--; |
38a608c5 MM |
1948 | e = sk_write(s); |
1949 | if (s != current_sock) | |
1950 | goto next; | |
1951 | } | |
4323099d | 1952 | while (e && steps); |
38a608c5 MM |
1953 | current_sock = sk_next(s); |
1954 | next: ; | |
b5d9ee5c | 1955 | } |
ea89da38 OZ |
1956 | |
1957 | short_loops++; | |
1958 | if (events && (short_loops < SHORT_LOOP_MAX)) | |
1959 | continue; | |
1960 | short_loops = 0; | |
1961 | ||
1962 | int count = 0; | |
1963 | current_sock = stored_sock; | |
1964 | if (current_sock == NULL) | |
1965 | current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); | |
1966 | ||
1967 | while (current_sock && count < MAX_RX_STEPS) | |
1968 | { | |
1969 | sock *s = current_sock; | |
0479b443 | 1970 | int e UNUSED; |
ea89da38 OZ |
1971 | |
1972 | if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) | |
1973 | { | |
1974 | count++; | |
1975 | e = sk_read(s); | |
1976 | if (s != current_sock) | |
1977 | goto next2; | |
1978 | } | |
1979 | current_sock = sk_next(s); | |
1980 | next2: ; | |
1981 | } | |
1982 | ||
1983 | stored_sock = current_sock; | |
b5d9ee5c MM |
1984 | } |
1985 | } | |
1986 | } | |
41c8976e OF |
1987 | |
/*
 * Terminate (via die()) if another BIRD instance already answers on the
 * UNIX-domain socket at @path. Also dies when the path does not fit into a
 * sockaddr_un or when no socket can be created. Returns normally when the
 * connection attempt fails.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");

  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  /* A successful connect means somebody is listening there */
  int rv = connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa));
  if (rv == 0)
    die("I found another BIRD running.");

  close(fd);
}
2006 | ||
2007 |