]> git.ipfire.org Git - thirdparty/bird.git/blob - sysdep/unix/krt.c
Merge branch 'master' into mq-filter-stack
[thirdparty/bird.git] / sysdep / unix / krt.c
1 /*
2 * BIRD -- UNIX Kernel Synchronization
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Kernel synchronization
11 *
12 * This system dependent module implements the Kernel and Device protocol,
13 * that is synchronization of interface lists and routing tables with the
14 * OS kernel.
15 *
16 * The whole kernel synchronization is a bit messy and touches some internals
17 * of the routing table engine, because routing table maintenance is a typical
18 * example of the proverbial compatibility between different Unices and we want
19 * to keep the overhead of our KRT business as low as possible and avoid maintaining
20 * a local routing table copy.
21 *
22 * The kernel syncer can work in three different modes (according to system config header):
23 * Either with a single routing table and single KRT protocol [traditional UNIX]
24 * or with many routing tables and separate KRT protocols for all of them
25 * or with many routing tables, but every scan including all tables, so we start
26 * separate KRT protocols which cooperate with each other [Linux].
27 * In this case, we keep only a single scan timer.
28 *
29 * We use FIB node flags in the routing table to keep track of route
30 * synchronization status. We also attach temporary &rte's to the routing table,
31 * but it cannot do any harm to the rest of BIRD since table synchronization is
32 * an atomic process.
33 *
34 * When starting up, we cheat by looking if there is another
35 * KRT instance to be initialized later and performing table scan
36 * only once for all the instances.
37 *
38 * The code uses OS-dependent parts for kernel updates and scans. These parts are
39 * in more specific sysdep directories (e.g. sysdep/linux) in functions krt_sys_*
40 * and kif_sys_* (and some others like krt_replace_rte()) and krt-sys.h header file.
41 * This is also used for platform specific protocol options and route attributes.
42 *
43 * There was also old code that used traditional UNIX ioctls for these tasks.
44 * It was unmaintained and later removed. For reference, see sysdep/krt-* files
45 * in commit 396dfa9042305f62da1f56589c4b98fac57fc2f6
46 */
47
48 /*
49 * If you are brave enough, continue now. You cannot say you haven't been warned.
50 */
51
52 #undef LOCAL_DEBUG
53
54 #include "nest/bird.h"
55 #include "nest/iface.h"
56 #include "nest/route.h"
57 #include "nest/protocol.h"
58 #include "filter/filter.h"
59 #include "conf/conf.h"
60 #include "lib/string.h"
61 #include "lib/timer.h"
62
63 #include "unix.h"
64 #include "krt.h"
65
66 /*
67 * Global resources
68 */
69
/* Resource pool named "Kernel Syncer", created under root_pool by krt_io_init() */
pool *krt_pool;
/* Linear pool handed to export filtering; flushed by krt_got_route() and krt_prune() */
static linpool *krt_filter_lp;
/* List of started kernel protocol instances, linked through their krt_node */
static list krt_proto_list;
73
74 void
75 krt_io_init(void)
76 {
77 krt_pool = rp_new(&root_pool, "Kernel Syncer");
78 krt_filter_lp = lp_new_default(krt_pool);
79 init_list(&krt_proto_list);
80 krt_sys_io_init();
81 }
82
83 /*
84 * Interfaces
85 */
86
/* The running Device protocol instance; set by kif_start(), cleared by kif_shutdown() */
struct kif_proto *kif_proto;
/* Device protocol config being built; reset by kif_preconfig(), set by kif_init_config() */
static struct kif_config *kif_cf;
/* Interface scan timer, created in kif_start() */
static timer *kif_scan_timer;
/* Time of the most recent interface scan, recorded by kif_scan() */
static btime kif_last_shot;

/* Fallback returned by kif_get_iface_config() when no interface pattern matches */
static struct kif_iface_config kif_default_iface = {};
93
94 struct kif_iface_config *
95 kif_get_iface_config(struct iface *iface)
96 {
97 struct kif_config *cf = (void *) (kif_proto->p.cf);
98 struct kif_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL);
99 return ic ?: &kif_default_iface;
100 }
101
102 static void
103 kif_scan(timer *t)
104 {
105 struct kif_proto *p = t->data;
106
107 KRT_TRACE(p, D_EVENTS, "Scanning interfaces");
108 kif_last_shot = current_time();
109 kif_do_scan(p);
110 }
111
112 static void
113 kif_force_scan(void)
114 {
115 if (kif_proto && ((kif_last_shot + 2 S) < current_time()))
116 {
117 kif_scan(kif_scan_timer);
118 tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time);
119 }
120 }
121
122 void
123 kif_request_scan(void)
124 {
125 if (kif_proto && (kif_scan_timer->expires > (current_time() + 1 S)))
126 tm_start(kif_scan_timer, 1 S);
127 }
128
129 static struct proto *
130 kif_init(struct proto_config *c)
131 {
132 struct kif_proto *p = proto_new(c);
133
134 kif_sys_init(p);
135 return &p->p;
136 }
137
138 static int
139 kif_start(struct proto *P)
140 {
141 struct kif_proto *p = (struct kif_proto *) P;
142
143 kif_proto = p;
144 kif_sys_start(p);
145
146 /* Start periodic interface scanning */
147 kif_scan_timer = tm_new_init(P->pool, kif_scan, p, KIF_CF->scan_time, 0);
148 kif_scan(kif_scan_timer);
149 tm_start(kif_scan_timer, KIF_CF->scan_time);
150
151 return PS_UP;
152 }
153
154 static int
155 kif_shutdown(struct proto *P)
156 {
157 struct kif_proto *p = (struct kif_proto *) P;
158
159 tm_stop(kif_scan_timer);
160 kif_sys_shutdown(p);
161 kif_proto = NULL;
162
163 return PS_DOWN;
164 }
165
166 static int
167 kif_reconfigure(struct proto *p, struct proto_config *new)
168 {
169 struct kif_config *o = (struct kif_config *) p->cf;
170 struct kif_config *n = (struct kif_config *) new;
171
172 if (!kif_sys_reconfigure((struct kif_proto *) p, n, o))
173 return 0;
174
175 if (o->scan_time != n->scan_time)
176 {
177 tm_stop(kif_scan_timer);
178 kif_scan_timer->recurrent = n->scan_time;
179 kif_scan(kif_scan_timer);
180 tm_start(kif_scan_timer, n->scan_time);
181 }
182
183 if (!EMPTY_LIST(o->iface_list) || !EMPTY_LIST(n->iface_list))
184 {
185 /* This is hack, we have to update a configuration
186 * to the new value just now, because it is used
187 * for recalculation of preferred addresses.
188 */
189 p->cf = new;
190
191 if_recalc_all_preferred_addresses();
192 }
193
194 return 1;
195 }
196
197
198 static void
199 kif_preconfig(struct protocol *P UNUSED, struct config *c)
200 {
201 kif_cf = NULL;
202 kif_sys_preconfig(c);
203 }
204
205 struct proto_config *
206 kif_init_config(int class)
207 {
208 if (kif_cf)
209 cf_error("Kernel device protocol already defined");
210
211 kif_cf = (struct kif_config *) proto_config_new(&proto_unix_iface, class);
212 kif_cf->scan_time = 60 S;
213 init_list(&kif_cf->iface_list);
214
215 kif_sys_init_config(kif_cf);
216 return (struct proto_config *) kif_cf;
217 }
218
219 static void
220 kif_copy_config(struct proto_config *dest, struct proto_config *src)
221 {
222 struct kif_config *d = (struct kif_config *) dest;
223 struct kif_config *s = (struct kif_config *) src;
224
225 /* Copy interface config list */
226 cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct kif_iface_config));
227
228 /* Fix sysdep parts */
229 kif_sys_copy_config(d, s);
230 }
231
/* Protocol descriptor of the Device protocol; wires up the kif_* hooks defined above */
struct protocol proto_unix_iface = {
  .name =		"Device",
  .template =		"device%d",		/* naming template for instances */
  .class =		PROTOCOL_DEVICE,
  .proto_size =		sizeof(struct kif_proto),
  .config_size =	sizeof(struct kif_config),
  .preconfig =		kif_preconfig,
  .init =		kif_init,
  .start =		kif_start,
  .shutdown =		kif_shutdown,
  .reconfigure =	kif_reconfigure,
  .copy_config =	kif_copy_config
};
245
246 /*
247 * Tracing of routes
248 */
249
250 static inline void
251 krt_trace_in(struct krt_proto *p, rte *e, char *msg)
252 {
253 if (p->p.debug & D_PACKETS)
254 log(L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg);
255 }
256
257 static inline void
258 krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg)
259 {
260 if (p->p.debug & D_PACKETS)
261 log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg);
262 }
263
264 /*
265 * Inherited Routes
266 */
267
268 #ifdef KRT_ALLOW_LEARN
269
/* Rate limiter for the "[alien] seen" / "[alien] ignored" trace messages */
static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS;
271
272 /*
273 * krt_same_key() specifies what (aside from the net) is the key in
274 * kernel routing tables. It should be OS-dependent, this is for
275 * Linux. It is important for asynchronous alien updates, because a
276 * positive update is implicitly a negative one for any old route with
277 * the same key.
278 */
279
280 static inline int
281 krt_same_key(rte *a, rte *b)
282 {
283 return a->u.krt.metric == b->u.krt.metric;
284 }
285
286 static inline int
287 krt_uptodate(rte *a, rte *b)
288 {
289 if (a->attrs != b->attrs)
290 return 0;
291
292 if (a->u.krt.proto != b->u.krt.proto)
293 return 0;
294
295 return 1;
296 }
297
298 static void
299 krt_learn_announce_update(struct krt_proto *p, rte *e)
300 {
301 net *n = e->net;
302 rta *aa = rta_clone(e->attrs);
303 rte *ee = rte_get_temp(aa);
304 ee->pflags = EA_ID_FLAG(EA_KRT_SOURCE) | EA_ID_FLAG(EA_KRT_METRIC);
305 ee->u.krt = e->u.krt;
306 rte_update(&p->p, n->n.addr, ee);
307 }
308
309 static void
310 krt_learn_announce_delete(struct krt_proto *p, net *n)
311 {
312 rte_update(&p->p, n->n.addr, NULL);
313 }
314
315 /* Called when alien route is discovered during scan */
316 static void
317 krt_learn_scan(struct krt_proto *p, rte *e)
318 {
319 net *n0 = e->net;
320 net *n = net_get(&p->krt_table, n0->n.addr);
321 rte *m, **mm;
322
323 e->attrs = rta_lookup(e->attrs);
324
325 for(mm=&n->routes; m = *mm; mm=&m->next)
326 if (krt_same_key(m, e))
327 break;
328 if (m)
329 {
330 if (krt_uptodate(m, e))
331 {
332 krt_trace_in_rl(&rl_alien, p, e, "[alien] seen");
333 rte_free(e);
334 m->u.krt.seen = 1;
335 }
336 else
337 {
338 krt_trace_in(p, e, "[alien] updated");
339 *mm = m->next;
340 rte_free(m);
341 m = NULL;
342 }
343 }
344 else
345 krt_trace_in(p, e, "[alien] created");
346 if (!m)
347 {
348 e->next = n->routes;
349 n->routes = e;
350 e->u.krt.seen = 1;
351 }
352 }
353
354 static void
355 krt_learn_prune(struct krt_proto *p)
356 {
357 struct fib *fib = &p->krt_table.fib;
358 struct fib_iterator fit;
359
360 KRT_TRACE(p, D_EVENTS, "Pruning inherited routes");
361
362 FIB_ITERATE_INIT(&fit, fib);
363 again:
364 FIB_ITERATE_START(fib, &fit, net, n)
365 {
366 rte *e, **ee, *best, **pbest, *old_best;
367
368 /*
369 * Note that old_best may be NULL even if there was an old best route in
370 * the previous step, because it might be replaced in krt_learn_scan().
371 * But in that case there is a new valid best route.
372 */
373
374 old_best = NULL;
375 best = NULL;
376 pbest = NULL;
377 ee = &n->routes;
378 while (e = *ee)
379 {
380 if (e->u.krt.best)
381 old_best = e;
382
383 if (!e->u.krt.seen)
384 {
385 *ee = e->next;
386 rte_free(e);
387 continue;
388 }
389
390 if (!best || best->u.krt.metric > e->u.krt.metric)
391 {
392 best = e;
393 pbest = ee;
394 }
395
396 e->u.krt.seen = 0;
397 e->u.krt.best = 0;
398 ee = &e->next;
399 }
400 if (!n->routes)
401 {
402 DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen);
403 if (old_best)
404 krt_learn_announce_delete(p, n);
405
406 FIB_ITERATE_PUT(&fit);
407 fib_delete(fib, n);
408 goto again;
409 }
410
411 best->u.krt.best = 1;
412 *pbest = best->next;
413 best->next = n->routes;
414 n->routes = best;
415
416 if ((best != old_best) || p->reload)
417 {
418 DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
419 krt_learn_announce_update(p, best);
420 }
421 else
422 DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
423 }
424 FIB_ITERATE_END;
425
426 p->reload = 0;
427 }
428
429 static void
430 krt_learn_async(struct krt_proto *p, rte *e, int new)
431 {
432 net *n0 = e->net;
433 net *n = net_get(&p->krt_table, n0->n.addr);
434 rte *g, **gg, *best, **bestp, *old_best;
435
436 e->attrs = rta_lookup(e->attrs);
437
438 old_best = n->routes;
439 for(gg=&n->routes; g = *gg; gg = &g->next)
440 if (krt_same_key(g, e))
441 break;
442 if (new)
443 {
444 if (g)
445 {
446 if (krt_uptodate(g, e))
447 {
448 krt_trace_in(p, e, "[alien async] same");
449 rte_free(e);
450 return;
451 }
452 krt_trace_in(p, e, "[alien async] updated");
453 *gg = g->next;
454 rte_free(g);
455 }
456 else
457 krt_trace_in(p, e, "[alien async] created");
458
459 e->next = n->routes;
460 n->routes = e;
461 }
462 else if (!g)
463 {
464 krt_trace_in(p, e, "[alien async] delete failed");
465 rte_free(e);
466 return;
467 }
468 else
469 {
470 krt_trace_in(p, e, "[alien async] removed");
471 *gg = g->next;
472 rte_free(e);
473 rte_free(g);
474 }
475 best = n->routes;
476 bestp = &n->routes;
477 for(gg=&n->routes; g=*gg; gg=&g->next)
478 {
479 if (best->u.krt.metric > g->u.krt.metric)
480 {
481 best = g;
482 bestp = gg;
483 }
484
485 g->u.krt.best = 0;
486 }
487
488 if (best)
489 {
490 best->u.krt.best = 1;
491 *bestp = best->next;
492 best->next = n->routes;
493 n->routes = best;
494 }
495
496 if (best != old_best)
497 {
498 DBG("krt_learn_async: distributing change\n");
499 if (best)
500 krt_learn_announce_update(p, best);
501 else
502 krt_learn_announce_delete(p, n);
503 }
504 }
505
506 static void
507 krt_learn_init(struct krt_proto *p)
508 {
509 if (KRT_CF->learn)
510 {
511 struct rtable_config *cf = mb_allocz(p->p.pool, sizeof(struct rtable_config));
512 cf->name = "Inherited";
513 cf->addr_type = p->p.net_type;
514
515 rt_setup(p->p.pool, &p->krt_table, cf);
516 }
517 }
518
519 static void
520 krt_dump(struct proto *P)
521 {
522 struct krt_proto *p = (struct krt_proto *) P;
523
524 if (!KRT_CF->learn)
525 return;
526 debug("KRT: Table of inheritable routes\n");
527 rt_dump(&p->krt_table);
528 }
529
530 static void
531 krt_dump_attrs(rte *e)
532 {
533 debug(" [m=%d,p=%d]", e->u.krt.metric, e->u.krt.proto);
534 }
535
536 #endif
537
538 /*
539 * Routes
540 */
541
542 static void
543 krt_flush_routes(struct krt_proto *p)
544 {
545 struct rtable *t = p->p.main_channel->table;
546
547 KRT_TRACE(p, D_EVENTS, "Flushing kernel routes");
548 FIB_WALK(&t->fib, net, n)
549 {
550 rte *e = n->routes;
551 if (rte_is_valid(e) && (n->n.flags & KRF_INSTALLED))
552 {
553 /* FIXME: this does not work if gw is changed in export filter */
554 krt_replace_rte(p, e->net, NULL, e);
555 n->n.flags &= ~KRF_INSTALLED;
556 }
557 }
558 FIB_WALK_END;
559 }
560
561 static struct rte *
562 krt_export_net(struct krt_proto *p, net *net, rte **rt_free)
563 {
564 struct channel *c = p->p.main_channel;
565 const struct filter *filter = c->out_filter;
566 rte *rt;
567
568 if (c->ra_mode == RA_MERGED)
569 return rt_export_merged(c, net, rt_free, krt_filter_lp, 1);
570
571 rt = net->routes;
572 *rt_free = NULL;
573
574 if (!rte_is_valid(rt))
575 return NULL;
576
577 if (filter == FILTER_REJECT)
578 return NULL;
579
580 rte_make_tmp_attrs(&rt, krt_filter_lp, NULL);
581
582 /* We could run krt_preexport() here, but it is already handled by KRF_INSTALLED */
583
584 if (filter == FILTER_ACCEPT)
585 goto accept;
586
587 if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT)
588 goto reject;
589
590
591 accept:
592 if (rt != net->routes)
593 *rt_free = rt;
594 return rt;
595
596 reject:
597 if (rt != net->routes)
598 rte_free(rt);
599 return NULL;
600 }
601
602 static int
603 krt_same_dest(rte *k, rte *e)
604 {
605 rta *ka = k->attrs, *ea = e->attrs;
606
607 if (ka->dest != ea->dest)
608 return 0;
609
610 if (ka->dest == RTD_UNICAST)
611 return nexthop_same(&(ka->nh), &(ea->nh));
612
613 return 1;
614 }
615
616 /*
617 * This gets called back when the low-level scanning code discovers a route.
618 * We expect that the route is a temporary rte and its attributes are uncached.
619 */
620
621 void
622 krt_got_route(struct krt_proto *p, rte *e)
623 {
624 net *net = e->net;
625 int verdict;
626
627 #ifdef KRT_ALLOW_LEARN
628 switch (e->u.krt.src)
629 {
630 case KRT_SRC_KERNEL:
631 verdict = KRF_IGNORE;
632 goto sentenced;
633
634 case KRT_SRC_REDIRECT:
635 verdict = KRF_DELETE;
636 goto sentenced;
637
638 case KRT_SRC_ALIEN:
639 if (KRT_CF->learn)
640 krt_learn_scan(p, e);
641 else
642 {
643 krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored");
644 rte_free(e);
645 }
646 return;
647 }
648 #endif
649 /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */
650
651 if (net->n.flags & KRF_VERDICT_MASK)
652 {
653 /* Route to this destination was already seen. Strange, but it happens... */
654 krt_trace_in(p, e, "already seen");
655 rte_free(e);
656 return;
657 }
658
659 if (!p->ready)
660 {
661 /* We wait for the initial feed to have correct KRF_INSTALLED flag */
662 verdict = KRF_IGNORE;
663 goto sentenced;
664 }
665
666 if (net->n.flags & KRF_INSTALLED)
667 {
668 rte *new, *rt_free;
669
670 new = krt_export_net(p, net, &rt_free);
671
672 /* TODO: There also may be changes in route eattrs, we ignore that for now. */
673
674 if (!new)
675 verdict = KRF_DELETE;
676 else if ((net->n.flags & KRF_SYNC_ERROR) || !krt_same_dest(e, new))
677 verdict = KRF_UPDATE;
678 else
679 verdict = KRF_SEEN;
680
681 if (rt_free)
682 rte_free(rt_free);
683
684 lp_flush(krt_filter_lp);
685 }
686 else
687 verdict = KRF_DELETE;
688
689 sentenced:
690 krt_trace_in(p, e, ((char *[]) { "?", "seen", "will be updated", "will be removed", "ignored" }) [verdict]);
691 net->n.flags = (net->n.flags & ~KRF_VERDICT_MASK) | verdict;
692 if (verdict == KRF_UPDATE || verdict == KRF_DELETE)
693 {
694 /* Get a cached copy of attributes and temporarily link the route */
695 rta *a = e->attrs;
696 a->source = RTS_DUMMY;
697 e->attrs = rta_lookup(a);
698 e->next = net->routes;
699 net->routes = e;
700 }
701 else
702 rte_free(e);
703 }
704
705 static void
706 krt_prune(struct krt_proto *p)
707 {
708 struct rtable *t = p->p.main_channel->table;
709
710 KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name);
711 FIB_WALK(&t->fib, net, n)
712 {
713 int verdict = n->n.flags & KRF_VERDICT_MASK;
714 rte *new, *old, *rt_free = NULL;
715
716 if (verdict == KRF_UPDATE || verdict == KRF_DELETE)
717 {
718 /* Get a dummy route from krt_got_route() */
719 old = n->routes;
720 n->routes = old->next;
721 }
722 else
723 old = NULL;
724
725 if (verdict == KRF_CREATE || verdict == KRF_UPDATE)
726 {
727 /* We have to run export filter to get proper 'new' route */
728 new = krt_export_net(p, n, &rt_free);
729
730 if (!new)
731 verdict = (verdict == KRF_CREATE) ? KRF_IGNORE : KRF_DELETE;
732 }
733 else
734 new = NULL;
735
736 switch (verdict)
737 {
738 case KRF_CREATE:
739 if (new && (n->n.flags & KRF_INSTALLED))
740 {
741 krt_trace_in(p, new, "reinstalling");
742 krt_replace_rte(p, n, new, NULL);
743 }
744 break;
745 case KRF_SEEN:
746 case KRF_IGNORE:
747 /* Nothing happens */
748 break;
749 case KRF_UPDATE:
750 krt_trace_in(p, new, "updating");
751 krt_replace_rte(p, n, new, old);
752 break;
753 case KRF_DELETE:
754 krt_trace_in(p, old, "deleting");
755 krt_replace_rte(p, n, NULL, old);
756 break;
757 default:
758 bug("krt_prune: invalid route status");
759 }
760
761 if (old)
762 rte_free(old);
763 if (rt_free)
764 rte_free(rt_free);
765 lp_flush(krt_filter_lp);
766 n->n.flags &= ~KRF_VERDICT_MASK;
767 }
768 FIB_WALK_END;
769
770 #ifdef KRT_ALLOW_LEARN
771 if (KRT_CF->learn)
772 krt_learn_prune(p);
773 #endif
774
775 if (p->ready)
776 p->initialized = 1;
777 }
778
779 void
780 krt_got_route_async(struct krt_proto *p, rte *e, int new)
781 {
782 net *net = e->net;
783
784 switch (e->u.krt.src)
785 {
786 case KRT_SRC_BIRD:
787 /* Should be filtered by the back end */
788 bug("BIRD originated routes should not get here.");
789
790 case KRT_SRC_REDIRECT:
791 if (new)
792 {
793 krt_trace_in(p, e, "[redirect] deleting");
794 krt_replace_rte(p, net, NULL, e);
795 }
796 /* If !new, it is probably echo of our deletion */
797 break;
798
799 #ifdef KRT_ALLOW_LEARN
800 case KRT_SRC_ALIEN:
801 if (KRT_CF->learn)
802 {
803 krt_learn_async(p, e, new);
804 return;
805 }
806 #endif
807 }
808 rte_free(e);
809 }
810
811 /*
812 * Periodic scanning
813 */
814
815
816 #ifdef CONFIG_ALL_TABLES_AT_ONCE
817
/* The single scan timer shared by all kernel protocol instances (CONFIG_ALL_TABLES_AT_ONCE) */
static timer *krt_scan_timer;
/* Number of instances currently using krt_scan_timer; see krt_scan_timer_start()/_stop() */
static int krt_scan_count;
820
821 static void
822 krt_scan(timer *t UNUSED)
823 {
824 struct krt_proto *p;
825
826 kif_force_scan();
827
828 /* We need some node to decide whether to print the debug messages or not */
829 p = SKIP_BACK(struct krt_proto, krt_node, HEAD(krt_proto_list));
830 KRT_TRACE(p, D_EVENTS, "Scanning routing table");
831
832 krt_do_scan(NULL);
833
834 void *q;
835 WALK_LIST(q, krt_proto_list)
836 {
837 p = SKIP_BACK(struct krt_proto, krt_node, q);
838 krt_prune(p);
839 }
840 }
841
842 static void
843 krt_scan_timer_start(struct krt_proto *p)
844 {
845 if (!krt_scan_count)
846 krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0);
847
848 krt_scan_count++;
849
850 tm_start(krt_scan_timer, 1 S);
851 }
852
853 static void
854 krt_scan_timer_stop(struct krt_proto *p UNUSED)
855 {
856 krt_scan_count--;
857
858 if (!krt_scan_count)
859 {
860 rfree(krt_scan_timer);
861 krt_scan_timer = NULL;
862 }
863 }
864
865 static void
866 krt_scan_timer_kick(struct krt_proto *p UNUSED)
867 {
868 tm_start(krt_scan_timer, 0);
869 }
870
871 #else
872
873 static void
874 krt_scan(timer *t)
875 {
876 struct krt_proto *p = t->data;
877
878 kif_force_scan();
879
880 KRT_TRACE(p, D_EVENTS, "Scanning routing table");
881 krt_do_scan(p);
882 krt_prune(p);
883 }
884
885 static void
886 krt_scan_timer_start(struct krt_proto *p)
887 {
888 p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
889 tm_start(p->scan_timer, 1 S);
890 }
891
892 static void
893 krt_scan_timer_stop(struct krt_proto *p)
894 {
895 tm_stop(p->scan_timer);
896 }
897
898 static void
899 krt_scan_timer_kick(struct krt_proto *p)
900 {
901 tm_start(p->scan_timer, 0);
902 }
903
904 #endif
905
906
907
908
909 /*
910 * Updates
911 */
912
913 static void
914 krt_make_tmp_attrs(struct rte *rt, struct linpool *pool)
915 {
916 rte_init_tmp_attrs(rt, pool, 2);
917 rte_make_tmp_attr(rt, EA_KRT_SOURCE, EAF_TYPE_INT, rt->u.krt.proto);
918 rte_make_tmp_attr(rt, EA_KRT_METRIC, EAF_TYPE_INT, rt->u.krt.metric);
919 }
920
921 static void
922 krt_store_tmp_attrs(struct rte *rt, struct linpool *pool)
923 {
924 rte_init_tmp_attrs(rt, pool, 2);
925 rt->u.krt.proto = rte_store_tmp_attr(rt, EA_KRT_SOURCE);
926 rt->u.krt.metric = rte_store_tmp_attr(rt, EA_KRT_METRIC);
927 }
928
929 static int
930 krt_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED)
931 {
932 // struct krt_proto *p = (struct krt_proto *) P;
933 rte *e = *new;
934
935 if (e->attrs->src->proto == P)
936 {
937 #ifdef CONFIG_SINGLE_ROUTE
938 /*
939 * Implicit withdraw - when the imported kernel route becomes the best one,
940 * we know that the previous one exported to the kernel was already removed,
941 * but if we processed the update as usual, we would send withdraw to the
942 * kernel, which would remove the new imported route instead.
943 *
944 * We will remove KRT_INSTALLED flag, which stops such withdraw to be
945 * processed in krt_rt_notify() and krt_replace_rte().
946 */
947 if (e == e->net->routes)
948 e->net->n.flags &= ~KRF_INSTALLED;
949 #endif
950 return -1;
951 }
952
953 if (!krt_capable(e))
954 return -1;
955
956 return 0;
957 }
958
959 static void
960 krt_rt_notify(struct proto *P, struct channel *ch UNUSED, net *net,
961 rte *new, rte *old)
962 {
963 struct krt_proto *p = (struct krt_proto *) P;
964
965 if (config->shutdown)
966 return;
967 if (!(net->n.flags & KRF_INSTALLED))
968 old = NULL;
969 if (new)
970 net->n.flags |= KRF_INSTALLED;
971 else
972 net->n.flags &= ~KRF_INSTALLED;
973 if (p->initialized) /* Before first scan we don't touch the routes */
974 krt_replace_rte(p, net, new, old);
975 }
976
977 static void
978 krt_if_notify(struct proto *P, uint flags, struct iface *iface UNUSED)
979 {
980 struct krt_proto *p = (struct krt_proto *) P;
981
982 /*
983 * When interface went down, we should remove routes to it. In the ideal world,
984 * OS kernel would send us route removal notifications in such cases, but we
985 * cannot rely on it as it is often not true. E.g. Linux kernel removes related
986 * routes when an interface went down, but it does not notify userspace about
987 * that. To be sure, we just schedule a scan to ensure synchronization.
988 */
989
990 if ((flags & IF_CHANGE_DOWN) && KRT_CF->learn)
991 krt_scan_timer_kick(p);
992 }
993
994 static void
995 krt_reload_routes(struct channel *C)
996 {
997 struct krt_proto *p = (void *) C->proto;
998
999 /* Although we keep learned routes in krt_table, we rather schedule a scan */
1000
1001 if (KRT_CF->learn)
1002 {
1003 p->reload = 1;
1004 krt_scan_timer_kick(p);
1005 }
1006 }
1007
1008 static void
1009 krt_feed_end(struct channel *C)
1010 {
1011 struct krt_proto *p = (void *) C->proto;
1012
1013 p->ready = 1;
1014 krt_scan_timer_kick(p);
1015 }
1016
1017
1018 static int
1019 krt_rte_same(rte *a, rte *b)
1020 {
1021 /* src is always KRT_SRC_ALIEN and type is irrelevant */
1022 return (a->u.krt.proto == b->u.krt.proto) && (a->u.krt.metric == b->u.krt.metric);
1023 }
1024
1025
1026 /*
1027 * Protocol glue
1028 */
1029
/* Most recently created Kernel protocol config; reset by krt_preconfig(), set by krt_init_config() */
struct krt_config *krt_cf;
1031
1032 static void
1033 krt_preconfig(struct protocol *P UNUSED, struct config *c)
1034 {
1035 krt_cf = NULL;
1036 krt_sys_preconfig(c);
1037 }
1038
1039 static void
1040 krt_postconfig(struct proto_config *CF)
1041 {
1042 struct krt_config *cf = (void *) CF;
1043
1044 if (EMPTY_LIST(CF->channels))
1045 cf_error("Channel not specified");
1046
1047 #ifdef CONFIG_ALL_TABLES_AT_ONCE
1048 if (krt_cf->scan_time != cf->scan_time)
1049 cf_error("All kernel syncers must use the same table scan interval");
1050 #endif
1051
1052 struct channel_config *cc = proto_cf_main_channel(CF);
1053 struct rtable_config *tab = cc->table;
1054 if (tab->krt_attached)
1055 cf_error("Kernel syncer (%s) already attached to table %s", tab->krt_attached->name, tab->name);
1056 tab->krt_attached = CF;
1057
1058 if (cf->merge_paths)
1059 {
1060 cc->ra_mode = RA_MERGED;
1061 cc->merge_limit = cf->merge_paths;
1062 }
1063
1064 krt_sys_postconfig(cf);
1065 }
1066
1067 static struct proto *
1068 krt_init(struct proto_config *CF)
1069 {
1070 struct krt_proto *p = proto_new(CF);
1071 // struct krt_config *cf = (void *) CF;
1072
1073 p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF));
1074
1075 p->p.preexport = krt_preexport;
1076 p->p.rt_notify = krt_rt_notify;
1077 p->p.if_notify = krt_if_notify;
1078 p->p.reload_routes = krt_reload_routes;
1079 p->p.feed_end = krt_feed_end;
1080 p->p.make_tmp_attrs = krt_make_tmp_attrs;
1081 p->p.store_tmp_attrs = krt_store_tmp_attrs;
1082 p->p.rte_same = krt_rte_same;
1083
1084 krt_sys_init(p);
1085 return &p->p;
1086 }
1087
1088 static int
1089 krt_start(struct proto *P)
1090 {
1091 struct krt_proto *p = (struct krt_proto *) P;
1092
1093 switch (p->p.net_type)
1094 {
1095 case NET_IP4: p->af = AF_INET; break;
1096 case NET_IP6: p->af = AF_INET6; break;
1097 case NET_IP6_SADR: p->af = AF_INET6; break;
1098 #ifdef AF_MPLS
1099 case NET_MPLS: p->af = AF_MPLS; break;
1100 #endif
1101 default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break;
1102 }
1103
1104 add_tail(&krt_proto_list, &p->krt_node);
1105
1106 #ifdef KRT_ALLOW_LEARN
1107 krt_learn_init(p);
1108 #endif
1109
1110 if (!krt_sys_start(p))
1111 {
1112 rem_node(&p->krt_node);
1113 return PS_START;
1114 }
1115
1116 krt_scan_timer_start(p);
1117
1118 if (p->p.gr_recovery && KRT_CF->graceful_restart)
1119 p->p.main_channel->gr_wait = 1;
1120
1121 return PS_UP;
1122 }
1123
1124 static int
1125 krt_shutdown(struct proto *P)
1126 {
1127 struct krt_proto *p = (struct krt_proto *) P;
1128
1129 krt_scan_timer_stop(p);
1130
1131 /* FIXME we should flush routes even when persist during reconfiguration */
1132 if (p->initialized && !KRT_CF->persist && (P->down_code != PDC_CMD_GR_DOWN))
1133 krt_flush_routes(p);
1134
1135 p->ready = 0;
1136 p->initialized = 0;
1137
1138 if (p->p.proto_state == PS_START)
1139 return PS_DOWN;
1140
1141 krt_sys_shutdown(p);
1142 rem_node(&p->krt_node);
1143
1144 return PS_DOWN;
1145 }
1146
1147 static int
1148 krt_reconfigure(struct proto *p, struct proto_config *CF)
1149 {
1150 struct krt_config *o = (void *) p->cf;
1151 struct krt_config *n = (void *) CF;
1152
1153 if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF)))
1154 return 0;
1155
1156 if (!krt_sys_reconfigure((struct krt_proto *) p, n, o))
1157 return 0;
1158
1159 /* persist, graceful restart need not be the same */
1160 return o->scan_time == n->scan_time && o->learn == n->learn;
1161 }
1162
1163 struct proto_config *
1164 krt_init_config(int class)
1165 {
1166 #ifndef CONFIG_MULTIPLE_TABLES
1167 if (krt_cf)
1168 cf_error("Kernel protocol already defined");
1169 #endif
1170
1171 krt_cf = (struct krt_config *) proto_config_new(&proto_unix_kernel, class);
1172 krt_cf->scan_time = 60 S;
1173
1174 krt_sys_init_config(krt_cf);
1175 return (struct proto_config *) krt_cf;
1176 }
1177
/* copy_config hook of the Kernel protocol: only the sysdep parts need fixing up */
static void
krt_copy_config(struct proto_config *dest, struct proto_config *src)
{
  struct krt_config *to = (struct krt_config *) dest;
  struct krt_config *from = (struct krt_config *) src;

  krt_sys_copy_config(to, from);
}
1187
1188 static int
1189 krt_get_attr(eattr *a, byte *buf, int buflen)
1190 {
1191 switch (a->id)
1192 {
1193 case EA_KRT_SOURCE:
1194 bsprintf(buf, "source");
1195 return GA_NAME;
1196
1197 case EA_KRT_METRIC:
1198 bsprintf(buf, "metric");
1199 return GA_NAME;
1200
1201 default:
1202 return krt_sys_get_attr(a, buf, buflen);
1203 }
1204 }
1205
1206
/* Channel mask bit for IPv6 SADR nets, offered only when CONFIG_IP6_SADR_KERNEL is defined */
#ifdef CONFIG_IP6_SADR_KERNEL
#define MAYBE_IP6_SADR	NB_IP6_SADR
#else
#define MAYBE_IP6_SADR	0
#endif

/* Channel mask bit for MPLS nets, offered only when HAVE_MPLS_KERNEL is defined */
#ifdef HAVE_MPLS_KERNEL
#define MAYBE_MPLS	NB_MPLS
#else
#define MAYBE_MPLS	0
#endif
1218
/* Protocol descriptor of the Kernel protocol; wires up the krt_* hooks defined above */
struct protocol proto_unix_kernel = {
  .name =		"Kernel",
  .template =		"kernel%d",		/* naming template for instances */
  .class =		PROTOCOL_KERNEL,
  .preference =		DEF_PREF_INHERITED,
  /* IP always; SADR and MPLS only when enabled at build time (see MAYBE_* above) */
  .channel_mask =	NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS,
  .proto_size =		sizeof(struct krt_proto),
  .config_size =	sizeof(struct krt_config),
  .preconfig =		krt_preconfig,
  .postconfig =		krt_postconfig,
  .init =		krt_init,
  .start =		krt_start,
  .shutdown =		krt_shutdown,
  .reconfigure =	krt_reconfigure,
  .copy_config =	krt_copy_config,
  .get_attr =		krt_get_attr,
#ifdef KRT_ALLOW_LEARN
  /* Dump hooks exist only when route learning is compiled in */
  .dump =		krt_dump,
  .dump_attrs =		krt_dump_attrs,
#endif
};