]> git.ipfire.org Git - thirdparty/bird.git/blob - sysdep/unix/krt.c
Nest: Use bitmaps to keep track of exported routes
[thirdparty/bird.git] / sysdep / unix / krt.c
1 /*
2 * BIRD -- UNIX Kernel Synchronization
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Kernel synchronization
11 *
12 * This system dependent module implements the Kernel and Device protocol,
13 * that is synchronization of interface lists and routing tables with the
14 * OS kernel.
15 *
16 * The whole kernel synchronization is a bit messy and touches some internals
17 * of the routing table engine, because routing table maintenance is a typical
18 * example of the proverbial compatibility between different Unices and we want
19 * to keep the overhead of our KRT business as low as possible and avoid maintaining
20 * a local routing table copy.
21 *
22 * The kernel syncer can work in three different modes (according to system config header):
23 * Either with a single routing table and single KRT protocol [traditional UNIX]
24 * or with many routing tables and separate KRT protocols for all of them
25 * or with many routing tables, but every scan including all tables, so we start
26 * separate KRT protocols which cooperate with each other [Linux].
27 * In this case, we keep only a single scan timer.
28 *
29 * We use FIB node flags in the routing table to keep track of route
30 * synchronization status. We also attach temporary &rte's to the routing table,
31 * but it cannot do any harm to the rest of BIRD since table synchronization is
32 * an atomic process.
33 *
34 * When starting up, we cheat by looking if there is another
35 * KRT instance to be initialized later and performing table scan
36 * only once for all the instances.
37 *
38 * The code uses OS-dependent parts for kernel updates and scans. These parts are
39 * in more specific sysdep directories (e.g. sysdep/linux) in functions krt_sys_*
40 * and kif_sys_* (and some others like krt_replace_rte()) and krt-sys.h header file.
41 * This is also used for platform specific protocol options and route attributes.
42 *
43 * There was also old code that used traditional UNIX ioctls for these tasks.
44 * It was unmaintained and was later removed. For reference, see the sysdep/krt-* files
45 * in commit 396dfa9042305f62da1f56589c4b98fac57fc2f6.
46 */
47
48 /*
49 * If you are brave enough, continue now. You cannot say you haven't been warned.
50 */
51
52 #undef LOCAL_DEBUG
53
54 #include "nest/bird.h"
55 #include "nest/iface.h"
56 #include "nest/route.h"
57 #include "nest/protocol.h"
58 #include "filter/filter.h"
59 #include "conf/conf.h"
60 #include "lib/string.h"
61 #include "lib/timer.h"
62
63 #include "unix.h"
64 #include "krt.h"
65
66 /*
67 * Global resources
68 */
69
70 pool *krt_pool;
71 static linpool *krt_filter_lp;
72 static list krt_proto_list;
73
74 void
75 krt_io_init(void)
76 {
77 krt_pool = rp_new(&root_pool, "Kernel Syncer");
78 krt_filter_lp = lp_new_default(krt_pool);
79 init_list(&krt_proto_list);
80 krt_sys_io_init();
81 }
82
83 /*
84 * Interfaces
85 */
86
87 struct kif_proto *kif_proto;
88 static struct kif_config *kif_cf;
89 static timer *kif_scan_timer;
90 static btime kif_last_shot;
91
92 static struct kif_iface_config kif_default_iface = {};
93
94 struct kif_iface_config *
95 kif_get_iface_config(struct iface *iface)
96 {
97 struct kif_config *cf = (void *) (kif_proto->p.cf);
98 struct kif_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL);
99 return ic ?: &kif_default_iface;
100 }
101
102 static void
103 kif_scan(timer *t)
104 {
105 struct kif_proto *p = t->data;
106
107 KRT_TRACE(p, D_EVENTS, "Scanning interfaces");
108 kif_last_shot = current_time();
109 kif_do_scan(p);
110 }
111
112 static void
113 kif_force_scan(void)
114 {
115 if (kif_proto && ((kif_last_shot + 2 S) < current_time()))
116 {
117 kif_scan(kif_scan_timer);
118 tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time);
119 }
120 }
121
122 void
123 kif_request_scan(void)
124 {
125 if (kif_proto && (kif_scan_timer->expires > (current_time() + 1 S)))
126 tm_start(kif_scan_timer, 1 S);
127 }
128
129 static struct proto *
130 kif_init(struct proto_config *c)
131 {
132 struct kif_proto *p = proto_new(c);
133
134 kif_sys_init(p);
135 return &p->p;
136 }
137
138 static int
139 kif_start(struct proto *P)
140 {
141 struct kif_proto *p = (struct kif_proto *) P;
142
143 kif_proto = p;
144 kif_sys_start(p);
145
146 /* Start periodic interface scanning */
147 kif_scan_timer = tm_new_init(P->pool, kif_scan, p, KIF_CF->scan_time, 0);
148 kif_scan(kif_scan_timer);
149 tm_start(kif_scan_timer, KIF_CF->scan_time);
150
151 return PS_UP;
152 }
153
154 static int
155 kif_shutdown(struct proto *P)
156 {
157 struct kif_proto *p = (struct kif_proto *) P;
158
159 tm_stop(kif_scan_timer);
160 kif_sys_shutdown(p);
161 kif_proto = NULL;
162
163 return PS_DOWN;
164 }
165
166 static int
167 kif_reconfigure(struct proto *p, struct proto_config *new)
168 {
169 struct kif_config *o = (struct kif_config *) p->cf;
170 struct kif_config *n = (struct kif_config *) new;
171
172 if (!kif_sys_reconfigure((struct kif_proto *) p, n, o))
173 return 0;
174
175 if (o->scan_time != n->scan_time)
176 {
177 tm_stop(kif_scan_timer);
178 kif_scan_timer->recurrent = n->scan_time;
179 kif_scan(kif_scan_timer);
180 tm_start(kif_scan_timer, n->scan_time);
181 }
182
183 if (!EMPTY_LIST(o->iface_list) || !EMPTY_LIST(n->iface_list))
184 {
185 /* This is hack, we have to update a configuration
186 * to the new value just now, because it is used
187 * for recalculation of preferred addresses.
188 */
189 p->cf = new;
190
191 if_recalc_all_preferred_addresses();
192 }
193
194 return 1;
195 }
196
197
198 static void
199 kif_preconfig(struct protocol *P UNUSED, struct config *c)
200 {
201 kif_cf = NULL;
202 kif_sys_preconfig(c);
203 }
204
205 struct proto_config *
206 kif_init_config(int class)
207 {
208 if (kif_cf)
209 cf_error("Kernel device protocol already defined");
210
211 kif_cf = (struct kif_config *) proto_config_new(&proto_unix_iface, class);
212 kif_cf->scan_time = 60 S;
213 init_list(&kif_cf->iface_list);
214
215 kif_sys_init_config(kif_cf);
216 return (struct proto_config *) kif_cf;
217 }
218
219 static void
220 kif_copy_config(struct proto_config *dest, struct proto_config *src)
221 {
222 struct kif_config *d = (struct kif_config *) dest;
223 struct kif_config *s = (struct kif_config *) src;
224
225 /* Copy interface config list */
226 cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct kif_iface_config));
227
228 /* Fix sysdep parts */
229 kif_sys_copy_config(d, s);
230 }
231
232 struct protocol proto_unix_iface = {
233 .name = "Device",
234 .template = "device%d",
235 .class = PROTOCOL_DEVICE,
236 .proto_size = sizeof(struct kif_proto),
237 .config_size = sizeof(struct kif_config),
238 .preconfig = kif_preconfig,
239 .init = kif_init,
240 .start = kif_start,
241 .shutdown = kif_shutdown,
242 .reconfigure = kif_reconfigure,
243 .copy_config = kif_copy_config
244 };
245
246 /*
247 * Tracing of routes
248 */
249
250 static inline void
251 krt_trace_in(struct krt_proto *p, rte *e, char *msg)
252 {
253 if (p->p.debug & D_PACKETS)
254 log(L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg);
255 }
256
257 static inline void
258 krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg)
259 {
260 if (p->p.debug & D_PACKETS)
261 log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg);
262 }
263
264 /*
265 * Inherited Routes
266 */
267
268 #ifdef KRT_ALLOW_LEARN
269
270 static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS;
271
272 /*
273 * krt_same_key() specifies what (aside from the net) is the key in
274 * kernel routing tables. It should be OS-dependent, this is for
275 * Linux. It is important for asynchronous alien updates, because a
276 * positive update is implicitly a negative one for any old route with
277 * the same key.
278 */
279
280 static inline int
281 krt_same_key(rte *a, rte *b)
282 {
283 return a->u.krt.metric == b->u.krt.metric;
284 }
285
286 static inline int
287 krt_uptodate(rte *a, rte *b)
288 {
289 if (a->attrs != b->attrs)
290 return 0;
291
292 if (a->u.krt.proto != b->u.krt.proto)
293 return 0;
294
295 return 1;
296 }
297
298 static void
299 krt_learn_announce_update(struct krt_proto *p, rte *e)
300 {
301 net *n = e->net;
302 rta *aa = rta_clone(e->attrs);
303 rte *ee = rte_get_temp(aa);
304 ee->pflags = EA_ID_FLAG(EA_KRT_SOURCE) | EA_ID_FLAG(EA_KRT_METRIC);
305 ee->u.krt = e->u.krt;
306 rte_update(&p->p, n->n.addr, ee);
307 }
308
309 static void
310 krt_learn_announce_delete(struct krt_proto *p, net *n)
311 {
312 rte_update(&p->p, n->n.addr, NULL);
313 }
314
315 /* Called when alien route is discovered during scan */
316 static void
317 krt_learn_scan(struct krt_proto *p, rte *e)
318 {
319 net *n0 = e->net;
320 net *n = net_get(&p->krt_table, n0->n.addr);
321 rte *m, **mm;
322
323 e->attrs = rta_lookup(e->attrs);
324
325 for(mm=&n->routes; m = *mm; mm=&m->next)
326 if (krt_same_key(m, e))
327 break;
328 if (m)
329 {
330 if (krt_uptodate(m, e))
331 {
332 krt_trace_in_rl(&rl_alien, p, e, "[alien] seen");
333 rte_free(e);
334 m->u.krt.seen = 1;
335 }
336 else
337 {
338 krt_trace_in(p, e, "[alien] updated");
339 *mm = m->next;
340 rte_free(m);
341 m = NULL;
342 }
343 }
344 else
345 krt_trace_in(p, e, "[alien] created");
346 if (!m)
347 {
348 e->next = n->routes;
349 n->routes = e;
350 e->u.krt.seen = 1;
351 }
352 }
353
354 static void
355 krt_learn_prune(struct krt_proto *p)
356 {
357 struct fib *fib = &p->krt_table.fib;
358 struct fib_iterator fit;
359
360 KRT_TRACE(p, D_EVENTS, "Pruning inherited routes");
361
362 FIB_ITERATE_INIT(&fit, fib);
363 again:
364 FIB_ITERATE_START(fib, &fit, net, n)
365 {
366 rte *e, **ee, *best, **pbest, *old_best;
367
368 /*
369 * Note that old_best may be NULL even if there was an old best route in
370 * the previous step, because it might be replaced in krt_learn_scan().
371 * But in that case there is a new valid best route.
372 */
373
374 old_best = NULL;
375 best = NULL;
376 pbest = NULL;
377 ee = &n->routes;
378 while (e = *ee)
379 {
380 if (e->u.krt.best)
381 old_best = e;
382
383 if (!e->u.krt.seen)
384 {
385 *ee = e->next;
386 rte_free(e);
387 continue;
388 }
389
390 if (!best || best->u.krt.metric > e->u.krt.metric)
391 {
392 best = e;
393 pbest = ee;
394 }
395
396 e->u.krt.seen = 0;
397 e->u.krt.best = 0;
398 ee = &e->next;
399 }
400 if (!n->routes)
401 {
402 DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen);
403 if (old_best)
404 krt_learn_announce_delete(p, n);
405
406 FIB_ITERATE_PUT(&fit);
407 fib_delete(fib, n);
408 goto again;
409 }
410
411 best->u.krt.best = 1;
412 *pbest = best->next;
413 best->next = n->routes;
414 n->routes = best;
415
416 if ((best != old_best) || p->reload)
417 {
418 DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
419 krt_learn_announce_update(p, best);
420 }
421 else
422 DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
423 }
424 FIB_ITERATE_END;
425
426 p->reload = 0;
427 }
428
429 static void
430 krt_learn_async(struct krt_proto *p, rte *e, int new)
431 {
432 net *n0 = e->net;
433 net *n = net_get(&p->krt_table, n0->n.addr);
434 rte *g, **gg, *best, **bestp, *old_best;
435
436 e->attrs = rta_lookup(e->attrs);
437
438 old_best = n->routes;
439 for(gg=&n->routes; g = *gg; gg = &g->next)
440 if (krt_same_key(g, e))
441 break;
442 if (new)
443 {
444 if (g)
445 {
446 if (krt_uptodate(g, e))
447 {
448 krt_trace_in(p, e, "[alien async] same");
449 rte_free(e);
450 return;
451 }
452 krt_trace_in(p, e, "[alien async] updated");
453 *gg = g->next;
454 rte_free(g);
455 }
456 else
457 krt_trace_in(p, e, "[alien async] created");
458
459 e->next = n->routes;
460 n->routes = e;
461 }
462 else if (!g)
463 {
464 krt_trace_in(p, e, "[alien async] delete failed");
465 rte_free(e);
466 return;
467 }
468 else
469 {
470 krt_trace_in(p, e, "[alien async] removed");
471 *gg = g->next;
472 rte_free(e);
473 rte_free(g);
474 }
475 best = n->routes;
476 bestp = &n->routes;
477 for(gg=&n->routes; g=*gg; gg=&g->next)
478 {
479 if (best->u.krt.metric > g->u.krt.metric)
480 {
481 best = g;
482 bestp = gg;
483 }
484
485 g->u.krt.best = 0;
486 }
487
488 if (best)
489 {
490 best->u.krt.best = 1;
491 *bestp = best->next;
492 best->next = n->routes;
493 n->routes = best;
494 }
495
496 if (best != old_best)
497 {
498 DBG("krt_learn_async: distributing change\n");
499 if (best)
500 krt_learn_announce_update(p, best);
501 else
502 krt_learn_announce_delete(p, n);
503 }
504 }
505
506 static void
507 krt_learn_init(struct krt_proto *p)
508 {
509 if (KRT_CF->learn)
510 {
511 struct rtable_config *cf = mb_allocz(p->p.pool, sizeof(struct rtable_config));
512 cf->name = "Inherited";
513 cf->addr_type = p->p.net_type;
514
515 rt_setup(p->p.pool, &p->krt_table, cf);
516 }
517 }
518
519 static void
520 krt_dump(struct proto *P)
521 {
522 struct krt_proto *p = (struct krt_proto *) P;
523
524 if (!KRT_CF->learn)
525 return;
526 debug("KRT: Table of inheritable routes\n");
527 rt_dump(&p->krt_table);
528 }
529
530 static void
531 krt_dump_attrs(rte *e)
532 {
533 debug(" [m=%d,p=%d]", e->u.krt.metric, e->u.krt.proto);
534 }
535
536 #endif
537
538 /*
539 * Routes
540 */
541
542 static void
543 krt_flush_routes(struct krt_proto *p)
544 {
545 struct rtable *t = p->p.main_channel->table;
546
547 KRT_TRACE(p, D_EVENTS, "Flushing kernel routes");
548 FIB_WALK(&t->fib, net, n)
549 {
550 rte *e = n->routes;
551 if (rte_is_valid(e) && (n->n.flags & KRF_INSTALLED))
552 {
553 /* FIXME: this does not work if gw is changed in export filter */
554 krt_replace_rte(p, e->net, NULL, e);
555 n->n.flags &= ~KRF_INSTALLED;
556 }
557 }
558 FIB_WALK_END;
559 }
560
561 static struct rte *
562 krt_export_net(struct krt_proto *p, net *net, rte **rt_free)
563 {
564 struct channel *c = p->p.main_channel;
565 const struct filter *filter = c->out_filter;
566 rte *rt;
567
568 if (c->ra_mode == RA_MERGED)
569 return rt_export_merged(c, net, rt_free, krt_filter_lp, 1);
570
571 rt = net->routes;
572 *rt_free = NULL;
573
574 if (!rte_is_valid(rt))
575 return NULL;
576
577 if (filter == FILTER_REJECT)
578 return NULL;
579
580 rte_make_tmp_attrs(&rt, krt_filter_lp, NULL);
581
582 /* We could run krt_preexport() here, but it is already handled by KRF_INSTALLED */
583
584 if (filter == FILTER_ACCEPT)
585 goto accept;
586
587 if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT)
588 goto reject;
589
590
591 accept:
592 if (rt != net->routes)
593 *rt_free = rt;
594 return rt;
595
596 reject:
597 if (rt != net->routes)
598 rte_free(rt);
599 return NULL;
600 }
601
602 static int
603 krt_same_dest(rte *k, rte *e)
604 {
605 rta *ka = k->attrs, *ea = e->attrs;
606
607 if (ka->dest != ea->dest)
608 return 0;
609
610 if (ka->dest == RTD_UNICAST)
611 return nexthop_same(&(ka->nh), &(ea->nh));
612
613 return 1;
614 }
615
616 /*
617 * This gets called back when the low-level scanning code discovers a route.
618 * We expect that the route is a temporary rte and its attributes are uncached.
619 */
620
621 void
622 krt_got_route(struct krt_proto *p, rte *e)
623 {
624 net *net = e->net;
625 int verdict;
626
627 #ifdef KRT_ALLOW_LEARN
628 switch (e->u.krt.src)
629 {
630 case KRT_SRC_KERNEL:
631 verdict = KRF_IGNORE;
632 goto sentenced;
633
634 case KRT_SRC_REDIRECT:
635 verdict = KRF_DELETE;
636 goto sentenced;
637
638 case KRT_SRC_ALIEN:
639 if (KRT_CF->learn)
640 krt_learn_scan(p, e);
641 else
642 {
643 krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored");
644 rte_free(e);
645 }
646 return;
647 }
648 #endif
649 /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */
650
651 if (net->n.flags & KRF_VERDICT_MASK)
652 {
653 /* Route to this destination was already seen. Strange, but it happens... */
654 krt_trace_in(p, e, "already seen");
655 rte_free(e);
656 return;
657 }
658
659 if (!p->ready)
660 {
661 /* We wait for the initial feed to have correct KRF_INSTALLED flag */
662 verdict = KRF_IGNORE;
663 goto sentenced;
664 }
665
666 if (net->n.flags & KRF_INSTALLED)
667 {
668 rte *new, *rt_free;
669
670 new = krt_export_net(p, net, &rt_free);
671
672 /* TODO: There also may be changes in route eattrs, we ignore that for now. */
673
674 if (!new)
675 verdict = KRF_DELETE;
676 else if ((net->n.flags & KRF_SYNC_ERROR) || !krt_same_dest(e, new))
677 verdict = KRF_UPDATE;
678 else
679 verdict = KRF_SEEN;
680
681 if (rt_free)
682 rte_free(rt_free);
683
684 lp_flush(krt_filter_lp);
685 }
686 else
687 verdict = KRF_DELETE;
688
689 sentenced:
690 krt_trace_in(p, e, ((char *[]) { "?", "seen", "will be updated", "will be removed", "ignored" }) [verdict]);
691 net->n.flags = (net->n.flags & ~KRF_VERDICT_MASK) | verdict;
692 if (verdict == KRF_UPDATE || verdict == KRF_DELETE)
693 {
694 /* Get a cached copy of attributes and temporarily link the route */
695 rta *a = e->attrs;
696 a->source = RTS_DUMMY;
697 e->attrs = rta_lookup(a);
698 e->next = net->routes;
699 net->routes = e;
700 }
701 else
702 rte_free(e);
703 }
704
705 static void
706 krt_prune(struct krt_proto *p)
707 {
708 struct rtable *t = p->p.main_channel->table;
709
710 KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name);
711 FIB_WALK(&t->fib, net, n)
712 {
713 int verdict = n->n.flags & KRF_VERDICT_MASK;
714 rte *new, *old, *rt_free = NULL;
715
716 if (verdict == KRF_UPDATE || verdict == KRF_DELETE)
717 {
718 /* Get a dummy route from krt_got_route() */
719 old = n->routes;
720 n->routes = old->next;
721 }
722 else
723 old = NULL;
724
725 if (verdict == KRF_CREATE || verdict == KRF_UPDATE)
726 {
727 /* We have to run export filter to get proper 'new' route */
728 new = krt_export_net(p, n, &rt_free);
729
730 if (!new)
731 verdict = (verdict == KRF_CREATE) ? KRF_IGNORE : KRF_DELETE;
732 }
733 else
734 new = NULL;
735
736 switch (verdict)
737 {
738 case KRF_CREATE:
739 krt_trace_in(p, new, "reinstalling");
740 krt_replace_rte(p, n, new, NULL);
741 break;
742 case KRF_SEEN:
743 case KRF_IGNORE:
744 /* Nothing happens */
745 break;
746 case KRF_UPDATE:
747 krt_trace_in(p, new, "updating");
748 krt_replace_rte(p, n, new, old);
749 break;
750 case KRF_DELETE:
751 krt_trace_in(p, old, "deleting");
752 krt_replace_rte(p, n, NULL, old);
753 break;
754 default:
755 bug("krt_prune: invalid route status");
756 }
757
758 if (old)
759 rte_free(old);
760 if (rt_free)
761 rte_free(rt_free);
762 lp_flush(krt_filter_lp);
763 n->n.flags &= ~KRF_VERDICT_MASK;
764 }
765 FIB_WALK_END;
766
767 #ifdef KRT_ALLOW_LEARN
768 if (KRT_CF->learn)
769 krt_learn_prune(p);
770 #endif
771
772 if (p->ready)
773 p->initialized = 1;
774 }
775
776 void
777 krt_got_route_async(struct krt_proto *p, rte *e, int new)
778 {
779 net *net = e->net;
780
781 switch (e->u.krt.src)
782 {
783 case KRT_SRC_BIRD:
784 /* Should be filtered by the back end */
785 bug("BIRD originated routes should not get here.");
786
787 case KRT_SRC_REDIRECT:
788 if (new)
789 {
790 krt_trace_in(p, e, "[redirect] deleting");
791 krt_replace_rte(p, net, NULL, e);
792 }
793 /* If !new, it is probably echo of our deletion */
794 break;
795
796 #ifdef KRT_ALLOW_LEARN
797 case KRT_SRC_ALIEN:
798 if (KRT_CF->learn)
799 {
800 krt_learn_async(p, e, new);
801 return;
802 }
803 #endif
804 }
805 rte_free(e);
806 }
807
808 /*
809 * Periodic scanning
810 */
811
812
813 #ifdef CONFIG_ALL_TABLES_AT_ONCE
814
815 static timer *krt_scan_timer;
816 static int krt_scan_count;
817
818 static void
819 krt_scan(timer *t UNUSED)
820 {
821 struct krt_proto *p;
822
823 kif_force_scan();
824
825 /* We need some node to decide whether to print the debug messages or not */
826 p = SKIP_BACK(struct krt_proto, krt_node, HEAD(krt_proto_list));
827 KRT_TRACE(p, D_EVENTS, "Scanning routing table");
828
829 krt_do_scan(NULL);
830
831 void *q;
832 WALK_LIST(q, krt_proto_list)
833 {
834 p = SKIP_BACK(struct krt_proto, krt_node, q);
835 krt_prune(p);
836 }
837 }
838
839 static void
840 krt_scan_timer_start(struct krt_proto *p)
841 {
842 if (!krt_scan_count)
843 krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0);
844
845 krt_scan_count++;
846
847 tm_start(krt_scan_timer, 1 S);
848 }
849
850 static void
851 krt_scan_timer_stop(struct krt_proto *p UNUSED)
852 {
853 krt_scan_count--;
854
855 if (!krt_scan_count)
856 {
857 rfree(krt_scan_timer);
858 krt_scan_timer = NULL;
859 }
860 }
861
862 static void
863 krt_scan_timer_kick(struct krt_proto *p UNUSED)
864 {
865 tm_start(krt_scan_timer, 0);
866 }
867
868 #else
869
870 static void
871 krt_scan(timer *t)
872 {
873 struct krt_proto *p = t->data;
874
875 kif_force_scan();
876
877 KRT_TRACE(p, D_EVENTS, "Scanning routing table");
878 krt_do_scan(p);
879 krt_prune(p);
880 }
881
882 static void
883 krt_scan_timer_start(struct krt_proto *p)
884 {
885 p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
886 tm_start(p->scan_timer, 1 S);
887 }
888
889 static void
890 krt_scan_timer_stop(struct krt_proto *p)
891 {
892 tm_stop(p->scan_timer);
893 }
894
895 static void
896 krt_scan_timer_kick(struct krt_proto *p)
897 {
898 tm_start(p->scan_timer, 0);
899 }
900
901 #endif
902
903
904
905
906 /*
907 * Updates
908 */
909
910 static void
911 krt_make_tmp_attrs(struct rte *rt, struct linpool *pool)
912 {
913 rte_init_tmp_attrs(rt, pool, 2);
914 rte_make_tmp_attr(rt, EA_KRT_SOURCE, EAF_TYPE_INT, rt->u.krt.proto);
915 rte_make_tmp_attr(rt, EA_KRT_METRIC, EAF_TYPE_INT, rt->u.krt.metric);
916 }
917
918 static void
919 krt_store_tmp_attrs(struct rte *rt, struct linpool *pool)
920 {
921 rte_init_tmp_attrs(rt, pool, 2);
922 rt->u.krt.proto = rte_store_tmp_attr(rt, EA_KRT_SOURCE);
923 rt->u.krt.metric = rte_store_tmp_attr(rt, EA_KRT_METRIC);
924 }
925
926 static int
927 krt_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED)
928 {
929 // struct krt_proto *p = (struct krt_proto *) P;
930 rte *e = *new;
931
932 if (e->attrs->src->proto == P)
933 {
934 #ifdef CONFIG_SINGLE_ROUTE
935 /*
936 * Implicit withdraw - when the imported kernel route becomes the best one,
937 * we know that the previous one exported to the kernel was already removed,
938 * but if we processed the update as usual, we would send withdraw to the
939 * kernel, which would remove the new imported route instead.
940 *
941 * We will remove KRT_INSTALLED flag, which stops such withdraw to be
942 * processed in krt_rt_notify() and krt_replace_rte().
943 */
944 if (e == e->net->routes)
945 e->net->n.flags &= ~KRF_INSTALLED;
946 #endif
947 return -1;
948 }
949
950 if (!krt_capable(e))
951 return -1;
952
953 return 0;
954 }
955
956 static void
957 krt_rt_notify(struct proto *P, struct channel *ch UNUSED, net *net,
958 rte *new, rte *old)
959 {
960 struct krt_proto *p = (struct krt_proto *) P;
961
962 if (config->shutdown)
963 return;
964 if (!(net->n.flags & KRF_INSTALLED))
965 old = NULL;
966 if (new)
967 net->n.flags |= KRF_INSTALLED;
968 else
969 net->n.flags &= ~KRF_INSTALLED;
970 if (p->initialized) /* Before first scan we don't touch the routes */
971 krt_replace_rte(p, net, new, old);
972 }
973
974 static void
975 krt_if_notify(struct proto *P, uint flags, struct iface *iface UNUSED)
976 {
977 struct krt_proto *p = (struct krt_proto *) P;
978
979 /*
980 * When interface went down, we should remove routes to it. In the ideal world,
981 * OS kernel would send us route removal notifications in such cases, but we
982 * cannot rely on it as it is often not true. E.g. Linux kernel removes related
983 * routes when an interface went down, but it does not notify userspace about
984 * that. To be sure, we just schedule a scan to ensure synchronization.
985 */
986
987 if ((flags & IF_CHANGE_DOWN) && KRT_CF->learn)
988 krt_scan_timer_kick(p);
989 }
990
991 static void
992 krt_reload_routes(struct channel *C)
993 {
994 struct krt_proto *p = (void *) C->proto;
995
996 /* Although we keep learned routes in krt_table, we rather schedule a scan */
997
998 if (KRT_CF->learn)
999 {
1000 p->reload = 1;
1001 krt_scan_timer_kick(p);
1002 }
1003 }
1004
1005 static void
1006 krt_feed_end(struct channel *C)
1007 {
1008 struct krt_proto *p = (void *) C->proto;
1009
1010 p->ready = 1;
1011 krt_scan_timer_kick(p);
1012 }
1013
1014
1015 static int
1016 krt_rte_same(rte *a, rte *b)
1017 {
1018 /* src is always KRT_SRC_ALIEN and type is irrelevant */
1019 return (a->u.krt.proto == b->u.krt.proto) && (a->u.krt.metric == b->u.krt.metric);
1020 }
1021
1022
1023 /*
1024 * Protocol glue
1025 */
1026
1027 struct krt_config *krt_cf;
1028
1029 static void
1030 krt_preconfig(struct protocol *P UNUSED, struct config *c)
1031 {
1032 krt_cf = NULL;
1033 krt_sys_preconfig(c);
1034 }
1035
1036 static void
1037 krt_postconfig(struct proto_config *CF)
1038 {
1039 struct krt_config *cf = (void *) CF;
1040
1041 if (EMPTY_LIST(CF->channels))
1042 cf_error("Channel not specified");
1043
1044 #ifdef CONFIG_ALL_TABLES_AT_ONCE
1045 if (krt_cf->scan_time != cf->scan_time)
1046 cf_error("All kernel syncers must use the same table scan interval");
1047 #endif
1048
1049 struct channel_config *cc = proto_cf_main_channel(CF);
1050 struct rtable_config *tab = cc->table;
1051 if (tab->krt_attached)
1052 cf_error("Kernel syncer (%s) already attached to table %s", tab->krt_attached->name, tab->name);
1053 tab->krt_attached = CF;
1054
1055 if (cf->merge_paths)
1056 {
1057 cc->ra_mode = RA_MERGED;
1058 cc->merge_limit = cf->merge_paths;
1059 }
1060
1061 krt_sys_postconfig(cf);
1062 }
1063
1064 static struct proto *
1065 krt_init(struct proto_config *CF)
1066 {
1067 struct krt_proto *p = proto_new(CF);
1068 // struct krt_config *cf = (void *) CF;
1069
1070 p->p.main_channel = proto_add_channel(&p->p, proto_cf_main_channel(CF));
1071
1072 p->p.preexport = krt_preexport;
1073 p->p.rt_notify = krt_rt_notify;
1074 p->p.if_notify = krt_if_notify;
1075 p->p.reload_routes = krt_reload_routes;
1076 p->p.feed_end = krt_feed_end;
1077 p->p.make_tmp_attrs = krt_make_tmp_attrs;
1078 p->p.store_tmp_attrs = krt_store_tmp_attrs;
1079 p->p.rte_same = krt_rte_same;
1080
1081 krt_sys_init(p);
1082 return &p->p;
1083 }
1084
1085 static int
1086 krt_start(struct proto *P)
1087 {
1088 struct krt_proto *p = (struct krt_proto *) P;
1089
1090 switch (p->p.net_type)
1091 {
1092 case NET_IP4: p->af = AF_INET; break;
1093 case NET_IP6: p->af = AF_INET6; break;
1094 case NET_IP6_SADR: p->af = AF_INET6; break;
1095 #ifdef AF_MPLS
1096 case NET_MPLS: p->af = AF_MPLS; break;
1097 #endif
1098 default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break;
1099 }
1100
1101 add_tail(&krt_proto_list, &p->krt_node);
1102
1103 #ifdef KRT_ALLOW_LEARN
1104 krt_learn_init(p);
1105 #endif
1106
1107 if (!krt_sys_start(p))
1108 {
1109 rem_node(&p->krt_node);
1110 return PS_START;
1111 }
1112
1113 krt_scan_timer_start(p);
1114
1115 if (p->p.gr_recovery && KRT_CF->graceful_restart)
1116 p->p.main_channel->gr_wait = 1;
1117
1118 return PS_UP;
1119 }
1120
1121 static int
1122 krt_shutdown(struct proto *P)
1123 {
1124 struct krt_proto *p = (struct krt_proto *) P;
1125
1126 krt_scan_timer_stop(p);
1127
1128 /* FIXME we should flush routes even when persist during reconfiguration */
1129 if (p->initialized && !KRT_CF->persist && (P->down_code != PDC_CMD_GR_DOWN))
1130 krt_flush_routes(p);
1131
1132 p->ready = 0;
1133 p->initialized = 0;
1134
1135 if (p->p.proto_state == PS_START)
1136 return PS_DOWN;
1137
1138 krt_sys_shutdown(p);
1139 rem_node(&p->krt_node);
1140
1141 return PS_DOWN;
1142 }
1143
1144 static int
1145 krt_reconfigure(struct proto *p, struct proto_config *CF)
1146 {
1147 struct krt_config *o = (void *) p->cf;
1148 struct krt_config *n = (void *) CF;
1149
1150 if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF)))
1151 return 0;
1152
1153 if (!krt_sys_reconfigure((struct krt_proto *) p, n, o))
1154 return 0;
1155
1156 /* persist, graceful restart need not be the same */
1157 return o->scan_time == n->scan_time && o->learn == n->learn;
1158 }
1159
1160 struct proto_config *
1161 krt_init_config(int class)
1162 {
1163 #ifndef CONFIG_MULTIPLE_TABLES
1164 if (krt_cf)
1165 cf_error("Kernel protocol already defined");
1166 #endif
1167
1168 krt_cf = (struct krt_config *) proto_config_new(&proto_unix_kernel, class);
1169 krt_cf->scan_time = 60 S;
1170
1171 krt_sys_init_config(krt_cf);
1172 return (struct proto_config *) krt_cf;
1173 }
1174
/*
 * Copy-config hook: only the system-dependent parts are fixed up here.
 */
static void
krt_copy_config(struct proto_config *dest, struct proto_config *src)
{
  struct krt_config *to = (struct krt_config *) dest;
  struct krt_config *from = (struct krt_config *) src;

  /* Sysdep parts */
  krt_sys_copy_config(to, from);
}
1184
1185 static int
1186 krt_get_attr(eattr *a, byte *buf, int buflen)
1187 {
1188 switch (a->id)
1189 {
1190 case EA_KRT_SOURCE:
1191 bsprintf(buf, "source");
1192 return GA_NAME;
1193
1194 case EA_KRT_METRIC:
1195 bsprintf(buf, "metric");
1196 return GA_NAME;
1197
1198 default:
1199 return krt_sys_get_attr(a, buf, buflen);
1200 }
1201 }
1202
1203
1204 #ifdef CONFIG_IP6_SADR_KERNEL
1205 #define MAYBE_IP6_SADR NB_IP6_SADR
1206 #else
1207 #define MAYBE_IP6_SADR 0
1208 #endif
1209
1210 #ifdef HAVE_MPLS_KERNEL
1211 #define MAYBE_MPLS NB_MPLS
1212 #else
1213 #define MAYBE_MPLS 0
1214 #endif
1215
1216 struct protocol proto_unix_kernel = {
1217 .name = "Kernel",
1218 .template = "kernel%d",
1219 .class = PROTOCOL_KERNEL,
1220 .preference = DEF_PREF_INHERITED,
1221 .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS,
1222 .proto_size = sizeof(struct krt_proto),
1223 .config_size = sizeof(struct krt_config),
1224 .preconfig = krt_preconfig,
1225 .postconfig = krt_postconfig,
1226 .init = krt_init,
1227 .start = krt_start,
1228 .shutdown = krt_shutdown,
1229 .reconfigure = krt_reconfigure,
1230 .copy_config = krt_copy_config,
1231 .get_attr = krt_get_attr,
1232 #ifdef KRT_ALLOW_LEARN
1233 .dump = krt_dump,
1234 .dump_attrs = krt_dump_attrs,
1235 #endif
1236 };