/*
 *	BIRD -- Routing Tables
 *
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

/**
 * DOC: Routing tables
 *
 * Routing tables are probably the most important structures BIRD uses. They
 * hold all the information about known networks, the associated routes and
 * their attributes.
 *
 * There are multiple routing tables (a primary one together with any
 * number of secondary ones if requested by the configuration). Each table
 * is basically a FIB containing entries describing the individual
 * destination networks. For each network (represented by structure &net),
 * there is a one-way linked list of route entries (&rte), the first entry
 * on the list being the best one (i.e., the one we currently use
 * for routing); the order of the other entries is undetermined.
 *
 * The &rte contains information specific to the route (preference, protocol
 * metrics, time of last modification etc.) and a pointer to a &rta structure
 * (see the route attribute module for a precise explanation) holding the
 * remaining route attributes, which are expected to be shared by multiple
 * routes in order to conserve memory.
 */
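
/*
 * Illustrative sketch (added; not part of the original source): since the
 * first entry of each net's route list is the best route, a typical walk
 * over a network's routes looks like this:
 *
 *   net *n = net_find(tab, addr);
 *   rte *best = n ? n->routes : NULL;   // head of the list = best route
 *   for (rte *e = best; e; e = e->next)
 *     ;                                 // other entries in no particular order
 */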

#undef LOCAL_DEBUG

#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/cli.h"
#include "nest/iface.h"
#include "lib/resource.h"
#include "lib/event.h"
#include "lib/string.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "lib/alloca.h"

pool *rt_table_pool;

static slab *rte_slab;
static linpool *rte_update_pool;

static list routing_tables;

static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);


static inline struct ea_list *
make_tmp_attrs(struct rte *rt, struct linpool *pool)
{
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
  mta = rt->attrs->src->proto->make_tmp_attrs;
  return mta ? mta(rt, pool) : NULL;
}


/* Like fib_route(), but skips empty net entries */
static inline void *
net_route_ip4(rtable *t, net_addr_ip4 *n)
{
  net *r;

  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  {
    n->pxlen--;
    ip4_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

static inline void *
net_route_ip6(rtable *t, net_addr_ip6 *n)
{
  net *r;

  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  {
    n->pxlen--;
    ip6_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

void *
net_route(rtable *tab, const net_addr *n)
{
  ASSERT(tab->addr_type == n->type);

  net_addr *n0 = alloca(n->length);
  net_copy(n0, n);

  switch (n->type)
  {
  case NET_IP4:
  case NET_VPN4:
  case NET_ROA4:
    return net_route_ip4(tab, (net_addr_ip4 *) n0);

  case NET_IP6:
  case NET_VPN6:
  case NET_ROA6:
    return net_route_ip6(tab, (net_addr_ip6 *) n0);

  default:
    return NULL;
  }
}
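
/*
 * Usage sketch (added; an assumption, not from the original source):
 * net_route() performs a longest-prefix-match lookup by repeatedly clearing
 * the lowest prefix bit, so a query for 10.1.1.0/24 falls back to shorter
 * covering prefixes (e.g. 10.1.0.0/16) if no more specific valid entry exists:
 *
 *   net_addr_ip4 a = NET_ADDR_IP4(ip4_build(10, 1, 1, 0), 24);
 *   net *n = net_route(tab, (net_addr *) &a);  // first valid covering net, or NULL
 */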


static int
net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

static int
net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

/**
 * roa_check - check validity of route origination in a ROA table
 * @tab: ROA table
 * @n: network prefix to check
 * @asn: AS number of network prefix
 *
 * Implements RFC 6483 route validation for the given network prefix. The
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the
 * given network prefix. If there is no candidate ROA, return ROA_UNKNOWN.
 * If there is a candidate ROA with a matching ASN and a maxlen field greater
 * than or equal to the given prefix length, return ROA_VALID. Otherwise,
 * return ROA_INVALID. If the caller cannot determine the origin AS, 0 may be
 * used (in that case ROA_VALID cannot be returned). Table @tab must have type
 * NET_ROA4 or NET_ROA6, network @n must have type NET_IP4 or NET_IP6,
 * respectively.
 */
int
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
{
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
    return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
    return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
  else
    return ROA_UNKNOWN;	/* Should not happen */
}
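
/*
 * Worked example (added; illustrative, not from the original source): with a
 * single ROA "10.0.0.0/16 max 24, AS 65001" present in @tab, checking the
 * announced prefix 10.0.1.0/24 gives:
 *
 *   net_addr_ip4 px = NET_ADDR_IP4(ip4_build(10, 0, 1, 0), 24);
 *   net_roa_check(tab, (net_addr *) &px, 65001);  // ROA_VALID
 *   net_roa_check(tab, (net_addr *) &px, 65002);  // ROA_INVALID (covered, ASN mismatch)
 *   net_roa_check(tab, (net_addr *) &px, 0);      // ROA_INVALID (origin AS unknown)
 */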

/**
 * rte_find - find a route
 * @net: network node
 * @src: route source
 *
 * The rte_find() function returns a route for destination @net
 * which is from route source @src.
 */
rte *
rte_find(net *net, struct rte_src *src)
{
  rte *e = net->routes;

  while (e && e->attrs->src != src)
    e = e->next;
  return e;
}

/**
 * rte_get_temp - get a temporary &rte
 * @a: attributes to assign to the new route (a &rta; in case it's
 * un-cached, rte_update() will create a cached copy automatically)
 *
 * Create a temporary &rte and bind it with the attributes @a. The route
 * preference is cleared here; unless the caller sets it explicitly,
 * rte_update() fills in the default preference of the channel.
 */
rte *
rte_get_temp(rta *a)
{
  rte *e = sl_alloc(rte_slab);

  e->attrs = a;
  e->flags = 0;
  e->pref = 0;
  return e;
}
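
/*
 * Usage sketch (added; an assumption, not from the original source): a
 * protocol typically builds attributes, wraps them in a temporary &rte and
 * hands the route over to the table:
 *
 *   rta *a = rta_lookup(&a0);   // or pass an un-cached rta directly
 *   rte *e = rte_get_temp(a);
 *   e->pflags = 0;
 *   rte_update2(c, n, e, src);  // the table takes over the rte
 */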

rte *
rte_do_cow(rte *r)
{
  rte *e = sl_alloc(rte_slab);

  memcpy(e, r, sizeof(rte));
  e->attrs = rta_clone(r->attrs);
  e->flags = 0;
  return e;
}

/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and its associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in which case they are returned unchanged. Second, &rte is
 * a private copy, but &rta is cached, in which case &rta is duplicated using
 * rta_do_cow(). Third, &rte is shared and &rta is cached, in which case
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, the cached &rta loses one reference, while
 * the private copy created by rta_do_cow() is a shallow copy sharing indirect
 * data (eattrs, nexthops, ...) with it. To work properly, the original shared
 * &rta should hold another reference during the lifetime of the created
 * private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  if (!rta_is_cached(r->attrs))
    return r;

  rte *e = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(e->attrs);
  e->attrs = a;
  return e;
}
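
/*
 * Usage sketch (added; an assumption, not from the original source): when a
 * caller needs to modify attributes of a possibly shared route:
 *
 *   rte *e = rte_cow_rta(r, pool);  // e and e->attrs are now both writable
 *   e->attrs->igp_metric = 10;      // safe: the cached original is untouched
 */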

static int /* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
  int (*better)(rte *, rte *);

  if (!rte_is_valid(old))
    return 1;
  if (!rte_is_valid(new))
    return 0;

  if (new->pref > old->pref)
    return 1;
  if (new->pref < old->pref)
    return 0;
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
  {
    /*
     * If the user has configured protocol preferences so that two different
     * protocols have the same preference, try to break the tie by comparing
     * addresses. Not too useful, but it keeps the ordering of routes
     * unambiguous.
     */
    return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
  }
  if (better = new->attrs->src->proto->rte_better)
    return better(new, old);
  return 0;
}

static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->pref != sec->pref)
    return 0;

  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
    return 0;

  if (mergable = pri->attrs->src->proto->rte_mergable)
    return mergable(pri, sec);

  return 0;
}

static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
}

static inline void
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '>', msg);
}

static inline void
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '<', msg);
}

static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, pool);

  v = p->import_control ? p->import_control(p, &rt, tmpa, pool) : 0;
  if (v < 0)
  {
    if (silent)
      goto reject;

    stats->exp_updates_rejected++;
    if (v == RIC_REJECT)
      rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
    goto reject;
  }
  if (v > 0)
  {
    if (!silent)
      rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
    goto accept;
  }

  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, tmpa, pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
  {
    if (silent)
      goto reject;

    stats->exp_updates_filtered++;
    rte_trace_out(D_FILTERS, p, rt, "filtered out");
    goto reject;
  }

 accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}

static inline rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent);
}

static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;


  /*
   * First, apply the export limit.
   *
   * Export route limits have several problems. Because the exp_routes
   * counter is reset before refeed, we don't really know whether the
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * A minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decreases.
   *
   * The second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by the limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating the exp_routes counter. Therefore, to be consistent
   * in increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * A similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route is received later, the update would
   * be propagated (as old != NULL). Therefore, we have to block also
   * non-new updates (contrary to import blocking).
   */

  struct channel_limit *l = &c->out_limit;
  if (l->action && new)
  {
    if ((!old || refeed) && (stats->exp_routes >= l->limit))
      channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

    if (l->state == PLS_BLOCKED)
    {
      stats->exp_routes++;	/* see note above */
      stats->exp_updates_rejected++;
      rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
      new = NULL;

      if (!old)
        return;
    }
  }


  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
  {
    if (new && old)
      rte_trace_out(D_ROUTES, p, new, "replaced");
    else if (new)
      rte_trace_out(D_ROUTES, p, new, "added");
    else if (old)
      rte_trace_out(D_ROUTES, p, old, "removed");
  }
  if (!new)
    p->rt_notify(p, c, net, NULL, old, NULL);
  else if (tmpa)
  {
    ea_list *t = tmpa;
    while (t->next)
      t = t->next;
    t->next = new->attrs->eattrs;
    p->rt_notify(p, c, net, new, old, tmpa);
    t->next = NULL;
  }
  else
    p->rt_notify(p, c, net, new, old, new->attrs->eattrs);
}

static void
rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = c->proto;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to find out, so that we have a
   * correct value in the 'old' argument of rte_update (and a proper
   * filter value).
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after a
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route. This may lead to
   * 'spurious withdraws', but ensures that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, a proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not re-fed (because it disappeared before that).
   */

  if (new)
    new = export_filter(c, new, &new_free, &tmpa, 0);

  if (old && !refeed)
    old = export_filter(c, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, the 'old' value may be incorrect in some race
     * conditions. We generally ignore it with the exception of a withdraw
     * to the pipe protocol. In that case we rather propagate unfiltered
     * withdraws regardless of export filters to ensure that when a protocol
     * is flushed, its routes are removed from all tables. Possible spurious
     * unfiltered withdraws are not a problem here as they are ignored if
     * there is no corresponding route at the other end of the pipe. We
     * directly call the rt_notify() hook instead of do_rt_notify() to avoid
     * logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, c, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(c, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}

static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = c->proto;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we have met the position of old_changed. If
     before_old is NULL, old_changed was the first route and we met it
     implicitly before the current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or a valid (not rejected) route.
     If old_changed is valid, before_old has to be too. If the old changed
     route was not valid, the caller must use NULL for both old_changed
     and before_old. */

  if (new_changed)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* First, find the new_best route - the first one accepted by filters */
  for (r = net->routes; rte_is_valid(r); r = r->next)
  {
    if (new_best = export_filter(c, r, &new_free, &tmpa, 0))
      break;

    /* Note whether we walked past the position of the old_changed route */
    if (r == before_old)
      old_meet = 1;
  }

  /*
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed.
   * For refeed, there is a hack similar to the one in rt_notify_basic()
   * to ensure withdraws in case of changed filters.
   */
  if (feed)
  {
    if (feed == 2)	/* refeed */
      old_best = new_best ? new_best :
        (rte_is_valid(net->routes) ? net->routes : NULL);
    else
      old_best = NULL;

    if (!new_best && !old_best)
      return;

    goto found;
  }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In the remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different from new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue the search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
  {
    if (new_free)
      rte_free(new_free);
    return;
  }

  /* Fourth case */
  for (r = r->next; rte_is_valid(r); r = r->next)
  {
    if (old_best = export_filter(c, r, &old_free, NULL, 1))
      goto found;

    if (r == before_old)
      if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
        goto found;
  }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}


static struct nexthop *
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
{
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
}

rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct nexthop *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, tmpa, pool, silent);

  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter_(c, rt0, &tmp, NULL, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      best = rte_cow_rta(best, pool);
      nexthop_link(best->attrs, nhs);
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
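
/*
 * Sketch of the merging behavior (added; an assumption, not from the original
 * source): for a net with two mergable unicast routes, the returned best
 * route carries a merged next hop chain:
 *
 *   rte *rt_free = NULL;
 *   rte *m = rt_export_merged(c, net, &rt_free, &tmpa, pool, 1);
 *   // m->attrs->nh now links the next hops of all accepted mergable
 *   // routes, limited to c->merge_limit entries
 */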


static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte *old_best, int refeed)
{
  // struct proto *p = c->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL;

    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare the new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0);

  /* Prepare the old merged route (without proper merged next hops) */
  /* There are some issues with running the filter on the old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(c, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(c, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}


/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
 * @net: network in question
 * @new: the new route to be announced
 * @old: the previous route for the same network
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 * @before_old: The previous route before @old for the same network.
 * 		If @before_old is NULL @old was the first.
 *
 * This function gets a routing table update and announces it
 * to all protocols that accept the given type of route announcement
 * and are connected to the same table by their announcement hooks.
 *
 * Route announcement of type %RA_OPTIMAL is generated when the optimal
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
 *
 * Route announcement of type %RA_ANY is generated when any route (in
 * routing table @tab) changes. In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.). Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
 */
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
             rte *new_best, rte *old_best, rte *before_old)
{
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!old && !new)
    return;

  if ((type == RA_OPTIMAL) && tab->hostcache)
    rt_notify_hostcache(tab, net);

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
  {
    if (c->export_state == ES_DOWN)
      continue;

    if (c->ra_mode == type)
      if (type == RA_ACCEPTED)
        rt_notify_accepted(c, net, new, old, before_old, 0);
      else if (type == RA_MERGED)
        rt_notify_merged(c, net, new, old, new_best, old_best, 0);
      else
        rt_notify_basic(c, net, new, old, 0);
  }
}

static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  c = net_classify(n->n.addr);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
  {
    log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
        n->n.addr, e->attrs->dest, e->sender->proto->name);
    return 0;
  }

  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
  {
    log(L_WARN "Ignoring unsorted multipath route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}

/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
void
rte_free(rte *e)
{
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static int
rte_same(rte *x, rte *y)
{
  return
    x->attrs == y->attrs &&
    x->flags == y->flags &&
    x->pflags == y->pflags &&
    x->pref == y->pref &&
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
}

static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }

static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;	/* Find and remove original route from the same protocol */
  while (old = *k)
  {
    if (old->attrs->src == src)
    {
      /* If there is the same route in the routing table but from
       * a different sender, then there are two paths from the
       * source protocol to this routing table through transparent
       * pipes, which is not allowed.
       *
       * We log that and ignore the route. If it is a withdraw, we
       * ignore it completely (there might be 'spurious withdraws',
       * see FIXME in do_rte_announce())
       */
      if (old->sender->proto != p)
      {
        if (new)
        {
          log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
              net->n.addr, table->name);
          rte_free_quick(new);
        }
        return;
      }

      if (new && rte_same(old, new))
      {
        /* No changes, ignore the new route */

        if (!rte_is_filtered(new))
        {
          stats->imp_updates_ignored++;
          rte_trace_in(D_ROUTES, p, new, "ignored");
        }

        rte_free_quick(new);
        return;
      }
      *k = old->next;
      break;
    }
    k = &old->next;
    before_old = old;
  }

  if (!old)
    before_old = NULL;

  if (!old && !new)
  {
    stats->imp_withdraws_ignored++;
    return;
  }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new)
  {
    u32 all_routes = stats->imp_routes + stats->filt_routes;

    if (all_routes >= l->limit)
      channel_notify_limit(c, l, PLD_RX, all_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* With the receive limit, the situation is simple: old is NULL, so
         we just free new and exit as if nothing happened */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
      rte_free_quick(new);
      return;
    }
  }

  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
  {
    if (stats->imp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* With the import limit, the situation is more complicated. We
         shouldn't just drop the route; we should handle it as if it
         were filtered. We also have to continue route processing if
         old or new is non-NULL, but we should exit if both are NULL,
         as this case is probably assumed to be already handled. */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

      if (c->in_keep_filtered)
        new->flags |= REF_FILTERED;
      else
        { rte_free_quick(new); new = NULL; }

      /* Note that old && !new could be possible when
         c->in_keep_filtered changed in the recent past. */

      if (!old && !new)
        return;

      new_ok = 0;
      goto skip_stats1;
    }
  }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

 skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
  {
    /* If routes are sorted, just insert the new route at the appropriate position */
    if (new)
    {
      if (before_old && !rte_better(new, before_old))
        k = &before_old->next;
      else
        k = &net->routes;

      for (; *k; k = &(*k)->next)
        if (rte_better(new, *k))
          break;

      new->next = *k;
      *k = new;
    }
  }
  else
  {
    /* If routes are not sorted, find the best route and move it to
       the first position. There are several optimized cases. */

    if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
      goto do_recalculate;

    if (new && rte_better(new, old_best))
    {
      /* The first case - the new route is clearly optimal,
         we link it at the first position */

      new->next = net->routes;
      net->routes = new;
    }
    else if (old == old_best)
    {
      /* The second case - the old best route disappeared, we add the
         new route (if we have any) to the list (we don't care about
         position), then we elect the new optimal route, relink it
         to the first position and announce it. The new optimal route
         might be NULL if there are no more routes */

    do_recalculate:
      /* Add the new route to the list */
      if (new)
      {
        new->next = net->routes;
        net->routes = new;
      }

      /* Find a new optimal route (if there is any) */
      if (net->routes)
      {
        rte **bp = &net->routes;
        for (k = &(*bp)->next; *k; k = &(*k)->next)
          if (rte_better(*k, *bp))
            bp = k;

        /* And relink it */
        rte *best = *bp;
        *bp = best->next;
        best->next = net->routes;
        net->routes = best;
      }
    }
    else if (new)
    {
      /* The third case - the new route is not better than the old
         best route (therefore old_best != NULL) and the old best
         route was not removed (therefore old_best == net->routes).
         We just link the new route after the old best route. */

      ASSERT(net->routes != NULL);
      new->next = net->routes->next;
      net->routes->next = new;
    }
    /* The fourth (empty) case - a suboptimal route was removed, nothing to do */
  }

  if (new)
    new->lastmod = now;

  /* Log the route change */
  if (p->debug & D_ROUTES)
  {
    if (new_ok)
      rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
    else if (old_ok)
    {
      if (old != old_best)
        rte_trace(p, old, '>', "removed");
      else if (rte_is_ok(net->routes))
        rte_trace(p, old, '>', "removed [replaced]");
      else
        rte_trace(p, old, '>', "removed [sole]");
    }
  }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_prune(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}

static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}

static inline void
rte_hide_dummy_routes(net *net, rte **dummy)
{
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
  {
    *dummy = net->routes;
    net->routes = (*dummy)->next;
  }
}

static inline void
rte_unhide_dummy_routes(net *net, rte **dummy)
{
  if (*dummy)
  {
    (*dummy)->next = net->routes;
    net->routes = *dummy;
  }
}

/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
 * @c: channel doing the update
 * @net: network node
 * @p: protocol submitting the update
 * @src: protocol originating the update
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right
 * announcement sequence is to build route attributes first (either un-cached
 * with @aflags set to zero or a cached one using rta_lookup(); in that case
 * please note that you need to increase the use count of the attributes
 * yourself by calling rta_clone()), call rte_get_temp() to obtain a temporary
 * &rte, fill in all the appropriate data and finally submit the new &rte by
 * calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the
 * meaning of protocol-dependent data of @new. If @new is not %NULL, @src has
 * to be the same value as @new->attrs->proto. @p specifies the protocol that
 * called rte_update(). In most cases it is the same protocol as @src.
 * rte_update() stores @p in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks
 * whether the network and next hop addresses are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes
 * stored in the &rte to temporary extended attributes, consults import
 * filters of the protocol to see if the route should be accepted and/or its
 * attributes modified, and stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we automatically find any old
 * route defined by the protocol @src for network @n, replace it by the new
 * one (or remove it if @new is %NULL), recalculate the optimal route for this
 * destination and finally broadcast the change (if any) to all routing
 * protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is
 * taken from a special linear pool @rte_update_pool and freed when
 * rte_update() finishes.
 */

void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  struct filter *filter = c->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
  {
    nn = net_get(c->table, n);

    new->net = nn;
    new->sender = c;

    if (!new->pref)
      new->pref = c->preference;

    stats->imp_updates_received++;
    if (!rte_validate(new))
    {
      rte_trace_in(D_FILTERS, p, new, "invalid");
      stats->imp_updates_invalid++;
      goto drop;
    }

    if (filter == FILTER_REJECT)
    {
      stats->imp_updates_filtered++;
      rte_trace_in(D_FILTERS, p, new, "filtered out");

      if (!c->in_keep_filtered)
        goto drop;

      /* new is a private copy, we can modify it */
      new->flags |= REF_FILTERED;
    }
    else
    {
      tmpa = make_tmp_attrs(new, rte_update_pool);
      if (filter && (filter != FILTER_REJECT))
      {
        ea_list *old_tmpa = tmpa;
        int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
        if (fr > F_ACCEPT)
        {
          stats->imp_updates_filtered++;
          rte_trace_in(D_FILTERS, p, new, "filtered out");

          if (!c->in_keep_filtered)
            goto drop;

          new->flags |= REF_FILTERED;
        }
        if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
          src->proto->store_tmp_attrs(new, tmpa);
      }
    }
    if (!rta_is_cached(new->attrs))	/* Need to copy attributes */
      new->attrs = rta_lookup(new->attrs);
    new->flags |= REF_COW;
  }
  else
  {
    stats->imp_withdraws_received++;

    if (!(nn = net_find(c->table, n)) || !src)
    {
      stats->imp_withdraws_ignored++;
      rte_update_unlock();
      return;
    }
  }

 recalc:
  rte_hide_dummy_routes(nn, &dummy);
  rte_recalculate(c, nn, new, src);
  rte_unhide_dummy_routes(nn, &dummy);
  rte_update_unlock();
  return;

 drop:
  rte_free(new);
  new = NULL;
  goto recalc;
}
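
/*
 * Usage sketch (added; an assumption, not from the original source): a
 * withdraw is just an update with a NULL route; @src still identifies which
 * route to remove:
 *
 *   rte_update2(c, n, NULL, src);  // remove the route originated by src for net n
 */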

/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). 'new' must be non-NULL */
static inline void
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}

static inline void
rte_discard(rte *old)	/* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}

/* Check the rtable for the best route to a given net,
   and whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* The rest is a stripped-down export_filter() */
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

  /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}


/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for the given routing table and
 * announce hook. The refresh cycle is a sequence where the protocol sends
 * all its valid routes to the routing table (by rte_update()). After that,
 * all protocol routes (more precisely routes with @c as @sender) not sent
 * during the refresh cycle but still in the table from the past are pruned.
 * This is implemented by marking all related routes with the REF_STALE flag
 * in rt_refresh_begin(), then marking all related stale routes with the
 * REF_DISCARD flag in rt_refresh_end(), and then removing such routes in
 * the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if (e->sender == c)
        e->flags |= REF_STALE;
  }
  FIB_WALK_END;
}

/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for the given routing table and announce
 * hook. See rt_refresh_begin() for a description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if ((e->sender == c) && (e->flags & REF_STALE))
      {
        e->flags |= REF_DISCARD;
        prune = 1;
      }
  }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
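
/*
 * Usage sketch (added; an assumption, not from the original source): a
 * protocol doing a graceful route refresh brackets its re-announcements
 * like this:
 *
 *   rt_refresh_begin(c->table, c);  // mark all our routes REF_STALE
 *   // ... rte_update2() for every currently valid route ...
 *   rt_refresh_end(c->table, c);    // still-stale routes get REF_DISCARD and are pruned
 */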


/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This function dumps the contents of a &rte to debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now - e->lastmod);
  rta_dump(e->attrs);
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}

/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps the contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      rte_dump(e);
  }
  FIB_WALK_END;
  debug("\n");
}

/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps the contents of all routing tables to debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;

  WALK_LIST(t, routing_tables)
    rt_dump(t);
}

static inline void
rt_schedule_hcu(rtable *tab)
{
  if (tab->hcu_scheduled)
    return;

  tab->hcu_scheduled = 1;
  ev_schedule(tab->rt_event);
}

static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == NHU_CLEAN)
    ev_schedule(tab->rt_event);

  /* state change:
   * NHU_CLEAN -> NHU_SCHEDULED
   * NHU_RUNNING -> NHU_DIRTY
   */
  tab->nhu_state |= NHU_SCHEDULED;
}
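
/*
 * Note (added for clarity; assumes the usual BIRD encoding): nhu_state is a
 * two-bit value -- NHU_CLEAN = 0, NHU_SCHEDULED = 1, NHU_RUNNING = 2,
 * NHU_DIRTY = 3 -- so the '|= NHU_SCHEDULED' above and the '&= 1' in
 * rt_next_hop_update() implement exactly the transitions listed in the
 * comments.
 */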

void
rt_schedule_prune(rtable *tab)
{
  if (tab->prune_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->prune_state |= 1;
}


static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}

void
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
{
  bzero(t, sizeof(*t));
  t->name = name;
  t->config = cf;
  t->addr_type = cf ? cf->addr_type : NET_IP4;
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
  init_list(&t->channels);

  if (cf)
  {
    t->rt_event = ev_new(p);
    t->rt_event->hook = rt_event;
    t->rt_event->data = t;
    t->gc_time = now;
  }
}

/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new(rt_table_pool, 4080);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}


/**
 * rt_prune_table - prune a routing table
 *
 * The prune loop scans routing tables and removes routes belonging to
 * flushing protocols, discarded routes and also stale network entries. It is
 * called from rt_event(). The event is rescheduled if the current iteration
 * does not finish the table. The pruning is directed by the prune state
 * (@prune_state), specifying whether the prune cycle is scheduled or running,
 * and there is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is also used for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 512;

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
        c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
  {
    rte *e;

  rescan:
    for (e = n->routes; e; e = e->next)
      if (e->sender->flush_active || (e->flags & REF_DISCARD))
      {
        if (limit <= 0)
        {
          FIB_ITERATE_PUT(fit);
          ev_schedule(tab->rt_event);
          return;
        }

        rte_discard(e);
        limit--;

        goto rescan;
      }

    if (!n->routes)		/* Orphaned FIB entry */
    {
      FIB_ITERATE_PUT(fit);
      fib_delete(&tab->fib, n);
      goto again;
    }
  }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->gc_counter = 0;
  tab->gc_time = now;

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
    {
      c->flush_active = 0;
      channel_set_state(c, CS_DOWN);
    }

  return;
}

void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol("master4"), NET_IP4);
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
}


/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */

static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
}

void
rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
{
  a->hostentry = he;
  a->dest = he->dest;
  a->igp_metric = he->igp_metric;

  if (a->dest != RTD_UNICAST)
  {
    /* No nexthop */
  no_nexthop:
    a->nh = (struct nexthop) {};
    if (mls)
    {	/* Store the label stack for later changes */
      a->nh.labels_orig = a->nh.labels = mls->len;
      memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
    }
    return;
  }

  if (((!mls) || (!mls->len)) && he->nexthop_linkable)
  {	/* Just link the nexthop chain, no label append happens. */
    memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
    return;
  }

  struct nexthop *nhp = NULL, *nhr = NULL;
  int skip_nexthop = 0;

  for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
  {
    if (skip_nexthop)
      skip_nexthop--;
    else
    {
      nhr = nhp;
      nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
    }

    nhp->iface = nh->iface;
    nhp->weight = nh->weight;
    if (mls)
    {
      nhp->labels = nh->labels + mls->len;
      nhp->labels_orig = mls->len;
      if (nhp->labels <= MPLS_MAX_LABEL_STACK)
      {
        memcpy(nhp->label, nh->label, nh->labels * sizeof(u32));	/* First the hostentry labels */
        memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32));	/* Then the bottom labels */
      }
      else
      {
        log(L_WARN "Sum of label stack sizes %d + %d = %d exceeds the allowed maximum (%d)",
            nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
        skip_nexthop++;
        continue;
      }
    }
    if (ipa_nonzero(nh->gw))
      nhp->gw = nh->gw;		/* Router nexthop */
    else if (ipa_nonzero(he->link))
      nhp->gw = he->link;	/* Device nexthop with link-local address known */
    else
      nhp->gw = he->addr;	/* Device nexthop with link-local address unknown */
  }

  if (skip_nexthop)
    if (nhr)
      nhr->next = NULL;
    else
    {
      a->dest = RTD_UNREACHABLE;
      log(L_WARN "No valid nexthop remaining, setting route unreachable");
      goto no_nexthop;
    }
}

static inline rte *
rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
{
  rta *a = alloca(RTA_MAX_SIZE);
  memcpy(a, old->attrs, rta_size(old->attrs));

  mpls_label_stack mls = { .len = a->nh.labels_orig };
  memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));

  rta_apply_hostentry(a, old->attrs->hostentry, &mls);
  a->aflags = 0;

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(a);

  return e;
}

static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
    {
      new = rt_next_hop_update_rte(tab, e);
      *k = new;

      rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

      /* Call a pre-comparison hook */
      /* Not really an efficient way to compute this */
      if (e->attrs->src->proto->rte_recalculate)
        e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

      if (e != old_best)
        rte_free_quick(e);
      else	/* Freeing of the old best rte is postponed */
        free_old_best = 1;

      e = new;
      count++;
    }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
  {
    if (!new_best || rte_better(e, *new_best))
      new_best = k;
  }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
  {
    *new_best = new->next;
    new->next = n->routes;
    n->routes = new;
  }

  /* Announce the new best route */
  if (new != old_best)
  {
    rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
    rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
  }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}

static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == NHU_CLEAN)
    return;

  if (tab->nhu_state == NHU_SCHEDULED)
  {
    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->nhu_state = NHU_RUNNING;
  }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
  {
    if (max_feed <= 0)
    {
      FIB_ITERATE_PUT(fit);
      ev_schedule(tab->rt_event);
      return;
    }
    max_feed -= rt_next_hop_update_net(tab, n);
  }
  FIB_ITERATE_END;

  /* State change:
   * NHU_DIRTY -> NHU_SCHEDULED
   * NHU_RUNNING -> NHU_CLEAN
   */
  tab->nhu_state &= 1;

  if (tab->nhu_state != NHU_CLEAN)
    ev_schedule(tab->rt_event);
}


struct rtable_config *
rt_new_table(struct symbol *s, uint addr_type)
{
  /* Hack that allows 'redefining' the master table */
  if ((s->class == SYM_TABLE) &&
      (s->def == new_config->def_tables[addr_type]) &&
      ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
    return s->def;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, c);
  c->name = s->name;
  c->addr_type = addr_type;
  c->gc_max_ops = 1000;
  c->gc_min_time = 5;

  add_tail(&new_config->tables, &c->n);

  /* The first table of each type is kept as the default */
  if (!new_config->def_tables[addr_type])
    new_config->def_tables[addr_type] = c;

  return c;
}

/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;
}

/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is, decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
  {
    struct config *conf = r->deleted;
    DBG("Deleting routing table %s\n", r->name);
    r->config->table = NULL;
    if (r->hostcache)
      rt_free_hostcache(r);
    rem_node(&r->n);
    fib_free(&r->fib);
    rfree(r->rt_event);
    mb_free(r);
    config_del_obstacle(conf);
  }
}
2042
2043 /**
2044 * rt_commit - commit new routing table configuration
2045 * @new: new configuration
2046 * @old: original configuration or %NULL if it's boot time config
2047 *
2048 * Scan the differences between the @old and @new configurations and modify
2049 * the routing tables accordingly. If @new defines a previously unknown
2050 * table, create it; if it omits a table existing in @old, schedule that
2051 * table for deletion (it gets deleted when all protocols disconnect from
2052 * it by calling rt_unlock_table()); if a table exists in both
2053 * configurations, leave it unchanged.
2054 */
2055 void
2056 rt_commit(struct config *new, struct config *old)
2057 {
2058 struct rtable_config *o, *r;
2059
2060 DBG("rt_commit:\n");
2061 if (old)
2062 {
2063 WALK_LIST(o, old->tables)
2064 {
2065 rtable *ot = o->table;
2066 if (!ot->deleted)
2067 {
2068 struct symbol *sym = cf_find_symbol(new, o->name);
2069 if (sym && sym->class == SYM_TABLE && !new->shutdown)
2070 {
2071 DBG("\t%s: same\n", o->name);
2072 r = sym->def;
2073 r->table = ot;
2074 ot->name = r->name;
2075 ot->config = r;
2076 if (o->sorted != r->sorted)
2077 log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
2078 }
2079 else
2080 {
2081 DBG("\t%s: deleted\n", o->name);
2082 ot->deleted = old;
2083 config_add_obstacle(old);
2084 rt_lock_table(ot);
2085 rt_unlock_table(ot);
2086 }
2087 }
2088 }
2089 }
2090
2091 WALK_LIST(r, new->tables)
2092 if (!r->table)
2093 {
2094 rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
2095 DBG("\t%s: created\n", r->name);
2096 rt_setup(rt_table_pool, t, r->name, r);
2097 add_tail(&routing_tables, &t->n);
2098 r->table = t;
2099 }
2100 DBG("\tdone\n");
2101 }
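/*
 * Worked example (illustrative table names): if @old defines tables
 * { master4, t1 } and @new defines { master4, t2 }, rt_commit() relinks
 * master4 to the new config, marks t1 as deleted (freed once the last
 * protocol calls rt_unlock_table() on it) and allocates a fresh rtable
 * for t2 in the loop above.
 */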
2102
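/*
 * Dispatch on the channel's announcement mode (values from nest/route.h):
 * RA_ACCEPTED feeds the first route accepted by the export filters,
 * RA_MERGED feeds all accepted routes merged into one, and the final else
 * covers the basic modes (RA_OPTIMAL announcing only the best route,
 * RA_ANY announcing every route) via rt_notify_basic().
 */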
2103 static inline void
2104 do_feed_channel(struct channel *c, net *n, rte *e)
2105 {
2106 rte_update_lock();
2107 if (c->ra_mode == RA_ACCEPTED)
2108 rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
2109 else if (c->ra_mode == RA_MERGED)
2110 rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
2111 else /* RA_BASIC */
2112 rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
2113 rte_update_unlock();
2114 }
2115
2116 /**
2117 * rt_feed_channel - advertise all routes to a channel
2118 * @c: channel to be fed
2119 *
2120 * This function performs one pass of advertisement of routes to a channel in
2121 * the ES_FEEDING state. The protocol code calls it as long as it has something
2122 * to do, i.e. until it returns 1 (done) instead of 0. (We avoid transferring
2123 * all the routes in a single pass in order not to monopolize CPU time.)
2124 */
2125 int
2126 rt_feed_channel(struct channel *c)
2127 {
2128 struct fib_iterator *fit = &c->feed_fit;
2129 int max_feed = 256;
2130
2131 ASSERT(c->export_state == ES_FEEDING);
2132
2133 if (!c->feed_active)
2134 {
2135 FIB_ITERATE_INIT(fit, &c->table->fib);
2136 c->feed_active = 1;
2137 }
2138
2139 FIB_ITERATE_START(&c->table->fib, fit, net, n)
2140 {
2141 rte *e = n->routes;
2142 if (max_feed <= 0)
2143 {
2144 FIB_ITERATE_PUT(fit);
2145 return 0;
2146 }
2147
2148 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2149
2150 if ((c->ra_mode == RA_OPTIMAL) ||
2151 (c->ra_mode == RA_ACCEPTED) ||
2152 (c->ra_mode == RA_MERGED))
2153 if (rte_is_valid(e))
2154 {
2155 /* In the meantime, the protocol may have gone down */
2156 if (c->export_state != ES_FEEDING)
2157 goto done;
2158
2159 do_feed_channel(c, n, e);
2160 max_feed--;
2161 }
2162
2163 if (c->ra_mode == RA_ANY)
2164 for (e = n->routes; e; e = e->next)
2165 {
2166 /* In the meantime, the protocol may have gone down */
2167 if (c->export_state != ES_FEEDING)
2168 goto done;
2169
2170 if (!rte_is_valid(e))
2171 continue;
2172
2173 do_feed_channel(c, n, e);
2174 max_feed--;
2175 }
2176 }
2177 FIB_ITERATE_END;
2178
2179 done:
2180 c->feed_active = 0;
2181 return 1;
2182 }
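/*
 * Sketch of the expected calling pattern (illustrative; the real driver
 * lives in the protocol/channel code):
 *
 *   while (c->export_state == ES_FEEDING)
 *     if (rt_feed_channel(c))
 *       break;   // 1 = feeding finished; 0 = yielded, call again later
 */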
2183
2184 /**
2185 * rt_feed_channel_abort - abort advertising routes to a channel
2186 * @c: channel
2187 *
2188 * This function is called by the protocol code when the protocol stops or
2189 * ceases to exist during feeding.
2190 */
2191 void
2192 rt_feed_channel_abort(struct channel *c)
2193 {
2194 if (c->feed_active)
2195 {
2196 /* Unlink the iterator */
2197 fit_get(&c->table->fib, &c->feed_fit);
2198 c->feed_active = 0;
2199 }
2200 }
2201
2202 static inline unsigned
2203 ptr_hash(void *ptr)
2204 {
2205 uintptr_t p = (uintptr_t) ptr;
2206 return p ^ (p << 8) ^ (p >> 16);
2207 }
2208
2209 static inline u32
2210 hc_hash(ip_addr a, rtable *dep)
2211 {
2212 return ipa_hash(a) ^ ptr_hash(dep);
2213 }
2214
2215 static inline void
2216 hc_insert(struct hostcache *hc, struct hostentry *he)
2217 {
2218 uint k = he->hash_key >> hc->hash_shift;
2219 he->next = hc->hash_table[k];
2220 hc->hash_table[k] = he;
2221 }
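/* The bucket index uses the TOP hash_order bits of the 32-bit key
 * (hash_shift = 32 - hash_order, see hc_alloc_table() below), so e.g.
 * with order 10 the key 0xABCD1234 lands in bucket
 * 0xABCD1234 >> 22 = 0x2AF. */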
2222
2223 static inline void
2224 hc_remove(struct hostcache *hc, struct hostentry *he)
2225 {
2226 struct hostentry **hep;
2227 uint k = he->hash_key >> hc->hash_shift;
2228
2229 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2230 *hep = he->next;
2231 }
2232
2233 #define HC_DEF_ORDER 10
2234 #define HC_HI_MARK *4
2235 #define HC_HI_STEP 2
2236 #define HC_HI_ORDER 16 /* Must be at most 16 */
2237 #define HC_LO_MARK /5
2238 #define HC_LO_STEP 2
2239 #define HC_LO_ORDER 10
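/* The *_MARK macros are spliced directly after a size expression in
 * hc_alloc_table(), so e.g. at order 12 (hsize 4096) the table grows
 * above hash_max = 4096 *4 = 16384 entries and shrinks below
 * hash_min = 4096 /5 = 819; at the boundary orders the limits are
 * clamped to ~0U and 0 instead. */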
2240
2241 static void
2242 hc_alloc_table(struct hostcache *hc, unsigned order)
2243 {
2244 uint hsize = 1 << order;
2245 hc->hash_order = order;
2246 hc->hash_shift = 32 - order;
2247 hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
2248 hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);
2249
2250 hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2251 }
2252
2253 static void
2254 hc_resize(struct hostcache *hc, unsigned new_order)
2255 {
2256 struct hostentry **old_table = hc->hash_table;
2257 struct hostentry *he, *hen;
2258 uint old_size = 1 << hc->hash_order;
2259 uint i;
2260
2261 hc_alloc_table(hc, new_order);
2262 for (i = 0; i < old_size; i++)
2263 for (he = old_table[i]; he != NULL; he = hen)
2264 {
2265 hen = he->next;
2266 hc_insert(hc, he);
2267 }
2268 mb_free(old_table);
2269 }
2270
2271 static struct hostentry *
2272 hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2273 {
2274 struct hostentry *he = sl_alloc(hc->slab);
2275
2276 *he = (struct hostentry) {
2277 .addr = a,
2278 .link = ll,
2279 .tab = dep,
2280 .hash_key = k,
2281 };
2282
2283 add_tail(&hc->hostentries, &he->ln);
2284 hc_insert(hc, he);
2285
2286 hc->hash_items++;
2287 if (hc->hash_items > hc->hash_max)
2288 hc_resize(hc, hc->hash_order + HC_HI_STEP);
2289
2290 return he;
2291 }
2292
2293 static void
2294 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2295 {
2296 rta_free(he->src);
2297
2298 rem_node(&he->ln);
2299 hc_remove(hc, he);
2300 sl_free(hc->slab, he);
2301
2302 hc->hash_items--;
2303 if (hc->hash_items < hc->hash_min)
2304 hc_resize(hc, hc->hash_order - HC_LO_STEP);
2305 }
2306
2307 static void
2308 rt_init_hostcache(rtable *tab)
2309 {
2310 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2311 init_list(&hc->hostentries);
2312
2313 hc->hash_items = 0;
2314 hc_alloc_table(hc, HC_DEF_ORDER);
2315 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2316
2317 hc->lp = lp_new(rt_table_pool, 1008);
2318 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2319
2320 tab->hostcache = hc;
2321 }
2322
2323 static void
2324 rt_free_hostcache(rtable *tab)
2325 {
2326 struct hostcache *hc = tab->hostcache;
2327
2328 node *n;
2329 WALK_LIST(n, hc->hostentries)
2330 {
2331 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2332 rta_free(he->src);
2333
2334 if (he->uc)
2335 log(L_ERR "Hostcache is not empty in table %s", tab->name);
2336 }
2337
2338 rfree(hc->slab);
2339 rfree(hc->lp);
2340 mb_free(hc->hash_table);
2341 mb_free(hc);
2342 }
2343
2344 static void
2345 rt_notify_hostcache(rtable *tab, net *net)
2346 {
2347 if (tab->hcu_scheduled)
2348 return;
2349
2350 if (trie_match_net(tab->hostcache->trie, net->n.addr))
2351 rt_schedule_hcu(tab);
2352 }
2353
2354 static int
2355 if_local_addr(ip_addr a, struct iface *i)
2356 {
2357 struct ifa *b;
2358
2359 WALK_LIST(b, i->addrs)
2360 if (ipa_equal(a, b->ip))
2361 return 1;
2362
2363 return 0;
2364 }
2365
2366 static u32
2367 rt_get_igp_metric(rte *rt)
2368 {
2369 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2370
2371 if (ea)
2372 return ea->u.data;
2373
2374 rta *a = rt->attrs;
2375
2376 #ifdef CONFIG_OSPF
2377 if ((a->source == RTS_OSPF) ||
2378 (a->source == RTS_OSPF_IA) ||
2379 (a->source == RTS_OSPF_EXT1))
2380 return rt->u.ospf.metric1;
2381 #endif
2382
2383 #ifdef CONFIG_RIP
2384 if (a->source == RTS_RIP)
2385 return rt->u.rip.metric;
2386 #endif
2387
2388 if (a->source == RTS_DEVICE)
2389 return 0;
2390
2391 return IGP_METRIC_UNKNOWN;
2392 }
2393
2394 static int
2395 rt_update_hostentry(rtable *tab, struct hostentry *he)
2396 {
2397 rta *old_src = he->src;
2398 int pxlen = 0;
2399
2400 /* Reset the hostentry */
2401 he->src = NULL;
2402 he->nexthop_linkable = 0;
2403 he->dest = RTD_UNREACHABLE;
2404 he->igp_metric = 0;
2405
2406 net_addr he_addr;
2407 net_fill_ip_host(&he_addr, he->addr);
2408 net *n = net_route(tab, &he_addr);
2409 if (n)
2410 {
2411 rte *e = n->routes;
2412 rta *a = e->attrs;
2413 pxlen = n->n.addr->pxlen;
2414
2415 if (a->hostentry)
2416 {
2417 /* Recursive route should not depend on another recursive route */
2418 log(L_WARN "Next hop address %I resolvable through recursive route for %N",
2419 he->addr, n->n.addr);
2420 goto done;
2421 }
2422
2423 he->dest = a->dest;
2424 he->nexthop_linkable = 1;
2425 if (he->dest == RTD_UNICAST)
2426 {
2427 for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
2428 if (ipa_zero(nh->gw))
2429 {
2430 if (if_local_addr(he->addr, nh->iface))
2431 {
2432 /* The host address is a local address, which is not valid */
2433 log(L_WARN "Next hop address %I is a local address of iface %s",
2434 he->addr, nh->iface->name);
2435 goto done;
2436 }
2437
2438 he->nexthop_linkable = 0;
2439 break;
2440 }
2441 }
2442
2443 he->src = rta_clone(a);
2444 he->igp_metric = rt_get_igp_metric(e);
2445 }
2446
2447 done:
2448 /* Add a prefix range to the trie */
2449 trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
2450
2451 rta_free(old_src);
2452 return old_src != he->src;
2453 }
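/*
 * Worked example (illustrative addresses): resolving he->addr = 10.1.1.1
 * against a table whose best match is 10.1.0.0/16 sets pxlen = 16, takes
 * a reference to the route's rta via rta_clone(), and records the prefix
 * range /16../32 for 10.1.1.1/32 in the trie, so a later update of any
 * more specific prefix in that range retriggers resolution through
 * rt_notify_hostcache().
 */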
2454
2455 static void
2456 rt_update_hostcache(rtable *tab)
2457 {
2458 struct hostcache *hc = tab->hostcache;
2459 struct hostentry *he;
2460 node *n, *x;
2461
2462 /* Reset the trie */
2463 lp_flush(hc->lp);
2464 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2465
2466 WALK_LIST_DELSAFE(n, x, hc->hostentries)
2467 {
2468 he = SKIP_BACK(struct hostentry, ln, n);
2469 if (!he->uc)
2470 {
2471 hc_delete_hostentry(hc, he);
2472 continue;
2473 }
2474
2475 if (rt_update_hostentry(tab, he))
2476 rt_schedule_nhu(he->tab);
2477 }
2478
2479 tab->hcu_scheduled = 0;
2480 }
2481
2482 struct hostentry *
2483 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2484 {
2485 struct hostentry *he;
2486
2487 if (!tab->hostcache)
2488 rt_init_hostcache(tab);
2489
2490 u32 k = hc_hash(a, dep);
2491 struct hostcache *hc = tab->hostcache;
2492 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2493 if (ipa_equal(he->addr, a) && (he->tab == dep))
2494 return he;
2495
2496 he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
2497 rt_update_hostentry(tab, he);
2498 return he;
2499 }
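/*
 * Usage sketch (hypothetical variable names): a protocol resolving a
 * recursive next hop in an IGP table would do something like
 *
 *   struct hostentry *he = rt_get_hostentry(igp_table, gw, IPA_NONE, c->table);
 *   // he->dest, he->src and he->igp_metric now mirror the best route for
 *   // gw in igp_table; c->table gets a next-hop update when that changes
 */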
2500
2501
2502 /*
2503 * CLI commands
2504 */
2505
2506 static void
2507 rt_show_table(struct cli *c, struct rt_show_data *d)
2508 {
2509 /* No table blocks in 'show route count' */
2510 if (d->stats == 2)
2511 return;
2512
2513 if (d->last_table) cli_printf(c, -1007, "");
2514 cli_printf(c, -1007, "Table %s:", d->tab->table->name);
2515 d->last_table = d->tab;
2516 }
2517
2518 static void
2519 rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
2520 {
2521 byte from[IPA_MAX_TEXT_LENGTH+8];
2522 byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
2523 rta *a = e->attrs;
2524 int primary = (e->net->routes == e);
2525 int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
2526 void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
2527 struct nexthop *nh;
2528
2529 tm_format_datetime(tm, &config->tf_route, e->lastmod);
2530 if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw))
2531 bsprintf(from, " from %I", a->from);
2532 else
2533 from[0] = 0;
2534
2535 get_route_info = a->src->proto->proto->get_route_info;
2536 if (get_route_info || d->verbose)
2537 {
2538 /* Need to normalize the extended attributes */
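/* (ea_scan() sizes the flattened list, ea_merge() copies the chained
 * segments into that buffer, ea_sort() orders and prunes the result) */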
2539 ea_list *t = tmpa;
2540 t = ea_append(t, a->eattrs);
2541 tmpa = alloca(ea_scan(t));
2542 ea_merge(t, tmpa);
2543 ea_sort(tmpa);
2544 }
2545 if (get_route_info)
2546 get_route_info(e, info, tmpa);
2547 else
2548 bsprintf(info, " (%d)", e->pref);
2549
2550 if (d->last_table != d->tab)
2551 rt_show_table(c, d);
2552
2553 cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest),
2554 a->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info);
2555
2556 if (a->dest == RTD_UNICAST)
2557 for (nh = &(a->nh); nh; nh = nh->next)
2558 {
2559 char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls;
2560
2561 if (nh->labels)
2562 {
2563 lsp += bsprintf(lsp, " mpls %d", nh->label[0]);
2564 for (int i = 1; i < nh->labels; i++)
2565 lsp += bsprintf(lsp, "/%d", nh->label[i]);
2566 }
2567 *lsp = '\0';
2568
2569 if (a->nh.next)
2570 cli_printf(c, -1007, "\tvia %I%s on %s weight %d", nh->gw, mpls, nh->iface->name, nh->weight + 1);
2571 else
2572 cli_printf(c, -1007, "\tvia %I%s on %s", nh->gw, mpls, nh->iface->name);
2573 }
2574
2575 if (d->verbose)
2576 rta_show(c, a, tmpa);
2577 }
2578
2579 static void
2580 rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
2581 {
2582 rte *e, *ee;
2583 byte ia[NET_MAX_TEXT_LENGTH+1];
2584 struct ea_list *tmpa;
2585 struct channel *ec = d->tab->export_channel;
2586 int first = 1;
2587 int pass = 0;
2588
2589 bsnprintf(ia, sizeof(ia), "%N", n->n.addr);
2590
2591 for (e = n->routes; e; e = e->next)
2592 {
2593 if (rte_is_filtered(e) != d->filtered)
2594 continue;
2595
2596 d->rt_counter++;
2597 d->net_counter += first;
2598 first = 0;
2599
2600 if (pass)
2601 continue;
2602
2603 ee = e;
2604 rte_update_lock(); /* We use the update buffer for filtering */
2605 tmpa = make_tmp_attrs(e, rte_update_pool);
2606
2607 /* Export channel is down, do not try to export routes to it */
2608 if (ec && (ec->export_state == ES_DOWN))
2609 goto skip;
2610
2611 /* Special case for merged export */
2612 if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED))
2613 {
2614 rte *rt_free;
2615 e = rt_export_merged(ec, n, &rt_free, &tmpa, rte_update_pool, 1);
2616 pass = 1;
2617
2618 if (!e)
2619 { e = ee; goto skip; }
2620 }
2621 else if (d->export_mode)
2622 {
2623 struct proto *ep = ec->proto;
2624 int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
2625
2626 if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED)
2627 pass = 1;
2628
2629 if (ic < 0)
2630 goto skip;
2631
2632 if (d->export_mode > RSEM_PREEXPORT)
2633 {
2634 /*
2635 * FIXME - This shows what should be exported according to the current
2636 * filters, but not what was really exported. The 'configure soft'
2637 * command may change the export filter without updating the routes.
2638 */
2639 int do_export = (ic > 0) ||
2640 (f_run(ec->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
2641
2642 if (do_export != (d->export_mode == RSEM_EXPORT))
2643 goto skip;
2644
2645 if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_ACCEPTED))
2646 pass = 1;
2647 }
2648 }
2649
2650 if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
2651 goto skip;
2652
2653 if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
2654 goto skip;
2655
2656 if (d->stats < 2)
2657 rt_show_rte(c, ia, e, d, tmpa);
2658
2659 d->show_counter++;
2660 ia[0] = 0;
2661
2662 skip:
2663 if (e != ee)
2664 {
2665 rte_free(e);
2666 e = ee;
2667 }
2668 rte_update_unlock();
2669
2670 if (d->primary_only)
2671 break;
2672 }
2673 }
2674
2675 static void
2676 rt_show_cleanup(struct cli *c)
2677 {
2678 struct rt_show_data *d = c->rover;
2679 struct rt_show_data_rtable *tab;
2680
2681 /* Unlink the iterator */
2682 if (d->table_open)
2683 fit_get(&d->tab->table->fib, &d->fit);
2684
2685 /* Unlock referenced tables */
2686 WALK_LIST(tab, d->tables)
2687 rt_unlock_table(tab->table);
2688 }
2689
2690 static void
2691 rt_show_cont(struct cli *c)
2692 {
2693 struct rt_show_data *d = c->rover;
2694 #ifdef DEBUGGING
2695 unsigned max = 4;
2696 #else
2697 unsigned max = 64;
2698 #endif
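/* Show at most 'max' networks per continuation step; the iterator is
 * parked with FIB_ITERATE_PUT() and this function is re-entered from the
 * CLI event loop, so a long listing cannot block other clients. */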
2699 struct fib *fib = &d->tab->table->fib;
2700 struct fib_iterator *it = &d->fit;
2701
2702 if (d->running_on_config && (d->running_on_config != config))
2703 {
2704 cli_printf(c, 8004, "Stopped due to reconfiguration");
2705 goto done;
2706 }
2707
2708 if (!d->table_open)
2709 {
2710 FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib);
2711 d->table_open = 1;
2712 d->table_counter++;
2713
2714 d->show_counter_last = d->show_counter;
2715 d->rt_counter_last = d->rt_counter;
2716 d->net_counter_last = d->net_counter;
2717
2718 if (d->tables_defined_by & RSD_TDB_SET)
2719 rt_show_table(c, d);
2720 }
2721
2722 FIB_ITERATE_START(fib, it, net, n)
2723 {
2724 if (!max--)
2725 {
2726 FIB_ITERATE_PUT(it);
2727 return;
2728 }
2729 rt_show_net(c, n, d);
2730 }
2731 FIB_ITERATE_END;
2732
2733 if (d->stats)
2734 {
2735 if (d->last_table != d->tab)
2736 rt_show_table(c, d);
2737
2738 cli_printf(c, -1007, "%d of %d routes for %d networks in table %s",
2739 d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last,
2740 d->net_counter - d->net_counter_last, d->tab->table->name);
2741 }
2742
2743 d->table_open = 0;
2744 d->tab = NODE_NEXT(d->tab);
2745
2746 if (NODE_VALID(d->tab))
2747 return;
2748
2749 if (d->stats && (d->table_counter > 1))
2750 {
2751 if (d->last_table) cli_printf(c, -1007, "");
2752 cli_printf(c, 14, "Total: %d of %d routes for %d networks in %d tables",
2753 d->show_counter, d->rt_counter, d->net_counter, d->table_counter);
2754 }
2755 else
2756 cli_printf(c, 0, "");
2757
2758 done:
2759 rt_show_cleanup(c);
2760 c->cont = c->cleanup = NULL;
2761 }
2762
2763 struct rt_show_data_rtable *
2764 rt_show_add_table(struct rt_show_data *d, rtable *t)
2765 {
2766 struct rt_show_data_rtable *tab = cfg_allocz(sizeof(struct rt_show_data_rtable));
2767 tab->table = t;
2768 add_tail(&(d->tables), &(tab->n));
2769 return tab;
2770 }
2771
2772 static inline void
2773 rt_show_get_default_tables(struct rt_show_data *d)
2774 {
2775 struct channel *c;
2776 struct rt_show_data_rtable *tab;
2777
2778 if (d->export_channel)
2779 {
2780 c = d->export_channel;
2781 tab = rt_show_add_table(d, c->table);
2782 tab->export_channel = c;
2783 return;
2784 }
2785
2786 if (d->export_protocol)
2787 {
2788 WALK_LIST(c, d->export_protocol->channels)
2789 {
2790 if (c->export_state == ES_DOWN)
2791 continue;
2792
2793 tab = rt_show_add_table(d, c->table);
2794 tab->export_channel = c;
2795 }
2796 return;
2797 }
2798
2799 if (d->show_protocol)
2800 {
2801 WALK_LIST(c, d->show_protocol->channels)
2802 rt_show_add_table(d, c->table);
2803 return;
2804 }
2805
2806 for (int i = 1; i < NET_MAX; i++)
2807 if (config->def_tables[i])
2808 rt_show_add_table(d, config->def_tables[i]->table);
2809 }
2810
2811 static inline void
2812 rt_show_prepare_tables(struct rt_show_data *d)
2813 {
2814 struct rt_show_data_rtable *tab, *tabx;
2815
2816 /* Add implicit tables if no table is specified */
2817 if (EMPTY_LIST(d->tables))
2818 rt_show_get_default_tables(d);
2819
2820 WALK_LIST_DELSAFE(tab, tabx, d->tables)
2821 {
2822 /* Ensure there is defined export_channel for each table */
2823 if (d->export_mode)
2824 {
2825 if (!tab->export_channel && d->export_channel &&
2826 (tab->table == d->export_channel->table))
2827 tab->export_channel = d->export_channel;
2828
2829 if (!tab->export_channel && d->export_protocol)
2830 tab->export_channel = proto_find_channel_by_table(d->export_protocol, tab->table);
2831
2832 if (!tab->export_channel)
2833 {
2834 if (d->tables_defined_by & RSD_TDB_NMN)
2835 cf_error("No export channel for table %s", tab->table->name);
2836
2837 rem_node(&(tab->n));
2838 continue;
2839 }
2840 }
2841
2842 /* Ensure specified network is compatible with each table */
2843 if (d->addr && (tab->table->addr_type != d->addr->type))
2844 {
2845 if (d->tables_defined_by & RSD_TDB_NMN)
2846 cf_error("Incompatible type of prefix/ip for table %s", tab->table->name);
2847
2848 rem_node(&(tab->n));
2849 continue;
2850 }
2851 }
2852
2853 /* Ensure there is at least one table */
2854 if (EMPTY_LIST(d->tables))
2855 cf_error("No valid tables");
2856 }
2857
2858 void
2859 rt_show(struct rt_show_data *d)
2860 {
2861 struct rt_show_data_rtable *tab;
2862 net *n;
2863
2864 /* Filtered routes are neither exported nor sensibly ordered */
2865 if (d->filtered && (d->export_mode || d->primary_only))
2866 cf_error("Incompatible show route options");
2867
2868 rt_show_prepare_tables(d);
2869
2870 if (!d->addr)
2871 {
2872 WALK_LIST(tab, d->tables)
2873 rt_lock_table(tab->table);
2874
2875 /* There is at least one table */
2876 d->tab = HEAD(d->tables);
2877 this_cli->cont = rt_show_cont;
2878 this_cli->cleanup = rt_show_cleanup;
2879 this_cli->rover = d;
2880 }
2881 else
2882 {
2883 WALK_LIST(tab, d->tables)
2884 {
2885 d->tab = tab;
2886
2887 if (d->show_for)
2888 n = net_route(tab->table, d->addr);
2889 else
2890 n = net_find(tab->table, d->addr);
2891
2892 if (n)
2893 rt_show_net(this_cli, n, d);
2894 }
2895
2896 if (d->rt_counter)
2897 cli_msg(0, "");
2898 else
2899 cli_msg(8001, "Network not found");
2900 }
2901 }
2902
2903 /*
2904 * Documentation for functions declared inline in route.h
2905 */
2906 #if 0
2907
2908 /**
2909 * net_find - find a network entry
2910 * @tab: a routing table
2911 * @addr: address of the network
2912 *
2913 * net_find() looks up the given network in routing table @tab and
2914 * returns a pointer to its &net entry or %NULL if no such network
2915 * exists.
2916 */
2917 static inline net *net_find(rtable *tab, net_addr *addr)
2918 { DUMMY; }
2919
2920 /**
2921 * net_get - obtain a network entry
2922 * @tab: a routing table
2923 * @addr: address of the network
2924 *
2925 * net_get() looks up the given network in routing table @tab and
2926 * returns a pointer to its &net entry. If no such entry exists, it's
2927 * created.
2928 */
2929 static inline net *net_get(rtable *tab, net_addr *addr)
2930 { DUMMY; }
2931
2932 /**
2933 * rte_cow - copy a route for writing
2934 * @r: a route entry to be copied
2935 *
2936 * rte_cow() takes a &rte and prepares it for modification. The exact action
2937 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2938 * just returned unchanged, else a new temporary entry with the same contents
2939 * is created.
2940 *
2941 * The primary use of this function is inside the filter machinery -- when
2942 * a filter wants to modify &rte contents (to change the preference or to
2943 * attach another set of attributes), it must ensure that the &rte is not
2944 * shared with anyone else (and especially that it isn't stored in any routing
2945 * table).
2946 *
2947 * Result: a pointer to the new writable &rte.
2948 */
2949 static inline rte * rte_cow(rte *r)
2950 { DUMMY; }
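
/* A minimal sketch of the intended use (inside a filter-like context,
 * variable names hypothetical):
 *
 *   rte *w = rte_cow(r);   // now guaranteed private, safe to modify
 *   w->pref = 120;         // e.g. adjust preference before further use
 */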
2951
2952 #endif