/*
 *	BIRD -- Routing Tables
 *
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

/**
 * DOC: Routing tables
 *
 * Routing tables are probably the most important structures BIRD uses. They
 * hold all the information about known networks, the associated routes and
 * their attributes.
 *
 * There are multiple routing tables (a primary one together with any
 * number of secondary ones if requested by the configuration). Each table
 * is basically a FIB containing entries describing the individual
 * destination networks. For each network (represented by structure &net),
 * there is a one-way linked list of route entries (&rte), the first entry
 * on the list being the best one (i.e., the one we currently use
 * for routing); the order of the other ones is undetermined.
 *
 * The &rte contains information specific to the route (preference, protocol
 * metrics, time of last modification etc.) and a pointer to a &rta structure
 * (see the route attribute module for a precise explanation) holding the
 * remaining route attributes which are expected to be shared by multiple
 * routes in order to conserve memory.
 */
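
/*
 * A minimal illustrative sketch (added commentary, not part of the
 * original sources) of how the structures described above are
 * traversed; the names follow nest/route.h and the helper below is
 * hypothetical:
 *
 *   static void
 *   dump_net(rtable *tab, const net_addr *addr)
 *   {
 *     net *n = net_find(tab, addr);          // FIB lookup of the network node
 *     for (rte *e = n ? n->routes : NULL; e; e = e->next)
 *       rte_dump(e);                         // the first entry is the best route
 *   }
 */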

#undef LOCAL_DEBUG

#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/cli.h"
#include "nest/iface.h"
#include "lib/resource.h"
#include "lib/event.h"
#include "lib/string.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "lib/alloca.h"

pool *rt_table_pool;

static slab *rte_slab;
static linpool *rte_update_pool;

static list routing_tables;

static byte *rt_format_via(rte *e);
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);

static inline struct ea_list *
make_tmp_attrs(struct rte *rt, struct linpool *pool)
{
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
  mta = rt->attrs->src->proto->make_tmp_attrs;
  return mta ? mta(rt, pool) : NULL;	/* use the pool argument, not the global rte_update_pool */
}


/* Like fib_route(), but skips empty net entries */
static inline void *
net_route_ip4(struct fib *f, net_addr_ip4 *n)
{
  net *r;

  while (r = fib_find(f, (net_addr *) n),
	 !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
  {
    n->pxlen--;
    ip4_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

static inline void *
net_route_ip6(struct fib *f, net_addr_ip6 *n)
{
  net *r;

  while (r = fib_find(f, (net_addr *) n),
	 !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
  {
    n->pxlen--;
    ip6_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

void *
net_route(rtable *tab, const net_addr *n)
{
  ASSERT(tab->addr_type == n->type);

  net_addr *n0 = alloca(n->length);
  net_copy(n0, n);

  switch (n->type)
  {
  case NET_IP4:
  case NET_VPN4:
  case NET_ROA4:
    return net_route_ip4(&tab->fib, (net_addr_ip4 *) n0);

  case NET_IP6:
  case NET_VPN6:
  case NET_ROA6:
    return net_route_ip6(&tab->fib, (net_addr_ip6 *) n0);

  default:
    return NULL;
  }
}
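
/*
 * Usage sketch (added for illustration, not in the original file):
 * net_route() performs a longest-prefix-match lookup, so it is
 * typically called with a host prefix and walks up to shorter
 * prefixes until a net with a valid route is found; some_ip4_addr
 * is a placeholder:
 *
 *   net_addr_ip4 a = NET_ADDR_IP4(some_ip4_addr, 32);
 *   net *n = net_route(tab, (net_addr *) &a);  // may match e.g. a /24 covering it
 */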


static int
net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
      {
	anything = 1;
	if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
	  return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

static int
net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
      {
	anything = 1;
	if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
	  return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

/**
 * roa_check - check validity of route origination in a ROA table
 * @tab: ROA table
 * @n: network prefix to check
 * @asn: AS number of network prefix
 *
 * Implements RFC 6483 route validation for the given network prefix. The
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
 * a candidate ROA with a matching ASN and a maxlen field greater than or equal
 * to the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID.
 * If the caller cannot determine the origin AS, 0 may be used (in that case
 * ROA_VALID cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6,
 * network @n must have type NET_IP4 or NET_IP6, respectively.
 */
int
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
{
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
    return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
    return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
  else
    return ROA_UNKNOWN;	/* Should not happen */
}
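
/*
 * Usage sketch (added commentary, not part of the original sources):
 * a caller that knows both the announced prefix @n and its origin AS
 * would typically act on the three possible results like this;
 * tab_roa and origin_asn are placeholders:
 *
 *   switch (net_roa_check(tab_roa, n, origin_asn))
 *   {
 *   case ROA_VALID:   break;          // accept the route
 *   case ROA_INVALID: break;          // reject or depreference it
 *   case ROA_UNKNOWN: break;          // no covering ROA, apply local policy
 *   }
 */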

/**
 * rte_find - find a route
 * @net: network node
 * @src: route source
 *
 * The rte_find() function returns a route for destination @net
 * which is from route source @src.
 */
rte *
rte_find(net *net, struct rte_src *src)
{
  rte *e = net->routes;

  while (e && e->attrs->src != src)
    e = e->next;
  return e;
}

/**
 * rte_get_temp - get a temporary &rte
 * @a: attributes to assign to the new route (a &rta; in case it's
 * un-cached, rte_update() will create a cached copy automatically)
 *
 * Create a temporary &rte and bind it with the attributes @a.
 * Also set route preference to the default preference set for
 * the protocol.
 */
rte *
rte_get_temp(rta *a)
{
  rte *e = sl_alloc(rte_slab);

  e->attrs = a;
  e->flags = 0;
  e->pref = 0;
  return e;
}

rte *
rte_do_cow(rte *r)
{
  rte *e = sl_alloc(rte_slab);

  memcpy(e, r, sizeof(rte));
  e->attrs = rta_clone(r->attrs);
  e->flags = 0;
  return e;
}

/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and its associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in which case they are returned unchanged. Second, &rte is a
 * private copy, but &rta is cached, in which case &rta is duplicated using
 * rta_do_cow(). Third, &rte is shared and &rta is cached, in which case
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, the cached &rta loses one reference, while the
 * private copy created by rta_do_cow() is a shallow copy sharing indirect data
 * (eattrs, nexthops, ...) with it. To work properly, the original shared &rta
 * should have another reference during the life of the created private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  if (!rta_is_cached(r->attrs))
    return r;

  rte *e = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(e->attrs);
  e->attrs = a;
  return e;
}
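
/*
 * Usage sketch (added, not in the original sources): a caller that
 * wants to rewrite an attribute of a possibly shared route obtains
 * private copies first and may then modify them freely:
 *
 *   rte *e = rte_cow_rta(r, lp);   // now e and e->attrs are private
 *   e->attrs->igp_metric = 10;     // safe - no other route sees this rta
 */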

static int				/* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
  int (*better)(rte *, rte *);

  if (!rte_is_valid(old))
    return 1;
  if (!rte_is_valid(new))
    return 0;

  if (new->pref > old->pref)
    return 1;
  if (new->pref < old->pref)
    return 0;
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
  {
    /*
     * If the user has configured protocol preferences, so that two different
     * protocols have the same preference, try to break the tie by comparing
     * addresses. Not too useful, but keeps the ordering of routes unambiguous.
     */
    return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
  }
  if (better = new->attrs->src->proto->rte_better)
    return better(new, old);
  return 0;
}

static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->pref != sec->pref)
    return 0;

  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
    return 0;

  if (mergable = pri->attrs->src->proto->rte_mergable)
    return mergable(pri, sec);

  return 0;
}

static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rt_format_via(e));
}

static inline void
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '>', msg);
}

static inline void
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '<', msg);
}

static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, pool);

  v = p->import_control ? p->import_control(p, &rt, tmpa, pool) : 0;
  if (v < 0)
  {
    if (silent)
      goto reject;

    stats->exp_updates_rejected++;
    if (v == RIC_REJECT)
      rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
    goto reject;
  }
  if (v > 0)
  {
    if (!silent)
      rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
    goto accept;
  }

  v = filter && ((filter == FILTER_REJECT) ||
		 (f_run(filter, &rt, tmpa, pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
  {
    if (silent)
      goto reject;

    stats->exp_updates_filtered++;
    rte_trace_out(D_FILTERS, p, rt, "filtered out");
    goto reject;
  }

accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}

static inline rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent);
}

static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;


  /*
   * First, apply export limit.
   *
   * Export route limits have several problems. Because the exp_routes
   * counter is reset before refeed, we don't really know whether the
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * A minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decreases.
   *
   * The second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by the limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating the exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * A similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route is received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct channel_limit *l = &c->out_limit;
  if (l->action && new)
  {
    if ((!old || refeed) && (stats->exp_routes >= l->limit))
      channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

    if (l->state == PLS_BLOCKED)
    {
      stats->exp_routes++;	/* see note above */
      stats->exp_updates_rejected++;
      rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
      new = NULL;

      if (!old)
	return;
    }
  }


  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
  {
    if (new && old)
      rte_trace_out(D_ROUTES, p, new, "replaced");
    else if (new)
      rte_trace_out(D_ROUTES, p, new, "added");
    else if (old)
      rte_trace_out(D_ROUTES, p, old, "removed");
  }
  if (!new)
    p->rt_notify(p, c, net, NULL, old, NULL);
  else if (tmpa)
  {
    ea_list *t = tmpa;
    while (t->next)
      t = t->next;
    t->next = new->attrs->eattrs;
    p->rt_notify(p, c, net, new, old, tmpa);
    t->next = NULL;
  }
  else
    p->rt_notify(p, c, net, new, old, new->attrs->eattrs);
}

static void
rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = c->proto;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this, to have a correct
   * value in the 'old' argument of rte_update (and a proper filter value).
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after a
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route. This may lead to
   * 'spurious withdraws', but ensures that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, a proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(c, new, &new_free, &tmpa, 0);

  if (old && !refeed)
    old = export_filter(c, old, &old_free, NULL, 1);

  if (!new && !old)
  {
    /*
     * As mentioned above, the 'old' value may be incorrect in some race
     * conditions. We generally ignore it with the exception of a withdraw
     * to the pipe protocol. In that case we rather propagate unfiltered
     * withdraws regardless of export filters to ensure that when a protocol
     * is flushed, its routes are removed from all tables. Possible spurious
     * unfiltered withdraws are not a problem here as they are ignored if
     * there is no corresponding route at the other end of the pipe. We
     * directly call the rt_notify() hook instead of do_rt_notify() to avoid
     * logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, c, net, NULL, old0, NULL);
#endif

    return;
  }

  do_rt_notify(c, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}

static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = c->proto;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met the old_changed position. If before_old is
     NULL, old_changed was the first and we met it implicitly before the
     current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or a valid (not rejected) route.
     If old_changed is valid, before_old has to be too. If the old changed
     route was not valid, the caller must use NULL for both old_changed
     and before_old. */

  if (new_changed)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* First, find the new_best route - the first one accepted by filters */
  for (r = net->routes; rte_is_valid(r); r = r->next)
  {
    if (new_best = export_filter(c, r, &new_free, &tmpa, 0))
      break;

    /* Note if we walked around the position of the old_changed route */
    if (r == before_old)
      old_meet = 1;
  }

  /*
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed.
   * For refeed, there is a hack similar to the one in rt_notify_basic()
   * to ensure withdraws in case of changed filters.
   */
  if (feed)
  {
    if (feed == 2)	/* refeed */
      old_best = new_best ? new_best :
	(rte_is_valid(net->routes) ? net->routes : NULL);
    else
      old_best = NULL;

    if (!new_best && !old_best)
      return;

    goto found;
  }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In the remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different from new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue the search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
  {
    if (new_free)
      rte_free(new_free);
    return;
  }

  /* Fourth case */
  for (r = r->next; rte_is_valid(r); r = r->next)
  {
    if (old_best = export_filter(c, r, &old_free, NULL, 1))
      goto found;

    if (r == before_old)
      if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
	goto found;
  }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

found:
  do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}


static struct mpnh *
mpnh_merge_rta(struct mpnh *nhs, rta *a, linpool *pool, int max)
{
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
  return mpnh_merge(nhs, nh2, 1, 0, max, pool);	/* use the pool argument, not the global one */
}

rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct mpnh *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, tmpa, pool, silent);

  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter_(c, rt0, &tmp, NULL, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = mpnh_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = mpnh_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      best = rte_cow_rta(best, pool);
      best->attrs->dest = RTD_MULTIPATH;
      best->attrs->nexthops = nhs;
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}


static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
		 rte *new_best, rte *old_best, int refeed)
{
  // struct proto *p = c->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL;

    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running the filter on the old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(c, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(c, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}


/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
 * @net: network in question
 * @new: the new route to be announced
 * @old: the previous route for the same network
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 * @before_old: The previous route before @old for the same network.
 * 		If @before_old is NULL, @old was the first.
 *
 * This function gets a routing table update and announces it
 * to all protocols that accept the given type of route announcement
 * and are connected to the same table by their announcement hooks.
 *
 * A route announcement of type %RA_OPTIMAL is generated when the optimal
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
 *
 * A route announcement of type %RA_ANY is generated when any route (in
 * routing table @tab) changes. In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.). Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
 */
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
	     rte *new_best, rte *old_best, rte *before_old)
{
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!old && !new)
    return;

  if ((type == RA_OPTIMAL) && tab->hostcache)
    rt_notify_hostcache(tab, net);

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
  {
    if (c->export_state == ES_DOWN)
      continue;

    if (c->ra_mode == type)
      if (type == RA_ACCEPTED)
	rt_notify_accepted(c, net, new, old, before_old, 0);
      else if (type == RA_MERGED)
	rt_notify_merged(c, net, new, old, new_best, old_best, 0);
      else
	rt_notify_basic(c, net, new, old, 0);
  }
}

static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  c = net_classify(n->n.addr);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  if ((e->attrs->dest == RTD_MULTIPATH) && !mpnh_is_sorted(e->attrs->nexthops))
  {
    log(L_WARN "Ignoring unsorted multipath route %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}

/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() frees the given &rte, releasing its reference to the
 * attribute cache if the attributes are cached.
 */
void
rte_free(rte *e)
{
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static int
rte_same(rte *x, rte *y)
{
  return
    x->attrs == y->attrs &&
    x->flags == y->flags &&
    x->pflags == y->pflags &&
    x->pref == y->pref &&
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
}

static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }

static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
  {
    if (old->attrs->src == src)
    {
      /* If there is the same route in the routing table but from
       * a different sender, then there are two paths from the
       * source protocol to this routing table through transparent
       * pipes, which is not allowed.
       *
       * We log that and ignore the route. If it is a withdraw, we
       * ignore it completely (there might be 'spurious withdraws',
       * see FIXME in do_rte_announce())
       */
      if (old->sender->proto != p)
      {
	if (new)
	{
	  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
	      net->n.addr, table->name);
	  rte_free_quick(new);
	}
	return;
      }

      if (new && rte_same(old, new))
      {
	/* No changes, ignore the new route */

	if (!rte_is_filtered(new))
	{
	  stats->imp_updates_ignored++;
	  rte_trace_in(D_ROUTES, p, new, "ignored");
	}

	rte_free_quick(new);
	return;
      }
      *k = old->next;
      break;
    }
    k = &old->next;
    before_old = old;
  }

  if (!old)
    before_old = NULL;

  if (!old && !new)
  {
    stats->imp_withdraws_ignored++;
    return;
  }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new)
  {
    u32 all_routes = stats->imp_routes + stats->filt_routes;

    if (all_routes >= l->limit)
      channel_notify_limit(c, l, PLD_RX, all_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* In receive limit the situation is simple, old is NULL so
	 we just free new and exit like nothing happened */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
      rte_free_quick(new);
      return;
    }
  }

  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
  {
    if (stats->imp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* In import limit the situation is more complicated. We
	 shouldn't just drop the route, we should handle it like
	 it was filtered. We also have to continue the route
	 processing if old or new is non-NULL, but we should exit
	 if both are NULL as this case is probably assumed to be
	 already handled. */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

      if (c->in_keep_filtered)
	new->flags |= REF_FILTERED;
      else
      { rte_free_quick(new); new = NULL; }

      /* Note that old && !new could be possible when
	 c->in_keep_filtered changed in the recent past. */

      if (!old && !new)
	return;

      new_ok = 0;
      goto skip_stats1;
    }
  }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
  {
    /* If routes are sorted, just insert the new route at the appropriate position */
    if (new)
    {
      if (before_old && !rte_better(new, before_old))
	k = &before_old->next;
      else
	k = &net->routes;

      for (; *k; k = &(*k)->next)
	if (rte_better(new, *k))
	  break;

      new->next = *k;
      *k = new;
    }
  }
  else
  {
    /* If routes are not sorted, find the best route and move it to
       the first position. There are several optimized cases. */

    if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
      goto do_recalculate;

    if (new && rte_better(new, old_best))
    {
      /* The first case - the new route is clearly optimal,
	 we link it at the first position */

      new->next = net->routes;
      net->routes = new;
    }
    else if (old == old_best)
    {
      /* The second case - the old best route disappeared, we add the
	 new route (if we have any) to the list (we don't care about
	 position) and then we elect the new optimal route and relink
	 that route at the first position and announce it. The new optimal
	 route might be NULL if there are no more routes */

    do_recalculate:
      /* Add the new route to the list */
      if (new)
      {
	new->next = net->routes;
	net->routes = new;
      }

      /* Find a new optimal route (if there is any) */
      if (net->routes)
      {
	rte **bp = &net->routes;
	for (k = &(*bp)->next; *k; k = &(*k)->next)
	  if (rte_better(*k, *bp))
	    bp = k;

	/* And relink it */
	rte *best = *bp;
	*bp = best->next;
	best->next = net->routes;
	net->routes = best;
      }
    }
    else if (new)
    {
      /* The third case - the new route is not better than the old
	 best route (therefore old_best != NULL) and the old best
	 route was not removed (therefore old_best == net->routes).
	 We just link the new route after the old best route. */

      ASSERT(net->routes != NULL);
      new->next = net->routes->next;
      net->routes->next = new;
    }
    /* The fourth (empty) case - a suboptimal route was removed, nothing to do */
  }

  if (new)
    new->lastmod = now;

  /* Log the route change */
  if (p->debug & D_ROUTES)
  {
    if (new_ok)
      rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
    else if (old_ok)
    {
      if (old != old_best)
	rte_trace(p, old, '>', "removed");
      else if (rte_is_ok(net->routes))
	rte_trace(p, old, '>', "removed [replaced]");
      else
	rte_trace(p, old, '>', "removed [sole]");
    }
  }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_prune(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}

static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}

static inline void
rte_hide_dummy_routes(net *net, rte **dummy)
{
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
  {
    *dummy = net->routes;
    net->routes = (*dummy)->next;
  }
}

static inline void
rte_unhide_dummy_routes(net *net, rte **dummy)
{
  if (*dummy)
  {
    (*dummy)->next = net->routes;
    net->routes = *dummy;
  }
}

/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
 * @c: channel doing the update
 * @net: network node
 * @p: protocol submitting the update
 * @src: protocol originating the update
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
 * same value as @new->attrs->proto. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores @p in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * and stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @n, replace it by the new one (or remove it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */

void
rte_update2(struct channel *c, net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  struct filter *filter = c->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
  {
    nn = net_get(c->table, n);

    new->net = nn;
    new->sender = c;

    if (!new->pref)
      new->pref = c->preference;

    stats->imp_updates_received++;
    if (!rte_validate(new))
    {
      rte_trace_in(D_FILTERS, p, new, "invalid");
      stats->imp_updates_invalid++;
      goto drop;
    }

    if (filter == FILTER_REJECT)
    {
      stats->imp_updates_filtered++;
      rte_trace_in(D_FILTERS, p, new, "filtered out");

      if (!c->in_keep_filtered)
	goto drop;

      /* new is a private copy, we may modify it */
      new->flags |= REF_FILTERED;
    }
    else
    {
      tmpa = make_tmp_attrs(new, rte_update_pool);
      if (filter && (filter != FILTER_REJECT))
      {
	ea_list *old_tmpa = tmpa;
	int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
	if (fr > F_ACCEPT)
	{
	  stats->imp_updates_filtered++;
	  rte_trace_in(D_FILTERS, p, new, "filtered out");

	  if (!c->in_keep_filtered)
	    goto drop;

	  new->flags |= REF_FILTERED;
	}
	if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
	  src->proto->store_tmp_attrs(new, tmpa);
      }
    }
    if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
      new->attrs = rta_lookup(new->attrs);
    new->flags |= REF_COW;
  }
  else
  {
    stats->imp_withdraws_received++;

    if (!(nn = net_find(c->table, n)) || !src)
    {
      stats->imp_withdraws_ignored++;
      rte_update_unlock();
      return;
    }
  }

recalc:
  rte_hide_dummy_routes(nn, &dummy);
  rte_recalculate(c, nn, new, src);
  rte_unhide_dummy_routes(nn, &dummy);
  rte_update_unlock();
  return;

drop:
  rte_free(new);
  new = NULL;
  goto recalc;
}

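/*
 * A sketch of the announcement sequence described in the comment above
 * (illustrative only, not part of the original sources; field values
 * such as RTS_STATIC and the gw/ifa variables are placeholders):
 *
 *   rta a = {
 *     .src = src,                 // route source of the protocol
 *     .source = RTS_STATIC,
 *     .scope = SCOPE_UNIVERSE,
 *     .dest = RTD_ROUTER,
 *     .gw = gw,
 *     .iface = ifa,
 *   };                            // un-cached rta; rte_update2() caches it
 *   rte *e = rte_get_temp(&a);
 *   rte_update2(c, n, e, src);    // insert or replace the route
 *
 *   rte_update2(c, n, NULL, src); // ... and later withdraw it
 */
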
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
	       rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}

void
rte_discard(rtable *t, rte *old)	/* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}

/* Check rtable for the best route to a given net, whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is a stripped-down export_filter() */
  ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

  /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}


/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for the given routing table and announce
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @c as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by the REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with the REF_DISCARD
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if (e->sender == c)
	e->flags |= REF_STALE;
  }
  FIB_WALK_END;
}

/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for the given routing table and announce
 * hook. See rt_refresh_begin() for a description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if ((e->sender == c) && (e->flags & REF_STALE))
      {
	e->flags |= REF_DISCARD;
	prune = 1;
      }
  }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}

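/*
 * Refresh cycle sketch as seen from a protocol (added commentary, not
 * part of the original sources), e.g. around a BGP route refresh:
 *
 *   rt_refresh_begin(c->table, c);  // mark this channel's routes REF_STALE
 *   ...                             // rte_update2() for all current routes
 *   rt_refresh_end(c->table, c);    // surviving stale routes get REF_DISCARD
 *                                   // and are removed by the prune loop
 */
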
/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This function dumps the contents of a &rte to the debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now - e->lastmod);
  rta_dump(e->attrs);
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}

/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps the contents of a given routing table to the debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      rte_dump(e);
  }
  FIB_WALK_END;
  debug("\n");
}

/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps the contents of all routing tables to the debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;

  WALK_LIST(t, routing_tables)
    rt_dump(t);
}

static inline void
rt_schedule_hcu(rtable *tab)
{
  if (tab->hcu_scheduled)
    return;

  tab->hcu_scheduled = 1;
  ev_schedule(tab->rt_event);
}

static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->nhu_state |= 1;
}

void
rt_schedule_prune(rtable *tab)
{
  if (tab->prune_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->prune_state |= 1;
}

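/*
 * Note on the encoding of prune_state and nhu_state (added commentary):
 * each is a small two-bit state machine where bit 0 means "a cycle was
 * requested" and bit 1 means "a walk is in progress". Scheduling sets
 * bit 0 (0->1, 2->3); a finished walk clears bit 1 (2->0, 3->1), so a
 * request that arrived during the walk leaves state 1 behind and the
 * event is fired again.
 */
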
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}

void
rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
{
  bzero(t, sizeof(*t));
  t->name = name;
  t->config = cf;
  t->addr_type = cf ? cf->addr_type : NET_IP4;
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
  init_list(&t->channels);

  if (cf)
  {
    t->rt_event = ev_new(p);
    t->rt_event->hook = rt_event;
    t->rt_event->data = t;
    t->gc_time = now;
  }
}

/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new(rt_table_pool, 4080);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}


/**
 * rt_prune_table - prune a routing table
 * @tab: routing table to be pruned
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration does not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is also used for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 512;

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
	c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
  {
    rte *e;

  rescan:
    for (e = n->routes; e; e = e->next)
      if (e->sender->flush_active || (e->flags & REF_DISCARD))
      {
	if (limit <= 0)
	{
	  FIB_ITERATE_PUT(fit);
	  ev_schedule(tab->rt_event);
	  return;
	}

	rte_discard(tab, e);
	limit--;

	goto rescan;
      }

    if (!n->routes)		/* Orphaned FIB entry */
    {
      FIB_ITERATE_PUT(fit);
      fib_delete(&tab->fib, n);
      goto again;
    }
  }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->gc_counter = 0;
  tab->gc_time = now;

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
    {
      c->flush_active = 0;
      channel_set_state(c, CS_DOWN);
    }

  return;
}

void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol("master4"), NET_IP4);
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
}


/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */

static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    !mpnh_same(a->nexthops, he->src->nexthops);
}

static inline void
rta_apply_hostentry(rta *a, struct hostentry *he)
{
  a->hostentry = he;
  a->iface = he->src ? he->src->iface : NULL;
  a->gw = he->gw;
  a->dest = he->dest;
  a->igp_metric = he->igp_metric;
  a->nexthops = he->src ? he->src->nexthops : NULL;
}

static inline rte *
rt_next_hop_update_rte(rtable *tab, rte *old)
{
  rta a;
  memcpy(&a, old->attrs, sizeof(rta));
  rta_apply_hostentry(&a, old->attrs->hostentry);
  a.aflags = 0;

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(&a);

  return e;
}

static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
    {
      new = rt_next_hop_update_rte(tab, e);
      *k = new;

      rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

      /* Call a pre-comparison hook */
      /* Not really an efficient way to compute this */
      if (e->attrs->src->proto->rte_recalculate)
	e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

      if (e != old_best)
	rte_free_quick(e);
      else /* Freeing of the old best rte is postponed */
	free_old_best = 1;

      e = new;
      count++;
    }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
  {
    if (!new_best || rte_better(e, *new_best))
      new_best = k;
  }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
  {
    *new_best = new->next;
    new->next = n->routes;
    n->routes = new;
  }

  /* Announce the new best route */
  if (new != old_best)
  {
    rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
    rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
  }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}

static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
  {
    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->nhu_state = 2;
  }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
  {
    if (max_feed <= 0)
    {
      FIB_ITERATE_PUT(fit);
      ev_schedule(tab->rt_event);
      return;
    }
    max_feed -= rt_next_hop_update_net(tab, n);
  }
  FIB_ITERATE_END;

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}


struct rtable_config *
rt_new_table(struct symbol *s, uint addr_type)
{
  /* Hack that allows us to 'redefine' the master table */
  if ((s->class == SYM_TABLE) &&
      (s->def == new_config->def_tables[addr_type]) &&
      ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
    return s->def;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(s, SYM_TABLE, c);
  c->name = s->name;
  c->addr_type = addr_type;
  c->gc_max_ops = 1000;
  c->gc_min_time = 5;

  add_tail(&new_config->tables, &c->n);

  /* The first table of each type is kept as the default */
  if (!new_config->def_tables[addr_type])
    new_config->def_tables[addr_type] = c;

  return c;
}

/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;
}

/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is, decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
  {
    struct config *conf = r->deleted;
    DBG("Deleting routing table %s\n", r->name);
    r->config->table = NULL;
    if (r->hostcache)
      rt_free_hostcache(r);
    rem_node(&r->n);
    fib_free(&r->fib);
    rfree(r->rt_event);
    mb_free(r);
    config_del_obstacle(conf);
  }
}

/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
  {
    WALK_LIST(o, old->tables)
    {
      rtable *ot = o->table;
      if (!ot->deleted)
      {
	struct symbol *sym = cf_find_symbol(new, o->name);
	if (sym && sym->class == SYM_TABLE && !new->shutdown)
	{
	  DBG("\t%s: same\n", o->name);
	  r = sym->def;
	  r->table = ot;
	  ot->name = r->name;
	  ot->config = r;
	  if (o->sorted != r->sorted)
	    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
	}
	else
	{
	  DBG("\t%s: deleted\n", o->name);
	  ot->deleted = old;
	  config_add_obstacle(old);
	  rt_lock_table(ot);
	  rt_unlock_table(ot);
	}
      }
    }
  }

  WALK_LIST(r, new->tables)
    if (!r->table)
    {
      rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
      DBG("\t%s: created\n", r->name);
      rt_setup(rt_table_pool, t, r->name, r);
      add_tail(&routing_tables, &t->n);
      r->table = t;
    }
  DBG("\tdone\n");
}

static inline void
do_feed_channel(struct channel *c, net *n, rte *e)
{
  rte_update_lock();
  if (c->ra_mode == RA_ACCEPTED)
    rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
  else if (c->ra_mode == RA_MERGED)
    rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
  else /* RA_BASIC */
    rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
  rte_update_unlock();
}

2041 /**
2042 * rt_feed_channel - advertise all routes to a channel
2043 * @c: channel to be fed
2044 *
2045 * This function performs one pass of advertisement of routes to a channel that
2046 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2047 * has something to do. (We avoid transferring all the routes in single pass in
2048 * order not to monopolize CPU time.)
2049 */
2050 int
2051 rt_feed_channel(struct channel *c)
2052 {
2053 struct fib_iterator *fit = &c->feed_fit;
2054 int max_feed = 256;
2055
2056 ASSERT(c->export_state == ES_FEEDING);
2057
2058 if (!c->feed_active)
2059 {
2060 FIB_ITERATE_INIT(fit, &c->table->fib);
2061 c->feed_active = 1;
2062 }
2063
2064 FIB_ITERATE_START(&c->table->fib, fit, net, n)
2065 {
2066 rte *e = n->routes;
2067 if (max_feed <= 0)
2068 {
2069 FIB_ITERATE_PUT(fit);
2070 return 0;
2071 }
2072
2073 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2074
2075 if ((c->ra_mode == RA_OPTIMAL) ||
2076 (c->ra_mode == RA_ACCEPTED) ||
2077 (c->ra_mode == RA_MERGED))
2078 if (rte_is_valid(e))
2079 {
2080 /* In the meantime, the protocol may have gone down */
2081 if (c->export_state != ES_FEEDING)
2082 goto done;
2083
2084 do_feed_channel(c, n, e);
2085 max_feed--;
2086 }
2087
2088 if (c->ra_mode == RA_ANY)
2089 for(e = n->routes; e; e = e->next)
2090 {
2091 /* In the meantime, the protocol may have gone down */
2092 if (c->export_state != ES_FEEDING)
2093 goto done;
2094
2095 if (!rte_is_valid(e))
2096 continue;
2097
2098 do_feed_channel(c, n, e);
2099 max_feed--;
2100 }
2101 }
2102 FIB_ITERATE_END;
2103
2104 done:
2105 c->feed_active = 0;
2106 return 1;
2107 }
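
/* Editor's sketch (hypothetical caller; not part of this file): the return
 * value of rt_feed_channel() tells whether the whole table has been walked,
 * so the feeding code simply re-runs the pass until it gets 1, keeping each
 * pass bounded by max_feed routes. */
#if 0
static void
example_feed_pass(struct channel *c)
{
  if (!rt_feed_channel(c))
    ev_schedule(c->feed_event);		/* assumed event; more networks remain */
  else
    example_feed_done(c);		/* hypothetical hook; feeding complete */
}
#endif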
2108
2109 /**
2110 * rt_feed_channel_abort - abort protocol feeding
2111 * @c: channel
2112 *
2113 * This function is called by the protocol code when the protocol stops or
2114 * ceases to exist while feeding is still in progress.
2115 */
2116 void
2117 rt_feed_channel_abort(struct channel *c)
2118 {
2119 if (c->feed_active)
2120 {
2121 /* Unlink the iterator */
2122 fit_get(&c->table->fib, &c->feed_fit);
2123 c->feed_active = 0;
2124 }
2125 }
2126
2127 static inline unsigned
2128 ptr_hash(void *ptr)
2129 {
2130 uintptr_t p = (uintptr_t) ptr;
2131 return p ^ (p << 8) ^ (p >> 16);
2132 }
2133
2134 static inline u32
2135 hc_hash(ip_addr a, rtable *dep)
2136 {
2137 return ipa_hash(a) ^ ptr_hash(dep);
2138 }
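
/* Editor's note: mixing the address hash with the dependent-table pointer
 * keeps one hostentry per (address, table) pair, matching the lookup in
 * rt_get_hostentry() below; buckets are selected from the top bits of the
 * key via hash_key >> hash_shift. */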
2139
2140 static inline void
2141 hc_insert(struct hostcache *hc, struct hostentry *he)
2142 {
2143 uint k = he->hash_key >> hc->hash_shift;
2144 he->next = hc->hash_table[k];
2145 hc->hash_table[k] = he;
2146 }
2147
2148 static inline void
2149 hc_remove(struct hostcache *hc, struct hostentry *he)
2150 {
2151 struct hostentry **hep;
2152 uint k = he->hash_key >> hc->hash_shift;
2153
2154 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next); /* find the link that points at he */
2155 *hep = he->next;
2156 }
2157
2158 #define HC_DEF_ORDER 10
2159 #define HC_HI_MARK *4
2160 #define HC_HI_STEP 2
2161 #define HC_HI_ORDER 16 /* Must be at most 16 */
2162 #define HC_LO_MARK /5
2163 #define HC_LO_STEP 2
2164 #define HC_LO_ORDER 10
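
/* Editor's note: HC_HI_MARK and HC_LO_MARK are textual operator fragments,
 * so (hsize HC_HI_MARK) below expands to (hsize *4) and (hsize HC_LO_MARK)
 * to (hsize /5): the cache grows by two orders (4x) once it averages more
 * than four entries per bucket, shrinks once it drops below a fifth of the
 * bucket count, and never resizes past the HC_LO_ORDER/HC_HI_ORDER bounds. */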
2165
2166 static void
2167 hc_alloc_table(struct hostcache *hc, unsigned order)
2168 {
2169 unsigned hsize = 1 << order;
2170 hc->hash_order = order;
2171 hc->hash_shift = 32 - order;
2172 hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
2173 hc->hash_min = (order <= HC_LO_ORDER) ? 0 : (hsize HC_LO_MARK);
2174
2175 hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2176 }
2177
2178 static void
2179 hc_resize(struct hostcache *hc, unsigned new_order)
2180 {
2181 unsigned old_size = 1 << hc->hash_order;
2182 struct hostentry **old_table = hc->hash_table;
2183 struct hostentry *he, *hen;
2184 uint i;
2185
2186 hc_alloc_table(hc, new_order);
2187 for (i = 0; i < old_size; i++)
2188 for (he = old_table[i]; he != NULL; he = hen)
2189 {
2190 hen = he->next;
2191 hc_insert(hc, he);
2192 }
2193 mb_free(old_table);
2194 }
2195
2196 static struct hostentry *
2197 hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2198 {
2199 struct hostentry *he = sl_alloc(hc->slab);
2200
2201 he->addr = a;
2202 he->link = ll;
2203 he->tab = dep;
2204 he->hash_key = k;
2205 he->uc = 0;
2206 he->src = NULL;
2207
2208 add_tail(&hc->hostentries, &he->ln);
2209 hc_insert(hc, he);
2210
2211 hc->hash_items++;
2212 if (hc->hash_items > hc->hash_max)
2213 hc_resize(hc, hc->hash_order + HC_HI_STEP);
2214
2215 return he;
2216 }
2217
2218 static void
2219 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2220 {
2221 rta_free(he->src);
2222
2223 rem_node(&he->ln);
2224 hc_remove(hc, he);
2225 sl_free(hc->slab, he);
2226
2227 hc->hash_items--;
2228 if (hc->hash_items < hc->hash_min)
2229 hc_resize(hc, hc->hash_order - HC_LO_STEP);
2230 }
2231
2232 static void
2233 rt_init_hostcache(rtable *tab)
2234 {
2235 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2236 init_list(&hc->hostentries);
2237
2238 hc->hash_items = 0;
2239 hc_alloc_table(hc, HC_DEF_ORDER);
2240 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2241
2242 hc->lp = lp_new(rt_table_pool, 1008);
2243 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2244
2245 tab->hostcache = hc;
2246 }
2247
2248 static void
2249 rt_free_hostcache(rtable *tab)
2250 {
2251 struct hostcache *hc = tab->hostcache;
2252
2253 node *n;
2254 WALK_LIST(n, hc->hostentries)
2255 {
2256 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2257 rta_free(he->src);
2258
2259 if (he->uc)
2260 log(L_ERR "Hostcache is not empty in table %s", tab->name);
2261 }
2262
2263 rfree(hc->slab);
2264 rfree(hc->lp);
2265 mb_free(hc->hash_table);
2266 mb_free(hc);
2267 }
2268
2269 static void
2270 rt_notify_hostcache(rtable *tab, net *net)
2271 {
2272 if (tab->hcu_scheduled)
2273 return;
2274
2275 if (trie_match_net(tab->hostcache->trie, net->n.addr))
2276 rt_schedule_hcu(tab);
2277 }
2278
2279 static int
2280 if_local_addr(ip_addr a, struct iface *i)
2281 {
2282 struct ifa *b;
2283
2284 WALK_LIST(b, i->addrs)
2285 if (ipa_equal(a, b->ip))
2286 return 1;
2287
2288 return 0;
2289 }
2290
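/* Editor's note: an explicit EA_GEN_IGP_METRIC attribute (e.g. set by a
 * filter) takes precedence below; otherwise the metric comes from the
 * originating protocol, device routes count as 0, and anything else is
 * reported as IGP_METRIC_UNKNOWN. */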
2291 static u32
2292 rt_get_igp_metric(rte *rt)
2293 {
2294 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2295
2296 if (ea)
2297 return ea->u.data;
2298
2299 rta *a = rt->attrs;
2300
2301 #ifdef CONFIG_OSPF
2302 if ((a->source == RTS_OSPF) ||
2303 (a->source == RTS_OSPF_IA) ||
2304 (a->source == RTS_OSPF_EXT1))
2305 return rt->u.ospf.metric1;
2306 #endif
2307
2308 #ifdef CONFIG_RIP
2309 if (a->source == RTS_RIP)
2310 return rt->u.rip.metric;
2311 #endif
2312
2313 /* Device routes */
2314 if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
2315 return 0;
2316
2317 return IGP_METRIC_UNKNOWN;
2318 }
2319
2320 static int
2321 rt_update_hostentry(rtable *tab, struct hostentry *he)
2322 {
2323 rta *old_src = he->src;
2324 int pxlen = 0;
2325
2326 /* Reset the hostentry */
2327 he->src = NULL;
2328 he->gw = IPA_NONE;
2329 he->dest = RTD_UNREACHABLE;
2330 he->igp_metric = 0;
2331
2332 net_addr he_addr;
2333 net_fill_ip_host(&he_addr, he->addr);
2334 net *n = net_route(tab, &he_addr);
2335 if (n)
2336 {
2337 rte *e = n->routes;
2338 rta *a = e->attrs;
2339 pxlen = n->n.addr->pxlen;
2340
2341 if (a->hostentry)
2342 {
2343 /* Recursive route should not depend on another recursive route */
2344 log(L_WARN "Next hop address %I resolvable through recursive route for %N",
2345 he->addr, n->n.addr);
2346 goto done;
2347 }
2348
2349 if (a->dest == RTD_DEVICE)
2350 {
2351 if (if_local_addr(he->addr, a->iface))
2352 {
2353 /* The host address is a local address, which is not valid */
2354 log(L_WARN "Next hop address %I is a local address of iface %s",
2355 he->addr, a->iface->name);
2356 goto done;
2357 }
2358
2359 /* The host is directly reachable, use link as a gateway */
2360 he->gw = he->link;
2361 he->dest = RTD_ROUTER;
2362 }
2363 else
2364 {
2365 /* The host is reachable through some route entry */
2366 he->gw = a->gw;
2367 he->dest = a->dest;
2368 }
2369
2370 he->src = rta_clone(a);
2371 he->igp_metric = rt_get_igp_metric(e);
2372 }
2373
2374 done:
2375 /* Add a prefix range to the trie */
2376 trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
2377
2378 rta_free(old_src);
2379 return old_src != he->src;
2380 }
2381
2382 static void
2383 rt_update_hostcache(rtable *tab)
2384 {
2385 struct hostcache *hc = tab->hostcache;
2386 struct hostentry *he;
2387 node *n, *x;
2388
2389 /* Reset the trie */
2390 lp_flush(hc->lp);
2391 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2392
2393 WALK_LIST_DELSAFE(n, x, hc->hostentries)
2394 {
2395 he = SKIP_BACK(struct hostentry, ln, n);
2396 if (!he->uc)
2397 {
2398 hc_delete_hostentry(hc, he);
2399 continue;
2400 }
2401
2402 if (rt_update_hostentry(tab, he))
2403 rt_schedule_nhu(he->tab);
2404 }
2405
2406 tab->hcu_scheduled = 0;
2407 }
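
/* Editor's summary of the update flow above: rt_notify_hostcache() marks
 * the table for an update (rt_schedule_hcu) whenever a changed network
 * matches the trie of prefixes the cached hosts resolve through;
 * rt_update_hostcache() then rebuilds that trie, drops entries with zero
 * use count, and schedules next-hop updates (rt_schedule_nhu) on every
 * dependent table whose hostentry actually changed. */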
2408
2409 static struct hostentry *
2410 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2411 {
2412 struct hostentry *he;
2413
2414 if (!tab->hostcache)
2415 rt_init_hostcache(tab);
2416
2417 u32 k = hc_hash(a, dep);
2418 struct hostcache *hc = tab->hostcache;
2419 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2420 if (ipa_equal(he->addr, a) && (he->tab == dep))
2421 return he;
2422
2423 he = hc_new_hostentry(hc, a, ll, dep, k);
2424 rt_update_hostentry(tab, he);
2425 return he;
2426 }
2427
2428 void
2429 rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
2430 {
2431 rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
2432 }
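
/* Editor's sketch (hypothetical caller): a BGP-like protocol resolving its
 * next hop through an IGP table attaches a hostentry this way; subsequent
 * IGP changes then propagate to the dependent table via the hostcache
 * machinery above. */
#if 0
static void
example_resolve_nexthop(rtable *dep_table, rtable *igp_table, rta *a,
			ip_addr gw, ip_addr ll)
{
  rta_set_recursive_next_hop(dep_table, a, igp_table, &gw, &ll);
}
#endif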
2433
2434
2435 /*
2436 * CLI commands
2437 */
2438
2439 static byte *
2440 rt_format_via(rte *e)
2441 {
2442 rta *a = e->attrs;
2443
2444 /* Max text length w/o IP addr and interface name is 16 */
2445 static byte via[IPA_MAX_TEXT_LENGTH+sizeof(a->iface->name)+16];
2446
2447 switch (a->dest)
2448 {
2449 case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
2450 case RTD_DEVICE: bsprintf(via, "dev %s", a->iface->name); break;
2451 case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break;
2452 case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break;
2453 case RTD_PROHIBIT: bsprintf(via, "prohibited"); break;
2454 case RTD_MULTIPATH: bsprintf(via, "multipath"); break;
2455 default: bsprintf(via, "???");
2456 }
2457 return via;
2458 }
2459
2460 static void
2461 rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
2462 {
2463 byte from[IPA_MAX_TEXT_LENGTH+8];
2464 byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
2465 rta *a = e->attrs;
2466 int primary = (e->net->routes == e);
2467 int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
2468 void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
2469 struct mpnh *nh;
2470
2471 tm_format_datetime(tm, &config->tf_route, e->lastmod);
2472 if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
2473 bsprintf(from, " from %I", a->from);
2474 else
2475 from[0] = 0;
2476
2477 get_route_info = a->src->proto->proto->get_route_info;
2478 if (get_route_info || d->verbose)
2479 {
2480 /* Need to normalize the extended attributes */
2481 ea_list *t = tmpa;
2482 t = ea_append(t, a->eattrs);
2483 tmpa = alloca(ea_scan(t));
2484 ea_merge(t, tmpa);
2485 ea_sort(tmpa);
2486 }
2487 if (get_route_info)
2488 get_route_info(e, info, tmpa);
2489 else
2490 bsprintf(info, " (%d)", e->pref);
2491 cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name,
2492 tm, from, primary ? (sync_error ? " !" : " *") : "", info);
2493 for (nh = a->nexthops; nh; nh = nh->next)
2494 cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
2495 if (d->verbose)
2496 rta_show(c, a, tmpa);
2497 }
2498
2499 static void
2500 rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
2501 {
2502 rte *e, *ee;
2503 byte ia[NET_MAX_TEXT_LENGTH+1];
2504 struct ea_list *tmpa;
2505 struct channel *ec = d->export_channel;
2506 int first = 1;
2507 int pass = 0;
2508
2509 bsprintf(ia, "%N", n->n.addr);
2510
2511
2512 for (e = n->routes; e; e = e->next)
2513 {
2514 if (rte_is_filtered(e) != d->filtered)
2515 continue;
2516
2517 d->rt_counter++;
2518 d->net_counter += first;
2519 first = 0;
2520
2521 if (pass)
2522 continue;
2523
2524 ee = e;
2525 rte_update_lock(); /* We use the update buffer for filtering */
2526 tmpa = make_tmp_attrs(e, rte_update_pool);
2527
2528 /* Special case for merged export */
2529 if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED))
2530 {
2531 rte *rt_free;
2532 e = rt_export_merged(ec, n, &rt_free, &tmpa, rte_update_pool, 1);
2533 pass = 1;
2534
2535 if (!e)
2536 { e = ee; goto skip; }
2537 }
2538 else if (d->export_mode)
2539 {
2540 struct proto *ep = d->export_protocol;
2541 int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
2542
2543 if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED)
2544 pass = 1;
2545
2546 if (ic < 0)
2547 goto skip;
2548
2549 if (d->export_mode > RSEM_PREEXPORT)
2550 {
2551 /*
2552 * FIXME - This shows what should be exported according to current
2553 * filters, but not what was really exported. 'configure soft'
2554 * command may change the export filter without updating the routes.
2555 */
2556 int do_export = (ic > 0) ||
2557 (f_run(ec->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
2558
2559 if (do_export != (d->export_mode == RSEM_EXPORT))
2560 goto skip;
2561
2562 if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_ACCEPTED))
2563 pass = 1;
2564 }
2565 }
2566
2567 if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
2568 goto skip;
2569
2570 if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
2571 goto skip;
2572
2573 d->show_counter++;
2574 if (d->stats < 2)
2575 rt_show_rte(c, ia, e, d, tmpa);
2576 ia[0] = 0;
2577
2578 skip:
2579 if (e != ee)
2580 {
2581 rte_free(e);
2582 e = ee;
2583 }
2584 rte_update_unlock();
2585
2586 if (d->primary_only)
2587 break;
2588 }
2589 }
2590
2591 static struct channel *
2592 rt_show_export_channel(struct rt_show_data *d)
2593 {
2594 if (! d->export_protocol->rt_notify)
2595 return NULL;
2596
2597 return proto_find_channel_by_table(d->export_protocol, d->table);
2598 }
2599
2600 static void
2601 rt_show_cont(struct cli *c)
2602 {
2603 struct rt_show_data *d = c->rover;
2604 #ifdef DEBUGGING
2605 unsigned max = 4;
2606 #else
2607 unsigned max = 64;
2608 #endif
2609 struct fib *fib = &d->table->fib;
2610 struct fib_iterator *it = &d->fit;
2611
2612 if (d->export_mode)
2613 {
2614 /* Ensure we have current export channel */
2615 d->export_channel = rt_show_export_channel(d);
2616 if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
2617 {
2618 cli_printf(c, 8005, "Channel is down");
2619 goto done;
2620 }
2621 }
2622
2623 FIB_ITERATE_START(fib, it, net, n)
2624 {
2625 if (!max--)
2626 {
2627 FIB_ITERATE_PUT(it);
2628 return;
2629 }
2630 rt_show_net(c, n, d);
2631 }
2632 FIB_ITERATE_END;
2633 if (d->stats)
2634 cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
2635 else
2636 cli_printf(c, 0, "");
2637 done:
2638 c->cont = c->cleanup = NULL;
2639 }
2640
2641 static void
2642 rt_show_cleanup(struct cli *c)
2643 {
2644 struct rt_show_data *d = c->rover;
2645
2646 /* Unlink the iterator */
2647 fit_get(&d->table->fib, &d->fit);
2648 }
2649
2650 static inline rtable *
2651 rt_show_get_table(struct proto *p)
2652 {
2653 /* FIXME: Use a better way to handle multi-channel protocols */
2654
2655 if (p->main_channel)
2656 return p->main_channel->table;
2657
2658 if (!EMPTY_LIST(p->channels))
2659 return ((struct channel *) HEAD(p->channels))->table;
2660
2661 return NULL;
2662 }
2663
2664 void
2665 rt_show(struct rt_show_data *d)
2666 {
2667 net *n;
2668
2669 /* Default is either the master table or the table related to the respective protocol */
2670 if (!d->table && d->export_protocol) d->table = rt_show_get_table(d->export_protocol);
2671 if (!d->table && d->show_protocol) d->table = rt_show_get_table(d->show_protocol);
2672 if (!d->table) d->table = config->def_tables[NET_IP4]->table; /* FIXME: iterate through all tables ? */
2673
2674 /* Filtered routes are neither exported nor sensibly ordered */
2675 if (d->filtered && (d->export_mode || d->primary_only))
2676 cli_msg(0, "");
2677
2678 if (!d->addr)
2679 {
2680 FIB_ITERATE_INIT(&d->fit, &d->table->fib);
2681 this_cli->cont = rt_show_cont;
2682 this_cli->cleanup = rt_show_cleanup;
2683 this_cli->rover = d;
2684 }
2685 else
2686 {
2687 if (d->export_mode)
2688 {
2689 /* Find channel associated with the export protocol */
2690 d->export_channel = rt_show_export_channel(d);
2691 if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
2692 {
2693 cli_msg(8005, "Channel is down");
2694 return;
2695 }
2696 }
2697
2698 if (d->show_for)
2699 n = net_route(d->table, d->addr);
2700 else
2701 n = net_find(d->table, d->addr);
2702
2703 if (n)
2704 rt_show_net(this_cli, n, d);
2705
2706 if (d->rt_counter)
2707 cli_msg(0, "");
2708 else
2709 cli_msg(8001, "Network not in table");
2710 }
2711 }
2712
2713 /*
2714 * Documentation for functions declared inline in route.h
2715 */
2716 #if 0
2717
2718 /**
2719 * net_find - find a network entry
2720 * @tab: a routing table
2721 * @addr: address of the network
2722 *
2723 * net_find() looks up the given network in routing table @tab and
2724 * returns a pointer to its &net entry or %NULL if no such network
2725 * exists.
2726 */
2727 static inline net *net_find(rtable *tab, net_addr *addr)
2728 { DUMMY; }
2729
2730 /**
2731 * net_get - obtain a network entry
2732 * @tab: a routing table
2733 * @addr: address of the network
2734 *
2735 * net_get() looks up the given network in routing table @tab and
2736 * returns a pointer to its &net entry. If no such entry exists, it's
2737 * created.
2738 */
2739 static inline net *net_get(rtable *tab, net_addr *addr)
2740 { DUMMY; }
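
/*
 * Example (editor's sketch): read-only lookups use net_find() so the FIB
 * is not polluted with empty entries, while an import path that will store
 * a route uses net_get():
 *
 *	net *n = net_find(tab, addr);	// NULL if the network is unknown
 *	net *m = net_get(tab, addr);	// creates the entry when missing
 */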
2741
2742 /**
2743 * rte_cow - copy a route for writing
2744 * @r: a route entry to be copied
2745 *
2746 * rte_cow() takes a &rte and prepares it for modification. The exact action
2747 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2748 * just returned unchanged, else a new temporary entry with the same contents
2749 * is created.
2750 *
2751 * The primary use of this function is inside the filter machinery -- when
2752 * a filter wants to modify &rte contents (to change the preference or to
2753 * attach another set of attributes), it must ensure that the &rte is not
2754 * shared with anyone else (and especially that it isn't stored in any routing
2755 * table).
2756 *
2757 * Result: a pointer to the new writable &rte.
2758 */
2759 static inline rte * rte_cow(rte *r)
2760 { DUMMY; }
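
/*
 * Example (editor's sketch): a filter action changing the preference first
 * takes a private copy, so instances stored in routing tables stay intact:
 *
 *	e = rte_cow(e);
 *	e->pref = 200;
 */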
2761
2762 #endif