1 /*
2 * BIRD -- Routing Tables
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
23 *
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
29 */
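/*
 * A minimal usage sketch of the structures above, assuming a table 'tab'
 * and a net_addr 'addr' provided by the caller (net_find() and the
 * &net/&rte fields are declared in nest/route.h):
 *
 *   net *n = net_find(tab, addr);
 *   if (n)
 *     for (rte *e = n->routes; e; e = e->next)
 *       log(L_INFO "%N via %s%s", n->n.addr, e->attrs->src->proto->name,
 *           (e == n->routes) ? " [best]" : "");
 */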
30
31 #undef LOCAL_DEBUG
32
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/iface.h"
37 #include "lib/resource.h"
38 #include "lib/event.h"
39 #include "lib/string.h"
40 #include "conf/conf.h"
41 #include "filter/filter.h"
43 #include "lib/alloca.h"
44
45 pool *rt_table_pool;
46
47 static slab *rte_slab;
48 static linpool *rte_update_pool;
49
50 static list routing_tables;
51
52 static void rt_free_hostcache(rtable *tab);
53 static void rt_notify_hostcache(rtable *tab, net *net);
54 static void rt_update_hostcache(rtable *tab);
55 static void rt_next_hop_update(rtable *tab);
56 static inline void rt_prune_table(rtable *tab);
57
58
59 /* Like fib_route(), but skips empty net entries */
60 static inline void *
61 net_route_ip4(rtable *t, net_addr_ip4 *n)
62 {
63 net *r;
64
65 while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
66 {
67 n->pxlen--;
68 ip4_clrbit(&n->prefix, n->pxlen);
69 }
70
71 return r;
72 }
73
74 static inline void *
75 net_route_ip6(rtable *t, net_addr_ip6 *n)
76 {
77 net *r;
78
79 while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
80 {
81 n->pxlen--;
82 ip6_clrbit(&n->prefix, n->pxlen);
83 }
84
85 return r;
86 }
87
88 void *
89 net_route(rtable *tab, const net_addr *n)
90 {
91 ASSERT(tab->addr_type == n->type);
92
93 net_addr *n0 = alloca(n->length);
94 net_copy(n0, n);
95
96 switch (n->type)
97 {
98 case NET_IP4:
99 case NET_VPN4:
100 case NET_ROA4:
101 return net_route_ip4(tab, (net_addr_ip4 *) n0);
102
103 case NET_IP6:
104 case NET_VPN6:
105 case NET_ROA6:
106 return net_route_ip6(tab, (net_addr_ip6 *) n0);
107
108 default:
109 return NULL;
110 }
111 }
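/*
 * Usage sketch for net_route(): longest-prefix lookup for a concrete
 * destination. The address is only an example (documentation range);
 * 'tab' is assumed to be an IPv4 table from the caller.
 *
 *   net_addr_ip4 dst = NET_ADDR_IP4(ip4_build(192, 0, 2, 1), 32);
 *   net *n = net_route(tab, (net_addr *) &dst);
 *   if (n)
 *     ... n->routes is the best route of the longest matching prefix ...
 */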
112
113
114 static int
115 net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
116 {
117 struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
118 struct fib_node *fn;
119 int anything = 0;
120
121 while (1)
122 {
123 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
124 {
125 net_addr_roa4 *roa = (void *) fn->addr;
126 net *r = fib_node_to_user(&tab->fib, fn);
127
128 if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
129 {
130 anything = 1;
131 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
132 return ROA_VALID;
133 }
134 }
135
136 if (n.pxlen == 0)
137 break;
138
139 n.pxlen--;
140 ip4_clrbit(&n.prefix, n.pxlen);
141 }
142
143 return anything ? ROA_INVALID : ROA_UNKNOWN;
144 }
145
146 static int
147 net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
148 {
149 struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
150 struct fib_node *fn;
151 int anything = 0;
152
153 while (1)
154 {
155 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
156 {
157 net_addr_roa6 *roa = (void *) fn->addr;
158 net *r = fib_node_to_user(&tab->fib, fn);
159
160 if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
161 {
162 anything = 1;
163 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
164 return ROA_VALID;
165 }
166 }
167
168 if (n.pxlen == 0)
169 break;
170
171 n.pxlen--;
172 ip6_clrbit(&n.prefix, n.pxlen);
173 }
174
175 return anything ? ROA_INVALID : ROA_UNKNOWN;
176 }
177
178 /**
179 * roa_check - check validity of route origination in a ROA table
180 * @tab: ROA table
181 * @n: network prefix to check
182 * @asn: AS number of network prefix
183 *
184 * Implements RFC 6483 route validation for the given network prefix. The
185 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
186 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
187 * a candidate ROA with matching ASN and maxlen field greater than or equal to
188 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
189 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
190 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
191 * must have type NET_IP4 or NET_IP6, respectively.
192 */
193 int
194 net_roa_check(rtable *tab, const net_addr *n, u32 asn)
195 {
196 if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
197 return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
198 else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
199 return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
200 else
201 return ROA_UNKNOWN; /* Should not happen */
202 }
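/*
 * Usage sketch: validating an origin announcement against a ROA table.
 * 'roa_tab' and 'origin_asn' are assumed to come from the caller.
 *
 *   switch (net_roa_check(roa_tab, n->n.addr, origin_asn))
 *     {
 *     case ROA_VALID:   ... covered by a ROA with matching ASN and maxlen ...
 *     case ROA_INVALID: ... covered by some ROA, but none matches ...
 *     case ROA_UNKNOWN: ... no candidate ROA at all ...
 *     }
 */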
203
204 /**
205 * rte_find - find a route
206 * @net: network node
207 * @src: route source
208 *
209 * The rte_find() function returns a route for destination @net
210 * which is from route source @src.
211 */
212 rte *
213 rte_find(net *net, struct rte_src *src)
214 {
215 rte *e = net->routes;
216
217 while (e && e->attrs->src != src)
218 e = e->next;
219 return e;
220 }
221
222 /**
223 * rte_get_temp - get a temporary &rte
224 * @a: attributes to assign to the new route (a &rta; in case it's
225 * un-cached, rte_update() will create a cached copy automatically)
226 *
227 * Create a temporary &rte and bind it with the attributes @a.
228 * Also set route preference to the default preference set for
229 * the protocol.
230 */
231 rte *
232 rte_get_temp(rta *a)
233 {
234 rte *e = sl_alloc(rte_slab);
235
236 e->attrs = a;
237 e->flags = 0;
238 e->pref = 0;
239 return e;
240 }
241
242 rte *
243 rte_do_cow(rte *r)
244 {
245 rte *e = sl_alloc(rte_slab);
246
247 memcpy(e, r, sizeof(rte));
248 e->attrs = rta_clone(r->attrs);
249 e->flags = 0;
250 return e;
251 }
252
253 /**
254 * rte_cow_rta - get a private writable copy of &rte with writable &rta
255 * @r: a route entry to be copied
256 * @lp: a linpool from which to allocate &rta
257 *
258 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
259 * modification. There are three possibilities: First, both &rte and &rta are
260 * private copies, in that case they are returned unchanged. Second, &rte is
261 * private copy, but &rta is cached, in that case &rta is duplicated using
262 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
263 * both structures are duplicated by rte_do_cow() and rta_do_cow().
264 *
265 * Note that in the second case, the cached &rta loses one reference, while the
266 * private copy created by rta_do_cow() is a shallow copy sharing indirect data
267 * (eattrs, nexthops, ...) with it. To work properly, the original shared &rta
268 * should keep another reference for the lifetime of the created private copy.
269 *
270 * Result: a pointer to the new writable &rte with writable &rta.
271 */
272 rte *
273 rte_cow_rta(rte *r, linpool *lp)
274 {
275 if (!rta_is_cached(r->attrs))
276 return r;
277
278 rte *e = rte_cow(r);
279 rta *a = rta_do_cow(r->attrs, lp);
280 rta_free(e->attrs);
281 e->attrs = a;
282 return e;
283 }
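/*
 * Sketch of the intended calling pattern, assuming it runs inside an
 * update (rte_update_lock() held, so rte_update_pool stays valid) and
 * that 'r' comes from the caller. As noted above, the original shared
 * &rta must keep its own reference while the shallow copy lives.
 *
 *   rte *w = rte_cow_rta(r, rte_update_pool);
 *   w->attrs->igp_metric = 10;   (safe - w->attrs is private now)
 */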
284
285 static int /* Actually better or at least as good as */
286 rte_better(rte *new, rte *old)
287 {
288 int (*better)(rte *, rte *);
289
290 if (!rte_is_valid(old))
291 return 1;
292 if (!rte_is_valid(new))
293 return 0;
294
295 if (new->pref > old->pref)
296 return 1;
297 if (new->pref < old->pref)
298 return 0;
299 if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
300 {
301 /*
302 * If the user has configured protocol preferences such that two different
303 * protocols have the same preference, try to break the tie by comparing
304 * addresses. Not too useful, but it keeps the ordering of routes unambiguous.
305 */
306 return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
307 }
308 if (better = new->attrs->src->proto->rte_better)
309 return better(new, old);
310 return 0;
311 }
312
313 static int
314 rte_mergable(rte *pri, rte *sec)
315 {
316 int (*mergable)(rte *, rte *);
317
318 if (!rte_is_valid(pri) || !rte_is_valid(sec))
319 return 0;
320
321 if (pri->pref != sec->pref)
322 return 0;
323
324 if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
325 return 0;
326
327 if (mergable = pri->attrs->src->proto->rte_mergable)
328 return mergable(pri, sec);
329
330 return 0;
331 }
332
333 static void
334 rte_trace(struct proto *p, rte *e, int dir, char *msg)
335 {
336 log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
337 }
338
339 static inline void
340 rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
341 {
342 if (p->debug & flag)
343 rte_trace(p, e, '>', msg);
344 }
345
346 static inline void
347 rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
348 {
349 if (p->debug & flag)
350 rte_trace(p, e, '<', msg);
351 }
352
353 static rte *
354 export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
355 {
356 struct proto *p = c->proto;
357 struct filter *filter = c->out_filter;
358 struct proto_stats *stats = &c->stats;
359 ea_list *tmpb = NULL;
360 rte *rt;
361 int v;
362
363 rt = rt0;
364 *rt_free = NULL;
365
366 if (!tmpa)
367 tmpa = &tmpb;
368
369 *tmpa = rte_make_tmp_attrs(rt, pool);
370
371 v = p->import_control ? p->import_control(p, &rt, tmpa, pool) : 0;
372 if (v < 0)
373 {
374 if (silent)
375 goto reject;
376
377 stats->exp_updates_rejected++;
378 if (v == RIC_REJECT)
379 rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
380 goto reject;
381 }
382 if (v > 0)
383 {
384 if (!silent)
385 rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
386 goto accept;
387 }
388
389 v = filter && ((filter == FILTER_REJECT) ||
390 (f_run(filter, &rt, tmpa, pool, FF_FORCE_TMPATTR) > F_ACCEPT));
391 if (v)
392 {
393 if (silent)
394 goto reject;
395
396 stats->exp_updates_filtered++;
397 rte_trace_out(D_FILTERS, p, rt, "filtered out");
398 goto reject;
399 }
400
401 accept:
402 if (rt != rt0)
403 *rt_free = rt;
404 return rt;
405
406 reject:
407 /* Discard temporary rte */
408 if (rt != rt0)
409 rte_free(rt);
410 return NULL;
411 }
412
413 static inline rte *
414 export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
415 {
416 return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent);
417 }
418
419 static void
420 do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
421 {
422 struct proto *p = c->proto;
423 struct proto_stats *stats = &c->stats;
424
425
426 /*
427 * First, apply export limit.
428 *
429 * Export route limits have several problems. Because the exp_routes
430 * counter is reset before refeed, we don't really know whether the
431 * limit is breached and whether the update is new or not. Therefore
432 * the number of really exported routes may exceed the limit
433 * temporarily (routes exported before and new routes in refeed).
434 *
435 * A minor advantage is that if the limit is decreased and refeed is
436 * requested, the number of exported routes really decreases.
437 *
438 * The second problem is that, with export limits, we don't know whether
439 * old was really exported (it might be blocked by limit). When a
440 * withdraw is exported, we announce it even when the previous
441 * update was blocked. This is not a big issue, but the same problem
442 * is in updating exp_routes counter. Therefore, to be consistent in
443 * increases and decreases of exp_routes, we count exported routes
444 * regardless of blocking by limits.
445 *
446 * A similar problem arises in handling updates - when a new route is
447 * received and blocking is active, the route would be blocked, but
448 * when an update for the route is received later, the update
449 * would be propagated (as old != NULL). Therefore, we have to block
450 * non-new updates as well (contrary to import blocking).
451 */
452
453 struct channel_limit *l = &c->out_limit;
454 if (l->action && new)
455 {
456 if ((!old || refeed) && (stats->exp_routes >= l->limit))
457 channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
458
459 if (l->state == PLS_BLOCKED)
460 {
461 stats->exp_routes++; /* see note above */
462 stats->exp_updates_rejected++;
463 rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
464 new = NULL;
465
466 if (!old)
467 return;
468 }
469 }
470
471
472 if (new)
473 stats->exp_updates_accepted++;
474 else
475 stats->exp_withdraws_accepted++;
476
477 /* Hack: We do not decrease exp_routes during refeed; we instead
478 reset exp_routes at the start of refeed. */
479 if (new)
480 stats->exp_routes++;
481 if (old && !refeed)
482 stats->exp_routes--;
483
484 if (p->debug & D_ROUTES)
485 {
486 if (new && old)
487 rte_trace_out(D_ROUTES, p, new, "replaced");
488 else if (new)
489 rte_trace_out(D_ROUTES, p, new, "added");
490 else if (old)
491 rte_trace_out(D_ROUTES, p, old, "removed");
492 }
493 if (!new)
494 p->rt_notify(p, c, net, NULL, old, NULL);
495 else if (tmpa)
496 {
497 ea_list *t = tmpa;
498 while (t->next)
499 t = t->next;
500 t->next = new->attrs->eattrs;
501 p->rt_notify(p, c, net, new, old, tmpa);
502 t->next = NULL;
503 }
504 else
505 p->rt_notify(p, c, net, new, old, new->attrs->eattrs);
506 }
507
508 static void
509 rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
510 {
511 struct proto *p = c->proto;
512
513 rte *new = new0;
514 rte *old = old0;
515 rte *new_free = NULL;
516 rte *old_free = NULL;
517 ea_list *tmpa = NULL;
518
519 if (new)
520 c->stats.exp_updates_received++;
521 else
522 c->stats.exp_withdraws_received++;
523
524 /*
525 * This is a tricky part - we don't know whether route 'old' was
526 * exported to protocol 'p' or was filtered by the export filter.
527 * We try to run the export filter to find out, so that we have a correct
528 * value in the 'old' argument of rte_update (and a proper filter value).
529 *
530 * FIXME - this is broken because 'configure soft' may change
531 * filters but keep routes. Refeed is expected to be called after
532 * change of the filters and with old == new, therefore we do not
533 * even try to run the filter on an old route. This may lead to
534 * 'spurious withdraws' but ensures that there are no 'missing
535 * withdraws'.
536 *
537 * This is not completely safe as there is a window between
538 * reconfiguration and the end of refeed - if a newly filtered
539 * route disappears during this period, proper withdraw is not
540 * sent (because old would be also filtered) and the route is
541 * not refeeded (because it disappeared before that).
542 */
543
544 if (new)
545 new = export_filter(c, new, &new_free, &tmpa, 0);
546
547 if (old && !refeed)
548 old = export_filter(c, old, &old_free, NULL, 1);
549
550 if (!new && !old)
551 {
552 /*
553 * As mentioned above, 'old' value may be incorrect in some race conditions.
554 * We generally ignore it with the exception of withdraws to the pipe protocol.
555 * In that case we rather propagate unfiltered withdraws regardless of
556 * export filters to ensure that when a protocol is flushed, its routes are
557 * removed from all tables. Possible spurious unfiltered withdraws are not a
558 * problem here as they are ignored if there is no corresponding route at
559 * the other end of the pipe. We directly call rt_notify() hook instead of
560 * do_rt_notify() to avoid logging and stat counters.
561 */
562
563 #ifdef CONFIG_PIPE
564 if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
565 p->rt_notify(p, c, net, NULL, old0, NULL);
566 #endif
567
568 return;
569 }
570
571 do_rt_notify(c, net, new, old, tmpa, refeed);
572
573 /* Discard temporary rte's */
574 if (new_free)
575 rte_free(new_free);
576 if (old_free)
577 rte_free(old_free);
578 }
579
580 static void
581 rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
582 {
583 // struct proto *p = c->proto;
584
585 rte *r;
586 rte *new_best = NULL;
587 rte *old_best = NULL;
588 rte *new_free = NULL;
589 rte *old_free = NULL;
590 ea_list *tmpa = NULL;
591
592 /* Used to track whether we met the old_changed position. If before_old is NULL,
593 old_changed was the first and we met it implicitly before the current best route. */
594 int old_meet = old_changed && !before_old;
595
596 /* Note that before_old is either NULL or a valid (not rejected) route.
597 If old_changed is valid, before_old has to be too. If the old changed route
598 was not valid, the caller must use NULL for both old_changed and before_old. */
599
600 if (new_changed)
601 c->stats.exp_updates_received++;
602 else
603 c->stats.exp_withdraws_received++;
604
605 /* First, find the new_best route - first accepted by filters */
606 for (r=net->routes; rte_is_valid(r); r=r->next)
607 {
608 if (new_best = export_filter(c, r, &new_free, &tmpa, 0))
609 break;
610
611 /* Note if we walked around the position of old_changed route */
612 if (r == before_old)
613 old_meet = 1;
614 }
615
616 /*
617 * Second, handle the feed case. That means we do not care about
618 * old_best. It is NULL for feed, and the new_best for refeed.
619 * For refeed, there is a hack similar to the one in rt_notify_basic()
620 * to ensure withdraws in case of changed filters.
621 */
622 if (feed)
623 {
624 if (feed == 2) /* refeed */
625 old_best = new_best ? new_best :
626 (rte_is_valid(net->routes) ? net->routes : NULL);
627 else
628 old_best = NULL;
629
630 if (!new_best && !old_best)
631 return;
632
633 goto found;
634 }
635
636 /*
637 * Now, we find the old_best route. Generally, it is the same as the
638 * new_best, unless new_best is the same as new_changed or
639 * old_changed is accepted before new_best.
640 *
641 * There are four cases:
642 *
643 * - We would find and accept old_changed before new_best, therefore
644 * old_changed is old_best. In the remaining cases we suppose this
645 * is not true.
646 *
647 * - We found no new_best, therefore there is also no old_best and
648 * we ignore this withdraw.
649 *
650 * - We found new_best different from new_changed, therefore
651 * old_best is the same as new_best and we ignore this update.
652 *
653 * - We found new_best the same as new_changed, therefore it cannot
654 * be old_best and we have to continue search for old_best.
655 */
656
657 /* First case */
658 if (old_meet)
659 if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
660 goto found;
661
662 /* Second case */
663 if (!new_best)
664 return;
665
666 /* Third case, we use r instead of new_best, because export_filter() could change it */
667 if (r != new_changed)
668 {
669 if (new_free)
670 rte_free(new_free);
671 return;
672 }
673
674 /* Fourth case */
675 for (r=r->next; rte_is_valid(r); r=r->next)
676 {
677 if (old_best = export_filter(c, r, &old_free, NULL, 1))
678 goto found;
679
680 if (r == before_old)
681 if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
682 goto found;
683 }
684
685 /* Implicitly, old_best is NULL and new_best is non-NULL */
686
687 found:
688 do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2));
689
690 /* Discard temporary rte's */
691 if (new_free)
692 rte_free(new_free);
693 if (old_free)
694 rte_free(old_free);
695 }
696
697
698 static struct nexthop *
699 nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
700 {
701 return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
702 }
703
704 rte *
705 rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
706 {
707 // struct proto *p = c->proto;
708 struct nexthop *nhs = NULL;
709 rte *best0, *best, *rt0, *rt, *tmp;
710
711 best0 = net->routes;
712 *rt_free = NULL;
713
714 if (!rte_is_valid(best0))
715 return NULL;
716
717 best = export_filter_(c, best0, rt_free, tmpa, pool, silent);
718
719 if (!best || !rte_is_reachable(best))
720 return best;
721
722 for (rt0 = best0->next; rt0; rt0 = rt0->next)
723 {
724 if (!rte_mergable(best0, rt0))
725 continue;
726
727 rt = export_filter_(c, rt0, &tmp, NULL, pool, 1);
728
729 if (!rt)
730 continue;
731
732 if (rte_is_reachable(rt))
733 nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);
734
735 if (tmp)
736 rte_free(tmp);
737 }
738
739 if (nhs)
740 {
741 nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);
742
743 if (nhs->next)
744 {
745 best = rte_cow_rta(best, pool);
746 nexthop_link(best->attrs, nhs);
747 }
748 }
749
750 if (best != best0)
751 *rt_free = best;
752
753 return best;
754 }
755
756
757 static void
758 rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
759 rte *new_best, rte *old_best, int refeed)
760 {
761 // struct proto *p = c->proto;
762
763 rte *new_best_free = NULL;
764 rte *old_best_free = NULL;
765 rte *new_changed_free = NULL;
766 rte *old_changed_free = NULL;
767 ea_list *tmpa = NULL;
768
769 /* We assume that all rte arguments are either NULL or rte_is_valid() */
770
771 /* This check should be done by the caller */
772 if (!new_best && !old_best)
773 return;
774
775 /* Check whether the change is relevant to the merged route */
776 if ((new_best == old_best) && !refeed)
777 {
778 new_changed = rte_mergable(new_best, new_changed) ?
779 export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL;
780
781 old_changed = rte_mergable(old_best, old_changed) ?
782 export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL;
783
784 if (!new_changed && !old_changed)
785 return;
786 }
787
788 if (new_best)
789 c->stats.exp_updates_received++;
790 else
791 c->stats.exp_withdraws_received++;
792
793 /* Prepare new merged route */
794 if (new_best)
795 new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0);
796
797 /* Prepare old merged route (without proper merged next hops) */
798 /* There are some issues with running filter on old route - see rt_notify_basic() */
799 if (old_best && !refeed)
800 old_best = export_filter(c, old_best, &old_best_free, NULL, 1);
801
802 if (new_best || old_best)
803 do_rt_notify(c, net, new_best, old_best, tmpa, refeed);
804
805 /* Discard temporary rte's */
806 if (new_best_free)
807 rte_free(new_best_free);
808 if (old_best_free)
809 rte_free(old_best_free);
810 if (new_changed_free)
811 rte_free(new_changed_free);
812 if (old_changed_free)
813 rte_free(old_changed_free);
814 }
815
816
817 /**
818 * rte_announce - announce a routing table change
819 * @tab: table the route has been added to
820 * @type: type of route announcement (RA_OPTIMAL, RA_ACCEPTED, RA_MERGED or RA_ANY)
821 * @net: network in question
822 * @new: the new route to be announced
823 * @old: the previous route for the same network
824 * @new_best: the new best route for the same network
825 * @old_best: the previous best route for the same network
826 * @before_old: The previous route before @old for the same network.
827 * If @before_old is NULL, @old was the first.
828 *
829 * This function gets a routing table update and announces it
830 * to all protocols that accept the given type of route announcement
831 * and are connected to the same table by their announcement hooks.
832 *
833 * Route announcement of type %RA_OPTIMAL is generated when the optimal
834 * route (in routing table @tab) changes. In that case @old stores the
835 * old optimal route.
836 *
837 * Route announcement of type %RA_ANY is generated when any route (in
838 * routing table @tab) changes. In that case @old stores the old route
839 * from the same protocol.
840 *
841 * For each appropriate protocol, we first call its import_control()
842 * hook which performs basic checks on the route (each protocol has a
843 * right to veto or force accept of the route before any filter is
844 * asked) and adds default values of attributes specific to the new
845 * protocol (metrics, tags etc.). Then it consults the protocol's
846 * export filter and if it accepts the route, the rt_notify() hook of
847 * the protocol gets called.
848 */
849 static void
850 rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
851 rte *new_best, rte *old_best, rte *before_old)
852 {
853 if (!rte_is_valid(new))
854 new = NULL;
855
856 if (!rte_is_valid(old))
857 old = before_old = NULL;
858
859 if (!rte_is_valid(new_best))
860 new_best = NULL;
861
862 if (!rte_is_valid(old_best))
863 old_best = NULL;
864
865 if (!old && !new)
866 return;
867
868 if ((type == RA_OPTIMAL) && tab->hostcache)
869 rt_notify_hostcache(tab, net);
870
871 struct channel *c; node *n;
872 WALK_LIST2(c, n, tab->channels, table_node)
873 {
874 if (c->export_state == ES_DOWN)
875 continue;
876
877 if (c->ra_mode == type)
878 if (type == RA_ACCEPTED)
879 rt_notify_accepted(c, net, new, old, before_old, 0);
880 else if (type == RA_MERGED)
881 rt_notify_merged(c, net, new, old, new_best, old_best, 0);
882 else
883 rt_notify_basic(c, net, new, old, 0);
884 }
885 }
886
887 static inline int
888 rte_validate(rte *e)
889 {
890 int c;
891 net *n = e->net;
892
893 if (!net_validate(n->n.addr))
894 {
895 log(L_WARN "Ignoring bogus prefix %N received via %s",
896 n->n.addr, e->sender->proto->name);
897 return 0;
898 }
899
900 c = net_classify(n->n.addr);
901 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
902 {
903 log(L_WARN "Ignoring bogus route %N received via %s",
904 n->n.addr, e->sender->proto->name);
905 return 0;
906 }
907
908 if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
909 {
910 log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
911 n->n.addr, e->attrs->dest, e->sender->proto->name);
912 return 0;
913 }
914
915 if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
916 {
917 log(L_WARN "Ignoring unsorted multipath route %N received via %s",
918 n->n.addr, e->sender->proto->name);
919 return 0;
920 }
921
922 return 1;
923 }
924
925 /**
926 * rte_free - delete a &rte
927 * @e: &rte to be deleted
928 *
929 * rte_free() deletes the given &rte from the routing table it's linked to.
930 */
931 void
932 rte_free(rte *e)
933 {
934 if (rta_is_cached(e->attrs))
935 rta_free(e->attrs);
936 sl_free(rte_slab, e);
937 }
938
939 static inline void
940 rte_free_quick(rte *e)
941 {
942 rta_free(e->attrs);
943 sl_free(rte_slab, e);
944 }
945
946 static int
947 rte_same(rte *x, rte *y)
948 {
949 return
950 x->attrs == y->attrs &&
951 x->flags == y->flags &&
952 x->pflags == y->pflags &&
953 x->pref == y->pref &&
954 (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
955 }
956
957 static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
958
959 static void
960 rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
961 {
962 struct proto *p = c->proto;
963 struct rtable *table = c->table;
964 struct proto_stats *stats = &c->stats;
965 static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
966 rte *before_old = NULL;
967 rte *old_best = net->routes;
968 rte *old = NULL;
969 rte **k;
970
971 k = &net->routes; /* Find and remove original route from the same protocol */
972 while (old = *k)
973 {
974 if (old->attrs->src == src)
975 {
976 /* If there is the same route in the routing table but from
977 * a different sender, then there are two paths from the
978 * source protocol to this routing table through transparent
979 * pipes, which is not allowed.
980 *
981 * We log that and ignore the route. If it is a withdraw, we
982 * ignore it completely (there might be 'spurious withdraws',
983 * see FIXME in do_rte_announce())
984 */
985 if (old->sender->proto != p)
986 {
987 if (new)
988 {
989 log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
990 net->n.addr, table->name);
991 rte_free_quick(new);
992 }
993 return;
994 }
995
996 if (new && rte_same(old, new))
997 {
998 /* No changes, ignore the new route */
999
1000 if (!rte_is_filtered(new))
1001 {
1002 stats->imp_updates_ignored++;
1003 rte_trace_in(D_ROUTES, p, new, "ignored");
1004 }
1005
1006 rte_free_quick(new);
1007 return;
1008 }
1009 *k = old->next;
1010 break;
1011 }
1012 k = &old->next;
1013 before_old = old;
1014 }
1015
1016 if (!old)
1017 before_old = NULL;
1018
1019 if (!old && !new)
1020 {
1021 stats->imp_withdraws_ignored++;
1022 return;
1023 }
1024
1025 int new_ok = rte_is_ok(new);
1026 int old_ok = rte_is_ok(old);
1027
1028 struct channel_limit *l = &c->rx_limit;
1029 if (l->action && !old && new)
1030 {
1031 u32 all_routes = stats->imp_routes + stats->filt_routes;
1032
1033 if (all_routes >= l->limit)
1034 channel_notify_limit(c, l, PLD_RX, all_routes);
1035
1036 if (l->state == PLS_BLOCKED)
1037 {
1038 /* In the receive limit case the situation is simple: old is NULL, so
1039 we just free new and exit like nothing happened */
1040
1041 stats->imp_updates_ignored++;
1042 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1043 rte_free_quick(new);
1044 return;
1045 }
1046 }
1047
1048 l = &c->in_limit;
1049 if (l->action && !old_ok && new_ok)
1050 {
1051 if (stats->imp_routes >= l->limit)
1052 channel_notify_limit(c, l, PLD_IN, stats->imp_routes);
1053
1054 if (l->state == PLS_BLOCKED)
1055 {
1056 /* In the import limit case the situation is more complicated. We
1057 shouldn't just drop the route; we should handle it as if
1058 it were filtered. We also have to continue the route
1059 processing if old or new is non-NULL, but we should exit
1060 if both are NULL as this case is probably assumed to be
1061 already handled. */
1062
1063 stats->imp_updates_ignored++;
1064 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1065
1066 if (c->in_keep_filtered)
1067 new->flags |= REF_FILTERED;
1068 else
1069 { rte_free_quick(new); new = NULL; }
1070
1071 /* Note that old && !new could be possible when
1072 c->in_keep_filtered changed in the recent past. */
1073
1074 if (!old && !new)
1075 return;
1076
1077 new_ok = 0;
1078 goto skip_stats1;
1079 }
1080 }
1081
1082 if (new_ok)
1083 stats->imp_updates_accepted++;
1084 else if (old_ok)
1085 stats->imp_withdraws_accepted++;
1086 else
1087 stats->imp_withdraws_ignored++;
1088
1089 skip_stats1:
1090
1091 if (new)
1092 rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
1093 if (old)
1094 rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
1095
1096 if (table->config->sorted)
1097 {
1098 /* If routes are sorted, just insert the new route at the appropriate position */
1099 if (new)
1100 {
1101 if (before_old && !rte_better(new, before_old))
1102 k = &before_old->next;
1103 else
1104 k = &net->routes;
1105
1106 for (; *k; k=&(*k)->next)
1107 if (rte_better(new, *k))
1108 break;
1109
1110 new->next = *k;
1111 *k = new;
1112 }
1113 }
1114 else
1115 {
1116 /* If routes are not sorted, find the best route and move it to
1117 the first position. There are several optimized cases. */
1118
1119 if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1120 goto do_recalculate;
1121
1122 if (new && rte_better(new, old_best))
1123 {
1124 /* The first case - the new route is clearly optimal,
1125 we link it at the first position */
1126
1127 new->next = net->routes;
1128 net->routes = new;
1129 }
1130 else if (old == old_best)
1131 {
1132 /* The second case - the old best route disappeared, we add the
1133 new route (if we have any) to the list (we don't care about
1134 position) and then we elect the new optimal route and relink
1135 that route at the first position and announce it. The new optimal
1136 route might be NULL if there are no more routes */
1137
1138 do_recalculate:
1139 /* Add the new route to the list */
1140 if (new)
1141 {
1142 new->next = net->routes;
1143 net->routes = new;
1144 }
1145
1146 /* Find a new optimal route (if there is any) */
1147 if (net->routes)
1148 {
1149 rte **bp = &net->routes;
1150 for (k=&(*bp)->next; *k; k=&(*k)->next)
1151 if (rte_better(*k, *bp))
1152 bp = k;
1153
1154 /* And relink it */
1155 rte *best = *bp;
1156 *bp = best->next;
1157 best->next = net->routes;
1158 net->routes = best;
1159 }
1160 }
1161 else if (new)
1162 {
1163 /* The third case - the new route is not better than the old
1164 best route (therefore old_best != NULL) and the old best
1165 route was not removed (therefore old_best == net->routes).
1166 We just link the new route after the old best route. */
1167
1168 ASSERT(net->routes != NULL);
1169 new->next = net->routes->next;
1170 net->routes->next = new;
1171 }
1172 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1173 }
1174
1175 if (new)
1176 new->lastmod = current_time();
1177
1178 /* Log the route change */
1179 if (p->debug & D_ROUTES)
1180 {
1181 if (new_ok)
1182 rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1183 else if (old_ok)
1184 {
1185 if (old != old_best)
1186 rte_trace(p, old, '>', "removed");
1187 else if (rte_is_ok(net->routes))
1188 rte_trace(p, old, '>', "removed [replaced]");
1189 else
1190 rte_trace(p, old, '>', "removed [sole]");
1191 }
1192 }
1193
1194 /* Propagate the route change */
1195 rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1196 if (net->routes != old_best)
1197 rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1198 if (table->config->sorted)
1199 rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1200 rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1201
1202 if (!net->routes &&
1203 (table->gc_counter++ >= table->config->gc_max_ops) &&
1204 (table->gc_time + table->config->gc_min_time <= current_time()))
1205 rt_schedule_prune(table);
1206
1207 if (old_ok && p->rte_remove)
1208 p->rte_remove(net, old);
1209 if (new_ok && p->rte_insert)
1210 p->rte_insert(net, new);
1211
1212 if (old)
1213 rte_free_quick(old);
1214 }
1215
1216 static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */
1217
1218 static inline void
1219 rte_update_lock(void)
1220 {
1221 rte_update_nest_cnt++;
1222 }
1223
1224 static inline void
1225 rte_update_unlock(void)
1226 {
1227 if (!--rte_update_nest_cnt)
1228 lp_flush(rte_update_pool);
1229 }
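/*
 * Sketch of the locking discipline: the lock may nest and the linpool is
 * flushed only when the outermost holder unlocks, so temporary attribute
 * lists stay valid across recursive rte_update() calls.
 *
 *   rte_update_lock();
 *   ea_list *tmp = rte_make_tmp_attrs(e, rte_update_pool);
 *   ... use tmp, possibly recursing into rte_update() ...
 *   rte_update_unlock();   (lp_flush() runs once the counter drops to 0)
 */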
1230
1231 static inline void
1232 rte_hide_dummy_routes(net *net, rte **dummy)
1233 {
1234 if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1235 {
1236 *dummy = net->routes;
1237 net->routes = (*dummy)->next;
1238 }
1239 }
1240
1241 static inline void
1242 rte_unhide_dummy_routes(net *net, rte **dummy)
1243 {
1244 if (*dummy)
1245 {
1246 (*dummy)->next = net->routes;
1247 net->routes = *dummy;
1248 }
1249 }
1250
1251 /**
1252 * rte_update - enter a new update to a routing table
1253 * @table: table to be updated
1254 * @c: channel doing the update
1255 * @net: network node
1256 * @p: protocol submitting the update
1257 * @src: protocol originating the update
1258 * @new: a &rte representing the new route or %NULL for route removal.
1259 *
1260 * This function is called by the routing protocols whenever they discover
1261 * a new route or wish to update/remove an existing route. The right announcement
1262 * sequence is to build route attributes first (either un-cached with @aflags set
1263 * to zero or a cached one using rta_lookup(); in this case please note that
1264 * you need to increase the use count of the attributes yourself by calling
1265 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1266 * the appropriate data and finally submit the new &rte by calling rte_update().
1267 *
1268 * @src specifies the protocol that originally created the route and the meaning
1269 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
1270 * same value as @new->attrs->proto. @p specifies the protocol that called
1271 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1272 * stores @p in @new->sender.
1273 *
1274 * When rte_update() gets any route, it automatically validates it (checks
1275 * whether the network and next hop address are valid IP addresses and also
1276 * whether a normal routing protocol doesn't try to smuggle a host or link
1277 * scope route to the table), converts all protocol dependent attributes stored
1278 * in the &rte to temporary extended attributes, consults import filters of the
1279 * protocol to see if the route should be accepted and/or its attributes modified,
1280 * and stores the temporary attributes back to the &rte.
1281 *
1282 * Now, having a "public" version of the route, we
1283 * automatically find any old route defined by the protocol @src
1284 * for network @n, replace it by the new one (or remove it if @new is %NULL),
1285 * recalculate the optimal route for this destination and finally broadcast
1286 * the change (if any) to all routing protocols by calling rte_announce().
1287 *
1288 * All memory used for attribute lists and other temporary allocations is taken
1289 * from a special linear pool @rte_update_pool and freed when rte_update()
1290 * finishes.
1291 */
1292
1293 void
1294 rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
1295 {
1296 struct proto *p = c->proto;
1297 struct proto_stats *stats = &c->stats;
1298 struct filter *filter = c->in_filter;
1299 ea_list *tmpa = NULL;
1300 rte *dummy = NULL;
1301 net *nn;
1302
1303 ASSERT(c->channel_state == CS_UP);
1304
1305 rte_update_lock();
1306 if (new)
1307 {
1308 nn = net_get(c->table, n);
1309
1310 new->net = nn;
1311 new->sender = c;
1312
1313 if (!new->pref)
1314 new->pref = c->preference;
1315
1316 stats->imp_updates_received++;
1317 if (!rte_validate(new))
1318 {
1319 rte_trace_in(D_FILTERS, p, new, "invalid");
1320 stats->imp_updates_invalid++;
1321 goto drop;
1322 }
1323
1324 if (filter == FILTER_REJECT)
1325 {
1326 stats->imp_updates_filtered++;
1327 rte_trace_in(D_FILTERS, p, new, "filtered out");
1328
1329 if (! c->in_keep_filtered)
1330 goto drop;
1331
1332 /* new is a private copy, we can modify it */
1333 new->flags |= REF_FILTERED;
1334 }
1335 else
1336 {
1337 tmpa = rte_make_tmp_attrs(new, rte_update_pool);
1338 if (filter && (filter != FILTER_REJECT))
1339 {
1340 ea_list *old_tmpa = tmpa;
1341 int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
1342 if (fr > F_ACCEPT)
1343 {
1344 stats->imp_updates_filtered++;
1345 rte_trace_in(D_FILTERS, p, new, "filtered out");
1346
1347 if (! c->in_keep_filtered)
1348 goto drop;
1349
1350 new->flags |= REF_FILTERED;
1351 }
1352 if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
1353 src->proto->store_tmp_attrs(new, tmpa);
1354 }
1355 }
1356 if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1357 new->attrs = rta_lookup(new->attrs);
1358 new->flags |= REF_COW;
1359 }
1360 else
1361 {
1362 stats->imp_withdraws_received++;
1363
1364 if (!(nn = net_find(c->table, n)) || !src)
1365 {
1366 stats->imp_withdraws_ignored++;
1367 rte_update_unlock();
1368 return;
1369 }
1370 }
1371
1372 recalc:
1373 rte_hide_dummy_routes(nn, &dummy);
1374 rte_recalculate(c, nn, new, src);
1375 rte_unhide_dummy_routes(nn, &dummy);
1376 rte_update_unlock();
1377 return;
1378
1379 drop:
1380 rte_free(new);
1381 new = NULL;
1382 goto recalc;
1383 }
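/*
 * Sketch of the announcement sequence described in the comment above;
 * the attribute values are only an example and error handling is omitted.
 *
 *   rta a0 = {
 *     .src = src, .source = RTS_STATIC,
 *     .scope = SCOPE_UNIVERSE, .dest = RTD_UNREACHABLE,
 *   };
 *   rte *e = rte_get_temp(rta_lookup(&a0));   (temporary rte, cached rta)
 *   e->pflags = 0;
 *   rte_update2(c, n, e, src);                (the table takes over 'e')
 *
 * A withdraw uses the same entry point with @new set to %NULL:
 *
 *   rte_update2(c, n, NULL, src);
 */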
1384
1385 /* Independent call to rte_announce(), used from next hop
1386 recalculation, outside of rte_update(). new must be non-NULL */
1387 static inline void
1388 rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
1389 rte *new_best, rte *old_best)
1390 {
1391 rte_update_lock();
1392 rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
1393 rte_update_unlock();
1394 }
1395
1396 static inline void
1397 rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */
1398 {
1399 rte_update_lock();
1400 rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
1401 rte_update_unlock();
1402 }
1403
1404 /* Check rtable for the best route to a given net, whether it would be exported to p */
1405 int
1406 rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
1407 {
1408 net *n = net_find(t, a);
1409 rte *rt = n ? n->routes : NULL;
1410
1411 if (!rte_is_valid(rt))
1412 return 0;
1413
1414 rte_update_lock();
1415
1416 /* Rest is stripped down export_filter() */
1417 ea_list *tmpa = rte_make_tmp_attrs(rt, rte_update_pool);
1418 int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
1419 if (v == RIC_PROCESS)
1420 v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
1421
1422 /* Discard temporary rte */
1423 if (rt != n->routes)
1424 rte_free(rt);
1425
1426 rte_update_unlock();
1427
1428 return v > 0;
1429 }
1430
1431
1432 /**
1433 * rt_refresh_begin - start a refresh cycle
1434 * @t: related routing table
1435 * @c: related channel
1436 *
1437 * This function starts a refresh cycle for the given routing table and announce
1438 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1439 * routes to the routing table (by rte_update()). After that, all protocol
1440 * routes (more precisely routes with @c as @sender) not sent during the
1441 * refresh cycle but still in the table from the past are pruned. This is
1442 * implemented by marking all related routes as stale by REF_STALE flag in
1443 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1444 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1445 */
1446 void
1447 rt_refresh_begin(rtable *t, struct channel *c)
1448 {
1449 FIB_WALK(&t->fib, net, n)
1450 {
1451 rte *e;
1452 for (e = n->routes; e; e = e->next)
1453 if (e->sender == c)
1454 e->flags |= REF_STALE;
1455 }
1456 FIB_WALK_END;
1457 }
1458
1459 /**
1460 * rt_refresh_end - end a refresh cycle
1461 * @t: related routing table
1462 * @c: related channel
1463 *
1464 * This function ends a refresh cycle for the given routing table and announce
1465 * hook. See rt_refresh_begin() for description of refresh cycles.
1466 */
1467 void
1468 rt_refresh_end(rtable *t, struct channel *c)
1469 {
1470 int prune = 0;
1471
1472 FIB_WALK(&t->fib, net, n)
1473 {
1474 rte *e;
1475 for (e = n->routes; e; e = e->next)
1476 if ((e->sender == c) && (e->flags & REF_STALE))
1477 {
1478 e->flags |= REF_DISCARD;
1479 prune = 1;
1480 }
1481 }
1482 FIB_WALK_END;
1483
1484 if (prune)
1485 rt_schedule_prune(t);
1486 }
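/*
 * A typical refresh cycle from the protocol's point of view (sketch):
 *
 *   rt_refresh_begin(c->table, c);     (mark its routes REF_STALE)
 *   ... rte_update2() for every route valid in the new session ...
 *   rt_refresh_end(c->table, c);       (stale leftovers get REF_DISCARD
 *                                       and are pruned asynchronously)
 */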
1487
1488
1489 /**
1490 * rte_dump - dump a route
1491 * @e: &rte to be dumped
1492 *
1493 * This function dumps the contents of a &rte to debug output.
1494 */
1495 void
1496 rte_dump(rte *e)
1497 {
1498 net *n = e->net;
1499 debug("%-1N ", n->n.addr);
1500 debug("KF=%02x PF=%02x pref=%d ", n->n.flags, e->pflags, e->pref);
1501 rta_dump(e->attrs);
1502 if (e->attrs->src->proto->proto->dump_attrs)
1503 e->attrs->src->proto->proto->dump_attrs(e);
1504 debug("\n");
1505 }
1506
1507 /**
1508 * rt_dump - dump a routing table
1509 * @t: routing table to be dumped
1510 *
1511 * This function dumps the contents of a given routing table to debug output.
1512 */
1513 void
1514 rt_dump(rtable *t)
1515 {
1516 debug("Dump of routing table <%s>\n", t->name);
1517 #ifdef DEBUGGING
1518 fib_check(&t->fib);
1519 #endif
1520 FIB_WALK(&t->fib, net, n)
1521 {
1522 rte *e;
1523 for(e=n->routes; e; e=e->next)
1524 rte_dump(e);
1525 }
1526 FIB_WALK_END;
1527 debug("\n");
1528 }
1529
1530 /**
1531 * rt_dump_all - dump all routing tables
1532 *
1533 * This function dumps the contents of all routing tables to debug output.
1534 */
1535 void
1536 rt_dump_all(void)
1537 {
1538 rtable *t;
1539
1540 WALK_LIST(t, routing_tables)
1541 rt_dump(t);
1542 }
1543
1544 static inline void
1545 rt_schedule_hcu(rtable *tab)
1546 {
1547 if (tab->hcu_scheduled)
1548 return;
1549
1550 tab->hcu_scheduled = 1;
1551 ev_schedule(tab->rt_event);
1552 }
1553
1554 static inline void
1555 rt_schedule_nhu(rtable *tab)
1556 {
1557 if (tab->nhu_state == NHU_CLEAN)
1558 ev_schedule(tab->rt_event);
1559
1560 /* state change:
1561 * NHU_CLEAN -> NHU_SCHEDULED
1562 * NHU_RUNNING -> NHU_DIRTY
1563 */
1564 tab->nhu_state |= NHU_SCHEDULED;
1565 }
1566
1567 void
1568 rt_schedule_prune(rtable *tab)
1569 {
1570 if (tab->prune_state == 0)
1571 ev_schedule(tab->rt_event);
1572
1573 /* state change 0->1, 2->3 */
1574 tab->prune_state |= 1;
1575 }
1576
1577
1578 static void
1579 rt_event(void *ptr)
1580 {
1581 rtable *tab = ptr;
1582
1583 rt_lock_table(tab);
1584
1585 if (tab->hcu_scheduled)
1586 rt_update_hostcache(tab);
1587
1588 if (tab->nhu_state)
1589 rt_next_hop_update(tab);
1590
1591 if (tab->prune_state)
1592 rt_prune_table(tab);
1593
1594 rt_unlock_table(tab);
1595 }
1596
1597 void
1598 rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1599 {
1600 bzero(t, sizeof(*t));
1601 t->name = name;
1602 t->config = cf;
1603 t->addr_type = cf ? cf->addr_type : NET_IP4;
1604 fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1605 init_list(&t->channels);
1606
1607 if (cf)
1608 {
1609 t->rt_event = ev_new(p);
1610 t->rt_event->hook = rt_event;
1611 t->rt_event->data = t;
1612 t->gc_time = current_time();
1613 }
1614 }
1615
1616 /**
1617 * rt_init - initialize routing tables
1618 *
1619 * This function is called during BIRD startup. It initializes the
1620 * routing table module.
1621 */
1622 void
1623 rt_init(void)
1624 {
1625 rta_init();
1626 rt_table_pool = rp_new(&root_pool, "Routing tables");
1627 rte_update_pool = lp_new_default(rt_table_pool);
1628 rte_slab = sl_new(rt_table_pool, sizeof(rte));
1629 init_list(&routing_tables);
1630 }
1631
1632
1633 /**
1634 * rt_prune_table - prune a routing table
1635 *
1636 * The prune loop scans routing tables and removes routes belonging to flushing
1637 * protocols, discarded routes and also stale network entries. It is called from
1638 * rt_event(). The event is rescheduled if the current iteration does not finish
1639 * the table. The pruning is directed by the prune state (@prune_state),
1640 * specifying whether the prune cycle is scheduled or running, and there
1641 * is also a persistent pruning iterator (@prune_fit).
1642 *
1643 * The prune loop is also used for channel flushing. For this purpose, the
1644 * channels to flush are marked before the iteration and notified after the
1645 * iteration.
1646 */
1647 static void
1648 rt_prune_table(rtable *tab)
1649 {
1650 struct fib_iterator *fit = &tab->prune_fit;
1651 int limit = 512;
1652
1653 struct channel *c;
1654 node *n, *x;
1655
1656 DBG("Pruning route table %s\n", tab->name);
1657 #ifdef DEBUGGING
1658 fib_check(&tab->fib);
1659 #endif
1660
1661 if (tab->prune_state == 0)
1662 return;
1663
1664 if (tab->prune_state == 1)
1665 {
1666 /* Mark channels to flush */
1667 WALK_LIST2(c, n, tab->channels, table_node)
1668 if (c->channel_state == CS_FLUSHING)
1669 c->flush_active = 1;
1670
1671 FIB_ITERATE_INIT(fit, &tab->fib);
1672 tab->prune_state = 2;
1673 }
1674
1675 again:
1676 FIB_ITERATE_START(&tab->fib, fit, net, n)
1677 {
1678 rte *e;
1679
1680 rescan:
1681 for (e=n->routes; e; e=e->next)
1682 if (e->sender->flush_active || (e->flags & REF_DISCARD))
1683 {
1684 if (limit <= 0)
1685 {
1686 FIB_ITERATE_PUT(fit);
1687 ev_schedule(tab->rt_event);
1688 return;
1689 }
1690
1691 rte_discard(e);
1692 limit--;
1693
1694 goto rescan;
1695 }
1696
1697 if (!n->routes) /* Orphaned FIB entry */
1698 {
1699 FIB_ITERATE_PUT(fit);
1700 fib_delete(&tab->fib, n);
1701 goto again;
1702 }
1703 }
1704 FIB_ITERATE_END;
1705
1706 #ifdef DEBUGGING
1707 fib_check(&tab->fib);
1708 #endif
1709
1710 tab->gc_counter = 0;
1711 tab->gc_time = current_time();
1712
1713 /* state change 2->0, 3->1 */
1714 tab->prune_state &= 1;
1715
1716 if (tab->prune_state > 0)
1717 ev_schedule(tab->rt_event);
1718
1719 /* FIXME: This should be handled in a better way */
1720 rt_prune_sources();
1721
1722 /* Close flushed channels */
1723 WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
1724 if (c->flush_active)
1725 {
1726 c->flush_active = 0;
1727 channel_set_state(c, CS_DOWN);
1728 }
1729
1730 return;
1731 }
1732
1733 void
1734 rt_preconfig(struct config *c)
1735 {
1736 init_list(&c->tables);
1737
1738 rt_new_table(cf_get_symbol("master4"), NET_IP4);
1739 rt_new_table(cf_get_symbol("master6"), NET_IP6);
1740 }
1741
1742
1743 /*
1744 * Some functions for handing internal next hop updates
1745 * triggered by rt_schedule_nhu().
1746 */
1747
1748 static inline int
1749 rta_next_hop_outdated(rta *a)
1750 {
1751 struct hostentry *he = a->hostentry;
1752
1753 if (!he)
1754 return 0;
1755
1756 if (!he->src)
1757 return a->dest != RTD_UNREACHABLE;
1758
1759 return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1760 (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
1761 }
1762
1763 void
1764 rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
1765 {
1766 a->hostentry = he;
1767 a->dest = he->dest;
1768 a->igp_metric = he->igp_metric;
1769
1770 if (a->dest != RTD_UNICAST)
1771 {
1772 /* No nexthop */
1773 no_nexthop:
1774 a->nh = (struct nexthop) {};
1775 if (mls)
1776 { /* Store the label stack for later changes */
1777 a->nh.labels_orig = a->nh.labels = mls->len;
1778 memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
1779 }
1780 return;
1781 }
1782
1783 if (((!mls) || (!mls->len)) && he->nexthop_linkable)
1784 { /* Just link the nexthop chain, no label append happens. */
1785 memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
1786 return;
1787 }
1788
1789 struct nexthop *nhp = NULL, *nhr = NULL;
1790 int skip_nexthop = 0;
1791
1792 for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
1793 {
1794 if (skip_nexthop)
1795 skip_nexthop--;
1796 else
1797 {
1798 nhr = nhp;
1799 nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
1800 }
1801
1802 nhp->iface = nh->iface;
1803 nhp->weight = nh->weight;
1804 if (mls)
1805 {
1806 nhp->labels = nh->labels + mls->len;
1807 nhp->labels_orig = mls->len;
1808 if (nhp->labels <= MPLS_MAX_LABEL_STACK)
1809 {
1810 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
1811 memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
1812 }
1813 else
1814 {
1815 log(L_WARN "Sum of label stack sizes %d + %d = %d exceeds allowed maximum (%d)",
1816 nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
1817 skip_nexthop++;
1818 continue;
1819 }
1820 }
1821 if (ipa_nonzero(nh->gw))
1822 {
1823 nhp->gw = nh->gw; /* Router nexthop */
1824 nhp->flags |= (nh->flags & RNF_ONLINK);
1825 }
1826 else if (ipa_nonzero(he->link))
1827 nhp->gw = he->link; /* Device nexthop with link-local address known */
1828 else
1829 nhp->gw = he->addr; /* Device nexthop with link-local address unknown */
1830 }
1831
1832 if (skip_nexthop)
1833 if (nhr)
1834 nhr->next = NULL;
1835 else
1836 {
1837 a->dest = RTD_UNREACHABLE;
1838 log(L_WARN "No valid nexthop remaining, setting route unreachable");
1839 goto no_nexthop;
1840 }
1841 }
1842
1843 static inline rte *
1844 rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
1845 {
1846 rta *a = alloca(RTA_MAX_SIZE);
1847 memcpy(a, old->attrs, rta_size(old->attrs));
1848
1849 mpls_label_stack mls = { .len = a->nh.labels_orig };
1850 memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
1851
1852 rta_apply_hostentry(a, old->attrs->hostentry, &mls);
1853 a->aflags = 0;
1854
1855 rte *e = sl_alloc(rte_slab);
1856 memcpy(e, old, sizeof(rte));
1857 e->attrs = rta_lookup(a);
1858
1859 return e;
1860 }
1861
1862 static inline int
1863 rt_next_hop_update_net(rtable *tab, net *n)
1864 {
1865 rte **k, *e, *new, *old_best, **new_best;
1866 int count = 0;
1867 int free_old_best = 0;
1868
1869 old_best = n->routes;
1870 if (!old_best)
1871 return 0;
1872
1873 for (k = &n->routes; e = *k; k = &e->next)
1874 if (rta_next_hop_outdated(e->attrs))
1875 {
1876 new = rt_next_hop_update_rte(tab, e);
1877 *k = new;
1878
1879 rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
1880 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
1881
1882 /* Call a pre-comparison hook */
1883 /* Not really an efficient way to compute this */
1884 if (e->attrs->src->proto->rte_recalculate)
1885 e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
1886
1887 if (e != old_best)
1888 rte_free_quick(e);
1889 else /* Freeing of the old best rte is postponed */
1890 free_old_best = 1;
1891
1892 e = new;
1893 count++;
1894 }
1895
1896 if (!count)
1897 return 0;
1898
1899 /* Find the new best route */
1900 new_best = NULL;
1901 for (k = &n->routes; e = *k; k = &e->next)
1902 {
1903 if (!new_best || rte_better(e, *new_best))
1904 new_best = k;
1905 }
1906
1907 /* Relink the new best route to the first position */
1908 new = *new_best;
1909 if (new != n->routes)
1910 {
1911 *new_best = new->next;
1912 new->next = n->routes;
1913 n->routes = new;
1914 }
1915
1916 /* Announce the new best route */
1917 if (new != old_best)
1918 {
1919 rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
1920 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
1921 }
1922
1923 /* FIXME: Better announcement of merged routes */
1924 rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
1925
1926 if (free_old_best)
1927 rte_free_quick(old_best);
1928
1929 return count;
1930 }
1931
1932 static void
1933 rt_next_hop_update(rtable *tab)
1934 {
1935 struct fib_iterator *fit = &tab->nhu_fit;
1936 int max_feed = 32;
1937
1938 if (tab->nhu_state == NHU_CLEAN)
1939 return;
1940
1941 if (tab->nhu_state == NHU_SCHEDULED)
1942 {
1943 FIB_ITERATE_INIT(fit, &tab->fib);
1944 tab->nhu_state = NHU_RUNNING;
1945 }
1946
1947 FIB_ITERATE_START(&tab->fib, fit, net, n)
1948 {
1949 if (max_feed <= 0)
1950 {
1951 FIB_ITERATE_PUT(fit);
1952 ev_schedule(tab->rt_event);
1953 return;
1954 }
1955 max_feed -= rt_next_hop_update_net(tab, n);
1956 }
1957 FIB_ITERATE_END;
1958
1959 /* State change:
1960 * NHU_DIRTY -> NHU_SCHEDULED
1961 * NHU_RUNNING -> NHU_CLEAN
1962 */
1963 tab->nhu_state &= 1;
1964
1965 if (tab->nhu_state != NHU_CLEAN)
1966 ev_schedule(tab->rt_event);
1967 }
1968
1969
1970 struct rtable_config *
1971 rt_new_table(struct symbol *s, uint addr_type)
1972 {
1973 /* Hack that allows us to 'redefine' the master table */
1974 if ((s->class == SYM_TABLE) &&
1975 (s->def == new_config->def_tables[addr_type]) &&
1976 ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
1977 return s->def;
1978
1979 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
1980
1981 cf_define_symbol(s, SYM_TABLE, c);
1982 c->name = s->name;
1983 c->addr_type = addr_type;
1984 c->gc_max_ops = 1000;
1985 c->gc_min_time = 5;
1986
1987 add_tail(&new_config->tables, &c->n);
1988
1989 /* First table of each type is kept as default */
1990 if (! new_config->def_tables[addr_type])
1991 new_config->def_tables[addr_type] = c;
1992
1993 return c;
1994 }
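/*
 * Sketch: the config parser is expected to call this while parsing a
 * table definition, e.g. the bird 2 statement 'ipv4 table extra4;'
 * roughly corresponds to:
 *
 *   struct rtable_config *c = rt_new_table(cf_get_symbol("extra4"), NET_IP4);
 */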
1995
1996 /**
1997 * rt_lock_table - lock a routing table
1998 * @r: routing table to be locked
1999 *
2000 * Lock a routing table, because it's in use by a protocol,
2001 * preventing it from being freed when it gets undefined in a new
2002 * configuration.
2003 */
2004 void
2005 rt_lock_table(rtable *r)
2006 {
2007 r->use_count++;
2008 }
2009
2010 /**
2011 * rt_unlock_table - unlock a routing table
2012 * @r: routing table to be unlocked
2013 *
2014 * Unlock a routing table formerly locked by rt_lock_table(),
2015 * that is decrease its use count and delete it if it's scheduled
2016 * for deletion by configuration changes.
2017 */
2018 void
2019 rt_unlock_table(rtable *r)
2020 {
2021 if (!--r->use_count && r->deleted)
2022 {
2023 struct config *conf = r->deleted;
2024 DBG("Deleting routing table %s\n", r->name);
2025 r->config->table = NULL;
2026 if (r->hostcache)
2027 rt_free_hostcache(r);
2028 rem_node(&r->n);
2029 fib_free(&r->fib);
2030 rfree(r->rt_event);
2031 mb_free(r);
2032 config_del_obstacle(conf);
2033 }
2034 }
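/*
 * Sketch: pairing the lock with deferred work so the table cannot be
 * freed while still referenced.
 *
 *   rt_lock_table(tab);
 *   ... hand 'tab' to an event or protocol that uses it later ...
 *   rt_unlock_table(tab);   (may delete the table if a reconfiguration
 *                            removed it in the meantime)
 */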

/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it; if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()); if a table exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              struct symbol *sym = cf_find_symbol(new, o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  /* The lock/unlock pair frees the table right away
                   * unless some protocol still holds it locked */
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}
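
/* A worked example of the rules above (hypothetical configurations):
 * reconfiguring from { table a; table b; } to { table b; table c; }
 * leaves b untouched, allocates and sets up c, and marks a as deleted;
 * a is actually freed by the last rt_unlock_table() call once every
 * protocol has detached from it. */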

static inline void
do_feed_channel(struct channel *c, net *n, rte *e)
{
  rte_update_lock();
  if (c->ra_mode == RA_ACCEPTED)
    rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
  else if (c->ra_mode == RA_MERGED)
    rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
  else /* RA_BASIC */
    rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
  rte_update_unlock();
}

/**
 * rt_feed_channel - advertise all routes to a channel
 * @c: channel to be fed
 *
 * This function performs one pass of advertisement of routes to a channel that
 * is in the ES_FEEDING state. It is called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in a single pass
 * in order not to monopolize CPU time.)
 */
int
rt_feed_channel(struct channel *c)
{
  struct fib_iterator *fit = &c->feed_fit;
  int max_feed = 256;

  ASSERT(c->export_state == ES_FEEDING);

  if (!c->feed_active)
    {
      FIB_ITERATE_INIT(fit, &c->table->fib);
      c->feed_active = 1;
    }

  FIB_ITERATE_START(&c->table->fib, fit, net, n)
    {
      rte *e = n->routes;
      if (max_feed <= 0)
        {
          FIB_ITERATE_PUT(fit);
          return 0;
        }

      /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      if ((c->ra_mode == RA_OPTIMAL) ||
          (c->ra_mode == RA_ACCEPTED) ||
          (c->ra_mode == RA_MERGED))
        if (rte_is_valid(e))
          {
            /* In the meantime, the protocol may have gone down */
            if (c->export_state != ES_FEEDING)
              goto done;

            do_feed_channel(c, n, e);
            max_feed--;
          }

      if (c->ra_mode == RA_ANY)
        for (e = n->routes; e; e = e->next)
          {
            /* In the meantime, the protocol may have gone down */
            if (c->export_state != ES_FEEDING)
              goto done;

            if (!rte_is_valid(e))
              continue;

            do_feed_channel(c, n, e);
            max_feed--;
          }
    }
  FIB_ITERATE_END;

done:
  c->feed_active = 0;
  return 1;
}
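
/* A minimal sketch of the calling convention (hypothetical event hook, not
 * part of this file): protocol code re-runs the feed until rt_feed_channel()
 * reports completion by returning 1; c->feed_event is assumed here.
 */
#if 0
static void
my_feed_hook(void *data)
{
  struct channel *c = data;

  if (!rt_feed_channel(c))
    {
      /* Ran out of the per-pass budget; continue in the next event */
      ev_schedule(c->feed_event);
      return;
    }

  /* Feeding finished; the channel may leave ES_FEEDING now */
}
#endif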

/**
 * rt_feed_channel_abort - abort protocol feeding
 * @c: channel
 *
 * This function is called by the protocol code when the protocol stops or
 * ceases to exist during feeding.
 */
void
rt_feed_channel_abort(struct channel *c)
{
  if (c->feed_active)
    {
      /* Unlink the iterator */
      fit_get(&c->table->fib, &c->feed_fit);
      c->feed_active = 0;
    }
}

static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t p = (uintptr_t) ptr;
  return p ^ (p << 8) ^ (p >> 16);
}

static inline u32
hc_hash(ip_addr a, rtable *dep)
{
  return ipa_hash(a) ^ ptr_hash(dep);
}
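
/* Mixing the dependent table pointer into the hash key lets the same next
 * hop address be tracked separately for each table that depends on it;
 * rt_get_hostentry() compares both the address and the table. */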

static inline void
hc_insert(struct hostcache *hc, struct hostentry *he)
{
  /* Buckets are selected by the top hash_order bits of the key */
  uint k = he->hash_key >> hc->hash_shift;
  he->next = hc->hash_table[k];
  hc->hash_table[k] = he;
}

static inline void
hc_remove(struct hostcache *hc, struct hostentry *he)
{
  struct hostentry **hep;
  uint k = he->hash_key >> hc->hash_shift;

  /* Walk the chain to find the pointer that links to @he */
  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next)
    ;
  *hep = he->next;
}

#define HC_DEF_ORDER 10
#define HC_HI_MARK *4
#define HC_HI_STEP 2
#define HC_HI_ORDER 16			/* Must be at most 16 */
#define HC_LO_MARK /5
#define HC_LO_STEP 2
#define HC_LO_ORDER 10
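
/* HC_HI_MARK and HC_LO_MARK are deliberately operator fragments: pasted
 * after a table size, (hsize HC_HI_MARK) expands to (hsize *4) and
 * (hsize HC_LO_MARK) to (hsize /5), yielding the resize watermarks. */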

static void
hc_alloc_table(struct hostcache *hc, unsigned order)
{
  uint hsize = 1 << order;
  hc->hash_order = order;
  hc->hash_shift = 32 - order;
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
}

static void
hc_resize(struct hostcache *hc, unsigned new_order)
{
  struct hostentry **old_table = hc->hash_table;
  struct hostentry *he, *hen;
  uint old_size = 1 << hc->hash_order;
  uint i;

  hc_alloc_table(hc, new_order);
  for (i = 0; i < old_size; i++)
    for (he = old_table[i]; he != NULL; he = hen)
      {
        hen = he->next;
        hc_insert(hc, he);
      }
  mb_free(old_table);
}

static struct hostentry *
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  *he = (struct hostentry) {
    .addr = a,
    .link = ll,
    .tab = dep,
    .hash_key = k,
  };

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, hc->hash_order + HC_HI_STEP);

  return he;
}

static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
  rta_free(he->src);

  rem_node(&he->ln);
  hc_remove(hc, he);
  sl_free(hc->slab, he);

  hc->hash_items--;
  if (hc->hash_items < hc->hash_min)
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
}

static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, HC_DEF_ORDER);
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));

  hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024));
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  tab->hostcache = hc;
}

static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      if (he->uc)
        log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}

static void
rt_notify_hostcache(rtable *tab, net *net)
{
  if (tab->hcu_scheduled)
    return;

  if (trie_match_net(tab->hostcache->trie, net->n.addr))
    rt_schedule_hcu(tab);
}

static int
if_local_addr(ip_addr a, struct iface *i)
{
  struct ifa *b;

  WALK_LIST(b, i->addrs)
    if (ipa_equal(a, b->ip))
      return 1;

  return 0;
}

static u32
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  if (a->source == RTS_DEVICE)
    return 0;

  return IGP_METRIC_UNKNOWN;
}

static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->nexthop_linkable = 0;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.addr->pxlen;

      if (a->hostentry)
        {
          /* A recursive route should not depend on another recursive route */
          log(L_WARN "Next hop address %I resolvable through recursive route for %N",
              he->addr, n->n.addr);
          goto done;
        }

      he->dest = a->dest;
      he->nexthop_linkable = 1;
      if (he->dest == RTD_UNICAST)
        {
          for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
            if (ipa_zero(nh->gw))
              {
                if (if_local_addr(he->addr, nh->iface))
                  {
                    /* The host address is a local address, which is not valid */
                    log(L_WARN "Next hop address %I is a local address of iface %s",
                        he->addr, nh->iface->name);
                    goto done;
                  }

                he->nexthop_linkable = 0;
                break;
              }
        }

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

done:
  /* Add a prefix range to the trie; an update of any net covering he->addr
   * with a prefix length between pxlen and the host length may change the
   * resolution, so rt_notify_hostcache() matches against this range */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}

static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
        {
          hc_delete_hostentry(hc, he);
          continue;
        }

      if (rt_update_hostentry(tab, he))
        rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}

struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  struct hostentry *he;

  if (!tab->hostcache)
    rt_init_hostcache(tab);

  u32 k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}
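
/* A minimal usage sketch (hypothetical caller, not part of this file):
 * a protocol resolving a recursive next hop obtains a hostentry from the
 * IGP table and attaches it to its route attributes; later IGP changes
 * are then propagated by rt_next_hop_update().
 */
#if 0
static void
my_resolve_next_hop(rtable *igp_table, rtable *dep_table, rta *a, ip_addr gw)
{
  struct hostentry *he = rt_get_hostentry(igp_table, gw, IPA_NONE, dep_table);

  a->hostentry = he;		/* the route now follows the hostentry */
  a->dest = he->dest;
  /* he->src and he->igp_metric carry the resolved path and its metric */
}
#endif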


/*
 *  Documentation for functions declared inline in route.h
 */
#if 0

/**
 * net_find - find a network entry
 * @tab: a routing table
 * @addr: address of the network
 *
 * net_find() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry or %NULL if no such network
 * exists.
 */
static inline net *net_find(rtable *tab, net_addr *addr)
{ DUMMY; }

/**
 * net_get - obtain a network entry
 * @tab: a routing table
 * @addr: address of the network
 *
 * net_get() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry. If no such entry exists, it's
 * created.
 */
static inline net *net_get(rtable *tab, net_addr *addr)
{ DUMMY; }

/**
 * rte_cow - copy a route for writing
 * @r: a route entry to be copied
 *
 * rte_cow() takes a &rte and prepares it for modification. The exact action
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
 * just returned unchanged, else a new temporary entry with the same contents
 * is created.
 *
 * The primary use of this function is inside the filter machinery -- when
 * a filter wants to modify &rte contents (to change the preference or to
 * attach another set of attributes), it must ensure that the &rte is not
 * shared with anyone else (and especially that it isn't stored in any routing
 * table).
 *
 * Result: a pointer to the new writable &rte.
 */
static inline rte * rte_cow(rte *r)
{ DUMMY; }

#endif