1 /*
2 * BIRD -- Routing Tables
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
23 *
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
29 */
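
/*
 * A minimal illustration of the structure described above (hypothetical
 * helper, not part of the original source): the head of a &net's route
 * list is the route currently used for forwarding, the rest follow in
 * undetermined order.
 *
 *   static void
 *   walk_net_example(net *n)
 *   {
 *     rte *best = n->routes;              // first entry = current best route
 *     for (rte *e = best; e; e = e->next)
 *       if (rte_is_valid(e))
 *         debug("%N via %s\n", n->n.addr, e->attrs->src->proto->name);
 *   }
 */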
30
31 #undef LOCAL_DEBUG
32
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/iface.h"
37 #include "lib/resource.h"
38 #include "lib/event.h"
39 #include "lib/string.h"
40 #include "conf/conf.h"
41 #include "filter/filter.h"
42 #include "lib/hash.h"
44 #include "lib/alloca.h"
45
46 pool *rt_table_pool;
47
48 static slab *rte_slab;
49 static linpool *rte_update_pool;
50
51 list routing_tables;
52
53 static void rt_free_hostcache(rtable *tab);
54 static void rt_notify_hostcache(rtable *tab, net *net);
55 static void rt_update_hostcache(rtable *tab);
56 static void rt_next_hop_update(rtable *tab);
57 static inline void rt_prune_table(rtable *tab);
58
59
60 /* Like fib_route(), but skips empty net entries */
61 static inline void *
62 net_route_ip4(rtable *t, net_addr_ip4 *n)
63 {
64 net *r;
65
66 while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
67 {
68 n->pxlen--;
69 ip4_clrbit(&n->prefix, n->pxlen);
70 }
71
72 return r;
73 }
74
75 static inline void *
76 net_route_ip6(rtable *t, net_addr_ip6 *n)
77 {
78 net *r;
79
80 while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
81 {
82 n->pxlen--;
83 ip6_clrbit(&n->prefix, n->pxlen);
84 }
85
86 return r;
87 }
88
89 static inline void *
90 net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
91 {
92 struct fib_node *fn;
93
94 while (1)
95 {
96 net *best = NULL;
97 int best_pxlen = 0;
98
99     /* We need to do dst-first matching. Since sadr addresses are hashed on dst
100        prefix only, find the hash table chain and go through it to find the
101        match with the longest matching src prefix. */
102 for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next)
103 {
104 net_addr_ip6_sadr *a = (void *) fn->addr;
105
106 if (net_equal_dst_ip6_sadr(n, a) &&
107 net_in_net_src_ip6_sadr(n, a) &&
108 (a->src_pxlen >= best_pxlen))
109 {
110 best = fib_node_to_user(&t->fib, fn);
111 best_pxlen = a->src_pxlen;
112 }
113 }
114
115 if (best)
116 return best;
117
118 if (!n->dst_pxlen)
119 break;
120
121 n->dst_pxlen--;
122 ip6_clrbit(&n->dst_prefix, n->dst_pxlen);
123 }
124
125 return NULL;
126 }
127
128 void *
129 net_route(rtable *tab, const net_addr *n)
130 {
131 ASSERT(tab->addr_type == n->type);
132
133 net_addr *n0 = alloca(n->length);
134 net_copy(n0, n);
135
136 switch (n->type)
137 {
138 case NET_IP4:
139 case NET_VPN4:
140 case NET_ROA4:
141 return net_route_ip4(tab, (net_addr_ip4 *) n0);
142
143 case NET_IP6:
144 case NET_VPN6:
145 case NET_ROA6:
146 return net_route_ip6(tab, (net_addr_ip6 *) n0);
147
148 case NET_IP6_SADR:
149 return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0);
150
151 default:
152 return NULL;
153 }
154 }
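
/*
 * Usage sketch (hypothetical caller code, assuming the NET_ADDR_IP4() and
 * ip4_build() helpers from BIRD's lib): net_route() implements a
 * longest-prefix lookup by repeatedly clearing the lowest prefix bit of a
 * private copy of the address, so a host route query may fall back to any
 * covering prefix, down to the default route:
 *
 *   net_addr_ip4 a = NET_ADDR_IP4(ip4_build(192, 0, 2, 1), 32);
 *   net *n = net_route(tab, (net_addr *) &a);  // may match e.g. 192.0.2.0/24
 *   if (n && rte_is_valid(n->routes))
 *     ;                                        // n->routes is the best route
 */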
155
156
157 static int
158 net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
159 {
160 struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
161 struct fib_node *fn;
162 int anything = 0;
163
164 while (1)
165 {
166 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
167 {
168 net_addr_roa4 *roa = (void *) fn->addr;
169 net *r = fib_node_to_user(&tab->fib, fn);
170
171 if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
172 {
173 anything = 1;
174 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
175 return ROA_VALID;
176 }
177 }
178
179 if (n.pxlen == 0)
180 break;
181
182 n.pxlen--;
183 ip4_clrbit(&n.prefix, n.pxlen);
184 }
185
186 return anything ? ROA_INVALID : ROA_UNKNOWN;
187 }
188
189 static int
190 net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
191 {
192 struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
193 struct fib_node *fn;
194 int anything = 0;
195
196 while (1)
197 {
198 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
199 {
200 net_addr_roa6 *roa = (void *) fn->addr;
201 net *r = fib_node_to_user(&tab->fib, fn);
202
203 if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
204 {
205 anything = 1;
206 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
207 return ROA_VALID;
208 }
209 }
210
211 if (n.pxlen == 0)
212 break;
213
214 n.pxlen--;
215 ip6_clrbit(&n.prefix, n.pxlen);
216 }
217
218 return anything ? ROA_INVALID : ROA_UNKNOWN;
219 }
220
221 /**
222 * roa_check - check validity of route origination in a ROA table
223 * @tab: ROA table
224 * @n: network prefix to check
225 * @asn: AS number of network prefix
226 *
227 * Implements RFC 6483 route validation for the given network prefix. The
228 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
229 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
230 * a candidate ROA with matching ASN and maxlen field greater than or equal to
231 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
232  * the caller cannot determine the origin AS, 0 may be used (in that case ROA_VALID
233 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
234 * must have type NET_IP4 or NET_IP6, respectively.
235 */
236 int
237 net_roa_check(rtable *tab, const net_addr *n, u32 asn)
238 {
239 if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
240 return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
241 else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
242 return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
243 else
244 return ROA_UNKNOWN; /* Should not happen */
245 }
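
/*
 * A usage sketch of the check above (caller code is illustrative): validate
 * an announcement of 192.0.2.0/24 claiming origin AS 65001 against a ROA
 * table:
 *
 *   net_addr_ip4 px = NET_ADDR_IP4(ip4_build(192, 0, 2, 0), 24);
 *   switch (net_roa_check(roa_tab, (net_addr *) &px, 65001))
 *   {
 *   case ROA_VALID:   break;  // some covering ROA matches ASN and maxlen
 *   case ROA_INVALID: break;  // covering ROAs exist, but none matches
 *   case ROA_UNKNOWN: break;  // no covering ROA at all
 *   }
 */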
246
247 /**
248 * rte_find - find a route
249 * @net: network node
250 * @src: route source
251 *
252 * The rte_find() function returns a route for destination @net
253 * which is from route source @src.
254 */
255 rte *
256 rte_find(net *net, struct rte_src *src)
257 {
258 rte *e = net->routes;
259
260 while (e && e->attrs->src != src)
261 e = e->next;
262 return e;
263 }
264
265 /**
266 * rte_get_temp - get a temporary &rte
267 * @a: attributes to assign to the new route (a &rta; in case it's
268 * un-cached, rte_update() will create a cached copy automatically)
269 *
270 * Create a temporary &rte and bind it with the attributes @a.
271 * Also set route preference to the default preference set for
272 * the protocol.
273 */
274 rte *
275 rte_get_temp(rta *a)
276 {
277 rte *e = sl_alloc(rte_slab);
278
279 e->attrs = a;
280 e->flags = 0;
281 e->pref = 0;
282 return e;
283 }
284
285 rte *
286 rte_do_cow(rte *r)
287 {
288 rte *e = sl_alloc(rte_slab);
289
290 memcpy(e, r, sizeof(rte));
291 e->attrs = rta_clone(r->attrs);
292 e->flags = 0;
293 return e;
294 }
295
296 /**
297 * rte_cow_rta - get a private writable copy of &rte with writable &rta
298 * @r: a route entry to be copied
299 * @lp: a linpool from which to allocate &rta
300 *
301 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
302 * modification. There are three possibilities: First, both &rte and &rta are
303  * private copies; in that case they are returned unchanged. Second, &rte is
304  * a private copy, but &rta is cached; in that case &rta is duplicated using
305  * rta_do_cow(). Third, &rte is shared and &rta is cached; in that case
306  * both structures are duplicated by rte_do_cow() and rta_do_cow().
307 *
308 * Note that in the second case, cached &rta loses one reference, while private
309 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
310  * nexthops, ...) with it. To work properly, the original shared &rta should have
311  * another reference during the lifetime of the created private copy.
312 *
313 * Result: a pointer to the new writable &rte with writable &rta.
314 */
315 rte *
316 rte_cow_rta(rte *r, linpool *lp)
317 {
318 if (!rta_is_cached(r->attrs))
319 return r;
320
321 r = rte_cow(r);
322 rta *a = rta_do_cow(r->attrs, lp);
323 rta_free(r->attrs);
324 r->attrs = a;
325 return r;
326 }
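
/*
 * A minimal sketch of the reference-counting caveat above (hypothetical
 * caller): when @r's &rta is cached, keep a reference to the original
 * attributes for as long as the private copy lives, because the copy shares
 * indirect data with it:
 *
 *   rta *orig = rta_clone(r->attrs);  // hold a reference to the shared rta
 *   rte *w = rte_cow_rta(r, lp);      // w->attrs is now private and writable
 *   ...                               // modify and use w->attrs here
 *   rta_free(orig);                   // release only after w is finished
 */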
327
328 static int /* Actually better or at least as good as */
329 rte_better(rte *new, rte *old)
330 {
331 int (*better)(rte *, rte *);
332
333 if (!rte_is_valid(old))
334 return 1;
335 if (!rte_is_valid(new))
336 return 0;
337
338 if (new->pref > old->pref)
339 return 1;
340 if (new->pref < old->pref)
341 return 0;
342 if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
343 {
344 /*
345 * If the user has configured protocol preferences, so that two different protocols
346 * have the same preference, try to break the tie by comparing addresses. Not too
347 * useful, but keeps the ordering of routes unambiguous.
348 */
349 return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
350 }
351 if (better = new->attrs->src->proto->rte_better)
352 return better(new, old);
353 return 0;
354 }
355
356 static int
357 rte_mergable(rte *pri, rte *sec)
358 {
359 int (*mergable)(rte *, rte *);
360
361 if (!rte_is_valid(pri) || !rte_is_valid(sec))
362 return 0;
363
364 if (pri->pref != sec->pref)
365 return 0;
366
367 if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
368 return 0;
369
370 if (mergable = pri->attrs->src->proto->rte_mergable)
371 return mergable(pri, sec);
372
373 return 0;
374 }
375
376 static void
377 rte_trace(struct proto *p, rte *e, int dir, char *msg)
378 {
379 log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
380 }
381
382 static inline void
383 rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
384 {
385 if (p->debug & flag)
386 rte_trace(p, e, '>', msg);
387 }
388
389 static inline void
390 rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
391 {
392 if (p->debug & flag)
393 rte_trace(p, e, '<', msg);
394 }
395
396 static rte *
397 export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
398 {
399 struct proto *p = c->proto;
400 struct filter *filter = c->out_filter;
401 struct proto_stats *stats = &c->stats;
402 rte *rt;
403 int v;
404
405 rt = rt0;
406 *rt_free = NULL;
407
408 v = p->preexport ? p->preexport(p, &rt, pool) : 0;
409 if (v < 0)
410 {
411 if (silent)
412 goto reject;
413
414 stats->exp_updates_rejected++;
415 if (v == RIC_REJECT)
416 rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
417 goto reject;
418 }
419 if (v > 0)
420 {
421 if (!silent)
422 rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
423 goto accept;
424 }
425
426 rte_make_tmp_attrs(&rt, pool);
427
428 v = filter && ((filter == FILTER_REJECT) ||
429 (f_run(filter, &rt, pool,
430 (silent ? FF_SILENT : 0)) > F_ACCEPT));
431 if (v)
432 {
433 if (silent)
434 goto reject;
435
436 stats->exp_updates_filtered++;
437 rte_trace_out(D_FILTERS, p, rt, "filtered out");
438 goto reject;
439 }
440
441 accept:
442 if (rt != rt0)
443 *rt_free = rt;
444 return rt;
445
446 reject:
447 /* Discard temporary rte */
448 if (rt != rt0)
449 rte_free(rt);
450 return NULL;
451 }
452
453 static inline rte *
454 export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
455 {
456 return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
457 }
458
459 static void
460 do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
461 {
462 struct proto *p = c->proto;
463 struct proto_stats *stats = &c->stats;
464
465
466 /*
467 * First, apply export limit.
468 *
469  * Export route limits have several problems. Because the exp_routes
470  * counter is reset before a refeed, we don't really know whether
471  * the limit is breached and whether the update is new or not. Therefore
472  * the number of really exported routes may exceed the limit
473  * temporarily (routes exported before and new routes in the refeed).
474  *
475  * A minor advantage is that if the limit is decreased and a refeed is
476  * requested, the number of exported routes really decreases.
477 *
478  * The second problem is that with export limits, we don't know whether
479  * old was really exported (it might be blocked by the limit). When a
480  * withdraw is exported, we announce it even when the previous
481  * update was blocked. This is not a big issue, but the same problem
482  * applies to updating the exp_routes counter. Therefore, to be consistent in
483  * increases and decreases of exp_routes, we count exported routes
484  * regardless of blocking by limits.
485  *
486  * A similar problem arises in handling updates - when a new route is
487  * received and blocking is active, the route would be blocked, but
488  * when an update for the route is received later, the update
489  * would be propagated (as old != NULL). Therefore, we have to block
490  * non-new updates as well (contrary to import blocking).
491 */
492
493 struct channel_limit *l = &c->out_limit;
494 if (l->action && new)
495 {
496 if ((!old || refeed) && (stats->exp_routes >= l->limit))
497 channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
498
499 if (l->state == PLS_BLOCKED)
500 {
501 stats->exp_routes++; /* see note above */
502 stats->exp_updates_rejected++;
503 rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
504 new = NULL;
505
506 if (!old)
507 return;
508 }
509 }
510
511
512 if (new)
513 stats->exp_updates_accepted++;
514 else
515 stats->exp_withdraws_accepted++;
516
517 /* Hack: We do not decrease exp_routes during refeed, we instead
518 reset exp_routes at the start of refeed. */
519 if (new)
520 stats->exp_routes++;
521 if (old && !refeed)
522 stats->exp_routes--;
523
524 if (p->debug & D_ROUTES)
525 {
526 if (new && old)
527 rte_trace_out(D_ROUTES, p, new, "replaced");
528 else if (new)
529 rte_trace_out(D_ROUTES, p, new, "added");
530 else if (old)
531 rte_trace_out(D_ROUTES, p, old, "removed");
532 }
533 p->rt_notify(p, c, net, new, old);
534 }
535
536 static void
537 rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
538 {
539 struct proto *p = c->proto;
540
541 rte *new = new0;
542 rte *old = old0;
543 rte *new_free = NULL;
544 rte *old_free = NULL;
545
546 if (new)
547 c->stats.exp_updates_received++;
548 else
549 c->stats.exp_withdraws_received++;
550
551 /*
552 * This is a tricky part - we don't know whether route 'old' was exported to
553 * protocol 'p' or was filtered by the export filter. We try to run the export
554  * filter to find this out, so that we have a correct value in the 'old' argument
555  * of rte_update (and a proper filter value).
556 *
557 * This is broken because 'configure soft' may change filters but keep routes.
558 * Refeed cycle is expected to be called after change of the filters and with
559 * old == new, therefore we do not even try to run the filter on an old route.
560  * This may lead to 'spurious withdraws' but ensures that there are no 'missing
561  * withdraws'.
562 *
563 * This is not completely safe as there is a window between reconfiguration
564  * and the end of the refeed - if a newly filtered route disappears during this
565  * period, a proper withdraw is not sent (because old would also be filtered)
566  * and the route is not refeeded (because it disappeared before that).
567 * Therefore, we also do not try to run the filter on old routes that are
568 * older than the last filter change.
569 */
570
571 if (new)
572 new = export_filter(c, new, &new_free, 0);
573
574 if (old && !(refeed || (old->lastmod <= c->last_tx_filter_change)))
575 old = export_filter(c, old, &old_free, 1);
576
577 if (!new && !old)
578 {
579 /*
580 * As mentioned above, 'old' value may be incorrect in some race conditions.
581 * We generally ignore it with the exception of withdraw to pipe protocol.
582 * In that case we rather propagate unfiltered withdraws regardless of
583 * export filters to ensure that when a protocol is flushed, its routes are
584 * removed from all tables. Possible spurious unfiltered withdraws are not
585 * problem here as they are ignored if there is no corresponding route at
586 * the other end of the pipe. We directly call rt_notify() hook instead of
587 * do_rt_notify() to avoid logging and stat counters.
588 */
589
590 #ifdef CONFIG_PIPE
591 if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
592 p->rt_notify(p, c, net, NULL, old0);
593 #endif
594
595 return;
596 }
597
598 do_rt_notify(c, net, new, old, refeed);
599
600 /* Discard temporary rte's */
601 if (new_free)
602 rte_free(new_free);
603 if (old_free)
604 rte_free(old_free);
605 }
606
607 static void
608 rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
609 {
610 // struct proto *p = c->proto;
611
612 rte *r;
613 rte *new_best = NULL;
614 rte *old_best = NULL;
615 rte *new_free = NULL;
616 rte *old_free = NULL;
617
618   /* Used to track whether we met the old_changed position. If before_old is NULL,
619      old_changed was the first route and we met it implicitly before the current best route. */
620 int old_meet = old_changed && !before_old;
621
622   /* Note that before_old is either NULL or a valid (not rejected) route.
623      If old_changed is valid, before_old has to be too. If the old changed route
624      was not valid, the caller must use NULL for both old_changed and before_old. */
625
626 if (new_changed)
627 c->stats.exp_updates_received++;
628 else
629 c->stats.exp_withdraws_received++;
630
631 /* First, find the new_best route - first accepted by filters */
632 for (r=net->routes; rte_is_valid(r); r=r->next)
633 {
634 if (new_best = export_filter(c, r, &new_free, 0))
635 break;
636
637 /* Note if we walked around the position of old_changed route */
638 if (r == before_old)
639 old_meet = 1;
640 }
641
642 /*
643 * Second, handle the feed case. That means we do not care for
644 * old_best. It is NULL for feed, and the new_best for refeed.
645 * For refeed, there is a hack similar to one in rt_notify_basic()
646 * to ensure withdraws in case of changed filters
647 */
648 if (feed)
649 {
650 if (feed == 2) /* refeed */
651 old_best = new_best ? new_best :
652 (rte_is_valid(net->routes) ? net->routes : NULL);
653 else
654 old_best = NULL;
655
656 if (!new_best && !old_best)
657 return;
658
659 goto found;
660 }
661
662 /*
663 * Now, we find the old_best route. Generally, it is the same as the
664 * new_best, unless new_best is the same as new_changed or
665 * old_changed is accepted before new_best.
666 *
667 * There are four cases:
668 *
669 * - We would find and accept old_changed before new_best, therefore
670    * old_changed is old_best. In the remaining cases we assume this
671    * is not true.
672 *
673 * - We found no new_best, therefore there is also no old_best and
674 * we ignore this withdraw.
675 *
676    * - We found new_best different from new_changed, therefore
677 * old_best is the same as new_best and we ignore this update.
678 *
679 * - We found new_best the same as new_changed, therefore it cannot
680 * be old_best and we have to continue search for old_best.
681 *
682 * There is also a hack to ensure consistency in case of changed filters.
683 * It does not find the proper old_best, just selects a non-NULL route.
684 */
685
686 /* Hack for changed filters */
687 if (old_changed && (old_changed->lastmod <= c->last_tx_filter_change))
688 {
689 old_best = old_changed;
690 goto found;
691 }
692
693 /* First case */
694 if (old_meet)
695 if (old_best = export_filter(c, old_changed, &old_free, 1))
696 goto found;
697
698 /* Second case */
699 if (!new_best)
700 return;
701
702 /* Third case, we use r instead of new_best, because export_filter() could change it */
703 if (r != new_changed)
704 {
705 if (new_free)
706 rte_free(new_free);
707 return;
708 }
709
710 /* Fourth case */
711 for (r=r->next; rte_is_valid(r); r=r->next)
712 {
713 if (old_best = export_filter(c, r, &old_free, 1))
714 goto found;
715
716 if (r == before_old)
717 if (old_best = export_filter(c, old_changed, &old_free, 1))
718 goto found;
719 }
720
721 /* Implicitly, old_best is NULL and new_best is non-NULL */
722
723 found:
724 do_rt_notify(c, net, new_best, old_best, (feed == 2));
725
726 /* Discard temporary rte's */
727 if (new_free)
728 rte_free(new_free);
729 if (old_free)
730 rte_free(old_free);
731 }
732
733
734 static struct nexthop *
735 nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
736 {
737 return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
738 }
739
740 rte *
741 rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
742 {
743 // struct proto *p = c->proto;
744 struct nexthop *nhs = NULL;
745 rte *best0, *best, *rt0, *rt, *tmp;
746
747 best0 = net->routes;
748 *rt_free = NULL;
749
750 if (!rte_is_valid(best0))
751 return NULL;
752
753 best = export_filter_(c, best0, rt_free, pool, silent);
754
755 if (!best || !rte_is_reachable(best))
756 return best;
757
758 for (rt0 = best0->next; rt0; rt0 = rt0->next)
759 {
760 if (!rte_mergable(best0, rt0))
761 continue;
762
763 rt = export_filter_(c, rt0, &tmp, pool, 1);
764
765 if (!rt)
766 continue;
767
768 if (rte_is_reachable(rt))
769 nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);
770
771 if (tmp)
772 rte_free(tmp);
773 }
774
775 if (nhs)
776 {
777 nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);
778
779 if (nhs->next)
780 {
781 best = rte_cow_rta(best, pool);
782 nexthop_link(best->attrs, nhs);
783 }
784 }
785
786 if (best != best0)
787 *rt_free = best;
788
789 return best;
790 }
791
792
793 static void
794 rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
795 rte *new_best, rte*old_best, int refeed)
796 {
797 // struct proto *p = c->proto;
798
799 rte *new_best_free = NULL;
800 rte *old_best_free = NULL;
801 rte *new_changed_free = NULL;
802 rte *old_changed_free = NULL;
803
804 /* We assume that all rte arguments are either NULL or rte_is_valid() */
805
806 /* This check should be done by the caller */
807 if (!new_best && !old_best)
808 return;
809
810 /* Check whether the change is relevant to the merged route */
811 if ((new_best == old_best) && !refeed)
812 {
813 new_changed = rte_mergable(new_best, new_changed) ?
814 export_filter(c, new_changed, &new_changed_free, 1) : NULL;
815
816 old_changed = rte_mergable(old_best, old_changed) ?
817 export_filter(c, old_changed, &old_changed_free, 1) : NULL;
818
819 if (!new_changed && !old_changed)
820 return;
821 }
822
823 if (new_best)
824 c->stats.exp_updates_received++;
825 else
826 c->stats.exp_withdraws_received++;
827
828 /* Prepare new merged route */
829 if (new_best)
830 new_best = rt_export_merged(c, net, &new_best_free, rte_update_pool, 0);
831
832 /* Prepare old merged route (without proper merged next hops) */
833 /* There are some issues with running filter on old route - see rt_notify_basic() */
834 if (old_best && !refeed)
835 old_best = export_filter(c, old_best, &old_best_free, 1);
836
837 if (new_best || old_best)
838 do_rt_notify(c, net, new_best, old_best, refeed);
839
840 /* Discard temporary rte's */
841 if (new_best_free)
842 rte_free(new_best_free);
843 if (old_best_free)
844 rte_free(old_best_free);
845 if (new_changed_free)
846 rte_free(new_changed_free);
847 if (old_changed_free)
848 rte_free(old_changed_free);
849 }
850
851
852 /**
853 * rte_announce - announce a routing table change
854 * @tab: table the route has been added to
855  * @type: type of route announcement (RA_OPTIMAL, RA_ANY, RA_ACCEPTED or RA_MERGED)
856 * @net: network in question
857 * @new: the new route to be announced
858 * @old: the previous route for the same network
859 * @new_best: the new best route for the same network
860 * @old_best: the previous best route for the same network
861 * @before_old: The previous route before @old for the same network.
862  * If @before_old is NULL, @old was the first.
863 *
864 * This function gets a routing table update and announces it
865  * to all protocols that accept the given type of route announcement
866 * and are connected to the same table by their announcement hooks.
867 *
868  * Route announcement of type %RA_OPTIMAL is generated when the optimal
869  * route (in routing table @tab) changes. In that case @old stores the
870  * old optimal route.
871  *
872  * Route announcement of type %RA_ANY is generated when any route (in
873  * routing table @tab) changes. In that case @old stores the old route
874  * from the same protocol.
875 *
876 * For each appropriate protocol, we first call its preexport()
877 * hook which performs basic checks on the route (each protocol has a
878 * right to veto or force accept of the route before any filter is
879 * asked) and adds default values of attributes specific to the new
880 * protocol (metrics, tags etc.). Then it consults the protocol's
881 * export filter and if it accepts the route, the rt_notify() hook of
882 * the protocol gets called.
883 */
884 static void
885 rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
886 rte *new_best, rte *old_best, rte *before_old)
887 {
888 if (!rte_is_valid(new))
889 new = NULL;
890
891 if (!rte_is_valid(old))
892 old = before_old = NULL;
893
894 if (!rte_is_valid(new_best))
895 new_best = NULL;
896
897 if (!rte_is_valid(old_best))
898 old_best = NULL;
899
900 if (!old && !new)
901 return;
902
903 if ((type == RA_OPTIMAL) && tab->hostcache)
904 rt_notify_hostcache(tab, net);
905
906 struct channel *c; node *n;
907 WALK_LIST2(c, n, tab->channels, table_node)
908 {
909 if (c->export_state == ES_DOWN)
910 continue;
911
912 if (c->ra_mode == type)
913 if (type == RA_ACCEPTED)
914 rt_notify_accepted(c, net, new, old, before_old, 0);
915 else if (type == RA_MERGED)
916 rt_notify_merged(c, net, new, old, new_best, old_best, 0);
917 else
918 rt_notify_basic(c, net, new, old, 0);
919 }
920 }
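
/*
 * For context, a hedged sketch of the receiving side: a channel attached in
 * plain RA_OPTIMAL/RA_ANY mode ends up in rt_notify_basic(), which calls the
 * protocol's rt_notify() hook with the filtered new/old pair. A trivial hook
 * could look like this (illustrative only; the signature matches the
 * p->rt_notify(p, c, net, new, old) call in do_rt_notify()):
 *
 *   static void
 *   example_rt_notify(struct proto *p, struct channel *c, net *n,
 *                     rte *new, rte *old)
 *   {
 *     if (new)
 *       ;  // install or replace the exported route for n->n.addr
 *     else
 *       ;  // new == NULL means withdraw of the previously exported route
 *   }
 */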
921
922 static inline int
923 rte_validate(rte *e)
924 {
925 int c;
926 net *n = e->net;
927
928 if (!net_validate(n->n.addr))
929 {
930 log(L_WARN "Ignoring bogus prefix %N received via %s",
931 n->n.addr, e->sender->proto->name);
932 return 0;
933 }
934
935   /* FIXME: better handling of different nettypes */
936 c = !net_is_flow(n->n.addr) ?
937 net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
938 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
939 {
940 log(L_WARN "Ignoring bogus route %N received via %s",
941 n->n.addr, e->sender->proto->name);
942 return 0;
943 }
944
945 if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
946 {
947 log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
948 n->n.addr, e->attrs->dest, e->sender->proto->name);
949 return 0;
950 }
951
952 if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
953 {
954 log(L_WARN "Ignoring unsorted multipath route %N received via %s",
955 n->n.addr, e->sender->proto->name);
956 return 0;
957 }
958
959 return 1;
960 }
961
962 /**
963 * rte_free - delete a &rte
964 * @e: &rte to be deleted
965 *
966 * rte_free() deletes the given &rte from the routing table it's linked to.
967 */
968 void
969 rte_free(rte *e)
970 {
971 if (rta_is_cached(e->attrs))
972 rta_free(e->attrs);
973 sl_free(rte_slab, e);
974 }
975
976 static inline void
977 rte_free_quick(rte *e)
978 {
979 rta_free(e->attrs);
980 sl_free(rte_slab, e);
981 }
982
983 static int
984 rte_same(rte *x, rte *y)
985 {
986 return
987 x->attrs == y->attrs &&
988 x->flags == y->flags &&
989 x->pflags == y->pflags &&
990 x->pref == y->pref &&
991 (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
992 }
993
994 static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
995
996 static void
997 rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
998 {
999 struct proto *p = c->proto;
1000 struct rtable *table = c->table;
1001 struct proto_stats *stats = &c->stats;
1002 static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
1003 rte *before_old = NULL;
1004 rte *old_best = net->routes;
1005 rte *old = NULL;
1006 rte **k;
1007
1008 k = &net->routes; /* Find and remove original route from the same protocol */
1009 while (old = *k)
1010 {
1011 if (old->attrs->src == src)
1012 {
1013 /* If there is the same route in the routing table but from
1014 * a different sender, then there are two paths from the
1015 * source protocol to this routing table through transparent
1016 * pipes, which is not allowed.
1017 *
1018        * We log that and ignore the route. If it is a withdraw, we
1019        * ignore it completely (there might be 'spurious withdraws',
1020        * see FIXME in do_rte_announce()).
1021 */
1022 if (old->sender->proto != p)
1023 {
1024 if (new)
1025 {
1026 log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
1027 net->n.addr, table->name);
1028 rte_free_quick(new);
1029 }
1030 return;
1031 }
1032
1033 if (new && rte_same(old, new))
1034 {
1035 /* No changes, ignore the new route */
1036
1037 if (!rte_is_filtered(new))
1038 {
1039 stats->imp_updates_ignored++;
1040 rte_trace_in(D_ROUTES, p, new, "ignored");
1041 }
1042
1043 rte_free_quick(new);
1044 return;
1045 }
1046 *k = old->next;
1047 table->rt_count--;
1048 break;
1049 }
1050 k = &old->next;
1051 before_old = old;
1052 }
1053
1054 if (!old)
1055 before_old = NULL;
1056
1057 if (!old && !new)
1058 {
1059 stats->imp_withdraws_ignored++;
1060 return;
1061 }
1062
1063 int new_ok = rte_is_ok(new);
1064 int old_ok = rte_is_ok(old);
1065
1066 struct channel_limit *l = &c->rx_limit;
1067 if (l->action && !old && new && !c->in_table)
1068 {
1069 u32 all_routes = stats->imp_routes + stats->filt_routes;
1070
1071 if (all_routes >= l->limit)
1072 channel_notify_limit(c, l, PLD_RX, all_routes);
1073
1074 if (l->state == PLS_BLOCKED)
1075 {
1076       /* With the receive limit, the situation is simple: old is NULL, so
1077          we just free new and exit as if nothing happened */
1078
1079 stats->imp_updates_ignored++;
1080 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1081 rte_free_quick(new);
1082 return;
1083 }
1084 }
1085
1086 l = &c->in_limit;
1087 if (l->action && !old_ok && new_ok)
1088 {
1089 if (stats->imp_routes >= l->limit)
1090 channel_notify_limit(c, l, PLD_IN, stats->imp_routes);
1091
1092 if (l->state == PLS_BLOCKED)
1093 {
1094       /* With the import limit, the situation is more complicated. We
1095          shouldn't just drop the route; we should handle it as if it
1096          were filtered. We also have to continue the route
1097 processing if old or new is non-NULL, but we should exit
1098 if both are NULL as this case is probably assumed to be
1099 already handled. */
1100
1101 stats->imp_updates_ignored++;
1102 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1103
1104 if (c->in_keep_filtered)
1105 new->flags |= REF_FILTERED;
1106 else
1107 { rte_free_quick(new); new = NULL; }
1108
1109 /* Note that old && !new could be possible when
1110 c->in_keep_filtered changed in the recent past. */
1111
1112 if (!old && !new)
1113 return;
1114
1115 new_ok = 0;
1116 goto skip_stats1;
1117 }
1118 }
1119
1120 if (new_ok)
1121 stats->imp_updates_accepted++;
1122 else if (old_ok)
1123 stats->imp_withdraws_accepted++;
1124 else
1125 stats->imp_withdraws_ignored++;
1126
1127 skip_stats1:
1128
1129 if (new)
1130 rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
1131 if (old)
1132 rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
1133
1134 if (table->config->sorted)
1135 {
1136 /* If routes are sorted, just insert new route to appropriate position */
1137 if (new)
1138 {
1139 if (before_old && !rte_better(new, before_old))
1140 k = &before_old->next;
1141 else
1142 k = &net->routes;
1143
1144 for (; *k; k=&(*k)->next)
1145 if (rte_better(new, *k))
1146 break;
1147
1148 new->next = *k;
1149 *k = new;
1150 table->rt_count++;
1151 }
1152 }
1153 else
1154 {
1155       /* If routes are not sorted, find the best route and move it to
1156          the first position. There are several optimized cases. */
1157
1158 if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1159 goto do_recalculate;
1160
1161 if (new && rte_better(new, old_best))
1162 {
1163 	  /* The first case - the new route is clearly optimal,
1164 we link it at the first position */
1165
1166 new->next = net->routes;
1167 net->routes = new;
1168 table->rt_count++;
1169 }
1170 else if (old == old_best)
1171 {
1172 	  /* The second case - the old best route disappeared. We add the
1173 	     new route (if we have any) to the list (we don't care about its
1174 	     position), then we elect the new optimal route, relink
1175 	     it at the first position and announce it. The new optimal
1176 	     route might be NULL if there are no more routes */
1177
1178 do_recalculate:
1179 /* Add the new route to the list */
1180 if (new)
1181 {
1182 new->next = net->routes;
1183 net->routes = new;
1184 table->rt_count++;
1185 }
1186
1187 /* Find a new optimal route (if there is any) */
1188 if (net->routes)
1189 {
1190 rte **bp = &net->routes;
1191 for (k=&(*bp)->next; *k; k=&(*k)->next)
1192 if (rte_better(*k, *bp))
1193 bp = k;
1194
1195 /* And relink it */
1196 rte *best = *bp;
1197 *bp = best->next;
1198 best->next = net->routes;
1199 net->routes = best;
1200 }
1201 }
1202 else if (new)
1203 {
1204 /* The third case - the new route is not better than the old
1205 best route (therefore old_best != NULL) and the old best
1206 route was not removed (therefore old_best == net->routes).
1207 We just link the new route after the old best route. */
1208
1209 ASSERT(net->routes != NULL);
1210 new->next = net->routes->next;
1211 net->routes->next = new;
1212 table->rt_count++;
1213 }
1214 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1215 }
1216
1217 if (new)
1218 new->lastmod = current_time();
1219
1220 /* Log the route change */
1221 if (p->debug & D_ROUTES)
1222 {
1223 if (new_ok)
1224 rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1225 else if (old_ok)
1226 {
1227 if (old != old_best)
1228 rte_trace(p, old, '>', "removed");
1229 else if (rte_is_ok(net->routes))
1230 rte_trace(p, old, '>', "removed [replaced]");
1231 else
1232 rte_trace(p, old, '>', "removed [sole]");
1233 }
1234 }
1235
1236 /* Propagate the route change */
1237 rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1238 if (net->routes != old_best)
1239 rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1240 if (table->config->sorted)
1241 rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1242 rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1243
1244 if (!net->routes &&
1245 (table->gc_counter++ >= table->config->gc_max_ops) &&
1246 (table->gc_time + table->config->gc_min_time <= current_time()))
1247 rt_schedule_prune(table);
1248
1249 if (old_ok && p->rte_remove)
1250 p->rte_remove(net, old);
1251 if (new_ok && p->rte_insert)
1252 p->rte_insert(net, new);
1253
1254 if (old)
1255 rte_free_quick(old);
1256 }
1257
1258 static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */
1259
1260 static inline void
1261 rte_update_lock(void)
1262 {
1263 rte_update_nest_cnt++;
1264 }
1265
1266 static inline void
1267 rte_update_unlock(void)
1268 {
1269 if (!--rte_update_nest_cnt)
1270 lp_flush(rte_update_pool);
1271 }
1272
1273 static inline void
1274 rte_hide_dummy_routes(net *net, rte **dummy)
1275 {
1276 if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1277 {
1278 *dummy = net->routes;
1279 net->routes = (*dummy)->next;
1280 }
1281 }
1282
1283 static inline void
1284 rte_unhide_dummy_routes(net *net, rte **dummy)
1285 {
1286 if (*dummy)
1287 {
1288 (*dummy)->next = net->routes;
1289 net->routes = *dummy;
1290 }
1291 }
1292
1293 /**
1294 * rte_update - enter a new update to a routing table
1295 * @table: table to be updated
1296 * @c: channel doing the update
1297 * @net: network node
1298 * @p: protocol submitting the update
1299 * @src: protocol originating the update
1300 * @new: a &rte representing the new route or %NULL for route removal.
1301 *
1302 * This function is called by the routing protocols whenever they discover
1303 * a new route or wish to update/remove an existing route. The right announcement
1304 * sequence is to build route attributes first (either un-cached with @aflags set
1305 * to zero or a cached one using rta_lookup(); in this case please note that
1306 * you need to increase the use count of the attributes yourself by calling
1307 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1308 * the appropriate data and finally submit the new &rte by calling rte_update().
1309 *
1310 * @src specifies the protocol that originally created the route and the meaning
1311  * of the protocol-dependent data of @new. If @new is not %NULL, @src has to be the
1312 * same value as @new->attrs->proto. @p specifies the protocol that called
1313 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1314  * stores @p in @new->sender.
1315 *
1316  * When rte_update() gets any route, it automatically validates it (checks
1317 * whether the network and next hop address are valid IP addresses and also
1318 * whether a normal routing protocol doesn't try to smuggle a host or link
1319 * scope route to the table), converts all protocol dependent attributes stored
1320 * in the &rte to temporary extended attributes, consults import filters of the
1321 * protocol to see if the route should be accepted and/or its attributes modified,
1322  * and stores the temporary attributes back to the &rte.
1323 *
1324 * Now, having a "public" version of the route, we
1325 * automatically find any old route defined by the protocol @src
1326 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1327 * recalculate the optimal route for this destination and finally broadcast
1328 * the change (if any) to all routing protocols by calling rte_announce().
1329 *
1330 * All memory used for attribute lists and other temporary allocations is taken
1331 * from a special linear pool @rte_update_pool and freed when rte_update()
1332 * finishes.
1333 */
1334
1335 void
1336 rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
1337 {
1338 struct proto *p = c->proto;
1339 struct proto_stats *stats = &c->stats;
1340 struct filter *filter = c->in_filter;
1341 rte *dummy = NULL;
1342 net *nn;
1343
1344 ASSERT(c->channel_state == CS_UP);
1345
1346 rte_update_lock();
1347 if (new)
1348 {
1349 /* Create a temporary table node */
1350 nn = alloca(sizeof(net) + n->length);
1351 memset(nn, 0, sizeof(net) + n->length);
1352 net_copy(nn->n.addr, n);
1353
1354 new->net = nn;
1355 new->sender = c;
1356
1357 if (!new->pref)
1358 new->pref = c->preference;
1359
1360 stats->imp_updates_received++;
1361 if (!rte_validate(new))
1362 {
1363 rte_trace_in(D_FILTERS, p, new, "invalid");
1364 stats->imp_updates_invalid++;
1365 goto drop;
1366 }
1367
1368 if (filter == FILTER_REJECT)
1369 {
1370 stats->imp_updates_filtered++;
1371 rte_trace_in(D_FILTERS, p, new, "filtered out");
1372
1373 if (! c->in_keep_filtered)
1374 goto drop;
1375
1376 	  /* new is a private copy, we can modify it */
1377 new->flags |= REF_FILTERED;
1378 }
1379 else
1380 {
1381 rte_make_tmp_attrs(&new, rte_update_pool);
1382 if (filter && (filter != FILTER_REJECT))
1383 {
1384 ea_list *oldea = new->attrs->eattrs;
1385 int fr = f_run(filter, &new, rte_update_pool, 0);
1386 if (fr > F_ACCEPT)
1387 {
1388 stats->imp_updates_filtered++;
1389 rte_trace_in(D_FILTERS, p, new, "filtered out");
1390
1391 if (! c->in_keep_filtered)
1392 goto drop;
1393
1394 new->flags |= REF_FILTERED;
1395 }
1396 if (new->attrs->eattrs != oldea && src->proto->store_tmp_attrs)
1397 src->proto->store_tmp_attrs(new);
1398 }
1399 }
1400 if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1401 new->attrs = rta_lookup(new->attrs);
1402 new->flags |= REF_COW;
1403
1404 /* Use the actual struct network, not the dummy one */
1405 nn = net_get(c->table, n);
1406 new->net = nn;
1407 }
1408 else
1409 {
1410 stats->imp_withdraws_received++;
1411
1412 if (!(nn = net_find(c->table, n)) || !src)
1413 {
1414 stats->imp_withdraws_ignored++;
1415 rte_update_unlock();
1416 return;
1417 }
1418 }
1419
1420 recalc:
1421 /* And recalculate the best route */
1422 rte_hide_dummy_routes(nn, &dummy);
1423 rte_recalculate(c, nn, new, src);
1424 rte_unhide_dummy_routes(nn, &dummy);
1425
1426 rte_update_unlock();
1427 return;
1428
1429 drop:
1430 rte_free(new);
1431 new = NULL;
1432 if (nn = net_find(c->table, n))
1433 goto recalc;
1434
1435 rte_update_unlock();
1436 }
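
/*
 * A minimal sketch of the announcement sequence described above
 * (hypothetical protocol code; field values are illustrative and error
 * handling is omitted): build route attributes, obtain a temporary &rte
 * and submit it, then withdraw with a NULL route:
 *
 *   rta a0 = {
 *     .src = src,                   // struct rte_src of the originator
 *     .source = RTS_STATIC,         // illustrative route source type
 *     .scope = SCOPE_UNIVERSE,
 *     .dest = RTD_UNREACHABLE,
 *   };
 *   rte *e = rte_get_temp(rta_lookup(&a0));  // cached rta, temporary rte
 *   rte_update2(c, n, e, src);      // n is the const net_addr * to announce
 *   ...
 *   rte_update2(c, n, NULL, src);   // later: withdraw the same route
 */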
1437
1438 /* Independent call to rte_announce(), used from next hop
1439 recalculation, outside of rte_update(). new must be non-NULL */
1440 static inline void
1441 rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
1442 rte *new_best, rte *old_best)
1443 {
1444 rte_update_lock();
1445 rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
1446 rte_update_unlock();
1447 }
1448
1449 static inline void
1450 rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */
1451 {
1452 rte_update_lock();
1453 rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
1454 rte_update_unlock();
1455 }
1456
1457 /* Modify existing route by protocol hook, used for long-lived graceful restart */
1458 static inline void
1459 rte_modify(rte *old)
1460 {
1461 rte_update_lock();
1462
1463 rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
1464 if (new != old)
1465 {
1466 if (new)
1467 {
1468 if (!rta_is_cached(new->attrs))
1469 new->attrs = rta_lookup(new->attrs);
1470 new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
1471 }
1472
1473 rte_recalculate(old->sender, old->net, new, old->attrs->src);
1474 }
1475
1476 rte_update_unlock();
1477 }
1478
1479 /* Check rtable for the best route to the given net, whether it would be exported to p */
1480 int
1481 rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
1482 {
1483 net *n = net_find(t, a);
1484 rte *rt = n ? n->routes : NULL;
1485
1486 if (!rte_is_valid(rt))
1487 return 0;
1488
1489 rte_update_lock();
1490
1491 /* Rest is stripped down export_filter() */
1492 int v = p->preexport ? p->preexport(p, &rt, rte_update_pool) : 0;
1493 if (v == RIC_PROCESS)
1494 {
1495 rte_make_tmp_attrs(&rt, rte_update_pool);
1496 v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);
1497 }
1498
1499 /* Discard temporary rte */
1500 if (rt != n->routes)
1501 rte_free(rt);
1502
1503 rte_update_unlock();
1504
1505 return v > 0;
1506 }
1507
1508
1509 /**
1510 * rt_refresh_begin - start a refresh cycle
1511 * @t: related routing table
1512  * @c: related channel
1513 *
1514  * This function starts a refresh cycle for the given routing table and announce
1515 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1516 * routes to the routing table (by rte_update()). After that, all protocol
1517 * routes (more precisely routes with @c as @sender) not sent during the
1518 * refresh cycle but still in the table from the past are pruned. This is
1519 * implemented by marking all related routes as stale by REF_STALE flag in
1520 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1521 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1522 */
1523 void
1524 rt_refresh_begin(rtable *t, struct channel *c)
1525 {
1526 FIB_WALK(&t->fib, net, n)
1527 {
1528 rte *e;
1529 for (e = n->routes; e; e = e->next)
1530 if (e->sender == c)
1531 e->flags |= REF_STALE;
1532 }
1533 FIB_WALK_END;
1534 }
1535
1536 /**
1537 * rt_refresh_end - end a refresh cycle
1538 * @t: related routing table
1539 * @c: related channel
1540 *
1541  * This function ends a refresh cycle for the given routing table and announce
1542 * hook. See rt_refresh_begin() for description of refresh cycles.
1543 */
1544 void
1545 rt_refresh_end(rtable *t, struct channel *c)
1546 {
1547 int prune = 0;
1548
1549 FIB_WALK(&t->fib, net, n)
1550 {
1551 rte *e;
1552 for (e = n->routes; e; e = e->next)
1553 if ((e->sender == c) && (e->flags & REF_STALE))
1554 {
1555 e->flags |= REF_DISCARD;
1556 prune = 1;
1557 }
1558 }
1559 FIB_WALK_END;
1560
1561 if (prune)
1562 rt_schedule_prune(t);
1563 }
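
/*
 * A hedged sketch of a whole refresh cycle from a channel's point of view
 * (recv_route() is a hypothetical input source):
 *
 *   rt_refresh_begin(c->table, c);              // our routes get REF_STALE
 *   while (recv_route(&n, &new))
 *     rte_update2(c, &n, new, new->attrs->src); // re-sent routes replace
 *                                               // their stale copies
 *   rt_refresh_end(c->table, c);                // leftovers get REF_DISCARD and
 *                                               // are removed by the prune loop
 */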
1564
1565 void
1566 rt_modify_stale(rtable *t, struct channel *c)
1567 {
1568 int prune = 0;
1569
1570 FIB_WALK(&t->fib, net, n)
1571 {
1572 rte *e;
1573 for (e = n->routes; e; e = e->next)
1574 if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
1575 {
1576 e->flags |= REF_MODIFY;
1577 prune = 1;
1578 }
1579 }
1580 FIB_WALK_END;
1581
1582 if (prune)
1583 rt_schedule_prune(t);
1584 }
1585
1586 /**
1587 * rte_dump - dump a route
1588 * @e: &rte to be dumped
1589 *
1590  * This function dumps the contents of a &rte to the debug output.
1591 */
1592 void
1593 rte_dump(rte *e)
1594 {
1595 net *n = e->net;
1596 debug("%-1N ", n->n.addr);
1597 debug("KF=%02x PF=%02x pref=%d ", n->n.flags, e->pflags, e->pref);
1598 rta_dump(e->attrs);
1599 if (e->attrs->src->proto->proto->dump_attrs)
1600 e->attrs->src->proto->proto->dump_attrs(e);
1601 debug("\n");
1602 }
1603
1604 /**
1605 * rt_dump - dump a routing table
1606 * @t: routing table to be dumped
1607 *
1608 * This function dumps contents of a given routing table to debug output.
1609 */
1610 void
1611 rt_dump(rtable *t)
1612 {
1613 debug("Dump of routing table <%s>\n", t->name);
1614 #ifdef DEBUGGING
1615 fib_check(&t->fib);
1616 #endif
1617 FIB_WALK(&t->fib, net, n)
1618 {
1619 rte *e;
1620 for(e=n->routes; e; e=e->next)
1621 rte_dump(e);
1622 }
1623 FIB_WALK_END;
1624 debug("\n");
1625 }
1626
1627 /**
1628 * rt_dump_all - dump all routing tables
1629 *
1630 * This function dumps contents of all routing tables to debug output.
1631 */
1632 void
1633 rt_dump_all(void)
1634 {
1635 rtable *t;
1636
1637 WALK_LIST(t, routing_tables)
1638 rt_dump(t);
1639 }
1640
1641 static inline void
1642 rt_schedule_hcu(rtable *tab)
1643 {
1644 if (tab->hcu_scheduled)
1645 return;
1646
1647 tab->hcu_scheduled = 1;
1648 ev_schedule(tab->rt_event);
1649 }
1650
1651 static inline void
1652 rt_schedule_nhu(rtable *tab)
1653 {
1654 if (tab->nhu_state == NHU_CLEAN)
1655 ev_schedule(tab->rt_event);
1656
1657 /* state change:
1658 * NHU_CLEAN -> NHU_SCHEDULED
1659 * NHU_RUNNING -> NHU_DIRTY
1660 */
1661 tab->nhu_state |= NHU_SCHEDULED;
1662 }
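
/*
 * The bit trick above relies on the assumed NHU state encoding (NHU_CLEAN
 * == 0, NHU_SCHEDULED == 1, NHU_RUNNING == 2, NHU_DIRTY == 3), so that a
 * single OR/AND implements the whole state machine:
 *
 *   tab->nhu_state |= NHU_SCHEDULED;  // here: CLEAN -> SCHEDULED,
 *                                     //       RUNNING -> DIRTY
 *   tab->nhu_state &= 1;              // in rt_next_hop_update():
 *                                     //       RUNNING -> CLEAN,
 *                                     //       DIRTY -> SCHEDULED
 */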
1663
1664 void
1665 rt_schedule_prune(rtable *tab)
1666 {
1667 if (tab->prune_state == 0)
1668 ev_schedule(tab->rt_event);
1669
1670 /* state change 0->1, 2->3 */
1671 tab->prune_state |= 1;
1672 }
1673
1674
1675 static void
1676 rt_event(void *ptr)
1677 {
1678 rtable *tab = ptr;
1679
1680 rt_lock_table(tab);
1681
1682 if (tab->hcu_scheduled)
1683 rt_update_hostcache(tab);
1684
1685 if (tab->nhu_state)
1686 rt_next_hop_update(tab);
1687
1688 if (tab->prune_state)
1689 rt_prune_table(tab);
1690
1691 rt_unlock_table(tab);
1692 }
1693
1694 void
1695 rt_setup(pool *p, rtable *t, struct rtable_config *cf)
1696 {
1697 bzero(t, sizeof(*t));
1698 t->name = cf->name;
1699 t->config = cf;
1700 t->addr_type = cf->addr_type;
1701 fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1702 init_list(&t->channels);
1703
1704 t->rt_event = ev_new_init(p, rt_event, t);
1705 t->gc_time = current_time();
1706 }
1707
1708 /**
1709 * rt_init - initialize routing tables
1710 *
1711 * This function is called during BIRD startup. It initializes the
1712 * routing table module.
1713 */
1714 void
1715 rt_init(void)
1716 {
1717 rta_init();
1718 rt_table_pool = rp_new(&root_pool, "Routing tables");
1719 rte_update_pool = lp_new_default(rt_table_pool);
1720 rte_slab = sl_new(rt_table_pool, sizeof(rte));
1721 init_list(&routing_tables);
1722 }
1723
1724
1725 /**
1726 * rt_prune_table - prune a routing table
1727 *
1728 * The prune loop scans routing tables and removes routes belonging to flushing
1729 * protocols, discarded routes and also stale network entries. It is called from
1730 * rt_event(). The event is rescheduled if the current iteration do not finish
1731 * the table. The pruning is directed by the prune state (@prune_state),
1732 * specifying whether the prune cycle is scheduled or running, and there
1733 * is also a persistent pruning iterator (@prune_fit).
1734 *
1735  * The prune loop is also used for channel flushing. For this purpose, the
1736 * channels to flush are marked before the iteration and notified after the
1737 * iteration.
1738 */
1739 static void
1740 rt_prune_table(rtable *tab)
1741 {
1742 struct fib_iterator *fit = &tab->prune_fit;
1743 int limit = 512;
1744
1745 struct channel *c;
1746 node *n, *x;
1747
1748 DBG("Pruning route table %s\n", tab->name);
1749 #ifdef DEBUGGING
1750 fib_check(&tab->fib);
1751 #endif
1752
1753 if (tab->prune_state == 0)
1754 return;
1755
1756 if (tab->prune_state == 1)
1757 {
1758 /* Mark channels to flush */
1759 WALK_LIST2(c, n, tab->channels, table_node)
1760 if (c->channel_state == CS_FLUSHING)
1761 c->flush_active = 1;
1762
1763 FIB_ITERATE_INIT(fit, &tab->fib);
1764 tab->prune_state = 2;
1765 }
1766
1767 again:
1768 FIB_ITERATE_START(&tab->fib, fit, net, n)
1769 {
1770 rte *e;
1771
1772 rescan:
1773 for (e=n->routes; e; e=e->next)
1774 {
1775 if (e->sender->flush_active || (e->flags & REF_DISCARD))
1776 {
1777 if (limit <= 0)
1778 {
1779 FIB_ITERATE_PUT(fit);
1780 ev_schedule(tab->rt_event);
1781 return;
1782 }
1783
1784 rte_discard(e);
1785 limit--;
1786
1787 goto rescan;
1788 }
1789
1790 if (e->flags & REF_MODIFY)
1791 {
1792 if (limit <= 0)
1793 {
1794 FIB_ITERATE_PUT(fit);
1795 ev_schedule(tab->rt_event);
1796 return;
1797 }
1798
1799 rte_modify(e);
1800 limit--;
1801
1802 goto rescan;
1803 }
1804 }
1805
1806 if (!n->routes) /* Orphaned FIB entry */
1807 {
1808 FIB_ITERATE_PUT(fit);
1809 fib_delete(&tab->fib, n);
1810 goto again;
1811 }
1812 }
1813 FIB_ITERATE_END;
1814
1815 #ifdef DEBUGGING
1816 fib_check(&tab->fib);
1817 #endif
1818
1819 tab->gc_counter = 0;
1820 tab->gc_time = current_time();
1821
1822 /* state change 2->0, 3->1 */
1823 tab->prune_state &= 1;
1824
1825 if (tab->prune_state > 0)
1826 ev_schedule(tab->rt_event);
1827
1828 /* FIXME: This should be handled in a better way */
1829 rt_prune_sources();
1830
1831 /* Close flushed channels */
1832 WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
1833 if (c->flush_active)
1834 {
1835 c->flush_active = 0;
1836 channel_set_state(c, CS_DOWN);
1837 }
1838
1839 return;
1840 }
1841
1842 void
1843 rt_preconfig(struct config *c)
1844 {
1845 init_list(&c->tables);
1846
1847 rt_new_table(cf_get_symbol("master4"), NET_IP4);
1848 rt_new_table(cf_get_symbol("master6"), NET_IP6);
1849 }
1850
1851
1852 /*
1853  * Some functions for handling internal next hop updates
1854 * triggered by rt_schedule_nhu().
1855 */
1856
1857 static inline int
1858 rta_next_hop_outdated(rta *a)
1859 {
1860 struct hostentry *he = a->hostentry;
1861
1862 if (!he)
1863 return 0;
1864
1865 if (!he->src)
1866 return a->dest != RTD_UNREACHABLE;
1867
1868 return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1869 (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
1870 }
1871
1872 void
1873 rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
1874 {
1875 a->hostentry = he;
1876 a->dest = he->dest;
1877 a->igp_metric = he->igp_metric;
1878
1879 if (a->dest != RTD_UNICAST)
1880 {
1881 /* No nexthop */
1882 no_nexthop:
1883 a->nh = (struct nexthop) {};
1884 if (mls)
1885 { /* Store the label stack for later changes */
1886 a->nh.labels_orig = a->nh.labels = mls->len;
1887 memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
1888 }
1889 return;
1890 }
1891
1892 if (((!mls) || (!mls->len)) && he->nexthop_linkable)
1893 { /* Just link the nexthop chain, no label append happens. */
1894 memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
1895 return;
1896 }
1897
1898 struct nexthop *nhp = NULL, *nhr = NULL;
1899 int skip_nexthop = 0;
1900
1901 for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
1902 {
1903 if (skip_nexthop)
1904 skip_nexthop--;
1905 else
1906 {
1907 nhr = nhp;
1908 nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
1909 }
1910
1911 nhp->iface = nh->iface;
1912 nhp->weight = nh->weight;
1913 if (mls)
1914 {
1915 nhp->labels = nh->labels + mls->len;
1916 nhp->labels_orig = mls->len;
1917 if (nhp->labels <= MPLS_MAX_LABEL_STACK)
1918 {
1919 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
1920 memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
1921 }
1922 else
1923 {
1924 	  log(L_WARN "Sum of label stack sizes %d + %d = %d exceeds the allowed maximum (%d)",
1925 nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
1926 skip_nexthop++;
1927 continue;
1928 }
1929 }
1930 if (ipa_nonzero(nh->gw))
1931 {
1932 nhp->gw = nh->gw; /* Router nexthop */
1933 nhp->flags |= (nh->flags & RNF_ONLINK);
1934 }
1935 else if (ipa_nonzero(he->link))
1936 nhp->gw = he->link; /* Device nexthop with link-local address known */
1937 else
1938 nhp->gw = he->addr; /* Device nexthop with link-local address unknown */
1939 }
1940
1941 if (skip_nexthop)
1942 if (nhr)
1943 nhr->next = NULL;
1944 else
1945 {
1946 a->dest = RTD_UNREACHABLE;
1947 log(L_WARN "No valid nexthop remaining, setting route unreachable");
1948 goto no_nexthop;
1949 }
1950 }
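
/*
 * A small worked example of the label merge above (values illustrative):
 * with hostentry next-hop labels { h0, h1 } (nh->labels == 2) and a stored
 * bottom stack mls->stack == { b0 } (mls->len == 1), the resulting next hop
 * has
 *
 *   nhp->labels      == 3            // 2 + 1
 *   nhp->labels_orig == 1            // remembered for later re-merging
 *   nhp->label[]     == { h0, h1, b0 }  // hostentry labels on top,
 *                                       // original bottom labels below
 *
 * and a sum exceeding MPLS_MAX_LABEL_STACK drops that next hop instead.
 */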
1951
1952 static inline rte *
1953 rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
1954 {
1955 rta *a = alloca(RTA_MAX_SIZE);
1956 memcpy(a, old->attrs, rta_size(old->attrs));
1957
1958 mpls_label_stack mls = { .len = a->nh.labels_orig };
1959 memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
1960
1961 rta_apply_hostentry(a, old->attrs->hostentry, &mls);
1962 a->aflags = 0;
1963
1964 rte *e = sl_alloc(rte_slab);
1965 memcpy(e, old, sizeof(rte));
1966 e->attrs = rta_lookup(a);
1967
1968 return e;
1969 }
1970
1971 static inline int
1972 rt_next_hop_update_net(rtable *tab, net *n)
1973 {
1974 rte **k, *e, *new, *old_best, **new_best;
1975 int count = 0;
1976 int free_old_best = 0;
1977
1978 old_best = n->routes;
1979 if (!old_best)
1980 return 0;
1981
1982 for (k = &n->routes; e = *k; k = &e->next)
1983 if (rta_next_hop_outdated(e->attrs))
1984 {
1985 new = rt_next_hop_update_rte(tab, e);
1986 *k = new;
1987
1988 rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
1989 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
1990
1991 /* Call a pre-comparison hook */
1992 /* Not really an efficient way to compute this */
1993 if (e->attrs->src->proto->rte_recalculate)
1994 e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
1995
1996 if (e != old_best)
1997 rte_free_quick(e);
1998 else /* Freeing of the old best rte is postponed */
1999 free_old_best = 1;
2000
2001 e = new;
2002 count++;
2003 }
2004
2005 if (!count)
2006 return 0;
2007
2008 /* Find the new best route */
2009 new_best = NULL;
2010 for (k = &n->routes; e = *k; k = &e->next)
2011 {
2012 if (!new_best || rte_better(e, *new_best))
2013 new_best = k;
2014 }
2015
2016 /* Relink the new best route to the first position */
2017 new = *new_best;
2018 if (new != n->routes)
2019 {
2020 *new_best = new->next;
2021 new->next = n->routes;
2022 n->routes = new;
2023 }
2024
2025 /* Announce the new best route */
2026 if (new != old_best)
2027 {
2028 rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
2029 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
2030 }
2031
2032 /* FIXME: Better announcement of merged routes */
2033 rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
2034
2035 if (free_old_best)
2036 rte_free_quick(old_best);
2037
2038 return count;
2039 }
2040
2041 static void
2042 rt_next_hop_update(rtable *tab)
2043 {
2044 struct fib_iterator *fit = &tab->nhu_fit;
2045 int max_feed = 32;
2046
2047 if (tab->nhu_state == NHU_CLEAN)
2048 return;
2049
2050 if (tab->nhu_state == NHU_SCHEDULED)
2051 {
2052 FIB_ITERATE_INIT(fit, &tab->fib);
2053 tab->nhu_state = NHU_RUNNING;
2054 }
2055
2056 FIB_ITERATE_START(&tab->fib, fit, net, n)
2057 {
2058 if (max_feed <= 0)
2059 {
2060 FIB_ITERATE_PUT(fit);
2061 ev_schedule(tab->rt_event);
2062 return;
2063 }
2064 max_feed -= rt_next_hop_update_net(tab, n);
2065 }
2066 FIB_ITERATE_END;
2067
2068 /* State change:
2069 * NHU_DIRTY -> NHU_SCHEDULED
2070 * NHU_RUNNING -> NHU_CLEAN
2071 */
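/* Assuming the usual encoding (NHU_CLEAN=0, NHU_SCHEDULED=1, NHU_RUNNING=2,
NHU_DIRTY=3), masking with 1 keeps just the "update requested" bit: a table
marked dirty while the update ran gets rescheduled, a plain run ends clean. */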
2072 tab->nhu_state &= 1;
2073
2074 if (tab->nhu_state != NHU_CLEAN)
2075 ev_schedule(tab->rt_event);
2076 }
2077
2078
2079 struct rtable_config *
2080 rt_new_table(struct symbol *s, uint addr_type)
2081 {
2082 /* Hack that allows one to 'redefine' the master table */
2083 if ((s->class == SYM_TABLE) &&
2084 (s->def == new_config->def_tables[addr_type]) &&
2085 ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
2086 return s->def;
2087
2088 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
2089
2090 cf_define_symbol(s, SYM_TABLE, c);
2091 c->name = s->name;
2092 c->addr_type = addr_type;
2093 c->gc_max_ops = 1000;
2094 c->gc_min_time = 5;
2095
2096 add_tail(&new_config->tables, &c->n);
2097
2098 /* The first table of each type is kept as the default */
2099 if (! new_config->def_tables[addr_type])
2100 new_config->def_tables[addr_type] = c;
2101
2102 return c;
2103 }
2104
2105 /**
2106 * rt_lock_table - lock a routing table
2107 * @r: routing table to be locked
2108 *
2109 * Lock a routing table because it is in use by a protocol,
2110 * preventing it from being freed when it gets undefined in a new
2111 * configuration.
2112 */
2113 void
2114 rt_lock_table(rtable *r)
2115 {
2116 r->use_count++;
2117 }
2118
2119 /**
2120 * rt_unlock_table - unlock a routing table
2121 * @r: routing table to be unlocked
2122 *
2123 * Unlock a routing table formerly locked by rt_lock_table(),
2124 * that is, decrease its use count and delete it if it's scheduled
2125 * for deletion by configuration changes.
2126 */
2127 void
2128 rt_unlock_table(rtable *r)
2129 {
2130 if (!--r->use_count && r->deleted)
2131 {
2132 struct config *conf = r->deleted;
2133 DBG("Deleting routing table %s\n", r->name);
2134 r->config->table = NULL;
2135 if (r->hostcache)
2136 rt_free_hostcache(r);
2137 rem_node(&r->n);
2138 fib_free(&r->fib);
2139 rfree(r->rt_event);
2140 mb_free(r);
2141 config_del_obstacle(conf);
2142 }
2143 }
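/*
 * A minimal usage sketch (hypothetical protocol hooks, not code from this
 * file): a lock is taken while a protocol is attached to a table and dropped
 * on shutdown, so the final rt_unlock_table() may be the one that frees it.
 *
 * static void my_proto_start(struct proto *p) // hypothetical
 * {
 * rt_lock_table(my_table);
 * }
 *
 * static void my_proto_cleanup(struct proto *p) // hypothetical
 * {
 * rt_unlock_table(my_table); // may free my_table if it was undefined
 * }
 */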
2144
2145 static struct rtable_config *
2146 rt_find_table_config(struct config *cf, char *name)
2147 {
2148 struct symbol *sym = cf_find_symbol(cf, name);
2149 return (sym && (sym->class == SYM_TABLE)) ? sym->def : NULL;
2150 }
2151
2152 /**
2153 * rt_commit - commit new routing table configuration
2154 * @new: new configuration
2155 * @old: original configuration or %NULL if it's boot time config
2156 *
2157 * Scan the differences between the @old and @new configuration and modify
2158 * the routing tables according to these changes. If @new defines a
2159 * previously unknown table, create it; if it omits a table existing
2160 * in @old, schedule it for deletion (it gets deleted when all protocols
2161 * disconnect from it by calling rt_unlock_table()); if a table exists
2162 * in both configurations, leave it unchanged.
2163 */
2164 void
2165 rt_commit(struct config *new, struct config *old)
2166 {
2167 struct rtable_config *o, *r;
2168
2169 DBG("rt_commit:\n");
2170 if (old)
2171 {
2172 WALK_LIST(o, old->tables)
2173 {
2174 rtable *ot = o->table;
2175 if (!ot->deleted)
2176 {
2177 r = rt_find_table_config(new, o->name);
2178 if (r && (r->addr_type == o->addr_type) && !new->shutdown)
2179 {
2180 DBG("\t%s: same\n", o->name);
2181 r->table = ot;
2182 ot->name = r->name;
2183 ot->config = r;
2184 if (o->sorted != r->sorted)
2185 log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
2186 }
2187 else
2188 {
2189 DBG("\t%s: deleted\n", o->name);
2190 ot->deleted = old;
2191 config_add_obstacle(old);
2192 rt_lock_table(ot);
2193 rt_unlock_table(ot);
2194 }
2195 }
2196 }
2197 }
2198
2199 WALK_LIST(r, new->tables)
2200 if (!r->table)
2201 {
2202 rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
2203 DBG("\t%s: created\n", r->name);
2204 rt_setup(rt_table_pool, t, r);
2205 add_tail(&routing_tables, &t->n);
2206 r->table = t;
2207 }
2208 DBG("\tdone\n");
2209 }
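/*
 * Illustration (hypothetical configs): with old tables { master4, t1 } and
 * new tables { master4, t2 }, this keeps master4 (same name and address
 * type), marks t1 deleted and lets the lock/obstacle machinery free it once
 * the last protocol disconnects, and allocates a fresh rtable for t2.
 */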
2210
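/* Feed a single network to a channel, honoring its route announcement mode */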
2211 static inline void
2212 do_feed_channel(struct channel *c, net *n, rte *e)
2213 {
2214 rte_update_lock();
2215 if (c->ra_mode == RA_ACCEPTED)
2216 rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
2217 else if (c->ra_mode == RA_MERGED)
2218 rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
2219 else /* RA_BASIC */
2220 rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
2221 rte_update_unlock();
2222 }
2223
2224 /**
2225 * rt_feed_channel - advertise all routes to a channel
2226 * @c: channel to be fed
2227 *
2228 * This function performs one pass of advertisement of routes to a channel that
2229 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2230 * has something to do. (We avoid transferring all the routes in single pass in
2231 * order not to monopolize CPU time.)
2232 */
2233 int
2234 rt_feed_channel(struct channel *c)
2235 {
2236 struct fib_iterator *fit = &c->feed_fit;
2237 int max_feed = 256;
2238
2239 ASSERT(c->export_state == ES_FEEDING);
2240
2241 if (!c->feed_active)
2242 {
2243 FIB_ITERATE_INIT(fit, &c->table->fib);
2244 c->feed_active = 1;
2245 }
2246
2247 FIB_ITERATE_START(&c->table->fib, fit, net, n)
2248 {
2249 rte *e = n->routes;
2250 if (max_feed <= 0)
2251 {
2252 FIB_ITERATE_PUT(fit);
2253 return 0;
2254 }
2255
2256 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2257
2258 if ((c->ra_mode == RA_OPTIMAL) ||
2259 (c->ra_mode == RA_ACCEPTED) ||
2260 (c->ra_mode == RA_MERGED))
2261 if (rte_is_valid(e))
2262 {
2263 /* In the meantime, the protocol may have gone down */
2264 if (c->export_state != ES_FEEDING)
2265 goto done;
2266
2267 do_feed_channel(c, n, e);
2268 max_feed--;
2269 }
2270
2271 if (c->ra_mode == RA_ANY)
2272 for(e = n->routes; e; e = e->next)
2273 {
2274 /* In the meantime, the protocol may have gone down */
2275 if (c->export_state != ES_FEEDING)
2276 goto done;
2277
2278 if (!rte_is_valid(e))
2279 continue;
2280
2281 do_feed_channel(c, n, e);
2282 max_feed--;
2283 }
2284 }
2285 FIB_ITERATE_END;
2286
2287 done:
2288 c->feed_active = 0;
2289 return 1;
2290 }
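/*
 * A sketch of the intended calling pattern (the real driver lives in the
 * generic protocol code; feed_event and channel_feed_done() are hypothetical
 * names): keep calling until a pass reports completion, yielding in between
 * so one big table cannot monopolize the event loop.
 *
 * static void feed_event_hook(void *data) // hypothetical
 * {
 * struct channel *c = data;
 * if (rt_feed_channel(c))
 * channel_feed_done(c); // all routes fed, leave ES_FEEDING
 * else
 * ev_schedule(c->feed_event); // out of budget, continue later
 * }
 */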
2291
2292 /**
2293 * rt_feed_channel_abort - abort channel feeding
2294 * @c: channel
2295 *
2296 * This function is called by the protocol code when the protocol stops or
2297 * ceases to exist during feeding.
2298 */
2299 void
2300 rt_feed_channel_abort(struct channel *c)
2301 {
2302 if (c->feed_active)
2303 {
2304 /* Unlink the iterator */
2305 fit_get(&c->table->fib, &c->feed_fit);
2306 c->feed_active = 0;
2307 }
2308 }
2309
2310
2311 int
2312 rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
2313 {
2314 struct rtable *tab = c->in_table;
2315 rte *old, **pos;
2316 net *net;
2317
2318 if (new)
2319 {
2320 net = net_get(tab, n);
2321
2322 if (!new->pref)
2323 new->pref = c->preference;
2324
2325 if (!rta_is_cached(new->attrs))
2326 new->attrs = rta_lookup(new->attrs);
2327 }
2328 else
2329 {
2330 net = net_find(tab, n);
2331
2332 if (!net)
2333 goto drop_withdraw;
2334 }
2335
2336 /* Find the old rte */
2337 for (pos = &net->routes; old = *pos; pos = &old->next)
2338 if (old->attrs->src == src)
2339 {
2340 if (new && rte_same(old, new))
2341 goto drop_update;
2342
2343 /* Remove the old rte */
2344 *pos = old->next;
2345 rte_free_quick(old);
2346 tab->rt_count--;
2347
2348 break;
2349 }
2350
2351 if (!new)
2352 {
2353 if (!old)
2354 goto drop_withdraw;
2355
2356 return 1;
2357 }
2358
2359 struct channel_limit *l = &c->rx_limit;
2360 if (l->action && !old)
2361 {
2362 if (tab->rt_count >= l->limit)
2363 channel_notify_limit(c, l, PLD_RX, tab->rt_count);
2364
2365 if (l->state == PLS_BLOCKED)
2366 {
2367 rte_trace_in(D_FILTERS, c->proto, new, "ignored [limit]");
2368 goto drop_update;
2369 }
2370 }
2371
2372 /* Insert the new rte */
2373 rte *e = rte_do_cow(new);
2374 e->flags |= REF_COW;
2375 e->net = net;
2376 e->sender = c;
2377 e->lastmod = current_time();
2378 e->next = *pos;
2379 *pos = e;
2380 tab->rt_count++;
2381 return 1;
2382
2383 drop_update:
2384 c->stats.imp_updates_received++;
2385 c->stats.imp_updates_ignored++;
2386 rte_free(new);
2387 return 0;
2388
2389 drop_withdraw:
2390 c->stats.imp_withdraws_received++;
2391 c->stats.imp_withdraws_ignored++;
2392 return 0;
2393 }
2394
2395 int
2396 rt_reload_channel(struct channel *c)
2397 {
2398 struct rtable *tab = c->in_table;
2399 struct fib_iterator *fit = &c->reload_fit;
2400 int max_feed = 64;
2401
2402 ASSERT(c->channel_state == CS_UP);
2403
2404 if (!c->reload_active)
2405 {
2406 FIB_ITERATE_INIT(fit, &tab->fib);
2407 c->reload_active = 1;
2408 }
2409
2410 FIB_ITERATE_START(&tab->fib, fit, net, n)
2411 {
2412 if (max_feed <= 0)
2413 {
2414 FIB_ITERATE_PUT(fit);
2415 return 0;
2416 }
2417
2418 for (rte *e = n->routes; e; e = e->next)
2419 {
2420 rte_update2(c, n->n.addr, rte_do_cow(e), e->attrs->src);
2421 max_feed--;
2422 }
2423 }
2424 FIB_ITERATE_END;
2425
2426 c->reload_active = 0;
2427 return 1;
2428 }
2429
2430 void
2431 rt_reload_channel_abort(struct channel *c)
2432 {
2433 if (c->reload_active)
2434 {
2435 /* Unlink the iterator */
2436 fit_get(&c->in_table->fib, &c->reload_fit);
2437 c->reload_active = 0;
2438 }
2439 }
2440
2441 void
2442 rt_prune_sync(rtable *t, int all)
2443 {
2444 FIB_WALK(&t->fib, net, n)
2445 {
2446 rte *e, **ee = &n->routes;
2447 while (e = *ee)
2448 {
2449 if (all || (e->flags & (REF_STALE | REF_DISCARD)))
2450 {
2451 *ee = e->next;
2452 rte_free_quick(e);
2453 t->rt_count--;
2454 }
2455 else
2456 ee = &e->next;
2457 }
2458 }
2459 FIB_WALK_END;
2460 }
2461
2462
2463 static inline u32
2464 hc_hash(ip_addr a, rtable *dep)
2465 {
2466 return ipa_hash(a) ^ ptr_hash(dep);
2467 }
2468
2469 static inline void
2470 hc_insert(struct hostcache *hc, struct hostentry *he)
2471 {
2472 uint k = he->hash_key >> hc->hash_shift;
2473 he->next = hc->hash_table[k];
2474 hc->hash_table[k] = he;
2475 }
2476
2477 static inline void
2478 hc_remove(struct hostcache *hc, struct hostentry *he)
2479 {
2480 struct hostentry **hep;
2481 uint k = he->hash_key >> hc->hash_shift;
2482
2483 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2484 *hep = he->next;
2485 }
2486
2487 #define HC_DEF_ORDER 10
2488 #define HC_HI_MARK *4
2489 #define HC_HI_STEP 2
2490 #define HC_HI_ORDER 16 /* Must be at most 16 */
2491 #define HC_LO_MARK /5
2492 #define HC_LO_STEP 2
2493 #define HC_LO_ORDER 10
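/*
 * The MARK macros splice arithmetic directly into the expressions below.
 * For example, at order 12 the table has 1 << 12 = 4096 buckets, grows by
 * HC_HI_STEP = 2 orders once it holds more than 4096 * 4 = 16384 entries and
 * shrinks by HC_LO_STEP = 2 orders once it drops below 4096 / 5 = 819; the
 * limits are disabled beyond HC_HI_ORDER and HC_LO_ORDER respectively.
 */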
2494
2495 static void
2496 hc_alloc_table(struct hostcache *hc, unsigned order)
2497 {
2498 uint hsize = 1 << order;
2499 hc->hash_order = order;
2500 hc->hash_shift = 32 - order;
2501 hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
2502 hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);
2503
2504 hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2505 }
2506
2507 static void
2508 hc_resize(struct hostcache *hc, unsigned new_order)
2509 {
2510 struct hostentry **old_table = hc->hash_table;
2511 struct hostentry *he, *hen;
2512 uint old_size = 1 << hc->hash_order;
2513 uint i;
2514
2515 hc_alloc_table(hc, new_order);
2516 for (i = 0; i < old_size; i++)
2517 for (he = old_table[i]; he != NULL; he=hen)
2518 {
2519 hen = he->next;
2520 hc_insert(hc, he);
2521 }
2522 mb_free(old_table);
2523 }
2524
2525 static struct hostentry *
2526 hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2527 {
2528 struct hostentry *he = sl_alloc(hc->slab);
2529
2530 *he = (struct hostentry) {
2531 .addr = a,
2532 .link = ll,
2533 .tab = dep,
2534 .hash_key = k,
2535 };
2536
2537 add_tail(&hc->hostentries, &he->ln);
2538 hc_insert(hc, he);
2539
2540 hc->hash_items++;
2541 if (hc->hash_items > hc->hash_max)
2542 hc_resize(hc, hc->hash_order + HC_HI_STEP);
2543
2544 return he;
2545 }
2546
2547 static void
2548 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2549 {
2550 rta_free(he->src);
2551
2552 rem_node(&he->ln);
2553 hc_remove(hc, he);
2554 sl_free(hc->slab, he);
2555
2556 hc->hash_items--;
2557 if (hc->hash_items < hc->hash_min)
2558 hc_resize(hc, hc->hash_order - HC_LO_STEP);
2559 }
2560
2561 static void
2562 rt_init_hostcache(rtable *tab)
2563 {
2564 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2565 init_list(&hc->hostentries);
2566
2567 hc->hash_items = 0;
2568 hc_alloc_table(hc, HC_DEF_ORDER);
2569 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2570
2571 hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024));
2572 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2573
2574 tab->hostcache = hc;
2575 }
2576
2577 static void
2578 rt_free_hostcache(rtable *tab)
2579 {
2580 struct hostcache *hc = tab->hostcache;
2581
2582 node *n;
2583 WALK_LIST(n, hc->hostentries)
2584 {
2585 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2586 rta_free(he->src);
2587
2588 if (he->uc)
2589 log(L_ERR "Hostcache is not empty in table %s", tab->name);
2590 }
2591
2592 rfree(hc->slab);
2593 rfree(hc->lp);
2594 mb_free(hc->hash_table);
2595 mb_free(hc);
2596 }
2597
2598 static void
2599 rt_notify_hostcache(rtable *tab, net *net)
2600 {
2601 if (tab->hcu_scheduled)
2602 return;
2603
2604 if (trie_match_net(tab->hostcache->trie, net->n.addr))
2605 rt_schedule_hcu(tab);
2606 }
2607
2608 static int
2609 if_local_addr(ip_addr a, struct iface *i)
2610 {
2611 struct ifa *b;
2612
2613 WALK_LIST(b, i->addrs)
2614 if (ipa_equal(a, b->ip))
2615 return 1;
2616
2617 return 0;
2618 }
2619
2620 static u32
2621 rt_get_igp_metric(rte *rt)
2622 {
2623 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2624
2625 if (ea)
2626 return ea->u.data;
2627
2628 rta *a = rt->attrs;
2629
2630 #ifdef CONFIG_OSPF
2631 if ((a->source == RTS_OSPF) ||
2632 (a->source == RTS_OSPF_IA) ||
2633 (a->source == RTS_OSPF_EXT1))
2634 return rt->u.ospf.metric1;
2635 #endif
2636
2637 #ifdef CONFIG_RIP
2638 if (a->source == RTS_RIP)
2639 return rt->u.rip.metric;
2640 #endif
2641
2642 if (a->source == RTS_DEVICE)
2643 return 0;
2644
2645 return IGP_METRIC_UNKNOWN;
2646 }
2647
2648 static int
2649 rt_update_hostentry(rtable *tab, struct hostentry *he)
2650 {
2651 rta *old_src = he->src;
2652 int direct = 0;
2653 int pxlen = 0;
2654
2655 /* Reset the hostentry */
2656 he->src = NULL;
2657 he->dest = RTD_UNREACHABLE;
2658 he->nexthop_linkable = 0;
2659 he->igp_metric = 0;
2660
2661 net_addr he_addr;
2662 net_fill_ip_host(&he_addr, he->addr);
2663 net *n = net_route(tab, &he_addr);
2664 if (n)
2665 {
2666 rte *e = n->routes;
2667 rta *a = e->attrs;
2668 pxlen = n->n.addr->pxlen;
2669
2670 if (a->hostentry)
2671 {
2672 /* A recursive route should not depend on another recursive route */
2673 log(L_WARN "Next hop address %I resolvable through recursive route for %N",
2674 he->addr, n->n.addr);
2675 goto done;
2676 }
2677
2678 if (a->dest == RTD_UNICAST)
2679 {
2680 for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
2681 if (ipa_zero(nh->gw))
2682 {
2683 if (if_local_addr(he->addr, nh->iface))
2684 {
2685 /* The host address is a local address; this is not valid */
2686 log(L_WARN "Next hop address %I is a local address of iface %s",
2687 he->addr, nh->iface->name);
2688 goto done;
2689 }
2690
2691 direct++;
2692 }
2693 }
2694
2695 he->src = rta_clone(a);
2696 he->dest = a->dest;
2697 he->nexthop_linkable = !direct;
2698 he->igp_metric = rt_get_igp_metric(e);
2699 }
2700
2701 done:
2702 /* Add a prefix range to the trie */
2703 trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
2704
2705 rta_free(old_src);
2706 return old_src != he->src;
2707 }
2708
2709 static void
2710 rt_update_hostcache(rtable *tab)
2711 {
2712 struct hostcache *hc = tab->hostcache;
2713 struct hostentry *he;
2714 node *n, *x;
2715
2716 /* Reset the trie */
2717 lp_flush(hc->lp);
2718 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2719
2720 WALK_LIST_DELSAFE(n, x, hc->hostentries)
2721 {
2722 he = SKIP_BACK(struct hostentry, ln, n);
2723 if (!he->uc)
2724 {
2725 hc_delete_hostentry(hc, he);
2726 continue;
2727 }
2728
2729 if (rt_update_hostentry(tab, he))
2730 rt_schedule_nhu(he->tab);
2731 }
2732
2733 tab->hcu_scheduled = 0;
2734 }
2735
2736 struct hostentry *
2737 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2738 {
2739 struct hostentry *he;
2740
2741 if (!tab->hostcache)
2742 rt_init_hostcache(tab);
2743
2744 u32 k = hc_hash(a, dep);
2745 struct hostcache *hc = tab->hostcache;
2746 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2747 if (ipa_equal(he->addr, a) && (he->tab == dep))
2748 return he;
2749
2750 he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
2751 rt_update_hostentry(tab, he);
2752 return he;
2753 }
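/*
 * Typical use (a sketch; protocols normally reach this through the
 * rta_set_recursive_next_hop() helper rather than calling it directly):
 * a protocol with recursive next hops, such as BGP, resolves a gateway
 * through an IGP table and attaches the shared, use-counted hostentry
 * to its route attributes.
 *
 * struct hostentry *he = rt_get_hostentry(igp_table, gw, IPA_NONE, c->table);
 */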
2754
2755
2756 /*
2757 * Documentation for functions declared inline in route.h
2758 */
2759 #if 0
2760
2761 /**
2762 * net_find - find a network entry
2763 * @tab: a routing table
2764 * @addr: address of the network
2765 *
2766 * net_find() looks up the given network in routing table @tab and
2767 * returns a pointer to its &net entry or %NULL if no such network
2768 * exists.
2769 */
2770 static inline net *net_find(rtable *tab, net_addr *addr)
2771 { DUMMY; }
2772
2773 /**
2774 * net_get - obtain a network entry
2775 * @tab: a routing table
2776 * @addr: address of the network
2777 *
2778 * net_get() looks up the given network in routing table @tab and
2779 * returns a pointer to its &net entry. If no such entry exists, it's
2780 * created.
2781 */
2782 static inline net *net_get(rtable *tab, net_addr *addr)
2783 { DUMMY; }
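/*
 * Example (a sketch, assuming a prepared &net_addr): the two calls differ
 * only on a miss, so withdrawal handling usually looks up with net_find()
 * while announcement handling allocates with net_get().
 *
 * net *n = net_find(tab, addr); // NULL if the network is unknown
 * net *m = net_get(tab, addr); // creates an empty entry if needed
 */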
2784
2785 /**
2786 * rte_cow - copy a route for writing
2787 * @r: a route entry to be copied
2788 *
2789 * rte_cow() takes a &rte and prepares it for modification. The exact action
2790 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2791 * just returned unchanged, else a new temporary entry with the same contents
2792 * is created.
2793 *
2794 * The primary use of this function is inside the filter machinery -- when
2795 * a filter wants to modify &rte contents (to change the preference or to
2796 * attach another set of attributes), it must ensure that the &rte is not
2797 * shared with anyone else (and especially that it isn't stored in any routing
2798 * table).
2799 *
2800 * Result: a pointer to the new writable &rte.
2801 */
2802 static inline rte * rte_cow(rte *r)
2803 { DUMMY; }
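/*
 * A hypothetical filter sketch: take a private copy before touching a
 * possibly shared entry; temporary entries are returned unchanged, so the
 * copy costs nothing in the common filter path.
 *
 * rte *w = rte_cow(r); // now safe to modify
 * w->pref = 120;
 */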
2804
2805 #endif