/* Source: thirdparty/bird.git — nest/rt-table.c
   (snapshot near commit "Nest: Fix race condition during reconfiguration") */
1 /*
2 * BIRD -- Routing Tables
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
23 *
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
29 */
30
31 #undef LOCAL_DEBUG
32
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/cli.h"
37 #include "nest/iface.h"
38 #include "lib/resource.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "conf/conf.h"
42 #include "filter/filter.h"
43 #include "lib/string.h"
44 #include "lib/alloca.h"
45
46 pool *rt_table_pool;
47
48 static slab *rte_slab;
49 static linpool *rte_update_pool;
50
51 static list routing_tables;
52
53 static byte *rt_format_via(rte *e);
54 static void rt_free_hostcache(rtable *tab);
55 static void rt_notify_hostcache(rtable *tab, net *net);
56 static void rt_update_hostcache(rtable *tab);
57 static void rt_next_hop_update(rtable *tab);
58 static inline int rt_prune_table(rtable *tab);
59 static inline void rt_schedule_gc(rtable *tab);
60 static inline void rt_schedule_prune(rtable *tab);
61
62
63 /* Like fib_route(), but skips empty net entries */
64 static net *
65 net_route(rtable *tab, ip_addr a, int len)
66 {
67 ip_addr a0;
68 net *n;
69
70 while (len >= 0)
71 {
72 a0 = ipa_and(a, ipa_mkmask(len));
73 n = fib_find(&tab->fib, &a0, len);
74 if (n && rte_is_valid(n->routes))
75 return n;
76 len--;
77 }
78 return NULL;
79 }
80
81 static void
82 rte_init(struct fib_node *N)
83 {
84 net *n = (net *) N;
85
86 N->flags = 0;
87 n->routes = NULL;
88 }
89
90 /**
91 * rte_find - find a route
92 * @net: network node
93 * @src: route source
94 *
95 * The rte_find() function returns a route for destination @net
96 * which is from route source @src.
97 */
98 rte *
99 rte_find(net *net, struct rte_src *src)
100 {
101 rte *e = net->routes;
102
103 while (e && e->attrs->src != src)
104 e = e->next;
105 return e;
106 }
107
108 /**
109 * rte_get_temp - get a temporary &rte
110 * @a: attributes to assign to the new route (a &rta; in case it's
111 * un-cached, rte_update() will create a cached copy automatically)
112 *
113 * Create a temporary &rte and bind it with the attributes @a.
114 * Also set route preference to the default preference set for
115 * the protocol.
116 */
117 rte *
118 rte_get_temp(rta *a)
119 {
120 rte *e = sl_alloc(rte_slab);
121
122 e->attrs = a;
123 e->flags = 0;
124 e->pref = a->src->proto->preference;
125 return e;
126 }
127
128 rte *
129 rte_do_cow(rte *r)
130 {
131 rte *e = sl_alloc(rte_slab);
132
133 memcpy(e, r, sizeof(rte));
134 e->attrs = rta_clone(r->attrs);
135 e->flags = 0;
136 return e;
137 }
138
/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in that case they are returned unchanged. Second, &rte is
 * private copy, but &rta is cached, in that case &rta is duplicated using
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, cached &rta loses one reference, while private
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
 * nexthops, ...) with it. To work properly, original shared &rta should have
 * another reference during the life of created private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  /* Already private attributes — nothing to do. */
  if (!rta_is_cached(r->attrs))
    return r;

  rte *e = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(e->attrs);	/* drop one reference to the cached rta (see note above) */
  e->attrs = a;
  return e;
}
170
171 static int /* Actually better or at least as good as */
172 rte_better(rte *new, rte *old)
173 {
174 int (*better)(rte *, rte *);
175
176 if (!rte_is_valid(old))
177 return 1;
178 if (!rte_is_valid(new))
179 return 0;
180
181 if (new->pref > old->pref)
182 return 1;
183 if (new->pref < old->pref)
184 return 0;
185 if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
186 {
187 /*
188 * If the user has configured protocol preferences, so that two different protocols
189 * have the same preference, try to break the tie by comparing addresses. Not too
190 * useful, but keeps the ordering of routes unambiguous.
191 */
192 return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
193 }
194 if (better = new->attrs->src->proto->rte_better)
195 return better(new, old);
196 return 0;
197 }
198
199 static int
200 rte_mergable(rte *pri, rte *sec)
201 {
202 int (*mergable)(rte *, rte *);
203
204 if (!rte_is_valid(pri) || !rte_is_valid(sec))
205 return 0;
206
207 if (pri->pref != sec->pref)
208 return 0;
209
210 if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
211 return 0;
212
213 if (mergable = pri->attrs->src->proto->rte_mergable)
214 return mergable(pri, sec);
215
216 return 0;
217 }
218
/* Emit one trace line for route @e on behalf of protocol @p.
 * @dir is a direction marker character ('>' for inbound, '<' for outbound
 * — see rte_trace_in()/rte_trace_out()); @msg describes the event. */
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, rt_format_via(e));
}
224
/* Trace an inbound route event, but only if the protocol has the given
 * debug flag (D_ROUTES, D_FILTERS, ...) enabled. */
static inline void
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '>', msg);
}
231
/* Trace an outbound route event, but only if the protocol has the given
 * debug flag enabled. */
static inline void
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '<', msg);
}
238
/*
 * export_filter_ - run export processing (import_control hook + export
 * filter) on route @rt0 for announce hook @ah.
 *
 * Returns the (possibly modified) route to export, or NULL when the route
 * is rejected. If a private copy of the route was created during filtering,
 * it is stored in *@rt_free and must be freed by the caller with rte_free().
 * @tmpa may point to a temporary attribute list; when NULL, a local one is
 * used. With @silent set, rejections update no stats and emit no traces
 * (used when probing whether an old route would have been exported).
 */
static rte *
export_filter_(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  struct proto *p = ah->proto;
  struct filter *filter = ah->out_filter;
  struct proto_stats *stats = ah->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  /* Caller not interested in tmp attrs — use a local slot. */
  if (!tmpa)
    tmpa = &tmpb;

  *tmpa = rte_make_tmp_attrs(rt, pool);

  /* Protocol veto / force-accept hook: v < 0 rejects, v > 0 forces accept. */
  v = p->import_control ? p->import_control(p, &rt, tmpa, pool) : 0;
  if (v < 0)
    {
      if (silent)
	goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
	rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
	rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* Regular export filter run; FILTER_REJECT short-circuits. */
  v = filter && ((filter == FILTER_REJECT) ||
		 (f_run(filter, &rt, tmpa, pool,
			FF_FORCE_TMPATTR | (silent ? FF_SILENT : 0)) > F_ACCEPT));
  if (v)
    {
      if (silent)
	goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  /* Hand ownership of any filter-created copy to the caller. */
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
299
/* Convenience wrapper around export_filter_() using the shared
 * rte_update_pool linpool for temporary allocations. */
static inline rte *
export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  return export_filter_(ah, rt0, rt_free, tmpa, rte_update_pool, silent);
}
305
/*
 * do_rt_notify - deliver an already-filtered route change (@new replaces
 * @old, either may be NULL) to the protocol behind announce hook @ah,
 * applying the export limit, updating export stats and tracing, then
 * calling the protocol's rt_notify() hook. @refeed marks refeed runs,
 * which use different exp_routes accounting (see comments below).
 */
static void
do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;


  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct proto_limit *l = ah->out_limit;
  if (l && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
	proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
	{
	  stats->exp_routes++;	/* see note above */
	  stats->exp_updates_rejected++;
	  rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
	  new = NULL;

	  /* Blocked update with no old route — nothing to announce at all. */
	  if (!old)
	    return;
	}
    }


  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
	rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
	rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
	rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, ah->table, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily splice the route's own eattrs onto the end of the
	 tmpa chain for the duration of the rt_notify() call, then
	 unlink them again so tmpa stays reusable. */
      ea_list *t = tmpa;
      while (t->next)
	t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, ah->table, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
}
394
/*
 * rt_notify_basic - announce a single route change (@new0 replaces @old0)
 * through announce hook @ah, running both routes through the export
 * filters first so do_rt_notify() gets correct new/old arguments.
 * @refeed marks refeed runs (old route is then not re-filtered).
 */
static void
rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *new = new0;
  rte *old = old0;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  if (new)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that). Therefore,
   * we also do not try to run the filter on old routes that are
   * older than the last filter change.
   */

  if (new)
    new = export_filter(ah, new, &new_free, &tmpa, 0);

  /* Silent probe of the old route — only when filters have not changed
     since it was last modified (see comment above). */
  if (old && !(refeed || (old->lastmod <= ah->last_out_filter_change)))
    old = export_filter(ah, old, &old_free, NULL, 1);

  if (!new && !old)
    {
      /*
       * As mentioned above, 'old' value may be incorrect in some race conditions.
       * We generally ignore it with the exception of withdraw to pipe protocol.
       * In that case we rather propagate unfiltered withdraws regardless of
       * export filters to ensure that when a protocol is flushed, its routes are
       * removed from all tables. Possible spurious unfiltered withdraws are not
       * problem here as they are ignored if there is no corresponding route at
       * the other end of the pipe. We directly call rt_notify() hook instead of
       * do_rt_notify() to avoid logging and stat counters.
       */

#ifdef CONFIG_PIPE
      if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
	p->rt_notify(p, ah->table, net, NULL, old0, NULL);
#endif

      return;
    }

  do_rt_notify(ah, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
469
/*
 * rt_notify_accepted - announce in RA_ACCEPTED mode: the protocol only
 * cares about the first route in @net accepted by its export filters.
 * Finds the new and old "first accepted" routes around the change
 * (@new_changed replacing @old_changed, which sat after @before_old)
 * and notifies only if that first-accepted route changed.
 * @feed: 0 = normal update, 1 = initial feed, 2 = refeed.
 */
static void
rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  ea_list *tmpa = NULL;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
	break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
	old_meet = 1;
    }

  /*
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed.
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)	/* refeed */
	old_best = new_best ? new_best :
	  (rte_is_valid(net->routes) ? net->routes : NULL);
      else
	old_best = NULL;

      if (!new_best && !old_best)
	return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
	rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(ah, r, &old_free, NULL, 1))
	goto found;

      if (r == before_old)
	if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
	  goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
587
588
/* Fold the next hop(s) of attributes @a into the accumulated next-hop
 * list @nhs (capped at @max entries). A non-multipath rta contributes a
 * single stack-built entry; mpnh_merge() is called with ry=0, so the
 * temporary is presumably copied into @pool rather than kept — TODO
 * confirm against mpnh_merge() semantics. */
static struct mpnh *
mpnh_merge_rta(struct mpnh *nhs, rta *a, linpool *pool, int max)
{
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
  return mpnh_merge(nhs, nh2, 1, 0, max, pool);
}
596
/*
 * rt_export_merged - build the merged (ECMP) export route for @net:
 * export-filter the best route, then merge next hops of every other
 * mergable route that also passes the filters. Returns the route to
 * export (possibly a private copy stored in *@rt_free for the caller
 * to free), or NULL when the best route is invalid or rejected.
 */
rte *
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent)
{
  // struct proto *p = ah->proto;
  struct mpnh *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(ah, best0, rt_free, tmpa, pool, silent);

  /* Rejected or unreachable best route — nothing to merge into. */
  if (!best || !rte_is_reachable(best))
    return best;

  /* Collect next hops from the remaining mergable, filter-accepted routes. */
  for (rt0 = best0->next; rt0; rt0 = rt0->next)
    {
      if (!rte_mergable(best0, rt0))
	continue;

      rt = export_filter_(ah, rt0, &tmp, NULL, pool, 1);

      if (!rt)
	continue;

      if (rte_is_reachable(rt))
	nhs = mpnh_merge_rta(nhs, rt->attrs, pool, ah->proto->merge_limit);

      if (tmp)
	rte_free(tmp);
    }

  if (nhs)
    {
      nhs = mpnh_merge_rta(nhs, best->attrs, pool, ah->proto->merge_limit);

      /* More than one next hop — rewrite the (COW'd) attrs as multipath. */
      if (nhs->next)
	{
	  best = rte_cow_rta(best, pool);
	  best->attrs->dest = RTD_MULTIPATH;
	  best->attrs->nexthops = nhs;
	}
    }

  if (best != best0)
    *rt_free = best;

  return best;
}
649
650
/*
 * rt_notify_merged - announce in RA_MERGED mode: the protocol receives
 * one merged (ECMP) route per network. Skips the notification when the
 * change is irrelevant to the merged route, otherwise rebuilds it via
 * rt_export_merged() and hands new/old to do_rt_notify().
 */
static void
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
		 rte *new_best, rte *old_best, int refeed)
{
  // struct proto *p = ah->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
    {
      new_changed = rte_mergable(new_best, new_changed) ?
	export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;

      old_changed = rte_mergable(old_best, old_changed) ?
	export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;

      /* Neither changed route participates in the merged route — skip. */
      if (!new_changed && !old_changed)
	return;
    }

  if (new_best)
    ah->stats->exp_updates_received++;
  else
    ah->stats->exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, rte_update_pool, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}
709
710
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
 * @net: network in question
 * @new: the new route to be announced
 * @old: the previous route for the same network
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 * @before_old: The previous route before @old for the same network.
 * 		If @before_old is NULL @old was the first.
 *
 * This function gets a routing table update and announces it
 * to all protocols that accept given type of route announcement
 * and are connected to the same table by their announcement hooks.
 *
 * Route announcement of type %RA_OPTIMAL is generated when optimal
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
 *
 * Route announcement of type %RA_ANY is generated when any route (in
 * routing table @tab) changes. In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.). Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
 */
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
	     rte *new_best, rte *old_best, rte *before_old)
{
  /* Normalize: invalid routes are treated as absent. */
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!old && !new)
    return;

  if (type == RA_OPTIMAL)
    {
      /* Keep per-protocol preferred-route counters in sync. */
      if (new)
	new->attrs->src->proto->stats.pref_routes++;
      if (old)
	old->attrs->src->proto->stats.pref_routes--;

      if (tab->hostcache)
	rt_notify_hostcache(tab, net);
    }

  /* Dispatch to every hook that subscribed to this announcement type. */
  struct announce_hook *a;
  WALK_LIST(a, tab->hooks)
    {
      ASSERT(a->proto->export_state != ES_DOWN);
      if (a->proto->accept_ra_types == type)
	if (type == RA_ACCEPTED)
	  rt_notify_accepted(a, net, new, old, before_old, 0);
	else if (type == RA_MERGED)
	  rt_notify_merged(a, net, new, old, new_best, old_best, 0);
	else
	  rt_notify_basic(a, net, new, old, 0);
    }
}
786
/* Sanity-check an incoming route: valid prefix/length, routable address
 * class, and sorted next hops for multipath routes. Returns 1 when the
 * route is acceptable, 0 (with a warning logged) otherwise. */
static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
    {
      log(L_WARN "Ignoring bogus prefix %I/%d received via %s",
	  n->n.prefix, n->n.pxlen, e->sender->proto->name);
      return 0;
    }

  /* Reject non-host and link-scope (or worse) destinations. */
  c = ipa_classify_net(n->n.prefix);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
    {
      log(L_WARN "Ignoring bogus route %I/%d received via %s",
	  n->n.prefix, n->n.pxlen, e->sender->proto->name);
      return 0;
    }

  if ((e->attrs->dest == RTD_MULTIPATH) && !mpnh_is_sorted(e->attrs->nexthops))
    {
      log(L_WARN "Ignoring unsorted multipath route %I/%d received via %s",
	  n->n.prefix, n->n.pxlen, e->sender->proto->name);
      return 0;
    }

  return 1;
}
817
/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() frees the given &rte, releasing its route attributes as
 * well if they are cached. It does not unlink the route from any list;
 * the caller is responsible for that.
 */
void
rte_free(rte *e)
{
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}
831
/* Like rte_free(), but assumes the attributes are cached, skipping the
 * rta_is_cached() check. */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
838
839 static int
840 rte_same(rte *x, rte *y)
841 {
842 return
843 x->attrs == y->attrs &&
844 x->flags == y->flags &&
845 x->pflags == y->pflags &&
846 x->pref == y->pref &&
847 (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
848 }
849
/* A route is "ok" when it exists and is not marked as filtered out. */
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
851
/*
 * rte_recalculate - apply one route change to @net: remove the previous
 * route from source @src (if any), insert @new (NULL means withdraw),
 * enforce receive/import limits, re-elect the best route and announce
 * the change (RA_ANY, RA_OPTIMAL, RA_ACCEPTED for sorted tables, and
 * RA_MERGED). Takes ownership of @new; the replaced route is freed.
 */
static void
rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = ah->proto;
  struct rtable *table = ah->table;
  struct proto_stats *stats = ah->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->attrs->src == src)
	{
	  /* If there is the same route in the routing table but from
	   * a different sender, then there are two paths from the
	   * source protocol to this routing table through transparent
	   * pipes, which is not allowed.
	   *
	   * We log that and ignore the route. If it is withdraw, we
	   * ignore it completely (there might be 'spurious withdraws',
	   * see FIXME in do_rte_announce())
	   */
	  if (old->sender->proto != p)
	    {
	      if (new)
		{
		  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %I/%d to table %s",
		      net->n.prefix, net->n.pxlen, table->name);
		  rte_free_quick(new);
		}
	      return;
	    }

	  if (new && rte_same(old, new))
	    {
	      /* No changes, ignore the new route */

	      if (!rte_is_filtered(new))
		{
		  stats->imp_updates_ignored++;
		  rte_trace_in(D_ROUTES, p, new, "ignored");
		}

	      rte_free_quick(new);
	      return;
	    }
	  /* Unlink the old route; it is freed at the very end. */
	  *k = old->next;
	  break;
	}
      k = &old->next;
      before_old = old;
    }

  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  /* Receive limit: counts all routes (imported + filtered) from this hook. */
  struct proto_limit *l = ah->rx_limit;
  if (l && !old && new)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
	proto_notify_limit(ah, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In receive limit the situation is simple, old is NULL so
	     we just free new and exit like nothing happened */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
	  rte_free_quick(new);
	  return;
	}
    }

  /* Import limit: counts only routes accepted into the table. */
  l = ah->in_limit;
  if (l && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
	proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In import limit the situation is more complicated. We
	     shouldn't just drop the route, we should handle it like
	     it was filtered. We also have to continue the route
	     processing if old or new is non-NULL, but we should exit
	     if both are NULL as this case is probably assumed to be
	     already handled. */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

	  if (ah->in_keep_filtered)
	    new->flags |= REF_FILTERED;
	  else
	    { rte_free_quick(new); new = NULL; }

	  /* Note that old && !new could be possible when
	     ah->in_keep_filtered changed in the recent past. */

	  if (!old && !new)
	    return;

	  new_ok = 0;
	  goto skip_stats1;
	}
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

 skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
	{
	  if (before_old && !rte_better(new, before_old))
	    k = &before_old->next;
	  else
	    k = &net->routes;

	  for (; *k; k=&(*k)->next)
	    if (rte_better(new, *k))
	      break;

	  new->next = *k;
	  *k = new;
	}
    }
  else
    {
      /* If routes are not sorted, find the best route and move it on
	 the first position. There are several optimized cases. */

      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
	goto do_recalculate;

      if (new && rte_better(new, old_best))
	{
	  /* The first case - the new route is cleary optimal,
	     we link it at the first position */

	  new->next = net->routes;
	  net->routes = new;
	}
      else if (old == old_best)
	{
	  /* The second case - the old best route disappeared, we add the
	     new route (if we have any) to the list (we don't care about
	     position) and then we elect the new optimal route and relink
	     that route at the first position and announce it. New optimal
	     route might be NULL if there is no more routes */

	do_recalculate:
	  /* Add the new route to the list */
	  if (new)
	    {
	      new->next = net->routes;
	      net->routes = new;
	    }

	  /* Find a new optimal route (if there is any) */
	  if (net->routes)
	    {
	      rte **bp = &net->routes;
	      for (k=&(*bp)->next; *k; k=&(*k)->next)
		if (rte_better(*k, *bp))
		  bp = k;

	      /* And relink it */
	      rte *best = *bp;
	      *bp = best->next;
	      best->next = net->routes;
	      net->routes = best;
	    }
	}
      else if (new)
	{
	  /* The third case - the new route is not better than the old
	     best route (therefore old_best != NULL) and the old best
	     route was not removed (therefore old_best == net->routes).
	     We just link the new route after the old best route. */

	  ASSERT(net->routes != NULL);
	  new->next = net->routes->next;
	  net->routes->next = new;
	}
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
    }

  if (new)
    new->lastmod = now;

  /* Log the route change */
  if (p->debug & D_ROUTES)
    {
      if (new_ok)
	rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
	{
	  if (old != old_best)
	    rte_trace(p, old, '>', "removed");
	  else if (rte_is_ok(net->routes))
	    rte_trace(p, old, '>', "removed [replaced]");
	  else
	    rte_trace(p, old, '>', "removed [sole]");
	}
    }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);

  /* Empty net entries accumulate — schedule garbage collection when
     thresholds are reached. */
  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_gc(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}
1108
/* Nesting counter to allow recursive updates; the shared linpool
   rte_update_pool is flushed only when the outermost level unlocks. */
static int rte_update_nest_cnt;
1110
/*
 * Enter a (possibly nested) route update critical section. Balanced by
 * rte_update_unlock(), which flushes rte_update_pool once the nesting
 * counter drops back to zero.
 */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}
1116
/*
 * Leave a route update critical section. When the outermost section is
 * closed, all temporary data allocated from rte_update_pool is freed.
 */
static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
1123
1124 static inline void
1125 rte_hide_dummy_routes(net *net, rte **dummy)
1126 {
1127 if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1128 {
1129 *dummy = net->routes;
1130 net->routes = (*dummy)->next;
1131 }
1132 }
1133
1134 static inline void
1135 rte_unhide_dummy_routes(net *net, rte **dummy)
1136 {
1137 if (*dummy)
1138 {
1139 (*dummy)->next = net->routes;
1140 net->routes = *dummy;
1141 }
1142 }
1143
1144 /**
1145 * rte_update - enter a new update to a routing table
1146 * @table: table to be updated
1147 * @ah: pointer to table announce hook
1148 * @net: network node
1149 * @p: protocol submitting the update
1150 * @src: protocol originating the update
1151 * @new: a &rte representing the new route or %NULL for route removal.
1152 *
1153 * This function is called by the routing protocols whenever they discover
1154 * a new route or wish to update/remove an existing route. The right announcement
1155 * sequence is to build route attributes first (either un-cached with @aflags set
1156 * to zero or a cached one using rta_lookup(); in this case please note that
1157 * you need to increase the use count of the attributes yourself by calling
1158 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1159 * the appropriate data and finally submit the new &rte by calling rte_update().
1160 *
1161 * @src specifies the protocol that originally created the route and the meaning
1162 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1163 * same value as @new->attrs->proto. @p specifies the protocol that called
1164 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1165 * stores @p in @new->sender;
1166 *
1167 * When rte_update() gets any route, it automatically validates it (checks,
1168 * whether the network and next hop address are valid IP addresses and also
1169 * whether a normal routing protocol doesn't try to smuggle a host or link
1170 * scope route to the table), converts all protocol dependent attributes stored
1171 * in the &rte to temporary extended attributes, consults import filters of the
1172 * protocol to see if the route should be accepted and/or its attributes modified,
1173 * stores the temporary attributes back to the &rte.
1174 *
1175 * Now, having a "public" version of the route, we
1176 * automatically find any old route defined by the protocol @src
1177 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1178 * recalculate the optimal route for this destination and finally broadcast
1179 * the change (if any) to all routing protocols by calling rte_announce().
1180 *
1181 * All memory used for attribute lists and other temporary allocations is taken
1182 * from a special linear pool @rte_update_pool and freed when rte_update()
1183 * finishes.
1184 */
1185
void
rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;
  struct filter *filter = ah->in_filter;
  ea_list *tmpa = NULL;
  rte *dummy = NULL;

  rte_update_lock();
  if (new)
    {
      new->sender = ah;

      stats->imp_updates_received++;

      /* Sanity checks on the incoming route (see rte_validate()) */
      if (!rte_validate(new))
        {
          rte_trace_in(D_FILTERS, p, new, "invalid");
          stats->imp_updates_invalid++;
          goto drop;
        }

      if (filter == FILTER_REJECT)
        {
          stats->imp_updates_filtered++;
          rte_trace_in(D_FILTERS, p, new, "filtered out");

          /* With in_keep_filtered, rejected routes are kept in the table,
             marked REF_FILTERED; otherwise they become withdrawals */
          if (! ah->in_keep_filtered)
            goto drop;

          /* new is a private copy, i could modify it */
          new->flags |= REF_FILTERED;
        }
      else
        {
          /* Expand protocol-dependent attributes for the import filter */
          tmpa = rte_make_tmp_attrs(new, rte_update_pool);
          if (filter && (filter != FILTER_REJECT))
            {
              ea_list *old_tmpa = tmpa;
              int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
              if (fr > F_ACCEPT)
                {
                  stats->imp_updates_filtered++;
                  rte_trace_in(D_FILTERS, p, new, "filtered out");

                  if (! ah->in_keep_filtered)
                    goto drop;

                  new->flags |= REF_FILTERED;
                }
              /* Filter modified the temporary attributes - store them back */
              if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
                src->proto->store_tmp_attrs(new, tmpa);
            }
        }
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
        new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    {
      stats->imp_withdraws_received++;

      /* A withdrawal needs both the net and the route source to identify
         which route to remove */
      if (!net || !src)
        {
          stats->imp_withdraws_ignored++;
          rte_update_unlock();
          return;
        }
    }

 recalc:
  /* Hide a possible dummy route while the real list is recalculated */
  rte_hide_dummy_routes(net, &dummy);
  rte_recalculate(ah, net, new, src);
  rte_unhide_dummy_routes(net, &dummy);
  rte_update_unlock();
  return;

 drop:
  /* A dropped route is processed as a withdrawal of the same (net, src) */
  rte_free(new);
  new = NULL;
  goto recalc;
}
1268
1269 /* Independent call to rte_announce(), used from next hop
1270 recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  /* Wrap the announcement in a lock pair so temporary allocations from
     rte_update_pool are flushed when the outermost level unlocks */
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1279
static inline void
rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */
{
  /* Recalculation with new == NULL acts as a withdrawal of @old's
     (net, src) pair, removing the route from the table */
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1287
1288 /* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, prefix, pxlen);
  rte *rt = n ? n->routes : NULL;

  /* No network entry or no valid best route - nothing would be exported */
  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  ea_list *tmpa = rte_make_tmp_attrs(rt, rte_update_pool);
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool,
               FF_FORCE_TMPATTR | FF_SILENT) <= F_ACCEPT);

  /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1315
1316
1317 /**
1318 * rt_refresh_begin - start a refresh cycle
1319 * @t: related routing table
1320 * @ah: related announce hook
1321 *
1322 * This function starts a refresh cycle for given routing table and announce
1323 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1324 * routes to the routing table (by rte_update()). After that, all protocol
1325 * routes (more precisely routes with @ah as @sender) not sent during the
1326 * refresh cycle but still in the table from the past are pruned. This is
1327 * implemented by marking all related routes as stale by REF_STALE flag in
1328 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1329 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1330 */
1331 void
1332 rt_refresh_begin(rtable *t, struct announce_hook *ah)
1333 {
1334 net *n;
1335 rte *e;
1336
1337 FIB_WALK(&t->fib, fn)
1338 {
1339 n = (net *) fn;
1340 for (e = n->routes; e; e = e->next)
1341 if (e->sender == ah)
1342 e->flags |= REF_STALE;
1343 }
1344 FIB_WALK_END;
1345 }
1346
1347 /**
1348 * rt_refresh_end - end a refresh cycle
1349 * @t: related routing table
1350 * @ah: related announce hook
1351 *
1352 * This function ends a refresh cycle for given routing table and announce
1353 * hook. See rt_refresh_begin() for description of refresh cycles.
1354 */
1355 void
1356 rt_refresh_end(rtable *t, struct announce_hook *ah)
1357 {
1358 int prune = 0;
1359 net *n;
1360 rte *e;
1361
1362 FIB_WALK(&t->fib, fn)
1363 {
1364 n = (net *) fn;
1365 for (e = n->routes; e; e = e->next)
1366 if ((e->sender == ah) && (e->flags & REF_STALE))
1367 {
1368 e->flags |= REF_DISCARD;
1369 prune = 1;
1370 }
1371 }
1372 FIB_WALK_END;
1373
1374 if (prune)
1375 rt_schedule_prune(t);
1376 }
1377
1378
1379 /**
1380 * rte_dump - dump a route
1381 * @e: &rte to be dumped
1382 *
1383 * This function dumps contents of a &rte to debug output.
1384 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1I/%2d ", n->n.prefix, n->n.pxlen);
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
  rta_dump(e->attrs);
  /* Let the originating protocol dump its private attributes, if it has a hook */
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}
1396
1397 /**
1398 * rt_dump - dump a routing table
1399 * @t: routing table to be dumped
1400 *
1401 * This function dumps contents of a given routing table to debug output.
1402 */
1403 void
1404 rt_dump(rtable *t)
1405 {
1406 rte *e;
1407 net *n;
1408 struct announce_hook *a;
1409
1410 debug("Dump of routing table <%s>\n", t->name);
1411 #ifdef DEBUGGING
1412 fib_check(&t->fib);
1413 #endif
1414 FIB_WALK(&t->fib, fn)
1415 {
1416 n = (net *) fn;
1417 for(e=n->routes; e; e=e->next)
1418 rte_dump(e);
1419 }
1420 FIB_WALK_END;
1421 WALK_LIST(a, t->hooks)
1422 debug("\tAnnounces routes to protocol %s\n", a->proto->name);
1423 debug("\n");
1424 }
1425
1426 /**
1427 * rt_dump_all - dump all routing tables
1428 *
1429 * This function dumps contents of all routing tables to debug output.
1430 */
void
rt_dump_all(void)
{
  rtable *t;

  /* Walk the global list of routing tables */
  WALK_LIST(t, routing_tables)
    rt_dump(t);
}
1439
/* Mark the table for pruning and kick its maintenance event (rt_event()) */
static inline void
rt_schedule_prune(rtable *tab)
{
  rt_mark_for_prune(tab);
  ev_schedule(tab->rt_event);
}
1446
1447 static inline void
1448 rt_schedule_gc(rtable *tab)
1449 {
1450 if (tab->gc_scheduled)
1451 return;
1452
1453 tab->gc_scheduled = 1;
1454 ev_schedule(tab->rt_event);
1455 }
1456
1457 static inline void
1458 rt_schedule_hcu(rtable *tab)
1459 {
1460 if (tab->hcu_scheduled)
1461 return;
1462
1463 tab->hcu_scheduled = 1;
1464 ev_schedule(tab->rt_event);
1465 }
1466
1467 static inline void
1468 rt_schedule_nhu(rtable *tab)
1469 {
1470 if (tab->nhu_state == 0)
1471 ev_schedule(tab->rt_event);
1472
1473 /* state change 0->1, 2->3 */
1474 tab->nhu_state |= 1;
1475 }
1476
1477
/* Delete orphaned network entries (nets with no routes left) from the
   FIB of @tab and reset the GC bookkeeping counters. */
static void
rt_prune_nets(rtable *tab)
{
  struct fib_iterator fit;
  int ncnt = 0, ndel = 0;   /* Networks seen / deleted, for the debug message */

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  FIB_ITERATE_INIT(&fit, &tab->fib);
again:
  FIB_ITERATE_START(&tab->fib, &fit, f)
    {
      net *n = (net *) f;
      ncnt++;
      if (!n->routes)       /* Orphaned FIB entry */
        {
          /* Save the iterator position, delete the node, restart the walk */
          FIB_ITERATE_PUT(&fit, f);
          fib_delete(&tab->fib, f);
          ndel++;
          goto again;
        }
    }
  FIB_ITERATE_END(f);
  DBG("Pruned %d of %d networks\n", ndel, ncnt);

  /* Reset GC state; the scheduling side lives in rt_schedule_gc() and
     the gc_counter/gc_time thresholds are checked in rte_recalculate() */
  tab->gc_counter = 0;
  tab->gc_time = now;
  tab->gc_scheduled = 0;
}
1509
/* Deferred per-table maintenance hook (tab->rt_event): performs hostcache
   updates, next hop updates, pruning and garbage collection requested by
   the rt_schedule_*() helpers. */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    if (!rt_prune_table(tab))
      {
        /* Table prune unfinished */
        ev_schedule(tab->rt_event);
        return;
      }

  if (tab->gc_scheduled)
    {
      rt_prune_nets(tab);
      rt_prune_sources(); // FIXME this should be moved to independent event
    }
}
1535
1536 void
1537 rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1538 {
1539 bzero(t, sizeof(*t));
1540 fib_init(&t->fib, p, sizeof(net), 0, rte_init);
1541 t->name = name;
1542 t->config = cf;
1543 init_list(&t->hooks);
1544 if (cf)
1545 {
1546 t->rt_event = ev_new(p);
1547 t->rt_event->hook = rt_event;
1548 t->rt_event->data = t;
1549 t->gc_time = now;
1550 }
1551 }
1552
1553 /**
1554 * rt_init - initialize routing tables
1555 *
1556 * This function is called during BIRD startup. It initializes the
1557 * routing table module.
1558 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  /* Shared linpool for temporary route data, flushed by rte_update_unlock() */
  rte_update_pool = lp_new(rt_table_pool, 4080);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1568
1569
/*
 * One bounded step of table pruning: remove routes of flushing protocols
 * and routes marked REF_DISCARD, and delete orphaned network entries.
 * Decrements *@limit per removed route; returns 0 when the limit was
 * exhausted (iterator position saved for the next call), 1 when done.
 */
static int
rt_prune_step(rtable *tab, int *limit)
{
  struct fib_iterator *fit = &tab->prune_fit;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == RPS_NONE)
    return 1;

  if (tab->prune_state == RPS_SCHEDULED)
    {
      /* First step of a new run - initialize the iterator */
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->prune_state = RPS_RUNNING;
    }

again:
  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e;

    rescan:
      for (e=n->routes; e; e=e->next)
        if (e->sender->proto->flushing || (e->flags & REF_DISCARD))
          {
            if (*limit <= 0)
              {
                /* Out of budget - remember the position and return */
                FIB_ITERATE_PUT(fit, fn);
                return 0;
              }

            rte_discard(e);
            (*limit)--;

            /* The route list changed under us - scan it from the start */
            goto rescan;
          }
      if (!n->routes)           /* Orphaned FIB entry */
        {
          FIB_ITERATE_PUT(fit, fn);
          fib_delete(&tab->fib, fn);
          goto again;
        }
    }
  FIB_ITERATE_END(fn);

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->prune_state = RPS_NONE;
  return 1;
}
1626
1627 /**
1628 * rt_prune_table - prune a routing table
1629 * @tab: a routing table for pruning
1630 *
1631 * This function scans the routing table @tab and removes routes belonging to
1632 * flushing protocols, discarded routes and also stale network entries, in a
1633 * similar fashion to rt_prune_loop(). Returns 1 when all such routes are
1634 * pruned. Contrary to rt_prune_loop(), this function is not a part of the
1635 * protocol flushing loop, but it is called from rt_event() for just one routing
1636 * table.
1637 *
1638 * Note that rt_prune_table() and rt_prune_loop() share (for each table) the
1639 * prune state (@prune_state) and also the pruning iterator (@prune_fit).
1640 */
1641 static inline int
1642 rt_prune_table(rtable *tab)
1643 {
1644 int limit = 512;
1645 return rt_prune_step(tab, &limit);
1646 }
1647
1648 /**
1649 * rt_prune_loop - prune routing tables
1650 *
1651 * The prune loop scans routing tables and removes routes belonging to flushing
1652 * protocols, discarded routes and also stale network entries. Returns 1 when
1653 * all such routes are pruned. It is a part of the protocol flushing loop.
1654 */
1655 int
1656 rt_prune_loop(void)
1657 {
1658 int limit = 512;
1659 rtable *t;
1660
1661 WALK_LIST(t, routing_tables)
1662 if (! rt_prune_step(t, &limit))
1663 return 0;
1664
1665 return 1;
1666 }
1667
/* Preconfigure routing tables: start the new config with an empty table
   list containing just the default 'master' table. */
void
rt_preconfig(struct config *c)
{
  struct symbol *s = cf_get_symbol("master");

  init_list(&c->tables);
  c->master_rtc = rt_new_table(s);
}
1676
1677
1678 /*
1679 * Some functions for handing internal next hop updates
1680 * triggered by rt_schedule_nhu().
1681 */
1682
/* Test whether the cached next hop data of @a differ from its hostentry,
   i.e. whether the route needs a next hop update. */
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  /* Routes without a hostentry do not use recursive next hops */
  if (!he)
    return 0;

  /* Unresolved hostentry - the route should be unreachable */
  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  /* Compare all next-hop-related fields against the hostentry */
  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    !mpnh_same(a->nexthops, he->src->nexthops);
}
1698
1699 static inline void
1700 rta_apply_hostentry(rta *a, struct hostentry *he)
1701 {
1702 a->hostentry = he;
1703 a->iface = he->src ? he->src->iface : NULL;
1704 a->gw = he->gw;
1705 a->dest = he->dest;
1706 a->igp_metric = he->igp_metric;
1707 a->nexthops = he->src ? he->src->nexthops : NULL;
1708 }
1709
/*
 * Create a copy of @old with next hop fields refreshed from its
 * hostentry. The copy gets a freshly cached rta; the caller is
 * responsible for linking it into the net and freeing @old.
 */
static inline rte *
rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
{
  rta a;
  memcpy(&a, old->attrs, sizeof(rta));
  rta_apply_hostentry(&a, old->attrs->hostentry);
  a.aflags = 0;   /* reset attribute flags on the private copy before rta_lookup() */

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(&a);

  return e;
}
1724
/*
 * Refresh all routes of @n whose cached next hop data got out of sync
 * with their hostentry (see rta_next_hop_outdated()), re-select and
 * announce the best route. Returns the number of updated routes,
 * used as a work estimate by rt_next_hop_update().
 */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* Replace each outdated route in place with a refreshed copy */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
        new = rt_next_hop_update_rte(tab, e);
        *k = new;

        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

        /* Call a pre-comparison hook */
        /* Not really an efficient way to compute this */
        if (e->attrs->src->proto->rte_recalculate)
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

        if (e != old_best)
          rte_free_quick(e);
        else /* Freeing of the old best rte is postponed */
          free_old_best = 1;

        e = new;
        count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
        new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  /* Now the announcements are done, the old best rte may be released */
  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1794
/*
 * Walk table @tab and refresh recursive next hops, limited to max_feed
 * units of work per invocation; reschedules itself via tab->rt_event
 * until the walk completes. nhu_state transitions: 0 idle, 1 scheduled,
 * 2 running, 3 running with another round requested (see the 0->1/2->3
 * note in rt_schedule_nhu()).
 */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;   /* Budget of route updates per invocation */

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      if (max_feed <= 0)
        {
          /* Budget exhausted - save the position and continue later */
          FIB_ITERATE_PUT(fit, fn);
          ev_schedule(tab->rt_event);
          return;
        }
      max_feed -= rt_next_hop_update_net(tab, (net *) fn);
    }
  FIB_ITERATE_END(fn);

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  /* Another update was requested while we were running - go again */
  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
1828
1829
1830 struct rtable_config *
1831 rt_new_table(struct symbol *s)
1832 {
1833 /* Hack that allows to 'redefine' the master table */
1834 if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc))
1835 return s->def;
1836
1837 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
1838
1839 cf_define_symbol(s, SYM_TABLE, c);
1840 c->name = s->name;
1841 add_tail(&new_config->tables, &c->n);
1842 c->gc_max_ops = 1000;
1843 c->gc_min_time = 5;
1844 return c;
1845 }
1846
1847 /**
1848 * rt_lock_table - lock a routing table
1849 * @r: routing table to be locked
1850 *
1851 * Lock a routing table, because it's in use by a protocol,
1852 * preventing it from being freed when it gets undefined in a new
1853 * configuration.
1854 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;   /* Counterpart of rt_unlock_table() */
}
1860
1861 /**
1862 * rt_unlock_table - unlock a routing table
1863 * @r: routing table to be unlocked
1864 *
1865 * Unlock a routing table formerly locked by rt_lock_table(),
1866 * that is decrease its use count and delete it if it's scheduled
1867 * for deletion by configuration changes.
1868 */
void
rt_unlock_table(rtable *r)
{
  /* Last user gone and the table was removed from config - tear it down */
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      /* Detach from config and free all table resources */
      r->config->table = NULL;
      if (r->hostcache)
        rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      /* The old config was kept alive by this table - let it go */
      config_del_obstacle(conf);
    }
}
1886
1887 /**
1888 * rt_commit - commit new routing table configuration
1889 * @new: new configuration
1890 * @old: original configuration or %NULL if it's boot time config
1891 *
1892 * Scan differences between @old and @new configuration and modify
1893 * the routing tables according to these changes. If @new defines a
1894 * previously unknown table, create it, if it omits a table existing
1895 * in @old, schedule it for deletion (it gets deleted when all protocols
1896 * disconnect from it by calling rt_unlock_table()), if it exists
1897 * in both configurations, leave it unchanged.
1898 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      /* Match tables of the old config against the new one */
      WALK_LIST(o, old->tables)
        {
          rtable *ot = o->table;
          if (!ot->deleted)
            {
              struct symbol *sym = cf_find_symbol(new, o->name);
              if (sym && sym->class == SYM_TABLE && !new->shutdown)
                {
                  /* Table of the same name exists - keep it, rebind it
                     to the new config */
                  DBG("\t%s: same\n", o->name);
                  r = sym->def;
                  r->table = ot;
                  ot->name = r->name;
                  ot->config = r;
                  if (o->sorted != r->sorted)
                    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
                }
              else
                {
                  /* Table gone from the config (or shutdown) - mark it
                     for deletion; the lock/unlock pair frees it right
                     away unless protocols still hold a lock */
                  DBG("\t%s: deleted\n", o->name);
                  ot->deleted = old;
                  config_add_obstacle(old);
                  rt_lock_table(ot);
                  rt_unlock_table(ot);
                }
            }
        }
    }

  /* Create tables that appear for the first time in the new config */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
        rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
        DBG("\t%s: created\n", r->name);
        rt_setup(rt_table_pool, t, r->name, r);
        add_tail(&routing_tables, &t->n);
        r->table = t;
      }
  DBG("\tdone\n");
}
1946
1947 static inline void
1948 do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
1949 {
1950 rte_update_lock();
1951 if (type == RA_ACCEPTED)
1952 rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
1953 else if (type == RA_MERGED)
1954 rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
1955 else
1956 rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
1957 rte_update_unlock();
1958 }
1959
1960 /**
1961 * rt_feed_baby - advertise routes to a new protocol
1962 * @p: protocol to be fed
1963 *
1964 * This function performs one pass of advertisement of routes to a newly
1965 * initialized protocol. It's called by the protocol code as long as it
1966 * has something to do. (We avoid transferring all the routes in single
1967 * pass in order not to monopolize CPU time.)
1968 */
int
rt_feed_baby(struct proto *p)
{
  struct announce_hook *h;
  struct fib_iterator *fit;
  int max_feed = 256;   /* Budget of routes announced per call */

  if (!p->feed_ahook)       /* Need to initialize first */
    {
      if (!p->ahooks)
        return 1;
      DBG("Announcing routes to new protocol %s\n", p->name);
      p->feed_ahook = p->ahooks;
      fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator));
      goto next_hook;
    }
  fit = p->feed_iterator;

again:
  h = p->feed_ahook;
  FIB_ITERATE_START(&h->table->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e = n->routes;
      if (max_feed <= 0)
        {
          /* Budget exhausted - save the position and continue next time */
          FIB_ITERATE_PUT(fit, fn);
          return 0;
        }

      /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      /* Single-route modes get just the (valid) best route of the net */
      if ((p->accept_ra_types == RA_OPTIMAL) ||
          (p->accept_ra_types == RA_ACCEPTED) ||
          (p->accept_ra_types == RA_MERGED))
        if (rte_is_valid(e))
          {
            if (p->export_state != ES_FEEDING)
              return 1;  /* In the meantime, the protocol fell down. */

            do_feed_baby(p, p->accept_ra_types, h, n, e);
            max_feed--;
          }

      /* RA_ANY mode gets every valid route of the net */
      if (p->accept_ra_types == RA_ANY)
        for(e = n->routes; e; e = e->next)
          {
            if (p->export_state != ES_FEEDING)
              return 1;  /* In the meantime, the protocol fell down. */

            if (!rte_is_valid(e))
              continue;

            do_feed_baby(p, RA_ANY, h, n, e);
            max_feed--;
          }
    }
  FIB_ITERATE_END(fn);
  /* This table is done - move to the protocol's next announce hook */
  p->feed_ahook = h->next;
  if (!p->feed_ahook)
    {
      mb_free(p->feed_iterator);
      p->feed_iterator = NULL;
      return 1;
    }

next_hook:
  h = p->feed_ahook;
  FIB_ITERATE_INIT(fit, &h->table->fib);
  goto again;
}
2040
2041 /**
2042 * rt_feed_baby_abort - abort protocol feeding
2043 * @p: protocol
2044 *
2045 * This function is called by the protocol code when the protocol
2046 * stops or ceases to exist before the last iteration of rt_feed_baby()
2047 * has finished.
2048 */
2049 void
2050 rt_feed_baby_abort(struct proto *p)
2051 {
2052 if (p->feed_ahook)
2053 {
2054 /* Unlink the iterator and exit */
2055 fit_get(&p->feed_ahook->table->fib, p->feed_iterator);
2056 p->feed_ahook = NULL;
2057 }
2058 }
2059
2060
/* Mix the bits of a pointer value into a simple hash */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  uintptr_t mixed = v ^ (v << 8) ^ (v >> 16);
  return mixed;
}
2067
2068 static inline unsigned
2069 hc_hash(ip_addr a, rtable *dep)
2070 {
2071 return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff;
2072 }
2073
/* Prepend @he to its hash chain; the chain index is derived from the
   precomputed 16-bit hash key by dropping hash_shift low bits. */
static inline void
hc_insert(struct hostcache *hc, struct hostentry *he)
{
  uint k = he->hash_key >> hc->hash_shift;
  he->next = hc->hash_table[k];
  hc->hash_table[k] = he;
}
2081
2082 static inline void
2083 hc_remove(struct hostcache *hc, struct hostentry *he)
2084 {
2085 struct hostentry **hep;
2086 uint k = he->hash_key >> hc->hash_shift;
2087
2088 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2089 *hep = he->next;
2090 }
2091
/* Hostcache hash table sizing. HC_HI_MARK / HC_LO_MARK are operator
   fragments pasted after the table size in hc_alloc_table(). */
#define HC_DEF_ORDER 10      /* Initial table order (1 << order buckets) */
#define HC_HI_MARK *4        /* Grow when items exceed size * 4 */
#define HC_HI_STEP 2         /* Grow by this many orders at once */
#define HC_HI_ORDER 16       /* Must be at most 16 */
#define HC_LO_MARK /5        /* Shrink when items drop below size / 5 */
#define HC_LO_STEP 2         /* Shrink by this many orders at once */
#define HC_LO_ORDER 10       /* No shrinking at or below this order */
2099
/* Allocate a zeroed hash table of 2^order buckets and set the resize
   thresholds (hash_max / hash_min) accordingly. */
static void
hc_alloc_table(struct hostcache *hc, unsigned order)
{
  uint hsize = 1 << order;
  hc->hash_order = order;
  hc->hash_shift = 16 - order;   /* hash keys are 16-bit, see hc_hash() */
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
}
2111
2112 static void
2113 hc_resize(struct hostcache *hc, unsigned new_order)
2114 {
2115 struct hostentry **old_table = hc->hash_table;
2116 struct hostentry *he, *hen;
2117 uint old_size = 1 << hc->hash_order;
2118 uint i;
2119
2120 hc_alloc_table(hc, new_order);
2121 for (i = 0; i < old_size; i++)
2122 for (he = old_table[i]; he != NULL; he=hen)
2123 {
2124 hen = he->next;
2125 hc_insert(hc, he);
2126 }
2127 mb_free(old_table);
2128 }
2129
/* Allocate a new hostentry for (address @a, link-local @ll, dependent
   table @dep), link it into the hostcache and grow the hash table when
   the high-water mark is crossed. @k is the precomputed hc_hash() key. */
static struct hostentry *
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  he->addr = a;
  he->link = ll;
  he->tab = dep;
  he->hash_key = k;
  he->uc = 0;       /* No users yet */
  he->src = NULL;   /* Not resolved yet */

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, hc->hash_order + HC_HI_STEP);

  return he;
}
2151
/* Unlink and free hostentry @he, releasing its cached src attributes
   and shrinking the hash table when it becomes too sparse. */
static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
  rta_free(he->src);

  rem_node(&he->ln);
  hc_remove(hc, he);
  sl_free(hc->slab, he);

  hc->hash_items--;
  if (hc->hash_items < hc->hash_min)
    hc_resize(hc, hc->hash_order - HC_LO_STEP);
}
2165
/* Lazily create the hostcache of table @tab: the entry list, hash table,
   entry slab and the prefix trie used to filter relevant route changes. */
static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, HC_DEF_ORDER);
  hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));

  hc->lp = lp_new(rt_table_pool, 1008);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  tab->hostcache = hc;
}
2181
/* Free the hostcache of table @tab. All entries should be unused by now;
   still-referenced ones are reported and freed along with the slab. */
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      if (he->uc)
        log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  /* Freeing the slab releases all hostentries at once */
  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}
2202
2203 static void
2204 rt_notify_hostcache(rtable *tab, net *net)
2205 {
2206 struct hostcache *hc = tab->hostcache;
2207
2208 if (tab->hcu_scheduled)
2209 return;
2210
2211 if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen))
2212 rt_schedule_hcu(tab);
2213 }
2214
2215 static int
2216 if_local_addr(ip_addr a, struct iface *i)
2217 {
2218 struct ifa *b;
2219
2220 WALK_LIST(b, i->addrs)
2221 if (ipa_equal(a, b->ip))
2222 return 1;
2223
2224 return 0;
2225 }
2226
/* Extract an IGP metric from route @rt for hostentry resolution;
   returns IGP_METRIC_UNKNOWN when no usable metric is available. */
static u32
rt_get_igp_metric(rte *rt)
{
  /* An explicit generic IGP metric attribute always wins */
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  /* Device routes */
  if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
    return 0;

  return IGP_METRIC_UNKNOWN;
}
2255
/*
 * Recompute the resolved next hop data of hostentry @he from table @tab.
 * Returns whether the entry's src attributes changed, in which case
 * dependent tables need a next hop update.
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->gw = IPA_NONE;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  /* Longest-prefix match for the host address */
  net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.pxlen;

      if (a->hostentry)
        {
          /* Recursive route should not depend on another recursive route */
          log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d",
              he->addr, n->n.prefix, pxlen);
          goto done;
        }

      if (a->dest == RTD_DEVICE)
        {
          if (if_local_addr(he->addr, a->iface))
            {
              /* The host address is a local address, this is not valid */
              log(L_WARN "Next hop address %I is a local address of iface %s",
                  he->addr, a->iface->name);
              goto done;
            }

          /* The host is directly reachable, use link as a gateway */
          he->gw = he->link;
          he->dest = RTD_ROUTER;
        }
      else
        {
          /* The host is reachable through some route entry */
          he->gw = a->gw;
          he->dest = a->dest;
        }

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);

  rta_free(old_src);
  return old_src != he->src;
}
2315
/*
 * Recompute all used hostentries of @tab's hostcache, drop unused ones,
 * and request next hop updates in tables depending on entries that
 * changed. The resolution trie is rebuilt from scratch along the way.
 */
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));

  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
        {
          /* No users left - drop the entry */
          hc_delete_hostentry(hc, he);
          continue;
        }

      if (rt_update_hostentry(tab, he))
        rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}
2342
2343 static struct hostentry *
2344 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2345 {
2346 struct hostentry *he;
2347
2348 if (!tab->hostcache)
2349 rt_init_hostcache(tab);
2350
2351 uint k = hc_hash(a, dep);
2352 struct hostcache *hc = tab->hostcache;
2353 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2354 if (ipa_equal(he->addr, a) && (he->tab == dep))
2355 return he;
2356
2357 he = hc_new_hostentry(hc, a, ll, dep, k);
2358 rt_update_hostentry(tab, he);
2359 return he;
2360 }
2361
2362 void
2363 rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
2364 {
2365 rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
2366 }
2367
2368
2369 /*
2370 * CLI commands
2371 */
2372
2373 static byte *
2374 rt_format_via(rte *e)
2375 {
2376 rta *a = e->attrs;
2377
2378 /* Max text length w/o IP addr and interface name is 16 */
2379 static byte via[STD_ADDRESS_P_LENGTH+sizeof(a->iface->name)+16];
2380
2381 switch (a->dest)
2382 {
2383 case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
2384 case RTD_DEVICE: bsprintf(via, "dev %s", a->iface->name); break;
2385 case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break;
2386 case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break;
2387 case RTD_PROHIBIT: bsprintf(via, "prohibited"); break;
2388 case RTD_MULTIPATH: bsprintf(via, "multipath"); break;
2389 default: bsprintf(via, "???");
2390 }
2391 return via;
2392 }
2393
/*
 * rt_show_rte - print one route @e on CLI connection @c
 *
 * @ia is the prefix string (callers blank it after the first route of a
 * net so it is printed once); @tmpa are the temporary attributes already
 * extracted for the route. Prints the main route line, the multipath
 * next hops, and in verbose mode the full attribute dump.
 */
static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte from[STD_ADDRESS_P_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);	/* first route on the list is the one in use */
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
  struct mpnh *nh;

  tm_format_datetime(tm, &config->tf_route, e->lastmod);

  /* Show "from <addr>" only when it differs from the gateway address */
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;

  get_route_info = a->src->proto->proto->get_route_info;
  if (get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));	/* stack allocation sized by ea_scan() */
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (get_route_info)
    get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);

  /* '*' marks the primary route, '!' a primary route with kernel sync error */
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name,
	     tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  for (nh = a->nexthops; nh; nh = nh->next)
    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
  if (d->verbose)
    rta_show(c, a, tmpa);
}
2432
/*
 * rt_show_net - show the routes of network @n according to request @d
 *
 * Walks the route list of @n, applies the requested filtering / export
 * simulation, updates the counters in @d and prints matching routes.
 * The prefix string is emitted with the first shown route only.
 */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[STD_ADDRESS_P_LENGTH+8];
  struct ea_list *tmpa;
  struct announce_hook *a = NULL;
  int first = 1;	/* this net not yet counted in net_counter */
  int pass = 0;		/* set once the single exportable route was handled */

  bsprintf(ia, "%I/%d", n->n.prefix, n->n.pxlen);

  if (d->export_mode)
    {
      /* Export simulation needs a protocol with rt_notify and an announce hook */
      if (! d->export_protocol->rt_notify)
	return;

      a = proto_find_announce_hook(d->export_protocol, d->table);
      if (!a)
	return;
    }

  for (e = n->routes; e; e = e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
	continue;

      d->rt_counter++;
      d->net_counter += first;
      first = 0;

      /* In OPTIMAL/ACCEPTED/MERGED export modes only one route per net is
	 shown; the rest are still counted above but skipped here */
      if (pass)
	continue;

      ee = e;
      rte_update_lock();		/* We use the update buffer for filtering */
      tmpa = rte_make_tmp_attrs(e, rte_update_pool);

      /* Special case for merged export */
      if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
	{
	  rte *rt_free;
	  e = rt_export_merged(a, n, &rt_free, &tmpa, rte_update_pool, 1);
	  pass = 1;

	  if (!e)
	    { e = ee; goto skip; }
	}
      else if (d->export_mode)
	{
	  struct proto *ep = d->export_protocol;
	  int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;

	  if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
	    pass = 1;

	  if (ic < 0)
	    goto skip;		/* rejected by import control */

	  if (d->export_mode > RSEM_PREEXPORT)
	    {
	      /*
	       * FIXME - This shows what should be exported according to current
	       * filters, but not what was really exported. 'configure soft'
	       * command may change the export filter and do not update routes.
	       */
	      int do_export = (ic > 0) ||
		(f_run(a->out_filter, &e, &tmpa, rte_update_pool,
		       FF_FORCE_TMPATTR | FF_SILENT) <= F_ACCEPT);

	      /* 'export' mode wants accepted routes, 'noexport' the rejected ones */
	      if (do_export != (d->export_mode == RSEM_EXPORT))
		goto skip;

	      if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED))
		pass = 1;
	    }
	}

      if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
	goto skip;

      if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
	goto skip;

      d->show_counter++;
      if (d->stats < 2)
	rt_show_rte(c, ia, e, d, tmpa);
      ia[0] = 0;	/* print the prefix with the first shown route only */

    skip:
      /* Filters may have created a writable copy of the route -- drop it */
      if (e != ee)
	{
	  rte_free(e);
	  e = ee;
	}
      rte_update_unlock();

      if (d->primary_only)
	break;
    }
}
2534
/*
 * rt_show_cont - continuation hook for asynchronous 'show route'
 *
 * Invoked repeatedly by the CLI. Processes at most @max networks per
 * invocation and then saves the iterator position (FIB_ITERATE_PUT) so
 * the dump can resume later without blocking the daemon.
 */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;	/* tiny batches to exercise the resume path */
#else
  unsigned max = 64;
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  FIB_ITERATE_START(fib, it, f)
    {
      net *n = (net *) f;

      /* Abort if a reconfiguration happened -- d->table may be stale */
      if (d->running_on_config && d->running_on_config != config)
	{
	  cli_printf(c, 8004, "Stopped due to reconfiguration");
	  goto done;
	}
      if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
	{
	  cli_printf(c, 8005, "Protocol is down");
	  goto done;
	}
      if (!max--)
	{
	  /* Batch exhausted -- remember the position and yield to the CLI */
	  FIB_ITERATE_PUT(it, f);
	  return;
	}
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END(f);
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
 done:
  /* Dump finished or aborted -- detach the continuation/cleanup hooks */
  c->cont = c->cleanup = NULL;
}
2575
/*
 * rt_show_cleanup - CLI cleanup hook for an interrupted 'show route'
 *
 * Called when the CLI connection goes away in the middle of an
 * asynchronous dump; removes the stored FIB iterator from the table so
 * it does not dangle.
 */
static void
rt_show_cleanup(struct cli *c)
{
  struct rt_show_data *d = c->rover;

  /* Unlink the iterator */
  fit_get(&d->table->fib, &d->fit);
}
2584
/*
 * rt_show - entry point of the 'show route' CLI command
 *
 * Picks the routing table to inspect, then either starts an asynchronous
 * walk over the whole table (pxlen == 256 serves as the "no prefix given"
 * sentinel) or looks up a single network and prints it synchronously.
 */
void
rt_show(struct rt_show_data *d)
{
  net *n;

  /* Default is either a master table or a table related to a respective protocol */
  if (!d->table && d->export_protocol) d->table = d->export_protocol->table;
  if (!d->table && d->show_protocol) d->table = d->show_protocol->table;
  if (!d->table) d->table = config->master_rtc->table;

  /* Filtered routes are neither exported nor have sensible ordering */
  if (d->filtered && (d->export_mode || d->primary_only))
    cli_msg(0, "");
  /* NOTE(review): cli_msg(0, ...) terminates the CLI reply, yet control
     falls through and may emit further output below. This looks like a
     missing 'return' -- confirm against upstream before changing. */

  if (d->pxlen == 256)
    {
      /* Whole-table dump: processed in batches via the continuation hook */
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      if (d->show_for)
	n = net_route(d->table, d->prefix, d->pxlen);	/* longest-prefix match ('for') */
      else
	n = net_find(d->table, d->prefix, d->pxlen);	/* exact match */

      if (n)
	rt_show_net(this_cli, n, d);

      if (d->rt_counter)
	cli_msg(0, "");
      else
	cli_msg(8001, "Network not in table");
    }
}
2622
/*
 * Documentation for functions declared inline in route.h
 *
 * The block below is never compiled (#if 0); it exists only so that the
 * documentation extractor can pick up doc comments for inline functions
 * whose real definitions live in route.h.
 */
#if 0

/**
 * net_find - find a network entry
 * @tab: a routing table
 * @addr: address of the network
 * @len: length of the network prefix
 *
 * net_find() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry or %NULL if no such network
 * exists.
 */
static inline net *net_find(rtable *tab, ip_addr addr, unsigned len)
{ DUMMY; }

/**
 * net_get - obtain a network entry
 * @tab: a routing table
 * @addr: address of the network
 * @len: length of the network prefix
 *
 * net_get() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry. If no such entry exists, it's
 * created.
 */
static inline net *net_get(rtable *tab, ip_addr addr, unsigned len)
{ DUMMY; }

/**
 * rte_cow - copy a route for writing
 * @r: a route entry to be copied
 *
 * rte_cow() takes a &rte and prepares it for modification. The exact action
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
 * just returned unchanged, else a new temporary entry with the same contents
 * is created.
 *
 * The primary use of this function is inside the filter machinery -- when
 * a filter wants to modify &rte contents (to change the preference or to
 * attach another set of attributes), it must ensure that the &rte is not
 * shared with anyone else (and especially that it isn't stored in any routing
 * table).
 *
 * Result: a pointer to the new writable &rte.
 */
static inline rte * rte_cow(rte *r)
{ DUMMY; }

#endif