]> git.ipfire.org Git - thirdparty/bird.git/blob - nest/rt-table.c
Implements router advertisements activated by received routes.
[thirdparty/bird.git] / nest / rt-table.c
1 /*
2 * BIRD -- Routing Tables
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
23 *
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
29 */
30
31 #undef LOCAL_DEBUG
32
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/cli.h"
37 #include "nest/iface.h"
38 #include "lib/resource.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "conf/conf.h"
42 #include "filter/filter.h"
43 #include "lib/string.h"
44 #include "lib/alloca.h"
45
pool *rt_table_pool;			/* Resource pool all routing tables live in */

static slab *rte_slab;			/* Slab allocator for &rte entries */
static linpool *rte_update_pool;	/* Linear pool for temporary data of one rte_update() run */

static list routing_tables;		/* List of all existing routing tables */

/* Forward declarations for helpers defined later in this file */
static void rt_format_via(rte *e, byte *via);
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);

static inline void rt_schedule_gc(rtable *tab);
60
61 /* Like fib_route(), but skips empty net entries */
62 static net *
63 net_route(rtable *tab, ip_addr a, int len)
64 {
65 ip_addr a0;
66 net *n;
67
68 while (len >= 0)
69 {
70 a0 = ipa_and(a, ipa_mkmask(len));
71 n = fib_find(&tab->fib, &a0, len);
72 if (n && rte_is_valid(n->routes))
73 return n;
74 len--;
75 }
76 return NULL;
77 }
78
79 static void
80 rte_init(struct fib_node *N)
81 {
82 net *n = (net *) N;
83
84 N->flags = 0;
85 n->routes = NULL;
86 }
87
88 /**
89 * rte_find - find a route
90 * @net: network node
91 * @p: protocol
92 *
93 * The rte_find() function returns a route for destination @net
94 * which belongs has been defined by protocol @p.
95 */
96 rte *
97 rte_find(net *net, struct proto *p)
98 {
99 rte *e = net->routes;
100
101 while (e && e->attrs->proto != p)
102 e = e->next;
103 return e;
104 }
105
106 /**
107 * rte_get_temp - get a temporary &rte
108 * @a: attributes to assign to the new route (a &rta; in case it's
109 * un-cached, rte_update() will create a cached copy automatically)
110 *
111 * Create a temporary &rte and bind it with the attributes @a.
112 * Also set route preference to the default preference set for
113 * the protocol.
114 */
115 rte *
116 rte_get_temp(rta *a)
117 {
118 rte *e = sl_alloc(rte_slab);
119
120 e->attrs = a;
121 e->flags = 0;
122 e->pref = a->proto->preference;
123 return e;
124 }
125
126 rte *
127 rte_do_cow(rte *r)
128 {
129 rte *e = sl_alloc(rte_slab);
130
131 memcpy(e, r, sizeof(rte));
132 e->attrs = rta_clone(r->attrs);
133 e->flags = 0;
134 return e;
135 }
136
137 static int /* Actually better or at least as good as */
138 rte_better(rte *new, rte *old)
139 {
140 int (*better)(rte *, rte *);
141
142 if (!rte_is_valid(old))
143 return 1;
144 if (!rte_is_valid(new))
145 return 0;
146
147 if (new->pref > old->pref)
148 return 1;
149 if (new->pref < old->pref)
150 return 0;
151 if (new->attrs->proto->proto != old->attrs->proto->proto)
152 {
153 /*
154 * If the user has configured protocol preferences, so that two different protocols
155 * have the same preference, try to break the tie by comparing addresses. Not too
156 * useful, but keeps the ordering of routes unambiguous.
157 */
158 return new->attrs->proto->proto > old->attrs->proto->proto;
159 }
160 if (better = new->attrs->proto->rte_better)
161 return better(new, old);
162 return 0;
163 }
164
165 static void
166 rte_trace(struct proto *p, rte *e, int dir, char *msg)
167 {
168 byte via[STD_ADDRESS_P_LENGTH+32];
169
170 rt_format_via(e, via);
171 log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, via);
172 }
173
174 static inline void
175 rte_trace_in(unsigned int flag, struct proto *p, rte *e, char *msg)
176 {
177 if (p->debug & flag)
178 rte_trace(p, e, '>', msg);
179 }
180
181 static inline void
182 rte_trace_out(unsigned int flag, struct proto *p, rte *e, char *msg)
183 {
184 if (p->debug & flag)
185 rte_trace(p, e, '<', msg);
186 }
187
/*
 * export_filter - run export checks for one route towards one protocol
 *
 * Runs the protocol's import_control() hook and the announce hook's
 * export filter on @rt0. Returns the (possibly privately copied and
 * modified) route on accept, or NULL on reject. If a private copy was
 * created, it is also stored to *@rt_free and the caller must free it
 * with rte_free() after use. With @silent set, rejections update no
 * statistics and produce no trace output (used when we only need to
 * know whether a route would have been exported).
 */
static rte *
export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
{
  struct proto *p = ah->proto;
  struct filter *filter = ah->out_filter;
  struct proto_stats *stats = ah->stats;
  ea_list *tmpb = NULL;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  /* If the caller does not care for eattrs, we prepare them internally */
  if (!tmpa)
    {
      struct proto *src = rt->attrs->proto;
      tmpb = src->make_tmp_attrs ? src->make_tmp_attrs(rt, rte_update_pool) : NULL;
      tmpa = &tmpb;
    }

  /* The protocol hook may veto (v < 0) or force-accept (v > 0) the route */
  v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
  if (v < 0)
    {
      if (silent)
	goto reject;

      stats->exp_updates_rejected++;
      if (v == RIC_REJECT)
	rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
      goto reject;
    }
  if (v > 0)
    {
      if (!silent)
	rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
    }

  /* Otherwise, the route must pass the export filter */
  v = filter && ((filter == FILTER_REJECT) ||
		 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
	goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
    }

 accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}
250
/*
 * do_rt_notify - deliver one update/withdraw to a protocol's rt_notify() hook
 *
 * Applies the export limit, maintains export statistics, traces the
 * change and finally calls the protocol's rt_notify() hook, temporarily
 * splicing the temporary attribute list @tmpa in front of the route's
 * own extended attributes.
 */
static void
do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;


  /*
   * First, apply export limit.
   *
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
   * limit is breached and whether the update is new or not. Therefore
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */

  struct proto_limit *l = ah->out_limit;
  if (l && new)
    {
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
	proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);

      if (l->state == PLS_BLOCKED)
	{
	  stats->exp_routes++;	/* see note above */
	  stats->exp_updates_rejected++;
	  rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
	  new = NULL;

	  /* A blocked update with no old route means nothing to announce */
	  if (!old)
	    return;
	}
    }


  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
  if (new)
    stats->exp_routes++;
  if (old && !refeed)
    stats->exp_routes--;

  if (p->debug & D_ROUTES)
    {
      if (new && old)
	rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
	rte_trace_out(D_ROUTES, p, new, "added");
      else if (old)
	rte_trace_out(D_ROUTES, p, old, "removed");
    }
  if (!new)
    p->rt_notify(p, ah->table, net, NULL, old, NULL);
  else if (tmpa)
    {
      /* Temporarily chain new's eattrs after @tmpa for the hook call,
	 then unlink them again so @tmpa stays reusable */
      ea_list *t = tmpa;
      while (t->next)
	t = t->next;
      t->next = new->attrs->eattrs;
      p->rt_notify(p, ah->table, net, new, old, tmpa);
      t->next = NULL;
    }
  else
    p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
}
339
/*
 * rt_notify_basic - announce a route change to one RA_OPTIMAL/RA_ANY hook
 *
 * Runs export filters on both @new and @old (to determine whether @old
 * was previously exported) and forwards the resulting pair to
 * do_rt_notify(). Temporary route copies created by the filters are
 * freed before returning.
 */
static void
rt_notify_basic(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  // struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *new_free = NULL;
  rte *old_free = NULL;

  if (new)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
   * even try to run the filter on an old route, This may lead to
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(ah, new, &new_free, &tmpa, 0);

  /* Silent run: we only need to know whether old passed the filter */
  if (old && !refeed)
    old = export_filter(ah, old, &old_free, NULL, 1);

  /* FIXME - This is broken because of incorrect 'old' value (see above) */
  if (!new && !old)
    return;

  do_rt_notify(ah, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
392
/*
 * rt_notify_accepted - announce a change to an RA_ACCEPTED hook
 *
 * RA_ACCEPTED protocols receive only the first route accepted by their
 * export filters. This function therefore elects both the new and the
 * old "first accepted" route for @net and calls do_rt_notify() only
 * when that choice actually changed. @feed is 0 for a normal update,
 * 1 for initial feed and 2 for refeed.
 */
static void
rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old,
		   ea_list *tmpa, int feed)
{
  // struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
  rte *r;

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
	break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
	old_meet = 1;
    }

  /*
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed.
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)	/* refeed */
	old_best = new_best ? new_best :
	  (rte_is_valid(net->routes) ? net->routes : NULL);
      else
	old_best = NULL;

      if (!new_best && !old_best)
	return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      if (new_free)
	rte_free(new_free);
      return;
    }

  /* Fourth case */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(ah, r, &old_free, NULL, 1))
	goto found;

      if (r == before_old)
	if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
	  goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
510
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
 * @net: network in question
 * @new: the new route to be announced
 * @old: the previous route for the same network
 * @tmpa: a list of temporary attributes belonging to the new route
 *
 * This function gets a routing table update and announces it
 * to all protocols that accept given type of route announcement
 * and are connected to the same table by their announcement hooks.
 *
 * Route announcement of type RA_OPTIMAL is generated when optimal
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
 *
 * Route announcement of type RA_ANY is generated when any route (in
 * routing table @tab) changes. In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.). Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
 */
static void
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old, ea_list *tmpa)
{
  /* Invalid routes are treated as nonexistent for announcement purposes */
  if (!rte_is_valid(old))
    old = before_old = NULL;

  if (!rte_is_valid(new))
    new = NULL;

  if (!old && !new)
    return;

  if (type == RA_OPTIMAL)
    {
      /* Keep per-protocol counters of preferred (optimal) routes in sync */
      if (new)
	new->attrs->proto->stats.pref_routes++;
      if (old)
	old->attrs->proto->stats.pref_routes--;

      if (tab->hostcache)
	rt_notify_hostcache(tab, net);
    }

  struct announce_hook *a;
  WALK_LIST(a, tab->hooks)
    {
      ASSERT(a->proto->core_state == FS_HAPPY || a->proto->core_state == FS_FEEDING);
      if (a->proto->accept_ra_types == type)
	if (type == RA_ACCEPTED)
	  rt_notify_accepted(a, net, new, old, before_old, tmpa, 0);
	else
	  rt_notify_basic(a, net, new, old, tmpa, 0);
    }
}
574
575 static inline int
576 rte_validate(rte *e)
577 {
578 int c;
579 net *n = e->net;
580
581 if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
582 {
583 log(L_WARN "Ignoring bogus prefix %I/%d received via %s",
584 n->n.prefix, n->n.pxlen, e->sender->proto->name);
585 return 0;
586 }
587
588 c = ipa_classify_net(n->n.prefix);
589 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
590 {
591 log(L_WARN "Ignoring bogus route %I/%d received via %s",
592 n->n.prefix, n->n.pxlen, e->sender->proto->name);
593 return 0;
594 }
595
596 return 1;
597 }
598
599 /**
600 * rte_free - delete a &rte
601 * @e: &rte to be deleted
602 *
603 * rte_free() deletes the given &rte from the routing table it's linked to.
604 */
605 void
606 rte_free(rte *e)
607 {
608 if (e->attrs->aflags & RTAF_CACHED)
609 rta_free(e->attrs);
610 sl_free(rte_slab, e);
611 }
612
/* Like rte_free(), but the attributes are known to be cached */
static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}
619
620 static int
621 rte_same(rte *x, rte *y)
622 {
623 return
624 x->attrs == y->attrs &&
625 x->flags == y->flags &&
626 x->pflags == y->pflags &&
627 x->pref == y->pref &&
628 (!x->attrs->proto->rte_same || x->attrs->proto->rte_same(x, y));
629 }
630
631 static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
632
/*
 * rte_recalculate - replace a route in a network entry and re-elect the best
 *
 * Removes the previous route from @src in @net (if any), applies the
 * receive and import limits, links @new (if any) into the route list -
 * either at sorted position or after best-route re-election - and
 * announces all resulting changes. Ownership of @new is consumed: it is
 * either linked into the table or freed here.
 */
static void
rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, struct proto *src)
{
  struct proto *p = ah->proto;
  struct rtable *table = ah->table;
  struct proto_stats *stats = ah->stats;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->attrs->proto == src)
	{
	  /* If there is the same route in the routing table but from
	   * a different sender, then there are two paths from the
	   * source protocol to this routing table through transparent
	   * pipes, which is not allowed.
	   *
	   * We log that and ignore the route. If it is withdraw, we
	   * ignore it completely (there might be 'spurious withdraws',
	   * see FIXME in do_rte_announce())
	   */
	  if (old->sender->proto != p)
	    {
	      if (new)
		{
		  log(L_ERR "Pipe collision detected when sending %I/%d to table %s",
		      net->n.prefix, net->n.pxlen, table->name);
		  rte_free_quick(new);
		}
	      return;
	    }

	  if (new && rte_same(old, new))
	    {
	      /* No changes, ignore the new route */

	      if (!rte_is_filtered(new))
		{
		  stats->imp_updates_ignored++;
		  rte_trace_in(D_ROUTES, p, new, "ignored");
		}

	      rte_free_quick(new);
#ifdef CONFIG_RIP
	      /* lastmod is used internally by RIP as the last time
		 when the route was received. */
	      if (src->proto == &proto_rip)
		old->lastmod = now;
#endif
	      return;
	    }
	  *k = old->next;
	  break;
	}
      k = &old->next;
      before_old = old;
    }

  /* before_old is meaningful only when an old route was actually found */
  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  /* Receive limit counts all routes, including the filtered ones */
  struct proto_limit *l = ah->rx_limit;
  if (l && !old && new)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
	proto_notify_limit(ah, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In receive limit the situation is simple, old is NULL so
	     we just free new and exit like nothing happened */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
	  rte_free_quick(new);
	  return;
	}
    }

  l = ah->in_limit;
  if (l && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
	proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In import limit the situation is more complicated. We
	     shouldn't just drop the route, we should handle it like
	     it was filtered. We also have to continue the route
	     processing if old or new is non-NULL, but we should exit
	     if both are NULL as this case is probably assumed to be
	     already handled. */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

	  if (ah->in_keep_filtered)
	    new->flags |= REF_FILTERED;
	  else
	    { rte_free_quick(new); new = NULL; }

	  /* Note that old && !new could be possible when
	     ah->in_keep_filtered changed in the recent past. */

	  if (!old && !new)
	    return;

	  new_ok = 0;
	  goto skip_stats1;
	}
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

 skip_stats1:

  /* Filtered routes are counted separately from imported ones */
  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
	{
	  if (before_old && !rte_better(new, before_old))
	    k = &before_old->next;
	  else
	    k = &net->routes;

	  for (; *k; k=&(*k)->next)
	    if (rte_better(new, *k))
	      break;

	  new->next = *k;
	  *k = new;
	}
    }
  else
    {
      /* If routes are not sorted, find the best route and move it on
	 the first position. There are several optimized cases. */

      if (src->rte_recalculate && src->rte_recalculate(table, net, new, old, old_best))
	goto do_recalculate;

      if (new && rte_better(new, old_best))
	{
	  /* The first case - the new route is clearly optimal,
	     we link it at the first position */

	  new->next = net->routes;
	  net->routes = new;
	}
      else if (old == old_best)
	{
	  /* The second case - the old best route disappeared, we add the
	     new route (if we have any) to the list (we don't care about
	     position) and then we elect the new optimal route and relink
	     that route at the first position and announce it. New optimal
	     route might be NULL if there is no more routes */

	do_recalculate:
	  /* Add the new route to the list */
	  if (new)
	    {
	      new->next = net->routes;
	      net->routes = new;
	    }

	  /* Find a new optimal route (if there is any) */
	  if (net->routes)
	    {
	      rte **bp = &net->routes;
	      for (k=&(*bp)->next; *k; k=&(*k)->next)
		if (rte_better(*k, *bp))
		  bp = k;

	      /* And relink it */
	      rte *best = *bp;
	      *bp = best->next;
	      best->next = net->routes;
	      net->routes = best;
	    }
	}
      else if (new)
	{
	  /* The third case - the new route is not better than the old
	     best route (therefore old_best != NULL) and the old best
	     route was not removed (therefore old_best == net->routes).
	     We just link the new route after the old best route. */

	  ASSERT(net->routes != NULL);
	  new->next = net->routes->next;
	  net->routes->next = new;
	}
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
    }

  if (new)
    new->lastmod = now;

  /* Log the route change */
  if (p->debug & D_ROUTES)
    {
      if (new_ok)
	rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
	{
	  if (old != old_best)
	    rte_trace(p, old, '>', "removed");
	  else if (rte_is_ok(net->routes))
	    rte_trace(p, old, '>', "removed [replaced]");
	  else
	    rte_trace(p, old, '>', "removed [sole]");
	}
    }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, tmpa);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, tmpa);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, before_old, tmpa);

  /* If the entry became empty, consider scheduling a garbage collection run */
  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_gc(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}
893
static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  /* Flush the temporary linear pool only when the outermost update finishes */
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
908
/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
 * @ah: pointer to table announce hook
 * @net: network node
 * @p: protocol submitting the update
 * @src: protocol originating the update
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
 * same value as @new->attrs->proto. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores @p in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks,
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */

void
rte_update2(struct announce_hook *ah, net *net, rte *new, struct proto *src)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;
  struct filter *filter = ah->in_filter;
  ea_list *tmpa = NULL;

  rte_update_lock();
  if (new)
    {
      new->sender = ah;

      stats->imp_updates_received++;
      if (!rte_validate(new))
	{
	  rte_trace_in(D_FILTERS, p, new, "invalid");
	  stats->imp_updates_invalid++;
	  goto drop;
	}

      if (filter == FILTER_REJECT)
	{
	  stats->imp_updates_filtered++;
	  rte_trace_in(D_FILTERS, p, new, "filtered out");

	  if (! ah->in_keep_filtered)
	    goto drop;

	  /* new is a private copy, i could modify it */
	  new->flags |= REF_FILTERED;
	}
      else
	{
	  /* Convert protocol-dependent attributes to temporary eattrs
	     so the import filter can inspect and modify them */
	  if (src->make_tmp_attrs)
	    tmpa = src->make_tmp_attrs(new, rte_update_pool);
	  if (filter && (filter != FILTER_REJECT))
	    {
	      ea_list *old_tmpa = tmpa;
	      int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
	      if (fr > F_ACCEPT)
		{
		  stats->imp_updates_filtered++;
		  rte_trace_in(D_FILTERS, p, new, "filtered out");

		  if (! ah->in_keep_filtered)
		    goto drop;

		  new->flags |= REF_FILTERED;
		}
	      /* If the filter changed the attributes, store them back */
	      if (tmpa != old_tmpa && src->store_tmp_attrs)
		src->store_tmp_attrs(new, tmpa);
	    }
	}

      if (!(new->attrs->aflags & RTAF_CACHED)) /* Need to copy attributes */
	new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    stats->imp_withdraws_received++;

  rte_recalculate(ah, net, new, tmpa, src);
  rte_update_unlock();
  return;

 drop:
  /* Invalid/rejected update is treated as a withdraw of the old route */
  rte_free(new);
  rte_recalculate(ah, net, NULL, NULL, src);
  rte_update_unlock();
}
1022
1023 /* Independent call to rte_announce(), used from next hop
1024 recalculation, outside of rte_update(). new must be non-NULL */
1025 static inline void
1026 rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old)
1027 {
1028 struct proto *src;
1029 ea_list *tmpa;
1030
1031 rte_update_lock();
1032 src = new->attrs->proto;
1033 tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(new, rte_update_pool) : NULL;
1034 rte_announce(tab, type, n, new, old, NULL, tmpa);
1035 rte_update_unlock();
1036 }
1037
void
rte_discard(rtable *t, rte *old)	/* Non-filtered route deletion, used during garbage collection */
{
  /* Remove @old by pretending its originating protocol sent a withdraw */
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, NULL, old->attrs->proto);
  rte_update_unlock();
}
1045
/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter)
{
  net *n = net_find(t, prefix, pxlen);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  struct proto *src = rt->attrs->proto;
  ea_list *tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(rt, rte_update_pool) : NULL;
  int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);

  /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1073
/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This functions dumps contents of a &rte to debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1I/%2d ", n->n.prefix, n->n.pxlen);
  debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
  rta_dump(e->attrs);
  /* Let the owning protocol dump its protocol-specific attributes, if it can */
  if (e->attrs->proto->proto->dump_attrs)
    e->attrs->proto->proto->dump_attrs(e);
  debug("\n");
}
1091
/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  rte *e;
  net *n;
  struct announce_hook *a;

  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  /* Dump every route of every network entry in the FIB */
  FIB_WALK(&t->fib, fn)
    {
      n = (net *) fn;
      for(e=n->routes; e; e=e->next)
	rte_dump(e);
    }
  FIB_WALK_END;
  WALK_LIST(a, t->hooks)
    debug("\tAnnounces routes to protocol %s\n", a->proto->name);
  debug("\n");
}
1120
1121 /**
1122 * rt_dump_all - dump all routing tables
1123 *
1124 * This function dumps contents of all routing tables to debug output.
1125 */
1126 void
1127 rt_dump_all(void)
1128 {
1129 rtable *t;
1130
1131 WALK_LIST(t, routing_tables)
1132 rt_dump(t);
1133 }
1134
1135 static inline void
1136 rt_schedule_gc(rtable *tab)
1137 {
1138 if (tab->gc_scheduled)
1139 return;
1140
1141 tab->gc_scheduled = 1;
1142 ev_schedule(tab->rt_event);
1143 }
1144
1145 static inline void
1146 rt_schedule_hcu(rtable *tab)
1147 {
1148 if (tab->hcu_scheduled)
1149 return;
1150
1151 tab->hcu_scheduled = 1;
1152 ev_schedule(tab->rt_event);
1153 }
1154
1155 static inline void
1156 rt_schedule_nhu(rtable *tab)
1157 {
1158 if (tab->nhu_state == 0)
1159 ev_schedule(tab->rt_event);
1160
1161 /* state change 0->1, 2->3 */
1162 tab->nhu_state |= 1;
1163 }
1164
/*
 * rt_prune_nets - delete all network nodes left without any routes.
 * Walks the whole FIB; after each deletion the walk is restarted from
 * the saved iterator position, as the FIB iterator interface requires.
 */
static void
rt_prune_nets(rtable *tab)
{
  struct fib_iterator fit;
  int ncnt = 0, ndel = 0;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  FIB_ITERATE_INIT(&fit, &tab->fib);
again:
  FIB_ITERATE_START(&tab->fib, &fit, f)
    {
      net *n = (net *) f;
      ncnt++;
      if (!n->routes)		/* Orphaned FIB entry */
	{
	  FIB_ITERATE_PUT(&fit, f);
	  fib_delete(&tab->fib, f);
	  ndel++;
	  goto again;
	}
    }
  FIB_ITERATE_END(f);
  DBG("Pruned %d of %d networks\n", ndel, ncnt);

  /* Reset GC bookkeeping so a new collection can be scheduled */
  tab->gc_counter = 0;
  tab->gc_time = now;
  tab->gc_scheduled = 0;
}
1196
/*
 * rt_event - handler of the per-table event, dispatching the deferred
 * work requested by rt_schedule_hcu(), rt_schedule_nhu() and
 * rt_schedule_gc().
 */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->gc_scheduled)
    rt_prune_nets(tab);
}
1211
1212 void
1213 rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1214 {
1215 bzero(t, sizeof(*t));
1216 fib_init(&t->fib, p, sizeof(net), 0, rte_init);
1217 t->name = name;
1218 t->config = cf;
1219 init_list(&t->hooks);
1220 if (cf)
1221 {
1222 t->rt_event = ev_new(p);
1223 t->rt_event->hook = rt_event;
1224 t->rt_event->data = t;
1225 t->gc_time = now;
1226 }
1227 }
1228
1229 /**
1230 * rt_init - initialize routing tables
1231 *
1232 * This function is called during BIRD startup. It initializes the
1233 * routing table module.
1234 */
1235 void
1236 rt_init(void)
1237 {
1238 rta_init();
1239 rt_table_pool = rp_new(&root_pool, "Routing tables");
1240 rte_update_pool = lp_new(rt_table_pool, 4080);
1241 rte_slab = sl_new(rt_table_pool, sizeof(rte));
1242 init_list(&routing_tables);
1243 }
1244
1245
1246 /* Called from proto_schedule_flush_loop() only,
1247 ensuring that all prune states are zero */
1248 void
1249 rt_schedule_prune_all(void)
1250 {
1251 rtable *t;
1252
1253 WALK_LIST(t, routing_tables)
1254 t->prune_state = 1;
1255 }
1256
/*
 * rt_prune_step - one bounded step of pruning table @tab.
 * Discards routes whose sender protocol is shutting down (neither
 * FS_HAPPY nor FS_FEEDING), at most *@max_feed of them, and deletes
 * network entries left without routes.  Returns 1 when the table is
 * completely pruned, 0 when interrupted by the quota (the FIB iterator
 * position is saved for the next call).
 * prune_state: 0 = idle/done, 1 = scheduled, 2 = walk in progress.
 */
static inline int
rt_prune_step(rtable *tab, int *max_feed)
{
  struct fib_iterator *fit = &tab->prune_fit;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return 1;

  if (tab->prune_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->prune_state = 2;
    }

again:
  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e;

    rescan:
      /* Restart the scan after each removal; the list was modified */
      for (e=n->routes; e; e=e->next)
	if (e->sender->proto->core_state != FS_HAPPY &&
	    e->sender->proto->core_state != FS_FEEDING)
	  {
	    if (*max_feed <= 0)
	      {
		FIB_ITERATE_PUT(fit, fn);
		return 0;
	      }

	    rte_discard(tab, e);
	    (*max_feed)--;

	    goto rescan;
	  }
      if (!n->routes)		/* Orphaned FIB entry */
	{
	  FIB_ITERATE_PUT(fit, fn);
	  fib_delete(&tab->fib, fn);
	  goto again;
	}
    }
  FIB_ITERATE_END(fn);

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->prune_state = 0;
  return 1;
}
1314
1315 /**
1316 * rt_prune_loop - prune routing tables
1317 * @tab: routing table to be pruned
1318 *
1319 * The prune loop scans routing tables and removes routes belonging to
1320 * inactive protocols and also stale network entries. Returns 1 when
1321 * all such routes are pruned. It is a part of the protocol flushing
1322 * loop.
1323 */
1324 int
1325 rt_prune_loop(void)
1326 {
1327 rtable *t;
1328 int max_feed = 512;
1329
1330 WALK_LIST(t, routing_tables)
1331 if (! rt_prune_step(t, &max_feed))
1332 return 0;
1333
1334 return 1;
1335 }
1336
1337 void
1338 rt_preconfig(struct config *c)
1339 {
1340 struct symbol *s = cf_find_symbol("master");
1341
1342 init_list(&c->tables);
1343 c->master_rtc = rt_new_table(s);
1344 }
1345
1346
/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */
1351
/*
 * rta_next_hop_outdated - check whether the next hop data cached in @a
 * differs from the current state of its hostentry.  Returns nonzero
 * when the rta has to be rebuilt.
 */
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  /* Not a recursive route, nothing to update */
  if (!he)
    return 0;

  /* Hostentry is unresolved: the rta is current only if it is unreachable */
  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
    (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    !mpnh_same(a->nexthops, he->src->nexthops);
}
1367
1368 static inline void
1369 rta_apply_hostentry(rta *a, struct hostentry *he)
1370 {
1371 a->hostentry = he;
1372 a->iface = he->src ? he->src->iface : NULL;
1373 a->gw = he->gw;
1374 a->dest = he->dest;
1375 a->igp_metric = he->igp_metric;
1376 a->nexthops = he->src ? he->src->nexthops : NULL;
1377 }
1378
/*
 * rt_next_hop_update_rte - build a replacement for rte @old with next
 * hop data refreshed from its hostentry.  A private copy of the rta is
 * modified and re-interned via rta_lookup(); aflags is cleared so the
 * copy is treated as a fresh, uncached rta (NOTE(review): assumed
 * rta_lookup() contract -- confirm in the route attribute module).
 */
static inline rte *
rt_next_hop_update_rte(rtable *tab, rte *old)
{
  rta a;
  memcpy(&a, old->attrs, sizeof(rta));
  rta_apply_hostentry(&a, old->attrs->hostentry);
  a.aflags = 0;

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(&a);

  return e;
}
1393
/*
 * rt_next_hop_update_net - refresh all routes of network @n whose
 * cached next hop data is outdated.  Each such rte is replaced in place
 * by a fresh copy and announced with RA_ANY; afterwards the best route
 * is re-selected, relinked to the head of the list and announced with
 * RA_OPTIMAL if it changed.  Returns the number of routes updated
 * (used by the caller to account its work quota).
 */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  /* Replace outdated routes, keeping the list order */
  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
	new = rt_next_hop_update_rte(tab, e);
	*k = new;

	rte_announce_i(tab, RA_ANY, n, new, e);
	rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

	/* Call a pre-comparison hook */
	/* Not really an efficient way to compute this */
	if (e->attrs->proto->rte_recalculate)
	  e->attrs->proto->rte_recalculate(tab, n, new, e, NULL);

	/* The old best rte is still needed for the RA_OPTIMAL announce below */
	if (e != old_best)
	  rte_free_quick(e);
	else /* Freeing of the old best rte is postponed */
	  free_old_best = 1;

	e = new;
	count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
	new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
1460
/*
 * rt_next_hop_update - walk table @tab and refresh routes with outdated
 * next hop data.  Processes a bounded amount of work per invocation and
 * reschedules itself via the table event when interrupted.
 * nhu_state: 0 = idle, 1 = scheduled, 2 = running,
 * 3 = running with another update requested meanwhile.
 */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      /* Quota used up: save position and continue in the next event run */
      if (max_feed <= 0)
	{
	  FIB_ITERATE_PUT(fit, fn);
	  ev_schedule(tab->rt_event);
	  return;
	}
      max_feed -= rt_next_hop_update_net(tab, (net *) fn);
    }
  FIB_ITERATE_END(fn);

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  /* A new request arrived during the walk: start over */
  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
1494
1495
1496 struct rtable_config *
1497 rt_new_table(struct symbol *s)
1498 {
1499 /* Hack that allows to 'redefine' the master table */
1500 if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc))
1501 return s->def;
1502
1503 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
1504
1505 cf_define_symbol(s, SYM_TABLE, c);
1506 c->name = s->name;
1507 add_tail(&new_config->tables, &c->n);
1508 c->gc_max_ops = 1000;
1509 c->gc_min_time = 5;
1510 return c;
1511 }
1512
/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  /* The table is freed only when the count drops back to zero and a
     deletion is pending, see rt_unlock_table() */
  r->use_count++;
}
1526
/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
    {
      struct config *conf = r->deleted;
      DBG("Deleting routing table %s\n", r->name);
      if (r->hostcache)
	rt_free_hostcache(r);
      rem_node(&r->n);
      fib_free(&r->fib);
      rfree(r->rt_event);
      mb_free(r);
      /* Release the config that scheduled the deletion (see rt_commit()) */
      config_del_obstacle(conf);
    }
}
1551
/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
    {
      WALK_LIST(o, old->tables)
	{
	  rtable *ot = o->table;
	  if (!ot->deleted)
	    {
	      struct symbol *sym = cf_find_symbol(o->name);
	      if (sym && sym->class == SYM_TABLE && !new->shutdown)
		{
		  DBG("\t%s: same\n", o->name);
		  /* Table kept: rebind it to the new config's rtable_config */
		  r = sym->def;
		  r->table = ot;
		  ot->name = r->name;
		  ot->config = r;
		  if (o->sorted != r->sorted)
		    log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
		}
	      else
		{
		  DBG("\t%s: deleted\n", o->name);
		  /* Table removed from config: the lock/unlock pair deletes
		     it now unless some protocol still holds a lock */
		  ot->deleted = old;
		  config_add_obstacle(old);
		  rt_lock_table(ot);
		  rt_unlock_table(ot);
		}
	    }
	}
    }

  /* Create tables that are new in this configuration */
  WALK_LIST(r, new->tables)
    if (!r->table)
      {
	rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
	DBG("\t%s: created\n", r->name);
	rt_setup(rt_table_pool, t, r->name, r);
	add_tail(&routing_tables, &t->n);
	r->table = t;
      }
  DBG("\tdone\n");
}
1611
/*
 * do_feed_baby - announce one route @e of network @n to protocol @p
 * through announce hook @h, as announcement class @type.
 * Temporary attributes are generated by the route's source protocol.
 */
static inline void
do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
{
  struct proto *src = e->attrs->proto;
  ea_list *tmpa;

  rte_update_lock();
  tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(e, rte_update_pool) : NULL;
  if (type == RA_ACCEPTED)
    /* NOTE(review): last arg looks like a feed/refeed mode flag -- confirm
       against rt_notify_accepted() */
    rt_notify_accepted(h, n, e, NULL, NULL, tmpa, p->refeeding ? 2 : 1);
  else
    rt_notify_basic(h, n, e, p->refeeding ? e : NULL, tmpa, p->refeeding);
  rte_update_unlock();
}
1626
/**
 * rt_feed_baby - advertise routes to a new protocol
 * @p: protocol to be fed
 *
 * This function performs one pass of advertisement of routes to a newly
 * initialized protocol. It's called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in single
 * pass in order not to monopolize CPU time.)
 */
int
rt_feed_baby(struct proto *p)
{
  struct announce_hook *h;
  struct fib_iterator *fit;
  int max_feed = 256;

  if (!p->feed_ahook)			/* Need to initialize first */
    {
      if (!p->ahooks)
	return 1;
      DBG("Announcing routes to new protocol %s\n", p->name);
      p->feed_ahook = p->ahooks;
      fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator));
      goto next_hook;
    }
  fit = p->feed_iterator;

again:
  h = p->feed_ahook;
  FIB_ITERATE_START(&h->table->fib, fit, fn)
    {
      net *n = (net *) fn;
      rte *e = n->routes;
      /* Quota used up: save the position and let the caller reschedule */
      if (max_feed <= 0)
	{
	  FIB_ITERATE_PUT(fit, fn);
	  return 0;
	}

      /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */

      /* For RA_OPTIMAL/RA_ACCEPTED, feed just the first (best) route */
      if ((p->accept_ra_types == RA_OPTIMAL) ||
	  (p->accept_ra_types == RA_ACCEPTED))
	if (rte_is_valid(e))
	  {
	    if (p->core_state != FS_FEEDING)
	      return 1;  /* In the meantime, the protocol fell down. */
	    do_feed_baby(p, p->accept_ra_types, h, n, e);
	    max_feed--;
	  }

      /* For RA_ANY, feed every valid route of the network */
      if (p->accept_ra_types == RA_ANY)
	for(e = n->routes; rte_is_valid(e); e = e->next)
	  {
	    if (p->core_state != FS_FEEDING)
	      return 1;  /* In the meantime, the protocol fell down. */
	    do_feed_baby(p, RA_ANY, h, n, e);
	    max_feed--;
	  }
    }
  FIB_ITERATE_END(fn);
  /* This table is done; advance to the next announce hook */
  p->feed_ahook = h->next;
  if (!p->feed_ahook)
    {
      mb_free(p->feed_iterator);
      p->feed_iterator = NULL;
      return 1;
    }

next_hook:
  h = p->feed_ahook;
  FIB_ITERATE_INIT(fit, &h->table->fib);
  goto again;
}
1701
1702 /**
1703 * rt_feed_baby_abort - abort protocol feeding
1704 * @p: protocol
1705 *
1706 * This function is called by the protocol code when the protocol
1707 * stops or ceases to exist before the last iteration of rt_feed_baby()
1708 * has finished.
1709 */
1710 void
1711 rt_feed_baby_abort(struct proto *p)
1712 {
1713 if (p->feed_ahook)
1714 {
1715 /* Unlink the iterator and exit */
1716 fit_get(&p->feed_ahook->table->fib, p->feed_iterator);
1717 p->feed_ahook = NULL;
1718 }
1719 }
1720
1721
/* Mix the bits of pointer @ptr into an unsigned hash value. */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  return (unsigned) (v ^ (v << 8) ^ (v >> 16));
}
1728
1729 static inline unsigned
1730 hc_hash(ip_addr a, rtable *dep)
1731 {
1732 return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff;
1733 }
1734
1735 static inline void
1736 hc_insert(struct hostcache *hc, struct hostentry *he)
1737 {
1738 unsigned int k = he->hash_key >> hc->hash_shift;
1739 he->next = hc->hash_table[k];
1740 hc->hash_table[k] = he;
1741 }
1742
1743 static inline void
1744 hc_remove(struct hostcache *hc, struct hostentry *he)
1745 {
1746 struct hostentry **hep;
1747 unsigned int k = he->hash_key >> hc->hash_shift;
1748
1749 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
1750 *hep = he->next;
1751 }
1752
#define HC_DEF_ORDER 10		/* Initial hash table order (log2 of bucket count) */

/* HC_HI_MARK / HC_LO_MARK are textual operator suffixes: hc_alloc_table()
 * pastes them after the table size, so 'hsize HC_HI_MARK' expands to
 * 'hsize *4' (grow threshold) and 'hsize HC_LO_MARK' to 'hsize /5'
 * (shrink threshold). */
#define HC_HI_MARK *4
#define HC_HI_STEP 2			/* Orders added when growing */
#define HC_HI_ORDER 16			/* Must be at most 16 */
#define HC_LO_MARK /5
#define HC_LO_STEP 2			/* Orders removed when shrinking */
#define HC_LO_ORDER 10			/* Never shrink below this order */
1760
/*
 * hc_alloc_table - allocate a zeroed hash table of 2^@order buckets for
 * host cache @hc and set the derived parameters: hash_shift maps the
 * 16-bit hc_hash() key to a bucket index, hash_max/hash_min are the
 * item counts that trigger growing/shrinking (HC_HI_MARK and HC_LO_MARK
 * expand to '*4' and '/5', see their definitions above).
 */
static void
hc_alloc_table(struct hostcache *hc, unsigned order)
{
  unsigned hsize = 1 << order;
  hc->hash_order = order;
  hc->hash_shift = 16 - order;
  /* At the maximal order, never grow; at the minimal order, never shrink */
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ? 0 : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
}
1772
1773 static void
1774 hc_resize(struct hostcache *hc, unsigned new_order)
1775 {
1776 unsigned old_size = 1 << hc->hash_order;
1777 struct hostentry **old_table = hc->hash_table;
1778 struct hostentry *he, *hen;
1779 int i;
1780
1781 hc_alloc_table(hc, new_order);
1782 for (i = 0; i < old_size; i++)
1783 for (he = old_table[i]; he != NULL; he=hen)
1784 {
1785 hen = he->next;
1786 hc_insert(hc, he);
1787 }
1788 mb_free(old_table);
1789 }
1790
/*
 * hc_new_hostentry - create and register a new host cache entry for
 * address @a (link-local source address @ll), depended on by table
 * @dep, with precomputed hash key @k.  The entry starts unresolved
 * (src == NULL) with zero use count.  May grow the hash table.
 */
static struct hostentry *
hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  he->addr = a;
  he->link = ll;
  he->tab = dep;
  he->hash_key = k;
  he->uc = 0;
  he->src = NULL;

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  /* Grow the hash table when it gets too dense */
  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, hc->hash_order + HC_HI_STEP);

  return he;
}
1812
1813 static void
1814 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
1815 {
1816 rta_free(he->src);
1817
1818 rem_node(&he->ln);
1819 hc_remove(hc, he);
1820 sl_free(hc->slab, he);
1821
1822 hc->hash_items--;
1823 if (hc->hash_items < hc->hash_min)
1824 hc_resize(hc, hc->hash_order - HC_LO_STEP);
1825 }
1826
1827 static void
1828 rt_init_hostcache(rtable *tab)
1829 {
1830 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
1831 init_list(&hc->hostentries);
1832
1833 hc->hash_items = 0;
1834 hc_alloc_table(hc, HC_DEF_ORDER);
1835 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
1836
1837 hc->lp = lp_new(rt_table_pool, 1008);
1838 hc->trie = f_new_trie(hc->lp);
1839
1840 tab->hostcache = hc;
1841 }
1842
/*
 * rt_free_hostcache - free the host cache of table @tab.  Entries are
 * not freed one by one; releasing the slab, linpool and bucket array
 * disposes of everything.  Entries still referenced (uc != 0) indicate
 * a leak and are reported.
 */
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      rta_free(he->src);

      if (he->uc)
	log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
}
1863
1864 static void
1865 rt_notify_hostcache(rtable *tab, net *net)
1866 {
1867 struct hostcache *hc = tab->hostcache;
1868
1869 if (tab->hcu_scheduled)
1870 return;
1871
1872 if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen))
1873 rt_schedule_hcu(tab);
1874 }
1875
1876 static int
1877 if_local_addr(ip_addr a, struct iface *i)
1878 {
1879 struct ifa *b;
1880
1881 WALK_LIST(b, i->addrs)
1882 if (ipa_equal(a, b->ip))
1883 return 1;
1884
1885 return 0;
1886 }
1887
/*
 * rt_get_igp_metric - return the IGP metric of route @rt.  An explicit
 * EA_GEN_IGP_METRIC attribute takes precedence; otherwise
 * protocol-specific metrics are used (OSPF metric1, RIP metric), then 0
 * for device routes, and IGP_METRIC_UNKNOWN when nothing applies.
 */
static u32
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  rta *a = rt->attrs;

#ifdef CONFIG_OSPF
  if ((a->source == RTS_OSPF) ||
      (a->source == RTS_OSPF_IA) ||
      (a->source == RTS_OSPF_EXT1))
    return rt->u.ospf.metric1;
#endif

#ifdef CONFIG_RIP
  if (a->source == RTS_RIP)
    return rt->u.rip.metric;
#endif

  /* Device routes */
  if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
    return 0;

  return IGP_METRIC_UNKNOWN;
}
1916
/*
 * rt_update_hostentry - re-resolve host entry @he against table @tab.
 * Looks up the best route covering he->addr and fills in the gateway,
 * destination type, IGP metric and source rta; the covering prefix
 * range is recorded in the trie so later route changes can be matched.
 * Returns nonzero if the resolved source rta changed.
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->gw = IPA_NONE;
  he->dest = RTD_UNREACHABLE;
  he->igp_metric = 0;

  net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      pxlen = n->n.pxlen;

      if (a->hostentry)
	{
	  /* Recursive route should not depend on another recursive route */
	  log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d",
	      he->addr, n->n.prefix, pxlen);
	  goto done;
	}

      if (a->dest == RTD_DEVICE)
	{
	  if (if_local_addr(he->addr, a->iface))
	    {
	      /* The host address is a local address, this is not valid */
	      log(L_WARN "Next hop address %I is a local address of iface %s",
		  he->addr, a->iface->name);
	      goto done;
	    }

	  /* The host is directly reachable, use link as a gateway */
	  he->gw = he->link;
	  he->dest = RTD_ROUTER;
	}
      else
	{
	  /* The host is reachable through some route entry */
	  he->gw = a->gw;
	  he->dest = a->dest;
	}

      he->src = rta_clone(a);
      he->igp_metric = rt_get_igp_metric(e);
    }

done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);

  rta_free(old_src);
  return old_src != he->src;
}
1976
/*
 * rt_update_hostcache - recompute all host entries of table @tab.
 * The prefix trie is rebuilt from scratch, unused entries (uc == 0) are
 * deleted, and any table depending on a changed entry gets a next hop
 * update scheduled.
 */
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp);

  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
	{
	  hc_delete_hostentry(hc, he);
	  continue;
	}

      if (rt_update_hostentry(tab, he))
	rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}
2003
/*
 * rt_find_hostentry - find or create the host entry for address @a in
 * table @tab, depended on by table @dep.  @ll is the link-local source
 * address used when the host turns out to be directly connected.
 * The host cache is created lazily; a new entry is resolved at once.
 */
static struct hostentry *
rt_find_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  struct hostentry *he;

  if (!tab->hostcache)
    rt_init_hostcache(tab);

  unsigned int k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, a, ll, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}
2022
2023 void
2024 rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
2025 {
2026 rta_apply_hostentry(a, rt_find_hostentry(tab, *gw, *ll, dep));
2027 }
2028
2029 /*
2030 * CLI commands
2031 */
2032
/*
 * rt_format_via - format the next hop of route @e into buffer @via.
 * The callers pass a buffer of STD_ADDRESS_P_LENGTH+32 bytes
 * (see rt_show_rte()).
 */
static void
rt_format_via(rte *e, byte *via)
{
  rta *a = e->attrs;

  switch (a->dest)
    {
    case RTD_ROUTER:	bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
    case RTD_DEVICE:	bsprintf(via, "dev %s", a->iface->name); break;
    case RTD_BLACKHOLE:	bsprintf(via, "blackhole"); break;
    case RTD_UNREACHABLE:	bsprintf(via, "unreachable"); break;
    case RTD_PROHIBIT:	bsprintf(via, "prohibited"); break;
    case RTD_MULTIPATH:	bsprintf(via, "multipath"); break;
    default:	bsprintf(via, "???");
    }
}
2049
/*
 * rt_show_rte - print a single route @e to CLI connection @c.
 * @ia is the network prefix string (the caller clears it after the
 * first route of a network so it is printed only once); @tmpa holds
 * the route's temporary attributes.
 */
static void
rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
{
  byte via[STD_ADDRESS_P_LENGTH+32], from[STD_ADDRESS_P_LENGTH+8];
  byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
  rta *a = e->attrs;
  int primary = (e->net->routes == e);
  int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
  struct mpnh *nh;

  rt_format_via(e, via);
  tm_format_datetime(tm, &config->tf_route, e->lastmod);
  /* Show ' from X' only when the route came from somewhere other than its gateway */
  if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
    bsprintf(from, " from %I", a->from);
  else
    from[0] = 0;
  if (a->proto->proto->get_route_info || d->verbose)
    {
      /* Need to normalize the extended attributes */
      ea_list *t = tmpa;
      t = ea_append(t, a->eattrs);
      tmpa = alloca(ea_scan(t));
      ea_merge(t, tmpa);
      ea_sort(tmpa);
    }
  if (a->proto->proto->get_route_info)
    a->proto->proto->get_route_info(e, info, tmpa);
  else
    bsprintf(info, " (%d)", e->pref);
  /* '*' marks the primary route, '!' a primary route with kernel sync error */
  cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->proto->name,
	     tm, from, primary ? (sync_error ? " !" : " *") : "", info);
  for (nh = a->nexthops; nh; nh = nh->next)
    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
  if (d->verbose)
    rta_show(c, a, tmpa);
}
2086
/*
 * rt_show_net - print the matching routes of network @n for the
 * 'show route' command described by @d.  Each route is passed through
 * the show filter and, when an export mode is requested, through the
 * export protocol's import control and output filter as well.
 */
static void
rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
{
  rte *e, *ee;
  byte ia[STD_ADDRESS_P_LENGTH+8];
  struct announce_hook *a;
  int ok;

  bsprintf(ia, "%I/%d", n->n.prefix, n->n.pxlen);

  for(e=n->routes; e; e=e->next)
    {
      if (rte_is_filtered(e) != d->filtered)
	continue;

      struct ea_list *tmpa;
      struct proto *p0 = e->attrs->proto;	/* Originating protocol */
      struct proto *p1 = d->export_protocol;
      struct proto *p2 = d->show_protocol;

      if (ia[0])
	d->net_counter++;
      d->rt_counter++;
      ee = e;
      rte_update_lock();		/* We use the update buffer for filtering */
      tmpa = p0->make_tmp_attrs ? p0->make_tmp_attrs(e, rte_update_pool) : NULL;
      ok = f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT;
      if (p2 && p2 != p0) ok = 0;
      if (ok && d->export_mode)
	{
	  int ic;
	  if ((ic = p1->import_control ? p1->import_control(p1, &e, &tmpa, rte_update_pool) : 0) < 0)
	    ok = 0;
	  else if (!ic && d->export_mode > 1)
	    {
	      /* FIXME - this shows what should be exported according
		 to current filters, but not what was really exported.
		 'configure soft' command may change the export filter
		 and do not update routes */

	      if ((a = proto_find_announce_hook(p1, d->table)) &&
		  (f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT))
		ok = 0;
	    }
	}
      if (ok)
	{
	  d->show_counter++;
	  if (d->stats < 2)
	    rt_show_rte(c, ia, e, d, tmpa);
	  ia[0] = 0;	/* Print the prefix only once per network */
	}
      /* The filters may have created a writable copy of the rte; free it */
      if (e != ee)
	{
	  rte_free(e);
	  e = ee;
	}
      rte_update_unlock();
      if (d->primary_only)
	break;
    }
}
2149
/*
 * rt_show_cont - CLI continuation hook for a table-walking 'show route'.
 * Prints a bounded number of networks per call (fewer when DEBUGGING),
 * saving the FIB iterator position between calls; terminates early on
 * reconfiguration or when the export protocol is no longer up.
 */
static void
rt_show_cont(struct cli *c)
{
  struct rt_show_data *d = c->rover;
#ifdef DEBUGGING
  unsigned max = 4;
#else
  unsigned max = 64;
#endif
  struct fib *fib = &d->table->fib;
  struct fib_iterator *it = &d->fit;

  FIB_ITERATE_START(fib, it, f)
    {
      net *n = (net *) f;
      if (d->running_on_config && d->running_on_config != config)
	{
	  cli_printf(c, 8004, "Stopped due to reconfiguration");
	  goto done;
	}
      if (d->export_protocol &&
	  d->export_protocol->core_state != FS_HAPPY &&
	  d->export_protocol->core_state != FS_FEEDING)
	{
	  cli_printf(c, 8005, "Protocol is down");
	  goto done;
	}
      if (!max--)
	{
	  /* Quota used up: keep the iterator and continue in the next call */
	  FIB_ITERATE_PUT(it, f);
	  return;
	}
      rt_show_net(c, n, d);
    }
  FIB_ITERATE_END(f);
  if (d->stats)
    cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
  else
    cli_printf(c, 0, "");
done:
  /* Detach the continuation; the command is finished */
  c->cont = c->cleanup = NULL;
}
2192
2193 static void
2194 rt_show_cleanup(struct cli *c)
2195 {
2196 struct rt_show_data *d = c->rover;
2197
2198 /* Unlink the iterator */
2199 fit_get(&d->table->fib, &d->fit);
2200 }
2201
/*
 * rt_show - entry point of the 'show route' CLI command described by @d.
 * pxlen == 256 is treated as "no specific prefix": the whole table is
 * walked asynchronously via the CLI continuation mechanism.  Otherwise
 * a single network is shown, found by net_route() for 'show route for'
 * queries (longest match) or by net_find() (exact match).
 */
void
rt_show(struct rt_show_data *d)
{
  net *n;

  if (d->pxlen == 256)
    {
      FIB_ITERATE_INIT(&d->fit, &d->table->fib);
      this_cli->cont = rt_show_cont;
      this_cli->cleanup = rt_show_cleanup;
      this_cli->rover = d;
    }
  else
    {
      if (d->show_for)
	n = net_route(d->table, d->prefix, d->pxlen);
      else
	n = net_find(d->table, d->prefix, d->pxlen);
      if (n)
	{
	  rt_show_net(this_cli, n, d);
	  cli_msg(0, "");
	}
      else
	cli_msg(8001, "Network not in table");
    }
}
2229
2230 /*
2231 * Documentation for functions declared inline in route.h
2232 */
2233 #if 0
2234
2235 /**
2236 * net_find - find a network entry
2237 * @tab: a routing table
2238 * @addr: address of the network
2239 * @len: length of the network prefix
2240 *
2241 * net_find() looks up the given network in routing table @tab and
2242 * returns a pointer to its &net entry or %NULL if no such network
2243 * exists.
2244 */
2245 static inline net *net_find(rtable *tab, ip_addr addr, unsigned len)
2246 { DUMMY; }
2247
2248 /**
2249 * net_get - obtain a network entry
2250 * @tab: a routing table
2251 * @addr: address of the network
2252 * @len: length of the network prefix
2253 *
2254 * net_get() looks up the given network in routing table @tab and
2255 * returns a pointer to its &net entry. If no such entry exists, it's
2256 * created.
2257 */
2258 static inline net *net_get(rtable *tab, ip_addr addr, unsigned len)
2259 { DUMMY; }
2260
2261 /**
2262 * rte_cow - copy a route for writing
2263 * @r: a route entry to be copied
2264 *
2265 * rte_cow() takes a &rte and prepares it for modification. The exact action
2266 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2267 * just returned unchanged, else a new temporary entry with the same contents
2268 * is created.
2269 *
2270 * The primary use of this function is inside the filter machinery -- when
2271 * a filter wants to modify &rte contents (to change the preference or to
2272 * attach another set of attributes), it must ensure that the &rte is not
2273 * shared with anyone else (and especially that it isn't stored in any routing
2274 * table).
2275 *
2276 * Result: a pointer to the new writable &rte.
2277 */
2278 static inline rte * rte_cow(rte *r)
2279 { DUMMY; }
2280
2281 #endif