1 /*
2 * BIRD -- Routing Tables
3 *
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing); the order of the other ones is undetermined.
23 *
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
29 */
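/*
 * A minimal sketch of how this layout is walked (the same pattern
 * recurs throughout this file): net->routes is the current best
 * route, the rest of the list is in no particular order.
 *
 *   net *n = ...;
 *   rte *e;
 *   for (e = n->routes; e; e = e->next)
 *     if (rte_is_valid(e))
 *       ...                       // e->attrs is the (shared) &rta
 */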
30
31 #undef LOCAL_DEBUG
32
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/cli.h"
37 #include "nest/iface.h"
38 #include "lib/resource.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "conf/conf.h"
42 #include "filter/filter.h"
44 #include "lib/alloca.h"
45
46 pool *rt_table_pool;
47
48 static slab *rte_slab;
49 static linpool *rte_update_pool;
50
51 static list routing_tables;
52
53 static void rt_format_via(rte *e, byte *via);
54 static void rt_free_hostcache(rtable *tab);
55 static void rt_notify_hostcache(rtable *tab, net *net);
56 static void rt_update_hostcache(rtable *tab);
57 static void rt_next_hop_update(rtable *tab);
58 static inline void rt_prune_table(rtable *tab);
59
60
61 static inline struct ea_list *
62 make_tmp_attrs(struct rte *rt, struct linpool *pool)
63 {
64 struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
65 mta = rt->attrs->src->proto->make_tmp_attrs;
66   return mta ? mta(rt, pool) : NULL;
67 }
68
69
70 /* Like fib_route(), but skips empty net entries */
71 static inline void *
72 net_route_ip4(struct fib *f, net_addr_ip4 *n)
73 {
74 net *r;
75
76 while (r = fib_find(f, (net_addr *) n),
77 !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
78 {
79 n->pxlen--;
80 ip4_clrbit(&n->prefix, n->pxlen);
81 }
82
83 return r;
84 }
85
86 static inline void *
87 net_route_ip6(struct fib *f, net_addr_ip6 *n)
88 {
89 net *r;
90
91 while (r = fib_find(f, (net_addr *) n),
92 !(r && rte_is_valid(r->routes)) && (n->pxlen > 0))
93 {
94 n->pxlen--;
95 ip6_clrbit(&n->prefix, n->pxlen);
96 }
97
98 return r;
99 }
100
101 void *
102 net_route(rtable *tab, const net_addr *n)
103 {
104 ASSERT(tab->addr_type == n->type);
105
106 net_addr *n0 = alloca(n->length);
107 net_copy(n0, n);
108
109 switch (n->type)
110 {
111 case NET_IP4:
112 case NET_VPN4:
113 case NET_ROA4:
114 return net_route_ip4(&tab->fib, (net_addr_ip4 *) n0);
115
116 case NET_IP6:
117 case NET_VPN6:
118 case NET_ROA6:
119 return net_route_ip6(&tab->fib, (net_addr_ip6 *) n0);
120
121 default:
122 return NULL;
123 }
124 }
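/*
 * A worked example of the fallback above: a net_route() lookup for
 * 10.1.1.0/24 in an IPv4 table tries 10.1.1.0/24 itself, then
 * 10.1.0.0/23, 10.1.0.0/22, ... down to 0.0.0.0/0, returning the
 * first entry that holds a valid route - i.e. the longest matching
 * covering prefix.
 */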
125
126
127 static int
128 net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
129 {
130 struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
131 struct fib_node *fn;
132 int anything = 0;
133
134 while (1)
135 {
136 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
137 {
138 net_addr_roa4 *roa = (void *) fn->addr;
139 net *r = fib_node_to_user(&tab->fib, fn);
140
141 if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
142 {
143 anything = 1;
144 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
145 return ROA_VALID;
146 }
147 }
148
149 if (n.pxlen == 0)
150 break;
151
152 n.pxlen--;
153 ip4_clrbit(&n.prefix, n.pxlen);
154 }
155
156 return anything ? ROA_INVALID : ROA_UNKNOWN;
157 }
158
159 static int
160 net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
161 {
162 struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
163 struct fib_node *fn;
164 int anything = 0;
165
166 while (1)
167 {
168 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
169 {
170 net_addr_roa6 *roa = (void *) fn->addr;
171 net *r = fib_node_to_user(&tab->fib, fn);
172
173 if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
174 {
175 anything = 1;
176 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
177 return ROA_VALID;
178 }
179 }
180
181 if (n.pxlen == 0)
182 break;
183
184 n.pxlen--;
185 ip6_clrbit(&n.prefix, n.pxlen);
186 }
187
188 return anything ? ROA_INVALID : ROA_UNKNOWN;
189 }
190
191 /**
192 * roa_check - check validity of route origination in a ROA table
193 * @tab: ROA table
194 * @n: network prefix to check
195 * @asn: AS number of network prefix
196 *
197 * Implements RFC 6483 route validation for the given network prefix. The
198 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
199 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
200 * a candidate ROA with matching ASN and maxlen field greater than or equal to
201 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
202  * the caller cannot determine the origin AS, 0 may be used (in that case ROA_VALID
203  * cannot be returned). Table @tab must have type NET_ROA4 or NET_ROA6; network @n
204  * must have type NET_IP4 or NET_IP6, respectively.
205 */
206 int
207 net_roa_check(rtable *tab, const net_addr *n, u32 asn)
208 {
209 if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
210 return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
211 else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
212 return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
213 else
214 return ROA_UNKNOWN; /* Should not happen */
215 }
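/*
 * A minimal usage sketch (variable names hypothetical; assumes the
 * NET_ADDR_IP4 and ip4_build helpers): validating a route for
 * 192.0.2.0/24 originated by AS 65001 against a ROA table:
 *
 *   net_addr_ip4 px = NET_ADDR_IP4(ip4_build(192, 0, 2, 0), 24);
 *   switch (net_roa_check(roa_tab, (net_addr *) &px, 65001))
 *   {
 *   case ROA_VALID:   ...; break;  // covering ROA with matching ASN/maxlen
 *   case ROA_INVALID: ...; break;  // covered, but no ROA matches
 *   case ROA_UNKNOWN: ...; break;  // no covering ROA at all
 *   }
 */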
216
217 /**
218 * rte_find - find a route
219 * @net: network node
220 * @src: route source
221 *
222 * The rte_find() function returns a route for destination @net
223 * which is from route source @src.
224 */
225 rte *
226 rte_find(net *net, struct rte_src *src)
227 {
228 rte *e = net->routes;
229
230 while (e && e->attrs->src != src)
231 e = e->next;
232 return e;
233 }
234
235 /**
236 * rte_get_temp - get a temporary &rte
237 * @a: attributes to assign to the new route (a &rta; in case it's
238 * un-cached, rte_update() will create a cached copy automatically)
239 *
240 * Create a temporary &rte and bind it with the attributes @a.
241 * Also set route preference to the default preference set for
242 * the protocol.
243 */
244 rte *
245 rte_get_temp(rta *a)
246 {
247 rte *e = sl_alloc(rte_slab);
248
249 e->attrs = a;
250 e->flags = 0;
251 e->pref = 0;
252 return e;
253 }
254
255 rte *
256 rte_do_cow(rte *r)
257 {
258 rte *e = sl_alloc(rte_slab);
259
260 memcpy(e, r, sizeof(rte));
261 e->attrs = rta_clone(r->attrs);
262 e->flags = 0;
263 return e;
264 }
265
266 /**
267 * rte_cow_rta - get a private writable copy of &rte with writable &rta
268 * @r: a route entry to be copied
269 * @lp: a linpool from which to allocate &rta
270 *
271 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
272 * modification. There are three possibilities: First, both &rte and &rta are
273 * private copies, in that case they are returned unchanged. Second, &rte is
274 * private copy, but &rta is cached, in that case &rta is duplicated using
275 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
276 * both structures are duplicated by rte_do_cow() and rta_do_cow().
277 *
278 * Note that in the second case, cached &rta loses one reference, while private
279 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
280 * nexthops, ...) with it. To work properly, original shared &rta should have
281 * another reference during the life of created private copy.
282 *
283 * Result: a pointer to the new writable &rte with writable &rta.
284 */
285 rte *
286 rte_cow_rta(rte *r, linpool *lp)
287 {
288 if (!rta_is_cached(r->attrs))
289 return r;
290
291 rte *e = rte_cow(r);
292 rta *a = rta_do_cow(r->attrs, lp);
293 rta_free(e->attrs);
294 e->attrs = a;
295 return e;
296 }
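/*
 * A usage sketch mirroring rt_export_merged() below: make the rte and
 * its rta writable before patching the next hops; the private copy is
 * allocated from the given linpool:
 *
 *   best = rte_cow_rta(best, rte_update_pool);
 *   best->attrs->dest = RTD_MULTIPATH;
 *   best->attrs->nexthops = nhs;
 */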
297
298 static int /* Actually better or at least as good as */
299 rte_better(rte *new, rte *old)
300 {
301 int (*better)(rte *, rte *);
302
303 if (!rte_is_valid(old))
304 return 1;
305 if (!rte_is_valid(new))
306 return 0;
307
308 if (new->pref > old->pref)
309 return 1;
310 if (new->pref < old->pref)
311 return 0;
312 if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
313 {
314 /*
315 * If the user has configured protocol preferences, so that two different protocols
316 * have the same preference, try to break the tie by comparing addresses. Not too
317 * useful, but keeps the ordering of routes unambiguous.
318 */
319 return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
320 }
321 if (better = new->attrs->src->proto->rte_better)
322 return better(new, old);
323 return 0;
324 }
325
326 static int
327 rte_mergable(rte *pri, rte *sec)
328 {
329 int (*mergable)(rte *, rte *);
330
331 if (!rte_is_valid(pri) || !rte_is_valid(sec))
332 return 0;
333
334 if (pri->pref != sec->pref)
335 return 0;
336
337 if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
338 return 0;
339
340 if (mergable = pri->attrs->src->proto->rte_mergable)
341 return mergable(pri, sec);
342
343 return 0;
344 }
345
346 static void
347 rte_trace(struct proto *p, rte *e, int dir, char *msg)
348 {
349 byte via[IPA_MAX_TEXT_LENGTH+32];
350
351 rt_format_via(e, via);
352 log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, via);
353 }
354
355 static inline void
356 rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
357 {
358 if (p->debug & flag)
359 rte_trace(p, e, '>', msg);
360 }
361
362 static inline void
363 rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
364 {
365 if (p->debug & flag)
366 rte_trace(p, e, '<', msg);
367 }
368
369 static rte *
370 export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
371 {
372 struct proto *p = c->proto;
373 struct filter *filter = c->out_filter;
374 struct proto_stats *stats = &c->stats;
375 ea_list *tmpb = NULL;
376 rte *rt;
377 int v;
378
379 rt = rt0;
380 *rt_free = NULL;
381
382 if (!tmpa)
383 tmpa = &tmpb;
384
385 *tmpa = make_tmp_attrs(rt, rte_update_pool);
386
387 v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
388 if (v < 0)
389 {
390 if (silent)
391 goto reject;
392
393 stats->exp_updates_rejected++;
394 if (v == RIC_REJECT)
395 rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
396 goto reject;
397 }
398 if (v > 0)
399 {
400 if (!silent)
401 rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
402 goto accept;
403 }
404
405 v = filter && ((filter == FILTER_REJECT) ||
406 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
407 if (v)
408 {
409 if (silent)
410 goto reject;
411
412 stats->exp_updates_filtered++;
413 rte_trace_out(D_FILTERS, p, rt, "filtered out");
414 goto reject;
415 }
416
417 accept:
418 if (rt != rt0)
419 *rt_free = rt;
420 return rt;
421
422 reject:
423 /* Discard temporary rte */
424 if (rt != rt0)
425 rte_free(rt);
426 return NULL;
427 }
428
429 static void
430 do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
431 {
432 struct proto *p = c->proto;
433 struct proto_stats *stats = &c->stats;
434
435
436 /*
437 * First, apply export limit.
438 *
439  * Export route limits have several problems. Because the exp_routes
440  * counter is reset before refeed, we don't really know whether the
441  * limit is breached and whether the update is new or not. Therefore
442  * the number of really exported routes may exceed the limit
443  * temporarily (routes exported before and new routes in refeed).
444  *
445  * A minor advantage is that if the limit is decreased and refeed is
446  * requested, the number of exported routes really decreases.
447  *
448  * A second problem is that with export limits, we don't know whether
449  * old was really exported (it might be blocked by limit). When a
450  * withdraw is exported, we announce it even when the previous
451  * update was blocked. This is not a big issue, but the same problem
452  * is in updating the exp_routes counter. Therefore, to be consistent
453  * in increases and decreases of exp_routes, we count exported routes
454  * regardless of blocking by limits.
455  *
456  * A similar problem arises when handling updates - when a new route
457  * is received and blocking is active, the route would be blocked, but
458  * when an update for the route is received later, the update would be
459  * propagated (as old != NULL). Therefore, we have to block
460  * also non-new updates (contrary to import blocking).
461 */
462
463 struct channel_limit *l = &c->out_limit;
464 if (l->action && new)
465 {
466 if ((!old || refeed) && (stats->exp_routes >= l->limit))
467 channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
468
469 if (l->state == PLS_BLOCKED)
470 {
471 stats->exp_routes++; /* see note above */
472 stats->exp_updates_rejected++;
473 rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
474 new = NULL;
475
476 if (!old)
477 return;
478 }
479 }
480
481
482 if (new)
483 stats->exp_updates_accepted++;
484 else
485 stats->exp_withdraws_accepted++;
486
487 /* Hack: We do not decrease exp_routes during refeed, we instead
488 reset exp_routes at the start of refeed. */
489 if (new)
490 stats->exp_routes++;
491 if (old && !refeed)
492 stats->exp_routes--;
493
494 if (p->debug & D_ROUTES)
495 {
496 if (new && old)
497 rte_trace_out(D_ROUTES, p, new, "replaced");
498 else if (new)
499 rte_trace_out(D_ROUTES, p, new, "added");
500 else if (old)
501 rte_trace_out(D_ROUTES, p, old, "removed");
502 }
503 if (!new)
504 p->rt_notify(p, c, net, NULL, old, NULL);
505 else if (tmpa)
506 {
507 ea_list *t = tmpa;
508 while (t->next)
509 t = t->next;
510 t->next = new->attrs->eattrs;
511 p->rt_notify(p, c, net, new, old, tmpa);
512 t->next = NULL;
513 }
514 else
515 p->rt_notify(p, c, net, new, old, new->attrs->eattrs);
516 }
517
518 static void
519 rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
520 {
521 struct proto *p = c->proto;
522
523 rte *new = new0;
524 rte *old = old0;
525 rte *new_free = NULL;
526 rte *old_free = NULL;
527 ea_list *tmpa = NULL;
528
529 if (new)
530 c->stats.exp_updates_received++;
531 else
532 c->stats.exp_withdraws_received++;
533
534 /*
535 * This is a tricky part - we don't know whether route 'old' was
536 * exported to protocol 'p' or was filtered by the export filter.
537  * We try to run the export filter to find out, so that we have a
538  * correct value in the 'old' argument of rte_update (and a proper filter value).
539  *
540  * FIXME - this is broken because 'configure soft' may change
541  * filters but keep routes. Refeed is expected to be called after
542  * a change of the filters and with old == new, therefore we do not
543  * even try to run the filter on an old route. This may lead to
544  * 'spurious withdraws' but ensures that there are no 'missing
545  * withdraws'.
546  *
547  * This is not completely safe as there is a window between
548  * reconfiguration and the end of refeed - if a newly filtered
549  * route disappears during this period, a proper withdraw is not
550  * sent (because old would be filtered too) and the route is
551  * not refeeded (because it disappeared before that).
552 */
553
554 if (new)
555 new = export_filter(c, new, &new_free, &tmpa, 0);
556
557 if (old && !refeed)
558 old = export_filter(c, old, &old_free, NULL, 1);
559
560 if (!new && !old)
561 {
562 /*
563 * As mentioned above, 'old' value may be incorrect in some race conditions.
564 * We generally ignore it with the exception of withdraw to pipe protocol.
565 * In that case we rather propagate unfiltered withdraws regardless of
566 * export filters to ensure that when a protocol is flushed, its routes are
567 * removed from all tables. Possible spurious unfiltered withdraws are not
568 * problem here as they are ignored if there is no corresponding route at
569 * the other end of the pipe. We directly call rt_notify() hook instead of
570 * do_rt_notify() to avoid logging and stat counters.
571 */
572
573 #ifdef CONFIG_PIPE
574 if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
575 p->rt_notify(p, c, net, NULL, old0, NULL);
576 #endif
577
578 return;
579 }
580
581 do_rt_notify(c, net, new, old, tmpa, refeed);
582
583 /* Discard temporary rte's */
584 if (new_free)
585 rte_free(new_free);
586 if (old_free)
587 rte_free(old_free);
588 }
589
590 static void
591 rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
592 {
593 // struct proto *p = c->proto;
594
595 rte *r;
596 rte *new_best = NULL;
597 rte *old_best = NULL;
598 rte *new_free = NULL;
599 rte *old_free = NULL;
600 ea_list *tmpa = NULL;
601
602   /* Used to track whether we met the old_changed position. If before_old is NULL,
603      old_changed was the first one and we met it implicitly before the current best route. */
604   int old_meet = old_changed && !before_old;
605
606   /* Note that before_old is either NULL or a valid (not rejected) route.
607      If old_changed is valid, before_old has to be too. If the old changed route
608      was not valid, the caller must use NULL for both old_changed and before_old. */
609
610 if (new_changed)
611 c->stats.exp_updates_received++;
612 else
613 c->stats.exp_withdraws_received++;
614
615 /* First, find the new_best route - first accepted by filters */
616 for (r=net->routes; rte_is_valid(r); r=r->next)
617 {
618 if (new_best = export_filter(c, r, &new_free, &tmpa, 0))
619 break;
620
621 /* Note if we walked around the position of old_changed route */
622 if (r == before_old)
623 old_meet = 1;
624 }
625
626 /*
627 * Second, handle the feed case. That means we do not care for
628 * old_best. It is NULL for feed, and the new_best for refeed.
629  * For refeed, there is a hack similar to the one in rt_notify_basic()
630  * to ensure withdraws in case of changed filters.
631 */
632 if (feed)
633 {
634 if (feed == 2) /* refeed */
635 old_best = new_best ? new_best :
636 (rte_is_valid(net->routes) ? net->routes : NULL);
637 else
638 old_best = NULL;
639
640 if (!new_best && !old_best)
641 return;
642
643 goto found;
644 }
645
646 /*
647 * Now, we find the old_best route. Generally, it is the same as the
648 * new_best, unless new_best is the same as new_changed or
649 * old_changed is accepted before new_best.
650 *
651 * There are four cases:
652 *
653 * - We would find and accept old_changed before new_best, therefore
654  *   old_changed is old_best. In the remaining cases we assume this
655  *   is not true.
656 *
657 * - We found no new_best, therefore there is also no old_best and
658 * we ignore this withdraw.
659 *
660  * - We found new_best different from new_changed, therefore
661  *   old_best is the same as new_best and we ignore this update.
662 *
663 * - We found new_best the same as new_changed, therefore it cannot
664 * be old_best and we have to continue search for old_best.
665 */
666
667 /* First case */
668 if (old_meet)
669 if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
670 goto found;
671
672 /* Second case */
673 if (!new_best)
674 return;
675
676 /* Third case, we use r instead of new_best, because export_filter() could change it */
677 if (r != new_changed)
678 {
679 if (new_free)
680 rte_free(new_free);
681 return;
682 }
683
684 /* Fourth case */
685 for (r=r->next; rte_is_valid(r); r=r->next)
686 {
687 if (old_best = export_filter(c, r, &old_free, NULL, 1))
688 goto found;
689
690 if (r == before_old)
691 if (old_best = export_filter(c, old_changed, &old_free, NULL, 1))
692 goto found;
693 }
694
695 /* Implicitly, old_best is NULL and new_best is non-NULL */
696
697 found:
698 do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2));
699
700 /* Discard temporary rte's */
701 if (new_free)
702 rte_free(new_free);
703 if (old_free)
704 rte_free(old_free);
705 }
706
707
708 static struct mpnh *
709 mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
710 {
711 struct mpnh nh = { .gw = a->gw, .iface = a->iface };
712 struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
713 return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
714 }
715
716 rte *
717 rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, int silent)
718 {
719 // struct proto *p = c->proto;
720 struct mpnh *nhs = NULL;
721 rte *best0, *best, *rt0, *rt, *tmp;
722
723 best0 = net->routes;
724 *rt_free = NULL;
725
726 if (!rte_is_valid(best0))
727 return NULL;
728
729 best = export_filter(c, best0, rt_free, tmpa, silent);
730
731 if (!best || !rte_is_reachable(best))
732 return best;
733
734 for (rt0 = best0->next; rt0; rt0 = rt0->next)
735 {
736 if (!rte_mergable(best0, rt0))
737 continue;
738
739 rt = export_filter(c, rt0, &tmp, NULL, 1);
740
741 if (!rt)
742 continue;
743
744 if (rte_is_reachable(rt))
745 nhs = mpnh_merge_rta(nhs, rt->attrs, c->merge_limit);
746
747 if (tmp)
748 rte_free(tmp);
749 }
750
751 if (nhs)
752 {
753 nhs = mpnh_merge_rta(nhs, best->attrs, c->merge_limit);
754
755 if (nhs->next)
756 {
757 best = rte_cow_rta(best, rte_update_pool);
758 best->attrs->dest = RTD_MULTIPATH;
759 best->attrs->nexthops = nhs;
760 }
761 }
762
763 if (best != best0)
764 *rt_free = best;
765
766 return best;
767 }
768
769
770 static void
771 rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
772 rte *new_best, rte*old_best, int refeed)
773 {
774 // struct proto *p = c->proto;
775
776 rte *new_best_free = NULL;
777 rte *old_best_free = NULL;
778 rte *new_changed_free = NULL;
779 rte *old_changed_free = NULL;
780 ea_list *tmpa = NULL;
781
782 /* We assume that all rte arguments are either NULL or rte_is_valid() */
783
784 /* This check should be done by the caller */
785 if (!new_best && !old_best)
786 return;
787
788 /* Check whether the change is relevant to the merged route */
789 if ((new_best == old_best) && !refeed)
790 {
791 new_changed = rte_mergable(new_best, new_changed) ?
792 export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL;
793
794 old_changed = rte_mergable(old_best, old_changed) ?
795 export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL;
796
797 if (!new_changed && !old_changed)
798 return;
799 }
800
801 if (new_best)
802 c->stats.exp_updates_received++;
803 else
804 c->stats.exp_withdraws_received++;
805
806 /* Prepare new merged route */
807 if (new_best)
808 new_best = rt_export_merged(c, net, &new_best_free, &tmpa, 0);
809
810 /* Prepare old merged route (without proper merged next hops) */
811 /* There are some issues with running filter on old route - see rt_notify_basic() */
812 if (old_best && !refeed)
813 old_best = export_filter(c, old_best, &old_best_free, NULL, 1);
814
815 if (new_best || old_best)
816 do_rt_notify(c, net, new_best, old_best, tmpa, refeed);
817
818 /* Discard temporary rte's */
819 if (new_best_free)
820 rte_free(new_best_free);
821 if (old_best_free)
822 rte_free(old_best_free);
823 if (new_changed_free)
824 rte_free(new_changed_free);
825 if (old_changed_free)
826 rte_free(old_changed_free);
827 }
828
829
830 /**
831 * rte_announce - announce a routing table change
832 * @tab: table the route has been added to
833  * @type: type of route announcement (RA_OPTIMAL, RA_ACCEPTED, RA_MERGED or RA_ANY)
834 * @net: network in question
835 * @new: the new route to be announced
836 * @old: the previous route for the same network
837 *
838  * This function gets a routing table update and announces it
839  * to all protocols that accept the given type of route announcement
840  * and are connected to the same table by their announcement hooks.
841  *
842  * A route announcement of type RA_OPTIMAL is generated when the optimal
843  * route (in routing table @tab) changes. In that case @old stores the
844  * old optimal route.
845  *
846  * A route announcement of type RA_ANY is generated when any route (in
847  * routing table @tab) changes. In that case @old stores the old route
848  * from the same protocol.
849 *
850 * For each appropriate protocol, we first call its import_control()
851 * hook which performs basic checks on the route (each protocol has a
852 * right to veto or force accept of the route before any filter is
853 * asked) and adds default values of attributes specific to the new
854 * protocol (metrics, tags etc.). Then it consults the protocol's
855 * export filter and if it accepts the route, the rt_notify() hook of
856 * the protocol gets called.
857 */
858 static void
859 rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
860 rte *new_best, rte *old_best, rte *before_old)
861 {
862 if (!rte_is_valid(new))
863 new = NULL;
864
865 if (!rte_is_valid(old))
866 old = before_old = NULL;
867
868 if (!rte_is_valid(new_best))
869 new_best = NULL;
870
871 if (!rte_is_valid(old_best))
872 old_best = NULL;
873
874 if (!old && !new)
875 return;
876
877 if ((type == RA_OPTIMAL) && tab->hostcache)
878 rt_notify_hostcache(tab, net);
879
880 struct channel *c; node *n;
881 WALK_LIST2(c, n, tab->channels, table_node)
882 {
883 if (c->export_state == ES_DOWN)
884 continue;
885
886 if (c->ra_mode == type)
887 if (type == RA_ACCEPTED)
888 rt_notify_accepted(c, net, new, old, before_old, 0);
889 else if (type == RA_MERGED)
890 rt_notify_merged(c, net, new, old, new_best, old_best, 0);
891 else
892 rt_notify_basic(c, net, new, old, 0);
893 }
894 }
895
896 static inline int
897 rte_validate(rte *e)
898 {
899 int c;
900 net *n = e->net;
901
902 // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
903 if (!net_validate(n->n.addr))
904 {
905 log(L_WARN "Ignoring bogus prefix %N received via %s",
906 n->n.addr, e->sender->proto->name);
907 return 0;
908 }
909
910 c = net_classify(n->n.addr);
911 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
912 {
913 log(L_WARN "Ignoring bogus route %N received via %s",
914 n->n.addr, e->sender->proto->name);
915 return 0;
916 }
917
918 return 1;
919 }
920
921 /**
922 * rte_free - delete a &rte
923 * @e: &rte to be deleted
924 *
925 * rte_free() deletes the given &rte from the routing table it's linked to.
926 */
927 void
928 rte_free(rte *e)
929 {
930 if (rta_is_cached(e->attrs))
931 rta_free(e->attrs);
932 sl_free(rte_slab, e);
933 }
934
935 static inline void
936 rte_free_quick(rte *e)
937 {
938 rta_free(e->attrs);
939 sl_free(rte_slab, e);
940 }
941
942 static int
943 rte_same(rte *x, rte *y)
944 {
945 return
946 x->attrs == y->attrs &&
947 x->flags == y->flags &&
948 x->pflags == y->pflags &&
949 x->pref == y->pref &&
950 (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
951 }
952
953 static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
954
955 static void
956 rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
957 {
958 struct proto *p = c->proto;
959 struct rtable *table = c->table;
960 struct proto_stats *stats = &c->stats;
961 static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
962 rte *before_old = NULL;
963 rte *old_best = net->routes;
964 rte *old = NULL;
965 rte **k;
966
967 k = &net->routes; /* Find and remove original route from the same protocol */
968 while (old = *k)
969 {
970 if (old->attrs->src == src)
971 {
972 /* If there is the same route in the routing table but from
973 * a different sender, then there are two paths from the
974 * source protocol to this routing table through transparent
975 * pipes, which is not allowed.
976 *
977        * We log that and ignore the route. If it is a withdraw, we
978        * ignore it completely (there might be 'spurious withdraws',
979        * see FIXME in do_rte_announce())
980 */
981 if (old->sender->proto != p)
982 {
983 if (new)
984 {
985 log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
986 net->n.addr, table->name);
987 rte_free_quick(new);
988 }
989 return;
990 }
991
992 if (new && rte_same(old, new))
993 {
994 /* No changes, ignore the new route */
995
996 if (!rte_is_filtered(new))
997 {
998 stats->imp_updates_ignored++;
999 rte_trace_in(D_ROUTES, p, new, "ignored");
1000 }
1001
1002 rte_free_quick(new);
1003 return;
1004 }
1005 *k = old->next;
1006 break;
1007 }
1008 k = &old->next;
1009 before_old = old;
1010 }
1011
1012 if (!old)
1013 before_old = NULL;
1014
1015 if (!old && !new)
1016 {
1017 stats->imp_withdraws_ignored++;
1018 return;
1019 }
1020
1021 int new_ok = rte_is_ok(new);
1022 int old_ok = rte_is_ok(old);
1023
1024 struct channel_limit *l = &c->rx_limit;
1025 if (l->action && !old && new)
1026 {
1027 u32 all_routes = stats->imp_routes + stats->filt_routes;
1028
1029 if (all_routes >= l->limit)
1030 channel_notify_limit(c, l, PLD_RX, all_routes);
1031
1032 if (l->state == PLS_BLOCKED)
1033 {
1034      /* With the receive limit, the situation is simple: old is NULL, so
1035         we just free new and exit as if nothing happened */
1036
1037 stats->imp_updates_ignored++;
1038 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1039 rte_free_quick(new);
1040 return;
1041 }
1042 }
1043
1044 l = &c->in_limit;
1045 if (l->action && !old_ok && new_ok)
1046 {
1047 if (stats->imp_routes >= l->limit)
1048 channel_notify_limit(c, l, PLD_IN, stats->imp_routes);
1049
1050 if (l->state == PLS_BLOCKED)
1051 {
1052      /* With the import limit, the situation is more complicated. We
1053         shouldn't just drop the route; we should handle it as if it
1054         were filtered. We also have to continue the route
1055         processing if old or new is non-NULL, but we should exit
1056         if both are NULL, as this case is probably assumed to be
1057         already handled. */
1058
1059 stats->imp_updates_ignored++;
1060 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1061
1062 if (c->in_keep_filtered)
1063 new->flags |= REF_FILTERED;
1064 else
1065 { rte_free_quick(new); new = NULL; }
1066
1067 /* Note that old && !new could be possible when
1068 c->in_keep_filtered changed in the recent past. */
1069
1070 if (!old && !new)
1071 return;
1072
1073 new_ok = 0;
1074 goto skip_stats1;
1075 }
1076 }
1077
1078 if (new_ok)
1079 stats->imp_updates_accepted++;
1080 else if (old_ok)
1081 stats->imp_withdraws_accepted++;
1082 else
1083 stats->imp_withdraws_ignored++;
1084
1085 skip_stats1:
1086
1087 if (new)
1088 rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
1089 if (old)
1090 rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
1091
1092 if (table->config->sorted)
1093 {
1094 /* If routes are sorted, just insert new route to appropriate position */
1095 if (new)
1096 {
1097 if (before_old && !rte_better(new, before_old))
1098 k = &before_old->next;
1099 else
1100 k = &net->routes;
1101
1102 for (; *k; k=&(*k)->next)
1103 if (rte_better(new, *k))
1104 break;
1105
1106 new->next = *k;
1107 *k = new;
1108 }
1109 }
1110 else
1111 {
1112      /* If routes are not sorted, find the best route and move it to
1113         the first position. There are several optimized cases. */
1114
1115 if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1116 goto do_recalculate;
1117
1118 if (new && rte_better(new, old_best))
1119 {
1120          /* The first case - the new route is clearly optimal,
1121             we link it at the first position */
1122
1123 new->next = net->routes;
1124 net->routes = new;
1125 }
1126 else if (old == old_best)
1127 {
1128 /* The second case - the old best route disappeared, we add the
1129 new route (if we have any) to the list (we don't care about
1130 position) and then we elect the new optimal route and relink
1131             that route at the first position and announce it. The new optimal
1132             route might be NULL if there are no more routes */
1133
1134 do_recalculate:
1135 /* Add the new route to the list */
1136 if (new)
1137 {
1138 new->next = net->routes;
1139 net->routes = new;
1140 }
1141
1142 /* Find a new optimal route (if there is any) */
1143 if (net->routes)
1144 {
1145 rte **bp = &net->routes;
1146 for (k=&(*bp)->next; *k; k=&(*k)->next)
1147 if (rte_better(*k, *bp))
1148 bp = k;
1149
1150 /* And relink it */
1151 rte *best = *bp;
1152 *bp = best->next;
1153 best->next = net->routes;
1154 net->routes = best;
1155 }
1156 }
1157 else if (new)
1158 {
1159 /* The third case - the new route is not better than the old
1160 best route (therefore old_best != NULL) and the old best
1161 route was not removed (therefore old_best == net->routes).
1162 We just link the new route after the old best route. */
1163
1164 ASSERT(net->routes != NULL);
1165 new->next = net->routes->next;
1166 net->routes->next = new;
1167 }
1168      /* The fourth (empty) case - a suboptimal route was removed, nothing to do */
1169 }
1170
1171 if (new)
1172 new->lastmod = now;
1173
1174 /* Log the route change */
1175 if (p->debug & D_ROUTES)
1176 {
1177 if (new_ok)
1178 rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1179 else if (old_ok)
1180 {
1181 if (old != old_best)
1182 rte_trace(p, old, '>', "removed");
1183 else if (rte_is_ok(net->routes))
1184 rte_trace(p, old, '>', "removed [replaced]");
1185 else
1186 rte_trace(p, old, '>', "removed [sole]");
1187 }
1188 }
1189
1190 /* Propagate the route change */
1191 rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1192 if (net->routes != old_best)
1193 rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1194 if (table->config->sorted)
1195 rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1196 rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1197
1198 if (!net->routes &&
1199 (table->gc_counter++ >= table->config->gc_max_ops) &&
1200 (table->gc_time + table->config->gc_min_time <= now))
1201 rt_schedule_prune(table);
1202
1203 if (old_ok && p->rte_remove)
1204 p->rte_remove(net, old);
1205 if (new_ok && p->rte_insert)
1206 p->rte_insert(net, new);
1207
1208 if (old)
1209 rte_free_quick(old);
1210 }
1211
1212 static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */
1213
1214 static inline void
1215 rte_update_lock(void)
1216 {
1217 rte_update_nest_cnt++;
1218 }
1219
1220 static inline void
1221 rte_update_unlock(void)
1222 {
1223 if (!--rte_update_nest_cnt)
1224 lp_flush(rte_update_pool);
1225 }
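/*
 * A sketch of the nesting discipline: rte_update() may recurse (e.g.
 * when the pipe protocol re-announces a route into another table), so
 * the shared linpool is flushed only when the outermost lock drops:
 *
 *   rte_update_lock();       // cnt = 1, allocate from rte_update_pool
 *     rte_update_lock();     // cnt = 2, nested update reuses the pool
 *     rte_update_unlock();   // cnt = 1, no flush yet
 *   rte_update_unlock();     // cnt = 0, lp_flush(rte_update_pool)
 */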
1226
1227 static inline void
1228 rte_hide_dummy_routes(net *net, rte **dummy)
1229 {
1230 if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1231 {
1232 *dummy = net->routes;
1233 net->routes = (*dummy)->next;
1234 }
1235 }
1236
1237 static inline void
1238 rte_unhide_dummy_routes(net *net, rte **dummy)
1239 {
1240 if (*dummy)
1241 {
1242 (*dummy)->next = net->routes;
1243 net->routes = *dummy;
1244 }
1245 }
1246
1247 /**
1248 * rte_update - enter a new update to a routing table
1249 * @table: table to be updated
1250 * @c: channel doing the update
1251 * @net: network node
1252 * @p: protocol submitting the update
1253 * @src: protocol originating the update
1254 * @new: a &rte representing the new route or %NULL for route removal.
1255 *
1256 * This function is called by the routing protocols whenever they discover
1257 * a new route or wish to update/remove an existing route. The right announcement
1258 * sequence is to build route attributes first (either un-cached with @aflags set
1259 * to zero or a cached one using rta_lookup(); in this case please note that
1260 * you need to increase the use count of the attributes yourself by calling
1261 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1262 * the appropriate data and finally submit the new &rte by calling rte_update().
1263 *
1264 * @src specifies the protocol that originally created the route and the meaning
1265  * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
1266  * same value as @new->attrs->proto. @p specifies the protocol that called
1267  * rte_update(). In most cases it is the same protocol as @src. rte_update()
1268  * stores @p in @new->sender.
1269 *
1270  * When rte_update() gets any route, it automatically validates it (checks
1271  * whether the network and next hop address are valid IP addresses and also
1272  * whether a normal routing protocol doesn't try to smuggle a host or link
1273  * scope route into the table), converts all protocol-dependent attributes stored
1274  * in the &rte to temporary extended attributes, consults the import filters of the
1275  * protocol to see if the route should be accepted and/or its attributes modified,
1276  * and stores the temporary attributes back to the &rte.
1277 *
1278 * Now, having a "public" version of the route, we
1279 * automatically find any old route defined by the protocol @src
1280  * for network @n, replace it with the new one (or remove it if @new is %NULL),
1281 * recalculate the optimal route for this destination and finally broadcast
1282 * the change (if any) to all routing protocols by calling rte_announce().
1283 *
1284 * All memory used for attribute lists and other temporary allocations is taken
1285 * from a special linear pool @rte_update_pool and freed when rte_update()
1286 * finishes.
1287 */
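/*
 * A condensed sketch of the sequence described above (the rta fields
 * and the source handle shown are illustrative assumptions, not a
 * verbatim recipe):
 *
 *   rta a = { .source = RTS_STATIC, .scope = SCOPE_UNIVERSE,
 *             .dest = RTD_ROUTER, .gw = gw, .iface = ifa };
 *   rte *e = rte_get_temp(rta_lookup(&a));  // temporary rte, cached rta
 *   rte_update2(c, n, e, src);              // announce the route
 *   ...
 *   rte_update2(c, n, NULL, src);           // later: withdraw it
 */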
1288
1289 void
1290 rte_update2(struct channel *c, net_addr *n, rte *new, struct rte_src *src)
1291 {
1292 struct proto *p = c->proto;
1293 struct proto_stats *stats = &c->stats;
1294 struct filter *filter = c->in_filter;
1295 ea_list *tmpa = NULL;
1296 rte *dummy = NULL;
1297 net *nn;
1298
1299 ASSERT(c->channel_state == CS_UP);
1300
1301 rte_update_lock();
1302 if (new)
1303 {
1304 nn = net_get(c->table, n);
1305
1306 new->net = nn;
1307 new->sender = c;
1308
1309 if (!new->pref)
1310 new->pref = c->preference;
1311
1312 stats->imp_updates_received++;
1313 if (!rte_validate(new))
1314 {
1315 rte_trace_in(D_FILTERS, p, new, "invalid");
1316 stats->imp_updates_invalid++;
1317 goto drop;
1318 }
1319
1320 if (filter == FILTER_REJECT)
1321 {
1322 stats->imp_updates_filtered++;
1323 rte_trace_in(D_FILTERS, p, new, "filtered out");
1324
1325 if (! c->in_keep_filtered)
1326 goto drop;
1327
1328      /* new is a private copy, we can modify it */
1329 new->flags |= REF_FILTERED;
1330 }
1331 else
1332 {
1333 tmpa = make_tmp_attrs(new, rte_update_pool);
1334 if (filter && (filter != FILTER_REJECT))
1335 {
1336 ea_list *old_tmpa = tmpa;
1337 int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
1338 if (fr > F_ACCEPT)
1339 {
1340 stats->imp_updates_filtered++;
1341 rte_trace_in(D_FILTERS, p, new, "filtered out");
1342
1343 if (! c->in_keep_filtered)
1344 goto drop;
1345
1346 new->flags |= REF_FILTERED;
1347 }
1348 if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
1349 src->proto->store_tmp_attrs(new, tmpa);
1350 }
1351 }
1352 if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1353 new->attrs = rta_lookup(new->attrs);
1354 new->flags |= REF_COW;
1355 }
1356 else
1357 {
1358 stats->imp_withdraws_received++;
1359
1360 if (!(nn = net_find(c->table, n)) || !src)
1361 {
1362 stats->imp_withdraws_ignored++;
1363 rte_update_unlock();
1364 return;
1365 }
1366 }
1367
1368 recalc:
1369 rte_hide_dummy_routes(nn, &dummy);
1370 rte_recalculate(c, nn, new, src);
1371 rte_unhide_dummy_routes(nn, &dummy);
1372 rte_update_unlock();
1373 return;
1374
1375 drop:
1376 rte_free(new);
1377 new = NULL;
1378 goto recalc;
1379 }
1380
1381 /* Independent call to rte_announce(), used from next hop
1382 recalculation, outside of rte_update(). new must be non-NULL */
1383 static inline void
1384 rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
1385 rte *new_best, rte *old_best)
1386 {
1387 rte_update_lock();
1388 rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
1389 rte_update_unlock();
1390 }
1391
1392 void
1393 rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during garbage collection */
1394 {
1395 rte_update_lock();
1396 rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
1397 rte_update_unlock();
1398 }
1399
1400 /* Check rtable for the best route to a given net, whether it would be exported to p */
1401 int
1402 rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
1403 {
1404 net *n = net_find(t, a);
1405 rte *rt = n ? n->routes : NULL;
1406
1407 if (!rte_is_valid(rt))
1408 return 0;
1409
1410 rte_update_lock();
1411
1412 /* Rest is stripped down export_filter() */
1413 ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
1414 int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
1415 if (v == RIC_PROCESS)
1416 v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
1417
1418 /* Discard temporary rte */
1419 if (rt != n->routes)
1420 rte_free(rt);
1421
1422 rte_update_unlock();
1423
1424 return v > 0;
1425 }
1426
1427
1428 /**
1429 * rt_refresh_begin - start a refresh cycle
1430 * @t: related routing table
1431  * @c: related channel
1432 *
1433  * This function starts a refresh cycle for the given routing table and announce
1434  * hook. The refresh cycle is a sequence where the protocol sends all its valid
1435  * routes to the routing table (by rte_update()). After that, all protocol
1436  * routes (more precisely, routes with @c as @sender) not sent during the
1437  * refresh cycle but still in the table from the past are pruned. This is
1438  * implemented by marking all related routes with the REF_STALE flag in
1439  * rt_refresh_begin(), then marking all related stale routes with the REF_DISCARD
1440  * flag in rt_refresh_end(), and then removing such routes in the prune loop.
1441 */
1442 void
1443 rt_refresh_begin(rtable *t, struct channel *c)
1444 {
1445 FIB_WALK(&t->fib, net, n)
1446 {
1447 rte *e;
1448 for (e = n->routes; e; e = e->next)
1449 if (e->sender == c)
1450 e->flags |= REF_STALE;
1451 }
1452 FIB_WALK_END;
1453 }
1454
1455 /**
1456 * rt_refresh_end - end a refresh cycle
1457 * @t: related routing table
1458 * @c: related channel
1459 *
1460  * This function ends a refresh cycle for the given routing table and announce
1461  * hook. See rt_refresh_begin() for a description of refresh cycles.
1462 */
1463 void
1464 rt_refresh_end(rtable *t, struct channel *c)
1465 {
1466 int prune = 0;
1467
1468 FIB_WALK(&t->fib, net, n)
1469 {
1470 rte *e;
1471 for (e = n->routes; e; e = e->next)
1472 if ((e->sender == c) && (e->flags & REF_STALE))
1473 {
1474 e->flags |= REF_DISCARD;
1475 prune = 1;
1476 }
1477 }
1478 FIB_WALK_END;
1479
1480 if (prune)
1481 rt_schedule_prune(t);
1482 }
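/*
 * A protocol-side sketch of a whole refresh cycle as described above
 * (rte_update() arguments elided):
 *
 *   rt_refresh_begin(tab, c);   // mark all of c's routes REF_STALE
 *   // ... re-send every valid route via rte_update(), which
 *   // replaces the stale entries ...
 *   rt_refresh_end(tab, c);     // surviving stale routes get
 *                               // REF_DISCARD and are pruned
 */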
1483
1484
1485 /**
1486 * rte_dump - dump a route
1487 * @e: &rte to be dumped
1488 *
1489  * This function dumps the contents of a &rte to the debug output.
1490 */
1491 void
1492 rte_dump(rte *e)
1493 {
1494 net *n = e->net;
1495 debug("%-1N ", n->n.addr);
1496 debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
1497 rta_dump(e->attrs);
1498 if (e->attrs->src->proto->proto->dump_attrs)
1499 e->attrs->src->proto->proto->dump_attrs(e);
1500 debug("\n");
1501 }
1502
1503 /**
1504 * rt_dump - dump a routing table
1505 * @t: routing table to be dumped
1506 *
1507  * This function dumps the contents of a given routing table to the debug output.
1508 */
1509 void
1510 rt_dump(rtable *t)
1511 {
1512 debug("Dump of routing table <%s>\n", t->name);
1513 #ifdef DEBUGGING
1514 fib_check(&t->fib);
1515 #endif
1516 FIB_WALK(&t->fib, net, n)
1517 {
1518 rte *e;
1519 for(e=n->routes; e; e=e->next)
1520 rte_dump(e);
1521 }
1522 FIB_WALK_END;
1523 debug("\n");
1524 }
1525
1526 /**
1527 * rt_dump_all - dump all routing tables
1528 *
1529  * This function dumps the contents of all routing tables to the debug output.
1530 */
1531 void
1532 rt_dump_all(void)
1533 {
1534 rtable *t;
1535
1536 WALK_LIST(t, routing_tables)
1537 rt_dump(t);
1538 }
1539
1540 static inline void
1541 rt_schedule_hcu(rtable *tab)
1542 {
1543 if (tab->hcu_scheduled)
1544 return;
1545
1546 tab->hcu_scheduled = 1;
1547 ev_schedule(tab->rt_event);
1548 }
1549
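/*
 * Note on nhu_state/prune_state below: they encode a small state
 * machine - 0 = idle, 1 = scheduled, 2 = running, 3 = running with
 * another round already requested. Scheduling sets bit 0 (0->1,
 * 2->3); a finished run keeps only bit 0 (2->0, 3->1) and
 * reschedules itself if the state is still nonzero.
 */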
1550 static inline void
1551 rt_schedule_nhu(rtable *tab)
1552 {
1553 if (tab->nhu_state == 0)
1554 ev_schedule(tab->rt_event);
1555
1556 /* state change 0->1, 2->3 */
1557 tab->nhu_state |= 1;
1558 }
1559
1560 void
1561 rt_schedule_prune(rtable *tab)
1562 {
1563 if (tab->prune_state == 0)
1564 ev_schedule(tab->rt_event);
1565
1566 /* state change 0->1, 2->3 */
1567 tab->prune_state |= 1;
1568 }
1569
1570
1571 static void
1572 rt_event(void *ptr)
1573 {
1574 rtable *tab = ptr;
1575
1576 rt_lock_table(tab);
1577
1578 if (tab->hcu_scheduled)
1579 rt_update_hostcache(tab);
1580
1581 if (tab->nhu_state)
1582 rt_next_hop_update(tab);
1583
1584 if (tab->prune_state)
1585 rt_prune_table(tab);
1586
1587 rt_unlock_table(tab);
1588 }
1589
1590 void
1591 rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1592 {
1593 bzero(t, sizeof(*t));
1594 t->name = name;
1595 t->config = cf;
1596 t->addr_type = cf ? cf->addr_type : NET_IP4;
1597 fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1598 init_list(&t->channels);
1599
1600 if (cf)
1601 {
1602 t->rt_event = ev_new(p);
1603 t->rt_event->hook = rt_event;
1604 t->rt_event->data = t;
1605 t->gc_time = now;
1606 }
1607 }
1608
1609 /**
1610 * rt_init - initialize routing tables
1611 *
1612 * This function is called during BIRD startup. It initializes the
1613 * routing table module.
1614 */
1615 void
1616 rt_init(void)
1617 {
1618 rta_init();
1619 rt_table_pool = rp_new(&root_pool, "Routing tables");
1620 rte_update_pool = lp_new(rt_table_pool, 4080);
1621 rte_slab = sl_new(rt_table_pool, sizeof(rte));
1622 init_list(&routing_tables);
1623 }
1624
1625
1626 /**
1627 * rt_prune_table - prune a routing table
1628 *
1629  * The prune loop scans routing tables and removes routes belonging to flushing
1630  * protocols, discarded routes and also stale network entries. It is called from
1631  * rt_event(). The event is rescheduled if the current iteration does not finish
1632  * the table. The pruning is directed by the prune state (@prune_state),
1633  * specifying whether the prune cycle is scheduled or running, and there
1634  * is also a persistent pruning iterator (@prune_fit).
1635  *
1636  * The prune loop is also used for channel flushing. For this purpose, the
1637 * channels to flush are marked before the iteration and notified after the
1638 * iteration.
1639 */
1640 static void
1641 rt_prune_table(rtable *tab)
1642 {
1643 struct fib_iterator *fit = &tab->prune_fit;
1644 int limit = 512;
1645
1646 struct channel *c;
1647 node *n, *x;
1648
1649 DBG("Pruning route table %s\n", tab->name);
1650 #ifdef DEBUGGING
1651 fib_check(&tab->fib);
1652 #endif
1653
1654 if (tab->prune_state == 0)
1655 return;
1656
1657 if (tab->prune_state == 1)
1658 {
1659 /* Mark channels to flush */
1660 WALK_LIST2(c, n, tab->channels, table_node)
1661 if (c->channel_state == CS_FLUSHING)
1662 c->flush_active = 1;
1663
1664 FIB_ITERATE_INIT(fit, &tab->fib);
1665 tab->prune_state = 2;
1666 }
1667
1668 again:
1669 FIB_ITERATE_START(&tab->fib, fit, net, n)
1670 {
1671 rte *e;
1672
1673 rescan:
1674 for (e=n->routes; e; e=e->next)
1675 if (e->sender->flush_active || (e->flags & REF_DISCARD))
1676 {
1677 if (limit <= 0)
1678 {
1679 FIB_ITERATE_PUT(fit);
1680 ev_schedule(tab->rt_event);
1681 return;
1682 }
1683
1684 rte_discard(tab, e);
1685 limit--;
1686
1687 goto rescan;
1688 }
1689
1690 if (!n->routes) /* Orphaned FIB entry */
1691 {
1692 FIB_ITERATE_PUT(fit);
1693 fib_delete(&tab->fib, n);
1694 goto again;
1695 }
1696 }
1697 FIB_ITERATE_END;
1698
1699 #ifdef DEBUGGING
1700 fib_check(&tab->fib);
1701 #endif
1702
1703 tab->gc_counter = 0;
1704 tab->gc_time = now;
1705
1706 /* state change 2->0, 3->1 */
1707 tab->prune_state &= 1;
1708
1709 if (tab->prune_state > 0)
1710 ev_schedule(tab->rt_event);
1711
1712 /* FIXME: This should be handled in a better way */
1713 rt_prune_sources();
1714
1715 /* Close flushed channels */
1716 WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
1717 if (c->flush_active)
1718 {
1719 c->flush_active = 0;
1720 channel_set_state(c, CS_DOWN);
1721 }
1722
1723 return;
1724 }
1725
1726 void
1727 rt_preconfig(struct config *c)
1728 {
1729 init_list(&c->tables);
1730
1731 rt_new_table(cf_get_symbol("master4"), NET_IP4);
1732 rt_new_table(cf_get_symbol("master6"), NET_IP6);
1733 }
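/*
 * The implicit master4/master6 tables created here correspond to what
 * a user could declare explicitly in the configuration, e.g. (BIRD 2
 * syntax, shown for illustration):
 *
 *   ipv4 table master4;
 *   ipv6 table master6;
 *
 * Additional tables declared in the config reach rt_new_table() below
 * through the parser.
 */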
1734
1735
1736 /*
1737  * Some functions for handling internal next hop updates
1738 * triggered by rt_schedule_nhu().
1739 */
1740
1741 static inline int
1742 rta_next_hop_outdated(rta *a)
1743 {
1744 struct hostentry *he = a->hostentry;
1745
1746 if (!he)
1747 return 0;
1748
1749 if (!he->src)
1750 return a->dest != RTD_UNREACHABLE;
1751
1752 return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
1753 (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1754 !mpnh_same(a->nexthops, he->src->nexthops);
1755 }
1756
1757 static inline void
1758 rta_apply_hostentry(rta *a, struct hostentry *he)
1759 {
1760 a->hostentry = he;
1761 a->iface = he->src ? he->src->iface : NULL;
1762 a->gw = he->gw;
1763 a->dest = he->dest;
1764 a->igp_metric = he->igp_metric;
1765 a->nexthops = he->src ? he->src->nexthops : NULL;
1766 }
1767
1768 static inline rte *
1769 rt_next_hop_update_rte(rtable *tab, rte *old)
1770 {
1771 rta a;
1772 memcpy(&a, old->attrs, sizeof(rta));
1773 rta_apply_hostentry(&a, old->attrs->hostentry);
1774 a.aflags = 0;
1775
1776 rte *e = sl_alloc(rte_slab);
1777 memcpy(e, old, sizeof(rte));
1778 e->attrs = rta_lookup(&a);
1779
1780 return e;
1781 }
1782
1783 static inline int
1784 rt_next_hop_update_net(rtable *tab, net *n)
1785 {
1786 rte **k, *e, *new, *old_best, **new_best;
1787 int count = 0;
1788 int free_old_best = 0;
1789
1790 old_best = n->routes;
1791 if (!old_best)
1792 return 0;
1793
1794 for (k = &n->routes; e = *k; k = &e->next)
1795 if (rta_next_hop_outdated(e->attrs))
1796 {
1797 new = rt_next_hop_update_rte(tab, e);
1798 *k = new;
1799
1800 rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
1801 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
1802
1803 /* Call a pre-comparison hook */
1804 /* Not really an efficient way to compute this */
1805 if (e->attrs->src->proto->rte_recalculate)
1806 e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
1807
1808 if (e != old_best)
1809 rte_free_quick(e);
1810 else /* Freeing of the old best rte is postponed */
1811 free_old_best = 1;
1812
1813 e = new;
1814 count++;
1815 }
1816
1817 if (!count)
1818 return 0;
1819
1820 /* Find the new best route */
1821 new_best = NULL;
1822 for (k = &n->routes; e = *k; k = &e->next)
1823 {
1824 if (!new_best || rte_better(e, *new_best))
1825 new_best = k;
1826 }
1827
1828 /* Relink the new best route to the first position */
1829 new = *new_best;
1830 if (new != n->routes)
1831 {
1832 *new_best = new->next;
1833 new->next = n->routes;
1834 n->routes = new;
1835 }
1836
1837 /* Announce the new best route */
1838 if (new != old_best)
1839 {
1840 rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
1841 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
1842 }
1843
1844 /* FIXME: Better announcement of merged routes */
1845 rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
1846
1847 if (free_old_best)
1848 rte_free_quick(old_best);
1849
1850 return count;
1851 }
1852
1853 static void
1854 rt_next_hop_update(rtable *tab)
1855 {
1856 struct fib_iterator *fit = &tab->nhu_fit;
1857 int max_feed = 32;
1858
1859 if (tab->nhu_state == 0)
1860 return;
1861
1862 if (tab->nhu_state == 1)
1863 {
1864 FIB_ITERATE_INIT(fit, &tab->fib);
1865 tab->nhu_state = 2;
1866 }
1867
1868 FIB_ITERATE_START(&tab->fib, fit, net, n)
1869 {
1870 if (max_feed <= 0)
1871 {
1872 FIB_ITERATE_PUT(fit);
1873 ev_schedule(tab->rt_event);
1874 return;
1875 }
1876 max_feed -= rt_next_hop_update_net(tab, n);
1877 }
1878 FIB_ITERATE_END;
1879
1880 /* state change 2->0, 3->1 */
1881 tab->nhu_state &= 1;
1882
1883 if (tab->nhu_state > 0)
1884 ev_schedule(tab->rt_event);
1885 }
1886
1887
1888 struct rtable_config *
1889 rt_new_table(struct symbol *s, uint addr_type)
1890 {
1891   /* Hack that allows one to 'redefine' the master table */
1892 if ((s->class == SYM_TABLE) &&
1893 (s->def == new_config->def_tables[addr_type]) &&
1894 ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
1895 return s->def;
1896
1897 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
1898
1899 cf_define_symbol(s, SYM_TABLE, c);
1900 c->name = s->name;
1901 c->addr_type = addr_type;
1902 c->gc_max_ops = 1000;
1903 c->gc_min_time = 5;
1904
1905 add_tail(&new_config->tables, &c->n);
1906
1907   /* The first table of each type is kept as the default */
1908 if (! new_config->def_tables[addr_type])
1909 new_config->def_tables[addr_type] = c;
1910
1911 return c;
1912 }
1913
1914 /**
1915 * rt_lock_table - lock a routing table
1916 * @r: routing table to be locked
1917 *
1918 * Lock a routing table, because it's in use by a protocol,
1919 * preventing it from being freed when it gets undefined in a new
1920 * configuration.
1921 */
1922 void
1923 rt_lock_table(rtable *r)
1924 {
1925 r->use_count++;
1926 }
1927
1928 /**
1929 * rt_unlock_table - unlock a routing table
1930 * @r: routing table to be unlocked
1931 *
1932 * Unlock a routing table formerly locked by rt_lock_table(),
1933  * that is, decrease its use count and delete it if it's scheduled
1934 * for deletion by configuration changes.
1935 */
1936 void
1937 rt_unlock_table(rtable *r)
1938 {
1939 if (!--r->use_count && r->deleted)
1940 {
1941 struct config *conf = r->deleted;
1942 DBG("Deleting routing table %s\n", r->name);
1943 r->config->table = NULL;
1944 if (r->hostcache)
1945 rt_free_hostcache(r);
1946 rem_node(&r->n);
1947 fib_free(&r->fib);
1948 rfree(r->rt_event);
1949 mb_free(r);
1950 config_del_obstacle(conf);
1951 }
1952 }
1953
1954 /**
1955 * rt_commit - commit new routing table configuration
1956 * @new: new configuration
1957 * @old: original configuration or %NULL if it's boot time config
1958 *
1959 * Scan differences between @old and @new configuration and modify
1960 * the routing tables according to these changes. If @new defines a
1961 * previously unknown table, create it, if it omits a table existing
1962 * in @old, schedule it for deletion (it gets deleted when all protocols
1963 * disconnect from it by calling rt_unlock_table()), if it exists
1964 * in both configurations, leave it unchanged.
1965 */
1966 void
1967 rt_commit(struct config *new, struct config *old)
1968 {
1969 struct rtable_config *o, *r;
1970
1971 DBG("rt_commit:\n");
1972 if (old)
1973 {
1974 WALK_LIST(o, old->tables)
1975 {
1976 rtable *ot = o->table;
1977 if (!ot->deleted)
1978 {
1979 struct symbol *sym = cf_find_symbol(new, o->name);
1980 if (sym && sym->class == SYM_TABLE && !new->shutdown)
1981 {
1982 DBG("\t%s: same\n", o->name);
1983 r = sym->def;
1984 r->table = ot;
1985 ot->name = r->name;
1986 ot->config = r;
1987 if (o->sorted != r->sorted)
1988 log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
1989 }
1990 else
1991 {
1992 DBG("\t%s: deleted\n", o->name);
1993 ot->deleted = old;
1994 config_add_obstacle(old);
1995 rt_lock_table(ot);
1996 rt_unlock_table(ot);
1997 }
1998 }
1999 }
2000 }
2001
2002 WALK_LIST(r, new->tables)
2003 if (!r->table)
2004 {
2005 rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
2006 DBG("\t%s: created\n", r->name);
2007 rt_setup(rt_table_pool, t, r->name, r);
2008 add_tail(&routing_tables, &t->n);
2009 r->table = t;
2010 }
2011 DBG("\tdone\n");
2012 }
2013
2014 static inline void
2015 do_feed_channel(struct channel *c, net *n, rte *e)
2016 {
2017 rte_update_lock();
2018 if (c->ra_mode == RA_ACCEPTED)
2019 rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
2020 else if (c->ra_mode == RA_MERGED)
2021 rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
2022 else /* RA_BASIC */
2023 rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
2024 rte_update_unlock();
2025 }
2026
2027 /**
2028 * rt_feed_channel - advertise all routes to a channel
2029 * @c: channel to be fed
2030 *
2031 * This function performs one pass of advertisement of routes to a channel that
2032 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2033  * has something to do. (We avoid transferring all the routes in a single pass in
2034  * order not to monopolize CPU time.)
2035 */
2036 int
2037 rt_feed_channel(struct channel *c)
2038 {
2039 struct fib_iterator *fit = &c->feed_fit;
2040 int max_feed = 256;
2041
2042 ASSERT(c->export_state == ES_FEEDING);
2043
2044 if (!c->feed_active)
2045 {
2046 FIB_ITERATE_INIT(fit, &c->table->fib);
2047 c->feed_active = 1;
2048 }
2049
2050 FIB_ITERATE_START(&c->table->fib, fit, net, n)
2051 {
2052 rte *e = n->routes;
2053 if (max_feed <= 0)
2054 {
2055 FIB_ITERATE_PUT(fit);
2056 return 0;
2057 }
2058
2059 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2060
2061 if ((c->ra_mode == RA_OPTIMAL) ||
2062 (c->ra_mode == RA_ACCEPTED) ||
2063 (c->ra_mode == RA_MERGED))
2064 if (rte_is_valid(e))
2065 {
2066 /* In the meantime, the protocol may have gone down */
2067 if (c->export_state != ES_FEEDING)
2068 goto done;
2069
2070 do_feed_channel(c, n, e);
2071 max_feed--;
2072 }
2073
2074 if (c->ra_mode == RA_ANY)
2075 for(e = n->routes; e; e = e->next)
2076 {
2077 /* In the meantime, the protocol may have gone down */
2078 if (c->export_state != ES_FEEDING)
2079 goto done;
2080
2081 if (!rte_is_valid(e))
2082 continue;
2083
2084 do_feed_channel(c, n, e);
2085 max_feed--;
2086 }
2087 }
2088 FIB_ITERATE_END;
2089
2090 done:
2091 c->feed_active = 0;
2092 return 1;
2093 }
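
/*
 * Caller sketch (hypothetical driver; the real one lives in the protocol
 * code): rt_feed_channel() processes at most max_feed routes per call and
 * returns 0 when it parked the iterator early, so the caller re-invokes it
 * until it reports completion:
 *
 *	while (c->export_state == ES_FEEDING)
 *	  if (rt_feed_channel(c))
 *	    break;	/* feed complete */
 *
 * (The real driver yields to the event loop between calls instead of
 * spinning; this only illustrates the return-value contract.)
 */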
2094
2095 /**
2096 * rt_feed_channel_abort - abort protocol feeding
2097 * @c: channel
2098 *
2099 * This function is called by the protocol code when the protocol stops or
2100 * ceases to exist during feeding.
2101 */
2102 void
2103 rt_feed_channel_abort(struct channel *c)
2104 {
2105 if (c->feed_active)
2106 {
2107 /* Unlink the iterator */
2108 fit_get(&c->table->fib, &c->feed_fit);
2109 c->feed_active = 0;
2110 }
2111 }
2112
2113 static inline unsigned
2114 ptr_hash(void *ptr)
2115 {
2116 uintptr_t p = (uintptr_t) ptr;
2117 return p ^ (p << 8) ^ (p >> 16);
2118 }
2119
2120 static inline u32
2121 hc_hash(ip_addr a, rtable *dep)
2122 {
2123 return ipa_hash(a) ^ ptr_hash(dep);
2124 }
2125
2126 static inline void
2127 hc_insert(struct hostcache *hc, struct hostentry *he)
2128 {
2129 uint k = he->hash_key >> hc->hash_shift;
2130 he->next = hc->hash_table[k];
2131 hc->hash_table[k] = he;
2132 }
2133
2134 static inline void
2135 hc_remove(struct hostcache *hc, struct hostentry *he)
2136 {
2137 struct hostentry **hep;
2138 uint k = he->hash_key >> hc->hash_shift;
2139
2140 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2141 *hep = he->next;
2142 }
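
/*
 * (Note that hc_remove() assumes @he is linked in its bucket; the scan has
 * no NULL check, so removing an entry that is not in the table would walk
 * off the end of the chain.)
 */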
2143
2144 #define HC_DEF_ORDER 10
2145 #define HC_HI_MARK *4
2146 #define HC_HI_STEP 2
2147 #define HC_HI_ORDER 16 /* Must be at most 16 */
2148 #define HC_LO_MARK /5
2149 #define HC_LO_STEP 2
2150 #define HC_LO_ORDER 10
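
/*
 * (The MARK macros are textual operator splices: "hsize HC_HI_MARK" expands
 * to "hsize *4" and "hsize HC_LO_MARK" to "hsize /5". E.g. at order 12,
 * hsize = 4096, so the cache grows above 16384 entries and shrinks below
 * 819; the HI/LO_ORDER bounds clamp the maximum to ~0 and the minimum to 0
 * at the extreme table sizes.)
 */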
2151
2152 static void
2153 hc_alloc_table(struct hostcache *hc, unsigned order)
2154 {
2155 unsigned hsize = 1 << order;
2156 hc->hash_order = order;
2157 hc->hash_shift = 32 - order;
2158 hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
2159 hc->hash_min = (order <= HC_LO_ORDER) ? 0 : (hsize HC_LO_MARK);
2160
2161 hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2162 }
2163
2164 static void
2165 hc_resize(struct hostcache *hc, unsigned new_order)
2166 {
2167 unsigned old_size = 1 << hc->hash_order;
2168 struct hostentry **old_table = hc->hash_table;
2169 struct hostentry *he, *hen;
2170 int i;
2171
2172 hc_alloc_table(hc, new_order);
2173 for (i = 0; i < old_size; i++)
2174 for (he = old_table[i]; he != NULL; he=hen)
2175 {
2176 hen = he->next;
2177 hc_insert(hc, he);
2178 }
2179 mb_free(old_table);
2180 }
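
/*
 * (Resizing never recomputes hashes: each hostentry caches its full 32-bit
 * hash_key and hc_insert() derives the bucket from the current hash_shift,
 * so hc_resize() is a pure re-bucketing pass over the old table.)
 */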
2181
2182 static struct hostentry *
2183 hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2184 {
2185 struct hostentry *he = sl_alloc(hc->slab);
2186
2187 he->addr = a;
2188 he->link = ll;
2189 he->tab = dep;
2190 he->hash_key = k;
2191 he->uc = 0;
2192 he->src = NULL;
2193
2194 add_tail(&hc->hostentries, &he->ln);
2195 hc_insert(hc, he);
2196
2197 hc->hash_items++;
2198 if (hc->hash_items > hc->hash_max)
2199 hc_resize(hc, hc->hash_order + HC_HI_STEP);
2200
2201 return he;
2202 }
2203
2204 static void
2205 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2206 {
2207 rta_free(he->src);
2208
2209 rem_node(&he->ln);
2210 hc_remove(hc, he);
2211 sl_free(hc->slab, he);
2212
2213 hc->hash_items--;
2214 if (hc->hash_items < hc->hash_min)
2215 hc_resize(hc, hc->hash_order - HC_LO_STEP);
2216 }
2217
2218 static void
2219 rt_init_hostcache(rtable *tab)
2220 {
2221 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2222 init_list(&hc->hostentries);
2223
2224 hc->hash_items = 0;
2225 hc_alloc_table(hc, HC_DEF_ORDER);
2226 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2227
2228 hc->lp = lp_new(rt_table_pool, 1008);
2229 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2230
2231 tab->hostcache = hc;
2232 }
2233
2234 static void
2235 rt_free_hostcache(rtable *tab)
2236 {
2237 struct hostcache *hc = tab->hostcache;
2238
2239 node *n;
2240 WALK_LIST(n, hc->hostentries)
2241 {
2242 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2243 rta_free(he->src);
2244
2245 if (he->uc)
2246 log(L_ERR "Hostcache is not empty in table %s", tab->name);
2247 }
2248
2249 rfree(hc->slab);
2250 rfree(hc->lp);
2251 mb_free(hc->hash_table);
2252 mb_free(hc);
2253 }
2254
2255 static void
2256 rt_notify_hostcache(rtable *tab, net *net)
2257 {
2258 if (tab->hcu_scheduled)
2259 return;
2260
2261 if (trie_match_net(tab->hostcache->trie, net->n.addr))
2262 rt_schedule_hcu(tab);
2263 }
2264
2265 static int
2266 if_local_addr(ip_addr a, struct iface *i)
2267 {
2268 struct ifa *b;
2269
2270 WALK_LIST(b, i->addrs)
2271 if (ipa_equal(a, b->ip))
2272 return 1;
2273
2274 return 0;
2275 }
2276
2277 static u32
2278 rt_get_igp_metric(rte *rt)
2279 {
2280 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2281
2282 if (ea)
2283 return ea->u.data;
2284
2285 rta *a = rt->attrs;
2286
2287 #ifdef CONFIG_OSPF
2288 if ((a->source == RTS_OSPF) ||
2289 (a->source == RTS_OSPF_IA) ||
2290 (a->source == RTS_OSPF_EXT1))
2291 return rt->u.ospf.metric1;
2292 #endif
2293
2294 #ifdef CONFIG_RIP
2295 if (a->source == RTS_RIP)
2296 return rt->u.rip.metric;
2297 #endif
2298
2299 /* Device routes */
2300 if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
2301 return 0;
2302
2303 return IGP_METRIC_UNKNOWN;
2304 }
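
/*
 * (Fallback order above: an explicitly attached EA_GEN_IGP_METRIC attribute
 * wins; otherwise OSPF and RIP routes use their native metric; device and
 * other non-gateway routes count as metric 0; everything else is
 * IGP_METRIC_UNKNOWN.)
 */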
2305
2306 static int
2307 rt_update_hostentry(rtable *tab, struct hostentry *he)
2308 {
2309 rta *old_src = he->src;
2310 int pxlen = 0;
2311
2312 /* Reset the hostentry */
2313 he->src = NULL;
2314 he->gw = IPA_NONE;
2315 he->dest = RTD_UNREACHABLE;
2316 he->igp_metric = 0;
2317
2318 net_addr he_addr;
2319 net_fill_ip_host(&he_addr, he->addr);
2320 net *n = net_route(tab, &he_addr);
2321 if (n)
2322 {
2323 rte *e = n->routes;
2324 rta *a = e->attrs;
2325 pxlen = n->n.addr->pxlen;
2326
2327 if (a->hostentry)
2328 {
2329 /* Recursive route should not depend on another recursive route */
2330 log(L_WARN "Next hop address %I resolvable through recursive route for %N",
2331 he->addr, n->n.addr);
2332 goto done;
2333 }
2334
2335 if (a->dest == RTD_DEVICE)
2336 {
2337 if (if_local_addr(he->addr, a->iface))
2338 {
2339 /* The host address is a local address; this is not valid */
2340 log(L_WARN "Next hop address %I is a local address of iface %s",
2341 he->addr, a->iface->name);
2342 goto done;
2343 }
2344
2345 /* The host is directly reachable, use link as a gateway */
2346 he->gw = he->link;
2347 he->dest = RTD_ROUTER;
2348 }
2349 else
2350 {
2351 /* The host is reachable through some route entry */
2352 he->gw = a->gw;
2353 he->dest = a->dest;
2354 }
2355
2356 he->src = rta_clone(a);
2357 he->igp_metric = rt_get_igp_metric(e);
2358 }
2359
2360 done:
2361 /* Add a prefix range to the trie */
2362 trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
2363
2364 rta_free(old_src);
2365 return old_src != he->src;
2366 }
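
/*
 * Illustrative walk-through: resolving next hop 10.1.1.1 via a covering
 * route 10.1.0.0/16 records the prefix range /16../32 for the host address
 * in the trie. Any later route change inside that range could change the
 * resolution, so trie_match_net() fires in rt_notify_hostcache() and a
 * hostcache update gets scheduled.
 */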
2367
2368 static void
2369 rt_update_hostcache(rtable *tab)
2370 {
2371 struct hostcache *hc = tab->hostcache;
2372 struct hostentry *he;
2373 node *n, *x;
2374
2375 /* Reset the trie */
2376 lp_flush(hc->lp);
2377 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2378
2379 WALK_LIST_DELSAFE(n, x, hc->hostentries)
2380 {
2381 he = SKIP_BACK(struct hostentry, ln, n);
2382 if (!he->uc)
2383 {
2384 hc_delete_hostentry(hc, he);
2385 continue;
2386 }
2387
2388 if (rt_update_hostentry(tab, he))
2389 rt_schedule_nhu(he->tab);
2390 }
2391
2392 tab->hcu_scheduled = 0;
2393 }
2394
2395 static struct hostentry *
2396 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2397 {
2398 struct hostentry *he;
2399
2400 if (!tab->hostcache)
2401 rt_init_hostcache(tab);
2402
2403 u32 k = hc_hash(a, dep);
2404 struct hostcache *hc = tab->hostcache;
2405 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2406 if (ipa_equal(he->addr, a) && (he->tab == dep))
2407 return he;
2408
2409 he = hc_new_hostentry(hc, a, ll, dep, k);
2410 rt_update_hostentry(tab, he);
2411 return he;
2412 }
2413
2414 void
2415 rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
2416 {
2417 rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
2418 }
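
/*
 * Usage sketch (illustrative names): a protocol with recursive next hops,
 * e.g. BGP, calls this while building a route's attributes:
 *
 *	rta_set_recursive_next_hop(c->table, a, igp_table, &gw, &ll);
 *
 * binding the rta to a hostentry in igp_table. When IGP routes change,
 * rt_update_hostcache() refreshes the hostentry and schedules a next hop
 * update (rt_schedule_nhu()) on the dependent table.
 */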
2419
2420
2421 /*
2422 * CLI commands
2423 */
2424
2425 static void
2426 rt_format_via(rte *e, byte *via)
2427 {
2428 rta *a = e->attrs;
2429
2430 switch (a->dest)
2431 {
2432 case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
2433 case RTD_DEVICE: bsprintf(via, "dev %s", a->iface->name); break;
2434 case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break;
2435 case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break;
2436 case RTD_PROHIBIT: bsprintf(via, "prohibited"); break;
2437 case RTD_MULTIPATH: bsprintf(via, "multipath"); break;
2438 default: bsprintf(via, "???");
2439 }
2440 }
2441
2442 static void
2443 rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
2444 {
2445 byte via[IPA_MAX_TEXT_LENGTH+32];
2446 byte from[IPA_MAX_TEXT_LENGTH+8];
2447 byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
2448 rta *a = e->attrs;
2449 int primary = (e->net->routes == e);
2450 int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
2451 void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
2452 struct mpnh *nh;
2453
2454 rt_format_via(e, via);
2455 tm_format_datetime(tm, &config->tf_route, e->lastmod);
2456 if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
2457 bsprintf(from, " from %I", a->from);
2458 else
2459 from[0] = 0;
2460
2461 get_route_info = a->src->proto->proto->get_route_info;
2462 if (get_route_info || d->verbose)
2463 {
2464 /* Need to normalize the extended attributes */
2465 ea_list *t = tmpa;
2466 t = ea_append(t, a->eattrs);
2467 tmpa = alloca(ea_scan(t));
2468 ea_merge(t, tmpa);
2469 ea_sort(tmpa);
2470 }
2471 if (get_route_info)
2472 get_route_info(e, info, tmpa);
2473 else
2474 bsprintf(info, " (%d)", e->pref);
2475 cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name,
2476 tm, from, primary ? (sync_error ? " !" : " *") : "", info);
2477 for (nh = a->nexthops; nh; nh = nh->next)
2478 cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
2479 if (d->verbose)
2480 rta_show(c, a, tmpa);
2481 }
2482
2483 static void
2484 rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
2485 {
2486 rte *e, *ee;
2487 byte ia[NET_MAX_TEXT_LENGTH+1];
2488 struct ea_list *tmpa;
2489 struct channel *ec = d->export_channel;
2490 int first = 1;
2491 int pass = 0;
2492
2493 bsprintf(ia, "%N", n->n.addr);
2494
2495
2496 for (e = n->routes; e; e = e->next)
2497 {
2498 if (rte_is_filtered(e) != d->filtered)
2499 continue;
2500
2501 d->rt_counter++;
2502 d->net_counter += first;
2503 first = 0;
2504
2505 if (pass)
2506 continue;
2507
2508 ee = e;
2509 rte_update_lock(); /* We use the update buffer for filtering */
2510 tmpa = make_tmp_attrs(e, rte_update_pool);
2511
2512 /* Special case for merged export */
2513 if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED))
2514 {
2515 rte *rt_free;
2516 e = rt_export_merged(ec, n, &rt_free, &tmpa, 1);
2517 pass = 1;
2518
2519 if (!e)
2520 { e = ee; goto skip; }
2521 }
2522 else if (d->export_mode)
2523 {
2524 struct proto *ep = d->export_protocol;
2525 int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
2526
2527 if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED)
2528 pass = 1;
2529
2530 if (ic < 0)
2531 goto skip;
2532
2533 if (d->export_mode > RSEM_PREEXPORT)
2534 {
2535 /*
2536 * FIXME - This shows what should be exported according to current
2537 * filters, but not what was really exported. 'configure soft'
2538 * command may change the export filter and do not update routes.
2539 */
2540 int do_export = (ic > 0) ||
2541 (f_run(ec->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
2542
2543 if (do_export != (d->export_mode == RSEM_EXPORT))
2544 goto skip;
2545
2546 if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_ACCEPTED))
2547 pass = 1;
2548 }
2549 }
2550
2551 if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
2552 goto skip;
2553
2554 if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
2555 goto skip;
2556
2557 d->show_counter++;
2558 if (d->stats < 2)
2559 rt_show_rte(c, ia, e, d, tmpa);
2560 ia[0] = 0;
2561
2562 skip:
2563 if (e != ee)
2564 {
2565 rte_free(e);
2566 e = ee;
2567 }
2568 rte_update_unlock();
2569
2570 if (d->primary_only)
2571 break;
2572 }
2573 }
2574
2575 static struct channel *
2576 rt_show_export_channel(struct rt_show_data *d)
2577 {
2578 if (!d->export_protocol->rt_notify)
2579 return NULL;
2580
2581 return proto_find_channel_by_table(d->export_protocol, d->table);
2582 }
2583
2584 static void
2585 rt_show_cont(struct cli *c)
2586 {
2587 struct rt_show_data *d = c->rover;
2588 #ifdef DEBUGGING
2589 unsigned max = 4;
2590 #else
2591 unsigned max = 64;
2592 #endif
2593 struct fib *fib = &d->table->fib;
2594 struct fib_iterator *it = &d->fit;
2595
2596 if (d->export_mode)
2597 {
2598 /* Ensure we have the current export channel */
2599 d->export_channel = rt_show_export_channel(d);
2600 if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
2601 {
2602 cli_printf(c, 8005, "Channel is down");
2603 goto done;
2604 }
2605 }
2606
2607 FIB_ITERATE_START(fib, it, net, n)
2608 {
2609 if (!max--)
2610 {
2611 FIB_ITERATE_PUT(it);
2612 return;
2613 }
2614 rt_show_net(c, n, d);
2615 }
2616 FIB_ITERATE_END;
2617 if (d->stats)
2618 cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
2619 else
2620 cli_printf(c, 0, "");
2621 done:
2622 c->cont = c->cleanup = NULL;
2623 }
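
/*
 * (Like rt_feed_channel() above, this continuation handles a bounded batch
 * of networks per invocation, 64 normally and 4 when DEBUGGING, parking the
 * iterator with FIB_ITERATE_PUT() so a large table cannot monopolize the
 * CLI.)
 */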
2624
2625 static void
2626 rt_show_cleanup(struct cli *c)
2627 {
2628 struct rt_show_data *d = c->rover;
2629
2630 /* Unlink the iterator */
2631 fit_get(&d->table->fib, &d->fit);
2632 }
2633
2634 static inline rtable *
2635 rt_show_get_table(struct proto *p)
2636 {
2637 /* FIXME: Use a better way to handle multi-channel protocols */
2638
2639 if (p->main_channel)
2640 return p->main_channel->table;
2641
2642 if (!EMPTY_LIST(p->channels))
2643 return ((struct channel *) HEAD(p->channels))->table;
2644
2645 return NULL;
2646 }
2647
2648 void
2649 rt_show(struct rt_show_data *d)
2650 {
2651 net *n;
2652
2653 /* Default is either the master table or a table related to the respective protocol */
2654 if (!d->table && d->export_protocol) d->table = rt_show_get_table(d->export_protocol);
2655 if (!d->table && d->show_protocol) d->table = rt_show_get_table(d->show_protocol);
2656 if (!d->table) d->table = config->def_tables[NET_IP4]->table; /* FIXME: iterate through all tables ? */
2657
2658 /* Filtered routes are not exported and have no sensible ordering */
2659 if (d->filtered && (d->export_mode || d->primary_only))
2660 cli_msg(0, "");
2661
2662 if (!d->addr)
2663 {
2664 FIB_ITERATE_INIT(&d->fit, &d->table->fib);
2665 this_cli->cont = rt_show_cont;
2666 this_cli->cleanup = rt_show_cleanup;
2667 this_cli->rover = d;
2668 }
2669 else
2670 {
2671 if (d->export_mode)
2672 {
2673 /* Find channel associated with the export protocol */
2674 d->export_channel = rt_show_export_channel(d);
2675 if (!d->export_channel || (d->export_channel->export_state == ES_DOWN))
2676 {
2677 cli_msg(8005, "Channel is down");
2678 return;
2679 }
2680 }
2681
2682 if (d->show_for)
2683 n = net_route(d->table, d->addr);
2684 else
2685 n = net_find(d->table, d->addr);
2686
2687 if (n)
2688 rt_show_net(this_cli, n, d);
2689
2690 if (d->rt_counter)
2691 cli_msg(0, "");
2692 else
2693 cli_msg(8001, "Network not in table");
2694 }
2695 }
2696
2697 /*
2698 * Documentation for functions declared inline in route.h
2699 */
2700 #if 0
2701
2702 /**
2703 * net_find - find a network entry
2704 * @tab: a routing table
2705 * @addr: address of the network
2706 *
2707 * net_find() looks up the given network in routing table @tab and
2708 * returns a pointer to its &net entry or %NULL if no such network
2709 * exists.
2710 */
2711 static inline net *net_find(rtable *tab, net_addr *addr)
2712 { DUMMY; }
2713
2714 /**
2715 * net_get - obtain a network entry
2716 * @tab: a routing table
2717 * @addr: address of the network
2718 *
2719 * net_get() looks up the given network in routing table @tab and
2720 * returns a pointer to its &net entry. If no such entry exists, it's
2721 * created.
2722 */
2723 static inline net *net_get(rtable *tab, net_addr *addr)
2724 { DUMMY; }
2725
2726 /**
2727 * rte_cow - copy a route for writing
2728 * @r: a route entry to be copied
2729 *
2730 * rte_cow() takes a &rte and prepares it for modification. The exact action
2731 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2732 * just returned unchanged, else a new temporary entry with the same contents
2733 * is created.
2734 *
2735 * The primary use of this function is inside the filter machinery -- when
2736 * a filter wants to modify &rte contents (to change the preference or to
2737 * attach another set of attributes), it must ensure that the &rte is not
2738 * shared with anyone else (and especially that it isn't stored in any routing
2739 * table).
2740 *
2741 * Result: a pointer to the new writable &rte.
2742 */
2743 static inline rte * rte_cow(rte *r)
2744 { DUMMY; }
2745
2746 #endif