]>
git.ipfire.org Git - thirdparty/bird.git/blob - nest/rt-table.c
1cb8d2a179b40df1cbd2c8baa1aacf0b537bd15f
2 * BIRD -- Routing Tables
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/iface.h"
37 #include "lib/resource.h"
38 #include "lib/event.h"
39 #include "lib/string.h"
40 #include "conf/conf.h"
41 #include "filter/filter.h"
43 #include "lib/string.h"
44 #include "lib/alloca.h"
48 static slab
*rte_slab
;
49 static linpool
*rte_update_pool
;
53 static void rt_free_hostcache(rtable
*tab
);
54 static void rt_notify_hostcache(rtable
*tab
, net
*net
);
55 static void rt_update_hostcache(rtable
*tab
);
56 static void rt_next_hop_update(rtable
*tab
);
57 static inline void rt_prune_table(rtable
*tab
);
60 /* Like fib_route(), but skips empty net entries */
62 net_route_ip4(rtable
*t
, net_addr_ip4
*n
)
66 while (r
= net_find_valid(t
, (net_addr
*) n
), (!r
) && (n
->pxlen
> 0))
69 ip4_clrbit(&n
->prefix
, n
->pxlen
);
76 net_route_ip6(rtable
*t
, net_addr_ip6
*n
)
80 while (r
= net_find_valid(t
, (net_addr
*) n
), (!r
) && (n
->pxlen
> 0))
83 ip6_clrbit(&n
->prefix
, n
->pxlen
);
90 net_route_ip6_sadr(rtable
*t
, net_addr_ip6_sadr
*n
)
99 /* We need to do dst first matching. Since sadr addresses are hashed on dst
100 prefix only, find the hash table chain and go through it to find the
101 match with the smallest matching src prefix. */
102 for (fn
= fib_get_chain(&t
->fib
, (net_addr
*) n
); fn
; fn
= fn
->next
)
104 net_addr_ip6_sadr
*a
= (void *) fn
->addr
;
106 if (net_equal_dst_ip6_sadr(n
, a
) &&
107 net_in_net_src_ip6_sadr(n
, a
) &&
108 (a
->src_pxlen
>= best_pxlen
))
110 best
= fib_node_to_user(&t
->fib
, fn
);
111 best_pxlen
= a
->src_pxlen
;
122 ip6_clrbit(&n
->dst_prefix
, n
->dst_pxlen
);
129 net_route(rtable
*tab
, const net_addr
*n
)
131 ASSERT(tab
->addr_type
== n
->type
);
133 net_addr
*n0
= alloca(n
->length
);
141 return net_route_ip4(tab
, (net_addr_ip4
*) n0
);
146 return net_route_ip6(tab
, (net_addr_ip6
*) n0
);
149 return net_route_ip6_sadr(tab
, (net_addr_ip6_sadr
*) n0
);
158 net_roa_check_ip4(rtable
*tab
, const net_addr_ip4
*px
, u32 asn
)
160 struct net_addr_roa4 n
= NET_ADDR_ROA4(px
->prefix
, px
->pxlen
, 0, 0);
166 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
168 net_addr_roa4
*roa
= (void *) fn
->addr
;
169 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
171 if (net_equal_prefix_roa4(roa
, &n
) && rte_is_valid(r
->routes
))
174 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
183 ip4_clrbit(&n
.prefix
, n
.pxlen
);
186 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
190 net_roa_check_ip6(rtable
*tab
, const net_addr_ip6
*px
, u32 asn
)
192 struct net_addr_roa6 n
= NET_ADDR_ROA6(px
->prefix
, px
->pxlen
, 0, 0);
198 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
200 net_addr_roa6
*roa
= (void *) fn
->addr
;
201 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
203 if (net_equal_prefix_roa6(roa
, &n
) && rte_is_valid(r
->routes
))
206 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
215 ip6_clrbit(&n
.prefix
, n
.pxlen
);
218 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
222 * roa_check - check validity of route origination in a ROA table
224 * @n: network prefix to check
225 * @asn: AS number of network prefix
227 * Implements RFC 6483 route validation for the given network prefix. The
228 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
229 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
230 * a candidate ROA with matching ASN and maxlen field greater than or equal to
231 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
232 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
233 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
234 * must have type NET_IP4 or NET_IP6, respectively.
237 net_roa_check(rtable
*tab
, const net_addr
*n
, u32 asn
)
239 if ((tab
->addr_type
== NET_ROA4
) && (n
->type
== NET_IP4
))
240 return net_roa_check_ip4(tab
, (const net_addr_ip4
*) n
, asn
);
241 else if ((tab
->addr_type
== NET_ROA6
) && (n
->type
== NET_IP6
))
242 return net_roa_check_ip6(tab
, (const net_addr_ip6
*) n
, asn
);
244 return ROA_UNKNOWN
; /* Should not happen */
248 * rte_find - find a route
252 * The rte_find() function returns a route for destination @net
253 * which is from route source @src.
256 rte_find(net
*net
, struct rte_src
*src
)
258 rte
*e
= net
->routes
;
260 while (e
&& e
->attrs
->src
!= src
)
266 * rte_get_temp - get a temporary &rte
267 * @a: attributes to assign to the new route (a &rta; in case it's
268 * un-cached, rte_update() will create a cached copy automatically)
270 * Create a temporary &rte and bind it with the attributes @a.
271 * Also set route preference to the default preference set for
277 rte
*e
= sl_alloc(rte_slab
);
288 rte
*e
= sl_alloc(rte_slab
);
290 memcpy(e
, r
, sizeof(rte
));
291 e
->attrs
= rta_clone(r
->attrs
);
297 * rte_cow_rta - get a private writable copy of &rte with writable &rta
298 * @r: a route entry to be copied
299 * @lp: a linpool from which to allocate &rta
301 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
302 * modification. There are three possibilities: First, both &rte and &rta are
303 * private copies, in that case they are returned unchanged. Second, &rte is
304 * private copy, but &rta is cached, in that case &rta is duplicated using
305 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
306 * both structures are duplicated by rte_do_cow() and rta_do_cow().
308 * Note that in the second case, cached &rta loses one reference, while private
309 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
310 * nexthops, ...) with it. To work properly, original shared &rta should have
311 * another reference during the life of created private copy.
313 * Result: a pointer to the new writable &rte with writable &rta.
316 rte_cow_rta(rte
*r
, linpool
*lp
)
318 if (!rta_is_cached(r
->attrs
))
322 rta
*a
= rta_do_cow(r
->attrs
, lp
);
328 static int /* Actually better or at least as good as */
329 rte_better(rte
*new, rte
*old
)
331 int (*better
)(rte
*, rte
*);
333 if (!rte_is_valid(old
))
335 if (!rte_is_valid(new))
338 if (new->pref
> old
->pref
)
340 if (new->pref
< old
->pref
)
342 if (new->attrs
->src
->proto
->proto
!= old
->attrs
->src
->proto
->proto
)
345 * If the user has configured protocol preferences, so that two different protocols
346 * have the same preference, try to break the tie by comparing addresses. Not too
347 * useful, but keeps the ordering of routes unambiguous.
349 return new->attrs
->src
->proto
->proto
> old
->attrs
->src
->proto
->proto
;
351 if (better
= new->attrs
->src
->proto
->rte_better
)
352 return better(new, old
);
357 rte_mergable(rte
*pri
, rte
*sec
)
359 int (*mergable
)(rte
*, rte
*);
361 if (!rte_is_valid(pri
) || !rte_is_valid(sec
))
364 if (pri
->pref
!= sec
->pref
)
367 if (pri
->attrs
->src
->proto
->proto
!= sec
->attrs
->src
->proto
->proto
)
370 if (mergable
= pri
->attrs
->src
->proto
->rte_mergable
)
371 return mergable(pri
, sec
);
377 rte_trace(struct proto
*p
, rte
*e
, int dir
, char *msg
)
379 log(L_TRACE
"%s %c %s %N %s", p
->name
, dir
, msg
, e
->net
->n
.addr
, rta_dest_name(e
->attrs
->dest
));
383 rte_trace_in(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
386 rte_trace(p
, e
, '>', msg
);
390 rte_trace_out(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
393 rte_trace(p
, e
, '<', msg
);
397 export_filter_(struct channel
*c
, rte
*rt0
, rte
**rt_free
, linpool
*pool
, int silent
)
399 struct proto
*p
= c
->proto
;
400 struct filter
*filter
= c
->out_filter
;
401 struct proto_stats
*stats
= &c
->stats
;
408 v
= p
->preexport
? p
->preexport(p
, &rt
, pool
) : 0;
414 stats
->exp_updates_rejected
++;
416 rte_trace_out(D_FILTERS
, p
, rt
, "rejected by protocol");
422 rte_trace_out(D_FILTERS
, p
, rt
, "forced accept by protocol");
426 rte_make_tmp_attrs(&rt
, pool
);
428 v
= filter
&& ((filter
== FILTER_REJECT
) ||
429 (f_run(filter
, &rt
, pool
,
430 (silent
? FF_SILENT
: 0)) > F_ACCEPT
));
436 stats
->exp_updates_filtered
++;
437 rte_trace_out(D_FILTERS
, p
, rt
, "filtered out");
447 /* Discard temporary rte */
454 export_filter(struct channel
*c
, rte
*rt0
, rte
**rt_free
, int silent
)
456 return export_filter_(c
, rt0
, rt_free
, rte_update_pool
, silent
);
460 do_rt_notify(struct channel
*c
, net
*net
, rte
*new, rte
*old
, int refeed
)
462 struct proto
*p
= c
->proto
;
463 struct proto_stats
*stats
= &c
->stats
;
467 * First, apply export limit.
469 * Export route limits has several problems. Because exp_routes
470 * counter is reset before refeed, we don't really know whether
471 * limit is breached and whether the update is new or not. Therefore
472 * the number of really exported routes may exceed the limit
473 * temporarily (routes exported before and new routes in refeed).
475 * Minor advantage is that if the limit is decreased and refeed is
476 * requested, the number of exported routes really decrease.
478 * Second problem is that with export limits, we don't know whether
479 * old was really exported (it might be blocked by limit). When a
480 * withdraw is exported, we announce it even when the previous
481 * update was blocked. This is not a big issue, but the same problem
482 * is in updating exp_routes counter. Therefore, to be consistent in
483 * increases and decreases of exp_routes, we count exported routes
484 * regardless of blocking by limits.
486 * Similar problem is in handling updates - when a new route is
487 * received and blocking is active, the route would be blocked, but
488 * when an update for the route will be received later, the update
489 * would be propagated (as old != NULL). Therefore, we have to block
490 * also non-new updates (contrary to import blocking).
493 struct channel_limit
*l
= &c
->out_limit
;
494 if (l
->action
&& new)
496 if ((!old
|| refeed
) && (stats
->exp_routes
>= l
->limit
))
497 channel_notify_limit(c
, l
, PLD_OUT
, stats
->exp_routes
);
499 if (l
->state
== PLS_BLOCKED
)
501 stats
->exp_routes
++; /* see note above */
502 stats
->exp_updates_rejected
++;
503 rte_trace_out(D_FILTERS
, p
, new, "rejected [limit]");
513 stats
->exp_updates_accepted
++;
515 stats
->exp_withdraws_accepted
++;
517 /* Hack: We do not decrease exp_routes during refeed, we instead
518 reset exp_routes at the start of refeed. */
524 if (p
->debug
& D_ROUTES
)
527 rte_trace_out(D_ROUTES
, p
, new, "replaced");
529 rte_trace_out(D_ROUTES
, p
, new, "added");
531 rte_trace_out(D_ROUTES
, p
, old
, "removed");
533 p
->rt_notify(p
, c
, net
, new, old
);
537 rt_notify_basic(struct channel
*c
, net
*net
, rte
*new0
, rte
*old0
, int refeed
)
539 struct proto
*p
= c
->proto
;
543 rte
*new_free
= NULL
;
544 rte
*old_free
= NULL
;
547 c
->stats
.exp_updates_received
++;
549 c
->stats
.exp_withdraws_received
++;
552 * This is a tricky part - we don't know whether route 'old' was exported to
553 * protocol 'p' or was filtered by the export filter. We try to run the export
554 * filter to know this to have a correct value in 'old' argument of rte_update
555 * (and proper filter value).
557 * This is broken because 'configure soft' may change filters but keep routes.
558 * Refeed cycle is expected to be called after change of the filters and with
559 * old == new, therefore we do not even try to run the filter on an old route.
560 * This may lead to 'spurious withdraws' but ensure that there are no 'missing
563 * This is not completely safe as there is a window between reconfiguration
564 * and the end of refeed - if a newly filtered route disappears during this
565 * period, proper withdraw is not sent (because old would be also filtered)
566 * and the route is not refeeded (because it disappeared before that).
567 * Therefore, we also do not try to run the filter on old routes that are
568 * older than the last filter change.
572 new = export_filter(c
, new, &new_free
, 0);
574 if (old
&& !(refeed
|| (old
->lastmod
<= c
->last_tx_filter_change
)))
575 old
= export_filter(c
, old
, &old_free
, 1);
580 * As mentioned above, 'old' value may be incorrect in some race conditions.
581 * We generally ignore it with the exception of withdraw to pipe protocol.
582 * In that case we rather propagate unfiltered withdraws regardless of
583 * export filters to ensure that when a protocol is flushed, its routes are
584 * removed from all tables. Possible spurious unfiltered withdraws are not
585 * problem here as they are ignored if there is no corresponding route at
586 * the other end of the pipe. We directly call rt_notify() hook instead of
587 * do_rt_notify() to avoid logging and stat counters.
591 if ((p
->proto
== &proto_pipe
) && !new0
&& (p
!= old0
->sender
->proto
))
592 p
->rt_notify(p
, c
, net
, NULL
, old0
);
598 do_rt_notify(c
, net
, new, old
, refeed
);
600 /* Discard temporary rte's */
608 rt_notify_accepted(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
, rte
*before_old
, int feed
)
610 // struct proto *p = c->proto;
613 rte
*new_best
= NULL
;
614 rte
*old_best
= NULL
;
615 rte
*new_free
= NULL
;
616 rte
*old_free
= NULL
;
618 /* Used to track whether we met old_changed position. If before_old is NULL
619 old_changed was the first and we met it implicitly before current best route. */
620 int old_meet
= old_changed
&& !before_old
;
622 /* Note that before_old is either NULL or valid (not rejected) route.
623 If old_changed is valid, before_old have to be too. If old changed route
624 was not valid, caller must use NULL for both old_changed and before_old. */
627 c
->stats
.exp_updates_received
++;
629 c
->stats
.exp_withdraws_received
++;
631 /* First, find the new_best route - first accepted by filters */
632 for (r
=net
->routes
; rte_is_valid(r
); r
=r
->next
)
634 if (new_best
= export_filter(c
, r
, &new_free
, 0))
637 /* Note if we walked around the position of old_changed route */
643 * Second, handle the feed case. That means we do not care for
644 * old_best. It is NULL for feed, and the new_best for refeed.
645 * For refeed, there is a hack similar to one in rt_notify_basic()
646 * to ensure withdraws in case of changed filters
650 if (feed
== 2) /* refeed */
651 old_best
= new_best
? new_best
:
652 (rte_is_valid(net
->routes
) ? net
->routes
: NULL
);
656 if (!new_best
&& !old_best
)
663 * Now, we find the old_best route. Generally, it is the same as the
664 * new_best, unless new_best is the same as new_changed or
665 * old_changed is accepted before new_best.
667 * There are four cases:
669 * - We would find and accept old_changed before new_best, therefore
670 * old_changed is old_best. In remaining cases we suppose this
673 * - We found no new_best, therefore there is also no old_best and
674 * we ignore this withdraw.
676 * - We found new_best different than new_changed, therefore
677 * old_best is the same as new_best and we ignore this update.
679 * - We found new_best the same as new_changed, therefore it cannot
680 * be old_best and we have to continue search for old_best.
682 * There is also a hack to ensure consistency in case of changed filters.
683 * It does not find the proper old_best, just selects a non-NULL route.
686 /* Hack for changed filters */
687 if (old_changed
&& (old_changed
->lastmod
<= c
->last_tx_filter_change
))
689 old_best
= old_changed
;
695 if (old_best
= export_filter(c
, old_changed
, &old_free
, 1))
702 /* Third case, we use r instead of new_best, because export_filter() could change it */
703 if (r
!= new_changed
)
711 for (r
=r
->next
; rte_is_valid(r
); r
=r
->next
)
713 if (old_best
= export_filter(c
, r
, &old_free
, 1))
717 if (old_best
= export_filter(c
, old_changed
, &old_free
, 1))
721 /* Implicitly, old_best is NULL and new_best is non-NULL */
724 do_rt_notify(c
, net
, new_best
, old_best
, (feed
== 2));
726 /* Discard temporary rte's */
734 static struct nexthop
*
735 nexthop_merge_rta(struct nexthop
*nhs
, rta
*a
, linpool
*pool
, int max
)
737 return nexthop_merge(nhs
, &(a
->nh
), 1, 0, max
, pool
);
741 rt_export_merged(struct channel
*c
, net
*net
, rte
**rt_free
, linpool
*pool
, int silent
)
743 // struct proto *p = c->proto;
744 struct nexthop
*nhs
= NULL
;
745 rte
*best0
, *best
, *rt0
, *rt
, *tmp
;
750 if (!rte_is_valid(best0
))
753 best
= export_filter_(c
, best0
, rt_free
, pool
, silent
);
755 if (!best
|| !rte_is_reachable(best
))
758 for (rt0
= best0
->next
; rt0
; rt0
= rt0
->next
)
760 if (!rte_mergable(best0
, rt0
))
763 rt
= export_filter_(c
, rt0
, &tmp
, pool
, 1);
768 if (rte_is_reachable(rt
))
769 nhs
= nexthop_merge_rta(nhs
, rt
->attrs
, pool
, c
->merge_limit
);
777 nhs
= nexthop_merge_rta(nhs
, best
->attrs
, pool
, c
->merge_limit
);
781 best
= rte_cow_rta(best
, pool
);
782 nexthop_link(best
->attrs
, nhs
);
794 rt_notify_merged(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
,
795 rte
*new_best
, rte
*old_best
, int refeed
)
797 // struct proto *p = c->proto;
799 rte
*new_best_free
= NULL
;
800 rte
*old_best_free
= NULL
;
801 rte
*new_changed_free
= NULL
;
802 rte
*old_changed_free
= NULL
;
804 /* We assume that all rte arguments are either NULL or rte_is_valid() */
806 /* This check should be done by the caller */
807 if (!new_best
&& !old_best
)
810 /* Check whether the change is relevant to the merged route */
811 if ((new_best
== old_best
) && !refeed
)
813 new_changed
= rte_mergable(new_best
, new_changed
) ?
814 export_filter(c
, new_changed
, &new_changed_free
, 1) : NULL
;
816 old_changed
= rte_mergable(old_best
, old_changed
) ?
817 export_filter(c
, old_changed
, &old_changed_free
, 1) : NULL
;
819 if (!new_changed
&& !old_changed
)
824 c
->stats
.exp_updates_received
++;
826 c
->stats
.exp_withdraws_received
++;
828 /* Prepare new merged route */
830 new_best
= rt_export_merged(c
, net
, &new_best_free
, rte_update_pool
, 0);
832 /* Prepare old merged route (without proper merged next hops) */
833 /* There are some issues with running filter on old route - see rt_notify_basic() */
834 if (old_best
&& !refeed
)
835 old_best
= export_filter(c
, old_best
, &old_best_free
, 1);
837 if (new_best
|| old_best
)
838 do_rt_notify(c
, net
, new_best
, old_best
, refeed
);
840 /* Discard temporary rte's */
842 rte_free(new_best_free
);
844 rte_free(old_best_free
);
845 if (new_changed_free
)
846 rte_free(new_changed_free
);
847 if (old_changed_free
)
848 rte_free(old_changed_free
);
853 * rte_announce - announce a routing table change
854 * @tab: table the route has been added to
855 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
856 * @net: network in question
857 * @new: the new route to be announced
858 * @old: the previous route for the same network
859 * @new_best: the new best route for the same network
860 * @old_best: the previous best route for the same network
861 * @before_old: The previous route before @old for the same network.
862 * If @before_old is NULL @old was the first.
864 * This function gets a routing table update and announces it
865 * to all protocols that acccepts given type of route announcement
866 * and are connected to the same table by their announcement hooks.
868 * Route announcement of type %RA_OPTIMAL si generated when optimal
869 * route (in routing table @tab) changes. In that case @old stores the
872 * Route announcement of type %RA_ANY si generated when any route (in
873 * routing table @tab) changes In that case @old stores the old route
874 * from the same protocol.
876 * For each appropriate protocol, we first call its preexport()
877 * hook which performs basic checks on the route (each protocol has a
878 * right to veto or force accept of the route before any filter is
879 * asked) and adds default values of attributes specific to the new
880 * protocol (metrics, tags etc.). Then it consults the protocol's
881 * export filter and if it accepts the route, the rt_notify() hook of
882 * the protocol gets called.
885 rte_announce(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
886 rte
*new_best
, rte
*old_best
, rte
*before_old
)
888 if (!rte_is_valid(new))
891 if (!rte_is_valid(old
))
892 old
= before_old
= NULL
;
894 if (!rte_is_valid(new_best
))
897 if (!rte_is_valid(old_best
))
903 if ((type
== RA_OPTIMAL
) && tab
->hostcache
)
904 rt_notify_hostcache(tab
, net
);
906 struct channel
*c
; node
*n
;
907 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
909 if (c
->export_state
== ES_DOWN
)
912 if (c
->ra_mode
== type
)
913 if (type
== RA_ACCEPTED
)
914 rt_notify_accepted(c
, net
, new, old
, before_old
, 0);
915 else if (type
== RA_MERGED
)
916 rt_notify_merged(c
, net
, new, old
, new_best
, old_best
, 0);
918 rt_notify_basic(c
, net
, new, old
, 0);
928 if (!net_validate(n
->n
.addr
))
930 log(L_WARN
"Ignoring bogus prefix %N received via %s",
931 n
->n
.addr
, e
->sender
->proto
->name
);
935 /* FIXME: better handling different nettypes */
936 c
= !net_is_flow(n
->n
.addr
) ?
937 net_classify(n
->n
.addr
): (IADDR_HOST
| SCOPE_UNIVERSE
);
938 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
940 log(L_WARN
"Ignoring bogus route %N received via %s",
941 n
->n
.addr
, e
->sender
->proto
->name
);
945 if (net_type_match(n
->n
.addr
, NB_DEST
) == !e
->attrs
->dest
)
947 log(L_WARN
"Ignoring route %N with invalid dest %d received via %s",
948 n
->n
.addr
, e
->attrs
->dest
, e
->sender
->proto
->name
);
952 if ((e
->attrs
->dest
== RTD_UNICAST
) && !nexthop_is_sorted(&(e
->attrs
->nh
)))
954 log(L_WARN
"Ignoring unsorted multipath route %N received via %s",
955 n
->n
.addr
, e
->sender
->proto
->name
);
963 * rte_free - delete a &rte
964 * @e: &rte to be deleted
966 * rte_free() deletes the given &rte from the routing table it's linked to.
971 if (rta_is_cached(e
->attrs
))
973 sl_free(rte_slab
, e
);
977 rte_free_quick(rte
*e
)
980 sl_free(rte_slab
, e
);
984 rte_same(rte
*x
, rte
*y
)
987 x
->attrs
== y
->attrs
&&
988 x
->flags
== y
->flags
&&
989 x
->pflags
== y
->pflags
&&
990 x
->pref
== y
->pref
&&
991 (!x
->attrs
->src
->proto
->rte_same
|| x
->attrs
->src
->proto
->rte_same(x
, y
));
994 static inline int rte_is_ok(rte
*e
) { return e
&& !rte_is_filtered(e
); }
997 rte_recalculate(struct channel
*c
, net
*net
, rte
*new, struct rte_src
*src
)
999 struct proto
*p
= c
->proto
;
1000 struct rtable
*table
= c
->table
;
1001 struct proto_stats
*stats
= &c
->stats
;
1002 static struct tbf rl_pipe
= TBF_DEFAULT_LOG_LIMITS
;
1003 rte
*before_old
= NULL
;
1004 rte
*old_best
= net
->routes
;
1008 k
= &net
->routes
; /* Find and remove original route from the same protocol */
1011 if (old
->attrs
->src
== src
)
1013 /* If there is the same route in the routing table but from
1014 * a different sender, then there are two paths from the
1015 * source protocol to this routing table through transparent
1016 * pipes, which is not allowed.
1018 * We log that and ignore the route. If it is withdraw, we
1019 * ignore it completely (there might be 'spurious withdraws',
1020 * see FIXME in do_rte_announce())
1022 if (old
->sender
->proto
!= p
)
1026 log_rl(&rl_pipe
, L_ERR
"Pipe collision detected when sending %N to table %s",
1027 net
->n
.addr
, table
->name
);
1028 rte_free_quick(new);
1033 if (new && rte_same(old
, new))
1035 /* No changes, ignore the new route */
1037 if (!rte_is_filtered(new))
1039 stats
->imp_updates_ignored
++;
1040 rte_trace_in(D_ROUTES
, p
, new, "ignored");
1043 rte_free_quick(new);
1059 stats
->imp_withdraws_ignored
++;
1063 int new_ok
= rte_is_ok(new);
1064 int old_ok
= rte_is_ok(old
);
1066 struct channel_limit
*l
= &c
->rx_limit
;
1067 if (l
->action
&& !old
&& new && !c
->in_table
)
1069 u32 all_routes
= stats
->imp_routes
+ stats
->filt_routes
;
1071 if (all_routes
>= l
->limit
)
1072 channel_notify_limit(c
, l
, PLD_RX
, all_routes
);
1074 if (l
->state
== PLS_BLOCKED
)
1076 /* In receive limit the situation is simple, old is NULL so
1077 we just free new and exit like nothing happened */
1079 stats
->imp_updates_ignored
++;
1080 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1081 rte_free_quick(new);
1087 if (l
->action
&& !old_ok
&& new_ok
)
1089 if (stats
->imp_routes
>= l
->limit
)
1090 channel_notify_limit(c
, l
, PLD_IN
, stats
->imp_routes
);
1092 if (l
->state
== PLS_BLOCKED
)
1094 /* In import limit the situation is more complicated. We
1095 shouldn't just drop the route, we should handle it like
1096 it was filtered. We also have to continue the route
1097 processing if old or new is non-NULL, but we should exit
1098 if both are NULL as this case is probably assumed to be
1101 stats
->imp_updates_ignored
++;
1102 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1104 if (c
->in_keep_filtered
)
1105 new->flags
|= REF_FILTERED
;
1107 { rte_free_quick(new); new = NULL
; }
1109 /* Note that old && !new could be possible when
1110 c->in_keep_filtered changed in the recent past. */
1121 stats
->imp_updates_accepted
++;
1123 stats
->imp_withdraws_accepted
++;
1125 stats
->imp_withdraws_ignored
++;
1130 rte_is_filtered(new) ? stats
->filt_routes
++ : stats
->imp_routes
++;
1132 rte_is_filtered(old
) ? stats
->filt_routes
-- : stats
->imp_routes
--;
1134 if (table
->config
->sorted
)
1136 /* If routes are sorted, just insert new route to appropriate position */
1139 if (before_old
&& !rte_better(new, before_old
))
1140 k
= &before_old
->next
;
1144 for (; *k
; k
=&(*k
)->next
)
1145 if (rte_better(new, *k
))
1155 /* If routes are not sorted, find the best route and move it on
1156 the first position. There are several optimized cases. */
1158 if (src
->proto
->rte_recalculate
&& src
->proto
->rte_recalculate(table
, net
, new, old
, old_best
))
1159 goto do_recalculate
;
1161 if (new && rte_better(new, old_best
))
1163 /* The first case - the new route is cleary optimal,
1164 we link it at the first position */
1166 new->next
= net
->routes
;
1170 else if (old
== old_best
)
1172 /* The second case - the old best route disappeared, we add the
1173 new route (if we have any) to the list (we don't care about
1174 position) and then we elect the new optimal route and relink
1175 that route at the first position and announce it. New optimal
1176 route might be NULL if there is no more routes */
1179 /* Add the new route to the list */
1182 new->next
= net
->routes
;
1187 /* Find a new optimal route (if there is any) */
1190 rte
**bp
= &net
->routes
;
1191 for (k
=&(*bp
)->next
; *k
; k
=&(*k
)->next
)
1192 if (rte_better(*k
, *bp
))
1198 best
->next
= net
->routes
;
1204 /* The third case - the new route is not better than the old
1205 best route (therefore old_best != NULL) and the old best
1206 route was not removed (therefore old_best == net->routes).
1207 We just link the new route after the old best route. */
1209 ASSERT(net
->routes
!= NULL
);
1210 new->next
= net
->routes
->next
;
1211 net
->routes
->next
= new;
1214 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1218 new->lastmod
= current_time();
1220 /* Log the route change */
1221 if (p
->debug
& D_ROUTES
)
1224 rte_trace(p
, new, '>', new == net
->routes
? "added [best]" : "added");
1227 if (old
!= old_best
)
1228 rte_trace(p
, old
, '>', "removed");
1229 else if (rte_is_ok(net
->routes
))
1230 rte_trace(p
, old
, '>', "removed [replaced]");
1232 rte_trace(p
, old
, '>', "removed [sole]");
1236 /* Propagate the route change */
1237 rte_announce(table
, RA_ANY
, net
, new, old
, NULL
, NULL
, NULL
);
1238 if (net
->routes
!= old_best
)
1239 rte_announce(table
, RA_OPTIMAL
, net
, net
->routes
, old_best
, NULL
, NULL
, NULL
);
1240 if (table
->config
->sorted
)
1241 rte_announce(table
, RA_ACCEPTED
, net
, new, old
, NULL
, NULL
, before_old
);
1242 rte_announce(table
, RA_MERGED
, net
, new, old
, net
->routes
, old_best
, NULL
);
1245 (table
->gc_counter
++ >= table
->config
->gc_max_ops
) &&
1246 (table
->gc_time
+ table
->config
->gc_min_time
<= current_time()))
1247 rt_schedule_prune(table
);
1249 if (old_ok
&& p
->rte_remove
)
1250 p
->rte_remove(net
, old
);
1251 if (new_ok
&& p
->rte_insert
)
1252 p
->rte_insert(net
, new);
1255 rte_free_quick(old
);
1258 static int rte_update_nest_cnt
; /* Nesting counter to allow recursive updates */
1261 rte_update_lock(void)
1263 rte_update_nest_cnt
++;
1267 rte_update_unlock(void)
1269 if (!--rte_update_nest_cnt
)
1270 lp_flush(rte_update_pool
);
1274 rte_hide_dummy_routes(net
*net
, rte
**dummy
)
1276 if (net
->routes
&& net
->routes
->attrs
->source
== RTS_DUMMY
)
1278 *dummy
= net
->routes
;
1279 net
->routes
= (*dummy
)->next
;
1284 rte_unhide_dummy_routes(net
*net
, rte
**dummy
)
1288 (*dummy
)->next
= net
->routes
;
1289 net
->routes
= *dummy
;
1294 * rte_update - enter a new update to a routing table
1295 * @table: table to be updated
1296 * @c: channel doing the update
1297 * @net: network node
1298 * @p: protocol submitting the update
1299 * @src: protocol originating the update
1300 * @new: a &rte representing the new route or %NULL for route removal.
1302 * This function is called by the routing protocols whenever they discover
1303 * a new route or wish to update/remove an existing route. The right announcement
1304 * sequence is to build route attributes first (either un-cached with @aflags set
1305 * to zero or a cached one using rta_lookup(); in this case please note that
1306 * you need to increase the use count of the attributes yourself by calling
1307 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1308 * the appropriate data and finally submit the new &rte by calling rte_update().
1310 * @src specifies the protocol that originally created the route and the meaning
1311 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1312 * same value as @new->attrs->proto. @p specifies the protocol that called
1313 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1314 * stores @p in @new->sender;
1316 * When rte_update() gets any route, it automatically validates it (checks,
1317 * whether the network and next hop address are valid IP addresses and also
1318 * whether a normal routing protocol doesn't try to smuggle a host or link
1319 * scope route to the table), converts all protocol dependent attributes stored
1320 * in the &rte to temporary extended attributes, consults import filters of the
1321 * protocol to see if the route should be accepted and/or its attributes modified,
1322 * stores the temporary attributes back to the &rte.
1324 * Now, having a "public" version of the route, we
1325 * automatically find any old route defined by the protocol @src
1326 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1327 * recalculate the optimal route for this destination and finally broadcast
1328 * the change (if any) to all routing protocols by calling rte_announce().
1330 * All memory used for attribute lists and other temporary allocations is taken
1331 * from a special linear pool @rte_update_pool and freed when rte_update()
1336 rte_update2(struct channel
*c
, const net_addr
*n
, rte
*new, struct rte_src
*src
)
1338 struct proto
*p
= c
->proto
;
1339 struct proto_stats
*stats
= &c
->stats
;
1340 struct filter
*filter
= c
->in_filter
;
1344 ASSERT(c
->channel_state
== CS_UP
);
1349 nn
= net_get(c
->table
, n
);
1355 new->pref
= c
->preference
;
1357 stats
->imp_updates_received
++;
1358 if (!rte_validate(new))
1360 rte_trace_in(D_FILTERS
, p
, new, "invalid");
1361 stats
->imp_updates_invalid
++;
1365 if (filter
== FILTER_REJECT
)
1367 stats
->imp_updates_filtered
++;
1368 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1370 if (! c
->in_keep_filtered
)
1373 /* new is a private copy, i could modify it */
1374 new->flags
|= REF_FILTERED
;
1378 rte_make_tmp_attrs(&new, rte_update_pool
);
1379 if (filter
&& (filter
!= FILTER_REJECT
))
1381 ea_list
*oldea
= new->attrs
->eattrs
;
1382 int fr
= f_run(filter
, &new, rte_update_pool
, 0);
1385 stats
->imp_updates_filtered
++;
1386 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1388 if (! c
->in_keep_filtered
)
1391 new->flags
|= REF_FILTERED
;
1393 if (new->attrs
->eattrs
!= oldea
&& src
->proto
->store_tmp_attrs
)
1394 src
->proto
->store_tmp_attrs(new);
1397 if (!rta_is_cached(new->attrs
)) /* Need to copy attributes */
1398 new->attrs
= rta_lookup(new->attrs
);
1399 new->flags
|= REF_COW
;
1403 stats
->imp_withdraws_received
++;
1405 if (!(nn
= net_find(c
->table
, n
)) || !src
)
1407 stats
->imp_withdraws_ignored
++;
1408 rte_update_unlock();
1414 rte_hide_dummy_routes(nn
, &dummy
);
1415 rte_recalculate(c
, nn
, new, src
);
1416 rte_unhide_dummy_routes(nn
, &dummy
);
1417 rte_update_unlock();
1426 /* Independent call to rte_announce(), used from next hop
1427 recalculation, outside of rte_update(). new must be non-NULL */
1429 rte_announce_i(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
1430 rte
*new_best
, rte
*old_best
)
1433 rte_announce(tab
, type
, net
, new, old
, new_best
, old_best
, NULL
);
1434 rte_update_unlock();
1438 rte_discard(rte
*old
) /* Non-filtered route deletion, used during garbage collection */
1441 rte_recalculate(old
->sender
, old
->net
, NULL
, old
->attrs
->src
);
1442 rte_update_unlock();
1445 /* Modify existing route by protocol hook, used for long-lived graceful restart */
1447 rte_modify(rte
*old
)
1451 rte
*new = old
->sender
->proto
->rte_modify(old
, rte_update_pool
);
1456 if (!rta_is_cached(new->attrs
))
1457 new->attrs
= rta_lookup(new->attrs
);
1458 new->flags
= (old
->flags
& ~REF_MODIFY
) | REF_COW
;
1461 rte_recalculate(old
->sender
, old
->net
, new, old
->attrs
->src
);
1464 rte_update_unlock();
1467 /* Check rtable for best route to given net whether it would be exported do p */
1469 rt_examine(rtable
*t
, net_addr
*a
, struct proto
*p
, struct filter
*filter
)
1471 net
*n
= net_find(t
, a
);
1472 rte
*rt
= n
? n
->routes
: NULL
;
1474 if (!rte_is_valid(rt
))
1479 /* Rest is stripped down export_filter() */
1480 int v
= p
->preexport
? p
->preexport(p
, &rt
, rte_update_pool
) : 0;
1481 if (v
== RIC_PROCESS
)
1483 rte_make_tmp_attrs(&rt
, rte_update_pool
);
1484 v
= (f_run(filter
, &rt
, rte_update_pool
, FF_SILENT
) <= F_ACCEPT
);
1487 /* Discard temporary rte */
1488 if (rt
!= n
->routes
)
1491 rte_update_unlock();
1498 * rt_refresh_begin - start a refresh cycle
1499 * @t: related routing table
1500 * @c related channel
1502 * This function starts a refresh cycle for given routing table and announce
1503 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1504 * routes to the routing table (by rte_update()). After that, all protocol
1505 * routes (more precisely routes with @c as @sender) not sent during the
1506 * refresh cycle but still in the table from the past are pruned. This is
1507 * implemented by marking all related routes as stale by REF_STALE flag in
1508 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1509 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1512 rt_refresh_begin(rtable
*t
, struct channel
*c
)
1514 FIB_WALK(&t
->fib
, net
, n
)
1517 for (e
= n
->routes
; e
; e
= e
->next
)
1519 e
->flags
|= REF_STALE
;
1525 * rt_refresh_end - end a refresh cycle
1526 * @t: related routing table
1527 * @c: related channel
1529 * This function ends a refresh cycle for given routing table and announce
1530 * hook. See rt_refresh_begin() for description of refresh cycles.
1533 rt_refresh_end(rtable
*t
, struct channel
*c
)
1537 FIB_WALK(&t
->fib
, net
, n
)
1540 for (e
= n
->routes
; e
; e
= e
->next
)
1541 if ((e
->sender
== c
) && (e
->flags
& REF_STALE
))
1543 e
->flags
|= REF_DISCARD
;
1550 rt_schedule_prune(t
);
1554 rt_modify_stale(rtable
*t
, struct channel
*c
)
1558 FIB_WALK(&t
->fib
, net
, n
)
1561 for (e
= n
->routes
; e
; e
= e
->next
)
1562 if ((e
->sender
== c
) && (e
->flags
& REF_STALE
) && !(e
->flags
& REF_FILTERED
))
1564 e
->flags
|= REF_MODIFY
;
1571 rt_schedule_prune(t
);
1575 * rte_dump - dump a route
1576 * @e: &rte to be dumped
1578 * This functions dumps contents of a &rte to debug output.
1584 debug("%-1N ", n
->n
.addr
);
1585 debug("KF=%02x PF=%02x pref=%d ", n
->n
.flags
, e
->pflags
, e
->pref
);
1587 if (e
->attrs
->src
->proto
->proto
->dump_attrs
)
1588 e
->attrs
->src
->proto
->proto
->dump_attrs(e
);
1593 * rt_dump - dump a routing table
1594 * @t: routing table to be dumped
1596 * This function dumps contents of a given routing table to debug output.
1601 debug("Dump of routing table <%s>\n", t
->name
);
1605 FIB_WALK(&t
->fib
, net
, n
)
1608 for(e
=n
->routes
; e
; e
=e
->next
)
1616 * rt_dump_all - dump all routing tables
1618 * This function dumps contents of all routing tables to debug output.
1625 WALK_LIST(t
, routing_tables
)
1630 rt_schedule_hcu(rtable
*tab
)
1632 if (tab
->hcu_scheduled
)
1635 tab
->hcu_scheduled
= 1;
1636 ev_schedule(tab
->rt_event
);
1640 rt_schedule_nhu(rtable
*tab
)
1642 if (tab
->nhu_state
== NHU_CLEAN
)
1643 ev_schedule(tab
->rt_event
);
1646 * NHU_CLEAN -> NHU_SCHEDULED
1647 * NHU_RUNNING -> NHU_DIRTY
1649 tab
->nhu_state
|= NHU_SCHEDULED
;
1653 rt_schedule_prune(rtable
*tab
)
1655 if (tab
->prune_state
== 0)
1656 ev_schedule(tab
->rt_event
);
1658 /* state change 0->1, 2->3 */
1659 tab
->prune_state
|= 1;
1670 if (tab
->hcu_scheduled
)
1671 rt_update_hostcache(tab
);
1674 rt_next_hop_update(tab
);
1676 if (tab
->prune_state
)
1677 rt_prune_table(tab
);
1679 rt_unlock_table(tab
);
1683 rt_setup(pool
*p
, rtable
*t
, struct rtable_config
*cf
)
1685 bzero(t
, sizeof(*t
));
1688 t
->addr_type
= cf
->addr_type
;
1689 fib_init(&t
->fib
, p
, t
->addr_type
, sizeof(net
), OFFSETOF(net
, n
), 0, NULL
);
1690 init_list(&t
->channels
);
1692 t
->rt_event
= ev_new_init(p
, rt_event
, t
);
1693 t
->gc_time
= current_time();
1697 * rt_init - initialize routing tables
1699 * This function is called during BIRD startup. It initializes the
1700 * routing table module.
1706 rt_table_pool
= rp_new(&root_pool
, "Routing tables");
1707 rte_update_pool
= lp_new_default(rt_table_pool
);
1708 rte_slab
= sl_new(rt_table_pool
, sizeof(rte
));
1709 init_list(&routing_tables
);
1714 * rt_prune_table - prune a routing table
1716 * The prune loop scans routing tables and removes routes belonging to flushing
1717 * protocols, discarded routes and also stale network entries. It is called from
1718 * rt_event(). The event is rescheduled if the current iteration do not finish
1719 * the table. The pruning is directed by the prune state (@prune_state),
1720 * specifying whether the prune cycle is scheduled or running, and there
1721 * is also a persistent pruning iterator (@prune_fit).
1723 * The prune loop is used also for channel flushing. For this purpose, the
1724 * channels to flush are marked before the iteration and notified after the
1728 rt_prune_table(rtable
*tab
)
1730 struct fib_iterator
*fit
= &tab
->prune_fit
;
1736 DBG("Pruning route table %s\n", tab
->name
);
1738 fib_check(&tab
->fib
);
1741 if (tab
->prune_state
== 0)
1744 if (tab
->prune_state
== 1)
1746 /* Mark channels to flush */
1747 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
1748 if (c
->channel_state
== CS_FLUSHING
)
1749 c
->flush_active
= 1;
1751 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1752 tab
->prune_state
= 2;
1756 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1761 for (e
=n
->routes
; e
; e
=e
->next
)
1763 if (e
->sender
->flush_active
|| (e
->flags
& REF_DISCARD
))
1767 FIB_ITERATE_PUT(fit
);
1768 ev_schedule(tab
->rt_event
);
1778 if (e
->flags
& REF_MODIFY
)
1782 FIB_ITERATE_PUT(fit
);
1783 ev_schedule(tab
->rt_event
);
1794 if (!n
->routes
) /* Orphaned FIB entry */
1796 FIB_ITERATE_PUT(fit
);
1797 fib_delete(&tab
->fib
, n
);
1804 fib_check(&tab
->fib
);
1807 tab
->gc_counter
= 0;
1808 tab
->gc_time
= current_time();
1810 /* state change 2->0, 3->1 */
1811 tab
->prune_state
&= 1;
1813 if (tab
->prune_state
> 0)
1814 ev_schedule(tab
->rt_event
);
1816 /* FIXME: This should be handled in a better way */
1819 /* Close flushed channels */
1820 WALK_LIST2_DELSAFE(c
, n
, x
, tab
->channels
, table_node
)
1821 if (c
->flush_active
)
1823 c
->flush_active
= 0;
1824 channel_set_state(c
, CS_DOWN
);
1831 rt_preconfig(struct config
*c
)
1833 init_list(&c
->tables
);
1835 rt_new_table(cf_get_symbol("master4"), NET_IP4
);
1836 rt_new_table(cf_get_symbol("master6"), NET_IP6
);
1841 * Some functions for handing internal next hop updates
1842 * triggered by rt_schedule_nhu().
1846 rta_next_hop_outdated(rta
*a
)
1848 struct hostentry
*he
= a
->hostentry
;
1854 return a
->dest
!= RTD_UNREACHABLE
;
1856 return (a
->dest
!= he
->dest
) || (a
->igp_metric
!= he
->igp_metric
) ||
1857 (!he
->nexthop_linkable
) || !nexthop_same(&(a
->nh
), &(he
->src
->nh
));
1861 rta_apply_hostentry(rta
*a
, struct hostentry
*he
, mpls_label_stack
*mls
)
1865 a
->igp_metric
= he
->igp_metric
;
1867 if (a
->dest
!= RTD_UNICAST
)
1871 a
->nh
= (struct nexthop
) {};
1873 { /* Store the label stack for later changes */
1874 a
->nh
.labels_orig
= a
->nh
.labels
= mls
->len
;
1875 memcpy(a
->nh
.label
, mls
->stack
, mls
->len
* sizeof(u32
));
1880 if (((!mls
) || (!mls
->len
)) && he
->nexthop_linkable
)
1881 { /* Just link the nexthop chain, no label append happens. */
1882 memcpy(&(a
->nh
), &(he
->src
->nh
), nexthop_size(&(he
->src
->nh
)));
1886 struct nexthop
*nhp
= NULL
, *nhr
= NULL
;
1887 int skip_nexthop
= 0;
1889 for (struct nexthop
*nh
= &(he
->src
->nh
); nh
; nh
= nh
->next
)
1896 nhp
= (nhp
? (nhp
->next
= lp_allocz(rte_update_pool
, NEXTHOP_MAX_SIZE
)) : &(a
->nh
));
1899 nhp
->iface
= nh
->iface
;
1900 nhp
->weight
= nh
->weight
;
1903 nhp
->labels
= nh
->labels
+ mls
->len
;
1904 nhp
->labels_orig
= mls
->len
;
1905 if (nhp
->labels
<= MPLS_MAX_LABEL_STACK
)
1907 memcpy(nhp
->label
, nh
->label
, nh
->labels
* sizeof(u32
)); /* First the hostentry labels */
1908 memcpy(&(nhp
->label
[nh
->labels
]), mls
->stack
, mls
->len
* sizeof(u32
)); /* Then the bottom labels */
1912 log(L_WARN
"Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
1913 nh
->labels
, mls
->len
, nhp
->labels
, MPLS_MAX_LABEL_STACK
);
1918 if (ipa_nonzero(nh
->gw
))
1920 nhp
->gw
= nh
->gw
; /* Router nexthop */
1921 nhp
->flags
|= (nh
->flags
& RNF_ONLINK
);
1923 else if (ipa_nonzero(he
->link
))
1924 nhp
->gw
= he
->link
; /* Device nexthop with link-local address known */
1926 nhp
->gw
= he
->addr
; /* Device nexthop with link-local address unknown */
1934 a
->dest
= RTD_UNREACHABLE
;
1935 log(L_WARN
"No valid nexthop remaining, setting route unreachable");
1941 rt_next_hop_update_rte(rtable
*tab UNUSED
, rte
*old
)
1943 rta
*a
= alloca(RTA_MAX_SIZE
);
1944 memcpy(a
, old
->attrs
, rta_size(old
->attrs
));
1946 mpls_label_stack mls
= { .len
= a
->nh
.labels_orig
};
1947 memcpy(mls
.stack
, &a
->nh
.label
[a
->nh
.labels
- mls
.len
], mls
.len
* sizeof(u32
));
1949 rta_apply_hostentry(a
, old
->attrs
->hostentry
, &mls
);
1952 rte
*e
= sl_alloc(rte_slab
);
1953 memcpy(e
, old
, sizeof(rte
));
1954 e
->attrs
= rta_lookup(a
);
1960 rt_next_hop_update_net(rtable
*tab
, net
*n
)
1962 rte
**k
, *e
, *new, *old_best
, **new_best
;
1964 int free_old_best
= 0;
1966 old_best
= n
->routes
;
1970 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1971 if (rta_next_hop_outdated(e
->attrs
))
1973 new = rt_next_hop_update_rte(tab
, e
);
1976 rte_announce_i(tab
, RA_ANY
, n
, new, e
, NULL
, NULL
);
1977 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated");
1979 /* Call a pre-comparison hook */
1980 /* Not really an efficient way to compute this */
1981 if (e
->attrs
->src
->proto
->rte_recalculate
)
1982 e
->attrs
->src
->proto
->rte_recalculate(tab
, n
, new, e
, NULL
);
1986 else /* Freeing of the old best rte is postponed */
1996 /* Find the new best route */
1998 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
2000 if (!new_best
|| rte_better(e
, *new_best
))
2004 /* Relink the new best route to the first position */
2006 if (new != n
->routes
)
2008 *new_best
= new->next
;
2009 new->next
= n
->routes
;
2013 /* Announce the new best route */
2014 if (new != old_best
)
2016 rte_announce_i(tab
, RA_OPTIMAL
, n
, new, old_best
, NULL
, NULL
);
2017 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated [best]");
2020 /* FIXME: Better announcement of merged routes */
2021 rte_announce_i(tab
, RA_MERGED
, n
, new, old_best
, new, old_best
);
2024 rte_free_quick(old_best
);
2030 rt_next_hop_update(rtable
*tab
)
2032 struct fib_iterator
*fit
= &tab
->nhu_fit
;
2035 if (tab
->nhu_state
== NHU_CLEAN
)
2038 if (tab
->nhu_state
== NHU_SCHEDULED
)
2040 FIB_ITERATE_INIT(fit
, &tab
->fib
);
2041 tab
->nhu_state
= NHU_RUNNING
;
2044 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
2048 FIB_ITERATE_PUT(fit
);
2049 ev_schedule(tab
->rt_event
);
2052 max_feed
-= rt_next_hop_update_net(tab
, n
);
2057 * NHU_DIRTY -> NHU_SCHEDULED
2058 * NHU_RUNNING -> NHU_CLEAN
2060 tab
->nhu_state
&= 1;
2062 if (tab
->nhu_state
!= NHU_CLEAN
)
2063 ev_schedule(tab
->rt_event
);
2067 struct rtable_config
*
2068 rt_new_table(struct symbol
*s
, uint addr_type
)
2070 /* Hack that allows to 'redefine' the master table */
2071 if ((s
->class == SYM_TABLE
) &&
2072 (s
->def
== new_config
->def_tables
[addr_type
]) &&
2073 ((addr_type
== NET_IP4
) || (addr_type
== NET_IP6
)))
2076 struct rtable_config
*c
= cfg_allocz(sizeof(struct rtable_config
));
2078 cf_define_symbol(s
, SYM_TABLE
, c
);
2080 c
->addr_type
= addr_type
;
2081 c
->gc_max_ops
= 1000;
2084 add_tail(&new_config
->tables
, &c
->n
);
2086 /* First table of each type is kept as default */
2087 if (! new_config
->def_tables
[addr_type
])
2088 new_config
->def_tables
[addr_type
] = c
;
2094 * rt_lock_table - lock a routing table
2095 * @r: routing table to be locked
2097 * Lock a routing table, because it's in use by a protocol,
2098 * preventing it from being freed when it gets undefined in a new
2102 rt_lock_table(rtable
*r
)
2108 * rt_unlock_table - unlock a routing table
2109 * @r: routing table to be unlocked
2111 * Unlock a routing table formerly locked by rt_lock_table(),
2112 * that is decrease its use count and delete it if it's scheduled
2113 * for deletion by configuration changes.
2116 rt_unlock_table(rtable
*r
)
2118 if (!--r
->use_count
&& r
->deleted
)
2120 struct config
*conf
= r
->deleted
;
2121 DBG("Deleting routing table %s\n", r
->name
);
2122 r
->config
->table
= NULL
;
2124 rt_free_hostcache(r
);
2129 config_del_obstacle(conf
);
2133 static struct rtable_config
*
2134 rt_find_table_config(struct config
*cf
, char *name
)
2136 struct symbol
*sym
= cf_find_symbol(cf
, name
);
2137 return (sym
&& (sym
->class == SYM_TABLE
)) ? sym
->def
: NULL
;
2141 * rt_commit - commit new routing table configuration
2142 * @new: new configuration
2143 * @old: original configuration or %NULL if it's boot time config
2145 * Scan differences between @old and @new configuration and modify
2146 * the routing tables according to these changes. If @new defines a
2147 * previously unknown table, create it, if it omits a table existing
2148 * in @old, schedule it for deletion (it gets deleted when all protocols
2149 * disconnect from it by calling rt_unlock_table()), if it exists
2150 * in both configurations, leave it unchanged.
2153 rt_commit(struct config
*new, struct config
*old
)
2155 struct rtable_config
*o
, *r
;
2157 DBG("rt_commit:\n");
2160 WALK_LIST(o
, old
->tables
)
2162 rtable
*ot
= o
->table
;
2165 r
= rt_find_table_config(new, o
->name
);
2166 if (r
&& (r
->addr_type
== o
->addr_type
) && !new->shutdown
)
2168 DBG("\t%s: same\n", o
->name
);
2172 if (o
->sorted
!= r
->sorted
)
2173 log(L_WARN
"Reconfiguration of rtable sorted flag not implemented");
2177 DBG("\t%s: deleted\n", o
->name
);
2179 config_add_obstacle(old
);
2181 rt_unlock_table(ot
);
2187 WALK_LIST(r
, new->tables
)
2190 rtable
*t
= mb_alloc(rt_table_pool
, sizeof(struct rtable
));
2191 DBG("\t%s: created\n", r
->name
);
2192 rt_setup(rt_table_pool
, t
, r
);
2193 add_tail(&routing_tables
, &t
->n
);
2200 do_feed_channel(struct channel
*c
, net
*n
, rte
*e
)
2203 if (c
->ra_mode
== RA_ACCEPTED
)
2204 rt_notify_accepted(c
, n
, e
, NULL
, NULL
, c
->refeeding
? 2 : 1);
2205 else if (c
->ra_mode
== RA_MERGED
)
2206 rt_notify_merged(c
, n
, NULL
, NULL
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2208 rt_notify_basic(c
, n
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2209 rte_update_unlock();
2213 * rt_feed_channel - advertise all routes to a channel
2214 * @c: channel to be fed
2216 * This function performs one pass of advertisement of routes to a channel that
2217 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2218 * has something to do. (We avoid transferring all the routes in single pass in
2219 * order not to monopolize CPU time.)
2222 rt_feed_channel(struct channel
*c
)
2224 struct fib_iterator
*fit
= &c
->feed_fit
;
2227 ASSERT(c
->export_state
== ES_FEEDING
);
2229 if (!c
->feed_active
)
2231 FIB_ITERATE_INIT(fit
, &c
->table
->fib
);
2235 FIB_ITERATE_START(&c
->table
->fib
, fit
, net
, n
)
2240 FIB_ITERATE_PUT(fit
);
2244 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2246 if ((c
->ra_mode
== RA_OPTIMAL
) ||
2247 (c
->ra_mode
== RA_ACCEPTED
) ||
2248 (c
->ra_mode
== RA_MERGED
))
2249 if (rte_is_valid(e
))
2251 /* In the meantime, the protocol may fell down */
2252 if (c
->export_state
!= ES_FEEDING
)
2255 do_feed_channel(c
, n
, e
);
2259 if (c
->ra_mode
== RA_ANY
)
2260 for(e
= n
->routes
; e
; e
= e
->next
)
2262 /* In the meantime, the protocol may fell down */
2263 if (c
->export_state
!= ES_FEEDING
)
2266 if (!rte_is_valid(e
))
2269 do_feed_channel(c
, n
, e
);
2281 * rt_feed_baby_abort - abort protocol feeding
2284 * This function is called by the protocol code when the protocol stops or
2285 * ceases to exist during the feeding.
2288 rt_feed_channel_abort(struct channel
*c
)
2292 /* Unlink the iterator */
2293 fit_get(&c
->table
->fib
, &c
->feed_fit
);
2300 rte_update_in(struct channel
*c
, const net_addr
*n
, rte
*new, struct rte_src
*src
)
2302 struct rtable
*tab
= c
->in_table
;
2308 net
= net_get(tab
, n
);
2311 new->pref
= c
->preference
;
2313 if (!rta_is_cached(new->attrs
))
2314 new->attrs
= rta_lookup(new->attrs
);
2318 net
= net_find(tab
, n
);
2324 /* Find the old rte */
2325 for (pos
= &net
->routes
; old
= *pos
; pos
= &old
->next
)
2326 if (old
->attrs
->src
== src
)
2328 if (new && rte_same(old
, new))
2331 /* Remove the old rte */
2333 rte_free_quick(old
);
2347 struct channel_limit
*l
= &c
->rx_limit
;
2348 if (l
->action
&& !old
)
2350 if (tab
->rt_count
>= l
->limit
)
2351 channel_notify_limit(c
, l
, PLD_RX
, tab
->rt_count
);
2353 if (l
->state
== PLS_BLOCKED
)
2355 rte_trace_in(D_FILTERS
, c
->proto
, new, "ignored [limit]");
2360 /* Insert the new rte */
2361 rte
*e
= rte_do_cow(new);
2362 e
->flags
|= REF_COW
;
2365 e
->lastmod
= current_time();
2372 c
->stats
.imp_updates_received
++;
2373 c
->stats
.imp_updates_ignored
++;
2378 c
->stats
.imp_withdraws_received
++;
2379 c
->stats
.imp_withdraws_ignored
++;
2384 rt_reload_channel(struct channel
*c
)
2386 struct rtable
*tab
= c
->in_table
;
2387 struct fib_iterator
*fit
= &c
->reload_fit
;
2390 ASSERT(c
->channel_state
== CS_UP
);
2392 if (!c
->reload_active
)
2394 FIB_ITERATE_INIT(fit
, &tab
->fib
);
2395 c
->reload_active
= 1;
2398 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
2402 FIB_ITERATE_PUT(fit
);
2406 for (rte
*e
= n
->routes
; e
; e
= e
->next
)
2408 rte_update2(c
, n
->n
.addr
, rte_do_cow(e
), e
->attrs
->src
);
2414 c
->reload_active
= 0;
2419 rt_reload_channel_abort(struct channel
*c
)
2421 if (c
->reload_active
)
2423 /* Unlink the iterator */
2424 fit_get(&c
->in_table
->fib
, &c
->reload_fit
);
2425 c
->reload_active
= 0;
2430 rt_prune_sync(rtable
*t
, int all
)
2432 FIB_WALK(&t
->fib
, net
, n
)
2434 rte
*e
, **ee
= &n
->routes
;
2437 if (all
|| (e
->flags
& (REF_STALE
| REF_DISCARD
)))
2452 hc_hash(ip_addr a
, rtable
*dep
)
2454 return ipa_hash(a
) ^ ptr_hash(dep
);
2458 hc_insert(struct hostcache
*hc
, struct hostentry
*he
)
2460 uint k
= he
->hash_key
>> hc
->hash_shift
;
2461 he
->next
= hc
->hash_table
[k
];
2462 hc
->hash_table
[k
] = he
;
2466 hc_remove(struct hostcache
*hc
, struct hostentry
*he
)
2468 struct hostentry
**hep
;
2469 uint k
= he
->hash_key
>> hc
->hash_shift
;
2471 for (hep
= &hc
->hash_table
[k
]; *hep
!= he
; hep
= &(*hep
)->next
);
2475 #define HC_DEF_ORDER 10
2476 #define HC_HI_MARK *4
2477 #define HC_HI_STEP 2
2478 #define HC_HI_ORDER 16 /* Must be at most 16 */
2479 #define HC_LO_MARK /5
2480 #define HC_LO_STEP 2
2481 #define HC_LO_ORDER 10
2484 hc_alloc_table(struct hostcache
*hc
, unsigned order
)
2486 uint hsize
= 1 << order
;
2487 hc
->hash_order
= order
;
2488 hc
->hash_shift
= 32 - order
;
2489 hc
->hash_max
= (order
>= HC_HI_ORDER
) ? ~0U : (hsize HC_HI_MARK
);
2490 hc
->hash_min
= (order
<= HC_LO_ORDER
) ? 0U : (hsize HC_LO_MARK
);
2492 hc
->hash_table
= mb_allocz(rt_table_pool
, hsize
* sizeof(struct hostentry
*));
2496 hc_resize(struct hostcache
*hc
, unsigned new_order
)
2498 struct hostentry
**old_table
= hc
->hash_table
;
2499 struct hostentry
*he
, *hen
;
2500 uint old_size
= 1 << hc
->hash_order
;
2503 hc_alloc_table(hc
, new_order
);
2504 for (i
= 0; i
< old_size
; i
++)
2505 for (he
= old_table
[i
]; he
!= NULL
; he
=hen
)
2513 static struct hostentry
*
2514 hc_new_hostentry(struct hostcache
*hc
, ip_addr a
, ip_addr ll
, rtable
*dep
, unsigned k
)
2516 struct hostentry
*he
= sl_alloc(hc
->slab
);
2518 *he
= (struct hostentry
) {
2525 add_tail(&hc
->hostentries
, &he
->ln
);
2529 if (hc
->hash_items
> hc
->hash_max
)
2530 hc_resize(hc
, hc
->hash_order
+ HC_HI_STEP
);
2536 hc_delete_hostentry(struct hostcache
*hc
, struct hostentry
*he
)
2542 sl_free(hc
->slab
, he
);
2545 if (hc
->hash_items
< hc
->hash_min
)
2546 hc_resize(hc
, hc
->hash_order
- HC_LO_STEP
);
2550 rt_init_hostcache(rtable
*tab
)
2552 struct hostcache
*hc
= mb_allocz(rt_table_pool
, sizeof(struct hostcache
));
2553 init_list(&hc
->hostentries
);
2556 hc_alloc_table(hc
, HC_DEF_ORDER
);
2557 hc
->slab
= sl_new(rt_table_pool
, sizeof(struct hostentry
));
2559 hc
->lp
= lp_new(rt_table_pool
, LP_GOOD_SIZE(1024));
2560 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2562 tab
->hostcache
= hc
;
2566 rt_free_hostcache(rtable
*tab
)
2568 struct hostcache
*hc
= tab
->hostcache
;
2571 WALK_LIST(n
, hc
->hostentries
)
2573 struct hostentry
*he
= SKIP_BACK(struct hostentry
, ln
, n
);
2577 log(L_ERR
"Hostcache is not empty in table %s", tab
->name
);
2582 mb_free(hc
->hash_table
);
2587 rt_notify_hostcache(rtable
*tab
, net
*net
)
2589 if (tab
->hcu_scheduled
)
2592 if (trie_match_net(tab
->hostcache
->trie
, net
->n
.addr
))
2593 rt_schedule_hcu(tab
);
2597 if_local_addr(ip_addr a
, struct iface
*i
)
2601 WALK_LIST(b
, i
->addrs
)
2602 if (ipa_equal(a
, b
->ip
))
2609 rt_get_igp_metric(rte
*rt
)
2611 eattr
*ea
= ea_find(rt
->attrs
->eattrs
, EA_GEN_IGP_METRIC
);
2619 if ((a
->source
== RTS_OSPF
) ||
2620 (a
->source
== RTS_OSPF_IA
) ||
2621 (a
->source
== RTS_OSPF_EXT1
))
2622 return rt
->u
.ospf
.metric1
;
2626 if (a
->source
== RTS_RIP
)
2627 return rt
->u
.rip
.metric
;
2630 if (a
->source
== RTS_DEVICE
)
2633 return IGP_METRIC_UNKNOWN
;
2637 rt_update_hostentry(rtable
*tab
, struct hostentry
*he
)
2639 rta
*old_src
= he
->src
;
2643 /* Reset the hostentry */
2645 he
->dest
= RTD_UNREACHABLE
;
2646 he
->nexthop_linkable
= 0;
2650 net_fill_ip_host(&he_addr
, he
->addr
);
2651 net
*n
= net_route(tab
, &he_addr
);
2656 pxlen
= n
->n
.addr
->pxlen
;
2660 /* Recursive route should not depend on another recursive route */
2661 log(L_WARN
"Next hop address %I resolvable through recursive route for %N",
2662 he
->addr
, n
->n
.addr
);
2666 if (a
->dest
== RTD_UNICAST
)
2668 for (struct nexthop
*nh
= &(a
->nh
); nh
; nh
= nh
->next
)
2669 if (ipa_zero(nh
->gw
))
2671 if (if_local_addr(he
->addr
, nh
->iface
))
2673 /* The host address is a local address, this is not valid */
2674 log(L_WARN
"Next hop address %I is a local address of iface %s",
2675 he
->addr
, nh
->iface
->name
);
2683 he
->src
= rta_clone(a
);
2685 he
->nexthop_linkable
= !direct
;
2686 he
->igp_metric
= rt_get_igp_metric(e
);
2690 /* Add a prefix range to the trie */
2691 trie_add_prefix(tab
->hostcache
->trie
, &he_addr
, pxlen
, he_addr
.pxlen
);
2694 return old_src
!= he
->src
;
2698 rt_update_hostcache(rtable
*tab
)
2700 struct hostcache
*hc
= tab
->hostcache
;
2701 struct hostentry
*he
;
2704 /* Reset the trie */
2706 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2708 WALK_LIST_DELSAFE(n
, x
, hc
->hostentries
)
2710 he
= SKIP_BACK(struct hostentry
, ln
, n
);
2713 hc_delete_hostentry(hc
, he
);
2717 if (rt_update_hostentry(tab
, he
))
2718 rt_schedule_nhu(he
->tab
);
2721 tab
->hcu_scheduled
= 0;
2725 rt_get_hostentry(rtable
*tab
, ip_addr a
, ip_addr ll
, rtable
*dep
)
2727 struct hostentry
*he
;
2729 if (!tab
->hostcache
)
2730 rt_init_hostcache(tab
);
2732 u32 k
= hc_hash(a
, dep
);
2733 struct hostcache
*hc
= tab
->hostcache
;
2734 for (he
= hc
->hash_table
[k
>> hc
->hash_shift
]; he
!= NULL
; he
= he
->next
)
2735 if (ipa_equal(he
->addr
, a
) && (he
->tab
== dep
))
2738 he
= hc_new_hostentry(hc
, a
, ipa_zero(ll
) ? a
: ll
, dep
, k
);
2739 rt_update_hostentry(tab
, he
);
2745 * Documentation for functions declared inline in route.h
2750 * net_find - find a network entry
2751 * @tab: a routing table
2752 * @addr: address of the network
2754 * net_find() looks up the given network in routing table @tab and
2755 * returns a pointer to its &net entry or %NULL if no such network
2758 static inline net
*net_find(rtable
*tab
, net_addr
*addr
)
2762 * net_get - obtain a network entry
2763 * @tab: a routing table
2764 * @addr: address of the network
2766 * net_get() looks up the given network in routing table @tab and
2767 * returns a pointer to its &net entry. If no such entry exists, it's
2770 static inline net
*net_get(rtable
*tab
, net_addr
*addr
)
2774 * rte_cow - copy a route for writing
2775 * @r: a route entry to be copied
2777 * rte_cow() takes a &rte and prepares it for modification. The exact action
2778 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2779 * just returned unchanged, else a new temporary entry with the same contents
2782 * The primary use of this function is inside the filter machinery -- when
2783 * a filter wants to modify &rte contents (to change the preference or to
2784 * attach another set of attributes), it must ensure that the &rte is not
2785 * shared with anyone else (and especially that it isn't stored in any routing
2788 * Result: a pointer to the new writable &rte.
2790 static inline rte
* rte_cow(rte
*r
)