/*
 *	BIRD -- Routing Tables
 *
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

/**
 * DOC: Routing tables
 *
 * Routing tables are probably the most important structures BIRD uses. They
 * hold all the information about known networks, the associated routes and
 * their attributes.
 *
 * There are multiple routing tables (a primary one together with any
 * number of secondary ones if requested by the configuration). Each table
 * is basically a FIB containing entries describing the individual
 * destination networks. For each network (represented by structure &net),
 * there is a one-way linked list of route entries (&rte), the first entry
 * on the list being the best one (i.e., the one we currently use
 * for routing); the order of the other ones is undetermined. (A minimal walk
 * over this list is sketched in a comment right after this one.)
 *
 * The &rte contains information about the route. There are net and src, which
 * together form a key identifying the route in a routing table. There is a
 * pointer to a &rta structure (see the route attribute module for a precise
 * explanation) holding the route attributes, which are primary data about the
 * route. There are several technical fields used by routing table code (route
 * id, REF_* flags). There is also the pflags field, holding protocol-specific
 * flags. They are not used by routing table code, but by protocol-specific
 * hooks. In contrast to route attributes, they are not primary data and their
 * validity is also limited to the routing table.
 *
 * There are several mechanisms that allow automatic update of routes in one
 * routing table (dst) as a result of changes in another routing table (src).
 * They handle issues of recursive next hop resolving, flowspec validation and
 * RPKI validation.
 *
 * The first such mechanism is handling of recursive next hops. A route in the
 * dst table has an indirect next hop address, which is resolved through a route
 * in the src table (which may also be the same table) to get an immediate next
 * hop. This is implemented using structure &hostcache attached to the src
 * table, which contains &hostentry structures for each tracked next hop
 * address. These structures are linked from recursive routes in dst tables,
 * possibly multiple routes sharing one hostentry (as many routes may have the
 * same indirect next hop). There is also a trie in the hostcache, which matches
 * all prefixes that may influence resolving of tracked next hops.
 *
 * When a best route changes in the src table, the hostcache is notified using
 * rt_notify_hostcache(), which immediately checks using the trie whether the
 * change is relevant, and if it is, it schedules asynchronous hostcache
 * recomputation. The recomputation is done by rt_update_hostcache() (called
 * from rt_event() of the src table): it walks through all hostentries and resolves
 * them (by rt_update_hostentry()). It also updates the trie. If a change in
 * hostentry resolution was found, it schedules asynchronous nexthop
 * recomputation of the associated dst table. That is done by rt_next_hop_update()
 * (called from rt_event() of the dst table), which iterates over all routes in the dst
 * table and re-examines their hostentries for changes. Note that in contrast to
 * the hostcache update, the next hop update can be interrupted by the main loop. These two
 * full-table walks (over the hostcache and the dst table) are necessary due to the absence
 * of direct lookups (route -> affected nexthop, nexthop -> its route).
 *
 * The second mechanism is for flowspec validation, where validity of flowspec
 * routes depends on resolving their network prefixes in IP routing tables. This
 * is similar to the recursive next hop mechanism, but simpler as there are no
 * intermediate hostcache and hostentries (because flows are less likely to
 * share a common net prefix than routes sharing a common next hop). In the src table,
 * there is a list of dst tables (list flowspec_links); this list is updated by
 * flowspec channels (by rt_flowspec_link() and rt_flowspec_unlink() during
 * channel start/stop). Each dst table has its own trie of prefixes that may
 * influence validation of flowspec routes in it (flowspec_trie).
 *
 * When a best route changes in the src table, rt_flowspec_notify() immediately
 * checks all dst tables from the list using their tries to see whether the
 * change is relevant for them. If it is, then an asynchronous re-validation of
 * flowspec routes in the dst table is scheduled. That is also done by function
 * rt_next_hop_update(), like the nexthop recomputation above. It iterates over all
 * flowspec routes and re-validates them. It also recalculates the trie.
 *
 * Note that in contrast to the hostcache update, here the trie is recalculated
 * during rt_next_hop_update(), which may be interleaved with IP route
 * updates. The trie is flushed at the beginning of recalculation, which means
 * that such updates may use a partial trie to see if they are relevant. But it
 * works anyway! Either the affected flowspec was already re-validated and added to
 * the trie, in which case the IP route change would match the trie and trigger a next round
 * of re-validation, or it was not yet re-validated and added to the trie, but
 * will be re-validated later in this round anyway.
 *
 * The third mechanism is used for RPKI re-validation of IP routes and it is the
 * simplest. It is just a list of subscribers in the src table, which are notified
 * when any change happens, but only after a settle time. Also, in the RPKI case
 * the dst is not a table, but a channel, which refeeds routes through a filter.
 */
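/*
 * Illustrative sketch, not part of the original file: the per-network route
 * list described in the comment above is walked as
 *
 *   for (rte *e = net->routes; e; e = e->next)
 *     ...;
 *
 * with net->routes always pointing at the currently selected best route.
 */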
#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/mpls.h"
#include "lib/resource.h"
#include "lib/event.h"
#include "lib/timer.h"
#include "lib/string.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "filter/data.h"
#include "lib/hash.h"
#include "lib/string.h"
#include "lib/alloca.h"
#include "lib/flowspec.h"

#ifdef CONFIG_BGP
#include "proto/bgp/bgp.h"
#endif
pool *rt_table_pool;

static slab *rte_slab;
linpool *rte_update_pool;

list routing_tables;

static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);
static inline void rt_schedule_notify(rtable *tab);
static void rt_flowspec_notify(rtable *tab, net *net);
static void rt_kick_prune_timer(rtable *tab);
static void
net_init_with_trie(struct fib *f, void *N)
{
  rtable *tab = SKIP_BACK(rtable, fib, f);
  net *n = N;

  if (tab->trie)
    trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);

  if (tab->trie_new)
    trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
}
static inline void *
net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0)
{
  TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n)
  {
    net *r;
    if (r = net_find_valid(t, (net_addr *) &n))
      return r;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}

static inline void *
net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0)
{
  TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px)
  {
    net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd);

    net *r;
    if (r = net_find_valid(t, (net_addr *) &n))
      return r;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}

static inline void *
net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0)
{
  TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n)
  {
    net *r;
    if (r = net_find_valid(t, (net_addr *) &n))
      return r;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}

static inline void *
net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0)
{
  TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
  {
    net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd);

    net *r;
    if (r = net_find_valid(t, (net_addr *) &n))
      return r;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}
static inline void *
net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
{
  TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
  {
    net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen);
    net *best = NULL;
    int best_pxlen = 0;

    /* We need to do dst first matching. Since sadr addresses are hashed on dst
       prefix only, find the hash table chain and go through it to find the
       match with the longest matching src prefix. */
    for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_ip6_sadr *a = (void *) fn->addr;

      if (net_equal_dst_ip6_sadr(&n, a) &&
          net_in_net_src_ip6_sadr(&n, a) &&
          (a->src_pxlen >= best_pxlen))
      {
        best = fib_node_to_user(&t->fib, fn);
        best_pxlen = a->src_pxlen;
      }
    }

    if (best)
      return best;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}
static inline void *
net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0)
{
  net_addr_ip4 n;
  net_copy_ip4(&n, n0);

  net *r;
  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
  {
    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return r;
}

static inline void *
net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0)
{
  net_addr_vpn4 n;
  net_copy_vpn4(&n, n0);

  net *r;
  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
  {
    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return r;
}

static inline void *
net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0)
{
  net_addr_ip6 n;
  net_copy_ip6(&n, n0);

  net *r;
  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
  {
    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return r;
}

static inline void *
net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0)
{
  net_addr_vpn6 n;
  net_copy_vpn6(&n, n0);

  net *r;
  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
  {
    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return r;
}
static inline void *
net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0)
{
  net_addr_ip6_sadr n;
  net_copy_ip6_sadr(&n, n0);

  while (1)
  {
    net *best = NULL;
    int best_pxlen = 0;

    /* We need to do dst first matching. Since sadr addresses are hashed on dst
       prefix only, find the hash table chain and go through it to find the
       match with the longest matching src prefix. */
    for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_ip6_sadr *a = (void *) fn->addr;

      if (net_equal_dst_ip6_sadr(&n, a) &&
          net_in_net_src_ip6_sadr(&n, a) &&
          (a->src_pxlen >= best_pxlen))
      {
        best = fib_node_to_user(&t->fib, fn);
        best_pxlen = a->src_pxlen;
      }
    }

    if (best)
      return best;

    if (!n.dst_pxlen)
      break;

    n.dst_pxlen--;
    ip6_clrbit(&n.dst_prefix, n.dst_pxlen);
  }

  return NULL;
}
net *
net_route(rtable *tab, const net_addr *n)
{
  ASSERT(tab->addr_type == n->type);

  switch (n->type)
  {
  case NET_IP4:
    if (tab->trie)
      return net_route_ip4_trie(tab, (net_addr_ip4 *) n);
    else
      return net_route_ip4_fib (tab, (net_addr_ip4 *) n);

  case NET_VPN4:
    if (tab->trie)
      return net_route_vpn4_trie(tab, (net_addr_vpn4 *) n);
    else
      return net_route_vpn4_fib (tab, (net_addr_vpn4 *) n);

  case NET_IP6:
    if (tab->trie)
      return net_route_ip6_trie(tab, (net_addr_ip6 *) n);
    else
      return net_route_ip6_fib (tab, (net_addr_ip6 *) n);

  case NET_VPN6:
    if (tab->trie)
      return net_route_vpn6_trie(tab, (net_addr_vpn6 *) n);
    else
      return net_route_vpn6_fib (tab, (net_addr_vpn6 *) n);

  case NET_IP6_SADR:
    if (tab->trie)
      return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n);
    else
      return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n);

  default:
    return NULL;
  }
}
static int
net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  int anything = 0;

  TRIE_WALK_TO_ROOT_IP4(tab->trie, px, px0)
  {
    net_addr_roa4 roa0 = NET_ADDR_ROA4(px0.prefix, px0.pxlen, 0, 0);

    struct fib_node *fn;
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &roa0) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }
  }
  TRIE_WALK_TO_ROOT_END;

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}
static int
net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}
static int
net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  int anything = 0;

  TRIE_WALK_TO_ROOT_IP6(tab->trie, px, px0)
  {
    net_addr_roa6 roa0 = NET_ADDR_ROA6(px0.prefix, px0.pxlen, 0, 0);

    struct fib_node *fn;
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &roa0) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }
  }
  TRIE_WALK_TO_ROOT_END;

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}
static int
net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}
/**
 * net_roa_check - check validity of route origination in a ROA table
 * @tab: ROA table
 * @n: network prefix to check
 * @asn: AS number of network prefix
 *
 * Implements RFC 6483 route validation for the given network prefix. The
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
 * a candidate ROA with matching ASN and a maxlen field greater than or equal to
 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
 * the caller cannot determine the origin AS, 0 could be used (in that case
 * ROA_VALID cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6,
 * network @n must have type NET_IP4 or NET_IP6, respectively.
 */
int
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
{
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
  {
    if (tab->trie)
      return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn);
    else
      return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn);
  }
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
  {
    if (tab->trie)
      return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn);
    else
      return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn);
  }
  else
    return ROA_UNKNOWN;	/* Should not happen */
}
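/*
 * Illustrative sketch, not part of the original file: a caller validating the
 * origin of a received IPv4 prefix against a ROA table (roa_tab, prefix and
 * origin_asn are assumed to be supplied by the caller) would use
 * net_roa_check() like this:
 *
 *   switch (net_roa_check(roa_tab, prefix, origin_asn))
 *   {
 *   case ROA_VALID:   ... accept the route ...; break;
 *   case ROA_INVALID: ... reject or depreference it ...; break;
 *   default:          ... treat it as ROA_UNKNOWN ...; break;
 *   }
 */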
/**
 * rte_find - find a route
 * @net: network node
 * @src: route source
 *
 * The rte_find() function returns a route for destination @net
 * which is from route source @src.
 */
rte *
rte_find(net *net, struct rte_src *src)
{
  rte *e = net->routes;

  while (e && e->src != src)
    e = e->next;
  return e;
}

/**
 * rte_get_temp - get a temporary &rte
 * @a: attributes to assign to the new route (a &rta; in case it's
 * un-cached, rte_update() will create a cached copy automatically)
 * @src: route source
 *
 * Create a temporary &rte and bind it with the attributes @a.
 */
rte *
rte_get_temp(rta *a, struct rte_src *src)
{
  rte *e = sl_alloc(rte_slab);

  e->attrs = a;
  e->id = 0;
  e->flags = 0;
  e->pflags = 0;
  rt_lock_source(e->src = src);
  return e;
}

rte *
rte_do_cow(rte *r)
{
  rte *e = sl_alloc(rte_slab);

  memcpy(e, r, sizeof(rte));

  rt_lock_source(e->src);
  e->attrs = rta_clone(r->attrs);
  e->flags = 0;
  return e;
}
/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and its associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in which case they are returned unchanged. Second, &rte is a
 * private copy, but &rta is cached, in which case &rta is duplicated using
 * rta_do_cow(). Third, &rte is shared and &rta is cached, in which case
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, the cached &rta loses one reference, while the
 * private copy created by rta_do_cow() is a shallow copy sharing indirect data
 * (eattrs, nexthops, ...) with it. To work properly, the original shared &rta
 * should have another reference during the life of the created private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  if (!rta_is_cached(r->attrs))
    return r;

  r = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(r->attrs);
  r->attrs = a;
  return r;
}
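/*
 * Illustrative sketch, not part of the original file: a caller that needs to
 * tweak attributes of a possibly shared route (r and lp assumed to be
 * provided) would do
 *
 *   rte *w = rte_cow_rta(r, lp);    // both w and w->attrs are now writable
 *   w->attrs->igp_metric = 10;      // modify the private copy only
 *
 * while keeping the original shared &rta referenced for the lifetime of w,
 * as noted in the documentation above.
 */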
static int				/* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
  int (*better)(rte *, rte *);

  if (!rte_is_valid(old))
    return 1;
  if (!rte_is_valid(new))
    return 0;

  if (new->attrs->pref > old->attrs->pref)
    return 1;
  if (new->attrs->pref < old->attrs->pref)
    return 0;
  if (new->src->proto->proto != old->src->proto->proto)
  {
    /*
     *  If the user has configured protocol preferences, so that two different protocols
     *  have the same preference, try to break the tie by comparing addresses. Not too
     *  useful, but keeps the ordering of routes unambiguous.
     */
    return new->src->proto->proto > old->src->proto->proto;
  }
  if (better = new->src->proto->rte_better)
    return better(new, old);
  return 0;
}

static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->attrs->pref != sec->attrs->pref)
    return 0;

  if (pri->src->proto->proto != sec->src->proto->proto)
    return 0;

  if (mergable = pri->src->proto->rte_mergable)
    return mergable(pri, sec);

  return 0;
}
static void
rte_trace(struct channel *c, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s.%s %c %s %N %luL %uG %s",
      c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr, e->src->private_id, e->src->global_id,
      rta_dest_name(e->attrs->dest));
}

static inline void
rte_trace_in(uint flag, struct channel *c, rte *e, char *msg)
{
  if ((c->debug & flag) || (c->proto->debug & flag))
    rte_trace(c, e, '>', msg);
}

static inline void
rte_trace_out(uint flag, struct channel *c, rte *e, char *msg)
{
  if ((c->debug & flag) || (c->proto->debug & flag))
    rte_trace(c, e, '<', msg);
}
static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  const struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  v = p->preexport ? p->preexport(c, rt) : 0;
  if (v < 0)
  {
    if (silent)
      goto reject;

    stats->exp_updates_rejected++;
    if (v == RIC_REJECT)
      rte_trace_out(D_FILTERS, c, rt, "rejected by protocol");
    goto reject;
  }
  if (v > 0)
  {
    if (!silent)
      rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol");
    goto accept;
  }

  v = filter && ((filter == FILTER_REJECT) ||
		 (f_run(filter, &rt, pool,
			(silent ? FF_SILENT : 0)) > F_ACCEPT));
  if (v)
  {
    if (silent)
      goto reject;

    stats->exp_updates_filtered++;
    rte_trace_out(D_FILTERS, c, rt, "filtered out");
    goto reject;
  }

 accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}

static inline rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
{
  return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
}
static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;

  if (refeed && new)
    c->refeed_count++;

  /* Apply export limit */
  struct channel_limit *l = &c->out_limit;
  if (l->action && !old && new)
  {
    if (stats->exp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

    if (l->state == PLS_BLOCKED)
    {
      stats->exp_updates_rejected++;
      rte_trace_out(D_FILTERS, c, new, "rejected [limit]");
      return;
    }
  }

  /* Apply export table */
  if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed))
    return;

  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  if (old)
  {
    bmap_clear(&c->export_map, old->id);
    stats->exp_routes--;
  }

  if (new)
  {
    bmap_set(&c->export_map, new->id);
    stats->exp_routes++;
  }

  if (p->debug & D_ROUTES)
  {
    if (new && old)
      rte_trace_out(D_ROUTES, c, new, "replaced");
    else if (new)
      rte_trace_out(D_ROUTES, c, new, "added");
    else if (old)
      rte_trace_out(D_ROUTES, c, old, "removed");
  }

  p->rt_notify(p, c, net, new, old);
}
static void
rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  if (new)
    new = export_filter(c, new, &new_free, 0);

  if (old && !bmap_test(&c->export_map, old->id))
    old = NULL;

  if (!new && !old)
    return;

  do_rt_notify(c, net, new, old, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}
static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  int new_first = 0;

  /*
   * We assume that there are no changes in net route order except (added)
   * new_changed and (removed) old_changed. Therefore, the function is not
   * compatible with deterministic_med (where nontrivial reordering can happen
   * as a result of a route change) and with recomputation of recursive routes
   * due to next hop update (where many routes can be changed in one step).
   *
   * Note that we need this assumption just for optimizations, we could just
   * run full new_best recomputation otherwise.
   *
   * There are three cases:
   * feed or old_best is old_changed -> we need to recompute new_best
   * old_best is before new_changed -> new_best is old_best, ignore
   * old_best is after new_changed -> try new_changed, otherwise old_best
   */

  if (new_changed)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Find old_best - either old_changed, or route for net->routes */
  if (old_changed && bmap_test(&c->export_map, old_changed->id))
    old_best = old_changed;
  else
  {
    for (rte *r = net->routes; rte_is_valid(r); r = r->next)
    {
      if (bmap_test(&c->export_map, r->id))
      {
        old_best = r;
        break;
      }

      /* Note if new_changed found before old_best */
      if (r == new_changed)
        new_first = 1;
    }
  }

  /* Find new_best */
  if ((new_changed == old_changed) || (old_best == old_changed))
  {
    /* Feed or old_best changed -> find first accepted by filters */
    for (rte *r = net->routes; rte_is_valid(r); r = r->next)
      if (new_best = export_filter(c, r, &new_free, 0))
        break;
  }
  else
  {
    /* Other cases -> either new_changed, or old_best (and nothing changed) */
    if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0)))
      new_best = new_changed;
    else
      return;
  }

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}
static struct nexthop *
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
{
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
}

rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct nexthop *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, pool, silent);

  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter_(c, rt0, &tmp, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      best = rte_cow_rta(best, pool);
      nexthop_link(best->attrs, nhs);
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}
static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
		 rte *new_best, rte *old_best, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) &&
      (new_changed != old_changed) &&
      !rte_mergable(new_best, new_changed) &&
      !rte_mergable(old_best, old_changed))
    return;

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0);

  /* Check old merged route */
  if (old_best && !bmap_test(&c->export_map, old_best->id))
    old_best = NULL;

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_UNDEF or RA_ANY)
 * @net: network in question
 * @new: the new or changed route
 * @old: the previous route replaced by the new one
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 *
 * This function gets a routing table update and announces it to all protocols
 * that are connected to the same table by their channels.
 *
 * There are two ways in which routing table changes are announced. First, there
 * is a change of just one route in @net (which may cause a change of the best
 * route of the network). In this case @new and @old describe the changed route
 * and @new_best and @old_best describe the best routes. Other routes are not
 * affected, but in a sorted table the order of other routes might change.
 *
 * Second, there is a bulk change of multiple routes in @net, with shared best
 * route selection. In such a case separate route changes are described using
 * @type of %RA_ANY, with @new and @old specifying the changed route, while
 * @new_best and @old_best are NULL. After that, another notification is done
 * where @new_best and @old_best are filled (may be the same), but @new and @old
 * are NULL.
 *
 * The function announces the change to all associated channels. For each
 * channel, an appropriate preprocessing is done according to the channel &ra_mode.
 * For example, %RA_OPTIMAL channels receive just changes of best routes.
 *
 * In general, we first call the preexport() hook of a protocol, which performs
 * basic checks on the route (each protocol has a right to veto or force accept
 * of the route before any filter is asked). Then we consult the export filter
 * of the channel and verify the old route in the export map of the channel.
 * Finally, the rt_notify() hook of the protocol gets called.
 *
 * Note that there are also calls of rt_notify() hooks due to feed, but that is
 * done outside of the scope of rte_announce().
 */
static void
rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old,
	     rte *new_best, rte *old_best)
{
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!new && !old && !new_best && !old_best)
    return;

  if (new_best != old_best)
  {
    if (new_best)
      new_best->sender->stats.pref_routes++;
    if (old_best)
      old_best->sender->stats.pref_routes--;

    if (tab->hostcache)
      rt_notify_hostcache(tab, net);

    if (!EMPTY_LIST(tab->flowspec_links))
      rt_flowspec_notify(tab, net);
  }

  rt_schedule_notify(tab);

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
  {
    if (c->export_state == ES_DOWN)
      continue;

    if (type && (type != c->ra_mode))
      continue;

    switch (c->ra_mode)
    {
    case RA_OPTIMAL:
      if (new_best != old_best)
	rt_notify_basic(c, net, new_best, old_best, 0);
      break;

    case RA_ANY:
      if (new != old)
	rt_notify_basic(c, net, new, old, 0);
      break;

    case RA_ACCEPTED:
      /*
       * The (new != old) condition is problematic here, as it would break
       * the second usage pattern (announcement after bulk change, used in
       * rt_next_hop_update_net(), which sends both new and old as NULL).
       *
       * But recursive next hops do not work with sorted tables anyways,
       * such configuration is forbidden in BGP and not supported in
       * rt_notify_accepted().
       *
       * The condition is needed to eliminate spurious announcements where
       * both old and new routes are not valid (so they are NULL).
       */
      if (new != old)
	rt_notify_accepted(c, net, new, old, 0);
      break;

    case RA_MERGED:
      rt_notify_merged(c, net, new, old, new_best, old_best, 0);
      break;
    }
  }
}
static int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  /* FIXME: better handling different nettypes */
  c = !net_is_flow(n->n.addr) ?
    net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
  {
    /* Exception for flowspec that failed validation */
    if (net_is_flow(n->n.addr) && (e->attrs->dest == RTD_UNREACHABLE))
      return 1;

    log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
	n->n.addr, e->attrs->dest, e->sender->proto->name);
    return 0;
  }

  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
  {
    log(L_WARN "Ignoring unsorted multipath route %N received via %s",
	n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}
/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
void
rte_free(rte *e)
{
  rt_unlock_source(e->src);
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(e);
}

static inline void
rte_free_quick(rte *e)
{
  rt_unlock_source(e->src);
  rta_free(e->attrs);
  sl_free(e);
}

static int
rte_same(rte *x, rte *y)
{
  /* rte.flags / rte.pflags are not checked, as they are internal to rtable */
  return
    x->attrs == y->attrs &&
    x->src == y->src &&
    rte_is_filtered(x) == rte_is_filtered(y);
}

static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
  {
    if (old->src == src)
    {
      /* If there is the same route in the routing table but from
       * a different sender, then there are two paths from the
       * source protocol to this routing table through transparent
       * pipes, which is not allowed.
       *
       * We log that and ignore the route. If it is withdraw, we
       * ignore it completely (there might be 'spurious withdraws',
       * see FIXME in do_rte_announce())
       */
      if (old->sender->proto != p)
      {
	if (new)
	{
	  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
	      net->n.addr, table->name);
	  rte_free_quick(new);
	}
	return;
      }

      if (new && rte_same(old, new))
      {
	/* No changes, ignore the new route and refresh the old one */

	old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);

	if (!rte_is_filtered(new))
	{
	  stats->imp_updates_ignored++;
	  rte_trace_in(D_ROUTES, c, new, "ignored");
	}

	rte_free_quick(new);
	return;
      }

      *k = old->next;
      table->rt_count--;
      break;
    }
    k = &old->next;
    before_old = old;
  }

  /* Save the last accessed position */
  rte **pos = k;

  if (!old)
    before_old = NULL;

  if (!old && !new)
  {
    stats->imp_withdraws_ignored++;
    return;
  }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new && !c->in_table)
  {
    u32 all_routes = stats->imp_routes + stats->filt_routes;

    if (all_routes >= l->limit)
      channel_notify_limit(c, l, PLD_RX, all_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* In receive limit the situation is simple, old is NULL so
	 we just free new and exit like nothing happened */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
      rte_free_quick(new);
      return;
    }
  }

  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
  {
    if (stats->imp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* In import limit the situation is more complicated. We
	 shouldn't just drop the route, we should handle it like
	 it was filtered. We also have to continue the route
	 processing if old or new is non-NULL, but we should exit
	 if both are NULL as this case is probably assumed to be
	 already handled. */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, c, new, "ignored [limit]");

      if (c->in_keep_filtered)
	new->flags |= REF_FILTERED;
      else
	{ rte_free_quick(new); new = NULL; }

      /* Note that old && !new could be possible when
	 c->in_keep_filtered changed in the recent past. */

      if (!old && !new)
	return;

      new_ok = 0;
      goto skip_stats1;
    }
  }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

  if (old_ok || new_ok)
    table->last_rt_change = current_time();

 skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
  {
    /* If routes are sorted, just insert new route to appropriate position */
    if (new)
    {
      if (before_old && !rte_better(new, before_old))
	k = &before_old->next;
      else
	k = &net->routes;

      for (; *k; k=&(*k)->next)
	if (rte_better(new, *k))
	  break;

      new->next = *k;
      *k = new;

      table->rt_count++;
    }
  }
  else
  {
    /* If routes are not sorted, find the best route and move it on
       the first position. There are several optimized cases. */

    if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
      goto do_recalculate;

    if (new && rte_better(new, old_best))
    {
      /* The first case - the new route is clearly optimal,
	 we link it at the first position */

      new->next = net->routes;
      net->routes = new;

      table->rt_count++;
    }
    else if (old == old_best)
    {
      /* The second case - the old best route disappeared, we add the
	 new route (if we have any) to the list (we don't care about
	 position) and then we elect the new optimal route and relink
	 that route at the first position and announce it. The new optimal
	 route might be NULL if there are no more routes */

    do_recalculate:
      /* Add the new route to the list */
      if (new)
      {
	new->next = *pos;
	*pos = new;

	table->rt_count++;
      }

      /* Find a new optimal route (if there is any) */
      if (net->routes)
      {
	rte **bp = &net->routes;
	for (k=&(*bp)->next; *k; k=&(*k)->next)
	  if (rte_better(*k, *bp))
	    bp = k;

	/* And relink it */
	rte *best = *bp;
	*bp = best->next;
	best->next = net->routes;
	net->routes = best;
      }
    }
    else if (new)
    {
      /* The third case - the new route is not better than the old
	 best route (therefore old_best != NULL) and the old best
	 route was not removed (therefore old_best == net->routes).
	 We just link the new route to the old/last position. */

      new->next = *pos;
      *pos = new;

      table->rt_count++;
    }
    /* The fourth (empty) case - suboptimal route was removed, nothing to do */
  }

  if (new)
  {
    new->lastmod = current_time();

    if (!old)
    {
      new->id = hmap_first_zero(&table->id_map);
      hmap_set(&table->id_map, new->id);
    }
    else
      new->id = old->id;
  }

  /* Log the route change */
  if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES))
  {
    if (new_ok)
      rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added");
    else if (old_ok)
    {
      if (old != old_best)
	rte_trace(c, old, '>', "removed");
      else if (rte_is_ok(net->routes))
	rte_trace(c, old, '>', "removed [replaced]");
      else
	rte_trace(c, old, '>', "removed [sole]");
    }
  }

  /* Propagate the route change */
  rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best);

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_threshold))
    rt_kick_prune_timer(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
  {
    if (!new)
      hmap_clear(&table->id_map, old->id);

    rte_free_quick(old);
  }
}
static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
 * @c: channel doing the update
 * @net: network node
 * @p: protocol submitting the update
 * @src: protocol originating the update
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
 * same value as @new->attrs->proto. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores @p in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * and stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @n, replace it by the new one (or remove it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */
void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  const struct filter *filter = c->in_filter;
  struct mpls_fec *fec = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
  {
    /* Create a temporary table node */
    nn = alloca(sizeof(net) + n->length);
    memset(nn, 0, sizeof(net) + n->length);
    net_copy(nn->n.addr, n);

    new->net = nn;
    new->sender = c;

    stats->imp_updates_received++;
    if (!rte_validate(new))
    {
      rte_trace_in(D_FILTERS, c, new, "invalid");
      stats->imp_updates_invalid++;
      goto drop;
    }

    if (filter == FILTER_REJECT)
    {
      stats->imp_updates_filtered++;
      rte_trace_in(D_FILTERS, c, new, "filtered out");

      if (! c->in_keep_filtered)
	goto drop;

      /* new is a private copy, i could modify it */
      new->flags |= REF_FILTERED;
    }
    else if (filter)
    {
      int fr = f_run(filter, &new, rte_update_pool, 0);
      if (fr > F_ACCEPT)
      {
	stats->imp_updates_filtered++;
	rte_trace_in(D_FILTERS, c, new, "filtered out");

	if (! c->in_keep_filtered)
	  goto drop;

	new->flags |= REF_FILTERED;
      }
    }

    if (p->mpls_map)
    {
      if (mpls_handle_rte(p->mpls_map, n, new, rte_update_pool, &fec) < 0)
      {
	rte_trace_in(D_FILTERS, c, new, "invalid");
	stats->imp_updates_invalid++;
	goto drop;
      }
    }

    if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
      new->attrs = rta_lookup(new->attrs);
    new->flags |= REF_COW;

    /* Use the actual struct network, not the dummy one */
    nn = net_get(c->table, n);
    new->net = nn;
  }
  else
  {
    stats->imp_withdraws_received++;

    if (!(nn = net_find(c->table, n)) || !src)
    {
      stats->imp_withdraws_ignored++;
      rte_update_unlock();
      return;
    }
  }

 recalc:
  /* And recalculate the best route */
  rte_recalculate(c, nn, new, src);

  if (p->mpls_map)
    mpls_handle_rte_cleanup(p->mpls_map, &fec);

  rte_update_unlock();
  return;

 drop:
  rte_free(new);
  new = NULL;

  if (nn = net_find(c->table, n))
    goto recalc;

  rte_update_unlock();
}
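/*
 * Illustrative sketch of the announcement sequence described in the
 * rte_update() documentation above (not part of the original file; c, n, src
 * and the attribute setup are assumed to be prepared by the caller):
 *
 *   rta *a = ...;                      // build (un-cached) route attributes
 *   rte *e = rte_get_temp(a, src);     // obtain a temporary &rte bound to a
 *   rte_update2(c, n, e, src);         // submit; the table takes ownership
 *
 * A withdrawal of the same route uses a NULL route instead:
 *
 *   rte_update2(c, n, NULL, src);
 */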
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old,
	       rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best);
  rte_update_unlock();
}

static inline void
rte_discard(rte *old)	/* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->src);
  rte_update_unlock();
}

/* Modify existing route by protocol hook, used for long-lived graceful restart */
static inline void
rte_modify(rte *old)
{
  rte_update_lock();

  rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
  if (new != old)
  {
    if (new)
    {
      if (!rta_is_cached(new->attrs))
	new->attrs = rta_lookup(new->attrs);
      new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
    }

    rte_recalculate(old->sender, old->net, new, old->src);
  }

  rte_update_unlock();
}
/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter)
{
  struct proto *p = c->proto;
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  int v = p->preexport ? p->preexport(c, rt) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);

  /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for given routing table and announce
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @c as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by the REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with the REF_DISCARD
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  if (c->debug & D_EVENTS)
    log(L_TRACE "%s.%s: Route refresh begin", c->proto->name, c->name);

  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if (e->sender == c)
	  e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}
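/*
 * Illustrative sketch, not part of the original file: a protocol performing a
 * route refresh over channel c (table t = c->table) brackets its
 * re-announcements like this:
 *
 *   rt_refresh_begin(t, c);       // mark all routes from c as REF_STALE
 *   ... rte_update2(c, n, e, src) for every still-valid route ...
 *   rt_refresh_end(t, c);         // mark remaining stale routes REF_DISCARD
 *                                 // and schedule the prune loop
 */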
/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for given routing table and announce
 * hook. See rt_refresh_begin() for description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  int prune = 0;

  if (c->debug & D_EVENTS)
    log(L_TRACE "%s.%s: Route refresh end", c->proto->name, c->name);

  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if ((e->sender == c) && (e->flags & REF_STALE))
	  {
	    e->flags |= REF_DISCARD;
	    prune = 1;
	  }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}

void
rt_modify_stale(rtable *t, struct channel *c)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
	  {
	    e->flags |= REF_MODIFY;
	    prune = 1;
	  }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This function dumps contents of a &rte to debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  debug("PF=%02x ", e->pflags);
  rta_dump(e->attrs);
  debug("\n");
}

/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for(e=n->routes; e; e=e->next)
	rte_dump(e);
    }
  FIB_WALK_END;
  debug("\n");
}

/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps contents of all routing tables to debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;
  node *n;

  WALK_LIST2(t, n, routing_tables, n)
    rt_dump(t);
}
static inline void
rt_schedule_hcu(rtable *tab)
{
  if (tab->hcu_scheduled)
    return;

  tab->hcu_scheduled = 1;
  ev_schedule(tab->rt_event);
}

static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == NHU_CLEAN)
    ev_schedule(tab->rt_event);

  /* state change:
   *   NHU_CLEAN   -> NHU_SCHEDULED
   *   NHU_RUNNING -> NHU_DIRTY
   */
  tab->nhu_state |= NHU_SCHEDULED;
}

void
rt_schedule_prune(rtable *tab)
{
  if (tab->prune_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->prune_state |= 1;
}

static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}
static void
rt_prune_timer(timer *t)
{
  rtable *tab = t->data;

  if (tab->gc_counter >= tab->config->gc_threshold)
    rt_schedule_prune(tab);
}

static void
rt_kick_prune_timer(rtable *tab)
{
  /* Return if prune is already scheduled */
  if (tm_active(tab->prune_timer) || (tab->prune_state & 1))
    return;

  /* Randomize GC period to +/- 50% */
  btime gc_period = tab->config->gc_period;
  gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period);
  tm_start(tab->prune_timer, gc_period);
}
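/*
 * Worked example, not part of the original file: with a configured gc_period
 * of 20 S, the timer above is started anywhere in [10 S, 30 S), so prune runs
 * of many tables sharing the same configuration do not synchronize.
 */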
static inline btime
rt_settled_time(rtable *tab)
{
  ASSUME(tab->base_settle_time != 0);

  return MIN(tab->last_rt_change + tab->config->min_settle_time,
	     tab->base_settle_time + tab->config->max_settle_time);
}

static void
rt_settle_timer(timer *t)
{
  rtable *tab = t->data;

  if (!tab->base_settle_time)
    return;

  btime settled_time = rt_settled_time(tab);
  if (current_time() < settled_time)
  {
    tm_set(tab->settle_timer, settled_time);
    return;
  }

  /* Settled */
  tab->base_settle_time = 0;

  struct rt_subscription *s;
  WALK_LIST(s, tab->subscribers)
    s->hook(s);
}

static void
rt_kick_settle_timer(rtable *tab)
{
  tab->base_settle_time = current_time();

  if (!tab->settle_timer)
    tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0);

  if (!tm_active(tab->settle_timer))
    tm_set(tab->settle_timer, rt_settled_time(tab));
}

static inline void
rt_schedule_notify(rtable *tab)
{
  if (EMPTY_LIST(tab->subscribers))
    return;

  if (tab->base_settle_time)
    return;

  rt_kick_settle_timer(tab);
}

void
rt_subscribe(rtable *tab, struct rt_subscription *s)
{
  s->tab = tab;
  rt_lock_table(tab);
  add_tail(&tab->subscribers, &s->n);
}

void
rt_unsubscribe(struct rt_subscription *s)
{
  rem_node(&s->n);
  rt_unlock_table(s->tab);
}
static struct rt_flowspec_link *
rt_flowspec_find_link(rtable *src, rtable *dst)
{
  struct rt_flowspec_link *ln;
  WALK_LIST(ln, src->flowspec_links)
    if ((ln->src == src) && (ln->dst == dst))
      return ln;

  return NULL;
}

void
rt_flowspec_link(rtable *src, rtable *dst)
{
  ASSERT(rt_is_ip(src));
  ASSERT(rt_is_flow(dst));

  struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst);

  if (!ln)
  {
    rt_lock_table(src);
    rt_lock_table(dst);

    ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link));
    ln->src = src;
    ln->dst = dst;
    add_tail(&src->flowspec_links, &ln->n);
  }

  ln->uc++;
}

void
rt_flowspec_unlink(rtable *src, rtable *dst)
{
  struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst);

  ASSERT(ln && (ln->uc > 0));

  ln->uc--;

  if (!ln->uc)
  {
    rem_node(&ln->n);
    mb_free(ln);

    rt_unlock_table(src);
    rt_unlock_table(dst);
  }
}

static void
rt_flowspec_notify(rtable *src, net *net)
{
  /* Only IP tables are src links */
  ASSERT(rt_is_ip(src));

  struct rt_flowspec_link *ln;
  WALK_LIST(ln, src->flowspec_links)
  {
    rtable *dst = ln->dst;
    ASSERT(rt_is_flow(dst));

    /* No need to inspect it further if recalculation is already active */
    if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY))
      continue;

    if (trie_match_net(dst->flowspec_trie, net->n.addr))
      rt_schedule_nhu(dst);
  }
}
static void
rt_flowspec_reset_trie(rtable *tab)
{
  linpool *lp = tab->flowspec_trie->lp;
  int ipv4 = tab->flowspec_trie->ipv4;

  lp_flush(lp);
  tab->flowspec_trie = f_new_trie(lp, 0);
  tab->flowspec_trie->ipv4 = ipv4;
}
static void
rt_free(resource *_r)
{
  rtable *r = (rtable *) _r;

  DBG("Deleting routing table %s\n", r->name);
  ASSERT_DIE(r->use_count == 0);

  if (r->internal)
    return;

  r->config->table = NULL;
  rem_node(&r->n);

  if (r->hostcache)
    rt_free_hostcache(r);

  /* Freed automagically by the resource pool
  fib_free(&r->fib);
  hmap_free(&r->id_map);
  rfree(r->rt_event);
  rfree(r->settle_timer);
  mb_free(r);
  */
}

static void
rt_res_dump(resource *_r)
{
  rtable *r = (rtable *) _r;
  debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n",
	r->name, net_label[r->addr_type], r->rt_count, r->use_count);
}

static struct resclass rt_class = {
  .name = "Routing table",
  .size = sizeof(struct rtable),
  .free = rt_free,
  .dump = rt_res_dump,
};
*pp
, struct rtable_config
*cf
)
2149 pool
*p
= rp_newf(pp
, "Routing table %s", cf
->name
);
2151 rtable
*t
= ralloc(p
, &rt_class
);
2156 t
->addr_type
= cf
->addr_type
;
2158 fib_init(&t
->fib
, p
, t
->addr_type
, sizeof(net
), OFFSETOF(net
, n
), 0, NULL
);
2162 t
->trie
= f_new_trie(lp_new_default(p
), 0);
2163 t
->trie
->ipv4
= net_val_match(t
->addr_type
, NB_IP4
| NB_VPN4
| NB_ROA4
);
2165 t
->fib
.init
= net_init_with_trie
;
2168 init_list(&t
->channels
);
2169 init_list(&t
->flowspec_links
);
2170 init_list(&t
->subscribers
);
2172 hmap_init(&t
->id_map
, p
, 1024);
2173 hmap_set(&t
->id_map
, 0);
2175 if (!(t
->internal
= cf
->internal
))
2177 t
->rt_event
= ev_new_init(p
, rt_event
, t
);
2178 t
->prune_timer
= tm_new_init(p
, rt_prune_timer
, t
, 0, 0);
2179 t
->last_rt_change
= t
->gc_time
= current_time();
2183 t
->flowspec_trie
= f_new_trie(lp_new_default(p
), 0);
2184 t
->flowspec_trie
->ipv4
= (t
->addr_type
== NET_FLOW4
);
/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new_default(rt_table_pool);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
/**
 * rt_prune_table - prune a routing table
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration does not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is used also for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 2000;

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
	c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;

    tab->gc_counter = 0;
    tab->gc_time = current_time();

    if (tab->prune_trie)
    {
      /* Init prefix trie pruning */
      tab->trie_new = f_new_trie(lp_new_default(tab->rp), 0);
      tab->trie_new->ipv4 = tab->trie->ipv4;
    }
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      rte *e;

    rescan:
      if (limit <= 0)
      {
	FIB_ITERATE_PUT(fit);
	ev_schedule(tab->rt_event);
	return;
      }

      for (e=n->routes; e; e=e->next)
      {
	if (e->sender->flush_active || (e->flags & REF_DISCARD))
	  {
	    rte_discard(e);
	    limit--;

	    goto rescan;
	  }

	if (e->flags & REF_MODIFY)
	  {
	    rte_modify(e);
	    limit--;

	    goto rescan;
	  }
      }

      if (!n->routes)		/* Orphaned FIB entry */
	{
	  FIB_ITERATE_PUT(fit);
	  fib_delete(&tab->fib, n);
	  goto again;
	}

      if (tab->trie_new)
      {
	trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
	limit--;
      }
    }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  if (tab->trie_new)
  {
    /* Finish prefix trie pruning */

    if (!tab->trie_lock_count)
    {
      rfree(tab->trie->lp);
    }
    else
    {
      ASSERT(!tab->trie_old);
      tab->trie_old = tab->trie;
      tab->trie_old_lock_count = tab->trie_lock_count;
      tab->trie_lock_count = 0;
    }

    tab->trie = tab->trie_new;
    tab->trie_new = NULL;
    tab->prune_trie = 0;
  }
  else
  {
    /* Schedule prefix trie pruning */
    if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
    {
      /* state change 0->1, 2->3 */
      tab->prune_state |= 1;
      tab->prune_trie = 1;
    }
  }

  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
      {
	c->flush_active = 0;
	channel_set_state(c, CS_DOWN);
      }
}
/**
 * rt_lock_trie - lock a prefix trie of a routing table
 * @tab: routing table with prefix trie to be locked
 *
 * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
 * structures. Therefore, asynchronous walks should lock the prefix trie using
 * this function. That allows the prune loop to rebuild the trie, but postpones
 * its freeing until all walks are done (unlocked by rt_unlock_trie()).
 *
 * Returns the current trie that will be locked; the value should be passed
 * back to rt_unlock_trie() for unlocking.
 */
struct f_trie *
rt_lock_trie(rtable *tab)
{
  ASSERT(tab->trie);

  tab->trie_lock_count++;
  return tab->trie;
}

/**
 * rt_unlock_trie - unlock a prefix trie of a routing table
 * @tab: routing table with prefix trie to be locked
 * @trie: value returned by matching rt_lock_trie()
 *
 * Done for a trie locked by rt_lock_trie() after the walk over the trie is
 * done. It may free the trie and schedule next trie pruning.
 */
void
rt_unlock_trie(rtable *tab, struct f_trie *trie)
{
  ASSERT(trie);

  if (trie == tab->trie)
  {
    /* Unlock the current prefix trie */
    ASSERT(tab->trie_lock_count);
    tab->trie_lock_count--;
  }
  else if (trie == tab->trie_old)
  {
    /* Unlock the old prefix trie */
    ASSERT(tab->trie_old_lock_count);
    tab->trie_old_lock_count--;

    /* Free old prefix trie that is no longer needed */
    if (!tab->trie_old_lock_count)
    {
      rfree(tab->trie_old->lp);
      tab->trie_old = NULL;

      /* Kick prefix trie pruning that was postponed */
      if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
      {
	tab->prune_trie = 1;
	rt_schedule_prune(tab);
      }
    }
  }
  else
    log(L_BUG "Invalid arg to rt_unlock_trie()");
}
void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol(c, "master4"), NET_IP4);
  rt_new_table(cf_get_symbol(c, "master6"), NET_IP6);
}

void
rt_postconfig(struct config *c)
{
  uint num_tables = list_length(&c->tables);
  btime def_gc_period = 400 MS * num_tables;
  def_gc_period = MAX(def_gc_period, 10 S);
  def_gc_period = MIN(def_gc_period, 600 S);

  struct rtable_config *rc;
  WALK_LIST(rc, c->tables)
    if (rc->gc_period == (uint) -1)
      rc->gc_period = (uint) def_gc_period;
}
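/*
 * Worked example, not part of the original file: with 30 configured tables
 * the default is 400 MS * 30 = 12 S, which already lies inside the
 * [10 S, 600 S] clamp; with 10 tables the raw 4 S value is raised to 10 S.
 */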
/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */

void
rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
{
  a->hostentry = he;
  a->dest = he->dest;
  a->igp_metric = he->igp_metric;

  if (a->dest != RTD_UNICAST)
  {
    /* No nexthop */
no_nexthop:
    a->nh = (struct nexthop) {};
    if (mls)
    { /* Store the label stack for later changes */
      a->nh.labels_orig = a->nh.labels = mls->len;
      memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
    }
    return;
  }

  if (((!mls) || (!mls->len)) && he->nexthop_linkable)
  { /* Just link the nexthop chain, no label append happens. */
    memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
    return;
  }

  struct nexthop *nhp = NULL, *nhr = NULL;
  int skip_nexthop = 0;

  for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
  {
    if (skip_nexthop)
      skip_nexthop--;
    else
    {
      nhr = nhp;
      nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
    }

    memset(nhp, 0, NEXTHOP_MAX_SIZE);
    nhp->iface = nh->iface;
    nhp->weight = nh->weight;

    if (mls)
    {
      nhp->labels = nh->labels + mls->len;
      nhp->labels_orig = mls->len;
      if (nhp->labels <= MPLS_MAX_LABEL_STACK)
      {
	memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
	memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
      }
      else
      {
	log(L_WARN "Sum of label stack sizes %d + %d = %d exceeds allowed maximum (%d)",
	    nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
	skip_nexthop++;
	continue;
      }
    }
    else if (nh->labels)
    {
      nhp->labels = nh->labels;
      nhp->labels_orig = 0;
      memcpy(nhp->label, nh->label, nh->labels * sizeof(u32));
    }

    if (ipa_nonzero(nh->gw))
    {
      nhp->gw = nh->gw;			/* Router nexthop */
      nhp->flags |= (nh->flags & RNF_ONLINK);
    }
    else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK))
      nhp->gw = IPA_NONE;		/* PtP link - no need for nexthop */
    else if (ipa_nonzero(he->link))
      nhp->gw = he->link;		/* Device nexthop with link-local address known */
    else
      nhp->gw = he->addr;		/* Device nexthop with link-local address unknown */
  }

  if (skip_nexthop)
    if (nhr)
      nhr->next = NULL;
    else
    {
      a->dest = RTD_UNREACHABLE;
      log(L_WARN "No valid nexthop remaining, setting route unreachable");
      goto no_nexthop;
    }
}
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
}
static rte *
rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
{
  if (!rta_next_hop_outdated(old->attrs))
    return NULL;

  rta *a = alloca(RTA_MAX_SIZE);
  memcpy(a, old->attrs, rta_size(old->attrs));

  mpls_label_stack mls = { .len = a->nh.labels_orig };
  memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));

  rta_apply_hostentry(a, old->attrs->hostentry, &mls);
  a->cached = 0;

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(a);
  rt_lock_source(e->src);

  return e;
}
#ifdef CONFIG_BGP

static inline int
net_flow_has_dst_prefix(const net_addr *n)
{
  ASSUME(net_is_flow(n));

  if (n->pxlen)
    return 1;

  if (n->type == NET_FLOW4)
  {
    const net_addr_flow4 *n4 = (void *) n;
    return (n4->length > sizeof(net_addr_flow4)) && (n4->data[0] == FLOW_TYPE_DST_PREFIX);
  }
  else
  {
    const net_addr_flow6 *n6 = (void *) n;
    return (n6->length > sizeof(net_addr_flow6)) && (n6->data[0] == FLOW_TYPE_DST_PREFIX);
  }
}

static inline int
rta_as_path_is_empty(rta *a)
{
  eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
  return !e || (as_path_getlen(e->u.ptr) == 0);
}

static inline u32
rta_get_first_asn(rta *a)
{
  eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
  u32 asn;

  return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0;
}
int
rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior)
{
  ASSERT(rt_is_ip(tab_ip));
  ASSERT(rt_is_flow(tab_flow));
  ASSERT(tab_ip->trie);

  /* RFC 8955 6. a) Flowspec has defined dst prefix */
  if (!net_flow_has_dst_prefix(n))
    return 0;

  /* RFC 9117 4.1. Accept AS_PATH is empty (from IBGP) */
  if (interior && rta_as_path_is_empty(a))
    return 1;


  /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */

  /* Find flowspec dst prefix */
  net_addr dst;
  if (n->type == NET_FLOW4)
    net_fill_ip4(&dst, net4_prefix(n), net4_pxlen(n));
  else
    net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n));

  /* Find best-match BGP unicast route for flowspec dst prefix */
  net *nb = net_route(tab_ip, &dst);
  rte *rb = nb ? nb->routes : NULL;

  /* Register prefix to trie for tracking further changes */
  int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH;
  trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen);

  /* No best-match BGP route -> no flowspec */
  if (!rb || (rb->attrs->source != RTS_BGP))
    return 0;

  /* Find ORIGINATOR_ID values */
  u32 orig_a = ea_get_int(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0);
  u32 orig_b = ea_get_int(rb->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0);

  /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */
  if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal(a->from, rb->attrs->from)))
    return 0;


  /* Find ASN of the best-match route, for use in next checks */
  u32 asn_b = rta_get_first_asn(rb->attrs);
  if (!asn_b)
    return 0;

  /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */
  if (!interior && (rta_get_first_asn(a) != asn_b))
    return 0;

  /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */
  TRIE_WALK(tab_ip->trie, subnet, &dst)
  {
    net *nc = net_find_valid(tab_ip, &subnet);
    if (!nc)
      continue;

    rte *rc = nc->routes;
    if (rc->attrs->source != RTS_BGP)
      return 0;

    if (rta_get_first_asn(rc->attrs) != asn_b)
      return 0;
  }
  TRIE_WALK_END;

  return 1;
}

#endif /* CONFIG_BGP */
static rte *
rt_flowspec_update_rte(rtable *tab, rte *r)
{
#ifdef CONFIG_BGP
  if ((r->attrs->source != RTS_BGP) || (r->sender->proto != r->src->proto))
    return NULL;

  struct bgp_channel *bc = (struct bgp_channel *) r->sender;
  if (!bc->base_table)
    return NULL;

  const net_addr *n = r->net->n.addr;
  struct bgp_proto *p = (void *) r->src->proto;
  int valid = rt_flowspec_check(bc->base_table, tab, n, r->attrs, p->is_interior);
  int dest = valid ? RTD_NONE : RTD_UNREACHABLE;

  if (dest == r->attrs->dest)
    return NULL;

  rta *a = alloca(RTA_MAX_SIZE);
  memcpy(a, r->attrs, rta_size(r->attrs));
  a->dest = dest;
  a->cached = 0;

  rte *new = sl_alloc(rte_slab);
  memcpy(new, r, sizeof(rte));
  new->attrs = rta_lookup(a);
  rt_lock_source(new->src);

  return new;
#else
  return NULL;
#endif
}
static int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  for (k = &n->routes; e = *k; k = &e->next)
  {
    if (!net_is_flow(n->n.addr))
      new = rt_next_hop_update_rte(tab, e);
    else
      new = rt_flowspec_update_rte(tab, e);

    if (new)
    {
      *k = new;

      rte_trace_in(D_ROUTES, new->sender, new, "updated");
      rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);

      /* Call a pre-comparison hook */
      /* Not really an efficient way to compute this */
      if (e->src->proto->rte_recalculate)
        e->src->proto->rte_recalculate(tab, n, new, e, NULL);

      if (e != old_best)
        rte_free_quick(e);
      else /* Freeing of the old best rte is postponed */
        free_old_best = 1;

      e = new;
      count++;
    }
  }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
  {
    if (!new_best || rte_better(e, *new_best))
      new_best = k;
  }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
  {
    *new_best = new->next;
    new->next = n->routes;
    n->routes = new;
  }

  /* Announce the new best route */
  if (new != old_best)
    rte_trace_in(D_ROUTES, new->sender, new, "updated [best]");

  /* Propagate changes */
  rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best);

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == NHU_CLEAN)
    return;

  if (tab->nhu_state == NHU_SCHEDULED)
  {
    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->nhu_state = NHU_RUNNING;

    if (tab->flowspec_trie)
      rt_flowspec_reset_trie(tab);
  }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
  {
    if (max_feed <= 0)
    {
      FIB_ITERATE_PUT(fit);
      ev_schedule(tab->rt_event);
      return;
    }
    max_feed -= rt_next_hop_update_net(tab, n);
  }
  FIB_ITERATE_END;

  /* State change:
   *   NHU_DIRTY -> NHU_SCHEDULED
   *   NHU_RUNNING -> NHU_CLEAN
   */
  tab->nhu_state &= 1;

  if (tab->nhu_state != NHU_CLEAN)
    ev_schedule(tab->rt_event);
}
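/*
 * Editor's note (illustration): the masking above relies on the NHU state
 * being encoded in two bits, NHU_CLEAN = 0, NHU_SCHEDULED = 1,
 * NHU_RUNNING = 2 and NHU_DIRTY = 3, so 'nhu_state &= 1' keeps only the
 * "scheduled" bit: a finished run (NHU_RUNNING) becomes NHU_CLEAN, while a
 * run that was marked dirty in the meantime (NHU_DIRTY) becomes
 * NHU_SCHEDULED and is restarted by the rescheduled table event.
 */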
struct rtable_config *
rt_new_table(struct symbol *s, uint addr_type)
{
  /* Hack that allows to 'redefine' the master table */
  if ((s->class == SYM_TABLE) &&
      (s->table == new_config->def_tables[addr_type]) &&
      ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
    return s->table;

  struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));

  cf_define_symbol(new_config, s, SYM_TABLE, table, c);
  c->name = s->name;
  c->addr_type = addr_type;
  c->gc_threshold = 1000;
  c->gc_period = (uint) -1;	/* set in rt_postconfig() */
  c->min_settle_time = 1 S;
  c->max_settle_time = 20 S;

  add_tail(&new_config->tables, &c->n);

  /* First table of each type is kept as default */
  if (! new_config->def_tables[addr_type])
    new_config->def_tables[addr_type] = c;

  return c;
}
/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;
}
/**
 * rt_unlock_table - unlock a routing table
 * @r: routing table to be unlocked
 *
 * Unlock a routing table formerly locked by rt_lock_table(),
 * that is decrease its use count and delete it if it's scheduled
 * for deletion by configuration changes.
 */
void
rt_unlock_table(rtable *r)
{
  if (!--r->use_count && r->deleted)
  {
    struct config *conf = r->deleted;

    /* Delete the routing table by freeing its pool */
    rfree(r->rp);
    config_del_obstacle(conf);
  }
}
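/*
 * Usage sketch (editor's illustration, not part of the original source):
 * rt_lock_table() and rt_unlock_table() calls must be paired. A protocol
 * that keeps a reference to a table for its whole lifetime would do roughly
 * the following; the hook names are hypothetical.
 */
#if 0
static void
example_proto_start(rtable *tab)
{
  rt_lock_table(tab);		/* Table cannot be freed while we use it */
}

static void
example_proto_stop(rtable *tab)
{
  rt_unlock_table(tab);		/* May free the table if it was removed from config */
}
#endif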
static int
rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old)
{
  if ((new->addr_type != old->addr_type) ||
      (new->sorted != old->sorted) ||
      (new->trie_used != old->trie_used))
    return 0;

  DBG("\t%s: same\n", new->name);
  new->table = tab;
  tab->name = new->name;
  tab->config = new;

  return 1;
}
static struct rtable_config *
rt_find_table_config(struct config *cf, char *name)
{
  struct symbol *sym = cf_find_symbol(cf, name);
  return (sym && (sym->class == SYM_TABLE)) ? sym->table : NULL;
}
/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
  {
    WALK_LIST(o, old->tables)
    {
      rtable *tab = o->table;
      if (tab->deleted)
        continue;

      r = rt_find_table_config(new, o->name);
      if (r && !new->shutdown && rt_reconfigure(tab, r, o))
        continue;

      DBG("\t%s: deleted\n", o->name);
      tab->deleted = old;
      config_add_obstacle(old);
      rt_lock_table(tab);
      rt_unlock_table(tab);
    }
  }

  WALK_LIST(r, new->tables)
    if (!r->table)
    {
      r->table = rt_setup(rt_table_pool, r);
      DBG("\t%s: created\n", r->name);
      add_tail(&routing_tables, &r->table->n);
    }
  DBG("\tdone\n");
}
static inline void
do_feed_channel(struct channel *c, net *n, rte *e)
{
  rte_update_lock();
  if (c->ra_mode == RA_ACCEPTED)
    rt_notify_accepted(c, n, NULL, NULL, c->refeeding);
  else if (c->ra_mode == RA_MERGED)
    rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding);
  else /* RA_BASIC */
    rt_notify_basic(c, n, e, e, c->refeeding);
  rte_update_unlock();
}
/**
 * rt_feed_channel - advertise all routes to a channel
 * @c: channel to be fed
 *
 * This function performs one pass of advertisement of routes to a channel that
 * is in the ES_FEEDING state. It is called by the protocol code as long as it
 * has something to do. (We avoid transferring all the routes in a single pass
 * in order not to monopolize CPU time.)
 */
int
rt_feed_channel(struct channel *c)
{
  struct fib_iterator *fit = &c->feed_fit;
  int max_feed = 256;

  ASSERT(c->export_state == ES_FEEDING);

  if (!c->feed_active)
  {
    FIB_ITERATE_INIT(fit, &c->table->fib);
    c->feed_active = 1;
  }

  FIB_ITERATE_START(&c->table->fib, fit, net, n)
  {
    rte *e = n->routes;
    if (max_feed <= 0)
    {
      FIB_ITERATE_PUT(fit);
      return 0;
    }

    if ((c->ra_mode == RA_OPTIMAL) ||
        (c->ra_mode == RA_ACCEPTED) ||
        (c->ra_mode == RA_MERGED))
      if (rte_is_valid(e))
      {
        /* In the meantime, the protocol may have gone down */
        if (c->export_state != ES_FEEDING)
          goto done;

        do_feed_channel(c, n, e);
        max_feed--;
      }

    if (c->ra_mode == RA_ANY)
      for (e = n->routes; e; e = e->next)
      {
        /* In the meantime, the protocol may have gone down */
        if (c->export_state != ES_FEEDING)
          goto done;

        if (!rte_is_valid(e))
          continue;

        do_feed_channel(c, n, e);
        max_feed--;
      }
  }
  FIB_ITERATE_END;

done:
  c->feed_active = 0;
  return 1;
}
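/*
 * Usage sketch (editor's illustration, not part of the original source): the
 * protocol code is expected to call rt_feed_channel() repeatedly, typically
 * from an event, until it reports completion. The event handle below is
 * hypothetical.
 */
#if 0
static void
example_feed_loop(struct channel *c, event *feed_event)
{
  /* One pass; returns 0 when interrupted by the max_feed limit */
  if (!rt_feed_channel(c))
  {
    ev_schedule(feed_event);	/* Come back later, do not hog the CPU */
    return;
  }

  /* All routes fed; the protocol code would now leave ES_FEEDING */
}
#endif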
/**
 * rt_feed_channel_abort - abort protocol feeding
 * @c: channel
 *
 * This function is called by the protocol code when the protocol stops or
 * ceases to exist during the feeding.
 */
void
rt_feed_channel_abort(struct channel *c)
{
  if (c->feed_active)
  {
    /* Unlink the iterator */
    fit_get(&c->table->fib, &c->feed_fit);
    c->feed_active = 0;
  }
}
int
rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct rtable *tab = c->in_table;
  rte *old, **pos;
  net *net;

  if (new)
  {
    net = net_get(tab, n);

    if (!rta_is_cached(new->attrs))
      new->attrs = rta_lookup(new->attrs);
  }
  else
  {
    net = net_find(tab, n);

    if (!net)
      goto drop_withdraw;
  }

  /* Find the old rte */
  for (pos = &net->routes; old = *pos; pos = &old->next)
    if (old->src == src)
    {
      if (new && rte_same(old, new))
      {
        /* Refresh the old rte, continue with update to main rtable */
        if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
        {
          old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
          return 1;
        }

        goto drop_update;
      }

      /* Move iterator if needed */
      if (old == c->reload_next_rte)
        c->reload_next_rte = old->next;

      /* Remove the old rte */
      *pos = old->next;
      tab->rt_count--;
      break;
    }

  if (!old && !new)
    goto drop_withdraw;

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new)
  {
    if (tab->rt_count >= l->limit)
      channel_notify_limit(c, l, PLD_RX, tab->rt_count);

    if (l->state == PLS_BLOCKED)
    {
      /* Required by rte_trace_in() */
      new->net = net;

      rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
      goto drop_update;
    }
  }

  if (new)
  {
    /* Insert the new rte */
    rte *e = rte_do_cow(new);
    e->flags |= REF_COW;
    e->net = net;
    e->sender = c;
    e->lastmod = current_time();

    e->next = *pos;
    *pos = new = e;
    tab->rt_count++;

    new->id = hmap_first_zero(&tab->id_map);
    hmap_set(&tab->id_map, new->id);
  }

  rte_announce(tab, RA_ANY, net, new, old, NULL, NULL);

  if (old)
  {
    hmap_clear(&tab->id_map, old->id);
    rte_free_quick(old);
  }

  if (!net->routes)
    fib_delete(&tab->fib, net);

  return 1;

drop_update:
  c->stats.imp_updates_received++;
  c->stats.imp_updates_ignored++;

  if (!net->routes)
    fib_delete(&tab->fib, net);

  return 0;

drop_withdraw:
  c->stats.imp_withdraws_received++;
  c->stats.imp_withdraws_ignored++;
  return 0;
}
int
rt_reload_channel(struct channel *c)
{
  struct rtable *tab = c->in_table;
  struct fib_iterator *fit = &c->reload_fit;
  int max_feed = 64;

  ASSERT(c->channel_state == CS_UP);

  if (!c->reload_active)
  {
    FIB_ITERATE_INIT(fit, &tab->fib);
    c->reload_active = 1;
  }

  do {
    for (rte *e = c->reload_next_rte; e; e = e->next)
    {
      if (max_feed-- <= 0)
      {
        c->reload_next_rte = e;
        debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed);
        return 0;
      }

      rte_update2(c, e->net->n.addr, rte_do_cow(e), e->src);
    }

    c->reload_next_rte = NULL;

    FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (c->reload_next_rte = n->routes)
      {
        FIB_ITERATE_PUT_NEXT(fit, &tab->fib);
        break;
      }
    }
    FIB_ITERATE_END;
  }
  while (c->reload_next_rte);

  c->reload_active = 0;
  return 1;
}
void
rt_reload_channel_abort(struct channel *c)
{
  if (c->reload_active)
  {
    /* Unlink the iterator */
    fit_get(&c->in_table->fib, &c->reload_fit);
    c->reload_next_rte = NULL;
    c->reload_active = 0;
  }
}
void
rt_prune_sync(rtable *t, int all)
{
  struct fib_iterator fit;

  FIB_ITERATE_INIT(&fit, &t->fib);

again:
  FIB_ITERATE_START(&t->fib, &fit, net, n)
  {
    rte *e, **ee = &n->routes;

    while (e = *ee)
    {
      if (all || (e->flags & (REF_STALE | REF_DISCARD)))
      {
        *ee = e->next;
        rte_free_quick(e);
        t->rt_count--;
      }
      else
        ee = &e->next;
    }

    if (all || !n->routes)
    {
      FIB_ITERATE_PUT(&fit);
      fib_delete(&t->fib, n);
      goto again;
    }
  }
  FIB_ITERATE_END;
}
int
rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed)
{
  struct rtable *tab = c->out_table;
  struct rte_src *src;
  rte *old, **pos;
  net *net;

  if (new)
  {
    net = net_get(tab, n);
    src = new->src;

    if (!rta_is_cached(new->attrs))
      new->attrs = rta_lookup(new->attrs);
  }
  else
  {
    net = net_find(tab, n);
    src = old0->src;

    if (!net)
      goto drop_withdraw;
  }

  /* Find the old rte */
  for (pos = &net->routes; old = *pos; pos = &old->next)
    if ((c->ra_mode != RA_ANY) || (old->src == src))
    {
      if (new && rte_same(old, new))
      {
        /* REF_STALE / REF_DISCARD not used in export table */
        if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
        {
          old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
          return 1;
        }

        goto drop_update;
      }

      /* Remove the old rte */
      *pos = old->next;
      rte_free_quick(old);
      tab->rt_count--;

      break;
    }

  if (!new)
  {
    if (!old)
      goto drop_withdraw;

    if (!net->routes)
      fib_delete(&tab->fib, net);

    return 1;
  }

  /* Insert the new rte */
  rte *e = rte_do_cow(new);
  e->flags |= REF_COW;
  e->net = net;
  e->sender = c;
  e->lastmod = current_time();

  e->next = *pos;
  *pos = e;
  tab->rt_count++;
  return 1;

drop_update:
  return refeed;

drop_withdraw:
  return 0;
}
static inline u32
hc_hash(ip_addr a, rtable *dep)
{
  return ipa_hash(a) ^ ptr_hash(dep);
}

static inline void
hc_insert(struct hostcache *hc, struct hostentry *he)
{
  uint k = he->hash_key >> hc->hash_shift;
  he->next = hc->hash_table[k];
  hc->hash_table[k] = he;
}

static inline void
hc_remove(struct hostcache *hc, struct hostentry *he)
{
  struct hostentry **hep;
  uint k = he->hash_key >> hc->hash_shift;

  for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
  *hep = he->next;
}

#define HC_DEF_ORDER 10
#define HC_HI_MARK *4
#define HC_HI_STEP 2
#define HC_HI_ORDER 16			/* Must be at most 16 */
#define HC_LO_MARK /5
#define HC_LO_STEP 2
#define HC_LO_ORDER 10
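/*
 * Editor's note (illustration): HC_HI_MARK and HC_LO_MARK are pasted after
 * the table size in hc_alloc_table() below, so hash_max = hsize * 4 and
 * hash_min = hsize / 5. With the default order 10 (1024 slots) the table
 * grows by HC_HI_STEP = 2 orders once it holds more than 4096 entries;
 * hash_min is 0 at the default order, so the table never shrinks below that
 * size, while a grown table (e.g. order 12) shrinks again when it drops
 * under 4096 / 5 = 819 entries.
 */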
static void
hc_alloc_table(struct hostcache *hc, pool *p, unsigned order)
{
  uint hsize = 1 << order;
  hc->hash_order = order;
  hc->hash_shift = 32 - order;
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ?  0U : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(p, hsize * sizeof(struct hostentry *));
}

static void
hc_resize(struct hostcache *hc, pool *p, unsigned new_order)
{
  struct hostentry **old_table = hc->hash_table;
  struct hostentry *he, *hen;
  uint old_size = 1 << hc->hash_order;
  uint i;

  hc_alloc_table(hc, p, new_order);
  for (i = 0; i < old_size; i++)
    for (he = old_table[i]; he != NULL; he = hen)
    {
      hen = he->next;
      hc_insert(hc, he);
    }
  mb_free(old_table);
}
static struct hostentry *
hc_new_hostentry(struct hostcache *hc, pool *p, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
{
  struct hostentry *he = sl_alloc(hc->slab);

  *he = (struct hostentry) {
    .addr = a,
    .link = ll,
    .tab = dep,
    .hash_key = k,
  };

  add_tail(&hc->hostentries, &he->ln);
  hc_insert(hc, he);

  hc->hash_items++;
  if (hc->hash_items > hc->hash_max)
    hc_resize(hc, p, hc->hash_order + HC_HI_STEP);

  return he;
}

static void
hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he)
{
  rta_free(he->src);

  rem_node(&he->ln);
  hc_remove(hc, he);
  sl_free(he);

  hc->hash_items--;
  if (hc->hash_items < hc->hash_min)
    hc_resize(hc, p, hc->hash_order - HC_LO_STEP);
}
static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, tab->rp, HC_DEF_ORDER);
  hc->slab = sl_new(tab->rp, sizeof(struct hostentry));

  hc->lp = lp_new(tab->rp);
  hc->trie = f_new_trie(hc->lp, 0);

  tab->hostcache = hc;
}
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
  {
    struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
    rta_free(he->src);

    if (he->uc)
      log(L_ERR "Hostcache is not empty in table %s", tab->name);
  }

  /* Freed automagically by the resource pool
  mb_free(hc->hash_table);
  mb_free(hc);
  */
}
static void
rt_notify_hostcache(rtable *tab, net *net)
{
  if (tab->hcu_scheduled)
    return;

  if (trie_match_net(tab->hostcache->trie, net->n.addr))
    rt_schedule_hcu(tab);
}
static int
if_local_addr(ip_addr a, struct iface *i)
{
  struct ifa *b;

  WALK_LIST(b, i->addrs)
    if (ipa_equal(a, b->ip))
      return 1;

  return 0;
}
u32
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);

  if (ea)
    return ea->u.data;

  if (rt->attrs->source == RTS_DEVICE)
    return 0;

  if (rt->src->proto->rte_igp_metric)
    return rt->src->proto->rte_igp_metric(rt);

  return IGP_METRIC_UNKNOWN;
}
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int direct = 0;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->dest = RTD_UNREACHABLE;
  he->nexthop_linkable = 0;
  he->igp_metric = 0;

  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
  {
    rte *e = n->routes;
    rta *a = e->attrs;
    word pref = a->pref;

    for (rte *ee = n->routes; ee; ee = ee->next)
      if ((ee->attrs->pref >= pref) && ee->attrs->hostentry)
      {
        /* Recursive route should not depend on another recursive route */
        log(L_WARN "Next hop address %I resolvable through recursive route for %N",
            he->addr, n->n.addr);
        goto done;
      }

    pxlen = n->n.addr->pxlen;

    if (a->dest == RTD_UNICAST)
    {
      for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
        if (ipa_zero(nh->gw))
        {
          if (if_local_addr(he->addr, nh->iface))
          {
            /* The host address is a local address, this is not valid */
            log(L_WARN "Next hop address %I is a local address of iface %s",
                he->addr, nh->iface->name);
            goto done;
          }

          direct++;
        }
    }

    he->src = rta_clone(a);
    he->dest = a->dest;
    he->nexthop_linkable = !direct;
    he->igp_metric = rt_get_igp_metric(e);
  }

done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, 0);

  WALK_LIST_DELSAFE(n, x, hc->hostentries)
  {
    he = SKIP_BACK(struct hostentry, ln, n);
    if (!he->uc)
    {
      hc_delete_hostentry(hc, tab->rp, he);
      continue;
    }

    if (rt_update_hostentry(tab, he))
      rt_schedule_nhu(he->tab);
  }

  tab->hcu_scheduled = 0;
}
struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  ip_addr link = ipa_zero(ll) ? a : ll;
  struct hostentry *he;

  if (!tab->hostcache)
    rt_init_hostcache(tab);

  u32 k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && ipa_equal(he->link, link) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, tab->rp, a, link, dep, k);
  rt_update_hostentry(tab, he);
  return he;
}
/*
 *  Documentation for functions declared inline in route.h
 */
#if 0

/**
 * net_find - find a network entry
 * @tab: a routing table
 * @addr: address of the network
 *
 * net_find() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry or %NULL if no such network
 * exists.
 */
static inline net *net_find(rtable *tab, net_addr *addr)
{ DUMMY; }

/**
 * net_get - obtain a network entry
 * @tab: a routing table
 * @addr: address of the network
 *
 * net_get() looks up the given network in routing table @tab and
 * returns a pointer to its &net entry. If no such entry exists, it's
 * created.
 */
static inline net *net_get(rtable *tab, net_addr *addr)
{ DUMMY; }

/**
 * rte_cow - copy a route for writing
 * @r: a route entry to be copied
 *
 * rte_cow() takes a &rte and prepares it for modification. The exact action
 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
 * just returned unchanged, else a new temporary entry with the same contents
 * is created.
 *
 * The primary use of this function is inside the filter machinery -- when
 * a filter wants to modify &rte contents (to change the preference or to
 * attach another set of attributes), it must ensure that the &rte is not
 * shared with anyone else (and especially that it isn't stored in any routing
 * table).
 *
 * Result: a pointer to the new writable &rte.
 */
static inline rte * rte_cow(rte *r)