]>
git.ipfire.org Git - thirdparty/bird.git/blob - nest/rt-table.c
2 * BIRD -- Routing Tables
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/iface.h"
37 #include "lib/resource.h"
38 #include "lib/event.h"
39 #include "lib/string.h"
40 #include "conf/conf.h"
41 #include "filter/filter.h"
42 #include "lib/string.h"
43 #include "lib/alloca.h"
47 static slab
*rte_slab
;
48 static linpool
*rte_update_pool
;
50 static list routing_tables
;
52 static void rt_free_hostcache(rtable
*tab
);
53 static void rt_notify_hostcache(rtable
*tab
, net
*net
);
54 static void rt_update_hostcache(rtable
*tab
);
55 static void rt_next_hop_update(rtable
*tab
);
56 static inline void rt_prune_table(rtable
*tab
);
59 /* Like fib_route(), but skips empty net entries */
61 net_route_ip4(rtable
*t
, net_addr_ip4
*n
)
65 while (r
= net_find_valid(t
, (net_addr
*) n
), (!r
) && (n
->pxlen
> 0))
68 ip4_clrbit(&n
->prefix
, n
->pxlen
);
75 net_route_ip6(rtable
*t
, net_addr_ip6
*n
)
79 while (r
= net_find_valid(t
, (net_addr
*) n
), (!r
) && (n
->pxlen
> 0))
82 ip6_clrbit(&n
->prefix
, n
->pxlen
);
89 net_route(rtable
*tab
, const net_addr
*n
)
91 ASSERT(tab
->addr_type
== n
->type
);
93 net_addr
*n0
= alloca(n
->length
);
101 return net_route_ip4(tab
, (net_addr_ip4
*) n0
);
106 return net_route_ip6(tab
, (net_addr_ip6
*) n0
);
115 net_roa_check_ip4(rtable
*tab
, const net_addr_ip4
*px
, u32 asn
)
117 struct net_addr_roa4 n
= NET_ADDR_ROA4(px
->prefix
, px
->pxlen
, 0, 0);
123 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
125 net_addr_roa4
*roa
= (void *) fn
->addr
;
126 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
128 if (net_equal_prefix_roa4(roa
, &n
) && rte_is_valid(r
->routes
))
131 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
140 ip4_clrbit(&n
.prefix
, n
.pxlen
);
143 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
147 net_roa_check_ip6(rtable
*tab
, const net_addr_ip6
*px
, u32 asn
)
149 struct net_addr_roa6 n
= NET_ADDR_ROA6(px
->prefix
, px
->pxlen
, 0, 0);
155 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
157 net_addr_roa6
*roa
= (void *) fn
->addr
;
158 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
160 if (net_equal_prefix_roa6(roa
, &n
) && rte_is_valid(r
->routes
))
163 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
172 ip6_clrbit(&n
.prefix
, n
.pxlen
);
175 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
179 * roa_check - check validity of route origination in a ROA table
181 * @n: network prefix to check
182 * @asn: AS number of network prefix
184 * Implements RFC 6483 route validation for the given network prefix. The
185 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
186 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
187 * a candidate ROA with matching ASN and maxlen field greater than or equal to
188 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
189 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
190 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
191 * must have type NET_IP4 or NET_IP6, respectively.
194 net_roa_check(rtable
*tab
, const net_addr
*n
, u32 asn
)
196 if ((tab
->addr_type
== NET_ROA4
) && (n
->type
== NET_IP4
))
197 return net_roa_check_ip4(tab
, (const net_addr_ip4
*) n
, asn
);
198 else if ((tab
->addr_type
== NET_ROA6
) && (n
->type
== NET_IP6
))
199 return net_roa_check_ip6(tab
, (const net_addr_ip6
*) n
, asn
);
201 return ROA_UNKNOWN
; /* Should not happen */
205 * rte_find - find a route
209 * The rte_find() function returns a route for destination @net
210 * which is from route source @src.
213 rte_find(net
*net
, struct rte_src
*src
)
215 rte
*e
= net
->routes
;
217 while (e
&& e
->attrs
->src
!= src
)
223 * rte_get_temp - get a temporary &rte
224 * @a: attributes to assign to the new route (a &rta; in case it's
225 * un-cached, rte_update() will create a cached copy automatically)
227 * Create a temporary &rte and bind it with the attributes @a.
228 * Also set route preference to the default preference set for
234 rte
*e
= sl_alloc(rte_slab
);
245 rte
*e
= sl_alloc(rte_slab
);
247 memcpy(e
, r
, sizeof(rte
));
248 e
->attrs
= rta_clone(r
->attrs
);
254 * rte_cow_rta - get a private writable copy of &rte with writable &rta
255 * @r: a route entry to be copied
256 * @lp: a linpool from which to allocate &rta
258 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
259 * modification. There are three possibilities: First, both &rte and &rta are
260 * private copies, in that case they are returned unchanged. Second, &rte is
261 * private copy, but &rta is cached, in that case &rta is duplicated using
262 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
263 * both structures are duplicated by rte_do_cow() and rta_do_cow().
265 * Note that in the second case, cached &rta loses one reference, while private
266 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
267 * nexthops, ...) with it. To work properly, original shared &rta should have
268 * another reference during the life of created private copy.
270 * Result: a pointer to the new writable &rte with writable &rta.
273 rte_cow_rta(rte
*r
, linpool
*lp
)
275 if (!rta_is_cached(r
->attrs
))
279 rta
*a
= rta_do_cow(r
->attrs
, lp
);
285 static int /* Actually better or at least as good as */
286 rte_better(rte
*new, rte
*old
)
288 int (*better
)(rte
*, rte
*);
290 if (!rte_is_valid(old
))
292 if (!rte_is_valid(new))
295 if (new->pref
> old
->pref
)
297 if (new->pref
< old
->pref
)
299 if (new->attrs
->src
->proto
->proto
!= old
->attrs
->src
->proto
->proto
)
302 * If the user has configured protocol preferences, so that two different protocols
303 * have the same preference, try to break the tie by comparing addresses. Not too
304 * useful, but keeps the ordering of routes unambiguous.
306 return new->attrs
->src
->proto
->proto
> old
->attrs
->src
->proto
->proto
;
308 if (better
= new->attrs
->src
->proto
->rte_better
)
309 return better(new, old
);
314 rte_mergable(rte
*pri
, rte
*sec
)
316 int (*mergable
)(rte
*, rte
*);
318 if (!rte_is_valid(pri
) || !rte_is_valid(sec
))
321 if (pri
->pref
!= sec
->pref
)
324 if (pri
->attrs
->src
->proto
->proto
!= sec
->attrs
->src
->proto
->proto
)
327 if (mergable
= pri
->attrs
->src
->proto
->rte_mergable
)
328 return mergable(pri
, sec
);
334 rte_trace(struct proto
*p
, rte
*e
, int dir
, char *msg
)
336 log(L_TRACE
"%s %c %s %N %s", p
->name
, dir
, msg
, e
->net
->n
.addr
, rta_dest_name(e
->attrs
->dest
));
340 rte_trace_in(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
343 rte_trace(p
, e
, '>', msg
);
347 rte_trace_out(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
350 rte_trace(p
, e
, '<', msg
);
354 export_filter_(struct channel
*c
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, linpool
*pool
, int silent
)
356 struct proto
*p
= c
->proto
;
357 struct filter
*filter
= c
->out_filter
;
358 struct proto_stats
*stats
= &c
->stats
;
359 ea_list
*tmpb
= NULL
;
369 *tmpa
= rte_make_tmp_attrs(rt
, pool
);
371 v
= p
->import_control
? p
->import_control(p
, &rt
, tmpa
, pool
) : 0;
377 stats
->exp_updates_rejected
++;
379 rte_trace_out(D_FILTERS
, p
, rt
, "rejected by protocol");
385 rte_trace_out(D_FILTERS
, p
, rt
, "forced accept by protocol");
389 v
= filter
&& ((filter
== FILTER_REJECT
) ||
390 (f_run(filter
, &rt
, tmpa
, pool
, FF_FORCE_TMPATTR
) > F_ACCEPT
));
396 stats
->exp_updates_filtered
++;
397 rte_trace_out(D_FILTERS
, p
, rt
, "filtered out");
407 /* Discard temporary rte */
414 export_filter(struct channel
*c
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, int silent
)
416 return export_filter_(c
, rt0
, rt_free
, tmpa
, rte_update_pool
, silent
);
420 do_rt_notify(struct channel
*c
, net
*net
, rte
*new, rte
*old
, ea_list
*tmpa
, int refeed
)
422 struct proto
*p
= c
->proto
;
423 struct proto_stats
*stats
= &c
->stats
;
427 * First, apply export limit.
429 * Export route limits has several problems. Because exp_routes
430 * counter is reset before refeed, we don't really know whether
431 * limit is breached and whether the update is new or not. Therefore
432 * the number of really exported routes may exceed the limit
433 * temporarily (routes exported before and new routes in refeed).
435 * Minor advantage is that if the limit is decreased and refeed is
436 * requested, the number of exported routes really decrease.
438 * Second problem is that with export limits, we don't know whether
439 * old was really exported (it might be blocked by limit). When a
440 * withdraw is exported, we announce it even when the previous
441 * update was blocked. This is not a big issue, but the same problem
442 * is in updating exp_routes counter. Therefore, to be consistent in
443 * increases and decreases of exp_routes, we count exported routes
444 * regardless of blocking by limits.
446 * Similar problem is in handling updates - when a new route is
447 * received and blocking is active, the route would be blocked, but
448 * when an update for the route will be received later, the update
449 * would be propagated (as old != NULL). Therefore, we have to block
450 * also non-new updates (contrary to import blocking).
453 struct channel_limit
*l
= &c
->out_limit
;
454 if (l
->action
&& new)
456 if ((!old
|| refeed
) && (stats
->exp_routes
>= l
->limit
))
457 channel_notify_limit(c
, l
, PLD_OUT
, stats
->exp_routes
);
459 if (l
->state
== PLS_BLOCKED
)
461 stats
->exp_routes
++; /* see note above */
462 stats
->exp_updates_rejected
++;
463 rte_trace_out(D_FILTERS
, p
, new, "rejected [limit]");
473 stats
->exp_updates_accepted
++;
475 stats
->exp_withdraws_accepted
++;
477 /* Hack: We do not decrease exp_routes during refeed, we instead
478 reset exp_routes at the start of refeed. */
484 if (p
->debug
& D_ROUTES
)
487 rte_trace_out(D_ROUTES
, p
, new, "replaced");
489 rte_trace_out(D_ROUTES
, p
, new, "added");
491 rte_trace_out(D_ROUTES
, p
, old
, "removed");
494 p
->rt_notify(p
, c
, net
, NULL
, old
, NULL
);
500 t
->next
= new->attrs
->eattrs
;
501 p
->rt_notify(p
, c
, net
, new, old
, tmpa
);
505 p
->rt_notify(p
, c
, net
, new, old
, new->attrs
->eattrs
);
509 rt_notify_basic(struct channel
*c
, net
*net
, rte
*new0
, rte
*old0
, int refeed
)
511 struct proto
*p
= c
->proto
;
515 rte
*new_free
= NULL
;
516 rte
*old_free
= NULL
;
517 ea_list
*tmpa
= NULL
;
520 c
->stats
.exp_updates_received
++;
522 c
->stats
.exp_withdraws_received
++;
525 * This is a tricky part - we don't know whether route 'old' was
526 * exported to protocol 'p' or was filtered by the export filter.
527 * We try to run the export filter to know this to have a correct
528 * value in 'old' argument of rte_update (and proper filter value)
530 * FIXME - this is broken because 'configure soft' may change
531 * filters but keep routes. Refeed is expected to be called after
532 * change of the filters and with old == new, therefore we do not
533 * even try to run the filter on an old route, This may lead to
534 * 'spurious withdraws' but ensure that there are no 'missing
537 * This is not completely safe as there is a window between
538 * reconfiguration and the end of refeed - if a newly filtered
539 * route disappears during this period, proper withdraw is not
540 * sent (because old would be also filtered) and the route is
541 * not refeeded (because it disappeared before that).
545 new = export_filter(c
, new, &new_free
, &tmpa
, 0);
548 old
= export_filter(c
, old
, &old_free
, NULL
, 1);
553 * As mentioned above, 'old' value may be incorrect in some race conditions.
554 * We generally ignore it with the exception of withdraw to pipe protocol.
555 * In that case we rather propagate unfiltered withdraws regardless of
556 * export filters to ensure that when a protocol is flushed, its routes are
557 * removed from all tables. Possible spurious unfiltered withdraws are not
558 * problem here as they are ignored if there is no corresponding route at
559 * the other end of the pipe. We directly call rt_notify() hook instead of
560 * do_rt_notify() to avoid logging and stat counters.
564 if ((p
->proto
== &proto_pipe
) && !new0
&& (p
!= old0
->sender
->proto
))
565 p
->rt_notify(p
, c
, net
, NULL
, old0
, NULL
);
571 do_rt_notify(c
, net
, new, old
, tmpa
, refeed
);
573 /* Discard temporary rte's */
581 rt_notify_accepted(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
, rte
*before_old
, int feed
)
583 // struct proto *p = c->proto;
586 rte
*new_best
= NULL
;
587 rte
*old_best
= NULL
;
588 rte
*new_free
= NULL
;
589 rte
*old_free
= NULL
;
590 ea_list
*tmpa
= NULL
;
592 /* Used to track whether we met old_changed position. If before_old is NULL
593 old_changed was the first and we met it implicitly before current best route. */
594 int old_meet
= old_changed
&& !before_old
;
596 /* Note that before_old is either NULL or valid (not rejected) route.
597 If old_changed is valid, before_old have to be too. If old changed route
598 was not valid, caller must use NULL for both old_changed and before_old. */
601 c
->stats
.exp_updates_received
++;
603 c
->stats
.exp_withdraws_received
++;
605 /* First, find the new_best route - first accepted by filters */
606 for (r
=net
->routes
; rte_is_valid(r
); r
=r
->next
)
608 if (new_best
= export_filter(c
, r
, &new_free
, &tmpa
, 0))
611 /* Note if we walked around the position of old_changed route */
617 * Second, handle the feed case. That means we do not care for
618 * old_best. It is NULL for feed, and the new_best for refeed.
619 * For refeed, there is a hack similar to one in rt_notify_basic()
620 * to ensure withdraws in case of changed filters
624 if (feed
== 2) /* refeed */
625 old_best
= new_best
? new_best
:
626 (rte_is_valid(net
->routes
) ? net
->routes
: NULL
);
630 if (!new_best
&& !old_best
)
637 * Now, we find the old_best route. Generally, it is the same as the
638 * new_best, unless new_best is the same as new_changed or
639 * old_changed is accepted before new_best.
641 * There are four cases:
643 * - We would find and accept old_changed before new_best, therefore
644 * old_changed is old_best. In remaining cases we suppose this
647 * - We found no new_best, therefore there is also no old_best and
648 * we ignore this withdraw.
650 * - We found new_best different than new_changed, therefore
651 * old_best is the same as new_best and we ignore this update.
653 * - We found new_best the same as new_changed, therefore it cannot
654 * be old_best and we have to continue search for old_best.
659 if (old_best
= export_filter(c
, old_changed
, &old_free
, NULL
, 1))
666 /* Third case, we use r instead of new_best, because export_filter() could change it */
667 if (r
!= new_changed
)
675 for (r
=r
->next
; rte_is_valid(r
); r
=r
->next
)
677 if (old_best
= export_filter(c
, r
, &old_free
, NULL
, 1))
681 if (old_best
= export_filter(c
, old_changed
, &old_free
, NULL
, 1))
685 /* Implicitly, old_best is NULL and new_best is non-NULL */
688 do_rt_notify(c
, net
, new_best
, old_best
, tmpa
, (feed
== 2));
690 /* Discard temporary rte's */
698 static struct nexthop
*
699 nexthop_merge_rta(struct nexthop
*nhs
, rta
*a
, linpool
*pool
, int max
)
701 return nexthop_merge(nhs
, &(a
->nh
), 1, 0, max
, pool
);
705 rt_export_merged(struct channel
*c
, net
*net
, rte
**rt_free
, ea_list
**tmpa
, linpool
*pool
, int silent
)
707 // struct proto *p = c->proto;
708 struct nexthop
*nhs
= NULL
;
709 rte
*best0
, *best
, *rt0
, *rt
, *tmp
;
714 if (!rte_is_valid(best0
))
717 best
= export_filter_(c
, best0
, rt_free
, tmpa
, pool
, silent
);
719 if (!best
|| !rte_is_reachable(best
))
722 for (rt0
= best0
->next
; rt0
; rt0
= rt0
->next
)
724 if (!rte_mergable(best0
, rt0
))
727 rt
= export_filter_(c
, rt0
, &tmp
, NULL
, pool
, 1);
732 if (rte_is_reachable(rt
))
733 nhs
= nexthop_merge_rta(nhs
, rt
->attrs
, pool
, c
->merge_limit
);
741 nhs
= nexthop_merge_rta(nhs
, best
->attrs
, pool
, c
->merge_limit
);
745 best
= rte_cow_rta(best
, pool
);
746 nexthop_link(best
->attrs
, nhs
);
758 rt_notify_merged(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
,
759 rte
*new_best
, rte
*old_best
, int refeed
)
761 // struct proto *p = c->proto;
763 rte
*new_best_free
= NULL
;
764 rte
*old_best_free
= NULL
;
765 rte
*new_changed_free
= NULL
;
766 rte
*old_changed_free
= NULL
;
767 ea_list
*tmpa
= NULL
;
769 /* We assume that all rte arguments are either NULL or rte_is_valid() */
771 /* This check should be done by the caller */
772 if (!new_best
&& !old_best
)
775 /* Check whether the change is relevant to the merged route */
776 if ((new_best
== old_best
) && !refeed
)
778 new_changed
= rte_mergable(new_best
, new_changed
) ?
779 export_filter(c
, new_changed
, &new_changed_free
, NULL
, 1) : NULL
;
781 old_changed
= rte_mergable(old_best
, old_changed
) ?
782 export_filter(c
, old_changed
, &old_changed_free
, NULL
, 1) : NULL
;
784 if (!new_changed
&& !old_changed
)
789 c
->stats
.exp_updates_received
++;
791 c
->stats
.exp_withdraws_received
++;
793 /* Prepare new merged route */
795 new_best
= rt_export_merged(c
, net
, &new_best_free
, &tmpa
, rte_update_pool
, 0);
797 /* Prepare old merged route (without proper merged next hops) */
798 /* There are some issues with running filter on old route - see rt_notify_basic() */
799 if (old_best
&& !refeed
)
800 old_best
= export_filter(c
, old_best
, &old_best_free
, NULL
, 1);
802 if (new_best
|| old_best
)
803 do_rt_notify(c
, net
, new_best
, old_best
, tmpa
, refeed
);
805 /* Discard temporary rte's */
807 rte_free(new_best_free
);
809 rte_free(old_best_free
);
810 if (new_changed_free
)
811 rte_free(new_changed_free
);
812 if (old_changed_free
)
813 rte_free(old_changed_free
);
818 * rte_announce - announce a routing table change
819 * @tab: table the route has been added to
820 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
821 * @net: network in question
822 * @new: the new route to be announced
823 * @old: the previous route for the same network
824 * @new_best: the new best route for the same network
825 * @old_best: the previous best route for the same network
826 * @before_old: The previous route before @old for the same network.
827 * If @before_old is NULL @old was the first.
829 * This function gets a routing table update and announces it
830 * to all protocols that acccepts given type of route announcement
831 * and are connected to the same table by their announcement hooks.
833 * Route announcement of type %RA_OPTIMAL si generated when optimal
834 * route (in routing table @tab) changes. In that case @old stores the
837 * Route announcement of type %RA_ANY si generated when any route (in
838 * routing table @tab) changes In that case @old stores the old route
839 * from the same protocol.
841 * For each appropriate protocol, we first call its import_control()
842 * hook which performs basic checks on the route (each protocol has a
843 * right to veto or force accept of the route before any filter is
844 * asked) and adds default values of attributes specific to the new
845 * protocol (metrics, tags etc.). Then it consults the protocol's
846 * export filter and if it accepts the route, the rt_notify() hook of
847 * the protocol gets called.
850 rte_announce(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
851 rte
*new_best
, rte
*old_best
, rte
*before_old
)
853 if (!rte_is_valid(new))
856 if (!rte_is_valid(old
))
857 old
= before_old
= NULL
;
859 if (!rte_is_valid(new_best
))
862 if (!rte_is_valid(old_best
))
868 if ((type
== RA_OPTIMAL
) && tab
->hostcache
)
869 rt_notify_hostcache(tab
, net
);
871 struct channel
*c
; node
*n
;
872 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
874 if (c
->export_state
== ES_DOWN
)
877 if (c
->ra_mode
== type
)
878 if (type
== RA_ACCEPTED
)
879 rt_notify_accepted(c
, net
, new, old
, before_old
, 0);
880 else if (type
== RA_MERGED
)
881 rt_notify_merged(c
, net
, new, old
, new_best
, old_best
, 0);
883 rt_notify_basic(c
, net
, new, old
, 0);
893 if (!net_validate(n
->n
.addr
))
895 log(L_WARN
"Ignoring bogus prefix %N received via %s",
896 n
->n
.addr
, e
->sender
->proto
->name
);
900 c
= net_classify(n
->n
.addr
);
901 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
903 log(L_WARN
"Ignoring bogus route %N received via %s",
904 n
->n
.addr
, e
->sender
->proto
->name
);
908 if (net_type_match(n
->n
.addr
, NB_DEST
) == !e
->attrs
->dest
)
910 log(L_WARN
"Ignoring route %N with invalid dest %d received via %s",
911 n
->n
.addr
, e
->attrs
->dest
, e
->sender
->proto
->name
);
915 if ((e
->attrs
->dest
== RTD_UNICAST
) && !nexthop_is_sorted(&(e
->attrs
->nh
)))
917 log(L_WARN
"Ignoring unsorted multipath route %N received via %s",
918 n
->n
.addr
, e
->sender
->proto
->name
);
926 * rte_free - delete a &rte
927 * @e: &rte to be deleted
929 * rte_free() deletes the given &rte from the routing table it's linked to.
934 if (rta_is_cached(e
->attrs
))
936 sl_free(rte_slab
, e
);
940 rte_free_quick(rte
*e
)
943 sl_free(rte_slab
, e
);
947 rte_same(rte
*x
, rte
*y
)
950 x
->attrs
== y
->attrs
&&
951 x
->flags
== y
->flags
&&
952 x
->pflags
== y
->pflags
&&
953 x
->pref
== y
->pref
&&
954 (!x
->attrs
->src
->proto
->rte_same
|| x
->attrs
->src
->proto
->rte_same(x
, y
));
957 static inline int rte_is_ok(rte
*e
) { return e
&& !rte_is_filtered(e
); }
960 rte_recalculate(struct channel
*c
, net
*net
, rte
*new, struct rte_src
*src
)
962 struct proto
*p
= c
->proto
;
963 struct rtable
*table
= c
->table
;
964 struct proto_stats
*stats
= &c
->stats
;
965 static struct tbf rl_pipe
= TBF_DEFAULT_LOG_LIMITS
;
966 rte
*before_old
= NULL
;
967 rte
*old_best
= net
->routes
;
971 k
= &net
->routes
; /* Find and remove original route from the same protocol */
974 if (old
->attrs
->src
== src
)
976 /* If there is the same route in the routing table but from
977 * a different sender, then there are two paths from the
978 * source protocol to this routing table through transparent
979 * pipes, which is not allowed.
981 * We log that and ignore the route. If it is withdraw, we
982 * ignore it completely (there might be 'spurious withdraws',
983 * see FIXME in do_rte_announce())
985 if (old
->sender
->proto
!= p
)
989 log_rl(&rl_pipe
, L_ERR
"Pipe collision detected when sending %N to table %s",
990 net
->n
.addr
, table
->name
);
996 if (new && rte_same(old
, new))
998 /* No changes, ignore the new route */
1000 if (!rte_is_filtered(new))
1002 stats
->imp_updates_ignored
++;
1003 rte_trace_in(D_ROUTES
, p
, new, "ignored");
1006 rte_free_quick(new);
1021 stats
->imp_withdraws_ignored
++;
1025 int new_ok
= rte_is_ok(new);
1026 int old_ok
= rte_is_ok(old
);
1028 struct channel_limit
*l
= &c
->rx_limit
;
1029 if (l
->action
&& !old
&& new)
1031 u32 all_routes
= stats
->imp_routes
+ stats
->filt_routes
;
1033 if (all_routes
>= l
->limit
)
1034 channel_notify_limit(c
, l
, PLD_RX
, all_routes
);
1036 if (l
->state
== PLS_BLOCKED
)
1038 /* In receive limit the situation is simple, old is NULL so
1039 we just free new and exit like nothing happened */
1041 stats
->imp_updates_ignored
++;
1042 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1043 rte_free_quick(new);
1049 if (l
->action
&& !old_ok
&& new_ok
)
1051 if (stats
->imp_routes
>= l
->limit
)
1052 channel_notify_limit(c
, l
, PLD_IN
, stats
->imp_routes
);
1054 if (l
->state
== PLS_BLOCKED
)
1056 /* In import limit the situation is more complicated. We
1057 shouldn't just drop the route, we should handle it like
1058 it was filtered. We also have to continue the route
1059 processing if old or new is non-NULL, but we should exit
1060 if both are NULL as this case is probably assumed to be
1063 stats
->imp_updates_ignored
++;
1064 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1066 if (c
->in_keep_filtered
)
1067 new->flags
|= REF_FILTERED
;
1069 { rte_free_quick(new); new = NULL
; }
1071 /* Note that old && !new could be possible when
1072 c->in_keep_filtered changed in the recent past. */
1083 stats
->imp_updates_accepted
++;
1085 stats
->imp_withdraws_accepted
++;
1087 stats
->imp_withdraws_ignored
++;
1092 rte_is_filtered(new) ? stats
->filt_routes
++ : stats
->imp_routes
++;
1094 rte_is_filtered(old
) ? stats
->filt_routes
-- : stats
->imp_routes
--;
1096 if (table
->config
->sorted
)
1098 /* If routes are sorted, just insert new route to appropriate position */
1101 if (before_old
&& !rte_better(new, before_old
))
1102 k
= &before_old
->next
;
1106 for (; *k
; k
=&(*k
)->next
)
1107 if (rte_better(new, *k
))
1116 /* If routes are not sorted, find the best route and move it on
1117 the first position. There are several optimized cases. */
1119 if (src
->proto
->rte_recalculate
&& src
->proto
->rte_recalculate(table
, net
, new, old
, old_best
))
1120 goto do_recalculate
;
1122 if (new && rte_better(new, old_best
))
1124 /* The first case - the new route is cleary optimal,
1125 we link it at the first position */
1127 new->next
= net
->routes
;
1130 else if (old
== old_best
)
1132 /* The second case - the old best route disappeared, we add the
1133 new route (if we have any) to the list (we don't care about
1134 position) and then we elect the new optimal route and relink
1135 that route at the first position and announce it. New optimal
1136 route might be NULL if there is no more routes */
1139 /* Add the new route to the list */
1142 new->next
= net
->routes
;
1146 /* Find a new optimal route (if there is any) */
1149 rte
**bp
= &net
->routes
;
1150 for (k
=&(*bp
)->next
; *k
; k
=&(*k
)->next
)
1151 if (rte_better(*k
, *bp
))
1157 best
->next
= net
->routes
;
1163 /* The third case - the new route is not better than the old
1164 best route (therefore old_best != NULL) and the old best
1165 route was not removed (therefore old_best == net->routes).
1166 We just link the new route after the old best route. */
1168 ASSERT(net
->routes
!= NULL
);
1169 new->next
= net
->routes
->next
;
1170 net
->routes
->next
= new;
1172 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1178 /* Log the route change */
1179 if (p
->debug
& D_ROUTES
)
1182 rte_trace(p
, new, '>', new == net
->routes
? "added [best]" : "added");
1185 if (old
!= old_best
)
1186 rte_trace(p
, old
, '>', "removed");
1187 else if (rte_is_ok(net
->routes
))
1188 rte_trace(p
, old
, '>', "removed [replaced]");
1190 rte_trace(p
, old
, '>', "removed [sole]");
1194 /* Propagate the route change */
1195 rte_announce(table
, RA_ANY
, net
, new, old
, NULL
, NULL
, NULL
);
1196 if (net
->routes
!= old_best
)
1197 rte_announce(table
, RA_OPTIMAL
, net
, net
->routes
, old_best
, NULL
, NULL
, NULL
);
1198 if (table
->config
->sorted
)
1199 rte_announce(table
, RA_ACCEPTED
, net
, new, old
, NULL
, NULL
, before_old
);
1200 rte_announce(table
, RA_MERGED
, net
, new, old
, net
->routes
, old_best
, NULL
);
1203 (table
->gc_counter
++ >= table
->config
->gc_max_ops
) &&
1204 (table
->gc_time
+ table
->config
->gc_min_time
<= now
))
1205 rt_schedule_prune(table
);
1207 if (old_ok
&& p
->rte_remove
)
1208 p
->rte_remove(net
, old
);
1209 if (new_ok
&& p
->rte_insert
)
1210 p
->rte_insert(net
, new);
1213 rte_free_quick(old
);
1216 static int rte_update_nest_cnt
; /* Nesting counter to allow recursive updates */
1219 rte_update_lock(void)
1221 rte_update_nest_cnt
++;
1225 rte_update_unlock(void)
1227 if (!--rte_update_nest_cnt
)
1228 lp_flush(rte_update_pool
);
1232 rte_hide_dummy_routes(net
*net
, rte
**dummy
)
1234 if (net
->routes
&& net
->routes
->attrs
->source
== RTS_DUMMY
)
1236 *dummy
= net
->routes
;
1237 net
->routes
= (*dummy
)->next
;
1242 rte_unhide_dummy_routes(net
*net
, rte
**dummy
)
1246 (*dummy
)->next
= net
->routes
;
1247 net
->routes
= *dummy
;
1252 * rte_update - enter a new update to a routing table
1253 * @table: table to be updated
1254 * @c: channel doing the update
1255 * @net: network node
1256 * @p: protocol submitting the update
1257 * @src: protocol originating the update
1258 * @new: a &rte representing the new route or %NULL for route removal.
1260 * This function is called by the routing protocols whenever they discover
1261 * a new route or wish to update/remove an existing route. The right announcement
1262 * sequence is to build route attributes first (either un-cached with @aflags set
1263 * to zero or a cached one using rta_lookup(); in this case please note that
1264 * you need to increase the use count of the attributes yourself by calling
1265 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1266 * the appropriate data and finally submit the new &rte by calling rte_update().
1268 * @src specifies the protocol that originally created the route and the meaning
1269 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1270 * same value as @new->attrs->proto. @p specifies the protocol that called
1271 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1272 * stores @p in @new->sender;
1274 * When rte_update() gets any route, it automatically validates it (checks,
1275 * whether the network and next hop address are valid IP addresses and also
1276 * whether a normal routing protocol doesn't try to smuggle a host or link
1277 * scope route to the table), converts all protocol dependent attributes stored
1278 * in the &rte to temporary extended attributes, consults import filters of the
1279 * protocol to see if the route should be accepted and/or its attributes modified,
1280 * stores the temporary attributes back to the &rte.
1282 * Now, having a "public" version of the route, we
1283 * automatically find any old route defined by the protocol @src
1284 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1285 * recalculate the optimal route for this destination and finally broadcast
1286 * the change (if any) to all routing protocols by calling rte_announce().
1288 * All memory used for attribute lists and other temporary allocations is taken
1289 * from a special linear pool @rte_update_pool and freed when rte_update()
1294 rte_update2(struct channel
*c
, const net_addr
*n
, rte
*new, struct rte_src
*src
)
1296 struct proto
*p
= c
->proto
;
1297 struct proto_stats
*stats
= &c
->stats
;
1298 struct filter
*filter
= c
->in_filter
;
1299 ea_list
*tmpa
= NULL
;
1303 ASSERT(c
->channel_state
== CS_UP
);
1308 nn
= net_get(c
->table
, n
);
1314 new->pref
= c
->preference
;
1316 stats
->imp_updates_received
++;
1317 if (!rte_validate(new))
1319 rte_trace_in(D_FILTERS
, p
, new, "invalid");
1320 stats
->imp_updates_invalid
++;
1324 if (filter
== FILTER_REJECT
)
1326 stats
->imp_updates_filtered
++;
1327 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1329 if (! c
->in_keep_filtered
)
1332 /* new is a private copy, i could modify it */
1333 new->flags
|= REF_FILTERED
;
1337 tmpa
= rte_make_tmp_attrs(new, rte_update_pool
);
1338 if (filter
&& (filter
!= FILTER_REJECT
))
1340 ea_list
*old_tmpa
= tmpa
;
1341 int fr
= f_run(filter
, &new, &tmpa
, rte_update_pool
, 0);
1344 stats
->imp_updates_filtered
++;
1345 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1347 if (! c
->in_keep_filtered
)
1350 new->flags
|= REF_FILTERED
;
1352 if (tmpa
!= old_tmpa
&& src
->proto
->store_tmp_attrs
)
1353 src
->proto
->store_tmp_attrs(new, tmpa
);
1356 if (!rta_is_cached(new->attrs
)) /* Need to copy attributes */
1357 new->attrs
= rta_lookup(new->attrs
);
1358 new->flags
|= REF_COW
;
1362 stats
->imp_withdraws_received
++;
1364 if (!(nn
= net_find(c
->table
, n
)) || !src
)
1366 stats
->imp_withdraws_ignored
++;
1367 rte_update_unlock();
1373 rte_hide_dummy_routes(nn
, &dummy
);
1374 rte_recalculate(c
, nn
, new, src
);
1375 rte_unhide_dummy_routes(nn
, &dummy
);
1376 rte_update_unlock();
1385 /* Independent call to rte_announce(), used from next hop
1386 recalculation, outside of rte_update(). new must be non-NULL */
1388 rte_announce_i(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
1389 rte
*new_best
, rte
*old_best
)
1392 rte_announce(tab
, type
, net
, new, old
, new_best
, old_best
, NULL
);
1393 rte_update_unlock();
1397 rte_discard(rte
*old
) /* Non-filtered route deletion, used during garbage collection */
1400 rte_recalculate(old
->sender
, old
->net
, NULL
, old
->attrs
->src
);
1401 rte_update_unlock();
1404 /* Check rtable for best route to given net whether it would be exported do p */
1406 rt_examine(rtable
*t
, net_addr
*a
, struct proto
*p
, struct filter
*filter
)
1408 net
*n
= net_find(t
, a
);
1409 rte
*rt
= n
? n
->routes
: NULL
;
1411 if (!rte_is_valid(rt
))
1416 /* Rest is stripped down export_filter() */
1417 ea_list
*tmpa
= rte_make_tmp_attrs(rt
, rte_update_pool
);
1418 int v
= p
->import_control
? p
->import_control(p
, &rt
, &tmpa
, rte_update_pool
) : 0;
1419 if (v
== RIC_PROCESS
)
1420 v
= (f_run(filter
, &rt
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) <= F_ACCEPT
);
1422 /* Discard temporary rte */
1423 if (rt
!= n
->routes
)
1426 rte_update_unlock();
1433 * rt_refresh_begin - start a refresh cycle
1434 * @t: related routing table
1435 * @c: related channel
1437 * This function starts a refresh cycle for given routing table and announce
1438 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1439 * routes to the routing table (by rte_update()). After that, all protocol
1440 * routes (more precisely routes with @c as @sender) not sent during the
1441 * refresh cycle but still in the table from the past are pruned. This is
1442 * implemented by marking all related routes as stale by REF_STALE flag in
1443 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1444 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1447 rt_refresh_begin(rtable
*t
, struct channel
*c
)
1449 FIB_WALK(&t
->fib
, net
, n
)
1452 for (e
= n
->routes
; e
; e
= e
->next
)
1454 e
->flags
|= REF_STALE
;
1460 * rt_refresh_end - end a refresh cycle
1461 * @t: related routing table
1462 * @c: related channel
1464 * This function ends a refresh cycle for given routing table and announce
1465 * hook. See rt_refresh_begin() for description of refresh cycles.
1468 rt_refresh_end(rtable
*t
, struct channel
*c
)
1472 FIB_WALK(&t
->fib
, net
, n
)
1475 for (e
= n
->routes
; e
; e
= e
->next
)
1476 if ((e
->sender
== c
) && (e
->flags
& REF_STALE
))
1478 e
->flags
|= REF_DISCARD
;
1485 rt_schedule_prune(t
);
1490 * rte_dump - dump a route
1491 * @e: &rte to be dumped
1493 * This functions dumps contents of a &rte to debug output.
1499 debug("%-1N ", n
->n
.addr
);
1500 debug("KF=%02x PF=%02x pref=%d lm=%d ", n
->n
.flags
, e
->pflags
, e
->pref
, now
-e
->lastmod
);
1502 if (e
->attrs
->src
->proto
->proto
->dump_attrs
)
1503 e
->attrs
->src
->proto
->proto
->dump_attrs(e
);
1508 * rt_dump - dump a routing table
1509 * @t: routing table to be dumped
1511 * This function dumps contents of a given routing table to debug output.
1516 debug("Dump of routing table <%s>\n", t
->name
);
1520 FIB_WALK(&t
->fib
, net
, n
)
1523 for(e
=n
->routes
; e
; e
=e
->next
)
1531 * rt_dump_all - dump all routing tables
1533 * This function dumps contents of all routing tables to debug output.
1540 WALK_LIST(t
, routing_tables
)
1545 rt_schedule_hcu(rtable
*tab
)
1547 if (tab
->hcu_scheduled
)
1550 tab
->hcu_scheduled
= 1;
1551 ev_schedule(tab
->rt_event
);
1555 rt_schedule_nhu(rtable
*tab
)
1557 if (tab
->nhu_state
== NHU_CLEAN
)
1558 ev_schedule(tab
->rt_event
);
1561 * NHU_CLEAN -> NHU_SCHEDULED
1562 * NHU_RUNNING -> NHU_DIRTY
1564 tab
->nhu_state
|= NHU_SCHEDULED
;
1568 rt_schedule_prune(rtable
*tab
)
1570 if (tab
->prune_state
== 0)
1571 ev_schedule(tab
->rt_event
);
1573 /* state change 0->1, 2->3 */
1574 tab
->prune_state
|= 1;
1585 if (tab
->hcu_scheduled
)
1586 rt_update_hostcache(tab
);
1589 rt_next_hop_update(tab
);
1591 if (tab
->prune_state
)
1592 rt_prune_table(tab
);
1594 rt_unlock_table(tab
);
1598 rt_setup(pool
*p
, rtable
*t
, char *name
, struct rtable_config
*cf
)
1600 bzero(t
, sizeof(*t
));
1603 t
->addr_type
= cf
? cf
->addr_type
: NET_IP4
;
1604 fib_init(&t
->fib
, p
, t
->addr_type
, sizeof(net
), OFFSETOF(net
, n
), 0, NULL
);
1605 init_list(&t
->channels
);
1609 t
->rt_event
= ev_new(p
);
1610 t
->rt_event
->hook
= rt_event
;
1611 t
->rt_event
->data
= t
;
1617 * rt_init - initialize routing tables
1619 * This function is called during BIRD startup. It initializes the
1620 * routing table module.
1626 rt_table_pool
= rp_new(&root_pool
, "Routing tables");
1627 rte_update_pool
= lp_new_default(rt_table_pool
);
1628 rte_slab
= sl_new(rt_table_pool
, sizeof(rte
));
1629 init_list(&routing_tables
);
1634 * rt_prune_table - prune a routing table
1636 * The prune loop scans routing tables and removes routes belonging to flushing
1637 * protocols, discarded routes and also stale network entries. It is called from
1638 * rt_event(). The event is rescheduled if the current iteration does not finish
1639 * the table. The pruning is directed by the prune state (@prune_state),
1640 * specifying whether the prune cycle is scheduled or running, and there
1641 * is also a persistent pruning iterator (@prune_fit).
1643 * The prune loop is used also for channel flushing. For this purpose, the
1644 * channels to flush are marked before the iteration and notified after the
1648 rt_prune_table(rtable
*tab
)
1650 struct fib_iterator
*fit
= &tab
->prune_fit
;
1656 DBG("Pruning route table %s\n", tab
->name
);
1658 fib_check(&tab
->fib
);
1661 if (tab
->prune_state
== 0)
1664 if (tab
->prune_state
== 1)
1666 /* Mark channels to flush */
1667 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
1668 if (c
->channel_state
== CS_FLUSHING
)
1669 c
->flush_active
= 1;
1671 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1672 tab
->prune_state
= 2;
1676 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1681 for (e
=n
->routes
; e
; e
=e
->next
)
1682 if (e
->sender
->flush_active
|| (e
->flags
& REF_DISCARD
))
1686 FIB_ITERATE_PUT(fit
);
1687 ev_schedule(tab
->rt_event
);
1697 if (!n
->routes
) /* Orphaned FIB entry */
1699 FIB_ITERATE_PUT(fit
);
1700 fib_delete(&tab
->fib
, n
);
1707 fib_check(&tab
->fib
);
1710 tab
->gc_counter
= 0;
1713 /* state change 2->0, 3->1 */
1714 tab
->prune_state
&= 1;
1716 if (tab
->prune_state
> 0)
1717 ev_schedule(tab
->rt_event
);
1719 /* FIXME: This should be handled in a better way */
1722 /* Close flushed channels */
1723 WALK_LIST2_DELSAFE(c
, n
, x
, tab
->channels
, table_node
)
1724 if (c
->flush_active
)
1726 c
->flush_active
= 0;
1727 channel_set_state(c
, CS_DOWN
);
1734 rt_preconfig(struct config
*c
)
1736 init_list(&c
->tables
);
1738 rt_new_table(cf_get_symbol("master4"), NET_IP4
);
1739 rt_new_table(cf_get_symbol("master6"), NET_IP6
);
1744 * Some functions for handing internal next hop updates
1745 * triggered by rt_schedule_nhu().
1749 rta_next_hop_outdated(rta
*a
)
1751 struct hostentry
*he
= a
->hostentry
;
1757 return a
->dest
!= RTD_UNREACHABLE
;
1759 return (a
->dest
!= he
->dest
) || (a
->igp_metric
!= he
->igp_metric
) ||
1760 (!he
->nexthop_linkable
) || !nexthop_same(&(a
->nh
), &(he
->src
->nh
));
1764 rta_apply_hostentry(rta
*a
, struct hostentry
*he
, mpls_label_stack
*mls
)
1768 a
->igp_metric
= he
->igp_metric
;
1770 if (a
->dest
!= RTD_UNICAST
)
1774 a
->nh
= (struct nexthop
) {};
1776 { /* Store the label stack for later changes */
1777 a
->nh
.labels_orig
= a
->nh
.labels
= mls
->len
;
1778 memcpy(a
->nh
.label
, mls
->stack
, mls
->len
* sizeof(u32
));
1783 if (((!mls
) || (!mls
->len
)) && he
->nexthop_linkable
)
1784 { /* Just link the nexthop chain, no label append happens. */
1785 memcpy(&(a
->nh
), &(he
->src
->nh
), nexthop_size(&(he
->src
->nh
)));
1789 struct nexthop
*nhp
= NULL
, *nhr
= NULL
;
1790 int skip_nexthop
= 0;
1792 for (struct nexthop
*nh
= &(he
->src
->nh
); nh
; nh
= nh
->next
)
1799 nhp
= (nhp
? (nhp
->next
= lp_allocz(rte_update_pool
, NEXTHOP_MAX_SIZE
)) : &(a
->nh
));
1802 nhp
->iface
= nh
->iface
;
1803 nhp
->weight
= nh
->weight
;
1806 nhp
->labels
= nh
->labels
+ mls
->len
;
1807 nhp
->labels_orig
= mls
->len
;
1808 if (nhp
->labels
<= MPLS_MAX_LABEL_STACK
)
1810 memcpy(nhp
->label
, nh
->label
, nh
->labels
* sizeof(u32
)); /* First the hostentry labels */
1811 memcpy(&(nhp
->label
[nh
->labels
]), mls
->stack
, mls
->len
* sizeof(u32
)); /* Then the bottom labels */
1815 log(L_WARN
"Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
1816 nh
->labels
, mls
->len
, nhp
->labels
, MPLS_MAX_LABEL_STACK
);
1821 if (ipa_nonzero(nh
->gw
))
1822 nhp
->gw
= nh
->gw
; /* Router nexthop */
1823 else if (ipa_nonzero(he
->link
))
1824 nhp
->gw
= he
->link
; /* Device nexthop with link-local address known */
1826 nhp
->gw
= he
->addr
; /* Device nexthop with link-local address unknown */
1834 a
->dest
= RTD_UNREACHABLE
;
1835 log(L_WARN
"No valid nexthop remaining, setting route unreachable");
1841 rt_next_hop_update_rte(rtable
*tab UNUSED
, rte
*old
)
1843 rta
*a
= alloca(RTA_MAX_SIZE
);
1844 memcpy(a
, old
->attrs
, rta_size(old
->attrs
));
1846 mpls_label_stack mls
= { .len
= a
->nh
.labels_orig
};
1847 memcpy(mls
.stack
, &a
->nh
.label
[a
->nh
.labels
- mls
.len
], mls
.len
* sizeof(u32
));
1849 rta_apply_hostentry(a
, old
->attrs
->hostentry
, &mls
);
1852 rte
*e
= sl_alloc(rte_slab
);
1853 memcpy(e
, old
, sizeof(rte
));
1854 e
->attrs
= rta_lookup(a
);
1860 rt_next_hop_update_net(rtable
*tab
, net
*n
)
1862 rte
**k
, *e
, *new, *old_best
, **new_best
;
1864 int free_old_best
= 0;
1866 old_best
= n
->routes
;
1870 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1871 if (rta_next_hop_outdated(e
->attrs
))
1873 new = rt_next_hop_update_rte(tab
, e
);
1876 rte_announce_i(tab
, RA_ANY
, n
, new, e
, NULL
, NULL
);
1877 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated");
1879 /* Call a pre-comparison hook */
1880 /* Not really an efficient way to compute this */
1881 if (e
->attrs
->src
->proto
->rte_recalculate
)
1882 e
->attrs
->src
->proto
->rte_recalculate(tab
, n
, new, e
, NULL
);
1886 else /* Freeing of the old best rte is postponed */
1896 /* Find the new best route */
1898 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1900 if (!new_best
|| rte_better(e
, *new_best
))
1904 /* Relink the new best route to the first position */
1906 if (new != n
->routes
)
1908 *new_best
= new->next
;
1909 new->next
= n
->routes
;
1913 /* Announce the new best route */
1914 if (new != old_best
)
1916 rte_announce_i(tab
, RA_OPTIMAL
, n
, new, old_best
, NULL
, NULL
);
1917 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated [best]");
1920 /* FIXME: Better announcement of merged routes */
1921 rte_announce_i(tab
, RA_MERGED
, n
, new, old_best
, new, old_best
);
1924 rte_free_quick(old_best
);
1930 rt_next_hop_update(rtable
*tab
)
1932 struct fib_iterator
*fit
= &tab
->nhu_fit
;
1935 if (tab
->nhu_state
== NHU_CLEAN
)
1938 if (tab
->nhu_state
== NHU_SCHEDULED
)
1940 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1941 tab
->nhu_state
= NHU_RUNNING
;
1944 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1948 FIB_ITERATE_PUT(fit
);
1949 ev_schedule(tab
->rt_event
);
1952 max_feed
-= rt_next_hop_update_net(tab
, n
);
1957 * NHU_DIRTY -> NHU_SCHEDULED
1958 * NHU_RUNNING -> NHU_CLEAN
1960 tab
->nhu_state
&= 1;
1962 if (tab
->nhu_state
!= NHU_CLEAN
)
1963 ev_schedule(tab
->rt_event
);
1967 struct rtable_config
*
1968 rt_new_table(struct symbol
*s
, uint addr_type
)
1970 /* Hack that allows to 'redefine' the master table */
1971 if ((s
->class == SYM_TABLE
) &&
1972 (s
->def
== new_config
->def_tables
[addr_type
]) &&
1973 ((addr_type
== NET_IP4
) || (addr_type
== NET_IP6
)))
1976 struct rtable_config
*c
= cfg_allocz(sizeof(struct rtable_config
));
1978 cf_define_symbol(s
, SYM_TABLE
, c
);
1980 c
->addr_type
= addr_type
;
1981 c
->gc_max_ops
= 1000;
1984 add_tail(&new_config
->tables
, &c
->n
);
1986 /* First table of each type is kept as default */
1987 if (! new_config
->def_tables
[addr_type
])
1988 new_config
->def_tables
[addr_type
] = c
;
1994 * rt_lock_table - lock a routing table
1995 * @r: routing table to be locked
1997 * Lock a routing table, because it's in use by a protocol,
1998 * preventing it from being freed when it gets undefined in a new
2002 rt_lock_table(rtable
*r
)
2008 * rt_unlock_table - unlock a routing table
2009 * @r: routing table to be unlocked
2011 * Unlock a routing table formerly locked by rt_lock_table(),
2012 * that is decrease its use count and delete it if it's scheduled
2013 * for deletion by configuration changes.
2016 rt_unlock_table(rtable
*r
)
2018 if (!--r
->use_count
&& r
->deleted
)
2020 struct config
*conf
= r
->deleted
;
2021 DBG("Deleting routing table %s\n", r
->name
);
2022 r
->config
->table
= NULL
;
2024 rt_free_hostcache(r
);
2029 config_del_obstacle(conf
);
2034 * rt_commit - commit new routing table configuration
2035 * @new: new configuration
2036 * @old: original configuration or %NULL if it's boot time config
2038 * Scan differences between @old and @new configuration and modify
2039 * the routing tables according to these changes. If @new defines a
2040 * previously unknown table, create it, if it omits a table existing
2041 * in @old, schedule it for deletion (it gets deleted when all protocols
2042 * disconnect from it by calling rt_unlock_table()), if it exists
2043 * in both configurations, leave it unchanged.
2046 rt_commit(struct config
*new, struct config
*old
)
2048 struct rtable_config
*o
, *r
;
2050 DBG("rt_commit:\n");
2053 WALK_LIST(o
, old
->tables
)
2055 rtable
*ot
= o
->table
;
2058 struct symbol
*sym
= cf_find_symbol(new, o
->name
);
2059 if (sym
&& sym
->class == SYM_TABLE
&& !new->shutdown
)
2061 DBG("\t%s: same\n", o
->name
);
2066 if (o
->sorted
!= r
->sorted
)
2067 log(L_WARN
"Reconfiguration of rtable sorted flag not implemented");
2071 DBG("\t%s: deleted\n", o
->name
);
2073 config_add_obstacle(old
);
2075 rt_unlock_table(ot
);
2081 WALK_LIST(r
, new->tables
)
2084 rtable
*t
= mb_alloc(rt_table_pool
, sizeof(struct rtable
));
2085 DBG("\t%s: created\n", r
->name
);
2086 rt_setup(rt_table_pool
, t
, r
->name
, r
);
2087 add_tail(&routing_tables
, &t
->n
);
2094 do_feed_channel(struct channel
*c
, net
*n
, rte
*e
)
2097 if (c
->ra_mode
== RA_ACCEPTED
)
2098 rt_notify_accepted(c
, n
, e
, NULL
, NULL
, c
->refeeding
? 2 : 1);
2099 else if (c
->ra_mode
== RA_MERGED
)
2100 rt_notify_merged(c
, n
, NULL
, NULL
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2102 rt_notify_basic(c
, n
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2103 rte_update_unlock();
2107 * rt_feed_channel - advertise all routes to a channel
2108 * @c: channel to be fed
2110 * This function performs one pass of advertisement of routes to a channel that
2111 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2112 * has something to do. (We avoid transferring all the routes in single pass in
2113 * order not to monopolize CPU time.)
2116 rt_feed_channel(struct channel
*c
)
2118 struct fib_iterator
*fit
= &c
->feed_fit
;
2121 ASSERT(c
->export_state
== ES_FEEDING
);
2123 if (!c
->feed_active
)
2125 FIB_ITERATE_INIT(fit
, &c
->table
->fib
);
2129 FIB_ITERATE_START(&c
->table
->fib
, fit
, net
, n
)
2134 FIB_ITERATE_PUT(fit
);
2138 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2140 if ((c
->ra_mode
== RA_OPTIMAL
) ||
2141 (c
->ra_mode
== RA_ACCEPTED
) ||
2142 (c
->ra_mode
== RA_MERGED
))
2143 if (rte_is_valid(e
))
2145 /* In the meantime, the protocol may fell down */
2146 if (c
->export_state
!= ES_FEEDING
)
2149 do_feed_channel(c
, n
, e
);
2153 if (c
->ra_mode
== RA_ANY
)
2154 for(e
= n
->routes
; e
; e
= e
->next
)
2156 /* In the meantime, the protocol may fell down */
2157 if (c
->export_state
!= ES_FEEDING
)
2160 if (!rte_is_valid(e
))
2163 do_feed_channel(c
, n
, e
);
2175 * rt_feed_baby_abort - abort protocol feeding
2178 * This function is called by the protocol code when the protocol stops or
2179 * ceases to exist during the feeding.
2182 rt_feed_channel_abort(struct channel
*c
)
2186 /* Unlink the iterator */
2187 fit_get(&c
->table
->fib
, &c
->feed_fit
);
/* Hash a pointer value by folding shifted copies of its bits together,
   so that nearby allocations do not collide in adjacent buckets. */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  return (unsigned) (v ^ (v << 8) ^ (v >> 16));
}
2200 hc_hash(ip_addr a
, rtable
*dep
)
2202 return ipa_hash(a
) ^ ptr_hash(dep
);
2206 hc_insert(struct hostcache
*hc
, struct hostentry
*he
)
2208 uint k
= he
->hash_key
>> hc
->hash_shift
;
2209 he
->next
= hc
->hash_table
[k
];
2210 hc
->hash_table
[k
] = he
;
2214 hc_remove(struct hostcache
*hc
, struct hostentry
*he
)
2216 struct hostentry
**hep
;
2217 uint k
= he
->hash_key
>> hc
->hash_shift
;
2219 for (hep
= &hc
->hash_table
[k
]; *hep
!= he
; hep
= &(*hep
)->next
);
2223 #define HC_DEF_ORDER 10
2224 #define HC_HI_MARK *4
2225 #define HC_HI_STEP 2
2226 #define HC_HI_ORDER 16 /* Must be at most 16 */
2227 #define HC_LO_MARK /5
2228 #define HC_LO_STEP 2
2229 #define HC_LO_ORDER 10
2232 hc_alloc_table(struct hostcache
*hc
, unsigned order
)
2234 uint hsize
= 1 << order
;
2235 hc
->hash_order
= order
;
2236 hc
->hash_shift
= 32 - order
;
2237 hc
->hash_max
= (order
>= HC_HI_ORDER
) ? ~0U : (hsize HC_HI_MARK
);
2238 hc
->hash_min
= (order
<= HC_LO_ORDER
) ? 0U : (hsize HC_LO_MARK
);
2240 hc
->hash_table
= mb_allocz(rt_table_pool
, hsize
* sizeof(struct hostentry
*));
2244 hc_resize(struct hostcache
*hc
, unsigned new_order
)
2246 struct hostentry
**old_table
= hc
->hash_table
;
2247 struct hostentry
*he
, *hen
;
2248 uint old_size
= 1 << hc
->hash_order
;
2251 hc_alloc_table(hc
, new_order
);
2252 for (i
= 0; i
< old_size
; i
++)
2253 for (he
= old_table
[i
]; he
!= NULL
; he
=hen
)
2261 static struct hostentry
*
2262 hc_new_hostentry(struct hostcache
*hc
, ip_addr a
, ip_addr ll
, rtable
*dep
, unsigned k
)
2264 struct hostentry
*he
= sl_alloc(hc
->slab
);
2266 *he
= (struct hostentry
) {
2273 add_tail(&hc
->hostentries
, &he
->ln
);
2277 if (hc
->hash_items
> hc
->hash_max
)
2278 hc_resize(hc
, hc
->hash_order
+ HC_HI_STEP
);
2284 hc_delete_hostentry(struct hostcache
*hc
, struct hostentry
*he
)
2290 sl_free(hc
->slab
, he
);
2293 if (hc
->hash_items
< hc
->hash_min
)
2294 hc_resize(hc
, hc
->hash_order
- HC_LO_STEP
);
2298 rt_init_hostcache(rtable
*tab
)
2300 struct hostcache
*hc
= mb_allocz(rt_table_pool
, sizeof(struct hostcache
));
2301 init_list(&hc
->hostentries
);
2304 hc_alloc_table(hc
, HC_DEF_ORDER
);
2305 hc
->slab
= sl_new(rt_table_pool
, sizeof(struct hostentry
));
2307 hc
->lp
= lp_new(rt_table_pool
, LP_GOOD_SIZE(1024));
2308 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2310 tab
->hostcache
= hc
;
2314 rt_free_hostcache(rtable
*tab
)
2316 struct hostcache
*hc
= tab
->hostcache
;
2319 WALK_LIST(n
, hc
->hostentries
)
2321 struct hostentry
*he
= SKIP_BACK(struct hostentry
, ln
, n
);
2325 log(L_ERR
"Hostcache is not empty in table %s", tab
->name
);
2330 mb_free(hc
->hash_table
);
2335 rt_notify_hostcache(rtable
*tab
, net
*net
)
2337 if (tab
->hcu_scheduled
)
2340 if (trie_match_net(tab
->hostcache
->trie
, net
->n
.addr
))
2341 rt_schedule_hcu(tab
);
2345 if_local_addr(ip_addr a
, struct iface
*i
)
2349 WALK_LIST(b
, i
->addrs
)
2350 if (ipa_equal(a
, b
->ip
))
2357 rt_get_igp_metric(rte
*rt
)
2359 eattr
*ea
= ea_find(rt
->attrs
->eattrs
, EA_GEN_IGP_METRIC
);
2367 if ((a
->source
== RTS_OSPF
) ||
2368 (a
->source
== RTS_OSPF_IA
) ||
2369 (a
->source
== RTS_OSPF_EXT1
))
2370 return rt
->u
.ospf
.metric1
;
2374 if (a
->source
== RTS_RIP
)
2375 return rt
->u
.rip
.metric
;
2378 if (a
->source
== RTS_DEVICE
)
2381 return IGP_METRIC_UNKNOWN
;
2385 rt_update_hostentry(rtable
*tab
, struct hostentry
*he
)
2387 rta
*old_src
= he
->src
;
2390 /* Reset the hostentry */
2392 he
->nexthop_linkable
= 0;
2393 he
->dest
= RTD_UNREACHABLE
;
2397 net_fill_ip_host(&he_addr
, he
->addr
);
2398 net
*n
= net_route(tab
, &he_addr
);
2403 pxlen
= n
->n
.addr
->pxlen
;
2407 /* Recursive route should not depend on another recursive route */
2408 log(L_WARN
"Next hop address %I resolvable through recursive route for %N",
2409 he
->addr
, n
->n
.addr
);
2414 he
->nexthop_linkable
= 1;
2415 if (he
->dest
== RTD_UNICAST
)
2417 for (struct nexthop
*nh
= &(a
->nh
); nh
; nh
= nh
->next
)
2418 if (ipa_zero(nh
->gw
))
2420 if (if_local_addr(he
->addr
, nh
->iface
))
2422 /* The host address is a local address, this is not valid */
2423 log(L_WARN
"Next hop address %I is a local address of iface %s",
2424 he
->addr
, nh
->iface
->name
);
2428 he
->nexthop_linkable
= 0;
2433 he
->src
= rta_clone(a
);
2434 he
->igp_metric
= rt_get_igp_metric(e
);
2438 /* Add a prefix range to the trie */
2439 trie_add_prefix(tab
->hostcache
->trie
, &he_addr
, pxlen
, he_addr
.pxlen
);
2442 return old_src
!= he
->src
;
2446 rt_update_hostcache(rtable
*tab
)
2448 struct hostcache
*hc
= tab
->hostcache
;
2449 struct hostentry
*he
;
2452 /* Reset the trie */
2454 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2456 WALK_LIST_DELSAFE(n
, x
, hc
->hostentries
)
2458 he
= SKIP_BACK(struct hostentry
, ln
, n
);
2461 hc_delete_hostentry(hc
, he
);
2465 if (rt_update_hostentry(tab
, he
))
2466 rt_schedule_nhu(he
->tab
);
2469 tab
->hcu_scheduled
= 0;
2473 rt_get_hostentry(rtable
*tab
, ip_addr a
, ip_addr ll
, rtable
*dep
)
2475 struct hostentry
*he
;
2477 if (!tab
->hostcache
)
2478 rt_init_hostcache(tab
);
2480 u32 k
= hc_hash(a
, dep
);
2481 struct hostcache
*hc
= tab
->hostcache
;
2482 for (he
= hc
->hash_table
[k
>> hc
->hash_shift
]; he
!= NULL
; he
= he
->next
)
2483 if (ipa_equal(he
->addr
, a
) && (he
->tab
== dep
))
2486 he
= hc_new_hostentry(hc
, a
, ipa_zero(ll
) ? a
: ll
, dep
, k
);
2487 rt_update_hostentry(tab
, he
);
2493 * Documentation for functions declared inline in route.h
2498 * net_find - find a network entry
2499 * @tab: a routing table
2500 * @addr: address of the network
2502 * net_find() looks up the given network in routing table @tab and
2503 * returns a pointer to its &net entry or %NULL if no such network
2506 static inline net
*net_find(rtable
*tab
, net_addr
*addr
)
2510 * net_get - obtain a network entry
2511 * @tab: a routing table
2512 * @addr: address of the network
2514 * net_get() looks up the given network in routing table @tab and
2515 * returns a pointer to its &net entry. If no such entry exists, it's
2518 static inline net
*net_get(rtable
*tab
, net_addr
*addr
)
2522 * rte_cow - copy a route for writing
2523 * @r: a route entry to be copied
2525 * rte_cow() takes a &rte and prepares it for modification. The exact action
2526 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2527 * just returned unchanged, else a new temporary entry with the same contents
2530 * The primary use of this function is inside the filter machinery -- when
2531 * a filter wants to modify &rte contents (to change the preference or to
2532 * attach another set of attributes), it must ensure that the &rte is not
2533 * shared with anyone else (and especially that it isn't stored in any routing
2536 * Result: a pointer to the new writable &rte.
2538 static inline rte
* rte_cow(rte
*r
)