]>
git.ipfire.org Git - thirdparty/bird.git/blob - nest/rt-table.c
eb9dc3a503564311bcaa3b49953a257c65af33ba
2 * BIRD -- Routing Tables
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
37 #include "nest/iface.h"
38 #include "lib/resource.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "conf/conf.h"
42 #include "filter/filter.h"
43 #include "lib/string.h"
44 #include "lib/alloca.h"
48 static slab
*rte_slab
;
49 static linpool
*rte_update_pool
;
51 static list routing_tables
;
53 static byte
*rt_format_via(rte
*e
);
54 static void rt_free_hostcache(rtable
*tab
);
55 static void rt_notify_hostcache(rtable
*tab
, net
*net
);
56 static void rt_update_hostcache(rtable
*tab
);
57 static void rt_next_hop_update(rtable
*tab
);
58 static inline void rt_prune_table(rtable
*tab
);
61 static inline struct ea_list
*
62 make_tmp_attrs(struct rte
*rt
, struct linpool
*pool
)
64 struct ea_list
*(*mta
)(struct rte
*rt
, struct linpool
*pool
);
65 mta
= rt
->attrs
->src
->proto
->make_tmp_attrs
;
66 return mta
? mta(rt
, rte_update_pool
) : NULL
;
70 /* Like fib_route(), but skips empty net entries */
72 net_route_ip4(struct fib
*f
, net_addr_ip4
*n
)
76 while (r
= fib_find(f
, (net_addr
*) n
),
77 !(r
&& rte_is_valid(r
->routes
)) && (n
->pxlen
> 0))
80 ip4_clrbit(&n
->prefix
, n
->pxlen
);
87 net_route_ip6(struct fib
*f
, net_addr_ip6
*n
)
91 while (r
= fib_find(f
, (net_addr
*) n
),
92 !(r
&& rte_is_valid(r
->routes
)) && (n
->pxlen
> 0))
95 ip6_clrbit(&n
->prefix
, n
->pxlen
);
102 net_route(rtable
*tab
, const net_addr
*n
)
104 ASSERT(tab
->addr_type
== n
->type
);
106 net_addr
*n0
= alloca(n
->length
);
114 return net_route_ip4(&tab
->fib
, (net_addr_ip4
*) n0
);
119 return net_route_ip6(&tab
->fib
, (net_addr_ip6
*) n0
);
128 net_roa_check_ip4(rtable
*tab
, const net_addr_ip4
*px
, u32 asn
)
130 struct net_addr_roa4 n
= NET_ADDR_ROA4(px
->prefix
, px
->pxlen
, 0, 0);
136 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
138 net_addr_roa4
*roa
= (void *) fn
->addr
;
139 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
141 if (net_equal_prefix_roa4(roa
, &n
) && rte_is_valid(r
->routes
))
144 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
153 ip4_clrbit(&n
.prefix
, n
.pxlen
);
156 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
160 net_roa_check_ip6(rtable
*tab
, const net_addr_ip6
*px
, u32 asn
)
162 struct net_addr_roa6 n
= NET_ADDR_ROA6(px
->prefix
, px
->pxlen
, 0, 0);
168 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
170 net_addr_roa6
*roa
= (void *) fn
->addr
;
171 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
173 if (net_equal_prefix_roa6(roa
, &n
) && rte_is_valid(r
->routes
))
176 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
185 ip6_clrbit(&n
.prefix
, n
.pxlen
);
188 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
192 * roa_check - check validity of route origination in a ROA table
194 * @n: network prefix to check
195 * @asn: AS number of network prefix
197 * Implements RFC 6483 route validation for the given network prefix. The
198 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
199 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
200 * a candidate ROA with matching ASN and maxlen field greater than or equal to
201 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
202 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
203 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
204 * must have type NET_IP4 or NET_IP6, respectively.
207 net_roa_check(rtable
*tab
, const net_addr
*n
, u32 asn
)
209 if ((tab
->addr_type
== NET_ROA4
) && (n
->type
== NET_IP4
))
210 return net_roa_check_ip4(tab
, (const net_addr_ip4
*) n
, asn
);
211 else if ((tab
->addr_type
== NET_ROA6
) && (n
->type
== NET_IP6
))
212 return net_roa_check_ip6(tab
, (const net_addr_ip6
*) n
, asn
);
214 return ROA_UNKNOWN
; /* Should not happen */
218 * rte_find - find a route
222 * The rte_find() function returns a route for destination @net
223 * which is from route source @src.
226 rte_find(net
*net
, struct rte_src
*src
)
228 rte
*e
= net
->routes
;
230 while (e
&& e
->attrs
->src
!= src
)
236 * rte_get_temp - get a temporary &rte
237 * @a: attributes to assign to the new route (a &rta; in case it's
238 * un-cached, rte_update() will create a cached copy automatically)
240 * Create a temporary &rte and bind it with the attributes @a.
241 * Also set route preference to the default preference set for
247 rte
*e
= sl_alloc(rte_slab
);
258 rte
*e
= sl_alloc(rte_slab
);
260 memcpy(e
, r
, sizeof(rte
));
261 e
->attrs
= rta_clone(r
->attrs
);
267 * rte_cow_rta - get a private writable copy of &rte with writable &rta
268 * @r: a route entry to be copied
269 * @lp: a linpool from which to allocate &rta
271 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
272 * modification. There are three possibilities: First, both &rte and &rta are
273 * private copies, in that case they are returned unchanged. Second, &rte is
274 * private copy, but &rta is cached, in that case &rta is duplicated using
275 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
276 * both structures are duplicated by rte_do_cow() and rta_do_cow().
278 * Note that in the second case, cached &rta loses one reference, while private
279 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
280 * nexthops, ...) with it. To work properly, original shared &rta should have
281 * another reference during the life of created private copy.
283 * Result: a pointer to the new writable &rte with writable &rta.
286 rte_cow_rta(rte
*r
, linpool
*lp
)
288 if (!rta_is_cached(r
->attrs
))
292 rta
*a
= rta_do_cow(r
->attrs
, lp
);
298 static int /* Actually better or at least as good as */
299 rte_better(rte
*new, rte
*old
)
301 int (*better
)(rte
*, rte
*);
303 if (!rte_is_valid(old
))
305 if (!rte_is_valid(new))
308 if (new->pref
> old
->pref
)
310 if (new->pref
< old
->pref
)
312 if (new->attrs
->src
->proto
->proto
!= old
->attrs
->src
->proto
->proto
)
315 * If the user has configured protocol preferences, so that two different protocols
316 * have the same preference, try to break the tie by comparing addresses. Not too
317 * useful, but keeps the ordering of routes unambiguous.
319 return new->attrs
->src
->proto
->proto
> old
->attrs
->src
->proto
->proto
;
321 if (better
= new->attrs
->src
->proto
->rte_better
)
322 return better(new, old
);
327 rte_mergable(rte
*pri
, rte
*sec
)
329 int (*mergable
)(rte
*, rte
*);
331 if (!rte_is_valid(pri
) || !rte_is_valid(sec
))
334 if (pri
->pref
!= sec
->pref
)
337 if (pri
->attrs
->src
->proto
->proto
!= sec
->attrs
->src
->proto
->proto
)
340 if (mergable
= pri
->attrs
->src
->proto
->rte_mergable
)
341 return mergable(pri
, sec
);
347 rte_trace(struct proto
*p
, rte
*e
, int dir
, char *msg
)
349 log(L_TRACE
"%s %c %s %N %s", p
->name
, dir
, msg
, e
->net
->n
.addr
, rt_format_via(e
));
353 rte_trace_in(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
356 rte_trace(p
, e
, '>', msg
);
360 rte_trace_out(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
363 rte_trace(p
, e
, '<', msg
);
367 export_filter_(struct channel
*c
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, linpool
*pool
, int silent
)
369 struct proto
*p
= c
->proto
;
370 struct filter
*filter
= c
->out_filter
;
371 struct proto_stats
*stats
= &c
->stats
;
372 ea_list
*tmpb
= NULL
;
382 *tmpa
= make_tmp_attrs(rt
, pool
);
384 v
= p
->import_control
? p
->import_control(p
, &rt
, tmpa
, pool
) : 0;
390 stats
->exp_updates_rejected
++;
392 rte_trace_out(D_FILTERS
, p
, rt
, "rejected by protocol");
398 rte_trace_out(D_FILTERS
, p
, rt
, "forced accept by protocol");
402 v
= filter
&& ((filter
== FILTER_REJECT
) ||
403 (f_run(filter
, &rt
, tmpa
, pool
, FF_FORCE_TMPATTR
) > F_ACCEPT
));
409 stats
->exp_updates_filtered
++;
410 rte_trace_out(D_FILTERS
, p
, rt
, "filtered out");
420 /* Discard temporary rte */
427 export_filter(struct channel
*c
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, int silent
)
429 return export_filter_(c
, rt0
, rt_free
, tmpa
, rte_update_pool
, silent
);
433 do_rt_notify(struct channel
*c
, net
*net
, rte
*new, rte
*old
, ea_list
*tmpa
, int refeed
)
435 struct proto
*p
= c
->proto
;
436 struct proto_stats
*stats
= &c
->stats
;
440 * First, apply export limit.
442 * Export route limits have several problems. Because exp_routes
443 * counter is reset before refeed, we don't really know whether
444 * limit is breached and whether the update is new or not. Therefore
445 * the number of really exported routes may exceed the limit
446 * temporarily (routes exported before and new routes in refeed).
448 * Minor advantage is that if the limit is decreased and refeed is
449 * requested, the number of exported routes really decreases.
451 * Second problem is that with export limits, we don't know whether
452 * old was really exported (it might be blocked by limit). When a
453 * withdraw is exported, we announce it even when the previous
454 * update was blocked. This is not a big issue, but the same problem
455 * is in updating exp_routes counter. Therefore, to be consistent in
456 * increases and decreases of exp_routes, we count exported routes
457 * regardless of blocking by limits.
459 * Similar problem is in handling updates - when a new route is
460 * received and blocking is active, the route would be blocked, but
461 * when an update for the route will be received later, the update
462 * would be propagated (as old != NULL). Therefore, we have to block
463 * also non-new updates (contrary to import blocking).
466 struct channel_limit
*l
= &c
->out_limit
;
467 if (l
->action
&& new)
469 if ((!old
|| refeed
) && (stats
->exp_routes
>= l
->limit
))
470 channel_notify_limit(c
, l
, PLD_OUT
, stats
->exp_routes
);
472 if (l
->state
== PLS_BLOCKED
)
474 stats
->exp_routes
++; /* see note above */
475 stats
->exp_updates_rejected
++;
476 rte_trace_out(D_FILTERS
, p
, new, "rejected [limit]");
486 stats
->exp_updates_accepted
++;
488 stats
->exp_withdraws_accepted
++;
490 /* Hack: We do not decrease exp_routes during refeed, we instead
491 reset exp_routes at the start of refeed. */
497 if (p
->debug
& D_ROUTES
)
500 rte_trace_out(D_ROUTES
, p
, new, "replaced");
502 rte_trace_out(D_ROUTES
, p
, new, "added");
504 rte_trace_out(D_ROUTES
, p
, old
, "removed");
507 p
->rt_notify(p
, c
, net
, NULL
, old
, NULL
);
513 t
->next
= new->attrs
->eattrs
;
514 p
->rt_notify(p
, c
, net
, new, old
, tmpa
);
518 p
->rt_notify(p
, c
, net
, new, old
, new->attrs
->eattrs
);
522 rt_notify_basic(struct channel
*c
, net
*net
, rte
*new0
, rte
*old0
, int refeed
)
524 struct proto
*p
= c
->proto
;
528 rte
*new_free
= NULL
;
529 rte
*old_free
= NULL
;
530 ea_list
*tmpa
= NULL
;
533 c
->stats
.exp_updates_received
++;
535 c
->stats
.exp_withdraws_received
++;
538 * This is a tricky part - we don't know whether route 'old' was
539 * exported to protocol 'p' or was filtered by the export filter.
540 * We try to run the export filter to know this to have a correct
541 * value in 'old' argument of rte_update (and proper filter value)
543 * FIXME - this is broken because 'configure soft' may change
544 * filters but keep routes. Refeed is expected to be called after
545 * change of the filters and with old == new, therefore we do not
546 * even try to run the filter on an old route. This may lead to
547 * 'spurious withdraws' but ensure that there are no 'missing
550 * This is not completely safe as there is a window between
551 * reconfiguration and the end of refeed - if a newly filtered
552 * route disappears during this period, proper withdraw is not
553 * sent (because old would be also filtered) and the route is
554 * not re-fed (because it disappeared before that).
558 new = export_filter(c
, new, &new_free
, &tmpa
, 0);
561 old
= export_filter(c
, old
, &old_free
, NULL
, 1);
566 * As mentioned above, 'old' value may be incorrect in some race conditions.
567 * We generally ignore it with the exception of withdraw to pipe protocol.
568 * In that case we rather propagate unfiltered withdraws regardless of
569 * export filters to ensure that when a protocol is flushed, its routes are
570 * removed from all tables. Possible spurious unfiltered withdraws are not
571 * problem here as they are ignored if there is no corresponding route at
572 * the other end of the pipe. We directly call rt_notify() hook instead of
573 * do_rt_notify() to avoid logging and stat counters.
577 if ((p
->proto
== &proto_pipe
) && !new0
&& (p
!= old0
->sender
->proto
))
578 p
->rt_notify(p
, c
, net
, NULL
, old0
, NULL
);
584 do_rt_notify(c
, net
, new, old
, tmpa
, refeed
);
586 /* Discard temporary rte's */
594 rt_notify_accepted(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
, rte
*before_old
, int feed
)
596 // struct proto *p = c->proto;
599 rte
*new_best
= NULL
;
600 rte
*old_best
= NULL
;
601 rte
*new_free
= NULL
;
602 rte
*old_free
= NULL
;
603 ea_list
*tmpa
= NULL
;
605 /* Used to track whether we met old_changed position. If before_old is NULL
606 old_changed was the first and we met it implicitly before current best route. */
607 int old_meet
= old_changed
&& !before_old
;
609 /* Note that before_old is either NULL or valid (not rejected) route.
610 If old_changed is valid, before_old has to be too. If the old changed route
611 was not valid, caller must use NULL for both old_changed and before_old. */
614 c
->stats
.exp_updates_received
++;
616 c
->stats
.exp_withdraws_received
++;
618 /* First, find the new_best route - first accepted by filters */
619 for (r
=net
->routes
; rte_is_valid(r
); r
=r
->next
)
621 if (new_best
= export_filter(c
, r
, &new_free
, &tmpa
, 0))
624 /* Note if we walked around the position of old_changed route */
630 * Second, handle the feed case. That means we do not care for
631 * old_best. It is NULL for feed, and the new_best for refeed.
632 * For refeed, there is a hack similar to one in rt_notify_basic()
633 * to ensure withdraws in case of changed filters
637 if (feed
== 2) /* refeed */
638 old_best
= new_best
? new_best
:
639 (rte_is_valid(net
->routes
) ? net
->routes
: NULL
);
643 if (!new_best
&& !old_best
)
650 * Now, we find the old_best route. Generally, it is the same as the
651 * new_best, unless new_best is the same as new_changed or
652 * old_changed is accepted before new_best.
654 * There are four cases:
656 * - We would find and accept old_changed before new_best, therefore
657 * old_changed is old_best. In remaining cases we suppose this
660 * - We found no new_best, therefore there is also no old_best and
661 * we ignore this withdraw.
663 * - We found new_best different than new_changed, therefore
664 * old_best is the same as new_best and we ignore this update.
666 * - We found new_best the same as new_changed, therefore it cannot
667 * be old_best and we have to continue search for old_best.
672 if (old_best
= export_filter(c
, old_changed
, &old_free
, NULL
, 1))
679 /* Third case, we use r instead of new_best, because export_filter() could change it */
680 if (r
!= new_changed
)
688 for (r
=r
->next
; rte_is_valid(r
); r
=r
->next
)
690 if (old_best
= export_filter(c
, r
, &old_free
, NULL
, 1))
694 if (old_best
= export_filter(c
, old_changed
, &old_free
, NULL
, 1))
698 /* Implicitly, old_best is NULL and new_best is non-NULL */
701 do_rt_notify(c
, net
, new_best
, old_best
, tmpa
, (feed
== 2));
703 /* Discard temporary rte's */
712 mpnh_merge_rta(struct mpnh
*nhs
, rta
*a
, linpool
*pool
, int max
)
714 struct mpnh nh
= { .gw
= a
->gw
, .iface
= a
->iface
};
715 struct mpnh
*nh2
= (a
->dest
== RTD_MULTIPATH
) ? a
->nexthops
: &nh
;
716 return mpnh_merge(nhs
, nh2
, 1, 0, max
, rte_update_pool
);
720 rt_export_merged(struct channel
*c
, net
*net
, rte
**rt_free
, ea_list
**tmpa
, linpool
*pool
, int silent
)
722 // struct proto *p = c->proto;
723 struct mpnh
*nhs
= NULL
;
724 rte
*best0
, *best
, *rt0
, *rt
, *tmp
;
729 if (!rte_is_valid(best0
))
732 best
= export_filter_(c
, best0
, rt_free
, tmpa
, pool
, silent
);
734 if (!best
|| !rte_is_reachable(best
))
737 for (rt0
= best0
->next
; rt0
; rt0
= rt0
->next
)
739 if (!rte_mergable(best0
, rt0
))
742 rt
= export_filter_(c
, rt0
, &tmp
, NULL
, pool
, 1);
747 if (rte_is_reachable(rt
))
748 nhs
= mpnh_merge_rta(nhs
, rt
->attrs
, pool
, c
->merge_limit
);
756 nhs
= mpnh_merge_rta(nhs
, best
->attrs
, pool
, c
->merge_limit
);
760 best
= rte_cow_rta(best
, pool
);
761 best
->attrs
->dest
= RTD_MULTIPATH
;
762 best
->attrs
->nexthops
= nhs
;
774 rt_notify_merged(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
,
775 rte
*new_best
, rte
*old_best
, int refeed
)
777 // struct proto *p = c->proto;
779 rte
*new_best_free
= NULL
;
780 rte
*old_best_free
= NULL
;
781 rte
*new_changed_free
= NULL
;
782 rte
*old_changed_free
= NULL
;
783 ea_list
*tmpa
= NULL
;
785 /* We assume that all rte arguments are either NULL or rte_is_valid() */
787 /* This check should be done by the caller */
788 if (!new_best
&& !old_best
)
791 /* Check whether the change is relevant to the merged route */
792 if ((new_best
== old_best
) && !refeed
)
794 new_changed
= rte_mergable(new_best
, new_changed
) ?
795 export_filter(c
, new_changed
, &new_changed_free
, NULL
, 1) : NULL
;
797 old_changed
= rte_mergable(old_best
, old_changed
) ?
798 export_filter(c
, old_changed
, &old_changed_free
, NULL
, 1) : NULL
;
800 if (!new_changed
&& !old_changed
)
805 c
->stats
.exp_updates_received
++;
807 c
->stats
.exp_withdraws_received
++;
809 /* Prepare new merged route */
811 new_best
= rt_export_merged(c
, net
, &new_best_free
, &tmpa
, rte_update_pool
, 0);
813 /* Prepare old merged route (without proper merged next hops) */
814 /* There are some issues with running filter on old route - see rt_notify_basic() */
815 if (old_best
&& !refeed
)
816 old_best
= export_filter(c
, old_best
, &old_best_free
, NULL
, 1);
818 if (new_best
|| old_best
)
819 do_rt_notify(c
, net
, new_best
, old_best
, tmpa
, refeed
);
821 /* Discard temporary rte's */
823 rte_free(new_best_free
);
825 rte_free(old_best_free
);
826 if (new_changed_free
)
827 rte_free(new_changed_free
);
828 if (old_changed_free
)
829 rte_free(old_changed_free
);
834 * rte_announce - announce a routing table change
835 * @tab: table the route has been added to
836 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
837 * @net: network in question
838 * @new: the new route to be announced
839 * @old: the previous route for the same network
840 * @new_best: the new best route for the same network
841 * @old_best: the previous best route for the same network
842 * @before_old: The previous route before @old for the same network.
843 * If @before_old is NULL @old was the first.
845 * This function gets a routing table update and announces it
846 * to all protocols that accept the given type of route announcement
847 * and are connected to the same table by their announcement hooks.
849 * Route announcement of type %RA_OPTIMAL is generated when the optimal
850 * route (in routing table @tab) changes. In that case @old stores the
853 * Route announcement of type %RA_ANY is generated when any route (in
854 * routing table @tab) changes. In that case @old stores the old route
855 * from the same protocol.
857 * For each appropriate protocol, we first call its import_control()
858 * hook which performs basic checks on the route (each protocol has a
859 * right to veto or force accept of the route before any filter is
860 * asked) and adds default values of attributes specific to the new
861 * protocol (metrics, tags etc.). Then it consults the protocol's
862 * export filter and if it accepts the route, the rt_notify() hook of
863 * the protocol gets called.
866 rte_announce(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
867 rte
*new_best
, rte
*old_best
, rte
*before_old
)
869 if (!rte_is_valid(new))
872 if (!rte_is_valid(old
))
873 old
= before_old
= NULL
;
875 if (!rte_is_valid(new_best
))
878 if (!rte_is_valid(old_best
))
884 if ((type
== RA_OPTIMAL
) && tab
->hostcache
)
885 rt_notify_hostcache(tab
, net
);
887 struct channel
*c
; node
*n
;
888 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
890 if (c
->export_state
== ES_DOWN
)
893 if (c
->ra_mode
== type
)
894 if (type
== RA_ACCEPTED
)
895 rt_notify_accepted(c
, net
, new, old
, before_old
, 0);
896 else if (type
== RA_MERGED
)
897 rt_notify_merged(c
, net
, new, old
, new_best
, old_best
, 0);
899 rt_notify_basic(c
, net
, new, old
, 0);
909 // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
910 if (!net_validate(n
->n
.addr
))
912 log(L_WARN
"Ignoring bogus prefix %N received via %s",
913 n
->n
.addr
, e
->sender
->proto
->name
);
917 c
= net_classify(n
->n
.addr
);
918 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
920 log(L_WARN
"Ignoring bogus route %N received via %s",
921 n
->n
.addr
, e
->sender
->proto
->name
);
925 if ((e
->attrs
->dest
== RTD_MULTIPATH
) && !mpnh_is_sorted(e
->attrs
->nexthops
))
927 log(L_WARN
"Ignoring unsorted multipath route %N received via %s",
928 n
->n
.addr
, e
->sender
->proto
->name
);
936 * rte_free - delete a &rte
937 * @e: &rte to be deleted
939 * rte_free() deletes the given &rte from the routing table it's linked to.
944 if (rta_is_cached(e
->attrs
))
946 sl_free(rte_slab
, e
);
950 rte_free_quick(rte
*e
)
953 sl_free(rte_slab
, e
);
957 rte_same(rte
*x
, rte
*y
)
960 x
->attrs
== y
->attrs
&&
961 x
->flags
== y
->flags
&&
962 x
->pflags
== y
->pflags
&&
963 x
->pref
== y
->pref
&&
964 (!x
->attrs
->src
->proto
->rte_same
|| x
->attrs
->src
->proto
->rte_same(x
, y
));
967 static inline int rte_is_ok(rte
*e
) { return e
&& !rte_is_filtered(e
); }
970 rte_recalculate(struct channel
*c
, net
*net
, rte
*new, struct rte_src
*src
)
972 struct proto
*p
= c
->proto
;
973 struct rtable
*table
= c
->table
;
974 struct proto_stats
*stats
= &c
->stats
;
975 static struct tbf rl_pipe
= TBF_DEFAULT_LOG_LIMITS
;
976 rte
*before_old
= NULL
;
977 rte
*old_best
= net
->routes
;
981 k
= &net
->routes
; /* Find and remove original route from the same protocol */
984 if (old
->attrs
->src
== src
)
986 /* If there is the same route in the routing table but from
987 * a different sender, then there are two paths from the
988 * source protocol to this routing table through transparent
989 * pipes, which is not allowed.
991 * We log that and ignore the route. If it is withdraw, we
992 * ignore it completely (there might be 'spurious withdraws',
993 * see FIXME in do_rte_announce())
995 if (old
->sender
->proto
!= p
)
999 log_rl(&rl_pipe
, L_ERR
"Pipe collision detected when sending %N to table %s",
1000 net
->n
.addr
, table
->name
);
1001 rte_free_quick(new);
1006 if (new && rte_same(old
, new))
1008 /* No changes, ignore the new route */
1010 if (!rte_is_filtered(new))
1012 stats
->imp_updates_ignored
++;
1013 rte_trace_in(D_ROUTES
, p
, new, "ignored");
1016 rte_free_quick(new);
1031 stats
->imp_withdraws_ignored
++;
1035 int new_ok
= rte_is_ok(new);
1036 int old_ok
= rte_is_ok(old
);
1038 struct channel_limit
*l
= &c
->rx_limit
;
1039 if (l
->action
&& !old
&& new)
1041 u32 all_routes
= stats
->imp_routes
+ stats
->filt_routes
;
1043 if (all_routes
>= l
->limit
)
1044 channel_notify_limit(c
, l
, PLD_RX
, all_routes
);
1046 if (l
->state
== PLS_BLOCKED
)
1048 /* In receive limit the situation is simple, old is NULL so
1049 we just free new and exit like nothing happened */
1051 stats
->imp_updates_ignored
++;
1052 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1053 rte_free_quick(new);
1059 if (l
->action
&& !old_ok
&& new_ok
)
1061 if (stats
->imp_routes
>= l
->limit
)
1062 channel_notify_limit(c
, l
, PLD_IN
, stats
->imp_routes
);
1064 if (l
->state
== PLS_BLOCKED
)
1066 /* In import limit the situation is more complicated. We
1067 shouldn't just drop the route, we should handle it like
1068 it was filtered. We also have to continue the route
1069 processing if old or new is non-NULL, but we should exit
1070 if both are NULL as this case is probably assumed to be
1073 stats
->imp_updates_ignored
++;
1074 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1076 if (c
->in_keep_filtered
)
1077 new->flags
|= REF_FILTERED
;
1079 { rte_free_quick(new); new = NULL
; }
1081 /* Note that old && !new could be possible when
1082 c->in_keep_filtered changed in the recent past. */
1093 stats
->imp_updates_accepted
++;
1095 stats
->imp_withdraws_accepted
++;
1097 stats
->imp_withdraws_ignored
++;
1102 rte_is_filtered(new) ? stats
->filt_routes
++ : stats
->imp_routes
++;
1104 rte_is_filtered(old
) ? stats
->filt_routes
-- : stats
->imp_routes
--;
1106 if (table
->config
->sorted
)
1108 /* If routes are sorted, just insert new route to appropriate position */
1111 if (before_old
&& !rte_better(new, before_old
))
1112 k
= &before_old
->next
;
1116 for (; *k
; k
=&(*k
)->next
)
1117 if (rte_better(new, *k
))
1126 /* If routes are not sorted, find the best route and move it on
1127 the first position. There are several optimized cases. */
1129 if (src
->proto
->rte_recalculate
&& src
->proto
->rte_recalculate(table
, net
, new, old
, old_best
))
1130 goto do_recalculate
;
1132 if (new && rte_better(new, old_best
))
1134 /* The first case - the new route is clearly optimal,
1135 we link it at the first position */
1137 new->next
= net
->routes
;
1140 else if (old
== old_best
)
1142 /* The second case - the old best route disappeared, we add the
1143 new route (if we have any) to the list (we don't care about
1144 position) and then we elect the new optimal route and relink
1145 that route at the first position and announce it. New optimal
1146 route might be NULL if there are no more routes */
1149 /* Add the new route to the list */
1152 new->next
= net
->routes
;
1156 /* Find a new optimal route (if there is any) */
1159 rte
**bp
= &net
->routes
;
1160 for (k
=&(*bp
)->next
; *k
; k
=&(*k
)->next
)
1161 if (rte_better(*k
, *bp
))
1167 best
->next
= net
->routes
;
1173 /* The third case - the new route is not better than the old
1174 best route (therefore old_best != NULL) and the old best
1175 route was not removed (therefore old_best == net->routes).
1176 We just link the new route after the old best route. */
1178 ASSERT(net
->routes
!= NULL
);
1179 new->next
= net
->routes
->next
;
1180 net
->routes
->next
= new;
1182 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1188 /* Log the route change */
1189 if (p
->debug
& D_ROUTES
)
1192 rte_trace(p
, new, '>', new == net
->routes
? "added [best]" : "added");
1195 if (old
!= old_best
)
1196 rte_trace(p
, old
, '>', "removed");
1197 else if (rte_is_ok(net
->routes
))
1198 rte_trace(p
, old
, '>', "removed [replaced]");
1200 rte_trace(p
, old
, '>', "removed [sole]");
1204 /* Propagate the route change */
1205 rte_announce(table
, RA_ANY
, net
, new, old
, NULL
, NULL
, NULL
);
1206 if (net
->routes
!= old_best
)
1207 rte_announce(table
, RA_OPTIMAL
, net
, net
->routes
, old_best
, NULL
, NULL
, NULL
);
1208 if (table
->config
->sorted
)
1209 rte_announce(table
, RA_ACCEPTED
, net
, new, old
, NULL
, NULL
, before_old
);
1210 rte_announce(table
, RA_MERGED
, net
, new, old
, net
->routes
, old_best
, NULL
);
1213 (table
->gc_counter
++ >= table
->config
->gc_max_ops
) &&
1214 (table
->gc_time
+ table
->config
->gc_min_time
<= now
))
1215 rt_schedule_prune(table
);
1217 if (old_ok
&& p
->rte_remove
)
1218 p
->rte_remove(net
, old
);
1219 if (new_ok
&& p
->rte_insert
)
1220 p
->rte_insert(net
, new);
1223 rte_free_quick(old
);
/* Nesting counter that allows recursive updates: rte_update_lock()
   increments it, rte_update_unlock() decrements it and flushes
   rte_update_pool when it drops back to zero. */
static int rte_update_nest_cnt;
1229 rte_update_lock(void)
1231 rte_update_nest_cnt
++;
1235 rte_update_unlock(void)
1237 if (!--rte_update_nest_cnt
)
1238 lp_flush(rte_update_pool
);
1242 rte_hide_dummy_routes(net
*net
, rte
**dummy
)
1244 if (net
->routes
&& net
->routes
->attrs
->source
== RTS_DUMMY
)
1246 *dummy
= net
->routes
;
1247 net
->routes
= (*dummy
)->next
;
1252 rte_unhide_dummy_routes(net
*net
, rte
**dummy
)
1256 (*dummy
)->next
= net
->routes
;
1257 net
->routes
= *dummy
;
1262 * rte_update - enter a new update to a routing table
1263 * @table: table to be updated
1264 * @c: channel doing the update
1265 * @net: network node
1266 * @p: protocol submitting the update
1267 * @src: protocol originating the update
1268 * @new: a &rte representing the new route or %NULL for route removal.
1270 * This function is called by the routing protocols whenever they discover
1271 * a new route or wish to update/remove an existing route. The right announcement
1272 * sequence is to build route attributes first (either un-cached with @aflags set
1273 * to zero or a cached one using rta_lookup(); in this case please note that
1274 * you need to increase the use count of the attributes yourself by calling
1275 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1276 * the appropriate data and finally submit the new &rte by calling rte_update().
1278 * @src specifies the protocol that originally created the route and the meaning
1279 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
1280 * same value as @new->attrs->proto. @p specifies the protocol that called
1281 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1282 * stores @p in @new->sender;
1284 * When rte_update() gets any route, it automatically validates it (checks,
1285 * whether the network and next hop address are valid IP addresses and also
1286 * whether a normal routing protocol doesn't try to smuggle a host or link
1287 * scope route to the table), converts all protocol dependent attributes stored
1288 * in the &rte to temporary extended attributes, consults import filters of the
1289 * protocol to see if the route should be accepted and/or its attributes modified,
1290 * stores the temporary attributes back to the &rte.
1292 * Now, having a "public" version of the route, we
1293 * automatically find any old route defined by the protocol @src
1294 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1295 * recalculate the optimal route for this destination and finally broadcast
1296 * the change (if any) to all routing protocols by calling rte_announce().
1298 * All memory used for attribute lists and other temporary allocations is taken
1299 * from a special linear pool @rte_update_pool and freed when rte_update()
1304 rte_update2(struct channel
*c
, net_addr
*n
, rte
*new, struct rte_src
*src
)
1306 struct proto
*p
= c
->proto
;
1307 struct proto_stats
*stats
= &c
->stats
;
1308 struct filter
*filter
= c
->in_filter
;
1309 ea_list
*tmpa
= NULL
;
1313 ASSERT(c
->channel_state
== CS_UP
);
1318 nn
= net_get(c
->table
, n
);
1324 new->pref
= c
->preference
;
1326 stats
->imp_updates_received
++;
1327 if (!rte_validate(new))
1329 rte_trace_in(D_FILTERS
, p
, new, "invalid");
1330 stats
->imp_updates_invalid
++;
1334 if (filter
== FILTER_REJECT
)
1336 stats
->imp_updates_filtered
++;
1337 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1339 if (! c
->in_keep_filtered
)
1342 /* new is a private copy, i could modify it */
1343 new->flags
|= REF_FILTERED
;
1347 tmpa
= make_tmp_attrs(new, rte_update_pool
);
1348 if (filter
&& (filter
!= FILTER_REJECT
))
1350 ea_list
*old_tmpa
= tmpa
;
1351 int fr
= f_run(filter
, &new, &tmpa
, rte_update_pool
, 0);
1354 stats
->imp_updates_filtered
++;
1355 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1357 if (! c
->in_keep_filtered
)
1360 new->flags
|= REF_FILTERED
;
1362 if (tmpa
!= old_tmpa
&& src
->proto
->store_tmp_attrs
)
1363 src
->proto
->store_tmp_attrs(new, tmpa
);
1366 if (!rta_is_cached(new->attrs
)) /* Need to copy attributes */
1367 new->attrs
= rta_lookup(new->attrs
);
1368 new->flags
|= REF_COW
;
1372 stats
->imp_withdraws_received
++;
1374 if (!(nn
= net_find(c
->table
, n
)) || !src
)
1376 stats
->imp_withdraws_ignored
++;
1377 rte_update_unlock();
1383 rte_hide_dummy_routes(nn
, &dummy
);
1384 rte_recalculate(c
, nn
, new, src
);
1385 rte_unhide_dummy_routes(nn
, &dummy
);
1386 rte_update_unlock();
1395 /* Independent call to rte_announce(), used from next hop
1396 recalculation, outside of rte_update(). new must be non-NULL */
1398 rte_announce_i(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
1399 rte
*new_best
, rte
*old_best
)
1402 rte_announce(tab
, type
, net
, new, old
, new_best
, old_best
, NULL
);
1403 rte_update_unlock();
1407 rte_discard(rtable
*t
, rte
*old
) /* Non-filtered route deletion, used during garbage collection */
1410 rte_recalculate(old
->sender
, old
->net
, NULL
, old
->attrs
->src
);
1411 rte_update_unlock();
1414 /* Check rtable for best route to given net whether it would be exported to p */
1416 rt_examine(rtable
*t
, net_addr
*a
, struct proto
*p
, struct filter
*filter
)
1418 net
*n
= net_find(t
, a
);
1419 rte
*rt
= n
? n
->routes
: NULL
;
1421 if (!rte_is_valid(rt
))
1426 /* Rest is stripped down export_filter() */
1427 ea_list
*tmpa
= make_tmp_attrs(rt
, rte_update_pool
);
1428 int v
= p
->import_control
? p
->import_control(p
, &rt
, &tmpa
, rte_update_pool
) : 0;
1429 if (v
== RIC_PROCESS
)
1430 v
= (f_run(filter
, &rt
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) <= F_ACCEPT
);
1432 /* Discard temporary rte */
1433 if (rt
!= n
->routes
)
1436 rte_update_unlock();
1443 * rt_refresh_begin - start a refresh cycle
1444 * @t: related routing table
1445 * @c: related channel
1447 * This function starts a refresh cycle for given routing table and announce
1448 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1449 * routes to the routing table (by rte_update()). After that, all protocol
1450 * routes (more precisely routes with @c as @sender) not sent during the
1451 * refresh cycle but still in the table from the past are pruned. This is
1452 * implemented by marking all related routes as stale by REF_STALE flag in
1453 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1454 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1457 rt_refresh_begin(rtable
*t
, struct channel
*c
)
1459 FIB_WALK(&t
->fib
, net
, n
)
1462 for (e
= n
->routes
; e
; e
= e
->next
)
1464 e
->flags
|= REF_STALE
;
1470 * rt_refresh_end - end a refresh cycle
1471 * @t: related routing table
1472 * @c: related channel
1474 * This function ends a refresh cycle for given routing table and announce
1475 * hook. See rt_refresh_begin() for description of refresh cycles.
1478 rt_refresh_end(rtable
*t
, struct channel
*c
)
1482 FIB_WALK(&t
->fib
, net
, n
)
1485 for (e
= n
->routes
; e
; e
= e
->next
)
1486 if ((e
->sender
== c
) && (e
->flags
& REF_STALE
))
1488 e
->flags
|= REF_DISCARD
;
1495 rt_schedule_prune(t
);
1500 * rte_dump - dump a route
1501 * @e: &rte to be dumped
1503 * This function dumps contents of a &rte to debug output.
1509 debug("%-1N ", n
->n
.addr
);
1510 debug("KF=%02x PF=%02x pref=%d lm=%d ", n
->n
.flags
, e
->pflags
, e
->pref
, now
-e
->lastmod
);
1512 if (e
->attrs
->src
->proto
->proto
->dump_attrs
)
1513 e
->attrs
->src
->proto
->proto
->dump_attrs(e
);
1518 * rt_dump - dump a routing table
1519 * @t: routing table to be dumped
1521 * This function dumps contents of a given routing table to debug output.
1526 debug("Dump of routing table <%s>\n", t
->name
);
1530 FIB_WALK(&t
->fib
, net
, n
)
1533 for(e
=n
->routes
; e
; e
=e
->next
)
1541 * rt_dump_all - dump all routing tables
1543 * This function dumps contents of all routing tables to debug output.
1550 WALK_LIST(t
, routing_tables
)
1555 rt_schedule_hcu(rtable
*tab
)
1557 if (tab
->hcu_scheduled
)
1560 tab
->hcu_scheduled
= 1;
1561 ev_schedule(tab
->rt_event
);
1565 rt_schedule_nhu(rtable
*tab
)
1567 if (tab
->nhu_state
== 0)
1568 ev_schedule(tab
->rt_event
);
1570 /* state change 0->1, 2->3 */
1571 tab
->nhu_state
|= 1;
1575 rt_schedule_prune(rtable
*tab
)
1577 if (tab
->prune_state
== 0)
1578 ev_schedule(tab
->rt_event
);
1580 /* state change 0->1, 2->3 */
1581 tab
->prune_state
|= 1;
1592 if (tab
->hcu_scheduled
)
1593 rt_update_hostcache(tab
);
1596 rt_next_hop_update(tab
);
1598 if (tab
->prune_state
)
1599 rt_prune_table(tab
);
1601 rt_unlock_table(tab
);
1605 rt_setup(pool
*p
, rtable
*t
, char *name
, struct rtable_config
*cf
)
1607 bzero(t
, sizeof(*t
));
1610 t
->addr_type
= cf
? cf
->addr_type
: NET_IP4
;
1611 fib_init(&t
->fib
, p
, t
->addr_type
, sizeof(net
), OFFSETOF(net
, n
), 0, NULL
);
1612 init_list(&t
->channels
);
1616 t
->rt_event
= ev_new(p
);
1617 t
->rt_event
->hook
= rt_event
;
1618 t
->rt_event
->data
= t
;
1624 * rt_init - initialize routing tables
1626 * This function is called during BIRD startup. It initializes the
1627 * routing table module.
1633 rt_table_pool
= rp_new(&root_pool
, "Routing tables");
1634 rte_update_pool
= lp_new(rt_table_pool
, 4080);
1635 rte_slab
= sl_new(rt_table_pool
, sizeof(rte
));
1636 init_list(&routing_tables
);
1641 * rt_prune_table - prune a routing table
1643 * The prune loop scans routing tables and removes routes belonging to flushing
1644 * protocols, discarded routes and also stale network entries. It is called from
1645 * rt_event(). The event is rescheduled if the current iteration does not finish
1646 * the table. The pruning is directed by the prune state (@prune_state),
1647 * specifying whether the prune cycle is scheduled or running, and there
1648 * is also a persistent pruning iterator (@prune_fit).
1650 * The prune loop is used also for channel flushing. For this purpose, the
1651 * channels to flush are marked before the iteration and notified after the iteration.
1655 rt_prune_table(rtable
*tab
)
1657 struct fib_iterator
*fit
= &tab
->prune_fit
;
1663 DBG("Pruning route table %s\n", tab
->name
);
1665 fib_check(&tab
->fib
);
1668 if (tab
->prune_state
== 0)
1671 if (tab
->prune_state
== 1)
1673 /* Mark channels to flush */
1674 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
1675 if (c
->channel_state
== CS_FLUSHING
)
1676 c
->flush_active
= 1;
1678 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1679 tab
->prune_state
= 2;
1683 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1688 for (e
=n
->routes
; e
; e
=e
->next
)
1689 if (e
->sender
->flush_active
|| (e
->flags
& REF_DISCARD
))
1693 FIB_ITERATE_PUT(fit
);
1694 ev_schedule(tab
->rt_event
);
1698 rte_discard(tab
, e
);
1704 if (!n
->routes
) /* Orphaned FIB entry */
1706 FIB_ITERATE_PUT(fit
);
1707 fib_delete(&tab
->fib
, n
);
1714 fib_check(&tab
->fib
);
1717 tab
->gc_counter
= 0;
1720 /* state change 2->0, 3->1 */
1721 tab
->prune_state
&= 1;
1723 if (tab
->prune_state
> 0)
1724 ev_schedule(tab
->rt_event
);
1726 /* FIXME: This should be handled in a better way */
1729 /* Close flushed channels */
1730 WALK_LIST2_DELSAFE(c
, n
, x
, tab
->channels
, table_node
)
1731 if (c
->flush_active
)
1733 c
->flush_active
= 0;
1734 channel_set_state(c
, CS_DOWN
);
1741 rt_preconfig(struct config
*c
)
1743 init_list(&c
->tables
);
1745 rt_new_table(cf_get_symbol("master4"), NET_IP4
);
1746 rt_new_table(cf_get_symbol("master6"), NET_IP6
);
1751 * Some functions for handling internal next hop updates
1752 * triggered by rt_schedule_nhu().
1756 rta_next_hop_outdated(rta
*a
)
1758 struct hostentry
*he
= a
->hostentry
;
1764 return a
->dest
!= RTD_UNREACHABLE
;
1766 return (a
->iface
!= he
->src
->iface
) || !ipa_equal(a
->gw
, he
->gw
) ||
1767 (a
->dest
!= he
->dest
) || (a
->igp_metric
!= he
->igp_metric
) ||
1768 !mpnh_same(a
->nexthops
, he
->src
->nexthops
);
1772 rta_apply_hostentry(rta
*a
, struct hostentry
*he
)
1775 a
->iface
= he
->src
? he
->src
->iface
: NULL
;
1778 a
->igp_metric
= he
->igp_metric
;
1779 a
->nexthops
= he
->src
? he
->src
->nexthops
: NULL
;
1783 rt_next_hop_update_rte(rtable
*tab
, rte
*old
)
1786 memcpy(&a
, old
->attrs
, sizeof(rta
));
1787 rta_apply_hostentry(&a
, old
->attrs
->hostentry
);
1790 rte
*e
= sl_alloc(rte_slab
);
1791 memcpy(e
, old
, sizeof(rte
));
1792 e
->attrs
= rta_lookup(&a
);
1798 rt_next_hop_update_net(rtable
*tab
, net
*n
)
1800 rte
**k
, *e
, *new, *old_best
, **new_best
;
1802 int free_old_best
= 0;
1804 old_best
= n
->routes
;
1808 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1809 if (rta_next_hop_outdated(e
->attrs
))
1811 new = rt_next_hop_update_rte(tab
, e
);
1814 rte_announce_i(tab
, RA_ANY
, n
, new, e
, NULL
, NULL
);
1815 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated");
1817 /* Call a pre-comparison hook */
1818 /* Not really an efficient way to compute this */
1819 if (e
->attrs
->src
->proto
->rte_recalculate
)
1820 e
->attrs
->src
->proto
->rte_recalculate(tab
, n
, new, e
, NULL
);
1824 else /* Freeing of the old best rte is postponed */
1834 /* Find the new best route */
1836 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1838 if (!new_best
|| rte_better(e
, *new_best
))
1842 /* Relink the new best route to the first position */
1844 if (new != n
->routes
)
1846 *new_best
= new->next
;
1847 new->next
= n
->routes
;
1851 /* Announce the new best route */
1852 if (new != old_best
)
1854 rte_announce_i(tab
, RA_OPTIMAL
, n
, new, old_best
, NULL
, NULL
);
1855 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated [best]");
1858 /* FIXME: Better announcement of merged routes */
1859 rte_announce_i(tab
, RA_MERGED
, n
, new, old_best
, new, old_best
);
1862 rte_free_quick(old_best
);
1868 rt_next_hop_update(rtable
*tab
)
1870 struct fib_iterator
*fit
= &tab
->nhu_fit
;
1873 if (tab
->nhu_state
== 0)
1876 if (tab
->nhu_state
== 1)
1878 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1882 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1886 FIB_ITERATE_PUT(fit
);
1887 ev_schedule(tab
->rt_event
);
1890 max_feed
-= rt_next_hop_update_net(tab
, n
);
1894 /* state change 2->0, 3->1 */
1895 tab
->nhu_state
&= 1;
1897 if (tab
->nhu_state
> 0)
1898 ev_schedule(tab
->rt_event
);
1902 struct rtable_config
*
1903 rt_new_table(struct symbol
*s
, uint addr_type
)
1905 /* Hack that allows to 'redefine' the master table */
1906 if ((s
->class == SYM_TABLE
) &&
1907 (s
->def
== new_config
->def_tables
[addr_type
]) &&
1908 ((addr_type
== NET_IP4
) || (addr_type
== NET_IP6
)))
1911 struct rtable_config
*c
= cfg_allocz(sizeof(struct rtable_config
));
1913 cf_define_symbol(s
, SYM_TABLE
, c
);
1915 c
->addr_type
= addr_type
;
1916 c
->gc_max_ops
= 1000;
1919 add_tail(&new_config
->tables
, &c
->n
);
1921 /* First table of each type is kept as default */
1922 if (! new_config
->def_tables
[addr_type
])
1923 new_config
->def_tables
[addr_type
] = c
;
1929 * rt_lock_table - lock a routing table
1930 * @r: routing table to be locked
1932 * Lock a routing table, because it's in use by a protocol,
1933 * preventing it from being freed when it gets undefined in a new configuration.
1937 rt_lock_table(rtable
*r
)
1943 * rt_unlock_table - unlock a routing table
1944 * @r: routing table to be unlocked
1946 * Unlock a routing table formerly locked by rt_lock_table(),
1947 * that is decrease its use count and delete it if it's scheduled
1948 * for deletion by configuration changes.
1951 rt_unlock_table(rtable
*r
)
1953 if (!--r
->use_count
&& r
->deleted
)
1955 struct config
*conf
= r
->deleted
;
1956 DBG("Deleting routing table %s\n", r
->name
);
1957 r
->config
->table
= NULL
;
1959 rt_free_hostcache(r
);
1964 config_del_obstacle(conf
);
1969 * rt_commit - commit new routing table configuration
1970 * @new: new configuration
1971 * @old: original configuration or %NULL if it's boot time config
1973 * Scan differences between @old and @new configuration and modify
1974 * the routing tables according to these changes. If @new defines a
1975 * previously unknown table, create it, if it omits a table existing
1976 * in @old, schedule it for deletion (it gets deleted when all protocols
1977 * disconnect from it by calling rt_unlock_table()), if it exists
1978 * in both configurations, leave it unchanged.
1981 rt_commit(struct config
*new, struct config
*old
)
1983 struct rtable_config
*o
, *r
;
1985 DBG("rt_commit:\n");
1988 WALK_LIST(o
, old
->tables
)
1990 rtable
*ot
= o
->table
;
1993 struct symbol
*sym
= cf_find_symbol(new, o
->name
);
1994 if (sym
&& sym
->class == SYM_TABLE
&& !new->shutdown
)
1996 DBG("\t%s: same\n", o
->name
);
2001 if (o
->sorted
!= r
->sorted
)
2002 log(L_WARN
"Reconfiguration of rtable sorted flag not implemented");
2006 DBG("\t%s: deleted\n", o
->name
);
2008 config_add_obstacle(old
);
2010 rt_unlock_table(ot
);
2016 WALK_LIST(r
, new->tables
)
2019 rtable
*t
= mb_alloc(rt_table_pool
, sizeof(struct rtable
));
2020 DBG("\t%s: created\n", r
->name
);
2021 rt_setup(rt_table_pool
, t
, r
->name
, r
);
2022 add_tail(&routing_tables
, &t
->n
);
2029 do_feed_channel(struct channel
*c
, net
*n
, rte
*e
)
2032 if (c
->ra_mode
== RA_ACCEPTED
)
2033 rt_notify_accepted(c
, n
, e
, NULL
, NULL
, c
->refeeding
? 2 : 1);
2034 else if (c
->ra_mode
== RA_MERGED
)
2035 rt_notify_merged(c
, n
, NULL
, NULL
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2037 rt_notify_basic(c
, n
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2038 rte_update_unlock();
2042 * rt_feed_channel - advertise all routes to a channel
2043 * @c: channel to be fed
2045 * This function performs one pass of advertisement of routes to a channel that
2046 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2047 * has something to do. (We avoid transferring all the routes in single pass in
2048 * order not to monopolize CPU time.)
2051 rt_feed_channel(struct channel
*c
)
2053 struct fib_iterator
*fit
= &c
->feed_fit
;
2056 ASSERT(c
->export_state
== ES_FEEDING
);
2058 if (!c
->feed_active
)
2060 FIB_ITERATE_INIT(fit
, &c
->table
->fib
);
2064 FIB_ITERATE_START(&c
->table
->fib
, fit
, net
, n
)
2069 FIB_ITERATE_PUT(fit
);
2073 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2075 if ((c
->ra_mode
== RA_OPTIMAL
) ||
2076 (c
->ra_mode
== RA_ACCEPTED
) ||
2077 (c
->ra_mode
== RA_MERGED
))
2078 if (rte_is_valid(e
))
2080 /* In the meantime, the protocol may fell down */
2081 if (c
->export_state
!= ES_FEEDING
)
2084 do_feed_channel(c
, n
, e
);
2088 if (c
->ra_mode
== RA_ANY
)
2089 for(e
= n
->routes
; e
; e
= e
->next
)
2091 /* In the meantime, the protocol may fell down */
2092 if (c
->export_state
!= ES_FEEDING
)
2095 if (!rte_is_valid(e
))
2098 do_feed_channel(c
, n
, e
);
2110 * rt_feed_channel_abort - abort protocol feeding
2113 * This function is called by the protocol code when the protocol stops or
2114 * ceases to exist during the feeding.
2117 rt_feed_channel_abort(struct channel
*c
)
2121 /* Unlink the iterator */
2122 fit_get(&c
->table
->fib
, &c
->feed_fit
);
2127 static inline unsigned
2130 uintptr_t p
= (uintptr_t) ptr
;
2131 return p
^ (p
<< 8) ^ (p
>> 16);
2135 hc_hash(ip_addr a
, rtable
*dep
)
2137 return ipa_hash(a
) ^ ptr_hash(dep
);
2141 hc_insert(struct hostcache
*hc
, struct hostentry
*he
)
2143 uint k
= he
->hash_key
>> hc
->hash_shift
;
2144 he
->next
= hc
->hash_table
[k
];
2145 hc
->hash_table
[k
] = he
;
2149 hc_remove(struct hostcache
*hc
, struct hostentry
*he
)
2151 struct hostentry
**hep
;
2152 uint k
= he
->hash_key
>> hc
->hash_shift
;
2154 for (hep
= &hc
->hash_table
[k
]; *hep
!= he
; hep
= &(*hep
)->next
);
2158 #define HC_DEF_ORDER 10
2159 #define HC_HI_MARK *4
2160 #define HC_HI_STEP 2
2161 #define HC_HI_ORDER 16 /* Must be at most 16 */
2162 #define HC_LO_MARK /5
2163 #define HC_LO_STEP 2
2164 #define HC_LO_ORDER 10
2167 hc_alloc_table(struct hostcache
*hc
, unsigned order
)
2169 unsigned hsize
= 1 << order
;
2170 hc
->hash_order
= order
;
2171 hc
->hash_shift
= 32 - order
;
2172 hc
->hash_max
= (order
>= HC_HI_ORDER
) ? ~0 : (hsize HC_HI_MARK
);
2173 hc
->hash_min
= (order
<= HC_LO_ORDER
) ? 0 : (hsize HC_LO_MARK
);
2175 hc
->hash_table
= mb_allocz(rt_table_pool
, hsize
* sizeof(struct hostentry
*));
2179 hc_resize(struct hostcache
*hc
, unsigned new_order
)
2181 unsigned old_size
= 1 << hc
->hash_order
;
2182 struct hostentry
**old_table
= hc
->hash_table
;
2183 struct hostentry
*he
, *hen
;
2186 hc_alloc_table(hc
, new_order
);
2187 for (i
= 0; i
< old_size
; i
++)
2188 for (he
= old_table
[i
]; he
!= NULL
; he
=hen
)
2196 static struct hostentry
*
2197 hc_new_hostentry(struct hostcache
*hc
, ip_addr a
, ip_addr ll
, rtable
*dep
, unsigned k
)
2199 struct hostentry
*he
= sl_alloc(hc
->slab
);
2208 add_tail(&hc
->hostentries
, &he
->ln
);
2212 if (hc
->hash_items
> hc
->hash_max
)
2213 hc_resize(hc
, hc
->hash_order
+ HC_HI_STEP
);
2219 hc_delete_hostentry(struct hostcache
*hc
, struct hostentry
*he
)
2225 sl_free(hc
->slab
, he
);
2228 if (hc
->hash_items
< hc
->hash_min
)
2229 hc_resize(hc
, hc
->hash_order
- HC_LO_STEP
);
2233 rt_init_hostcache(rtable
*tab
)
2235 struct hostcache
*hc
= mb_allocz(rt_table_pool
, sizeof(struct hostcache
));
2236 init_list(&hc
->hostentries
);
2239 hc_alloc_table(hc
, HC_DEF_ORDER
);
2240 hc
->slab
= sl_new(rt_table_pool
, sizeof(struct hostentry
));
2242 hc
->lp
= lp_new(rt_table_pool
, 1008);
2243 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2245 tab
->hostcache
= hc
;
2249 rt_free_hostcache(rtable
*tab
)
2251 struct hostcache
*hc
= tab
->hostcache
;
2254 WALK_LIST(n
, hc
->hostentries
)
2256 struct hostentry
*he
= SKIP_BACK(struct hostentry
, ln
, n
);
2260 log(L_ERR
"Hostcache is not empty in table %s", tab
->name
);
2265 mb_free(hc
->hash_table
);
2270 rt_notify_hostcache(rtable
*tab
, net
*net
)
2272 if (tab
->hcu_scheduled
)
2275 if (trie_match_net(tab
->hostcache
->trie
, net
->n
.addr
))
2276 rt_schedule_hcu(tab
);
2280 if_local_addr(ip_addr a
, struct iface
*i
)
2284 WALK_LIST(b
, i
->addrs
)
2285 if (ipa_equal(a
, b
->ip
))
2292 rt_get_igp_metric(rte
*rt
)
2294 eattr
*ea
= ea_find(rt
->attrs
->eattrs
, EA_GEN_IGP_METRIC
);
2302 if ((a
->source
== RTS_OSPF
) ||
2303 (a
->source
== RTS_OSPF_IA
) ||
2304 (a
->source
== RTS_OSPF_EXT1
))
2305 return rt
->u
.ospf
.metric1
;
2309 if (a
->source
== RTS_RIP
)
2310 return rt
->u
.rip
.metric
;
2314 if ((a
->dest
!= RTD_ROUTER
) && (a
->dest
!= RTD_MULTIPATH
))
2317 return IGP_METRIC_UNKNOWN
;
2321 rt_update_hostentry(rtable
*tab
, struct hostentry
*he
)
2323 rta
*old_src
= he
->src
;
2326 /* Reset the hostentry */
2329 he
->dest
= RTD_UNREACHABLE
;
2333 net_fill_ip_host(&he_addr
, he
->addr
);
2334 net
*n
= net_route(tab
, &he_addr
);
2339 pxlen
= n
->n
.addr
->pxlen
;
2343 /* Recursive route should not depend on another recursive route */
2344 log(L_WARN
"Next hop address %I resolvable through recursive route for %N",
2345 he
->addr
, n
->n
.addr
);
2349 if (a
->dest
== RTD_DEVICE
)
2351 if (if_local_addr(he
->addr
, a
->iface
))
2353 /* The host address is a local address, this is not valid */
2354 log(L_WARN
"Next hop address %I is a local address of iface %s",
2355 he
->addr
, a
->iface
->name
);
2359 /* The host is directly reachable, use link as a gateway */
2361 he
->dest
= RTD_ROUTER
;
2365 /* The host is reachable through some route entry */
2370 he
->src
= rta_clone(a
);
2371 he
->igp_metric
= rt_get_igp_metric(e
);
2375 /* Add a prefix range to the trie */
2376 trie_add_prefix(tab
->hostcache
->trie
, &he_addr
, pxlen
, he_addr
.pxlen
);
2379 return old_src
!= he
->src
;
2383 rt_update_hostcache(rtable
*tab
)
2385 struct hostcache
*hc
= tab
->hostcache
;
2386 struct hostentry
*he
;
2389 /* Reset the trie */
2391 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2393 WALK_LIST_DELSAFE(n
, x
, hc
->hostentries
)
2395 he
= SKIP_BACK(struct hostentry
, ln
, n
);
2398 hc_delete_hostentry(hc
, he
);
2402 if (rt_update_hostentry(tab
, he
))
2403 rt_schedule_nhu(he
->tab
);
2406 tab
->hcu_scheduled
= 0;
2409 static struct hostentry
*
2410 rt_get_hostentry(rtable
*tab
, ip_addr a
, ip_addr ll
, rtable
*dep
)
2412 struct hostentry
*he
;
2414 if (!tab
->hostcache
)
2415 rt_init_hostcache(tab
);
2417 u32 k
= hc_hash(a
, dep
);
2418 struct hostcache
*hc
= tab
->hostcache
;
2419 for (he
= hc
->hash_table
[k
>> hc
->hash_shift
]; he
!= NULL
; he
= he
->next
)
2420 if (ipa_equal(he
->addr
, a
) && (he
->tab
== dep
))
2423 he
= hc_new_hostentry(hc
, a
, ll
, dep
, k
);
2424 rt_update_hostentry(tab
, he
);
2429 rta_set_recursive_next_hop(rtable
*dep
, rta
*a
, rtable
*tab
, ip_addr
*gw
, ip_addr
*ll
)
2431 rta_apply_hostentry(a
, rt_get_hostentry(tab
, *gw
, *ll
, dep
));
2440 rt_format_via(rte
*e
)
2444 /* Max text length w/o IP addr and interface name is 16 */
2445 static byte via
[IPA_MAX_TEXT_LENGTH
+sizeof(a
->iface
->name
)+16];
2449 case RTD_ROUTER
: bsprintf(via
, "via %I on %s", a
->gw
, a
->iface
->name
); break;
2450 case RTD_DEVICE
: bsprintf(via
, "dev %s", a
->iface
->name
); break;
2451 case RTD_BLACKHOLE
: bsprintf(via
, "blackhole"); break;
2452 case RTD_UNREACHABLE
: bsprintf(via
, "unreachable"); break;
2453 case RTD_PROHIBIT
: bsprintf(via
, "prohibited"); break;
2454 case RTD_MULTIPATH
: bsprintf(via
, "multipath"); break;
2455 default: bsprintf(via
, "???");
2461 rt_show_rte(struct cli
*c
, byte
*ia
, rte
*e
, struct rt_show_data
*d
, ea_list
*tmpa
)
2463 byte from
[IPA_MAX_TEXT_LENGTH
+8];
2464 byte tm
[TM_DATETIME_BUFFER_SIZE
], info
[256];
2466 int primary
= (e
->net
->routes
== e
);
2467 int sync_error
= (e
->net
->n
.flags
& KRF_SYNC_ERROR
);
2468 void (*get_route_info
)(struct rte
*, byte
*buf
, struct ea_list
*attrs
);
2471 tm_format_datetime(tm
, &config
->tf_route
, e
->lastmod
);
2472 if (ipa_nonzero(a
->from
) && !ipa_equal(a
->from
, a
->gw
))
2473 bsprintf(from
, " from %I", a
->from
);
2477 get_route_info
= a
->src
->proto
->proto
->get_route_info
;
2478 if (get_route_info
|| d
->verbose
)
2480 /* Need to normalize the extended attributes */
2482 t
= ea_append(t
, a
->eattrs
);
2483 tmpa
= alloca(ea_scan(t
));
2488 get_route_info(e
, info
, tmpa
);
2490 bsprintf(info
, " (%d)", e
->pref
);
2491 cli_printf(c
, -1007, "%-18s %s [%s %s%s]%s%s", ia
, rt_format_via(e
), a
->src
->proto
->name
,
2492 tm
, from
, primary
? (sync_error
? " !" : " *") : "", info
);
2493 for (nh
= a
->nexthops
; nh
; nh
= nh
->next
)
2494 cli_printf(c
, -1007, "\tvia %I on %s weight %d", nh
->gw
, nh
->iface
->name
, nh
->weight
+ 1);
2496 rta_show(c
, a
, tmpa
);
2500 rt_show_net(struct cli
*c
, net
*n
, struct rt_show_data
*d
)
2503 byte ia
[NET_MAX_TEXT_LENGTH
+1];
2504 struct ea_list
*tmpa
;
2505 struct channel
*ec
= d
->export_channel
;
2509 bsprintf(ia
, "%N", n
->n
.addr
);
2512 for (e
= n
->routes
; e
; e
= e
->next
)
2514 if (rte_is_filtered(e
) != d
->filtered
)
2518 d
->net_counter
+= first
;
2525 rte_update_lock(); /* We use the update buffer for filtering */
2526 tmpa
= make_tmp_attrs(e
, rte_update_pool
);
2528 /* Special case for merged export */
2529 if ((d
->export_mode
== RSEM_EXPORT
) && (ec
->ra_mode
== RA_MERGED
))
2532 e
= rt_export_merged(ec
, n
, &rt_free
, &tmpa
, rte_update_pool
, 1);
2536 { e
= ee
; goto skip
; }
2538 else if (d
->export_mode
)
2540 struct proto
*ep
= d
->export_protocol
;
2541 int ic
= ep
->import_control
? ep
->import_control(ep
, &e
, &tmpa
, rte_update_pool
) : 0;
2543 if (ec
->ra_mode
== RA_OPTIMAL
|| ec
->ra_mode
== RA_MERGED
)
2549 if (d
->export_mode
> RSEM_PREEXPORT
)
2552 * FIXME - This shows what should be exported according to current
2553 * filters, but not what was really exported. 'configure soft'
2554 * command may change the export filter and do not update routes.
2556 int do_export
= (ic
> 0) ||
2557 (f_run(ec
->out_filter
, &e
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) <= F_ACCEPT
);
2559 if (do_export
!= (d
->export_mode
== RSEM_EXPORT
))
2562 if ((d
->export_mode
== RSEM_EXPORT
) && (ec
->ra_mode
== RA_ACCEPTED
))
2567 if (d
->show_protocol
&& (d
->show_protocol
!= e
->attrs
->src
->proto
))
2570 if (f_run(d
->filter
, &e
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) > F_ACCEPT
)
2575 rt_show_rte(c
, ia
, e
, d
, tmpa
);
2584 rte_update_unlock();
2586 if (d
->primary_only
)
2591 static struct channel
*
2592 rt_show_export_channel(struct rt_show_data
*d
)
2594 if (! d
->export_protocol
->rt_notify
)
2597 return proto_find_channel_by_table(d
->export_protocol
, d
->table
);
2601 rt_show_cont(struct cli
*c
)
2603 struct rt_show_data
*d
= c
->rover
;
2609 struct fib
*fib
= &d
->table
->fib
;
2610 struct fib_iterator
*it
= &d
->fit
;
2614 /* Ensure we have current export channel */
2615 d
->export_channel
= rt_show_export_channel(d
);
2616 if (!d
->export_channel
|| (d
->export_channel
->export_state
== ES_DOWN
))
2618 cli_printf(c
, 8005, "Channel is down");
2623 FIB_ITERATE_START(fib
, it
, net
, n
)
2627 FIB_ITERATE_PUT(it
);
2630 rt_show_net(c
, n
, d
);
2634 cli_printf(c
, 14, "%d of %d routes for %d networks", d
->show_counter
, d
->rt_counter
, d
->net_counter
);
2636 cli_printf(c
, 0, "");
2638 c
->cont
= c
->cleanup
= NULL
;
2642 rt_show_cleanup(struct cli
*c
)
2644 struct rt_show_data
*d
= c
->rover
;
2646 /* Unlink the iterator */
2647 fit_get(&d
->table
->fib
, &d
->fit
);
2650 static inline rtable
*
2651 rt_show_get_table(struct proto
*p
)
2653 /* FIXME: Use a better way to handle multi-channel protocols */
2655 if (p
->main_channel
)
2656 return p
->main_channel
->table
;
2658 if (!EMPTY_LIST(p
->channels
))
2659 return ((struct channel
*) HEAD(p
->channels
))->table
;
2665 rt_show(struct rt_show_data
*d
)
2669 /* Default is either a master table or a table related to a respective protocol */
2670 if (!d
->table
&& d
->export_protocol
) d
->table
= rt_show_get_table(d
->export_protocol
);
2671 if (!d
->table
&& d
->show_protocol
) d
->table
= rt_show_get_table(d
->show_protocol
);
2672 if (!d
->table
) d
->table
= config
->def_tables
[NET_IP4
]->table
; /* FIXME: iterate through all tables ? */
2674 /* Filtered routes are neither exported nor have sensible ordering */
2675 if (d
->filtered
&& (d
->export_mode
|| d
->primary_only
))
2680 FIB_ITERATE_INIT(&d
->fit
, &d
->table
->fib
);
2681 this_cli
->cont
= rt_show_cont
;
2682 this_cli
->cleanup
= rt_show_cleanup
;
2683 this_cli
->rover
= d
;
2689 /* Find channel associated with the export protocol */
2690 d
->export_channel
= rt_show_export_channel(d
);
2691 if (!d
->export_channel
|| (d
->export_channel
->export_state
== ES_DOWN
))
2693 cli_msg(8005, "Channel is down");
2699 n
= net_route(d
->table
, d
->addr
);
2701 n
= net_find(d
->table
, d
->addr
);
2704 rt_show_net(this_cli
, n
, d
);
2709 cli_msg(8001, "Network not in table");
2714 * Documentation for functions declared inline in route.h
2719 * net_find - find a network entry
2720 * @tab: a routing table
2721 * @addr: address of the network
2723 * net_find() looks up the given network in routing table @tab and
2724 * returns a pointer to its &net entry or %NULL if no such network exists.
2727 static inline net
*net_find(rtable
*tab
, net_addr
*addr
)
2731 * net_get - obtain a network entry
2732 * @tab: a routing table
2733 * @addr: address of the network
2735 * net_get() looks up the given network in routing table @tab and
2736 * returns a pointer to its &net entry. If no such entry exists, it's created.
2739 static inline net
*net_get(rtable
*tab
, net_addr
*addr
)
2743 * rte_cow - copy a route for writing
2744 * @r: a route entry to be copied
2746 * rte_cow() takes a &rte and prepares it for modification. The exact action
2747 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2748 * just returned unchanged, else a new temporary entry with the same contents is created.
2751 * The primary use of this function is inside the filter machinery -- when
2752 * a filter wants to modify &rte contents (to change the preference or to
2753 * attach another set of attributes), it must ensure that the &rte is not
2754 * shared with anyone else (and especially that it isn't stored in any routing table).
2757 * Result: a pointer to the new writable &rte.
2759 static inline rte
* rte_cow(rte
*r
)