]>
git.ipfire.org Git - thirdparty/bird.git/blob - nest/rt-table.c
2 * BIRD -- Routing Tables
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
37 #include "nest/iface.h"
38 #include "lib/resource.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "conf/conf.h"
42 #include "filter/filter.h"
43 #include "lib/string.h"
44 #include "lib/alloca.h"
48 static slab
*rte_slab
;
49 static linpool
*rte_update_pool
;
51 static list routing_tables
;
53 static void rt_format_via(rte
*e
, byte
*via
);
54 static void rt_free_hostcache(rtable
*tab
);
55 static void rt_notify_hostcache(rtable
*tab
, net
*net
);
56 static void rt_update_hostcache(rtable
*tab
);
57 static void rt_next_hop_update(rtable
*tab
);
58 static inline void rt_prune_table(rtable
*tab
);
61 static inline struct ea_list
*
62 make_tmp_attrs(struct rte
*rt
, struct linpool
*pool
)
64 struct ea_list
*(*mta
)(struct rte
*rt
, struct linpool
*pool
);
65 mta
= rt
->attrs
->src
->proto
->make_tmp_attrs
;
66 return mta
? mta(rt
, rte_update_pool
) : NULL
;
70 /* Like fib_route(), but skips empty net entries */
72 net_route_ip4(struct fib
*f
, net_addr_ip4
*n
)
76 while (r
= fib_find(f
, (net_addr
*) n
),
77 !(r
&& rte_is_valid(r
->routes
)) && (n
->pxlen
> 0))
80 ip4_clrbit(&n
->prefix
, n
->pxlen
);
87 net_route_ip6(struct fib
*f
, net_addr_ip6
*n
)
91 while (r
= fib_find(f
, (net_addr
*) n
),
92 !(r
&& rte_is_valid(r
->routes
)) && (n
->pxlen
> 0))
95 ip6_clrbit(&n
->prefix
, n
->pxlen
);
/*
 * Longest-prefix-match lookup of @n in @tab, dispatching to the
 * per-address-family helper above.
 *
 * NOTE(review): this block is an incomplete extraction fragment -- the
 * lines between the visible statements (copying @n into the writable
 * buffer @n0 and the switch on n->type that selects a case before each
 * return) are missing from this view; do not treat it as complete.
 */
102 net_route(rtable
*tab
, const net_addr
*n
)
/* Table and queried address must agree on network type */
104 ASSERT(tab
->addr_type
== n
->type
);
/* Writable stack copy: the helpers shorten the prefix in place */
106 net_addr
*n0
= alloca(n
->length
);
114 return net_route_ip4(&tab
->fib
, (net_addr_ip4
*) n0
);
119 return net_route_ip6(&tab
->fib
, (net_addr_ip6
*) n0
);
128 net_roa_check_ip4(rtable
*tab
, const net_addr_ip4
*px
, u32 asn
)
130 struct net_addr_roa4 n
= NET_ADDR_ROA4(px
->prefix
, px
->pxlen
, 0, 0);
136 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
138 net_addr_roa4
*roa
= (void *) fn
->addr
;
139 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
141 if (net_equal_prefix_roa4(roa
, &n
) && rte_is_valid(r
->routes
))
144 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
153 ip4_clrbit(&n
.prefix
, n
.pxlen
);
156 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
160 net_roa_check_ip6(rtable
*tab
, const net_addr_ip6
*px
, u32 asn
)
162 struct net_addr_roa6 n
= NET_ADDR_ROA6(px
->prefix
, px
->pxlen
, 0, 0);
168 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
170 net_addr_roa6
*roa
= (void *) fn
->addr
;
171 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
173 if (net_equal_prefix_roa6(roa
, &n
) && rte_is_valid(r
->routes
))
176 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
185 ip6_clrbit(&n
.prefix
, n
.pxlen
);
188 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
192 * roa_check - check validity of route origination in a ROA table
194 * @n: network prefix to check
195 * @asn: AS number of network prefix
197 * Implements RFC 6483 route validation for the given network prefix. The
198 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
199 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
200 * a candidate ROA with matching ASN and maxlen field greater than or equal to
201 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
202 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
203 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
204 * must have type NET_IP4 or NET_IP6, respectively.
207 net_roa_check(rtable
*tab
, const net_addr
*n
, u32 asn
)
209 if ((tab
->addr_type
== NET_ROA4
) && (n
->type
== NET_IP4
))
210 return net_roa_check_ip4(tab
, (const net_addr_ip4
*) n
, asn
);
211 else if ((tab
->addr_type
== NET_ROA6
) && (n
->type
== NET_IP6
))
212 return net_roa_check_ip6(tab
, (const net_addr_ip6
*) n
, asn
);
214 return ROA_UNKNOWN
; /* Should not happen */
218 * rte_find - find a route
222 * The rte_find() function returns a route for destination @net
223 * which is from route source @src.
226 rte_find(net
*net
, struct rte_src
*src
)
228 rte
*e
= net
->routes
;
230 while (e
&& e
->attrs
->src
!= src
)
236 * rte_get_temp - get a temporary &rte
237 * @a: attributes to assign to the new route (a &rta; in case it's
238 * un-cached, rte_update() will create a cached copy automatically)
240 * Create a temporary &rte and bind it with the attributes @a.
241 * Also set route preference to the default preference set for
247 rte
*e
= sl_alloc(rte_slab
);
258 rte
*e
= sl_alloc(rte_slab
);
260 memcpy(e
, r
, sizeof(rte
));
261 e
->attrs
= rta_clone(r
->attrs
);
267 * rte_cow_rta - get a private writable copy of &rte with writable &rta
268 * @r: a route entry to be copied
269 * @lp: a linpool from which to allocate &rta
271 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
272 * modification. There are three possibilities: First, both &rte and &rta are
273 * private copies, in that case they are returned unchanged. Second, &rte is
274 * private copy, but &rta is cached, in that case &rta is duplicated using
275 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
276 * both structures are duplicated by rte_do_cow() and rta_do_cow().
278 * Note that in the second case, cached &rta loses one reference, while private
279 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
280 * nexthops, ...) with it. To work properly, original shared &rta should have
281 * another reference during the life of created private copy.
283 * Result: a pointer to the new writable &rte with writable &rta.
286 rte_cow_rta(rte
*r
, linpool
*lp
)
288 if (!rta_is_cached(r
->attrs
))
292 rta
*a
= rta_do_cow(r
->attrs
, lp
);
298 static int /* Actually better or at least as good as */
299 rte_better(rte
*new, rte
*old
)
301 int (*better
)(rte
*, rte
*);
303 if (!rte_is_valid(old
))
305 if (!rte_is_valid(new))
308 if (new->pref
> old
->pref
)
310 if (new->pref
< old
->pref
)
312 if (new->attrs
->src
->proto
->proto
!= old
->attrs
->src
->proto
->proto
)
315 * If the user has configured protocol preferences, so that two different protocols
316 * have the same preference, try to break the tie by comparing addresses. Not too
317 * useful, but keeps the ordering of routes unambiguous.
319 return new->attrs
->src
->proto
->proto
> old
->attrs
->src
->proto
->proto
;
321 if (better
= new->attrs
->src
->proto
->rte_better
)
322 return better(new, old
);
327 rte_mergable(rte
*pri
, rte
*sec
)
329 int (*mergable
)(rte
*, rte
*);
331 if (!rte_is_valid(pri
) || !rte_is_valid(sec
))
334 if (pri
->pref
!= sec
->pref
)
337 if (pri
->attrs
->src
->proto
->proto
!= sec
->attrs
->src
->proto
->proto
)
340 if (mergable
= pri
->attrs
->src
->proto
->rte_mergable
)
341 return mergable(pri
, sec
);
347 rte_trace(struct proto
*p
, rte
*e
, int dir
, char *msg
)
349 byte via
[IPA_MAX_TEXT_LENGTH
+32];
351 rt_format_via(e
, via
);
352 log(L_TRACE
"%s %c %s %N %s", p
->name
, dir
, msg
, e
->net
->n
.addr
, via
);
356 rte_trace_in(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
359 rte_trace(p
, e
, '>', msg
);
363 rte_trace_out(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
366 rte_trace(p
, e
, '<', msg
);
370 export_filter(struct channel
*c
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, int silent
)
372 struct proto
*p
= c
->proto
;
373 struct filter
*filter
= c
->out_filter
;
374 struct proto_stats
*stats
= &c
->stats
;
375 ea_list
*tmpb
= NULL
;
385 *tmpa
= make_tmp_attrs(rt
, rte_update_pool
);
387 v
= p
->import_control
? p
->import_control(p
, &rt
, tmpa
, rte_update_pool
) : 0;
393 stats
->exp_updates_rejected
++;
395 rte_trace_out(D_FILTERS
, p
, rt
, "rejected by protocol");
401 rte_trace_out(D_FILTERS
, p
, rt
, "forced accept by protocol");
405 v
= filter
&& ((filter
== FILTER_REJECT
) ||
406 (f_run(filter
, &rt
, tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) > F_ACCEPT
));
412 stats
->exp_updates_filtered
++;
413 rte_trace_out(D_FILTERS
, p
, rt
, "filtered out");
423 /* Discard temporary rte */
430 do_rt_notify(struct channel
*c
, net
*net
, rte
*new, rte
*old
, ea_list
*tmpa
, int refeed
)
432 struct proto
*p
= c
->proto
;
433 struct proto_stats
*stats
= &c
->stats
;
437 * First, apply export limit.
439 * Export route limits has several problems. Because exp_routes
440 * counter is reset before refeed, we don't really know whether
441 * limit is breached and whether the update is new or not. Therefore
442 * the number of really exported routes may exceed the limit
443 * temporarily (routes exported before and new routes in refeed).
445 * Minor advantage is that if the limit is decreased and refeed is
446 * requested, the number of exported routes really decrease.
448 * Second problem is that with export limits, we don't know whether
449 * old was really exported (it might be blocked by limit). When a
450 * withdraw is exported, we announce it even when the previous
451 * update was blocked. This is not a big issue, but the same problem
452 * is in updating exp_routes counter. Therefore, to be consistent in
453 * increases and decreases of exp_routes, we count exported routes
454 * regardless of blocking by limits.
456 * Similar problem is in handling updates - when a new route is
457 * received and blocking is active, the route would be blocked, but
458 * when an update for the route will be received later, the update
459 * would be propagated (as old != NULL). Therefore, we have to block
460 * also non-new updates (contrary to import blocking).
463 struct channel_limit
*l
= &c
->out_limit
;
464 if (l
->action
&& new)
466 if ((!old
|| refeed
) && (stats
->exp_routes
>= l
->limit
))
467 channel_notify_limit(c
, l
, PLD_OUT
, stats
->exp_routes
);
469 if (l
->state
== PLS_BLOCKED
)
471 stats
->exp_routes
++; /* see note above */
472 stats
->exp_updates_rejected
++;
473 rte_trace_out(D_FILTERS
, p
, new, "rejected [limit]");
483 stats
->exp_updates_accepted
++;
485 stats
->exp_withdraws_accepted
++;
487 /* Hack: We do not decrease exp_routes during refeed, we instead
488 reset exp_routes at the start of refeed. */
494 if (p
->debug
& D_ROUTES
)
497 rte_trace_out(D_ROUTES
, p
, new, "replaced");
499 rte_trace_out(D_ROUTES
, p
, new, "added");
501 rte_trace_out(D_ROUTES
, p
, old
, "removed");
504 p
->rt_notify(p
, c
, net
, NULL
, old
, NULL
);
510 t
->next
= new->attrs
->eattrs
;
511 p
->rt_notify(p
, c
, net
, new, old
, tmpa
);
515 p
->rt_notify(p
, c
, net
, new, old
, new->attrs
->eattrs
);
519 rt_notify_basic(struct channel
*c
, net
*net
, rte
*new0
, rte
*old0
, int refeed
)
521 struct proto
*p
= c
->proto
;
525 rte
*new_free
= NULL
;
526 rte
*old_free
= NULL
;
527 ea_list
*tmpa
= NULL
;
530 c
->stats
.exp_updates_received
++;
532 c
->stats
.exp_withdraws_received
++;
535 * This is a tricky part - we don't know whether route 'old' was
536 * exported to protocol 'p' or was filtered by the export filter.
537 * We try to run the export filter to know this to have a correct
538 * value in 'old' argument of rte_update (and proper filter value)
540 * FIXME - this is broken because 'configure soft' may change
541 * filters but keep routes. Refeed is expected to be called after
542 * change of the filters and with old == new, therefore we do not
543 * even try to run the filter on an old route, This may lead to
544 * 'spurious withdraws' but ensure that there are no 'missing
547 * This is not completely safe as there is a window between
548 * reconfiguration and the end of refeed - if a newly filtered
549 * route disappears during this period, proper withdraw is not
550 * sent (because old would be also filtered) and the route is
551 * not refeeded (because it disappeared before that).
555 new = export_filter(c
, new, &new_free
, &tmpa
, 0);
558 old
= export_filter(c
, old
, &old_free
, NULL
, 1);
563 * As mentioned above, 'old' value may be incorrect in some race conditions.
564 * We generally ignore it with the exception of withdraw to pipe protocol.
565 * In that case we rather propagate unfiltered withdraws regardless of
566 * export filters to ensure that when a protocol is flushed, its routes are
567 * removed from all tables. Possible spurious unfiltered withdraws are not
568 * problem here as they are ignored if there is no corresponding route at
569 * the other end of the pipe. We directly call rt_notify() hook instead of
570 * do_rt_notify() to avoid logging and stat counters.
574 if ((p
->proto
== &proto_pipe
) && !new0
&& (p
!= old0
->sender
->proto
))
575 p
->rt_notify(p
, c
, net
, NULL
, old0
, NULL
);
581 do_rt_notify(c
, net
, new, old
, tmpa
, refeed
);
583 /* Discard temporary rte's */
591 rt_notify_accepted(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
, rte
*before_old
, int feed
)
593 // struct proto *p = c->proto;
596 rte
*new_best
= NULL
;
597 rte
*old_best
= NULL
;
598 rte
*new_free
= NULL
;
599 rte
*old_free
= NULL
;
600 ea_list
*tmpa
= NULL
;
602 /* Used to track whether we met old_changed position. If before_old is NULL
603 old_changed was the first and we met it implicitly before current best route. */
604 int old_meet
= old_changed
&& !before_old
;
606 /* Note that before_old is either NULL or valid (not rejected) route.
607 If old_changed is valid, before_old have to be too. If old changed route
608 was not valid, caller must use NULL for both old_changed and before_old. */
611 c
->stats
.exp_updates_received
++;
613 c
->stats
.exp_withdraws_received
++;
615 /* First, find the new_best route - first accepted by filters */
616 for (r
=net
->routes
; rte_is_valid(r
); r
=r
->next
)
618 if (new_best
= export_filter(c
, r
, &new_free
, &tmpa
, 0))
621 /* Note if we walked around the position of old_changed route */
627 * Second, handle the feed case. That means we do not care for
628 * old_best. It is NULL for feed, and the new_best for refeed.
629 * For refeed, there is a hack similar to one in rt_notify_basic()
630 * to ensure withdraws in case of changed filters
634 if (feed
== 2) /* refeed */
635 old_best
= new_best
? new_best
:
636 (rte_is_valid(net
->routes
) ? net
->routes
: NULL
);
640 if (!new_best
&& !old_best
)
647 * Now, we find the old_best route. Generally, it is the same as the
648 * new_best, unless new_best is the same as new_changed or
649 * old_changed is accepted before new_best.
651 * There are four cases:
653 * - We would find and accept old_changed before new_best, therefore
654 * old_changed is old_best. In remaining cases we suppose this
657 * - We found no new_best, therefore there is also no old_best and
658 * we ignore this withdraw.
660 * - We found new_best different than new_changed, therefore
661 * old_best is the same as new_best and we ignore this update.
663 * - We found new_best the same as new_changed, therefore it cannot
664 * be old_best and we have to continue search for old_best.
669 if (old_best
= export_filter(c
, old_changed
, &old_free
, NULL
, 1))
676 /* Third case, we use r instead of new_best, because export_filter() could change it */
677 if (r
!= new_changed
)
685 for (r
=r
->next
; rte_is_valid(r
); r
=r
->next
)
687 if (old_best
= export_filter(c
, r
, &old_free
, NULL
, 1))
691 if (old_best
= export_filter(c
, old_changed
, &old_free
, NULL
, 1))
695 /* Implicitly, old_best is NULL and new_best is non-NULL */
698 do_rt_notify(c
, net
, new_best
, old_best
, tmpa
, (feed
== 2));
700 /* Discard temporary rte's */
709 mpnh_merge_rta(struct mpnh
*nhs
, rta
*a
, int max
)
711 struct mpnh nh
= { .gw
= a
->gw
, .iface
= a
->iface
};
712 struct mpnh
*nh2
= (a
->dest
== RTD_MULTIPATH
) ? a
->nexthops
: &nh
;
713 return mpnh_merge(nhs
, nh2
, 1, 0, max
, rte_update_pool
);
717 rt_export_merged(struct channel
*c
, net
*net
, rte
**rt_free
, ea_list
**tmpa
, int silent
)
719 // struct proto *p = c->proto;
720 struct mpnh
*nhs
= NULL
;
721 rte
*best0
, *best
, *rt0
, *rt
, *tmp
;
726 if (!rte_is_valid(best0
))
729 best
= export_filter(c
, best0
, rt_free
, tmpa
, silent
);
731 if (!best
|| !rte_is_reachable(best
))
734 for (rt0
= best0
->next
; rt0
; rt0
= rt0
->next
)
736 if (!rte_mergable(best0
, rt0
))
739 rt
= export_filter(c
, rt0
, &tmp
, NULL
, 1);
744 if (rte_is_reachable(rt
))
745 nhs
= mpnh_merge_rta(nhs
, rt
->attrs
, c
->merge_limit
);
753 nhs
= mpnh_merge_rta(nhs
, best
->attrs
, c
->merge_limit
);
757 best
= rte_cow_rta(best
, rte_update_pool
);
758 best
->attrs
->dest
= RTD_MULTIPATH
;
759 best
->attrs
->nexthops
= nhs
;
771 rt_notify_merged(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
,
772 rte
*new_best
, rte
*old_best
, int refeed
)
774 // struct proto *p = c->proto;
776 rte
*new_best_free
= NULL
;
777 rte
*old_best_free
= NULL
;
778 rte
*new_changed_free
= NULL
;
779 rte
*old_changed_free
= NULL
;
780 ea_list
*tmpa
= NULL
;
782 /* We assume that all rte arguments are either NULL or rte_is_valid() */
784 /* This check should be done by the caller */
785 if (!new_best
&& !old_best
)
788 /* Check whether the change is relevant to the merged route */
789 if ((new_best
== old_best
) && !refeed
)
791 new_changed
= rte_mergable(new_best
, new_changed
) ?
792 export_filter(c
, new_changed
, &new_changed_free
, NULL
, 1) : NULL
;
794 old_changed
= rte_mergable(old_best
, old_changed
) ?
795 export_filter(c
, old_changed
, &old_changed_free
, NULL
, 1) : NULL
;
797 if (!new_changed
&& !old_changed
)
802 c
->stats
.exp_updates_received
++;
804 c
->stats
.exp_withdraws_received
++;
806 /* Prepare new merged route */
808 new_best
= rt_export_merged(c
, net
, &new_best_free
, &tmpa
, 0);
810 /* Prepare old merged route (without proper merged next hops) */
811 /* There are some issues with running filter on old route - see rt_notify_basic() */
812 if (old_best
&& !refeed
)
813 old_best
= export_filter(c
, old_best
, &old_best_free
, NULL
, 1);
815 if (new_best
|| old_best
)
816 do_rt_notify(c
, net
, new_best
, old_best
, tmpa
, refeed
);
818 /* Discard temporary rte's */
820 rte_free(new_best_free
);
822 rte_free(old_best_free
);
823 if (new_changed_free
)
824 rte_free(new_changed_free
);
825 if (old_changed_free
)
826 rte_free(old_changed_free
);
831 * rte_announce - announce a routing table change
832 * @tab: table the route has been added to
833 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
834 * @net: network in question
835 * @new: the new route to be announced
836 * @old: the previous route for the same network
838 * This function gets a routing table update and announces it
 * to all protocols that accept the given type of route announcement
840 * and are connected to the same table by their announcement hooks.
 * Route announcement of type RA_OPTIMAL is generated when optimal
843 * route (in routing table @tab) changes. In that case @old stores the
 * Route announcement of type RA_ANY is generated when any route (in
 * routing table @tab) changes. In that case @old stores the old route
 * from the same protocol.
850 * For each appropriate protocol, we first call its import_control()
851 * hook which performs basic checks on the route (each protocol has a
852 * right to veto or force accept of the route before any filter is
853 * asked) and adds default values of attributes specific to the new
854 * protocol (metrics, tags etc.). Then it consults the protocol's
855 * export filter and if it accepts the route, the rt_notify() hook of
856 * the protocol gets called.
859 rte_announce(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
860 rte
*new_best
, rte
*old_best
, rte
*before_old
)
862 if (!rte_is_valid(new))
865 if (!rte_is_valid(old
))
866 old
= before_old
= NULL
;
868 if (!rte_is_valid(new_best
))
871 if (!rte_is_valid(old_best
))
877 if ((type
== RA_OPTIMAL
) && tab
->hostcache
)
878 rt_notify_hostcache(tab
, net
);
880 struct channel
*c
; node
*n
;
881 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
883 if (c
->export_state
== ES_DOWN
)
886 if (c
->ra_mode
== type
)
887 if (type
== RA_ACCEPTED
)
888 rt_notify_accepted(c
, net
, new, old
, before_old
, 0);
889 else if (type
== RA_MERGED
)
890 rt_notify_merged(c
, net
, new, old
, new_best
, old_best
, 0);
892 rt_notify_basic(c
, net
, new, old
, 0);
902 // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
903 if (!net_validate(n
->n
.addr
))
905 log(L_WARN
"Ignoring bogus prefix %N received via %s",
906 n
->n
.addr
, e
->sender
->proto
->name
);
910 c
= net_classify(n
->n
.addr
);
911 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
913 log(L_WARN
"Ignoring bogus route %N received via %s",
914 n
->n
.addr
, e
->sender
->proto
->name
);
922 * rte_free - delete a &rte
923 * @e: &rte to be deleted
925 * rte_free() deletes the given &rte from the routing table it's linked to.
930 if (rta_is_cached(e
->attrs
))
932 sl_free(rte_slab
, e
);
936 rte_free_quick(rte
*e
)
939 sl_free(rte_slab
, e
);
943 rte_same(rte
*x
, rte
*y
)
946 x
->attrs
== y
->attrs
&&
947 x
->flags
== y
->flags
&&
948 x
->pflags
== y
->pflags
&&
949 x
->pref
== y
->pref
&&
950 (!x
->attrs
->src
->proto
->rte_same
|| x
->attrs
->src
->proto
->rte_same(x
, y
));
953 static inline int rte_is_ok(rte
*e
) { return e
&& !rte_is_filtered(e
); }
956 rte_recalculate(struct channel
*c
, net
*net
, rte
*new, struct rte_src
*src
)
958 struct proto
*p
= c
->proto
;
959 struct rtable
*table
= c
->table
;
960 struct proto_stats
*stats
= &c
->stats
;
961 static struct tbf rl_pipe
= TBF_DEFAULT_LOG_LIMITS
;
962 rte
*before_old
= NULL
;
963 rte
*old_best
= net
->routes
;
967 k
= &net
->routes
; /* Find and remove original route from the same protocol */
970 if (old
->attrs
->src
== src
)
972 /* If there is the same route in the routing table but from
973 * a different sender, then there are two paths from the
974 * source protocol to this routing table through transparent
975 * pipes, which is not allowed.
977 * We log that and ignore the route. If it is withdraw, we
978 * ignore it completely (there might be 'spurious withdraws',
979 * see FIXME in do_rte_announce())
981 if (old
->sender
->proto
!= p
)
985 log_rl(&rl_pipe
, L_ERR
"Pipe collision detected when sending %N to table %s",
986 net
->n
.addr
, table
->name
);
992 if (new && rte_same(old
, new))
994 /* No changes, ignore the new route */
996 if (!rte_is_filtered(new))
998 stats
->imp_updates_ignored
++;
999 rte_trace_in(D_ROUTES
, p
, new, "ignored");
1002 rte_free_quick(new);
1017 stats
->imp_withdraws_ignored
++;
1021 int new_ok
= rte_is_ok(new);
1022 int old_ok
= rte_is_ok(old
);
1024 struct channel_limit
*l
= &c
->rx_limit
;
1025 if (l
->action
&& !old
&& new)
1027 u32 all_routes
= stats
->imp_routes
+ stats
->filt_routes
;
1029 if (all_routes
>= l
->limit
)
1030 channel_notify_limit(c
, l
, PLD_RX
, all_routes
);
1032 if (l
->state
== PLS_BLOCKED
)
1034 /* In receive limit the situation is simple, old is NULL so
1035 we just free new and exit like nothing happened */
1037 stats
->imp_updates_ignored
++;
1038 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1039 rte_free_quick(new);
1045 if (l
->action
&& !old_ok
&& new_ok
)
1047 if (stats
->imp_routes
>= l
->limit
)
1048 channel_notify_limit(c
, l
, PLD_IN
, stats
->imp_routes
);
1050 if (l
->state
== PLS_BLOCKED
)
1052 /* In import limit the situation is more complicated. We
1053 shouldn't just drop the route, we should handle it like
1054 it was filtered. We also have to continue the route
1055 processing if old or new is non-NULL, but we should exit
1056 if both are NULL as this case is probably assumed to be
1059 stats
->imp_updates_ignored
++;
1060 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1062 if (c
->in_keep_filtered
)
1063 new->flags
|= REF_FILTERED
;
1065 { rte_free_quick(new); new = NULL
; }
1067 /* Note that old && !new could be possible when
1068 c->in_keep_filtered changed in the recent past. */
1079 stats
->imp_updates_accepted
++;
1081 stats
->imp_withdraws_accepted
++;
1083 stats
->imp_withdraws_ignored
++;
1088 rte_is_filtered(new) ? stats
->filt_routes
++ : stats
->imp_routes
++;
1090 rte_is_filtered(old
) ? stats
->filt_routes
-- : stats
->imp_routes
--;
1092 if (table
->config
->sorted
)
1094 /* If routes are sorted, just insert new route to appropriate position */
1097 if (before_old
&& !rte_better(new, before_old
))
1098 k
= &before_old
->next
;
1102 for (; *k
; k
=&(*k
)->next
)
1103 if (rte_better(new, *k
))
1112 /* If routes are not sorted, find the best route and move it on
1113 the first position. There are several optimized cases. */
1115 if (src
->proto
->rte_recalculate
&& src
->proto
->rte_recalculate(table
, net
, new, old
, old_best
))
1116 goto do_recalculate
;
1118 if (new && rte_better(new, old_best
))
/* The first case - the new route is clearly optimal,
1121 we link it at the first position */
1123 new->next
= net
->routes
;
1126 else if (old
== old_best
)
1128 /* The second case - the old best route disappeared, we add the
1129 new route (if we have any) to the list (we don't care about
1130 position) and then we elect the new optimal route and relink
1131 that route at the first position and announce it. New optimal
1132 route might be NULL if there is no more routes */
1135 /* Add the new route to the list */
1138 new->next
= net
->routes
;
1142 /* Find a new optimal route (if there is any) */
1145 rte
**bp
= &net
->routes
;
1146 for (k
=&(*bp
)->next
; *k
; k
=&(*k
)->next
)
1147 if (rte_better(*k
, *bp
))
1153 best
->next
= net
->routes
;
1159 /* The third case - the new route is not better than the old
1160 best route (therefore old_best != NULL) and the old best
1161 route was not removed (therefore old_best == net->routes).
1162 We just link the new route after the old best route. */
1164 ASSERT(net
->routes
!= NULL
);
1165 new->next
= net
->routes
->next
;
1166 net
->routes
->next
= new;
1168 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1174 /* Log the route change */
1175 if (p
->debug
& D_ROUTES
)
1178 rte_trace(p
, new, '>', new == net
->routes
? "added [best]" : "added");
1181 if (old
!= old_best
)
1182 rte_trace(p
, old
, '>', "removed");
1183 else if (rte_is_ok(net
->routes
))
1184 rte_trace(p
, old
, '>', "removed [replaced]");
1186 rte_trace(p
, old
, '>', "removed [sole]");
1190 /* Propagate the route change */
1191 rte_announce(table
, RA_ANY
, net
, new, old
, NULL
, NULL
, NULL
);
1192 if (net
->routes
!= old_best
)
1193 rte_announce(table
, RA_OPTIMAL
, net
, net
->routes
, old_best
, NULL
, NULL
, NULL
);
1194 if (table
->config
->sorted
)
1195 rte_announce(table
, RA_ACCEPTED
, net
, new, old
, NULL
, NULL
, before_old
);
1196 rte_announce(table
, RA_MERGED
, net
, new, old
, net
->routes
, old_best
, NULL
);
1199 (table
->gc_counter
++ >= table
->config
->gc_max_ops
) &&
1200 (table
->gc_time
+ table
->config
->gc_min_time
<= now
))
1201 rt_schedule_prune(table
);
1203 if (old_ok
&& p
->rte_remove
)
1204 p
->rte_remove(net
, old
);
1205 if (new_ok
&& p
->rte_insert
)
1206 p
->rte_insert(net
, new);
1209 rte_free_quick(old
);
1212 static int rte_update_nest_cnt
; /* Nesting counter to allow recursive updates */
1215 rte_update_lock(void)
1217 rte_update_nest_cnt
++;
1221 rte_update_unlock(void)
1223 if (!--rte_update_nest_cnt
)
1224 lp_flush(rte_update_pool
);
1228 rte_hide_dummy_routes(net
*net
, rte
**dummy
)
1230 if (net
->routes
&& net
->routes
->attrs
->source
== RTS_DUMMY
)
1232 *dummy
= net
->routes
;
1233 net
->routes
= (*dummy
)->next
;
1238 rte_unhide_dummy_routes(net
*net
, rte
**dummy
)
1242 (*dummy
)->next
= net
->routes
;
1243 net
->routes
= *dummy
;
1248 * rte_update - enter a new update to a routing table
1249 * @table: table to be updated
1250 * @c: channel doing the update
1251 * @net: network node
1252 * @p: protocol submitting the update
1253 * @src: protocol originating the update
1254 * @new: a &rte representing the new route or %NULL for route removal.
1256 * This function is called by the routing protocols whenever they discover
1257 * a new route or wish to update/remove an existing route. The right announcement
1258 * sequence is to build route attributes first (either un-cached with @aflags set
1259 * to zero or a cached one using rta_lookup(); in this case please note that
1260 * you need to increase the use count of the attributes yourself by calling
1261 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1262 * the appropriate data and finally submit the new &rte by calling rte_update().
1264 * @src specifies the protocol that originally created the route and the meaning
1265 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1266 * same value as @new->attrs->proto. @p specifies the protocol that called
1267 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1268 * stores @p in @new->sender;
1270 * When rte_update() gets any route, it automatically validates it (checks,
1271 * whether the network and next hop address are valid IP addresses and also
1272 * whether a normal routing protocol doesn't try to smuggle a host or link
1273 * scope route to the table), converts all protocol dependent attributes stored
1274 * in the &rte to temporary extended attributes, consults import filters of the
1275 * protocol to see if the route should be accepted and/or its attributes modified,
1276 * stores the temporary attributes back to the &rte.
1278 * Now, having a "public" version of the route, we
1279 * automatically find any old route defined by the protocol @src
1280 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1281 * recalculate the optimal route for this destination and finally broadcast
1282 * the change (if any) to all routing protocols by calling rte_announce().
1284 * All memory used for attribute lists and other temporary allocations is taken
1285 * from a special linear pool @rte_update_pool and freed when rte_update()
1290 rte_update2(struct channel
*c
, net_addr
*n
, rte
*new, struct rte_src
*src
)
1292 struct proto
*p
= c
->proto
;
1293 struct proto_stats
*stats
= &c
->stats
;
1294 struct filter
*filter
= c
->in_filter
;
1295 ea_list
*tmpa
= NULL
;
1299 ASSERT(c
->channel_state
== CS_UP
);
1304 nn
= net_get(c
->table
, n
);
1310 new->pref
= c
->preference
;
1312 stats
->imp_updates_received
++;
1313 if (!rte_validate(new))
1315 rte_trace_in(D_FILTERS
, p
, new, "invalid");
1316 stats
->imp_updates_invalid
++;
1320 if (filter
== FILTER_REJECT
)
1322 stats
->imp_updates_filtered
++;
1323 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1325 if (! c
->in_keep_filtered
)
1328 /* new is a private copy, i could modify it */
1329 new->flags
|= REF_FILTERED
;
1333 tmpa
= make_tmp_attrs(new, rte_update_pool
);
1334 if (filter
&& (filter
!= FILTER_REJECT
))
1336 ea_list
*old_tmpa
= tmpa
;
1337 int fr
= f_run(filter
, &new, &tmpa
, rte_update_pool
, 0);
1340 stats
->imp_updates_filtered
++;
1341 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1343 if (! c
->in_keep_filtered
)
1346 new->flags
|= REF_FILTERED
;
1348 if (tmpa
!= old_tmpa
&& src
->proto
->store_tmp_attrs
)
1349 src
->proto
->store_tmp_attrs(new, tmpa
);
1352 if (!rta_is_cached(new->attrs
)) /* Need to copy attributes */
1353 new->attrs
= rta_lookup(new->attrs
);
1354 new->flags
|= REF_COW
;
1358 stats
->imp_withdraws_received
++;
1360 if (!(nn
= net_find(c
->table
, n
)) || !src
)
1362 stats
->imp_withdraws_ignored
++;
1363 rte_update_unlock();
1369 rte_hide_dummy_routes(nn
, &dummy
);
1370 rte_recalculate(c
, nn
, new, src
);
1371 rte_unhide_dummy_routes(nn
, &dummy
);
1372 rte_update_unlock();
1381 /* Independent call to rte_announce(), used from next hop
1382 recalculation, outside of rte_update(). new must be non-NULL */
1384 rte_announce_i(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
1385 rte
*new_best
, rte
*old_best
)
1388 rte_announce(tab
, type
, net
, new, old
, new_best
, old_best
, NULL
);
1389 rte_update_unlock();
1393 rte_discard(rtable
*t
, rte
*old
) /* Non-filtered route deletion, used during garbage collection */
1396 rte_recalculate(old
->sender
, old
->net
, NULL
, old
->attrs
->src
);
1397 rte_update_unlock();
1400 /* Check rtable for best route to given net whether it would be exported do p */
1402 rt_examine(rtable
*t
, net_addr
*a
, struct proto
*p
, struct filter
*filter
)
1404 net
*n
= net_find(t
, a
);
1405 rte
*rt
= n
? n
->routes
: NULL
;
1407 if (!rte_is_valid(rt
))
1412 /* Rest is stripped down export_filter() */
1413 ea_list
*tmpa
= make_tmp_attrs(rt
, rte_update_pool
);
1414 int v
= p
->import_control
? p
->import_control(p
, &rt
, &tmpa
, rte_update_pool
) : 0;
1415 if (v
== RIC_PROCESS
)
1416 v
= (f_run(filter
, &rt
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) <= F_ACCEPT
);
1418 /* Discard temporary rte */
1419 if (rt
!= n
->routes
)
1422 rte_update_unlock();
1429 * rt_refresh_begin - start a refresh cycle
1430 * @t: related routing table
1431 * @c related channel
1433 * This function starts a refresh cycle for given routing table and announce
1434 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1435 * routes to the routing table (by rte_update()). After that, all protocol
1436 * routes (more precisely routes with @c as @sender) not sent during the
1437 * refresh cycle but still in the table from the past are pruned. This is
1438 * implemented by marking all related routes as stale by REF_STALE flag in
1439 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1440 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1443 rt_refresh_begin(rtable
*t
, struct channel
*c
)
1445 FIB_WALK(&t
->fib
, net
, n
)
1448 for (e
= n
->routes
; e
; e
= e
->next
)
1450 e
->flags
|= REF_STALE
;
1456 * rt_refresh_end - end a refresh cycle
1457 * @t: related routing table
1458 * @c: related channel
1460 * This function ends a refresh cycle for given routing table and announce
1461 * hook. See rt_refresh_begin() for description of refresh cycles.
1464 rt_refresh_end(rtable
*t
, struct channel
*c
)
1468 FIB_WALK(&t
->fib
, net
, n
)
1471 for (e
= n
->routes
; e
; e
= e
->next
)
1472 if ((e
->sender
== c
) && (e
->flags
& REF_STALE
))
1474 e
->flags
|= REF_DISCARD
;
1481 rt_schedule_prune(t
);
1486 * rte_dump - dump a route
1487 * @e: &rte to be dumped
1489 * This functions dumps contents of a &rte to debug output.
1495 debug("%-1N ", n
->n
.addr
);
1496 debug("KF=%02x PF=%02x pref=%d lm=%d ", n
->n
.flags
, e
->pflags
, e
->pref
, now
-e
->lastmod
);
1498 if (e
->attrs
->src
->proto
->proto
->dump_attrs
)
1499 e
->attrs
->src
->proto
->proto
->dump_attrs(e
);
1504 * rt_dump - dump a routing table
1505 * @t: routing table to be dumped
1507 * This function dumps contents of a given routing table to debug output.
1512 debug("Dump of routing table <%s>\n", t
->name
);
1516 FIB_WALK(&t
->fib
, net
, n
)
1519 for(e
=n
->routes
; e
; e
=e
->next
)
1527 * rt_dump_all - dump all routing tables
1529 * This function dumps contents of all routing tables to debug output.
1536 WALK_LIST(t
, routing_tables
)
1541 rt_schedule_hcu(rtable
*tab
)
1543 if (tab
->hcu_scheduled
)
1546 tab
->hcu_scheduled
= 1;
1547 ev_schedule(tab
->rt_event
);
1551 rt_schedule_nhu(rtable
*tab
)
1553 if (tab
->nhu_state
== 0)
1554 ev_schedule(tab
->rt_event
);
1556 /* state change 0->1, 2->3 */
1557 tab
->nhu_state
|= 1;
1561 rt_schedule_prune(rtable
*tab
)
1563 if (tab
->prune_state
== 0)
1564 ev_schedule(tab
->rt_event
);
1566 /* state change 0->1, 2->3 */
1567 tab
->prune_state
|= 1;
1578 if (tab
->hcu_scheduled
)
1579 rt_update_hostcache(tab
);
1582 rt_next_hop_update(tab
);
1584 if (tab
->prune_state
)
1585 rt_prune_table(tab
);
1587 rt_unlock_table(tab
);
1591 rt_setup(pool
*p
, rtable
*t
, char *name
, struct rtable_config
*cf
)
1593 bzero(t
, sizeof(*t
));
1596 t
->addr_type
= cf
? cf
->addr_type
: NET_IP4
;
1597 fib_init(&t
->fib
, p
, t
->addr_type
, sizeof(net
), OFFSETOF(net
, n
), 0, NULL
);
1598 init_list(&t
->channels
);
1602 t
->rt_event
= ev_new(p
);
1603 t
->rt_event
->hook
= rt_event
;
1604 t
->rt_event
->data
= t
;
1610 * rt_init - initialize routing tables
1612 * This function is called during BIRD startup. It initializes the
1613 * routing table module.
1619 rt_table_pool
= rp_new(&root_pool
, "Routing tables");
1620 rte_update_pool
= lp_new(rt_table_pool
, 4080);
1621 rte_slab
= sl_new(rt_table_pool
, sizeof(rte
));
1622 init_list(&routing_tables
);
1627 * rt_prune_table - prune a routing table
1629 * The prune loop scans routing tables and removes routes belonging to flushing
1630 * protocols, discarded routes and also stale network entries. It is called from
1631 * rt_event(). The event is rescheduled if the current iteration do not finish
1632 * the table. The pruning is directed by the prune state (@prune_state),
1633 * specifying whether the prune cycle is scheduled or running, and there
1634 * is also a persistent pruning iterator (@prune_fit).
1636 * The prune loop is used also for channel flushing. For this purpose, the
1637 * channels to flush are marked before the iteration and notified after the
1641 rt_prune_table(rtable
*tab
)
1643 struct fib_iterator
*fit
= &tab
->prune_fit
;
1649 DBG("Pruning route table %s\n", tab
->name
);
1651 fib_check(&tab
->fib
);
1654 if (tab
->prune_state
== 0)
1657 if (tab
->prune_state
== 1)
1659 /* Mark channels to flush */
1660 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
1661 if (c
->channel_state
== CS_FLUSHING
)
1662 c
->flush_active
= 1;
1664 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1665 tab
->prune_state
= 2;
1669 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1674 for (e
=n
->routes
; e
; e
=e
->next
)
1675 if (e
->sender
->flush_active
|| (e
->flags
& REF_DISCARD
))
1679 FIB_ITERATE_PUT(fit
);
1680 ev_schedule(tab
->rt_event
);
1684 rte_discard(tab
, e
);
1690 if (!n
->routes
) /* Orphaned FIB entry */
1692 FIB_ITERATE_PUT(fit
);
1693 fib_delete(&tab
->fib
, n
);
1700 fib_check(&tab
->fib
);
1703 tab
->gc_counter
= 0;
1706 /* state change 2->0, 3->1 */
1707 tab
->prune_state
&= 1;
1709 if (tab
->prune_state
> 0)
1710 ev_schedule(tab
->rt_event
);
1712 /* FIXME: This should be handled in a better way */
1715 /* Close flushed channels */
1716 WALK_LIST2_DELSAFE(c
, n
, x
, tab
->channels
, table_node
)
1717 if (c
->flush_active
)
1719 c
->flush_active
= 0;
1720 channel_set_state(c
, CS_DOWN
);
1727 rt_preconfig(struct config
*c
)
1729 init_list(&c
->tables
);
1731 rt_new_table(cf_get_symbol("master4"), NET_IP4
);
1732 rt_new_table(cf_get_symbol("master6"), NET_IP6
);
1737 * Some functions for handing internal next hop updates
1738 * triggered by rt_schedule_nhu().
1742 rta_next_hop_outdated(rta
*a
)
1744 struct hostentry
*he
= a
->hostentry
;
1750 return a
->dest
!= RTD_UNREACHABLE
;
1752 return (a
->iface
!= he
->src
->iface
) || !ipa_equal(a
->gw
, he
->gw
) ||
1753 (a
->dest
!= he
->dest
) || (a
->igp_metric
!= he
->igp_metric
) ||
1754 !mpnh_same(a
->nexthops
, he
->src
->nexthops
);
1758 rta_apply_hostentry(rta
*a
, struct hostentry
*he
)
1761 a
->iface
= he
->src
? he
->src
->iface
: NULL
;
1764 a
->igp_metric
= he
->igp_metric
;
1765 a
->nexthops
= he
->src
? he
->src
->nexthops
: NULL
;
1769 rt_next_hop_update_rte(rtable
*tab
, rte
*old
)
1772 memcpy(&a
, old
->attrs
, sizeof(rta
));
1773 rta_apply_hostentry(&a
, old
->attrs
->hostentry
);
1776 rte
*e
= sl_alloc(rte_slab
);
1777 memcpy(e
, old
, sizeof(rte
));
1778 e
->attrs
= rta_lookup(&a
);
1784 rt_next_hop_update_net(rtable
*tab
, net
*n
)
1786 rte
**k
, *e
, *new, *old_best
, **new_best
;
1788 int free_old_best
= 0;
1790 old_best
= n
->routes
;
1794 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1795 if (rta_next_hop_outdated(e
->attrs
))
1797 new = rt_next_hop_update_rte(tab
, e
);
1800 rte_announce_i(tab
, RA_ANY
, n
, new, e
, NULL
, NULL
);
1801 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated");
1803 /* Call a pre-comparison hook */
1804 /* Not really an efficient way to compute this */
1805 if (e
->attrs
->src
->proto
->rte_recalculate
)
1806 e
->attrs
->src
->proto
->rte_recalculate(tab
, n
, new, e
, NULL
);
1810 else /* Freeing of the old best rte is postponed */
1820 /* Find the new best route */
1822 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1824 if (!new_best
|| rte_better(e
, *new_best
))
1828 /* Relink the new best route to the first position */
1830 if (new != n
->routes
)
1832 *new_best
= new->next
;
1833 new->next
= n
->routes
;
1837 /* Announce the new best route */
1838 if (new != old_best
)
1840 rte_announce_i(tab
, RA_OPTIMAL
, n
, new, old_best
, NULL
, NULL
);
1841 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated [best]");
1844 /* FIXME: Better announcement of merged routes */
1845 rte_announce_i(tab
, RA_MERGED
, n
, new, old_best
, new, old_best
);
1848 rte_free_quick(old_best
);
1854 rt_next_hop_update(rtable
*tab
)
1856 struct fib_iterator
*fit
= &tab
->nhu_fit
;
1859 if (tab
->nhu_state
== 0)
1862 if (tab
->nhu_state
== 1)
1864 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1868 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1872 FIB_ITERATE_PUT(fit
);
1873 ev_schedule(tab
->rt_event
);
1876 max_feed
-= rt_next_hop_update_net(tab
, n
);
1880 /* state change 2->0, 3->1 */
1881 tab
->nhu_state
&= 1;
1883 if (tab
->nhu_state
> 0)
1884 ev_schedule(tab
->rt_event
);
1888 struct rtable_config
*
1889 rt_new_table(struct symbol
*s
, uint addr_type
)
1891 /* Hack that allows to 'redefine' the master table */
1892 if ((s
->class == SYM_TABLE
) &&
1893 (s
->def
== new_config
->def_tables
[addr_type
]) &&
1894 ((addr_type
== NET_IP4
) || (addr_type
== NET_IP6
)))
1897 struct rtable_config
*c
= cfg_allocz(sizeof(struct rtable_config
));
1899 cf_define_symbol(s
, SYM_TABLE
, c
);
1901 c
->addr_type
= addr_type
;
1902 c
->gc_max_ops
= 1000;
1905 add_tail(&new_config
->tables
, &c
->n
);
1907 /* First table of each type is kept as default */
1908 if (! new_config
->def_tables
[addr_type
])
1909 new_config
->def_tables
[addr_type
] = c
;
1915 * rt_lock_table - lock a routing table
1916 * @r: routing table to be locked
1918 * Lock a routing table, because it's in use by a protocol,
1919 * preventing it from being freed when it gets undefined in a new
1923 rt_lock_table(rtable
*r
)
1929 * rt_unlock_table - unlock a routing table
1930 * @r: routing table to be unlocked
1932 * Unlock a routing table formerly locked by rt_lock_table(),
1933 * that is decrease its use count and delete it if it's scheduled
1934 * for deletion by configuration changes.
1937 rt_unlock_table(rtable
*r
)
1939 if (!--r
->use_count
&& r
->deleted
)
1941 struct config
*conf
= r
->deleted
;
1942 DBG("Deleting routing table %s\n", r
->name
);
1943 r
->config
->table
= NULL
;
1945 rt_free_hostcache(r
);
1950 config_del_obstacle(conf
);
1955 * rt_commit - commit new routing table configuration
1956 * @new: new configuration
1957 * @old: original configuration or %NULL if it's boot time config
1959 * Scan differences between @old and @new configuration and modify
1960 * the routing tables according to these changes. If @new defines a
1961 * previously unknown table, create it, if it omits a table existing
1962 * in @old, schedule it for deletion (it gets deleted when all protocols
1963 * disconnect from it by calling rt_unlock_table()), if it exists
1964 * in both configurations, leave it unchanged.
1967 rt_commit(struct config
*new, struct config
*old
)
1969 struct rtable_config
*o
, *r
;
1971 DBG("rt_commit:\n");
1974 WALK_LIST(o
, old
->tables
)
1976 rtable
*ot
= o
->table
;
1979 struct symbol
*sym
= cf_find_symbol(new, o
->name
);
1980 if (sym
&& sym
->class == SYM_TABLE
&& !new->shutdown
)
1982 DBG("\t%s: same\n", o
->name
);
1987 if (o
->sorted
!= r
->sorted
)
1988 log(L_WARN
"Reconfiguration of rtable sorted flag not implemented");
1992 DBG("\t%s: deleted\n", o
->name
);
1994 config_add_obstacle(old
);
1996 rt_unlock_table(ot
);
2002 WALK_LIST(r
, new->tables
)
2005 rtable
*t
= mb_alloc(rt_table_pool
, sizeof(struct rtable
));
2006 DBG("\t%s: created\n", r
->name
);
2007 rt_setup(rt_table_pool
, t
, r
->name
, r
);
2008 add_tail(&routing_tables
, &t
->n
);
2015 do_feed_channel(struct channel
*c
, net
*n
, rte
*e
)
2018 if (c
->ra_mode
== RA_ACCEPTED
)
2019 rt_notify_accepted(c
, n
, e
, NULL
, NULL
, c
->refeeding
? 2 : 1);
2020 else if (c
->ra_mode
== RA_MERGED
)
2021 rt_notify_merged(c
, n
, NULL
, NULL
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2023 rt_notify_basic(c
, n
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2024 rte_update_unlock();
2028 * rt_feed_channel - advertise all routes to a channel
2029 * @c: channel to be fed
2031 * This function performs one pass of advertisement of routes to a channel that
2032 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2033 * has something to do. (We avoid transferring all the routes in single pass in
2034 * order not to monopolize CPU time.)
2037 rt_feed_channel(struct channel
*c
)
2039 struct fib_iterator
*fit
= &c
->feed_fit
;
2042 ASSERT(c
->export_state
== ES_FEEDING
);
2044 if (!c
->feed_active
)
2046 FIB_ITERATE_INIT(fit
, &c
->table
->fib
);
2050 FIB_ITERATE_START(&c
->table
->fib
, fit
, net
, n
)
2055 FIB_ITERATE_PUT(fit
);
2059 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2061 if ((c
->ra_mode
== RA_OPTIMAL
) ||
2062 (c
->ra_mode
== RA_ACCEPTED
) ||
2063 (c
->ra_mode
== RA_MERGED
))
2064 if (rte_is_valid(e
))
2066 /* In the meantime, the protocol may fell down */
2067 if (c
->export_state
!= ES_FEEDING
)
2070 do_feed_channel(c
, n
, e
);
2074 if (c
->ra_mode
== RA_ANY
)
2075 for(e
= n
->routes
; e
; e
= e
->next
)
2077 /* In the meantime, the protocol may fell down */
2078 if (c
->export_state
!= ES_FEEDING
)
2081 if (!rte_is_valid(e
))
2084 do_feed_channel(c
, n
, e
);
2096 * rt_feed_baby_abort - abort protocol feeding
2099 * This function is called by the protocol code when the protocol stops or
2100 * ceases to exist during the feeding.
2103 rt_feed_channel_abort(struct channel
*c
)
2107 /* Unlink the iterator */
2108 fit_get(&c
->table
->fib
, &c
->feed_fit
);
2113 static inline unsigned
2116 uintptr_t p
= (uintptr_t) ptr
;
2117 return p
^ (p
<< 8) ^ (p
>> 16);
2121 hc_hash(ip_addr a
, rtable
*dep
)
2123 return ipa_hash(a
) ^ ptr_hash(dep
);
2127 hc_insert(struct hostcache
*hc
, struct hostentry
*he
)
2129 uint k
= he
->hash_key
>> hc
->hash_shift
;
2130 he
->next
= hc
->hash_table
[k
];
2131 hc
->hash_table
[k
] = he
;
2135 hc_remove(struct hostcache
*hc
, struct hostentry
*he
)
2137 struct hostentry
**hep
;
2138 uint k
= he
->hash_key
>> hc
->hash_shift
;
2140 for (hep
= &hc
->hash_table
[k
]; *hep
!= he
; hep
= &(*hep
)->next
);
2144 #define HC_DEF_ORDER 10
2145 #define HC_HI_MARK *4
2146 #define HC_HI_STEP 2
2147 #define HC_HI_ORDER 16 /* Must be at most 16 */
2148 #define HC_LO_MARK /5
2149 #define HC_LO_STEP 2
2150 #define HC_LO_ORDER 10
2153 hc_alloc_table(struct hostcache
*hc
, unsigned order
)
2155 unsigned hsize
= 1 << order
;
2156 hc
->hash_order
= order
;
2157 hc
->hash_shift
= 32 - order
;
2158 hc
->hash_max
= (order
>= HC_HI_ORDER
) ? ~0 : (hsize HC_HI_MARK
);
2159 hc
->hash_min
= (order
<= HC_LO_ORDER
) ? 0 : (hsize HC_LO_MARK
);
2161 hc
->hash_table
= mb_allocz(rt_table_pool
, hsize
* sizeof(struct hostentry
*));
2165 hc_resize(struct hostcache
*hc
, unsigned new_order
)
2167 unsigned old_size
= 1 << hc
->hash_order
;
2168 struct hostentry
**old_table
= hc
->hash_table
;
2169 struct hostentry
*he
, *hen
;
2172 hc_alloc_table(hc
, new_order
);
2173 for (i
= 0; i
< old_size
; i
++)
2174 for (he
= old_table
[i
]; he
!= NULL
; he
=hen
)
2182 static struct hostentry
*
2183 hc_new_hostentry(struct hostcache
*hc
, ip_addr a
, ip_addr ll
, rtable
*dep
, unsigned k
)
2185 struct hostentry
*he
= sl_alloc(hc
->slab
);
2194 add_tail(&hc
->hostentries
, &he
->ln
);
2198 if (hc
->hash_items
> hc
->hash_max
)
2199 hc_resize(hc
, hc
->hash_order
+ HC_HI_STEP
);
2205 hc_delete_hostentry(struct hostcache
*hc
, struct hostentry
*he
)
2211 sl_free(hc
->slab
, he
);
2214 if (hc
->hash_items
< hc
->hash_min
)
2215 hc_resize(hc
, hc
->hash_order
- HC_LO_STEP
);
2219 rt_init_hostcache(rtable
*tab
)
2221 struct hostcache
*hc
= mb_allocz(rt_table_pool
, sizeof(struct hostcache
));
2222 init_list(&hc
->hostentries
);
2225 hc_alloc_table(hc
, HC_DEF_ORDER
);
2226 hc
->slab
= sl_new(rt_table_pool
, sizeof(struct hostentry
));
2228 hc
->lp
= lp_new(rt_table_pool
, 1008);
2229 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2231 tab
->hostcache
= hc
;
2235 rt_free_hostcache(rtable
*tab
)
2237 struct hostcache
*hc
= tab
->hostcache
;
2240 WALK_LIST(n
, hc
->hostentries
)
2242 struct hostentry
*he
= SKIP_BACK(struct hostentry
, ln
, n
);
2246 log(L_ERR
"Hostcache is not empty in table %s", tab
->name
);
2251 mb_free(hc
->hash_table
);
2256 rt_notify_hostcache(rtable
*tab
, net
*net
)
2258 if (tab
->hcu_scheduled
)
2261 if (trie_match_net(tab
->hostcache
->trie
, net
->n
.addr
))
2262 rt_schedule_hcu(tab
);
2266 if_local_addr(ip_addr a
, struct iface
*i
)
2270 WALK_LIST(b
, i
->addrs
)
2271 if (ipa_equal(a
, b
->ip
))
2278 rt_get_igp_metric(rte
*rt
)
2280 eattr
*ea
= ea_find(rt
->attrs
->eattrs
, EA_GEN_IGP_METRIC
);
2288 if ((a
->source
== RTS_OSPF
) ||
2289 (a
->source
== RTS_OSPF_IA
) ||
2290 (a
->source
== RTS_OSPF_EXT1
))
2291 return rt
->u
.ospf
.metric1
;
2295 if (a
->source
== RTS_RIP
)
2296 return rt
->u
.rip
.metric
;
2300 if ((a
->dest
!= RTD_ROUTER
) && (a
->dest
!= RTD_MULTIPATH
))
2303 return IGP_METRIC_UNKNOWN
;
2307 rt_update_hostentry(rtable
*tab
, struct hostentry
*he
)
2309 rta
*old_src
= he
->src
;
2312 /* Reset the hostentry */
2315 he
->dest
= RTD_UNREACHABLE
;
2319 net_fill_ip_host(&he_addr
, he
->addr
);
2320 net
*n
= net_route(tab
, &he_addr
);
2325 pxlen
= n
->n
.addr
->pxlen
;
2329 /* Recursive route should not depend on another recursive route */
2330 log(L_WARN
"Next hop address %I resolvable through recursive route for %N",
2331 he
->addr
, n
->n
.addr
);
2335 if (a
->dest
== RTD_DEVICE
)
2337 if (if_local_addr(he
->addr
, a
->iface
))
2339 /* The host address is a local address, this is not valid */
2340 log(L_WARN
"Next hop address %I is a local address of iface %s",
2341 he
->addr
, a
->iface
->name
);
2345 /* The host is directly reachable, use link as a gateway */
2347 he
->dest
= RTD_ROUTER
;
2351 /* The host is reachable through some route entry */
2356 he
->src
= rta_clone(a
);
2357 he
->igp_metric
= rt_get_igp_metric(e
);
2361 /* Add a prefix range to the trie */
2362 trie_add_prefix(tab
->hostcache
->trie
, &he_addr
, pxlen
, he_addr
.pxlen
);
2365 return old_src
!= he
->src
;
2369 rt_update_hostcache(rtable
*tab
)
2371 struct hostcache
*hc
= tab
->hostcache
;
2372 struct hostentry
*he
;
2375 /* Reset the trie */
2377 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2379 WALK_LIST_DELSAFE(n
, x
, hc
->hostentries
)
2381 he
= SKIP_BACK(struct hostentry
, ln
, n
);
2384 hc_delete_hostentry(hc
, he
);
2388 if (rt_update_hostentry(tab
, he
))
2389 rt_schedule_nhu(he
->tab
);
2392 tab
->hcu_scheduled
= 0;
2395 static struct hostentry
*
2396 rt_get_hostentry(rtable
*tab
, ip_addr a
, ip_addr ll
, rtable
*dep
)
2398 struct hostentry
*he
;
2400 if (!tab
->hostcache
)
2401 rt_init_hostcache(tab
);
2403 u32 k
= hc_hash(a
, dep
);
2404 struct hostcache
*hc
= tab
->hostcache
;
2405 for (he
= hc
->hash_table
[k
>> hc
->hash_shift
]; he
!= NULL
; he
= he
->next
)
2406 if (ipa_equal(he
->addr
, a
) && (he
->tab
== dep
))
2409 he
= hc_new_hostentry(hc
, a
, ll
, dep
, k
);
2410 rt_update_hostentry(tab
, he
);
2415 rta_set_recursive_next_hop(rtable
*dep
, rta
*a
, rtable
*tab
, ip_addr
*gw
, ip_addr
*ll
)
2417 rta_apply_hostentry(a
, rt_get_hostentry(tab
, *gw
, *ll
, dep
));
2426 rt_format_via(rte
*e
, byte
*via
)
2432 case RTD_ROUTER
: bsprintf(via
, "via %I on %s", a
->gw
, a
->iface
->name
); break;
2433 case RTD_DEVICE
: bsprintf(via
, "dev %s", a
->iface
->name
); break;
2434 case RTD_BLACKHOLE
: bsprintf(via
, "blackhole"); break;
2435 case RTD_UNREACHABLE
: bsprintf(via
, "unreachable"); break;
2436 case RTD_PROHIBIT
: bsprintf(via
, "prohibited"); break;
2437 case RTD_MULTIPATH
: bsprintf(via
, "multipath"); break;
2438 default: bsprintf(via
, "???");
2443 rt_show_rte(struct cli
*c
, byte
*ia
, rte
*e
, struct rt_show_data
*d
, ea_list
*tmpa
)
2445 byte via
[IPA_MAX_TEXT_LENGTH
+32];
2446 byte from
[IPA_MAX_TEXT_LENGTH
+8];
2447 byte tm
[TM_DATETIME_BUFFER_SIZE
], info
[256];
2449 int primary
= (e
->net
->routes
== e
);
2450 int sync_error
= (e
->net
->n
.flags
& KRF_SYNC_ERROR
);
2451 void (*get_route_info
)(struct rte
*, byte
*buf
, struct ea_list
*attrs
);
2454 rt_format_via(e
, via
);
2455 tm_format_datetime(tm
, &config
->tf_route
, e
->lastmod
);
2456 if (ipa_nonzero(a
->from
) && !ipa_equal(a
->from
, a
->gw
))
2457 bsprintf(from
, " from %I", a
->from
);
2461 get_route_info
= a
->src
->proto
->proto
->get_route_info
;
2462 if (get_route_info
|| d
->verbose
)
2464 /* Need to normalize the extended attributes */
2466 t
= ea_append(t
, a
->eattrs
);
2467 tmpa
= alloca(ea_scan(t
));
2472 get_route_info(e
, info
, tmpa
);
2474 bsprintf(info
, " (%d)", e
->pref
);
2475 cli_printf(c
, -1007, "%-18s %s [%s %s%s]%s%s", ia
, via
, a
->src
->proto
->name
,
2476 tm
, from
, primary
? (sync_error
? " !" : " *") : "", info
);
2477 for (nh
= a
->nexthops
; nh
; nh
= nh
->next
)
2478 cli_printf(c
, -1007, "\tvia %I on %s weight %d", nh
->gw
, nh
->iface
->name
, nh
->weight
+ 1);
2480 rta_show(c
, a
, tmpa
);
2484 rt_show_net(struct cli
*c
, net
*n
, struct rt_show_data
*d
)
2487 byte ia
[NET_MAX_TEXT_LENGTH
+1];
2488 struct ea_list
*tmpa
;
2489 struct channel
*ec
= d
->export_channel
;
2493 bsprintf(ia
, "%N", n
->n
.addr
);
2496 for (e
= n
->routes
; e
; e
= e
->next
)
2498 if (rte_is_filtered(e
) != d
->filtered
)
2502 d
->net_counter
+= first
;
2509 rte_update_lock(); /* We use the update buffer for filtering */
2510 tmpa
= make_tmp_attrs(e
, rte_update_pool
);
2512 /* Special case for merged export */
2513 if ((d
->export_mode
== RSEM_EXPORT
) && (ec
->ra_mode
== RA_MERGED
))
2516 e
= rt_export_merged(ec
, n
, &rt_free
, &tmpa
, 1);
2520 { e
= ee
; goto skip
; }
2522 else if (d
->export_mode
)
2524 struct proto
*ep
= d
->export_protocol
;
2525 int ic
= ep
->import_control
? ep
->import_control(ep
, &e
, &tmpa
, rte_update_pool
) : 0;
2527 if (ec
->ra_mode
== RA_OPTIMAL
|| ec
->ra_mode
== RA_MERGED
)
2533 if (d
->export_mode
> RSEM_PREEXPORT
)
2536 * FIXME - This shows what should be exported according to current
2537 * filters, but not what was really exported. 'configure soft'
2538 * command may change the export filter and do not update routes.
2540 int do_export
= (ic
> 0) ||
2541 (f_run(ec
->out_filter
, &e
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) <= F_ACCEPT
);
2543 if (do_export
!= (d
->export_mode
== RSEM_EXPORT
))
2546 if ((d
->export_mode
== RSEM_EXPORT
) && (ec
->ra_mode
== RA_ACCEPTED
))
2551 if (d
->show_protocol
&& (d
->show_protocol
!= e
->attrs
->src
->proto
))
2554 if (f_run(d
->filter
, &e
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) > F_ACCEPT
)
2559 rt_show_rte(c
, ia
, e
, d
, tmpa
);
2568 rte_update_unlock();
2570 if (d
->primary_only
)
2575 static struct channel
*
2576 rt_show_export_channel(struct rt_show_data
*d
)
2578 if (! d
->export_protocol
->rt_notify
)
2581 return proto_find_channel_by_table(d
->export_protocol
, d
->table
);
2585 rt_show_cont(struct cli
*c
)
2587 struct rt_show_data
*d
= c
->rover
;
2593 struct fib
*fib
= &d
->table
->fib
;
2594 struct fib_iterator
*it
= &d
->fit
;
2598 /* Ensure we have current export channel */
2599 d
->export_channel
= rt_show_export_channel(d
);
2600 if (!d
->export_channel
|| (d
->export_channel
->export_state
== ES_DOWN
))
2602 cli_printf(c
, 8005, "Channel is down");
2607 FIB_ITERATE_START(fib
, it
, net
, n
)
2611 FIB_ITERATE_PUT(it
);
2614 rt_show_net(c
, n
, d
);
2618 cli_printf(c
, 14, "%d of %d routes for %d networks", d
->show_counter
, d
->rt_counter
, d
->net_counter
);
2620 cli_printf(c
, 0, "");
2622 c
->cont
= c
->cleanup
= NULL
;
2626 rt_show_cleanup(struct cli
*c
)
2628 struct rt_show_data
*d
= c
->rover
;
2630 /* Unlink the iterator */
2631 fit_get(&d
->table
->fib
, &d
->fit
);
2634 static inline rtable
*
2635 rt_show_get_table(struct proto
*p
)
2637 /* FIXME: Use a better way to handle multi-channel protocols */
2639 if (p
->main_channel
)
2640 return p
->main_channel
->table
;
2642 if (!EMPTY_LIST(p
->channels
))
2643 return ((struct channel
*) HEAD(p
->channels
))->table
;
2649 rt_show(struct rt_show_data
*d
)
2653 /* Default is either a master table or a table related to a respective protocol */
2654 if (!d
->table
&& d
->export_protocol
) d
->table
= rt_show_get_table(d
->export_protocol
);
2655 if (!d
->table
&& d
->show_protocol
) d
->table
= rt_show_get_table(d
->show_protocol
);
2656 if (!d
->table
) d
->table
= config
->def_tables
[NET_IP4
]->table
; /* FIXME: iterate through all tables ? */
2658 /* Filtered routes are neither exported nor have sensible ordering */
2659 if (d
->filtered
&& (d
->export_mode
|| d
->primary_only
))
2664 FIB_ITERATE_INIT(&d
->fit
, &d
->table
->fib
);
2665 this_cli
->cont
= rt_show_cont
;
2666 this_cli
->cleanup
= rt_show_cleanup
;
2667 this_cli
->rover
= d
;
2673 /* Find channel associated with the export protocol */
2674 d
->export_channel
= rt_show_export_channel(d
);
2675 if (!d
->export_channel
|| (d
->export_channel
->export_state
== ES_DOWN
))
2677 cli_msg(8005, "Channel is down");
2683 n
= net_route(d
->table
, d
->addr
);
2685 n
= net_find(d
->table
, d
->addr
);
2688 rt_show_net(this_cli
, n
, d
);
2693 cli_msg(8001, "Network not in table");
2698 * Documentation for functions declared inline in route.h
2703 * net_find - find a network entry
2704 * @tab: a routing table
2705 * @addr: address of the network
2707 * net_find() looks up the given network in routing table @tab and
2708 * returns a pointer to its &net entry or %NULL if no such network
2711 static inline net
*net_find(rtable
*tab
, net_addr
*addr
)
2715 * net_get - obtain a network entry
2716 * @tab: a routing table
2717 * @addr: address of the network
2719 * net_get() looks up the given network in routing table @tab and
2720 * returns a pointer to its &net entry. If no such entry exists, it's
2723 static inline net
*net_get(rtable
*tab
, net_addr
*addr
)
2727 * rte_cow - copy a route for writing
2728 * @r: a route entry to be copied
2730 * rte_cow() takes a &rte and prepares it for modification. The exact action
2731 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2732 * just returned unchanged, else a new temporary entry with the same contents
2735 * The primary use of this function is inside the filter machinery -- when
2736 * a filter wants to modify &rte contents (to change the preference or to
2737 * attach another set of attributes), it must ensure that the &rte is not
2738 * shared with anyone else (and especially that it isn't stored in any routing
2741 * Result: a pointer to the new writable &rte.
2743 static inline rte
* rte_cow(rte
*r
)