]>
git.ipfire.org Git - thirdparty/bird.git/blob - nest/rt-table.c
2 * BIRD -- Routing Tables
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
37 #include "nest/iface.h"
38 #include "lib/resource.h"
39 #include "lib/event.h"
40 #include "lib/string.h"
41 #include "conf/conf.h"
42 #include "filter/filter.h"
43 #include "lib/string.h"
44 #include "lib/alloca.h"
48 static slab
*rte_slab
;
49 static linpool
*rte_update_pool
;
51 static list routing_tables
;
53 static byte
*rt_format_via(rte
*e
);
54 static void rt_free_hostcache(rtable
*tab
);
55 static void rt_notify_hostcache(rtable
*tab
, net
*net
);
56 static void rt_update_hostcache(rtable
*tab
);
57 static void rt_next_hop_update(rtable
*tab
);
58 static inline int rt_prune_table(rtable
*tab
);
59 static inline void rt_schedule_gc(rtable
*tab
);
60 static inline void rt_schedule_prune(rtable
*tab
);
63 /* Like fib_route(), but skips empty net entries */
65 net_route(rtable
*tab
, ip_addr a
, int len
)
72 a0
= ipa_and(a
, ipa_mkmask(len
));
73 n
= fib_find(&tab
->fib
, &a0
, len
);
74 if (n
&& rte_is_valid(n
->routes
))
82 rte_init(struct fib_node
*N
)
91 * rte_find - find a route
95 * The rte_find() function returns a route for destination @net
96 * which is from route source @src.
99 rte_find(net
*net
, struct rte_src
*src
)
101 rte
*e
= net
->routes
;
103 while (e
&& e
->attrs
->src
!= src
)
109 * rte_get_temp - get a temporary &rte
110 * @a: attributes to assign to the new route (a &rta; in case it's
111 * un-cached, rte_update() will create a cached copy automatically)
113 * Create a temporary &rte and bind it with the attributes @a.
114 * Also set route preference to the default preference set for
120 rte
*e
= sl_alloc(rte_slab
);
124 e
->pref
= a
->src
->proto
->preference
;
131 rte
*e
= sl_alloc(rte_slab
);
133 memcpy(e
, r
, sizeof(rte
));
134 e
->attrs
= rta_clone(r
->attrs
);
140 * rte_cow_rta - get a private writable copy of &rte with writable &rta
141 * @r: a route entry to be copied
142 * @lp: a linpool from which to allocate &rta
144 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
145 * modification. There are three possibilities: First, both &rte and &rta are
146 * private copies, in that case they are returned unchanged. Second, &rte is
147 * private copy, but &rta is cached, in that case &rta is duplicated using
148 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
149 * both structures are duplicated by rte_do_cow() and rta_do_cow().
151 * Note that in the second case, cached &rta loses one reference, while private
152 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
153 * nexthops, ...) with it. To work properly, original shared &rta should have
154 * another reference during the life of created private copy.
156 * Result: a pointer to the new writable &rte with writable &rta.
159 rte_cow_rta(rte
*r
, linpool
*lp
)
161 if (!rta_is_cached(r
->attrs
))
165 rta
*a
= rta_do_cow(r
->attrs
, lp
);
171 static int /* Actually better or at least as good as */
172 rte_better(rte
*new, rte
*old
)
174 int (*better
)(rte
*, rte
*);
176 if (!rte_is_valid(old
))
178 if (!rte_is_valid(new))
181 if (new->pref
> old
->pref
)
183 if (new->pref
< old
->pref
)
185 if (new->attrs
->src
->proto
->proto
!= old
->attrs
->src
->proto
->proto
)
188 * If the user has configured protocol preferences, so that two different protocols
189 * have the same preference, try to break the tie by comparing addresses. Not too
190 * useful, but keeps the ordering of routes unambiguous.
192 return new->attrs
->src
->proto
->proto
> old
->attrs
->src
->proto
->proto
;
194 if (better
= new->attrs
->src
->proto
->rte_better
)
195 return better(new, old
);
200 rte_mergable(rte
*pri
, rte
*sec
)
202 int (*mergable
)(rte
*, rte
*);
204 if (!rte_is_valid(pri
) || !rte_is_valid(sec
))
207 if (pri
->pref
!= sec
->pref
)
210 if (pri
->attrs
->src
->proto
->proto
!= sec
->attrs
->src
->proto
->proto
)
213 if (mergable
= pri
->attrs
->src
->proto
->rte_mergable
)
214 return mergable(pri
, sec
);
220 rte_trace(struct proto
*p
, rte
*e
, int dir
, char *msg
)
222 log(L_TRACE
"%s %c %s %I/%d %s", p
->name
, dir
, msg
, e
->net
->n
.prefix
, e
->net
->n
.pxlen
, rt_format_via(e
));
226 rte_trace_in(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
229 rte_trace(p
, e
, '>', msg
);
233 rte_trace_out(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
236 rte_trace(p
, e
, '<', msg
);
240 export_filter_(struct announce_hook
*ah
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, linpool
*pool
, int silent
)
242 struct proto
*p
= ah
->proto
;
243 struct filter
*filter
= ah
->out_filter
;
244 struct proto_stats
*stats
= ah
->stats
;
245 ea_list
*tmpb
= NULL
;
255 *tmpa
= rte_make_tmp_attrs(rt
, pool
);
257 v
= p
->import_control
? p
->import_control(p
, &rt
, tmpa
, pool
) : 0;
263 stats
->exp_updates_rejected
++;
265 rte_trace_out(D_FILTERS
, p
, rt
, "rejected by protocol");
271 rte_trace_out(D_FILTERS
, p
, rt
, "forced accept by protocol");
275 v
= filter
&& ((filter
== FILTER_REJECT
) ||
276 (f_run(filter
, &rt
, tmpa
, pool
,
277 FF_FORCE_TMPATTR
| (silent
? FF_SILENT
: 0)) > F_ACCEPT
));
283 stats
->exp_updates_filtered
++;
284 rte_trace_out(D_FILTERS
, p
, rt
, "filtered out");
294 /* Discard temporary rte */
301 export_filter(struct announce_hook
*ah
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, int silent
)
303 return export_filter_(ah
, rt0
, rt_free
, tmpa
, rte_update_pool
, silent
);
307 do_rt_notify(struct announce_hook
*ah
, net
*net
, rte
*new, rte
*old
, ea_list
*tmpa
, int refeed
)
309 struct proto
*p
= ah
->proto
;
310 struct proto_stats
*stats
= ah
->stats
;
314 * First, apply export limit.
316 * Export route limits has several problems. Because exp_routes
317 * counter is reset before refeed, we don't really know whether
318 * limit is breached and whether the update is new or not. Therefore
319 * the number of really exported routes may exceed the limit
320 * temporarily (routes exported before and new routes in refeed).
322 * Minor advantage is that if the limit is decreased and refeed is
323 * requested, the number of exported routes really decrease.
325 * Second problem is that with export limits, we don't know whether
326 * old was really exported (it might be blocked by limit). When a
327 * withdraw is exported, we announce it even when the previous
328 * update was blocked. This is not a big issue, but the same problem
329 * is in updating exp_routes counter. Therefore, to be consistent in
330 * increases and decreases of exp_routes, we count exported routes
331 * regardless of blocking by limits.
333 * Similar problem is in handling updates - when a new route is
334 * received and blocking is active, the route would be blocked, but
335 * when an update for the route will be received later, the update
336 * would be propagated (as old != NULL). Therefore, we have to block
337 * also non-new updates (contrary to import blocking).
340 struct proto_limit
*l
= ah
->out_limit
;
343 if ((!old
|| refeed
) && (stats
->exp_routes
>= l
->limit
))
344 proto_notify_limit(ah
, l
, PLD_OUT
, stats
->exp_routes
);
346 if (l
->state
== PLS_BLOCKED
)
348 stats
->exp_routes
++; /* see note above */
349 stats
->exp_updates_rejected
++;
350 rte_trace_out(D_FILTERS
, p
, new, "rejected [limit]");
360 stats
->exp_updates_accepted
++;
362 stats
->exp_withdraws_accepted
++;
364 /* Hack: We do not decrease exp_routes during refeed, we instead
365 reset exp_routes at the start of refeed. */
371 if (p
->debug
& D_ROUTES
)
374 rte_trace_out(D_ROUTES
, p
, new, "replaced");
376 rte_trace_out(D_ROUTES
, p
, new, "added");
378 rte_trace_out(D_ROUTES
, p
, old
, "removed");
381 p
->rt_notify(p
, ah
->table
, net
, NULL
, old
, NULL
);
387 t
->next
= new->attrs
->eattrs
;
388 p
->rt_notify(p
, ah
->table
, net
, new, old
, tmpa
);
392 p
->rt_notify(p
, ah
->table
, net
, new, old
, new->attrs
->eattrs
);
396 rt_notify_basic(struct announce_hook
*ah
, net
*net
, rte
*new0
, rte
*old0
, int refeed
)
398 struct proto
*p
= ah
->proto
;
399 struct proto_stats
*stats
= ah
->stats
;
403 rte
*new_free
= NULL
;
404 rte
*old_free
= NULL
;
405 ea_list
*tmpa
= NULL
;
408 stats
->exp_updates_received
++;
410 stats
->exp_withdraws_received
++;
413 * This is a tricky part - we don't know whether route 'old' was
414 * exported to protocol 'p' or was filtered by the export filter.
415 * We try to run the export filter to know this to have a correct
416 * value in 'old' argument of rte_update (and proper filter value)
418 * FIXME - this is broken because 'configure soft' may change
419 * filters but keep routes. Refeed is expected to be called after
420 * change of the filters and with old == new, therefore we do not
421 * even try to run the filter on an old route, This may lead to
422 * 'spurious withdraws' but ensure that there are no 'missing
425 * This is not completely safe as there is a window between
426 * reconfiguration and the end of refeed - if a newly filtered
427 * route disappears during this period, proper withdraw is not
428 * sent (because old would be also filtered) and the route is
429 * not refeeded (because it disappeared before that). Therefore,
430 * we also do not try to run the filter on old routes that are
431 * older than the last filter change.
435 new = export_filter(ah
, new, &new_free
, &tmpa
, 0);
437 if (old
&& !(refeed
|| (old
->lastmod
<= ah
->last_out_filter_change
)))
438 old
= export_filter(ah
, old
, &old_free
, NULL
, 1);
443 * As mentioned above, 'old' value may be incorrect in some race conditions.
444 * We generally ignore it with the exception of withdraw to pipe protocol.
445 * In that case we rather propagate unfiltered withdraws regardless of
446 * export filters to ensure that when a protocol is flushed, its routes are
447 * removed from all tables. Possible spurious unfiltered withdraws are not
448 * problem here as they are ignored if there is no corresponding route at
449 * the other end of the pipe. We directly call rt_notify() hook instead of
450 * do_rt_notify() to avoid logging and stat counters.
454 if ((p
->proto
== &proto_pipe
) && !new0
&& (p
!= old0
->sender
->proto
))
455 p
->rt_notify(p
, ah
->table
, net
, NULL
, old0
, NULL
);
461 do_rt_notify(ah
, net
, new, old
, tmpa
, refeed
);
463 /* Discard temporary rte's */
471 rt_notify_accepted(struct announce_hook
*ah
, net
*net
, rte
*new_changed
, rte
*old_changed
, rte
*before_old
, int feed
)
473 // struct proto *p = ah->proto;
474 struct proto_stats
*stats
= ah
->stats
;
477 rte
*new_best
= NULL
;
478 rte
*old_best
= NULL
;
479 rte
*new_free
= NULL
;
480 rte
*old_free
= NULL
;
481 ea_list
*tmpa
= NULL
;
483 /* Used to track whether we met old_changed position. If before_old is NULL
484 old_changed was the first and we met it implicitly before current best route. */
485 int old_meet
= old_changed
&& !before_old
;
487 /* Note that before_old is either NULL or valid (not rejected) route.
488 If old_changed is valid, before_old have to be too. If old changed route
489 was not valid, caller must use NULL for both old_changed and before_old. */
492 stats
->exp_updates_received
++;
494 stats
->exp_withdraws_received
++;
496 /* First, find the new_best route - first accepted by filters */
497 for (r
=net
->routes
; rte_is_valid(r
); r
=r
->next
)
499 if (new_best
= export_filter(ah
, r
, &new_free
, &tmpa
, 0))
502 /* Note if we walked around the position of old_changed route */
508 * Second, handle the feed case. That means we do not care for
509 * old_best. It is NULL for feed, and the new_best for refeed.
510 * For refeed, there is a hack similar to one in rt_notify_basic()
511 * to ensure withdraws in case of changed filters
515 if (feed
== 2) /* refeed */
516 old_best
= new_best
? new_best
:
517 (rte_is_valid(net
->routes
) ? net
->routes
: NULL
);
521 if (!new_best
&& !old_best
)
528 * Now, we find the old_best route. Generally, it is the same as the
529 * new_best, unless new_best is the same as new_changed or
530 * old_changed is accepted before new_best.
532 * There are four cases:
534 * - We would find and accept old_changed before new_best, therefore
535 * old_changed is old_best. In remaining cases we suppose this
538 * - We found no new_best, therefore there is also no old_best and
539 * we ignore this withdraw.
541 * - We found new_best different than new_changed, therefore
542 * old_best is the same as new_best and we ignore this update.
544 * - We found new_best the same as new_changed, therefore it cannot
545 * be old_best and we have to continue search for old_best.
550 if (old_best
= export_filter(ah
, old_changed
, &old_free
, NULL
, 1))
557 /* Third case, we use r instead of new_best, because export_filter() could change it */
558 if (r
!= new_changed
)
566 for (r
=r
->next
; rte_is_valid(r
); r
=r
->next
)
568 if (old_best
= export_filter(ah
, r
, &old_free
, NULL
, 1))
572 if (old_best
= export_filter(ah
, old_changed
, &old_free
, NULL
, 1))
576 /* Implicitly, old_best is NULL and new_best is non-NULL */
579 do_rt_notify(ah
, net
, new_best
, old_best
, tmpa
, (feed
== 2));
581 /* Discard temporary rte's */
590 mpnh_merge_rta(struct mpnh
*nhs
, rta
*a
, linpool
*pool
, int max
)
592 struct mpnh nh
= { .gw
= a
->gw
, .iface
= a
->iface
};
593 struct mpnh
*nh2
= (a
->dest
== RTD_MULTIPATH
) ? a
->nexthops
: &nh
;
594 return mpnh_merge(nhs
, nh2
, 1, 0, max
, pool
);
598 rt_export_merged(struct announce_hook
*ah
, net
*net
, rte
**rt_free
, ea_list
**tmpa
, linpool
*pool
, int silent
)
600 // struct proto *p = ah->proto;
601 struct mpnh
*nhs
= NULL
;
602 rte
*best0
, *best
, *rt0
, *rt
, *tmp
;
607 if (!rte_is_valid(best0
))
610 best
= export_filter_(ah
, best0
, rt_free
, tmpa
, pool
, silent
);
612 if (!best
|| !rte_is_reachable(best
))
615 for (rt0
= best0
->next
; rt0
; rt0
= rt0
->next
)
617 if (!rte_mergable(best0
, rt0
))
620 rt
= export_filter_(ah
, rt0
, &tmp
, NULL
, pool
, 1);
625 if (rte_is_reachable(rt
))
626 nhs
= mpnh_merge_rta(nhs
, rt
->attrs
, pool
, ah
->proto
->merge_limit
);
634 nhs
= mpnh_merge_rta(nhs
, best
->attrs
, pool
, ah
->proto
->merge_limit
);
638 best
= rte_cow_rta(best
, pool
);
639 best
->attrs
->dest
= RTD_MULTIPATH
;
640 best
->attrs
->nexthops
= nhs
;
652 rt_notify_merged(struct announce_hook
*ah
, net
*net
, rte
*new_changed
, rte
*old_changed
,
653 rte
*new_best
, rte
*old_best
, int refeed
)
655 // struct proto *p = ah->proto;
657 rte
*new_best_free
= NULL
;
658 rte
*old_best_free
= NULL
;
659 rte
*new_changed_free
= NULL
;
660 rte
*old_changed_free
= NULL
;
661 ea_list
*tmpa
= NULL
;
663 /* We assume that all rte arguments are either NULL or rte_is_valid() */
665 /* This check should be done by the caller */
666 if (!new_best
&& !old_best
)
669 /* Check whether the change is relevant to the merged route */
670 if ((new_best
== old_best
) && !refeed
)
672 new_changed
= rte_mergable(new_best
, new_changed
) ?
673 export_filter(ah
, new_changed
, &new_changed_free
, NULL
, 1) : NULL
;
675 old_changed
= rte_mergable(old_best
, old_changed
) ?
676 export_filter(ah
, old_changed
, &old_changed_free
, NULL
, 1) : NULL
;
678 if (!new_changed
&& !old_changed
)
683 ah
->stats
->exp_updates_received
++;
685 ah
->stats
->exp_withdraws_received
++;
687 /* Prepare new merged route */
689 new_best
= rt_export_merged(ah
, net
, &new_best_free
, &tmpa
, rte_update_pool
, 0);
691 /* Prepare old merged route (without proper merged next hops) */
692 /* There are some issues with running filter on old route - see rt_notify_basic() */
693 if (old_best
&& !refeed
)
694 old_best
= export_filter(ah
, old_best
, &old_best_free
, NULL
, 1);
696 if (new_best
|| old_best
)
697 do_rt_notify(ah
, net
, new_best
, old_best
, tmpa
, refeed
);
699 /* Discard temporary rte's */
701 rte_free(new_best_free
);
703 rte_free(old_best_free
);
704 if (new_changed_free
)
705 rte_free(new_changed_free
);
706 if (old_changed_free
)
707 rte_free(old_changed_free
);
712 * rte_announce - announce a routing table change
713 * @tab: table the route has been added to
714 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
715 * @net: network in question
716 * @new: the new route to be announced
717 * @old: the previous route for the same network
718 * @new_best: the new best route for the same network
719 * @old_best: the previous best route for the same network
720 * @before_old: The previous route before @old for the same network.
721 * If @before_old is NULL @old was the first.
723 * This function gets a routing table update and announces it
724 * to all protocols that acccepts given type of route announcement
725 * and are connected to the same table by their announcement hooks.
727 * Route announcement of type %RA_OPTIMAL si generated when optimal
728 * route (in routing table @tab) changes. In that case @old stores the
731 * Route announcement of type %RA_ANY si generated when any route (in
732 * routing table @tab) changes In that case @old stores the old route
733 * from the same protocol.
735 * For each appropriate protocol, we first call its import_control()
736 * hook which performs basic checks on the route (each protocol has a
737 * right to veto or force accept of the route before any filter is
738 * asked) and adds default values of attributes specific to the new
739 * protocol (metrics, tags etc.). Then it consults the protocol's
740 * export filter and if it accepts the route, the rt_notify() hook of
741 * the protocol gets called.
744 rte_announce(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
745 rte
*new_best
, rte
*old_best
, rte
*before_old
)
747 if (!rte_is_valid(new))
750 if (!rte_is_valid(old
))
751 old
= before_old
= NULL
;
753 if (!rte_is_valid(new_best
))
756 if (!rte_is_valid(old_best
))
762 if (type
== RA_OPTIMAL
)
765 new->attrs
->src
->proto
->stats
.pref_routes
++;
767 old
->attrs
->src
->proto
->stats
.pref_routes
--;
770 rt_notify_hostcache(tab
, net
);
773 struct announce_hook
*a
;
774 WALK_LIST(a
, tab
->hooks
)
776 ASSERT(a
->proto
->export_state
!= ES_DOWN
);
777 if (a
->proto
->accept_ra_types
== type
)
778 if (type
== RA_ACCEPTED
)
779 rt_notify_accepted(a
, net
, new, old
, before_old
, 0);
780 else if (type
== RA_MERGED
)
781 rt_notify_merged(a
, net
, new, old
, new_best
, old_best
, 0);
783 rt_notify_basic(a
, net
, new, old
, 0);
793 if ((n
->n
.pxlen
> BITS_PER_IP_ADDRESS
) || !ip_is_prefix(n
->n
.prefix
,n
->n
.pxlen
))
795 log(L_WARN
"Ignoring bogus prefix %I/%d received via %s",
796 n
->n
.prefix
, n
->n
.pxlen
, e
->sender
->proto
->name
);
800 c
= ipa_classify_net(n
->n
.prefix
);
801 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
803 log(L_WARN
"Ignoring bogus route %I/%d received via %s",
804 n
->n
.prefix
, n
->n
.pxlen
, e
->sender
->proto
->name
);
808 if ((e
->attrs
->dest
== RTD_MULTIPATH
) && !mpnh_is_sorted(e
->attrs
->nexthops
))
810 log(L_WARN
"Ignoring unsorted multipath route %I/%d received via %s",
811 n
->n
.prefix
, n
->n
.pxlen
, e
->sender
->proto
->name
);
819 * rte_free - delete a &rte
820 * @e: &rte to be deleted
822 * rte_free() deletes the given &rte from the routing table it's linked to.
827 if (rta_is_cached(e
->attrs
))
829 sl_free(rte_slab
, e
);
833 rte_free_quick(rte
*e
)
836 sl_free(rte_slab
, e
);
840 rte_same(rte
*x
, rte
*y
)
843 x
->attrs
== y
->attrs
&&
844 x
->flags
== y
->flags
&&
845 x
->pflags
== y
->pflags
&&
846 x
->pref
== y
->pref
&&
847 (!x
->attrs
->src
->proto
->rte_same
|| x
->attrs
->src
->proto
->rte_same(x
, y
));
850 static inline int rte_is_ok(rte
*e
) { return e
&& !rte_is_filtered(e
); }
853 rte_recalculate(struct announce_hook
*ah
, net
*net
, rte
*new, struct rte_src
*src
)
855 struct proto
*p
= ah
->proto
;
856 struct rtable
*table
= ah
->table
;
857 struct proto_stats
*stats
= ah
->stats
;
858 static struct tbf rl_pipe
= TBF_DEFAULT_LOG_LIMITS
;
859 rte
*before_old
= NULL
;
860 rte
*old_best
= net
->routes
;
864 k
= &net
->routes
; /* Find and remove original route from the same protocol */
867 if (old
->attrs
->src
== src
)
869 /* If there is the same route in the routing table but from
870 * a different sender, then there are two paths from the
871 * source protocol to this routing table through transparent
872 * pipes, which is not allowed.
874 * We log that and ignore the route. If it is withdraw, we
875 * ignore it completely (there might be 'spurious withdraws',
876 * see FIXME in do_rte_announce())
878 if (old
->sender
->proto
!= p
)
882 log_rl(&rl_pipe
, L_ERR
"Pipe collision detected when sending %I/%d to table %s",
883 net
->n
.prefix
, net
->n
.pxlen
, table
->name
);
889 if (new && rte_same(old
, new))
891 /* No changes, ignore the new route */
893 if (!rte_is_filtered(new))
895 stats
->imp_updates_ignored
++;
896 rte_trace_in(D_ROUTES
, p
, new, "ignored");
914 stats
->imp_withdraws_ignored
++;
918 int new_ok
= rte_is_ok(new);
919 int old_ok
= rte_is_ok(old
);
921 struct proto_limit
*l
= ah
->rx_limit
;
922 if (l
&& !old
&& new)
924 u32 all_routes
= stats
->imp_routes
+ stats
->filt_routes
;
926 if (all_routes
>= l
->limit
)
927 proto_notify_limit(ah
, l
, PLD_RX
, all_routes
);
929 if (l
->state
== PLS_BLOCKED
)
931 /* In receive limit the situation is simple, old is NULL so
932 we just free new and exit like nothing happened */
934 stats
->imp_updates_ignored
++;
935 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
942 if (l
&& !old_ok
&& new_ok
)
944 if (stats
->imp_routes
>= l
->limit
)
945 proto_notify_limit(ah
, l
, PLD_IN
, stats
->imp_routes
);
947 if (l
->state
== PLS_BLOCKED
)
949 /* In import limit the situation is more complicated. We
950 shouldn't just drop the route, we should handle it like
951 it was filtered. We also have to continue the route
952 processing if old or new is non-NULL, but we should exit
953 if both are NULL as this case is probably assumed to be
956 stats
->imp_updates_ignored
++;
957 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
959 if (ah
->in_keep_filtered
)
960 new->flags
|= REF_FILTERED
;
962 { rte_free_quick(new); new = NULL
; }
964 /* Note that old && !new could be possible when
965 ah->in_keep_filtered changed in the recent past. */
976 stats
->imp_updates_accepted
++;
978 stats
->imp_withdraws_accepted
++;
980 stats
->imp_withdraws_ignored
++;
985 rte_is_filtered(new) ? stats
->filt_routes
++ : stats
->imp_routes
++;
987 rte_is_filtered(old
) ? stats
->filt_routes
-- : stats
->imp_routes
--;
989 if (table
->config
->sorted
)
991 /* If routes are sorted, just insert new route to appropriate position */
994 if (before_old
&& !rte_better(new, before_old
))
995 k
= &before_old
->next
;
999 for (; *k
; k
=&(*k
)->next
)
1000 if (rte_better(new, *k
))
1009 /* If routes are not sorted, find the best route and move it on
1010 the first position. There are several optimized cases. */
1012 if (src
->proto
->rte_recalculate
&& src
->proto
->rte_recalculate(table
, net
, new, old
, old_best
))
1013 goto do_recalculate
;
1015 if (new && rte_better(new, old_best
))
1017 /* The first case - the new route is cleary optimal,
1018 we link it at the first position */
1020 new->next
= net
->routes
;
1023 else if (old
== old_best
)
1025 /* The second case - the old best route disappeared, we add the
1026 new route (if we have any) to the list (we don't care about
1027 position) and then we elect the new optimal route and relink
1028 that route at the first position and announce it. New optimal
1029 route might be NULL if there is no more routes */
1032 /* Add the new route to the list */
1035 new->next
= net
->routes
;
1039 /* Find a new optimal route (if there is any) */
1042 rte
**bp
= &net
->routes
;
1043 for (k
=&(*bp
)->next
; *k
; k
=&(*k
)->next
)
1044 if (rte_better(*k
, *bp
))
1050 best
->next
= net
->routes
;
1056 /* The third case - the new route is not better than the old
1057 best route (therefore old_best != NULL) and the old best
1058 route was not removed (therefore old_best == net->routes).
1059 We just link the new route after the old best route. */
1061 ASSERT(net
->routes
!= NULL
);
1062 new->next
= net
->routes
->next
;
1063 net
->routes
->next
= new;
1065 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1071 /* Log the route change */
1072 if (p
->debug
& D_ROUTES
)
1075 rte_trace(p
, new, '>', new == net
->routes
? "added [best]" : "added");
1078 if (old
!= old_best
)
1079 rte_trace(p
, old
, '>', "removed");
1080 else if (rte_is_ok(net
->routes
))
1081 rte_trace(p
, old
, '>', "removed [replaced]");
1083 rte_trace(p
, old
, '>', "removed [sole]");
1087 /* Propagate the route change */
1088 rte_announce(table
, RA_ANY
, net
, new, old
, NULL
, NULL
, NULL
);
1089 if (net
->routes
!= old_best
)
1090 rte_announce(table
, RA_OPTIMAL
, net
, net
->routes
, old_best
, NULL
, NULL
, NULL
);
1091 if (table
->config
->sorted
)
1092 rte_announce(table
, RA_ACCEPTED
, net
, new, old
, NULL
, NULL
, before_old
);
1093 rte_announce(table
, RA_MERGED
, net
, new, old
, net
->routes
, old_best
, NULL
);
1096 (table
->gc_counter
++ >= table
->config
->gc_max_ops
) &&
1097 (table
->gc_time
+ table
->config
->gc_min_time
<= now
))
1098 rt_schedule_gc(table
);
1100 if (old_ok
&& p
->rte_remove
)
1101 p
->rte_remove(net
, old
);
1102 if (new_ok
&& p
->rte_insert
)
1103 p
->rte_insert(net
, new);
1106 rte_free_quick(old
);
1109 static int rte_update_nest_cnt
; /* Nesting counter to allow recursive updates */
1112 rte_update_lock(void)
1114 rte_update_nest_cnt
++;
1118 rte_update_unlock(void)
1120 if (!--rte_update_nest_cnt
)
1121 lp_flush(rte_update_pool
);
1125 rte_hide_dummy_routes(net
*net
, rte
**dummy
)
1127 if (net
->routes
&& net
->routes
->attrs
->source
== RTS_DUMMY
)
1129 *dummy
= net
->routes
;
1130 net
->routes
= (*dummy
)->next
;
1135 rte_unhide_dummy_routes(net
*net
, rte
**dummy
)
1139 (*dummy
)->next
= net
->routes
;
1140 net
->routes
= *dummy
;
1145 * rte_update - enter a new update to a routing table
1146 * @table: table to be updated
1147 * @ah: pointer to table announce hook
1148 * @net: network node
1149 * @p: protocol submitting the update
1150 * @src: protocol originating the update
1151 * @new: a &rte representing the new route or %NULL for route removal.
1153 * This function is called by the routing protocols whenever they discover
1154 * a new route or wish to update/remove an existing route. The right announcement
1155 * sequence is to build route attributes first (either un-cached with @aflags set
1156 * to zero or a cached one using rta_lookup(); in this case please note that
1157 * you need to increase the use count of the attributes yourself by calling
1158 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1159 * the appropriate data and finally submit the new &rte by calling rte_update().
1161 * @src specifies the protocol that originally created the route and the meaning
1162 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1163 * same value as @new->attrs->proto. @p specifies the protocol that called
1164 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1165 * stores @p in @new->sender;
1167 * When rte_update() gets any route, it automatically validates it (checks,
1168 * whether the network and next hop address are valid IP addresses and also
1169 * whether a normal routing protocol doesn't try to smuggle a host or link
1170 * scope route to the table), converts all protocol dependent attributes stored
1171 * in the &rte to temporary extended attributes, consults import filters of the
1172 * protocol to see if the route should be accepted and/or its attributes modified,
1173 * stores the temporary attributes back to the &rte.
1175 * Now, having a "public" version of the route, we
1176 * automatically find any old route defined by the protocol @src
1177 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1178 * recalculate the optimal route for this destination and finally broadcast
1179 * the change (if any) to all routing protocols by calling rte_announce().
1181 * All memory used for attribute lists and other temporary allocations is taken
1182 * from a special linear pool @rte_update_pool and freed when rte_update()
1187 rte_update2(struct announce_hook
*ah
, net
*net
, rte
*new, struct rte_src
*src
)
1189 struct proto
*p
= ah
->proto
;
1190 struct proto_stats
*stats
= ah
->stats
;
1191 struct filter
*filter
= ah
->in_filter
;
1192 ea_list
*tmpa
= NULL
;
1200 stats
->imp_updates_received
++;
1201 if (!rte_validate(new))
1203 rte_trace_in(D_FILTERS
, p
, new, "invalid");
1204 stats
->imp_updates_invalid
++;
1208 if (filter
== FILTER_REJECT
)
1210 stats
->imp_updates_filtered
++;
1211 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1213 if (! ah
->in_keep_filtered
)
1216 /* new is a private copy, i could modify it */
1217 new->flags
|= REF_FILTERED
;
1221 tmpa
= rte_make_tmp_attrs(new, rte_update_pool
);
1222 if (filter
&& (filter
!= FILTER_REJECT
))
1224 ea_list
*old_tmpa
= tmpa
;
1225 int fr
= f_run(filter
, &new, &tmpa
, rte_update_pool
, 0);
1228 stats
->imp_updates_filtered
++;
1229 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1231 if (! ah
->in_keep_filtered
)
1234 new->flags
|= REF_FILTERED
;
1236 if (tmpa
!= old_tmpa
&& src
->proto
->store_tmp_attrs
)
1237 src
->proto
->store_tmp_attrs(new, tmpa
);
1240 if (!rta_is_cached(new->attrs
)) /* Need to copy attributes */
1241 new->attrs
= rta_lookup(new->attrs
);
1242 new->flags
|= REF_COW
;
1246 stats
->imp_withdraws_received
++;
1250 stats
->imp_withdraws_ignored
++;
1251 rte_update_unlock();
1257 rte_hide_dummy_routes(net
, &dummy
);
1258 rte_recalculate(ah
, net
, new, src
);
1259 rte_unhide_dummy_routes(net
, &dummy
);
1260 rte_update_unlock();
1269 /* Independent call to rte_announce(), used from next hop
1270 recalculation, outside of rte_update(). new must be non-NULL */
1272 rte_announce_i(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
1273 rte
*new_best
, rte
*old_best
)
1276 rte_announce(tab
, type
, net
, new, old
, new_best
, old_best
, NULL
);
1277 rte_update_unlock();
1281 rte_discard(rte
*old
) /* Non-filtered route deletion, used during garbage collection */
1284 rte_recalculate(old
->sender
, old
->net
, NULL
, old
->attrs
->src
);
1285 rte_update_unlock();
1288 /* Check rtable for best route to given net whether it would be exported do p */
1290 rt_examine(rtable
*t
, ip_addr prefix
, int pxlen
, struct proto
*p
, struct filter
*filter
)
1292 net
*n
= net_find(t
, prefix
, pxlen
);
1293 rte
*rt
= n
? n
->routes
: NULL
;
1295 if (!rte_is_valid(rt
))
1300 /* Rest is stripped down export_filter() */
1301 ea_list
*tmpa
= rte_make_tmp_attrs(rt
, rte_update_pool
);
1302 int v
= p
->import_control
? p
->import_control(p
, &rt
, &tmpa
, rte_update_pool
) : 0;
1303 if (v
== RIC_PROCESS
)
1304 v
= (f_run(filter
, &rt
, &tmpa
, rte_update_pool
,
1305 FF_FORCE_TMPATTR
| FF_SILENT
) <= F_ACCEPT
);
1307 /* Discard temporary rte */
1308 if (rt
!= n
->routes
)
1311 rte_update_unlock();
1318 * rt_refresh_begin - start a refresh cycle
1319 * @t: related routing table
1320 * @ah: related announce hook
1322 * This function starts a refresh cycle for given routing table and announce
1323 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1324 * routes to the routing table (by rte_update()). After that, all protocol
1325 * routes (more precisely routes with @ah as @sender) not sent during the
1326 * refresh cycle but still in the table from the past are pruned. This is
1327 * implemented by marking all related routes as stale by REF_STALE flag in
1328 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1329 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1332 rt_refresh_begin(rtable
*t
, struct announce_hook
*ah
)
1337 FIB_WALK(&t
->fib
, fn
)
1340 for (e
= n
->routes
; e
; e
= e
->next
)
1341 if (e
->sender
== ah
)
1342 e
->flags
|= REF_STALE
;
1348 * rt_refresh_end - end a refresh cycle
1349 * @t: related routing table
1350 * @ah: related announce hook
1352 * This function ends a refresh cycle for given routing table and announce
1353 * hook. See rt_refresh_begin() for description of refresh cycles.
1356 rt_refresh_end(rtable
*t
, struct announce_hook
*ah
)
1362 FIB_WALK(&t
->fib
, fn
)
1365 for (e
= n
->routes
; e
; e
= e
->next
)
1366 if ((e
->sender
== ah
) && (e
->flags
& REF_STALE
))
1368 e
->flags
|= REF_DISCARD
;
1375 rt_schedule_prune(t
);
1380 * rte_dump - dump a route
1381 * @e: &rte to be dumped
1383 * This function dumps contents of a &rte to debug output.
1389 debug("%-1I/%2d ", n
->n
.prefix
, n
->n
.pxlen
);
1390 debug("KF=%02x PF=%02x pref=%d lm=%d ", n
->n
.flags
, e
->pflags
, e
->pref
, now
-e
->lastmod
);
1392 if (e
->attrs
->src
->proto
->proto
->dump_attrs
)
1393 e
->attrs
->src
->proto
->proto
->dump_attrs(e
);
1398 * rt_dump - dump a routing table
1399 * @t: routing table to be dumped
1401 * This function dumps contents of a given routing table to debug output.
1408 struct announce_hook
*a
;
1410 debug("Dump of routing table <%s>\n", t
->name
);
1414 FIB_WALK(&t
->fib
, fn
)
1417 for(e
=n
->routes
; e
; e
=e
->next
)
1421 WALK_LIST(a
, t
->hooks
)
1422 debug("\tAnnounces routes to protocol %s\n", a
->proto
->name
);
1427 * rt_dump_all - dump all routing tables
1429 * This function dumps contents of all routing tables to debug output.
1436 WALK_LIST(t
, routing_tables
)
1441 rt_schedule_prune(rtable
*tab
)
1443 rt_mark_for_prune(tab
);
1444 ev_schedule(tab
->rt_event
);
1448 rt_schedule_gc(rtable
*tab
)
1450 if (tab
->gc_scheduled
)
1453 tab
->gc_scheduled
= 1;
1454 ev_schedule(tab
->rt_event
);
1458 rt_schedule_hcu(rtable
*tab
)
1460 if (tab
->hcu_scheduled
)
1463 tab
->hcu_scheduled
= 1;
1464 ev_schedule(tab
->rt_event
);
1468 rt_schedule_nhu(rtable
*tab
)
1470 if (tab
->nhu_state
== 0)
1471 ev_schedule(tab
->rt_event
);
1473 /* state change 0->1, 2->3 */
1474 tab
->nhu_state
|= 1;
1479 rt_prune_nets(rtable
*tab
)
1481 struct fib_iterator fit
;
1482 int ncnt
= 0, ndel
= 0;
1485 fib_check(&tab
->fib
);
1488 FIB_ITERATE_INIT(&fit
, &tab
->fib
);
1490 FIB_ITERATE_START(&tab
->fib
, &fit
, f
)
1494 if (!n
->routes
) /* Orphaned FIB entry */
1496 FIB_ITERATE_PUT(&fit
, f
);
1497 fib_delete(&tab
->fib
, f
);
1503 DBG("Pruned %d of %d networks\n", ndel
, ncnt
);
1505 tab
->gc_counter
= 0;
1507 tab
->gc_scheduled
= 0;
1515 if (tab
->hcu_scheduled
)
1516 rt_update_hostcache(tab
);
1519 rt_next_hop_update(tab
);
1521 if (tab
->prune_state
)
1522 if (!rt_prune_table(tab
))
1524 /* Table prune unfinished */
1525 ev_schedule(tab
->rt_event
);
1529 if (tab
->gc_scheduled
)
1532 rt_prune_sources(); // FIXME this should be moved to independent event
1537 rt_setup(pool
*p
, rtable
*t
, char *name
, struct rtable_config
*cf
)
1539 bzero(t
, sizeof(*t
));
1540 fib_init(&t
->fib
, p
, sizeof(net
), 0, rte_init
);
1543 init_list(&t
->hooks
);
1546 t
->rt_event
= ev_new(p
);
1547 t
->rt_event
->hook
= rt_event
;
1548 t
->rt_event
->data
= t
;
1554 * rt_init - initialize routing tables
1556 * This function is called during BIRD startup. It initializes the
1557 * routing table module.
1563 rt_table_pool
= rp_new(&root_pool
, "Routing tables");
1564 rte_update_pool
= lp_new(rt_table_pool
, 4080);
1565 rte_slab
= sl_new(rt_table_pool
, sizeof(rte
));
1566 init_list(&routing_tables
);
1571 rt_prune_step(rtable
*tab
, int *limit
)
1573 struct fib_iterator
*fit
= &tab
->prune_fit
;
1575 DBG("Pruning route table %s\n", tab
->name
);
1577 fib_check(&tab
->fib
);
1580 if (tab
->prune_state
== RPS_NONE
)
1583 if (tab
->prune_state
== RPS_SCHEDULED
)
1585 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1586 tab
->prune_state
= RPS_RUNNING
;
1590 FIB_ITERATE_START(&tab
->fib
, fit
, fn
)
1592 net
*n
= (net
*) fn
;
1596 for (e
=n
->routes
; e
; e
=e
->next
)
1597 if (e
->sender
->proto
->flushing
|| (e
->flags
& REF_DISCARD
))
1601 FIB_ITERATE_PUT(fit
, fn
);
1610 if (!n
->routes
) /* Orphaned FIB entry */
1612 FIB_ITERATE_PUT(fit
, fn
);
1613 fib_delete(&tab
->fib
, fn
);
1617 FIB_ITERATE_END(fn
);
1620 fib_check(&tab
->fib
);
1623 tab
->prune_state
= RPS_NONE
;
1628 * rt_prune_table - prune a routing table
1629 * @tab: a routing table for pruning
1631 * This function scans the routing table @tab and removes routes belonging to
1632 * flushing protocols, discarded routes and also stale network entries, in a
1633 * similar fashion like rt_prune_loop(). Returns 1 when all such routes are
1634 * pruned. Contrary to rt_prune_loop(), this function is not a part of the
1635 * protocol flushing loop, but it is called from rt_event() for just one routing
1638 * Note that rt_prune_table() and rt_prune_loop() share (for each table) the
1639 * prune state (@prune_state) and also the pruning iterator (@prune_fit).
1642 rt_prune_table(rtable
*tab
)
1645 return rt_prune_step(tab
, &limit
);
1649 * rt_prune_loop - prune routing tables
1651 * The prune loop scans routing tables and removes routes belonging to flushing
1652 * protocols, discarded routes and also stale network entries. Returns 1 when
1653 * all such routes are pruned. It is a part of the protocol flushing loop.
1661 WALK_LIST(t
, routing_tables
)
1662 if (! rt_prune_step(t
, &limit
))
1669 rt_preconfig(struct config
*c
)
1671 struct symbol
*s
= cf_get_symbol("master");
1673 init_list(&c
->tables
);
1674 c
->master_rtc
= rt_new_table(s
);
1679 * Some functions for handling internal next hop updates
1680 * triggered by rt_schedule_nhu().
1684 rta_next_hop_outdated(rta
*a
)
1686 struct hostentry
*he
= a
->hostentry
;
1692 return a
->dest
!= RTD_UNREACHABLE
;
1694 return (a
->iface
!= he
->src
->iface
) || !ipa_equal(a
->gw
, he
->gw
) ||
1695 (a
->dest
!= he
->dest
) || (a
->igp_metric
!= he
->igp_metric
) ||
1696 !mpnh_same(a
->nexthops
, he
->src
->nexthops
);
1700 rta_apply_hostentry(rta
*a
, struct hostentry
*he
)
1703 a
->iface
= he
->src
? he
->src
->iface
: NULL
;
1706 a
->igp_metric
= he
->igp_metric
;
1707 a
->nexthops
= he
->src
? he
->src
->nexthops
: NULL
;
1711 rt_next_hop_update_rte(rtable
*tab UNUSED
, rte
*old
)
1714 memcpy(&a
, old
->attrs
, sizeof(rta
));
1715 rta_apply_hostentry(&a
, old
->attrs
->hostentry
);
1718 rte
*e
= sl_alloc(rte_slab
);
1719 memcpy(e
, old
, sizeof(rte
));
1720 e
->attrs
= rta_lookup(&a
);
1726 rt_next_hop_update_net(rtable
*tab
, net
*n
)
1728 rte
**k
, *e
, *new, *old_best
, **new_best
;
1730 int free_old_best
= 0;
1732 old_best
= n
->routes
;
1736 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1737 if (rta_next_hop_outdated(e
->attrs
))
1739 new = rt_next_hop_update_rte(tab
, e
);
1742 rte_announce_i(tab
, RA_ANY
, n
, new, e
, NULL
, NULL
);
1743 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated");
1745 /* Call a pre-comparison hook */
1746 /* Not really an efficient way to compute this */
1747 if (e
->attrs
->src
->proto
->rte_recalculate
)
1748 e
->attrs
->src
->proto
->rte_recalculate(tab
, n
, new, e
, NULL
);
1752 else /* Freeing of the old best rte is postponed */
1762 /* Find the new best route */
1764 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1766 if (!new_best
|| rte_better(e
, *new_best
))
1770 /* Relink the new best route to the first position */
1772 if (new != n
->routes
)
1774 *new_best
= new->next
;
1775 new->next
= n
->routes
;
1779 /* Announce the new best route */
1780 if (new != old_best
)
1782 rte_announce_i(tab
, RA_OPTIMAL
, n
, new, old_best
, NULL
, NULL
);
1783 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated [best]");
1786 /* FIXME: Better announcement of merged routes */
1787 rte_announce_i(tab
, RA_MERGED
, n
, new, old_best
, new, old_best
);
1790 rte_free_quick(old_best
);
1796 rt_next_hop_update(rtable
*tab
)
1798 struct fib_iterator
*fit
= &tab
->nhu_fit
;
1801 if (tab
->nhu_state
== 0)
1804 if (tab
->nhu_state
== 1)
1806 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1810 FIB_ITERATE_START(&tab
->fib
, fit
, fn
)
1814 FIB_ITERATE_PUT(fit
, fn
);
1815 ev_schedule(tab
->rt_event
);
1818 max_feed
-= rt_next_hop_update_net(tab
, (net
*) fn
);
1820 FIB_ITERATE_END(fn
);
1822 /* state change 2->0, 3->1 */
1823 tab
->nhu_state
&= 1;
1825 if (tab
->nhu_state
> 0)
1826 ev_schedule(tab
->rt_event
);
1830 struct rtable_config
*
1831 rt_new_table(struct symbol
*s
)
1833 /* Hack that allows to 'redefine' the master table */
1834 if ((s
->class == SYM_TABLE
) && (s
->def
== new_config
->master_rtc
))
1837 struct rtable_config
*c
= cfg_allocz(sizeof(struct rtable_config
));
1839 cf_define_symbol(s
, SYM_TABLE
, c
);
1841 add_tail(&new_config
->tables
, &c
->n
);
1842 c
->gc_max_ops
= 1000;
1848 * rt_lock_table - lock a routing table
1849 * @r: routing table to be locked
1851 * Lock a routing table, because it's in use by a protocol,
1852 * preventing it from being freed when it gets undefined in a new
1856 rt_lock_table(rtable
*r
)
1862 * rt_unlock_table - unlock a routing table
1863 * @r: routing table to be unlocked
1865 * Unlock a routing table formerly locked by rt_lock_table(),
1866 * that is decrease its use count and delete it if it's scheduled
1867 * for deletion by configuration changes.
1870 rt_unlock_table(rtable
*r
)
1872 if (!--r
->use_count
&& r
->deleted
)
1874 struct config
*conf
= r
->deleted
;
1875 DBG("Deleting routing table %s\n", r
->name
);
1876 r
->config
->table
= NULL
;
1878 rt_free_hostcache(r
);
1883 config_del_obstacle(conf
);
1888 * rt_commit - commit new routing table configuration
1889 * @new: new configuration
1890 * @old: original configuration or %NULL if it's boot time config
1892 * Scan differences between @old and @new configuration and modify
1893 * the routing tables according to these changes. If @new defines a
1894 * previously unknown table, create it, if it omits a table existing
1895 * in @old, schedule it for deletion (it gets deleted when all protocols
1896 * disconnect from it by calling rt_unlock_table()), if it exists
1897 * in both configurations, leave it unchanged.
1900 rt_commit(struct config
*new, struct config
*old
)
1902 struct rtable_config
*o
, *r
;
1904 DBG("rt_commit:\n");
1907 WALK_LIST(o
, old
->tables
)
1909 rtable
*ot
= o
->table
;
1912 struct symbol
*sym
= cf_find_symbol(new, o
->name
);
1913 if (sym
&& sym
->class == SYM_TABLE
&& !new->shutdown
)
1915 DBG("\t%s: same\n", o
->name
);
1920 if (o
->sorted
!= r
->sorted
)
1921 log(L_WARN
"Reconfiguration of rtable sorted flag not implemented");
1925 DBG("\t%s: deleted\n", o
->name
);
1927 config_add_obstacle(old
);
1929 rt_unlock_table(ot
);
1935 WALK_LIST(r
, new->tables
)
1938 rtable
*t
= mb_alloc(rt_table_pool
, sizeof(struct rtable
));
1939 DBG("\t%s: created\n", r
->name
);
1940 rt_setup(rt_table_pool
, t
, r
->name
, r
);
1941 add_tail(&routing_tables
, &t
->n
);
1948 do_feed_baby(struct proto
*p
, int type
, struct announce_hook
*h
, net
*n
, rte
*e
)
1951 if (type
== RA_ACCEPTED
)
1952 rt_notify_accepted(h
, n
, e
, NULL
, NULL
, p
->refeeding
? 2 : 1);
1953 else if (type
== RA_MERGED
)
1954 rt_notify_merged(h
, n
, NULL
, NULL
, e
, p
->refeeding
? e
: NULL
, p
->refeeding
);
1956 rt_notify_basic(h
, n
, e
, p
->refeeding
? e
: NULL
, p
->refeeding
);
1957 rte_update_unlock();
1961 * rt_feed_baby - advertise routes to a new protocol
1962 * @p: protocol to be fed
1964 * This function performs one pass of advertisement of routes to a newly
1965 * initialized protocol. It's called by the protocol code as long as it
1966 * has something to do. (We avoid transferring all the routes in single
1967 * pass in order not to monopolize CPU time.)
1970 rt_feed_baby(struct proto
*p
)
1972 struct announce_hook
*h
;
1973 struct fib_iterator
*fit
;
1976 if (!p
->feed_ahook
) /* Need to initialize first */
1980 DBG("Announcing routes to new protocol %s\n", p
->name
);
1981 p
->feed_ahook
= p
->ahooks
;
1982 fit
= p
->feed_iterator
= mb_alloc(p
->pool
, sizeof(struct fib_iterator
));
1985 fit
= p
->feed_iterator
;
1989 FIB_ITERATE_START(&h
->table
->fib
, fit
, fn
)
1991 net
*n
= (net
*) fn
;
1995 FIB_ITERATE_PUT(fit
, fn
);
1999 /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2001 if ((p
->accept_ra_types
== RA_OPTIMAL
) ||
2002 (p
->accept_ra_types
== RA_ACCEPTED
) ||
2003 (p
->accept_ra_types
== RA_MERGED
))
2004 if (rte_is_valid(e
))
2006 if (p
->export_state
!= ES_FEEDING
)
2007 return 1; /* In the meantime, the protocol fell down. */
2009 do_feed_baby(p
, p
->accept_ra_types
, h
, n
, e
);
2013 if (p
->accept_ra_types
== RA_ANY
)
2014 for(e
= n
->routes
; e
; e
= e
->next
)
2016 if (p
->export_state
!= ES_FEEDING
)
2017 return 1; /* In the meantime, the protocol fell down. */
2019 if (!rte_is_valid(e
))
2022 do_feed_baby(p
, RA_ANY
, h
, n
, e
);
2026 FIB_ITERATE_END(fn
);
2027 p
->feed_ahook
= h
->next
;
2030 mb_free(p
->feed_iterator
);
2031 p
->feed_iterator
= NULL
;
2037 FIB_ITERATE_INIT(fit
, &h
->table
->fib
);
2042 * rt_feed_baby_abort - abort protocol feeding
2045 * This function is called by the protocol code when the protocol
2046 * stops or ceases to exist before the last iteration of rt_feed_baby()
2050 rt_feed_baby_abort(struct proto
*p
)
2054 /* Unlink the iterator and exit */
2055 fit_get(&p
->feed_ahook
->table
->fib
, p
->feed_iterator
);
2056 p
->feed_ahook
= NULL
;
2061 static inline unsigned
2064 uintptr_t p
= (uintptr_t) ptr
;
2065 return p
^ (p
<< 8) ^ (p
>> 16);
2068 static inline unsigned
2069 hc_hash(ip_addr a
, rtable
*dep
)
2071 return (ipa_hash(a
) ^ ptr_hash(dep
)) & 0xffff;
2075 hc_insert(struct hostcache
*hc
, struct hostentry
*he
)
2077 uint k
= he
->hash_key
>> hc
->hash_shift
;
2078 he
->next
= hc
->hash_table
[k
];
2079 hc
->hash_table
[k
] = he
;
2083 hc_remove(struct hostcache
*hc
, struct hostentry
*he
)
2085 struct hostentry
**hep
;
2086 uint k
= he
->hash_key
>> hc
->hash_shift
;
2088 for (hep
= &hc
->hash_table
[k
]; *hep
!= he
; hep
= &(*hep
)->next
);
2092 #define HC_DEF_ORDER 10
2093 #define HC_HI_MARK *4
2094 #define HC_HI_STEP 2
2095 #define HC_HI_ORDER 16 /* Must be at most 16 */
2096 #define HC_LO_MARK /5
2097 #define HC_LO_STEP 2
2098 #define HC_LO_ORDER 10
2101 hc_alloc_table(struct hostcache
*hc
, unsigned order
)
2103 uint hsize
= 1 << order
;
2104 hc
->hash_order
= order
;
2105 hc
->hash_shift
= 16 - order
;
2106 hc
->hash_max
= (order
>= HC_HI_ORDER
) ? ~0U : (hsize HC_HI_MARK
);
2107 hc
->hash_min
= (order
<= HC_LO_ORDER
) ? 0U : (hsize HC_LO_MARK
);
2109 hc
->hash_table
= mb_allocz(rt_table_pool
, hsize
* sizeof(struct hostentry
*));
2113 hc_resize(struct hostcache
*hc
, unsigned new_order
)
2115 struct hostentry
**old_table
= hc
->hash_table
;
2116 struct hostentry
*he
, *hen
;
2117 uint old_size
= 1 << hc
->hash_order
;
2120 hc_alloc_table(hc
, new_order
);
2121 for (i
= 0; i
< old_size
; i
++)
2122 for (he
= old_table
[i
]; he
!= NULL
; he
=hen
)
2130 static struct hostentry
*
2131 hc_new_hostentry(struct hostcache
*hc
, ip_addr a
, ip_addr ll
, rtable
*dep
, unsigned k
)
2133 struct hostentry
*he
= sl_alloc(hc
->slab
);
2142 add_tail(&hc
->hostentries
, &he
->ln
);
2146 if (hc
->hash_items
> hc
->hash_max
)
2147 hc_resize(hc
, hc
->hash_order
+ HC_HI_STEP
);
2153 hc_delete_hostentry(struct hostcache
*hc
, struct hostentry
*he
)
2159 sl_free(hc
->slab
, he
);
2162 if (hc
->hash_items
< hc
->hash_min
)
2163 hc_resize(hc
, hc
->hash_order
- HC_LO_STEP
);
2167 rt_init_hostcache(rtable
*tab
)
2169 struct hostcache
*hc
= mb_allocz(rt_table_pool
, sizeof(struct hostcache
));
2170 init_list(&hc
->hostentries
);
2173 hc_alloc_table(hc
, HC_DEF_ORDER
);
2174 hc
->slab
= sl_new(rt_table_pool
, sizeof(struct hostentry
));
2176 hc
->lp
= lp_new(rt_table_pool
, 1008);
2177 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2179 tab
->hostcache
= hc
;
2183 rt_free_hostcache(rtable
*tab
)
2185 struct hostcache
*hc
= tab
->hostcache
;
2188 WALK_LIST(n
, hc
->hostentries
)
2190 struct hostentry
*he
= SKIP_BACK(struct hostentry
, ln
, n
);
2194 log(L_ERR
"Hostcache is not empty in table %s", tab
->name
);
2199 mb_free(hc
->hash_table
);
2204 rt_notify_hostcache(rtable
*tab
, net
*net
)
2206 struct hostcache
*hc
= tab
->hostcache
;
2208 if (tab
->hcu_scheduled
)
2211 if (trie_match_prefix(hc
->trie
, net
->n
.prefix
, net
->n
.pxlen
))
2212 rt_schedule_hcu(tab
);
2216 if_local_addr(ip_addr a
, struct iface
*i
)
2220 WALK_LIST(b
, i
->addrs
)
2221 if (ipa_equal(a
, b
->ip
))
2228 rt_get_igp_metric(rte
*rt
)
2230 eattr
*ea
= ea_find(rt
->attrs
->eattrs
, EA_GEN_IGP_METRIC
);
2238 if ((a
->source
== RTS_OSPF
) ||
2239 (a
->source
== RTS_OSPF_IA
) ||
2240 (a
->source
== RTS_OSPF_EXT1
))
2241 return rt
->u
.ospf
.metric1
;
2245 if (a
->source
== RTS_RIP
)
2246 return rt
->u
.rip
.metric
;
2250 if ((a
->dest
!= RTD_ROUTER
) && (a
->dest
!= RTD_MULTIPATH
))
2253 return IGP_METRIC_UNKNOWN
;
2257 rt_update_hostentry(rtable
*tab
, struct hostentry
*he
)
2259 rta
*old_src
= he
->src
;
2262 /* Reset the hostentry */
2265 he
->dest
= RTD_UNREACHABLE
;
2268 net
*n
= net_route(tab
, he
->addr
, MAX_PREFIX_LENGTH
);
2277 /* Recursive route should not depend on another recursive route */
2278 log(L_WARN
"Next hop address %I resolvable through recursive route for %I/%d",
2279 he
->addr
, n
->n
.prefix
, pxlen
);
2283 if (a
->dest
== RTD_DEVICE
)
2285 if (if_local_addr(he
->addr
, a
->iface
))
2287 /* The host address is a local address, this is not valid */
2288 log(L_WARN
"Next hop address %I is a local address of iface %s",
2289 he
->addr
, a
->iface
->name
);
2293 /* The host is directly reachable, use link as a gateway */
2295 he
->dest
= RTD_ROUTER
;
2299 /* The host is reachable through some route entry */
2304 he
->src
= rta_clone(a
);
2305 he
->igp_metric
= rt_get_igp_metric(e
);
2309 /* Add a prefix range to the trie */
2310 trie_add_prefix(tab
->hostcache
->trie
, he
->addr
, MAX_PREFIX_LENGTH
, pxlen
, MAX_PREFIX_LENGTH
);
2313 return old_src
!= he
->src
;
2317 rt_update_hostcache(rtable
*tab
)
2319 struct hostcache
*hc
= tab
->hostcache
;
2320 struct hostentry
*he
;
2323 /* Reset the trie */
2325 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2327 WALK_LIST_DELSAFE(n
, x
, hc
->hostentries
)
2329 he
= SKIP_BACK(struct hostentry
, ln
, n
);
2332 hc_delete_hostentry(hc
, he
);
2336 if (rt_update_hostentry(tab
, he
))
2337 rt_schedule_nhu(he
->tab
);
2340 tab
->hcu_scheduled
= 0;
2343 static struct hostentry
*
2344 rt_get_hostentry(rtable
*tab
, ip_addr a
, ip_addr ll
, rtable
*dep
)
2346 struct hostentry
*he
;
2348 if (!tab
->hostcache
)
2349 rt_init_hostcache(tab
);
2351 uint k
= hc_hash(a
, dep
);
2352 struct hostcache
*hc
= tab
->hostcache
;
2353 for (he
= hc
->hash_table
[k
>> hc
->hash_shift
]; he
!= NULL
; he
= he
->next
)
2354 if (ipa_equal(he
->addr
, a
) && (he
->tab
== dep
))
2357 he
= hc_new_hostentry(hc
, a
, ll
, dep
, k
);
2358 rt_update_hostentry(tab
, he
);
2363 rta_set_recursive_next_hop(rtable
*dep
, rta
*a
, rtable
*tab
, ip_addr
*gw
, ip_addr
*ll
)
2365 rta_apply_hostentry(a
, rt_get_hostentry(tab
, *gw
, *ll
, dep
));
2374 rt_format_via(rte
*e
)
2378 /* Max text length w/o IP addr and interface name is 16 */
2379 static byte via
[STD_ADDRESS_P_LENGTH
+sizeof(a
->iface
->name
)+16];
2383 case RTD_ROUTER
: bsprintf(via
, "via %I on %s", a
->gw
, a
->iface
->name
); break;
2384 case RTD_DEVICE
: bsprintf(via
, "dev %s", a
->iface
->name
); break;
2385 case RTD_BLACKHOLE
: bsprintf(via
, "blackhole"); break;
2386 case RTD_UNREACHABLE
: bsprintf(via
, "unreachable"); break;
2387 case RTD_PROHIBIT
: bsprintf(via
, "prohibited"); break;
2388 case RTD_MULTIPATH
: bsprintf(via
, "multipath"); break;
2389 default: bsprintf(via
, "???");
2395 rt_show_rte(struct cli
*c
, byte
*ia
, rte
*e
, struct rt_show_data
*d
, ea_list
*tmpa
)
2397 byte from
[STD_ADDRESS_P_LENGTH
+8];
2398 byte tm
[TM_DATETIME_BUFFER_SIZE
], info
[256];
2400 int primary
= (e
->net
->routes
== e
);
2401 int sync_error
= (e
->net
->n
.flags
& KRF_SYNC_ERROR
);
2402 void (*get_route_info
)(struct rte
*, byte
*buf
, struct ea_list
*attrs
);
2405 tm_format_datetime(tm
, &config
->tf_route
, e
->lastmod
);
2406 if (ipa_nonzero(a
->from
) && !ipa_equal(a
->from
, a
->gw
))
2407 bsprintf(from
, " from %I", a
->from
);
2411 get_route_info
= a
->src
->proto
->proto
->get_route_info
;
2412 if (get_route_info
|| d
->verbose
)
2414 /* Need to normalize the extended attributes */
2416 t
= ea_append(t
, a
->eattrs
);
2417 tmpa
= alloca(ea_scan(t
));
2422 get_route_info(e
, info
, tmpa
);
2424 bsprintf(info
, " (%d)", e
->pref
);
2425 cli_printf(c
, -1007, "%-18s %s [%s %s%s]%s%s", ia
, rt_format_via(e
), a
->src
->proto
->name
,
2426 tm
, from
, primary
? (sync_error
? " !" : " *") : "", info
);
2427 for (nh
= a
->nexthops
; nh
; nh
= nh
->next
)
2428 cli_printf(c
, -1007, "\tvia %I on %s weight %d", nh
->gw
, nh
->iface
->name
, nh
->weight
+ 1);
2430 rta_show(c
, a
, tmpa
);
2434 rt_show_net(struct cli
*c
, net
*n
, struct rt_show_data
*d
)
2437 byte ia
[STD_ADDRESS_P_LENGTH
+8];
2438 struct ea_list
*tmpa
;
2439 struct announce_hook
*a
= NULL
;
2443 bsprintf(ia
, "%I/%d", n
->n
.prefix
, n
->n
.pxlen
);
2447 if (! d
->export_protocol
->rt_notify
)
2450 a
= proto_find_announce_hook(d
->export_protocol
, d
->table
);
2455 for (e
= n
->routes
; e
; e
= e
->next
)
2457 if (rte_is_filtered(e
) != d
->filtered
)
2461 d
->net_counter
+= first
;
2468 rte_update_lock(); /* We use the update buffer for filtering */
2469 tmpa
= rte_make_tmp_attrs(e
, rte_update_pool
);
2471 /* Special case for merged export */
2472 if ((d
->export_mode
== RSEM_EXPORT
) && (d
->export_protocol
->accept_ra_types
== RA_MERGED
))
2475 e
= rt_export_merged(a
, n
, &rt_free
, &tmpa
, rte_update_pool
, 1);
2479 { e
= ee
; goto skip
; }
2481 else if (d
->export_mode
)
2483 struct proto
*ep
= d
->export_protocol
;
2484 int ic
= ep
->import_control
? ep
->import_control(ep
, &e
, &tmpa
, rte_update_pool
) : 0;
2486 if (ep
->accept_ra_types
== RA_OPTIMAL
|| ep
->accept_ra_types
== RA_MERGED
)
2492 if (d
->export_mode
> RSEM_PREEXPORT
)
2495 * FIXME - This shows what should be exported according to current
2496 * filters, but not what was really exported. 'configure soft'
2497 * command may change the export filter and do not update routes.
2499 int do_export
= (ic
> 0) ||
2500 (f_run(a
->out_filter
, &e
, &tmpa
, rte_update_pool
,
2501 FF_FORCE_TMPATTR
| FF_SILENT
) <= F_ACCEPT
);
2503 if (do_export
!= (d
->export_mode
== RSEM_EXPORT
))
2506 if ((d
->export_mode
== RSEM_EXPORT
) && (ep
->accept_ra_types
== RA_ACCEPTED
))
2511 if (d
->show_protocol
&& (d
->show_protocol
!= e
->attrs
->src
->proto
))
2514 if (f_run(d
->filter
, &e
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) > F_ACCEPT
)
2519 rt_show_rte(c
, ia
, e
, d
, tmpa
);
2528 rte_update_unlock();
2530 if (d
->primary_only
)
2536 rt_show_cont(struct cli
*c
)
2538 struct rt_show_data
*d
= c
->rover
;
2544 struct fib
*fib
= &d
->table
->fib
;
2545 struct fib_iterator
*it
= &d
->fit
;
2547 FIB_ITERATE_START(fib
, it
, f
)
2550 if (d
->running_on_config
&& d
->running_on_config
!= config
)
2552 cli_printf(c
, 8004, "Stopped due to reconfiguration");
2555 if (d
->export_protocol
&& (d
->export_protocol
->export_state
== ES_DOWN
))
2557 cli_printf(c
, 8005, "Protocol is down");
2562 FIB_ITERATE_PUT(it
, f
);
2565 rt_show_net(c
, n
, d
);
2569 cli_printf(c
, 14, "%d of %d routes for %d networks", d
->show_counter
, d
->rt_counter
, d
->net_counter
);
2571 cli_printf(c
, 0, "");
2573 c
->cont
= c
->cleanup
= NULL
;
2577 rt_show_cleanup(struct cli
*c
)
2579 struct rt_show_data
*d
= c
->rover
;
2581 /* Unlink the iterator */
2582 fit_get(&d
->table
->fib
, &d
->fit
);
2586 rt_show(struct rt_show_data
*d
)
2590 /* Default is either a master table or a table related to a respective protocol */
2591 if (!d
->table
&& d
->export_protocol
) d
->table
= d
->export_protocol
->table
;
2592 if (!d
->table
&& d
->show_protocol
) d
->table
= d
->show_protocol
->table
;
2593 if (!d
->table
) d
->table
= config
->master_rtc
->table
;
2595 /* Filtered routes are neither exported nor have sensible ordering */
2596 if (d
->filtered
&& (d
->export_mode
|| d
->primary_only
))
2599 if (d
->pxlen
== 256)
2601 FIB_ITERATE_INIT(&d
->fit
, &d
->table
->fib
);
2602 this_cli
->cont
= rt_show_cont
;
2603 this_cli
->cleanup
= rt_show_cleanup
;
2604 this_cli
->rover
= d
;
2609 n
= net_route(d
->table
, d
->prefix
, d
->pxlen
);
2611 n
= net_find(d
->table
, d
->prefix
, d
->pxlen
);
2614 rt_show_net(this_cli
, n
, d
);
2619 cli_msg(8001, "Network not in table");
2624 * Documentation for functions declared inline in route.h
2629 * net_find - find a network entry
2630 * @tab: a routing table
2631 * @addr: address of the network
2632 * @len: length of the network prefix
2634 * net_find() looks up the given network in routing table @tab and
2635 * returns a pointer to its &net entry or %NULL if no such network
2638 static inline net
*net_find(rtable
*tab
, ip_addr addr
, unsigned len
)
2642 * net_get - obtain a network entry
2643 * @tab: a routing table
2644 * @addr: address of the network
2645 * @len: length of the network prefix
2647 * net_get() looks up the given network in routing table @tab and
2648 * returns a pointer to its &net entry. If no such entry exists, it's
2651 static inline net
*net_get(rtable
*tab
, ip_addr addr
, unsigned len
)
2655 * rte_cow - copy a route for writing
2656 * @r: a route entry to be copied
2658 * rte_cow() takes a &rte and prepares it for modification. The exact action
2659 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2660 * just returned unchanged, else a new temporary entry with the same contents
2663 * The primary use of this function is inside the filter machinery -- when
2664 * a filter wants to modify &rte contents (to change the preference or to
2665 * attach another set of attributes), it must ensure that the &rte is not
2666 * shared with anyone else (and especially that it isn't stored in any routing
2669 * Result: a pointer to the new writable &rte.
2671 static inline rte
* rte_cow(rte
*r
)