From: Maria Matejka
Date: Wed, 9 Mar 2022 14:03:48 +0000 (+0100)
Subject: Merge commit 'f81702b7' into haugesund
X-Git-Tag: v3.0-alpha1~171^2~76
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=41572e0c1b53e3aee6e84a2daada97c5f7620697;p=thirdparty%2Fbird.git

Merge commit 'f81702b7' into haugesund
---

41572e0c1b53e3aee6e84a2daada97c5f7620697
diff --cc nest/route.h
index 2543aaf6c,d05681332..ab8ee258a
--- a/nest/route.h
+++ b/nest/route.h
@@@ -163,9 -161,7 +165,8 @@@ typedef struct rtable
  pool *rp; /* Resource pool to allocate everything from, including itself */
  struct slab *rte_slab; /* Slab to allocate route objects */
  struct fib fib;
+ struct f_trie *trie; /* Trie of prefixes defined in fib */
  char *name; /* Name of this table */
- list channels; /* List of attached channels (struct channel) */
  uint addr_type; /* Type of address data stored in table (NET_*) */
  int use_count; /* Number of protocols using this table */
  u32 rt_count; /* Number of routes in the table */
@@@ -185,15 -182,11 +187,16 @@@
  btime gc_time; /* Time of last GC */
  int gc_counter; /* Number of operations since last GC */
  byte prune_state; /* Table prune state, 1 -> scheduled, 2 -> running */
+ byte prune_trie; /* Prune prefix trie during next table prune */
  byte hcu_scheduled; /* Hostcache update is scheduled */
  byte nhu_state; /* Next Hop Update state */
+ byte internal; /* This table is internal for some other object */
  struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
  struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
+ struct f_trie *trie_new; /* New prefix trie defined during pruning */
+ struct f_trie *trie_old; /* Old prefix trie waiting to be freed */
+ u32 trie_lock_count; /* Prefix trie locked by walks */
+ u32 trie_old_lock_count; /* Old prefix trie locked by walks */
  struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */

  list subscribers; /* Subscribers for notifications */
@@@ -281,14 -265,126 +284,129 @@@ struct rte_storage
  #define REF_MODIFY 16 /* Route is scheduled for modify */

  /* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
- static inline int rte_is_valid_rte(rte *r) { return r && !(r->flags & REF_FILTERED); }
- static inline int rte_is_valid_storage(struct rte_storage *r) { return r && rte_is_valid_rte(&r->rte); }
 -static inline int rte_is_valid(const rte *r) { return r && !(r->flags & REF_FILTERED); }
++static inline int rte_is_valid_rte(const rte *r) { return r && !(r->flags & REF_FILTERED); }
++static inline int rte_is_valid_storage(const struct rte_storage *r) { return r && rte_is_valid_rte(&r->rte); }
+
+#define rte_is_valid(r) _Generic((*r), rte: rte_is_valid_rte, struct rte_storage: rte_is_valid_storage)(r)

  /* Route just has REF_FILTERED flag */
- static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); }
+ static inline int rte_is_filtered(const rte *r) { return !!(r->flags & REF_FILTERED); }
+
+
+ /* Table-channel connections */
+
+ struct rt_import_request {
+   struct rt_import_hook *hook; /* The table part of importer */
+   char *name;
+   u8 trace_routes;
+
+   void (*dump_req)(struct rt_import_request *req);
+   void (*log_state_change)(struct rt_import_request *req, u8 state);
+   /* Preimport is called when the @new route is about to be inserted, replacing @old.
+    * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */
+   struct rte *(*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old);
+   struct rte *(*rte_modify)(struct rte *, struct linpool *);
+ };
+
+ struct rt_import_hook {
+   node n;
+   rtable *table; /* The connected table */
+   struct rt_import_request *req; /* The requestor */
+
+   struct rt_import_stats {
+     /* Import - from protocol to core */
+     u32 pref; /* Number of routes selected as best in the (adjacent) routing table */
+     u32 updates_ignored; /* Number of route updates rejected as already in route table */
+     u32 updates_accepted; /* Number of route updates accepted and imported */
+     u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */
+     u32 withdraws_accepted; /* Number of route withdraws accepted and processed */
+   } stats;
+
+   btime last_state_change; /* Time of last state transition */
+
+   u8 import_state; /* IS_* */
+
+   void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */
+ };
+
+ struct rt_pending_export {
+   struct rte_storage *new, *new_best, *old, *old_best;
+ };
+
+ struct rt_export_request {
+   struct rt_export_hook *hook; /* Table part of the export */
+   char *name;
+   u8 trace_routes;
+
+   /* There are two methods of export. You can either request feeding every single change
+    * or feeding the whole route feed. In case of regular export, &export_one is preferred.
+    * When feeding, &export_bulk is preferred, falling back to &export_one.
+    * Thus, for RA_OPTIMAL, only &export_one is set;
+    * for RA_MERGED and RA_ACCEPTED, only &export_bulk is set;
+    * and for RA_ANY, both are set to accommodate feeding all routes while receiving single changes.
+    */
+   void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe);
+   void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count);
+
+   void (*dump_req)(struct rt_export_request *req);
+   void (*log_state_change)(struct rt_export_request *req, u8);
+ };
+
+ struct rt_export_hook {
+   node n;
+   rtable *table; /* The connected table */
+
+   pool *pool;
+   linpool *lp;
+
+   struct rt_export_request *req; /* The requestor */
+
+   struct rt_export_stats {
+     /* Export - from core to protocol */
+     u32 updates_received; /* Number of route updates received */
+     u32 withdraws_received; /* Number of route withdraws received */
+   } stats;
+
+   struct fib_iterator feed_fit; /* Routing table iterator used during feeding */
+
+   btime last_state_change; /* Time of last state transition */
+
+   u8 refeed_pending; /* Refeeding and another refeed is scheduled */
+   u8 export_state; /* Route export state (TES_*, see below) */
+
+   struct event *event; /* Event running all the export operations */
+
+   void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */
+ };
+
+ #define TIS_DOWN 0
+ #define TIS_UP 1
+ #define TIS_STOP 2
+ #define TIS_FLUSHING 3
+ #define TIS_WAITING 4
+ #define TIS_CLEARED 5
+ #define TIS_MAX 6
+
+ #define TES_DOWN 0
+ #define TES_HUNGRY 1
+ #define TES_FEEDING 2
+ #define TES_READY 3
+ #define TES_STOP 4
+ #define TES_MAX 5
+
+ void rt_request_import(rtable *tab, struct rt_import_request *req);
+ void rt_request_export(rtable *tab, struct rt_export_request *req);
+
+ void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *));
+ void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *));
+
+ const char *rt_import_state_name(u8 state);
+ const char *rt_export_state_name(u8 state);
+
+ static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; }
+ static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; }
+ void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src);

  /* Types of route announcement, also used as flags */
  #define RA_UNDEF 0 /* Undefined RA type */
@@@ -358,14 -451,13 +477,14 @@@ static inline net *net_find(rtable *tab
  static inline net *net_find_valid(rtable *tab, const net_addr *addr)
  { net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; }
  static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
 -void *net_route(rtable *tab, const net_addr *n);
 +net *net_get(rtable *tab, const net_addr *addr);
 +net *net_route(rtable *tab, const net_addr *n);
  int net_roa_check(rtable *tab, const net_addr *n, u32 asn);
  int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
- rte *rt_export_merged_show(struct channel *c, net *n, linpool *pool);
- void rt_refresh_begin(rtable *t, struct channel *c);
- void rt_refresh_end(rtable *t, struct channel *c);
- void rt_modify_stale(rtable *t, struct channel *c);
+ rte *rt_export_merged(struct channel *c, rte **feed, uint count, linpool *pool, int silent);
+ void rt_refresh_begin(rtable *t, struct rt_import_request *);
+ void rt_refresh_end(rtable *t, struct rt_import_request *);
+ void rt_modify_stale(rtable *t, struct rt_import_request *);
  void rt_schedule_prune(rtable *t);
  void rte_dump(struct rte_storage *);
  void rte_free(struct rte_storage *, rtable *);
@@@ -378,21 -470,10 +497,22 @@@ int rt_reload_channel(struct channel *c
  void rt_reload_channel_abort(struct channel *c);
  void rt_refeed_channel(struct channel *c);
  void rt_prune_sync(rtable *t, int all);
- int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old, struct rte_storage **old_exported);
+ int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src);
+ int rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old, struct rte_storage **old_exported);
  struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
+
+static inline int rt_is_ip(rtable *tab)
+{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); }
+
+static inline int rt_is_vpn(rtable *tab)
+{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); }
+
+static inline int rt_is_roa(rtable *tab)
+{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); }
+
+static inline int rt_is_flow(rtable *tab)
+{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); }
+
  /* Default limit for ECMP next hops, defined in sysdep code */
  extern const int rt_default_ecmp;
@@@ -418,10 -497,10 +538,11 @@@ struct rt_show_data
  struct channel *export_channel;
  struct config *running_on_config;
  struct krt_proto *kernel;
+ struct rt_export_hook *kernel_export_hook;
- int export_mode, primary_only, filtered, stats, show_for;
+ int export_mode, addr_mode, primary_only, filtered, stats;
  int table_open; /* Iteration (fit) is open */
+ int trie_walk; /* Current table is iterated using trie */
  int net_counter, rt_counter, show_counter, table_counter;
  int net_counter_last, rt_counter_last, show_counter_last;
  };
diff --cc nest/rt-table.c
index 4c3db55d4,e7ff28165..81929a0b7
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@@ -129,142 -58,52 +123,177 @@@ static void rt_update_hostcache(rtable
  static void rt_next_hop_update(rtable *tab);
  static inline void rt_prune_table(rtable *tab);
  static inline void rt_schedule_notify(rtable *tab);
 +static void rt_flowspec_notify(rtable *tab, net *net);
+ static void rt_feed_channel(void *);
+
+ const char *rt_import_state_name_array[TIS_MAX] = {
+   [TIS_DOWN] = "DOWN",
+   [TIS_UP] = "UP",
+   [TIS_STOP] = "STOP",
+   [TIS_FLUSHING] = "FLUSHING",
+   [TIS_WAITING] = "WAITING",
+   [TIS_CLEARED] = "CLEARED",
+ };
+
+ const char *rt_export_state_name_array[TES_MAX] = {
+   [TES_DOWN] = "DOWN",
+   [TES_HUNGRY] = "HUNGRY",
+   [TES_FEEDING] = "FEEDING",
+   [TES_READY] = "READY",
+   [TES_STOP] = "STOP"
+ };
+
+ const char *rt_import_state_name(u8 state)
+ {
+   if (state >= TIS_MAX)
+     return "!! INVALID !!";
+   else
+     return rt_import_state_name_array[state];
+ }
+
+ const char *rt_export_state_name(u8 state)
+ {
+   if (state >= TES_MAX)
+     return "!! INVALID !!";
+   else
+     return rt_export_state_name_array[state];
+ }
+
 -/* Like fib_route(), but skips empty net entries */
 +
 +static void
 +net_init_with_trie(struct fib *f, void *N)
 +{
 +  rtable *tab = SKIP_BACK(rtable, fib, f);
 +  net *n = N;
 +
 +  if (tab->trie)
 +    trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
 +
 +  if (tab->trie_new)
 +    trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
 +}
 +
 +static inline net *
 +net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0)
 +{
 +  TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n)
 +  {
 +    net *r;
 +    if (r = net_find_valid(t, (net_addr *) &n))
 +      return r;
 +  }
 +  TRIE_WALK_TO_ROOT_END;
 +
 +  return NULL;
 +}
 +
 +static inline net *
 +net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0)
 +{
 +  TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px)
 +  {
 +    net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd);
 +
 +    net *r;
 +    if (r = net_find_valid(t, (net_addr *) &n))
 +      return r;
 +  }
 +  TRIE_WALK_TO_ROOT_END;
 +
 +  return NULL;
 +}
 +
 +static inline net *
 +net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0)
 +{
 +  TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n)
 +  {
 +    net *r;
 +    if (r = net_find_valid(t, (net_addr *) &n))
 +      return r;
 +  }
 +  TRIE_WALK_TO_ROOT_END;
 +
 +  return NULL;
 +}
 +
 +static inline net *
 +net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0)
 +{
 +  TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
 +  {
 +    net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd);
 +
 +    net *r;
 +    if (r = net_find_valid(t, (net_addr *) &n))
 +      return r;
 +  }
 +  TRIE_WALK_TO_ROOT_END;
 +
 +  return NULL;
 +}
 +
  static inline void *
 -net_route_ip4(rtable *t, net_addr_ip4 *n)
 +net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
 +{
 +  TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
 +  {
 +    net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen);
 +    net *best = NULL;
 +    int best_pxlen = 0;
 +
 +    /* We need to do dst first matching. Since sadr addresses are hashed on dst
 +       prefix only, find the hash table chain and go through it to find the
 +       match with the longest matching src prefix. */
 +    for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next)
 +    {
 +      net_addr_ip6_sadr *a = (void *) fn->addr;
 +
 +      if (net_equal_dst_ip6_sadr(&n, a) &&
 +          net_in_net_src_ip6_sadr(&n, a) &&
 +          (a->src_pxlen >= best_pxlen))
 +      {
 +        best = fib_node_to_user(&t->fib, fn);
 +        best_pxlen = a->src_pxlen;
 +      }
 +    }
 +
 +    if (best)
 +      return best;
 +  }
 +  TRIE_WALK_TO_ROOT_END;
 +
 +  return NULL;
 +}
 +
 +static inline net *
 +net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0)
  {
 +  net_addr_ip4 n;
 +  net_copy_ip4(&n, n0);
 +
    net *r;
 +  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
 +  {
 +    n.pxlen--;
 +    ip4_clrbit(&n.prefix, n.pxlen);
 +  }
 +
 +  return r;
 +}
- while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
 +
 +static inline net *
 +net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0)
 +{
 +  net_addr_vpn4 n;
 +  net_copy_vpn4(&n, n0);
 +
 +  net *r;
 +  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
    {
-     n->pxlen--;
-     ip4_clrbit(&n->prefix, n->pxlen);
+     n.pxlen--;
+     ip4_clrbit(&n.prefix, n.pxlen);
    }

    return r;
@@@ -1173,12 -936,8 +1228,12 @@@ rte_validate(struct channel *ch, rte *e
  if (net_type_match(n, NB_DEST) == !e->attrs->dest)
  {
+   /* Exception for flowspec that failed validation */
+   if (net_is_flow(n) && (e->attrs->dest == RTD_UNREACHABLE))
+     return 1;
+
    log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
-       n, e->attrs->dest, e->sender->proto->name);
+       n, e->attrs->dest, ch->proto->name);
    return 0;
  }
@@@ -2039,20 -1853,10 +2230,20 @@@ rt_setup(pool *pp, struct rtable_confi
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);

+ if (cf->trie_used)
+ {
+   t->trie = f_new_trie(lp_new_default(p), 0);
+   t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4);
+
+   t->fib.init = net_init_with_trie;
+ }
+
- init_list(&t->channels);
+ init_list(&t->flowspec_links);
- init_list(&t->subscribers);
+ if (!(t->internal = cf->internal)) {
+   init_list(&t->imports);
+   init_list(&t->exports);
    hmap_init(&t->id_map, p, 1024);
    hmap_set(&t->id_map, 0);
@@@ -2140,17 -1934,17 +2334,17 @@@ again
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
    rescan:
+     if (limit <= 0)
+     {
+       FIB_ITERATE_PUT(fit);
+       ev_schedule(tab->rt_event);
+       return;
+     }
+
      for (struct rte_storage *e=n->routes; e; e=e->next)
      {
-       if (e->rte.sender->flush_active || (e->rte.flags & REF_DISCARD))
+       if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD))
        {
-         if (limit <= 0)
-         {
-           FIB_ITERATE_PUT(fit);
-           ev_schedule(tab->rt_event);
-           return;
-         }
-
          rte_discard(n, &e->rte);
          limit--;
@@@ -2191,120 -1986,20 +2385,117 @@@
  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

+ if (tab->trie_new)
+ {
+   /* Finish prefix trie pruning */
+
+   if (!tab->trie_lock_count)
+   {
+     rfree(tab->trie->lp);
+   }
+   else
+   {
+     ASSERT(!tab->trie_old);
+     tab->trie_old = tab->trie;
+     tab->trie_old_lock_count = tab->trie_lock_count;
+     tab->trie_lock_count = 0;
+   }
+
+   tab->trie = tab->trie_new;
+   tab->trie_new = NULL;
+   tab->prune_trie = 0;
+ }
+ else
+ {
+   /* Schedule prefix trie pruning */
+   if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
+   {
+     /* state change 0->1, 2->3 */
+     tab->prune_state |= 1;
+     tab->prune_trie = 1;
+   }
+ }
+
- if (tab->prune_state > 0)
-   ev_schedule(tab->rt_event);
-
- /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
- WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
-   if (c->flush_active)
-   {
-     c->flush_active = 0;
-     channel_set_state(c, CS_DOWN);
-   }
-
- return;
+ WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n)
+   if (ih->import_state == TIS_FLUSHING)
+   {
+     rt_set_import_state(ih, TIS_CLEARED);
+     ih->stopped(ih->req);
+     rem_node(&ih->n);
+     mb_free(ih);
+     rt_unlock_table(tab);
+   }
  }
 +
 +/**
 + * rt_lock_trie - lock a prefix trie of a routing table
 + * @tab: routing table with prefix trie to be locked
 + *
 + * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
 + * structures. Therefore, asynchronous walks should lock the prefix trie using
 + * this function. That allows the prune loop to rebuild the trie, but postpones
 + * its freeing until all walks are done (unlocked by rt_unlock_trie()).
 + *
 + * Returns the current trie that is locked; the value should be passed back to
 + * rt_unlock_trie() for unlocking.
 + */
 +struct f_trie *
 +rt_lock_trie(rtable *tab)
 +{
 +  ASSERT(tab->trie);
 +
 +  tab->trie_lock_count++;
 +  return tab->trie;
 +}
 +
 +/**
 + * rt_unlock_trie - unlock a prefix trie of a routing table
 + * @tab: routing table with prefix trie to be unlocked
 + * @trie: value returned by the matching rt_lock_trie()
 + *
 + * Called when a walk over a trie locked by rt_lock_trie() is done. It may
 + * free the trie and schedule the next trie pruning.
 + */
 +void
 +rt_unlock_trie(rtable *tab, struct f_trie *trie)
 +{
 +  ASSERT(trie);
 +
 +  if (trie == tab->trie)
 +  {
 +    /* Unlock the current prefix trie */
 +    ASSERT(tab->trie_lock_count);
 +    tab->trie_lock_count--;
 +  }
 +  else if (trie == tab->trie_old)
 +  {
 +    /* Unlock the old prefix trie */
 +    ASSERT(tab->trie_old_lock_count);
 +    tab->trie_old_lock_count--;
 +
 +    /* Free old prefix trie that is no longer needed */
 +    if (!tab->trie_old_lock_count)
 +    {
 +      rfree(tab->trie_old->lp);
 +      tab->trie_old = NULL;
 +
 +      /* Kick prefix trie pruning that was postponed */
 +      if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
 +      {
 +        tab->prune_trie = 1;
 +        rt_schedule_prune(tab);
 +      }
 +    }
 +  }
 +  else
 +    log(L_BUG "Invalid arg to rt_unlock_trie()");
 +}
 +
 +
  void
  rt_preconfig(struct config *c)
  {
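
Usage note (illustration, not part of the commit): the kernel-doc comments on
rt_lock_trie()/rt_unlock_trie() above prescribe the locking protocol for
asynchronous trie walks, but no caller appears in this excerpt. A minimal
sketch of the intended bracket follows, assuming only the declarations from
this diff; walk_table_async() and the elided walk body are hypothetical.

static void
walk_table_async(rtable *tab)
{
  /* Pin the trie we are going to walk; the prune loop may replace
   * tab->trie with a rebuilt trie while the walk is suspended. */
  struct f_trie *trie = rt_lock_trie(tab);

  /* ... walk the locked trie here (e.g. via f_trie_walk_state) ... */

  /* Hand back the same pointer: if the table switched to a rebuilt trie
   * in the meantime, this drops trie_old_lock_count instead, letting the
   * table free the old trie and kick the postponed pruning. */
  rt_unlock_trie(tab, trie);
}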
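
A second illustration, for the table-channel connections introduced in
nest/route.h above: an exporter fills in struct rt_export_request and attaches
it with rt_request_export(). Per the comment in the header, an RA_OPTIMAL-style
receiver sets only &export_one. The my_* names below are invented for the
example; only the types and functions declared in this diff are assumed.

static void
my_export_one(struct rt_export_request *req, const net_addr *net,
              struct rt_pending_export *rpe)
{
  /* Called once per change; rpe carries the new/old and new_best/old_best
   * route pointers for this net. */
}

static void
my_log_state_change(struct rt_export_request *req, u8 state)
{
  log(L_TRACE "%s: export state changed to %s", req->name, rt_export_state_name(state));
}

static struct rt_export_request my_req = {
  .name = "my-exporter",
  .export_one = my_export_one,  /* RA_OPTIMAL-style: no export_bulk */
  .log_state_change = my_log_state_change,
};

/* Attach: rt_request_export(tab, &my_req);
 * Detach: rt_stop_export(&my_req, my_stopped_cb); where my_stopped_cb
 * (hypothetical name) is called back once the table has flushed the export. */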