From: Maria Matejka
Date: Fri, 15 Jul 2022 12:57:02 +0000 (+0200)
Subject: Merge commit 'c70b3198' into thread-next [lots of conflicts]
X-Git-Tag: v3.0-alpha1~171^2~14
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=05673b16a87792baf8734dfcbf12ac2fd867f80b;p=thirdparty%2Fbird.git

Merge commit 'c70b3198' into thread-next [lots of conflicts]

There were more conflicts than I'd like to see, most notably in route export. If a bisect identifies this commit as the source of something related, it may simply be true that this commit introduces that bug. Let's hope it doesn't happen.
---
05673b16a87792baf8734dfcbf12ac2fd867f80b diff --cc lib/birdlib.h index 9b6e4a169,431b7c0d8..25545fc37 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@@ -178,8 -162,8 +178,13 @@@ void debug(const char *msg, ...); /* Pr #if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG) #define DBG(x, y...) debug(x, ##y) ++#define DBGL(x, y...) debug(x "\n", ##y) ++#elif defined(DEBUG_TO_LOG) ++#define DBG(...) do { } while (0) ++#define DBGL(...) log(L_DEBUG __VA_ARGS__) #else --#define DBG(x, y...) do { } while(0) ++#define DBG(...) do { } while(0) ++#define DBGL(...) do { } while (0) #endif #define ASSERT_DIE(x) do { if (!(x)) bug("Assertion '%s' failed at %s:%d", #x, __FILE__, __LINE__); } while(0) diff --cc lib/resource.h index 4cedbf005,26030aea6..8bb264b13 --- a/lib/resource.h +++ b/lib/resource.h @@@ -114,12 -94,12 +114,13 @@@ void sl_free(void *) void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size); +/* Allocator of whole pages; for use in slabs and other high-level allocators. */ ++#define PAGE_HEAD(x) ((void *) (((uintptr_t) (x)) & ~(page_size-1))) extern long page_size; +void *alloc_page(void); +void free_page(void *); -/* Allocator of whole pages; for use in slabs and other high-level allocators.
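/*
 * An illustrative usage sketch for the DBG()/DBGL() macros added above (the
 * function and the messages are invented for the example, not part of the
 * commit): DBG() keeps the old behaviour where the caller supplies its own
 * newline, DBGL() appends the newline itself, and under the DEBUG_TO_LOG
 * configuration DBGL() is redirected to log() with the L_DEBUG class while
 * DBG() compiles out entirely.
 */
static void
example_debug_usage(int changes)
{
  DBG("prune: %d changes so far\n", changes);   /* explicit newline needed */
  DBGL("prune: %d changes so far", changes);    /* newline or L_DEBUG log line supplied by the macro */
}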
*/ -void *alloc_page(pool *); -void free_page(pool *, void *); -#define PAGE_HEAD(x) ((void *) (((intptr_t) (x)) & ~(page_size-1))) +void resource_sys_init(void); #ifdef HAVE_LIBDMALLOC /* diff --cc lib/slab.c index 38d106269,6348e29bf..054daea12 --- a/lib/slab.c +++ b/lib/slab.c @@@ -197,15 -167,18 +197,15 @@@ static struct resclass sl_class = slab_memsize }; - #define SL_GET_HEAD(x) ((struct sl_head *) (((uintptr_t) (x)) & ~(page_size-1))) -struct sl_head { - node n; - u32 num_full; - u32 used_bits[0]; -}; ++#define SL_GET_HEAD(x) PAGE_HEAD(x) -struct sl_alignment { /* Magic structure for testing of alignment */ - byte data; - int x[0]; -}; +#define SL_HEAD_CHANGE_STATE(_s, _h, _from, _to) ({ \ + ASSERT_DIE(_h->state == slh_##_from); \ + sl_head_rem_node(&_s->_from##_heads, _h); \ + sl_head_add_head(&_s->_to##_heads, _h); \ + _h->state = slh_##_to; \ + }) -#define SL_GET_HEAD(x) ((struct sl_head *) PAGE_HEAD(x)) /** * sl_new - create a new Slab diff --cc nest/proto.c index 72e479d72,fae0647ae..3a80ab0ed --- a/nest/proto.c +++ b/nest/proto.c @@@ -697,8 -975,7 +697,6 @@@ channel_do_stop(struct channel *c CALL(c->channel->shutdown, c); - /* This have to be done in here, as channel pool is freed before channel_do_down() */ - c->out_table = NULL; - channel_roa_unsubscribe_all(c); } static void diff --cc nest/rt-show.c index 1dbfaec29,d942b8e18..6dfb85f63 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@@ -78,18 -96,32 +78,18 @@@ rt_show_rte(struct cli *c, byte *ia, rt if (d->verbose) { - cli_printf(c, -1008, "\tInternal route ID: %uL %uG %uS", e->src->private_id, e->src->global_id, e->stale_cycle); - rta_show(c, a); + ea_show_list(c, a); - cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS", - e->src->private_id, e->src->global_id, e->stale_cycle); ++ cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS id %u", ++ e->src->private_id, e->src->global_id, e->stale_cycle, e->id); + } + else if (dest == RTD_UNICAST) + ea_show_nexthop_list(c, nhad); + else if (had) + { + char hetext[256]; + ea_show_hostentry(&had->ad, hetext, sizeof hetext); + cli_printf(c, -1007, "\t%s", hetext); } -} - -static uint -rte_feed_count(net *n) -{ - uint count = 0; - for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) - count++; - return count; -} - -static void -rte_feed_obtain(net *n, rte **feed, uint count) -{ - uint i = 0; - for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) - { - ASSERT_DIE(i < count); - feed[i++] = &e->rte; - } - ASSERT_DIE(i == count); } static void diff --cc nest/rt-table.c index 5e07c1296,c049101ad..85a6faf73 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@@ -104,35 -44,39 +104,55 @@@ #include "lib/hash.h" #include "lib/string.h" #include "lib/alloca.h" +#include "lib/flowspec.h" + +#ifdef CONFIG_BGP +#include "proto/bgp/bgp.h" +#endif + #include + pool *rt_table_pool; static linpool *rte_update_pool; list routing_tables; +list deleted_routing_tables; + /* Data structures for export journal */ + #define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export) + + struct rt_export_block { + node n; + _Atomic u32 end; + _Atomic _Bool not_last; + struct rt_pending_export export[]; + }; + static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); +static inline 
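/*
 * A worked sizing example for the pending-export journal above (the concrete
 * numbers are assumptions for a typical 64-bit build, not taken from the
 * commit): with page_size == 4096, sizeof(struct rt_export_block) == 24 and
 * sizeof(struct rt_pending_export) == 48, RT_PENDING_EXPORT_ITEMS evaluates
 * to (4096 - 24) / 48 == 84, i.e. one page-sized block carries up to 84
 * pending exports. Because alloc_page() returns page-aligned memory, the
 * containing block can be recovered from any export stored inside it:
 */
static inline struct rt_export_block *
example_rpe_block(struct rt_pending_export *rpe)
{
  /* PAGE_HEAD() masks the pointer down to the start of its page */
  return PAGE_HEAD(rpe);
}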
void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); -static void rt_feed_channel(void *); - -static inline void rt_export_used(rtable *tab); +static void rt_flowspec_notify(rtable *tab, net *net); +static void rt_kick_prune_timer(rtable *tab); +static void rt_feed_by_fib(void *); +static void rt_feed_by_trie(void *); +static void rt_feed_equal(void *); +static void rt_feed_for(void *); +static uint rt_feed_net(struct rt_export_hook *c, net *n); + ++static inline void rt_export_used(struct rt_exporter *); + static void rt_export_cleanup(rtable *tab); + + static inline void rte_update_lock(void); + static inline void rte_update_unlock(void); + ++static int rte_same(rte *x, rte *y); ++ const char *rt_import_state_name_array[TIS_MAX] = { [TIS_DOWN] = "DOWN", [TIS_UP] = "UP", @@@ -685,9 -403,11 +705,10 @@@ rte_mergable(rte *pri, rte *sec static void rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s %c %s %N %uL %uG %s", - name, dir, msg, e->net, e->src->private_id, e->src->global_id, - log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s%s", ++ log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s", + name, dir, msg, e->net, + e->src->private_id, e->src->global_id, e->stale_cycle, e->id, - rta_dest_name(e->attrs->dest), - rte_is_filtered(e) ? " (filtered)" : ""); + rta_dest_name(rte_dest(e))); } static inline void @@@ -848,6 -589,6 +869,16 @@@ do_rt_notify(struct channel *c, const n static void rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { ++ if (new && old && rte_same(new, old)) ++ { ++ if ((new->id != old->id) && bmap_test(&c->export_map, old->id)) ++ { ++ bmap_set(&c->export_map, new->id); ++ bmap_clear(&c->export_map, old->id); ++ } ++ return; ++ } ++ if (new) new = export_filter(c, new, 0); @@@ -929,14 -678,15 +968,8 @@@ done } do_rt_notify(c, n, new_best, old_best); - - done: - /* Drop the old stored rejection if applicable. - * new->id == old->id happens when updating hostentries. */ - if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) - bmap_clear(&c->export_reject_map, rpe->old->rte.id); } - -static struct nexthop * -nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) -{ - return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); -} - rte * rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { @@@ -1060,19 -796,21 +1085,19 @@@ rt_notify_optimal(struct rt_export_requ { struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (rpe->new_best != rpe->old_best) - { - rte n0 = RTE_COPY_VALID(rpe->new_best); - rte *o = RTE_VALID_OR_NULL(rpe->old_best); - rte *old = RTES_OR_NULL(rpe->old_best); ++ rte *o = RTE_VALID_OR_NULL(rpe->old_best); + struct rte_storage *new_best = rpe->new_best; - if (n0.src || o) - rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_best = rpe->new_best; + rpe = rpe_next(rpe, NULL); } - /* Drop the old stored rejection if applicable. - * new->id == old->id happens when updating hostentries. */ - if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) - bmap_clear(&c->export_reject_map, rpe->old->rte.id); - if (&new_best->rte != old) - { - rte n0, *new = RTES_CLONE(new_best, &n0); - rt_notify_basic(c, net, new, old); - } ++ rte n0 = RTE_COPY_VALID(new_best); ++ if (n0.src || o) ++ rt_notify_basic(c, net, n0.src ? 
&n0 : NULL, o); } void @@@ -1080,18 -818,22 +1105,28 @@@ rt_notify_any(struct rt_export_request { struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (rpe->new != rpe->old) - struct rte_src *src = rpe->new ? rpe->new->rte.src : rpe->old->rte.src; - rte *old = RTES_OR_NULL(rpe->old); - struct rte_storage *new_any = rpe->new; ++ rte *n = RTE_VALID_OR_NULL(rpe->new); ++ rte *o = RTE_VALID_OR_NULL(rpe->old); + - while (rpe) ++ if (!n && !o) { - rte n0 = RTE_COPY_VALID(rpe->new); - rte *o = RTE_VALID_OR_NULL(rpe->old); - if (n0.src || o) - rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); + channel_rpe_mark_seen(req, rpe); - new_any = rpe->new; - rpe = rpe_next(rpe, src); ++ return; } - /* Drop the old stored rejection if applicable. - * new->id == old->id happens when updating hostentries. */ - if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) - bmap_clear(&c->export_reject_map, rpe->old->rte.id); - if (&new_any->rte != old) ++ struct rte_src *src = n ? n->src : o->src; ++ struct rte_storage *new_latest = rpe->new; ++ ++ while (rpe) + { - rte n0, *new = RTES_CLONE(new_any, &n0); - rt_notify_basic(c, net, new, old); ++ channel_rpe_mark_seen(req, rpe); ++ new_latest = rpe->new; ++ rpe = rpe_next(rpe, src); + } ++ ++ rte n0 = RTE_COPY_VALID(new_latest); ++ if (n0.src || o) ++ rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); } void @@@ -1100,13 -842,82 +1135,105 @@@ rt_feed_any(struct rt_export_request *r struct channel *c = SKIP_BACK(struct channel, out_req, req); for (uint i=0; iseq_map, rpe->seq); + } + + struct rt_pending_export * + rpe_next(struct rt_pending_export *rpe, struct rte_src *src) + { + struct rt_pending_export *next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + if (!next) + return NULL; + + if (!src) + return next; + + while (rpe = next) + if (src == (rpe->new ? rpe->new->rte.src : rpe->old->rte.src)) + return rpe; + else + next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + return NULL; + } + + static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last); + static void + rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) + { + if (bmap_test(&hook->seq_map, rpe->seq)) - goto seen; ++ goto ignore; /* Seen already */ + + const net_addr *n = rpe->new_best ? 
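/*
 * A condensed sketch of the consumption pattern implemented by the notify
 * hooks above (the callback name and its simplifications are illustrative
 * only): an export_one callback gets the oldest unseen pending export for a
 * network, marks the whole chain for that network as seen with
 * rpe_mark_seen()/rpe_next(), and then announces only the final state, so
 * intermediate changes are squashed.
 */
static void
example_export_one(struct rt_export_request *req, const net_addr *net UNUSED, struct rt_pending_export *first)
{
  struct rte_storage *old_best = first->old_best;
  struct rte_storage *new_best = NULL;

  for (struct rt_pending_export *rpe = first; rpe; rpe = rpe_next(rpe, NULL))
  {
    rpe_mark_seen(req->hook, rpe);	/* never process this export again */
    new_best = rpe->new_best;		/* remember only the newest state */
  }

  if (new_best != old_best)
  {
    /* run export filters and hand the change over to the protocol here */
  }
}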
rpe->new_best->rte.net : rpe->old_best->rte.net; + ++ switch (hook->req->addr_mode) ++ { ++ case TE_ADDR_NONE: ++ break; ++ ++ case TE_ADDR_IN: ++ if (!net_in_netX(n, hook->req->addr)) ++ goto ignore; ++ break; ++ ++ case TE_ADDR_EQUAL: ++ if (!net_equal(n, hook->req->addr)) ++ goto ignore; ++ break; ++ ++ case TE_ADDR_FOR: ++ bug("Continuos export of best prefix match not implemented yet."); ++ ++ default: ++ bug("Strange table export address mode: %d", hook->req->addr_mode); ++ } ++ + if (rpe->new) + hook->stats.updates_received++; + else + hook->stats.withdraws_received++; + + if (hook->req->export_one) + hook->req->export_one(hook->req, n, rpe); + else if (hook->req->export_bulk) + { + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) + { + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } + hook->req->export_bulk(hook->req, n, rpe, feed, count); + } + else + bug("Export request must always provide an export method"); + -seen: ++ignore: + /* Get the next export if exists */ + hook->rpe_next = rt_next_export_fast(rpe); + + /* The last block may be available to free */ + if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) - rt_export_used(hook->table); ++ CALL(hook->table->used, hook->table); + + /* Releasing this export for cleanup routine */ + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); + atomic_store_explicit(&hook->last_export, rpe, memory_order_release); + } + /** * rte_announce - announce a routing table change * @tab: table the route has been added to @@@ -1142,17 -953,30 +1269,17 @@@ static voi rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { - if (!new_best || !rte_is_valid(&new_best->rte)) - new_best = NULL; - - if (!old_best || !rte_is_valid(&old_best->rte)) - old_best = NULL; - - if (!new || !rte_is_valid(&new->rte)) - new = NULL; - - if (old && !rte_is_valid(&old->rte)) - { - /* Filtered old route isn't announced, should be freed immediately. */ - rte_free(old, tab); - old = NULL; - } + int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best)); + int old_best_valid = rte_is_valid(RTE_OR_NULL(old_best)); - if (!new && !old && !new_best && !old_best) + if ((new == old) && (new_best == old_best)) return; - if (new_best != old_best) + if (new_best_valid || old_best_valid) { - if (new_best) + if (new_best_valid) new_best->rte.sender->stats.pref++; - if (old_best) + if (old_best_valid) old_best->rte.sender->stats.pref--; if (tab->hostcache) @@@ -1164,57 -985,194 +1291,198 @@@ rt_schedule_notify(tab); - struct rt_pending_export rpe = { .new = new, .old = old, .new_best = new_best, .old_best = old_best }; - uint count = rte_feed_count(net); - rte **feed = NULL; - if (count) - if (EMPTY_LIST(tab->exports) && EMPTY_LIST(tab->pending_exports)) ++ if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending)) + { + /* No export hook and no pending exports to cleanup. We may free the route immediately. 
*/ + if (!old) + return; + + hmap_clear(&tab->id_map, old->rte.id); - rte_free(old, tab); ++ rte_free(old); + return; + } + + /* Get the pending export structure */ + struct rt_export_block *rpeb = NULL, *rpebsnl = NULL; + u32 end = 0; + - if (!EMPTY_LIST(tab->pending_exports)) ++ if (!EMPTY_LIST(tab->exporter.pending)) { - feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(net, feed, count); - rpeb = TAIL(tab->pending_exports); ++ rpeb = TAIL(tab->exporter.pending); + end = atomic_load_explicit(&rpeb->end, memory_order_relaxed); + if (end >= RT_PENDING_EXPORT_ITEMS) + { + ASSERT_DIE(end == RT_PENDING_EXPORT_ITEMS); + rpebsnl = rpeb; + + rpeb = NULL; + end = 0; + } } - struct rt_export_hook *eh; - WALK_LIST(eh, tab->exporter.hooks) + if (!rpeb) + { - rpeb = alloc_page(tab->rp); ++ rpeb = alloc_page(); + *rpeb = (struct rt_export_block) {}; - add_tail(&tab->pending_exports, &rpeb->n); ++ add_tail(&tab->exporter.pending, &rpeb->n); + } + + /* Fill the pending export */ + struct rt_pending_export *rpe = &rpeb->export[rpeb->end]; + *rpe = (struct rt_pending_export) { + .new = new, + .new_best = new_best, + .old = old, + .old_best = old_best, - .seq = tab->next_export_seq++, ++ .seq = tab->exporter.next_seq++, + }; + - DBG("rte_announce: table=%s net=%N new=%p from %p old=%p from %p new_best=%p old_best=%p seq=%lu\n", tab->name, net->n.addr, new, new ? new->sender : NULL, old, old ? old->sender : NULL, new_best, old_best, rpe->seq); ++ DBGL("rte_announce: table=%s net=%N new=%p id %u from %s old=%p id %u from %s new_best=%p id %u old_best=%p id %u seq=%lu", ++ tab->name, net->n.addr, ++ new, new ? new->rte.id : 0, new ? new->rte.sender->req->name : NULL, ++ old, old ? old->rte.id : 0, old ? old->rte.sender->req->name : NULL, ++ new_best, old_best, rpe->seq); + + ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end); + + if (rpebsnl) { - if (eh->export_state == TES_STOP) + _Bool f = 0; + ASSERT_DIE(atomic_compare_exchange_strong_explicit(&rpebsnl->not_last, &f, 1, + memory_order_release, memory_order_relaxed)); + } + + /* Append to the same-network squasher list */ + if (net->last) + { + struct rt_pending_export *rpenull = NULL; + ASSERT_DIE(atomic_compare_exchange_strong_explicit( + &net->last->next, &rpenull, rpe, + memory_order_relaxed, + memory_order_relaxed)); + + } + + net->last = rpe; + + if (!net->first) + net->first = rpe; + - if (tab->first_export == NULL) - tab->first_export = rpe; ++ if (tab->exporter.first == NULL) ++ tab->exporter.first = rpe; + - if (!tm_active(tab->export_timer)) - tm_start(tab->export_timer, tab->config->export_settle_time); ++ if (!tm_active(tab->exporter.export_timer)) ++ tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + } + + static struct rt_pending_export * + rt_next_export_fast(struct rt_pending_export *last) + { + /* Get the whole export block and find our position in there. */ + struct rt_export_block *rpeb = PAGE_HEAD(last); + u32 pos = (last - &rpeb->export[0]); + u32 end = atomic_load_explicit(&rpeb->end, memory_order_acquire); + ASSERT_DIE(pos < end); + + /* Next is in the same block. */ + if (++pos < end) + return &rpeb->export[pos]; + + /* There is another block. */ + if (atomic_load_explicit(&rpeb->not_last, memory_order_acquire)) + { + /* This is OK to do non-atomically because of the not_last flag. */ + rpeb = NODE_NEXT(rpeb); + return &rpeb->export[0]; + } + + /* There is nothing more. 
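/*
 * A worked example of the same-network squasher built above (addresses and
 * sequence numbers are invented): two quick updates of 192.0.2.0/24 enqueue
 * pending exports with seq 17 and seq 18 into the tail rt_export_block;
 * net->first points at seq 17, net->last at seq 18, and the two are linked
 * through the atomic ->next pointer. A hook that wakes up only after the
 * second change starts at seq 17, walks the chain with rpe_next(), marks
 * both entries as seen and announces just the final state, so the
 * intermediate route never reaches the protocol.
 */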
*/ + return NULL; + } + + static struct rt_pending_export * -rt_next_export(struct rt_export_hook *hook, rtable *tab) ++rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) + { + /* As the table is locked, it is safe to reload the last export pointer */ + struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); + + /* It is still valid, let's reuse it */ + if (last) + return rt_next_export_fast(last); + + /* No, therefore we must process the table's first pending export */ + else - return tab->first_export; ++ return tab->first; + } + + static void + rt_announce_exports(timer *tm) + { + rtable *tab = tm->data; + + struct rt_export_hook *c; node *n; - WALK_LIST2(c, n, tab->exports, n) ++ WALK_LIST2(c, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) continue; - switch (eh->req->addr_mode) - { - case TE_ADDR_NONE: - break; + ev_schedule_work(c->event); + } + } - case TE_ADDR_IN: - if (!net_in_netX(net->n.addr, eh->req->addr)) - continue; - break; + static struct rt_pending_export * -rt_last_export(rtable *tab) ++rt_last_export(struct rt_exporter *tab) + { + struct rt_pending_export *rpe = NULL; - case TE_ADDR_EQUAL: - if (!net_equal(net->n.addr, eh->req->addr)) - continue; - break; - if (!EMPTY_LIST(tab->pending_exports)) ++ if (!EMPTY_LIST(tab->pending)) + { + /* We'll continue processing exports from this export on */ - struct rt_export_block *reb = TAIL(tab->pending_exports); ++ struct rt_export_block *reb = TAIL(tab->pending); + ASSERT_DIE(reb->end); + rpe = &reb->export[reb->end - 1]; + } - case TE_ADDR_FOR: - bug("Continuos export of best prefix match not implemented yet."); + return rpe; + } - default: - bug("Strange table export address mode: %d", eh->req->addr_mode); + #define RT_EXPORT_BULK 1024 + + static void + rt_export_hook(void *_data) + { + struct rt_export_hook *c = _data; + + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY); + + if (!c->rpe_next) + { + c->rpe_next = rt_next_export(c, c->table); + + if (!c->rpe_next) + { - rt_export_used(c->table); ++ CALL(c->table->used, c->table); + return; } + } - if (new) - eh->stats.updates_received++; - else - eh->stats.withdraws_received++; + /* Process the export */ + for (uint i=0; ireq->export_one) - eh->req->export_one(eh->req, net->n.addr, &rpe); - else if (eh->req->export_bulk) - eh->req->export_bulk(eh->req, net->n.addr, &rpe, feed, count); - else - bug("Export request must always provide an export method"); + rte_export(c, c->rpe_next); + + if (!c->rpe_next) + break; + + rte_update_unlock(); } + + ev_schedule_work(c->event); } + static inline int rte_validate(struct channel *ch, rte *e) { @@@ -1459,28 -1393,27 +1727,22 @@@ rte_recalculate(struct rt_import_hook * if (new_stored) { new_stored->rte.lastmod = current_time(); - - if (!old) - { - new_stored->rte.id = hmap_first_zero(&table->id_map); - hmap_set(&table->id_map, new_stored->rte.id); - } - else - new_stored->rte.id = old->id; + new_stored->rte.id = hmap_first_zero(&table->id_map); + hmap_set(&table->id_map, new_stored->rte.id); } - _Bool nb = (new_stored == net->routes); - _Bool ob = (old_best == old); - /* Log the route change */ - if (new_ok && old_ok) + if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? 
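/*
 * The rt_export_hook() event above processes the journal in batches: one run
 * handles at most RT_EXPORT_BULK pending exports through rte_export() and
 * then reschedules its own event, so a long backlog never monopolizes the
 * event loop. A condensed sketch of that batching (simplified; the real hook
 * also asks rt_next_export() for a starting point and reports an empty queue
 * via the exporter's used() callback):
 */
static void
example_export_batch(struct rt_export_hook *c)
{
  for (uint i = 0; i < RT_EXPORT_BULK; i++)
  {
    rte_update_lock();			/* temporary allocations for this export */
    rte_export(c, c->rpe_next);		/* advances c->rpe_next */
    rte_update_unlock();

    if (!c->rpe_next)
      break;				/* journal drained for now */
  }

  ev_schedule_work(c->event);		/* continue with the next batch later */
}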
"added [best]" : "added"); + else if (old_ok) { - const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; - rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, best_indicator[nb][ob]); + if (old != old_best) + rt_rte_trace_in(D_ROUTES, req, old, "removed"); + else if (net->routes && rte_is_ok(&net->routes->rte)) + rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); + else + rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); } - else if (new_ok) - rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, - (!net->routes->next || !rte_is_ok(&net->routes->next->rte)) ? "added [sole]" : - nb ? "added [best]" : "added"); - else if (old_ok) - rt_rte_trace_in(D_ROUTES, req, old, - (!net->routes || !rte_is_ok(&net->routes->rte)) ? "removed [sole]" : - ob ? "removed [best]" : "removed"); /* Propagate the route change */ rte_announce(table, net, new_stored, old_stored, @@@ -1709,21 -1629,25 +1964,24 @@@ static voi rt_export_stopped(void *data) { struct rt_export_hook *hook = data; - rtable *tab = hook->table; + struct rt_exporter *tab = hook->table; + /* Drop pending exports */ - rt_export_used(tab); ++ CALL(tab->used, tab); + /* Unlist */ rem_node(&hook->n); - /* Reporting the channel as stopped. */ + /* Report the channel as stopped. */ hook->stopped(hook->req); + /* Reporting the hook as finished. */ + CALL(tab->done, hook); + - /* Freeing the hook together with its coroutine. */ + /* Free the hook together with its coroutine. */ rfree(hook->pool); - rt_unlock_table(tab); - - DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); } - static inline void rt_set_import_state(struct rt_import_hook *hook, u8 state) { @@@ -1782,85 -1708,33 +2040,90 @@@ rt_table_export_start(struct rt_exporte rt_lock_table(tab); pool *p = rp_new(tab->rp, "Export hook"); - struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook)); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); hook->pool = p; - hook->lp = lp_new_default(p); - - hook->req = req; - hook->table = tab; /* stats zeroed by mb_allocz */ + switch (req->addr_mode) + { + case TE_ADDR_IN: + if (tab->trie && net_val_match(tab->addr_type, NB_IP)) + { + hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state)); + hook->walk_lock = rt_lock_trie(tab); + trie_walk_init(hook->walk_state, tab->trie, req->addr); + hook->event = ev_new_init(p, rt_feed_by_trie, hook); + break; + } + /* fall through */ + case TE_ADDR_NONE: + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + hook->event = ev_new_init(p, rt_feed_by_fib, hook); + break; + + case TE_ADDR_EQUAL: + hook->event = ev_new_init(p, rt_feed_equal, hook); + break; + + case TE_ADDR_FOR: + hook->event = ev_new_init(p, rt_feed_for, hook); + break; + + default: + bug("Requested an unknown export address mode"); + } + + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); - bmap_init(&hook->seq_map, p, 1024); + return hook; +} - rt_set_export_state(hook, TES_HUNGRY); +void +rt_request_export(struct rt_exporter *re, struct rt_export_request *req) +{ + struct rt_export_hook *hook = req->hook = re->start(re, req); + + hook->req = req; + hook->table = re; + ++ bmap_init(&hook->seq_map, hook->pool, 1024); + + struct rt_pending_export *rpe = rt_last_export(hook->table); + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? 
rpe->seq : 0); + atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); + hook->n = (node) {}; - add_tail(&tab->exports, &hook->n); + add_tail(&re->hooks, &hook->n); - FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + /* Regular export */ + rt_set_export_state(hook, TES_FEEDING); + ev_schedule_work(hook->event); +} - DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); +static void +rt_table_export_stop(struct rt_export_hook *hook) +{ + rtable *tab = SKIP_BACK(rtable, exporter, hook->table); - if (hook->export_state != TES_FEEDING) - hook->event = ev_new_init(p, rt_feed_channel, hook); - ev_schedule_work(hook->event); ++ if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING) + return; - rt_set_export_state(hook, TES_FEEDING); + switch (hook->req->addr_mode) + { + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); + break; + } } void @@@ -2091,6 -1957,18 +2354,20 @@@ rt_schedule_prune(rtable *tab tab->prune_state |= 1; } -void -rt_export_used(rtable *tab) ++static void ++rt_export_used(struct rt_exporter *e) + { ++ rtable *tab = SKIP_BACK(rtable, exporter, e); ++ + if (config->table_debug) + log(L_TRACE "%s: Export cleanup requested", tab->name); + + if (tab->export_used) + return; + + tab->export_used = 1; + ev_schedule(tab->rt_event); + } static void rt_event(void *ptr) @@@ -2347,25 -2127,8 +2627,28 @@@ rt_setup(pool *pp, struct rtable_confi fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + if (cf->trie_used) + { + t->trie = f_new_trie(lp_new_default(p), 0); + t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4); + + t->fib.init = net_init_with_trie; + } + + init_list(&t->flowspec_links); + + t->exporter = (struct rt_exporter) { + .addr_type = t->addr_type, + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, ++ .used = rt_export_used, + }; ++ + init_list(&t->exporter.hooks); ++ init_list(&t->exporter.pending); + init_list(&t->imports); - init_list(&t->exports); hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); @@@ -2373,8 -2136,10 +2656,10 @@@ init_list(&t->subscribers); t->rt_event = ev_new_init(p, rt_event, t); - t->export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); ++ t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); t->last_rt_change = t->gc_time = current_time(); - t->next_export_seq = 1; ++ t->exporter.next_seq = 1; t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; @@@ -2510,48 -2253,20 +2795,48 @@@ again /* state change 2->0, 3->1 */ tab->prune_state &= 1; + if (tab->trie_new) + { + /* Finish prefix trie pruning */ + + if (!tab->trie_lock_count) + { + rfree(tab->trie->lp); + } + else + { + ASSERT(!tab->trie_old); + tab->trie_old = tab->trie; + tab->trie_old_lock_count = tab->trie_lock_count; + tab->trie_lock_count = 0; + } + + tab->trie = tab->trie_new; + tab->trie_new = NULL; + tab->prune_trie = 0; + } + else + { + /* Schedule prefix trie pruning */ + if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries))) + { + /* state change 0->1, 2->3 */ + tab->prune_state |= 1; + tab->prune_trie = 1; + } + } + rt_prune_sources(); + uint flushed_channels = 0; + /* Close flushed channels */ 
WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) if (ih->import_state == TIS_FLUSHING) { - rt_set_import_state(ih, TIS_CLEARED); - ih->stopped(ih->req); - rem_node(&ih->n); - mb_free(ih); - rt_unlock_table(tab); - ih->flush_seq = tab->next_export_seq; ++ ih->flush_seq = tab->exporter.next_seq; + rt_set_import_state(ih, TIS_WAITING); + flushed_channels++; } else if (ih->stale_pruning != ih->stale_pruned) { @@@ -2559,74 -2274,188 +2844,252 @@@ if (ih->req->trace_routes & D_STATES) log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); } + + /* In some cases, we may want to directly proceed to export cleanup */ - if (EMPTY_LIST(tab->exports) && flushed_channels) ++ if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels) + rt_export_cleanup(tab); + } + + static void + rt_export_cleanup(rtable *tab) + { + tab->export_used = 0; + + u64 min_seq = ~((u64) 0); + struct rt_pending_export *last_export_to_free = NULL; - struct rt_pending_export *first_export = tab->first_export; ++ struct rt_pending_export *first = tab->exporter.first; + + struct rt_export_hook *eh; + node *n; - WALK_LIST2(eh, n, tab->exports, n) ++ WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) + { + case TES_DOWN: - case TES_HUNGRY: + continue; + + case TES_READY: + { + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (!last) + /* No last export means that the channel has exported nothing since last cleanup */ + goto done; + + else if (min_seq > last->seq) + { + min_seq = last->seq; + last_export_to_free = last; + } + continue; + } + + default: + /* It's only safe to cleanup when the export state is idle or regular. No feeding or stopping allowed. */ + goto done; + } + } + - tab->first_export = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; ++ tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; + + if (config->table_debug) - log(L_TRACE "%s: Export cleanup, old first_export seq %lu, new %lu, min_seq %ld", ++ log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", + tab->name, - first_export ? first_export->seq : 0, - tab->first_export ? tab->first_export->seq : 0, ++ first ? first->seq : 0, ++ tab->exporter.first ? tab->exporter.first->seq : 0, + min_seq); + - WALK_LIST2(eh, n, tab->exports, n) ++ WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (last == last_export_to_free) + { + /* This may fail when the channel managed to export more inbetween. This is OK. */ + atomic_compare_exchange_strong_explicit( + &eh->last_export, &last, NULL, + memory_order_release, + memory_order_relaxed); + + DBG("store hook=%p last_export=NULL\n", eh); + } + } + - while (first_export && (first_export->seq <= min_seq)) ++ while (first && (first->seq <= min_seq)) + { - ASSERT_DIE(first_export->new || first_export->old); ++ ASSERT_DIE(first->new || first->old); + - const net_addr *n = first_export->new ? - first_export->new->rte.net : - first_export->old->rte.net; ++ const net_addr *n = first->new ? 
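/*
 * A worked example of the cleanup rule above (sequence numbers invented):
 * with three ready hooks whose last_export entries carry seq 41, 37 and 55,
 * min_seq becomes 37, so every pending export with seq <= 37 has been seen
 * by all consumers and can be released, together with old route versions
 * referenced only from those exports; exporter.first then moves past seq 37.
 * A single ready hook that has exported nothing since the previous cleanup
 * takes the "goto done" path and keeps everything queued.
 */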
++ first->new->rte.net : ++ first->old->rte.net; + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + - ASSERT_DIE(net->first == first_export); ++ ASSERT_DIE(net->first == first); + - if (first_export == net->last) ++ if (first == net->last) + /* The only export here */ + net->last = net->first = NULL; + else + /* First is now the next one */ - net->first = atomic_load_explicit(&first_export->next, memory_order_relaxed); ++ net->first = atomic_load_explicit(&first->next, memory_order_relaxed); + + /* For now, the old route may be finally freed */ - if (first_export->old) ++ if (first->old) + { - rt_rte_trace_in(D_ROUTES, first_export->old->rte.sender->req, &first_export->old->rte, "freed"); - hmap_clear(&tab->id_map, first_export->old->rte.id); - rte_free(first_export->old, tab); ++ rt_rte_trace_in(D_ROUTES, first->old->rte.sender->req, &first->old->rte, "freed"); ++ hmap_clear(&tab->id_map, first->old->rte.id); ++ rte_free(first->old); + } + + #ifdef LOCAL_DEBUG - memset(first_export, 0xbd, sizeof(struct rt_pending_export)); ++ memset(first, 0xbd, sizeof(struct rt_pending_export)); + #endif + - struct rt_export_block *reb = HEAD(tab->pending_exports); - ASSERT_DIE(reb == PAGE_HEAD(first_export)); ++ struct rt_export_block *reb = HEAD(tab->exporter.pending); ++ ASSERT_DIE(reb == PAGE_HEAD(first)); + - u32 pos = (first_export - &reb->export[0]); ++ u32 pos = (first - &reb->export[0]); + u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed); + ASSERT_DIE(pos < end); + + struct rt_pending_export *next = NULL; + + if (++pos < end) + next = &reb->export[pos]; + else + { + rem_node(&reb->n); + + #ifdef LOCAL_DEBUG + memset(reb, 0xbe, page_size); + #endif + - free_page(tab->rp, reb); ++ free_page(reb); + - if (EMPTY_LIST(tab->pending_exports)) ++ if (EMPTY_LIST(tab->exporter.pending)) + { + if (config->table_debug) + log(L_TRACE "%s: Resetting export seq", tab->name); + + node *n; - WALK_LIST2(eh, n, tab->exports, n) ++ WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); + bmap_reset(&eh->seq_map, 1024); + } + - tab->next_export_seq = 1; ++ tab->exporter.next_seq = 1; + } + else + { - reb = HEAD(tab->pending_exports); ++ reb = HEAD(tab->exporter.pending); + next = &reb->export[0]; + } + } + - first_export = next; ++ first = next; + } + + done:; + struct rt_import_hook *ih; node *x; + _Bool imports_stopped = 0; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_WAITING) - if (!first_export || (first_export->seq >= ih->flush_seq)) ++ if (!first || (first->seq >= ih->flush_seq)) + { + ih->import_state = TIS_CLEARED; + ih->stopped(ih->req); + rem_node(&ih->n); + mb_free(ih); + rt_unlock_table(tab); + imports_stopped = 1; + } + + if (tab->export_used) + ev_schedule(tab->rt_event); + + if (imports_stopped) + { + if (config->table_debug) + log(L_TRACE "%s: Sources pruning routine requested", tab->name); + + rt_prune_sources(); + } + - if (EMPTY_LIST(tab->pending_exports) && tm_active(tab->export_timer)) - tm_stop(tab->export_timer); ++ if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer)) ++ tm_stop(tab->exporter.export_timer); +} + +/** + * rt_lock_trie - lock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * + * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state + * structures. 
Therefore, asynchronous walks should lock the prefix trie using + * this function. That allows the prune loop to rebuild the trie, but postpones + * its freeing until all walks are done (unlocked by rt_unlock_trie()). + * + * Return a current trie that will be locked, the value should be passed back to + * rt_unlock_trie() for unlocking. + * + */ +struct f_trie * +rt_lock_trie(rtable *tab) +{ + ASSERT(tab->trie); + + tab->trie_lock_count++; + return tab->trie; +} + +/** + * rt_unlock_trie - unlock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * @trie: value returned by matching rt_lock_trie() + * + * Done for trie locked by rt_lock_trie() after walk over the trie is done. + * It may free the trie and schedule next trie pruning. + */ +void +rt_unlock_trie(rtable *tab, struct f_trie *trie) +{ + ASSERT(trie); + + if (trie == tab->trie) + { + /* Unlock the current prefix trie */ + ASSERT(tab->trie_lock_count); + tab->trie_lock_count--; + } + else if (trie == tab->trie_old) + { + /* Unlock the old prefix trie */ + ASSERT(tab->trie_old_lock_count); + tab->trie_old_lock_count--; + + /* Free old prefix trie that is no longer needed */ + if (!tab->trie_old_lock_count) + { + rfree(tab->trie_old->lp); + tab->trie_old = NULL; + + /* Kick prefix trie pruning that was postponed */ + if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries))) + { + tab->prune_trie = 1; + rt_schedule_prune(tab); + } + } + } + else + log(L_BUG "Invalid arg to rt_unlock_trie()"); } + void rt_preconfig(struct config *c) { @@@ -3040,10 -2635,14 +3503,15 @@@ rt_next_hop_update_net(rtable *tab, ne /* Replace the route in the list */ new->next = e->next; *k = e = new; + + /* Get a new ID for the route */ + new->rte.lastmod = current_time(); + new->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, new->rte.id); } - ASSERT_DIE(pos == count); + ASSERT_DIE(pos <= count); + count = pos; /* Find the new best route */ struct rte_storage **new_best = NULL; @@@ -3252,8 -2845,8 +3720,18 @@@ rt_commit(struct config *new, struct co DBG("\tdone\n"); } ++static void ++rt_feed_done(struct rt_export_hook *c) ++{ ++ c->event->hook = rt_export_hook; ++ ++ rt_set_export_state(c, TES_READY); ++ ++ ev_schedule_work(c->event); ++} ++ /** - * rt_feed_channel - advertise all routes to a channel + * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed * * This function performs one pass of advertisement of routes to a channel that @@@ -3269,11 -2862,9 +3747,11 @@@ rt_feed_by_fib(void *data struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == TES_FEEDING); + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - FIB_ITERATE_START(&c->table->fib, fit, net, n) + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + + FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { @@@ -3282,134 -2873,40 +3760,137 @@@ return; } - ASSERT(c->export_state == TES_FEEDING); + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; - if (c->req->export_bulk) - { - uint count = rte_feed_count(n); - if (count) - { - rte_update_lock(); - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); - max_feed -= count; - rte_update_unlock(); - } - } - else if (n->routes && rte_is_valid(&n->routes->rte)) - { - rte_update_lock(); - struct rt_pending_export rpe = { .new = n->routes, 
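/*
 * A minimal usage sketch for the trie locking API documented above (the
 * walker function and its arguments are invented for the example): an
 * asynchronous walk pins the current trie with rt_lock_trie(), keeps the
 * returned pointer, and hands it back to rt_unlock_trie() once finished, so
 * a concurrent prune may swap in a rebuilt trie without freeing the one that
 * is still being walked.
 */
static void
example_trie_walk(rtable *tab, const net_addr *prefix)
{
  struct f_trie_walk_state ws;
  struct f_trie *locked = rt_lock_trie(tab);

  trie_walk_init(&ws, tab->trie, prefix);

  net_addr addr;
  while (trie_walk_next(&ws, &addr))
  {
    /* process net_find(tab, &addr) here */
  }

  rt_unlock_trie(tab, locked);
}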
.new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - max_feed--; - rte_update_unlock(); - } - - for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) - rpe_mark_seen(c, rpe); + if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) + max_feed -= rt_feed_net(c, n); } FIB_ITERATE_END; - rt_set_export_state(c, TES_READY); - c->event->hook = rt_export_hook; - ev_schedule_work(c->event); ++ rt_feed_done(c); +} - rt_set_export_state(c, TES_READY); +static void +rt_feed_by_trie(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + + ASSERT_DIE(c->walk_state); + struct f_trie_walk_state *ws = c->walk_state; + + int max_feed = 256; + - ASSERT_DIE(c->export_state == TES_FEEDING); ++ ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + + net_addr addr; + while (trie_walk_next(ws, &addr)) + { + net *n = net_find(tab, &addr); + if (!n) + continue; + + if ((max_feed -= rt_feed_net(c, n)) <= 0) + return; + - ASSERT_DIE(c->export_state == TES_FEEDING); ++ if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) ++ return; + } + + rt_unlock_trie(tab, c->walk_lock); + c->walk_lock = NULL; + + mb_free(c->walk_state); + c->walk_state = NULL; + - rt_set_export_state(c, TES_READY); ++ rt_feed_done(c); +} + +static void +rt_feed_equal(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + - ASSERT_DIE(c->export_state == TES_FEEDING); ++ ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); + + net *n = net_find(tab, c->req->addr); + if (n) + rt_feed_net(c, n); + - rt_set_export_state(c, TES_READY); ++ rt_feed_done(c); +} + +static void +rt_feed_for(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + - ASSERT_DIE(c->export_state == TES_FEEDING); ++ ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); + + net *n = net_route(tab, c->req->addr); + if (n) + rt_feed_net(c, n); + - rt_set_export_state(c, TES_READY); ++ rt_feed_done(c); +} + +static uint +rt_feed_net(struct rt_export_hook *c, net *n) +{ - if (c->req->export_bulk) - { - uint count = rte_feed_count(n); - if (count) - { - rte_update_lock(); - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - struct rt_pending_export rpe = { .new_best = n->routes }; - c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count); - rte_update_unlock(); - } - return count; - } ++ uint count = 0; + - if (n->routes && rte_is_valid(&n->routes->rte)) - { - rte_update_lock(); - struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - rte_update_unlock(); - return 1; - } ++ if (c->req->export_bulk) ++ { ++ count = rte_feed_count(n); ++ if (count) ++ { ++ rte_update_lock(); ++ rte **feed = alloca(count * sizeof(rte *)); ++ rte_feed_obtain(n, feed, count); ++ c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); ++ rte_update_unlock(); ++ } ++ } + - return 0; - } ++ else if (n->routes) ++ { ++ rte_update_lock(); ++ struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; ++ c->req->export_one(c->req, n->n.addr, &rpe); ++ rte_update_unlock(); ++ count = 1; ++ } + ++ for (struct rt_pending_export *rpe = n->first; rpe; 
rpe = rpe_next(rpe, NULL)) ++ rpe_mark_seen(c, rpe); ++ ++ return count; ++} + +/* + * Import table + */ + - +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, reload_req, req); + + for (uint i=0; isender == c->in_req.hook) + { + /* Strip the later attribute layers */ + rte new = *feed[i]; + while (new.attrs->next) + new.attrs = new.attrs->next; + + /* And reload the route */ + rte_update(c, net, &new, new.src); + } } diff --cc nest/rt.h index bdbea05b4,000000000..20ed0ad05 mode 100644,000000..100644 --- a/nest/rt.h +++ b/nest/rt.h @@@ -1,512 -1,0 +1,541 @@@ +/* + * BIRD Internet Routing Daemon -- Routing Table + * + * (c) 1998--2000 Martin Mares + * (c) 2019--2021 Maria Matejka + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_NEST_RT_H_ +#define _BIRD_NEST_RT_H_ + +#include "lib/lists.h" +#include "lib/bitmap.h" +#include "lib/resource.h" +#include "lib/net.h" +#include "lib/type.h" +#include "lib/fib.h" +#include "lib/route.h" + ++#include ++ +struct ea_list; +struct protocol; +struct proto; +struct channel; +struct rte_src; +struct symbol; +struct timer; +struct filter; +struct f_trie; +struct f_trie_walk_state; +struct cli; + +/* + * Master Routing Tables. Generally speaking, each of them contains a FIB + * with each entry pointing to a list of route entries representing routes + * to given network (with the selected one at the head). + * + * Each of the RTE's contains variable data (the preference and protocol-dependent + * metrics) and a pointer to a route attribute block common for many routes). + * + * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. 
+ */ + +struct rtable_config { + node n; + char *name; + struct rtable *table; + struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + uint gc_threshold; /* Maximum number of operations before GC is run */ + uint gc_period; /* Approximate time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ + byte trie_used; /* Rtable has attached trie */ + btime min_settle_time; /* Minimum settle time for notifications */ + btime max_settle_time; /* Maximum settle time for notifications */ ++ btime export_settle_time; /* Delay before exports are announced */ +}; + +struct rt_export_hook; +struct rt_export_request; + +struct rt_exporter { + list hooks; /* Registered route export hooks */ + uint addr_type; /* Type of address data exported (NET_*) */ ++ + struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(struct rt_export_hook *); ++ void (*used)(struct rt_exporter *); ++ ++ list pending; /* List of packed struct rt_pending_export */ ++ struct timer *export_timer; ++ ++ struct rt_pending_export *first; /* First export to announce */ ++ u64 next_seq; /* The next export will have this ID */ +}; + +typedef struct rtable { + resource r; + node n; /* Node in list of all tables */ + pool *rp; /* Resource pool to allocate everything from, including itself */ + struct slab *rte_slab; /* Slab to allocate route objects */ + struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ + char *name; /* Name of this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int use_count; /* Number of protocols using this table */ + u32 rt_count; /* Number of routes in the table */ + + list imports; /* Registered route importers */ + struct rt_exporter exporter; /* Exporter API structure */ + + struct hmap id_map; + struct hostcache *hostcache; + struct rtable_config *config; /* Configuration of this table */ + struct config *deleted; /* Table doesn't exist in current configuration, + * delete as soon as use_count becomes 0 and remove + * obstacle from this routing table. 
+ */ + struct event *rt_event; /* Routing table event */ + struct timer *prune_timer; /* Timer for periodic pruning / GC */ + btime last_rt_change; /* Last time when route changed */ + btime base_settle_time; /* Start time of rtable settling interval */ + btime gc_time; /* Time of last GC */ + uint gc_counter; /* Number of operations since last GC */ + byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ + byte prune_trie; /* Prune prefix trie during next table prune */ + byte hcu_scheduled; /* Hostcache update is scheduled */ + byte nhu_state; /* Next Hop Update state */ ++ byte export_used; /* Pending Export pruning is scheduled */ + struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ + struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ + struct f_trie *trie_new; /* New prefix trie defined during pruning */ + struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ + u32 trie_lock_count; /* Prefix trie locked by walks */ + u32 trie_old_lock_count; /* Old prefix trie locked by walks */ + struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ + + list subscribers; /* Subscribers for notifications */ + struct timer *settle_timer; /* Settle time for notifications */ + list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ + struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ +} rtable; + +struct rt_subscription { + node n; + rtable *tab; + void (*hook)(struct rt_subscription *b); + void *data; +}; + +struct rt_flowspec_link { + node n; + rtable *src; + rtable *dst; + u32 uc; +}; + +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + +typedef struct network { - struct rte_storage *routes; /* Available routes for this network */ ++ struct rte_storage *routes; /* Available routes for this network */ ++ struct rt_pending_export *first, *last; + struct fib_node n; /* FIB flags reserved for kernel syncer */ +} net; + +struct hostcache { + slab *slab; /* Slab holding all hostentries */ + struct hostentry **hash_table; /* Hash table for hostentries */ + unsigned hash_order, hash_shift; + unsigned hash_max, hash_min; + unsigned hash_items; + linpool *lp; /* Linpool for trie */ + struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ + list hostentries; /* List of all hostentries */ + byte update_hostcache; +}; + +struct hostentry { + node ln; + ip_addr addr; /* IP address of host, part of key */ + ip_addr link; /* (link-local) IP address of host, used as gw + if host is directly attached */ + struct rtable *tab; /* Dependent table, part of key */ + struct hostentry *next; /* Next in hash chain */ + unsigned hash_key; /* Hash key */ + unsigned uc; /* Use count */ + ea_list *src; /* Source attributes */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ + u32 igp_metric; /* Chosen route IGP metric */ +}; + +struct rte_storage { + struct rte_storage *next; /* Next in chain */ + struct rte rte; /* Route data */ +}; + +#define RTE_COPY(r) ((r) ? (r)->rte : (rte) {}) +#define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {}) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? 
&((r)->rte) : NULL) + +/* Table-channel connections */ + +struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ + int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); +}; + +struct rt_import_hook { + node n; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + ++ u64 flush_seq; /* Table export seq when the channel announced flushing */ + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ + + void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ +}; + +struct rt_pending_export { ++ struct rt_pending_export * _Atomic next; /* Next export for the same destination */ + struct rte_storage *new, *new_best, *old, *old_best; ++ u64 seq; /* Sequential ID (table-local) of the pending export */ +}; + +struct rt_export_request { + struct rt_export_hook *hook; /* Table part of the export */ + char *name; + const net_addr *addr; /* Network prefilter address */ + u8 trace_routes; + u8 addr_mode; /* Network prefilter mode (TE_ADDR_*) */ + + /* There are two methods of export. You can either request feeding every single change + * or feeding the whole route feed. In case of regular export, &export_one is preferred. + * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. 
+ * Thus, for RA_OPTIMAL, &export_one is only set, + * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set + * and for RA_ANY, both are set to accomodate for feeding all routes but receiving single changes + */ + void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + void (*dump_req)(struct rt_export_request *req); + void (*log_state_change)(struct rt_export_request *req, u8); +}; + +struct rt_export_hook { + node n; + struct rt_exporter *table; /* The connected table */ + + pool *pool; - linpool *lp; + + struct rt_export_request *req; /* The requestor */ + + struct rt_export_stats { + /* Export - from core to protocol */ + u32 updates_received; /* Number of route updates received */ + u32 withdraws_received; /* Number of route withdraws received */ + } stats; + + union { + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + struct { + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ + }; + u32 hash_iter; /* Iterator over hash */ + }; + ++ struct bmap seq_map; /* Keep track which exports were already procesed */ ++ ++ struct rt_pending_export * _Atomic last_export;/* Last export processed */ ++ struct rt_pending_export *rpe_next; /* Next pending export to process */ ++ + btime last_state_change; /* Time of last state transition */ + + u8 refeed_pending; /* Refeeding and another refeed is scheduled */ - u8 export_state; /* Route export state (TES_*, see below) */ ++ _Atomic u8 export_state; /* Route export state (TES_*, see below) */ + u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ + + struct event *event; /* Event running all the export operations */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +#define TIS_DOWN 0 +#define TIS_UP 1 +#define TIS_STOP 2 +#define TIS_FLUSHING 3 +#define TIS_WAITING 4 +#define TIS_CLEARED 5 +#define TIS_MAX 6 + +#define TES_DOWN 0 +#define TES_FEEDING 2 +#define TES_READY 3 +#define TES_STOP 4 +#define TES_MAX 5 + +/* Value of addr_mode */ +#define TE_ADDR_NONE 0 /* No address matching */ +#define TE_ADDR_EQUAL 1 /* Exact query - show route */ +#define TE_ADDR_FOR 2 /* Longest prefix match - show route for */ +#define TE_ADDR_IN 3 /* Interval query - show route in */ + + +#define TFT_FIB 1 +#define TFT_TRIE 2 +#define TFT_HASH 3 + +void rt_request_import(rtable *tab, struct rt_import_request *req); +void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req); + +void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req); + +void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); +void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); + +const char *rt_import_state_name(u8 state); +const char *rt_export_state_name(u8 state); + +static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } +static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } + +void rt_set_export_state(struct rt_export_hook *hook, u8 state); + +void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); + ++/* Get next rpe. If src is given, it must match. 
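/*
 * A condensed sketch of how a consumer attaches to an exporter (the request
 * name and the callback are placeholders): fill a struct rt_export_request
 * with at least one export method and hand it to rt_request_export(); the
 * exporter allocates the hook, snapshots the current end of the
 * pending-export journal and starts feeding. The request structure is stored
 * by the exporter, so it must stay alive for the whole lifetime of the
 * export (static here for simplicity).
 */
static void
example_export_cb(struct rt_export_request *req UNUSED, const net_addr *net UNUSED, struct rt_pending_export *rpe UNUSED)
{
  /* process or squash the pending export here, as sketched near rpe_next() */
}

static void
example_attach(rtable *tab)
{
  static struct rt_export_request req = {
    .name = "example",
    .addr_mode = TE_ADDR_NONE,		/* no prefilter, whole table */
    .export_one = example_export_cb,
  };

  rt_request_export(&tab->exporter, &req);
}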
*/ ++struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src); ++ ++/* Mark the pending export processed */ ++void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); ++ ++/* Get pending export seen status */ ++int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); ++ +/* Types of route announcement, also used as flags */ +#define RA_UNDEF 0 /* Undefined RA type */ +#define RA_OPTIMAL 1 /* Announcement of optimal route change */ +#define RA_ACCEPTED 2 /* Announcement of first accepted route */ +#define RA_ANY 3 /* Announcement of any route change */ +#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ + +/* Return value of preexport() callback */ +#define RIC_ACCEPT 1 /* Accepted by protocol */ +#define RIC_PROCESS 0 /* Process it through import filter */ +#define RIC_REJECT -1 /* Rejected by protocol */ +#define RIC_DROP -2 /* Silently dropped by protocol */ + +#define rte_update channel_rte_import +/** + * rte_update - enter a new update to a routing table + * @c: channel doing the update + * @net: network address + * @rte: a &rte representing the new route + * @src: old route source identifier + * + * This function imports a new route to the appropriate table (via the channel). + * Table keys are @net (obligatory) and @rte->attrs->src. + * Both the @net and @rte pointers can be local. + * + * The route attributes (@rte->attrs) are obligatory. They can be also allocated + * locally. Anyway, if you use an already-cached attribute object, you shall + * call rta_clone() on that object yourself. (This semantics may change in future.) + * + * If the route attributes are local, you may set @rte->attrs->src to NULL, then + * the protocol's default route source will be supplied. + * + * When rte_update() gets a route, it automatically validates it. This includes + * checking for validity of the given network and next hop addresses and also + * checking for host-scope or link-scope routes. Then the import filters are + * processed and if accepted, the route is passed to route table recalculation. + * + * The accepted routes are then inserted into the table, replacing the old route + * for the same @net identified by @src. Then the route is announced + * to all the channels connected to the table using the standard export mechanism. + * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same + * as @src. + * + * All memory used for temporary allocations is taken from a special linpool + * @rte_update_pool and freed when rte_update() finishes. 
+ */ +void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); + +extern list routing_tables; +struct config; + +void rt_init(void); +void rt_preconfig(struct config *); +void rt_postconfig(struct config *); +void rt_commit(struct config *new, struct config *old); +void rt_lock_table(rtable *); +void rt_unlock_table(rtable *); +struct f_trie * rt_lock_trie(rtable *tab); +void rt_unlock_trie(rtable *tab, struct f_trie *trie); +void rt_subscribe(rtable *tab, struct rt_subscription *s); +void rt_unsubscribe(struct rt_subscription *s); +void rt_flowspec_link(rtable *src, rtable *dst); +void rt_flowspec_unlink(rtable *src, rtable *dst); +rtable *rt_setup(pool *, struct rtable_config *); +static inline void rt_shutdown(rtable *r) { rfree(r->rp); } + +static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable *tab, const net_addr *addr) +{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } +static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +net *net_get(rtable *tab, const net_addr *addr); +net *net_route(rtable *tab, const net_addr *n); +int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); +rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); +void rt_refresh_begin(struct rt_import_request *); +void rt_refresh_end(struct rt_import_request *); +void rt_modify_stale(rtable *t, struct rt_import_request *); +void rt_schedule_prune(rtable *t); +void rte_dump(struct rte_storage *); +void rte_free(struct rte_storage *); +struct rte_storage *rte_store(const rte *, net *net, rtable *); +void rt_dump(rtable *); +void rt_dump_all(void); +void rt_dump_hooks(rtable *); +void rt_dump_hooks_all(void); +int rt_reload_channel(struct channel *c); +void rt_reload_channel_abort(struct channel *c); +void rt_refeed_channel(struct channel *c); +void rt_prune_sync(rtable *t, int all); +struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); + +static inline int rt_is_ip(rtable *tab) +{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } + +static inline int rt_is_vpn(rtable *tab) +{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } + +static inline int rt_is_roa(rtable *tab) +{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } + +static inline int rt_is_flow(rtable *tab) +{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } + + +/* Default limit for ECMP next hops, defined in sysdep code */ +extern const int rt_default_ecmp; + +struct rt_show_data_rtable { + node n; + const char *name; + struct rt_exporter *table; + struct channel *export_channel; + struct channel *prefilter; + struct krt_proto *kernel; +}; + +struct rt_show_data { + struct cli *cli; /* Pointer back to the CLI */ + net_addr *addr; + list tables; + struct rt_show_data_rtable *tab; /* Iterator over table list */ + struct rt_show_data_rtable *last_table; /* Last table in output */ + struct rt_export_request req; /* Export request in use */ + int verbose, tables_defined_by; + const struct filter *filter; + struct proto *show_protocol; + struct proto *export_protocol; + struct channel *export_channel; + struct config *running_on_config; + struct rt_export_hook *kernel_export_hook; + int export_mode, addr_mode, primary_only, filtered, stats; + + int 
net_counter, rt_counter, show_counter, table_counter; + int net_counter_last, rt_counter_last, show_counter_last; + int show_counter_last_flush; +}; + +void rt_show(struct rt_show_data *); +struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t); + +/* Value of table definition mode in struct rt_show_data */ +#define RSD_TDB_DEFAULT 0 /* no table specified */ +#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ +#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ +#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ + +#define RSD_TDB_SET 0x1 /* internal: show empty tables */ +#define RSD_TDB_NMN 0x2 /* internal: need matching net */ + +/* Value of export_mode in struct rt_show_data */ +#define RSEM_NONE 0 /* Export mode not used */ +#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ +#define RSEM_EXPORT 2 /* Routes accepted by export filter */ +#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ +#define RSEM_EXPORTED 4 /* Routes marked in export map */ + +/* Host entry: Resolve hook for recursive nexthops */ +extern struct ea_class ea_gen_hostentry; +struct hostentry_adata { + adata ad; + struct hostentry *he; + u32 labels[0]; +}; + +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); + +void ea_show_hostentry(const struct adata *ad, byte *buf, uint size); +void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad); + +/* + * Default protocol preferences + */ + +#define DEF_PREF_DIRECT 240 /* Directly connected */ +#define DEF_PREF_STATIC 200 /* Static route */ +#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ +#define DEF_PREF_BABEL 130 /* Babel */ +#define DEF_PREF_RIP 120 /* RIP */ +#define DEF_PREF_BGP 100 /* BGP */ +#define DEF_PREF_RPKI 100 /* RPKI */ +#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ +#define DEF_PREF_UNKNOWN 0 /* Routes with no preference set */ + +/* + * Route Origin Authorization + */ + +#define ROA_UNKNOWN 0 +#define ROA_VALID 1 +#define ROA_INVALID 2 + +int net_roa_check(rtable *tab, const net_addr *n, u32 asn); + +#endif diff --cc proto/bgp/attrs.c index 883a97462,892b26e3c..a7b1a7edc --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@@ -1762,167 -1662,6 +1762,167 @@@ bgp_free_prefix(struct bgp_channel *c, mb_free(px); } +void +bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck) +{ + /* Cleanup: We're called from bucket senders. */ + ASSERT_DIE(px->cur == buck); + rem_node(&px->buck_node_xx); + + /* We may want to store the updates */ + if (c->c.out_table) + { + /* Nothing to be sent right now */ + px->cur = NULL; + + /* Unref the previous sent version */ + if (px->last) + px->last->px_uc--; + + /* Ref the current sent version */ + if (buck != c->withdraw_bucket) + { + px->last = buck; + px->last->px_uc++; + return; + } + + /* Prefixes belonging to the withdraw bucket are freed always */ + } + + bgp_free_prefix(c, px); +} + + +/* + * Prefix hash table exporter + */ + +static void +bgp_out_table_feed(void *data) +{ + struct rt_export_hook *hook = data; + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table); + + int max = 512; + + const net_addr *neq = (hook->req->addr_mode == TE_ADDR_EQUAL) ? 
hook->req->addr : NULL; + const net_addr *cand = NULL; + + do { + HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter) + { + switch (hook->req->addr_mode) + { + case TE_ADDR_IN: + if (!net_in_netX(n->net, hook->req->addr)) + continue; + /* fall through */ + case TE_ADDR_NONE: + /* Splitting only for multi-net exports */ + if (--max <= 0) + HASH_WALK_ITER_PUT; + break; + + case TE_ADDR_FOR: + if (!neq) + { + if (net_in_netX(hook->req->addr, n->net) && (!cand || (n->net->length > cand->length))) + cand = n->net; + continue; + } + /* fall through */ + case TE_ADDR_EQUAL: + if (!net_equal(n->net, neq)) + continue; + break; + } + + struct bgp_bucket *buck = n->cur ?: n->last; + ea_list *ea = NULL; + if (buck == c->withdraw_bucket) + ea_set_dest(&ea, 0, RTD_UNREACHABLE); + else + { + ea = buck->eattrs; + eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP); + ASSERT_DIE(eanh); + const ip_addr *nh = (const void *) eanh->u.ptr->data; + + struct nexthop_adata nhad = { + .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), }, + .nh = { .gw = nh[0], }, + }; + + ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad))); + } + + struct rte_storage es = { + .rte = { + .attrs = ea, + .net = n->net, + .src = rt_find_source_global(n->path_id), + .sender = NULL, + .lastmod = n->lastmod, + .flags = n->cur ? REF_PENDING : 0, + }, + }; + + struct rt_pending_export rpe = { + .new = &es, .new_best = &es, + }; + + if (hook->req->export_bulk) + { + rte *feed = &es.rte; + hook->req->export_bulk(hook->req, n->net, &rpe, &feed, 1); + } + else if (hook->req->export_one) + hook->req->export_one(hook->req, n->net, &rpe); + else + bug("No export method in export request"); + } + HASH_WALK_ITER_END; + + neq = cand; + cand = NULL; + } while (neq); + + if (hook->hash_iter) + ev_schedule_work(hook->event); + else + rt_set_export_state(hook, TES_READY); +} + +static struct rt_export_hook * +bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, re); + pool *p = rp_new(c->c.proto->pool, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; - hook->lp = lp_new_default(p); + hook->event = ev_new_init(p, bgp_out_table_feed, hook); + hook->feed_type = TFT_HASH; + + return hook; +} + +void +bgp_setup_out_table(struct bgp_channel *c) +{ + ASSERT_DIE(c->c.out_table == NULL); + + c->prefix_exporter = (struct rt_exporter) { + .addr_type = c->c.table->addr_type, + .start = bgp_out_table_export_start, + }; + + init_list(&c->prefix_exporter.hooks); ++ init_list(&c->prefix_exporter.pending); + + c->c.out_table = &c->prefix_exporter; +} + /* * BGP protocol glue