#include "lib/hash.h"
#include "lib/string.h"
#include "lib/alloca.h"
+#include "lib/flowspec.h"
+
+#ifdef CONFIG_BGP
+#include "proto/bgp/bgp.h"
+#endif
+ #include <stdatomic.h>
+
pool *rt_table_pool;
static linpool *rte_update_pool;
list routing_tables;
+list deleted_routing_tables;
+ /* Data structures for export journal */
+ #define RT_PENDING_EXPORT_ITEMS ((page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export))
+
+ struct rt_export_block {
+ node n;
+ _Atomic u32 end;
+ _Atomic _Bool not_last;
+ struct rt_pending_export export[];
+ };
+
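+ /* Sizing sketch (illustrative, not part of the patch): with a 4096-byte
+  * page and hypothetical 64-bit sizes sizeof(struct rt_export_block) == 24
+  * and sizeof(struct rt_pending_export) == 48, one block holds
+  * (4096 - 24) / 48 = 84 pending exports; the flexible array member
+  * export[] fills the rest of the page after the block header. */
+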
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
+static inline void rt_next_hop_resolve_rte(rte *r);
+static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c);
static inline void rt_prune_table(rtable *tab);
static inline void rt_schedule_notify(rtable *tab);
-static void rt_feed_channel(void *);
-
-static inline void rt_export_used(rtable *tab);
+static void rt_flowspec_notify(rtable *tab, net *net);
+static void rt_kick_prune_timer(rtable *tab);
+static void rt_feed_by_fib(void *);
+static void rt_feed_by_trie(void *);
+static void rt_feed_equal(void *);
+static void rt_feed_for(void *);
+static uint rt_feed_net(struct rt_export_hook *c, net *n);
+
++static inline void rt_export_used(struct rt_exporter *);
+ static void rt_export_cleanup(rtable *tab);
+
+ static inline void rte_update_lock(void);
+ static inline void rte_update_unlock(void);
+
++static int rte_same(rte *x, rte *y);
++
const char *rt_import_state_name_array[TIS_MAX] = {
[TIS_DOWN] = "DOWN",
[TIS_UP] = "UP",
static void
rte_trace(const char *name, const rte *e, int dir, const char *msg)
{
- log(L_TRACE "%s %c %s %N %uL %uG %s",
- name, dir, msg, e->net, e->src->private_id, e->src->global_id,
- log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s%s",
++ log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s",
+ name, dir, msg, e->net,
+ e->src->private_id, e->src->global_id, e->stale_cycle, e->id,
- rta_dest_name(e->attrs->dest),
- rte_is_filtered(e) ? " (filtered)" : "");
+ rta_dest_name(rte_dest(e)));
}
static inline void
static void
rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old)
{
++ if (new && old && rte_same(new, old))
++ {
++ if ((new->id != old->id) && bmap_test(&c->export_map, old->id))
++ {
++ bmap_set(&c->export_map, new->id);
++ bmap_clear(&c->export_map, old->id);
++ }
++ return;
++ }
++
if (new)
new = export_filter(c, new, 0);
}
do_rt_notify(c, n, new_best, old_best);
-
- done:
- /* Drop the old stored rejection if applicable.
- * new->id == old->id happens when updating hostentries. */
- if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id)))
- bmap_clear(&c->export_reject_map, rpe->old->rte.id);
}
-
-static struct nexthop *
-nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
-{
- return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
-}
-
rte *
rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent)
{
{
struct channel *c = SKIP_BACK(struct channel, out_req, req);
- if (rpe->new_best != rpe->old_best)
- {
- rte n0 = RTE_COPY_VALID(rpe->new_best);
- rte *o = RTE_VALID_OR_NULL(rpe->old_best);
- rte *old = RTES_OR_NULL(rpe->old_best);
++ rte *o = RTE_VALID_OR_NULL(rpe->old_best);
+ struct rte_storage *new_best = rpe->new_best;
- if (n0.src || o)
- rt_notify_basic(c, net, n0.src ? &n0 : NULL, o);
+ while (rpe)
+ {
+ channel_rpe_mark_seen(req, rpe);
+ new_best = rpe->new_best;
+ rpe = rpe_next(rpe, NULL);
}
- /* Drop the old stored rejection if applicable.
- * new->id == old->id happens when updating hostentries. */
- if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id)))
- bmap_clear(&c->export_reject_map, rpe->old->rte.id);
- if (&new_best->rte != old)
- {
- rte n0, *new = RTES_CLONE(new_best, &n0);
- rt_notify_basic(c, net, new, old);
- }
++ rte n0 = RTE_COPY_VALID(new_best);
++ if (n0.src || o)
++ rt_notify_basic(c, net, n0.src ? &n0 : NULL, o);
}
void
{
struct channel *c = SKIP_BACK(struct channel, out_req, req);
- if (rpe->new != rpe->old)
- struct rte_src *src = rpe->new ? rpe->new->rte.src : rpe->old->rte.src;
- rte *old = RTES_OR_NULL(rpe->old);
- struct rte_storage *new_any = rpe->new;
++ rte *n = RTE_VALID_OR_NULL(rpe->new);
++ rte *o = RTE_VALID_OR_NULL(rpe->old);
+
- while (rpe)
++ if (!n && !o)
{
- rte n0 = RTE_COPY_VALID(rpe->new);
- rte *o = RTE_VALID_OR_NULL(rpe->old);
- if (n0.src || o)
- rt_notify_basic(c, net, n0.src ? &n0 : NULL, o);
+ channel_rpe_mark_seen(req, rpe);
- new_any = rpe->new;
- rpe = rpe_next(rpe, src);
++ return;
}
- /* Drop the old stored rejection if applicable.
- * new->id == old->id happens when updating hostentries. */
- if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id)))
- bmap_clear(&c->export_reject_map, rpe->old->rte.id);
- if (&new_any->rte != old)
++ struct rte_src *src = n ? n->src : o->src;
++ struct rte_storage *new_latest = rpe->new;
++
++ while (rpe)
+ {
- rte n0, *new = RTES_CLONE(new_any, &n0);
- rt_notify_basic(c, net, new, old);
++ channel_rpe_mark_seen(req, rpe);
++ new_latest = rpe->new;
++ rpe = rpe_next(rpe, src);
+ }
++
++ rte n0 = RTE_COPY_VALID(new_latest);
++ if (n0.src || o)
++ rt_notify_basic(c, net, n0.src ? &n0 : NULL, o);
}
void
struct channel *c = SKIP_BACK(struct channel, out_req, req);
for (uint i=0; i<count; i++)
- {
- rte n0 = *feed[i];
- rt_notify_basic(c, net, &n0, NULL);
- }
+ if (rte_is_valid(feed[i]))
+ {
+ rte n0 = *feed[i];
+ rt_notify_basic(c, net, &n0, NULL);
+ }
}
- goto seen;
+ void
+ rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe)
+ {
+ bmap_set(&hook->seq_map, rpe->seq);
+ }
+
+ struct rt_pending_export *
+ rpe_next(struct rt_pending_export *rpe, struct rte_src *src)
+ {
+ struct rt_pending_export *next = atomic_load_explicit(&rpe->next, memory_order_acquire);
+
+ if (!next)
+ return NULL;
+
+ if (!src)
+ return next;
+
+ while (rpe = next)
+ if (src == (rpe->new ? rpe->new->rte.src : rpe->old->rte.src))
+ return rpe;
+ else
+ next = atomic_load_explicit(&rpe->next, memory_order_acquire);
+
+ return NULL;
+ }
+
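+ /* Usage sketch, mirroring rt_feed_net() below: walk the per-network
+  * journal chain and mark every pending export as processed. A NULL src
+  * visits all exports; a non-NULL src visits only exports of that source.
+  *
+  *   for (struct rt_pending_export *rpe = net->first; rpe; rpe = rpe_next(rpe, NULL))
+  *     rpe_mark_seen(hook, rpe);
+  */
+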
+ static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last);
+ static void
+ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe)
+ {
+ if (bmap_test(&hook->seq_map, rpe->seq))
-seen:
++ goto ignore; /* Seen already */
+
+ const net_addr *n = rpe->new_best ? rpe->new_best->rte.net : rpe->old_best->rte.net;
+
++ switch (hook->req->addr_mode)
++ {
++ case TE_ADDR_NONE:
++ break;
++
++ case TE_ADDR_IN:
++ if (!net_in_netX(n, hook->req->addr))
++ goto ignore;
++ break;
++
++ case TE_ADDR_EQUAL:
++ if (!net_equal(n, hook->req->addr))
++ goto ignore;
++ break;
++
++ case TE_ADDR_FOR:
++ bug("Continuos export of best prefix match not implemented yet.");
++
++ default:
++ bug("Strange table export address mode: %d", hook->req->addr_mode);
++ }
++
+ if (rpe->new)
+ hook->stats.updates_received++;
+ else
+ hook->stats.withdraws_received++;
+
+ if (hook->req->export_one)
+ hook->req->export_one(hook->req, n, rpe);
+ else if (hook->req->export_bulk)
+ {
+ net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n);
+ uint count = rte_feed_count(net);
+ rte **feed = NULL;
+ if (count)
+ {
+ feed = alloca(count * sizeof(rte *));
+ rte_feed_obtain(net, feed, count);
+ }
+ hook->req->export_bulk(hook->req, n, rpe, feed, count);
+ }
+ else
+ bug("Export request must always provide an export method");
+
- rt_export_used(hook->table);
++ignore:
+ /* Get the next export if exists */
+ hook->rpe_next = rt_next_export_fast(rpe);
+
+ /* The last block may be available to free */
+ if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe))
++ CALL(hook->table->used, hook->table);
+
+ /* Releasing this export for cleanup routine */
+ DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq);
+ atomic_store_explicit(&hook->last_export, rpe, memory_order_release);
+ }
+
/**
* rte_announce - announce a routing table change
* @tab: table the route has been added to
rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old,
struct rte_storage *new_best, struct rte_storage *old_best)
{
- if (!new_best || !rte_is_valid(&new_best->rte))
- new_best = NULL;
-
- if (!old_best || !rte_is_valid(&old_best->rte))
- old_best = NULL;
-
- if (!new || !rte_is_valid(&new->rte))
- new = NULL;
-
- if (old && !rte_is_valid(&old->rte))
- {
- /* Filtered old route isn't announced, should be freed immediately. */
- rte_free(old, tab);
- old = NULL;
- }
+ int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best));
+ int old_best_valid = rte_is_valid(RTE_OR_NULL(old_best));
- if (!new && !old && !new_best && !old_best)
+ if ((new == old) && (new_best == old_best))
return;
- if (new_best != old_best)
+ if (new_best_valid || old_best_valid)
{
- if (new_best)
+ if (new_best_valid)
new_best->rte.sender->stats.pref++;
- if (old_best)
+ if (old_best_valid)
old_best->rte.sender->stats.pref--;
if (tab->hostcache)
rt_schedule_notify(tab);
- struct rt_pending_export rpe = { .new = new, .old = old, .new_best = new_best, .old_best = old_best };
- uint count = rte_feed_count(net);
- rte **feed = NULL;
- if (count)
- if (EMPTY_LIST(tab->exports) && EMPTY_LIST(tab->pending_exports))
++ if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending))
+ {
+ /* No export hook and no pending exports to cleanup. We may free the route immediately. */
+ if (!old)
+ return;
+
+ hmap_clear(&tab->id_map, old->rte.id);
- rte_free(old, tab);
++ rte_free(old);
+ return;
+ }
+
+ /* Get the pending export structure */
+ struct rt_export_block *rpeb = NULL, *rpebsnl = NULL;
+ u32 end = 0;
+
- if (!EMPTY_LIST(tab->pending_exports))
++ if (!EMPTY_LIST(tab->exporter.pending))
{
- feed = alloca(count * sizeof(rte *));
- rte_feed_obtain(net, feed, count);
- rpeb = TAIL(tab->pending_exports);
++ rpeb = TAIL(tab->exporter.pending);
+ end = atomic_load_explicit(&rpeb->end, memory_order_relaxed);
+ if (end >= RT_PENDING_EXPORT_ITEMS)
+ {
+ ASSERT_DIE(end == RT_PENDING_EXPORT_ITEMS);
+ rpebsnl = rpeb;
+
+ rpeb = NULL;
+ end = 0;
+ }
}
- struct rt_export_hook *eh;
- WALK_LIST(eh, tab->exporter.hooks)
+ if (!rpeb)
+ {
- rpeb = alloc_page(tab->rp);
++ rpeb = alloc_page();
+ *rpeb = (struct rt_export_block) {};
- add_tail(&tab->pending_exports, &rpeb->n);
++ add_tail(&tab->exporter.pending, &rpeb->n);
+ }
+
+ /* Fill the pending export */
+ struct rt_pending_export *rpe = &rpeb->export[rpeb->end];
+ *rpe = (struct rt_pending_export) {
+ .new = new,
+ .new_best = new_best,
+ .old = old,
+ .old_best = old_best,
- .seq = tab->next_export_seq++,
++ .seq = tab->exporter.next_seq++,
+ };
+
- DBG("rte_announce: table=%s net=%N new=%p from %p old=%p from %p new_best=%p old_best=%p seq=%lu\n", tab->name, net->n.addr, new, new ? new->sender : NULL, old, old ? old->sender : NULL, new_best, old_best, rpe->seq);
++ DBGL("rte_announce: table=%s net=%N new=%p id %u from %s old=%p id %u from %s new_best=%p id %u old_best=%p id %u seq=%lu",
++ tab->name, net->n.addr,
++ new, new ? new->rte.id : 0, new ? new->rte.sender->req->name : NULL,
++ old, old ? old->rte.id : 0, old ? old->rte.sender->req->name : NULL,
++ new_best, old_best, rpe->seq);
+
+ ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end);
+
+ if (rpebsnl)
{
- if (eh->export_state == TES_STOP)
+ _Bool f = 0;
+ ASSERT_DIE(atomic_compare_exchange_strong_explicit(&rpebsnl->not_last, &f, 1,
+ memory_order_release, memory_order_relaxed));
+ }
+
+ /* Append to the same-network squasher list */
+ if (net->last)
+ {
+ struct rt_pending_export *rpenull = NULL;
+ ASSERT_DIE(atomic_compare_exchange_strong_explicit(
+ &net->last->next, &rpenull, rpe,
+ memory_order_relaxed,
+ memory_order_relaxed));
+
+ }
+
+ net->last = rpe;
+
+ if (!net->first)
+ net->first = rpe;
+
- if (tab->first_export == NULL)
- tab->first_export = rpe;
++ if (tab->exporter.first == NULL)
++ tab->exporter.first = rpe;
+
- if (!tm_active(tab->export_timer))
- tm_start(tab->export_timer, tab->config->export_settle_time);
++ if (!tm_active(tab->exporter.export_timer))
++ tm_start(tab->exporter.export_timer, tab->config->export_settle_time);
+ }
+
+ static struct rt_pending_export *
+ rt_next_export_fast(struct rt_pending_export *last)
+ {
+ /* Get the whole export block and find our position in there. */
+ struct rt_export_block *rpeb = PAGE_HEAD(last);
+ u32 pos = (last - &rpeb->export[0]);
+ u32 end = atomic_load_explicit(&rpeb->end, memory_order_acquire);
+ ASSERT_DIE(pos < end);
+
+ /* Next is in the same block. */
+ if (++pos < end)
+ return &rpeb->export[pos];
+
+ /* There is another block. */
+ if (atomic_load_explicit(&rpeb->not_last, memory_order_acquire))
+ {
+ /* This is OK to do non-atomically because of the not_last flag. */
+ rpeb = NODE_NEXT(rpeb);
+ return &rpeb->export[0];
+ }
+
+ /* There is nothing more. */
+ return NULL;
+ }
+
+ static struct rt_pending_export *
-rt_next_export(struct rt_export_hook *hook, rtable *tab)
++rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab)
+ {
+ /* As the table is locked, it is safe to reload the last export pointer */
+ struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire);
+
+ /* It is still valid, let's reuse it */
+ if (last)
+ return rt_next_export_fast(last);
+
+ /* No, therefore we must process the table's first pending export */
+ else
- return tab->first_export;
++ return tab->first;
+ }
+
+ static void
+ rt_announce_exports(timer *tm)
+ {
+ rtable *tab = tm->data;
+
+ struct rt_export_hook *c; node *n;
- WALK_LIST2(c, n, tab->exports, n)
++ WALK_LIST2(c, n, tab->exporter.hooks, n)
+ {
+ if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY)
continue;
- switch (eh->req->addr_mode)
- {
- case TE_ADDR_NONE:
- break;
+ ev_schedule_work(c->event);
+ }
+ }
- case TE_ADDR_IN:
- if (!net_in_netX(net->n.addr, eh->req->addr))
- continue;
- break;
+ static struct rt_pending_export *
-rt_last_export(rtable *tab)
++rt_last_export(struct rt_exporter *tab)
+ {
+ struct rt_pending_export *rpe = NULL;
- case TE_ADDR_EQUAL:
- if (!net_equal(net->n.addr, eh->req->addr))
- continue;
- break;
- if (!EMPTY_LIST(tab->pending_exports))
++ if (!EMPTY_LIST(tab->pending))
+ {
+ /* We'll continue processing exports from this export on */
- struct rt_export_block *reb = TAIL(tab->pending_exports);
++ struct rt_export_block *reb = TAIL(tab->pending);
+ ASSERT_DIE(reb->end);
+ rpe = &reb->export[reb->end - 1];
+ }
- case TE_ADDR_FOR:
- bug("Continuos export of best prefix match not implemented yet.");
+ return rpe;
+ }
- default:
- bug("Strange table export address mode: %d", eh->req->addr_mode);
+ #define RT_EXPORT_BULK 1024
+
+ static void
+ rt_export_hook(void *_data)
+ {
+ struct rt_export_hook *c = _data;
+
+ ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY);
+
+ if (!c->rpe_next)
+ {
+ c->rpe_next = rt_next_export(c, c->table);
+
+ if (!c->rpe_next)
+ {
- rt_export_used(c->table);
++ CALL(c->table->used, c->table);
+ return;
}
+ }
- if (new)
- eh->stats.updates_received++;
- else
- eh->stats.withdraws_received++;
+ /* Process the export */
+ for (uint i=0; i<RT_EXPORT_BULK; i++)
+ {
+ rte_update_lock();
- if (eh->req->export_one)
- eh->req->export_one(eh->req, net->n.addr, &rpe);
- else if (eh->req->export_bulk)
- eh->req->export_bulk(eh->req, net->n.addr, &rpe, feed, count);
- else
- bug("Export request must always provide an export method");
+    rte_export(c, c->rpe_next);
+    rte_update_unlock();
+
+    if (!c->rpe_next)
+      break;
}
+
+ ev_schedule_work(c->event);
}
+
static inline int
rte_validate(struct channel *ch, rte *e)
{
if (new_stored)
{
new_stored->rte.lastmod = current_time();
-
- if (!old)
- {
- new_stored->rte.id = hmap_first_zero(&table->id_map);
- hmap_set(&table->id_map, new_stored->rte.id);
- }
- else
- new_stored->rte.id = old->id;
+ new_stored->rte.id = hmap_first_zero(&table->id_map);
+ hmap_set(&table->id_map, new_stored->rte.id);
}
- _Bool nb = (new_stored == net->routes);
- _Bool ob = (old_best == old);
-
/* Log the route change */
- if (new_ok && old_ok)
+ if (new_ok)
+ rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added");
+ else if (old_ok)
{
- const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } };
- rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, best_indicator[nb][ob]);
+ if (old != old_best)
+ rt_rte_trace_in(D_ROUTES, req, old, "removed");
+ else if (net->routes && rte_is_ok(&net->routes->rte))
+ rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]");
+ else
+ rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]");
}
- else if (new_ok)
- rt_rte_trace_in(D_ROUTES, req, &new_stored->rte,
- (!net->routes->next || !rte_is_ok(&net->routes->next->rte)) ? "added [sole]" :
- nb ? "added [best]" : "added");
- else if (old_ok)
- rt_rte_trace_in(D_ROUTES, req, old,
- (!net->routes || !rte_is_ok(&net->routes->rte)) ? "removed [sole]" :
- ob ? "removed [best]" : "removed");
/* Propagate the route change */
rte_announce(table, net, new_stored, old_stored,
rt_export_stopped(void *data)
{
struct rt_export_hook *hook = data;
- rtable *tab = hook->table;
+ struct rt_exporter *tab = hook->table;
- rt_export_used(tab);
+ /* Drop pending exports */
++ CALL(tab->used, tab);
+
/* Unlist */
rem_node(&hook->n);
- /* Reporting the channel as stopped. */
+ /* Report the channel as stopped. */
hook->stopped(hook->req);
- /* Freeing the hook together with its coroutine. */
+ /* Report the hook as finished. */
+ CALL(tab->done, hook);
+
+ /* Free the hook together with its coroutine. */
rfree(hook->pool);
- rt_unlock_table(tab);
-
- DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count);
}
-
static inline void
rt_set_import_state(struct rt_import_hook *hook, u8 state)
{
rt_lock_table(tab);
pool *p = rp_new(tab->rp, "Export hook");
- struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook));
+ struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
hook->pool = p;
- hook->lp = lp_new_default(p);
-
- hook->req = req;
- hook->table = tab;
/* stats zeroed by mb_allocz */
+ switch (req->addr_mode)
+ {
+ case TE_ADDR_IN:
+ if (tab->trie && net_val_match(tab->addr_type, NB_IP))
+ {
+ hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state));
+ hook->walk_lock = rt_lock_trie(tab);
+ trie_walk_init(hook->walk_state, tab->trie, req->addr);
+ hook->event = ev_new_init(p, rt_feed_by_trie, hook);
+ break;
+ }
+ /* fall through */
+ case TE_ADDR_NONE:
+ FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib);
+ hook->event = ev_new_init(p, rt_feed_by_fib, hook);
+ break;
+
+ case TE_ADDR_EQUAL:
+ hook->event = ev_new_init(p, rt_feed_equal, hook);
+ break;
+
+ case TE_ADDR_FOR:
+ hook->event = ev_new_init(p, rt_feed_for, hook);
+ break;
+
+ default:
+ bug("Requested an unknown export address mode");
+ }
+
+ DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count);
- bmap_init(&hook->seq_map, p, 1024);
+ return hook;
+}
- rt_set_export_state(hook, TES_HUNGRY);
+void
+rt_request_export(struct rt_exporter *re, struct rt_export_request *req)
+{
+ struct rt_export_hook *hook = req->hook = re->start(re, req);
+
+ hook->req = req;
+ hook->table = re;
+
++ bmap_init(&hook->seq_map, hook->pool, 1024);
+
+ struct rt_pending_export *rpe = rt_last_export(hook->table);
+ DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0);
+ atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed);
+
hook->n = (node) {};
- add_tail(&tab->exports, &hook->n);
+ add_tail(&re->hooks, &hook->n);
- FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib);
+ /* Regular export */
+ rt_set_export_state(hook, TES_FEEDING);
+ ev_schedule_work(hook->event);
+}
- DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count);
+static void
+rt_table_export_stop(struct rt_export_hook *hook)
+{
+ rtable *tab = SKIP_BACK(rtable, exporter, hook->table);
- if (hook->export_state != TES_FEEDING)
- hook->event = ev_new_init(p, rt_feed_channel, hook);
- ev_schedule_work(hook->event);
++ if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING)
+ return;
- rt_set_export_state(hook, TES_FEEDING);
+ switch (hook->req->addr_mode)
+ {
+ case TE_ADDR_IN:
+ if (hook->walk_lock)
+ {
+ rt_unlock_trie(tab, hook->walk_lock);
+ hook->walk_lock = NULL;
+ mb_free(hook->walk_state);
+ hook->walk_state = NULL;
+ break;
+ }
+ /* fall through */
+ case TE_ADDR_NONE:
+ fit_get(&tab->fib, &hook->feed_fit);
+ break;
+ }
}
void
tab->prune_state |= 1;
}
-void
-rt_export_used(rtable *tab)
++static void
++rt_export_used(struct rt_exporter *e)
+ {
++ rtable *tab = SKIP_BACK(rtable, exporter, e);
++
+ if (config->table_debug)
+ log(L_TRACE "%s: Export cleanup requested", tab->name);
+
+ if (tab->export_used)
+ return;
+
+ tab->export_used = 1;
+ ev_schedule(tab->rt_event);
+ }
static void
rt_event(void *ptr)
fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
+ if (cf->trie_used)
+ {
+ t->trie = f_new_trie(lp_new_default(p), 0);
+ t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4);
+
+ t->fib.init = net_init_with_trie;
+ }
+
+ init_list(&t->flowspec_links);
+
+ t->exporter = (struct rt_exporter) {
+ .addr_type = t->addr_type,
+ .start = rt_table_export_start,
+ .stop = rt_table_export_stop,
+ .done = rt_table_export_done,
++ .used = rt_export_used,
+ };
++
+ init_list(&t->exporter.hooks);
++ init_list(&t->exporter.pending);
+
init_list(&t->imports);
- init_list(&t->exports);
hmap_init(&t->id_map, p, 1024);
hmap_set(&t->id_map, 0);
init_list(&t->subscribers);
t->rt_event = ev_new_init(p, rt_event, t);
- t->export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0);
+ t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0);
++ t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0);
t->last_rt_change = t->gc_time = current_time();
- t->next_export_seq = 1;
++ t->exporter.next_seq = 1;
t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS;
/* state change 2->0, 3->1 */
tab->prune_state &= 1;
+ if (tab->trie_new)
+ {
+ /* Finish prefix trie pruning */
+
+ if (!tab->trie_lock_count)
+ {
+ rfree(tab->trie->lp);
+ }
+ else
+ {
+ ASSERT(!tab->trie_old);
+ tab->trie_old = tab->trie;
+ tab->trie_old_lock_count = tab->trie_lock_count;
+ tab->trie_lock_count = 0;
+ }
+
+ tab->trie = tab->trie_new;
+ tab->trie_new = NULL;
+ tab->prune_trie = 0;
+ }
+ else
+ {
+ /* Schedule prefix trie pruning */
+ if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
+ {
+ /* state change 0->1, 2->3 */
+ tab->prune_state |= 1;
+ tab->prune_trie = 1;
+ }
+ }
+
rt_prune_sources();
+ uint flushed_channels = 0;
+
/* Close flushed channels */
WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n)
if (ih->import_state == TIS_FLUSHING)
{
- rt_set_import_state(ih, TIS_CLEARED);
- ih->stopped(ih->req);
- rem_node(&ih->n);
- mb_free(ih);
- rt_unlock_table(tab);
- ih->flush_seq = tab->next_export_seq;
++ ih->flush_seq = tab->exporter.next_seq;
+ rt_set_import_state(ih, TIS_WAITING);
+ flushed_channels++;
}
else if (ih->stale_pruning != ih->stale_pruned)
{
if (ih->req->trace_routes & D_STATES)
log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned);
}
- if (EMPTY_LIST(tab->exports) && flushed_channels)
+
+ /* In some cases, we may want to directly proceed to export cleanup */
- struct rt_pending_export *first_export = tab->first_export;
++ if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels)
+ rt_export_cleanup(tab);
+ }
+
+ static void
+ rt_export_cleanup(rtable *tab)
+ {
+ tab->export_used = 0;
+
+ u64 min_seq = ~((u64) 0);
+ struct rt_pending_export *last_export_to_free = NULL;
- WALK_LIST2(eh, n, tab->exports, n)
++ struct rt_pending_export *first = tab->exporter.first;
+
+ struct rt_export_hook *eh;
+ node *n;
- case TES_HUNGRY:
++ WALK_LIST2(eh, n, tab->exporter.hooks, n)
+ {
+ switch (atomic_load_explicit(&eh->export_state, memory_order_acquire))
+ {
+ case TES_DOWN:
- tab->first_export = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL;
+ continue;
+
+ case TES_READY:
+ {
+ struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire);
+ if (!last)
+ /* No last export means that the channel has exported nothing since last cleanup */
+ goto done;
+
+ else if (min_seq > last->seq)
+ {
+ min_seq = last->seq;
+ last_export_to_free = last;
+ }
+ continue;
+ }
+
+ default:
+      /* It's only safe to clean up when the export state is idle or regular. No feeding or stopping allowed. */
+ goto done;
+ }
+ }
+
- log(L_TRACE "%s: Export cleanup, old first_export seq %lu, new %lu, min_seq %ld",
++ tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL;
+
+ if (config->table_debug)
- first_export ? first_export->seq : 0,
- tab->first_export ? tab->first_export->seq : 0,
++ log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld",
+ tab->name,
- WALK_LIST2(eh, n, tab->exports, n)
++ first ? first->seq : 0,
++ tab->exporter.first ? tab->exporter.first->seq : 0,
+ min_seq);
+
- while (first_export && (first_export->seq <= min_seq))
++ WALK_LIST2(eh, n, tab->exporter.hooks, n)
+ {
+ if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY)
+ continue;
+
+ struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire);
+ if (last == last_export_to_free)
+ {
+      /* This may fail when the channel managed to export more in between. This is OK. */
+ atomic_compare_exchange_strong_explicit(
+ &eh->last_export, &last, NULL,
+ memory_order_release,
+ memory_order_relaxed);
+
+ DBG("store hook=%p last_export=NULL\n", eh);
+ }
+ }
+
- ASSERT_DIE(first_export->new || first_export->old);
++ while (first && (first->seq <= min_seq))
+ {
- const net_addr *n = first_export->new ?
- first_export->new->rte.net :
- first_export->old->rte.net;
++ ASSERT_DIE(first->new || first->old);
+
- ASSERT_DIE(net->first == first_export);
++ const net_addr *n = first->new ?
++ first->new->rte.net :
++ first->old->rte.net;
+ net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n);
+
- if (first_export == net->last)
++ ASSERT_DIE(net->first == first);
+
- net->first = atomic_load_explicit(&first_export->next, memory_order_relaxed);
++ if (first == net->last)
+ /* The only export here */
+ net->last = net->first = NULL;
+ else
+ /* First is now the next one */
- if (first_export->old)
++ net->first = atomic_load_explicit(&first->next, memory_order_relaxed);
+
+    /* Now the old route may finally be freed */
- rt_rte_trace_in(D_ROUTES, first_export->old->rte.sender->req, &first_export->old->rte, "freed");
- hmap_clear(&tab->id_map, first_export->old->rte.id);
- rte_free(first_export->old, tab);
++ if (first->old)
+ {
- memset(first_export, 0xbd, sizeof(struct rt_pending_export));
++ rt_rte_trace_in(D_ROUTES, first->old->rte.sender->req, &first->old->rte, "freed");
++ hmap_clear(&tab->id_map, first->old->rte.id);
++ rte_free(first->old);
+ }
+
+ #ifdef LOCAL_DEBUG
- struct rt_export_block *reb = HEAD(tab->pending_exports);
- ASSERT_DIE(reb == PAGE_HEAD(first_export));
++ memset(first, 0xbd, sizeof(struct rt_pending_export));
+ #endif
+
- u32 pos = (first_export - &reb->export[0]);
++ struct rt_export_block *reb = HEAD(tab->exporter.pending);
++ ASSERT_DIE(reb == PAGE_HEAD(first));
+
- free_page(tab->rp, reb);
++ u32 pos = (first - &reb->export[0]);
+ u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed);
+ ASSERT_DIE(pos < end);
+
+ struct rt_pending_export *next = NULL;
+
+ if (++pos < end)
+ next = &reb->export[pos];
+ else
+ {
+ rem_node(&reb->n);
+
+ #ifdef LOCAL_DEBUG
+ memset(reb, 0xbe, page_size);
+ #endif
+
- if (EMPTY_LIST(tab->pending_exports))
++ free_page(reb);
+
- WALK_LIST2(eh, n, tab->exports, n)
++ if (EMPTY_LIST(tab->exporter.pending))
+ {
+ if (config->table_debug)
+ log(L_TRACE "%s: Resetting export seq", tab->name);
+
+ node *n;
- tab->next_export_seq = 1;
++ WALK_LIST2(eh, n, tab->exporter.hooks, n)
+ {
+ if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY)
+ continue;
+
+ ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL);
+ bmap_reset(&eh->seq_map, 1024);
+ }
+
- reb = HEAD(tab->pending_exports);
++ tab->exporter.next_seq = 1;
+ }
+ else
+ {
- first_export = next;
++ reb = HEAD(tab->exporter.pending);
+ next = &reb->export[0];
+ }
+ }
+
- if (!first_export || (first_export->seq >= ih->flush_seq))
++ first = next;
+ }
+
+ done:;
+ struct rt_import_hook *ih; node *x;
+ _Bool imports_stopped = 0;
+ WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n)
+ if (ih->import_state == TIS_WAITING)
- if (EMPTY_LIST(tab->pending_exports) && tm_active(tab->export_timer))
- tm_stop(tab->export_timer);
++ if (!first || (first->seq >= ih->flush_seq))
+ {
+ ih->import_state = TIS_CLEARED;
+ ih->stopped(ih->req);
+ rem_node(&ih->n);
+ mb_free(ih);
+ rt_unlock_table(tab);
+ imports_stopped = 1;
+ }
+
+ if (tab->export_used)
+ ev_schedule(tab->rt_event);
+
+ if (imports_stopped)
+ {
+ if (config->table_debug)
+ log(L_TRACE "%s: Sources pruning routine requested", tab->name);
+
+ rt_prune_sources();
+ }
+
++ if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer))
++ tm_stop(tab->exporter.export_timer);
+}
+
+/**
+ * rt_lock_trie - lock a prefix trie of a routing table
+ * @tab: routing table with prefix trie to be locked
+ *
+ * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
+ * structures. Therefore, asynchronous walks should lock the prefix trie using
+ * this function. That allows the prune loop to rebuild the trie, but postpones
+ * its freeing until all walks are done (unlocked by rt_unlock_trie()).
+ *
+ * Returns the current trie that has been locked; the value should be passed
+ * back to rt_unlock_trie() for unlocking.
+ *
+ */
+struct f_trie *
+rt_lock_trie(rtable *tab)
+{
+ ASSERT(tab->trie);
+
+ tab->trie_lock_count++;
+ return tab->trie;
+}
+
+/**
+ * rt_unlock_trie - unlock a prefix trie of a routing table
+ * @tab: routing table with prefix trie to be unlocked
+ * @trie: value returned by matching rt_lock_trie()
+ *
+ * Called for a trie locked by rt_lock_trie() after the walk over the trie is done.
+ * It may free the trie and schedule next trie pruning.
+ */
+void
+rt_unlock_trie(rtable *tab, struct f_trie *trie)
+{
+ ASSERT(trie);
+
+ if (trie == tab->trie)
+ {
+ /* Unlock the current prefix trie */
+ ASSERT(tab->trie_lock_count);
+ tab->trie_lock_count--;
+ }
+ else if (trie == tab->trie_old)
+ {
+ /* Unlock the old prefix trie */
+ ASSERT(tab->trie_old_lock_count);
+ tab->trie_old_lock_count--;
+
+ /* Free old prefix trie that is no longer needed */
+ if (!tab->trie_old_lock_count)
+ {
+ rfree(tab->trie_old->lp);
+ tab->trie_old = NULL;
+
+ /* Kick prefix trie pruning that was postponed */
+ if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
+ {
+ tab->prune_trie = 1;
+ rt_schedule_prune(tab);
+ }
+ }
+ }
+ else
+ log(L_BUG "Invalid arg to rt_unlock_trie()");
}
+
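+/* Usage sketch (hypothetical caller): the trie returned by rt_lock_trie()
+ * must be remembered and passed back to rt_unlock_trie(), because the prune
+ * loop may swap tab->trie for a rebuilt one in the meantime; this mirrors
+ * how walk_lock is kept in struct rt_export_hook.
+ *
+ *   struct f_trie *lock = rt_lock_trie(tab);
+ *   struct f_trie_walk_state ws;
+ *   trie_walk_init(&ws, lock, my_addr);   .. my_addr is illustrative ..
+ *   net_addr addr;
+ *   while (trie_walk_next(&ws, &addr))
+ *     process(&addr);                     .. hypothetical helper ..
+ *   rt_unlock_trie(tab, lock);
+ */
+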
void
rt_preconfig(struct config *c)
{
/* Replace the route in the list */
new->next = e->next;
*k = e = new;
+
+ /* Get a new ID for the route */
+ new->rte.lastmod = current_time();
+ new->rte.id = hmap_first_zero(&tab->id_map);
+ hmap_set(&tab->id_map, new->rte.id);
}
- ASSERT_DIE(pos == count);
+ ASSERT_DIE(pos <= count);
+ count = pos;
/* Find the new best route */
struct rte_storage **new_best = NULL;
DBG("\tdone\n");
}
++static void
++rt_feed_done(struct rt_export_hook *c)
++{
++ c->event->hook = rt_export_hook;
++
++ rt_set_export_state(c, TES_READY);
++
++ ev_schedule_work(c->event);
++}
++
/**
- * rt_feed_channel - advertise all routes to a channel
+ * rt_feed_by_fib - advertise all routes to a channel by walking a fib
* @c: channel to be fed
*
* This function performs one pass of advertisement of routes to a channel that
struct fib_iterator *fit = &c->feed_fit;
int max_feed = 256;
- ASSERT(c->export_state == TES_FEEDING);
+ ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING);
- FIB_ITERATE_START(&c->table->fib, fit, net, n)
+ rtable *tab = SKIP_BACK(rtable, exporter, c->table);
+
+ FIB_ITERATE_START(&tab->fib, fit, net, n)
{
if (max_feed <= 0)
{
return;
}
- ASSERT(c->export_state == TES_FEEDING);
+ if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING)
+ return;
- if (c->req->export_bulk)
- {
- uint count = rte_feed_count(n);
- if (count)
- {
- rte_update_lock();
- rte **feed = alloca(count * sizeof(rte *));
- rte_feed_obtain(n, feed, count);
- c->req->export_bulk(c->req, n->n.addr, NULL, feed, count);
- max_feed -= count;
- rte_update_unlock();
- }
- }
- else if (n->routes && rte_is_valid(&n->routes->rte))
- {
- rte_update_lock();
- struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes };
- c->req->export_one(c->req, n->n.addr, &rpe);
- max_feed--;
- rte_update_unlock();
- }
-
- for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL))
- rpe_mark_seen(c, rpe);
+ if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr))
+ max_feed -= rt_feed_net(c, n);
}
FIB_ITERATE_END;
- rt_set_export_state(c, TES_READY);
- c->event->hook = rt_export_hook;
- ev_schedule_work(c->event);
++ rt_feed_done(c);
+}
- rt_set_export_state(c, TES_READY);
+static void
+rt_feed_by_trie(void *data)
+{
+ struct rt_export_hook *c = data;
+ rtable *tab = SKIP_BACK(rtable, exporter, c->table);
+
+ ASSERT_DIE(c->walk_state);
+ struct f_trie_walk_state *ws = c->walk_state;
+
+ int max_feed = 256;
+
- ASSERT_DIE(c->export_state == TES_FEEDING);
++ ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING);
+
+ net_addr addr;
+ while (trie_walk_next(ws, &addr))
+ {
+ net *n = net_find(tab, &addr);
+ if (!n)
+ continue;
+
+ if ((max_feed -= rt_feed_net(c, n)) <= 0)
+ return;
+
- ASSERT_DIE(c->export_state == TES_FEEDING);
++ if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING)
++ return;
+ }
+
+ rt_unlock_trie(tab, c->walk_lock);
+ c->walk_lock = NULL;
+
+ mb_free(c->walk_state);
+ c->walk_state = NULL;
+
- rt_set_export_state(c, TES_READY);
++ rt_feed_done(c);
+}
+
+static void
+rt_feed_equal(void *data)
+{
+ struct rt_export_hook *c = data;
+ rtable *tab = SKIP_BACK(rtable, exporter, c->table);
+
- ASSERT_DIE(c->export_state == TES_FEEDING);
++ ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING);
+ ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL);
+
+ net *n = net_find(tab, c->req->addr);
+ if (n)
+ rt_feed_net(c, n);
+
- rt_set_export_state(c, TES_READY);
++ rt_feed_done(c);
+}
+
+static void
+rt_feed_for(void *data)
+{
+ struct rt_export_hook *c = data;
+ rtable *tab = SKIP_BACK(rtable, exporter, c->table);
+
- ASSERT_DIE(c->export_state == TES_FEEDING);
++ ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING);
+ ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR);
+
+ net *n = net_route(tab, c->req->addr);
+ if (n)
+ rt_feed_net(c, n);
+
- rt_set_export_state(c, TES_READY);
++ rt_feed_done(c);
+}
+
+static uint
+rt_feed_net(struct rt_export_hook *c, net *n)
+{
- if (c->req->export_bulk)
- {
- uint count = rte_feed_count(n);
- if (count)
- {
- rte_update_lock();
- rte **feed = alloca(count * sizeof(rte *));
- rte_feed_obtain(n, feed, count);
- struct rt_pending_export rpe = { .new_best = n->routes };
- c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count);
- rte_update_unlock();
- }
- return count;
- }
++ uint count = 0;
+
- if (n->routes && rte_is_valid(&n->routes->rte))
- {
- rte_update_lock();
- struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes };
- c->req->export_one(c->req, n->n.addr, &rpe);
- rte_update_unlock();
- return 1;
- }
++ if (c->req->export_bulk)
++ {
++ count = rte_feed_count(n);
++ if (count)
++ {
++ rte_update_lock();
++ rte **feed = alloca(count * sizeof(rte *));
++ rte_feed_obtain(n, feed, count);
++ c->req->export_bulk(c->req, n->n.addr, NULL, feed, count);
++ rte_update_unlock();
++ }
++ }
+
- return 0;
- }
++ else if (n->routes)
++ {
++ rte_update_lock();
++ struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes };
++ c->req->export_one(c->req, n->n.addr, &rpe);
++ rte_update_unlock();
++ count = 1;
++ }
+
++ for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL))
++ rpe_mark_seen(c, rpe);
++
++ return count;
++}
+
+/*
+ * Import table
+ */
+
-
+void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count)
+{
+ struct channel *c = SKIP_BACK(struct channel, reload_req, req);
+
+ for (uint i=0; i<count; i++)
+ if (feed[i]->sender == c->in_req.hook)
+ {
+ /* Strip the later attribute layers */
+ rte new = *feed[i];
+ while (new.attrs->next)
+ new.attrs = new.attrs->next;
+
+ /* And reload the route */
+ rte_update(c, net, &new, new.src);
+ }
}
--- /dev/null
- struct rte_storage *routes; /* Available routes for this network */
+/*
+ * BIRD Internet Routing Daemon -- Routing Table
+ *
+ * (c) 1998--2000 Martin Mares <mj@ucw.cz>
+ * (c) 2019--2021 Maria Matejka <mq@jmq.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_NEST_RT_H_
+#define _BIRD_NEST_RT_H_
+
+#include "lib/lists.h"
+#include "lib/bitmap.h"
+#include "lib/resource.h"
+#include "lib/net.h"
+#include "lib/type.h"
+#include "lib/fib.h"
+#include "lib/route.h"
+
++#include <stdatomic.h>
++
+struct ea_list;
+struct protocol;
+struct proto;
+struct channel;
+struct rte_src;
+struct symbol;
+struct timer;
+struct filter;
+struct f_trie;
+struct f_trie_walk_state;
+struct cli;
+
+/*
+ * Master Routing Tables. Generally speaking, each of them contains a FIB
+ * with each entry pointing to a list of route entries representing routes
+ * to given network (with the selected one at the head).
+ *
+ * Each of the RTE's contains variable data (the preference and protocol-dependent
+ * metrics) and a pointer to a route attribute block common for many routes).
+ *
+ * It's guaranteed that there is at most one RTE for every (prefix,proto) pair.
+ */
+
+struct rtable_config {
+ node n;
+ char *name;
+ struct rtable *table;
+ struct proto_config *krt_attached; /* Kernel syncer attached to this table */
+ uint addr_type; /* Type of address data stored in table (NET_*) */
+ uint gc_threshold; /* Maximum number of operations before GC is run */
+ uint gc_period; /* Approximate time between two consecutive GC runs */
+ byte sorted; /* Routes of network are sorted according to rte_better() */
+ byte trie_used; /* Rtable has attached trie */
+ btime min_settle_time; /* Minimum settle time for notifications */
+ btime max_settle_time; /* Maximum settle time for notifications */
++ btime export_settle_time; /* Delay before exports are announced */
+};
+
+struct rt_export_hook;
+struct rt_export_request;
+
+struct rt_exporter {
+ list hooks; /* Registered route export hooks */
+ uint addr_type; /* Type of address data exported (NET_*) */
++
+ struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *);
+ void (*stop)(struct rt_export_hook *);
+ void (*done)(struct rt_export_hook *);
++ void (*used)(struct rt_exporter *);
++
++ list pending; /* List of packed struct rt_pending_export */
++ struct timer *export_timer;
++
++ struct rt_pending_export *first; /* First export to announce */
++ u64 next_seq; /* The next export will have this ID */
+};
+
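+/* Wiring sketch: rt_setup() in rt-table.c fills the embedded exporter of a
+ * plain routing table like this (the rt_table_export_* callbacks are the
+ * table's own implementations):
+ *
+ *   t->exporter = (struct rt_exporter) {
+ *     .addr_type = t->addr_type,
+ *     .start = rt_table_export_start,
+ *     .stop = rt_table_export_stop,
+ *     .done = rt_table_export_done,
+ *     .used = rt_export_used,
+ *   };
+ *   init_list(&t->exporter.hooks);
+ *   init_list(&t->exporter.pending);
+ */
+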
+typedef struct rtable {
+ resource r;
+ node n; /* Node in list of all tables */
+ pool *rp; /* Resource pool to allocate everything from, including itself */
+ struct slab *rte_slab; /* Slab to allocate route objects */
+ struct fib fib;
+ struct f_trie *trie; /* Trie of prefixes defined in fib */
+ char *name; /* Name of this table */
+ uint addr_type; /* Type of address data stored in table (NET_*) */
+ int use_count; /* Number of protocols using this table */
+ u32 rt_count; /* Number of routes in the table */
+
+ list imports; /* Registered route importers */
+ struct rt_exporter exporter; /* Exporter API structure */
+
+ struct hmap id_map;
+ struct hostcache *hostcache;
+ struct rtable_config *config; /* Configuration of this table */
+ struct config *deleted; /* Table doesn't exist in current configuration,
+ * delete as soon as use_count becomes 0 and remove
+ * obstacle from this routing table.
+ */
+ struct event *rt_event; /* Routing table event */
+ struct timer *prune_timer; /* Timer for periodic pruning / GC */
+ btime last_rt_change; /* Last time when route changed */
+ btime base_settle_time; /* Start time of rtable settling interval */
+ btime gc_time; /* Time of last GC */
+ uint gc_counter; /* Number of operations since last GC */
+ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */
+ byte prune_trie; /* Prune prefix trie during next table prune */
+ byte hcu_scheduled; /* Hostcache update is scheduled */
+ byte nhu_state; /* Next Hop Update state */
++ byte export_used; /* Pending Export pruning is scheduled */
+ struct fib_iterator prune_fit; /* Rtable prune FIB iterator */
+ struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
+ struct f_trie *trie_new; /* New prefix trie defined during pruning */
+ struct f_trie *trie_old; /* Old prefix trie waiting to be freed */
+ u32 trie_lock_count; /* Prefix trie locked by walks */
+ u32 trie_old_lock_count; /* Old prefix trie locked by walks */
+ struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */
+
+ list subscribers; /* Subscribers for notifications */
+ struct timer *settle_timer; /* Settle time for notifications */
+ list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */
+ struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */
+} rtable;
+
+struct rt_subscription {
+ node n;
+ rtable *tab;
+ void (*hook)(struct rt_subscription *b);
+ void *data;
+};
+
+struct rt_flowspec_link {
+ node n;
+ rtable *src;
+ rtable *dst;
+ u32 uc;
+};
+
+#define NHU_CLEAN 0
+#define NHU_SCHEDULED 1
+#define NHU_RUNNING 2
+#define NHU_DIRTY 3
+
+typedef struct network {
- linpool *lp;
++ struct rte_storage *routes; /* Available routes for this network */
++ struct rt_pending_export *first, *last;
+ struct fib_node n; /* FIB flags reserved for kernel syncer */
+} net;
+
+struct hostcache {
+ slab *slab; /* Slab holding all hostentries */
+ struct hostentry **hash_table; /* Hash table for hostentries */
+ unsigned hash_order, hash_shift;
+ unsigned hash_max, hash_min;
+ unsigned hash_items;
+ linpool *lp; /* Linpool for trie */
+ struct f_trie *trie; /* Trie of prefixes that might affect hostentries */
+ list hostentries; /* List of all hostentries */
+ byte update_hostcache;
+};
+
+struct hostentry {
+ node ln;
+ ip_addr addr; /* IP address of host, part of key */
+ ip_addr link; /* (link-local) IP address of host, used as gw
+ if host is directly attached */
+ struct rtable *tab; /* Dependent table, part of key */
+ struct hostentry *next; /* Next in hash chain */
+ unsigned hash_key; /* Hash key */
+ unsigned uc; /* Use count */
+ ea_list *src; /* Source attributes */
+ byte nexthop_linkable; /* Nexthop list is completely non-device */
+ u32 igp_metric; /* Chosen route IGP metric */
+};
+
+struct rte_storage {
+ struct rte_storage *next; /* Next in chain */
+ struct rte rte; /* Route data */
+};
+
+#define RTE_COPY(r) ((r) ? (r)->rte : (rte) {})
+#define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {})
+#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL)
+#define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? &((r)->rte) : NULL)
+
+/* Table-channel connections */
+
+struct rt_import_request {
+ struct rt_import_hook *hook; /* The table part of importer */
+ char *name;
+ u8 trace_routes;
+
+ void (*dump_req)(struct rt_import_request *req);
+ void (*log_state_change)(struct rt_import_request *req, u8 state);
+  /* Preimport is called when the @new route is about to be inserted, replacing @old.
+   * Return nonzero to continue with the (possibly modified in-place) route, zero to withdraw. */
+ int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old);
+};
+
+struct rt_import_hook {
+ node n;
+ rtable *table; /* The connected table */
+ struct rt_import_request *req; /* The requestor */
+
+ struct rt_import_stats {
+ /* Import - from protocol to core */
+ u32 pref; /* Number of routes selected as best in the (adjacent) routing table */
+ u32 updates_ignored; /* Number of route updates rejected as already in route table */
+ u32 updates_accepted; /* Number of route updates accepted and imported */
+ u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */
+ u32 withdraws_accepted; /* Number of route withdraws accepted and processed */
+ } stats;
+
++ u64 flush_seq; /* Table export seq when the channel announced flushing */
+ btime last_state_change; /* Time of last state transition */
+
+ u8 import_state; /* IS_* */
+ u8 stale_set; /* Set this stale_cycle to imported routes */
+ u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */
+ u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */
+ u8 stale_pruning; /* Last prune started when this value was set at stale_valid */
+
+ void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */
+};
+
+struct rt_pending_export {
++ struct rt_pending_export * _Atomic next; /* Next export for the same destination */
+ struct rte_storage *new, *new_best, *old, *old_best;
++ u64 seq; /* Sequential ID (table-local) of the pending export */
+};
+
+struct rt_export_request {
+ struct rt_export_hook *hook; /* Table part of the export */
+ char *name;
+ const net_addr *addr; /* Network prefilter address */
+ u8 trace_routes;
+ u8 addr_mode; /* Network prefilter mode (TE_ADDR_*) */
+
+  /* There are two methods of export. You can either request feeding every single change
+   * or feeding the whole route feed. For regular exports, &export_one is preferred.
+   * When feeding, however, &export_bulk is preferred, falling back to &export_one.
+   * Thus, for RA_OPTIMAL, only &export_one is set,
+   * for RA_MERGED and RA_ACCEPTED, only &export_bulk is set,
+   * and for RA_ANY, both are set to accommodate feeding all routes while receiving single changes.
+   */
+ void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe);
+ void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count);
+
+ void (*dump_req)(struct rt_export_request *req);
+ void (*log_state_change)(struct rt_export_request *req, u8);
+};
+
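+/* Callback sketch (hypothetical receiver): an export_one hook inspects the
+ * pending export and must not keep the rpe pointer after returning, as the
+ * journal blocks are recycled by rt_export_cleanup().
+ *
+ *   static void
+ *   my_export_one(struct rt_export_request *req, const net_addr *net,
+ *                 struct rt_pending_export *rpe)
+ *   {
+ *     if (rpe->new)
+ *       log(L_TRACE "%s: update for %N", req->name, net);
+ *     else
+ *       log(L_TRACE "%s: withdraw for %N", req->name, net);
+ *   }
+ */
+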
+struct rt_export_hook {
+ node n;
+ struct rt_exporter *table; /* The connected table */
+
+ pool *pool;
- u8 export_state; /* Route export state (TES_*, see below) */
+
+ struct rt_export_request *req; /* The requestor */
+
+ struct rt_export_stats {
+ /* Export - from core to protocol */
+ u32 updates_received; /* Number of route updates received */
+ u32 withdraws_received; /* Number of route withdraws received */
+ } stats;
+
+ union {
+ struct fib_iterator feed_fit; /* Routing table iterator used during feeding */
+ struct {
+ struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
+ struct f_trie *walk_lock; /* Locked trie for walking */
+ };
+ u32 hash_iter; /* Iterator over hash */
+ };
+
++  struct bmap seq_map;			/* Keep track of which exports were already processed */
++
++ struct rt_pending_export * _Atomic last_export;/* Last export processed */
++ struct rt_pending_export *rpe_next; /* Next pending export to process */
++
+ btime last_state_change; /* Time of last state transition */
+
+ u8 refeed_pending; /* Refeeding and another refeed is scheduled */
++ _Atomic u8 export_state; /* Route export state (TES_*, see below) */
+ u8 feed_type; /* Which feeding method is used (TFT_*, see below) */
+
+ struct event *event; /* Event running all the export operations */
+
+ void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */
+};
+
+#define TIS_DOWN 0
+#define TIS_UP 1
+#define TIS_STOP 2
+#define TIS_FLUSHING 3
+#define TIS_WAITING 4
+#define TIS_CLEARED 5
+#define TIS_MAX 6
+
+#define TES_DOWN 0
+#define TES_FEEDING 2
+#define TES_READY 3
+#define TES_STOP 4
+#define TES_MAX 5
+
+/* Value of addr_mode */
+#define TE_ADDR_NONE 0 /* No address matching */
+#define TE_ADDR_EQUAL 1 /* Exact query - show route <addr> */
+#define TE_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */
+#define TE_ADDR_IN 3 /* Interval query - show route in <addr> */
+
+
+#define TFT_FIB 1
+#define TFT_TRIE 2
+#define TFT_HASH 3
+
+void rt_request_import(rtable *tab, struct rt_import_request *req);
+void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req);
+
+void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req);
+
+void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *));
+void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *));
+
+const char *rt_import_state_name(u8 state);
+const char *rt_export_state_name(u8 state);
+
+static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; }
+static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; }
+
+void rt_set_export_state(struct rt_export_hook *hook, u8 state);
+
+void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src);
+
++/* Get next rpe. If src is given, it must match. */
++struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src);
++
++/* Mark the pending export processed */
++void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
++
++/* Get pending export seen status */
++int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
++
+/* Types of route announcement, also used as flags */
+#define RA_UNDEF 0 /* Undefined RA type */
+#define RA_OPTIMAL 1 /* Announcement of optimal route change */
+#define RA_ACCEPTED 2 /* Announcement of first accepted route */
+#define RA_ANY 3 /* Announcement of any route change */
+#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */
+
+/* Return value of preexport() callback */
+#define RIC_ACCEPT 1 /* Accepted by protocol */
+#define RIC_PROCESS 0 /* Process it through import filter */
+#define RIC_REJECT -1 /* Rejected by protocol */
+#define RIC_DROP -2 /* Silently dropped by protocol */
+
+#define rte_update channel_rte_import
+/**
+ * rte_update - enter a new update to a routing table
+ * @c: channel doing the update
+ * @net: network address
+ * @rte: a &rte representing the new route
+ * @src: old route source identifier
+ *
+ * This function imports a new route to the appropriate table (via the channel).
+ * Table keys are @net (obligatory) and @rte->attrs->src.
+ * Both the @net and @rte pointers can be local.
+ *
+ * The route attributes (@rte->attrs) are obligatory. They can also be allocated
+ * locally. However, if you use an already-cached attribute object, you must
+ * call rta_clone() on that object yourself. (These semantics may change in future.)
+ *
+ * If the route attributes are local, you may set @rte->attrs->src to NULL, then
+ * the protocol's default route source will be supplied.
+ *
+ * When rte_update() gets a route, it automatically validates it. This includes
+ * checking for validity of the given network and next hop addresses and also
+ * checking for host-scope or link-scope routes. Then the import filters are
+ * processed and if accepted, the route is passed to route table recalculation.
+ *
+ * The accepted routes are then inserted into the table, replacing the old route
+ * for the same @net identified by @src. Then the route is announced
+ * to all the channels connected to the table using the standard export mechanism.
+ * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same
+ * as @src.
+ *
+ * All memory used for temporary allocations is taken from a special linpool
+ * @rte_update_pool and freed when rte_update() finishes.
+ */
+void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src);
+
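+/* Call sketch (hypothetical names): announce one route through a channel,
+ * then withdraw it by passing NULL instead of the route.
+ *
+ *   rte e0 = (rte) { .attrs = my_attrs, .src = my_src };
+ *   rte_update(c, &my_net, &e0, my_src);    .. announce ..
+ *   rte_update(c, &my_net, NULL, my_src);   .. withdraw ..
+ */
+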
+extern list routing_tables;
+struct config;
+
+void rt_init(void);
+void rt_preconfig(struct config *);
+void rt_postconfig(struct config *);
+void rt_commit(struct config *new, struct config *old);
+void rt_lock_table(rtable *);
+void rt_unlock_table(rtable *);
+struct f_trie * rt_lock_trie(rtable *tab);
+void rt_unlock_trie(rtable *tab, struct f_trie *trie);
+void rt_subscribe(rtable *tab, struct rt_subscription *s);
+void rt_unsubscribe(struct rt_subscription *s);
+void rt_flowspec_link(rtable *src, rtable *dst);
+void rt_flowspec_unlink(rtable *src, rtable *dst);
+rtable *rt_setup(pool *, struct rtable_config *);
+static inline void rt_shutdown(rtable *r) { rfree(r->rp); }
+
+static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
+static inline net *net_find_valid(rtable *tab, const net_addr *addr)
+{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; }
+static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
+net *net_route(rtable *tab, const net_addr *n);
+int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
+rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent);
+void rt_refresh_begin(struct rt_import_request *);
+void rt_refresh_end(struct rt_import_request *);
+void rt_modify_stale(rtable *t, struct rt_import_request *);
+void rt_schedule_prune(rtable *t);
+void rte_dump(struct rte_storage *);
+void rte_free(struct rte_storage *);
+struct rte_storage *rte_store(const rte *, net *net, rtable *);
+void rt_dump(rtable *);
+void rt_dump_all(void);
+void rt_dump_hooks(rtable *);
+void rt_dump_hooks_all(void);
+int rt_reload_channel(struct channel *c);
+void rt_reload_channel_abort(struct channel *c);
+void rt_refeed_channel(struct channel *c);
+void rt_prune_sync(rtable *t, int all);
+struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
+
+static inline int rt_is_ip(rtable *tab)
+{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); }
+
+static inline int rt_is_vpn(rtable *tab)
+{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); }
+
+static inline int rt_is_roa(rtable *tab)
+{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); }
+
+static inline int rt_is_flow(rtable *tab)
+{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); }
+
+
+/* Default limit for ECMP next hops, defined in sysdep code */
+extern const int rt_default_ecmp;
+
+struct rt_show_data_rtable {
+ node n;
+ const char *name;
+ struct rt_exporter *table;
+ struct channel *export_channel;
+ struct channel *prefilter;
+ struct krt_proto *kernel;
+};
+
+struct rt_show_data {
+ struct cli *cli; /* Pointer back to the CLI */
+ net_addr *addr;
+ list tables;
+ struct rt_show_data_rtable *tab; /* Iterator over table list */
+ struct rt_show_data_rtable *last_table; /* Last table in output */
+ struct rt_export_request req; /* Export request in use */
+ int verbose, tables_defined_by;
+ const struct filter *filter;
+ struct proto *show_protocol;
+ struct proto *export_protocol;
+ struct channel *export_channel;
+ struct config *running_on_config;
+ struct rt_export_hook *kernel_export_hook;
+ int export_mode, addr_mode, primary_only, filtered, stats;
+
+ int net_counter, rt_counter, show_counter, table_counter;
+ int net_counter_last, rt_counter_last, show_counter_last;
+ int show_counter_last_flush;
+};
+
+void rt_show(struct rt_show_data *);
+struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name);
+struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t);
+
+/* Value of table definition mode in struct rt_show_data */
+#define RSD_TDB_DEFAULT 0 /* no table specified */
+#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */
+#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */
+#define RSD_TDB_DIRECT	(RSD_TDB_SET | RSD_TDB_NMN)	/* show route ... table X table Y ... */
+
+#define RSD_TDB_SET 0x1 /* internal: show empty tables */
+#define RSD_TDB_NMN 0x2 /* internal: need matching net */
+
+/* Value of export_mode in struct rt_show_data */
+#define RSEM_NONE 0 /* Export mode not used */
+#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */
+#define RSEM_EXPORT 2 /* Routes accepted by export filter */
+#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */
+#define RSEM_EXPORTED 4 /* Routes marked in export map */
+
+/* Host entry: Resolve hook for recursive nexthops */
+extern struct ea_class ea_gen_hostentry;
+struct hostentry_adata {
+ adata ad;
+ struct hostentry *he;
+ u32 labels[0];
+};
+
+void
+ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]);
+
+void ea_show_hostentry(const struct adata *ad, byte *buf, uint size);
+void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad);
+
+/*
+ * Default protocol preferences
+ */
+
+#define DEF_PREF_DIRECT 240 /* Directly connected */
+#define DEF_PREF_STATIC 200 /* Static route */
+#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */
+#define DEF_PREF_BABEL 130 /* Babel */
+#define DEF_PREF_RIP 120 /* RIP */
+#define DEF_PREF_BGP 100 /* BGP */
+#define DEF_PREF_RPKI 100 /* RPKI */
+#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */
+#define DEF_PREF_UNKNOWN 0 /* Routes with no preference set */
+
+/*
+ * Route Origin Authorization
+ */
+
+#define ROA_UNKNOWN 0
+#define ROA_VALID 1
+#define ROA_INVALID 2
+
+int net_roa_check(rtable *tab, const net_addr *n, u32 asn);
+
+#endif