From: Maria Matejka
Date: Fri, 8 Dec 2023 10:33:43 +0000 (+0100)
Subject: Merge commit 'db1eb46664d4c76d56dc55a63ce7abe853fc6862' into HEAD
X-Git-Tag: v3.0.0~324
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8c19f8a209201fa3c23284291999e64ff7bb0b39;p=thirdparty%2Fbird.git

Merge commit 'db1eb46664d4c76d56dc55a63ce7abe853fc6862' into HEAD
---

8c19f8a209201fa3c23284291999e64ff7bb0b39
diff --cc conf/conf.h
index 691c86686,ff05f50ae..f6e9069dd
--- a/conf/conf.h
+++ b/conf/conf.h
@@@ -35,7 -35,7 +35,8 @@@ struct config
   u32 proto_default_debug;		/* Default protocol debug mask */
   u32 proto_default_mrtdump;		/* Default protocol mrtdump mask */
   u32 channel_default_debug;		/* Default channel debug mask */
 + u32 table_default_debug;		/* Default table debug mask */
 + u16 filter_vstk, filter_estk;		/* Filter stack depth */
   struct timeformat tf_route;		/* Time format for 'show route' */
   struct timeformat tf_proto;		/* Time format for 'show protocol' */
   struct timeformat tf_log;		/* Time format for the logfile */
diff --cc lib/route.h
index 9570583b1,000000000..f97029319
mode 100644,000000..100644
--- a/lib/route.h
+++ b/lib/route.h
@@@ -1,545 -1,0 +1,546 @@@
+/*
+ *	BIRD Internet Routing Daemon -- Routing data structures
+ *
+ *	(c) 1998--2000 Martin Mares
+ *	(c) 2022 Maria Matejka
+ *
+ *	Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_LIB_ROUTE_H_
+#define _BIRD_LIB_ROUTE_H_
+
+#undef RT_SOURCE_DEBUG
+
+#include "lib/type.h"
+#include "lib/rcu.h"
+#include "lib/hash.h"
+#include "lib/event.h"
+#include "lib/lockfree.h"
+
+struct network;
+struct proto;
+struct cli;
+struct rtable_private;
+struct rte_storage;
+
+#define RTE_IN_TABLE_WRITABLE \
+  byte pflags;				/* Protocol-specific flags; may change in-table (!) */ \
+  u8 stale_cycle;			/* Auxiliary value for route refresh; may change in-table (!) */ \
+
+typedef struct rte {
+  RTE_IN_TABLE_WRITABLE;
+  byte flags;				/* Table-specific flags */
+  u8 generation;			/* If this route import is based on other previously exported route,
+					   this value should be 1 + MAX(generation of the parent routes).
+					   Otherwise the route is independent and this value is zero. */
+  u32 id;				/* Table specific route id */
+  struct ea_list *attrs;		/* Attributes of this route */
+  const net_addr *net;			/* Network this RTE belongs to */
+  struct rte_src *src;			/* Route source that created the route */
+  struct rt_import_hook *sender;	/* Import hook used to send the route to the routing table */
+  btime lastmod;			/* Last modified (set by table) */
+} rte;
+
+#define REF_FILTERED	2		/* Route is rejected by import filter */
+#define REF_PENDING	32		/* Route has not propagated completely yet */
+
+/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
+static inline int rte_is_valid(const rte *r) { return r && !(r->flags & REF_FILTERED); }
+
+/* Route just has REF_FILTERED flag */
+static inline int rte_is_filtered(const rte *r) { return !!(r->flags & REF_FILTERED); }
+
+/* Strip the route of the table-specific values */
+static inline rte rte_init_from(const rte *r)
+{
+  return (rte) {
+    .attrs = r->attrs,
+    .net = r->net,
+    .src = r->src,
+  };
+}
+
+int rte_same(const rte *, const rte *);
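
/* Editor's note -- illustrative sketch, not part of the patch: how a
 * pipe-like protocol might use rte_init_from() to re-announce a route it
 * received from another table. The helper name my_reimport() is
 * hypothetical; rte_init_from() and the rte fields come from the header
 * above. */
static void my_reimport(const rte *exported)
{
  /* Drop the table-specific state (id, flags, lastmod, ...) while keeping
   * attributes, prefix and source. */
  rte fresh = rte_init_from(exported);

  /* Per the comment at rte.generation above, a route derived from an
   * exported route must carry 1 + MAX(parent generations) so the tables
   * can detect unbounded pipe loops. */
  fresh.generation = exported->generation + 1;

  /* ... hand `fresh` over to the importing channel here ... */
}
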
+
+struct rte_src {
+  struct rte_src *next;			/* Hash chain */
+  struct rte_owner *owner;		/* Route source owner */
+  u64 private_id;			/* Private ID, assigned by the protocol */
+  u32 global_id;			/* Globally unique ID of the source */
+  struct lfuc uc;			/* Use count */
+};
+
+struct rte_owner_class {
+  void (*get_route_info)(const rte *, byte *buf); /* Get route information (for `show route' command) */
+  int (*rte_better)(const rte *, const rte *);
+  int (*rte_mergable)(const rte *, const rte *);
+  u32 (*rte_igp_metric)(const rte *);
+};
+
+struct rte_owner {
+  struct rte_owner_class *class;
+  int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte_storage *new, struct rte_storage *, struct rte_storage *);
+  HASH(struct rte_src) hash;
+  const char *name;
+  u32 hash_key;
+  u32 uc;
++ u32 debug;
+  event_list *list;
+  event *prune;
+  event *stop;
+};
+
+extern DOMAIN(attrs) attrs_domain;
+
+#define RTA_LOCK	LOCK_DOMAIN(attrs, attrs_domain)
+#define RTA_UNLOCK	UNLOCK_DOMAIN(attrs, attrs_domain)
+
+#define RTE_SRC_PU_SHIFT	44
+#define RTE_SRC_IN_PROGRESS	(1ULL << RTE_SRC_PU_SHIFT)
+
+/* Get a route source. This also locks the source, therefore the caller has to
+ * unlock the source after the route has been propagated. */
+struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
+#define rt_get_source(p, id)	rt_get_source_o(&(p)->sources, (id))
+
+struct rte_src *rt_find_source_global(u32 id);
+
+#ifdef RT_SOURCE_DEBUG
+#define rt_lock_source _rt_lock_source_internal
+#define rt_unlock_source _rt_unlock_source_internal
+#endif
+
+static inline void rt_lock_source(struct rte_src *src)
+{
+  lfuc_lock(&src->uc);
+}
+
+static inline void rt_unlock_source(struct rte_src *src)
+{
+  lfuc_unlock(&src->uc, src->owner->list, src->owner->prune);
+}
+
+#ifdef RT_SOURCE_DEBUG
+#undef rt_lock_source
+#undef rt_unlock_source
+
+#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) )
+#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) )
+#endif
+
+void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
+void rt_destroy_sources(struct rte_owner *, event *);
+
+void rt_dump_sources(struct rte_owner *);
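
/* Editor's note -- illustrative sketch, not part of the patch: the expected
 * life cycle of route sources from a protocol's point of view, using only
 * the calls declared above. The function name and the private ID 42 are
 * hypothetical; proto_event_list() is used the same way by proto_do_start()
 * in nest/proto.c further below. */
static void my_source_lifecycle(struct proto *p)
{
  /* Once at protocol start: set up the owner's hash and prune event */
  rt_init_sources(&p->sources, p->name, proto_event_list(p));

  /* Per announced route: the returned source is already locked ... */
  struct rte_src *src = rt_get_source(p, 42);

  /* ... and must be unlocked once the route has been propagated;
   * this may schedule the owner's prune event via the lfuc machinery. */
  rt_unlock_source(src);

  /* At shutdown (with a hypothetical stop event, run when all sources
   * are gone): rt_destroy_sources(&p->sources, my_stop_event); */
}
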
+
+/*
+ *	Route Attributes
+ *
+ *	Beware: All standard BGP attributes must be represented here instead
+ *	of making them local to the route. This is needed to ensure proper
+ *	construction of BGP route attribute lists.
+ */
+
+/* Nexthop structure */
+struct nexthop {
+  ip_addr gw;				/* Next hop */
+  struct iface *iface;			/* Outgoing interface */
+  byte flags;
+  byte weight;
+  byte labels;				/* Number of all labels */
+  u32 label[0];
+};
+
+/* For packing one into eattrs */
+struct nexthop_adata {
+  struct adata ad;
+  /* There is either a set of nexthops or a special destination (RTD_*) */
+  union {
+    struct nexthop nh;
+    uint dest;
+  };
+};
+
+#define NEXTHOP_DEST_SIZE	(OFFSETOF(struct nexthop_adata, dest) + sizeof(uint) - OFFSETOF(struct adata, data))
+#define NEXTHOP_DEST_LITERAL(x)	((struct nexthop_adata) { \
+      .ad.length = NEXTHOP_DEST_SIZE, .dest = (x), })
+
+#define RNF_ONLINK		0x1	/* Gateway is onlink regardless of IP ranges */
+
+
+#define RTS_STATIC 1			/* Normal static route */
+#define RTS_INHERIT 2			/* Route inherited from kernel */
+#define RTS_DEVICE 3			/* Device route */
+#define RTS_STATIC_DEVICE 4		/* Static device route */
+#define RTS_REDIRECT 5			/* Learned via redirect */
+#define RTS_RIP 6			/* RIP route */
+#define RTS_OSPF 7			/* OSPF route */
+#define RTS_OSPF_IA 8			/* OSPF inter-area route */
+#define RTS_OSPF_EXT1 9			/* OSPF external route type 1 */
+#define RTS_OSPF_EXT2 10		/* OSPF external route type 2 */
+#define RTS_BGP 11			/* BGP route */
+#define RTS_PIPE 12			/* Inter-table wormhole */
+#define RTS_BABEL 13			/* Babel route */
+#define RTS_RPKI 14			/* Route Origin Authorization */
+#define RTS_PERF 15			/* Perf checker */
+#define RTS_L3VPN 16			/* MPLS L3VPN */
+#define RTS_AGGREGATED 17		/* Aggregated route */
+#define RTS_MAX 18
+
+#define RTD_NONE 0			/* Undefined next hop */
+#define RTD_UNICAST 1			/* A standard next hop */
+#define RTD_BLACKHOLE 2			/* Silently drop packets */
+#define RTD_UNREACHABLE 3		/* Reject as unreachable */
+#define RTD_PROHIBIT 4			/* Administratively prohibited */
+#define RTD_MAX 5
+
+extern const char * rta_dest_names[RTD_MAX];
+
+static inline const char *rta_dest_name(uint n)
+{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; }
+
+
+/*
+ *	Extended Route Attributes
+ */
+
+typedef struct eattr {
+  word id;				/* EA_CODE(PROTOCOL_..., protocol-dependent ID) */
+  byte flags;				/* Protocol-dependent flags */
+  byte type;				/* Attribute type */
+  byte rfu:5;
+  byte originated:1;			/* The attribute has originated locally */
+  byte fresh:1;				/* An uncached attribute (e.g. modified in export filter) */
+  byte undef:1;				/* Explicitly undefined */
+
+  PADDING(unused, 3, 3);
+
+  union bval u;
+} eattr;
+
+
+#define EA_CODE_MASK 0xffff
+#define EA_ALLOW_UNDEF 0x10000		/* ea_find: allow EAF_TYPE_UNDEF */
+#define EA_BIT(n) ((n) << 24)		/* Used in bitfield accessors */
+#define EA_BIT_GET(ea) ((ea) >> 24)
+
+typedef struct ea_list {
+  struct ea_list *next;			/* In case we have an override list */
+  byte flags;				/* Flags: EALF_... */
+  byte rfu;
+  word count;				/* Number of attributes */
+  eattr attrs[0];			/* Attribute definitions themselves */
+} ea_list;
+
+struct ea_storage {
+  struct ea_storage *next_hash;		/* Next in hash chain */
+  struct ea_storage **pprev_hash;	/* Previous in hash chain */
+  _Atomic u32 uc;			/* Use count */
+  u32 hash_key;				/* List hash */
+  ea_list l[0];				/* The list itself */
+};
+
+#define EALF_SORTED 1			/* Attributes are sorted by code */
+#define EALF_BISECT 2			/* Use interval bisection for searching */
+#define EALF_CACHED 4			/* List is cached */
+#define EALF_HUGE 8			/* List is too big to fit into slab */
+
+struct ea_class {
+#define EA_CLASS_INSIDE \
+  const char *name;			/* Name (both print and filter) */ \
+  struct symbol *sym;			/* Symbol to export to configs */ \
+  uint id;				/* Autoassigned attribute ID */ \
+  uint uc;				/* Reference count */ \
+  btype type;				/* Data type ID */ \
+  uint readonly:1;			/* This attribute can't be changed by filters */ \
+  uint conf:1;				/* Requested by config */ \
+  uint hidden:1;			/* Technical attribute, do not show, do not expose to filters */ \
+  void (*format)(const eattr *ea, byte *buf, uint size); \
+  void (*stored)(const eattr *ea);	/* When stored into global hash */ \
+  void (*freed)(const eattr *ea);	/* When released from global hash */ \
+
+  EA_CLASS_INSIDE;
+};
+
+struct ea_class_ref {
+  resource r;
+  struct ea_class *class;
+};
+
+void ea_register_init(struct ea_class *);
+struct ea_class_ref *ea_register_alloc(pool *, struct ea_class);
+struct ea_class_ref *ea_ref_class(pool *, struct ea_class *); /* Reference for an attribute alias */
+
+#define EA_REGISTER_ALL_HELPER(x)	ea_register_init(x);
+#define EA_REGISTER_ALL(...)	MACRO_FOREACH(EA_REGISTER_ALL_HELPER, __VA_ARGS__)
+
+struct ea_class *ea_class_find_by_id(uint id);
+struct ea_class *ea_class_find_by_name(const char *name);
+static inline struct ea_class *ea_class_self(struct ea_class *self) { return self; }
+#define ea_class_find(_arg)	_Generic((_arg), \
+  uint: ea_class_find_by_id, \
+  word: ea_class_find_by_id, \
+  char *: ea_class_find_by_name, \
+  const char *: ea_class_find_by_name, \
+  struct ea_class *: ea_class_self)(_arg)
+
+struct ea_walk_state {
+  ea_list *eattrs;			/* Current ea_list, initially set by caller */
+  eattr *ea;				/* Current eattr, initially NULL */
+  u32 visited[4];			/* Bitfield, limiting max to 128 */
+};
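
/* Editor's note -- illustrative sketch, not part of the patch: declaring and
 * registering a protocol-specific attribute class with the API above. The
 * class name "my_metric" is hypothetical, and T_INT (an embedded integer
 * type tag from lib/type.h) is an assumption of this example. */
static struct ea_class ea_my_metric = {
  .name = "my_metric",		/* shown in filters and route dumps */
  .type = T_INT,		/* embedded integer value */
};

static void my_register_classes(void)
{
  /* Expands to ea_register_init(&ea_my_metric); for each argument */
  EA_REGISTER_ALL(&ea_my_metric);
}
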
+
+#define ea_find(_l, _arg)	_Generic((_arg), uint: ea_find_by_id, struct ea_class *: ea_find_by_class, char *: ea_find_by_name)(_l, _arg)
+eattr *ea_find_by_id(ea_list *, unsigned ea);
+static inline eattr *ea_find_by_class(ea_list *l, const struct ea_class *def)
+{ return ea_find_by_id(l, def->id); }
+static inline eattr *ea_find_by_name(ea_list *l, const char *name)
+{
+  const struct ea_class *def = ea_class_find_by_name(name);
+  return def ? ea_find_by_class(l, def) : NULL;
+}
+
+#define ea_get_int(_l, _ident, _def)  ({ \
+    struct ea_class *cls = ea_class_find((_ident)); \
+    ASSERT_DIE(cls->type & EAF_EMBEDDED); \
+    const eattr *ea = ea_find((_l), cls->id); \
+    (ea ? ea->u.data : (_def)); \
+    })
+
+#define ea_get_ip(_l, _ident, _def)  ({ \
+    struct ea_class *cls = ea_class_find((_ident)); \
+    ASSERT_DIE(cls->type == T_IP); \
+    const eattr *ea = ea_find((_l), cls->id); \
+    (ea ? *((const ip_addr *) ea->u.ptr->data) : (_def)); \
+    })
+
+#define ea_get_adata(_l, _ident) ({ \
+    struct ea_class *cls = ea_class_find((_ident)); \
+    ASSERT_DIE(!(cls->type & EAF_EMBEDDED)); \
+    const eattr *ea = ea_find((_l), cls->id); \
+    (ea ? ea->u.ptr : &null_adata); \
+    })
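
/* Editor's note -- illustrative sketch, not part of the patch: the typed
 * accessors above in use. The ea_gen_* classes are declared further below
 * in this header; the fallback values and the BIRD-style %I log format are
 * assumptions of the example. */
static void my_inspect(const rte *r)
{
  u32 pref = ea_get_int(r->attrs, &ea_gen_preference, 0);	/* embedded int, 0 if unset */
  ip_addr from = ea_get_ip(r->attrs, &ea_gen_from, IPA_NONE);	/* IP value, IPA_NONE if unset */

  /* Raw adata attribute; never NULL thanks to the &null_adata fallback */
  const struct adata *nh = ea_get_adata(r->attrs, &ea_gen_nexthop);

  log(L_INFO "pref %u from %I nexthop adata length %u", pref, from, nh->length);
}
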
+
+eattr *ea_walk(struct ea_walk_state *s, uint id, uint max);
+void ea_dump(ea_list *);
+int ea_same(ea_list *x, ea_list *y);	/* Test whether two ea_lists are identical */
+uint ea_hash(ea_list *e);		/* Calculate 16-bit hash value */
+ea_list *ea_append(ea_list *to, ea_list *what);
+void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max);
+
+/* Normalize ea_list; allocates the result from tmp_linpool */
+ea_list *ea_normalize(ea_list *e, int overlay);
+
+uint ea_list_size(ea_list *);
+void ea_list_copy(ea_list *dest, ea_list *src, uint size);
+
+#define EA_LOCAL_LIST(N)  struct { ea_list l; eattr a[N]; }
+
+#define EA_LITERAL_EMBEDDED(_class, _flags, _val) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(_type & EAF_EMBEDDED); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.i = _val); \
+    })
+
+#define EA_LITERAL_STORE_ADATA(_class, _flags, _buf, _len) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = tmp_store_adata((_buf), (_len))); \
+    })
+
+#define EA_LITERAL_DIRECT_ADATA(_class, _flags, _adata) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = _adata); \
+    })
+
+#define EA_LITERAL_GENERIC(_id, _type, _flags, ...) \
+  ((eattr) { .id = _id, .type = _type, .flags = _flags, __VA_ARGS__ })
+
+static inline eattr *
+ea_set_attr(ea_list **to, eattr a)
+{
+  EA_LOCAL_LIST(1) *ea = tmp_alloc(sizeof(*ea));
+  *ea = (typeof(*ea)) {
+    .l.flags = EALF_SORTED,
+    .l.count = 1,
+    .l.next = *to,
+    .a[0] = a,
+  };
+
+  *to = &ea->l;
+  return &ea->a[0];
+}
+
+static inline void
+ea_unset_attr(ea_list **to, _Bool local, const struct ea_class *def)
+{
+  ea_set_attr(to, EA_LITERAL_GENERIC(def->id, 0, 0,
+	.fresh = local, .originated = local, .undef = 1));
+}
+
+static inline void
+ea_set_attr_u32(ea_list **to, const struct ea_class *def, uint flags, u64 data)
+{ ea_set_attr(to, EA_LITERAL_EMBEDDED(def, flags, data)); }
+
+static inline void
+ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, const void *data, uint len)
+{ ea_set_attr(to, EA_LITERAL_STORE_ADATA(def, flags, data, len)); }
+
+static inline void
+ea_copy_attr(ea_list **to, ea_list *from, const struct ea_class *def)
+{
+  eattr *e = ea_find_by_class(from, def);
+  if (e)
+    if (e->type & EAF_EMBEDDED)
+      ea_set_attr_u32(to, def, e->flags, e->u.data);
+    else
+      ea_set_attr_data(to, def, e->flags, e->u.ptr->data, e->u.ptr->length);
+  else
+    ea_unset_attr(to, 0, def);
+}
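
/* Editor's note -- illustrative sketch, not part of the patch: each
 * ea_set_attr() call above prepends a one-attribute overlay allocated from
 * tmp_linpool, so the original list stays untouched until ea_normalize()
 * flattens the chain. ea_my_metric is the hypothetical class from the
 * registration sketch earlier; ea_gen_from is declared further below. */
static ea_list *my_update_attrs(ea_list *attrs)
{
  /* Shadow any previous value of the metric with 10 */
  ea_set_attr_u32(&attrs, &ea_my_metric, 0, 10);

  /* Explicitly undefine another attribute in the overlay */
  ea_unset_attr(&attrs, 1, &ea_gen_from);

  /* Collapse the overlay chain into one sorted list (in tmp_linpool);
   * the overlay argument 0 is an assumption of this sketch. */
  return ea_normalize(attrs, 0);
}
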
+
+/*
+ *	Common route attributes
+ */
+
+/* Preference: first-order comparison */
+extern struct ea_class ea_gen_preference;
+static inline u32 rt_get_preference(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_preference, 0); }
+
+/* IGP metric: second-order comparison */
+extern struct ea_class ea_gen_igp_metric;
+u32 rt_get_igp_metric(const rte *rt);
+#define IGP_METRIC_UNKNOWN 0x80000000	/* Default igp_metric used when no other
+					   protocol-specific metric is available */
+
+/* From: Advertising router */
+extern struct ea_class ea_gen_from;
+
+
+/* MPLS Label, Policy and Class */
+extern struct ea_class ea_gen_mpls_label,
+       ea_gen_mpls_policy, ea_gen_mpls_class;
+
+
+/* Source: An old method to devise the route source protocol and kind.
+ * To be superseded in a near future by something more informative. */
+extern struct ea_class ea_gen_source;
+static inline u32 rt_get_source_attr(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_source, 0); }
+
+/* Flowspec validation result */
+enum flowspec_valid {
+  FLOWSPEC_UNKNOWN	= 0,
+  FLOWSPEC_VALID	= 1,
+  FLOWSPEC_INVALID	= 2,
+  FLOWSPEC__MAX,
+};
+
+extern const char * flowspec_valid_names[FLOWSPEC__MAX];
+static inline const char *flowspec_valid_name(enum flowspec_valid v)
+{ return (v < FLOWSPEC__MAX) ? flowspec_valid_names[v] : "???"; }
+
+extern struct ea_class ea_gen_flowspec_valid;
+static inline enum flowspec_valid rt_get_flowspec_valid(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN); }
+
+/* Next hop: For now, stored as adata */
+extern struct ea_class ea_gen_nexthop;
+
+static inline void ea_set_dest(struct ea_list **to, uint flags, uint dest)
+{
+  struct nexthop_adata nhad = NEXTHOP_DEST_LITERAL(dest);
+  ea_set_attr_data(to, &ea_gen_nexthop, flags, &nhad.ad.data, nhad.ad.length);
+}
+
+/* Next hop structures */
+
+#define NEXTHOP_ALIGNMENT	(_Alignof(struct nexthop))
+#define NEXTHOP_MAX_SIZE	(sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
+#define NEXTHOP_SIZE(_nh)	NEXTHOP_SIZE_CNT(((_nh)->labels))
+#define NEXTHOP_SIZE_CNT(cnt)	BIRD_ALIGN((sizeof(struct nexthop) + sizeof(u32) * (cnt)), NEXTHOP_ALIGNMENT)
+#define nexthop_size(nh)	NEXTHOP_SIZE((nh))
+
+#define NEXTHOP_NEXT(_nh)	((void *) (_nh) + NEXTHOP_SIZE(_nh))
+#define NEXTHOP_END(_nhad)	((_nhad)->ad.data + (_nhad)->ad.length)
+#define NEXTHOP_VALID(_nh, _nhad) ((void *) (_nh) < (void *) NEXTHOP_END(_nhad))
+#define NEXTHOP_ONE(_nhad)	(NEXTHOP_NEXT(&(_nhad)->nh) == NEXTHOP_END(_nhad))
+
+#define NEXTHOP_WALK(_iter, _nhad) for ( \
+    struct nexthop *_iter = &(_nhad)->nh; \
+    (void *) _iter < (void *) NEXTHOP_END(_nhad); \
+    _iter = NEXTHOP_NEXT(_iter))
+
+
+static inline int nexthop_same(struct nexthop_adata *x, struct nexthop_adata *y)
+{ return adata_same(&x->ad, &y->ad); }
+struct nexthop_adata *nexthop_merge(struct nexthop_adata *x, struct nexthop_adata *y, int max, linpool *lp);
+struct nexthop_adata *nexthop_sort(struct nexthop_adata *x, linpool *lp);
+int nexthop_is_sorted(struct nexthop_adata *x);
+
+#define NEXTHOP_IS_REACHABLE(nhad)	((nhad)->ad.length > NEXTHOP_DEST_SIZE)
+
+static inline struct nexthop_adata *
+rte_get_nexthops(rte *r)
+{
+  eattr *nhea = ea_find(r->attrs, &ea_gen_nexthop);
+  return nhea ? SKIP_BACK(struct nexthop_adata, ad, nhea->u.ptr) : NULL;
+}
+
+/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */
+static inline int rte_is_reachable(rte *r)
+{
+  struct nexthop_adata *nhad = rte_get_nexthops(r);
+  return nhad && NEXTHOP_IS_REACHABLE(nhad);
+}
+
+static inline int nhea_dest(eattr *nhea)
+{
+  if (!nhea)
+    return RTD_NONE;
+
+  struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
+  if (NEXTHOP_IS_REACHABLE(nhad))
+    return RTD_UNICAST;
+  else
+    return nhad->dest;
+}
+
+static inline int rte_dest(const rte *r)
+{
+  return nhea_dest(ea_find(r->attrs, &ea_gen_nexthop));
+}
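
/* Editor's note -- illustrative sketch, not part of the patch: enumerating
 * the (possibly ECMP) next hops of a route with the macros above. Entries
 * are variable-length -- NEXTHOP_NEXT() steps by each entry's own size,
 * which depends on its MPLS label count. The iface->name access and the
 * %I log format are assumptions of the example. */
static void my_show_nexthops(rte *r)
{
  struct nexthop_adata *nhad = rte_get_nexthops(r);

  if (!nhad || !NEXTHOP_IS_REACHABLE(nhad))
  {
    /* No nexthop attribute at all, or a special destination (RTD_*) */
    log(L_INFO "dest: %s", rta_dest_name(rte_dest(r)));
    return;
  }

  NEXTHOP_WALK(nh, nhad)
    log(L_INFO "via %I dev %s weight %u labels %u",
	nh->gw, nh->iface ? nh->iface->name : "?", nh->weight, nh->labels);
}
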
+
+void rta_init(void);
+ea_list *ea_lookup(ea_list *, int overlay);	/* Get a cached (and normalized) variant of this attribute list */
+static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; }
+static inline struct ea_storage *ea_get_storage(ea_list *r)
+{
+  ASSERT_DIE(ea_is_cached(r));
+  return SKIP_BACK(struct ea_storage, l[0], r);
+}
+
+static inline ea_list *ea_clone(ea_list *r) {
+  ASSERT_DIE(0 < atomic_fetch_add_explicit(&ea_get_storage(r)->uc, 1, memory_order_acq_rel));
+  return r;
+}
+void ea__free(struct ea_storage *r);
+static inline void ea_free(ea_list *l) {
+  if (!l) return;
+  struct ea_storage *r = ea_get_storage(l);
+  if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) ea__free(r);
+}
+
+void ea_dump(ea_list *);
+void ea_dump_all(void);
+void ea_show_list(struct cli *, ea_list *);
+
+#define rta_lookup	ea_lookup
+#define rta_is_cached	ea_is_cached
+#define rta_clone	ea_clone
+#define rta_free	ea_free
+
+#endif
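
/* Editor's note -- illustrative sketch, not part of the patch: the caching
 * life cycle implied by the functions above. ea_lookup() interns a list
 * into the global hash (setting EALF_CACHED), ea_clone() bumps the atomic
 * use count, and the last ea_free() hands the storage to ea__free(). The
 * overlay argument 0 is an assumption of this sketch. */
static ea_list *my_keep_attrs(ea_list *tmp_attrs)
{
  ea_list *cached = ea_lookup(tmp_attrs, 0);	/* normalized + refcounted */
  ASSERT_DIE(ea_is_cached(cached));

  ea_list *ref = ea_clone(cached);	/* uc: 1 -> 2 */
  ea_free(ref);				/* uc: 2 -> 1 */

  return cached;	/* the caller eventually drops it with ea_free() */
}
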
diff --cc nest/config.Y
index 5e5213964,d97784733..49f701cd2
--- a/nest/config.Y
+++ b/nest/config.Y
@@@ -151,8 -113,8 +151,8 @@@ proto_call_cmd_reload(struct proto_spe
  
  CF_DECLS
  
 -CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT)
 +CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT, PIPE)
- CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, DEFAULT, TABLE, STATES, ROUTES, FILTERS)
+ CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, DEFAULT, TABLE, TABLES, STATES, ROUTES, FILTERS)
  CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS)
  CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI)
  CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES)
@@@ -274,15 -236,10 +275,14 @@@ table_opt
  	cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]);
      this_table->trie_used = $2;
    }
-  | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; }
-  | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; }
   | GC THRESHOLD expr { this_table->gc_threshold = $3; }
   | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); }
+  | CORK THRESHOLD expr expr {
+      if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold.");
+      this_table->cork_threshold.low = $3;
+      this_table->cork_threshold.high = $4; }
+  | EXPORT SETTLE TIME settle { this_table->export_settle = $4; }
+  | ROUTE REFRESH EXPORT SETTLE TIME settle { this_table->export_rr_settle = $6; }
-  | DEBUG bool { this_table->debug = $2; }
   ;
  
  table_opts:
diff --cc nest/proto.c
index 4b8fef03d,88f4813ef..70cfad521
--- a/nest/proto.c
+++ b/nest/proto.c
@@@ -1852,11 -1270,12 +1852,11 @@@ proto_reconfigure(struct proto *p, stru
    if ((nc->protocol != oc->protocol) ||
        (nc->net_type != oc->net_type) ||
        (nc->disabled != p->disabled) ||
 -      (nc->vrf != oc->vrf) ||
 -      (nc->vrf_set != oc->vrf_set))
 +      (nc->vrf != oc->vrf))
      return 0;
  
 -  p->name = nc->name;
 -  p->debug = nc->debug;
 +  p->sources.name = p->name = nc->name;
-   p->debug = nc->debug;
++  p->sources.debug = p->debug = nc->debug;
    p->mrtdump = nc->mrtdump;
    reconfigure_type = type;
@@@ -2594,13 -1914,8 +2594,15 @@@ static inline voi
  proto_do_start(struct proto *p)
  {
    p->active = 1;
 -  p->do_start = 1;
 -  ev_schedule(p->event);
 +
 +  rt_init_sources(&p->sources, p->name, proto_event_list(p));
 +  if (!p->sources.class)
 +    p->sources.class = &default_rte_owner_class;
 +
++  p->sources.debug = p->debug;
++
 +  if (!p->cf->late_if_feed)
 +    iface_subscribe(&p->iface_sub);
  }
  
  static void
diff --cc nest/route.h
index d6661afc5,f2745bdc0..4b3a74263
--- a/nest/route.h
+++ b/nest/route.h
@@@ -62,69 -150,29 +62,70 @@@ struct rtable_config
    uint addr_type;			/* Type of address data stored in table (NET_*) */
    uint gc_threshold;			/* Maximum number of operations before GC is run */
    uint gc_period;			/* Approximate time between two consecutive GC runs */
 +  u32 debug;				/* Debugging flags (D_*) */
    byte sorted;				/* Routes of network are sorted according to rte_better() */
 -  byte internal;			/* Internal table of a protocol */
    byte trie_used;			/* Rtable has attached trie */
-   byte debug;				/* Whether to log */
-   btime min_settle_time;		/* Minimum settle time for notifications */
-   btime max_settle_time;		/* Maximum settle time for notifications */
+   struct rt_cork_threshold cork_threshold;	/* Cork threshold values */
+   struct settle_config export_settle;	/* Export announcement settler */
+   struct settle_config export_rr_settle;/* Export announcement settler config valid when any
+					    route refresh is running */
+};
+
+struct rt_export_hook;
+struct rt_export_request;
+struct rt_exporter;
+
+struct rt_exporter_class {
+  void (*start)(struct rt_exporter *, struct rt_export_request *);
+  void (*stop)(struct rt_export_hook *);
+  void (*done)(void *_rt_export_hook);
+};
+
+struct rt_exporter {
+  const struct rt_exporter_class *class;
+  pool *rp;
+  list hooks;				/* Registered route export hooks */
+  uint addr_type;			/* Type of address data exported (NET_*) */
+};
+
+struct rt_table_exporter {
+  struct rt_exporter e;
+  list pending;				/* List of packed struct rt_pending_export */
+
+  struct rt_pending_export *first;	/* First export to announce */
+  u64 next_seq;				/* The next export will have this ID */
  };
  
 -typedef struct rtable {
 -  resource r;
 -  node n;				/* Node in list of all tables */
+extern uint rtable_max_id;
+
+/* The public part of rtable structure */
+#define RTABLE_PUBLIC \
+  resource r;				\
+  node n;				/* Node in list of all tables */ \
+  char *name;				/* Name of this table */ \
+  uint addr_type;			/* Type of address data stored in table (NET_*) */ \
+  uint id;				/* Integer table ID for fast lookup */ \
+  DOMAIN(rtable) lock;			/* Lock to take to access the private parts */ \
+  struct rtable_config *config;		/* Configuration of this table */ \
+  struct birdloop *loop;		/* Service thread */ \
+
+/* The complete rtable structure */
+struct rtable_private {
+  /* Once more the public part */
+  struct { RTABLE_PUBLIC; };
+  struct rtable_private **locked_at;
+
+  /* Here the private items not to be accessed without locking */
   pool *rp;				/* Resource pool to allocate everything from, including itself */
+  struct slab *rte_slab;		/* Slab to allocate route objects */
   struct fib fib;
   struct f_trie *trie;			/* Trie of prefixes defined in fib */
 - char *name;				/* Name of this table */
 - list channels;			/* List of attached channels (struct channel) */
 - uint addr_type;			/* Type of address data stored in table (NET_*) */
 - u32 debug;				/* Debugging flags (D_*) */
 - int pipe_busy;			/* Pipe loop detection */
   int use_count;			/* Number of protocols using this table */
   u32 rt_count;				/* Number of routes in the table */
++ u32 debug;				/* Debugging flags (D_*) */
 - byte internal;			/* Internal table of a protocol */
+  list imports;				/* Registered route importers */
+  struct rt_table_exporter exporter;	/* Exporter API structure */
  
   struct hmap id_map;
   struct hostcache *hostcache;
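
/* Editor's note -- illustrative sketch, not part of the patch: the access
 * pattern suggested by the public/private split above. Fields listed in
 * RTABLE_PUBLIC may be read by anyone holding a reference; the fields of
 * struct rtable_private require the table's lock domain. The direct
 * LOCK_DOMAIN/UNLOCK_DOMAIN calls mirror the RTA_LOCK wrappers in
 * lib/route.h and are a simplification -- BIRD wraps this pattern in its
 * own helpers (cf. RT_PUB() in rt_reconfigure() below). */
static u32 my_route_count(struct rtable_private *tab)
{
  /* Public part: no lock needed */
  log(L_INFO "table %s (id %u)", tab->name, tab->id);

  /* Private part: take the per-table lock domain first */
  LOCK_DOMAIN(rtable, tab->lock);
  u32 n = tab->rt_count;
  UNLOCK_DOMAIN(rtable, tab->lock);

  return n;
}
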
diff --cc nest/rt-attr.c
index 481b432f2,0ae4b62e8..c8c2c0bbe
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@@ -242,190 -135,74 +242,196 @@@ rt_get_source_o(struct rte_owner *p, u3
    struct rte_src *src = rt_find_source(p, id);
  
    if (src)
+   {
+     lfuc_lock_revive(&src->uc);
      return src;
+   }
  
+   RTA_LOCK;
    src = sl_allocz(rte_src_slab);
 -  src->proto = p;
 +  src->owner = p;
    src->private_id = id;
    src->global_id = idm_alloc(&src_ids);
 -  src->uc = 0;
  
 -  HASH_INSERT2(src_hash, RSH, rta_pool, src);
 +  lfuc_init(&src->uc);
 +  p->uc++;
 +
 +  HASH_INSERT2(p->hash, RSH, rta_pool, src);
-   if (config->table_debug)
-     log(L_TRACE "Allocated new rte_src for %s, ID %luL %uG, have %u sources now",
++  if (p->debug & D_ROUTES)
++    log(L_TRACE "%s: new rte_src ID %luL %uG, have %u sources now",
+ 	p->name, src->private_id, src->global_id, p->uc);
+
+   uint gm = atomic_load_explicit(&rte_src_global_max, memory_order_relaxed);
+   struct rte_src * _Atomic * g = atomic_load_explicit(&rte_src_global, memory_order_relaxed);
+
+   if (src->global_id >= gm)
+   {
+     /* Allocate new block */
+     size_t old_len = sizeof(struct rte_src * _Atomic) * gm;
+     struct rte_src * _Atomic * new_block = mb_alloc(rta_pool, old_len * 2);
+     memcpy(new_block, g, old_len);
+
+     for (uint i = 0; i < gm; i++)
+       atomic_store_explicit(&new_block[gm+i], NULL, memory_order_relaxed);
+
+     /* Update the pointer */
+     atomic_store_explicit(&rte_src_global, new_block, memory_order_release);
+     atomic_store_explicit(&rte_src_global_max, gm * 2, memory_order_release);
+
+     /* Wait for readers */
+     synchronize_rcu();
+
+     /* Free the old block */
+     mb_free(g);
+     g = new_block;
+   }
+
+   atomic_store_explicit(&g[src->global_id], src, memory_order_release);
+   RTA_UNLOCK;
+
+   return src;
+}
+
+/**
+ * Find a rte source by its global ID. Only available for existing and locked
+ * sources stored by their ID. Checking for non-existent or foreign source is unsafe.
+ *
+ * @id: requested global ID
+ *
+ * Returns the found source or dies. Result of this function is guaranteed to
+ * be a valid source as long as the caller owns it.
+ */
+struct rte_src *
+rt_find_source_global(u32 id)
+{
+  rcu_read_lock();
+  ASSERT_DIE(id < atomic_load_explicit(&rte_src_global_max, memory_order_acquire));
+
+  struct rte_src * _Atomic * g = atomic_load_explicit(&rte_src_global, memory_order_acquire);
+  struct rte_src *src = atomic_load_explicit(&g[id], memory_order_acquire);
+  ASSERT_DIE(src);
+  ASSERT_DIE(src->global_id == id);
+
+  rcu_read_unlock();
   return src;
  }
  
+static inline void
+rt_done_sources(struct rte_owner *o)
+{
+  ev_send(o->list, o->stop);
+}
+
  void
 -rt_prune_sources(void)
 +rt_prune_sources(void *data)
  {
 -  HASH_WALK_FILTER(src_hash, next, src, sp)
 +  struct rte_owner *o = data;
 +
 +  HASH_WALK_FILTER(o->hash, next, src, sp)
    {
 -    if (src->uc == 0)
 +    if (lfuc_finished(&src->uc))
      {
 -      HASH_DO_REMOVE(src_hash, RSH, sp);
 +      o->uc--;
 +
++      if (o->debug & D_ROUTES)
++	log(L_TRACE "%s: freed rte_src ID %luL %uG, have %u sources now",
++	    o->name, src->private_id, src->global_id, o->uc);
++
 +      HASH_DO_REMOVE(o->hash, RSH, sp);
 +
 +      RTA_LOCK;
 +      struct rte_src * _Atomic * g = atomic_load_explicit(&rte_src_global, memory_order_acquire);
 +      atomic_store_explicit(&g[src->global_id], NULL, memory_order_release);
        idm_free(&src_ids, src->global_id);
        sl_free(src);
 +      RTA_UNLOCK;
      }
    }
    HASH_WALK_FILTER_END;
  
 -  HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
 -}
 +  RTA_LOCK;
 +  HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool);
 +
 +  if (o->stop && !o->uc)
 +  {
 +    rfree(o->prune);
 +    RTA_UNLOCK;
 
-     if (config->table_debug)
-       log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name);
-/*
- *	Multipath Next Hop
- */
++    if (o->debug & D_EVENTS)
++      log(L_TRACE "%s: all rte_src's pruned, scheduling stop event", o->name);
 
-static inline u32
-nexthop_hash(struct nexthop *x)
 +    rt_done_sources(o);
 +  }
 +  else
 +    RTA_UNLOCK;
 +}
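
/* Editor's note -- illustrative sketch, not part of the patch: the RCU
 * resize pattern from rt_get_source_o() above, isolated. Writers are
 * serialized by RTA_LOCK; readers run under rcu_read_lock() as in
 * rt_find_source_global(), so after publishing the new block the writer
 * only waits for the readers before freeing the old one. The my_* globals
 * are hypothetical stand-ins for rte_src_global / rte_src_global_max. */
static struct rte_src * _Atomic * _Atomic my_block;
static _Atomic uint my_max;

static void my_grow(pool *p)
{
  uint gm = atomic_load_explicit(&my_max, memory_order_relaxed);
  struct rte_src * _Atomic *g = atomic_load_explicit(&my_block, memory_order_relaxed);

  struct rte_src * _Atomic *nb = mb_alloc(p, 2 * gm * sizeof(*nb));
  memcpy(nb, g, gm * sizeof(*nb));
  for (uint i = gm; i < 2 * gm; i++)
    atomic_store_explicit(&nb[i], NULL, memory_order_relaxed);

  /* Publish the bigger block first, then the bigger limit */
  atomic_store_explicit(&my_block, nb, memory_order_release);
  atomic_store_explicit(&my_max, 2 * gm, memory_order_release);

  synchronize_rcu();	/* no reader can still hold the old block now */
  mb_free(g);
}
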
+
+void
+rt_dump_sources(struct rte_owner *o)
  {
-   u32 h = 0;
-   for (; x; x = x->next)
+   debug("\t%s: hord=%u, uc=%u, cnt=%u prune=%p, stop=%p\n",
+       o->name, o->hash.order, o->uc, o->hash.count, o->prune, o->stop);
+   debug("\tget_route_info=%p, better=%p, mergable=%p, igp_metric=%p, recalculate=%p",
+       o->class->get_route_info, o->class->rte_better, o->class->rte_mergable,
+       o->class->rte_igp_metric, o->rte_recalculate);
+
+   int splitting = 0;
+   HASH_WALK(o->hash, next, src)
    {
-     h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9);
+     debug("%c%c%uL %uG %luU",
+ 	  (splitting % 8) ? ',' : '\n',
+ 	  (splitting % 8) ? ' ' : '\t',
+ 	  src->private_id, src->global_id,
+ 	  atomic_load_explicit(&src->uc, memory_order_relaxed));
 
-     for (int i = 0; i < x->labels; i++)
-       h ^= x->label[i] ^ (h << 6) ^ (h >> 7);
+     splitting++;
    }
+   HASH_WALK_END;
+   debug("\n");
+ }
 
-   return h;
+ void
+ rt_init_sources(struct rte_owner *o, const char *name, event_list *list)
+ {
+   RTA_LOCK;
+   HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER);
+   o->hash_key = random_u32();
+   o->uc = 0;
+   o->name = name;
+   o->prune = ev_new_init(rta_pool, rt_prune_sources, o);
+   o->stop = NULL;
+   o->list = list;
+   RTA_UNLOCK;
++  if (o->debug & D_EVENTS)
++    log(L_TRACE "%s: initialized rte_src owner", o->name);
  }
 
-int
-nexthop__same(struct nexthop *x, struct nexthop *y)
+void
+rt_destroy_sources(struct rte_owner *o, event *done)
  {
-   for (; x && y; x = x->next, y = y->next)
+   o->stop = done;
+
+   if (!o->uc)
    {
-     if (config->table_debug)
-       log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name);
-     if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) ||
- 	(x->flags != y->flags) || (x->weight != y->weight) ||
- 	(x->labels_orig != y->labels_orig) || (x->labels != y->labels))
-       return 0;
++    if (o->debug & D_EVENTS)
++      log(L_TRACE "%s: rte_src owner destroy requested, already clean, scheduling stop event", o->name);
 
-     for (int i = 0; i < x->labels; i++)
-       if (x->label[i] != y->label[i])
- 	return 0;
-   }
+     RTA_LOCK;
+     rfree(o->prune);
+     RTA_UNLOCK;
 
-   return x == y;
+     rt_done_sources(o);
+   }
+   else
-     if (config->table_debug)
-       log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc);
++    if (o->debug & D_EVENTS)
++      log(L_TRACE "%s: rte_src owner destroy requested, remaining %u rte_src's to prune.", o->name, o->uc);
  }
 
+/*
+ *	Multipath Next Hop
+ */
+
  static int
  nexthop_compare_node(const struct nexthop *x, const struct nexthop *y)
  {
diff --cc nest/rt-table.c
index 54a471b2b,a1a651075..1f3f1008f
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@@ -225,68 -145,8 +225,8 @@@ net_init_with_trie(struct fib *f, void
      trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
  }
  
- static inline net *
- net_route_ip4_trie(struct rtable_private *t, const net_addr_ip4 *n0)
- {
-   TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n)
-   {
-     net *r;
-     if (r = net_find_valid(t, (net_addr *) &n))
-       return r;
-   }
-   TRIE_WALK_TO_ROOT_END;
- 
-   return NULL;
- }
- 
- static inline net *
- net_route_vpn4_trie(struct rtable_private *t, const net_addr_vpn4 *n0)
- {
-   TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px)
-   {
-     net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd);
- 
-     net *r;
-     if (r = net_find_valid(t, (net_addr *) &n))
-       return r;
-   }
-   TRIE_WALK_TO_ROOT_END;
- 
-   return NULL;
- }
- 
- static inline net *
- net_route_ip6_trie(struct rtable_private *t, const net_addr_ip6 *n0)
- {
-   TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n)
-   {
-     net *r;
-     if (r = net_find_valid(t, (net_addr *) &n))
-       return r;
-   }
-   TRIE_WALK_TO_ROOT_END;
- 
-   return NULL;
- }
- 
- static inline net *
- net_route_vpn6_trie(struct rtable_private *t, const net_addr_vpn6 *n0)
- {
-   TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
-   {
-     net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd);
- 
-     net *r;
-     if (r = net_find_valid(t, (net_addr *) &n))
-       return r;
-   }
-   TRIE_WALK_TO_ROOT_END;
- 
-   return NULL;
- }
- 
  static inline void *
 -net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
 +net_route_ip6_sadr_trie(struct rtable_private *t, const net_addr_ip6_sadr *n0)
  {
    TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
    {
@@@ -318,72 -178,9 +258,9 @@@
    return NULL;
  }
  
- static inline net *
- net_route_ip4_fib(struct rtable_private *t, const net_addr_ip4 *n0)
- {
-   net_addr_ip4 n;
-   net_copy_ip4(&n, n0);
- 
-   net *r;
-   while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
-   {
-     n.pxlen--;
-     ip4_clrbit(&n.prefix, n.pxlen);
-   }
- 
-   return r;
- }
- 
- static inline net *
- net_route_vpn4_fib(struct rtable_private *t, const net_addr_vpn4 *n0)
- {
-   net_addr_vpn4 n;
-   net_copy_vpn4(&n, n0);
- 
-   net *r;
-   while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
-   {
-     n.pxlen--;
-     ip4_clrbit(&n.prefix, n.pxlen);
-   }
- 
-   return r;
- }
- 
- static inline net *
- net_route_ip6_fib(struct rtable_private *t, const net_addr_ip6 *n0)
- {
-   net_addr_ip6 n;
-   net_copy_ip6(&n, n0);
- 
-   net *r;
-   while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
-   {
-     n.pxlen--;
-     ip6_clrbit(&n.prefix, n.pxlen);
-   }
- 
-   return r;
- }
- 
- static inline net *
- net_route_vpn6_fib(struct rtable_private *t, const net_addr_vpn6 *n0)
- {
-   net_addr_vpn6 n;
-   net_copy_vpn6(&n, n0);
- 
-   net *r;
-   while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
-   {
-     n.pxlen--;
-     ip6_clrbit(&n.prefix, n.pxlen);
-   }
- 
-   return r;
- }
  
  static inline void *
 -net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0)
 +net_route_ip6_sadr_fib(struct rtable_private *t, const net_addr_ip6_sadr *n0)
  {
    net_addr_ip6_sadr n;
    net_copy_ip6_sadr(&n, n0);
@@@ -423,45 -220,54 +300,54 @@@
  }
  
  net *
 -net_route(rtable *tab, const net_addr *n)
 +net_route(struct rtable_private *tab, const net_addr *n)
  {
    ASSERT(tab->addr_type == n->type);
+   net_addr_union *nu = SKIP_BACK(net_addr_union, n, n);
  
-   switch (n->type)
-   {
-   case NET_IP4:
-     if (tab->trie)
-       return net_route_ip4_trie(tab, (net_addr_ip4 *) n);
-     else
-       return net_route_ip4_fib (tab, (net_addr_ip4 *) n);
+ #define TW(ipv, what) \
+   TRIE_WALK_TO_ROOT_IP##ipv(tab->trie, &(nu->ip##ipv), var) \
+   { what(ipv, var); } \
+   TRIE_WALK_TO_ROOT_END; return NULL;
  
-   case NET_VPN4:
-     if (tab->trie)
-       return net_route_vpn4_trie(tab, (net_addr_vpn4 *) n);
-     else
-       return net_route_vpn4_fib (tab, (net_addr_vpn4 *) n);
+ #define FW(ipv, what) do { \
+   net_addr_union nuc; net_copy(&nuc.n, n); \
+   while (1) { \
+     what(ipv, nuc.ip##ipv); if (!nuc.n.pxlen) return NULL; \
+     nuc.n.pxlen--; ip##ipv##_clrbit(&nuc.ip##ipv.prefix, nuc.ip##ipv.pxlen); \
+   } \
+ } while(0); return NULL;
  
-   case NET_IP6:
-     if (tab->trie)
-       return net_route_ip6_trie(tab, (net_addr_ip6 *) n);
-     else
-       return net_route_ip6_fib (tab, (net_addr_ip6 *) n);
+ #define FVR_IP(ipv, var) \
+     net *r; if (r = net_find_valid(tab, (net_addr *) &var)) return r;
  
-   case NET_VPN6:
-     if (tab->trie)
-       return net_route_vpn6_trie(tab, (net_addr_vpn6 *) n);
-     else
-       return net_route_vpn6_fib (tab, (net_addr_vpn6 *) n);
+ #define FVR_VPN(ipv, var) \
+     net_addr_vpn##ipv _var0 = NET_ADDR_VPN##ipv(var.prefix, var.pxlen, nu->vpn##ipv.rd); FVR_IP(ipv, _var0);
  
-   case NET_IP6_SADR:
-     if (tab->trie)
-       return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n);
-     else
-       return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n);
- 
-   default:
-     return NULL;
-   }
+   if (tab->trie)
+     switch (n->type) {
+       case NET_IP4: TW(4, FVR_IP);
+       case NET_VPN4: TW(4, FVR_VPN);
+       case NET_IP6: TW(6, FVR_IP);
+       case NET_VPN6: TW(6, FVR_VPN);
+ 
+       case NET_IP6_SADR:
+ 	return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n);
+       default:
+ 	return NULL;
+     }
+   else
+     switch (n->type) {
+       case NET_IP4: FW(4, FVR_IP);
+       case NET_VPN4: FW(4, FVR_VPN);
+       case NET_IP6: FW(6, FVR_IP);
+       case NET_VPN6: FW(6, FVR_VPN);
+ 
+       case NET_IP6_SADR:
+ 	return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n);
+       default:
+ 	return NULL;
+     }
  }
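
/* Editor's note -- illustrative sketch, not part of the patch: what
 * FW(4, FVR_IP) in net_route() above expands to, written out as plain C.
 * It is the same longest-prefix fallback as the removed net_route_ip4_fib():
 * try the exact prefix, then shorten it bit by bit until a valid net is
 * found or the prefix length reaches zero. The function name is
 * hypothetical. */
static net *my_route_ip4_fib(struct rtable_private *tab, const net_addr *n)
{
  net_addr_union nuc;
  net_copy(&nuc.n, n);

  while (1)
  {
    net *r;
    if (r = net_find_valid(tab, (net_addr *) &nuc.ip4))
      return r;

    if (!nuc.n.pxlen)
      return NULL;

    nuc.n.pxlen--;
    ip4_clrbit(&nuc.ip4.prefix, nuc.ip4.pxlen);
  }
}
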
@@@ -2942,9 -2043,7 +2828,10 @@@ rt_setup(pool *pp, struct rtable_confi
    t->name = cf->name;
    t->config = cf;
    t->addr_type = cf->addr_type;
+   t->debug = cf->debug;
+   t->id = idm_alloc(&rtable_idm);
+   if (t->id >= rtable_max_id)
+     rtable_max_id = t->id + 1;
  
    fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
@@@ -4082,17 -2754,9 +3969,17 @@@ rt_new_table(struct symbol *s, uint add
    c->addr_type = addr_type;
    c->gc_threshold = 1000;
    c->gc_period = (uint) -1;	/* set in rt_postconfig() */
-   c->min_settle_time = 1 S;
-   c->max_settle_time = 20 S;
+   c->cork_threshold.low = 1024;
+   c->cork_threshold.high = 8192;
+   c->export_settle = (struct settle_config) {
+     .min = 1 MS,
+     .max = 100 MS,
+   };
+   c->export_rr_settle = (struct settle_config) {
+     .min = 100 MS,
+     .max = 3 S,
+   };
-   c->debug = new_config->table_debug;
+   c->debug = new_config->table_default_debug;
  
    add_tail(&new_config->tables, &c->n);
@@@ -4203,26 -2811,11 +4090,27 @@@ rt_reconfigure(struct rtable_private *t
      return 0;
  
    DBG("\t%s: same\n", new->name);
-   new->table = tab;
+   new->table = RT_PUB(tab);
    tab->name = new->name;
    tab->config = new;
+   tab->debug = new->debug;
+ 
+   if (tab->hostcache)
+     tab->hostcache->req.trace_routes = new->debug;
+ 
+   struct rt_table_export_hook *hook; node *n;
+   WALK_LIST2(hook, n, tab->exporter.e.hooks, h.n)
+     if (hook->h.req->export_one == rt_flowspec_export_one)
+       hook->h.req->trace_routes = new->debug;
+ 
+   tab->cork_threshold = new->cork_threshold;
+ 
+   if (new->cork_threshold.high != old->cork_threshold.high)
+     rt_check_cork_high(tab);
+ 
+   if (new->cork_threshold.low != old->cork_threshold.low)
+     rt_check_cork_low(tab);
+ 
    return 1;
  }
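
/* Editor's note -- illustrative sketch, not part of the patch: the cork
 * thresholds configured in rt_new_table() above (defaults 1024/8192) act as
 * a hysteresis on the table's pending-export queue; the grammar in
 * nest/config.Y enforces low < high. The exact behaviour of
 * rt_check_cork_high()/rt_check_cork_low() is not shown in this diff, so
 * the check below is an inference, not their implementation. */
static int my_cork_state(const struct rt_cork_threshold *ct, uint pending, int corked)
{
  if (!corked && pending >= ct->high)
    return 1;		/* queue too long: ask importers to hold off */

  if (corked && pending <= ct->low)
    return 0;		/* drained below the low mark: resume imports */

  return corked;	/* between the marks: keep the current state */
}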