From: Maria Matejka
Date: Thu, 9 Nov 2023 14:20:13 +0000 (+0100)
Subject: Merge branch 'mq-aggregator-for-v3' into thread-next
X-Git-Tag: v3.0.0~346
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=00e40a6b8049ba026eecb56ce3f461caf7f4bd16;p=thirdparty%2Fbird.git

Merge branch 'mq-aggregator-for-v3' into thread-next
---

00e40a6b8049ba026eecb56ce3f461caf7f4bd16
diff --cc configure.ac
index b99704437,b2f18c905..c71b5eab1
--- a/configure.ac
+++ b/configure.ac
@@@ -306,8 -312,8 +306,8 @@@ if test "$enable_mpls_kernel" != no ; t
  fi
  fi
 
 -all_protocols="aggregator $proto_bfd babel bgp l3vpn mrt ospf perf pipe radv rip rpki static"
- 
 +# temporarily removed "mrt" from all_protocols to speed up 3.0-alpha1 release
- all_protocols="aggregator bfd babel bgp ospf perf pipe radv rip rpki static"
++all_protocols="aggregator bfd babel bgp l3vpn ospf perf pipe radv rip rpki static"
 
  all_protocols=`echo $all_protocols | sed 's/ /,/g'`
 
  if test "$with_protocols" = all ; then
diff --cc lib/route.h
index 9fcf20614,000000000..a0446b391
mode 100644,000000..100644
--- a/lib/route.h
+++ b/lib/route.h
@@@ -1,576 -1,0 +1,587 @@@
+/*
+ *  BIRD Internet Routing Daemon -- Routing data structures
+ *
+ *  (c) 1998--2000 Martin Mares
+ *  (c) 2022 Maria Matejka
+ *
+ *  Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_LIB_ROUTE_H_
+#define _BIRD_LIB_ROUTE_H_
+
+#undef RT_SOURCE_DEBUG
+
+#include "lib/type.h"
+#include "lib/rcu.h"
+#include "lib/hash.h"
+#include "lib/event.h"
+
+struct network;
+struct proto;
+struct cli;
+struct rtable_private;
+struct rte_storage;
+
+#define RTE_IN_TABLE_WRITABLE \
+  byte pflags;				/* Protocol-specific flags; may change in-table (!) */ \
+  u8 stale_cycle;			/* Auxiliary value for route refresh; may change in-table (!) */ \
+
+typedef struct rte {
+  RTE_IN_TABLE_WRITABLE;
+  byte flags;				/* Table-specific flags */
+  u8 generation;			/* If this route import is based on another, previously exported route,
+					   this value should be 1 + MAX(generation of the parent routes);
+					   otherwise the route is independent and this value is zero. */
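+  /* (For illustration: a route freshly imported by its originating protocol
+     has generation 0; when e.g. a pipe re-imports that route's export into
+     another table, the re-imported copy gets generation 1.) */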
+  u32 id;				/* Table-specific route ID */
+  struct ea_list *attrs;		/* Attributes of this route */
+  const net_addr *net;			/* Network this RTE belongs to */
+  struct rte_src *src;			/* Route source that created the route */
+  struct rt_import_hook *sender;	/* Import hook used to send the route to the routing table */
+  btime lastmod;			/* Last modified (set by table) */
+} rte;
+
+#define REF_FILTERED	2		/* Route is rejected by import filter */
+#define REF_PENDING	32		/* Route has not propagated completely yet */
+
+/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
+static inline int rte_is_valid(const rte *r) { return r && !(r->flags & REF_FILTERED); }
+
+/* Route just has REF_FILTERED flag */
+static inline int rte_is_filtered(const rte *r) { return !!(r->flags & REF_FILTERED); }
+
+/* Strip the route of the table-specific values */
+static inline rte rte_init_from(const rte *r)
+{
+  return (rte) {
+    .attrs = r->attrs,
+    .net = r->net,
+    .src = r->src,
+  };
+}
+
+int rte_same(const rte *, const rte *);
+
+struct rte_src {
+  struct rte_src *next;			/* Hash chain */
+  struct rte_owner *owner;		/* Route source owner */
+  u64 private_id;			/* Private ID, assigned by the protocol */
+  u32 global_id;			/* Globally unique ID of the source */
+  _Atomic u64 uc;			/* Use count */
+};
+
+struct rte_owner_class {
+  void (*get_route_info)(const rte *, byte *buf); /* Get route information (for `show route' command) */
+  int (*rte_better)(const rte *, const rte *);
+  int (*rte_mergable)(const rte *, const rte *);
+  u32 (*rte_igp_metric)(const rte *);
+};
+
+struct rte_owner {
+  struct rte_owner_class *class;
+  int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte_storage *new, struct rte_storage *, struct rte_storage *);
+  HASH(struct rte_src) hash;
+  const char *name;
+  u32 hash_key;
+  u32 uc;
+  event_list *list;
+  event *prune;
+  event *stop;
+};
+
+DEFINE_DOMAIN(attrs);
+extern DOMAIN(attrs) attrs_domain;
+
+#define RTA_LOCK	LOCK_DOMAIN(attrs, attrs_domain)
+#define RTA_UNLOCK	UNLOCK_DOMAIN(attrs, attrs_domain)
+
+#define RTE_SRC_PU_SHIFT	44
+#define RTE_SRC_IN_PROGRESS	(1ULL << RTE_SRC_PU_SHIFT)
+
+/* Get a route source. This also locks the source, therefore the caller has to
+ * unlock the source after the route has been propagated. */
+struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
+#define rt_get_source(p, id)	rt_get_source_o(&(p)->sources, (id))
+
+struct rte_src *rt_find_source_global(u32 id);
+
+#ifdef RT_SOURCE_DEBUG
+#define rt_lock_source _rt_lock_source_internal
+#define rt_unlock_source _rt_unlock_source_internal
+#endif
+
+static inline void rt_lock_source(struct rte_src *src)
+{
+  /* Locking a source is trivial; somebody already holds it, so we just
+   * increase the use count. Nothing can be freed underneath our hands. */
+  u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
+  ASSERT_DIE(uc > 0);
+}
+
+static inline void rt_unlock_source(struct rte_src *src)
+{
+  /* Unlocking is tricky. We do it locklessly, so the prune event may be
+   * running at the same time; therefore if the unlock takes us to zero,
+   * it must be the last thing in this routine, otherwise the prune routine
+   * may find the source's usecount zeroed and free it prematurely.
+   *
+   * The usecount is split into two parts:
+   * the top 20 bits are an in-progress indicator,
+   * the bottom 44 bits keep the actual usecount.
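+   *
+   * (An illustrative sketch of the 64-bit value, assuming the split above:
+   *
+   *     bit 63 ........ 44 | 43 .................... 0
+   *     pending unlockers  |    actual usecount
+   *
+   * so adding one RTE_SRC_IN_PROGRESS = 1ULL << 44 bumps the pending part
+   * by one while leaving the usecount part untouched.)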
+   *
+   * Therefore at most 1 million writers can simultaneously unlock the same
+   * source, while at most ~17T different routes can reference it. Both limits
+   * are insanely high from the 2022 point of view. Let's suppose that by the
+   * time 17T routes or 1M writers become real, we will also have 128-bit
+   * atomic variables in the C standard. */
+
+  /* First, we push the in-progress indicator */
+  u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel);
+
+  /* Then we split the indicator into its parts. Remember, we got the value before the operation happened. */
+  u64 pending = (uc >> RTE_SRC_PU_SHIFT) + 1;
+  uc &= RTE_SRC_IN_PROGRESS - 1;
+
+  /* We reuse the RCU critical section indicator to make the prune event wait
+   * until we finish here in the rare case we get preempted. */
+  rcu_read_lock();
+
+  /* Obviously, there can't be more pending unlocks than the usecount itself */
+  if (uc == pending)
+    /* If we're the last unlocker, schedule the owner's prune event */
+    ev_send(src->owner->list, src->owner->prune);
+  else
+    ASSERT_DIE(uc > pending);
+
+  /* And now, finally, simultaneously pop the in-progress indicator and the
+   * usecount, possibly allowing the source pruning routine to free this structure */
+  atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel);
+
+  /* ... and to reduce the load a bit, the source pruning routine waits for
+   * RCU synchronization instead of spinning in a busy loop. */
+  rcu_read_unlock();
+}
+
+#ifdef RT_SOURCE_DEBUG
+#undef rt_lock_source
+#undef rt_unlock_source
+
+#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) )
+#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) )
+#endif
+
+void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
+void rt_destroy_sources(struct rte_owner *, event *);
+
+void rt_dump_sources(struct rte_owner *);
+
+/*
+ *	Route Attributes
+ *
+ *	Beware: All standard BGP attributes must be represented here instead
+ *	of making them local to the route. This is needed to ensure proper
+ *	construction of BGP route attribute lists.
+ */
+ */ + +/* Nexthop structure */ +struct nexthop { + ip_addr gw; /* Next hop */ + struct iface *iface; /* Outgoing interface */ + byte flags; + byte weight; + byte labels; /* Number of all labels */ + u32 label[0]; +}; + +/* For packing one into eattrs */ +struct nexthop_adata { + struct adata ad; + /* There is either a set of nexthops or a special destination (RTD_*) */ + union { + struct nexthop nh; + uint dest; + }; +}; + +#define NEXTHOP_DEST_SIZE (OFFSETOF(struct nexthop_adata, dest) + sizeof(uint) - OFFSETOF(struct adata, data)) +#define NEXTHOP_DEST_LITERAL(x) ((struct nexthop_adata) { \ + .ad.length = NEXTHOP_DEST_SIZE, .dest = (x), }) + +#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ + + +#define RTS_STATIC 1 /* Normal static route */ +#define RTS_INHERIT 2 /* Route inherited from kernel */ +#define RTS_DEVICE 3 /* Device route */ +#define RTS_STATIC_DEVICE 4 /* Static device route */ +#define RTS_REDIRECT 5 /* Learned via redirect */ +#define RTS_RIP 6 /* RIP route */ +#define RTS_OSPF 7 /* OSPF route */ +#define RTS_OSPF_IA 8 /* OSPF inter-area route */ +#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */ +#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ +#define RTS_BGP 11 /* BGP route */ +#define RTS_PIPE 12 /* Inter-table wormhole */ +#define RTS_BABEL 13 /* Babel route */ +#define RTS_RPKI 14 /* Route Origin Authorization */ +#define RTS_PERF 15 /* Perf checker */ - #define RTS_AGGREGATED 16 /* Aggregated route */ - #define RTS_MAX 17 ++#define RTS_L3VPN 16 /* MPLS L3VPN */ ++#define RTS_AGGREGATED 17 /* Aggregated route */ ++#define RTS_MAX 18 + +#define RTD_NONE 0 /* Undefined next hop */ +#define RTD_UNICAST 1 /* A standard next hop */ +#define RTD_BLACKHOLE 2 /* Silently drop packets */ +#define RTD_UNREACHABLE 3 /* Reject as unreachable */ +#define RTD_PROHIBIT 4 /* Administratively prohibited */ +#define RTD_MAX 5 + +extern const char * rta_dest_names[RTD_MAX]; + +static inline const char *rta_dest_name(uint n) +{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } + + +/* + * Extended Route Attributes + */ + +typedef struct eattr { + word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */ + byte flags; /* Protocol-dependent flags */ + byte type; /* Attribute type */ + byte rfu:5; + byte originated:1; /* The attribute has originated locally */ + byte fresh:1; /* An uncached attribute (e.g. modified in export filter) */ + byte undef:1; /* Explicitly undefined */ + + PADDING(unused, 3, 3); + + union bval u; +} eattr; + + +#define EA_CODE_MASK 0xffff +#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */ +#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */ +#define EA_BIT_GET(ea) ((ea) >> 24) + +typedef struct ea_list { + struct ea_list *next; /* In case we have an override list */ + byte flags; /* Flags: EALF_... 
+  byte rfu;
+  word count;				/* Number of attributes */
+  eattr attrs[0];			/* Attribute definitions themselves */
+} ea_list;
+
+struct ea_storage {
+  struct ea_storage *next_hash;		/* Next in hash chain */
+  struct ea_storage **pprev_hash;	/* Previous in hash chain */
+  _Atomic u32 uc;			/* Use count */
+  u32 hash_key;				/* List hash */
+  ea_list l[0];				/* The list itself */
+};
+
+#define EALF_SORTED 1			/* Attributes are sorted by code */
+#define EALF_BISECT 2			/* Use interval bisection for searching */
+#define EALF_CACHED 4			/* List is cached */
+#define EALF_HUGE 8			/* List is too big to fit into slab */
+
+struct ea_class {
+#define EA_CLASS_INSIDE \
+  const char *name;			/* Name (both print and filter) */ \
+  struct symbol *sym;			/* Symbol to export to configs */ \
+  uint id;				/* Autoassigned attribute ID */ \
+  uint uc;				/* Reference count */ \
+  btype type;				/* Data type ID */ \
+  uint readonly:1;			/* This attribute can't be changed by filters */ \
+  uint conf:1;				/* Requested by config */ \
+  uint hidden:1;			/* Technical attribute, do not show, do not expose to filters */ \
+  void (*format)(const eattr *ea, byte *buf, uint size); \
+  void (*stored)(const eattr *ea);	/* When stored into global hash */ \
+  void (*freed)(const eattr *ea);	/* When released from global hash */ \
+
+  EA_CLASS_INSIDE;
+};
+
+struct ea_class_ref {
+  resource r;
+  struct ea_class *class;
+};
+
+void ea_register_init(struct ea_class *);
+struct ea_class_ref *ea_register_alloc(pool *, struct ea_class);
+struct ea_class_ref *ea_ref_class(pool *, struct ea_class *); /* Reference for an attribute alias */
+
+#define EA_REGISTER_ALL_HELPER(x)	ea_register_init(x);
+#define EA_REGISTER_ALL(...)		MACRO_FOREACH(EA_REGISTER_ALL_HELPER, __VA_ARGS__)
+
+struct ea_class *ea_class_find_by_id(uint id);
+struct ea_class *ea_class_find_by_name(const char *name);
+static inline struct ea_class *ea_class_self(struct ea_class *self) { return self; }
+#define ea_class_find(_arg)	_Generic((_arg), \
+  uint: ea_class_find_by_id, \
+  word: ea_class_find_by_id, \
+  char *: ea_class_find_by_name, \
+  const char *: ea_class_find_by_name, \
+  struct ea_class *: ea_class_self)(_arg)
+
+struct ea_walk_state {
+  ea_list *eattrs;			/* Current ea_list, initially set by caller */
+  eattr *ea;				/* Current eattr, initially NULL */
+  u32 visited[4];			/* Bitfield, limiting max to 128 */
+};
+
+#define ea_find(_l, _arg)	_Generic((_arg), uint: ea_find_by_id, struct ea_class *: ea_find_by_class, char *: ea_find_by_name)(_l, _arg)
+eattr *ea_find_by_id(ea_list *, unsigned ea);
+static inline eattr *ea_find_by_class(ea_list *l, const struct ea_class *def)
+{ return ea_find_by_id(l, def->id); }
+static inline eattr *ea_find_by_name(ea_list *l, const char *name)
+{
+  const struct ea_class *def = ea_class_find_by_name(name);
+  return def ? ea_find_by_class(l, def) : NULL;
+}
+
+#define ea_get_int(_l, _ident, _def)  ({ \
+    struct ea_class *cls = ea_class_find((_ident)); \
+    ASSERT_DIE(cls->type & EAF_EMBEDDED); \
+    const eattr *ea = ea_find((_l), cls->id); \
+    (ea ? ea->u.data : (_def)); \
+    })
+
+#define ea_get_ip(_l, _ident, _def)  ({ \
+    struct ea_class *cls = ea_class_find((_ident)); \
+    ASSERT_DIE(cls->type == T_IP); \
+    const eattr *ea = ea_find((_l), cls->id); \
+    (ea ? *((const ip_addr *) ea->u.ptr->data) : (_def)); \
+    })
+
++#define ea_get_adata(_l, _ident)  ({ \
++    struct ea_class *cls = ea_class_find((_ident)); \
++    ASSERT_DIE(!(cls->type & EAF_EMBEDDED)); \
++    const eattr *ea = ea_find((_l), cls->id); \
++    (ea ? ea->u.ptr : &null_adata); \
++    })
++
+eattr *ea_walk(struct ea_walk_state *s, uint id, uint max);
+void ea_dump(ea_list *);
+int ea_same(ea_list *x, ea_list *y);	/* Test whether two ea_lists are identical */
+uint ea_hash(ea_list *e);	/* Calculate 16-bit hash value */
+ea_list *ea_append(ea_list *to, ea_list *what);
+void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max);
+
+/* Normalize ea_list; allocates the result from tmp_linpool */
+ea_list *ea_normalize(ea_list *e, int overlay);
+
+uint ea_list_size(ea_list *);
+void ea_list_copy(ea_list *dest, ea_list *src, uint size);
+
+#define EA_LOCAL_LIST(N)  struct { ea_list l; eattr a[N]; }
+
+#define EA_LITERAL_EMBEDDED(_class, _flags, _val) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(_type & EAF_EMBEDDED); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.i = _val); \
+    })
+
+#define EA_LITERAL_STORE_ADATA(_class, _flags, _buf, _len) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = tmp_store_adata((_buf), (_len))); \
+    })
+
+#define EA_LITERAL_DIRECT_ADATA(_class, _flags, _adata) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = _adata); \
+    })
+
+#define EA_LITERAL_GENERIC(_id, _type, _flags, ...) \
+  ((eattr) { .id = _id, .type = _type, .flags = _flags, __VA_ARGS__ })
+
+static inline eattr *
+ea_set_attr(ea_list **to, eattr a)
+{
+  EA_LOCAL_LIST(1) *ea = tmp_alloc(sizeof(*ea));
+  *ea = (typeof(*ea)) {
+    .l.flags = EALF_SORTED,
+    .l.count = 1,
+    .l.next = *to,
+    .a[0] = a,
+  };
+
+  *to = &ea->l;
+  return &ea->a[0];
+}
+
+static inline void
+ea_unset_attr(ea_list **to, _Bool local, const struct ea_class *def)
+{
+  ea_set_attr(to, EA_LITERAL_GENERIC(def->id, 0, 0,
+	.fresh = local, .originated = local, .undef = 1));
+}
+
+static inline void
+ea_set_attr_u32(ea_list **to, const struct ea_class *def, uint flags, u64 data)
+{ ea_set_attr(to, EA_LITERAL_EMBEDDED(def, flags, data)); }
+
+static inline void
+ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, const void *data, uint len)
+{ ea_set_attr(to, EA_LITERAL_STORE_ADATA(def, flags, data, len)); }
+
+static inline void
+ea_copy_attr(ea_list **to, ea_list *from, const struct ea_class *def)
+{
+  eattr *e = ea_find_by_class(from, def);
+  if (e)
+    if (e->type & EAF_EMBEDDED)
+      ea_set_attr_u32(to, def, e->flags, e->u.data);
+    else
+      ea_set_attr_data(to, def, e->flags, e->u.ptr->data, e->u.ptr->length);
+  else
+    ea_unset_attr(to, 0, def);
+}
+
+/*
+ *	Common route attributes
+ */
+
+/* Preference: first-order comparison */
+extern struct ea_class ea_gen_preference;
+static inline u32 rt_get_preference(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_preference, 0); }
+
+/* IGP metric: second-order comparison */
+extern struct ea_class ea_gen_igp_metric;
+u32 rt_get_igp_metric(const rte *rt);
+#define IGP_METRIC_UNKNOWN 0x80000000	/* Default igp_metric used when no other
+					   protocol-specific metric is available */
+
+/* From: Advertising router */
+extern struct ea_class ea_gen_from;
+
+
+/* MPLS Label, Policy and Class */
+extern struct ea_class ea_gen_mpls_label,
+       ea_gen_mpls_policy, ea_gen_mpls_class;
+
+
+/* Source: An old method to determine the route source protocol and kind.
+ * To be superseded in the near future by something more informative. */
+extern struct ea_class ea_gen_source;
+static inline u32 rt_get_source_attr(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_source, 0); }
+
+/* Flowspec validation result */
+enum flowspec_valid {
+  FLOWSPEC_UNKNOWN = 0,
+  FLOWSPEC_VALID = 1,
+  FLOWSPEC_INVALID = 2,
+  FLOWSPEC__MAX,
+};
+
+extern const char * flowspec_valid_names[FLOWSPEC__MAX];
+static inline const char *flowspec_valid_name(enum flowspec_valid v)
+{ return (v < FLOWSPEC__MAX) ? flowspec_valid_names[v] : "???"; }
+
+extern struct ea_class ea_gen_flowspec_valid;
+static inline enum flowspec_valid rt_get_flowspec_valid(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN); }
+
+/* Next hop: For now, stored as adata */
+extern struct ea_class ea_gen_nexthop;
+
+static inline void ea_set_dest(struct ea_list **to, uint flags, uint dest)
+{
+  struct nexthop_adata nhad = NEXTHOP_DEST_LITERAL(dest);
+  ea_set_attr_data(to, &ea_gen_nexthop, flags, &nhad.ad.data, nhad.ad.length);
+}
+
+/* Next hop structures */
+
+#define NEXTHOP_ALIGNMENT	(_Alignof(struct nexthop))
+#define NEXTHOP_MAX_SIZE	(sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
+#define NEXTHOP_SIZE(_nh)	NEXTHOP_SIZE_CNT(((_nh)->labels))
+#define NEXTHOP_SIZE_CNT(cnt)	BIRD_ALIGN((sizeof(struct nexthop) + sizeof(u32) * (cnt)), NEXTHOP_ALIGNMENT)
+#define nexthop_size(nh)	NEXTHOP_SIZE((nh))
+
+#define NEXTHOP_NEXT(_nh)	((void *) (_nh) + NEXTHOP_SIZE(_nh))
+#define NEXTHOP_END(_nhad)	((_nhad)->ad.data + (_nhad)->ad.length)
+#define NEXTHOP_VALID(_nh, _nhad) ((void *) (_nh) < (void *) NEXTHOP_END(_nhad))
+#define NEXTHOP_ONE(_nhad)	(NEXTHOP_NEXT(&(_nhad)->nh) == NEXTHOP_END(_nhad))
+
+#define NEXTHOP_WALK(_iter, _nhad) for ( \
+    struct nexthop *_iter = &(_nhad)->nh; \
+    (void *) _iter < (void *) NEXTHOP_END(_nhad); \
+    _iter = NEXTHOP_NEXT(_iter))
+
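+/* (Illustrative usage sketch, hypothetical code: nexthops are variable-sized,
+ * so they are walked in place instead of via a linked list, e.g.
+ *
+ *   NEXTHOP_WALK(nh, nhad)
+ *     if (ipa_nonzero(nh->gw))
+ *       log(L_TRACE "nexthop %I via %s", nh->gw, nh->iface->name);
+ * ) */
+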
+static inline int nexthop_same(struct nexthop_adata *x, struct nexthop_adata *y)
+{ return adata_same(&x->ad, &y->ad); }
+struct nexthop_adata *nexthop_merge(struct nexthop_adata *x, struct nexthop_adata *y, int max, linpool *lp);
+struct nexthop_adata *nexthop_sort(struct nexthop_adata *x, linpool *lp);
+int nexthop_is_sorted(struct nexthop_adata *x);
+
+#define NEXTHOP_IS_REACHABLE(nhad)	((nhad)->ad.length > NEXTHOP_DEST_SIZE)
+
- /* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */
- static inline int rte_is_reachable(rte *r)
++static inline struct nexthop_adata *
++rte_get_nexthops(rte *r)
+{
+  eattr *nhea = ea_find(r->attrs, &ea_gen_nexthop);
-   if (!nhea)
-     return 0;
++  return nhea ? SKIP_BACK(struct nexthop_adata, ad, nhea->u.ptr) : NULL;
++}
+
-   struct nexthop_adata *nhad = (void *) nhea->u.ptr;
-   return NEXTHOP_IS_REACHABLE(nhad);
++/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */
++static inline int rte_is_reachable(rte *r)
++{
++  struct nexthop_adata *nhad = rte_get_nexthops(r);
++  return nhad && NEXTHOP_IS_REACHABLE(nhad);
+}
+
+static inline int nhea_dest(eattr *nhea)
+{
+  if (!nhea)
+    return RTD_NONE;
+
+  struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
+  if (NEXTHOP_IS_REACHABLE(nhad))
+    return RTD_UNICAST;
+  else
+    return nhad->dest;
+}
+
+static inline int rte_dest(const rte *r)
+{
+  return nhea_dest(ea_find(r->attrs, &ea_gen_nexthop));
+}
+
+void rta_init(void);
+ea_list *ea_lookup(ea_list *, int overlay);	/* Get a cached (and normalized) variant of this attribute list */
+static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; }
+static inline struct ea_storage *ea_get_storage(ea_list *r)
+{
+  ASSERT_DIE(ea_is_cached(r));
+  return SKIP_BACK(struct ea_storage, l[0], r);
+}
+
+static inline ea_list *ea_clone(ea_list *r) {
+  ASSERT_DIE(0 < atomic_fetch_add_explicit(&ea_get_storage(r)->uc, 1, memory_order_acq_rel));
+  return r;
+}
+void ea__free(struct ea_storage *r);
+static inline void ea_free(ea_list *l) {
+  if (!l) return;
+  struct ea_storage *r = ea_get_storage(l);
+  if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) ea__free(r);
+}
+
+void ea_dump(ea_list *);
+void ea_dump_all(void);
+void ea_show_list(struct cli *, ea_list *);
+
+#define rta_lookup	ea_lookup
+#define rta_is_cached	ea_is_cached
+#define rta_clone	ea_clone
+#define rta_free	ea_free
+
+#endif
diff --cc nest/route.h
index 610bc21e3,332f9afd4..81c225b15
--- a/nest/route.h
+++ b/nest/route.h
@@@ -737,8 -763,9 +737,10 @@@ void ea_show_nexthop_list(struct cli *c
  #define DEF_PREF_RIP		120	/* RIP */
  #define DEF_PREF_BGP		100	/* BGP */
  #define DEF_PREF_RPKI		100	/* RPKI */
+ #define DEF_PREF_L3VPN_IMPORT	 80	/* L3VPN import -> lower than BGP */
+ #define DEF_PREF_L3VPN_EXPORT	120	/* L3VPN export -> higher than BGP */
  #define DEF_PREF_INHERITED	10	/* Routes inherited from other routing daemons */
 +#define DEF_PREF_UNKNOWN	0	/* Routes with no preference set */
 
  /*
   *	Route Origin Authorization
diff --cc proto/l3vpn/l3vpn.c
index 000000000,3bf0df48c..af5f106ab
mode 000000,100644..100644
--- a/proto/l3vpn/l3vpn.c
+++ b/proto/l3vpn/l3vpn.c
@@@ -1,0 -1,476 +1,508 @@@
+ /*
+  *	BIRD -- BGP/MPLS IP Virtual Private Networks (L3VPN)
+  *
+  *	(c) 2022 Ondrej Zajicek
+  *	(c) 2022 CZ.NIC z.s.p.o.
+  *
+  *	Can be freely distributed and used under the terms of the GNU GPL.
+  */
+ 
+ /**
+  * DOC: L3VPN
+  *
+  * The L3VPN protocol implements RFC 4364 BGP/MPLS VPNs using an MPLS backbone.
+  * It works similarly to the pipe protocol: it connects an IP table (one per
+  * VRF) with a (global) VPN table. Routes passed from the VPN table to the IP
+  * table are stripped of the RD and filtered by import targets; routes passed
+  * in the other direction are extended with the RD, MPLS labels and export
+  * targets in extended communities. A separate MPLS channel is used to announce
+  * MPLS routes for the labels.
+  *
+  * Note that in contrast to the pipe protocol, one L3VPN protocol instance has
+  * both IPv4 and IPv6 channels. Also, both IP and VPN channels are presented to
+  * users as separate channels, although that will change in the future.
+  *
+  * The L3VPN protocol has different default preferences on the IP and VPN
+  * sides. The reason is that in the import direction (VPN -> IP) routes should
+  * have lower preference than ones received from a local CE (perhaps by EBGP),
+  * while in the export direction (IP -> VPN) routes should have higher
+  * preference than ones received from remote PEs (by IBGP).
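+  *
+  * (For illustration, a hypothetical configuration fragment; all table names,
+  * the RD and the route-target values are made up:
+  *
+  *   protocol l3vpn {
+  *     vrf "vrf1";
+  *     ipv4 { table vrf1v4; };
+  *     vpn4 { table vpntab4; };
+  *     mpls;
+  *     rd 65000:1;
+  *     import target [(rt, 65000, 1)];
+  *     export target [(rt, 65000, 1)];
+  *   }
+  * )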
+  *
+  * Supported standards:
+  * RFC 4364 - BGP/MPLS IP Virtual Private Networks (L3VPN)
+  */
+ 
+ #undef LOCAL_DEBUG
+ 
+ #include "nest/bird.h"
+ #include "nest/iface.h"
+ #include "nest/protocol.h"
+ #include "nest/route.h"
+ #include "nest/mpls.h"
+ #include "nest/cli.h"
+ #include "conf/conf.h"
+ #include "filter/filter.h"
+ #include "filter/data.h"
+ #include "lib/string.h"
+ 
+ #include "l3vpn.h"
+ 
-#include "proto/bgp/bgp.h"
++#include "proto/pipe/pipe.h"
++
++#include
+ 
+ /*
+  * TODO:
+  * - import/export target reconfiguration
+  * - check for simple nodes in export route
+  * - replace pair of channels with shared channel for one address family
+  * - improve route comparisons in VRFs
+  * - optional import/export target all
+  * - optional support for route origins
+  * - optional automatic assignment of RDs
+  * - MPLS-in-IP encapsulation
+  */
+ 
-#define EA_BGP_NEXT_HOP		EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)
-#define EA_BGP_EXT_COMMUNITY	EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)
-#define EA_BGP_MPLS_LABEL_STACK	EA_CODE(PROTOCOL_BGP, BA_MPLS_LABEL_STACK)
-
-static inline const struct adata * ea_get_adata(ea_list *e, uint id)
-{ eattr *a = ea_find(e, id); return a ? a->u.ptr : &null_adata; }
++static struct ea_class *ea_bgp_next_hop,
++		       *ea_bgp_ext_community,
++		       *ea_bgp_mpls_label_stack;
+ 
+ static inline int
-mpls_valid_nexthop(const rta *a)
++mpls_valid_nexthop(struct nexthop_adata *nhad)
+ {
+   /* MPLS does not support special blackhole targets */
-   if (a->dest != RTD_UNICAST)
++  if (!NEXTHOP_IS_REACHABLE(nhad))
+     return 0;
+ 
+   /* MPLS does not support ARP / neighbor discovery */
-   for (const struct nexthop *nh = &a->nh; nh ; nh = nh->next)
++  NEXTHOP_WALK(nh, nhad)
+     if (ipa_zero(nh->gw) && (nh->iface->flags & IF_MULTIACCESS))
+       return 0;
+ 
+   return 1;
+ }
+ 
+ static int
+ l3vpn_import_targets(struct l3vpn_proto *p, const struct adata *list)
+ {
+   return (p->import_target_one) ?
+     ec_set_contains(list, p->import_target->from.val.ec) :
+     eclist_match_set(list, p->import_target);
+ }
+ 
+ static struct adata *
+ l3vpn_export_targets(struct l3vpn_proto *p, const struct adata *src)
+ {
+   u32 *s = int_set_get_data(src);
+   int len = int_set_get_size(src);
+ 
+   struct adata *dst = lp_alloc(tmp_linpool, sizeof(struct adata) + (len + p->export_target_length) * sizeof(u32));
+   u32 *d = int_set_get_data(dst);
+   int end = 0;
+ 
+   for (int i = 0; i < len; i += 2)
+   {
+     /* Remove existing route targets */
+     uint type = s[i] >> 16;
+     if (ec_type_is_rt(type))
+       continue;
+ 
+     d[end++] = s[i];
+     d[end++] = s[i+1];
+   }
+ 
+   /* Add new route targets */
+   memcpy(d + end, p->export_target_data, p->export_target_length * sizeof(u32));
+   end += p->export_target_length;
+ 
+   /* Set length */
+   dst->length = end * sizeof(u32);
+ 
+   return dst;
+ }
+ 
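+ /* (For illustration: given src = [(ro, 64500, 1), (rt, 64500, 9)] and a
+  * configured export target (rt, 65000, 1), the route-target community
+  * (rt, 64500, 9) is dropped, the route-origin community (ro, 64500, 1)
+  * is kept, and (rt, 65000, 1) is appended.) */
+ 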
+ static void
+ l3vpn_add_ec(const struct f_tree *t, void *P)
+ {
+   struct l3vpn_proto *p = P;
+   ec_put(p->export_target_data, p->export_target_length, t->from.val.ec);
+   p->export_target_length += 2;
+ }
+ 
+ static void
+ l3vpn_prepare_targets(struct l3vpn_proto *p)
+ {
+   const struct f_tree *t = p->import_target;
+   p->import_target_one = !t->left && !t->right && (t->from.val.ec == t->to.val.ec);
+ 
+   uint len = 2 * tree_node_count(p->export_target);
+   p->export_target_data = mb_alloc(p->p.pool, len * sizeof(u32));
+   p->export_target_length = 0;
+   tree_walk(p->export_target, l3vpn_add_ec, p);
+   ASSERT(p->export_target_length == len);
+ }
+ 
+ /* Convert 64-bit RD to a 32-bit source ID; unfortunately it has collisions */
+ static inline struct rte_src * l3vpn_get_source(struct l3vpn_proto *p, u64 rd)
+ { return rt_get_source(&p->p, (u32)(rd >> 32) ^ u32_hash(rd)); }
+ //{ return p->p.main_source; }
+ 
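+ /* (Illustrative: the type-0 RD 65000:1 is the 64-bit value 0x0000fde800000001,
+  * so the source ID comes out as 0x0000fde8 ^ u32_hash(rd); distinct RDs may
+  * map to the same 32-bit ID, hence the collision caveat above.) */
+ 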
+ static void
-l3vpn_rt_notify(struct proto *P, struct channel *c0, net *net, rte *new, rte *old UNUSED)
++l3vpn_rt_notify(struct proto *P, struct channel *c0, const net_addr *n0, rte *new, const rte *old UNUSED)
+ {
+   struct l3vpn_proto *p = (void *) P;
+   struct rte_src *src = NULL;
+   struct channel *dst = NULL;
+   int export;
+ 
-   const net_addr *n0 = net->n.addr;
+   net_addr *n = alloca(sizeof(net_addr_vpn6));
+ 
+   switch (c0->net_type)
+   {
+   case NET_IP4:
+     net_fill_vpn4(n, net4_prefix(n0), net4_pxlen(n0), p->rd);
+     src = p->p.main_source;
+     dst = p->vpn4_channel;
+     export = 1;
+     break;
+ 
+   case NET_IP6:
+     net_fill_vpn6(n, net6_prefix(n0), net6_pxlen(n0), p->rd);
+     src = p->p.main_source;
+     dst = p->vpn6_channel;
+     export = 1;
+     break;
+ 
+   case NET_VPN4:
+     net_fill_ip4(n, net4_prefix(n0), net4_pxlen(n0));
+     src = l3vpn_get_source(p, ((const net_addr_vpn4 *) n0)->rd);
+     dst = p->ip4_channel;
+     export = 0;
+     break;
+ 
+   case NET_VPN6:
+     net_fill_ip6(n, net6_prefix(n0), net6_pxlen(n0));
+     src = l3vpn_get_source(p, ((const net_addr_vpn6 *) n0)->rd);
+     dst = p->ip6_channel;
+     export = 0;
+     break;
+ 
+   case NET_MPLS:
+     return;
+   }
+ 
+   if (new)
+   {
++    new->src = src;
++
++    /* Read these before the attribute list is modified below; this must not
++     * happen before the NULL check of new */
++    const struct adata *ecad = ea_get_adata(new->attrs, ea_bgp_ext_community);
++    struct nexthop_adata *nhad_orig = rte_get_nexthops(new);
+ 
-     const rta *a0 = new->attrs;
-     rta *a = alloca(RTA_MAX_SIZE);
-     *a = (rta) {
-       .source = RTS_L3VPN,
-       .scope = SCOPE_UNIVERSE,
-       .dest = a0->dest,
-       .pref = dst->preference,
-       .eattrs = a0->eattrs
-     };
++    ea_set_attr_u32(&new->attrs, &ea_gen_source, 0, RTS_L3VPN);
++    ea_set_attr_u32(&new->attrs, &ea_gen_preference, 0, dst->preference);
+ 
-     nexthop_link(a, &a0->nh);
+ 
+     /* Do not keep original labels, we may assign new ones */
-     ea_unset_attr(&a->eattrs, tmp_linpool, 0, EA_MPLS_LABEL);
-     ea_unset_attr(&a->eattrs, tmp_linpool, 0, EA_MPLS_POLICY);
++    ea_unset_attr(&new->attrs, 0, &ea_gen_mpls_label);
++    ea_unset_attr(&new->attrs, 0, &ea_gen_mpls_policy);
+ 
+     /* We are crossing a VRF boundary, NEXT_HOP is no longer valid */
-     ea_unset_attr(&a->eattrs, tmp_linpool, 0, EA_BGP_NEXT_HOP);
-     ea_unset_attr(&a->eattrs, tmp_linpool, 0, EA_BGP_MPLS_LABEL_STACK);
++    ea_unset_attr(&new->attrs, 0, ea_bgp_next_hop);
++    ea_unset_attr(&new->attrs, 0, ea_bgp_mpls_label_stack);
++
++    /* The hostentry is no longer valid either */
++    ea_unset_attr(&new->attrs, 0, &ea_gen_hostentry);
+ 
+     if (export)
+     {
+       struct mpls_channel *mc = (void *) p->p.mpls_channel;
-       ea_set_attr_u32(&a->eattrs, tmp_linpool, EA_MPLS_POLICY, 0, EAF_TYPE_INT, mc->label_policy);
++      ea_set_attr_u32(&new->attrs, &ea_gen_mpls_policy, 0, mc->label_policy);
+ 
-       struct adata *ad = l3vpn_export_targets(p, ea_get_adata(a0->eattrs, EA_BGP_EXT_COMMUNITY));
-       ea_set_attr_ptr(&a->eattrs, tmp_linpool, EA_BGP_EXT_COMMUNITY, 0, EAF_TYPE_EC_SET, ad);
++      ea_set_attr(&new->attrs, EA_LITERAL_DIRECT_ADATA(
++	    ea_bgp_ext_community, 0, l3vpn_export_targets(p, ecad)));
+ 
+       /* Replace an MPLS-incompatible nexthop with a lookup in the VRF table */
-       if (!mpls_valid_nexthop(a) && p->p.vrf)
++      if ((!nhad_orig || !mpls_valid_nexthop(nhad_orig)) && p->p.vrf)
+       {
-	a->dest = RTD_UNICAST;
-	a->nh = (struct nexthop) { .iface = p->p.vrf };
++	struct nexthop_adata nhad = {
++	  .nh.iface = p->p.vrf,
++	  .ad.length = sizeof nhad - sizeof nhad.ad,
++	};
++	ea_set_attr_data(&new->attrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length);
+       }
-     }
+ 
-     /* Keep original IGP metric as a base for L3VPN metric */
-     if (!export)
-       a->igp_metric = a0->igp_metric;
++      /* Drop the original IGP metric on export;
++       * it is kept on import as a base for the L3VPN metric */
++      ea_unset_attr(&new->attrs, 0, &ea_gen_igp_metric);
++    }
+ 
-     rte *e = rte_get_temp(a, src);
-     rte_update2(dst, n, e, src);
++    rte_update(dst, n, new, src);
+   }
+   else
+   {
-     rte_update2(dst, n, NULL, src);
++    rte_update(dst, n, NULL, src);
+   }
+ }
+ 
+ 
+ static int
+ l3vpn_preexport(struct channel *C, rte *e)
+ {
+   struct l3vpn_proto *p = (void *) C->proto;
-   struct proto *pp = e->sender->proto;
+ 
-   if (pp == C->proto)
++  if (&C->in_req == e->sender->req)
+     return -1;	/* Avoid local loops automatically */
+ 
+   switch (C->net_type)
+   {
+   case NET_IP4:
+   case NET_IP6:
+     return 0;
+ 
+   case NET_VPN4:
+   case NET_VPN6:
-     return l3vpn_import_targets(p, ea_get_adata(e->attrs->eattrs, EA_BGP_EXT_COMMUNITY)) ? 0 : -1;
++    return l3vpn_import_targets(p, ea_get_adata(e->attrs, ea_bgp_ext_community)) ? 0 : -1;
+ 
+   case NET_MPLS:
+     return -1;
+ 
+   default:
+     bug("invalid type");
+   }
+ }
+ 
-static void
-l3vpn_reload_routes(struct channel *C)
++/* TODO: unify the code between l3vpn and pipe */
++void pipe_import_by_refeed_free(struct channel_feeding_request *cfr);
++
++static int
++l3vpn_reload_routes(struct channel *C, struct channel_import_request *cir)
+ {
+   struct l3vpn_proto *p = (void *) C->proto;
++  struct channel *feed = NULL;
+ 
+   /* Route reload on one channel is just refeed on the other */
+   switch (C->net_type)
+   {
+   case NET_IP4:
-     channel_request_feeding(p->vpn4_channel);
++    feed = p->vpn4_channel;
+     break;
+ 
+   case NET_IP6:
-     channel_request_feeding(p->vpn6_channel);
++    feed = p->vpn6_channel;
+     break;
+ 
+   case NET_VPN4:
-     channel_request_feeding(p->ip4_channel);
++    feed = p->ip4_channel;
+     break;
+ 
+   case NET_VPN6:
-     channel_request_feeding(p->ip6_channel);
++    feed = p->ip6_channel;
+     break;
+ 
+   case NET_MPLS:
+     /* FIXME */
-     break;
++    return 1;
+   }
-}
+ 
-static inline u32
-l3vpn_metric(rte *e)
-{
-  u32 metric = ea_get_int(e->attrs->eattrs, EA_GEN_IGP_METRIC, e->attrs->igp_metric);
-  return MIN(metric, IGP_METRIC_UNKNOWN);
++  if (cir->trie)
++  {
++    struct import_to_export_reload *reload = lp_alloc(cir->trie->lp, sizeof *reload);
++    *reload = (struct import_to_export_reload) {
++      .cir = cir,
++      .cfr = {
++	.type = CFRT_AUXILIARY,
++	.done = pipe_import_by_refeed_free,
++	.trie = cir->trie,
++      },
++    };
++    channel_request_feeding(feed, &reload->cfr);
++  }
++  else
++  {
++    channel_request_feeding_dynamic(feed, CFRT_DIRECT);
++    cir->done(cir);
++  }
++
++  return 1;
+ }
+ 
+ static int
-l3vpn_rte_better(rte *new, rte *old)
++l3vpn_rte_better(const rte *new, const rte *old)
+ {
+   /* This is a hack, we should have a full BGP-style comparison */
-   return l3vpn_metric(new) < l3vpn_metric(old);
++  return rt_get_igp_metric(new) < rt_get_igp_metric(old);
+ }
+ 
+ static void
+ l3vpn_postconfig(struct proto_config *CF)
+ {
+   struct l3vpn_config *cf = (void *) CF;
+ 
+   if (!!proto_cf_find_channel(CF, NET_IP4) != !!proto_cf_find_channel(CF, NET_VPN4))
+     cf_error("For IPv4 L3VPN, both IPv4 and VPNv4 channels must be specified");
+ 
+   if (!!proto_cf_find_channel(CF, NET_IP6) != !!proto_cf_find_channel(CF, NET_VPN6))
+     cf_error("For IPv6 L3VPN, both IPv6 and VPNv6 channels must be specified");
+ 
+   if (!proto_cf_find_channel(CF, NET_MPLS))
+     cf_error("MPLS channel not specified");
+ 
+   if (!cf->rd)
+     cf_error("Route distinguisher not specified");
+ 
+   if (!cf->import_target && !cf->export_target)
+     cf_error("Route target not specified");
+ 
+   if (!cf->import_target)
+     cf_error("Import target not specified");
+ 
+   if (!cf->export_target)
+     cf_error("Export target not specified");
+ }
+ 
+ static struct proto *
+ l3vpn_init(struct proto_config *CF)
+ {
++  ASSERT_DIE(the_bird_locked());
++
++  /* Resolve registered BGP attribute classes once */
++  static bool bgp_attributes_resolved = 0;
++  if (!bgp_attributes_resolved)
++  {
++    ea_bgp_next_hop = ea_class_find_by_name("bgp_next_hop");
++    ea_bgp_ext_community = ea_class_find_by_name("bgp_ext_community");
++    ea_bgp_mpls_label_stack = ea_class_find_by_name("bgp_mpls_label_stack");
++    bgp_attributes_resolved = 1;
++  }
++
+   struct proto *P = proto_new(CF);
+   struct l3vpn_proto *p = (void *) P;
+   // struct l3vpn_config *cf = (void *) CF;
+ 
+   proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4));
+   proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6));
+   proto_configure_channel(P, &p->vpn4_channel, proto_cf_find_channel(CF, NET_VPN4));
+   proto_configure_channel(P, &p->vpn6_channel, proto_cf_find_channel(CF, NET_VPN6));
+   proto_configure_channel(P, &P->mpls_channel, proto_cf_find_channel(CF, NET_MPLS));
+ 
+   P->rt_notify = l3vpn_rt_notify;
+   P->preexport = l3vpn_preexport;
+   P->reload_routes = l3vpn_reload_routes;
-   P->rte_better = l3vpn_rte_better;
+ 
+   return P;
+ }
+ 
+ static int
+ l3vpn_start(struct proto *P)
+ {
+   struct l3vpn_proto *p = (void *) P;
+   struct l3vpn_config *cf = (void *) P->cf;
+ 
+   p->rd = cf->rd;
+   p->import_target = cf->import_target;
+   p->export_target = cf->export_target;
+ 
+   l3vpn_prepare_targets(p);
+ 
-   proto_setup_mpls_map(P, RTS_L3VPN, 1);
++  proto_setup_mpls_map(P, RTS_L3VPN);
+ 
-   if (P->vrf_set)
-     P->mpls_map->vrf_iface = P->vrf;
++  P->mpls_map->vrf_iface = P->vrf;
+ 
+   return PS_UP;
+ }
+ 
+ static int
+ l3vpn_shutdown(struct proto *P)
+ {
+   // struct l3vpn_proto *p = (void *) P;
+ 
-   proto_shutdown_mpls_map(P, 1);
++  proto_shutdown_mpls_map(P);
+ 
+   return PS_DOWN;
+ }
+ 
+ static int
+ l3vpn_reconfigure(struct proto *P, struct proto_config *CF)
+ {
+   struct l3vpn_proto *p = (void *) P;
+   struct l3vpn_config *cf = (void *) CF;
+ 
+   if (!proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)) ||
+       !proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)) ||
+       !proto_configure_channel(P, &p->vpn4_channel, proto_cf_find_channel(CF, NET_VPN4)) ||
+       !proto_configure_channel(P, &p->vpn6_channel, proto_cf_find_channel(CF, NET_VPN6)) ||
+       !proto_configure_channel(P, &P->mpls_channel, proto_cf_find_channel(CF, NET_MPLS)))
+     return 0;
+ 
+   if ((p->rd != cf->rd) ||
+       !same_tree(p->import_target, cf->import_target) ||
+       !same_tree(p->export_target, cf->export_target))
+     return 0;
+ 
+   /*
+   if (!same_tree(p->import_target, cf->import_target))
+   {
+     if (p->vpn4_channel && (p->vpn4_channel->channel_state == CS_UP))
+       channel_request_feeding(p->vpn4_channel);
+ 
+     if (p->vpn6_channel && (p->vpn6_channel->channel_state == CS_UP))
+       channel_request_feeding(p->vpn6_channel);
+   }
+ 
+   if (!same_tree(p->export_target, cf->export_target))
+   {
+     if (p->ip4_channel && (p->ip4_channel->channel_state == CS_UP))
+       channel_request_feeding(p->ip4_channel);
+ 
+     if (p->ip6_channel && (p->ip6_channel->channel_state == CS_UP))
+       channel_request_feeding(p->ip6_channel);
+   }
+   */
+ 
-   proto_setup_mpls_map(P, RTS_L3VPN, 1);
++  proto_setup_mpls_map(P, RTS_L3VPN);
+ 
+   return 1;
+ }
+ 
+ static void
+ l3vpn_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
+ {
+   /* Just a shallow copy, not many items here */
+ }
+ 
+ static void
-l3vpn_get_route_info(rte *rte, byte *buf)
++l3vpn_get_route_info(const rte *rte, byte *buf)
+ {
-   u32 metric = l3vpn_metric(rte);
++  u32 metric = rt_get_igp_metric(rte);
++  u32 pref = rt_get_preference(rte);
++
+   if (metric < IGP_METRIC_UNKNOWN)
-     bsprintf(buf, " (%u/%u)", rte->attrs->pref, metric);
++    bsprintf(buf, " (%u/%u)", pref, metric);
+   else
-     bsprintf(buf, " (%u/?)", rte->attrs->pref);
++    bsprintf(buf, " (%u/?)", pref);
+ }
+ 
++struct rte_owner_class l3vpn_rte_owner_class = {
++  .get_route_info =	l3vpn_get_route_info,
++  .rte_better =		l3vpn_rte_better,
++};
+ 
+ struct protocol proto_l3vpn = {
+   .name =		"L3VPN",
+   .template =		"l3vpn%d",
-   .class =		PROTOCOL_L3VPN,
+   .channel_mask =	NB_IP | NB_VPN | NB_MPLS,
+   .proto_size =		sizeof(struct l3vpn_proto),
+   .config_size =	sizeof(struct l3vpn_config),
++  .startup =		PROTOCOL_STARTUP_CONNECTOR,
+   .postconfig =		l3vpn_postconfig,
+   .init =		l3vpn_init,
+   .start =		l3vpn_start,
+   .shutdown =		l3vpn_shutdown,
+   .reconfigure =	l3vpn_reconfigure,
+   .copy_config =	l3vpn_copy_config,
-   .get_route_info =	l3vpn_get_route_info
+ };
+ 
+ void
+ l3vpn_build(void)
+ {
+   proto_build(&proto_l3vpn);
+ }