--- /dev/null
- #define RTS_AGGREGATED 16 /* Aggregated route */
- #define RTS_MAX 17
+/*
+ * BIRD Internet Routing Daemon -- Routing data structures
+ *
+ * (c) 1998--2000 Martin Mares <mj@ucw.cz>
+ * (c) 2022 Maria Matejka <mq@jmq.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_LIB_ROUTE_H_
+#define _BIRD_LIB_ROUTE_H_
+
+#undef RT_SOURCE_DEBUG
+
+#include "lib/type.h"
+#include "lib/rcu.h"
+#include "lib/hash.h"
+#include "lib/event.h"
+
+struct network;
+struct proto;
+struct cli;
+struct rtable_private;
+struct rte_storage;
+
+#define RTE_IN_TABLE_WRITABLE \
+ byte pflags; /* Protocol-specific flags; may change in-table (!) */ \
+ u8 stale_cycle; /* Auxiliary value for route refresh; may change in-table (!) */ \
+
+typedef struct rte {
+ RTE_IN_TABLE_WRITABLE;
+ byte flags; /* Table-specific flags */
+  u8 generation; /* If this route import is based on other previously exported routes,
+ this value should be 1 + MAX(generation of the parent routes).
+ Otherwise the route is independent and this value is zero. */
+ u32 id; /* Table specific route id */
+ struct ea_list *attrs; /* Attributes of this route */
+ const net_addr *net; /* Network this RTE belongs to */
+ struct rte_src *src; /* Route source that created the route */
+ struct rt_import_hook *sender; /* Import hook used to send the route to the routing table */
+ btime lastmod; /* Last modified (set by table) */
+} rte;
+
+#define REF_FILTERED 2 /* Route is rejected by import filter */
+#define REF_PENDING 32 /* Route has not propagated completely yet */
+
+/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
+static inline int rte_is_valid(const rte *r) { return r && !(r->flags & REF_FILTERED); }
+
+/* Route just has REF_FILTERED flag */
+static inline int rte_is_filtered(const rte *r) { return !!(r->flags & REF_FILTERED); }
+
+/* Strip the route of the table-specific values */
+static inline rte rte_init_from(const rte *r)
+{
+ return (rte) {
+ .attrs = r->attrs,
+ .net = r->net,
+ .src = r->src,
+ };
+}
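
/* Editorial sketch, not part of the patch: a protocol re-importing a route it
 * previously exported would typically strip the table-specific fields with
 * rte_init_from() and mark the copy as derived via the generation counter.
 * The helper name is hypothetical. */
static inline rte rte_derive_from_export_sketch(const rte *exported)
{
  rte e = rte_init_from(exported);          /* keeps attrs, net and src only */
  e.generation = exported->generation + 1;  /* 1 + MAX(parent generations) */
  return e;
}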
+
+int rte_same(const rte *, const rte *);
+
+struct rte_src {
+ struct rte_src *next; /* Hash chain */
+ struct rte_owner *owner; /* Route source owner */
+ u64 private_id; /* Private ID, assigned by the protocol */
+ u32 global_id; /* Globally unique ID of the source */
+ _Atomic u64 uc; /* Use count */
+};
+
+struct rte_owner_class {
+ void (*get_route_info)(const rte *, byte *buf); /* Get route information (for `show route' command) */
+ int (*rte_better)(const rte *, const rte *);
+ int (*rte_mergable)(const rte *, const rte *);
+ u32 (*rte_igp_metric)(const rte *);
+};
+
+struct rte_owner {
+ struct rte_owner_class *class;
+ int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte_storage *new, struct rte_storage *, struct rte_storage *);
+ HASH(struct rte_src) hash;
+ const char *name;
+ u32 hash_key;
+ u32 uc;
+ event_list *list;
+ event *prune;
+ event *stop;
+};
+
+DEFINE_DOMAIN(attrs);
+extern DOMAIN(attrs) attrs_domain;
+
+#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain)
+#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain)
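
/* Editorial sketch, not part of the patch: the attrs domain defined above
 * serializes access to globally shared attribute data; a critical section
 * wrapped by the convenience macros looks like this. The helper name is
 * hypothetical. */
static inline void attrs_locked_call_sketch(void (*body)(void))
{
  RTA_LOCK;
  body();        /* ... touch globally shared attribute structures ... */
  RTA_UNLOCK;
}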
+
+#define RTE_SRC_PU_SHIFT 44
+#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT)
+
+/* Get a route source. This also locks the source, therefore the caller has to
+ * unlock the source after the route has been propagated. */
+struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
+#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id))
+
+struct rte_src *rt_find_source_global(u32 id);
+
+#ifdef RT_SOURCE_DEBUG
+#define rt_lock_source _rt_lock_source_internal
+#define rt_unlock_source _rt_unlock_source_internal
+#endif
+
+static inline void rt_lock_source(struct rte_src *src)
+{
+ /* Locking a source is trivial; somebody already holds it so we just increase
+ * the use count. Nothing can be freed underneath our hands. */
+ u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
+ ASSERT_DIE(uc > 0);
+}
+
+static inline void rt_unlock_source(struct rte_src *src)
+{
+  /* Unlocking is tricky. We do it locklessly, so the prune event may be
+   * running at the same time. Therefore, if the unlock takes us to zero, it
+   * must be the last thing in this routine, otherwise the prune routine might
+   * find the source's usecount zeroed and free it prematurely.
+   *
+   * The usecount is split into two parts:
+   *   the top 20 bits are an in-progress indicator,
+   *   the bottom 44 bits keep the actual usecount.
+   *
+   * Therefore at most about 1 million writers can simultaneously unlock the
+   * same source, while at most ~17T different routes can reference it. Both
+   * limits are insanely high from the 2022 point of view. Let's suppose that
+   * by the time 17T routes or 1M writers become real, 128-bit atomic
+   * variables will also be in the C standard. */
+
+ /* First, we push the in-progress indicator */
+ u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel);
+
+ /* Then we split the indicator to its parts. Remember, we got the value before the operation happened. */
+ u64 pending = (uc >> RTE_SRC_PU_SHIFT) + 1;
+ uc &= RTE_SRC_IN_PROGRESS - 1;
+
+  /* We reuse the RCU critical section indicator to make the prune event wait
+   * until we finish here, in the rare case we get preempted. */
+ rcu_read_lock();
+
+ /* Obviously, there can't be more pending unlocks than the usecount itself */
+ if (uc == pending)
+ /* If we're the last unlocker, schedule the owner's prune event */
+ ev_send(src->owner->list, src->owner->prune);
+ else
+ ASSERT_DIE(uc > pending);
+
+ /* And now, finally, simultaneously pop the in-progress indicator and the
+ * usecount, possibly allowing the source pruning routine to free this structure */
+ atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel);
+
+  /* ... and to reduce the load a bit, it is better for the source pruning
+   * routine to wait for RCU synchronization than to busy-loop. */
+ rcu_read_unlock();
+}
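
/* Editorial sketch, not part of the patch: decoding the split usecount
 * described above. With RTE_SRC_PU_SHIFT == 44, a raw value of
 * (3ULL << 44) + 7 means three unlocks in flight and a usecount of 7. */
static inline void rte_src_uc_decode_sketch(u64 raw)
{
  u64 pending = raw >> RTE_SRC_PU_SHIFT;      /* unlocks currently in progress */
  u64 uc = raw & (RTE_SRC_IN_PROGRESS - 1);   /* actual number of references */
  ASSERT_DIE(uc >= pending);                  /* same invariant rt_unlock_source() relies on */
  (void) pending;
}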
+
+#ifdef RT_SOURCE_DEBUG
+#undef rt_lock_source
+#undef rt_unlock_source
+
+#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) )
+#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) )
+#endif
+
+void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
+void rt_destroy_sources(struct rte_owner *, event *);
+
+void rt_dump_sources(struct rte_owner *);
+
+/*
+ * Route Attributes
+ *
+ * Beware: All standard BGP attributes must be represented here instead
+ * of making them local to the route. This is needed to ensure proper
+ * construction of BGP route attribute lists.
+ */
+
+/* Nexthop structure */
+struct nexthop {
+ ip_addr gw; /* Next hop */
+ struct iface *iface; /* Outgoing interface */
+ byte flags;
+ byte weight;
+ byte labels; /* Number of all labels */
+ u32 label[0];
+};
+
+/* For packing one into eattrs */
+struct nexthop_adata {
+ struct adata ad;
+ /* There is either a set of nexthops or a special destination (RTD_*) */
+ union {
+ struct nexthop nh;
+ uint dest;
+ };
+};
+
+#define NEXTHOP_DEST_SIZE (OFFSETOF(struct nexthop_adata, dest) + sizeof(uint) - OFFSETOF(struct adata, data))
+#define NEXTHOP_DEST_LITERAL(x) ((struct nexthop_adata) { \
+ .ad.length = NEXTHOP_DEST_SIZE, .dest = (x), })
+
+#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */
+
+
+#define RTS_STATIC 1 /* Normal static route */
+#define RTS_INHERIT 2 /* Route inherited from kernel */
+#define RTS_DEVICE 3 /* Device route */
+#define RTS_STATIC_DEVICE 4 /* Static device route */
+#define RTS_REDIRECT 5 /* Learned via redirect */
+#define RTS_RIP 6 /* RIP route */
+#define RTS_OSPF 7 /* OSPF route */
+#define RTS_OSPF_IA 8 /* OSPF inter-area route */
+#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */
+#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */
+#define RTS_BGP 11 /* BGP route */
+#define RTS_PIPE 12 /* Inter-table wormhole */
+#define RTS_BABEL 13 /* Babel route */
+#define RTS_RPKI 14 /* Route Origin Authorization */
+#define RTS_PERF 15 /* Perf checker */
- /* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */
- static inline int rte_is_reachable(rte *r)
++#define RTS_L3VPN 16 /* MPLS L3VPN */
++#define RTS_AGGREGATED 17 /* Aggregated route */
++#define RTS_MAX 18
+
+#define RTD_NONE 0 /* Undefined next hop */
+#define RTD_UNICAST 1 /* A standard next hop */
+#define RTD_BLACKHOLE 2 /* Silently drop packets */
+#define RTD_UNREACHABLE 3 /* Reject as unreachable */
+#define RTD_PROHIBIT 4 /* Administratively prohibited */
+#define RTD_MAX 5
+
+extern const char * rta_dest_names[RTD_MAX];
+
+static inline const char *rta_dest_name(uint n)
+{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; }
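
/* Editorial sketch, not part of the patch: a route without a usable next hop
 * is encoded as a nexthop_adata carrying only one of the special RTD_*
 * destinations above; the adata length then covers just the dest member. */
static inline struct nexthop_adata nexthop_blackhole_sketch(void)
{
  return NEXTHOP_DEST_LITERAL(RTD_BLACKHOLE);
}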
+
+
+/*
+ * Extended Route Attributes
+ */
+
+typedef struct eattr {
+ word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */
+ byte flags; /* Protocol-dependent flags */
+ byte type; /* Attribute type */
+ byte rfu:5;
+ byte originated:1; /* The attribute has originated locally */
+ byte fresh:1; /* An uncached attribute (e.g. modified in export filter) */
+ byte undef:1; /* Explicitly undefined */
+
+ PADDING(unused, 3, 3);
+
+ union bval u;
+} eattr;
+
+
+#define EA_CODE_MASK 0xffff
+#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */
+#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */
+#define EA_BIT_GET(ea) ((ea) >> 24)
+
+typedef struct ea_list {
+ struct ea_list *next; /* In case we have an override list */
+ byte flags; /* Flags: EALF_... */
+ byte rfu;
+ word count; /* Number of attributes */
+ eattr attrs[0]; /* Attribute definitions themselves */
+} ea_list;
+
+struct ea_storage {
+ struct ea_storage *next_hash; /* Next in hash chain */
+ struct ea_storage **pprev_hash; /* Previous in hash chain */
+ _Atomic u32 uc; /* Use count */
+ u32 hash_key; /* List hash */
+ ea_list l[0]; /* The list itself */
+};
+
+#define EALF_SORTED 1 /* Attributes are sorted by code */
+#define EALF_BISECT 2 /* Use interval bisection for searching */
+#define EALF_CACHED 4 /* List is cached */
+#define EALF_HUGE 8 /* List is too big to fit into slab */
+
+struct ea_class {
+#define EA_CLASS_INSIDE \
+ const char *name; /* Name (both print and filter) */ \
+ struct symbol *sym; /* Symbol to export to configs */ \
+ uint id; /* Autoassigned attribute ID */ \
+ uint uc; /* Reference count */ \
+ btype type; /* Data type ID */ \
+ uint readonly:1; /* This attribute can't be changed by filters */ \
+ uint conf:1; /* Requested by config */ \
+ uint hidden:1; /* Technical attribute, do not show, do not expose to filters */ \
+ void (*format)(const eattr *ea, byte *buf, uint size); \
+ void (*stored)(const eattr *ea); /* When stored into global hash */ \
+ void (*freed)(const eattr *ea); /* When released from global hash */ \
+
+ EA_CLASS_INSIDE;
+};
+
+struct ea_class_ref {
+ resource r;
+ struct ea_class *class;
+};
+
+void ea_register_init(struct ea_class *);
+struct ea_class_ref *ea_register_alloc(pool *, struct ea_class);
+struct ea_class_ref *ea_ref_class(pool *, struct ea_class *); /* Reference for an attribute alias */
+
+#define EA_REGISTER_ALL_HELPER(x) ea_register_init(x);
+#define EA_REGISTER_ALL(...) MACRO_FOREACH(EA_REGISTER_ALL_HELPER, __VA_ARGS__)
+
+struct ea_class *ea_class_find_by_id(uint id);
+struct ea_class *ea_class_find_by_name(const char *name);
+static inline struct ea_class *ea_class_self(struct ea_class *self) { return self; }
+#define ea_class_find(_arg) _Generic((_arg), \
+ uint: ea_class_find_by_id, \
+ word: ea_class_find_by_id, \
+ char *: ea_class_find_by_name, \
+ const char *: ea_class_find_by_name, \
+ struct ea_class *: ea_class_self)(_arg)
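
/* Editorial sketch, not part of the patch: the _Generic dispatch above lets
 * one macro accept an attribute name, a numeric ID or a class pointer. The
 * attribute name used here is just an assumed example. */
static inline void ea_class_find_usage_sketch(void)
{
  const char *name = "igp_metric";                  /* example name, assumed */
  struct ea_class *by_name = ea_class_find(name);   /* dispatches by const char * */
  struct ea_class *by_id = by_name ? ea_class_find(by_name->id) : NULL;  /* by uint */
  struct ea_class *self = by_id ? ea_class_find(by_id) : NULL;           /* identity */
  (void) self;
}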
+
+struct ea_walk_state {
+  ea_list *eattrs; /* Current ea_list, initially set by caller */
+ eattr *ea; /* Current eattr, initially NULL */
+ u32 visited[4]; /* Bitfield, limiting max to 128 */
+};
+
+#define ea_find(_l, _arg) _Generic((_arg), uint: ea_find_by_id, struct ea_class *: ea_find_by_class, char *: ea_find_by_name)(_l, _arg)
+eattr *ea_find_by_id(ea_list *, unsigned ea);
+static inline eattr *ea_find_by_class(ea_list *l, const struct ea_class *def)
+{ return ea_find_by_id(l, def->id); }
+static inline eattr *ea_find_by_name(ea_list *l, const char *name)
+{
+ const struct ea_class *def = ea_class_find_by_name(name);
+ return def ? ea_find_by_class(l, def) : NULL;
+}
+
+#define ea_get_int(_l, _ident, _def) ({ \
+ struct ea_class *cls = ea_class_find((_ident)); \
+ ASSERT_DIE(cls->type & EAF_EMBEDDED); \
+ const eattr *ea = ea_find((_l), cls->id); \
+ (ea ? ea->u.data : (_def)); \
+ })
+
+#define ea_get_ip(_l, _ident, _def) ({ \
+ struct ea_class *cls = ea_class_find((_ident)); \
+ ASSERT_DIE(cls->type == T_IP); \
+ const eattr *ea = ea_find((_l), cls->id); \
+ (ea ? *((const ip_addr *) ea->u.ptr->data) : (_def)); \
+ })
+
++#define ea_get_adata(_l, _ident) ({ \
++ struct ea_class *cls = ea_class_find((_ident)); \
++ ASSERT_DIE(!(cls->type & EAF_EMBEDDED)); \
++ const eattr *ea = ea_find((_l), cls->id); \
++ (ea ? ea->u.ptr : &null_adata); \
++ })
++
+eattr *ea_walk(struct ea_walk_state *s, uint id, uint max);
+void ea_dump(ea_list *);
+int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */
+uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */
+ea_list *ea_append(ea_list *to, ea_list *what);
+void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max);
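
/* Editorial sketch, not part of the patch: iterating a range of attribute IDs
 * with ea_walk() and the walk state declared above; the visited bitfield makes
 * sure each ID is reported at most once across chained lists. */
static inline uint ea_walk_count_sketch(ea_list *l, uint first_id, uint count)
{
  struct ea_walk_state s = { .eattrs = l };
  uint n = 0;
  for (eattr *a = ea_walk(&s, first_id, count); a; a = ea_walk(&s, first_id, count))
    n++;
  return n;
}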
+
+/* Normalize ea_list; allocates the result from tmp_linpool */
+ea_list *ea_normalize(ea_list *e, int overlay);
+
+uint ea_list_size(ea_list *);
+void ea_list_copy(ea_list *dest, ea_list *src, uint size);
+
+#define EA_LOCAL_LIST(N) struct { ea_list l; eattr a[N]; }
+
+#define EA_LITERAL_EMBEDDED(_class, _flags, _val) ({ \
+ btype _type = (_class)->type; \
+ ASSERT_DIE(_type & EAF_EMBEDDED); \
+ EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.i = _val); \
+ })
+
+#define EA_LITERAL_STORE_ADATA(_class, _flags, _buf, _len) ({ \
+ btype _type = (_class)->type; \
+ ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
+ EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = tmp_store_adata((_buf), (_len))); \
+ })
+
+#define EA_LITERAL_DIRECT_ADATA(_class, _flags, _adata) ({ \
+ btype _type = (_class)->type; \
+ ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
+ EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = _adata); \
+ })
+
+#define EA_LITERAL_GENERIC(_id, _type, _flags, ...) \
+ ((eattr) { .id = _id, .type = _type, .flags = _flags, __VA_ARGS__ })
+
+static inline eattr *
+ea_set_attr(ea_list **to, eattr a)
+{
+ EA_LOCAL_LIST(1) *ea = tmp_alloc(sizeof(*ea));
+ *ea = (typeof(*ea)) {
+ .l.flags = EALF_SORTED,
+ .l.count = 1,
+ .l.next = *to,
+ .a[0] = a,
+ };
+
+ *to = &ea->l;
+ return &ea->a[0];
+}
+
+static inline void
+ea_unset_attr(ea_list **to, _Bool local, const struct ea_class *def)
+{
+ ea_set_attr(to, EA_LITERAL_GENERIC(def->id, 0, 0,
+ .fresh = local, .originated = local, .undef = 1));
+}
+
+static inline void
+ea_set_attr_u32(ea_list **to, const struct ea_class *def, uint flags, u64 data)
+{ ea_set_attr(to, EA_LITERAL_EMBEDDED(def, flags, data)); }
+
+static inline void
+ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, const void *data, uint len)
+{ ea_set_attr(to, EA_LITERAL_STORE_ADATA(def, flags, data, len)); }
+
+static inline void
+ea_copy_attr(ea_list **to, ea_list *from, const struct ea_class *def)
+{
+ eattr *e = ea_find_by_class(from, def);
+ if (e)
+ if (e->type & EAF_EMBEDDED)
+ ea_set_attr_u32(to, def, e->flags, e->u.data);
+ else
+ ea_set_attr_data(to, def, e->flags, e->u.ptr->data, e->u.ptr->length);
+ else
+ ea_unset_attr(to, 0, def);
+}
+
+/*
+ * Common route attributes
+ */
+
+/* Preference: first-order comparison */
+extern struct ea_class ea_gen_preference;
+static inline u32 rt_get_preference(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_preference, 0); }
+
+/* IGP metric: second-order comparison */
+extern struct ea_class ea_gen_igp_metric;
+u32 rt_get_igp_metric(const rte *rt);
+#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other
+					   protocol-specific metric is available */
+
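/* Editorial sketch, not part of the patch: composing a throwaway attribute
 * list from the helpers above and reading it back. The list is built in
 * tmp_linpool, so it is only valid within the current task. */
static inline ea_list *ea_build_attrs_sketch(uint pref, uint metric)
{
  ea_list *eal = NULL;
  ea_set_attr_u32(&eal, &ea_gen_preference, 0, pref);
  ea_set_attr_u32(&eal, &ea_gen_igp_metric, 0, metric);
  ASSERT_DIE(ea_get_int(eal, &ea_gen_preference, 0) == pref);
  return eal;
}
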
+/* From: Advertising router */
+extern struct ea_class ea_gen_from;
+
+
+/* MPLS Label, Policy and Class */
+extern struct ea_class ea_gen_mpls_label,
+ ea_gen_mpls_policy, ea_gen_mpls_class;
+
+
+/* Source: An old method to determine the route source protocol and kind.
+ * To be superseded in the near future by something more informative. */
+extern struct ea_class ea_gen_source;
+static inline u32 rt_get_source_attr(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_source, 0); }
+
+/* Flowspec validation result */
+enum flowspec_valid {
+ FLOWSPEC_UNKNOWN = 0,
+ FLOWSPEC_VALID = 1,
+ FLOWSPEC_INVALID = 2,
+ FLOWSPEC__MAX,
+};
+
+extern const char * flowspec_valid_names[FLOWSPEC__MAX];
+static inline const char *flowspec_valid_name(enum flowspec_valid v)
+{ return (v < FLOWSPEC__MAX) ? flowspec_valid_names[v] : "???"; }
+
+extern struct ea_class ea_gen_flowspec_valid;
+static inline enum flowspec_valid rt_get_flowspec_valid(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN); }
+
+/* Next hop: For now, stored as adata */
+extern struct ea_class ea_gen_nexthop;
+
+static inline void ea_set_dest(struct ea_list **to, uint flags, uint dest)
+{
+ struct nexthop_adata nhad = NEXTHOP_DEST_LITERAL(dest);
+ ea_set_attr_data(to, &ea_gen_nexthop, flags, &nhad.ad.data, nhad.ad.length);
+}
+
+/* Next hop structures */
+
+#define NEXTHOP_ALIGNMENT (_Alignof(struct nexthop))
+#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
+#define NEXTHOP_SIZE(_nh) NEXTHOP_SIZE_CNT(((_nh)->labels))
+#define NEXTHOP_SIZE_CNT(cnt) BIRD_ALIGN((sizeof(struct nexthop) + sizeof(u32) * (cnt)), NEXTHOP_ALIGNMENT)
+#define nexthop_size(nh) NEXTHOP_SIZE((nh))
+
+#define NEXTHOP_NEXT(_nh) ((void *) (_nh) + NEXTHOP_SIZE(_nh))
+#define NEXTHOP_END(_nhad) ((_nhad)->ad.data + (_nhad)->ad.length)
+#define NEXTHOP_VALID(_nh, _nhad) ((void *) (_nh) < (void *) NEXTHOP_END(_nhad))
+#define NEXTHOP_ONE(_nhad) (NEXTHOP_NEXT(&(_nhad)->nh) == NEXTHOP_END(_nhad))
+
+#define NEXTHOP_WALK(_iter, _nhad) for ( \
+ struct nexthop *_iter = &(_nhad)->nh; \
+ (void *) _iter < (void *) NEXTHOP_END(_nhad); \
+ _iter = NEXTHOP_NEXT(_iter))
+
+
+static inline int nexthop_same(struct nexthop_adata *x, struct nexthop_adata *y)
+{ return adata_same(&x->ad, &y->ad); }
+struct nexthop_adata *nexthop_merge(struct nexthop_adata *x, struct nexthop_adata *y, int max, linpool *lp);
+struct nexthop_adata *nexthop_sort(struct nexthop_adata *x, linpool *lp);
+int nexthop_is_sorted(struct nexthop_adata *x);
+
+#define NEXTHOP_IS_REACHABLE(nhad) ((nhad)->ad.length > NEXTHOP_DEST_SIZE)
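
/* Editorial sketch, not part of the patch: counting the next hops packed in
 * one adata blob with the iteration macros above; a blob holding only a
 * special destination carries no next hop records at all. */
static inline uint nexthop_count_sketch(struct nexthop_adata *nhad)
{
  uint n = 0;
  if (NEXTHOP_IS_REACHABLE(nhad))
    NEXTHOP_WALK(nh, nhad)
      n++;
  return n;
}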
+
- if (!nhea)
- return 0;
++static inline struct nexthop_adata *
++rte_get_nexthops(rte *r)
+{
+ eattr *nhea = ea_find(r->attrs, &ea_gen_nexthop);
- struct nexthop_adata *nhad = (void *) nhea->u.ptr;
- return NEXTHOP_IS_REACHABLE(nhad);
++ return nhea ? SKIP_BACK(struct nexthop_adata, ad, nhea->u.ptr) : NULL;
++}
+
++/* Route has a regular, reachable nexthop (i.e. not RTD_UNREACHABLE and the like) */
++static inline int rte_is_reachable(rte *r)
++{
++ struct nexthop_adata *nhad = rte_get_nexthops(r);
++ return nhad && NEXTHOP_IS_REACHABLE(nhad);
+}
+
+static inline int nhea_dest(eattr *nhea)
+{
+ if (!nhea)
+ return RTD_NONE;
+
+  struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+ if (NEXTHOP_IS_REACHABLE(nhad))
+ return RTD_UNICAST;
+ else
+ return nhad->dest;
+}
+
+static inline int rte_dest(const rte *r)
+{
+ return nhea_dest(ea_find(r->attrs, &ea_gen_nexthop));
+}
+
+void rta_init(void);
+ea_list *ea_lookup(ea_list *, int overlay); /* Get a cached (and normalized) variant of this attribute list */
+static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; }
+static inline struct ea_storage *ea_get_storage(ea_list *r)
+{
+ ASSERT_DIE(ea_is_cached(r));
+ return SKIP_BACK(struct ea_storage, l[0], r);
+}
+
+static inline ea_list *ea_clone(ea_list *r) {
+ ASSERT_DIE(0 < atomic_fetch_add_explicit(&ea_get_storage(r)->uc, 1, memory_order_acq_rel));
+ return r;
+}
+void ea__free(struct ea_storage *r);
+static inline void ea_free(ea_list *l) {
+ if (!l) return;
+ struct ea_storage *r = ea_get_storage(l);
+ if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) ea__free(r);
+}
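
/* Editorial sketch, not part of the patch: the cache/refcount pattern above.
 * ea_lookup() returns a cached, normalized list holding one reference,
 * ea_clone() adds a reference and ea_free() drops one, releasing the storage
 * on the last drop. Locking requirements of the global cache are not shown;
 * overlay = 0 is assumed for a flat list. */
static inline void ea_cache_refcount_sketch(ea_list *tmp)
{
  ea_list *cached = ea_lookup(tmp, 0);
  ea_list *extra = ea_clone(cached);     /* second reference to the same storage */
  ea_free(extra);
  ea_free(cached);                       /* last reference; storage is released */
}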
+
+void ea_dump(ea_list *);
+void ea_dump_all(void);
+void ea_show_list(struct cli *, ea_list *);
+
+#define rta_lookup ea_lookup
+#define rta_is_cached ea_is_cached
+#define rta_clone ea_clone
+#define rta_free ea_free
+
+#endif
--- /dev/null
-#include "proto/bgp/bgp.h"
+ /*
+ * BIRD -- BGP/MPLS IP Virtual Private Networks (L3VPN)
+ *
+ * (c) 2022 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2022 CZ.NIC z.s.p.o.
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+ /**
+ * DOC: L3VPN
+ *
+ * The L3VPN protocol implements RFC 4364 BGP/MPLS VPNs using an MPLS backbone.
+ * It works similarly to the pipe protocol. It connects an IP table (one per
+ * VRF) with the (global) VPN table. Routes passed from the VPN table to the
+ * IP table are stripped of the RD and filtered by import targets; routes
+ * passed in the other direction are extended with the RD, MPLS labels and
+ * export targets in extended communities. A separate MPLS channel is used to
+ * announce MPLS routes for the labels.
+ *
+ * Note that in contrast to the pipe protocol, the L3VPN protocol has both
+ * IPv4 and IPv6 channels in one instance. Also, both IP and VPN channels are
+ * presented to users as separate channels, although that will change in the
+ * future.
+ *
+ * The L3VPN protocol has different default preferences on the IP and VPN
+ * sides. The reason is that in the import direction (VPN -> IP) routes should
+ * have lower preference than ones received from a local CE (perhaps by EBGP),
+ * while in the export direction (IP -> VPN) routes should have higher
+ * preference than ones received from remote PEs (by IBGP).
+ *
+ * Supported standards:
+ * RFC 4364 - BGP/MPLS IP Virtual Private Networks (L3VPN)
+ */
+
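/*
 * Editorial sketch, not part of the patch: an illustrative configuration of
 * one VRF. Table names and numeric values are made up; option names are
 * assumed from the documented l3vpn configuration syntax.
 *
 *   protocol l3vpn vpn1 {
 *     vrf "vrf1";
 *     ipv4 { table vrf1_ip4; };
 *     vpn4 { table vpntab4; };
 *     mpls;
 *     rd 65000:1;
 *     import target [(rt, 65000, 1)];
 *     export target [(rt, 65000, 1)];
 *   }
 */
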
+ #undef LOCAL_DEBUG
+
+ #include "nest/bird.h"
+ #include "nest/iface.h"
+ #include "nest/protocol.h"
+ #include "nest/route.h"
+ #include "nest/mpls.h"
+ #include "nest/cli.h"
+ #include "conf/conf.h"
+ #include "filter/filter.h"
+ #include "filter/data.h"
+ #include "lib/string.h"
+
+ #include "l3vpn.h"
+
-#define EA_BGP_NEXT_HOP EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)
-#define EA_BGP_EXT_COMMUNITY EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)
-#define EA_BGP_MPLS_LABEL_STACK EA_CODE(PROTOCOL_BGP, BA_MPLS_LABEL_STACK)
-
-static inline const struct adata * ea_get_adata(ea_list *e, uint id)
-{ eattr *a = ea_find(e, id); return a ? a->u.ptr : &null_adata; }
++#include "proto/pipe/pipe.h"
++
++#include <stdbool.h>
+
+ /*
+ * TODO:
+ * - import/export target reconfiguration
+ * - check for simple nodes in export route
+ * - replace pair of channels with shared channel for one address family
+ * - improve route comparisons in VRFs
+ * - optional import/export target all
+ * - optional support for route origins
+ * - optional automatic assignment of RDs
+ * - MPLS-in-IP encapsulation
+ */
+
-mpls_valid_nexthop(const rta *a)
++static struct ea_class *ea_bgp_next_hop,
++ *ea_bgp_ext_community,
++ *ea_bgp_mpls_label_stack;
+
+ static inline int
- if (a->dest != RTD_UNICAST)
++mpls_valid_nexthop(struct nexthop_adata *nhad)
+ {
+ /* MPLS does not support special blackhole targets */
- for (const struct nexthop *nh = &a->nh; nh ; nh = nh->next)
++ if (!NEXTHOP_IS_REACHABLE(nhad))
+ return 0;
+
+ /* MPLS does not support ARP / neighbor discovery */
-l3vpn_rt_notify(struct proto *P, struct channel *c0, net *net, rte *new, rte *old UNUSED)
++ NEXTHOP_WALK(nh, nhad)
+ if (ipa_zero(nh->gw) && (nh->iface->flags & IF_MULTIACCESS))
+ return 0;
+
+ return 1;
+ }
+
+ static int
+ l3vpn_import_targets(struct l3vpn_proto *p, const struct adata *list)
+ {
+ return (p->import_target_one) ?
+ ec_set_contains(list, p->import_target->from.val.ec) :
+ eclist_match_set(list, p->import_target);
+ }
+
+ static struct adata *
+ l3vpn_export_targets(struct l3vpn_proto *p, const struct adata *src)
+ {
+ u32 *s = int_set_get_data(src);
+ int len = int_set_get_size(src);
+
+ struct adata *dst = lp_alloc(tmp_linpool, sizeof(struct adata) + (len + p->export_target_length) * sizeof(u32));
+ u32 *d = int_set_get_data(dst);
+ int end = 0;
+
+ for (int i = 0; i < len; i += 2)
+ {
+ /* Remove existing route targets */
+ uint type = s[i] >> 16;
+ if (ec_type_is_rt(type))
+ continue;
+
+ d[end++] = s[i];
+ d[end++] = s[i+1];
+ }
+
+ /* Add new route targets */
+ memcpy(d + end, p->export_target_data, p->export_target_length * sizeof(u32));
+ end += p->export_target_length;
+
+ /* Set length */
+ dst->length = end * sizeof(u32);
+
+ return dst;
+ }
+
+ static void
+ l3vpn_add_ec(const struct f_tree *t, void *P)
+ {
+ struct l3vpn_proto *p = P;
+ ec_put(p->export_target_data, p->export_target_length, t->from.val.ec);
+ p->export_target_length += 2;
+ }
+
+ static void
+ l3vpn_prepare_targets(struct l3vpn_proto *p)
+ {
+ const struct f_tree *t = p->import_target;
+ p->import_target_one = !t->left && !t->right && (t->from.val.ec == t->to.val.ec);
+
+ uint len = 2 * tree_node_count(p->export_target);
+ p->export_target_data = mb_alloc(p->p.pool, len * sizeof(u32));
+ p->export_target_length = 0;
+ tree_walk(p->export_target, l3vpn_add_ec, p);
+ ASSERT(p->export_target_length == len);
+ }
+
+ /* Convert a 64-bit RD to a 32-bit source ID; unfortunately, this may produce collisions */
+ static inline struct rte_src * l3vpn_get_source(struct l3vpn_proto *p, u64 rd)
+ { return rt_get_source(&p->p, (u32)(rd >> 32) ^ u32_hash(rd)); }
+ //{ return p->p.main_source; }
+
+ static void
- const net_addr *n0 = net->n.addr;
++l3vpn_rt_notify(struct proto *P, struct channel *c0, const net_addr *n0, rte *new, const rte *old UNUSED)
+ {
+ struct l3vpn_proto *p = (void *) P;
+ struct rte_src *src = NULL;
+ struct channel *dst = NULL;
+ int export;
+
- const rta *a0 = new->attrs;
- rta *a = alloca(RTA_MAX_SIZE);
- *a = (rta) {
- .source = RTS_L3VPN,
- .scope = SCOPE_UNIVERSE,
- .dest = a0->dest,
- .pref = dst->preference,
- .eattrs = a0->eattrs
- };
+ net_addr *n = alloca(sizeof(net_addr_vpn6));
+
+ switch (c0->net_type)
+ {
+ case NET_IP4:
+ net_fill_vpn4(n, net4_prefix(n0), net4_pxlen(n0), p->rd);
+ src = p->p.main_source;
+ dst = p->vpn4_channel;
+ export = 1;
+ break;
+
+ case NET_IP6:
+ net_fill_vpn6(n, net6_prefix(n0), net6_pxlen(n0), p->rd);
+ src = p->p.main_source;
+ dst = p->vpn6_channel;
+ export = 1;
+ break;
+
+ case NET_VPN4:
+ net_fill_ip4(n, net4_prefix(n0), net4_pxlen(n0));
+ src = l3vpn_get_source(p, ((const net_addr_vpn4 *) n0)->rd);
+ dst = p->ip4_channel;
+ export = 0;
+ break;
+
+ case NET_VPN6:
+ net_fill_ip6(n, net6_prefix(n0), net6_pxlen(n0));
+ src = l3vpn_get_source(p, ((const net_addr_vpn6 *) n0)->rd);
+ dst = p->ip6_channel;
+ export = 0;
+ break;
+
+ case NET_MPLS:
+ return;
+ }
+
++  /* For withdrawals (new == NULL) there are no attributes to inspect */
++  const struct adata *ecad = new ? ea_get_adata(new->attrs, ea_bgp_ext_community) : NULL;
++  struct nexthop_adata *nhad_orig = new ? rte_get_nexthops(new) : NULL;
++
+ if (new)
+ {
- nexthop_link(a, &a0->nh);
++ new->src = src;
+
- ea_unset_attr(&a->eattrs, tmp_linpool, 0, EA_MPLS_LABEL);
- ea_unset_attr(&a->eattrs, tmp_linpool, 0, EA_MPLS_POLICY);
++ ea_set_attr_u32(&new->attrs, &ea_gen_source, 0, RTS_L3VPN);
++ ea_set_attr_u32(&new->attrs, &ea_gen_preference, 0, dst->preference);
+
+ /* Do not keep original labels, we may assign new ones */
- ea_unset_attr(&a->eattrs, tmp_linpool, 0, EA_BGP_NEXT_HOP);
- ea_unset_attr(&a->eattrs, tmp_linpool, 0, EA_BGP_MPLS_LABEL_STACK);
++ ea_unset_attr(&new->attrs, 0, &ea_gen_mpls_label);
++ ea_unset_attr(&new->attrs, 0, &ea_gen_mpls_policy);
+
+ /* We are crossing VRF boundary, NEXT_HOP is no longer valid */
- ea_set_attr_u32(&a->eattrs, tmp_linpool, EA_MPLS_POLICY, 0, EAF_TYPE_INT, mc->label_policy);
++ ea_unset_attr(&new->attrs, 0, ea_bgp_next_hop);
++ ea_unset_attr(&new->attrs, 0, ea_bgp_mpls_label_stack);
++
++    /* The hostentry is not valid across the VRF boundary either */
++ ea_unset_attr(&new->attrs, 0, &ea_gen_hostentry);
+
+ if (export)
+ {
+ struct mpls_channel *mc = (void *) p->p.mpls_channel;
- struct adata *ad = l3vpn_export_targets(p, ea_get_adata(a0->eattrs, EA_BGP_EXT_COMMUNITY));
- ea_set_attr_ptr(&a->eattrs, tmp_linpool, EA_BGP_EXT_COMMUNITY, 0, EAF_TYPE_EC_SET, ad);
++ ea_set_attr_u32(&new->attrs, &ea_gen_mpls_policy, 0, mc->label_policy);
+
- if (!mpls_valid_nexthop(a) && p->p.vrf)
++ ea_set_attr(&new->attrs, EA_LITERAL_DIRECT_ADATA(
++ ea_bgp_ext_community, 0, l3vpn_export_targets(p, ecad)));
+
+ /* Replace MPLS-incompatible nexthop with lookup in VRF table */
- a->dest = RTD_UNICAST;
- a->nh = (struct nexthop) { .iface = p->p.vrf };
++      if ((!nhad_orig || !mpls_valid_nexthop(nhad_orig)) && p->p.vrf)
+ {
- }
++ struct nexthop_adata nhad = {
++ .nh.iface = p->p.vrf,
++ .ad.length = sizeof nhad - sizeof nhad.ad,
++ };
++ ea_set_attr_data(&new->attrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length);
+ }
- /* Keep original IGP metric as a base for L3VPN metric */
- if (!export)
- a->igp_metric = a0->igp_metric;
+
- rte *e = rte_get_temp(a, src);
- rte_update2(dst, n, e, src);
++ /* Drop original IGP metric on export;
++ * kept on import as a base for L3VPN metric */
++ ea_unset_attr(&new->attrs, 0, &ea_gen_igp_metric);
++ }
+
- rte_update2(dst, n, NULL, src);
++ rte_update(dst, n, new, src);
+ }
+ else
+ {
- struct proto *pp = e->sender->proto;
++ rte_update(dst, n, NULL, src);
+ }
+ }
+
+
+ static int
+ l3vpn_preexport(struct channel *C, rte *e)
+ {
+ struct l3vpn_proto *p = (void *) C->proto;
- if (pp == C->proto)
+
- return l3vpn_import_targets(p, ea_get_adata(e->attrs->eattrs, EA_BGP_EXT_COMMUNITY)) ? 0 : -1;
++ if (&C->in_req == e->sender->req)
+ return -1; /* Avoid local loops automatically */
+
+ switch (C->net_type)
+ {
+ case NET_IP4:
+ case NET_IP6:
+ return 0;
+
+ case NET_VPN4:
+ case NET_VPN6:
-static void
-l3vpn_reload_routes(struct channel *C)
++ return l3vpn_import_targets(p, ea_get_adata(e->attrs, ea_bgp_ext_community)) ? 0 : -1;
+
+ case NET_MPLS:
+ return -1;
+
+ default:
+ bug("invalid type");
+ }
+ }
+
- channel_request_feeding(p->vpn4_channel);
++/* TODO: unify the code between l3vpn and pipe */
++void pipe_import_by_refeed_free(struct channel_feeding_request *cfr);
++
++static int
++l3vpn_reload_routes(struct channel *C, struct channel_import_request *cir)
+ {
+ struct l3vpn_proto *p = (void *) C->proto;
++ struct channel *feed = NULL;
+
+ /* Route reload on one channel is just refeed on the other */
+ switch (C->net_type)
+ {
+ case NET_IP4:
- channel_request_feeding(p->vpn6_channel);
++ feed = p->vpn4_channel;
+ break;
+
+ case NET_IP6:
- channel_request_feeding(p->ip4_channel);
++ feed = p->vpn6_channel;
+ break;
+
+ case NET_VPN4:
- channel_request_feeding(p->ip6_channel);
++ feed = p->ip4_channel;
+ break;
+
+ case NET_VPN6:
- break;
++ feed = p->ip6_channel;
+ break;
+
+ case NET_MPLS:
+ /* FIXME */
-}
++ return 1;
+ }
-static inline u32
-l3vpn_metric(rte *e)
-{
- u32 metric = ea_get_int(e->attrs->eattrs, EA_GEN_IGP_METRIC, e->attrs->igp_metric);
- return MIN(metric, IGP_METRIC_UNKNOWN);
+
-l3vpn_rte_better(rte *new, rte *old)
++ if (cir->trie)
++ {
++ struct import_to_export_reload *reload = lp_alloc(cir->trie->lp, sizeof *reload);
++ *reload = (struct import_to_export_reload) {
++ .cir = cir,
++ .cfr = {
++ .type = CFRT_AUXILIARY,
++ .done = pipe_import_by_refeed_free,
++ .trie = cir->trie,
++ },
++ };
++ channel_request_feeding(feed, &reload->cfr);
++ }
++ else
++ {
++ /* Route reload on one channel is just refeed on the other */
++ channel_request_feeding_dynamic(feed, CFRT_DIRECT);
++ cir->done(cir);
++ }
++
++ return 1;
+ }
+
+ static int
- return l3vpn_metric(new) < l3vpn_metric(old);
++l3vpn_rte_better(const rte *new, const rte *old)
+ {
+   /* This is a hack; we should have a full BGP-style comparison */
- P->rte_better = l3vpn_rte_better;
++ return rt_get_igp_metric(new) < rt_get_igp_metric(old);
+ }
+
+ static void
+ l3vpn_postconfig(struct proto_config *CF)
+ {
+ struct l3vpn_config *cf = (void *) CF;
+
+ if (!!proto_cf_find_channel(CF, NET_IP4) != !!proto_cf_find_channel(CF, NET_VPN4))
+ cf_error("For IPv4 L3VPN, both IPv4 and VPNv4 channels must be specified");
+
+ if (!!proto_cf_find_channel(CF, NET_IP6) != !!proto_cf_find_channel(CF, NET_VPN6))
+ cf_error("For IPv6 L3VPN, both IPv6 and VPNv6 channels must be specified");
+
+ if (!proto_cf_find_channel(CF, NET_MPLS))
+ cf_error("MPLS channel not specified");
+
+ if (!cf->rd)
+ cf_error("Route distinguisher not specified");
+
+ if (!cf->import_target && !cf->export_target)
+ cf_error("Route target not specified");
+
+ if (!cf->import_target)
+ cf_error("Import target not specified");
+
+ if (!cf->export_target)
+ cf_error("Export target not specified");
+ }
+
+ static struct proto *
+ l3vpn_init(struct proto_config *CF)
+ {
++ ASSERT_DIE(the_bird_locked());
++
++ /* Resolve registered BGP attribute classes once */
++ static bool bgp_attributes_resolved = 0;
++ if (!bgp_attributes_resolved)
++ {
++ ea_bgp_next_hop = ea_class_find_by_name("bgp_next_hop");
++ ea_bgp_ext_community = ea_class_find_by_name("bgp_ext_community");
++ ea_bgp_mpls_label_stack = ea_class_find_by_name("bgp_mpls_label_stack");
++ bgp_attributes_resolved = 1;
++ }
++
+ struct proto *P = proto_new(CF);
+ struct l3vpn_proto *p = (void *) P;
+ // struct l3vpn_config *cf = (void *) CF;
+
+ proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4));
+ proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6));
+ proto_configure_channel(P, &p->vpn4_channel, proto_cf_find_channel(CF, NET_VPN4));
+ proto_configure_channel(P, &p->vpn6_channel, proto_cf_find_channel(CF, NET_VPN6));
+ proto_configure_channel(P, &P->mpls_channel, proto_cf_find_channel(CF, NET_MPLS));
+
+ P->rt_notify = l3vpn_rt_notify;
+ P->preexport = l3vpn_preexport;
+ P->reload_routes = l3vpn_reload_routes;
- proto_setup_mpls_map(P, RTS_L3VPN, 1);
+
+ return P;
+ }
+
+ static int
+ l3vpn_start(struct proto *P)
+ {
+ struct l3vpn_proto *p = (void *) P;
+ struct l3vpn_config *cf = (void *) P->cf;
+
+ p->rd = cf->rd;
+ p->import_target = cf->import_target;
+ p->export_target = cf->export_target;
+
+ l3vpn_prepare_targets(p);
+
- if (P->vrf_set)
- P->mpls_map->vrf_iface = P->vrf;
++ proto_setup_mpls_map(P, RTS_L3VPN);
+
- proto_shutdown_mpls_map(P, 1);
++ P->mpls_map->vrf_iface = P->vrf;
+
+ return PS_UP;
+ }
+
+ static int
+ l3vpn_shutdown(struct proto *P)
+ {
+ // struct l3vpn_proto *p = (void *) P;
+
- proto_setup_mpls_map(P, RTS_L3VPN, 1);
++ proto_shutdown_mpls_map(P);
+
+ return PS_DOWN;
+ }
+
+ static int
+ l3vpn_reconfigure(struct proto *P, struct proto_config *CF)
+ {
+ struct l3vpn_proto *p = (void *) P;
+ struct l3vpn_config *cf = (void *) CF;
+
+ if (!proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)) ||
+ !proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)) ||
+ !proto_configure_channel(P, &p->vpn4_channel, proto_cf_find_channel(CF, NET_VPN4)) ||
+ !proto_configure_channel(P, &p->vpn6_channel, proto_cf_find_channel(CF, NET_VPN6)) ||
+ !proto_configure_channel(P, &P->mpls_channel, proto_cf_find_channel(CF, NET_MPLS)))
+ return 0;
+
+ if ((p->rd != cf->rd) ||
+ !same_tree(p->import_target, cf->import_target) ||
+ !same_tree(p->export_target, cf->export_target))
+ return 0;
+
+ /*
+ if (!same_tree(p->import_target, cf->import_target))
+ {
+ if (p->vpn4_channel && (p->vpn4_channel->channel_state == CS_UP))
+ channel_request_feeding(p->vpn4_channel);
+
+ if (p->vpn6_channel && (p->vpn6_channel->channel_state == CS_UP))
+ channel_request_feeding(p->vpn6_channel);
+ }
+
+ if (!same_tree(p->export_target, cf->export_target))
+ {
+ if (p->ip4_channel && (p->ip4_channel->channel_state == CS_UP))
+ channel_request_feeding(p->ip4_channel);
+
+ if (p->ip6_channel && (p->ip6_channel->channel_state == CS_UP))
+ channel_request_feeding(p->ip6_channel);
+ }
+ */
+
-l3vpn_get_route_info(rte *rte, byte *buf)
++ proto_setup_mpls_map(P, RTS_L3VPN);
+
+ return 1;
+ }
+
+ static void
+ l3vpn_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
+ {
+ /* Just a shallow copy, not many items here */
+ }
+
+ static void
- u32 metric = l3vpn_metric(rte);
++l3vpn_get_route_info(const rte *rte, byte *buf)
+ {
- bsprintf(buf, " (%u/%u)", rte->attrs->pref, metric);
++ u32 metric = rt_get_igp_metric(rte);
++ u32 pref = rt_get_preference(rte);
++
+ if (metric < IGP_METRIC_UNKNOWN)
- bsprintf(buf, " (%u/?)", rte->attrs->pref);
++ bsprintf(buf, " (%u/%u)", pref, metric);
+ else
- .class = PROTOCOL_L3VPN,
++ bsprintf(buf, " (%u/?)", pref);
+ }
+
++struct rte_owner_class l3vpn_rte_owner_class = {
++ .get_route_info = l3vpn_get_route_info,
++ .rte_better = l3vpn_rte_better,
++};
+
+ struct protocol proto_l3vpn = {
+ .name = "L3VPN",
+ .template = "l3vpn%d",
- .get_route_info = l3vpn_get_route_info
+ .channel_mask = NB_IP | NB_VPN | NB_MPLS,
+ .proto_size = sizeof(struct l3vpn_proto),
+ .config_size = sizeof(struct l3vpn_config),
++ .startup = PROTOCOL_STARTUP_CONNECTOR,
+ .postconfig = l3vpn_postconfig,
+ .init = l3vpn_init,
+ .start = l3vpn_start,
+ .shutdown = l3vpn_shutdown,
+ .reconfigure = l3vpn_reconfigure,
+ .copy_config = l3vpn_copy_config,
+ };
+
+ void
+ l3vpn_build(void)
+ {
+ proto_build(&proto_l3vpn);
+ }