From: Maria Matejka
Date: Tue, 2 Aug 2022 20:08:59 +0000 (+0200)
Subject: Merge commit 'f0507f05ce57398e135651896dace4cb68eeed54' into thread-next
X-Git-Tag: v3.0-alpha1~170
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=71b434a987067475b517792360f58dbe03bfee9e;p=thirdparty%2Fbird.git

Merge commit 'f0507f05ce57398e135651896dace4cb68eeed54' into thread-next
---

71b434a987067475b517792360f58dbe03bfee9e
diff --cc filter/f-inst.c
index fff935179,706eb6841..60042476a
--- a/filter/f-inst.c
+++ b/filter/f-inst.c
@@@ -691,40 -523,21 +691,40 @@@
      switch (sa.sa_code)
      {
-     case SA_FROM:	RESULT(sa.f_type, ip, rta->from); break;
-     case SA_GW:		RESULT(sa.f_type, ip, rta->nh.gw); break;
-     case SA_NET:	RESULT(sa.f_type, net, fs->rte->net); break;
-     case SA_PROTO:	RESULT(sa.f_type, s, fs->rte->src->owner->name); break;
-     case SA_SOURCE:	RESULT(sa.f_type, i, rta->source); break;
-     case SA_SCOPE:	RESULT(sa.f_type, i, rta->scope); break;
-     case SA_DEST:	RESULT(sa.f_type, i, rta->dest); break;
-     case SA_IFNAME:	RESULT(sa.f_type, s, rta->nh.iface ? rta->nh.iface->name : ""); break;
-     case SA_IFINDEX:	RESULT(sa.f_type, i, rta->nh.iface ? rta->nh.iface->index : 0); break;
-     case SA_WEIGHT:	RESULT(sa.f_type, i, rta->nh.weight + 1); break;
-     case SA_PREF:	RESULT(sa.f_type, i, rta->pref); break;
-     case SA_GW_MPLS:	RESULT(sa.f_type, i, rta->nh.labels ? rta->nh.label[0] : MPLS_NULL); break;
-
+     case SA_NET:	RESULT(sa.type, net, fs->rte->net); break;
-     case SA_PROTO:	RESULT(sa.type, s, fs->rte->src->proto->name); break;
++    case SA_PROTO:	RESULT(sa.type, s, fs->rte->src->owner->name); break;
      default:
-       bug("Invalid static attribute access (%u/%u)", sa.f_type, sa.sa_code);
+       {
+ 	struct eattr *nhea = ea_find(*fs->eattrs, &ea_gen_nexthop);
+ 	struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
+ 	struct nexthop *nh = nhad ? &nhad->nh : NULL;
+
+ 	switch (sa.sa_code)
+ 	{
+ 	  case SA_DEST:
+ 	    RESULT(sa.type, i, nhad ?
+ 		(NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest)
+ 		: RTD_NONE);
+ 	    break;
+ 	  case SA_GW:
+ 	    RESULT(sa.type, ip, nh ? nh->gw : IPA_NONE);
+ 	    break;
+ 	  case SA_IFNAME:
+ 	    RESULT(sa.type, s, (nh && nh->iface) ? nh->iface->name : "");
+ 	    break;
+ 	  case SA_IFINDEX:
+ 	    RESULT(sa.type, i, (nh && nh->iface) ? nh->iface->index : 0);
+ 	    break;
+ 	  case SA_WEIGHT:
+ 	    RESULT(sa.type, i, (nh ? nh->weight : 0) + 1);
+ 	    break;
+ 	  case SA_GW_MPLS:
+ 	    RESULT(sa.type, i, (nh && nh->labels) ? nh->label[0] : MPLS_NULL);
+ 	    break;
+ 	  default:
+ 	    bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code);
+ 	}
+       }
      }
    }
  }
@@@ -753,25 -554,16 +753,26 @@@
      switch (sa.sa_code)
      {
-     case SA_FROM:
-       rta->from = v1.val.ip;
-       break;
+     case SA_DEST:
+       {
+ 	int i = v1.val.i;
+ 	if ((i != RTD_BLACKHOLE) && (i != RTD_UNREACHABLE) && (i != RTD_PROHIBIT))
+ 	  runtime( "Destination can be changed only to blackhole, unreachable or prohibit" );
+
+ 	nha.nha.dest = i;
+ 	nha.ad.length = NEXTHOP_DEST_SIZE;
+ 	break;
+       }
      case SA_GW:
      {
+ 	struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop);
+
 	ip_addr ip = v1.val.ip;
- 	struct iface *ifa = ipa_is_link_local(ip) ? rta->nh.iface : NULL;
+ 	struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ?
+ 	  ((struct nexthop_adata *) nh_ea->u.ptr)->nh.iface : NULL;
+
- 	neighbor *n = neigh_find(fs->rte->src->proto, ip, ifa, 0);
+ 	/* XXX this code supposes that every owner is a protocol XXX */
+ 	neighbor *n = neigh_find(SKIP_BACK(struct proto, sources, fs->rte->src->owner), ip, ifa, 0);
 	if (!n || (n->scope == SCOPE_HOST))
 	  runtime( "Invalid gw address" );
diff --cc lib/locking.h
index a9a8aa9b8,0a69f50f0..1df300631
--- a/lib/locking.h
+++ b/lib/locking.h
@@@ -16,7 -16,9 +16,8 @@@ struct lock_order
    struct domain_generic *the_bird;
    struct domain_generic *proto;
    struct domain_generic *rtable;
+   struct domain_generic *attrs;
-   struct domain_generic *cork;
-   struct domain_generic *event;
+   struct domain_generic *resource;
  };

  extern _Thread_local struct lock_order locking_stack;
diff --cc lib/route.h
index 2d6912156,000000000..cf3c70ba9
mode 100644,000000..100644
--- a/lib/route.h
+++ b/lib/route.h
@@@ -1,446 -1,0 +1,531 @@@
+/*
+ *	BIRD Internet Routing Daemon -- Routing data structures
+ *
+ *	(c) 1998--2000 Martin Mares
+ *	(c) 2022 Maria Matejka
+ *
+ *	Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_LIB_ROUTE_H_
+#define _BIRD_LIB_ROUTE_H_
+
+#include "lib/type.h"
++#include "lib/rcu.h"
++#include "lib/hash.h"
++#include "lib/event.h"
+
+struct network;
+struct proto;
+struct cli;
-
++struct rtable;
+
+typedef struct rte {
+  struct ea_list *attrs;		/* Attributes of this route */
+  const net_addr *net;			/* Network this RTE belongs to */
+  struct rte_src *src;			/* Route source that created the route */
+  struct rt_import_hook *sender;	/* Import hook used to send the route to the routing table */
+  btime lastmod;			/* Last modified (set by table) */
+  u32 id;				/* Table specific route id */
+  byte flags;				/* Table-specific flags */
+  byte pflags;				/* Protocol-specific flags */
+  u8 generation;			/* If this route import is based on other previously exported route,
+					   this value should be 1 + MAX(generation of the parent routes).
+					   Otherwise the route is independent and this value is zero. */
+  u8 stale_cycle;			/* Auxiliary value for route refresh */
+} rte;
+
+#define REF_FILTERED	2		/* Route is rejected by import filter */
+#define REF_PENDING	32		/* Route has not propagated completely yet */
+
+/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
+static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); }
+
+/* Route just has REF_FILTERED flag */
+static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); }
+
+struct rte_src {
+  struct rte_src *next;			/* Hash chain */
-  struct proto *proto;			/* Protocol the source is based on */
++  struct rte_owner *owner;		/* Route source owner */
+  u32 private_id;			/* Private ID, assigned by the protocol */
+  u32 global_id;			/* Globally unique ID of the source */
-  unsigned uc;				/* Use count */
++  _Atomic u64 uc;			/* Use count */
++};
++
++struct rte_owner_class {
++  void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */
++  int (*rte_better)(struct rte *, struct rte *);
++  int (*rte_mergable)(struct rte *, struct rte *);
++  u32 (*rte_igp_metric)(const rte *);
++};
++
++struct rte_owner {
++  struct rte_owner_class *class;
++  int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
++  HASH(struct rte_src) hash;
++  const char *name;
++  u32 hash_key;
++  u32 uc;
++  event_list *list;
++  event *prune;
++  event *stop;
+};
+
++DEFINE_DOMAIN(attrs);
++extern DOMAIN(attrs) attrs_domain;
++
++#define RTA_LOCK	LOCK_DOMAIN(attrs, attrs_domain)
++#define RTA_UNLOCK	UNLOCK_DOMAIN(attrs, attrs_domain)
++
++#define RTE_SRC_PU_SHIFT	44
++#define RTE_SRC_IN_PROGRESS	(1ULL << RTE_SRC_PU_SHIFT)
++
++/* Get a route source. This also locks the source, therefore the caller has to
++ * unlock the source after the route has been propagated. */
++struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
++#define rt_get_source(p, id)	rt_get_source_o(&(p)->sources, (id))
+
- struct rte_src *rt_find_source(struct proto *p, u32 id);
- struct rte_src *rt_get_source(struct proto *p, u32 id);
+struct rte_src *rt_find_source_global(u32 id);
- static inline void rt_lock_source(struct rte_src *src) { src->uc++; }
- static inline void rt_unlock_source(struct rte_src *src) { src->uc--; }
- void rt_prune_sources(void);
++
++static inline void rt_lock_source(struct rte_src *src)
++{
++  /* Locking a source is trivial; somebody already holds it so we just increase
++   * the use count. Nothing can be freed underneath our hands. */
++  u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
++  ASSERT_DIE(uc > 0);
++}
++
++static inline void rt_unlock_source(struct rte_src *src)
++{
++  /* Unlocking is tricky. We do it lockless so at the same time, the prune
++   * event may be running, therefore if the unlock gets us to zero, it must be
++   * the last thing in this routine, otherwise the prune routine may find the
++   * source's usecount zeroed, freeing it prematurely.
++   *
++   * The usecount is split into two parts:
++   * the top 20 bits are an in-progress indicator
++   * the bottom 44 bits keep the actual usecount.
++   *
++   * Therefore at most 1 million of writers can simultaneously unlock the same
++   * source, while at most ~17T different routes can reference it. Both limits
++   * are insanely high from the 2022 point of view. Let's suppose that when 17T
++   * routes or 1M writers get real, we get also 128bit atomic variables in the
++   * C norm. */
++
++  /* First, we push the in-progress indicator */
++  u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel);
++
++  /* Then we split the indicator to its parts. Remember, we got the value before the operation happened. */
++  u64 pending = (uc >> RTE_SRC_PU_SHIFT) + 1;
++  uc &= RTE_SRC_IN_PROGRESS - 1;
++
++  /* We per-use the RCU critical section indicator to make the prune event wait
++   * until we finish here in the rare case we get preempted. */
++  rcu_read_lock();
++
++  /* Obviously, there can't be more pending unlocks than the usecount itself */
++  if (uc == pending)
++    /* If we're the last unlocker, schedule the owner's prune event */
++    ev_send(src->owner->list, src->owner->prune);
++  else
++    ASSERT_DIE(uc > pending);
++
++  /* And now, finally, simultaneously pop the in-progress indicator and the
++   * usecount, possibly allowing the source pruning routine to free this structure */
++  atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel);
++
++  /* ... and to reduce the load a bit, the source pruning routine will better wait for
++   * RCU synchronization instead of a busy loop. */
++  rcu_read_unlock();
++}
++
++void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
++void rt_destroy_sources(struct rte_owner *, event *);
+
+/*
+ *	Route Attributes
+ *
+ *	Beware: All standard BGP attributes must be represented here instead
+ *	of making them local to the route. This is needed to ensure proper
+ *	construction of BGP route attribute lists.
+ */
+
+/* Nexthop structure */
+struct nexthop {
+  ip_addr gw;				/* Next hop */
+  struct iface *iface;			/* Outgoing interface */
+  byte flags;
+  byte weight;
+  byte labels;				/* Number of all labels */
+  u32 label[0];
+};
+
+/* For packing one into eattrs */
+struct nexthop_adata {
+  struct adata ad;
+  /* There is either a set of nexthops or a special destination (RTD_*) */
+  union {
+    struct nexthop nh;
+    uint dest;
+  };
+};
+
+#define NEXTHOP_DEST_SIZE	(OFFSETOF(struct nexthop_adata, dest) + sizeof(uint) - OFFSETOF(struct adata, data))
+#define NEXTHOP_DEST_LITERAL(x)	((struct nexthop_adata) { \
+    .ad.length = NEXTHOP_DEST_SIZE, .dest = (x), })
+
+#define RNF_ONLINK		0x1	/* Gateway is onlink regardless of IP ranges */
+
+
+#define RTS_STATIC 1			/* Normal static route */
+#define RTS_INHERIT 2			/* Route inherited from kernel */
+#define RTS_DEVICE 3			/* Device route */
+#define RTS_STATIC_DEVICE 4		/* Static device route */
+#define RTS_REDIRECT 5			/* Learned via redirect */
+#define RTS_RIP 6			/* RIP route */
+#define RTS_OSPF 7			/* OSPF route */
+#define RTS_OSPF_IA 8			/* OSPF inter-area route */
+#define RTS_OSPF_EXT1 9			/* OSPF external route type 1 */
+#define RTS_OSPF_EXT2 10		/* OSPF external route type 2 */
+#define RTS_BGP 11			/* BGP route */
+#define RTS_PIPE 12			/* Inter-table wormhole */
+#define RTS_BABEL 13			/* Babel route */
+#define RTS_RPKI 14			/* Route Origin Authorization */
+#define RTS_PERF 15			/* Perf checker */
+#define RTS_MAX 16
+
+#define RTD_NONE 0			/* Undefined next hop */
+#define RTD_UNICAST 1			/* A standard next hop */
+#define RTD_BLACKHOLE 2			/* Silently drop packets */
+#define RTD_UNREACHABLE 3		/* Reject as unreachable */
+#define RTD_PROHIBIT 4			/* Administratively prohibited */
+#define RTD_MAX 5
+
+extern const char * rta_dest_names[RTD_MAX];
+
+static inline const char *rta_dest_name(uint n)
+{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; }
+
+
+/*
+ *	Extended Route Attributes
+ */
+
+typedef struct eattr {
+  word id;				/* EA_CODE(PROTOCOL_..., protocol-dependent ID) */
+  byte flags;				/* Protocol-dependent flags */
+  byte type;				/* Attribute type */
+  byte rfu:5;
+  byte originated:1;			/* The attribute has originated locally */
+  byte fresh:1;				/* An uncached attribute (e.g. modified in export filter) */
+  byte undef:1;				/* Explicitly undefined */
+
+  PADDING(unused, 3, 3);
+
+  union bval u;
+} eattr;
+
+
+#define EA_CODE_MASK 0xffff
+#define EA_ALLOW_UNDEF 0x10000		/* ea_find: allow EAF_TYPE_UNDEF */
+#define EA_BIT(n) ((n) << 24)		/* Used in bitfield accessors */
+#define EA_BIT_GET(ea) ((ea) >> 24)
+
+typedef struct ea_list {
+  struct ea_list *next;			/* In case we have an override list */
+  byte flags;				/* Flags: EALF_... */
+  byte rfu;
+  word count;				/* Number of attributes */
+  eattr attrs[0];			/* Attribute definitions themselves */
+} ea_list;
+
+struct ea_storage {
+  struct ea_storage *next_hash;		/* Next in hash chain */
+  struct ea_storage **pprev_hash;	/* Previous in hash chain */
+  u32 uc;				/* Use count */
+  u32 hash_key;				/* List hash */
+  ea_list l[0];				/* The list itself */
+};
+
+#define EALF_SORTED 1			/* Attributes are sorted by code */
+#define EALF_BISECT 2			/* Use interval bisection for searching */
+#define EALF_CACHED 4			/* List is cached */
+
+struct ea_class {
+#define EA_CLASS_INSIDE \
+  const char *name;			/* Name (both print and filter) */ \
+  struct symbol *sym;			/* Symbol to export to configs */ \
+  uint id;				/* Autoassigned attribute ID */ \
+  uint uc;				/* Reference count */ \
+  btype type;				/* Data type ID */ \
+  uint readonly:1;			/* This attribute can't be changed by filters */ \
+  uint conf:1;				/* Requested by config */ \
+  uint hidden:1;			/* Technical attribute, do not show, do not expose to filters */ \
+  void (*format)(const eattr *ea, byte *buf, uint size); \
+  void (*stored)(const eattr *ea);	/* When stored into global hash */ \
+  void (*freed)(const eattr *ea);	/* When released from global hash */ \
+
+  EA_CLASS_INSIDE;
+};
+
+struct ea_class_ref {
+  resource r;
+  struct ea_class *class;
+};
+
+void ea_register_init(struct ea_class *);
+struct ea_class_ref *ea_register_alloc(pool *, struct ea_class);
+
+#define EA_REGISTER_ALL_HELPER(x)	ea_register_init(x);
+#define EA_REGISTER_ALL(...)		MACRO_FOREACH(EA_REGISTER_ALL_HELPER, __VA_ARGS__)
+
+struct ea_class *ea_class_find_by_id(uint id);
+struct ea_class *ea_class_find_by_name(const char *name);
+static inline struct ea_class *ea_class_self(struct ea_class *self) { return self; }
+#define ea_class_find(_arg)	_Generic((_arg), \
+  uint: ea_class_find_by_id, \
+  word: ea_class_find_by_id, \
+  char *: ea_class_find_by_name, \
+  const char *: ea_class_find_by_name, \
+  struct ea_class *: ea_class_self)(_arg)
+
+struct ea_walk_state {
+  ea_list *eattrs;			/* Current ea_list, initially set by caller */
+  eattr *ea;				/* Current eattr, initially NULL */
+  u32 visited[4];			/* Bitfield, limiting max to 128 */
+};
+
+#define ea_find(_l, _arg)	_Generic((_arg), uint: ea_find_by_id, struct ea_class *: ea_find_by_class, char *: ea_find_by_name)(_l, _arg)
+eattr *ea_find_by_id(ea_list *, unsigned ea);
+static inline eattr *ea_find_by_class(ea_list *l, const struct ea_class *def)
+{ return ea_find_by_id(l, def->id); }
+static inline eattr *ea_find_by_name(ea_list *l, const char *name)
+{
+  const struct ea_class *def = ea_class_find_by_name(name);
+  return def ? ea_find_by_class(l, def) : NULL;
+}
+
+#define ea_get_int(_l, _ident, _def)  ({ \
+    struct ea_class *cls = ea_class_find((_ident)); \
+    ASSERT_DIE(cls->type & EAF_EMBEDDED); \
+    const eattr *ea = ea_find((_l), cls->id); \
+    (ea ? ea->u.data : (_def)); \
+    })
+
+#define ea_get_ip(_l, _ident, _def)  ({ \
+    struct ea_class *cls = ea_class_find((_ident)); \
+    ASSERT_DIE(cls->type == T_IP); \
+    const eattr *ea = ea_find((_l), cls->id); \
+    (ea ? *((const ip_addr *) ea->u.ptr->data) : (_def)); \
+    })
+
+eattr *ea_walk(struct ea_walk_state *s, uint id, uint max);
+void ea_dump(ea_list *);
+int ea_same(ea_list *x, ea_list *y);	/* Test whether two ea_lists are identical */
+uint ea_hash(ea_list *e);		/* Calculate 16-bit hash value */
+ea_list *ea_append(ea_list *to, ea_list *what);
+void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max);
+
+/* Normalize ea_list; allocates the result from tmp_linpool */
+ea_list *ea_normalize(ea_list *e, int overlay);
+
+uint ea_list_size(ea_list *);
+void ea_list_copy(ea_list *dest, ea_list *src, uint size);
+
+#define EA_LOCAL_LIST(N)  struct { ea_list l; eattr a[N]; }
+
+#define EA_LITERAL_EMBEDDED(_class, _flags, _val) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(_type & EAF_EMBEDDED); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.i = _val); \
+    })
+
+#define EA_LITERAL_STORE_ADATA(_class, _flags, _buf, _len) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = tmp_store_adata((_buf), (_len))); \
+    })
+
+#define EA_LITERAL_DIRECT_ADATA(_class, _flags, _adata) ({ \
+    btype _type = (_class)->type; \
+    ASSERT_DIE(!(_type & EAF_EMBEDDED)); \
+    EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = _adata); \
+    })
+
+#define EA_LITERAL_GENERIC(_id, _type, _flags, ...) \
+  ((eattr) { .id = _id, .type = _type, .flags = _flags, __VA_ARGS__ })
+
+static inline eattr *
+ea_set_attr(ea_list **to, eattr a)
+{
+  EA_LOCAL_LIST(1) *ea = tmp_alloc(sizeof(*ea));
+  *ea = (typeof(*ea)) {
+    .l.flags = EALF_SORTED,
+    .l.count = 1,
+    .l.next = *to,
+    .a[0] = a,
+  };
+
+  *to = &ea->l;
+  return &ea->a[0];
+}
+
+static inline void
+ea_unset_attr(ea_list **to, _Bool local, const struct ea_class *def)
+{
+  ea_set_attr(to, EA_LITERAL_GENERIC(def->id, 0, 0,
+	.fresh = local, .originated = local, .undef = 1));
+}
+
+static inline void
+ea_set_attr_u32(ea_list **to, const struct ea_class *def, uint flags, u64 data)
+{ ea_set_attr(to, EA_LITERAL_EMBEDDED(def, flags, data)); }
+
+static inline void
+ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, const void *data, uint len)
+{ ea_set_attr(to, EA_LITERAL_STORE_ADATA(def, flags, data, len)); }
+
+static inline void
+ea_copy_attr(ea_list **to, ea_list *from, const struct ea_class *def)
+{
+  eattr *e = ea_find_by_class(from, def);
+  if (e)
+    if (e->type & EAF_EMBEDDED)
+      ea_set_attr_u32(to, def, e->flags, e->u.data);
+    else
+      ea_set_attr_data(to, def, e->flags, e->u.ptr->data, e->u.ptr->length);
+  else
+    ea_unset_attr(to, 0, def);
+}
+
+/*
+ *	Common route attributes
+ */
+
+/* Preference: first-order comparison */
+extern struct ea_class ea_gen_preference;
+static inline u32 rt_get_preference(rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_preference, 0); }
+
+/* IGP metric: second-order comparison */
+extern struct ea_class ea_gen_igp_metric;
+u32 rt_get_igp_metric(const rte *rt);
+#define IGP_METRIC_UNKNOWN 0x80000000	/* Default igp_metric used when no other
+					   protocol-specific metric is available */
+
+/* From: Advertising router */
+extern struct ea_class ea_gen_from;
+
+/* Source: An old method to devise the route source protocol and kind.
+ * To be superseded in a near future by something more informative. */
+extern struct ea_class ea_gen_source;
+static inline u32 rt_get_source_attr(const rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_source, 0); }
+
+/* Flowspec validation result */
+enum flowspec_valid {
+  FLOWSPEC_UNKNOWN	= 0,
+  FLOWSPEC_VALID	= 1,
+  FLOWSPEC_INVALID	= 2,
+  FLOWSPEC__MAX,
+};
+
+extern const char * flowspec_valid_names[FLOWSPEC__MAX];
+static inline const char *flowspec_valid_name(enum flowspec_valid v)
+{ return (v < FLOWSPEC__MAX) ? flowspec_valid_names[v] : "???"; }
+
+extern struct ea_class ea_gen_flowspec_valid;
+static inline enum flowspec_valid rt_get_flowspec_valid(rte *rt)
+{ return ea_get_int(rt->attrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN); }
+
+/* Next hop: For now, stored as adata */
+extern struct ea_class ea_gen_nexthop;
+
+static inline void ea_set_dest(struct ea_list **to, uint flags, uint dest)
+{
+  struct nexthop_adata nhad = NEXTHOP_DEST_LITERAL(dest);
+  ea_set_attr_data(to, &ea_gen_nexthop, flags, &nhad.ad.data, nhad.ad.length);
+}
+
+/* Next hop structures */
+
+#define NEXTHOP_ALIGNMENT	(_Alignof(struct nexthop))
+#define NEXTHOP_MAX_SIZE	(sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
+#define NEXTHOP_SIZE(_nh)	NEXTHOP_SIZE_CNT(((_nh)->labels))
+#define NEXTHOP_SIZE_CNT(cnt)	BIRD_ALIGN((sizeof(struct nexthop) + sizeof(u32) * (cnt)), NEXTHOP_ALIGNMENT)
+#define nexthop_size(nh)	NEXTHOP_SIZE((nh))
+
+#define NEXTHOP_NEXT(_nh)	((void *) (_nh) + NEXTHOP_SIZE(_nh))
+#define NEXTHOP_END(_nhad)	((_nhad)->ad.data + (_nhad)->ad.length)
+#define NEXTHOP_VALID(_nh, _nhad) ((void *) (_nh) < (void *) NEXTHOP_END(_nhad))
+#define NEXTHOP_ONE(_nhad)	(NEXTHOP_NEXT(&(_nhad)->nh) == NEXTHOP_END(_nhad))
+
+#define NEXTHOP_WALK(_iter, _nhad) for ( \
+    struct nexthop *_iter = &(_nhad)->nh; \
+    (void *) _iter < (void *) NEXTHOP_END(_nhad); \
+    _iter = NEXTHOP_NEXT(_iter))
+
+
+static inline int nexthop_same(struct nexthop_adata *x, struct nexthop_adata *y)
+{ return adata_same(&x->ad, &y->ad); }
+struct nexthop_adata *nexthop_merge(struct nexthop_adata *x, struct nexthop_adata *y, int max, linpool *lp);
+struct nexthop_adata *nexthop_sort(struct nexthop_adata *x, linpool *lp);
+int nexthop_is_sorted(struct nexthop_adata *x);
+
+#define NEXTHOP_IS_REACHABLE(nhad)	((nhad)->ad.length > NEXTHOP_DEST_SIZE)
+
+/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */
+static inline int rte_is_reachable(rte *r)
+{
+  eattr *nhea = ea_find(r->attrs, &ea_gen_nexthop);
+  if (!nhea)
+    return 0;
+
+  struct nexthop_adata *nhad = (void *) nhea->u.ptr;
+  return NEXTHOP_IS_REACHABLE(nhad);
+}
+
+static inline int nhea_dest(eattr *nhea)
+{
+  if (!nhea)
+    return RTD_NONE;
+
+  struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL;
+  if (NEXTHOP_IS_REACHABLE(nhad))
+    return RTD_UNICAST;
+  else
+    return nhad->dest;
+}
+
+static inline int rte_dest(const rte *r)
+{
+  return nhea_dest(ea_find(r->attrs, &ea_gen_nexthop));
+}
+
+void rta_init(void);
+ea_list *ea_lookup(ea_list *, int overlay);	/* Get a cached (and normalized) variant of this attribute list */
+static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; }
+static inline struct ea_storage *ea_get_storage(ea_list *r)
+{
+  ASSERT_DIE(ea_is_cached(r));
+  return SKIP_BACK(struct ea_storage, l[0], r);
+}
+
+static inline ea_list *ea_clone(ea_list *r) { ea_get_storage(r)->uc++; return r; }
+void ea__free(struct ea_storage *r);
+static inline void ea_free(ea_list *l) {
+  if (!l) return;
+  struct ea_storage *r = ea_get_storage(l);
+  if (!--r->uc) ea__free(r);
+}
+
+void ea_dump(ea_list *);
+void ea_dump_all(void);
+void ea_show_list(struct cli *, ea_list *);
+
+#define rta_lookup	ea_lookup
+#define rta_is_cached	ea_is_cached
+#define rta_clone	ea_clone
+#define rta_free	ea_free
+
+#endif
diff --cc nest/protocol.h
index 3c823ae11,440297a12..709d64157
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@@ -60,8 -78,7 +60,7 @@@ struct protocol
    int (*start)(struct proto *);			/* Start the instance */
    int (*shutdown)(struct proto *);		/* Stop the instance */
    void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */
-   void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */
-   int (*get_attr)(const struct eattr *, byte *buf, int buflen);	/* ASCIIfy dynamic attribute (returns GA_*) */
+ //  int (*get_attr)(const struct eattr *, byte *buf, int buflen);	/* ASCIIfy dynamic attribute (returns GA_*) */
    void (*show_proto_info)(struct proto *);	/* Show protocol info (for `show protocols all' command) */
    void (*copy_config)(struct proto_config *, struct proto_config *);	/* Copy config from given protocol instance */
  };
@@@ -342,7 -360,7 +342,7 @@@ void proto_notify_state(struct proto *p
   */

  static inline int proto_is_inactive(struct proto *p)
- { return (p->active_channels == 0) && (p->active_loops == 0); }
 -{ return (p->active_channels == 0) && (p->active_coroutines == 0) && (p->sources.uc == 0); }
++{ return (p->active_channels == 0) && (p->active_loops == 0) && (p->sources.uc == 0); }

  /*
diff --cc nest/rt-attr.c
index b31bc5cc6,f7e33d727..b3a4c7a1c
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@@ -166,20 -85,12 +166,22 @@@ const char * rta_dest_names[RTD_MAX] =
    [RTD_PROHIBIT]	= "prohibited",
  };

+ struct ea_class ea_gen_flowspec_valid = {
+   .name = "flowspec_valid",
+   .type = T_ENUM_FLOWSPEC_VALID,
+   .readonly = 1,
+ };
+
+ const char * flowspec_valid_names[FLOWSPEC__MAX] = {
+   [FLOWSPEC_UNKNOWN]	= "unknown",
+   [FLOWSPEC_VALID]	= "",
+   [FLOWSPEC_INVALID]	= "invalid",
+ };
+
+ DOMAIN(attrs) attrs_domain;
+
  pool *rta_pool;

- static slab *rta_slab_[4];
- static slab *nexthop_slab_[4];
  static slab *rte_src_slab;

  static struct idm src_ids;
@@@ -194,30 -105,22 +196,25 @@@
  #define RSH_REHASH		rte_src_rehash
  #define RSH_PARAMS		/2, *2, 1, 1, 8, 20
- #define RSH_INIT_ORDER		6
-
- static HASH(struct rte_src) src_hash;
+ #define RSH_INIT_ORDER		2
+ static struct rte_src **rte_src_global;
+ static uint rte_src_global_max = SRC_ID_INIT_SIZE;

  static void
  rte_src_init(void)
  {
    rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
+   rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max);

    idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
-
-   HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
  }

  HASH_DEFINE_REHASH_FN(RSH, struct rte_src)

- struct rte_src *
- rt_find_source(struct proto *p, u32 id)
+ static struct rte_src *
+ rt_find_source(struct rte_owner *p, u32 id)
  {
-   return HASH_FIND(src_hash, RSH, p, id);
+   return HASH_FIND(p->hash, RSH, id);
  }

  struct rte_src *
@@@ -226,46 -132,60 +226,75 @@@ rt_get_source_o(struct rte_owner *p, u3
    struct rte_src *src = rt_find_source(p, id);

    if (src)
+   {
+     UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
      return src;
+   }

+   RTA_LOCK;
    src = sl_allocz(rte_src_slab);
-   src->proto = p;
+   src->owner = p;
    src->private_id = id;
    src->global_id = idm_alloc(&src_ids);
-   src->uc = 0;

-   HASH_INSERT2(src_hash, RSH, rta_pool, src);
+   atomic_store_explicit(&src->uc, 1, memory_order_release);
+   p->uc++;
+
+   HASH_INSERT2(p->hash, RSH, rta_pool, src);
+   if (config->table_debug)
+     log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now",
+ 	p->name, src->private_id, src->global_id, p->uc);
+
+   if (src->global_id >= rte_src_global_max)
+   {
+     rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2));
+     memset(&rte_src_global[rte_src_global_max / 2], 0,
+ 	sizeof(struct rte_src *) * (rte_src_global_max / 2));
+   }
+
+   rte_src_global[src->global_id] = src;
+   RTA_UNLOCK;

    return src;
  }

+ struct rte_src *
+ rt_find_source_global(u32 id)
+ {
+   if (id >= rte_src_global_max)
+     return NULL;
+   else
+     return rte_src_global[id];
+ }
+
+ static inline void
+ rt_done_sources(struct rte_owner *o)
+ {
-   if (o->stop->list)
-     ev_send(o->stop->list, o->stop);
-   else
-     ev_send(o->list, o->stop);
++  ev_send(o->list, o->stop);
+ }
+
  void
- rt_prune_sources(void)
+ rt_prune_sources(void *data)
  {
-   HASH_WALK_FILTER(src_hash, next, src, sp)
+   struct rte_owner *o = data;
+
+   HASH_WALK_FILTER(o->hash, next, src, sp)
    {
-     if (src->uc == 0)
+     u64 uc;
+     while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT)
-       ;
++      synchronize_rcu();
+
+     if (uc == 0)
      {
-       HASH_DO_REMOVE(src_hash, RSH, sp);
+       o->uc--;
+
+       HASH_DO_REMOVE(o->hash, RSH, sp);
+
+       RTA_LOCK;
++      rte_src_global[src->global_id] = NULL;
        idm_free(&src_ids, src->global_id);
-       sl_free(rte_src_slab, src);
+       sl_free(src);
+       RTA_UNLOCK;
      }
    }
    HASH_WALK_FILTER_END;
@@@ -1466,22 -1407,22 +1543,24 @@@ ea_show_list(struct cli *c, ea_list *ea
  void
  rta_init(void)
  {
+   attrs_domain = DOMAIN_NEW(attrs, "Attributes");
+
    rta_pool = rp_new(&root_pool, "Attributes");

-   rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
-   rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
-   rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2);
-   rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
-
-   nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop));
-   nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32));
-   nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
-   nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
-
    rta_alloc_hash();
    rte_src_init();
+   ea_class_init();
+
+   /* These attributes are required to be first for nice "show route" output */
+   ea_register_init(&ea_gen_nexthop);
+   ea_register_init(&ea_gen_hostentry);
+
+   /* Other generic route attributes */
+   ea_register_init(&ea_gen_preference);
+   ea_register_init(&ea_gen_igp_metric);
+   ea_register_init(&ea_gen_from);
+   ea_register_init(&ea_gen_source);
+   ea_register_init(&ea_gen_flowspec_valid);
  }

  /*
diff --cc nest/rt-show.c
index b784bf833,8196903dc..174000292
--- a/nest/rt-show.c
+++ b/nest/rt-show.c
@@@ -55,26 -53,46 +55,26 @@@ rt_show_rte(struct cli *c, byte *ia, rt
      from[0] = 0;

    /* Need to normalize the extended attributes */
-   if (d->verbose && !rta_is_cached(a) && a->eattrs)
-     ea_normalize(a->eattrs);
+   if (d->verbose && !rta_is_cached(a) && a)
+     a = ea_normalize(a, 0);

-   get_route_info = e->src->proto->proto->get_route_info;
+   get_route_info = e->src->owner->class ? e->src->owner->class->get_route_info : NULL;
    if (get_route_info)
      get_route_info(e, info);
    else
-     bsprintf(info, " (%d)", a->pref);
+     bsprintf(info, " (%d)", rt_get_preference(e));

    if (d->last_table != d->tab)
-     rt_show_table(c, d);
-
-   cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest),
- 	     e->src->owner->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info);
-
-   if (a->dest == RTD_UNICAST)
-     for (nh = &(a->nh); nh; nh = nh->next)
-     {
-       char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls;
-       char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : "";
-       char weight[16] = "";
-
-       if (nh->labels)
-       {
- 	lsp += bsprintf(lsp, " mpls %d", nh->label[0]);
- 	for (int i=1; i<nh->labels; i++)
- 	  lsp += bsprintf(lsp, "/%d", nh->label[i]);
-       }
-       *lsp = '\0';
+     rt_show_table(d);

-       if (a->nh.next)
- 	bsprintf(weight, " weight %d", nh->weight + 1);
+   eattr *heea;
+   struct hostentry_adata *had = NULL;
+   if (!net_is_flow(e->net) && (dest == RTD_NONE) && (heea = ea_find(a, &ea_gen_hostentry)))
+     had = (struct hostentry_adata *) heea->u.ptr;

-       if (ipa_nonzero(nh->gw))
- 	cli_printf(c, -1007, "\tvia %I on %s%s%s%s",
- 		   nh->gw, nh->iface->name, mpls, onlink, weight);
-       else
- 	cli_printf(c, -1007, "\tdev %s%s%s",
- 		   nh->iface->name, mpls, onlink, weight);
-     }
+   cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia,
+ 	     net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : had ? "recursive" : rta_dest_name(dest),
- 	     e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info);
++	     e->src->owner->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info);

    if (d->verbose)
    {
@@@ -178,10 -211,10 +178,10 @@@ rt_show_net(struct rt_show_data *d, con
      }
    }

-   if (d->show_protocol && (d->show_protocol != e.src->proto))
+   if (d->show_protocol && (&d->show_protocol->sources != e.src->owner))
      goto skip;

-   if (f_run(d->filter, &e, c->show_pool, 0) > F_ACCEPT)
+   if (f_run(d->filter, &e, 0) > F_ACCEPT)
      goto skip;

    if (d->stats < 2)
diff --cc nest/rt-table.c
index 15dbc3717,c67f5bf8a..b8f0e61d3
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@@ -666,14 -363,11 +666,14 @@@ rte_better(rte *new, rte *old
    if (!rte_is_valid(new))
      return 0;

-   if (new->attrs->pref > old->attrs->pref)
+   u32 np = rt_get_preference(new);
+   u32 op = rt_get_preference(old);
+
+   if (np > op)
      return 1;
-   if (new->attrs->pref < old->attrs->pref)
+   if (np < op)
      return 0;
-   if (new->src->proto->proto != old->src->proto->proto)
+   if (new->src->owner->class != old->src->owner->class)
    {
      /*
       *  If the user has configured protocol preferences, so that two different protocols
@@@ -695,13 -389,13 +695,13 @@@ rte_mergable(rte *pri, rte *sec
    if (!rte_is_valid(pri) || !rte_is_valid(sec))
      return 0;

-   if (pri->attrs->pref != sec->attrs->pref)
+   if (rt_get_preference(pri) != rt_get_preference(sec))
      return 0;

-   if (pri->src->proto->proto != sec->src->proto->proto)
+   if (pri->src->owner->class != sec->src->owner->class)
      return 0;

-   if (mergable = pri->src->proto->rte_mergable)
+   if (mergable = pri->src->owner->class->rte_mergable)
      return mergable(pri, sec);

    return 0;
@@@ -1596,13 -1269,13 +1596,13 @@@ rte_recalculate(struct rt_import_hook *
    {
      if (!old->generation && !new->generation)
        bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u",
- 	  c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id);
+ 	  c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id);

      log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u",
- 	  c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id);
+ 	  c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id);
    }

-   if (new && rte_same(old, new))
+   if (new && rte_same(old, &new_stored->rte))
    {
      /* No changes, ignore the new route and refresh the old one */
      old->stale_cycle = new->stale_cycle;
@@@ -2845,39 -2234,9 +2845,37 @@@ again
    /* state change 2->0, 3->1 */
    tab->prune_state &= 1;

+   if (tab->trie_new)
+   {
+     /* Finish prefix trie pruning */
+
+     if (!tab->trie_lock_count)
+     {
+       rfree(tab->trie->lp);
+     }
+     else
+     {
+       ASSERT(!tab->trie_old);
+       tab->trie_old = tab->trie;
+       tab->trie_old_lock_count = tab->trie_lock_count;
+       tab->trie_lock_count = 0;
+     }
+
+     tab->trie = tab->trie_new;
+     tab->trie_new = NULL;
+     tab->prune_trie = 0;
+   }
+   else
+   {
+     /* Schedule prefix trie pruning */
+     if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
+     {
+       /* state change 0->1, 2->3 */
+       tab->prune_state |= 1;
+       tab->prune_trie = 1;
+     }
+   }
+
-   rt_prune_sources();
-
    uint flushed_channels = 0;

    /* Close flushed channels */
@@@ -3041,124 -2402,63 +3039,115 @@@ rt_export_cleanup(rtable *tab
      }
    }

-     first_export = next;
+     first = next;
    }

+   rt_check_cork_low(tab);
+
  done:;
    struct rt_import_hook *ih; node *x;
-   _Bool imports_stopped = 0;
+
    WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n)
      if (ih->import_state == TIS_WAITING)
-       if (!first_export || (first_export->seq >= ih->flush_seq))
+       if (!first || (first->seq >= ih->flush_seq))
        {
 	 ih->import_state = TIS_CLEARED;
 	 ih->stopped(ih->req);
 	 rem_node(&ih->n);
 	 mb_free(ih);
 	 rt_unlock_table(tab);
- 	 imports_stopped = 1;
        }

+   if (tab->export_used)
+     ev_schedule(tab->rt_event);
+
-   if (imports_stopped)
-   {
-     if (config->table_debug)
-       log(L_TRACE "%s: Sources pruning routine requested", tab->name);
-
-     rt_prune_sources();
-   }
-
-   if (EMPTY_LIST(tab->pending_exports) && tm_active(tab->export_timer))
-     tm_stop(tab->export_timer);
-
-   /* If reduced to at most one export block pending */
-   if (tab->cork_active &&
-       ((!tab->first_export) || (tab->first_export->seq + 128 > tab->next_export_seq)))
-   {
-     tab->cork_active = 0;
-     ev_uncork(&rt_cork);
-   }
+   if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer))
+     tm_stop(tab->exporter.export_timer);
  }

- void
- rt_preconfig(struct config *c)
+ static void
+ rt_cork_release_hook(void *data UNUSED)
  {
-   init_list(&c->tables);
-
-   rt_new_table(cf_get_symbol("master4"), NET_IP4);
-   rt_new_table(cf_get_symbol("master6"), NET_IP6);
+   do synchronize_rcu();
+   while (
+       !atomic_load_explicit(&rt_cork.active, memory_order_acquire) &&
+       ev_run_list(&rt_cork.queue)
+       );
  }

-
- /*
-  * Some functions for handing internal next hop updates
-  * triggered by rt_schedule_nhu().
+ /**
+  * rt_lock_trie - lock a prefix trie of a routing table
+  * @tab: routing table with prefix trie to be locked
+  *
+  * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
+  * structures. Therefore, asynchronous walks should lock the prefix trie using
+  * this function. That allows the prune loop to rebuild the trie, but postpones
+  * its freeing until all walks are done (unlocked by rt_unlock_trie()).
+  *
+  * Return a current trie that will be locked, the value should be passed back to
+  * rt_unlock_trie() for unlocking.
+  *
   */
+ struct f_trie *
+ rt_lock_trie(rtable *tab)
+ {
+   ASSERT(tab->trie);

- static inline int
- rta_next_hop_outdated(rta *a)
+   tab->trie_lock_count++;
+   return tab->trie;
+ }
+
+ /**
+  * rt_unlock_trie - unlock a prefix trie of a routing table
+  * @tab: routing table with prefix trie to be locked
+  * @trie: value returned by matching rt_lock_trie()
+  *
+  * Done for trie locked by rt_lock_trie() after walk over the trie is done.
+  * It may free the trie and schedule next trie pruning.
+  */
+ void
+ rt_unlock_trie(rtable *tab, struct f_trie *trie)
  {
-   struct hostentry *he = a->hostentry;
+   ASSERT(trie);

-   if (!he)
-     return 0;
+   if (trie == tab->trie)
+   {
+     /* Unlock the current prefix trie */
+     ASSERT(tab->trie_lock_count);
+     tab->trie_lock_count--;
+   }
+   else if (trie == tab->trie_old)
+   {
+     /* Unlock the old prefix trie */
+     ASSERT(tab->trie_old_lock_count);
+     tab->trie_old_lock_count--;

-   if (!he->src)
-     return a->dest != RTD_UNREACHABLE;
+     /* Free old prefix trie that is no longer needed */
+     if (!tab->trie_old_lock_count)
+     {
+       rfree(tab->trie_old->lp);
+       tab->trie_old = NULL;

-   return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
-     (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
+       /* Kick prefix trie pruning that was postponed */
+       if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
+       {
+ 	tab->prune_trie = 1;
+ 	rt_schedule_prune(tab);
+       }
+     }
+   }
+   else
+     log(L_BUG "Invalid arg to rt_unlock_trie()");
+ }
+
+
+ void
+ rt_preconfig(struct config *c)
+ {
+   init_list(&c->tables);
+
+   rt_new_table(cf_get_symbol("master4"), NET_IP4);
+   rt_new_table(cf_get_symbol("master6"), NET_IP6);
  }

  void
@@@ -4167,11 -3069,11 +4156,11 @@@ rt_get_igp_metric(const rte *rt
    if (ea)
      return ea->u.data;

-   if (rt->attrs->source == RTS_DEVICE)
+   if (rt_get_source_attr(rt) == RTS_DEVICE)
      return 0;

-   if (rt->src->proto->rte_igp_metric)
-     return rt->src->proto->rte_igp_metric(rt);
+   if (rt->src->owner->class->rte_igp_metric)
+     return rt->src->owner->class->rte_igp_metric(rt);

    return IGP_METRIC_UNKNOWN;
  }
diff --cc proto/babel/babel.c
index 00b9aa795,40e85a164..4d024e3a4
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@@ -2257,15 -2260,11 +2257,15 @@@ babel_kick_timer(struct babel_proto *p

  static int
- babel_preexport(struct channel *c, struct rte *new)
+ babel_preexport(struct channel *C, struct rte *new)
  {
-   if (new->src->proto != C->proto)
-   struct rta *a = new->attrs;
++  if (new->src->owner != &C->proto->sources)
+     return 0;
+
+   /* Reject our own unreachable routes */
-   if ((a->dest == RTD_UNREACHABLE) && (new->src->owner == &c->proto->sources))
+   eattr *ea = ea_find(new->attrs, &ea_gen_nexthop);
+   struct nexthop_adata *nhad = (void *) ea->u.ptr;
+   if (!NEXTHOP_IS_REACHABLE(nhad))
      return -1;

    return 0;
@@@ -2286,13 -2285,13 +2286,13 @@@ babel_rt_notify(struct proto *P, struc
    {
      /* Update */
      uint rt_seqno;
-     uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0);
+     uint rt_metric = ea_get_int(new->attrs, &ea_babel_metric, 0);
      u64 rt_router_id = 0;

-     if (new->src->proto == P)
+     if (new->src->owner == &P->sources)
      {
-       rt_seqno = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data;
-       eattr *e = ea_find(new->attrs->eattrs, EA_BABEL_ROUTER_ID);
+       rt_seqno = ea_get_int(new->attrs, &ea_babel_seqno, 0);
+       eattr *e = ea_find(new->attrs, &ea_babel_router_id);
        if (e)
 	 memcpy(&rt_router_id, e->u.ptr->data, sizeof(u64));
      }
@@@ -2498,17 -2499,5 +2504,16 @@@ struct protocol proto_babel =
    .start =		babel_start,
    .shutdown =		babel_shutdown,
    .reconfigure =	babel_reconfigure,
-   .get_route_info =	babel_get_route_info,
-   .get_attr =		babel_get_attr
  };
+
+ void
+ babel_build(void)
+ {
+   proto_build(&proto_babel);
+
+   EA_REGISTER_ALL(
+       &ea_babel_metric,
+       &ea_babel_router_id,
+       &ea_babel_seqno
+       );
+ }
diff --cc proto/bgp/attrs.c
index a7b1a7edc,9b9013f9e..e96b175da
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@@ -1671,15 -1624,16 +1671,16 @@@ bgp_free_prefix_table(struct bgp_channe
  }

  static struct bgp_prefix *
--bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id)
++bgp_get_prefix(struct bgp_channel *c, const net_addr *net, struct rte_src *src)
  {
-   u32 hash = net_hash(net) ^ u32_hash(path_id);
-   struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
++  u32 path_id = src->global_id;
+   u32 path_id_hash = c->add_path_tx ? path_id : 0;
+   /* We must use a different hash function than the rtable */
+   u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
+   struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);

    if (px)
-   {
-     rem_node(&px->buck_node);
      return px;
-   }

    if (c->prefix_slab)
      px = sl_alloc(c->prefix_slab);
@@@ -1690,74 -1644,20 +1691,77 @@@
    px->hash = hash;
    px->path_id = path_id;
    net_copy(px->net, net);
++  rt_lock_source(src);

    HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);

    return px;
  }

- void
+ static void
+ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px);
+
+ static inline int
+ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
+ {
+ #define BPX_TRACE(what)	do { \
+   if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
+       c->c.proto->name, c->c.name, what, \
+       px->net, px->path_id, (b == c->withdraw_bucket) ? "withdraw" : "update"); } while (0)
+   px->lastmod = current_time();
+
+   /* Already queued for the same bucket */
+   if (px->cur == b)
+   {
+     BPX_TRACE("already queued");
+     return 0;
+   }
+
+   /* Unqueue from the old bucket */
+   if (px->cur)
+   {
+     rem_node(&px->buck_node_xx);
+     bgp_done_bucket(c, px->cur);
+   }
+
+   /* The new bucket is the same as we sent before */
+   if ((px->last == b) || c->c.out_table && !px->last && (b == c->withdraw_bucket))
+   {
+     if (px->cur)
+       BPX_TRACE("reverted");
+     else
+       BPX_TRACE("already sent");
+
+     /* Well, we haven't sent anything yet */
+     if (!px->last)
+       bgp_free_prefix(c, px);
+
+     px->cur = NULL;
+     return 0;
+   }
+
+   /* Enqueue the bucket if it has been empty */
+   if ((b != c->withdraw_bucket) && EMPTY_LIST(b->prefixes))
+     add_tail(&c->bucket_queue, &b->send_node);
+
+   /* Enqueue to the new bucket and indicate the change */
+   add_tail(&b->prefixes, &px->buck_node_xx);
+   px->cur = b;
+
+   BPX_TRACE("queued");
+   return 1;
+
+ #undef BPX_TRACE
+ }
+
+ static void
  bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
  {
-   rem_node(&px->buck_node);
    HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
++  rt_unlock_source(rt_find_source_global(px->path_id));
++
    if (c->prefix_slab)
-     sl_free(c->prefix_slab, px);
+     sl_free(px);
    else
      mb_free(px);
  }
@@@ -1929,11 -1668,10 +1933,10 @@@ bgp_setup_out_table(struct bgp_channel
   */

  int
- bgp_preexport(struct channel *c, rte *e)
+ bgp_preexport(struct channel *C, rte *e)
  {
-   struct proto *SRC = e->src->proto;
-   struct bgp_proto *p = (struct bgp_proto *) (c->proto);
+   struct bgp_proto *p = (struct bgp_proto *) C->proto;
-   struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
+   struct bgp_proto *src = bgp_rte_proto(e);

    /* Reject our routes */
    if (src == p)
@@@ -2121,7 -1842,8 +2123,7 @@@ bgp_rt_notify(struct proto *P, struct c
    struct bgp_proto *p = (void *) P;
    struct bgp_channel *c = (void *) C;
    struct bgp_bucket *buck;
-   struct bgp_prefix *px;
--  u32 path;
++  struct rte_src *path;

    if (new)
    {
@@@ -2133,16 -1851,20 +2135,16 @@@
      /* If attributes are invalid, we fail back to withdraw */
      buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
--    path = new->src->global_id;
-
-     lp_flush(bgp_linpool2);
++    path = new->src;
    }
    else
    {
      buck = bgp_get_withdraw_bucket(c);
--    path = old->src->global_id;
++    path = old->src;
    }

-   px = bgp_get_prefix(c, n, c->add_path_tx ? path : 0);
-   add_tail(&buck->prefixes, &px->buck_node);
-
-   bgp_schedule_packet(p->conn, c, PKT_UPDATE);
+   if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck))
+     bgp_schedule_packet(p->conn, c, PKT_UPDATE);
  }
diff --cc proto/bgp/bgp.c
index 33849b0bb,dc8455509..d240112c6
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@@ -2638,12 -2610,6 +2644,11 @@@ struct protocol proto_bgp =
    .reconfigure =	bgp_reconfigure,
    .copy_config =	bgp_copy_config,
    .get_status =		bgp_get_status,
-   .get_route_info =	bgp_get_route_info,
-   .get_attr =		bgp_get_attr,
    .show_proto_info =	bgp_show_proto_info
  };
+
+ void bgp_build(void)
+ {
+   proto_build(&proto_bgp);
+   bgp_register_attrs();
+ }
diff --cc proto/bgp/bgp.h
index 469f0cb96,7cb4df1f8..81382099d
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@@ -525,16 -518,12 +525,17 @@@ struct rte_source *bgp_find_source(stru
  struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);

  static inline int
- rta_resolvable(rta *a)
+ rte_resolvable(const rte *rt)
  {
-   return a->dest == RTD_UNICAST;
+   eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop);
+   if (!nhea)
+     return 0;
+
+   struct nexthop_adata *nhad = (void *) nhea->u.ptr;
+   return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE);
  }

+ extern struct rte_owner_class bgp_rte_owner_class;

  #ifdef LOCAL_DEBUG
  #define BGP_FORCE_DEBUG 1
@@@ -578,13 -587,20 +579,19 @@@ void bgp_done_prefix(struct bgp_channe

  int bgp_rte_better(struct rte *, struct rte *);
  int bgp_rte_mergable(rte *pri, rte *sec);
  int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
- void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint);
- u32 bgp_rte_igp_metric(struct rte *);
+ void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count);
+ u32 bgp_rte_igp_metric(const rte *);
  void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old);
  int bgp_preexport(struct channel *, struct rte *);
- int bgp_get_attr(const struct eattr *e, byte *buf, int buflen);
  void bgp_get_route_info(struct rte *, byte *);
- int bgp_total_aigp_metric_(rta *a, u64 *metric, const struct adata **ad);
+ int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad);

+ static inline struct bgp_proto *bgp_rte_proto(struct rte *rte)
+ {
+   return (rte->src->owner->class == &bgp_rte_owner_class) ?
+     SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL;
+ }
+
  #define BGP_AIGP_METRIC		1
  #define BGP_AIGP_MAX		U64(0xffffffffffffffff)
diff --cc proto/bgp/packets.c
index de976588f,d2d5b174a..867be75fc
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@@ -2468,13 -2447,15 +2471,15 @@@ bgp_decode_nlri(struct bgp_parse_state

      /* Handle withdraw during next hop decoding */
      if (s->err_withdraw)
-       a = NULL;
+       ea = NULL;
    }

-   c->desc->decode_nlri(s, nlri, len, a);
+   c->desc->decode_nlri(s, nlri, len, ea);

-   rta_free(s->cached_rta);
-   s->cached_rta = NULL;
+   rta_free(s->cached_ea);
+   s->cached_ea = NULL;
+
+   rt_unlock_source(s->last_src);
  }

  static void
diff --cc proto/ospf/ospf.c
index 9c25f0f05,16774df69..4e29f960f
--- a/proto/ospf/ospf.c
+++ b/proto/ospf/ospf.c
@@@ -492,7 -488,7 +492,7 @@@ ospf_preexport(struct channel *C, rte *
    struct ospf_area *oa = ospf_main_area(p);

    /* Reject our own routes */
-   if (e->src->proto == &p->p)
-   if (e->sender == c->in_req.hook)
++  if (e->sender == C->in_req.hook)
      return -1;

    /* Do not export routes to stub areas */
@@@ -1519,39 -1537,5 +1525,38 @@@ struct protocol proto_ospf =
    .shutdown =		ospf_shutdown,
    .reconfigure =	ospf_reconfigure,
    .get_status =		ospf_get_status,
-   .get_route_info =	ospf_get_route_info
-   .get_attr =		ospf_get_attr,
  };
+
+ struct ea_class ea_ospf_metric1 = {
+   .name = "ospf_metric1",
+   .type = T_INT,
+ };
+
+ struct ea_class ea_ospf_metric2 = {
+   .name = "ospf_metric2",
+   .type = T_INT,
+ };
+
+ struct ea_class ea_ospf_tag = {
+   .name = "ospf_tag",
+   .type = T_INT,
+   .format = ospf_tag_format,
+ };
+
+ struct ea_class ea_ospf_router_id = {
+   .name = "ospf_router_id",
+   .type = T_QUAD,
+ };
+
+ void
+ ospf_build(void)
+ {
+   proto_build(&proto_ospf);
+
+   EA_REGISTER_ALL(
+       &ea_ospf_metric1,
+       &ea_ospf_metric2,
+       &ea_ospf_tag,
+       &ea_ospf_router_id
+       );
+ }
diff --cc proto/pipe/pipe.c
index 8af6de814,a30da0e25..b3b50a0d9
--- a/proto/pipe/pipe.c
+++ b/proto/pipe/pipe.c
@@@ -86,8 -90,8 +86,8 @@@ pipe_preexport(struct channel *C, rte *
    if (e->generation >= max_generation)
    {
      log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u",
- 	e->generation, max_generation, c->proto->name,
- 	c->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id);
+ 	e->generation, max_generation, C->proto->name,
- 	C->table->name, e->net, e->src->proto->name, e->src->private_id, e->src->global_id);
++	C->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id);

      return -1;
    }
diff --cc proto/rip/rip.c
index f5c013805,ee05f0580..ab0e3f4b3
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@@ -377,15 -353,9 +377,15 @@@ rip_rt_notify(struct proto *P, struct c
      en->valid = RIP_ENTRY_VALID;
      en->metric = rt_metric;
      en->tag = rt_tag;
-     en->from = (new->src->proto == P) ? rt_from : NULL;
+     en->from = (new->src->owner == &P->sources) ? rt_from : NULL;
-     en->iface = new->attrs->nh.iface;
-     en->next_hop = new->attrs->nh.gw;
+
+     eattr *nhea = ea_find(new->attrs, &ea_gen_nexthop);
+     if (nhea)
+     {
+       struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+       en->iface = nhad->nh.iface;
+       en->next_hop = nhad->nh.gw;
+     }
    }
    else
    {
@@@ -1116,10 -1095,10 +1125,10 @@@ static in
  rip_rte_better(struct rte *new, struct rte *old)
  {
    ASSERT_DIE(new->src == old->src);
-   struct rip_proto *p = (struct rip_proto *) new->src->proto;
+   struct rip_proto *p = rip_rte_proto(new);

-   u32 new_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, p->infinity);
-   u32 old_metric = ea_get_int(old->attrs->eattrs, EA_RIP_METRIC, p->infinity);
+   u32 new_metric = ea_get_int(new->attrs, &ea_rip_metric, p->infinity);
+   u32 old_metric = ea_get_int(old->attrs, &ea_rip_metric, p->infinity);

    return new_metric < old_metric;
  }
@@@ -1227,11 -1205,11 +1235,11 @@@ rip_reconfigure(struct proto *P, struc
  static void
  rip_get_route_info(rte *rte, byte *buf)
  {
-   struct rip_proto *p = (struct rip_proto *) rte->src->proto;
+   struct rip_proto *p = rip_rte_proto(rte);

-   u32 rt_metric = ea_get_int(rte->attrs->eattrs, EA_RIP_METRIC, p->infinity);
-   u32 rt_tag = ea_get_int(rte->attrs->eattrs, EA_RIP_TAG, 0);
+   u32 rt_metric = ea_get_int(rte->attrs, &ea_rip_metric, p->infinity);
+   u32 rt_tag = ea_get_int(rte->attrs, &ea_rip_tag, 0);

-   buf += bsprintf(buf, " (%d/%d)", rte->attrs->pref, rt_metric);
+   buf += bsprintf(buf, " (%d/%d)", rt_get_preference(rte), rt_metric);

    if (rt_tag)
      bsprintf(buf, " [%04x]", rt_tag);
@@@ -1372,17 -1352,5 +1386,16 @@@ struct protocol proto_rip =
    .start =		rip_start,
    .shutdown =		rip_shutdown,
    .reconfigure =	rip_reconfigure,
-   .get_route_info =	rip_get_route_info,
-   .get_attr =		rip_get_attr
  };
+
+ void
+ rip_build(void)
+ {
+   proto_build(&proto_rip);
+
+   EA_REGISTER_ALL(
+       &ea_rip_metric,
+       &ea_rip_tag,
+       &ea_rip_from
+       );
+ }
diff --cc proto/static/static.c
index f0a514f7c,b0c9d6401..65f3eccc8
--- a/proto/static/static.c
+++ b/proto/static/static.c
@@@ -53,10 -58,12 +56,10 @@@ static inline void static_free_source(s
  static void
  static_announce_rte(struct static_proto *p, struct static_route *r)
  {
+   struct rte_src *src;
-   rta *a = allocz(RTA_MAX_SIZE);
-   a->source = RTS_STATIC;
-   a->scope = SCOPE_UNIVERSE;
-   a->dest = r->dest;
-   a->pref = p->p.main_channel->preference;
+   ea_list *ea = NULL;
-   struct rte_src *src = static_get_source(p, r->index);
+   ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference);
+   ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_STATIC);

    if (r->dest == RTD_UNICAST)
    {
@@@ -114,14 -106,21 +117,17 @@@
      return;

    /* We skip rta_lookup() here */
+   src = static_get_source(p, r->index);
-   rte e0 = { .attrs = a, .src = src, .net = r->net, }, *e = &e0;
+   rte e0 = { .attrs = ea, .src = src, .net = r->net, }, *e = &e0;

    /* Evaluate the filter */
    if (r->cmds)
-     f_eval_rte(r->cmds, e, static_lp);
+     f_eval_rte(r->cmds, e);

    rte_update(p->p.main_channel, r->net, e, src);
+   static_free_source(src, r->index);
+
    r->state = SRS_CLEAN;
-
-   if (r->cmds)
-     lp_flush(static_lp);
-
    return;

  withdraw:
@@@ -763,11 -791,4 +781,10 @@@ struct protocol proto_static =
    .shutdown =		static_shutdown,
    .reconfigure =	static_reconfigure,
    .copy_config =	static_copy_config,
-   .get_route_info =	static_get_route_info,
  };
+
+ void
+ static_build(void)
+ {
+   proto_build(&proto_static);
+ }
diff --cc sysdep/unix/krt.c
index 46b5a51d4,5431bebea..f796a1593
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@@ -304,27 -292,18 +304,28 @@@ krt_uptodate(rte *a, rte *b
  /* Called when alien route is discovered during scan */
  static void
- krt_learn_rte(struct krt_proto *p, rte *e)
+ krt_learn_scan(struct krt_proto *p, rte *e)
  {
-   struct rte_src *src = e->src = rt_get_source(&p->p, krt_metric(e));
-   rte_update(p->p.main_channel, e->net, e, e->src);
-   rt_unlock_source(src);
+   rte e0 = {
+     .attrs = e->attrs,
+     .src = rt_get_source(&p->p, krt_metric(e)),
+   };
+
+   ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference);
+
+   rte_update(p->p.main_channel, e->net, &e0, e0.src);
++  rt_unlock_source(e0.src);
  }

  static void
- krt_learn_init(struct krt_proto *p)
+ krt_learn_async(struct krt_proto *p, rte *e, int new)
  {
-   if (KRT_CF->learn)
-     channel_setup_in_table(p->p.main_channel, 1);
+   if (new)
+     return krt_learn_scan(p, e);
+
-   struct rte_src *src = rt_find_source(&p->p, krt_metric(e));
-   if (src)
-     rte_update(p->p.main_channel, e->net, NULL, src);
++  struct rte_src *src = rt_get_source(&p->p, krt_metric(e));
++  rte_update(p->p.main_channel, e->net, NULL, src);
++  rt_unlock_source(src);
  }

  #endif
@@@ -681,9 -673,9 +682,9 @@@ krt_scan_timer_kick(struct krt_proto *p
   */

  static int
- krt_preexport(struct channel *c, rte *e)
+ krt_preexport(struct channel *C, rte *e)
  {
-   if (e->src->proto == C->proto)
-   if (e->src->owner == &c->proto->sources)
++  if (e->src->owner == &C->proto->sources)
      return -1;

    if (!krt_capable(e))
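
The krt hunks above show the consumer side of the new source API: every
rt_get_source() must be paired with an rt_unlock_source() once the update has
been handed to the table. For completeness, this is roughly how a protocol is
expected to wire up its route-source owner, inferred from the lib/route.h
declarations earlier in this patch; the names outside that header
(proto_event_list(), p->stop_event, the toy_ prefix) are assumptions for
illustration, not BIRD's confirmed API:

static void toy_get_route_info(struct rte *e, byte *buf)
{
  bsprintf(buf, " (%u)", rt_get_preference(e));
}

static struct rte_owner_class toy_rte_owner_class = {
  .get_route_info = toy_get_route_info,
};

static void toy_start(struct proto *p)
{
  /* p->sources is the rte_owner embedded in struct proto, as the
   * SKIP_BACK(struct proto, sources, ...) calls above imply */
  p->sources.class = &toy_rte_owner_class;

  /* The event list tells the owner where its prune/stop events run */
  rt_init_sources(&p->sources, p->name, proto_event_list(p));
}

static void toy_shutdown(struct proto *p)
{
  /* The passed event fires once the last rte_src has been pruned,
   * which is also when proto_is_inactive() can finally become true */
  rt_destroy_sources(&p->sources, p->stop_event);
}
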