From: Maria Matejka Date: Tue, 7 May 2024 14:51:49 +0000 (+0200) Subject: Merge commit '5d5c431a3c23ad7ed2ed5e769d9857e7f3a1e626' into kk-tbf-config-v2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fkk-tbf-config-v3;p=thirdparty%2Fbird.git Merge commit '5d5c431a3c23ad7ed2ed5e769d9857e7f3a1e626' into kk-tbf-config-v2 --- ebeaf4e8caafc1928b602bed8b07feff0f287c9a diff --cc conf/confbase.Y index b29abf4c1,d1d3604be..3a16cdbb7 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@@ -95,15 -99,12 +99,16 @@@ CF_DECL btime time; struct f_prefix px; struct proto_spec ps; + struct table_spec ts; struct channel_limit cl; struct timeformat *tf; - mpls_label_stack *mls; + struct settle_config settle; + struct adata *ad; const struct adata *bs; struct aggr_item_node *ai; + struct logging_rate_targets *lrt; + struct tbf_config *tc; + enum tbf_targets tt; } %token END CLI_MARKER INVALID_TOKEN ELSECOL DDOT diff --cc nest/config.Y index a89509c97,3228b08ec..b7c34503b --- a/nest/config.Y +++ b/nest/config.Y @@@ -127,11 -164,8 +165,11 @@@ CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LON CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) - CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) + CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, ROA, ROUTE, REFRESH, SETTLE, TIME, GC, THRESHOLD, PERIOD) CF_KEYWORDS(MPLS_LABEL, MPLS_POLICY, MPLS_CLASS) +CF_KEYWORDS(ASPA_PROVIDERS) +CF_KEYWORDS(LOGGING, RATE) +CF_KEYWORDS( OSPF_PKT, OSPF_LSA, RIP_PKT, RIP_RTE, BABEL_PKT) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@@ -151,18 -185,15 +189,19 @@@ CF_ENUM(T_ENUM_MPLS_POLICY, MPLS_POLICY %type optproto %type r_args %type sym_args - %type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list 
mrtdump_flag export_mode limit_action net_type net_type_base tos password_algorithm + %type proto_start debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type net_type_base tos password_algorithm %type proto_patt proto_patt2 +%type table_patt %type channel_start proto_channel %type limit_spec %type r_args_for_val %type r_args_for %type channel_sym %type channel_arg +%type logging_rate_targets +%type logging_rate +%type tbf_target + %type partial_opt CF_GRAMMAR @@@ -246,11 -275,14 +285,15 @@@ table_opt cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]); this_table->trie_used = $2; } - | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; } - | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } | GC THRESHOLD expr { this_table->gc_threshold = $3; } | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); } + | LOGGING RATE expr expr { this_table->log_tbf_cf.rate = $3; this_table->log_tbf_cf.burst = $4; } + | CORK THRESHOLD expr expr { + if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold."); + this_table->cork_threshold.low = $3; + this_table->cork_threshold.high = $4; } + | EXPORT SETTLE TIME settle { this_table->export_settle = $4; } + | ROUTE REFRESH EXPORT SETTLE TIME settle { this_table->export_rr_settle = $6; } ; table_opts: @@@ -1024,19 -1027,6 +1081,12 @@@ proto_patt2 | TEXT { $$.ptr = $1; $$.patt = 1; } ; +table_patt: + CF_SYM_KNOWN { cf_assert_symbol($1, SYM_TABLE); $$.ptr = $1; $$.patt = 0; } + | ALL { $$.ptr = NULL; $$.patt = 1; } + | TEXT { $$.ptr = $1; $$.patt = 1; } + ; + - dynamic_attr: IGP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_GEN_IGP_METRIC); } ; - - dynamic_attr: MPLS_LABEL { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_MPLS_LABEL); } ; - dynamic_attr: MPLS_POLICY { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_MPLS_POLICY, 
EA_MPLS_POLICY); } ; - dynamic_attr: MPLS_CLASS { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_MPLS_CLASS); } ; - - CF_CODE CF_END diff --cc nest/protocol.h index 9e70fefb0,2172ecf45..8d4b2eddc --- a/nest/protocol.h +++ b/nest/protocol.h @@@ -229,21 -208,12 +208,13 @@@ struct proto * Routing entry hooks (called only for routes belonging to this protocol): * * rte_recalculate Called at the beginning of the best route selection - * rte_better Compare two rte's and decide which one is better (1=first, 0=second). - * rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no). * rte_mergable Compare two rte's and decide whether they could be merged (1=yes, 0=no). - * rte_insert Called whenever a rte is inserted to a routing table. - * rte_remove Called whenever a rte is removed from the routing table. */ - int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); - int (*rte_better)(struct rte *, struct rte *); + int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); - struct rte * (*rte_modify)(struct rte *, struct linpool *); - void (*rte_insert)(struct network *, struct rte *); - void (*rte_remove)(struct network *, struct rte *); - u32 (*rte_igp_metric)(struct rte *); + u32 (*rte_igp_metric)(const struct rte *); + void (*set_logging_rate)(struct proto *P, uintptr_t arg); /* Hic sunt protocol-specific data */ }; diff --cc nest/route.h index 53bccdd4c,66e1d030f..fba3e3adf --- a/nest/route.h +++ b/nest/route.h @@@ -152,28 -65,71 +65,72 @@@ struct rtable_config uint gc_period; /* Approximate time between two consecutive GC runs */ u32 debug; /* Debugging flags (D_*) */ byte sorted; /* Routes of network are sorted according to rte_better() */ - byte internal; /* Internal table of a protocol */ byte trie_used; /* Rtable has attached trie */ - btime min_settle_time; /* Minimum settle time for notifications 
*/ - btime max_settle_time; /* Maximum settle time for notifications */ + struct rt_cork_threshold cork_threshold; /* Cork threshold values */ + struct settle_config export_settle; /* Export announcement settler */ + struct settle_config export_rr_settle;/* Export announcement settler config valid when any + route refresh is running */ + struct tbf_config log_tbf_cf; /* Config logging rate for rtable */ }; - typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ + struct rt_export_hook; + struct rt_export_request; + struct rt_exporter; + + struct rt_exporter_class { + void (*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(void *_rt_export_hook); + }; + + struct rt_exporter { + const struct rt_exporter_class *class; + pool *rp; + list hooks; /* Registered route export hooks */ + uint addr_type; /* Type of address data exported (NET_*) */ + }; + + struct rt_table_exporter { + struct rt_exporter e; + list pending; /* List of packed struct rt_pending_export */ + + struct rt_pending_export *first; /* First export to announce */ + u64 next_seq; /* The next export will have this ID */ + }; + + extern uint rtable_max_id; + + /* The public part of rtable structure */ + #define RTABLE_PUBLIC \ + resource r; \ + node n; /* Node in list of all tables */ \ + char *name; /* Name of this table */ \ + uint addr_type; /* Type of address data stored in table (NET_*) */ \ + uint id; /* Integer table ID for fast lookup */ \ + DOMAIN(rtable) lock; /* Lock to take to access the private parts */ \ + struct rtable_config *config; /* Configuration of this table */ \ + struct birdloop *loop; /* Service thread */ \ + netindex_hash *netindex; /* Prefix index for this table */ \ + + /* The complete rtable structure */ + struct rtable_private { + /* Once more the public part */ + struct { RTABLE_PUBLIC; }; + struct rtable_private **locked_at; + + /* Here the private items not to be accessed without 
locking */ pool *rp; /* Resource pool to allocate everything from, including itself */ - struct fib fib; + struct slab *rte_slab; /* Slab to allocate route objects */ + struct network *routes; /* Actual route objects in the table */ + u32 routes_block_size; /* Size of the route object pointer block */ struct f_trie *trie; /* Trie of prefixes defined in fib */ - char *name; /* Name of this table */ - list channels; /* List of attached channels (struct channel) */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - u32 debug; /* Debugging flags (D_*) */ - int pipe_busy; /* Pipe loop detection */ int use_count; /* Number of protocols using this table */ u32 rt_count; /* Number of routes in the table */ + u32 net_count; /* Number of nets in the table */ + u32 debug; /* Debugging flags (D_*) */ - byte internal; /* Internal table of a protocol */ + list imports; /* Registered route importers */ + struct rt_table_exporter exporter; /* Exporter API structure */ struct hmap id_map; struct hostcache *hostcache; @@@ -198,91 -162,345 +163,345 @@@ struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ u32 trie_lock_count; /* Prefix trie locked by walks */ u32 trie_old_lock_count; /* Old prefix trie locked by walks */ - struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ ++ struct tbf log_tbf; /* Actual logging rate for rtable (might be changed in cmd) */ - list subscribers; /* Subscribers for notifications */ - struct timer *settle_timer; /* Settle time for notifications */ - list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ // struct mpls_domain *mpls_domain; /* Label allocator for MPLS */ - struct tbf log_tbf; /* Actual logging rate for rtable (might be changed in cmd) */ + }; + + /* The final union private-public rtable structure */ + typedef union rtable { + struct { + RTABLE_PUBLIC; + }; + struct 
rtable_private priv; } rtable; - struct rt_subscription { - node n; - rtable *tab; - void (*hook)(struct rt_subscription *b); - void *data; + /* Define the lock cleanup function */ + LOBJ_UNLOCK_CLEANUP(rtable, rtable); + + #define RT_IS_LOCKED(tab) LOBJ_IS_LOCKED((tab), rtable) + #define RT_LOCKED(tab, tp) LOBJ_LOCKED((tab), tp, rtable, rtable) + + #define RT_LOCK_SIMPLE(tab) LOBJ_LOCK_SIMPLE((tab), rtable) + #define RT_UNLOCK_SIMPLE(tab) LOBJ_UNLOCK_SIMPLE((tab), rtable) + + #define RT_UNLOCKED_TEMPORARILY(tab, tp) LOBJ_UNLOCKED_TEMPORARILY((tab), tp, rtable, rtable) + + #define RT_PUB(tab) SKIP_BACK(rtable, priv, tab) + + /* Flags for birdloop_flag() */ + #define RTF_CLEANUP 1 + #define RTF_NHU 2 + #define RTF_EXPORT 4 + #define RTF_DELETE 8 + + extern struct rt_cork { + _Atomic uint active; + event_list queue; + event run; + } rt_cork; + + static inline void rt_cork_acquire(void) + { + atomic_fetch_add_explicit(&rt_cork.active, 1, memory_order_acq_rel); + } + + static inline void rt_cork_release(void) + { + if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1) + { + synchronize_rcu(); + ev_send(&global_work_list, &rt_cork.run); + } + } + + static inline int rt_cork_check(event *e) + { + rcu_read_lock(); + + int corked = (atomic_load_explicit(&rt_cork.active, memory_order_acquire) > 0); + if (corked) + ev_send(&rt_cork.queue, e); + + rcu_read_unlock(); + + return corked; + } + + + typedef struct network { + struct rte_storage *routes; /* Available routes for this network */ + struct rt_pending_export *first, *last; + } net; + + struct rte_storage { + struct rte_storage *next; /* Next in chain */ + union { + struct { + RTE_IN_TABLE_WRITABLE; + }; + const struct rte rte; /* Route data */ + }; }; - struct rt_flowspec_link { + #define RTE_COPY(r) ((r) ? (r)->rte : (rte) {}) + #define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {}) + #define RTE_OR_NULL(r) ((r) ? 
&((r)->rte) : NULL) + #define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? &((r)->rte) : NULL) + + #define RTES_WRITE(r) (((r) != ((struct rte_storage *) 0)) ? ((struct rte *) &(r)->rte) : NULL) + + #define RTE_GET_NETINDEX(e) NET_TO_INDEX((e)->net) + + /* Table-channel connections */ + + struct rt_prefilter { + union { + const struct f_trie *trie; + const net_addr *addr; /* Network prefilter address */ + int (*hook)(const struct rt_prefilter *, const net_addr *); + }; + /* Network prefilter mode (TE_ADDR_*) */ + enum { + TE_ADDR_NONE = 0, /* No address matching */ + TE_ADDR_EQUAL, /* Exact query - show route */ + TE_ADDR_FOR, /* Longest prefix match - show route for */ + TE_ADDR_IN, /* Interval query - show route in */ + TE_ADDR_TRIE, /* Query defined by trie */ + TE_ADDR_HOOK, /* Query processed by supplied custom hook */ + } mode; + } PACKED; + + struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + event_list *list; /* Where to schedule announce events */ + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. 
*/ + int (*preimport)(struct rt_import_request *req, struct rte *new, const struct rte *old); + }; + + struct rt_import_hook { node n; - rtable *src; - rtable *dst; - u32 uc; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + + u64 flush_seq; /* Table export seq when the channel announced flushing */ + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ + + void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ + event announce_event; /* This event announces table updates */ }; - #define NHU_CLEAN 0 - #define NHU_SCHEDULED 1 - #define NHU_RUNNING 2 - #define NHU_DIRTY 3 + struct rt_pending_export { + struct rt_pending_export * _Atomic next; /* Next export for the same destination */ + struct rte_storage *new, *new_best, *old, *old_best; + u64 seq; /* Sequential ID (table-local) of the pending export */ + }; - typedef struct network { - struct rte *routes; /* Available routes for this network */ - struct fib_node n; /* FIB flags reserved for kernel syncer */ - } net; + struct rt_export_request { + struct rt_export_hook *hook; /* Table part of 
the export */ + char *name; + u8 trace_routes; + uint feed_block_size; /* How many routes to feed at once */ + struct rt_prefilter prefilter; + + event_list *list; /* Where to schedule export events */ + pool *pool; /* Pool to use for allocations */ + + /* There are two methods of export. You can either request feeding every single change + * or feeding the whole route feed. In case of regular export, &export_one is preferred. + * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. + * Thus, for RA_OPTIMAL, &export_one is only set, + * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set + * and for RA_ANY, both are set to accomodate for feeding all routes but receiving single changes + */ + void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void (*export_bulk)(struct rt_export_request *req, const net_addr *net, + struct rt_pending_export *rpe, struct rt_pending_export *last, + const rte **feed, uint count); + + void (*mark_seen)(struct rt_export_request *req, struct rt_pending_export *rpe); + + void (*dump_req)(struct rt_export_request *req); + void (*log_state_change)(struct rt_export_request *req, u8); + }; - struct hostcache { - slab *slab; /* Slab holding all hostentries */ - struct hostentry **hash_table; /* Hash table for hostentries */ - unsigned hash_order, hash_shift; - unsigned hash_max, hash_min; - unsigned hash_items; - linpool *lp; /* Linpool for trie */ - struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ - list hostentries; /* List of all hostentries */ - byte update_hostcache; + static inline int rt_prefilter_net(const struct rt_prefilter *p, const net_addr *n) + { + switch (p->mode) + { + case TE_ADDR_NONE: return 1; + case TE_ADDR_IN: return net_in_netX(n, p->addr); + case TE_ADDR_EQUAL: return net_equal(n, p->addr); + case TE_ADDR_FOR: return net_in_netX(p->addr, n); + case TE_ADDR_TRIE: return trie_match_net(p->trie, n); + case 
TE_ADDR_HOOK: return p->hook(p, n); + } + + bug("Crazy prefilter application attempt failed wildly."); + } + + struct rt_export_hook { + node n; + struct rt_exporter *table; /* The connected table */ + + pool *pool; + + struct rt_export_request *req; /* The requestor */ + + struct rt_export_stats { + /* Export - from core to protocol */ + u32 updates_received; /* Number of route updates received */ + u32 withdraws_received; /* Number of route withdraws received */ + } stats; + + btime last_state_change; /* Time of last state transition */ + + _Atomic u8 export_state; /* Route export state (TES_*, see below) */ + struct event event; /* Event running all the export operations */ + + struct bmap seq_map; /* Keep track which exports were already procesed */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ }; - struct hostentry { - node ln; - ip_addr addr; /* IP address of host, part of key */ - ip_addr link; /* (link-local) IP address of host, used as gw - if host is directly attached */ - rtable *tab; /* Dependent table, part of key */ - rtable *owner; /* Nexthop owner table */ - struct hostentry *next; /* Next in hash chain */ - unsigned hash_key; /* Hash key */ - unsigned uc; /* Use count */ - struct rta *src; /* Source rta entry */ - byte dest; /* Chosen route destination type (RTD_...) 
*/ - byte nexthop_linkable; /* Nexthop list is completely non-device */ - u32 igp_metric; /* Chosen route IGP metric */ + struct rt_table_export_hook { + union { + struct rt_export_hook h; + struct { /* Overriding the parent structure beginning */ + node _n; + struct rt_table_exporter *table; + }; + }; + + union { + u32 feed_index; /* Routing table iterator used during feeding */ + struct { + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ + union { /* Last net visited but not processed */ + net_addr walk_last; + net_addr_ip4 walk_last_ip4; + net_addr_ip6 walk_last_ip6; + }; + }; + }; + + struct rt_pending_export *_Atomic last_export;/* Last export processed */ + struct rt_pending_export *rpe_next; /* Next pending export to process */ + + u8 refeed_pending; /* Refeeding and another refeed is scheduled */ + u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ + }; - typedef struct rte { - struct rte *next; - net *net; /* Network this RTE belongs to */ - struct rte_src *src; /* Route source that created the route */ - struct channel *sender; /* Channel used to send the route to the routing table */ - struct rta *attrs; /* Attributes of this route */ - u32 id; /* Table specific route id */ - byte flags; /* Flags (REF_...) 
*/ - byte pflags; /* Protocol-specific flags */ - btime lastmod; /* Last modified */ - } rte; + #define TIS_DOWN 0 + #define TIS_UP 1 + #define TIS_STOP 2 + #define TIS_FLUSHING 3 + #define TIS_WAITING 4 + #define TIS_CLEARED 5 + #define TIS_MAX 6 + + #define TES_DOWN 0 + #define TES_HUNGRY 1 + #define TES_FEEDING 2 + #define TES_READY 3 + #define TES_STOP 4 + #define TES_MAX 5 + + + #define TFT_FIB 1 + #define TFT_TRIE 2 + #define TFT_HASH 3 - #define REF_COW 1 /* Copy this rte on write */ - #define REF_FILTERED 2 /* Route is rejected by import filter */ - #define REF_STALE 4 /* Route is stale in a refresh cycle */ - #define REF_DISCARD 8 /* Route is scheduled for discard */ - #define REF_MODIFY 16 /* Route is scheduled for modify */ + void rt_request_import(rtable *tab, struct rt_import_request *req); + void rt_request_export(rtable *tab, struct rt_export_request *req); + void rt_request_export_other(struct rt_exporter *tab, struct rt_export_request *req); - /* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ - static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } + void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); + void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); - /* Route just has REF_FILTERED flag */ - static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } + const char *rt_import_state_name(u8 state); + const char *rt_export_state_name(u8 state); + static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } + static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? 
eh->export_state : TES_DOWN; } + + u8 rt_set_export_state(struct rt_export_hook *hook, u32 expected_mask, u8 state); + + void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); + + /* + * For table export processing + */ + + /* Get next rpe. If src is given, it must match. */ + struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src); + + /* Walk all rpe's */ + #define RPE_WALK(first, it, src) \ + for (struct rt_pending_export *it = (first); it; it = rpe_next(it, (src))) + + /* Mark the pending export processed */ + void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + + #define rpe_mark_seen_all(hook, first, last, src) do { \ + RPE_WALK((first), _rpe, (src)) { \ + rpe_mark_seen((hook), _rpe); \ + if (_rpe == last) break; \ + }} while (0) + + /* Get pending export seen status */ + int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + + /* + * For rt_export_hook and rt_exporter inheritance + */ + + void rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook); + struct rt_export_hook *rt_alloc_export(struct rt_exporter *re, pool *pool, uint size); + void rt_stop_export_common(struct rt_export_hook *hook); + void rt_export_stopped(struct rt_export_hook *hook); + void rt_exporter_init(struct rt_exporter *re); + + /* + * Channel export hooks. To be refactored out. 
+ */ + + int channel_preimport(struct rt_import_request *req, rte *new, const rte *old); + + void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first, struct rt_pending_export *last, const rte **feed, uint count); + + void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first, struct rt_pending_export *last, const rte **feed, uint count); + void rt_notify_accepted(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first, struct rt_pending_export *last, const rte **feed, uint count); + void rt_notify_merged(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first, struct rt_pending_export *last, const rte **feed, uint count); + + void channel_rpe_mark_seen(struct channel *c, struct rt_pending_export *rpe); /* Types of route announcement, also used as flags */ #define RA_UNDEF 0 /* Undefined RA type */ @@@ -419,348 -706,22 +707,25 @@@ struct rt_show_data_rtable * rt_show_ad #define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ #define RSEM_EXPORTED 4 /* Routes marked in export map */ - /* - * Route Attributes - * - * Beware: All standard BGP attributes must be represented here instead - * of making them local to the route. This is needed to ensure proper - * construction of BGP route attribute lists. 
- */ - - /* Nexthop structure */ - struct nexthop { - ip_addr gw; /* Next hop */ - struct iface *iface; /* Outgoing interface */ - struct nexthop *next; - byte flags; - byte weight; - byte labels_orig; /* Number of labels before hostentry was applied */ - byte labels; /* Number of all labels */ - u32 label[0]; - }; - - #define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ - - - struct rte_src { - struct rte_src *next; /* Hash chain */ - struct proto *proto; /* Protocol the source is based on */ - u64 private_id; /* Private ID, assigned by the protocol */ - u32 global_id; /* Globally unique ID of the source */ - unsigned uc; /* Use count */ - }; - - - typedef struct rta { - struct rta *next, **pprev; /* Hash chain */ - u32 uc; /* Use count */ - u32 hash_key; /* Hash over important fields */ - struct ea_list *eattrs; /* Extended Attribute chain */ - struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - ip_addr from; /* Advertising router */ - u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - u16 cached:1; /* Are attributes cached? */ - u16 source:7; /* Route source (RTS_...) */ - u16 scope:4; /* Route scope (SCOPE_... -- see ip.h) */ - u16 dest:4; /* Route destination type (RTD_...) 
*/ - word pref; - struct nexthop nh; /* Next hop */ - } rta; - - #define RTS_STATIC 1 /* Normal static route */ - #define RTS_INHERIT 2 /* Route inherited from kernel */ - #define RTS_DEVICE 3 /* Device route */ - #define RTS_STATIC_DEVICE 4 /* Static device route */ - #define RTS_REDIRECT 5 /* Learned via redirect */ - #define RTS_RIP 6 /* RIP route */ - #define RTS_OSPF 7 /* OSPF route */ - #define RTS_OSPF_IA 8 /* OSPF inter-area route */ - #define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */ - #define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ - #define RTS_BGP 11 /* BGP route */ - #define RTS_PIPE 12 /* Inter-table wormhole */ - #define RTS_BABEL 13 /* Babel route */ - #define RTS_RPKI 14 /* Route Origin Authorization */ - #define RTS_PERF 15 /* Perf checker */ - #define RTS_L3VPN 16 /* MPLS L3VPN */ - #define RTS_AGGREGATED 17 /* Aggregated route */ - #define RTS_MAX 18 - - #define RTD_NONE 0 /* Undefined next hop */ - #define RTD_UNICAST 1 /* Next hop is neighbor router */ - #define RTD_BLACKHOLE 2 /* Silently drop packets */ - #define RTD_UNREACHABLE 3 /* Reject as unreachable */ - #define RTD_PROHIBIT 4 /* Administratively prohibited */ - #define RTD_MAX 5 - - #define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other - protocol-specific metric is availabe */ - - - extern const char * rta_dest_names[RTD_MAX]; - - static inline const char *rta_dest_name(uint n) - { return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } - - /* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ - static inline int rte_is_reachable(rte *r) - { return r->attrs->dest == RTD_UNICAST; } - - - /* - * Extended Route Attributes - */ - - typedef struct eattr { - word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */ - byte flags; /* Protocol-dependent flags */ - byte type:5; /* Attribute type */ - byte originated:1; /* The attribute has originated locally */ - byte fresh:1; /* An uncached attribute (e.g. 
modified in export filter) */ - byte undef:1; /* Explicitly undefined */ - union { - uintptr_t data; - const struct adata *ptr; /* Attribute data elsewhere */ - } u; - } eattr; - - - #define EA_CODE(proto,id) (((proto) << 8) | (id)) - #define EA_ID(ea) ((ea) & 0xff) - #define EA_PROTO(ea) ((ea) >> 8) - #define EA_CUSTOM(id) ((id) | EA_CUSTOM_BIT) - #define EA_IS_CUSTOM(ea) ((ea) & EA_CUSTOM_BIT) - #define EA_CUSTOM_ID(ea) ((ea) & ~EA_CUSTOM_BIT) - - const char *ea_custom_name(uint ea); - - #define EA_GEN_IGP_METRIC EA_CODE(PROTOCOL_NONE, 0) - #define EA_MPLS_LABEL EA_CODE(PROTOCOL_NONE, 1) - #define EA_MPLS_POLICY EA_CODE(PROTOCOL_NONE, 2) - #define EA_MPLS_CLASS EA_CODE(PROTOCOL_NONE, 3) - - #define EA_CODE_MASK 0xffff - #define EA_CUSTOM_BIT 0x8000 - #define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */ - #define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */ - #define EA_BIT_GET(ea) ((ea) >> 24) - #define EA_DATA_ALIGN 4 /* Alignment of adata in attribute cache */ - - #define EAF_TYPE_MASK 0x1f /* Mask with this to get type */ - #define EAF_TYPE_INT 0x01 /* 32-bit unsigned integer number */ - #define EAF_TYPE_OPAQUE 0x02 /* Opaque byte string (not filterable) */ - #define EAF_TYPE_IP_ADDRESS 0x04 /* IP address */ - #define EAF_TYPE_ROUTER_ID 0x05 /* Router ID (IPv4 address) */ - #define EAF_TYPE_AS_PATH 0x06 /* BGP AS path (encoding per RFC 1771:4.3) */ - #define EAF_TYPE_BITFIELD 0x09 /* 32-bit embedded bitfield */ - #define EAF_TYPE_INT_SET 0x0a /* Set of u32's (e.g., a community list) */ - #define EAF_TYPE_EC_SET 0x0e /* Set of pairs of u32's - ext. 
community list */ - #define EAF_TYPE_LC_SET 0x12 /* Set of triplets of u32's - large community list */ - #define EAF_TYPE_IFACE 0x14 /* Interface pointer stored in adata */ - #define EAF_TYPE_STRING 0x16 /* Text string */ - #define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ - #define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type spec) */ - - typedef struct adata { - uint length; /* Length of data */ - byte data[0]; - } adata; - - extern const adata null_adata; /* adata of length 0 */ - - static inline struct adata * - lp_alloc_adata(struct linpool *pool, uint len) - { - struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); - ad->length = len; - return ad; - } - - static inline int adata_same(const struct adata *a, const struct adata *b) - { return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } - - - typedef struct ea_list { - struct ea_list *next; /* In case we have an override list */ - byte flags; /* Flags: EALF_... 
*/ - byte rfu; - word count; /* Number of attributes */ - eattr attrs[0]; /* Attribute definitions themselves */ - } ea_list; - - #define EALF_SORTED 1 /* Attributes are sorted by code */ - #define EALF_BISECT 2 /* Use interval bisection for searching */ - #define EALF_CACHED 4 /* Attributes belonging to cached rta */ - - struct rte_src *rt_find_source(struct proto *p, u32 id); - struct rte_src *rt_get_source(struct proto *p, u32 id); - static inline void rt_lock_source(struct rte_src *src) { src->uc++; } - static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } - void rt_prune_sources(void); - - struct ea_walk_state { - ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ - eattr *ea; /* Current eattr, initially NULL */ - u32 visited[4]; /* Bitfield, limiting max to 128 */ - }; - - eattr *ea_find(ea_list *, unsigned ea); - eattr *ea_walk(struct ea_walk_state *s, uint id, uint max); - uintptr_t ea_get_int(ea_list *, unsigned ea, uintptr_t def); - void ea_dump(ea_list *); - void ea_sort(ea_list *); /* Sort entries in all sub-lists */ - unsigned ea_scan(ea_list *); /* How many bytes do we need for merged ea_list */ - void ea_merge(ea_list *from, ea_list *to); /* Merge sub-lists to allocated buffer */ - int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */ - uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ - ea_list *ea_append(ea_list *to, ea_list *what); - void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); - - #define ea_normalize(ea) do { \ - if (ea->next) { \ - ea_list *t = alloca(ea_scan(ea)); \ - ea_merge(ea, t); \ - ea = t; \ - } \ - ea_sort(ea); \ - if (ea->count == 0) \ - ea = NULL; \ - } while(0) \ - - struct ea_one_attr_list { - ea_list l; - eattr a; + /* Host entry: Resolve hook for recursive nexthops */ + extern struct ea_class ea_gen_hostentry; + struct hostentry_adata { + adata ad; + struct hostentry *he; + u32 labels[0]; }; - 
static inline eattr * - ea_set_attr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, uintptr_t val) - { - struct ea_one_attr_list *ea = lp_alloc(pool, sizeof(*ea)); - *ea = (struct ea_one_attr_list) { - .l.flags = EALF_SORTED, - .l.count = 1, - .l.next = *to, - - .a.id = id, - .a.type = type, - .a.flags = flags, - }; + #define HOSTENTRY_LABEL_COUNT(head) (head->ad.length + sizeof(struct adata) - sizeof(struct hostentry_adata)) / sizeof(u32) - if (type & EAF_EMBEDDED) - ea->a.u.data = val; - else - ea->a.u.ptr = (struct adata *) val; + void + ea_set_hostentry(ea_list **to, rtable *dep, rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); - *to = &ea->l; - - return &ea->a; - } - - static inline void - ea_unset_attr(ea_list **to, struct linpool *pool, _Bool local, uint code) - { - struct ea_one_attr_list *ea = lp_alloc(pool, sizeof(*ea)); - *ea = (struct ea_one_attr_list) { - .l.flags = EALF_SORTED, - .l.count = 1, - .l.next = *to, - .a.id = code, - .a.fresh = local, - .a.originated = local, - .a.undef = 1, - }; - - *to = &ea->l; - } - - static inline void - ea_set_attr_u32(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, u32 val) - { ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - - static inline void - ea_set_attr_ptr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, struct adata *val) - { ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - - static inline void - ea_set_attr_data(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, void *data, uint len) - { - struct adata *a = lp_alloc_adata(pool, len); - memcpy(a->data, data, len); - ea_set_attr(to, pool, id, flags, type, (uintptr_t) a); - } - - - #define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) - - static inline size_t nexthop_size(const struct nexthop *nh) - { return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } - int nexthop__same(struct nexthop *x, struct nexthop 
*y); /* Compare multipath nexthops */ - static inline int nexthop_same(struct nexthop *x, struct nexthop *y) - { return (x == y) || nexthop__same(x, y); } - int nexthop_equal_(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops, ignore labels_orig */ - static inline int nexthop_equal(struct nexthop *x, struct nexthop *y) - { return (x == y) || nexthop_equal_(x, y); } - struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); - struct nexthop *nexthop_sort(struct nexthop *x); - static inline void nexthop_link(struct rta *a, const struct nexthop *from) - { memcpy(&a->nh, from, nexthop_size(from)); } - void nexthop_insert(struct nexthop **n, struct nexthop *y); - int nexthop_is_sorted(struct nexthop *x); - - void rta_init(void); - static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } - #define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) - rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ - static inline int rta_is_cached(rta *r) { return r->cached; } - static inline rta *rta_clone(rta *r) { r->uc++; return r; } - void rta__free(rta *r); - static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } - rta *rta_do_cow(rta *o, linpool *lp); - static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? 
rta_do_cow(r, lp) : r; } - void rta_dump(rta *); - void rta_dump_all(void); - void rta_show(struct cli *, rta *); + void ea_show_hostentry(const struct adata *ad, byte *buf, uint size); + void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad); ++/* CLI command to set logging rate */ +void table_logging_cmd(struct table_spec ts, struct tbf_config *rate); + - u32 rt_get_igp_metric(rte *rt); - struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); - void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); - - static inline void - rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) - { - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); - } - - /* - * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills - * rta->hostentry field. New hostentry has zero use count. Cached rta locks its - * hostentry (increases its use count), uncached rta does not lock it. Hostentry - * with zero use count is removed asynchronously during host cache update, - * therefore it is safe to hold such hostentry temorarily. Hostentry holds a - * lock for a 'source' rta, mainly to share multipath nexthops. - * - * There is no need to hold a lock for hostentry->dep table, because that table - * contains routes responsible for that hostentry, and therefore is non-empty if - * given hostentry has non-zero use count. If the hostentry has zero use count, - * the entry is removed before dep is referenced. - * - * The protocol responsible for routes with recursive next hops should hold a - * lock for a 'source' table governing that routes (argument tab to - * rta_set_recursive_next_hop()), because its routes reference hostentries - * (through rta) related to the governing table. When all such routes are - * removed, rtas are immediately removed achieving zero uc. 
Then the 'source' - * table lock could be immediately released, although hostentries may still - * exist - they will be freed together with the 'source' table. - */ - - static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; } - static inline void rt_unlock_hostentry(struct hostentry *he) { if (he) he->uc--; } - - int rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior); - - /* * Default protocol preferences */ diff --cc nest/rt-table.c index 81104673c,79f74d24a..50fd08a15 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@@ -110,7 -108,8 +108,9 @@@ #include "lib/string.h" #include "lib/alloca.h" #include "lib/flowspec.h" +#include "nest/cli.h" + #include "lib/idm.h" + #include "lib/netindex_private.h" #ifdef CONFIG_BGP #include "proto/bgp/bgp.h" @@@ -1005,159 -1348,262 +1349,262 @@@ rt_next_export(struct rt_table_export_h } static inline void - rte_free_quick(rte *e) + rt_send_export_event(struct rt_export_hook *hook) { - rt_unlock_source(e->src); - rta_free(e->attrs); - sl_free(e); + ev_send(hook->req->list, &hook->event); } - int - rte_same(rte *x, rte *y) + static void + rt_announce_exports(struct settle *s) { - /* rte.flags / rte.pflags are not checked, as they are internal to rtable */ - return - x->attrs == y->attrs && - x->src == y->src && - rte_is_filtered(x) == rte_is_filtered(y); + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, export_settle, s)), tab) + if (!EMPTY_LIST(tab->exporter.pending)) + { + struct rt_export_hook *c; node *n; + WALK_LIST2(c, n, tab->exporter.e.hooks, n) + { + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) + continue; + + rt_send_export_event(c); + } + } } - static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } + static void + rt_kick_export_settle(struct rtable_private *tab) + { + tab->export_settle.cf = tab->rr_counter ? 
tab->config->export_rr_settle : tab->config->export_settle; + settle_kick(&tab->export_settle, tab->loop); + } static void - rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) + rt_import_announce_exports(void *_hook) { - struct proto *p = c->proto; - struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; - rte *before_old = NULL; - rte *old_best = net->routes; - rte *old = NULL; - rte **k; + struct rt_import_hook *hook = _hook; + if (hook->import_state == TIS_CLEARED) + { + void (*stopped)(struct rt_import_request *) = hook->stopped; + struct rt_import_request *req = hook->req; - k = &net->routes; /* Find and remove original route from the same protocol */ - while (old = *k) + RT_LOCKED(hook->table, tab) { - if (old->src == src) - { - /* If there is the same route in the routing table but from - * a different sender, then there are two paths from the - * source protocol to this routing table through transparent - * pipes, which is not allowed. - * - * We log that and ignore the route. 
If it is withdraw, we - * ignore it completely (there might be 'spurious withdraws', - * see FIXME in do_rte_announce()) - */ - if (old->sender->proto != p) - { - if (new) - { - log_rl(&table->log_tbf, L_ERR "Pipe collision detected when sending %N to table %s", - net->n.addr, table->name); - rte_free_quick(new); - } - return; - } + req->hook = NULL; - if (new && rte_same(old, new)) - { - /* No changes, ignore the new route and refresh the old one */ + rt_trace(tab, D_EVENTS, "Hook %s stopped", req->name); + rem_node(&hook->n); + mb_free(hook); + rt_unlock_table(tab); + } - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); + stopped(req); + return; + } - if (!rte_is_filtered(new)) - { - stats->imp_updates_ignored++; - rte_trace_in(D_ROUTES, c, new, "ignored"); - } + rt_trace(hook->table, D_EVENTS, "Announcing exports after imports from %s", hook->req->name); + birdloop_flag(hook->table->loop, RTF_EXPORT); + } - rte_free_quick(new); - return; - } - *k = old->next; - table->rt_count--; - break; - } - k = &old->next; - before_old = old; - } + static struct rt_pending_export * + rt_last_export(struct rt_table_exporter *tab) + { + struct rt_pending_export *rpe = NULL; - /* Save the last accessed position */ - rte **pos = k; + if (!EMPTY_LIST(tab->pending)) + { + /* We'll continue processing exports from this export on */ + struct rt_export_block *reb = TAIL(tab->pending); + ASSERT_DIE(reb->end); + rpe = &reb->export[reb->end - 1]; + } - if (!old) - before_old = NULL; + return rpe; + } - if (!old && !new) + #define RT_EXPORT_BULK 1024 + + static void + rt_export_hook(void *_data) + { + struct rt_table_export_hook *c = _data; + rtable *tab = SKIP_BACK(rtable, priv.exporter, c->table); + + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_READY); + + if (!c->rpe_next) + RT_LOCKED(tab, tp) { - stats->imp_withdraws_ignored++; - return; + c->rpe_next = rt_next_export(c, c->table); + + if (!c->rpe_next) + { + rt_export_used(c->table, 
c->h.req->name, "done exporting"); + return; + } } - int new_ok = rte_is_ok(new); - int old_ok = rte_is_ok(old); + int used = 0; + int no_next = 0; - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) + /* Process the export */ + for (uint i=0; i<RT_EXPORT_BULK; i++) + { + used += rte_export(c, c->rpe_next); + + if (!c->rpe_next) { - u32 all_routes = stats->imp_routes + stats->filt_routes; + no_next = 1; + break; + } + } - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); + if (used) + RT_LOCKED(tab, t) + if (no_next || t->cork_active) + rt_export_used(c->table, c->h.req->name, no_next ? "finished export bulk" : "cork active"); - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ + rt_send_export_event(&c->h); + } - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - rte_free_quick(new); - return; - } - } - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); + static inline int + rte_validate(struct channel *ch, rte *e) + { + int c; + const net_addr *n = e->net; - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. We - shouldn't just drop the route, we should handle it like - it was filtered. We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. 
*/ + #define IGNORING(pre, post) do { \ + log(L_WARN "%s.%s: Ignoring " pre " %N " post, ch->proto->name, ch->name, n); \ + return 0; \ + } while (0) - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); + if (!net_validate(n)) + IGNORING("bogus prefix", ""); - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - { rte_free_quick(new); new = NULL; } + /* FIXME: better handling different nettypes */ + c = !net_is_flow(n) ? + net_classify(n): (IADDR_HOST | SCOPE_UNIVERSE); + if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) + IGNORING("bogus route", ""); + + if (net_type_match(n, NB_DEST)) + { + eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop); + int dest = nhea_dest(nhea); + + if (dest == RTD_NONE) + IGNORING("route", "with no destination"); + + if ((dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + IGNORING("unsorted multipath route", ""); + } + else if (ea_find(e->attrs, &ea_gen_nexthop)) + IGNORING("route", "having a superfluous nexthop attribute"); + + return 1; + } + + int + rte_same(const rte *x, const rte *y) + { + /* rte.flags / rte.pflags are not checked, as they are internal to rtable */ + return + (x == y) || ( + (x->attrs == y->attrs) || + ((!(x->attrs->flags & EALF_CACHED) || !(y->attrs->flags & EALF_CACHED)) && ea_same(x->attrs, y->attrs)) + ) && + x->src == y->src && + rte_is_filtered(x) == rte_is_filtered(y); + } - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ + static inline int rte_is_ok(const rte *e) { return e && !rte_is_filtered(e); } + + static int + rte_recalculate(struct rtable_private *table, struct rt_import_hook *c, struct netindex *i, net *net, rte *new, struct rte_src *src) + { + struct rt_import_request *req = c->req; + struct rt_import_stats *stats = &c->stats; + struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; + const rte *old_best = old_best_stored ? 
&old_best_stored->rte : NULL; + const rte *old = NULL; + + /* If the new route is identical to the old one, we find the attributes in + * cache and clone these with no performance drop. OTOH, if we were to lookup + * the attributes, such a route definitely hasn't been anywhere yet, + * therefore it's definitely worth the time. */ + struct rte_storage *new_stored = NULL; + if (new) + { + new_stored = rte_store(new, i, table); + new = RTES_WRITE(new_stored); + } - if (!old && !new) - return; + /* Find and remove original route from the same protocol */ + struct rte_storage **before_old = rte_find(net, src); - new_ok = 0; - goto skip_stats1; + if (*before_old) + { + old = &(old_stored = (*before_old))->rte; + + /* If there is the same route in the routing table but from + * a different sender, then there are two paths from the + * source protocol to this routing table through transparent + * pipes, which is not allowed. + * We log that and ignore the route. */ + if (old->sender != c) + { + if (!old->generation && !new->generation) + bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", + c->table->name, i->addr, old->src->owner->name, old->src->private_id, old->src->global_id); + - log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", ++ log_rl(&table->log_tbf, L_ERR "Route source collision in table %s: %N %s/%u:%u", + c->table->name, i->addr, old->src->owner->name, old->src->private_id, old->src->global_id); } + + if (new && rte_same(old, &new_stored->rte)) + { + /* No changes, ignore the new route and refresh the old one */ + old_stored->stale_cycle = new->stale_cycle; + + if (!rte_is_filtered(new)) + { + stats->updates_ignored++; + rt_rte_trace_in(D_ROUTES, req, new, "ignored"); + } + + /* We need to free the already stored route here before returning */ + rte_free(new_stored, table); + return 0; + } + + *before_old = (*before_old)->next; + table->rt_count--; + } + + if (!old && !new) + { + 
stats->withdraws_ignored++; + return 0; } + /* If rejected by import limit, we need to pretend there is no route */ + if (req->preimport && (req->preimport(req, new, old) == 0)) + { + rte_free(new_stored, table); + new_stored = NULL; + new = NULL; + } + + int new_ok = rte_is_ok(new); + int old_ok = rte_is_ok(old); + if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); @@@ -1960,10 -2815,12 +2816,15 @@@ rt_setup(pool *pp, struct rtable_confi t->config = cf; t->addr_type = cf->addr_type; t->debug = cf->debug; + t->id = idm_alloc(&rtable_idm); + if (t->id >= rtable_max_id) + rtable_max_id = t->id + 1; + + t->log_tbf.cf.rate = cf->log_tbf_cf.rate; + t->log_tbf.cf.burst = cf->log_tbf_cf.burst; + - fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + t->netindex = rt_global_netindex_hash; + t->routes = mb_allocz(p, (t->routes_block_size = 128) * sizeof(net)); if (cf->trie_used) { @@@ -1980,20 -2833,43 +2837,41 @@@ hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); - if (!(t->internal = cf->internal)) - { - t->rt_event = ev_new_init(p, rt_event, t); - t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); - t->last_rt_change = t->gc_time = current_time(); + t->fh = (struct birdloop_flag_handler) { .hook = rt_flag_handler, }; + t->nhu_uncork_event = ev_new_init(p, rt_nhu_uncork, t); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); + t->last_rt_change = t->gc_time = current_time(); - if (rt_is_flow(t)) - { - t->flowspec_trie = f_new_trie(lp_new_default(p), 0); - t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); - } + t->export_settle = SETTLE_INIT(&cf->export_settle, rt_announce_exports, NULL); + + t->exporter = (struct rt_table_exporter) { + .e = { + .class = &rt_table_exporter_class, + 
.addr_type = t->addr_type, + .rp = t->rp, + }, + .next_seq = 1, + }; + + rt_exporter_init(&t->exporter.e); + + init_list(&t->exporter.pending); + + t->cork_threshold = cf->cork_threshold; + - t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; - + if (rt_is_flow(RT_PUB(t))) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } - return t; + UNLOCK_DOMAIN(rtable, dom); + + /* Setup the service thread flag handler */ + birdloop_flag_set_handler(t->loop, &t->fh); + birdloop_leave(t->loop); + + return RT_PUB(t); } /** @@@ -2733,9 -4082,23 +4084,25 @@@ rt_reconfigure(struct rtable_private *t tab->name = new->name; tab->config = new; tab->debug = new->debug; + tab->log_tbf.cf.rate = new->log_tbf_cf.rate; + tab->log_tbf.cf.burst = new->log_tbf_cf.burst; + if (tab->hostcache) + tab->hostcache->req.trace_routes = new->debug; + + struct rt_table_export_hook *hook; node *n; + WALK_LIST2(hook, n, tab->exporter.e.hooks, h.n) + if (hook->h.req->export_one == rt_flowspec_export_one) + hook->h.req->trace_routes = new->debug; + + tab->cork_threshold = new->cork_threshold; + + if (new->cork_threshold.high != old->cork_threshold.high) + rt_check_cork_high(tab); + + if (new->cork_threshold.low != old->cork_threshold.low) + rt_check_cork_low(tab); + return 1; } @@@ -3483,50 -4852,6 +4856,50 @@@ rt_get_hostentry(struct rtable_private return he; } +void - cmd_logging_rate(rtable *table, struct tbf_config *rate) ++cmd_logging_rate(rtable *tp, struct tbf_config *rate) +{ - table->log_tbf.cf.rate = rate->rate; - table->log_tbf.cf.burst = rate->burst; ++ RT_LOCKED(tp, table) ++ { ++ table->log_tbf.cf.rate = rate->rate; ++ table->log_tbf.cf.burst = rate->burst; ++ } +} + +void +table_logging_cmd(struct table_spec ts, struct tbf_config *rate) +{ + if (ts.patt) + { + const char *patt = (void *) ts.ptr; + int cnt = 0; + rtable *t; + node *n; + + WALK_LIST2(t, n, routing_tables, n) + if (!ts.ptr || patmatch(patt, t->name)) + { + 
cmd_logging_rate(t, rate); + cnt++; + } + + if (!cnt) + cli_msg(8003, "No tables match"); + else + cli_msg(0, ""); + } + else + { + const struct symbol *s = (struct symbol*) ts.ptr; + if (s->table->table) + { + cmd_logging_rate(s->table->table, rate); + cli_msg(0, ""); + } + else + cli_msg(9002, "%s does not exist", s->name); + } +} /* * Documentation for functions declared inline in route.h diff --cc proto/babel/babel.c index 7508a5e67,e215977de..d86c7a700 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@@ -2644,27 -2704,6 +2707,26 @@@ babel_reconfigure(struct proto *P, stru return 1; } +void +babel_set_logging_rate(struct proto *P, uintptr_t arg) +{ + struct babel_proto *p = (void *) P; + struct cmd_logging_rate_info *info = (struct cmd_logging_rate_info*) arg; + struct logging_rate_targets *targets = info->targets; + while (targets) + { + if (targets->target == TBF_BABEL_PKT || targets->target == TBF_ALL) + { + p->log_pkt_tbf.cf.rate = info->tbfc->rate; + p->log_pkt_tbf.cf.burst = info->tbfc->burst; + } + else + cli_msg(9002, "protocol %s: wrong logging rate change type for babel protocol", P->name); + targets = targets->next; + } +} + + - struct protocol proto_babel = { .name = "Babel", .template = "babel%d", diff --cc proto/babel/config.Y index 23fd7f792,6a7c071f9..769fa32ad --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@@ -24,10 -24,9 +24,10 @@@ CF_DECL CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK, - NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS, + NEXT, HOP, IPV4, IPV6, SHOW, INTERFACES, NEIGHBORS, ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE, - EXTENDED, TUNNEL, RTT, MIN, MAX, DECAY, SEND, TIMESTAMPS, COST, DELAY) + EXTENDED, TUNNEL, RTT, MIN, MAX, DECAY, SEND, TIMESTAMPS, COST, DELAY, + PKT, LOGGING, RATE, BURST) CF_GRAMMAR diff --cc proto/rip/config.Y index 84b0f49d8,652084423..6a02fc3a7 --- 
a/proto/rip/config.Y +++ b/proto/rip/config.Y @@@ -38,7 -37,7 +37,7 @@@ CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGH PASSIVE, VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO, TIME, BFD, AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5, TTL, SECURITY, RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK, - DEMAND, CIRCUIT, RIP_METRIC, RIP_TAG, PKT, RTE, LOGGING, RATE, BURST) - DEMAND, CIRCUIT) ++ DEMAND, CIRCUIT, PKT, RTE, LOGGING, RATE, BURST) %type rip_variant rip_auth diff --cc proto/rip/rip.c index d4c8eaa0e,bc5bd8b51..c82134252 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@@ -1216,38 -1296,14 +1301,38 @@@ rip_reconfigure(struct proto *P, struc return 1; } +void +rip_set_logging_rate(struct proto *P, uintptr_t arg) +{ + struct rip_proto *p = (void *) P; + struct cmd_logging_rate_info *info = (struct cmd_logging_rate_info*) arg; + struct logging_rate_targets *targets = info->targets; + while (targets) + { + if (targets->target == TBF_RIP_PKT || targets->target == TBF_ALL) + { + p->log_pkt_tbf.cf.rate = info->tbfc->rate; + p->log_pkt_tbf.cf.burst = info->tbfc->burst; + } + else if (targets->target == TBF_RIP_RTE || targets->target == TBF_ALL) + { + p->log_rte_tbf.cf.rate = info->tbfc->rate; + p->log_rte_tbf.cf.burst = info->tbfc->burst; + } + else + cli_msg(9002, "protocol %s: wrong logging rate change type for rip protocol", P->name); + targets = targets->next; + } +} + static void - rip_get_route_info(rte *rte, byte *buf) + rip_get_route_info(const rte *rte, byte *buf) { - struct rip_proto *p = (struct rip_proto *) rte->src->proto; - u32 rt_metric = ea_get_int(rte->attrs->eattrs, EA_RIP_METRIC, p->infinity); - u32 rt_tag = ea_get_int(rte->attrs->eattrs, EA_RIP_TAG, 0); + struct rip_proto *p = rip_rte_proto(rte); + u32 rt_metric = ea_get_int(rte->attrs, &ea_rip_metric, p->infinity); + u32 rt_tag = ea_get_int(rte->attrs, &ea_rip_tag, 0); - buf += bsprintf(buf, " (%d/%d)", rte->attrs->pref, rt_metric); + buf += bsprintf(buf, " (%d/%d)", rt_get_preference(rte), 
rt_metric); if (rt_tag) bsprintf(buf, " [%04x]", rt_tag);