X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=proto%2Fbgp%2Fattrs.c;h=39297dd7bf2e877da80582956893294b2c5d9a57;hb=0b228fca04c8a9a81af6a4973877ceba9aede3f0;hp=fdc981ca9e38d8a569a9b77c475d3561b7a06eee;hpb=d807ea087f8d60e25eaef8c10168a40ca6545c57;p=thirdparty%2Fbird.git diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index fdc981ca9..39297dd7b 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -34,7 +34,7 @@ * are probably inadequate. * * Loop detection based on AS_PATH causes updates to be withdrawn. RFC - * 4271 does not explicitly specifiy the behavior in that case. + * 4271 does not explicitly specify the behavior in that case. * * Loop detection related to route reflection (based on ORIGINATOR_ID * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8 @@ -84,24 +84,14 @@ bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintp { ASSERT(bgp_attr_known(code)); - ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - eattr *e = &a->attrs[0]; - - a->flags = EALF_SORTED; - a->count = 1; - a->next = *attrs; - *attrs = a; - - e->id = EA_CODE(EAP_BGP, code); - e->type = bgp_attr_table[code].type; - e->flags = flags; - - if (e->type & EAF_EMBEDDED) - e->u.data = (u32) val; - else - e->u.ptr = (struct adata *) val; - - return e; + return ea_set_attr( + attrs, + pool, + EA_CODE(PROTOCOL_BGP, code), + flags, + bgp_attr_table[code].type, + val + ); } @@ -191,7 +181,7 @@ bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size } static int -bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len) +bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint len) { if (size < (4+len)) return -1; @@ -209,6 +199,179 @@ bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) } +/* + * AIGP handling + */ + +static int +bgp_aigp_valid(byte *data, uint len, char *err, uint elen) +{ + byte *pos = data; + char *err_dsc = NULL; + uint 
err_val = 0; + +#define BAD(DSC,VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; }) + while (len) + { + if (len < 3) + BAD("TLV framing error", len); + + /* Process one TLV */ + uint ptype = pos[0]; + uint plen = get_u16(pos + 1); + + if (len < plen) + BAD("TLV framing error", plen); + + if (plen < 3) + BAD("Bad TLV length", plen); + + if ((ptype == BGP_AIGP_METRIC) && (plen != 11)) + BAD("Bad AIGP TLV length", plen); + + ADVANCE(pos, len, plen); + } +#undef BAD + + return 1; + +bad: + if (err) + if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0) + err[0] = 0; + + return 0; +} + +static const byte * +bgp_aigp_get_tlv(const struct adata *ad, uint type) +{ + if (!ad) + return NULL; + + uint len = ad->length; + const byte *pos = ad->data; + + while (len) + { + uint ptype = pos[0]; + uint plen = get_u16(pos + 1); + + if (ptype == type) + return pos; + + ADVANCE(pos, len, plen); + } + + return NULL; +} + +static const struct adata * +bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen) +{ + uint len = ad ? ad->length : 0; + const byte *pos = ad ? 
ad->data : NULL; + struct adata *res = lp_alloc_adata(pool, len + 3 + dlen); + byte *dst = res->data; + byte *tlv = NULL; + int del = 0; + + while (len) + { + uint ptype = pos[0]; + uint plen = get_u16(pos + 1); + + /* Find position for new TLV */ + if ((ptype >= type) && !tlv) + { + tlv = dst; + dst += 3 + dlen; + } + + /* Skip first matching TLV, copy others */ + if ((ptype == type) && !del) + del = 1; + else + { + memcpy(dst, pos, plen); + dst += plen; + } + + ADVANCE(pos, len, plen); + } + + if (!tlv) + { + tlv = dst; + dst += 3 + dlen; + } + + /* Store the TLV */ + put_u8(tlv + 0, type); + put_u16(tlv + 1, 3 + dlen); + memcpy(tlv + 3, data, dlen); + + /* Update length */ + res->length = dst - res->data; + + return res; +} + +static u64 UNUSED +bgp_aigp_get_metric(const struct adata *ad, u64 def) +{ + const byte *b = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC); + return b ? get_u64(b + 3) : def; +} + +static const struct adata * +bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric) +{ + byte data[8]; + put_u64(data, metric); + return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, data, 8); +} + +int +bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad) +{ + eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP)); + if (!a) + return 0; + + const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC); + if (!b) + return 0; + + u64 aigp = get_u64(b + 3); + u64 step = e->attrs->igp_metric; + + if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN)) + step = BGP_AIGP_MAX; + + if (!step) + step = 1; + + *ad = a->u.ptr; + *metric = aigp + step; + if (*metric < aigp) + *metric = BGP_AIGP_MAX; + + return 1; +} + +static inline int +bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) +{ + if (e->attrs->source == RTS_BGP) + return 0; + + *metric = rt_get_igp_metric(e); + *ad = NULL; + return *metric < IGP_METRIC_UNKNOWN; +} + + /* * Attribute hooks */ @@ -241,18 +404,27 @@ bgp_format_origin(eattr *a, byte *buf, 
uint size UNUSED) } +static inline int +bgp_as_path_first_as_equal(const byte *data, uint len, u32 asn) +{ + return (len >= 6) && + ((data[0] == AS_PATH_SEQUENCE) || (data[0] == AS_PATH_CONFED_SEQUENCE)) && + (data[1] > 0) && + (get_u32(data+2) == asn); +} + static int bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - byte *data = a->u.ptr->data; + const byte *data = a->u.ptr->data; uint len = a->u.ptr->length; if (!s->as4_session) { /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */ - byte *src = data; - data = alloca(len); - len = as_path_32to16(data, src, len); + byte *dst = alloca(len); + len = as_path_32to16(dst, data, len); + data = dst; } return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len); @@ -263,17 +435,13 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte { struct bgp_proto *p = s->proto; int as_length = s->as4_session ? 4 : 2; + int as_sets = p->cf->allow_as_sets; int as_confed = p->cf->confederation && p->is_interior; char err[128]; - if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err))) + if (!as_path_valid(data, len, as_length, as_sets, as_confed, err, sizeof(err))) WITHDRAW("Malformed AS_PATH attribute - %s", err); - /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */ - if (p->is_interior && !p->is_internal && - ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE))) - WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE"); - if (!s->as4_session) { /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */ @@ -282,6 +450,16 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte len = as_path_16to32(data, src, len); } + /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */ + if (p->is_interior && !p->is_internal && + ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE))) + WITHDRAW("Malformed AS_PATH attribute - %s", "missing 
initial AS_CONFED_SEQUENCE"); + + /* Reject routes with first AS in AS_PATH not matching neighbor AS; RFC 4271 6.3 */ + if (!p->is_internal && p->cf->enforce_first_as && + !bgp_as_path_first_as_equal(data, len, p->remote_as)) + WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS"); + bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len); } @@ -295,15 +473,20 @@ bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size) * store it and encode it later by AFI-specific hooks. */ - if (s->channel->afi == BGP_AF_IPV4) + if (!s->mp_reach) { - ASSERT(a->u.ptr->length == sizeof(ip_addr)); + // ASSERT(a->u.ptr->length == sizeof(ip_addr)); + + /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */ + ip_addr *addr = (void *) a->u.ptr->data; + if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr)) + return 0; if (size < (3+4)) return -1; bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4); - put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data )); + put_ip4(buf+3, ipa_to_ip4(*addr)); return 3+4; } @@ -386,15 +569,15 @@ bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, static int bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - byte *data = a->u.ptr->data; + const byte *data = a->u.ptr->data; uint len = a->u.ptr->length; if (!s->as4_session) { /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */ - byte *src = data; - data = alloca(6); - len = aggregator_32to16(data, src); + byte *dst = alloca(6); + len = aggregator_32to16(dst, data); + data = dst; /* emit the converted 16-bit copy, not the 32-bit original */ } return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len); @@ -420,7 +602,7 @@ bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, b static void bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED) { - byte *data = a->u.ptr->data; + const byte *data = a->u.ptr->data; bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0)); } @@ -550,12 +732,23 
@@ bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint fla static void bgp_export_ext_community(struct bgp_export_state *s, eattr *a) { - a->u.ptr = ec_set_del_nontrans(s->pool, a->u.ptr); + if (!s->proto->is_interior) + { + struct adata *ad = ec_set_del_nontrans(s->pool, a->u.ptr); - if (a->u.ptr->length == 0) - UNSET(a); + if (ad->length == 0) + UNSET(a); + + ec_set_sort_x(ad); + a->u.ptr = ad; + } + else + { + if (a->u.ptr->length == 0) + UNSET(a); - ec_set_sort_x(a->u.ptr); + a->u.ptr = ec_set_sort(s->pool, a->u.ptr); + } } static void @@ -585,6 +778,9 @@ bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flag static void bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { + struct bgp_proto *p = s->proto; + int sets = p->cf->allow_as_sets; + char err[128]; if (s->as4_session) @@ -593,7 +789,7 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt if (len < 6) DISCARD(BAD_LENGTH, "AS4_PATH", len); - if (!as_path_valid(data, len, 4, 1, err, sizeof(err))) + if (!as_path_valid(data, len, 4, sets, 1, err, sizeof(err))) DISCARD("Malformed AS4_PATH attribute - %s", err); struct adata *a = lp_alloc_adata(s->pool, len); @@ -609,6 +805,42 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a); } + +static void +bgp_export_aigp(struct bgp_export_state *s, eattr *a) +{ + if (!s->channel->cf->aigp) + UNSET(a); +} + +static void +bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + char err[128]; + + /* Acceptability test postponed to bgp_finish_attrs() */ + + if ((flags ^ bgp_attr_table[BA_AIGP].flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) + DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags); + + if (!bgp_aigp_valid(data, len, err, sizeof(err))) + DISCARD("Malformed AIGP 
attribute - %s", err); + + bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len); +} + +static void +bgp_format_aigp(eattr *a, byte *buf, uint size UNUSED) +{ + const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC); + + if (!b) + bsprintf(buf, "?"); + else + bsprintf(buf, "%lu", get_u64(b + 3)); +} + + static void bgp_export_large_community(struct bgp_export_state *s, eattr *a) { @@ -701,7 +933,8 @@ bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size) static inline void bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) { - bgp_set_attr_data(to, s->pool, code, flags, data, len); + /* Cannot use bgp_set_attr_data() as it works on known attributes only */ + ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len); } @@ -824,6 +1057,15 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .decode = bgp_decode_as4_aggregator, .format = bgp_format_aggregator, }, + [BA_AIGP] = { + .name = "aigp", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS, + .export = bgp_export_aigp, + .encode = bgp_encode_raw, + .decode = bgp_decode_aigp, + .format = bgp_format_aigp, + }, [BA_LARGE_COMMUNITY] = { .name = "large_community", .type = EAF_TYPE_LC_SET, @@ -856,7 +1098,7 @@ bgp_attr_known(uint code) static inline void bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) { - if (EA_PROTO(a->id) != EAP_BGP) + if (EA_PROTO(a->id) != PROTOCOL_BGP) return; uint code = EA_ID(a->id); @@ -936,7 +1178,7 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) static inline int bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - ASSERT(EA_PROTO(a->id) == EAP_BGP); + ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP); uint code = EA_ID(a->id); @@ -955,6 +1197,7 @@ bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) * * The bgp_encode_attrs() function takes a list of extended attributes * and converts it to 
its BGP representation (a part of an Update message). + * BGP write state may be fake when called from MRT protocol. * * Result: Length of the attribute block generated or -1 if not enough space. */ @@ -1024,7 +1267,8 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui const struct bgp_attr_desc *desc = &bgp_attr_table[code]; /* Handle conflicting flags; RFC 7606 3 (c) */ - if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) + if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && + !(desc->flags & BAF_DECODE_FLAGS)) WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags); desc->decode(s, code, flags, data, len, to); @@ -1108,6 +1352,9 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH)) { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; } + if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP)) + { REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; } + /* When receiving attributes from non-AS4-aware BGP speaker, we have to reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */ if (!p->as4_session) @@ -1150,6 +1397,17 @@ withdraw: return NULL; } +void +bgp_finish_attrs(struct bgp_parse_state *s, rta *a) +{ + /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */ + if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp) + { + REPORT("Discarding AIGP attribute received on non-AIGP session"); + bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP); + } +} + /* * Route bucket hash table @@ -1232,7 +1490,7 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new) if (!(a->type & EAF_EMBEDDED)) { - struct adata *oa = a->u.ptr; + const struct adata *oa = a->u.ptr; struct adata *na = (struct adata *) dest; memcpy(na, oa, sizeof(struct adata) + oa->length); a->u.ptr = na; @@ -1302,7 +1560,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_FN(n,i,h) h #define PXH_REHASH 
bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 20 +#define PXH_PARAMS /8, *2, 2, 2, 8, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) @@ -1371,7 +1629,7 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) */ int -bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) +bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) { rte *e = *new; struct proto *SRC = e->attrs->src->proto; @@ -1386,8 +1644,6 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li if (src == NULL) return 0; - // XXXX: Check next hop AF - /* IBGP route reflection, RFC 4456 */ if (p->is_internal && src->is_internal && (p->local_as == src->local_as)) { @@ -1404,9 +1660,9 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li /* Handle well-known communities, RFC 1997 */ struct eattr *c; if (p->cf->interpret_communities && - (c = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY)))) + (c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)))) { - struct adata *d = c->u.ptr; + const struct adata *d = c->u.ptr; /* Do not export anywhere */ if (int_set_contains(d, BGP_COMM_NO_ADVERTISE)) @@ -1419,14 +1675,15 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li /* Do not export outside of AS (or confederation) */ if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT)) return -1; + + /* Do not export LLGR_STALE routes to LLGR-ignorant peers */ + if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE)) + return -1; } return 0; } - -static adata null_adata; /* adata of length 0 */ - static ea_list * bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool) { @@ -1435,7 +1692,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = 
src, .route = e, .mpls = c->desc->mpls }; ea_list *attrs = attrs0; eattr *a; - adata *ad; + const adata *ad; /* ORIGIN attribute - mandatory, attach if missing */ if (! bgp_find_attr(attrs0, BA_ORIGIN)) @@ -1450,8 +1707,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at ad = as_path_strip_confed(pool, ad); /* AS_PATH attribute - keep or prepend ASN */ - if (p->is_internal || - (p->rs_client && src && src->rs_client)) + if (p->is_internal || p->rs_client) { /* IBGP or route server -> just ensure there is one */ if (!a) @@ -1483,6 +1739,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF)) bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + /* AIGP attribute - accumulate local metric or originate new one */ + u64 metric; + if (s.local_next_hop && + (bgp_total_aigp_metric_(e, &metric, &ad) || + (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad)))) + { + ad = bgp_aigp_set_metric(pool, ad, metric); + bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad); + } + /* IBGP route reflection, RFC 4456 */ if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as)) { @@ -1535,7 +1801,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at } void -bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs) +bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old) { struct bgp_proto *p = (void *) P; struct bgp_channel *c = (void *) C; @@ -1545,7 +1811,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea if (new) { - attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool2); + struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2); /* If attributes are invalid, we fail back to withdraw */ buck = attrs ? 
bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); @@ -1569,7 +1835,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea static inline u32 bgp_get_neighbor(rte *r) { - eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); u32 as; if (e && as_path_get_first_regular(e->u.ptr, &as)) @@ -1581,9 +1847,16 @@ bgp_get_neighbor(rte *r) } static inline int -rte_resolvable(rte *rt) +rte_stale(rte *r) { - return rt->attrs->dest == RTD_UNICAST; + if (r->u.bgp.stale < 0) + { + /* If staleness is unknown, compute and cache it */ + eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE); + } + + return r->u.bgp.stale; } int @@ -1610,9 +1883,17 @@ bgp_rte_better(rte *new, rte *old) if (n < o) return 0; - /* Start with local preferences */ - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); + /* LLGR draft - depreference stale routes */ + n = rte_stale(new); + o = rte_stale(old); + if (n > o) + return 0; + if (n < o) + return 1; + + /* Start with local preferences */ + x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); n = x ? x->u.data : new_bgp->cf->default_local_pref; o = y ? y->u.data : old_bgp->cf->default_local_pref; if (n > o) @@ -1620,11 +1901,19 @@ bgp_rte_better(rte *new, rte *old) if (n < o) return 0; + /* RFC 7311 4.1 - Apply AIGP metric */ + u64 n2 = bgp_total_aigp_metric(new); + u64 o2 = bgp_total_aigp_metric(old); + if (n2 < o2) + return 1; + if (n2 > o2) + return 0; + /* RFC 4271 9.1.2.2. 
a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) { - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; if (n < o) @@ -1634,8 +1923,8 @@ bgp_rte_better(rte *new, rte *old) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); + x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); n = x ? x->u.data : ORIGIN_INCOMPLETE; o = y ? y->u.data : ORIGIN_INCOMPLETE; if (n < o) @@ -1657,8 +1946,8 @@ bgp_rte_better(rte *new, rte *old) if (new_bgp->cf->med_metric || old_bgp->cf->med_metric || (bgp_get_neighbor(new) == bgp_get_neighbor(old))) { - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); n = x ? x->u.data : new_bgp->cf->default_med; o = y ? y->u.data : old_bgp->cf->default_med; if (n < o) @@ -1683,8 +1972,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */ - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); + y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); n = x ? 
x->u.data : new_bgp->remote_id; o = y ? y->u.data : old_bgp->remote_id; @@ -1701,8 +1990,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4456 9. b) Compare cluster list lengths */ - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); + y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); n = x ? int_set_get_size(x->u.ptr) : 0; o = y ? int_set_get_size(y->u.ptr) : 0; if (n < o) @@ -1711,7 +2000,7 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */ - return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0); + return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0; } @@ -1728,12 +2017,16 @@ bgp_rte_mergable(rte *pri, rte *sec) return 0; /* RFC 4271 9.1.2.1. Route resolvability test */ - if (!rte_resolvable(sec)) + if (rte_resolvable(pri) != rte_resolvable(sec)) + return 0; + + /* LLGR draft - depreference stale routes */ + if (rte_stale(pri) != rte_stale(sec)) return 0; /* Start with local preferences */ - x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); - y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF)); + x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); p = x ? x->u.data : pri_bgp->cf->default_local_pref; s = y ? y->u.data : sec_bgp->cf->default_local_pref; if (p != s) @@ -1742,8 +2035,8 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. 
a) Use AS path lengths */ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths) { - x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; @@ -1755,8 +2048,8 @@ bgp_rte_mergable(rte *pri, rte *sec) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); - y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); + x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); p = x ? x->u.data : ORIGIN_INCOMPLETE; s = y ? y->u.data : ORIGIN_INCOMPLETE; if (p != s) @@ -1766,8 +2059,8 @@ bgp_rte_mergable(rte *pri, rte *sec) if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric || (bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) { - x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); p = x ? x->u.data : pri_bgp->cf->default_med; s = y ? y->u.data : sec_bgp->cf->default_med; if (p != s) @@ -1810,7 +2103,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) rte *key = new ? new : old; u32 lpref = key->pref; u32 lasn = bgp_get_neighbor(key); - int old_is_group_best = 0; + int old_suppressed = old ? 
old->u.bgp.suppressed : 0; /* * Proper RFC 4271 path selection is a bit complicated, it cannot be @@ -1857,7 +2150,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) * We could find the best-in-group and then make some shortcuts like * in rte_recalculate, but as we would have to walk through all * net->routes just to find it, it is probably not worth. So we - * just have two simpler fast cases that use just the old route. + * just have one simple fast case that uses just the old route. * We also set suppressed flag to avoid using it in bgp_rte_better(). */ @@ -1866,23 +2159,11 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) if (old) { - old_is_group_best = !old->u.bgp.suppressed; old->u.bgp.suppressed = 1; - int new_is_better = new && bgp_rte_better(new, old); - /* The first case - replace not best with worse (or remove not best) */ - if (!old_is_group_best && !new_is_better) + /* The fast case - replace not best with worse (or remove not best) */ + if (old_suppressed && !(new && bgp_rte_better(new, old))) return 0; - - /* The second case - replace the best with better */ - if (old_is_group_best && new_is_better) - { - /* new is best-in-group, the see discussion below - this is - a special variant of NBG && OBG. 
From OBG we can deduce - that same_group(old_best) iff (old == old_best) */ - new->u.bgp.suppressed = 0; - return (old == old_best); - } } /* The default case - find a new best-in-group route */ @@ -1899,6 +2180,16 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) if (!r) return 0; + /* Found if new is mergable with best-in-group */ + if (new && (new != r) && bgp_rte_mergable(r, new)) + new->u.bgp.suppressed = 0; + + /* Found all existing routes mergable with best-in-group */ + for (s=net->routes; rte_is_valid(s); s=s->next) + if (use_deterministic_med(s) && same_group(s, lpref, lasn)) + if ((s != r) && bgp_rte_mergable(r, s)) + s->u.bgp.suppressed = 0; + /* Found best-in-group */ r->u.bgp.suppressed = 0; @@ -1912,9 +2203,9 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) * rte_recalculate() without ignore that possibility). * * There are three possible cases according to whether the old route - * was the best in group (OBG, stored in old_is_group_best) and - * whether the new route is the best in group (NBG, tested by r == new). - * These cases work even if old or new is NULL. + * was the best in group (OBG, i.e. !old_suppressed) and whether the + * new route is the best in group (NBG, tested by r == new). These + * cases work even if old or new is NULL. * * NBG -> new is a possible candidate for the best route, so we just * check for the first reason using same_group(). @@ -1929,7 +2220,28 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) if (r == new) return old_best && same_group(old_best, lpref, lasn); else - return old_is_group_best; + return !old_suppressed; +} + +struct rte * +bgp_rte_modify_stale(struct rte *r, struct linpool *pool) +{ + eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + const struct adata *ad = a ? a->u.ptr : NULL; + uint flags = a ? 
a->flags : BAF_PARTIAL; + + if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) + return NULL; + + if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) + return r; + + r = rte_cow_rta(r, pool); + bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags, + int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); + r->u.bgp.stale = 1; + + return r; } @@ -1974,8 +2286,8 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) return; /* Merge AS_PATH and AS4_PATH */ - as_path_cut(p2->u.ptr, p2_len - p4_len); - p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr); + struct adata *apc = as_path_cut(pool, p2->u.ptr, p2_len - p4_len); + p2->u.ptr = as_path_merge(pool, apc, p4->u.ptr); } } @@ -2006,10 +2318,10 @@ bgp_get_attr(eattr *a, byte *buf, int buflen) } void -bgp_get_route_info(rte *e, byte *buf, ea_list *attrs) +bgp_get_route_info(rte *e, byte *buf) { - eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN)); + eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); u32 origas; buf += bsprintf(buf, " (%d", e->pref); @@ -2017,7 +2329,15 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs) if (e->u.bgp.suppressed) buf += bsprintf(buf, "-"); - if (e->attrs->hostentry) + if (rte_stale(e)) + buf += bsprintf(buf, "s"); + + u64 metric = bgp_total_aigp_metric(e); + if (metric < BGP_AIGP_MAX) + { + buf += bsprintf(buf, "/%lu", metric); + } + else if (e->attrs->igp_metric) { if (!rte_resolvable(e)) buf += bsprintf(buf, "/-");