* implementations that pass invalid AS_CONFED_* segments are
* widespread.
*
- * Error handling of AS4_* attributes is done as specified by
- * draft-ietf-idr-rfc4893bis-03. There are several possible
- * inconsistencies between AGGREGATOR and AS4_AGGREGATOR that are not
- * handled by that draft, these are logged and ignored (see
+ * Error handling of AS4_* attributes is done as specified by RFC 6793. There
+ * are several possible inconsistencies between AGGREGATOR and AS4_AGGREGATOR
+ * that are not handled by that RFC; these are logged and ignored (see
* bgp_reconstruct_4b_attrs()).
*/
+
static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH
#ifndef IPV6
,BA_NEXT_HOP
/* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */
static int
-validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, unsigned int *ilength)
+validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ilength)
{
int res = 0;
u8 *a, *dst;
- int len, plen, copy;
+ int len, plen;
dst = a = idata;
len = *ilength;
if (len < plen)
return -1;
+ if (a[1] == 0)
+ {
+ log(L_WARN "%s: %s_PATH attribute contains empty segment, skipping it",
+ p->p.name, as_path ? "AS" : "AS4");
+ goto skip;
+ }
+
switch (a[0])
{
case AS_PATH_SET:
- copy = 1;
res++;
break;
case AS_PATH_SEQUENCE:
- copy = 1;
res += a[1];
break;
log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment",
p->p.name, as_path ? "AS" : "AS4");
- copy = 0;
- break;
+ goto skip;
default:
return -1;
}
- if (copy)
- {
- if (dst != a)
- memmove(dst, a, plen);
- dst += plen;
- }
+ if (dst != a)
+ memmove(dst, a, plen);
+ dst += plen;
+ skip:
len -= plen;
a += plen;
}
}
static int
-bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len)
+bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a UNUSED6, int len UNUSED6)
{
#ifdef IPV6
return IGNORE;
as = get_u32(data);
data += 4;
- bsprintf(buf, "%d.%d.%d.%d AS%d", data[0], data[1], data[2], data[3], as);
+ bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as);
}
static int
return ((len % 8) == 0) ? 0 : WITHDRAW;
}
+/*
+ * Validate the length of a LARGE_COMMUNITY attribute body. The length must be
+ * a whole multiple of 12 bytes (a zero length passes as well); any other
+ * length yields WITHDRAW. Neither the protocol instance nor the attribute
+ * data itself is inspected.
+ */
+static int
+bgp_check_large_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
+{
+ return ((len % 12) == 0) ? 0 : WITHDRAW;
+}
+
static struct attr_desc bgp_attr_table[] = {
{ NULL, -1, 0, 0, 0, /* Undefined */
bgp_check_next_hop, bgp_format_next_hop },
{ "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */
NULL, NULL },
- { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 0, /* BA_LOCAL_PREF */
+ { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_LOCAL_PREF */
NULL, NULL },
{ "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */
NULL, NULL },
{ "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
NULL, NULL },
{ "as4_aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
- NULL, NULL }
+ NULL, NULL },
+ [BA_LARGE_COMMUNITY] =
+ { "large_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_LC_SET, 1,
+ bgp_check_large_community, NULL }
};
/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH.
}
static int
-bgp_encode_attr_hdr(byte *dst, unsigned int flags, unsigned code, int len)
+bgp_encode_attr_hdr(byte *dst, uint flags, unsigned code, int len)
{
int wlen;
return len;
}
-#define ADVANCE(w, r, l) do { r -= l; w += l; } while (0)
-
/**
* bgp_encode_attrs - encode BGP attributes
* @p: BGP instance
*
* Result: Length of the attribute block generated or -1 if not enough space.
*/
-unsigned int
+uint
bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
{
- unsigned int i, code, type, flags;
+ uint i, code, type, flags;
byte *start = w;
int len, rv;
len = bgp_get_attr_len(a);
/* Skip empty sets */
- if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET)) && (len == 0))
+ if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET) || (type == EAF_TYPE_LC_SET)) && (len == 0))
continue;
if (remains < len + 4)
break;
}
case EAF_TYPE_INT_SET:
+ case EAF_TYPE_LC_SET:
case EAF_TYPE_EC_SET:
{
u32 *z = int_set_get_data(a->u.ptr);
return -1;
}
+/*
static void
bgp_init_prefix(struct fib_node *N)
{
struct bgp_prefix *p = (struct bgp_prefix *) N;
p->bucket_node.next = NULL;
}
+*/
static int
bgp_compare_u32(const u32 *x, const u32 *y)
qsort(dst, ad->length / 8, 8, (int(*)(const void *, const void *)) bgp_compare_ec);
}
+/*
+ * Three-way comparison of two large community values, each given as an array
+ * of three u32 words. The words are compared in order (index 0 first), so the
+ * result is a lexicographic ordering. Returns 1, -1 or 0. Used as the qsort()
+ * callback in bgp_normalize_lc_set().
+ */
+static int
+bgp_compare_lc(const u32 *x, const u32 *y)
+{
+ if (x[0] != y[0])
+ return (x[0] > y[0]) ? 1 : -1;
+ if (x[1] != y[1])
+ return (x[1] > y[1]) ? 1 : -1;
+ if (x[2] != y[2])
+ return (x[2] > y[2]) ? 1 : -1;
+ return 0;
+}
+
+/*
+ * Produce a sorted copy of a large community set. Copies @cnt entries of
+ * LCOMM_LENGTH bytes each from @src to @dest and sorts the copy using
+ * bgp_compare_lc(); @src is left untouched. The caller must supply a @dest
+ * buffer of at least LCOMM_LENGTH * @cnt bytes.
+ * NOTE(review): bgp_compare_lc() reads three u32 words per entry, so
+ * LCOMM_LENGTH is presumably 12 -- confirm against its definition.
+ */
+static inline void
+bgp_normalize_lc_set(u32 *dest, u32 *src, unsigned cnt)
+{
+ memcpy(dest, src, LCOMM_LENGTH * cnt);
+ qsort(dest, cnt, LCOMM_LENGTH, (int(*)(const void *, const void *)) bgp_compare_lc);
+}
+
static void
bgp_rehash_buckets(struct bgp_proto *p)
{
struct bgp_bucket *b;
unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
- unsigned size = sizeof(struct bgp_bucket) + ea_size;
+ unsigned size = sizeof(struct bgp_bucket) + ea_size_aligned;
unsigned i;
byte *dest;
unsigned index = hash & (p->hash_size - 1);
code = EA_ID(a->id);
if (ATTR_KNOWN(code))
{
- if (!bgp_attr_table[code].allow_in_ebgp && !p->is_internal)
- continue;
+ if (!p->is_internal)
+ {
+ if (!bgp_attr_table[code].allow_in_ebgp)
+ continue;
+ if ((code == BA_LOCAL_PREF) && !p->cf->allow_local_pref)
+ continue;
+ }
/* The flags might have been zero if the attr was added by filters */
a->flags = (a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags;
if (code < 32)
d->u.ptr = z;
break;
}
+ case EAF_TYPE_LC_SET:
+ {
+ struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
+ z->length = d->u.ptr->length;
+ bgp_normalize_lc_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / LCOMM_LENGTH);
+ d->u.ptr = z;
+ break;
+ }
default: ;
}
d++;
mb_free(buck);
}
+
+/* Prefix hash table */
+
+/*
+ * Glue for the generic HASH_* macros, selected by the PXH name prefix:
+ * PXH_KEY  - key of an entry: the triple (prefix, pxlen, path_id)
+ * PXH_NEXT - link to the next entry (the ->next member)
+ * PXH_EQ   - two keys are equal when all three components match
+ * PXH_FN   - hash of a key, mixing the address hash with pxlen and path_id
+ * NOTE(review): the meaning of the individual PXH_PARAMS values is defined
+ * by the hash macro implementation, which is not visible here.
+ */
+#define PXH_KEY(n1) n1->n.prefix, n1->n.pxlen, n1->path_id
+#define PXH_NEXT(n) n->next
+#define PXH_EQ(p1,l1,i1,p2,l2,i2) ipa_equal(p1, p2) && l1 == l2 && i1 == i2
+#define PXH_FN(p,l,i) ipa_hash32(p) ^ u32_hash((l << 16) ^ i)
+
+#define PXH_REHASH bgp_pxh_rehash
+#define PXH_PARAMS /8, *2, 2, 2, 8, 20
+
+
+/* Defines the rehash function named by PXH_REHASH for struct bgp_prefix */
+HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
+
+/*
+ * Set up the per-protocol prefix table: initialize p->prefix_hash with the
+ * given @order and create the slab from which struct bgp_prefix entries are
+ * allocated. Both are allocated from the protocol's pool (p->p.pool).
+ */
+void
+bgp_init_prefix_table(struct bgp_proto *p, u32 order)
+{
+ HASH_INIT(p->prefix_hash, p->p.pool, order);
+
+ p->prefix_slab = sl_new(p->p.pool, sizeof(struct bgp_prefix));
+}
+
+/*
+ * Tear down the prefix table created by bgp_init_prefix_table(): release the
+ * hash table, then free the prefix slab and clear the pointer to it.
+ */
+void
+bgp_free_prefix_table(struct bgp_proto *p)
+{
+ HASH_FREE(p->prefix_hash);
+
+ rfree(p->prefix_slab);
+ p->prefix_slab = NULL;
+}
+
+/*
+ * Look up the prefix entry keyed by (@prefix, @pxlen, @path_id) in the
+ * protocol's prefix hash, creating it when it does not exist yet. Returns the
+ * existing or the newly inserted entry.
+ */
+static struct bgp_prefix *
+bgp_get_prefix(struct bgp_proto *p, ip_addr prefix, int pxlen, u32 path_id)
+{
+ struct bgp_prefix *bp = HASH_FIND(p->prefix_hash, PXH, prefix, pxlen, path_id);
+
+ if (bp)
+ return bp;
+
+ /* Not found - allocate a fresh entry and fill in its key */
+ bp = sl_alloc(p->prefix_slab);
+ bp->n.prefix = prefix;
+ bp->n.pxlen = pxlen;
+ bp->path_id = path_id;
+ /* A NULL next pointer marks the entry as not linked in any bucket;
+  * bgp_rt_notify() tests this field to detect an old entry */
+ bp->bucket_node.next = NULL;
+
+ HASH_INSERT2(p->prefix_hash, PXH, p->p.pool, bp);
+
+ return bp;
+}
+
+/*
+ * Dispose of a prefix entry: remove @bp from the protocol's prefix hash and
+ * return its memory to the prefix slab. @bp must not be used afterwards.
+ */
+void
+bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp)
+{
+ HASH_REMOVE2(p->prefix_hash, PXH, p->p.pool, bp);
+ sl_free(p->prefix_slab, bp);
+}
+
+
void
bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs)
{
struct bgp_proto *p = (struct bgp_proto *) P;
struct bgp_bucket *buck;
struct bgp_prefix *px;
+ rte *key;
+ u32 path_id;
DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down");
if (new)
{
+ key = new;
buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP);
if (!buck) /* Inconsistent attribute list */
return;
}
else
{
+ key = old;
if (!(buck = p->withdraw_bucket))
{
buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket));
init_list(&buck->prefixes);
}
}
- px = fib_get(&p->prefix_fib, &n->n.prefix, n->n.pxlen);
+ path_id = p->add_path_tx ? key->attrs->src->global_id : 0;
+ px = bgp_get_prefix(p, n->n.prefix, n->n.pxlen, path_id);
if (px->bucket_node.next)
{
DBG("\tRemoving old entry.\n");
if (p->cf->next_hop_self ||
rta->dest != RTD_ROUTER ||
ipa_equal(rta->gw, IPA_NONE) ||
- ipa_has_link_scope(rta->gw) ||
- (!p->is_internal && (!p->neigh || (rta->iface != p->neigh->iface))))
+ ipa_is_link_local(rta->gw) ||
+ (!p->is_internal && !p->cf->next_hop_keep &&
+ (!p->neigh || (rta->iface != p->neigh->iface))))
set_next_hop(z, p->source_addr);
else
set_next_hop(z, rta->gw);
static inline int
bgp_as_path_loopy(struct bgp_proto *p, rta *a)
{
+ int num = p->cf->allow_local_as + 1;
eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- return (e && as_path_is_member(e->u.ptr, p->local_as));
+ return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num));
}
static inline int
bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
{
eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
- bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_add(pool, a ? a->u.ptr : NULL, cid));
+ bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_prepend(pool, a ? a->u.ptr : NULL, cid));
}
static int
}
/* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr,
- eBGP single-hop -> keep next_hop if on the same iface */
+ * eBGP single-hop -> keep next_hop if on the same iface.
+ * If the next_hop is zero (i.e. link-local), keep only if on the same iface.
+ *
+ * Note that same-iface-check uses iface from route, which is based on gw.
+ */
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
if (a && !p->cf->next_hop_self &&
- (p->is_internal || (p->neigh && (e->attrs->iface == p->neigh->iface))))
+ (p->cf->next_hop_keep ||
+ (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) ||
+ (p->neigh && (e->attrs->iface == p->neigh->iface))))
{
/* Leave the original next hop attribute, will check later where does it point */
}
if (rr)
{
/* Handling route reflection, RFC 4456 */
- struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto;
+ struct bgp_proto *src = (struct bgp_proto *) e->attrs->src->proto;
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
if (!a)
{
rte *e = *new;
struct bgp_proto *p = (struct bgp_proto *) P;
- struct bgp_proto *new_bgp = (e->attrs->proto->proto == &proto_bgp) ? (struct bgp_proto *) e->attrs->proto : NULL;
+ struct bgp_proto *new_bgp = (e->attrs->src->proto->proto == &proto_bgp) ?
+ (struct bgp_proto *) e->attrs->src->proto : NULL;
if (p == new_bgp) /* Poison reverse updates */
return -1;
if (e && as_path_get_first(e->u.ptr, &as))
return as;
else
- return ((struct bgp_proto *) r->attrs->proto)->remote_as;
+ return ((struct bgp_proto *) r->attrs->src->proto)->remote_as;
}
static inline int
int
bgp_rte_better(rte *new, rte *old)
{
- struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->proto;
- struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->proto;
+ struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
+ struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
eattr *x, *y;
u32 n, o;
}
+/*
+ * Decide whether route @sec may be merged with route @pri. Returns 1 when
+ * @sec is resolvable and both routes agree in every criterion checked below
+ * (suppression state, local preference, AS path length, origin, MED,
+ * internal/external session, IGP metric); returns 0 as soon as one differs.
+ * The AS path length and MED checks are conditional on configuration, see
+ * the individual steps.
+ */
+int
+bgp_rte_mergable(rte *pri, rte *sec)
+{
+ struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
+ struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
+ eattr *x, *y;
+ u32 p, s;
+
+ /* Skip suppressed routes (see bgp_rte_recalculate()) */
+ if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
+ return 0;
+
+ /* RFC 4271 9.1.2.1. Route resolvability test (only @sec is tested here) */
+ if (!rte_resolvable(sec))
+ return 0;
+
+ /* Start with local preferences; a missing attribute falls back to the
+  * default_local_pref of the route's own protocol */
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
+ p = x ? x->u.data : pri_bgp->cf->default_local_pref;
+ s = y ? y->u.data : sec_bgp->cf->default_local_pref;
+ if (p != s)
+ return 0;
+
+ /* RFC 4271 9.1.2.2. a) Use AS path lengths; applied when either protocol
+  * has compare_path_lengths set, a missing path counts as AS_PATH_MAXLEN */
+ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
+ {
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
+ s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
+
+ if (p != s)
+ return 0;
+
+// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
+// return 0;
+ }
+
+ /* RFC 4271 9.1.2.2. b) Use origins; a missing attribute is treated as
+  * ORIGIN_INCOMPLETE */
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
+ p = x ? x->u.data : ORIGIN_INCOMPLETE;
+ s = y ? y->u.data : ORIGIN_INCOMPLETE;
+ if (p != s)
+ return 0;
+
+ /* RFC 4271 9.1.2.2. c) Compare MED's; applied when either protocol has
+  * med_metric set or both routes have the same bgp_get_neighbor() value */
+ if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
+ (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
+ {
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ p = x ? x->u.data : pri_bgp->cf->default_med;
+ s = y ? y->u.data : sec_bgp->cf->default_med;
+ if (p != s)
+ return 0;
+ }
+
+ /* RFC 4271 9.1.2.2. d) Prefer external peers */
+ if (pri_bgp->is_internal != sec_bgp->is_internal)
+ return 0;
+
+ /* RFC 4271 9.1.2.2. e) Compare IGP metrics; a protocol with igp_metric
+  * disabled contributes 0 */
+ p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
+ s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
+ if (p != s)
+ return 0;
+
+ /* Remaining criteria are ignored */
+
+ return 1;
+}
+
+
+
static inline int
same_group(rte *r, u32 lpref, u32 lasn)
{
static inline int
use_deterministic_med(rte *r)
{
- return ((struct bgp_proto *) r->attrs->proto)->cf->deterministic_med;
+ struct proto *P = r->attrs->src->proto;
+ return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
}
int
/* The default case - find a new best-in-group route */
r = new; /* new may not be in the list */
- for (s=net->routes; s; s=s->next)
+ for (s=net->routes; rte_is_valid(s); s=s->next)
if (use_deterministic_med(s) && same_group(s, lpref, lasn))
{
s->u.bgp.suppressed = 1;
* by a &rta.
*/
struct rta *
-bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct linpool *pool, int mandatory)
+bgp_decode_attrs(struct bgp_conn *conn, byte *attr, uint len, struct linpool *pool, int mandatory)
{
struct bgp_proto *bgp = conn->bgp;
rta *a = lp_alloc(pool, sizeof(struct rta));
- unsigned int flags, code, l, i, type;
+ uint flags, code, l, i, type;
int errcode;
byte *z, *attr_start;
byte seen[256/8];
int withdraw = 0;
bzero(a, sizeof(rta));
- a->proto = &bgp->p;
a->source = RTS_BGP;
a->scope = SCOPE_UNIVERSE;
a->cast = RTC_UNICAST;
{ errcode = 5; goto err; }
if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
{ errcode = 4; goto err; }
- if (!desc->allow_in_ebgp && !bgp->is_internal)
- continue;
+ if (!bgp->is_internal)
+ {
+ if (!desc->allow_in_ebgp)
+ continue;
+ if ((code == BA_LOCAL_PREF) && !bgp->cf->allow_local_pref)
+ continue;
+ }
if (desc->validate)
{
errcode = desc->validate(bgp, z, l);
ipa_ntoh(*(ip_addr *)ad->data);
break;
case EAF_TYPE_INT_SET:
+ case EAF_TYPE_LC_SET:
case EAF_TYPE_EC_SET:
{
u32 *z = (u32 *) ad->data;
int
bgp_get_attr(eattr *a, byte *buf, int buflen)
{
- unsigned int i = EA_ID(a->id);
+ uint i = EA_ID(a->id);
struct attr_desc *d;
int len;
}
void
-bgp_attr_init(struct bgp_proto *p)
+bgp_init_bucket_table(struct bgp_proto *p)
{
p->hash_size = 256;
p->hash_limit = p->hash_size * 4;
p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
init_list(&p->bucket_queue);
p->withdraw_bucket = NULL;
- fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
+ // fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
+}
+
+/*
+ * Release the bucket bookkeeping set up by bgp_init_bucket_table(): the
+ * bucket hash array, every bucket still linked in p->bucket_queue, and the
+ * withdraw bucket. The hash array and withdraw bucket pointers are reset to
+ * NULL afterwards.
+ * NOTE(review): only buckets reachable through bucket_queue are freed here;
+ * verify that no bucket can exist in the hash without being queued.
+ */
+void
+bgp_free_bucket_table(struct bgp_proto *p)
+{
+ mb_free(p->bucket_hash);
+ p->bucket_hash = NULL;
+
+ struct bgp_bucket *b;
+ /* Each iteration unlinks the visited bucket before freeing it */
+ WALK_LIST_FIRST(b, p->bucket_queue)
+ {
+ rem_node(&b->send_node);
+ mb_free(b);
+ }
+
+ /* NOTE(review): withdraw_bucket may still be NULL here (it is allocated
+  * lazily in bgp_rt_notify()); assumes mb_free() tolerates NULL -- confirm */
+ mb_free(p->withdraw_bucket);
+ p->withdraw_bucket = NULL;
+}
void
eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
u32 origas;
- /*
+ buf += bsprintf(buf, " (%d", e->pref);
+
if (e->u.bgp.suppressed)
- buf += bsprintf(buf, " -");
- */
+ buf += bsprintf(buf, "-");
- buf += bsprintf(buf, " (%d", e->pref);
if (e->attrs->hostentry)
{
if (!rte_resolvable(e))