#include "nest/protocol.h"
#include "nest/route.h"
#include "nest/attrs.h"
-#include "nest/mrtdump.h"
+#include "proto/mrt/mrt.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
#include "lib/flowspec.h"
#define BGP_RR_BEGIN 1
#define BGP_RR_END 2
+/* NOTE(review): presumably path ID (4) + prefix length (1) + prefix data - confirm */
+#define BGP_NLRI_MAX (4 + 1 + 32)
+
+#define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */
+#define BGP_MPLS_MAX 10 /* Max number of labels n such that 24*n <= 255 */
+#define BGP_MPLS_NULL 3 /* Implicit NULL label */
+#define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */
+
static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
return (get_u16(buf) << 16) | buf[3];
}
-/*
- * MRT Dump format is not semantically specified.
- * We will use these values in appropriate fields:
- *
- * Local AS, Remote AS - configured AS numbers for given BGP instance.
- * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
- *
- * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
- * changes) and MESSAGE (for received BGP messages).
- *
- * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
- * only when AS4 session is established and even in that case MESSAGE
- * does not use AS4 variant for initial OPEN message. This strange
- * behavior is here for compatibility with Quagga and Bgpdump,
- */
-
-static byte *
-mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
+static void
+init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
{
struct bgp_proto *p = conn->bgp;
- uint v4 = ipa_is_ip4(p->cf->remote_ip);
+ int p_ok = conn->state >= BS_OPENCONFIRM;
- if (as4)
- {
- put_u32(buf+0, p->remote_as);
- put_u32(buf+4, p->public_as);
- buf+=8;
- }
- else
- {
- put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
- put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS);
- buf+=4;
- }
+ memset(d, 0, sizeof(struct mrt_bgp_data));
+ d->peer_as = p->remote_as;
+ d->local_as = p->local_as;
+ d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
+ d->af = ipa_is_ip4(p->cf->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
+ d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
+ d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
+ d->as4 = p_ok ? p->as4_session : 0;
+}
- put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
- put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6);
- buf+=4;
+static uint bgp_find_update_afi(byte *pos, uint len);
- if (v4)
- {
- buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE);
- buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE);
- }
- else
+static int
+bgp_estimate_add_path(struct bgp_proto *p, byte *pkt, uint len)
+{
+ /* No need to estimate it for other messages than UPDATE */
+ if (pkt[18] != PKT_UPDATE)
+ return 0;
+
+ /* 1 -> no channel, 2 -> all channels, 3 -> some channels */
+ if (p->summary_add_path_rx < 3)
+ return p->summary_add_path_rx == 2;
+
+ uint afi = bgp_find_update_afi(pkt, len);
+ struct bgp_channel *c = bgp_get_channel(p, afi);
+ if (!c)
{
- buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE);
- buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE);
+ /* Either frame error (if !afi) or unknown AFI/SAFI,
+ will be reported later in regular parsing */
+ BGP_TRACE(D_PACKETS, "MRT processing noticed invalid packet");
+ return 0;
}
- return buf;
+ return c->add_path_rx;
}
static void
-mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len)
+bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len)
{
- byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
- byte *bp = buf + MRTDUMP_HDR_LENGTH;
- int as4 = conn->bgp->as4_session;
+ struct mrt_bgp_data d;
+ init_mrt_bgp_data(conn, &d);
- bp = mrt_put_bgp4_hdr(bp, conn, as4);
- memcpy(bp, pkt, len);
- bp += len;
- mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
- buf, bp-buf);
-}
+ d.message = pkt;
+ d.msg_len = len;
+ d.add_path = bgp_estimate_add_path(conn->bgp, pkt, len);
-static inline u16
-convert_state(uint state)
-{
- /* Convert state from our BS_* values to values used in MRTDump */
- return (state == BS_CLOSE) ? 1 : state + 1;
+ mrt_dump_bgp_message(&d);
}
void
-mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new)
+bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new)
{
- byte buf[128];
- byte *bp = buf + MRTDUMP_HDR_LENGTH;
+ struct mrt_bgp_data d;
+ init_mrt_bgp_data(conn, &d);
+
+ d.old_state = old;
+ d.new_state = new;
- bp = mrt_put_bgp4_hdr(bp, conn, 1);
- put_u16(bp+0, convert_state(old));
- put_u16(bp+2, convert_state(new));
- bp += 4;
- mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
+ mrt_dump_bgp_state_change(&d);
}
static byte *
struct bgp_af_caps *ac;
uint any_ext_next_hop = 0;
uint any_add_path = 0;
+ byte *buf_head = buf;
byte *data;
/* Prepare bgp_caps structure */
caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
}
+ if (p->cf->llgr_mode)
+ caps->llgr_aware = 1;
+
/* Allocate and fill per-AF fields */
WALK_LIST(c, p->p.channels)
{
if (p->p.gr_recovery)
ac->gr_af_flags |= BGP_GRF_FORWARDING;
}
+
+ if (c->cf->llgr_able)
+ {
+ ac->llgr_able = 1;
+ ac->llgr_time = c->cf->llgr_time;
+
+ if (p->p.gr_recovery)
+ ac->llgr_flags |= BGP_LLGRF_FORWARDING;
+ }
}
/* Sort capability fields by AFI/SAFI */
/* Create capability list in buffer */
/*
- * Note that max length is ~ 20+14*af_count. With max 6 channels that is
- * 104. Option limit is 253 and buffer size is 4096, so we cannot overflow
- * unless we add new capabilities or more AFs.
+ * Note that max length is ~ 22+21*af_count. With max 12 channels that is
+ * 274. Option limit is 253 and buffer size is 4096, so the buffer cannot
+ * overflow, but the option limit can be exceeded with 12 channels. XXXXX
*/
WALK_AF_CAPS(caps, ac)
*buf++ = 0; /* Capability data length */
}
+ if (caps->llgr_aware)
+ {
+ *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */
+ *buf++ = 0; /* Capability data length, will be fixed later */
+ data = buf;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->llgr_able)
+ {
+ put_af3(buf, ac->afi);
+ buf[3] = ac->llgr_flags;
+ put_u24(buf+4, ac->llgr_time);
+ buf += 7;
+ }
+
+ data[-1] = buf - data;
+ }
+
+ caps->length = buf - buf_head;
+
return buf;
}
int i, cl;
u32 af;
+ caps->length += len;
+
while (len > 0)
{
if (len < 2 || len < (2 + pos[1]))
}
break;
- case 65: /* AS4 capability, RFC 4893 */
+ case 65: /* AS4 capability, RFC 6793 */
if (cl != 4)
goto err;
caps->enhanced_refresh = 1;
break;
+ case 71: /* Long lived graceful restart capability, RFC draft */
+ if (cl % 7)
+ goto err;
+
+ /* Presumably, only the last instance is valid */
+ WALK_AF_CAPS(caps, ac)
+ {
+ ac->llgr_able = 0;
+ ac->llgr_flags = 0;
+ ac->llgr_time = 0;
+ }
+
+ caps->llgr_aware = 1;
+
+ for (i = 0; i < cl; i += 7)
+ {
+ af = get_af3(pos+2+i);
+ ac = bgp_get_af_caps(caps, af);
+ ac->llgr_able = 1;
+ ac->llgr_flags = pos[2+i+3];
+ ac->llgr_time = get_u24(pos + 2+i+4);
+ }
+ break;
+
/* We can safely ignore all other capabilities */
}
ADVANCE(pos, len, 2 + cl);
}
+
+ /* The LLGR capability must be advertised together with the GR capability,
+ otherwise it must be disregarded */
+ if (!caps->gr_aware && caps->llgr_aware)
+ {
+ caps->llgr_aware = 0;
+ WALK_AF_CAPS(caps, ac)
+ {
+ ac->llgr_able = 0;
+ ac->llgr_flags = 0;
+ ac->llgr_time = 0;
+ }
+ }
+
return;
err:
return;
}
+/*
+ * Decide whether the capabilities exchanged in OPEN allow the session to
+ * proceed. Returns 1 when every mandatory channel is active and at least one
+ * channel is active at all, 0 otherwise.
+ *
+ * This partially overlaps with bgp_conn_enter_established_state(), but it has
+ * to run right after the OPEN message is received.
+ */
+static int
+bgp_check_capabilities(struct bgp_conn *conn)
+{
+  struct bgp_proto *p = conn->bgp;
+  struct bgp_caps *local = conn->local_caps;
+  struct bgp_caps *remote = conn->remote_caps;
+  struct bgp_channel *c;
+  int active_channels = 0;
+
+  WALK_LIST(c, p->p.channels)
+  {
+    const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
+    const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi);
+    int active = 0;
+
+    /* A channel is active when both sides are ready for its AFI/SAFI; IPv4
+       is also accepted when the remote capability length is zero */
+    if (loc && loc->ready)
+    {
+      if (rem && rem->ready)
+        active = 1;
+      else if (!remote->length && (c->afi == BGP_AF_IPV4))
+        active = 1;
+    }
+
+    /* An inactive mandatory channel rejects the whole session */
+    if (active)
+      active_channels++;
+    else if (c->cf->mandatory)
+      return 0;
+  }
+
+  /* We need at least one channel active */
+  return active_channels > 0;
+}
+
static int
bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
{
if (!id || (p->is_internal && id == p->local_id))
{ bgp_error(conn, 2, 3, pkt+24, -4); return; }
+ /* RFC 5492 4 - check for required capabilities */
+ if (p->cf->capabilities && !bgp_check_capabilities(conn))
+ { bgp_error(conn, 2, 7, NULL, 0); return; }
+
struct bgp_caps *caps = conn->remote_caps;
if (caps->as4_support)
#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP "Missing NEXT_HOP attribute"
+#define NO_LABEL_STACK "Missing MPLS stack"
static void
/* GW_DIRECT -> single_hop -> p->neigh != NULL */
if (ipa_nonzero(gw))
- nbr = neigh_find2(&p->p, &gw, NULL, 0);
+ nbr = neigh_find(&p->p, gw, NULL, 0);
else if (ipa_nonzero(ll))
- nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0);
+ nbr = neigh_find(&p->p, ll, p->neigh->iface, 0);
if (!nbr || (nbr->scope == SCOPE_HOST))
WITHDRAW(BAD_NEXT_HOP);
- a->dest = RTD_ROUTER;
- a->gw = nbr->addr;
- a->iface = nbr->iface;
- a->hostentry = NULL;
- a->igp_metric = 0;
+ a->dest = RTD_UNICAST;
+ a->nh.gw = nbr->addr;
+ a->nh.iface = nbr->iface;
}
else /* GW_RECURSIVE */
{
if (ipa_zero(gw))
WITHDRAW(BAD_NEXT_HOP);
- rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll);
+ rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
+ s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
+
+ if (!s->mpls)
+ rta_apply_hostentry(a, s->hostentry, NULL);
+
+ /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
+ }
+}
+
+/*
+ * Attach a received MPLS label stack (@labels, @lnum entries) to route
+ * attributes @a.
+ *
+ * A stack longer than MPLS_MAX_LABEL_STACK is reported and the route is made
+ * unreachable. A single implicit-NULL label is treated as an empty stack.
+ * In GW_DIRECT mode the labels are copied into the next hop of @a; otherwise
+ * (recursive mode) they are passed to rta_apply_hostentry() together with the
+ * hostentry stored in @s.
+ */
+static void
+bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
+{
+  if (lnum > MPLS_MAX_LABEL_STACK)
+  {
+    /* Use the %u conversion so that the label count really gets printed */
+    REPORT("Too many MPLS labels (%u)", lnum);
+
+    a->dest = RTD_UNREACHABLE;
+    a->hostentry = NULL;
+    a->nh = (struct nexthop) { };
+    return;
+  }
+
+  /* Handle implicit NULL as empty MPLS stack */
+  if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
+    lnum = 0;
+
+  if (s->channel->cf->gw_mode == GW_DIRECT)
+  {
+    /* Direct gateway: labels go straight into the next hop */
+    a->nh.labels = lnum;
+    memcpy(a->nh.label, labels, 4*lnum);
+  }
+  else /* GW_RECURSIVE */
+  {
+    /* Recursive gateway: the hostentry is applied together with the labels */
+    mpls_label_stack ms;
+
+    ms.len = lnum;
+    memcpy(ms.stack, labels, 4*lnum);
+    rta_apply_hostentry(a, s->hostentry, &ms);
+  }
+}
+
+
+/*
+ * Tell whether the export source of @s matches the next-hop option @mode.
+ * NH_NO never matches, NH_ALL always matches, NH_IBGP / NH_EBGP match only
+ * when s->src is set and its is_internal flag is set / clear respectively.
+ * Any other mode does not match.
+ */
+static int
+bgp_match_src(struct bgp_export_state *s, int mode)
+{
+ switch (mode)
+ {
+ case NH_NO: return 0;
+ case NH_ALL: return 1;
+ case NH_IBGP: return s->src && s->src->is_internal;
+ case NH_EBGP: return s->src && !s->src->is_internal;
+ default: return 0;
}
}
bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
{
struct bgp_proto *p = s->proto;
+ struct bgp_channel *c = s->channel;
ip_addr *nh = (void *) a->u.ptr->data;
- if (s->channel->cf->next_hop_self)
+ /* Handle next hop self option */
+ if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
return 0;
- if (s->channel->cf->next_hop_keep)
+ /* Handle next hop keep option */
+ if (c->cf->next_hop_keep && bgp_match_src(s, c->cf->next_hop_keep))
return 1;
/* Keep it when explicitly set in export filter */
if (a->type & EAF_FRESH)
return 1;
+ /* Check for non-matching AF */
+ if ((ipa_is_ip4(*nh) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
+ return 0;
+
/* Keep it when exported to internal peers */
if (p->is_interior && ipa_nonzero(*nh))
return 1;
bgp_use_gateway(struct bgp_export_state *s)
{
struct bgp_proto *p = s->proto;
+ struct bgp_channel *c = s->channel;
rta *ra = s->route->attrs;
- if (s->channel->cf->next_hop_self)
+ /* Handle next hop self option - also applies to gateway */
+ if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
return 0;
- /* We need valid global gateway */
- if ((ra->dest != RTD_ROUTER) || ipa_zero(ra->gw) || ipa_is_link_local(ra->gw))
+ /* We need one valid global gateway */
+ if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
+ return 0;
+
+ /* Check for non-matching AF */
+ if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
return 0;
/* Use it when exported to internal peers */
return 1;
/* Use it when forwarded to single-hop BGP peer on on the same iface */
- return p->neigh && (p->neigh->iface == ra->iface);
+ return p->neigh && (p->neigh->iface == ra->nh.iface);
}
static void
{
if (bgp_use_gateway(s))
{
- ip_addr nh[1] = { s->route->attrs->gw };
+ rta *ra = s->route->attrs;
+ ip_addr nh[1] = { ra->nh.gw };
bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
+
+ if (s->mpls)
+ {
+ u32 implicit_null = BGP_MPLS_NULL;
+ u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
+ uint lnum = ra->nh.labels ? ra->nh.labels : 1;
+ bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
+ }
}
else
{
ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
+
+ /* TODO: Use local MPLS assigned label */
+ if (s->mpls)
+ {
+ u32 implicit_null = BGP_MPLS_NULL;
+ bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
+ }
}
}
ip_addr peer = s->proto->cf->remote_ip;
uint len = a->u.ptr->length;
+ /* Forbid zero next hop */
if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
WITHDRAW(BAD_NEXT_HOP);
+ /* Forbid next hop equal to neighbor IP */
if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
WITHDRAW(BAD_NEXT_HOP);
+
+ /* Forbid next hop with non-matching AF */
+ if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
+ !s->channel->ext_next_hop)
+ WITHDRAW(BAD_NEXT_HOP);
+
+ /* Just check whether the MPLS label stack is present */
+ if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
+ WITHDRAW(NO_LABEL_STACK);
+}
+
+/*
+ * Write the MP-BGP next hop from attribute @a into @buf and return the number
+ * of bytes written. Plain IPv4 BGP does not go through here, see
+ * bgp_encode_next_hop() for that.
+ *
+ * The attribute holds one or two ip_addr values (length 16 or 32).
+ */
+static uint
+bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
+{
+  uint len = a->u.ptr->length;
+  ip_addr *addrs = (void *) a->u.ptr->data;
+
+  ASSERT((len == 16) || (len == 32));
+
+  /*
+   * With ext_next_hop enabled both IPv4 and IPv6 next hops may appear
+   * (RFC 5549 for IPv4, RFC 4798 for IPv6). An IPv4 address is written as a
+   * bare IPv4 address with IPv4 NLRI, but as an IPv4-mapped IPv6 address with
+   * IPv6 NLRI.
+   */
+  int plain_ip4 = bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(addrs[0]);
+
+  if (!plain_ip4)
+  {
+    put_ip6(buf, ipa_to_ip6(addrs[0]));
+
+    /* Optional second address follows the first one */
+    if (len == 32)
+      put_ip6(buf+16, ipa_to_ip6(addrs[1]));
+
+    return len;
+  }
+
+  put_ip4(buf, ipa_to_ip4(addrs[0]));
+  return 4;
+}
+
+/*
+ * Decode an MP-BGP next hop of @len bytes at @data, attach it to @a as
+ * BA_NEXT_HOP and apply it through bgp_apply_next_hop().
+ *
+ * Accepted lengths are 4 (one IPv4 address), 16 (one IPv6 address) and
+ * 32 (two IPv6 addresses); anything else is reported by bgp_parse_error(s, 9).
+ * NOTE(review): the code after the length checks presumably relies on
+ * bgp_parse_error() not returning - confirm.
+ */
+static void
+bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+{
+ struct bgp_channel *c = s->channel;
+ struct adata *ad = lp_alloc_adata(s->pool, 32);
+ ip_addr *nh = (void *) ad->data;
+
+ if (len == 4)
+ {
+ nh[0] = ipa_from_ip4(get_ip4(data));
+ nh[1] = IPA_NONE;
+ }
+ else if (len == 16)
+ {
+ nh[0] = ipa_from_ip6(get_ip6(data));
+ nh[1] = IPA_NONE;
+
+ /* A lone link-local address is moved to the second slot */
+ if (ipa_is_link_local(nh[0]))
+ { nh[1] = nh[0]; nh[0] = IPA_NONE; }
+ }
+ else if (len == 32)
+ {
+ nh[0] = ipa_from_ip6(get_ip6(data));
+ nh[1] = ipa_from_ip6(get_ip6(data+16));
+
+ /* Keep the second address only if it is link-local and the first is not IPv4 */
+ if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
+ nh[1] = IPA_NONE;
+ }
+ else
+ bgp_parse_error(s, 9);
+
+ /* Without a second address the attribute length is set to 16 */
+ if (ipa_zero(nh[1]))
+ ad->length = 16;
+
+ /* Address family of the next hop must match the channel unless ext_next_hop is set */
+ if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
+ WITHDRAW(BAD_NEXT_HOP);
+
+ // XXXX validate next hop
+
+ bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, a, nh[0], nh[1]);
+}
+
+/*
+ * Write the VPN next hop from attribute @a into @buf and return the number of
+ * bytes written (12, 24 or 48). Each address is preceded by an 8-byte route
+ * distinguisher that is always zero.
+ *
+ * The attribute holds one or two ip_addr values (length 16 or 32).
+ */
+static uint
+bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
+{
+  uint len = a->u.ptr->length;
+  ip_addr *addrs = (void *) a->u.ptr->data;
+
+  ASSERT((len == 16) || (len == 32));
+
+  /*
+   * With ext_next_hop enabled both IPv4 and IPv6 next hops may appear
+   * (RFC 5549 for VPNv4, RFC 4659 for VPNv6). An IPv4 address is written as a
+   * bare IPv4 address with VPNv4 NLRI, but as an IPv4-mapped IPv6 address with
+   * VPNv6 NLRI.
+   */
+
+  /* Every variant starts with a zero VPN RD */
+  put_u64(buf, 0);
+
+  if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(addrs[0]))
+  {
+    put_ip4(buf+8, ipa_to_ip4(addrs[0]));
+    return 12;
+  }
+
+  put_ip6(buf+8, ipa_to_ip6(addrs[0]));
+
+  if (len != 16)
+  {
+    /* Second address, again with a zero VPN RD in front */
+    put_u64(buf+24, 0);
+    put_ip6(buf+32, ipa_to_ip6(addrs[1]));
+    return 48;
+  }
+
+  return 24;
+}
+
+/*
+ * Decode a VPN next hop of @len bytes at @data, attach it to @a as
+ * BA_NEXT_HOP and apply it through bgp_apply_next_hop().
+ *
+ * Each address is preceded by an 8-byte field that must be zero. Accepted
+ * lengths are 12 (one IPv4 address), 24 (one IPv6 address) and 48 (two IPv6
+ * addresses); anything else is reported by bgp_parse_error(s, 9).
+ * NOTE(review): the code after the length checks presumably relies on
+ * bgp_parse_error() not returning - confirm.
+ */
+static void
+bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+{
+ struct bgp_channel *c = s->channel;
+ struct adata *ad = lp_alloc_adata(s->pool, 32);
+ ip_addr *nh = (void *) ad->data;
+
+ if (len == 12)
+ {
+ nh[0] = ipa_from_ip4(get_ip4(data+8));
+ nh[1] = IPA_NONE;
+ }
+ else if (len == 24)
+ {
+ nh[0] = ipa_from_ip6(get_ip6(data+8));
+ nh[1] = IPA_NONE;
+
+ /* A lone link-local address is moved to the second slot */
+ if (ipa_is_link_local(nh[0]))
+ { nh[1] = nh[0]; nh[0] = IPA_NONE; }
+ }
+ else if (len == 48)
+ {
+ nh[0] = ipa_from_ip6(get_ip6(data+8));
+ nh[1] = ipa_from_ip6(get_ip6(data+32));
+
+ /* Keep the second address only if it is link-local and the first is not IPv4 */
+ if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
+ nh[1] = IPA_NONE;
+ }
+ else
+ bgp_parse_error(s, 9);
+
+ /* Without a second address the attribute length is set to 16 */
+ if (ipa_zero(nh[1]))
+ ad->length = 16;
+
+ /* XXXX which error */
+ /* The 8-byte field in front of each address has to be zero */
+ if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
+ bgp_parse_error(s, 9);
+
+ /* Address family of the next hop must match the channel unless ext_next_hop is set */
+ if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
+ WITHDRAW(BAD_NEXT_HOP);
+
+ // XXXX validate next hop
+
+ bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, a, nh[0], nh[1]);
}
+
+
static uint
bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
{
if (!a0)
{
/* Route withdraw */
- rte_update2(&s->channel->c, n, NULL, s->last_src);
+ rte_update3(&s->channel->c, n, NULL, s->last_src);
return;
}
a0->eattrs = ea;
}
- rta *a = rta_clone(s->cached_rta);
- rte *e = rte_get_temp(a);
+ rta *a = rta_clone(s->cached_rta);
+ rte *e = rte_get_temp(a);
+
+ e->pflags = 0;
+ e->u.bgp.suppressed = 0;
+ e->u.bgp.stale = -1;
+ rte_update3(&s->channel->c, n, e, s->last_src);
+}
+
+/*
+ * Write the MPLS label stack of one NLRI entry at *pos.
+ *
+ * Each label takes three bytes (the label value shifted left by four bits);
+ * *pos and *size are advanced accordingly, the last written byte gets the
+ * bottom-of-stack bit and *pxlen grows by 24 bits per label. When @mpls is
+ * NULL a single zero label is written.
+ */
+static void
+bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen)
+{
+  u32 zero_label = 0;
+  u32 *stack = &zero_label;
+  uint count = 1;
+
+  if (mpls)
+  {
+    stack = (u32 *) mpls->data;
+    count = mpls->length / 4;
+  }
+
+  uint idx = 0;
+  while (idx < count)
+  {
+    put_u24(*pos, stack[idx] << 4);
+    ADVANCE(*pos, *size, 3);
+    idx++;
+  }
+
+  /* Add bottom-of-stack flag */
+  (*pos)[-1] |= BGP_MPLS_BOS;
+
+  *pxlen += 24 * count;
+}
+
+/*
+ * Decode the MPLS label part of one NLRI entry.
+ *
+ * Reads 24-bit label entries from *pos, advancing *pos / *len by 3 bytes and
+ * reducing *pxlen by 24 bits for each one, until an entry with the
+ * bottom-of-stack bit is found. If fewer than 24 bits remain in *pxlen,
+ * bgp_parse_error(s, 1) is called.
+ *
+ * When @a is NULL the entries are only skipped; otherwise the label stack is
+ * stored in the BA_MPLS_LABEL_STACK attribute, applied to the next hop of @a
+ * and the cached rta is dropped.
+ *
+ * NOTE(review): labels[] has BGP_MPLS_MAX slots and lnum is not checked
+ * against it here; this appears to rely on *pxlen being at most 255 -
+ * confirm against callers.
+ */
+static void
+bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
+{
+ u32 labels[BGP_MPLS_MAX], label;
+ uint lnum = 0;
+
+ do {
+ if (*pxlen < 24)
+ bgp_parse_error(s, 1);
+
+ /* The label value is the upper 20 bits of the 24-bit entry */
+ label = get_u24(*pos);
+ labels[lnum++] = label >> 4;
+ ADVANCE(*pos, *len, 3);
+ *pxlen -= 24;
+
+ /* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but
+ fixed-size 24-bit Compatibility field, which MUST be ignored */
+ if (!a && !s->err_withdraw)
+ return;
+ }
+ while (!(label & BGP_MPLS_BOS));
+
+ /* Nothing to attach the labels to */
+ if (!a)
+ return;
+
+ /* Attach MPLS attribute unless we already have one */
+ if (!s->mpls_labels)
+ {
+ s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
+ bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
+ }
+
+ /* Overwrite data in the attribute */
+ s->mpls_labels->length = 4*lnum;
+ memcpy(s->mpls_labels->data, labels, 4*lnum);
+
+ /* Update next hop entry in rta */
+ bgp_apply_mpls_labels(s, a, labels, lnum);
+
+ /* Attributes were changed, invalidate cached entry */
+ rta_free(s->cached_rta);
+ s->cached_rta = NULL;
+
+ return;
+}
+
+/*
+ * Encode IPv4 prefixes from bucket @buck into @buf and return the number of
+ * bytes written. Prefixes are taken from the head of the bucket and freed
+ * once encoded; encoding stops when the bucket is empty or fewer than
+ * BGP_NLRI_MAX bytes remain in @size.
+ *
+ * NOTE(review): BGP_NLRI_MAX does not obviously account for the MPLS label
+ * bytes written when s->mpls is set - confirm the caller leaves enough room.
+ */
+static uint
+bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+  byte *wr = buf;
+
+  for (;;)
+  {
+    if (EMPTY_LIST(buck->prefixes) || (size < BGP_NLRI_MAX))
+      break;
+
+    struct bgp_prefix *px = HEAD(buck->prefixes);
+    struct net_addr_ip4 *net = (void *) px->net;
+
+    /* Encode path ID */
+    if (s->add_path)
+    {
+      put_u32(wr, px->path_id);
+      ADVANCE(wr, size, 4);
+    }
+
+    /* Encode prefix length; the label encoder updates this byte later */
+    byte *pxlen_byte = wr;
+    *pxlen_byte = net->pxlen;
+    ADVANCE(wr, size, 1);
+
+    /* Encode MPLS labels */
+    if (s->mpls)
+      bgp_encode_mpls_labels(s, s->mpls_labels, &wr, &size, pxlen_byte);
+
+    /* Encode prefix body */
+    uint body_len = (net->pxlen + 7) / 8;
+    ip4_addr body = ip4_hton(net->prefix);
+    memcpy(wr, &body, body_len);
+    ADVANCE(wr, size, body_len);
+
+    bgp_free_prefix(s->channel, px);
+  }
+
+  return wr - buf;
+}
+
+/*
+ * Decode IPv4 NLRI of @len bytes at @pos and pass every decoded prefix to
+ * bgp_rte_update() together with attributes @a.
+ *
+ * Each entry is an optional 4-byte path ID (when s->add_path is set), one
+ * prefix-length byte, optional MPLS labels (when s->mpls is set) and the
+ * prefix body. Truncated entries are reported by bgp_parse_error(s, 1) and
+ * prefix lengths above IP4_MAX_PREFIX_LENGTH by bgp_parse_error(s, 10).
+ */
+static void
+bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+ while (len)
+ {
+ net_addr_ip4 net;
+ u32 path_id = 0;
+
+ /* Decode path ID */
+ if (s->add_path)
+ {
+ /* Path ID plus at least the prefix-length byte must be present */
+ if (len < 5)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
+ }
+
+ /* Decode prefix length */
+ uint l = *pos;
+ ADVANCE(pos, len, 1);
+
+ /* The length still covers the MPLS labels here, if there are any */
+ if (len < ((l + 7) / 8))
+ bgp_parse_error(s, 1);
+
+ /* Decode MPLS labels */
+ if (s->mpls)
+ bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+
+ if (l > IP4_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
+ /* Decode prefix body */
+ ip4_addr addr = IP4_NONE;
+ uint b = (l + 7) / 8;
+ memcpy(&addr, pos, b);
+ ADVANCE(pos, len, b);
+
+ net = NET_ADDR_IP4(ip4_ntoh(addr), l);
+ net_normalize_ip4(&net);
+
+ // XXXX validate prefix
- e->pflags = 0;
- e->u.bgp.suppressed = 0;
- rte_update2(&s->channel->c, n, e, s->last_src);
+ bgp_rte_update(s, (net_addr *) &net, path_id, a);
+ }
}
-
static uint
-bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
byte *pos = buf;
- while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip4_addr))))
+ while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
{
struct bgp_prefix *px = HEAD(buck->prefixes);
- struct net_addr_ip4 *net = (void *) px->net;
+ struct net_addr_ip6 *net = (void *) px->net;
/* Encode path ID */
if (s->add_path)
ADVANCE(pos, size, 4);
}
- ip4_addr a = ip4_hton(net->prefix);
- uint b = (net->pxlen + 7) / 8;
-
/* Encode prefix length */
*pos = net->pxlen;
ADVANCE(pos, size, 1);
+ /* Encode MPLS labels */
+ if (s->mpls)
+ bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
+
/* Encode prefix body */
+ ip6_addr a = ip6_hton(net->prefix);
+ uint b = (net->pxlen + 7) / 8;
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
}
static void
-bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
while (len)
{
- net_addr_ip4 net;
+ net_addr_ip6 net;
u32 path_id = 0;
/* Decode path ID */
/* Decode prefix length */
uint l = *pos;
- uint b = (l + 7) / 8;
ADVANCE(pos, len, 1);
- if (l > IP4_MAX_PREFIX_LENGTH)
- bgp_parse_error(s, 10);
-
- if (len < b)
+ if (len < ((l + 7) / 8))
bgp_parse_error(s, 1);
+ /* Decode MPLS labels */
+ if (s->mpls)
+ bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+
+ if (l > IP6_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
/* Decode prefix body */
- ip4_addr addr = IP4_NONE;
+ ip6_addr addr = IP6_NONE;
+ uint b = (l + 7) / 8;
memcpy(&addr, pos, b);
ADVANCE(pos, len, b);
- net = NET_ADDR_IP4(ip4_ntoh(addr), l);
- net_normalize_ip4(&net);
+ net = NET_ADDR_IP6(ip6_ntoh(addr), l);
+ net_normalize_ip6(&net);
// XXXX validate prefix
}
static uint
-bgp_encode_next_hop_ip4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
-{
- /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
-
- ASSERT(a->u.ptr->length == sizeof(ip_addr));
-
- put_ip4(buf, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
-
- return 4;
-}
-
-static void
-bgp_decode_next_hop_ip4(struct bgp_parse_state *s, byte *data, uint len, rta *a)
-{
- if (len != 4)
- bgp_parse_error(s, 9);
-
- ip_addr nh = ipa_from_ip4(get_ip4(data));
-
- // XXXX validate next hop
-
- bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &nh, sizeof(nh));
- bgp_apply_next_hop(s, a, nh, IPA_NONE);
-}
-
-
-static uint
-bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
byte *pos = buf;
- while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip6_addr))))
+ while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
{
struct bgp_prefix *px = HEAD(buck->prefixes);
- struct net_addr_ip6 *net = (void *) px->net;
+ struct net_addr_vpn4 *net = (void *) px->net;
/* Encode path ID */
if (s->add_path)
ADVANCE(pos, size, 4);
}
- ip6_addr a = ip6_hton(net->prefix);
- uint b = (net->pxlen + 7) / 8;
-
/* Encode prefix length */
- *pos = net->pxlen;
+ *pos = 64 + net->pxlen;
ADVANCE(pos, size, 1);
+ /* Encode MPLS labels */
+ if (s->mpls)
+ bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
+
+ /* Encode route distinguisher */
+ put_u64(pos, net->rd);
+ ADVANCE(pos, size, 8);
+
/* Encode prefix body */
+ ip4_addr a = ip4_hton(net->prefix);
+ uint b = (net->pxlen + 7) / 8;
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
}
static void
-bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
while (len)
{
- net_addr_ip6 net;
+ net_addr_vpn4 net;
u32 path_id = 0;
/* Decode path ID */
/* Decode prefix length */
uint l = *pos;
- uint b = (l + 7) / 8;
ADVANCE(pos, len, 1);
- if (l > IP6_MAX_PREFIX_LENGTH)
- bgp_parse_error(s, 10);
+ if (len < ((l + 7) / 8))
+ bgp_parse_error(s, 1);
+
+ /* Decode MPLS labels */
+ if (s->mpls)
+ bgp_decode_mpls_labels(s, &pos, &len, &l, a);
- if (len < b)
+ /* Decode route distinguisher */
+ if (l < 64)
bgp_parse_error(s, 1);
+ u64 rd = get_u64(pos);
+ ADVANCE(pos, len, 8);
+ l -= 64;
+
+ if (l > IP4_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
/* Decode prefix body */
- ip6_addr addr = IP6_NONE;
+ ip4_addr addr = IP4_NONE;
+ uint b = (l + 7) / 8;
memcpy(&addr, pos, b);
ADVANCE(pos, len, b);
- net = NET_ADDR_IP6(ip6_ntoh(addr), l);
- net_normalize_ip6(&net);
+ net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd);
+ net_normalize_vpn4(&net);
// XXXX validate prefix
}
}
+
static uint
-bgp_encode_next_hop_ip6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
+bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
- ip_addr *nh = (void *) a->u.ptr->data;
- uint len = a->u.ptr->length;
+ byte *pos = buf;
- ASSERT((len == 16) || (len == 32));
+ while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_vpn6 *net = (void *) px->net;
- put_ip6(buf, ipa_to_ip6(nh[0]));
+ /* Encode path ID */
+ if (s->add_path)
+ {
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
+ }
- if (len == 32)
- put_ip6(buf+16, ipa_to_ip6(nh[1]));
+ /* Encode prefix length */
+ *pos = 64 + net->pxlen;
+ ADVANCE(pos, size, 1);
- return len;
+ /* Encode MPLS labels */
+ if (s->mpls)
+ bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
+
+ /* Encode route distinguisher */
+ put_u64(pos, net->rd);
+ ADVANCE(pos, size, 8);
+
+ /* Encode prefix body */
+ ip6_addr a = ip6_hton(net->prefix);
+ uint b = (net->pxlen + 7) / 8;
+ memcpy(pos, &a, b);
+ ADVANCE(pos, size, b);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
}
static void
-bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
- struct adata *ad = lp_alloc_adata(s->pool, 32);
- ip_addr *nh = (void *) ad->data;
+ while (len)
+ {
+ net_addr_vpn6 net;
+ u32 path_id = 0;
- if ((len != 16) && (len != 32))
- bgp_parse_error(s, 9);
+ /* Decode path ID */
+ if (s->add_path)
+ {
+ if (len < 5)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
+ }
- nh[0] = ipa_from_ip6(get_ip6(data));
- nh[1] = (len == 32) ? ipa_from_ip6(get_ip6(data+16)) : IPA_NONE;
+ /* Decode prefix length */
+ uint l = *pos;
+ ADVANCE(pos, len, 1);
- if (ip6_is_link_local(nh[0]))
- {
- nh[1] = nh[0];
- nh[0] = IPA_NONE;
- }
+ if (len < ((l + 7) / 8))
+ bgp_parse_error(s, 1);
- if (!ip6_is_link_local(nh[1]))
- nh[1] = IPA_NONE;
+ /* Decode MPLS labels */
+ if (s->mpls)
+ bgp_decode_mpls_labels(s, &pos, &len, &l, a);
- if (ipa_zero(nh[1]))
- ad->length = 16;
+ /* Decode route distinguisher */
+ if (l < 64)
+ bgp_parse_error(s, 1);
- // XXXX validate next hop
+ u64 rd = get_u64(pos);
+ ADVANCE(pos, len, 8);
+ l -= 64;
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
- bgp_apply_next_hop(s, a, nh[0], nh[1]);
+ if (l > IP6_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
+ /* Decode prefix body */
+ ip6_addr addr = IP6_NONE;
+ uint b = (l + 7) / 8;
+ memcpy(&addr, pos, b);
+ ADVANCE(pos, len, b);
+
+ net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd);
+ net_normalize_vpn6(&net);
+
+ // XXXX validate prefix
+
+ bgp_rte_update(s, (net_addr *) &net, path_id, a);
+ }
}
uint pxlen = data[1];
// FIXME: Use some generic function
- memcpy(&px, data, BYTES(pxlen));
- px = ip4_and(px, ip4_mkmask(pxlen));
+ memcpy(&px, data+2, BYTES(pxlen));
+ px = ip4_and(ip4_ntoh(px), ip4_mkmask(pxlen));
/* Prepare the flow */
net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
uint pxlen = data[1];
// FIXME: Use some generic function
- memcpy(&px, data, BYTES(pxlen));
- px = ip6_and(px, ip6_mkmask(pxlen));
+ memcpy(&px, data+2, BYTES(pxlen));
+ px = ip6_and(ip6_ntoh(px), ip6_mkmask(pxlen));
/* Prepare the flow */
net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
.name = "ipv4",
.encode_nlri = bgp_encode_nlri_ip4,
.decode_nlri = bgp_decode_nlri_ip4,
- .encode_next_hop = bgp_encode_next_hop_ip4,
- .decode_next_hop = bgp_decode_next_hop_ip4,
+ .encode_next_hop = bgp_encode_next_hop_ip,
+ .decode_next_hop = bgp_decode_next_hop_ip,
.update_next_hop = bgp_update_next_hop_ip,
},
{
.name = "ipv4-mc",
.encode_nlri = bgp_encode_nlri_ip4,
.decode_nlri = bgp_decode_nlri_ip4,
- .encode_next_hop = bgp_encode_next_hop_ip4,
- .decode_next_hop = bgp_decode_next_hop_ip4,
+ .encode_next_hop = bgp_encode_next_hop_ip,
+ .decode_next_hop = bgp_decode_next_hop_ip,
.update_next_hop = bgp_update_next_hop_ip,
},
{
- .afi = BGP_AF_FLOW4,
- .net = NET_FLOW4,
- .name = "flow4",
- .encode_nlri = bgp_encode_nlri_flow4,
- .decode_nlri = bgp_decode_nlri_flow4,
- .encode_next_hop = bgp_encode_next_hop_none,
- .decode_next_hop = bgp_decode_next_hop_none,
- .update_next_hop = bgp_update_next_hop_none,
+ .afi = BGP_AF_IPV4_MPLS,
+ .net = NET_IP4,
+ .mpls = 1,
+ .name = "ipv4-mpls",
+ .encode_nlri = bgp_encode_nlri_ip4,
+ .decode_nlri = bgp_decode_nlri_ip4,
+ .encode_next_hop = bgp_encode_next_hop_ip,
+ .decode_next_hop = bgp_decode_next_hop_ip,
+ .update_next_hop = bgp_update_next_hop_ip,
},
{
.afi = BGP_AF_IPV6,
.name = "ipv6",
.encode_nlri = bgp_encode_nlri_ip6,
.decode_nlri = bgp_decode_nlri_ip6,
- .encode_next_hop = bgp_encode_next_hop_ip6,
- .decode_next_hop = bgp_decode_next_hop_ip6,
+ .encode_next_hop = bgp_encode_next_hop_ip,
+ .decode_next_hop = bgp_decode_next_hop_ip,
.update_next_hop = bgp_update_next_hop_ip,
},
{
.name = "ipv6-mc",
.encode_nlri = bgp_encode_nlri_ip6,
.decode_nlri = bgp_decode_nlri_ip6,
- .encode_next_hop = bgp_encode_next_hop_ip6,
- .decode_next_hop = bgp_decode_next_hop_ip6,
+ .encode_next_hop = bgp_encode_next_hop_ip,
+ .decode_next_hop = bgp_decode_next_hop_ip,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_IPV6_MPLS,
+ .net = NET_IP6,
+ .mpls = 1,
+ .name = "ipv6-mpls",
+ .encode_nlri = bgp_encode_nlri_ip6,
+ .decode_nlri = bgp_decode_nlri_ip6,
+ .encode_next_hop = bgp_encode_next_hop_ip,
+ .decode_next_hop = bgp_decode_next_hop_ip,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_VPN4_MPLS,
+ .net = NET_VPN4,
+ .mpls = 1,
+ .name = "vpn4-mpls",
+ .encode_nlri = bgp_encode_nlri_vpn4,
+ .decode_nlri = bgp_decode_nlri_vpn4,
+ .encode_next_hop = bgp_encode_next_hop_vpn,
+ .decode_next_hop = bgp_decode_next_hop_vpn,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_VPN6_MPLS,
+ .net = NET_VPN6,
+ .mpls = 1,
+ .name = "vpn6-mpls",
+ .encode_nlri = bgp_encode_nlri_vpn6,
+ .decode_nlri = bgp_decode_nlri_vpn6,
+ .encode_next_hop = bgp_encode_next_hop_vpn,
+ .decode_next_hop = bgp_decode_next_hop_vpn,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_VPN4_MC,
+ .net = NET_VPN4,
+ .name = "vpn4-mc",
+ .encode_nlri = bgp_encode_nlri_vpn4,
+ .decode_nlri = bgp_decode_nlri_vpn4,
+ .encode_next_hop = bgp_encode_next_hop_vpn,
+ .decode_next_hop = bgp_decode_next_hop_vpn,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_VPN6_MC,
+ .net = NET_VPN6,
+ .name = "vpn6-mc",
+ .encode_nlri = bgp_encode_nlri_vpn6,
+ .decode_nlri = bgp_decode_nlri_vpn6,
+ .encode_next_hop = bgp_encode_next_hop_vpn,
+ .decode_next_hop = bgp_decode_next_hop_vpn,
.update_next_hop = bgp_update_next_hop_ip,
},
+ {
+ .afi = BGP_AF_FLOW4,
+ .net = NET_FLOW4,
+ .no_igp = 1,
+ .name = "flow4",
+ .encode_nlri = bgp_encode_nlri_flow4,
+ .decode_nlri = bgp_decode_nlri_flow4,
+ .encode_next_hop = bgp_encode_next_hop_none,
+ .decode_next_hop = bgp_decode_next_hop_none,
+ .update_next_hop = bgp_update_next_hop_none,
+ },
{
.afi = BGP_AF_FLOW6,
.net = NET_FLOW6,
+ .no_igp = 1,
.name = "flow6",
.encode_nlri = bgp_encode_nlri_flow6,
.decode_nlri = bgp_decode_nlri_flow6,
byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
byte *res = NULL;
+again: ;
+
/* Initialize write state */
struct bgp_write_state s = {
.proto = p,
.channel = c,
.pool = bgp_linpool,
+ .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop,
.as4_session = p->as4_session,
.add_path = c->add_path_tx,
+ .mpls = c->desc->mpls,
};
-again:
-
/* Try unreachable bucket */
if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
- res = (c->afi == BGP_AF_IPV4) ?
+ res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
bgp_create_ip_unreach(&s, buck, buf, end):
bgp_create_mp_unreach(&s, buck, buf, end);
goto again;
}
- res = (c->afi == BGP_AF_IPV4) ?
+ res = !s.mp_reach ?
bgp_create_ip_reach(&s, buck, buf, end):
bgp_create_mp_reach(&s, buck, buf, end);
s->channel = c;
s->add_path = c->add_path_rx;
+ s->mpls = c->desc->mpls;
s->last_id = 0;
s->last_src = s->proto->p.main_source;
if (ea)
{
- a = alloca(sizeof(struct rta));
- memset(a, 0, sizeof(struct rta));
+ a = allocz(RTA_MAX_SIZE);
a->source = RTS_BGP;
a->scope = SCOPE_UNIVERSE;
- a->cast = RTC_UNICAST;
- a->dest = RTD_UNREACHABLE;
a->from = s->proto->cf->remote_ip;
a->eattrs = ea;
if (s.attr_len)
ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);
+ else
+ ea = NULL;
/* Check for End-of-RIB marker */
if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
return;
}
+/*
+ * bgp_find_update_afi - find the AFI/SAFI an incoming UPDATE belongs to
+ * @pos: start of the raw message; the two length fields are read at
+ *       offsets 19 and 21 (presumably right after the BGP header - confirm)
+ * @len: number of bytes available at @pos
+ *
+ * Stripped-down counterpart of bgp_rx_update(), bgp_decode_attrs() and
+ * bgp_decode_mp_[un]reach_nlri(), used by the MRT code. Nothing is decoded
+ * beyond what is needed to classify the message.
+ *
+ * Returns BGP_AF_IPV4 when the message carries withdrawn or reachable NLRI
+ * outside of the attribute list, or when it has no attributes at all (IPv4
+ * End-of-RIB); the BGP_AF() value taken from the first MP_REACH_NLRI or
+ * MP_UNREACH_NLRI attribute otherwise; and 0 for framing errors.
+ */
+static uint
+bgp_find_update_afi(byte *pos, uint len)
+{
+  if (len < 23)
+    return 0;
+
+  /*
+   * Both length fields are read as if the withdrawn NLRI block were empty;
+   * when it is not, the second value is never used.
+   */
+  uint withdrawn_len = get_u16(pos + 19);
+  uint attrs_len = get_u16(pos + 21);
+  ADVANCE(pos, len, 23);
+
+  /* Withdrawn NLRI present */
+  if (withdrawn_len != 0)
+    return BGP_AF_IPV4;
+
+  /* Reachable NLRI follows the attributes, or IPv4 End-of-RIB */
+  if ((attrs_len < len) || (attrs_len == 0))
+    return BGP_AF_IPV4;
+
+  /* Attribute list claims more data than the message holds */
+  if (attrs_len > len)
+    return 0;
+
+  /* From here on the attribute list fills the rest of the message */
+  while (len > 0)
+  {
+    /* Attribute flags and type code */
+    if (len < 2)
+      return 0;
+
+    uint attr_flags = pos[0];
+    uint attr_code = pos[1];
+    ADVANCE(pos, len, 2);
+
+    /* The length field takes two bytes when the extended-length flag is set */
+    uint len_size = (attr_flags & BAF_EXT_LEN) ? 2 : 1;
+    if (len < len_size)
+      return 0;
+
+    uint attr_len;
+    if (len_size == 1)
+      attr_len = get_u8(pos);
+    else
+      attr_len = get_u16(pos);
+    ADVANCE(pos, len, len_size);
+
+    /* Attribute body must fit into the remaining data */
+    if (attr_len > len)
+      return 0;
+
+    /* MP NLRI attribute: its body begins with AFI (2 bytes) and SAFI (1 byte) */
+    int is_mp_nlri = (attr_code == BA_MP_REACH_NLRI) || (attr_code == BA_MP_UNREACH_NLRI);
+    if (is_mp_nlri)
+    {
+      if (attr_len < 3)
+        return 0;
+
+      return BGP_AF(get_u16(pos), pos[2]);
+    }
+
+    /* Skip the body and continue with the next attribute */
+    ADVANCE(pos, len, attr_len);
+  }
+
+  /* Attributes present, but neither basic nor MP NLRI -> error */
+  return 0;
+}
+
/*
* ROUTE-REFRESH
return buff;
}
+/*
+ * RFC 8203 - shutdown communication message
+ *
+ * @p: BGP instance the message is stored to (via proto_set_message())
+ * @data: NOTIFICATION data; data[0] is the message length, the text follows
+ * @len: number of bytes available at @data; the caller in bgp_log_error()
+ *       only calls this function with non-zero @len
+ * @bp: write cursor into the caller's log buffer, advanced past the
+ *      appended text
+ *
+ * Returns 1 when the data was handled as a shutdown communication (this
+ * includes the empty message), 0 when the data is malformed, in which case
+ * nothing is stored or appended and the caller falls back to a hex dump.
+ */
+static int
+bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
+{
+  byte *msg = data + 1;
+  uint msg_len = data[0];
+  uint i;
+
+  /* Handle zero length message */
+  if (msg_len == 0)
+    return 1;
+
+  /*
+   * Reject malformed messages. Either condition alone makes the message
+   * invalid: a length above the 128-byte limit, or a length that does not
+   * fit into the received data. Requiring both at once would let a short
+   * packet with a large length byte through (reading and rewriting bytes
+   * past @data + @len below) and would let an oversized message be appended
+   * to the caller's fixed-size buffer, as the append below is unbounded.
+   */
+  if ((msg_len > 128) || (msg_len + 1 > len))
+    return 0;
+
+  /* Some elementary cleanup - replace control characters by spaces */
+  for (i = 0; i < msg_len; i++)
+    if (msg[i] < ' ')
+      msg[i] = ' ';
+
+  proto_set_message(&p->p, msg, msg_len);
+  *bp += bsprintf(*bp, ": \"%s\"", p->p.message);
+  return 1;
+}
+
+/*
+ * bgp_log_error - log a BGP error together with its decoded argument
+ * @p: BGP instance; its name prefixes the log line
+ * @class: error class; a Cease (code 6) with class BE_BGP_TX is not logged
+ * @msg: text placed in front of the error description
+ * @code: error code
+ * @subcode: error subcode
+ * @data: error data, only read when @len is non-zero
+ * @len: length of @data
+ *
+ * The argument is rendered as the AS number for a Bad Peer AS error (2/2)
+ * with 2 or 4 data bytes, as the quoted shutdown communication for Cease
+ * subcodes 2 and 4 when bgp_handle_message() accepts the data, and as a hex
+ * dump of at most the first 16 data bytes otherwise. For Cease subcodes 2
+ * and 4 the stored protocol message is reset first.
+ */
 void
 bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
 {
-  const byte *name;
-  byte *t, argbuf[36];
+  /*
+   * The longest argument is a shutdown communication. Its length field is
+   * a single byte, so the text can be up to 255 bytes, plus the `: ""`
+   * framing and the terminating NUL - more than 256 bytes in total.
+   */
+  byte argbuf[256+16], *t = argbuf;
   uint i;
   /* Don't report Cease messages generated by myself */
   if (code == 6 && class == BE_BGP_TX)
     return;
-  name = bgp_error_dsc(code, subcode);
-  t = argbuf;
+  /* Reset shutdown message */
+  if ((code == 6) && ((subcode == 2) || (subcode == 4)))
+    proto_set_message(&p->p, NULL, 0);
+
   if (len)
   {
-    *t++ = ':';
-    *t++ = ' ';
-
+    /* Bad peer AS - we would like to print the AS */
     if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
     {
-      /* Bad peer AS - we would like to print the AS */
-      t += bsprintf(t, "%u", (len == 2) ? get_u16(data) : get_u32(data));
+      t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
       goto done;
     }
+
+    /* RFC 8203 - shutdown communication */
+    if (((code == 6) && ((subcode == 2) || (subcode == 4))))
+      if (bgp_handle_message(p, data, len, &t))
+        goto done;
+
+    /* Anything else (or a malformed message) is dumped as hex */
+    *t++ = ':';
+    *t++ = ' ';
     if (len > 16)
       len = 16;
     for (i=0; i<len; i++)
       t += bsprintf(t, "%02x", data[i]);
   }
- done:
+
+done:
   *t = 0;
-  log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
+  const byte *dsc = bgp_error_dsc(code, subcode);
+  log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, dsc, argbuf);
 }
static void
if (err)
{
bgp_update_startup_delay(p);
- bgp_stop(p, 0);
+ bgp_stop(p, 0, NULL, 0);
+ }
+ else
+ {
+ uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0);
+ if (p->cf->disable_after_cease & subcode_bit)
+ {
+ log(L_INFO "%s: Disabled after Cease notification", p->p.name);
+ p->startup_delay = 0;
+ p->p.disabled = 1;
+ }
}
}
DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
if (conn->bgp->p.mrtdump & MD_MESSAGES)
- mrt_dump_bgp_packet(conn, pkt, len);
+ bgp_dump_message(conn, pkt, len);
switch (type)
{