* stale routes after reestablishment of BGP session during graceful restart.
*
* Supported standards:
- * <itemize>
- * <item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP)
- * <item> <rfc id="1997"> - BGP Communities Attribute
- * <item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature
- * <item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6
- * <item> <rfc id="2918"> - Route Refresh Capability
- * <item> <rfc id="3107"> - Carrying Label Information in BGP
- * <item> <rfc id="4360"> - BGP Extended Communities Attribute
- * <item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks
- * <item> <rfc id="4456"> - BGP Route Reflection
- * <item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message
- * <item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks
- * <item> <rfc id="4724"> - Graceful Restart Mechanism for BGP
- * <item> <rfc id="4760"> - Multiprotocol extensions for BGP
- * <item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS
- * <item> <rfc id="5065"> - AS confederations for BGP
- * <item> <rfc id="5082"> - Generalized TTL Security Mechanism
- * <item> <rfc id="5492"> - Capabilities Advertisement with BGP
- * <item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop
- * <item> <rfc id="5575"> - Dissemination of Flow Specification Rules
- * <item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community
- * <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
- * <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
- * <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
- * <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
- * <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
- * <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
- * <item> <rfc id="7947"> - Internet Exchange BGP Route Server
- * <item> <rfc id="8092"> - BGP Large Communities Attribute
- * <item> <rfc id="8203"> - BGP Administrative Shutdown Communication
- * </itemize>
-*/
+ * RFC 4271 - Border Gateway Protocol 4 (BGP)
+ * RFC 1997 - BGP Communities Attribute
+ * RFC 2385 - Protection of BGP Sessions via TCP MD5 Signature
+ * RFC 2545 - Use of BGP Multiprotocol Extensions for IPv6
+ * RFC 2918 - Route Refresh Capability
+ * RFC 3107 - Carrying Label Information in BGP
+ * RFC 4360 - BGP Extended Communities Attribute
+ * RFC 4364 - BGP/MPLS IPv4 Virtual Private Networks
+ * RFC 4456 - BGP Route Reflection
+ * RFC 4486 - Subcodes for BGP Cease Notification Message
+ * RFC 4659 - BGP/MPLS IPv6 Virtual Private Networks
+ * RFC 4724 - Graceful Restart Mechanism for BGP
+ * RFC 4760 - Multiprotocol extensions for BGP
+ * RFC 4798 - Connecting IPv6 Islands over IPv4 MPLS
+ * RFC 5065 - AS confederations for BGP
+ * RFC 5082 - Generalized TTL Security Mechanism
+ * RFC 5492 - Capabilities Advertisement with BGP
+ * RFC 5549 - Advertising IPv4 NLRI with an IPv6 Next Hop
+ * RFC 5575 - Dissemination of Flow Specification Rules
+ * RFC 5668 - 4-Octet AS Specific BGP Extended Community
+ * RFC 6286 - AS-Wide Unique BGP Identifier
+ * RFC 6608 - Subcodes for BGP Finite State Machine Error
+ * RFC 6793 - BGP Support for 4-Octet AS Numbers
+ * RFC 7313 - Enhanced Route Refresh Capability for BGP
+ * RFC 7606 - Revised Error Handling for BGP UPDATE Messages
+ * RFC 7911 - Advertisement of Multiple Paths in BGP
+ * RFC 7947 - Internet Exchange BGP Route Server
+ * RFC 8092 - BGP Large Communities Attribute
+ * RFC 8203 - BGP Administrative Shutdown Communication
+ * RFC 8212 - Default EBGP Route Propagation Behavior without Policies
+ * draft-ietf-idr-bgp-extended-messages-27
+ * draft-uttaro-idr-bgp-persistence-04
+ */
#undef LOCAL_DEBUG
#include "nest/cli.h"
#include "nest/locks.h"
#include "conf/conf.h"
+#include "filter/filter.h"
#include "lib/socket.h"
#include "lib/resource.h"
#include "lib/string.h"
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
+static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn);
+static void bgp_setup_sk(struct bgp_conn *conn, sock *s);
+static void bgp_send_open(struct bgp_conn *conn);
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
struct bgp_socket *bs = NULL;
struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
- (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6);
+ (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
uint port = p->cf->local_port;
/* FIXME: Add some global init? */
/* We assume that cf->iface is defined iff cf->local_ip is link-local */
WALK_LIST(bs, bgp_sockets)
- if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->iface == ifa) && (bs->sk->sport == port))
+ if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->sport == port) &&
+ (bs->sk->iface == ifa) && (bs->sk->vrf == p->p.vrf))
{
bs->uc++;
p->sock = bs;
sk->ttl = 255;
sk->saddr = addr;
sk->sport = port;
+ sk->iface = ifa;
+ sk->vrf = p->p.vrf;
sk->flags = 0;
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->rbsize = BGP_RX_BUFFER_SIZE;
bs->sk = sk;
bs->uc = 1;
p->sock = bs;
+ sk->data = bs;
add_tail(&bgp_sockets, &bs->n);
BGP_TRACE(D_EVENTS, "Started");
p->start_state = BSS_CONNECT;
- if (!p->cf->passive)
+ if (!p->passive)
bgp_active(p);
+
+ if (p->postponed_sk)
+ {
+ /* Apply postponed incoming connection */
+ bgp_setup_conn(p, &p->incoming_conn);
+ bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
+ bgp_send_open(&p->incoming_conn);
+ p->postponed_sk = NULL;
+ }
}
static void
void
bgp_update_startup_delay(struct bgp_proto *p)
{
- struct bgp_config *cf = p->cf;
+ const struct bgp_config *cf = p->cf;
DBG("BGP: Updating startup delay\n");
}
static void
-bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
+bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len)
{
switch (conn->state)
{
case BS_OPENSENT:
case BS_OPENCONFIRM:
case BS_ESTABLISHED:
- bgp_error(conn, 6, subcode, data, len);
+ if (subcode < 0)
+ {
+ bgp_conn_enter_close_state(conn);
+ bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
+ }
+ else
+ bgp_error(conn, 6, subcode, data, len);
return;
default:
if ((p->p.proto_state == PS_START) &&
(p->outgoing_conn.state == BS_IDLE) &&
(p->incoming_conn.state != BS_OPENCONFIRM) &&
- !p->cf->passive)
+ !p->passive)
bgp_active(p);
if ((p->p.proto_state == PS_STOP) &&
bgp_down(p);
}
+static struct bgp_proto *
+bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
+{	/* Spawn a new BGP instance cloned from the config of @pp for peer @remote_ip */
+  struct symbol *sym;
+  char fmt[SYM_MAX_LEN];	/* Name template: dynamic_name followed by a
+				 * zero-padded "%0<digits>d" field, passed to
+				 * cf_default_name() together with the counter.
+				 * NOTE(review): bsprintf() below gets no size
+				 * limit, so this relies on dynamic_name fitting
+				 * into SYM_MAX_LEN - confirm it is bounded where
+				 * the option is parsed. */
+
+  bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits);
+
+  /* This is a hack: we would like to share the config, but we need to copy it
+   * now. The config globals are pointed at the running config for the duration
+   * of the name lookup and the clone (presumably so both allocate from and
+   * register in the running config - confirm), and are reset to NULL right
+   * after. Keep this statement order. */
+  new_config = config;
+  cfg_mem = config->mem;
+  conf_this_scope = config->root_scope;
+  sym = cf_default_name(fmt, &(pp->dynamic_name_counter));
+  proto_clone_config(sym, pp->p.cf);
+  new_config = NULL;
+  cfg_mem = NULL;
+
+  /* Just pass remote_ip to bgp_init(); the cloned config's remote_ip field is
+   * used only as a carrier for the address of the connecting peer */
+  ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
+
+  return (void *) proto_spawn(sym->proto, 0);	/* NOTE(review): the caller dereferences the
+						 * result unchecked - confirm proto_spawn()
+						 * cannot return NULL here */
+}
+
void
-bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
+bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
{
proto_notify_state(&p->p, PS_STOP);
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
{
if (conn->bgp->p.mrtdump & MD_STATES)
- mrt_dump_bgp_state_change(conn, conn->state, new_state);
+ bgp_dump_state_change(conn, conn->state, new_state);
conn->state = new_state;
}
}
static const struct bgp_af_caps dummy_af_caps = { };
+static const struct bgp_af_caps basic_af_caps = { .ready = 1 };
void
bgp_conn_enter_established_state(struct bgp_conn *conn)
BGP_TRACE(D_EVENTS, "BGP session established");
/* For multi-hop BGP sessions */
- if (ipa_zero(p->source_addr))
- p->source_addr = conn->sk->saddr;
+ if (ipa_zero(p->local_ip))
+ p->local_ip = conn->sk->saddr;
+
+ /* For promiscuous sessions */
+ if (!p->remote_as)
+ p->remote_as = conn->received_as;
+
+  /* In case LLv6 was not valid during BGP start */
+ if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6)
+ p->link_addr = p->neigh->iface->llv6->ip;
conn->sk->fast_rx = 0;
p->route_refresh = peer->route_refresh;
p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
- /* Whether we may handle possible GR of peer (it has some AF GR-able) */
- p->gr_ready = 0; /* Updated later */
+ /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
+ p->gr_ready = p->llgr_ready = 0; /* Updated later */
/* Whether peer is ready to handle our GR recovery */
int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
/* Number of active channels */
int num = 0;
+ /* Summary state of ADD_PATH RX for active channels */
+ uint summary_add_path_rx = 0;
+
WALK_LIST(c, p->p.channels)
{
const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
+ /* Use default if capabilities were not announced */
+ if (!local->length && (c->afi == BGP_AF_IPV4))
+ loc = &basic_af_caps;
+
+ if (!peer->length && (c->afi == BGP_AF_IPV4))
+ rem = &basic_af_caps;
+
/* Ignore AFIs that were not announced in multiprotocol capability */
if (!loc || !loc->ready)
loc = &dummy_af_caps;
int active = loc->ready && rem->ready;
c->c.disabled = !active;
- c->c.reloadable = p->route_refresh;
+ c->c.reloadable = p->route_refresh || c->cf->import_table;
c->index = active ? num++ : 0;
c->load_state = BFS_NONE;
/* Channels where peer may do GR */
- c->gr_ready = active && local->gr_aware && rem->gr_able;
+ uint gr_ready = active && local->gr_aware && rem->gr_able;
+ uint llgr_ready = active && local->llgr_aware && rem->llgr_able;
+
+ c->gr_ready = gr_ready || llgr_ready;
p->gr_ready = p->gr_ready || c->gr_ready;
+ p->llgr_ready = p->llgr_ready || llgr_ready;
+
+ /* Remember last LLGR stale time */
+ c->stale_time = local->llgr_aware ? rem->llgr_time : 0;
/* Channels not able to recover gracefully */
if (p->p.gr_recovery && (!active || !peer_gr_ready))
if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
c->c.gr_wait = 1;
- /* Channels where peer is not able to recover gracefully */
- if (c->gr_active && ! (c->gr_ready && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
+ /* Channels where regular graceful restart failed */
+ if ((c->gr_active == BGP_GRS_ACTIVE) &&
+ !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
+ bgp_graceful_restart_done(c);
+
+    /* Channels where long-lived graceful restart failed */
+ if ((c->gr_active == BGP_GRS_LLGR) &&
+ !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING)))
bgp_graceful_restart_done(c);
/* GR capability implies that neighbor will send End-of-RIB */
c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
+ if (active)
+ summary_add_path_rx |= !c->add_path_rx ? 1 : 2;
+
/* Update RA mode */
if (c->add_path_tx)
c->c.ra_mode = RA_ANY;
p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
p->channel_count = num;
+ p->summary_add_path_rx = summary_add_path_rx;
WALK_LIST(c, p->p.channels)
{
if (c->gr_ready)
{
- if (c->gr_active)
+ p->gr_active_num++;
+
+ switch (c->gr_active)
+ {
+ case BGP_GRS_NONE:
+ c->gr_active = BGP_GRS_ACTIVE;
+ rt_refresh_begin(c->c.table, &c->c);
+ break;
+
+ case BGP_GRS_ACTIVE:
rt_refresh_end(c->c.table, &c->c);
+ rt_refresh_begin(c->c.table, &c->c);
+ break;
- c->gr_active = 1;
- p->gr_active_num++;
- rt_refresh_begin(c->c.table, &c->c);
+ case BGP_GRS_LLGR:
+ rt_refresh_begin(c->c.table, &c->c);
+ rt_modify_stale(c->c.table, &c->c);
+ break;
+ }
}
else
{
ASSERT(p->gr_active_num > 0);
proto_notify_state(&p->p, PS_START);
- bgp_start_timer(p->gr_timer, p->conn->remote_caps->gr_time);
+ tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
}
/**
if (!p->gr_active_num)
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
+ tm_stop(c->stale_timer);
rt_refresh_end(c->c.table, &c->c);
}
struct bgp_proto *p = t->data;
BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
- bgp_stop(p, 0, NULL, 0);
+
+ if (p->llgr_ready)
+ {
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ {
+ /* Channel is not in GR and is already flushed */
+ if (!c->gr_active)
+ continue;
+
+ /* Channel is already in LLGR from past restart */
+ if (c->gr_active == BGP_GRS_LLGR)
+ continue;
+
+ /* Channel is in GR, but does not support LLGR -> stop GR */
+ if (!c->stale_time)
+ {
+ bgp_graceful_restart_done(c);
+ continue;
+ }
+
+ /* Channel is in GR, and supports LLGR -> start LLGR */
+ c->gr_active = BGP_GRS_LLGR;
+ tm_start(c->stale_timer, c->stale_time S);
+ rt_modify_stale(c->c.table, &c->c);
+ }
+ }
+ else
+ bgp_stop(p, 0, NULL, 0);
+}
+
+static void
+bgp_long_lived_stale_timeout(timer *t)
+{	/* Timer hook of a channel's stale_timer: the long-lived stale time ran out */
+  struct bgp_channel *c = t->data;	/* The timer is created with the channel as its data */
+  struct bgp_proto *p = (void *) c->c.proto;	/* No direct use below; presumably read
+						 * implicitly by BGP_TRACE() - confirm
+						 * before renaming or removing */
+
+  BGP_TRACE(D_EVENTS, "Long-lived stale timeout");
+
+  bgp_graceful_restart_done(c);	/* End the restart state of this channel only */
}
c->load_state = BFS_REFRESHING;
rt_refresh_begin(c->c.table, &c->c);
+
+ if (c->c.in_table)
+ rt_refresh_begin(c->c.in_table, &c->c);
}
/**
c->load_state = BFS_NONE;
rt_refresh_end(c->c.table, &c->c);
+
+ if (c->c.in_table)
+ rt_prune_sync(c->c.in_table, 0);
}
conn->sk->rx_hook = bgp_rx;
conn->sk->tx_hook = bgp_tx;
tm_stop(conn->connect_timer);
+ bgp_prepare_capabilities(conn);
bgp_schedule_packet(conn, NULL, PKT_OPEN);
bgp_conn_set_state(conn, BS_OPENSENT);
bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
if (sk_rx_ready(conn->sk) > 0)
bgp_start_timer(conn->hold_timer, 10);
+ else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
+ {
+ BGP_TRACE(D_EVENTS, "Hold timer expired");
+ bgp_handle_graceful_restart(p);
+ bgp_conn_enter_idle_state(conn);
+ }
else
bgp_error(conn, 4, 0, NULL, 0);
}
conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0);
conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
- conn->tx_ev = ev_new(p->p.pool);
- conn->tx_ev->hook = bgp_kick_tx;
- conn->tx_ev->data = conn;
+ conn->tx_ev = ev_new_init(p->p.pool, bgp_kick_tx, conn);
}
static void
DBG("BGP: Connecting\n");
sock *s = sk_new(p->p.pool);
s->type = SK_TCP_ACTIVE;
- s->saddr = p->source_addr;
- s->daddr = p->cf->remote_ip;
+ s->saddr = p->local_ip;
+ s->daddr = p->remote_ip;
s->dport = p->cf->remote_port;
s->iface = p->neigh ? p->neigh->iface : NULL;
s->vrf = p->p.vrf;
s->tos = IP_PREC_INTERNET_CONTROL;
s->password = p->cf->password;
s->tx_hook = bgp_connected;
- BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
+ BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
+ s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
bgp_setup_conn(p, conn);
bgp_setup_sk(conn, s);
return;
}
+/*
+ * Tell whether @p is a dynamic BGP instance, i.e. one whose remote_ip is
+ * unset (zero). Returns the result of ipa_zero() on p->remote_ip.
+ */
+static inline int
+bgp_is_dynamic(struct bgp_proto *p)
+{
+  return ipa_zero(p->remote_ip);
+}
+
/**
* bgp_find_proto - find existing proto for incoming connection
* @sk: TCP socket
static struct bgp_proto *
bgp_find_proto(sock *sk)
{
+ struct bgp_proto *best = NULL;
struct bgp_proto *p;
+ /* sk->iface is valid only if src or dst address is link-local */
+ int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
+
WALK_LIST(p, proto_list)
if ((p->p.proto == &proto_bgp) &&
- ipa_equal(p->cf->remote_ip, sk->daddr) &&
- (!p->cf->iface || (p->cf->iface == sk->iface)) &&
- (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)) &&
- (p->cf->local_port == sk->sport))
- return p;
+ (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
+ (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
+ (p->p.vrf == sk->vrf) &&
+ (p->cf->local_port == sk->sport) &&
+ (!link || (p->cf->iface == sk->iface)) &&
+ (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)))
+ {
+ best = p;
- return NULL;
+ if (!bgp_is_dynamic(p))
+ break;
+ }
+
+ return best;
}
/**
sk_reallocate(sk);
}
+ /* For dynamic BGP, spawn new instance and postpone the socket */
+ if (bgp_is_dynamic(p))
+ {
+ p = bgp_spawn(p, sk->daddr);
+ p->postponed_sk = sk;
+ rmove(sk, p->p.pool);
+ return 0;
+ }
+
+ rmove(sk, p->p.pool);
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, sk);
bgp_send_open(&p->incoming_conn);
{
/* Called only for single-hop BGP sessions */
- if (ipa_zero(p->source_addr))
- p->source_addr = p->neigh->ifa->ip;
+ if (ipa_zero(p->local_ip))
+ p->local_ip = p->neigh->ifa->ip;
- if (ipa_is_link_local(p->source_addr))
- p->link_addr = p->source_addr;
+ if (ipa_is_link_local(p->local_ip))
+ p->link_addr = p->local_ip;
else if (p->neigh->iface->llv6)
p->link_addr = p->neigh->iface->llv6->ip;
{
BGP_TRACE(D_EVENTS, "BFD session down");
bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
- if (ps == PS_UP)
- bgp_update_startup_delay(p);
- bgp_stop(p, 0, NULL, 0);
+
+ if (p->cf->bfd == BGP_BFD_GRACEFUL)
+ {
+ /* Trigger graceful restart */
+ if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
+ bgp_handle_graceful_restart(p);
+
+ if (p->incoming_conn.state > BS_IDLE)
+ bgp_conn_enter_idle_state(&p->incoming_conn);
+
+ if (p->outgoing_conn.state > BS_IDLE)
+ bgp_conn_enter_idle_state(&p->outgoing_conn);
+ }
+ else
+ {
+ /* Trigger session down */
+ if (ps == PS_UP)
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0, NULL, 0);
+ }
}
}
static void
bgp_update_bfd(struct bgp_proto *p, int use_bfd)
{
- if (use_bfd && !p->bfd_req)
- p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
+ if (use_bfd && !p->bfd_req && !bgp_is_dynamic(p))
+ p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
p->cf->multihop ? NULL : p->neigh->iface,
bgp_bfd_notify, p);
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
- ASSERT(p->conn && p->route_refresh);
+ ASSERT(p->conn && (p->route_refresh || c->c.in_table));
- bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
+ if (c->c.in_table)
+ channel_schedule_reload(C);
+ else
+ bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
}
static void
bgp_start_locked(struct object_lock *lock)
{
struct bgp_proto *p = lock->data;
- struct bgp_config *cf = p->cf;
+ const struct bgp_config *cf = p->cf;
if (p->p.proto_state != PS_START)
{
DBG("BGP: Got lock\n");
- if (cf->multihop)
+ if (cf->multihop || bgp_is_dynamic(p))
{
/* Multi-hop sessions do not use neighbor entries */
bgp_initiate(p);
return;
}
- neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
+ neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
if (!n)
{
- log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
+ log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
/* As we do not start yet, we can just disable protocol */
p->p.disabled = 1;
bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
p->neigh = n;
if (n->scope <= 0)
- BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
+ BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface);
else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
else
bgp_start(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- struct object_lock *lock;
+ const struct bgp_config *cf = p->cf;
+
+ p->local_ip = cf->local_ip;
+ p->local_as = cf->local_as;
+ p->remote_as = cf->remote_as;
+ p->public_as = cf->local_as;
+
+  /* For dynamic BGP children, remote_ip is already set */
+ if (ipa_nonzero(cf->remote_ip))
+ p->remote_ip = cf->remote_ip;
+
+ /* Confederation ID is used for truly external peers */
+ if (p->cf->confederation && !p->is_interior)
+ p->public_as = cf->confederation;
+
+ p->passive = cf->passive || bgp_is_dynamic(p);
- DBG("BGP: Startup.\n");
p->start_state = BSS_PREPARE;
p->outgoing_conn.state = BS_IDLE;
p->incoming_conn.state = BS_IDLE;
p->neigh = NULL;
p->bfd_req = NULL;
+ p->postponed_sk = NULL;
p->gr_ready = 0;
p->gr_active_num = 0;
- p->event = ev_new(p->p.pool);
- p->event->hook = bgp_decision;
- p->event->data = p;
-
+ p->event = ev_new_init(p->p.pool, bgp_decision, p);
p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
p->remote_id = 0;
- p->source_addr = p->cf->local_ip;
p->link_addr = IPA_NONE;
/* Lock all channels when in GR recovery mode */
* Before attempting to create the connection, we need to lock the port,
* so that we are the only instance attempting to talk with that neighbor.
*/
-
+ struct object_lock *lock;
lock = p->lock = olock_new(P->pool);
- lock->addr = p->cf->remote_ip;
+ lock->addr = p->remote_ip;
lock->port = p->cf->remote_port;
lock->iface = p->cf->iface;
lock->vrf = p->cf->iface ? NULL : p->p.vrf;
bgp_shutdown(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- uint subcode = 0;
+ int subcode = 0;
char *message = NULL;
byte *data = NULL;
case PDC_CMD_DISABLE:
case PDC_CMD_SHUTDOWN:
+ shutdown:
subcode = 2; // Errcode 6, 2 - administrative shutdown
message = P->message;
break;
message = P->message;
break;
+ case PDC_CMD_GR_DOWN:
+ if ((p->cf->gr_mode != BGP_GR_ABLE) &&
+ (p->cf->llgr_mode != BGP_LLGR_ABLE))
+ goto shutdown;
+
+ subcode = -1; // Do not send NOTIFICATION, just close the connection
+ break;
+
case PDC_RX_LIMIT_HIT:
case PDC_IN_LIMIT_HIT:
subcode = 1; // Errcode 6, 1 - max number of prefixes reached
if (message)
{
uint msg_len = strlen(message);
- msg_len = MIN(msg_len, 128);
+ msg_len = MIN(msg_len, 255);
/* Buffer will be freed automatically by protocol shutdown */
data = mb_alloc(p->p.pool, msg_len + 1);
struct bgp_config *cf = (struct bgp_config *) CF;
P->rt_notify = bgp_rt_notify;
- P->import_control = bgp_import_control;
+ P->preexport = bgp_preexport;
P->neigh_notify = bgp_neigh_notify;
P->reload_routes = bgp_reload_routes;
P->feed_begin = bgp_feed_begin;
P->rte_better = bgp_rte_better;
P->rte_mergable = bgp_rte_mergable;
P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
+ P->rte_modify = bgp_rte_modify_stale;
p->cf = cf;
- p->local_as = cf->local_as;
- p->remote_as = cf->remote_as;
- p->public_as = cf->local_as;
p->is_internal = (cf->local_as == cf->remote_as);
p->is_interior = p->is_internal || cf->confederation_member;
p->rs_client = cf->rs_client;
p->rr_client = cf->rr_client;
- /* Confederation ID is used for truly external peers */
- if (cf->confederation && !p->is_interior)
- p->public_as = cf->confederation;
+ p->ipv4 = ipa_nonzero(cf->remote_ip) ?
+ ipa_is_ip4(cf->remote_ip) :
+ (cf->remote_range && (cf->remote_range->type == NET_IP4));
+
+ p->remote_ip = cf->remote_ip;
+ p->remote_as = cf->remote_as;
+
+ /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
+ if (cf->c.parent)
+ cf->remote_ip = IPA_NONE;
/* Add all channels */
struct bgp_channel_config *cc;
{
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
- ip_addr src = p->source_addr;
+ ip_addr src = p->local_ip;
if (c->igp_table_ip4)
rt_lock_table(c->igp_table_ip4);
bgp_init_bucket_table(c);
bgp_init_prefix_table(c);
+ if (c->cf->import_table)
+ channel_setup_in_table(C);
+
+ c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
+
c->next_hop_addr = c->cf->next_hop_addr;
c->link_addr = IPA_NONE;
c->packets_to_send = 0;
if (ipa_zero(c->next_hop_addr))
{
/* We know the iface for single-hop, we make lookup for multihop */
- struct neighbor *nbr = p->neigh ?: neigh_find2(&p->p, &src, NULL, 0);
+ struct neighbor *nbr = p->neigh ?: neigh_find(&p->p, src, NULL, 0);
struct iface *iface = nbr ? nbr->iface : NULL;
if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
if (c->igp_table_ip6)
rt_unlock_table(c->igp_table_ip6);
+
+ c->index = 0;
+
+ /* Cleanup rest of bgp_channel starting at pool field */
+ memset(&(c->pool), 0, sizeof(struct bgp_channel) - OFFSETOF(struct bgp_channel, pool));
}
static inline struct bgp_channel_config *
bgp_postconfig(struct proto_config *CF)
{
struct bgp_config *cf = (void *) CF;
- int internal = (cf->local_as == cf->remote_as);
/* Do not check templates at all */
if (cf->c.class == SYM_TEMPLATE)
return;
+ /* Handle undefined remote_as, zero should mean unspecified external */
+ if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL))
+ cf->remote_as = cf->local_as;
+
+ int internal = (cf->local_as == cf->remote_as);
+ int interior = internal || cf->confederation_member;
+
/* EBGP direct by default, IBGP multihop by default */
if (cf->multihop < 0)
cf->multihop = internal ? 64 : 0;
+ /* LLGR mode default based on GR mode */
+ if (cf->llgr_mode < 0)
+ cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0;
+
/* Link check for single-hop BGP by default */
if (cf->check_link < 0)
cf->check_link = !cf->multihop;
if (!cf->local_as)
cf_error("Local AS number must be set");
- if (ipa_zero(cf->remote_ip))
+ if (ipa_zero(cf->remote_ip) && !cf->remote_range)
cf_error("Neighbor must be configured");
- if (!cf->remote_as)
- cf_error("Remote AS number must be set");
+ if (ipa_zero(cf->local_ip) && cf->strict_bind)
+ cf_error("Local address must be configured for strict bind");
- if (ipa_is_link_local(cf->remote_ip) && !cf->iface)
- cf_error("Link-local neighbor address requires specified interface");
+ if (!cf->remote_as && !cf->peer_type)
+ cf_error("Remote AS number (or peer type) must be set");
+
+ if ((cf->peer_type == BGP_PT_INTERNAL) && !internal)
+ cf_error("IBGP cannot have different ASNs");
+
+ if ((cf->peer_type == BGP_PT_EXTERNAL) && internal)
+ cf_error("EBGP cannot have the same ASNs");
+
+ if (!cf->iface && (ipa_is_link_local(cf->local_ip) ||
+ ipa_is_link_local(cf->remote_ip)))
+ cf_error("Link-local addresses require defined interface");
if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
cf_error("Neighbor AS number out of range (AS4 not available)");
if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
cf_error("Multihop BGP with BFD requires specified local address");
+ if (!cf->gr_mode && cf->llgr_mode)
+ cf_error("Long-lived graceful restart requires basic graceful restart");
+
struct bgp_channel_config *cc;
WALK_LIST(cc, CF->channels)
{
+ /* Handle undefined import filter */
+ if (cc->c.in_filter == FILTER_UNDEF)
+ if (interior)
+ cc->c.in_filter = FILTER_ACCEPT;
+ else
+ cf_error("EBGP requires explicit import policy");
+
+ /* Handle undefined export filter */
+ if (cc->c.out_filter == FILTER_UNDEF)
+ if (interior)
+ cc->c.out_filter = FILTER_REJECT;
+ else
+ cf_error("EBGP requires explicit export policy");
+
/* Disable after error incompatible with restart limit action */
if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
cc->c.in_limit.action = PLA_DISABLE;
+ /* Different default based on rr_client, rs_client */
+ if (cc->next_hop_keep == 0xff)
+ cc->next_hop_keep = cf->rr_client ? NH_IBGP : (cf->rs_client ? NH_ALL : NH_NO);
+
/* Different default based on rs_client */
if (!cc->missing_lladdr)
cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF;
if (!cc->gw_mode)
cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
- /* Default based on proto config */
+ /* Defaults based on proto config */
if (cc->gr_able == 0xff)
cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
+ if (cc->llgr_able == 0xff)
+ cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE);
+
+ if (cc->llgr_time == ~0U)
+ cc->llgr_time = cf->llgr_time;
+
/* Default values of IGP tables */
if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
{
bgp_reconfigure(struct proto *P, struct proto_config *CF)
{
struct bgp_proto *p = (void *) P;
- struct bgp_config *new = (void *) CF;
- struct bgp_config *old = p->cf;
+ const struct bgp_config *new = (void *) CF;
+ const struct bgp_config *old = p->cf;
if (proto_get_router_id(CF) != p->local_id)
return 0;
// password item is last and must be checked separately
OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
&& ((!old->password && !new->password)
- || (old->password && new->password && !strcmp(old->password, new->password)));
+ || (old->password && new->password && !strcmp(old->password, new->password)))
+ && ((!old->remote_range && !new->remote_range)
+ || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range)))
+ && ((!old->dynamic_name && !new->dynamic_name)
+ || (old->dynamic_name && new->dynamic_name && !strcmp(old->dynamic_name, new->dynamic_name)))
+ && (old->dynamic_name_digits == new->dynamic_name_digits);
/* FIXME: Move channel reconfiguration to generic protocol code ? */
struct channel *C, *C2;
{
C = (struct channel *) bgp_find_channel(p, cc->afi);
same = proto_configure_channel(P, &C, &cc->c) && same;
- C->stale = 0;
+
+ if (C)
+ C->stale = 0;
}
WALK_LIST_DELSAFE(C, C2, p->p.channels)
if (same)
p->cf = new;
+ /* Reset name counter */
+ p->dynamic_name_counter = 0;
+
return same;
}
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
+static char *bgp_gr_states[] = { "None", "Regular", "Long-lived"};
static const char *
bgp_last_errmsg(struct bgp_proto *p)
return "Down";
int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
- if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
+ if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive)
return "Passive";
return bgp_state_names[state];
uint any_gr_able = 0;
uint any_add_path = 0;
uint any_ext_next_hop = 0;
+ uint any_llgr_able = 0;
u32 *afl1 = alloca(caps->af_count * sizeof(u32));
u32 *afl2 = alloca(caps->af_count * sizeof(u32));
uint afn1, afn2;
any_gr_able |= ac->gr_able;
any_add_path |= ac->add_path;
any_ext_next_hop |= ac->ext_next_hop;
+ any_llgr_able |= ac->llgr_able;
}
if (any_mp_bgp)
if (caps->enhanced_refresh)
cli_msg(-1006, " Enhanced refresh");
+
+ if (caps->llgr_aware)
+ cli_msg(-1006, " Long-lived graceful restart");
+
+ if (any_llgr_able)
+ {
+ u32 stale_time = 0;
+
+ afn1 = afn2 = 0;
+ WALK_AF_CAPS(caps, ac)
+ {
+ stale_time = MAX(stale_time, ac->llgr_time);
+
+ if (ac->llgr_able && ac->llgr_time)
+ afl1[afn1++] = ac->afi;
+
+ if (ac->llgr_flags & BGP_GRF_FORWARDING)
+ afl2[afn2++] = ac->afi;
+ }
+
+ /* Continues from llgr_aware */
+ cli_msg(-1006, " LL stale time: %u", stale_time);
+
+ bgp_show_afis(-1006, " AF supported:", afl1, afn1);
+ bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
+ }
}
static void
struct bgp_proto *p = (struct bgp_proto *) P;
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
- cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
+
+ if (bgp_is_dynamic(p) && p->cf->remote_range)
+ cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range);
+ else
+ cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface);
+
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
+ cli_msg(-1006, " Local AS: %u", p->cf->local_as);
if (p->gr_active_num)
cli_msg(-1006, " Neighbor graceful restart active");
p->rr_client ? " route-reflector" : "",
p->rs_client ? " route-server" : "",
p->as4_session ? " AS4" : "");
- cli_msg(-1006, " Source address: %I", p->source_addr);
+ cli_msg(-1006, " Source address: %I", p->local_ip);
cli_msg(-1006, " Hold timer: %t/%u",
tm_remains(p->conn->hold_timer), p->conn->hold_time);
cli_msg(-1006, " Keepalive timer: %t/%u",
{
channel_show_info(&c->c);
+ if (p->gr_active_num)
+ cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
+
+ if (c->stale_timer && tm_active(c->stale_timer))
+ cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer));
+
if (c->c.channel_state == CS_UP)
{
if (ipa_zero(c->link_addr))
struct protocol proto_bgp = {
.name = "BGP",
.template = "bgp%d",
- .attr_class = EAP_BGP,
+ .class = PROTOCOL_BGP,
.preference = DEF_PREF_BGP,
.channel_mask = NB_IP | NB_VPN | NB_FLOW,
.proto_size = sizeof(struct bgp_proto),