2 * BIRD -- The Border Gateway Protocol
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
8 * Can be freely distributed and used under the terms of the GNU GPL.
12 * DOC: Border Gateway Protocol
14 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
15 * the connection and most of the interface with BIRD core, |packets.c| handling
16 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
17 * manipulation with BGP attribute lists.
19 * As opposed to the other existing routing daemons, BIRD has a sophisticated
20 * core architecture which is able to keep all the information needed by BGP in
21 * the primary routing table, therefore no complex data structures like a
22 * central BGP table are needed. This increases memory footprint of a BGP router
23 * with many connections, but not too much and, which is more important, it
24 * makes BGP much easier to implement.
26 * Each instance of BGP (corresponding to a single BGP peer) is described by a
27 * &bgp_proto structure to which are attached individual connections represented
28 * by &bgp_connection (usually, there exists only one connection, but during BGP
29 * session setup, there can be more of them). The connections are handled
30 * according to the BGP state machine defined in the RFC with all the timers and
31 * all the parameters configurable.
33 * In incoming direction, we listen on the connection's socket and each time we
34 * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
35 * markers and passes complete packets to bgp_rx_packet() which distributes the
36 * packet according to its type.
38 * In outgoing direction, we gather all the routing updates and sort them to
39 * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
40 * fast comparison of &rta's and a &fib which helps us to find if we already
41 * have another route for the same destination queued for sending, so that we
42 * can replace it with the new one immediately instead of sending both
43 * updates). There also exists a special bucket holding all the route
44 * withdrawals which cannot be queued anywhere else as they don't have any
45 * attributes. If we have any packet to send (due to either new routes or the
46 * connection tracking code wanting to send an Open, Keepalive or Notification
47 * message), we call bgp_schedule_packet() which sets the corresponding bit in a
48 * @packet_to_send bit field in &bgp_conn and as soon as the transmit socket
49 * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
50 * packet type bits and calls the corresponding bgp_create_xx() functions,
51 * eventually rescheduling the same packet type if we have more data of the same
52 * type to send.
54 * The processing of attributes consists of two functions: bgp_decode_attrs()
55 * for checking of the attribute blocks and translating them to the language of
56 * BIRD's extended attributes and bgp_encode_attrs() which does the
57 * converse. Both functions are built around a @bgp_attr_table array describing
58 * all important characteristics of all known attributes. Unknown transitive
59 * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
61 * BGP protocol implements graceful restart in both restarting (local restart)
62 * and receiving (neighbor restart) roles. The first is handled mostly by the
63 * graceful restart code in the nest, BGP protocol just handles capabilities,
64 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
65 * The second is implemented by internal restart of the BGP state to %BS_IDLE
66 * and protocol state to %PS_START, but keeping the protocol up from the core
67 * point of view and therefore maintaining received routes. Routing table
68 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
69 * stale routes after reestablishment of BGP session during graceful restart.
71 * Supported standards:
72 * RFC 4271 - Border Gateway Protocol 4 (BGP)
73 * RFC 1997 - BGP Communities Attribute
74 * RFC 2385 - Protection of BGP Sessions via TCP MD5 Signature
75 * RFC 2545 - Use of BGP Multiprotocol Extensions for IPv6
76 * RFC 2918 - Route Refresh Capability
77 * RFC 3107 - Carrying Label Information in BGP
78 * RFC 4360 - BGP Extended Communities Attribute
79 * RFC 4364 - BGP/MPLS IPv4 Virtual Private Networks
80 * RFC 4456 - BGP Route Reflection
81 * RFC 4486 - Subcodes for BGP Cease Notification Message
82 * RFC 4659 - BGP/MPLS IPv6 Virtual Private Networks
83 * RFC 4724 - Graceful Restart Mechanism for BGP
84 * RFC 4760 - Multiprotocol extensions for BGP
85 * RFC 4798 - Connecting IPv6 Islands over IPv4 MPLS
86 * RFC 5065 - AS confederations for BGP
87 * RFC 5082 - Generalized TTL Security Mechanism
88 * RFC 5492 - Capabilities Advertisement with BGP
89 * RFC 5549 - Advertising IPv4 NLRI with an IPv6 Next Hop
90 * RFC 5575 - Dissemination of Flow Specification Rules
91 * RFC 5668 - 4-Octet AS Specific BGP Extended Community
92 * RFC 6286 - AS-Wide Unique BGP Identifier
93 * RFC 6608 - Subcodes for BGP Finite State Machine Error
94 * RFC 6793 - BGP Support for 4-Octet AS Numbers
95 * RFC 7311 - Accumulated IGP Metric Attribute for BGP
96 * RFC 7313 - Enhanced Route Refresh Capability for BGP
97 * RFC 7606 - Revised Error Handling for BGP UPDATE Messages
98 * RFC 7911 - Advertisement of Multiple Paths in BGP
99 * RFC 7947 - Internet Exchange BGP Route Server
100 * RFC 8092 - BGP Large Communities Attribute
101 * RFC 8203 - BGP Administrative Shutdown Communication
102 * RFC 8212 - Default EBGP Route Propagation Behavior without Policies
103 * RFC 8654 - Extended Message Support for BGP
104 * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications
105 * RFC 9234 - Route Leak Prevention and Detection Using Roles
106 * draft-ietf-idr-ext-opt-param-07
107 * draft-uttaro-idr-bgp-persistence-04
108 * draft-walton-bgp-hostname-capability-02
115 #include "nest/bird.h"
116 #include "nest/iface.h"
117 #include "nest/protocol.h"
118 #include "nest/route.h"
119 #include "nest/cli.h"
120 #include "nest/locks.h"
121 #include "conf/conf.h"
122 #include "filter/filter.h"
123 #include "lib/socket.h"
124 #include "lib/resource.h"
125 #include "lib/string.h"
130 static list
STATIC_LIST_INIT(bgp_sockets
); /* Global list of listening sockets */
133 static void bgp_connect(struct bgp_proto
*p
);
134 static void bgp_active(struct bgp_proto
*p
);
135 static void bgp_setup_conn(struct bgp_proto
*p
, struct bgp_conn
*conn
);
136 static void bgp_setup_sk(struct bgp_conn
*conn
, sock
*s
);
137 static void bgp_send_open(struct bgp_conn
*conn
);
138 static void bgp_update_bfd(struct bgp_proto
*p
, const struct bfd_options
*bfd
);
140 static int bgp_incoming_connection(sock
*sk
, uint dummy UNUSED
);
141 static void bgp_listen_sock_err(sock
*sk UNUSED
, int err
);
144 * bgp_open - open a BGP instance
147 * This function allocates and configures shared BGP resources, mainly listening
148 * sockets. Should be called as the last step during initialization (when lock
149 * is acquired and neighbor is ready). On error, the caller should change state to
150 * PS_DOWN and return immediately.
153 bgp_open(struct bgp_proto
*p
)
155 struct bgp_socket
*bs
= NULL
;
156 struct iface
*ifa
= p
->cf
->strict_bind
? p
->cf
->iface
: NULL
;
157 ip_addr addr
= p
->cf
->strict_bind
? p
->cf
->local_ip
:
158 (p
->ipv4
? IPA_NONE4
: IPA_NONE6
);
159 uint port
= p
->cf
->local_port
;
160 uint flags
= p
->cf
->free_bind
? SKF_FREEBIND
: 0;
161 uint flag_mask
= SKF_FREEBIND
;
163 /* We assume that cf->iface is defined iff cf->local_ip is link-local */
165 WALK_LIST(bs
, bgp_sockets
)
166 if (ipa_equal(bs
->sk
->saddr
, addr
) &&
167 (bs
->sk
->sport
== port
) &&
168 (bs
->sk
->iface
== ifa
) &&
169 (bs
->sk
->vrf
== p
->p
.vrf
) &&
170 ((bs
->sk
->flags
& flag_mask
) == flags
))
177 sock
*sk
= sk_new(proto_pool
);
178 sk
->type
= SK_TCP_PASSIVE
;
185 sk
->tos
= IP_PREC_INTERNET_CONTROL
;
186 sk
->rbsize
= BGP_RX_BUFFER_SIZE
;
187 sk
->tbsize
= BGP_TX_BUFFER_SIZE
;
188 sk
->rx_hook
= bgp_incoming_connection
;
189 sk
->err_hook
= bgp_listen_sock_err
;
194 bs
= mb_allocz(proto_pool
, sizeof(struct bgp_socket
));
200 add_tail(&bgp_sockets
, &bs
->n
);
205 sk_log_error(sk
, p
->p
.name
);
206 log(L_ERR
"%s: Cannot open listening socket", p
->p
.name
);
212 * bgp_close - close a BGP instance
215 * This function frees and deconfigures shared BGP resources.
218 bgp_close(struct bgp_proto
*p
)
220 struct bgp_socket
*bs
= p
->sock
;
222 ASSERT(bs
&& bs
->uc
);
233 bgp_setup_auth(struct bgp_proto
*p
, int enable
)
237 ip_addr prefix
= p
->cf
->remote_ip
;
240 if (p
->cf
->remote_range
)
242 prefix
= net_prefix(p
->cf
->remote_range
);
243 pxlen
= net_pxlen(p
->cf
->remote_range
);
246 int rv
= sk_set_md5_auth(p
->sock
->sk
,
247 p
->cf
->local_ip
, prefix
, pxlen
, p
->cf
->iface
,
248 enable
? p
->cf
->password
: NULL
, p
->cf
->setkey
);
251 sk_log_error(p
->sock
->sk
, p
->p
.name
);
259 static inline struct bgp_channel
*
260 bgp_find_channel(struct bgp_proto
*p
, u32 afi
)
262 struct bgp_channel
*c
;
263 BGP_WALK_CHANNELS(p
, c
)
271 bgp_startup(struct bgp_proto
*p
)
273 BGP_TRACE(D_EVENTS
, "Started");
274 p
->start_state
= BSS_CONNECT
;
281 /* Apply postponed incoming connection */
282 bgp_setup_conn(p
, &p
->incoming_conn
);
283 bgp_setup_sk(&p
->incoming_conn
, p
->postponed_sk
);
284 bgp_send_open(&p
->incoming_conn
);
285 p
->postponed_sk
= NULL
;
290 bgp_startup_timeout(timer
*t
)
292 bgp_startup(t
->data
);
297 bgp_initiate(struct bgp_proto
*p
)
302 { err_val
= BEM_NO_SOCKET
; goto err1
; }
304 if (bgp_setup_auth(p
, 1) < 0)
305 { err_val
= BEM_INVALID_MD5
; goto err2
; }
308 bgp_update_bfd(p
, p
->cf
->bfd
);
310 if (p
->startup_delay
)
312 p
->start_state
= BSS_DELAY
;
313 BGP_TRACE(D_EVENTS
, "Startup delayed by %d seconds due to errors", p
->startup_delay
);
314 bgp_start_timer(p
->startup_timer
, p
->startup_delay
);
325 bgp_store_error(p
, NULL
, BE_MISC
, err_val
);
328 proto_notify_state(&p
->p
, PS_DOWN
);
334 * bgp_start_timer - start a BGP timer
336 * @value: time (in seconds) to fire (0 to disable the timer)
338 * This function calls tm_start() on @t with time @value and the amount of
339 * randomization suggested by the BGP standard. Please use it for all BGP
340 * timers.
343 bgp_start_timer(timer
*t
, uint value
)
347 /* The randomization procedure is specified in RFC 4271 section 10 */
/* Scale @value (documented above as seconds) using the S unit suffix. */
348 btime time
= value S
;
/* Random reduction drawn from the range 0 .. time/4 inclusive. */
349 btime randomize
= random() % ((time
/ 4) + 1);
/* Arm @t with the interval shortened by the random amount. */
350 tm_start(t
, time
- randomize
);
357 * bgp_close_conn - close a BGP connection
358 * @conn: connection to close
360 * This function takes a connection described by the &bgp_conn structure, closes
361 * its socket and frees all resources associated with it.
364 bgp_close_conn(struct bgp_conn
*conn
)
366 // struct bgp_proto *p = conn->bgp;
368 DBG("BGP: Closing connection\n");
/* Forget any packets or channels that were still marked for sending. */
369 conn
->packets_to_send
= 0;
370 conn
->channels_to_send
= 0;
/*
 * Release the connect, keepalive and hold timers; each pointer is cleared
 * right after rfree() so no stale reference stays in the connection.
 */
371 rfree(conn
->connect_timer
);
372 conn
->connect_timer
= NULL
;
373 rfree(conn
->keepalive_timer
);
374 conn
->keepalive_timer
= NULL
;
375 rfree(conn
->hold_timer
);
376 conn
->hold_timer
= NULL
;
/* NOTE(review): statements between the timers and the capabilities are not visible in this excerpt. */
/* Free the local and remote capability structures and clear the pointers. */
382 mb_free(conn
->local_caps
);
383 conn
->local_caps
= NULL
;
384 mb_free(conn
->remote_caps
);
385 conn
->remote_caps
= NULL
;
390 * bgp_update_startup_delay - update a startup delay
393 * This function updates a startup delay that is used to postpone next BGP
394 * connect. It also handles disable_after_error and might stop BGP instance
395 * when error happened and disable_after_error is on.
397 * It should be called when BGP protocol error happened.
400 bgp_update_startup_delay(struct bgp_proto
*p
)
402 const struct bgp_config
*cf
= p
->cf
;
404 DBG("BGP: Updating startup delay\n");
/*
 * If a previous protocol error is recorded and at least error_amnesia_time
 * has passed since then, the accumulated delay is forgotten.
 */
406 if (p
->last_proto_error
&& ((current_time() - p
->last_proto_error
) >= cf
->error_amnesia_time S
))
407 p
->startup_delay
= 0;
/* Record the time of the current error for the next amnesia check. */
409 p
->last_proto_error
= current_time();
/*
 * With disable_after_error set, the delay is reset.
 * NOTE(review): the rest of this branch is not visible in this excerpt; per
 * the function header it might stop the BGP instance -- confirm.
 */
411 if (cf
->disable_after_error
)
413 p
->startup_delay
= 0;
/* No delay in effect yet: start from the configured minimum. */
418 if (!p
->startup_delay
)
419 p
->startup_delay
= cf
->error_delay_time_min
;
/*
 * Double the delay, capped at error_delay_time_max.
 * NOTE(review): presumably this is the alternative ("else") arm of the check
 * above; the joining line is not visible here -- confirm.
 */
421 p
->startup_delay
= MIN(2 * p
->startup_delay
, cf
->error_delay_time_max
);
425 bgp_graceful_close_conn(struct bgp_conn
*conn
, int subcode
, byte
*data
, uint len
)
435 bgp_conn_enter_idle_state(conn
);
443 bgp_conn_enter_close_state(conn
);
444 bgp_schedule_packet(conn
, NULL
, PKT_SCHEDULE_CLOSE
);
447 bgp_error(conn
, 6, subcode
, data
, len
);
451 bug("bgp_graceful_close_conn: Unknown state %d", conn
->state
);
456 bgp_down(struct bgp_proto
*p
)
458 if (p
->start_state
> BSS_PREPARE
)
460 bgp_setup_auth(p
, 0);
466 BGP_TRACE(D_EVENTS
, "Down");
467 proto_notify_state(&p
->p
, PS_DOWN
);
471 bgp_decision(void *vp
)
473 struct bgp_proto
*p
= vp
;
475 DBG("BGP: Decision start\n");
476 if ((p
->p
.proto_state
== PS_START
) &&
477 (p
->outgoing_conn
.state
== BS_IDLE
) &&
478 (p
->incoming_conn
.state
!= BS_OPENCONFIRM
) &&
482 if ((p
->p
.proto_state
== PS_STOP
) &&
483 (p
->outgoing_conn
.state
== BS_IDLE
) &&
484 (p
->incoming_conn
.state
== BS_IDLE
))
488 static struct bgp_proto
*
489 bgp_spawn(struct bgp_proto
*pp
, ip_addr remote_ip
)
492 char fmt
[SYM_MAX_LEN
];
494 bsprintf(fmt
, "%s%%0%dd", pp
->cf
->dynamic_name
, pp
->cf
->dynamic_name_digits
);
496 /* This is a hack: we would like to share the config, but we need to copy it for now */
498 cfg_mem
= config
->mem
;
499 conf_this_scope
= config
->root_scope
;
500 sym
= cf_default_name(fmt
, &(pp
->dynamic_name_counter
));
501 proto_clone_config(sym
, pp
->p
.cf
);
505 /* Just pass remote_ip to bgp_init() */
506 ((struct bgp_config
*) sym
->proto
)->remote_ip
= remote_ip
;
508 return (void *) proto_spawn(sym
->proto
, 0);
512 bgp_stop(struct bgp_proto
*p
, int subcode
, byte
*data
, uint len
)
/*
 * Stop the protocol instance: report PS_STOP to the core, then gracefully
 * close both the outgoing and the incoming connection, passing the same
 * @subcode, @data and @len to each.
 */
514 proto_notify_state(&p
->p
, PS_STOP
);
515 bgp_graceful_close_conn(&p
->outgoing_conn
, subcode
, data
, len
);
516 bgp_graceful_close_conn(&p
->incoming_conn
, subcode
, data
, len
);
/* Schedule the protocol event (presumably bgp_decision() -- confirm where p->event is created). */
517 ev_schedule(p
->event
);
521 bgp_conn_set_state(struct bgp_conn
*conn
, uint new_state
)
/*
 * Store @new_state as the state of @conn. When the MD_STATES bit is set in
 * the owning protocol's mrtdump mask, the transition (old state, new state)
 * is handed to bgp_dump_state_change() before the state is overwritten.
 */
523 if (conn
->bgp
->p
.mrtdump
& MD_STATES
)
524 bgp_dump_state_change(conn
, conn
->state
, new_state
);
526 conn
->state
= new_state
;
530 bgp_conn_enter_openconfirm_state(struct bgp_conn
*conn
)
532 /* Really, most of the work is done in bgp_rx_open(). */
533 bgp_conn_set_state(conn
, BS_OPENCONFIRM
);
536 static const struct bgp_af_caps dummy_af_caps
= { };
537 static const struct bgp_af_caps basic_af_caps
= { .ready
= 1 };
540 bgp_conn_enter_established_state(struct bgp_conn
*conn
)
542 struct bgp_proto
*p
= conn
->bgp
;
543 struct bgp_caps
*local
= conn
->local_caps
;
544 struct bgp_caps
*peer
= conn
->remote_caps
;
545 struct bgp_channel
*c
;
547 BGP_TRACE(D_EVENTS
, "BGP session established");
548 p
->last_established
= current_time();
549 p
->stats
.fsm_established_transitions
++;
551 /* For multi-hop BGP sessions */
552 if (ipa_zero(p
->local_ip
))
553 p
->local_ip
= conn
->sk
->saddr
;
555 /* For promiscuous sessions */
557 p
->remote_as
= conn
->received_as
;
559 /* In case the LLv6 address was not valid during BGP start */
560 if (ipa_zero(p
->link_addr
) && p
->neigh
&& p
->neigh
->iface
&& p
->neigh
->iface
->llv6
)
561 p
->link_addr
= p
->neigh
->iface
->llv6
->ip
;
563 conn
->sk
->fast_rx
= 0;
566 p
->last_error_class
= 0;
567 p
->last_error_code
= 0;
569 p
->as4_session
= conn
->as4_session
;
571 p
->route_refresh
= peer
->route_refresh
;
572 p
->enhanced_refresh
= local
->enhanced_refresh
&& peer
->enhanced_refresh
;
574 /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
575 p
->gr_ready
= p
->llgr_ready
= 0; /* Updated later */
577 /* Whether peer is ready to handle our GR recovery */
578 int peer_gr_ready
= peer
->gr_aware
&& !(peer
->gr_flags
& BGP_GRF_RESTART
);
580 if (p
->gr_active_num
)
581 tm_stop(p
->gr_timer
);
583 /* Number of active channels */
586 /* Summary state of ADD_PATH RX for active channels */
587 uint summary_add_path_rx
= 0;
589 BGP_WALK_CHANNELS(p
, c
)
591 const struct bgp_af_caps
*loc
= bgp_find_af_caps(local
, c
->afi
);
592 const struct bgp_af_caps
*rem
= bgp_find_af_caps(peer
, c
->afi
);
594 /* Use default if capabilities were not announced */
595 if (!local
->length
&& (c
->afi
== BGP_AF_IPV4
))
596 loc
= &basic_af_caps
;
598 if (!peer
->length
&& (c
->afi
== BGP_AF_IPV4
))
599 rem
= &basic_af_caps
;
601 /* Ignore AFIs that were not announced in multiprotocol capability */
602 if (!loc
|| !loc
->ready
)
603 loc
= &dummy_af_caps
;
605 if (!rem
|| !rem
->ready
)
606 rem
= &dummy_af_caps
;
608 int active
= loc
->ready
&& rem
->ready
;
609 c
->c
.disabled
= !active
;
610 c
->c
.reloadable
= p
->route_refresh
|| c
->cf
->import_table
;
612 c
->index
= active
? num
++ : 0;
614 c
->feed_state
= BFS_NONE
;
615 c
->load_state
= BFS_NONE
;
617 /* Channels where peer may do GR */
618 uint gr_ready
= active
&& local
->gr_aware
&& rem
->gr_able
;
619 uint llgr_ready
= active
&& local
->llgr_aware
&& rem
->llgr_able
;
621 c
->gr_ready
= gr_ready
|| llgr_ready
;
622 p
->gr_ready
= p
->gr_ready
|| c
->gr_ready
;
623 p
->llgr_ready
= p
->llgr_ready
|| llgr_ready
;
625 /* Remember last LLGR stale time */
626 c
->stale_time
= local
->llgr_aware
? rem
->llgr_time
: 0;
628 /* Channels not able to recover gracefully */
629 if (p
->p
.gr_recovery
&& (!active
|| !peer_gr_ready
))
630 channel_graceful_restart_unlock(&c
->c
);
632 /* Channels waiting for local convergence */
633 if (p
->p
.gr_recovery
&& loc
->gr_able
&& peer_gr_ready
)
636 /* Channels where regular graceful restart failed */
637 if ((c
->gr_active
== BGP_GRS_ACTIVE
) &&
638 !(active
&& rem
->gr_able
&& (rem
->gr_af_flags
& BGP_GRF_FORWARDING
)))
639 bgp_graceful_restart_done(c
);
641 /* Channels where regular long-lived restart failed */
642 if ((c
->gr_active
== BGP_GRS_LLGR
) &&
643 !(active
&& rem
->llgr_able
&& (rem
->gr_af_flags
& BGP_LLGRF_FORWARDING
)))
644 bgp_graceful_restart_done(c
);
646 /* GR capability implies that neighbor will send End-of-RIB */
648 c
->load_state
= BFS_LOADING
;
650 c
->ext_next_hop
= c
->cf
->ext_next_hop
&& (bgp_channel_is_ipv6(c
) || rem
->ext_next_hop
);
651 c
->add_path_rx
= (loc
->add_path
& BGP_ADD_PATH_RX
) && (rem
->add_path
& BGP_ADD_PATH_TX
);
652 c
->add_path_tx
= (loc
->add_path
& BGP_ADD_PATH_TX
) && (rem
->add_path
& BGP_ADD_PATH_RX
);
655 summary_add_path_rx
|= !c
->add_path_rx
? 1 : 2;
659 c
->c
.ra_mode
= RA_ANY
;
660 else if (c
->cf
->secondary
)
661 c
->c
.ra_mode
= RA_ACCEPTED
;
663 c
->c
.ra_mode
= RA_OPTIMAL
;
666 p
->afi_map
= mb_alloc(p
->p
.pool
, num
* sizeof(u32
));
667 p
->channel_map
= mb_alloc(p
->p
.pool
, num
* sizeof(void *));
668 p
->channel_count
= num
;
669 p
->summary_add_path_rx
= summary_add_path_rx
;
671 BGP_WALK_CHANNELS(p
, c
)
676 p
->afi_map
[c
->index
] = c
->afi
;
677 p
->channel_map
[c
->index
] = c
;
680 /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
682 bgp_conn_set_state(conn
, BS_ESTABLISHED
);
683 proto_notify_state(&p
->p
, PS_UP
);
687 bgp_conn_leave_established_state(struct bgp_proto
*p
)
689 BGP_TRACE(D_EVENTS
, "BGP session closed");
690 p
->last_established
= current_time();
693 if (p
->p
.proto_state
== PS_UP
)
694 bgp_stop(p
, 0, NULL
, 0);
698 bgp_conn_enter_close_state(struct bgp_conn
*conn
)
700 struct bgp_proto
*p
= conn
->bgp
;
/* Remember the previous state; it is overwritten by the state change below. */
701 int os
= conn
->state
;
703 bgp_conn_set_state(conn
, BS_CLOSE
);
/* Stop the keepalive timer and clear the socket's receive hook. */
704 tm_stop(conn
->keepalive_timer
);
705 conn
->sk
->rx_hook
= NULL
;
707 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
/* bgp_hold_timeout() moves a connection still in BS_CLOSE to the idle state when this timer fires. */
708 bgp_start_timer(conn
->hold_timer
, 10);
/* Leaving an established session needs the extra protocol-level handling. */
710 if (os
== BS_ESTABLISHED
)
711 bgp_conn_leave_established_state(p
);
715 bgp_conn_enter_idle_state(struct bgp_conn
*conn
)
717 struct bgp_proto
*p
= conn
->bgp
;
/* Remember the previous state; it is overwritten by the state change below. */
718 int os
= conn
->state
;
/* Release the connection's resources, mark it idle and schedule the protocol event. */
720 bgp_close_conn(conn
);
721 bgp_conn_set_state(conn
, BS_IDLE
);
722 ev_schedule(p
->event
);
/* If the connection was established until now, run the leave-established handling. */
724 if (os
== BS_ESTABLISHED
)
725 bgp_conn_leave_established_state(p
);
729 * bgp_handle_graceful_restart - handle detected BGP graceful restart
732 * This function is called when a BGP graceful restart of the neighbor is
733 * detected (when the TCP connection fails or when a new TCP connection
734 * appears). The function activates processing of the restart - starts routing
735 * table refresh cycle and activates BGP restart timer. The protocol state goes
736 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
737 * caller.
740 bgp_handle_graceful_restart(struct bgp_proto
*p
)
742 ASSERT(p
->conn
&& (p
->conn
->state
== BS_ESTABLISHED
) && p
->gr_ready
);
744 BGP_TRACE(D_EVENTS
, "Neighbor graceful restart detected%s",
745 p
->gr_active_num
? " - already pending" : "");
747 p
->gr_active_num
= 0;
749 struct bgp_channel
*c
;
750 BGP_WALK_CHANNELS(p
, c
)
752 /* FIXME: perhaps check for channel state instead of disabled flag? */
760 switch (c
->gr_active
)
763 c
->gr_active
= BGP_GRS_ACTIVE
;
764 rt_refresh_begin(c
->c
.table
, &c
->c
);
768 rt_refresh_end(c
->c
.table
, &c
->c
);
769 rt_refresh_begin(c
->c
.table
, &c
->c
);
773 rt_refresh_begin(c
->c
.table
, &c
->c
);
774 rt_modify_stale(c
->c
.table
, &c
->c
);
780 /* Just flush the routes */
781 rt_refresh_begin(c
->c
.table
, &c
->c
);
782 rt_refresh_end(c
->c
.table
, &c
->c
);
785 /* Reset bucket and prefix tables */
786 bgp_free_bucket_table(c
);
787 bgp_free_prefix_table(c
);
788 bgp_init_bucket_table(c
);
789 bgp_init_prefix_table(c
);
790 c
->packets_to_send
= 0;
793 /* p->gr_ready -> at least one active channel is c->gr_ready */
794 ASSERT(p
->gr_active_num
> 0);
796 proto_notify_state(&p
->p
, PS_START
);
797 tm_start(p
->gr_timer
, p
->conn
->remote_caps
->gr_time S
);
801 * bgp_graceful_restart_done - finish active BGP graceful restart
804 * This function is called when the active BGP graceful restart of the neighbor
805 * should be finished for channel @c - either successfully (the neighbor sends
806 * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
807 * unsuccessfully (the neighbor does not support BGP graceful restart on the new
808 * session). The function ends the routing table refresh cycle.
811 bgp_graceful_restart_done(struct bgp_channel
*c
)
813 struct bgp_proto
*p
= (void *) c
->c
.proto
;
815 ASSERT(c
->gr_active
);
819 if (!p
->gr_active_num
)
820 BGP_TRACE(D_EVENTS
, "Neighbor graceful restart done");
822 tm_stop(c
->stale_timer
);
823 rt_refresh_end(c
->c
.table
, &c
->c
);
827 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
830 * This function is a timeout hook for @gr_timer, implementing BGP restart time
831 * limit for reestablishment of the BGP session after the graceful restart. When
832 * fired, we just proceed with the usual protocol restart.
836 bgp_graceful_restart_timeout(timer
*t
)
838 struct bgp_proto
*p
= t
->data
;
840 BGP_TRACE(D_EVENTS
, "Neighbor graceful restart timeout");
844 struct bgp_channel
*c
;
845 BGP_WALK_CHANNELS(p
, c
)
847 /* Channel is not in GR and is already flushed */
851 /* Channel is already in LLGR from past restart */
852 if (c
->gr_active
== BGP_GRS_LLGR
)
855 /* Channel is in GR, but does not support LLGR -> stop GR */
858 bgp_graceful_restart_done(c
);
862 /* Channel is in GR, and supports LLGR -> start LLGR */
863 c
->gr_active
= BGP_GRS_LLGR
;
864 tm_start(c
->stale_timer
, c
->stale_time S
);
865 rt_modify_stale(c
->c
.table
, &c
->c
);
869 bgp_stop(p
, 0, NULL
, 0);
873 bgp_long_lived_stale_timeout(timer
*t
)
875 struct bgp_channel
*c
= t
->data
;
876 struct bgp_proto
*p
= (void *) c
->c
.proto
;
878 BGP_TRACE(D_EVENTS
, "Long-lived stale timeout");
880 bgp_graceful_restart_done(c
);
885 * bgp_refresh_begin - start incoming enhanced route refresh sequence
888 * This function is called when an incoming enhanced route refresh sequence is
889 * started by the neighbor, demarcated by the BoRR packet. The function updates
890 * the load state and starts the routing table refresh cycle. Note that graceful
891 * restart also uses routing table refresh cycle, but RFC 7313 and load states
892 * ensure that these two sequences do not overlap.
895 bgp_refresh_begin(struct bgp_channel
*c
)
897 struct bgp_proto
*p
= (void *) c
->c
.proto
;
/* While the channel is still in BFS_LOADING, the request is logged and ignored. */
899 if (c
->load_state
== BFS_LOADING
)
900 { log(L_WARN
"%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p
->p
.name
); return; }
/* Mark the channel as refreshing and open a refresh cycle on its routing table. */
902 c
->load_state
= BFS_REFRESHING
;
903 rt_refresh_begin(c
->c
.table
, &c
->c
);
/*
 * Open a refresh cycle on the channel's in_table as well.
 * NOTE(review): the line preceding this call is not visible in this excerpt;
 * presumably the call is conditional on in_table being present -- confirm.
 */
906 rt_refresh_begin(c
->c
.in_table
, &c
->c
);
910 * bgp_refresh_end - finish incoming enhanced route refresh sequence
913 * This function is called when an incoming enhanced route refresh sequence is
914 * finished by the neighbor, demarcated by the EoRR packet. The function updates
915 * the load state and ends the routing table refresh cycle. Routes not received
916 * during the sequence are removed by the nest.
919 bgp_refresh_end(struct bgp_channel
*c
)
921 struct bgp_proto
*p
= (void *) c
->c
.proto
;
/* Without a refresh in progress (state is not BFS_REFRESHING), log and ignore. */
923 if (c
->load_state
!= BFS_REFRESHING
)
924 { log(L_WARN
"%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p
->p
.name
); return; }
/* Return to the BFS_NONE load state and close the refresh cycle on the routing table. */
926 c
->load_state
= BFS_NONE
;
927 rt_refresh_end(c
->c
.table
, &c
->c
);
/*
 * Prune the channel's in_table.
 * NOTE(review): the line preceding this call is not visible in this excerpt;
 * presumably the call is conditional on in_table being present -- confirm.
 */
930 rt_prune_sync(c
->c
.in_table
, 0);
935 bgp_send_open(struct bgp_conn
*conn
)
937 DBG("BGP: Sending open\n");
/* From now on the socket is served by the regular BGP receive and transmit hooks. */
938 conn
->sk
->rx_hook
= bgp_rx
;
939 conn
->sk
->tx_hook
= bgp_tx
;
/* The connect timer is stopped before the OPEN exchange starts. */
940 tm_stop(conn
->connect_timer
);
/* Prepare capabilities, queue an OPEN packet and enter the OpenSent state. */
941 bgp_prepare_capabilities(conn
);
942 bgp_schedule_packet(conn
, NULL
, PKT_OPEN
);
943 bgp_conn_set_state(conn
, BS_OPENSENT
);
/* Start the hold timer with the configured initial_hold_time. */
944 bgp_start_timer(conn
->hold_timer
, conn
->bgp
->cf
->initial_hold_time
);
948 bgp_connected(sock
*sk
)
950 struct bgp_conn
*conn
= sk
->data
;
951 struct bgp_proto
*p
= conn
->bgp
;
953 BGP_TRACE(D_EVENTS
, "Connected");
958 bgp_connect_timeout(timer
*t
)
960 struct bgp_conn
*conn
= t
->data
;
961 struct bgp_proto
*p
= conn
->bgp
;
963 DBG("BGP: connect_timeout\n");
964 if (p
->p
.proto_state
== PS_START
)
966 bgp_close_conn(conn
);
970 bgp_conn_enter_idle_state(conn
);
974 bgp_sock_err(sock
*sk
, int err
)
976 struct bgp_conn
*conn
= sk
->data
;
977 struct bgp_proto
*p
= conn
->bgp
;
980 * This error hook may be called either asynchronously from main
981 * loop, or synchronously from sk_send(). But sk_send() is called
982 * only from bgp_tx() and bgp_kick_tx(), which are both called
983 * asynchronously from main loop. Moreover, they end if err hook is
984 * called. Therefore, we could suppose that it is always called
985 * from the main loop.
988 bgp_store_error(p
, conn
, BE_SOCKET
, err
);
991 BGP_TRACE(D_EVENTS
, "Connection lost (%M)", err
);
993 BGP_TRACE(D_EVENTS
, "Connection closed");
995 if ((conn
->state
== BS_ESTABLISHED
) && p
->gr_ready
)
996 bgp_handle_graceful_restart(p
);
998 bgp_conn_enter_idle_state(conn
);
1002 bgp_hold_timeout(timer
*t
)
1004 struct bgp_conn
*conn
= t
->data
;
1005 struct bgp_proto
*p
= conn
->bgp
;
1007 DBG("BGP: Hold timeout\n");
1009 /* We are already closing the connection - just do hangup */
1010 if (conn
->state
== BS_CLOSE
)
1012 BGP_TRACE(D_EVENTS
, "Connection stalled");
1013 bgp_conn_enter_idle_state(conn
);
1017 /* If there is something in the input queue, we are probably congested
1018 and perhaps just have not processed BGP packets in time. */
1020 if (sk_rx_ready(conn
->sk
) > 0)
1021 bgp_start_timer(conn
->hold_timer
, 10);
1022 else if ((conn
->state
== BS_ESTABLISHED
) && p
->llgr_ready
)
1024 BGP_TRACE(D_EVENTS
, "Hold timer expired");
1025 bgp_handle_graceful_restart(p
);
1026 bgp_conn_enter_idle_state(conn
);
1029 bgp_error(conn
, 4, 0, NULL
, 0);
1033 bgp_keepalive_timeout(timer
*t
)
/* Timer hook for the keepalive timer; t->data holds the owning connection. */
1035 struct bgp_conn
*conn
= t
->data
;
1037 DBG("BGP: Keepalive timer\n");
/* Queue a Keepalive packet for this connection. */
1038 bgp_schedule_packet(conn
, NULL
, PKT_KEEPALIVE
);
1040 /* Kick TX a bit faster */
/* NOTE(review): presumably ev_active()/ev_run() detect a pending tx_ev and execute it right away -- confirm. */
1041 if (ev_active(conn
->tx_ev
))
1042 ev_run(conn
->tx_ev
);
1046 bgp_setup_conn(struct bgp_proto
*p
, struct bgp_conn
*conn
)
1051 conn
->packets_to_send
= 0;
1052 conn
->channels_to_send
= 0;
1053 conn
->last_channel
= 0;
1054 conn
->last_channel_count
= 0;
1056 conn
->connect_timer
= tm_new_init(p
->p
.pool
, bgp_connect_timeout
, conn
, 0, 0);
1057 conn
->hold_timer
= tm_new_init(p
->p
.pool
, bgp_hold_timeout
, conn
, 0, 0);
1058 conn
->keepalive_timer
= tm_new_init(p
->p
.pool
, bgp_keepalive_timeout
, conn
, 0, 0);
1060 conn
->tx_ev
= ev_new_init(p
->p
.pool
, bgp_kick_tx
, conn
);
1064 bgp_setup_sk(struct bgp_conn
*conn
, sock
*s
)
1067 s
->err_hook
= bgp_sock_err
;
1073 bgp_active(struct bgp_proto
*p
)
/* Use the configured connect_delay_time, but never less than 1 (seconds, per the trace message below). */
1075 int delay
= MAX(1, p
->cf
->connect_delay_time
);
1076 struct bgp_conn
*conn
= &p
->outgoing_conn
;
1078 BGP_TRACE(D_EVENTS
, "Connect delayed by %d seconds", delay
);
/* Initialize the outgoing connection, put it into the Active state and arm its connect timer. */
1079 bgp_setup_conn(p
, conn
);
1080 bgp_conn_set_state(conn
, BS_ACTIVE
);
1081 bgp_start_timer(conn
->connect_timer
, delay
);
1085 * bgp_connect - initiate an outgoing connection
1088 * The bgp_connect() function creates a new &bgp_conn and initiates
1089 * a TCP connection to the peer. The rest of connection setup is governed
1090 * by the BGP state machine as described in the standard.
1093 bgp_connect(struct bgp_proto
*p
) /* Enter Connect state and start establishing connection */
1095 struct bgp_conn
*conn
= &p
->outgoing_conn
;
1096 int hops
= p
->cf
->multihop
? : 1;
1098 DBG("BGP: Connecting\n");
1099 sock
*s
= sk_new(p
->p
.pool
);
1100 s
->type
= SK_TCP_ACTIVE
;
1101 s
->saddr
= p
->local_ip
;
1102 s
->daddr
= p
->remote_ip
;
1103 s
->dport
= p
->cf
->remote_port
;
1104 s
->iface
= p
->neigh
? p
->neigh
->iface
: NULL
;
1106 s
->ttl
= p
->cf
->ttl_security
? 255 : hops
;
1107 s
->rbsize
= p
->cf
->enable_extended_messages
? BGP_RX_BUFFER_EXT_SIZE
: BGP_RX_BUFFER_SIZE
;
1108 s
->tbsize
= p
->cf
->enable_extended_messages
? BGP_TX_BUFFER_EXT_SIZE
: BGP_TX_BUFFER_SIZE
;
1109 s
->tos
= IP_PREC_INTERNET_CONTROL
;
1110 s
->password
= p
->cf
->password
;
1111 s
->tx_hook
= bgp_connected
;
1112 BGP_TRACE(D_EVENTS
, "Connecting to %I%J from local address %I%J",
1113 s
->daddr
, ipa_is_link_local(s
->daddr
) ? p
->cf
->iface
: NULL
,
1114 s
->saddr
, ipa_is_link_local(s
->saddr
) ? s
->iface
: NULL
);
1115 bgp_setup_conn(p
, conn
);
1116 bgp_setup_sk(conn
, s
);
1117 bgp_conn_set_state(conn
, BS_CONNECT
);
1122 /* Set minimal receive TTL if needed */
1123 if (p
->cf
->ttl_security
)
1124 if (sk_set_min_ttl(s
, 256 - hops
) < 0)
1127 DBG("BGP: Waiting for connect success\n");
1128 bgp_start_timer(conn
->connect_timer
, p
->cf
->connect_retry_time
);
1132 sk_log_error(s
, p
->p
.name
);
1137 static inline int bgp_is_dynamic(struct bgp_proto
*p
)
/*
 * Returns the result of ipa_zero() on p->remote_ip. An instance without a
 * remote address is treated by the callers in this file as a dynamic BGP
 * instance.
 */
1138 { return ipa_zero(p
->remote_ip
); }
1141 * bgp_find_proto - find existing proto for incoming connection
1145 static struct bgp_proto
*
1146 bgp_find_proto(sock
*sk
)
1148 struct bgp_proto
*best
= NULL
;
1149 struct bgp_proto
*p
;
1151 /* sk->iface is valid only if src or dst address is link-local */
1152 int link
= ipa_is_link_local(sk
->saddr
) || ipa_is_link_local(sk
->daddr
);
1154 WALK_LIST(p
, proto_list
)
1155 if ((p
->p
.proto
== &proto_bgp
) &&
1156 (ipa_equal(p
->remote_ip
, sk
->daddr
) || bgp_is_dynamic(p
)) &&
1157 (!p
->cf
->remote_range
|| ipa_in_netX(sk
->daddr
, p
->cf
->remote_range
)) &&
1158 (p
->p
.vrf
== sk
->vrf
) &&
1159 (p
->cf
->local_port
== sk
->sport
) &&
1160 (!link
|| (p
->cf
->iface
== sk
->iface
)) &&
1161 (ipa_zero(p
->cf
->local_ip
) || ipa_equal(p
->cf
->local_ip
, sk
->saddr
)))
1165 if (!bgp_is_dynamic(p
))
1173 * bgp_incoming_connection - handle an incoming connection
1177 * This function serves as a socket hook for accepting of new BGP
1178 * connections. It searches a BGP instance corresponding to the peer
1179 * which has connected and if such an instance exists, it creates a
1180 * &bgp_conn structure, attaches it to the instance and either sends
1181 * an Open message or (if there already is an active connection) it
1182 * closes the new connection by sending a Notification message.
1185 bgp_incoming_connection(sock
*sk
, uint dummy UNUSED
)
1187 struct bgp_proto
*p
;
1190 DBG("BGP: Incoming connection from %I port %d\n", sk
->daddr
, sk
->dport
);
1191 p
= bgp_find_proto(sk
);
1194 log(L_WARN
"BGP: Unexpected connect from unknown address %I%J (port %d)",
1195 sk
->daddr
, ipa_is_link_local(sk
->daddr
) ? sk
->iface
: NULL
, sk
->dport
);
1201 * BIRD should keep multiple incoming connections in OpenSent state (for
1202 * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
1203 * connections are rejected instead. The exception is the case where an
1204 * incoming connection triggers a graceful restart.
1207 acc
= (p
->p
.proto_state
== PS_START
|| p
->p
.proto_state
== PS_UP
) &&
1208 (p
->start_state
>= BSS_CONNECT
) && (!p
->incoming_conn
.sk
);
1210 if (p
->conn
&& (p
->conn
->state
== BS_ESTABLISHED
) && p
->gr_ready
)
1212 bgp_store_error(p
, NULL
, BE_MISC
, BEM_GRACEFUL_RESTART
);
1213 bgp_handle_graceful_restart(p
);
1214 bgp_conn_enter_idle_state(p
->conn
);
1217 /* There might be separate incoming connection in OpenSent state */
1218 if (p
->incoming_conn
.state
> BS_ACTIVE
)
1219 bgp_close_conn(&p
->incoming_conn
);
1222 BGP_TRACE(D_EVENTS
, "Incoming connection from %I%J (port %d) %s",
1223 sk
->daddr
, ipa_is_link_local(sk
->daddr
) ? sk
->iface
: NULL
,
1224 sk
->dport
, acc
? "accepted" : "rejected");
1232 hops
= p
->cf
->multihop
? : 1;
1234 if (sk_set_ttl(sk
, p
->cf
->ttl_security
? 255 : hops
) < 0)
1237 if (p
->cf
->ttl_security
)
1238 if (sk_set_min_ttl(sk
, 256 - hops
) < 0)
1241 if (p
->cf
->enable_extended_messages
)
1243 sk
->rbsize
= BGP_RX_BUFFER_EXT_SIZE
;
1244 sk
->tbsize
= BGP_TX_BUFFER_EXT_SIZE
;
1248 /* For dynamic BGP, spawn new instance and postpone the socket */
1249 if (bgp_is_dynamic(p
))
1251 p
= bgp_spawn(p
, sk
->daddr
);
1252 p
->postponed_sk
= sk
;
1253 rmove(sk
, p
->p
.pool
);
1257 rmove(sk
, p
->p
.pool
);
1258 bgp_setup_conn(p
, &p
->incoming_conn
);
1259 bgp_setup_sk(&p
->incoming_conn
, sk
);
1260 bgp_send_open(&p
->incoming_conn
);
1264 sk_log_error(sk
, p
->p
.name
);
1265 log(L_ERR
"%s: Incoming connection aborted", p
->p
.name
);
1271 bgp_listen_sock_err(sock
*sk UNUSED
, int err
)
1273 if (err
== ECONNABORTED
)
1274 log(L_WARN
"BGP: Incoming connection aborted");
1276 log(L_ERR
"BGP: Error on listening socket: %M", err
);
1280 bgp_start_neighbor(struct bgp_proto
*p
)
1282 /* Called only for single-hop BGP sessions */
1284 if (ipa_zero(p
->local_ip
))
1285 p
->local_ip
= p
->neigh
->ifa
->ip
;
1287 if (ipa_is_link_local(p
->local_ip
))
1288 p
->link_addr
= p
->local_ip
;
1289 else if (p
->neigh
->iface
->llv6
)
1290 p
->link_addr
= p
->neigh
->iface
->llv6
->ip
;
1296 bgp_neigh_notify(neighbor
*n
)
1298 struct bgp_proto
*p
= (struct bgp_proto
*) n
->proto
;
1299 int ps
= p
->p
.proto_state
;
1304 if ((ps
== PS_DOWN
) || (ps
== PS_STOP
))
1307 int prepare
= (ps
== PS_START
) && (p
->start_state
== BSS_PREPARE
);
1313 BGP_TRACE(D_EVENTS
, "Neighbor lost");
1314 bgp_store_error(p
, NULL
, BE_MISC
, BEM_NEIGHBOR_LOST
);
1315 /* Perhaps also run bgp_update_startup_delay(p)? */
1316 bgp_stop(p
, 0, NULL
, 0);
1319 else if (p
->cf
->check_link
&& !(n
->iface
->flags
& IF_LINK_UP
))
1323 BGP_TRACE(D_EVENTS
, "Link down");
1324 bgp_store_error(p
, NULL
, BE_MISC
, BEM_LINK_DOWN
);
1326 bgp_update_startup_delay(p
);
1327 bgp_stop(p
, 0, NULL
, 0);
1334 BGP_TRACE(D_EVENTS
, "Neighbor ready");
1335 bgp_start_neighbor(p
);
1341 bgp_bfd_notify(struct bfd_request
*req
)
1343 struct bgp_proto
*p
= req
->data
;
1344 int ps
= p
->p
.proto_state
;
1346 if (req
->down
&& ((ps
== PS_START
) || (ps
== PS_UP
)))
1348 BGP_TRACE(D_EVENTS
, "BFD session down");
1349 bgp_store_error(p
, NULL
, BE_MISC
, BEM_BFD_DOWN
);
1351 if (req
->opts
.mode
== BGP_BFD_GRACEFUL
)
1353 /* Trigger graceful restart */
1354 if (p
->conn
&& (p
->conn
->state
== BS_ESTABLISHED
) && p
->gr_ready
)
1355 bgp_handle_graceful_restart(p
);
1357 if (p
->incoming_conn
.state
> BS_IDLE
)
1358 bgp_conn_enter_idle_state(&p
->incoming_conn
);
1360 if (p
->outgoing_conn
.state
> BS_IDLE
)
1361 bgp_conn_enter_idle_state(&p
->outgoing_conn
);
1365 /* Trigger session down */
1367 bgp_update_startup_delay(p
);
1368 bgp_stop(p
, 0, NULL
, 0);
1374 bgp_update_bfd(struct bgp_proto
*p
, const struct bfd_options
*bfd
)
1376 if (bfd
&& p
->bfd_req
)
1377 bfd_update_request(p
->bfd_req
, bfd
);
1379 if (bfd
&& !p
->bfd_req
&& !bgp_is_dynamic(p
))
1380 p
->bfd_req
= bfd_request_session(p
->p
.pool
, p
->remote_ip
, p
->local_ip
,
1381 p
->cf
->multihop
? NULL
: p
->neigh
->iface
,
1382 p
->p
.vrf
, bgp_bfd_notify
, p
, bfd
);
1384 if (!bfd
&& p
->bfd_req
)
1392 bgp_reload_routes(struct channel
*C
)
1394 struct bgp_proto
*p
= (void *) C
->proto
;
1395 struct bgp_channel
*c
= (void *) C
;
1397 /* Ignore non-BGP channels */
1398 if (C
->channel
!= &channel_bgp
)
1401 ASSERT(p
->conn
&& (p
->route_refresh
|| c
->c
.in_table
));
1404 channel_schedule_reload(C
);
1406 bgp_schedule_packet(p
->conn
, c
, PKT_ROUTE_REFRESH
);
1410 bgp_feed_begin(struct channel
*C
, int initial
)
1412 struct bgp_proto
*p
= (void *) C
->proto
;
1413 struct bgp_channel
*c
= (void *) C
;
1415 /* Ignore non-BGP channels */
1416 if (C
->channel
!= &channel_bgp
)
1419 /* This should not happen */
1423 if (initial
&& p
->cf
->gr_mode
)
1424 c
->feed_state
= BFS_LOADING
;
1426 /* It is a refeed and both sides support enhanced route refresh */
1427 if (!initial
&& p
->enhanced_refresh
)
1429 /* BoRR must not be sent before End-of-RIB */
1430 if (c
->feed_state
== BFS_LOADING
|| c
->feed_state
== BFS_LOADED
)
1433 c
->feed_state
= BFS_REFRESHING
;
1434 bgp_schedule_packet(p
->conn
, c
, PKT_BEGIN_REFRESH
);
1439 bgp_feed_end(struct channel
*C
)
1441 struct bgp_proto
*p
= (void *) C
->proto
;
1442 struct bgp_channel
*c
= (void *) C
;
1444 /* Ignore non-BGP channels */
1445 if (C
->channel
!= &channel_bgp
)
1448 /* This should not happen */
1452 /* Non-demarcated feed ended, nothing to do */
1453 if (c
->feed_state
== BFS_NONE
)
1456 /* Schedule End-of-RIB packet */
1457 if (c
->feed_state
== BFS_LOADING
)
1458 c
->feed_state
= BFS_LOADED
;
1460 /* Schedule EoRR packet */
1461 if (c
->feed_state
== BFS_REFRESHING
)
1462 c
->feed_state
= BFS_REFRESHED
;
1465 bgp_schedule_packet(p
->conn
, c
, PKT_UPDATE
);
1470 bgp_start_locked(struct object_lock
*lock
)
1472 struct bgp_proto
*p
= lock
->data
;
1473 const struct bgp_config
*cf
= p
->cf
;
1475 if (p
->p
.proto_state
!= PS_START
)
1477 DBG("BGP: Got lock in different state %d\n", p
->p
.proto_state
);
1481 DBG("BGP: Got lock\n");
1483 if (cf
->multihop
|| bgp_is_dynamic(p
))
1485 /* Multi-hop sessions do not use neighbor entries */
1490 neighbor
*n
= neigh_find(&p
->p
, p
->remote_ip
, cf
->iface
, NEF_STICKY
);
1493 log(L_ERR
"%s: Invalid remote address %I%J", p
->p
.name
, p
->remote_ip
, cf
->iface
);
1494 /* As we do not start yet, we can just disable protocol */
1496 bgp_store_error(p
, NULL
, BE_MISC
, BEM_INVALID_NEXT_HOP
);
1497 proto_notify_state(&p
->p
, PS_DOWN
);
1504 BGP_TRACE(D_EVENTS
, "Waiting for %I%J to become my neighbor", p
->remote_ip
, cf
->iface
);
1505 else if (p
->cf
->check_link
&& !(n
->iface
->flags
& IF_LINK_UP
))
1506 BGP_TRACE(D_EVENTS
, "Waiting for link on %s", n
->iface
->name
);
1508 bgp_start_neighbor(p
);
1512 bgp_start(struct proto
*P
)
1514 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1515 const struct bgp_config
*cf
= p
->cf
;
1517 p
->local_ip
= cf
->local_ip
;
1518 p
->local_as
= cf
->local_as
;
1519 p
->remote_as
= cf
->remote_as
;
1520 p
->public_as
= cf
->local_as
;
1522 /* For dynamic BGP children, remote_ip is already set */
1523 if (ipa_nonzero(cf
->remote_ip
))
1524 p
->remote_ip
= cf
->remote_ip
;
1526 /* Confederation ID is used for truly external peers */
1527 if (p
->cf
->confederation
&& !p
->is_interior
)
1528 p
->public_as
= cf
->confederation
;
1530 p
->passive
= cf
->passive
|| bgp_is_dynamic(p
);
1532 p
->start_state
= BSS_PREPARE
;
1533 p
->outgoing_conn
.state
= BS_IDLE
;
1534 p
->incoming_conn
.state
= BS_IDLE
;
1537 p
->postponed_sk
= NULL
;
1539 p
->gr_active_num
= 0;
1541 /* Reset some stats */
1542 p
->stats
.rx_messages
= p
->stats
.tx_messages
= 0;
1543 p
->stats
.rx_updates
= p
->stats
.tx_updates
= 0;
1544 p
->stats
.rx_bytes
= p
->stats
.tx_bytes
= 0;
1545 p
->last_rx_update
= 0;
1547 p
->event
= ev_new_init(p
->p
.pool
, bgp_decision
, p
);
1548 p
->startup_timer
= tm_new_init(p
->p
.pool
, bgp_startup_timeout
, p
, 0, 0);
1549 p
->gr_timer
= tm_new_init(p
->p
.pool
, bgp_graceful_restart_timeout
, p
, 0, 0);
1551 p
->local_id
= proto_get_router_id(P
->cf
);
1553 p
->rr_cluster_id
= p
->cf
->rr_cluster_id
? p
->cf
->rr_cluster_id
: p
->local_id
;
1556 p
->link_addr
= IPA_NONE
;
1558 /* Lock all channels when in GR recovery mode */
1559 if (p
->p
.gr_recovery
&& p
->cf
->gr_mode
)
1561 struct bgp_channel
*c
;
1562 BGP_WALK_CHANNELS(p
, c
)
1563 channel_graceful_restart_lock(&c
->c
);
1567 * Before attempting to create the connection, we need to lock the port,
1568 * so that we are the only instance attempting to talk with that neighbor.
1570 struct object_lock
*lock
;
1571 lock
= p
->lock
= olock_new(P
->pool
);
1572 lock
->addr
= p
->remote_ip
;
1573 lock
->port
= p
->cf
->remote_port
;
1574 lock
->iface
= p
->cf
->iface
;
1575 lock
->vrf
= p
->cf
->iface
? NULL
: p
->p
.vrf
;
1576 lock
->type
= OBJLOCK_TCP
;
1577 lock
->hook
= bgp_start_locked
;
1580 /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */
1581 if (bgp_is_dynamic(p
))
1583 lock
->addr
= net_prefix(p
->cf
->remote_range
);
1587 olock_acquire(lock
);
1592 extern int proto_restart
;
1595 bgp_shutdown(struct proto
*P
)
1597 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1600 char *message
= NULL
;
1604 BGP_TRACE(D_EVENTS
, "Shutdown requested");
1606 switch (P
->down_code
)
1609 case PDC_CF_DISABLE
:
1610 subcode
= 3; // Errcode 6, 3 - peer de-configured
1613 case PDC_CF_RESTART
:
1614 subcode
= 6; // Errcode 6, 6 - other configuration change
1617 case PDC_CMD_DISABLE
:
1618 case PDC_CMD_SHUTDOWN
:
1620 subcode
= 2; // Errcode 6, 2 - administrative shutdown
1621 message
= P
->message
;
1624 case PDC_CMD_RESTART
:
1625 subcode
= 4; // Errcode 6, 4 - administrative reset
1626 message
= P
->message
;
1629 case PDC_CMD_GR_DOWN
:
1630 if ((p
->cf
->gr_mode
!= BGP_GR_ABLE
) &&
1631 (p
->cf
->llgr_mode
!= BGP_LLGR_ABLE
))
1634 subcode
= -1; // Do not send NOTIFICATION, just close the connection
1637 case PDC_RX_LIMIT_HIT
:
1638 case PDC_IN_LIMIT_HIT
:
1639 subcode
= 1; // Errcode 6, 1 - max number of prefixes reached
1640 /* log message for compatibility */
1641 log(L_WARN
"%s: Route limit exceeded, shutting down", p
->p
.name
);
1644 case PDC_OUT_LIMIT_HIT
:
1645 subcode
= proto_restart
? 4 : 2; // Administrative reset or shutdown
1648 bgp_store_error(p
, NULL
, BE_AUTO_DOWN
, BEA_ROUTE_LIMIT_EXCEEDED
);
1650 bgp_update_startup_delay(p
);
1652 p
->startup_delay
= 0;
1656 bgp_store_error(p
, NULL
, BE_MAN_DOWN
, 0);
1657 p
->startup_delay
= 0;
1659 /* RFC 8203 - shutdown communication */
1662 uint msg_len
= strlen(message
);
1663 msg_len
= MIN(msg_len
, 255);
1665 /* Buffer will be freed automatically by protocol shutdown */
1666 data
= mb_alloc(p
->p
.pool
, msg_len
+ 1);
1670 memcpy(data
+1, message
, msg_len
);
1674 bgp_stop(p
, subcode
, data
, len
);
1675 return p
->p
.proto_state
;
1678 static struct proto
*
1679 bgp_init(struct proto_config
*CF
)
1681 struct proto
*P
= proto_new(CF
);
1682 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1683 struct bgp_config
*cf
= (struct bgp_config
*) CF
;
1685 P
->rt_notify
= bgp_rt_notify
;
1686 P
->preexport
= bgp_preexport
;
1687 P
->neigh_notify
= bgp_neigh_notify
;
1688 P
->reload_routes
= bgp_reload_routes
;
1689 P
->feed_begin
= bgp_feed_begin
;
1690 P
->feed_end
= bgp_feed_end
;
1691 P
->rte_better
= bgp_rte_better
;
1692 P
->rte_mergable
= bgp_rte_mergable
;
1693 P
->rte_recalculate
= cf
->deterministic_med
? bgp_rte_recalculate
: NULL
;
1694 P
->rte_modify
= bgp_rte_modify_stale
;
1695 P
->rte_igp_metric
= bgp_rte_igp_metric
;
1698 p
->is_internal
= (cf
->local_as
== cf
->remote_as
);
1699 p
->is_interior
= p
->is_internal
|| cf
->confederation_member
;
1700 p
->rs_client
= cf
->rs_client
;
1701 p
->rr_client
= cf
->rr_client
;
1703 p
->ipv4
= ipa_nonzero(cf
->remote_ip
) ?
1704 ipa_is_ip4(cf
->remote_ip
) :
1705 (cf
->remote_range
&& (cf
->remote_range
->type
== NET_IP4
));
1707 p
->remote_ip
= cf
->remote_ip
;
1708 p
->remote_as
= cf
->remote_as
;
1710 /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
1712 cf
->remote_ip
= IPA_NONE
;
1714 /* Add all channels */
1715 struct bgp_channel_config
*cc
;
1716 BGP_CF_WALK_CHANNELS(cf
, cc
)
1717 proto_add_channel(P
, &cc
->c
);
1723 bgp_channel_init(struct channel
*C
, struct channel_config
*CF
)
1725 struct bgp_channel
*c
= (void *) C
;
1726 struct bgp_channel_config
*cf
= (void *) CF
;
1732 if (cf
->igp_table_ip4
)
1733 c
->igp_table_ip4
= cf
->igp_table_ip4
->table
;
1735 if (cf
->igp_table_ip6
)
1736 c
->igp_table_ip6
= cf
->igp_table_ip6
->table
;
1739 c
->base_table
= cf
->base_table
->table
;
1743 bgp_channel_start(struct channel
*C
)
1745 struct bgp_proto
*p
= (void *) C
->proto
;
1746 struct bgp_channel
*c
= (void *) C
;
1747 ip_addr src
= p
->local_ip
;
1749 if (c
->igp_table_ip4
)
1750 rt_lock_table(c
->igp_table_ip4
);
1752 if (c
->igp_table_ip6
)
1753 rt_lock_table(c
->igp_table_ip6
);
1757 rt_lock_table(c
->base_table
);
1758 rt_flowspec_link(c
->base_table
, c
->c
.table
);
1761 c
->pool
= p
->p
.pool
; // XXXX
1762 bgp_init_bucket_table(c
);
1763 bgp_init_prefix_table(c
);
1765 if (c
->cf
->import_table
)
1766 channel_setup_in_table(C
);
1768 if (c
->cf
->export_table
)
1769 channel_setup_out_table(C
);
1771 c
->stale_timer
= tm_new_init(c
->pool
, bgp_long_lived_stale_timeout
, c
, 0, 0);
1773 c
->next_hop_addr
= c
->cf
->next_hop_addr
;
1774 c
->link_addr
= IPA_NONE
;
1775 c
->packets_to_send
= 0;
1777 /* Try to use source address as next hop address */
1778 if (ipa_zero(c
->next_hop_addr
))
1780 if (bgp_channel_is_ipv4(c
) && (ipa_is_ip4(src
) || c
->ext_next_hop
))
1781 c
->next_hop_addr
= src
;
1783 if (bgp_channel_is_ipv6(c
) && (ipa_is_ip6(src
) || c
->ext_next_hop
))
1784 c
->next_hop_addr
= src
;
1787 /* Use preferred addresses associated with interface / source address */
1788 if (ipa_zero(c
->next_hop_addr
))
1790 /* We know the iface for single-hop, we make lookup for multihop */
1791 struct neighbor
*nbr
= p
->neigh
?: neigh_find(&p
->p
, src
, NULL
, 0);
1792 struct iface
*iface
= nbr
? nbr
->iface
: NULL
;
1794 if (bgp_channel_is_ipv4(c
) && iface
&& iface
->addr4
)
1795 c
->next_hop_addr
= iface
->addr4
->ip
;
1797 if (bgp_channel_is_ipv6(c
) && iface
&& iface
->addr6
)
1798 c
->next_hop_addr
= iface
->addr6
->ip
;
1801 /* Exit if no feasible next hop address is found */
1802 if (ipa_zero(c
->next_hop_addr
))
1804 log(L_WARN
"%s: Missing next hop address", p
->p
.name
);
1808 /* Set link-local address for IPv6 single-hop BGP */
1809 if (ipa_is_ip6(c
->next_hop_addr
) && p
->neigh
)
1811 c
->link_addr
= p
->link_addr
;
1813 if (ipa_zero(c
->link_addr
))
1814 log(L_WARN
"%s: Missing link-local address", p
->p
.name
);
1817 /* Link local address is already in c->link_addr */
1818 if (ipa_is_link_local(c
->next_hop_addr
))
1819 c
->next_hop_addr
= IPA_NONE
;
1821 return 0; /* XXXX: Currently undefined */
1825 bgp_channel_shutdown(struct channel
*C
)
1827 struct bgp_channel
*c
= (void *) C
;
1829 c
->next_hop_addr
= IPA_NONE
;
1830 c
->link_addr
= IPA_NONE
;
1831 c
->packets_to_send
= 0;
1835 bgp_channel_cleanup(struct channel
*C
)
1837 struct bgp_channel
*c
= (void *) C
;
1839 if (c
->igp_table_ip4
)
1840 rt_unlock_table(c
->igp_table_ip4
);
1842 if (c
->igp_table_ip6
)
1843 rt_unlock_table(c
->igp_table_ip6
);
1847 rt_flowspec_unlink(c
->base_table
, c
->c
.table
);
1848 rt_unlock_table(c
->base_table
);
1853 /* Cleanup rest of bgp_channel starting at pool field */
1854 memset(&(c
->pool
), 0, sizeof(struct bgp_channel
) - OFFSETOF(struct bgp_channel
, pool
));
1857 static inline struct bgp_channel_config
*
1858 bgp_find_channel_config(struct bgp_config
*cf
, u32 afi
)
1860 struct bgp_channel_config
*cc
;
1862 BGP_CF_WALK_CHANNELS(cf
, cc
)
1869 struct rtable_config
*
1870 bgp_default_igp_table(struct bgp_config
*cf
, struct bgp_channel_config
*cc
, u32 type
)
1872 struct bgp_channel_config
*cc2
;
1873 struct rtable_config
*tab
;
1875 /* First, try table connected by the channel */
1876 if (cc
->c
.table
->addr_type
== type
)
1879 /* Find paired channel with the same SAFI but the other AFI */
1880 u32 afi2
= cc
->afi
^ 0x30000;
1881 cc2
= bgp_find_channel_config(cf
, afi2
);
1883 /* Second, try IGP table configured in the paired channel */
1884 if (cc2
&& (tab
= (type
== NET_IP4
) ? cc2
->igp_table_ip4
: cc2
->igp_table_ip6
))
1887 /* Third, try table connected by the paired channel */
1888 if (cc2
&& (cc2
->c
.table
->addr_type
== type
))
1889 return cc2
->c
.table
;
1891 /* Last, try default table of given type */
1892 if (tab
= cf
->c
.global
->def_tables
[type
])
1895 cf_error("Undefined IGP table");
1898 static struct rtable_config
*
1899 bgp_default_base_table(struct bgp_config
*cf
, struct bgp_channel_config
*cc
)
1901 /* Expected table type */
1902 u32 type
= (cc
->afi
== BGP_AF_FLOW4
) ? NET_IP4
: NET_IP6
;
1904 /* First, try appropriate IP channel */
1905 u32 afi2
= BGP_AF(BGP_AFI(cc
->afi
), BGP_SAFI_UNICAST
);
1906 struct bgp_channel_config
*cc2
= bgp_find_channel_config(cf
, afi2
);
1907 if (cc2
&& (cc2
->c
.table
->addr_type
== type
))
1908 return cc2
->c
.table
;
1910 /* Last, try default table of given type */
1911 struct rtable_config
*tab
= cf
->c
.global
->def_tables
[type
];
1915 cf_error("Undefined base table");
1919 bgp_postconfig(struct proto_config
*CF
)
1921 struct bgp_config
*cf
= (void *) CF
;
1923 /* Do not check templates at all */
1924 if (cf
->c
.class == SYM_TEMPLATE
)
1928 /* Handle undefined remote_as, zero should mean unspecified external */
1929 if (!cf
->remote_as
&& (cf
->peer_type
== BGP_PT_INTERNAL
))
1930 cf
->remote_as
= cf
->local_as
;
1932 int internal
= (cf
->local_as
== cf
->remote_as
);
1933 int interior
= internal
|| cf
->confederation_member
;
1935 /* EBGP direct by default, IBGP multihop by default */
1936 if (cf
->multihop
< 0)
1937 cf
->multihop
= internal
? 64 : 0;
1939 /* LLGR mode default based on GR mode */
1940 if (cf
->llgr_mode
< 0)
1941 cf
->llgr_mode
= cf
->gr_mode
? BGP_LLGR_AWARE
: 0;
1943 /* Link check for single-hop BGP by default */
1944 if (cf
->check_link
< 0)
1945 cf
->check_link
= !cf
->multihop
;
1949 cf_error("Local AS number must be set");
1951 if (ipa_zero(cf
->remote_ip
) && !cf
->remote_range
)
1952 cf_error("Neighbor must be configured");
1954 if (ipa_zero(cf
->local_ip
) && cf
->strict_bind
)
1955 cf_error("Local address must be configured for strict bind");
1957 if (!cf
->remote_as
&& !cf
->peer_type
)
1958 cf_error("Remote AS number (or peer type) must be set");
1960 if ((cf
->peer_type
== BGP_PT_INTERNAL
) && !internal
)
1961 cf_error("IBGP cannot have different ASNs");
1963 if ((cf
->peer_type
== BGP_PT_EXTERNAL
) && internal
)
1964 cf_error("EBGP cannot have the same ASNs");
1966 if (!cf
->iface
&& (ipa_is_link_local(cf
->local_ip
) ||
1967 ipa_is_link_local(cf
->remote_ip
)))
1968 cf_error("Link-local addresses require defined interface");
1970 if (!(cf
->capabilities
&& cf
->enable_as4
) && (cf
->remote_as
> 0xFFFF))
1971 cf_error("Neighbor AS number out of range (AS4 not available)");
1973 if (!internal
&& cf
->rr_client
)
1974 cf_error("Only internal neighbor can be RR client");
1976 if (internal
&& cf
->rs_client
)
1977 cf_error("Only external neighbor can be RS client");
1979 if (internal
&& (cf
->local_role
!= BGP_ROLE_UNDEFINED
))
1980 cf_error("Local role cannot be set on IBGP sessions");
1982 if (interior
&& (cf
->local_role
!= BGP_ROLE_UNDEFINED
))
1983 log(L_WARN
"BGP roles are not recommended to be used within AS confederations");
1985 if (cf
->require_roles
&& (cf
->local_role
== BGP_ROLE_UNDEFINED
))
1986 cf_error("Local role must be set if roles are required");
1988 if (!cf
->confederation
&& cf
->confederation_member
)
1989 cf_error("Confederation ID must be set for member sessions");
1991 if (cf
->multihop
&& (ipa_is_link_local(cf
->local_ip
) ||
1992 ipa_is_link_local(cf
->remote_ip
)))
1993 cf_error("Multihop BGP cannot be used with link-local addresses");
1995 if (cf
->multihop
&& cf
->iface
)
1996 cf_error("Multihop BGP cannot be bound to interface");
1998 if (cf
->multihop
&& cf
->check_link
)
1999 cf_error("Multihop BGP cannot depend on link state");
2001 if (cf
->multihop
&& cf
->bfd
&& ipa_zero(cf
->local_ip
))
2002 cf_error("Multihop BGP with BFD requires specified local address");
2004 if (!cf
->gr_mode
&& cf
->llgr_mode
)
2005 cf_error("Long-lived graceful restart requires basic graceful restart");
2007 if (internal
&& cf
->enforce_first_as
)
2008 cf_error("Enforce first AS check is requires EBGP sessions");
2011 struct bgp_channel_config
*cc
;
2012 BGP_CF_WALK_CHANNELS(cf
, cc
)
2014 /* Handle undefined import filter */
2015 if (cc
->c
.in_filter
== FILTER_UNDEF
)
2017 cc
->c
.in_filter
= FILTER_ACCEPT
;
2019 cf_error("EBGP requires explicit import policy");
2021 /* Handle undefined export filter */
2022 if (cc
->c
.out_filter
== FILTER_UNDEF
)
2024 cc
->c
.out_filter
= FILTER_REJECT
;
2026 cf_error("EBGP requires explicit export policy");
2028 /* Disable after error incompatible with restart limit action */
2029 if ((cc
->c
.in_limit
.action
== PLA_RESTART
) && cf
->disable_after_error
)
2030 cc
->c
.in_limit
.action
= PLA_DISABLE
;
2032 /* Different default based on rr_client, rs_client */
2033 if (cc
->next_hop_keep
== 0xff)
2034 cc
->next_hop_keep
= cf
->rr_client
? NH_IBGP
: (cf
->rs_client
? NH_ALL
: NH_NO
);
2036 /* Different default for gw_mode */
2038 cc
->gw_mode
= cf
->multihop
? GW_RECURSIVE
: GW_DIRECT
;
2040 /* Defaults based on proto config */
2041 if (cc
->gr_able
== 0xff)
2042 cc
->gr_able
= (cf
->gr_mode
== BGP_GR_ABLE
);
2044 if (cc
->llgr_able
== 0xff)
2045 cc
->llgr_able
= (cf
->llgr_mode
== BGP_LLGR_ABLE
);
2047 if (cc
->llgr_time
== ~0U)
2048 cc
->llgr_time
= cf
->llgr_time
;
2050 /* AIGP enabled by default on interior sessions */
2051 if (cc
->aigp
== 0xff)
2052 cc
->aigp
= interior
;
2054 /* Default values of IGP tables */
2055 if ((cc
->gw_mode
== GW_RECURSIVE
) && !cc
->desc
->no_igp
)
2057 if (!cc
->igp_table_ip4
&& (bgp_cc_is_ipv4(cc
) || cc
->ext_next_hop
))
2058 cc
->igp_table_ip4
= bgp_default_igp_table(cf
, cc
, NET_IP4
);
2060 if (!cc
->igp_table_ip6
&& (bgp_cc_is_ipv6(cc
) || cc
->ext_next_hop
))
2061 cc
->igp_table_ip6
= bgp_default_igp_table(cf
, cc
, NET_IP6
);
2063 if (cc
->igp_table_ip4
&& bgp_cc_is_ipv6(cc
) && !cc
->ext_next_hop
)
2064 cf_error("Mismatched IGP table type");
2066 if (cc
->igp_table_ip6
&& bgp_cc_is_ipv4(cc
) && !cc
->ext_next_hop
)
2067 cf_error("Mismatched IGP table type");
2070 /* Default value of base table */
2071 if ((BGP_SAFI(cc
->afi
) == BGP_SAFI_FLOW
) && cc
->validate
&& !cc
->base_table
)
2072 cc
->base_table
= bgp_default_base_table(cf
, cc
);
2074 if (cc
->base_table
&& !cc
->base_table
->trie_used
)
2075 cf_error("Flowspec validation requires base table (%s) with trie",
2076 cc
->base_table
->name
);
2078 if (cf
->multihop
&& (cc
->gw_mode
== GW_DIRECT
))
2079 cf_error("Multihop BGP cannot use direct gateway mode");
2081 if ((cc
->gw_mode
== GW_RECURSIVE
) && cc
->c
.table
->sorted
)
2082 cf_error("BGP in recursive mode prohibits sorted table");
2084 if (cf
->deterministic_med
&& cc
->c
.table
->sorted
)
2085 cf_error("BGP with deterministic MED prohibits sorted table");
2087 if (cc
->secondary
&& !cc
->c
.table
->sorted
)
2088 cf_error("BGP with secondary option requires sorted table");
2093 bgp_reconfigure(struct proto
*P
, struct proto_config
*CF
)
2095 struct bgp_proto
*p
= (void *) P
;
2096 const struct bgp_config
*new = (void *) CF
;
2097 const struct bgp_config
*old
= p
->cf
;
2099 if (proto_get_router_id(CF
) != p
->local_id
)
2102 int same
= !memcmp(((byte
*) old
) + sizeof(struct proto_config
),
2103 ((byte
*) new) + sizeof(struct proto_config
),
2104 // password item is last and must be checked separately
2105 OFFSETOF(struct bgp_config
, password
) - sizeof(struct proto_config
))
2106 && !bstrcmp(old
->password
, new->password
)
2107 && ((!old
->remote_range
&& !new->remote_range
)
2108 || (old
->remote_range
&& new->remote_range
&& net_equal(old
->remote_range
, new->remote_range
)))
2109 && !bstrcmp(old
->dynamic_name
, new->dynamic_name
)
2110 && (old
->dynamic_name_digits
== new->dynamic_name_digits
);
2112 /* FIXME: Move channel reconfiguration to generic protocol code ? */
2113 struct channel
*C
, *C2
;
2114 struct bgp_channel_config
*cc
;
2116 WALK_LIST(C
, p
->p
.channels
)
2119 BGP_CF_WALK_CHANNELS(new, cc
)
2121 C
= (struct channel
*) bgp_find_channel(p
, cc
->afi
);
2122 same
= proto_configure_channel(P
, &C
, &cc
->c
) && same
;
2125 WALK_LIST_DELSAFE(C
, C2
, p
->p
.channels
)
2127 same
= proto_configure_channel(P
, &C
, NULL
) && same
;
2129 if (same
&& (p
->start_state
> BSS_PREPARE
))
2130 bgp_update_bfd(p
, new->bfd
);
2132 /* We should update our copy of configuration ptr as old configuration will be freed */
2136 /* Reset name counter */
2137 p
->dynamic_name_counter
= 0;
/*
 * TABLE(cf, NAME) - the table referenced by config field NAME
 *
 * Expands to (cf)->NAME->table when the NAME field of cf is non-null,
 * and to NULL otherwise.
 */
#define TABLE(cf, NAME) \
  ((cf)->NAME ? (cf)->NAME->table : NULL)
2145 bgp_channel_reconfigure(struct channel
*C
, struct channel_config
*CC
, int *import_changed
, int *export_changed
)
2147 struct bgp_proto
*p
= (void *) C
->proto
;
2148 struct bgp_channel
*c
= (void *) C
;
2149 struct bgp_channel_config
*new = (void *) CC
;
2150 struct bgp_channel_config
*old
= c
->cf
;
2152 if ((new->secondary
!= old
->secondary
) ||
2153 (new->validate
!= old
->validate
) ||
2154 (new->gr_able
!= old
->gr_able
) ||
2155 (new->llgr_able
!= old
->llgr_able
) ||
2156 (new->llgr_time
!= old
->llgr_time
) ||
2157 (new->ext_next_hop
!= old
->ext_next_hop
) ||
2158 (new->add_path
!= old
->add_path
) ||
2159 (new->import_table
!= old
->import_table
) ||
2160 (new->export_table
!= old
->export_table
) ||
2161 (TABLE(new, igp_table_ip4
) != TABLE(old
, igp_table_ip4
)) ||
2162 (TABLE(new, igp_table_ip6
) != TABLE(old
, igp_table_ip6
)) ||
2163 (TABLE(new, base_table
) != TABLE(old
, base_table
)))
2166 if (new->mandatory
&& !old
->mandatory
&& (C
->channel_state
!= CS_UP
))
2169 if ((new->gw_mode
!= old
->gw_mode
) ||
2170 (new->aigp
!= old
->aigp
) ||
2171 (new->cost
!= old
->cost
))
2173 /* import_changed itself does not force ROUTE_REFRESH when import_table is active */
2174 if (c
->c
.in_table
&& (c
->c
.channel_state
== CS_UP
))
2175 bgp_schedule_packet(p
->conn
, c
, PKT_ROUTE_REFRESH
);
2177 *import_changed
= 1;
2180 if (!ipa_equal(new->next_hop_addr
, old
->next_hop_addr
) ||
2181 (new->next_hop_self
!= old
->next_hop_self
) ||
2182 (new->next_hop_keep
!= old
->next_hop_keep
) ||
2183 (new->aigp
!= old
->aigp
) ||
2184 (new->aigp_originate
!= old
->aigp_originate
))
2185 *export_changed
= 1;
2192 bgp_copy_config(struct proto_config
*dest
, struct proto_config
*src
)
2194 struct bgp_config
*d
= (void *) dest
;
2195 struct bgp_config
*s
= (void *) src
;
2197 /* Copy BFD options */
2200 struct bfd_options
*opts
= cfg_alloc(sizeof(struct bfd_options
));
2201 memcpy(opts
, s
->bfd
, sizeof(struct bfd_options
));
2208 * bgp_error - report a protocol error
2210 * @code: error code (according to the RFC)
2211 * @subcode: error sub-code
2212 * @data: data to be passed in the Notification message
2213 * @len: length of the data
2215 * bgp_error() sends a notification packet to tell the other side that a protocol
2216 * error has occurred (including the data considered erroneous if possible) and
2217 * closes the connection.
2220 bgp_error(struct bgp_conn
*c
, uint code
, uint subcode
, byte
*data
, int len
)
2222 struct bgp_proto
*p
= c
->bgp
;
2224 if (c
->state
== BS_CLOSE
)
2227 bgp_log_error(p
, BE_BGP_TX
, "Error", code
, subcode
, data
, ABS(len
));
2228 bgp_store_error(p
, c
, BE_BGP_TX
, (code
<< 16) | subcode
);
2229 bgp_conn_enter_close_state(c
);
2231 c
->notify_code
= code
;
2232 c
->notify_subcode
= subcode
;
2233 c
->notify_data
= data
;
2234 c
->notify_size
= (len
> 0) ? len
: 0;
2235 bgp_schedule_packet(c
, NULL
, PKT_NOTIFICATION
);
2239 bgp_update_startup_delay(p
);
2240 bgp_stop(p
, 0, NULL
, 0);
2245 * bgp_store_error - store last error for status report
2248 * @class: error class (BE_xxx constants)
2249 * @code: error code (class specific)
2251 * bgp_store_error() decides whether the given error is interesting enough
2252 * and stores that error in the last_error variables of @p.
2255 bgp_store_error(struct bgp_proto
*p
, struct bgp_conn
*c
, u8
class, u32 code
)
2257 /* During PS_UP, we ignore errors on secondary connection */
2258 if ((p
->p
.proto_state
== PS_UP
) && c
&& (c
!= p
->conn
))
2261 /* During PS_STOP, we ignore any errors, as we want to report
2262 * the error that caused transition to PS_STOP
2264 if (p
->p
.proto_state
== PS_STOP
)
2267 p
->last_error_class
= class;
2268 p
->last_error_code
= code
;
/* Connection state names; bgp_state_dsc() indexes this by connection state. */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Status prefixes; bgp_get_status() indexes this by last_error_class. */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Miscellaneous error texts; bgp_last_errmsg() indexes this by last_error_code. */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Automatic shutdown texts; bgp_last_errmsg() indexes this by last_error_code. */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};

/* Graceful restart state names (presumably indexed by the GR state value). */
static char *bgp_gr_states[] = {
  "None",
  "Regular",
  "Long-lived"
};
2278 bgp_last_errmsg(struct bgp_proto
*p
)
2280 switch (p
->last_error_class
)
2283 return bgp_misc_errors
[p
->last_error_code
];
2285 return (p
->last_error_code
== 0) ? "Connection closed" : strerror(p
->last_error_code
);
2288 return bgp_error_dsc(p
->last_error_code
>> 16, p
->last_error_code
& 0xFF);
2290 return bgp_auto_errors
[p
->last_error_code
];
2297 bgp_state_dsc(struct bgp_proto
*p
)
2299 if (p
->p
.proto_state
== PS_DOWN
)
2302 int state
= MAX(p
->incoming_conn
.state
, p
->outgoing_conn
.state
);
2303 if ((state
== BS_IDLE
) && (p
->start_state
>= BSS_CONNECT
) && p
->passive
)
2306 return bgp_state_names
[state
];
2310 bgp_get_status(struct proto
*P
, byte
*buf
)
2312 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
2314 const char *err1
= bgp_err_classes
[p
->last_error_class
];
2315 const char *err2
= bgp_last_errmsg(p
);
2317 if (P
->proto_state
== PS_DOWN
)
2318 bsprintf(buf
, "%s%s", err1
, err2
);
2320 bsprintf(buf
, "%-14s%s%s", bgp_state_dsc(p
), err1
, err2
);
2324 bgp_show_afis(int code
, char *s
, u32
*afis
, uint count
)
2331 for (u32
*af
= afis
; af
< (afis
+ count
); af
++)
2333 const struct bgp_af_desc
*desc
= bgp_get_af_desc(*af
);
2335 buffer_print(&b
, " %s", desc
->name
);
2337 buffer_print(&b
, " <%u/%u>", BGP_AFI(*af
), BGP_SAFI(*af
));
2341 strcpy(b
.end
- 32, " ... <too long>");
2343 cli_msg(code
, b
.start
);
2347 bgp_format_role_name(u8 role
)
2349 static const char *bgp_role_names
[] = { "provider", "rs_server", "rs_client", "customer", "peer" };
2350 if (role
== BGP_ROLE_UNDEFINED
) return "undefined";
2351 if (role
< ARRAY_SIZE(bgp_role_names
)) return bgp_role_names
[role
];
/*
 * bgp_show_capabilities - print one capability set to the CLI.
 *
 * @p is marked UNUSED; everything is read from @caps. All output is emitted
 * through cli_msg() with code -1006, with per-address-family lists printed by
 * bgp_show_afis(). The visible sections are, in order: multiprotocol, route
 * refresh, extended next hop, extended message, graceful restart, 4-octet AS
 * numbers, ADD-PATH, enhanced refresh, long-lived graceful restart, hostname
 * and role.
 *
 * Per-AF data is visited with WALK_AF_CAPS(); AFI values selected for a
 * section are collected into the scratch arrays afl1/afl2 (counts afn1/afn2)
 * before being handed to bgp_show_afis().
 *
 * NOTE(review): this listing omits a number of lines (for example 2366,
 * 2377, 2381, 2383, 2407, 2410, 2417, 2420, 2434, 2458-2461 and 2481). The
 * declarations and resets of afn1/afn2/stale_time and the conditions guarding
 * several sections and array appends are not visible here; confirm against
 * the full file before relying on the exact control flow.
 */
2356 bgp_show_capabilities(struct bgp_proto
*p UNUSED
, struct bgp_caps
*caps
)
2358 struct bgp_af_caps
*ac
;
2359 uint any_mp_bgp
= 0;
2360 uint any_gr_able
= 0;
2361 uint any_add_path
= 0;
2362 uint any_ext_next_hop
= 0;
2363 uint any_llgr_able
= 0;
/* Two stack scratch arrays, each sized for af_count u32 values. */
2364 u32
*afl1
= alloca(caps
->af_count
* sizeof(u32
));
2365 u32
*afl2
= alloca(caps
->af_count
* sizeof(u32
));
/* First pass: OR the per-AF capability fields into the any_* summary flags. */
2368 WALK_AF_CAPS(caps
, ac
)
2370 any_mp_bgp
|= ac
->ready
;
2371 any_gr_able
|= ac
->gr_able
;
2372 any_add_path
|= ac
->add_path
;
2373 any_ext_next_hop
|= ac
->ext_next_hop
;
2374 any_llgr_able
|= ac
->llgr_able
;
/* Multiprotocol section: heading plus the list of announced AFs. */
2379 cli_msg(-1006, " Multiprotocol");
2382 WALK_AF_CAPS(caps
, ac
)
2384 afl1
[afn1
++] = ac
->afi
;
2386 bgp_show_afis(-1006, " AF announced:", afl1
, afn1
);
2389 if (caps
->route_refresh
)
2390 cli_msg(-1006, " Route refresh");
/* Extended next hop section: lists the AFs whose ext_next_hop field is set. */
2392 if (any_ext_next_hop
)
2394 cli_msg(-1006, " Extended next hop");
2397 WALK_AF_CAPS(caps
, ac
)
2398 if (ac
->ext_next_hop
)
2399 afl1
[afn1
++] = ac
->afi
;
2401 bgp_show_afis(-1006, " IPv6 nexthop:", afl1
, afn1
);
2404 if (caps
->ext_messages
)
2405 cli_msg(-1006, " Extended message");
/* Graceful restart section: heading, restart time, flags and two AF lists. */
2408 cli_msg(-1006, " Graceful restart");
2412 /* Continues from gr_aware */
2413 cli_msg(-1006, " Restart time: %u", caps
->gr_time
);
2414 if (caps
->gr_flags
& BGP_GRF_RESTART
)
2415 cli_msg(-1006, " Restart recovery");
2418 WALK_AF_CAPS(caps
, ac
)
2421 afl1
[afn1
++] = ac
->afi
;
/* afl2 collects the AFs whose per-AF flags carry BGP_GRF_FORWARDING. */
2423 if (ac
->gr_af_flags
& BGP_GRF_FORWARDING
)
2424 afl2
[afn2
++] = ac
->afi
;
2427 bgp_show_afis(-1006, " AF supported:", afl1
, afn1
);
2428 bgp_show_afis(-1006, " AF preserved:", afl2
, afn2
);
2431 if (caps
->as4_support
)
2432 cli_msg(-1006, " 4-octet AS numbers");
/* ADD-PATH section: afl1 gets AFs with the RX bit, afl2 those with the TX bit. */
2436 cli_msg(-1006, " ADD-PATH");
2439 WALK_AF_CAPS(caps
, ac
)
2441 if (ac
->add_path
& BGP_ADD_PATH_RX
)
2442 afl1
[afn1
++] = ac
->afi
;
2444 if (ac
->add_path
& BGP_ADD_PATH_TX
)
2445 afl2
[afn2
++] = ac
->afi
;
2448 bgp_show_afis(-1006, " RX:", afl1
, afn1
);
2449 bgp_show_afis(-1006, " TX:", afl2
, afn2
);
2452 if (caps
->enhanced_refresh
)
2453 cli_msg(-1006, " Enhanced refresh");
2455 if (caps
->llgr_aware
)
2456 cli_msg(-1006, " Long-lived graceful restart");
/*
 * Long-lived graceful restart details: stale_time ends up as the largest
 * llgr_time over all AFs; afl1 takes AFs with both llgr_able and a non-zero
 * llgr_time, afl2 those whose llgr_flags carry BGP_GRF_FORWARDING.
 */
2463 WALK_AF_CAPS(caps
, ac
)
2465 stale_time
= MAX(stale_time
, ac
->llgr_time
);
2467 if (ac
->llgr_able
&& ac
->llgr_time
)
2468 afl1
[afn1
++] = ac
->afi
;
2470 if (ac
->llgr_flags
& BGP_GRF_FORWARDING
)
2471 afl2
[afn2
++] = ac
->afi
;
2474 /* Continues from llgr_aware */
2475 cli_msg(-1006, " LL stale time: %u", stale_time
);
2477 bgp_show_afis(-1006, " AF supported:", afl1
, afn1
);
2478 bgp_show_afis(-1006, " AF preserved:", afl2
, afn2
);
2482 cli_msg(-1006, " Hostname: %s", caps
->hostname
);
/* The role line is printed only when a role is actually defined. */
2484 if (caps
->role
!= BGP_ROLE_UNDEFINED
)
2485 cli_msg(-1006, " Role: %s", bgp_format_role_name(caps
->role
));
/*
 * bgp_show_proto_info - print detailed information about a BGP protocol
 * instance to the CLI.
 *
 * @P is cast to struct bgp_proto. All output goes through cli_msg() with
 * code -1006. The visible code prints, in order:
 *  - the BGP state (bgp_state_dsc()), the neighbor range or address, the
 *    neighbor port when the current connection is the outgoing one and the
 *    configured remote port differs from BGP_PORT, neighbor and local AS,
 *    and a note when gr_active_num is non-zero;
 *  - in PS_START: remaining time of the startup, connect and graceful
 *    restart timers, each only while the corresponding timer is active;
 *  - in PS_UP: neighbor ID, local and neighbor capabilities (via
 *    bgp_show_capabilities()), session flags, source address and the hold
 *    and keepalive timers;
 *  - message statistics taken from p->stats;
 *  - the last error, unless its class is BE_NONE or BE_MAN_DOWN;
 *  - for every channel on the protocol's channel list, the generic
 *    channel_show_info() output followed by BGP-specific details.
 *
 * NOTE(review): this listing prefixes statements with their original line
 * numbers and omits lines (for example 2497, 2547, 2557, 2574, 2586 and
 * 2596). Else branches, the statement under the channel-class test, and
 * whatever surrounds the statistics output (a plain block or possibly a
 * preprocessor guard) are not visible here; confirm against the full file.
 */
2489 bgp_show_proto_info(struct proto
*P
)
2491 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
2493 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p
));
2495 if (bgp_is_dynamic(p
) && p
->cf
->remote_range
)
2496 cli_msg(-1006, " Neighbor range: %N", p
->cf
->remote_range
);
2498 cli_msg(-1006, " Neighbor address: %I%J", p
->remote_ip
, p
->cf
->iface
);
2500 if ((p
->conn
== &p
->outgoing_conn
) && (p
->cf
->remote_port
!= BGP_PORT
))
2501 cli_msg(-1006, " Neighbor port: %u", p
->cf
->remote_port
);
2503 cli_msg(-1006, " Neighbor AS: %u", p
->remote_as
);
2504 cli_msg(-1006, " Local AS: %u", p
->cf
->local_as
);
2506 if (p
->gr_active_num
)
2507 cli_msg(-1006, " Neighbor graceful restart active");
/* Session not yet up: report whichever timers are currently running. */
2509 if (P
->proto_state
== PS_START
)
2511 struct bgp_conn
*oc
= &p
->outgoing_conn
;
2513 if ((p
->start_state
< BSS_CONNECT
) &&
2514 (tm_active(p
->startup_timer
)))
2515 cli_msg(-1006, " Error wait: %t/%u",
2516 tm_remains(p
->startup_timer
), p
->startup_delay
);
2518 if ((oc
->state
== BS_ACTIVE
) &&
2519 (tm_active(oc
->connect_timer
)))
2520 cli_msg(-1006, " Connect delay: %t/%u",
2521 tm_remains(oc
->connect_timer
), p
->cf
->connect_delay_time
);
2523 if (p
->gr_active_num
&& tm_active(p
->gr_timer
))
2524 cli_msg(-1006, " Restart timer: %t/-",
2525 tm_remains(p
->gr_timer
));
/* Session up: identity, both capability sets, session flags and timers. */
2527 else if (P
->proto_state
== PS_UP
)
2529 cli_msg(-1006, " Neighbor ID: %R", p
->remote_id
);
2530 cli_msg(-1006, " Local capabilities");
2531 bgp_show_capabilities(p
, p
->conn
->local_caps
);
2532 cli_msg(-1006, " Neighbor capabilities");
2533 bgp_show_capabilities(p
, p
->conn
->remote_caps
);
2534 cli_msg(-1006, " Session: %s%s%s%s%s",
2535 p
->is_internal
? "internal" : "external",
2536 p
->cf
->multihop
? " multihop" : "",
2537 p
->rr_client
? " route-reflector" : "",
2538 p
->rs_client
? " route-server" : "",
2539 p
->as4_session
? " AS4" : "");
2540 cli_msg(-1006, " Source address: %I", p
->local_ip
);
2541 cli_msg(-1006, " Hold timer: %t/%u",
2542 tm_remains(p
->conn
->hold_timer
), p
->conn
->hold_time
);
2543 cli_msg(-1006, " Keepalive timer: %t/%u",
2544 tm_remains(p
->conn
->keepalive_timer
), p
->conn
->keepalive_time
);
/*
 * Message statistics from p->stats.
 * NOTE(review): lines 2547 and 2557 bracketing this group are missing from
 * the listing -- confirm whether this output is compiled in.
 */
2548 struct bgp_stats
*s
= &p
->stats
;
2549 cli_msg(-1006, " FSM established transitions: %u",
2550 s
->fsm_established_transitions
);
2551 cli_msg(-1006, " Rcvd messages: %u total / %u updates / %lu bytes",
2552 s
->rx_messages
, s
->rx_updates
, s
->rx_bytes
);
2553 cli_msg(-1006, " Sent messages: %u total / %u updates / %lu bytes",
2554 s
->tx_messages
, s
->tx_updates
, s
->tx_bytes
);
/* Elapsed time is reported as 0 when last_rx_update is zero. */
2555 cli_msg(-1006, " Last rcvd update elapsed time: %t s",
2556 p
->last_rx_update
? (current_time() - p
->last_rx_update
) : 0);
/* The last error is shown for every class except BE_NONE and BE_MAN_DOWN. */
2559 if ((p
->last_error_class
!= BE_NONE
) &&
2560 (p
->last_error_class
!= BE_MAN_DOWN
))
2562 const char *err1
= bgp_err_classes
[p
->last_error_class
];
2563 const char *err2
= bgp_last_errmsg(p
);
2564 cli_msg(-1006, " Last error: %s%s", err1
, err2
);
/* Per-channel output: generic channel info first, then BGP-specific fields. */
2568 struct bgp_channel
*c
;
2569 WALK_LIST(c
, p
->p
.channels
)
2571 channel_show_info(&c
->c
);
/*
 * Tests whether the channel's class is something other than channel_bgp.
 * NOTE(review): the statement controlled by this test (line 2574) is not
 * visible in the listing.
 */
2573 if (c
->c
.channel
!= &channel_bgp
)
2576 if (p
->gr_active_num
)
2577 cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states
[c
->gr_active
]);
2579 if (c
->stale_timer
&& tm_active(c
->stale_timer
))
2580 cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c
->stale_timer
));
2582 if (c
->c
.channel_state
== CS_UP
)
/* The one-address form is the call placed under the ipa_zero(link_addr) test. */
2584 if (ipa_zero(c
->link_addr
))
2585 cli_msg(-1006, " BGP Next hop: %I", c
->next_hop_addr
);
2587 cli_msg(-1006, " BGP Next hop: %I %I", c
->next_hop_addr
, c
->link_addr
);
2590 if (c
->igp_table_ip4
)
2591 cli_msg(-1006, " IGP IPv4 table: %s", c
->igp_table_ip4
->name
);
2593 if (c
->igp_table_ip6
)
2594 cli_msg(-1006, " IGP IPv6 table: %s", c
->igp_table_ip6
->name
);
2597 cli_msg(-1006, " Base table: %s", c
->base_table
->name
);
/*
 * channel_bgp - channel class descriptor for BGP channels.
 *
 * Records the sizes of struct bgp_channel and struct bgp_channel_config and
 * the BGP implementations of the init, start, shutdown, cleanup and
 * reconfigure hooks. bgp_show_proto_info() compares a channel's class
 * pointer against the address of this object.
 */
2602 struct channel_class channel_bgp
= {
2603 .channel_size
= sizeof(struct bgp_channel
),
2604 .config_size
= sizeof(struct bgp_channel_config
),
2605 .init
= bgp_channel_init
,
2606 .start
= bgp_channel_start
,
2607 .shutdown
= bgp_channel_shutdown
,
2608 .cleanup
= bgp_channel_cleanup
,
2609 .reconfigure
= bgp_channel_reconfigure
,
/*
 * proto_bgp - protocol descriptor for BGP.
 *
 * Sets the instance name template "bgp%d", the protocol class, the default
 * preference, the accepted channel network types (NB_IP, NB_VPN, NB_FLOW),
 * the sizes of struct bgp_proto and struct bgp_config, and the BGP hook
 * functions visible below. bgp_build() passes this object to proto_build().
 *
 * NOTE(review): lines 2613, 2621 and 2622 are missing from this listing, so
 * further members may be initialized in the full file; confirm there.
 */
2612 struct protocol proto_bgp
= {
2614 .template = "bgp%d",
2615 .class = PROTOCOL_BGP
,
2616 .preference
= DEF_PREF_BGP
,
2617 .channel_mask
= NB_IP
| NB_VPN
| NB_FLOW
,
2618 .proto_size
= sizeof(struct bgp_proto
),
2619 .config_size
= sizeof(struct bgp_config
),
2620 .postconfig
= bgp_postconfig
,
2623 .shutdown
= bgp_shutdown
,
2624 .reconfigure
= bgp_reconfigure
,
2625 .copy_config
= bgp_copy_config
,
2626 .get_status
= bgp_get_status
,
2627 .get_attr
= bgp_get_attr
,
2628 .get_route_info
= bgp_get_route_info
,
2629 .show_proto_info
= bgp_show_proto_info
/*
 * bgp_build - hand the BGP protocol descriptor to proto_build().
 *
 * Takes no arguments and returns nothing; its only visible action is the
 * proto_build() call on proto_bgp.
 */
2632 void bgp_build(void)
2634 proto_build(&proto_bgp
);