2 * BIRD -- The Border Gateway Protocol
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
8 * Can be freely distributed and used under the terms of the GNU GPL.
12 * DOC: Border Gateway Protocol
14 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
15 * the connection and most of the interface with BIRD core, |packets.c| handling
16 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
17 * manipulation with BGP attribute lists.
19 * As opposed to the other existing routing daemons, BIRD has a sophisticated
20 * core architecture which is able to keep all the information needed by BGP in
21 * the primary routing table, therefore no complex data structures like a
22 * central BGP table are needed. This increases memory footprint of a BGP router
23 * with many connections, but not too much and, which is more important, it
24 * makes BGP much easier to implement.
26 * Each instance of BGP (corresponding to a single BGP peer) is described by a
27 * &bgp_proto structure to which are attached individual connections represented
28 * by &bgp_conn (usually, there exists only one connection, but during BGP
29 * session setup, there can be more of them). The connections are handled
30 * according to the BGP state machine defined in the RFC with all the timers and
31 * all the parameters configurable.
33 * In incoming direction, we listen on the connection's socket and each time we
34 * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
35 * markers and passes complete packets to bgp_rx_packet() which distributes the
36 * packet according to its type.
38 * In outgoing direction, we gather all the routing updates and sort them to
39 * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
40 * fast comparison of &rta's and a &fib which helps us to find if we already
41 * have another route for the same destination queued for sending, so that we
42 * can replace it with the new one immediately instead of sending both
43 * updates). There also exists a special bucket holding all the route
44 * withdrawals which cannot be queued anywhere else as they don't have any
45 * attributes. If we have any packet to send (due to either new routes or the
46 * connection tracking code wanting to send an Open, Keepalive or Notification
47 * message), we call bgp_schedule_packet() which sets the corresponding bit in a
48 * @packets_to_send bit field in &bgp_conn and as soon as the transmit socket
49 * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
50 * packet type bits and calls the corresponding bgp_create_xx() functions,
51 * eventually rescheduling the same packet type if we have more data of the same type to send.
54 * The processing of attributes consists of two functions: bgp_decode_attrs()
55 * for checking of the attribute blocks and translating them to the language of
56 * BIRD's extended attributes and bgp_encode_attrs() which does the
57 * converse. Both functions are built around a @bgp_attr_table array describing
58 * all important characteristics of all known attributes. Unknown transitive
59 * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
61 * BGP protocol implements graceful restart in both restarting (local restart)
62 * and receiving (neighbor restart) roles. The first is handled mostly by the
63 * graceful restart code in the nest, BGP protocol just handles capabilities,
64 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
65 * The second is implemented by internal restart of the BGP state to %BS_IDLE
66 * and protocol state to %PS_START, but keeping the protocol up from the core
67 * point of view and therefore maintaining received routes. Routing table
68 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
69 * stale routes after reestablishment of BGP session during graceful restart.
71 * Supported standards:
73 * <item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP)
74 * <item> <rfc id="1997"> - BGP Communities Attribute
75 * <item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature
76 * <item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6
77 * <item> <rfc id="2918"> - Route Refresh Capability
78 * <item> <rfc id="3107"> - Carrying Label Information in BGP
79 * <item> <rfc id="4360"> - BGP Extended Communities Attribute
80 * <item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks
81 * <item> <rfc id="4456"> - BGP Route Reflection
82 * <item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message
83 * <item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks
84 * <item> <rfc id="4724"> - Graceful Restart Mechanism for BGP
85 * <item> <rfc id="4760"> - Multiprotocol extensions for BGP
86 * <item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS
87 * <item> <rfc id="5065"> - AS confederations for BGP
88 * <item> <rfc id="5082"> - Generalized TTL Security Mechanism
89 * <item> <rfc id="5492"> - Capabilities Advertisement with BGP
90 * <item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop
91 * <item> <rfc id="5575"> - Dissemination of Flow Specification Rules
92 * <item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community
93 * <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
94 * <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
95 * <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
96 * <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
97 * <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
98 * <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
99 * <item> <rfc id="7947"> - Internet Exchange BGP Route Server
100 * <item> <rfc id="8092"> - BGP Large Communities Attribute
106 #include "nest/bird.h"
107 #include "nest/iface.h"
108 #include "nest/protocol.h"
109 #include "nest/route.h"
110 #include "nest/cli.h"
111 #include "nest/locks.h"
112 #include "conf/conf.h"
113 #include "lib/socket.h"
114 #include "lib/resource.h"
115 #include "lib/string.h"
120 struct linpool
*bgp_linpool
; /* Global temporary pool */
121 struct linpool
*bgp_linpool2
; /* Global temporary pool for bgp_rt_notify() */
122 static list bgp_sockets
; /* Global list of listening sockets */
125 static void bgp_connect(struct bgp_proto
*p
);
126 static void bgp_active(struct bgp_proto
*p
);
127 static void bgp_update_bfd(struct bgp_proto
*p
, int use_bfd
);
129 static int bgp_incoming_connection(sock
*sk
, uint dummy UNUSED
);
130 static void bgp_listen_sock_err(sock
*sk UNUSED
, int err
);
133 * bgp_open - open a BGP instance
136 * This function allocates and configures shared BGP resources, mainly listening
137 * sockets. Should be called as the last step during initialization (when lock
138 * is acquired and neighbor is ready). On error, the caller should change state to
139 * PS_DOWN and return immediately.
142 bgp_open(struct bgp_proto
*p
)
144 struct bgp_socket
*bs
= NULL
;
145 struct iface
*ifa
= p
->cf
->strict_bind
? p
->cf
->iface
: NULL
;
146 ip_addr addr
= p
->cf
->strict_bind
? p
->cf
->local_ip
:
147 (ipa_is_ip4(p
->cf
->remote_ip
) ? IPA_NONE4
: IPA_NONE6
);
148 uint port
= p
->cf
->local_port
;
150 /* FIXME: Add some global init? */
152 init_list(&bgp_sockets
);
154 /* We assume that cf->iface is defined iff cf->local_ip is link-local */
156 WALK_LIST(bs
, bgp_sockets
)
157 if (ipa_equal(bs
->sk
->saddr
, addr
) && (bs
->sk
->iface
== ifa
) && (bs
->sk
->sport
== port
))
164 sock
*sk
= sk_new(proto_pool
);
165 sk
->type
= SK_TCP_PASSIVE
;
170 sk
->tos
= IP_PREC_INTERNET_CONTROL
;
171 sk
->rbsize
= BGP_RX_BUFFER_SIZE
;
172 sk
->tbsize
= BGP_TX_BUFFER_SIZE
;
173 sk
->rx_hook
= bgp_incoming_connection
;
174 sk
->err_hook
= bgp_listen_sock_err
;
179 bs
= mb_allocz(proto_pool
, sizeof(struct bgp_socket
));
184 add_tail(&bgp_sockets
, &bs
->n
);
188 bgp_linpool
= lp_new_default(proto_pool
);
189 bgp_linpool2
= lp_new_default(proto_pool
);
195 sk_log_error(sk
, p
->p
.name
);
196 log(L_ERR
"%s: Cannot open listening socket", p
->p
.name
);
202 * bgp_close - close a BGP instance
205 * This function frees and deconfigures shared BGP resources.
208 bgp_close(struct bgp_proto
*p
)
210 struct bgp_socket
*bs
= p
->sock
;
212 ASSERT(bs
&& bs
->uc
);
221 if (!EMPTY_LIST(bgp_sockets
))
232 bgp_setup_auth(struct bgp_proto
*p
, int enable
)
236 int rv
= sk_set_md5_auth(p
->sock
->sk
,
237 p
->cf
->local_ip
, p
->cf
->remote_ip
, p
->cf
->iface
,
238 enable
? p
->cf
->password
: NULL
, p
->cf
->setkey
);
241 sk_log_error(p
->sock
->sk
, p
->p
.name
);
249 static inline struct bgp_channel
*
250 bgp_find_channel(struct bgp_proto
*p
, u32 afi
)
252 struct bgp_channel
*c
;
253 WALK_LIST(c
, p
->p
.channels
)
261 bgp_startup(struct bgp_proto
*p
)
263 BGP_TRACE(D_EVENTS
, "Started");
264 p
->start_state
= BSS_CONNECT
;
271 bgp_startup_timeout(timer
*t
)
273 bgp_startup(t
->data
);
278 bgp_initiate(struct bgp_proto
*p
)
283 { err_val
= BEM_NO_SOCKET
; goto err1
; }
285 if (bgp_setup_auth(p
, 1) < 0)
286 { err_val
= BEM_INVALID_MD5
; goto err2
; }
289 bgp_update_bfd(p
, p
->cf
->bfd
);
291 if (p
->startup_delay
)
293 p
->start_state
= BSS_DELAY
;
294 BGP_TRACE(D_EVENTS
, "Startup delayed by %d seconds due to errors", p
->startup_delay
);
295 bgp_start_timer(p
->startup_timer
, p
->startup_delay
);
306 bgp_store_error(p
, NULL
, BE_MISC
, err_val
);
307 proto_notify_state(&p
->p
, PS_DOWN
);
313 * bgp_start_timer - start a BGP timer
315 * @value: time to fire (0 to disable the timer)
317 * This function calls tm_start() on @t with time @value and the amount of
318 * randomization suggested by the BGP standard. Please use it for all BGP timers.
322 bgp_start_timer(timer
*t
, int value
)
326 /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
327 t
->randomize
= value
/ 4;
328 tm_start(t
, value
- t
->randomize
);
335 * bgp_close_conn - close a BGP connection
336 * @conn: connection to close
338 * This function takes a connection described by the &bgp_conn structure, closes
339 * its socket and frees all resources associated with it.
342 bgp_close_conn(struct bgp_conn
*conn
)
344 // struct bgp_proto *p = conn->bgp;
346 DBG("BGP: Closing connection\n");
347 conn
->packets_to_send
= 0;
348 conn
->channels_to_send
= 0;
349 rfree(conn
->connect_timer
);
350 conn
->connect_timer
= NULL
;
351 rfree(conn
->keepalive_timer
);
352 conn
->keepalive_timer
= NULL
;
353 rfree(conn
->hold_timer
);
354 conn
->hold_timer
= NULL
;
360 mb_free(conn
->local_caps
);
361 conn
->local_caps
= NULL
;
362 mb_free(conn
->remote_caps
);
363 conn
->remote_caps
= NULL
;
368 * bgp_update_startup_delay - update a startup delay
371 * This function updates a startup delay that is used to postpone next BGP
372 * connect. It also handles disable_after_error and might stop BGP instance
373 * when error happened and disable_after_error is on.
375 * It should be called when BGP protocol error happened.
378 bgp_update_startup_delay(struct bgp_proto
*p
)
380 struct bgp_config
*cf
= p
->cf
;
382 DBG("BGP: Updating startup delay\n");
384 if (p
->last_proto_error
&& ((now
- p
->last_proto_error
) >= (int) cf
->error_amnesia_time
))
385 p
->startup_delay
= 0;
387 p
->last_proto_error
= now
;
389 if (cf
->disable_after_error
)
391 p
->startup_delay
= 0;
396 if (!p
->startup_delay
)
397 p
->startup_delay
= cf
->error_delay_time_min
;
399 p
->startup_delay
= MIN(2 * p
->startup_delay
, cf
->error_delay_time_max
);
403 bgp_graceful_close_conn(struct bgp_conn
*conn
, uint subcode
)
413 bgp_conn_enter_idle_state(conn
);
419 bgp_error(conn
, 6, subcode
, NULL
, 0);
423 bug("bgp_graceful_close_conn: Unknown state %d", conn
->state
);
428 bgp_down(struct bgp_proto
*p
)
430 if (p
->start_state
> BSS_PREPARE
)
432 bgp_setup_auth(p
, 0);
436 BGP_TRACE(D_EVENTS
, "Down");
437 proto_notify_state(&p
->p
, PS_DOWN
);
441 bgp_decision(void *vp
)
443 struct bgp_proto
*p
= vp
;
445 DBG("BGP: Decision start\n");
446 if ((p
->p
.proto_state
== PS_START
) &&
447 (p
->outgoing_conn
.state
== BS_IDLE
) &&
448 (p
->incoming_conn
.state
!= BS_OPENCONFIRM
) &&
452 if ((p
->p
.proto_state
== PS_STOP
) &&
453 (p
->outgoing_conn
.state
== BS_IDLE
) &&
454 (p
->incoming_conn
.state
== BS_IDLE
))
459 bgp_stop(struct bgp_proto
*p
, uint subcode
)
461 proto_notify_state(&p
->p
, PS_STOP
);
462 bgp_graceful_close_conn(&p
->outgoing_conn
, subcode
);
463 bgp_graceful_close_conn(&p
->incoming_conn
, subcode
);
464 ev_schedule(p
->event
);
468 bgp_conn_set_state(struct bgp_conn
*conn
, uint new_state
)
470 if (conn
->bgp
->p
.mrtdump
& MD_STATES
)
471 mrt_dump_bgp_state_change(conn
, conn
->state
, new_state
);
473 conn
->state
= new_state
;
477 bgp_conn_enter_openconfirm_state(struct bgp_conn
*conn
)
479 /* Really, most of the work is done in bgp_rx_open(). */
480 bgp_conn_set_state(conn
, BS_OPENCONFIRM
);
483 static const struct bgp_af_caps dummy_af_caps
= { };
486 bgp_conn_enter_established_state(struct bgp_conn
*conn
)
488 struct bgp_proto
*p
= conn
->bgp
;
489 struct bgp_caps
*local
= conn
->local_caps
;
490 struct bgp_caps
*peer
= conn
->remote_caps
;
491 struct bgp_channel
*c
;
493 BGP_TRACE(D_EVENTS
, "BGP session established");
495 /* For multi-hop BGP sessions */
496 if (ipa_zero(p
->source_addr
))
497 p
->source_addr
= conn
->sk
->saddr
;
499 conn
->sk
->fast_rx
= 0;
502 p
->last_error_class
= 0;
503 p
->last_error_code
= 0;
505 p
->as4_session
= conn
->as4_session
;
507 p
->route_refresh
= peer
->route_refresh
;
508 p
->enhanced_refresh
= local
->enhanced_refresh
&& peer
->enhanced_refresh
;
510 /* Whether we may handle possible GR of peer (it has some AF GR-able) */
511 p
->gr_ready
= 0; /* Updated later */
513 /* Whether peer is ready to handle our GR recovery */
514 int peer_gr_ready
= peer
->gr_aware
&& !(peer
->gr_flags
& BGP_GRF_RESTART
);
516 if (p
->gr_active_num
)
517 tm_stop(p
->gr_timer
);
519 /* Number of active channels */
522 WALK_LIST(c
, p
->p
.channels
)
524 const struct bgp_af_caps
*loc
= bgp_find_af_caps(local
, c
->afi
);
525 const struct bgp_af_caps
*rem
= bgp_find_af_caps(peer
, c
->afi
);
527 /* Ignore AFIs that were not announced in multiprotocol capability */
528 if (!loc
|| !loc
->ready
)
529 loc
= &dummy_af_caps
;
531 if (!rem
|| !rem
->ready
)
532 rem
= &dummy_af_caps
;
534 int active
= loc
->ready
&& rem
->ready
;
535 c
->c
.disabled
= !active
;
536 c
->c
.reloadable
= p
->route_refresh
;
538 c
->index
= active
? num
++ : 0;
540 c
->feed_state
= BFS_NONE
;
541 c
->load_state
= BFS_NONE
;
543 /* Channels where peer may do GR */
544 c
->gr_ready
= active
&& local
->gr_aware
&& rem
->gr_able
;
545 p
->gr_ready
= p
->gr_ready
|| c
->gr_ready
;
547 /* Channels not able to recover gracefully */
548 if (p
->p
.gr_recovery
&& (!active
|| !peer_gr_ready
))
549 channel_graceful_restart_unlock(&c
->c
);
551 /* Channels waiting for local convergence */
552 if (p
->p
.gr_recovery
&& loc
->gr_able
&& peer_gr_ready
)
555 /* Channels where peer is not able to recover gracefully */
556 if (c
->gr_active
&& ! (c
->gr_ready
&& (rem
->gr_af_flags
& BGP_GRF_FORWARDING
)))
557 bgp_graceful_restart_done(c
);
559 /* GR capability implies that neighbor will send End-of-RIB */
561 c
->load_state
= BFS_LOADING
;
563 c
->ext_next_hop
= c
->cf
->ext_next_hop
&& (bgp_channel_is_ipv6(c
) || rem
->ext_next_hop
);
564 c
->add_path_rx
= (loc
->add_path
& BGP_ADD_PATH_RX
) && (rem
->add_path
& BGP_ADD_PATH_TX
);
565 c
->add_path_tx
= (loc
->add_path
& BGP_ADD_PATH_TX
) && (rem
->add_path
& BGP_ADD_PATH_RX
);
569 c
->c
.ra_mode
= RA_ANY
;
570 else if (c
->cf
->secondary
)
571 c
->c
.ra_mode
= RA_ACCEPTED
;
573 c
->c
.ra_mode
= RA_OPTIMAL
;
576 p
->afi_map
= mb_alloc(p
->p
.pool
, num
* sizeof(u32
));
577 p
->channel_map
= mb_alloc(p
->p
.pool
, num
* sizeof(void *));
578 p
->channel_count
= num
;
580 WALK_LIST(c
, p
->p
.channels
)
585 p
->afi_map
[c
->index
] = c
->afi
;
586 p
->channel_map
[c
->index
] = c
;
589 /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
591 bgp_conn_set_state(conn
, BS_ESTABLISHED
);
592 proto_notify_state(&p
->p
, PS_UP
);
596 bgp_conn_leave_established_state(struct bgp_proto
*p
)
598 BGP_TRACE(D_EVENTS
, "BGP session closed");
601 // XXXX free these tables to avoid memory leak during graceful restart
602 // bgp_free_prefix_table(p);
603 // bgp_free_bucket_table(p);
605 if (p
->p
.proto_state
== PS_UP
)
610 bgp_conn_enter_close_state(struct bgp_conn
*conn
)
612 struct bgp_proto
*p
= conn
->bgp
;
613 int os
= conn
->state
;
615 bgp_conn_set_state(conn
, BS_CLOSE
);
616 tm_stop(conn
->keepalive_timer
);
617 conn
->sk
->rx_hook
= NULL
;
619 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
620 bgp_start_timer(conn
->hold_timer
, 10);
622 if (os
== BS_ESTABLISHED
)
623 bgp_conn_leave_established_state(p
);
627 bgp_conn_enter_idle_state(struct bgp_conn
*conn
)
629 struct bgp_proto
*p
= conn
->bgp
;
630 int os
= conn
->state
;
632 bgp_close_conn(conn
);
633 bgp_conn_set_state(conn
, BS_IDLE
);
634 ev_schedule(p
->event
);
636 if (os
== BS_ESTABLISHED
)
637 bgp_conn_leave_established_state(p
);
641 * bgp_handle_graceful_restart - handle detected BGP graceful restart
644 * This function is called when a BGP graceful restart of the neighbor is
645 * detected (when the TCP connection fails or when a new TCP connection
646 * appears). The function activates processing of the restart - starts routing
647 * table refresh cycle and activates BGP restart timer. The protocol state goes
648 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the caller.
652 bgp_handle_graceful_restart(struct bgp_proto
*p
)
654 ASSERT(p
->conn
&& (p
->conn
->state
== BS_ESTABLISHED
) && p
->gr_ready
);
656 BGP_TRACE(D_EVENTS
, "Neighbor graceful restart detected%s",
657 p
->gr_active_num
? " - already pending" : "");
659 p
->gr_active_num
= 0;
661 struct bgp_channel
*c
;
662 WALK_LIST(c
, p
->p
.channels
)
667 rt_refresh_end(c
->c
.table
, &c
->c
);
671 rt_refresh_begin(c
->c
.table
, &c
->c
);
675 /* Just flush the routes */
676 rt_refresh_begin(c
->c
.table
, &c
->c
);
677 rt_refresh_end(c
->c
.table
, &c
->c
);
681 proto_notify_state(&p
->p
, PS_START
);
682 bgp_start_timer(p
->gr_timer
, p
->conn
->local_caps
->gr_time
);
686 * bgp_graceful_restart_done - finish active BGP graceful restart
689 * This function is called when the active BGP graceful restart of the neighbor
690 * should be finished for channel @c - either successfully (the neighbor sends
691 * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
692 * unsuccessfully (the neighbor does not support BGP graceful restart on the new
693 * session). The function ends the routing table refresh cycle.
696 bgp_graceful_restart_done(struct bgp_channel
*c
)
698 struct bgp_proto
*p
= (void *) c
->c
.proto
;
700 ASSERT(c
->gr_active
);
704 if (!p
->gr_active_num
)
705 BGP_TRACE(D_EVENTS
, "Neighbor graceful restart done");
707 rt_refresh_end(c
->c
.table
, &c
->c
);
711 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
714 * This function is a timeout hook for @gr_timer, implementing BGP restart time
715 * limit for reestablishment of the BGP session after the graceful restart. When
716 * fired, we just proceed with the usual protocol restart.
720 bgp_graceful_restart_timeout(timer
*t
)
722 struct bgp_proto
*p
= t
->data
;
724 BGP_TRACE(D_EVENTS
, "Neighbor graceful restart timeout");
730 * bgp_refresh_begin - start incoming enhanced route refresh sequence
733 * This function is called when an incoming enhanced route refresh sequence is
734 * started by the neighbor, demarcated by the BoRR packet. The function updates
735 * the load state and starts the routing table refresh cycle. Note that graceful
736 * restart also uses routing table refresh cycle, but RFC 7313 and load states
737 * ensure that these two sequences do not overlap.
740 bgp_refresh_begin(struct bgp_channel
*c
)
742 struct bgp_proto
*p
= (void *) c
->c
.proto
;
744 if (c
->load_state
== BFS_LOADING
)
745 { log(L_WARN
"%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p
->p
.name
); return; }
747 c
->load_state
= BFS_REFRESHING
;
748 rt_refresh_begin(c
->c
.table
, &c
->c
);
752 * bgp_refresh_end - finish incoming enhanced route refresh sequence
755 * This function is called when an incoming enhanced route refresh sequence is
756 * finished by the neighbor, demarcated by the EoRR packet. The function updates
757 * the load state and ends the routing table refresh cycle. Routes not received
758 * during the sequence are removed by the nest.
761 bgp_refresh_end(struct bgp_channel
*c
)
763 struct bgp_proto
*p
= (void *) c
->c
.proto
;
765 if (c
->load_state
!= BFS_REFRESHING
)
766 { log(L_WARN
"%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p
->p
.name
); return; }
768 c
->load_state
= BFS_NONE
;
769 rt_refresh_end(c
->c
.table
, &c
->c
);
774 bgp_send_open(struct bgp_conn
*conn
)
776 DBG("BGP: Sending open\n");
777 conn
->sk
->rx_hook
= bgp_rx
;
778 conn
->sk
->tx_hook
= bgp_tx
;
779 tm_stop(conn
->connect_timer
);
780 bgp_schedule_packet(conn
, NULL
, PKT_OPEN
);
781 bgp_conn_set_state(conn
, BS_OPENSENT
);
782 bgp_start_timer(conn
->hold_timer
, conn
->bgp
->cf
->initial_hold_time
);
786 bgp_connected(sock
*sk
)
788 struct bgp_conn
*conn
= sk
->data
;
789 struct bgp_proto
*p
= conn
->bgp
;
791 BGP_TRACE(D_EVENTS
, "Connected");
796 bgp_connect_timeout(timer
*t
)
798 struct bgp_conn
*conn
= t
->data
;
799 struct bgp_proto
*p
= conn
->bgp
;
801 DBG("BGP: connect_timeout\n");
802 if (p
->p
.proto_state
== PS_START
)
804 bgp_close_conn(conn
);
808 bgp_conn_enter_idle_state(conn
);
812 bgp_sock_err(sock
*sk
, int err
)
814 struct bgp_conn
*conn
= sk
->data
;
815 struct bgp_proto
*p
= conn
->bgp
;
818 * This error hook may be called either asynchronously from main
819 * loop, or synchronously from sk_send(). But sk_send() is called
820 * only from bgp_tx() and bgp_kick_tx(), which are both called
821 * asynchronously from main loop. Moreover, they end if err hook is
822 * called. Therefore, we could suppose that it is always called asynchronously.
826 bgp_store_error(p
, conn
, BE_SOCKET
, err
);
829 BGP_TRACE(D_EVENTS
, "Connection lost (%M)", err
);
831 BGP_TRACE(D_EVENTS
, "Connection closed");
833 if ((conn
->state
== BS_ESTABLISHED
) && p
->gr_ready
)
834 bgp_handle_graceful_restart(p
);
836 bgp_conn_enter_idle_state(conn
);
840 bgp_hold_timeout(timer
*t
)
842 struct bgp_conn
*conn
= t
->data
;
843 struct bgp_proto
*p
= conn
->bgp
;
845 DBG("BGP: Hold timeout\n");
847 /* We are already closing the connection - just do hangup */
848 if (conn
->state
== BS_CLOSE
)
850 BGP_TRACE(D_EVENTS
, "Connection stalled");
851 bgp_conn_enter_idle_state(conn
);
855 /* If there is something in input queue, we are probably congested
856 and perhaps just have not processed BGP packets in time. */
858 if (sk_rx_ready(conn
->sk
) > 0)
859 bgp_start_timer(conn
->hold_timer
, 10);
861 bgp_error(conn
, 4, 0, NULL
, 0);
865 bgp_keepalive_timeout(timer
*t
)
867 struct bgp_conn
*conn
= t
->data
;
869 DBG("BGP: Keepalive timer\n");
870 bgp_schedule_packet(conn
, NULL
, PKT_KEEPALIVE
);
872 /* Kick TX a bit faster */
873 if (ev_active(conn
->tx_ev
))
878 bgp_setup_conn(struct bgp_proto
*p
, struct bgp_conn
*conn
)
883 conn
->packets_to_send
= 0;
884 conn
->channels_to_send
= 0;
885 conn
->last_channel
= 0;
886 conn
->last_channel_count
= 0;
888 conn
->connect_timer
= tm_new_set(p
->p
.pool
, bgp_connect_timeout
, conn
, 0, 0);
889 conn
->hold_timer
= tm_new_set(p
->p
.pool
, bgp_hold_timeout
, conn
, 0, 0);
890 conn
->keepalive_timer
= tm_new_set(p
->p
.pool
, bgp_keepalive_timeout
, conn
, 0, 0);
892 conn
->tx_ev
= ev_new(p
->p
.pool
);
893 conn
->tx_ev
->hook
= bgp_kick_tx
;
894 conn
->tx_ev
->data
= conn
;
898 bgp_setup_sk(struct bgp_conn
*conn
, sock
*s
)
901 s
->err_hook
= bgp_sock_err
;
907 bgp_active(struct bgp_proto
*p
)
909 int delay
= MAX(1, p
->cf
->connect_delay_time
);
910 struct bgp_conn
*conn
= &p
->outgoing_conn
;
912 BGP_TRACE(D_EVENTS
, "Connect delayed by %d seconds", delay
);
913 bgp_setup_conn(p
, conn
);
914 bgp_conn_set_state(conn
, BS_ACTIVE
);
915 bgp_start_timer(conn
->connect_timer
, delay
);
919 * bgp_connect - initiate an outgoing connection
922 * The bgp_connect() function creates a new &bgp_conn and initiates
923 * a TCP connection to the peer. The rest of connection setup is governed
924 * by the BGP state machine as described in the standard.
927 bgp_connect(struct bgp_proto
*p
) /* Enter Connect state and start establishing connection */
929 struct bgp_conn
*conn
= &p
->outgoing_conn
;
930 int hops
= p
->cf
->multihop
? : 1;
932 DBG("BGP: Connecting\n");
933 sock
*s
= sk_new(p
->p
.pool
);
934 s
->type
= SK_TCP_ACTIVE
;
935 s
->saddr
= p
->source_addr
;
936 s
->daddr
= p
->cf
->remote_ip
;
937 s
->dport
= p
->cf
->remote_port
;
938 s
->iface
= p
->neigh
? p
->neigh
->iface
: NULL
;
939 s
->ttl
= p
->cf
->ttl_security
? 255 : hops
;
940 s
->rbsize
= p
->cf
->enable_extended_messages
? BGP_RX_BUFFER_EXT_SIZE
: BGP_RX_BUFFER_SIZE
;
941 s
->tbsize
= p
->cf
->enable_extended_messages
? BGP_TX_BUFFER_EXT_SIZE
: BGP_TX_BUFFER_SIZE
;
942 s
->tos
= IP_PREC_INTERNET_CONTROL
;
943 s
->password
= p
->cf
->password
;
944 s
->tx_hook
= bgp_connected
;
945 BGP_TRACE(D_EVENTS
, "Connecting to %I%J from local address %I%J", s
->daddr
, p
->cf
->iface
,
946 s
->saddr
, ipa_is_link_local(s
->saddr
) ? s
->iface
: NULL
);
947 bgp_setup_conn(p
, conn
);
948 bgp_setup_sk(conn
, s
);
949 bgp_conn_set_state(conn
, BS_CONNECT
);
954 /* Set minimal receive TTL if needed */
955 if (p
->cf
->ttl_security
)
956 if (sk_set_min_ttl(s
, 256 - hops
) < 0)
959 DBG("BGP: Waiting for connect success\n");
960 bgp_start_timer(conn
->connect_timer
, p
->cf
->connect_retry_time
);
964 sk_log_error(s
, p
->p
.name
);
970 * bgp_find_proto - find existing proto for incoming connection
974 static struct bgp_proto
*
975 bgp_find_proto(sock
*sk
)
979 WALK_LIST(p
, proto_list
)
980 if ((p
->p
.proto
== &proto_bgp
) &&
981 ipa_equal(p
->cf
->remote_ip
, sk
->daddr
) &&
982 (!p
->cf
->iface
|| (p
->cf
->iface
== sk
->iface
)) &&
983 (ipa_zero(p
->cf
->local_ip
) || ipa_equal(p
->cf
->local_ip
, sk
->saddr
)) &&
984 (p
->cf
->local_port
== sk
->sport
))
991 * bgp_incoming_connection - handle an incoming connection
995 * This function serves as a socket hook for accepting of new BGP
996 * connections. It searches a BGP instance corresponding to the peer
997 * which has connected and if such an instance exists, it creates a
998 * &bgp_conn structure, attaches it to the instance and either sends
999 * an Open message or (if there already is an active connection) it
1000 * closes the new connection by sending a Notification message.
1003 bgp_incoming_connection(sock
*sk
, uint dummy UNUSED
)
1005 struct bgp_proto
*p
;
1008 DBG("BGP: Incoming connection from %I port %d\n", sk
->daddr
, sk
->dport
);
1009 p
= bgp_find_proto(sk
);
1012 log(L_WARN
"BGP: Unexpected connect from unknown address %I%J (port %d)",
1013 sk
->daddr
, ipa_is_link_local(sk
->daddr
) ? sk
->iface
: NULL
, sk
->dport
);
1019 * BIRD should keep multiple incoming connections in OpenSent state (for
1020 * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
1021 * connections are rejected instead. The exception is the case where an
1022 * incoming connection triggers a graceful restart.
1025 acc
= (p
->p
.proto_state
== PS_START
|| p
->p
.proto_state
== PS_UP
) &&
1026 (p
->start_state
>= BSS_CONNECT
) && (!p
->incoming_conn
.sk
);
1028 if (p
->conn
&& (p
->conn
->state
== BS_ESTABLISHED
) && p
->gr_ready
)
1030 bgp_store_error(p
, NULL
, BE_MISC
, BEM_GRACEFUL_RESTART
);
1031 bgp_handle_graceful_restart(p
);
1032 bgp_conn_enter_idle_state(p
->conn
);
1035 /* There might be separate incoming connection in OpenSent state */
1036 if (p
->incoming_conn
.state
> BS_ACTIVE
)
1037 bgp_close_conn(&p
->incoming_conn
);
1040 BGP_TRACE(D_EVENTS
, "Incoming connection from %I%J (port %d) %s",
1041 sk
->daddr
, ipa_is_link_local(sk
->daddr
) ? sk
->iface
: NULL
,
1042 sk
->dport
, acc
? "accepted" : "rejected");
1050 hops
= p
->cf
->multihop
? : 1;
1052 if (sk_set_ttl(sk
, p
->cf
->ttl_security
? 255 : hops
) < 0)
1055 if (p
->cf
->ttl_security
)
1056 if (sk_set_min_ttl(sk
, 256 - hops
) < 0)
1059 if (p
->cf
->enable_extended_messages
)
1061 sk
->rbsize
= BGP_RX_BUFFER_EXT_SIZE
;
1062 sk
->tbsize
= BGP_TX_BUFFER_EXT_SIZE
;
1066 bgp_setup_conn(p
, &p
->incoming_conn
);
1067 bgp_setup_sk(&p
->incoming_conn
, sk
);
1068 bgp_send_open(&p
->incoming_conn
);
1072 sk_log_error(sk
, p
->p
.name
);
1073 log(L_ERR
"%s: Incoming connection aborted", p
->p
.name
);
1079 bgp_listen_sock_err(sock
*sk UNUSED
, int err
)
1081 if (err
== ECONNABORTED
)
1082 log(L_WARN
"BGP: Incoming connection aborted");
1084 log(L_ERR
"BGP: Error on listening socket: %M", err
);
1088 bgp_start_neighbor(struct bgp_proto
*p
)
1090 /* Called only for single-hop BGP sessions */
1092 if (ipa_zero(p
->source_addr
))
1093 p
->source_addr
= p
->neigh
->ifa
->ip
;
1095 if (ipa_is_link_local(p
->source_addr
))
1096 p
->link_addr
= p
->source_addr
;
1099 /* Find some link-local address for given iface */
1101 WALK_LIST(a
, p
->neigh
->iface
->addrs
)
1102 if (a
->scope
== SCOPE_LINK
)
1104 p
->link_addr
= a
->ip
;
1108 DBG("%s: Selected link-local address %I\n", p
->p
.name
, p
->link_addr
);
1115 bgp_neigh_notify(neighbor
*n
)
1117 struct bgp_proto
*p
= (struct bgp_proto
*) n
->proto
;
1118 int ps
= p
->p
.proto_state
;
1123 if ((ps
== PS_DOWN
) || (ps
== PS_STOP
))
1126 int prepare
= (ps
== PS_START
) && (p
->start_state
== BSS_PREPARE
);
1132 BGP_TRACE(D_EVENTS
, "Neighbor lost");
1133 bgp_store_error(p
, NULL
, BE_MISC
, BEM_NEIGHBOR_LOST
);
1134 /* Perhaps also run bgp_update_startup_delay(p)? */
1138 else if (p
->cf
->check_link
&& !(n
->iface
->flags
& IF_LINK_UP
))
1142 BGP_TRACE(D_EVENTS
, "Link down");
1143 bgp_store_error(p
, NULL
, BE_MISC
, BEM_LINK_DOWN
);
1145 bgp_update_startup_delay(p
);
1153 BGP_TRACE(D_EVENTS
, "Neighbor ready");
1154 bgp_start_neighbor(p
);
1160 bgp_bfd_notify(struct bfd_request
*req
)
1162 struct bgp_proto
*p
= req
->data
;
1163 int ps
= p
->p
.proto_state
;
1165 if (req
->down
&& ((ps
== PS_START
) || (ps
== PS_UP
)))
1167 BGP_TRACE(D_EVENTS
, "BFD session down");
1168 bgp_store_error(p
, NULL
, BE_MISC
, BEM_BFD_DOWN
);
1170 bgp_update_startup_delay(p
);
1176 bgp_update_bfd(struct bgp_proto
*p
, int use_bfd
)
1178 if (use_bfd
&& !p
->bfd_req
)
1179 p
->bfd_req
= bfd_request_session(p
->p
.pool
, p
->cf
->remote_ip
, p
->source_addr
,
1180 p
->cf
->multihop
? NULL
: p
->neigh
->iface
,
1183 if (!use_bfd
&& p
->bfd_req
)
1191 bgp_reload_routes(struct channel
*C
)
1193 struct bgp_proto
*p
= (void *) C
->proto
;
1194 struct bgp_channel
*c
= (void *) C
;
1196 ASSERT(p
->conn
&& p
->route_refresh
);
1198 bgp_schedule_packet(p
->conn
, c
, PKT_ROUTE_REFRESH
);
1202 bgp_feed_begin(struct channel
*C
, int initial
)
1204 struct bgp_proto
*p
= (void *) C
->proto
;
1205 struct bgp_channel
*c
= (void *) C
;
1207 /* This should not happen */
1211 if (initial
&& p
->cf
->gr_mode
)
1212 c
->feed_state
= BFS_LOADING
;
1214 /* It is refeed and both sides support enhanced route refresh */
1215 if (!initial
&& p
->enhanced_refresh
)
1217 /* BoRR must not be sent before End-of-RIB */
1218 if (c
->feed_state
== BFS_LOADING
|| c
->feed_state
== BFS_LOADED
)
1221 c
->feed_state
= BFS_REFRESHING
;
1222 bgp_schedule_packet(p
->conn
, c
, PKT_BEGIN_REFRESH
);
/*
 * bgp_feed_end - feed_end hook of the BGP protocol
 * @C: channel whose feed ended; also accessed as struct bgp_channel
 *
 * Advances the per-channel feed state machine: BFS_LOADING becomes
 * BFS_LOADED and BFS_REFRESHING becomes BFS_REFRESHED. The last visible
 * statement schedules a PKT_UPDATE packet on p->conn for the channel.
 *
 * NOTE(review): the statements following the "should not happen" comment
 * and the BFS_NONE test are not present in this listing (original lines
 * 1233-1234 and 1238), so the early exits they suggest cannot be confirmed
 * from here.
 */
1227 bgp_feed_end(struct channel
*C
)
1229 struct bgp_proto
*p
= (void *) C
->proto
;
1230 struct bgp_channel
*c
= (void *) C
;
1232 /* This should not happen */
1236 /* Non-demarcated feed ended, nothing to do */
1237 if (c
->feed_state
== BFS_NONE
)
1240 /* Schedule End-of-RIB packet */
1241 if (c
->feed_state
== BFS_LOADING
)
1242 c
->feed_state
= BFS_LOADED
;
1244 /* Schedule EoRR packet */
1245 if (c
->feed_state
== BFS_REFRESHING
)
1246 c
->feed_state
= BFS_REFRESHED
;
1249 bgp_schedule_packet(p
->conn
, c
, PKT_UPDATE
);
/*
 * bgp_start_locked - object lock hook (installed as lock->hook in bgp_start())
 * @lock: the object lock; lock->data is read as the struct bgp_proto
 *
 * Visible steps:
 *  - if the protocol is not in PS_START, a debug message with the current
 *    state is emitted;
 *  - the remote address is looked up with neigh_find2() using cf->iface and
 *    the NEF_STICKY flag;
 *  - an "Invalid remote address" error is logged, BE_MISC /
 *    BEM_INVALID_NEXT_HOP is stored and the protocol is moved to PS_DOWN;
 *  - otherwise the code either traces that it waits for the neighbor or for
 *    link (when cf->check_link is set and the interface lacks IF_LINK_UP),
 *    or calls bgp_start_neighbor().
 *
 * NOTE(review): the conditions guarding the error path, the multihop path
 * announced by the comment at original line 1269 and the first trace message
 * are not present in this listing (gaps at 1262-1264, 1270-1273, 1275-1276,
 * 1282-1287, 1291) -- confirm the exact control flow against upstream.
 */
1254 bgp_start_locked(struct object_lock
*lock
)
1256 struct bgp_proto
*p
= lock
->data
;
1257 struct bgp_config
*cf
= p
->cf
;
1259 if (p
->p
.proto_state
!= PS_START
)
1261 DBG("BGP: Got lock in different state %d\n", p
->p
.proto_state
);
1265 DBG("BGP: Got lock\n");
1269 /* Multi-hop sessions do not use neighbor entries */
1274 neighbor
*n
= neigh_find2(&p
->p
, &cf
->remote_ip
, cf
->iface
, NEF_STICKY
);
1277 log(L_ERR
"%s: Invalid remote address %I%J", p
->p
.name
, cf
->remote_ip
, cf
->iface
);
1278 /* As we do not start yet, we can just disable protocol */
1280 bgp_store_error(p
, NULL
, BE_MISC
, BEM_INVALID_NEXT_HOP
);
1281 proto_notify_state(&p
->p
, PS_DOWN
);
1288 BGP_TRACE(D_EVENTS
, "Waiting for %I%J to become my neighbor", cf
->remote_ip
, cf
->iface
);
1289 else if (p
->cf
->check_link
&& !(n
->iface
->flags
& IF_LINK_UP
))
1290 BGP_TRACE(D_EVENTS
, "Waiting for link on %s", n
->iface
->name
);
1292 bgp_start_neighbor(p
);
/*
 * bgp_start - start hook of a BGP protocol instance
 * @P: generic protocol structure, cast to struct bgp_proto
 *
 * Resets the per-instance runtime state and requests the TCP object lock:
 *  - start_state is set to BSS_PREPARE and both connection slots to BS_IDLE;
 *  - gr_active_num is cleared;
 *  - the decision event and the startup and graceful restart timers are
 *    allocated from the protocol pool and given their hooks;
 *  - local_id comes from proto_get_router_id(); rr_cluster_id falls back to
 *    local_id when the configured value is zero;
 *  - source_addr is taken from cf->local_ip and link_addr reset to IPA_NONE;
 *  - when p->p.gr_recovery and cf->gr_mode are both set, every channel gets
 *    channel_graceful_restart_lock();
 *  - an OBJLOCK_TCP lock keyed by remote address, remote port and interface
 *    is created with bgp_start_locked as its hook and then acquired.
 *
 * NOTE(review): some original lines are absent from this listing (for
 * example 1305-1307, 1312-1313 and everything after 1350), including the
 * return statement.
 */
1296 bgp_start(struct proto
*P
)
1298 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1299 struct object_lock
*lock
;
1301 DBG("BGP: Startup.\n");
1302 p
->start_state
= BSS_PREPARE
;
1303 p
->outgoing_conn
.state
= BS_IDLE
;
1304 p
->incoming_conn
.state
= BS_IDLE
;
1308 p
->gr_active_num
= 0;
1310 p
->event
= ev_new(p
->p
.pool
);
1311 p
->event
->hook
= bgp_decision
;
1314 p
->startup_timer
= tm_new(p
->p
.pool
);
1315 p
->startup_timer
->hook
= bgp_startup_timeout
;
1316 p
->startup_timer
->data
= p
;
1318 p
->gr_timer
= tm_new(p
->p
.pool
);
1319 p
->gr_timer
->hook
= bgp_graceful_restart_timeout
;
1320 p
->gr_timer
->data
= p
;
1322 p
->local_id
= proto_get_router_id(P
->cf
);
/* A zero configured cluster ID means "use our own router ID" */
1324 p
->rr_cluster_id
= p
->cf
->rr_cluster_id
? p
->cf
->rr_cluster_id
: p
->local_id
;
1327 p
->source_addr
= p
->cf
->local_ip
;
1328 p
->link_addr
= IPA_NONE
;
1331 if (p
->p
.gr_recovery
&& p
->cf
->gr_mode
)
1333 struct bgp_channel
*c
;
1334 WALK_LIST(c
, p
->p
.channels
)
1335 channel_graceful_restart_lock(&c
->c
);
1339 * Before attempting to create the connection, we need to lock the port,
1340 * so that we are the only instance attempting to talk with that neighbor.
1343 lock
= p
->lock
= olock_new(P
->pool
);
1344 lock
->addr
= p
->cf
->remote_ip
;
1345 lock
->port
= p
->cf
->remote_port
;
1346 lock
->iface
= p
->cf
->iface
;
1347 lock
->type
= OBJLOCK_TCP
;
1348 lock
->hook
= bgp_start_locked
;
1350 olock_acquire(lock
);
/* Defined outside this file; read by bgp_shutdown() for PDC_OUT_LIMIT_HIT */
1355 extern int proto_restart
;
/*
 * bgp_shutdown - shutdown hook of a BGP protocol instance
 * @P: generic protocol structure, cast to struct bgp_proto
 *
 * Maps P->down_code to the subcode passed to bgp_stop() (the in-line
 * comments label these as subcodes of error code 6):
 *   PDC_CF_DISABLE                      -> 3 (peer de-configured)
 *   PDC_CF_RESTART                      -> 6 (other configuration change)
 *   PDC_CMD_DISABLE, PDC_CMD_SHUTDOWN   -> 2 (administrative shutdown)
 *   PDC_CMD_RESTART                     -> 4 (administrative reset)
 *   PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT  -> 1 (max number of prefixes reached)
 *   PDC_OUT_LIMIT_HIT                   -> 4 if proto_restart, otherwise 2
 *
 * The visible tail stores either BE_AUTO_DOWN / BEA_ROUTE_LIMIT_EXCEEDED or
 * BE_MAN_DOWN, adjusts p->startup_delay, calls bgp_stop() and returns the
 * resulting protocol state.
 *
 * NOTE(review): the declaration of subcode, the break / goto statements, the
 * labels and the conditions around the startup_delay updates are not present
 * in this listing (original lines 1361, 1370, 1394-1395, 1397, 1399,
 * 1401-1403), so which tail runs for which case cannot be confirmed here.
 */
1358 bgp_shutdown(struct proto
*P
)
1360 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1363 BGP_TRACE(D_EVENTS
, "Shutdown requested");
1365 switch (P
->down_code
)
1368 case PDC_CF_DISABLE
:
1369 subcode
= 3; // Errcode 6, 3 - peer de-configured
1372 case PDC_CF_RESTART
:
1373 subcode
= 6; // Errcode 6, 6 - other configuration change
1376 case PDC_CMD_DISABLE
:
1377 case PDC_CMD_SHUTDOWN
:
1378 subcode
= 2; // Errcode 6, 2 - administrative shutdown
1381 case PDC_CMD_RESTART
:
1382 subcode
= 4; // Errcode 6, 4 - administrative reset
1385 case PDC_RX_LIMIT_HIT
:
1386 case PDC_IN_LIMIT_HIT
:
1387 subcode
= 1; // Errcode 6, 1 - max number of prefixes reached
1388 /* log message for compatibility */
1389 log(L_WARN
"%s: Route limit exceeded, shutting down", p
->p
.name
);
1392 case PDC_OUT_LIMIT_HIT
:
1393 subcode
= proto_restart
? 4 : 2; // Administrative reset or shutdown
1396 bgp_store_error(p
, NULL
, BE_AUTO_DOWN
, BEA_ROUTE_LIMIT_EXCEEDED
);
1398 bgp_update_startup_delay(p
);
1400 p
->startup_delay
= 0;
1404 bgp_store_error(p
, NULL
, BE_MAN_DOWN
, 0);
1405 p
->startup_delay
= 0;
1408 bgp_stop(p
, subcode
);
1409 return p
->p
.proto_state
;
/*
 * bgp_init - create a BGP protocol instance from its configuration
 * @CF: generic protocol configuration, also read as struct bgp_config
 *
 * Allocates the instance with proto_new(), installs the BGP hooks
 * (rt_notify, import_control, neigh_notify, reload_routes, feed_begin,
 * feed_end, rte_better, rte_mergable) and sets rte_recalculate only when
 * cf->deterministic_med is enabled.
 *
 * AS-related state copied from the configuration:
 *  - is_internal: local and remote AS are equal;
 *  - is_interior: is_internal or cf->confederation_member;
 *  - public_as: local AS, replaced by the confederation ID for peers that
 *    are not interior when a confederation is configured.
 *
 * Finally one channel is added per entry of CF->channels.
 *
 * NOTE(review): the return statement is not present in this listing.
 */
1412 static struct proto
*
1413 bgp_init(struct proto_config
*CF
)
1415 struct proto
*P
= proto_new(CF
);
1416 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1417 struct bgp_config
*cf
= (struct bgp_config
*) CF
;
1419 P
->rt_notify
= bgp_rt_notify
;
1420 P
->import_control
= bgp_import_control
;
1421 P
->neigh_notify
= bgp_neigh_notify
;
1422 P
->reload_routes
= bgp_reload_routes
;
1423 P
->feed_begin
= bgp_feed_begin
;
1424 P
->feed_end
= bgp_feed_end
;
1425 P
->rte_better
= bgp_rte_better
;
1426 P
->rte_mergable
= bgp_rte_mergable
;
1427 P
->rte_recalculate
= cf
->deterministic_med
? bgp_rte_recalculate
: NULL
;
1430 p
->local_as
= cf
->local_as
;
1431 p
->remote_as
= cf
->remote_as
;
1432 p
->public_as
= cf
->local_as
;
1433 p
->is_internal
= (cf
->local_as
== cf
->remote_as
);
1434 p
->is_interior
= p
->is_internal
|| cf
->confederation_member
;
1435 p
->rs_client
= cf
->rs_client
;
1436 p
->rr_client
= cf
->rr_client
;
1438 /* Confederation ID is used for truly external peers */
1439 if (cf
->confederation
&& !p
->is_interior
)
1440 p
->public_as
= cf
->confederation
;
1442 /* Add all channels */
1443 struct bgp_channel_config
*cc
;
1444 WALK_LIST(cc
, CF
->channels
)
1445 proto_add_channel(P
, &cc
->c
);
/*
 * bgp_channel_init - init hook of the BGP channel class
 * @C: channel, accessed as struct bgp_channel
 * @CF: channel configuration, accessed as struct bgp_channel_config
 *
 * Copies the IGP table pointers for IPv4 and IPv6 from the configuration
 * into the channel when the respective configuration entry is set.
 *
 * NOTE(review): original lines 1455-1459 are not present in this listing,
 * so further initialisation may exist upstream.
 */
1451 bgp_channel_init(struct channel
*C
, struct channel_config
*CF
)
1453 struct bgp_channel
*c
= (void *) C
;
1454 struct bgp_channel_config
*cf
= (void *) CF
;
1460 if (cf
->igp_table_ip4
)
1461 c
->igp_table_ip4
= cf
->igp_table_ip4
->table
;
1463 if (cf
->igp_table_ip6
)
1464 c
->igp_table_ip6
= cf
->igp_table_ip6
->table
;
/*
 * bgp_channel_start - start hook of the BGP channel class
 * @C: channel, accessed as struct bgp_channel
 *
 * Visible steps:
 *  - locks the IGP tables the channel refers to (rt_lock_table());
 *  - uses the protocol pool as the channel pool and initialises the bucket
 *    and prefix tables;
 *  - resets link_addr and packets_to_send and takes next_hop_addr from the
 *    channel configuration;
 *  - if no next hop is configured, falls back to the session source address
 *    when its family matches the channel family or c->ext_next_hop is set;
 *  - logs "Missing next hop address" if the next hop is still zero;
 *  - for an IPv6 next hop on a session with a neighbor entry, copies the
 *    link-local address from the protocol (warning when it is zero) and
 *    clears next_hop_addr when it is itself link-local.
 *
 * The visible return value is 0; the original comment marks the meaning of
 * the return value as currently undefined.
 *
 * NOTE(review): original lines 1502-1503 (after the missing next hop
 * warning) are not present in this listing.
 */
1468 bgp_channel_start(struct channel
*C
)
1470 struct bgp_proto
*p
= (void *) C
->proto
;
1471 struct bgp_channel
*c
= (void *) C
;
1472 ip_addr src
= p
->source_addr
;
1474 if (c
->igp_table_ip4
)
1475 rt_lock_table(c
->igp_table_ip4
);
1477 if (c
->igp_table_ip6
)
1478 rt_lock_table(c
->igp_table_ip6
);
1480 c
->pool
= p
->p
.pool
; // XXXX
1481 bgp_init_bucket_table(c
);
1482 bgp_init_prefix_table(c
);
1484 c
->next_hop_addr
= c
->cf
->next_hop_addr
;
1485 c
->link_addr
= IPA_NONE
;
1486 c
->packets_to_send
= 0;
1488 /* Try to use source address as next hop address */
1489 if (ipa_zero(c
->next_hop_addr
))
1491 if (bgp_channel_is_ipv4(c
) && (ipa_is_ip4(src
) || c
->ext_next_hop
))
1492 c
->next_hop_addr
= src
;
1494 if (bgp_channel_is_ipv6(c
) && (ipa_is_ip6(src
) || c
->ext_next_hop
))
1495 c
->next_hop_addr
= src
;
1498 /* Exit if no feasible next hop address is found */
1499 if (ipa_zero(c
->next_hop_addr
))
1501 log(L_WARN
"%s: Missing next hop address", p
->p
.name
);
1505 /* Set link-local address for IPv6 single-hop BGP */
1506 if (ipa_is_ip6(c
->next_hop_addr
) && p
->neigh
)
1508 c
->link_addr
= p
->link_addr
;
1510 if (ipa_zero(c
->link_addr
))
1511 log(L_WARN
"%s: Missing link-local address", p
->p
.name
);
1514 /* Link local address is already in c->link_addr */
1515 if (ipa_is_link_local(c
->next_hop_addr
))
1516 c
->next_hop_addr
= IPA_NONE
;
1518 return 0; /* XXXX: Currently undefined */
/*
 * bgp_channel_shutdown - shutdown hook of the BGP channel class
 * @C: channel, accessed as struct bgp_channel
 *
 * Resets the next hop and link-local addresses of the channel to IPA_NONE.
 * Cleanup of the bucket and prefix tables is still an open item (see the
 * XXXX note below).
 */
1522 bgp_channel_shutdown(struct channel
*C
)
1524 struct bgp_channel
*c
= (void *) C
;
1526 /* XXXX: cleanup bucket and prefix tables */
1528 c
->next_hop_addr
= IPA_NONE
;
1529 c
->link_addr
= IPA_NONE
;
/*
 * bgp_channel_cleanup - cleanup hook of the BGP channel class
 * @C: channel, accessed as struct bgp_channel
 *
 * Releases the IGP table locks taken in bgp_channel_start(): each of the
 * IPv4 and IPv6 IGP tables is unlocked when the channel refers to one.
 */
1533 bgp_channel_cleanup(struct channel
*C
)
1535 struct bgp_channel
*c
= (void *) C
;
1537 if (c
->igp_table_ip4
)
1538 rt_unlock_table(c
->igp_table_ip4
);
1540 if (c
->igp_table_ip6
)
1541 rt_unlock_table(c
->igp_table_ip6
);
/*
 * bgp_find_channel_config - look up a channel configuration by AFI value
 * @cf: BGP protocol configuration whose channel list is walked
 * @afi: address family identifier to look for
 *
 * NOTE(review): only the list walk over cf->c.channels is present in this
 * listing; the loop body and the return statements (original lines
 * 1550-1553) are missing. Presumably the entry whose afi matches is
 * returned and NULL otherwise (bgp_default_igp_table() tests the result for
 * NULL) -- confirm against upstream.
 */
1544 static inline struct bgp_channel_config
*
1545 bgp_find_channel_config(struct bgp_config
*cf
, u32 afi
)
1547 struct bgp_channel_config
*cc
;
1549 WALK_LIST(cc
, cf
->c
.channels
)
/*
 * bgp_default_igp_table - choose a default IGP table for a channel
 * @cf: BGP protocol configuration
 * @cc: channel configuration the table is wanted for
 * @type: requested table address type (compared with NET_IP4 below)
 *
 * Candidates are tried in this order (per the step comments):
 *  1. the table connected by the channel itself, if its addr_type matches;
 *  2. the IGP table configured in the paired channel, found by flipping the
 *     AFI bits with the 0x30000 mask;
 *  3. the table connected by the paired channel, if its addr_type matches;
 *  4. the global default table of the given type.
 * If nothing matches, cf_error("Undefined IGP table") is raised.
 *
 * NOTE(review): the return statements of steps 1, 2 and 4 are not present
 * in this listing (original lines 1564, 1572, 1580).
 */
1556 struct rtable_config
*
1557 bgp_default_igp_table(struct bgp_config
*cf
, struct bgp_channel_config
*cc
, u32 type
)
1559 struct bgp_channel_config
*cc2
;
1560 struct rtable_config
*tab
;
1562 /* First, try table connected by the channel */
1563 if (cc
->c
.table
->addr_type
== type
)
1566 /* Find paired channel with the same SAFI but the other AFI */
1567 u32 afi2
= cc
->afi
^ 0x30000;
1568 cc2
= bgp_find_channel_config(cf
, afi2
);
1570 /* Second, try IGP table configured in the paired channel */
1571 if (cc2
&& (tab
= (type
== NET_IP4
) ? cc2
->igp_table_ip4
: cc2
->igp_table_ip6
))
1574 /* Third, try table connected by the paired channel */
1575 if (cc2
&& (cc2
->c
.table
->addr_type
== type
))
1576 return cc2
->c
.table
;
1578 /* Last, try default table of given type */
1579 if (tab
= cf
->c
.global
->def_tables
[type
])
1582 cf_error("Undefined IGP table");
/*
 * bgp_postconfig - postconfig hook: fill in defaults and validate BGP config
 * @CF: generic protocol configuration, accessed as struct bgp_config
 *
 * Protocol-level part:
 *  - templates (class SYM_TEMPLATE) are not checked;
 *  - a negative multihop value is replaced by the default: 64 for sessions
 *    where local and remote AS are equal, 0 otherwise;
 *  - inconsistent settings are rejected with cf_error(); the messages below
 *    name each rule (missing AS numbers or neighbor, link-local neighbor
 *    without interface, AS4 range, RR/RS client role, confederation, and
 *    the multihop restrictions).
 *
 * Per-channel part (walk over CF->channels):
 *  - a PLA_RESTART import limit action is downgraded to PLA_DISABLE when
 *    disable_after_error is set;
 *  - defaults are derived for missing_lladdr, gw_mode, gr_able and, for
 *    recursive gateway mode on families with IGP lookup, the IGP tables;
 *  - IGP table / address family mismatches and the sorted-table constraints
 *    are rejected with cf_error().
 *
 * NOTE(review): several conditions are not present in this listing, e.g.
 * the tests before "Local AS number must be set" and "Remote AS number must
 * be set" (original lines 1602, 1608) and the test guarding the gw_mode
 * default (line 1652).
 */
1587 bgp_postconfig(struct proto_config
*CF
)
1589 struct bgp_config
*cf
= (void *) CF
;
1590 int internal
= (cf
->local_as
== cf
->remote_as
);
1592 /* Do not check templates at all */
1593 if (cf
->c
.class == SYM_TEMPLATE
)
1597 /* EBGP direct by default, IBGP multihop by default */
1598 if (cf
->multihop
< 0)
1599 cf
->multihop
= internal
? 64 : 0;
1603 cf_error("Local AS number must be set");
1605 if (ipa_zero(cf
->remote_ip
))
1606 cf_error("Neighbor must be configured");
1609 cf_error("Remote AS number must be set");
1611 if (ipa_is_link_local(cf
->remote_ip
) && !cf
->iface
)
1612 cf_error("Link-local neighbor address requires specified interface");
1614 if (!(cf
->capabilities
&& cf
->enable_as4
) && (cf
->remote_as
> 0xFFFF))
1615 cf_error("Neighbor AS number out of range (AS4 not available)");
1617 if (!internal
&& cf
->rr_client
)
1618 cf_error("Only internal neighbor can be RR client");
1620 if (internal
&& cf
->rs_client
)
1621 cf_error("Only external neighbor can be RS client");
1623 if (!cf
->confederation
&& cf
->confederation_member
)
1624 cf_error("Confederation ID must be set for member sessions");
1626 if (cf
->multihop
&& (ipa_is_link_local(cf
->local_ip
) ||
1627 ipa_is_link_local(cf
->remote_ip
)))
1628 cf_error("Multihop BGP cannot be used with link-local addresses");
1630 if (cf
->multihop
&& cf
->iface
)
1631 cf_error("Multihop BGP cannot be bound to interface");
1633 if (cf
->multihop
&& cf
->check_link
)
1634 cf_error("Multihop BGP cannot depend on link state");
1636 if (cf
->multihop
&& cf
->bfd
&& ipa_zero(cf
->local_ip
))
1637 cf_error("Multihop BGP with BFD requires specified local address");
1640 struct bgp_channel_config
*cc
;
1641 WALK_LIST(cc
, CF
->channels
)
1643 /* Disable after error incompatible with restart limit action */
1644 if ((cc
->c
.in_limit
.action
== PLA_RESTART
) && cf
->disable_after_error
)
1645 cc
->c
.in_limit
.action
= PLA_DISABLE
;
1647 /* Different default based on rs_client */
1648 if (!cc
->missing_lladdr
)
1649 cc
->missing_lladdr
= cf
->rs_client
? MLL_IGNORE
: MLL_SELF
;
1651 /* Different default for gw_mode */
1653 cc
->gw_mode
= cf
->multihop
? GW_RECURSIVE
: GW_DIRECT
;
1655 /* Default based on proto config */
/* 0xff is the value tested for "gr_able not set in the channel" */
1656 if (cc
->gr_able
== 0xff)
1657 cc
->gr_able
= (cf
->gr_mode
== BGP_GR_ABLE
);
1659 /* Default values of IGP tables */
1660 if ((cc
->gw_mode
== GW_RECURSIVE
) && !cc
->desc
->no_igp
)
1662 if (!cc
->igp_table_ip4
&& (bgp_cc_is_ipv4(cc
) || cc
->ext_next_hop
))
1663 cc
->igp_table_ip4
= bgp_default_igp_table(cf
, cc
, NET_IP4
);
1665 if (!cc
->igp_table_ip6
&& (bgp_cc_is_ipv6(cc
) || cc
->ext_next_hop
))
1666 cc
->igp_table_ip6
= bgp_default_igp_table(cf
, cc
, NET_IP6
);
1668 if (cc
->igp_table_ip4
&& bgp_cc_is_ipv6(cc
) && !cc
->ext_next_hop
)
1669 cf_error("Mismatched IGP table type");
1671 if (cc
->igp_table_ip6
&& bgp_cc_is_ipv4(cc
) && !cc
->ext_next_hop
)
1672 cf_error("Mismatched IGP table type");
1675 if (cf
->multihop
&& (cc
->gw_mode
== GW_DIRECT
))
1676 cf_error("Multihop BGP cannot use direct gateway mode");
1678 if ((cc
->gw_mode
== GW_RECURSIVE
) && cc
->c
.table
->sorted
)
1679 cf_error("BGP in recursive mode prohibits sorted table");
1681 if (cf
->deterministic_med
&& cc
->c
.table
->sorted
)
1682 cf_error("BGP with deterministic MED prohibits sorted table");
1684 if (cc
->secondary
&& !cc
->c
.table
->sorted
)
1685 cf_error("BGP with secondary option requires sorted table");
/*
 * bgp_reconfigure - reconfigure hook of a BGP protocol instance
 * @P: running protocol, accessed as struct bgp_proto
 * @CF: new protocol configuration, accessed as struct bgp_config
 *
 * The old and new configurations are compared bytewise over the BGP
 * specific part, i.e. from the end of the embedded struct proto_config up
 * to the password member; the password is compared separately with
 * strcmp() (both unset also counts as equal).
 *
 * Channels are then reconfigured with proto_configure_channel(): once for
 * every channel of the new configuration (looked up by AFI) and, in the
 * delete-safe walk, with a NULL configuration. Each result is folded into
 * same. If everything is unchanged and the instance is past BSS_PREPARE,
 * the BFD request is updated from the new configuration.
 *
 * NOTE(review): this listing lacks the statement after the router ID test
 * (original line 1697), the bodies and conditions of the channel loops
 * (lines 1711, 1721) and the final pointer update and return (lines
 * 1729-1732) -- confirm against upstream before relying on this summary.
 */
1690 bgp_reconfigure(struct proto
*P
, struct proto_config
*CF
)
1692 struct bgp_proto
*p
= (void *) P
;
1693 struct bgp_config
*new = (void *) CF
;
1694 struct bgp_config
*old
= p
->cf
;
1696 if (proto_get_router_id(CF
) != p
->local_id
)
1699 int same
= !memcmp(((byte
*) old
) + sizeof(struct proto_config
),
1700 ((byte
*) new) + sizeof(struct proto_config
),
1701 // password item is last and must be checked separately
1702 OFFSETOF(struct bgp_config
, password
) - sizeof(struct proto_config
))
1703 && ((!old
->password
&& !new->password
)
1704 || (old
->password
&& new->password
&& !strcmp(old
->password
, new->password
)));
1706 /* FIXME: Move channel reconfiguration to generic protocol code ? */
1707 struct channel
*C
, *C2
;
1708 struct bgp_channel_config
*cc
;
1710 WALK_LIST(C
, p
->p
.channels
)
1713 WALK_LIST(cc
, new->c
.channels
)
1715 C
= (struct channel
*) bgp_find_channel(p
, cc
->afi
);
1716 same
= proto_configure_channel(P
, &C
, &cc
->c
) && same
;
1720 WALK_LIST_DELSAFE(C
, C2
, p
->p
.channels
)
1722 same
= proto_configure_channel(P
, &C
, NULL
) && same
;
1725 if (same
&& (p
->start_state
> BSS_PREPARE
))
1726 bgp_update_bfd(p
, new->bfd
);
1728 /* We should update our copy of configuration ptr as old configuration will be freed */
/* Runtime table behind the igp_table_<sym> config entry of cf, or NULL when that entry is unset */
1735 #define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
/*
 * bgp_channel_reconfigure - reconfigure hook of the BGP channel class
 * @C: running channel, accessed as struct bgp_channel
 * @CC: new channel configuration, accessed as struct bgp_channel_config
 *
 * Compares the old and new BGP channel configuration bytewise from the end
 * of the embedded struct channel_config up to the member named rest, and
 * then compares the effective IGP tables for IPv4 and IPv6 by pointer.
 *
 * NOTE(review): the statements taken when either comparison detects a
 * difference, and the final update and return, are not present in this
 * listing (original lines 1748, 1753 onward).
 */
1738 bgp_channel_reconfigure(struct channel
*C
, struct channel_config
*CC
)
1740 struct bgp_channel
*c
= (void *) C
;
1741 struct bgp_channel_config
*new = (void *) CC
;
1742 struct bgp_channel_config
*old
= c
->cf
;
1744 if (memcmp(((byte
*) old
) + sizeof(struct channel_config
),
1745 ((byte
*) new) + sizeof(struct channel_config
),
1746 /* Remaining items must be checked separately */
1747 OFFSETOF(struct bgp_channel_config
, rest
) - sizeof(struct channel_config
)))
1750 /* Check change in IGP tables */
1751 if ((IGP_TABLE(old
, ip4
) != IGP_TABLE(new, ip4
)) ||
1752 (IGP_TABLE(old
, ip6
) != IGP_TABLE(new, ip6
)))
/*
 * bgp_copy_config - copy_config hook of the BGP protocol
 * @dest: destination configuration (marked UNUSED)
 * @src: source configuration (marked UNUSED)
 *
 * Both parameters are unused: no BGP specific copying is visible here.
 */
1760 bgp_copy_config(struct proto_config
*dest UNUSED
, struct proto_config
*src UNUSED
)
1762 /* Just a shallow copy */
1767 * bgp_error - report a protocol error
1769 * @code: error code (according to the RFC)
1770 * @subcode: error sub-code
1771 * @data: data to be passed in the Notification message
1772 * @len: length of the data
1774 * bgp_error() sends a notification packet to tell the other side that a protocol
1775 * error has occurred (including the data considered erroneous if possible) and
1776 * closes the connection.
/*
 * Implementation notes for bgp_error():
 *  - the error is logged with the absolute value of len, while the
 *    notification payload size is len only when it is positive (0
 *    otherwise), so a negative len affects logging only;
 *  - the stored error code packs code into the bits above 16 and subcode
 *    into the low bits;
 *  - the connection enters the close state before the PKT_NOTIFICATION
 *    packet is scheduled.
 *
 * NOTE(review): the statement guarded by the BS_CLOSE test and the condition
 * before bgp_update_startup_delay() are not present in this listing
 * (original lines 1784, 1796-1797).
 */
1779 bgp_error(struct bgp_conn
*c
, uint code
, uint subcode
, byte
*data
, int len
)
1781 struct bgp_proto
*p
= c
->bgp
;
1783 if (c
->state
== BS_CLOSE
)
1786 bgp_log_error(p
, BE_BGP_TX
, "Error", code
, subcode
, data
, ABS(len
));
1787 bgp_store_error(p
, c
, BE_BGP_TX
, (code
<< 16) | subcode
);
1788 bgp_conn_enter_close_state(c
);
1790 c
->notify_code
= code
;
1791 c
->notify_subcode
= subcode
;
1792 c
->notify_data
= data
;
1793 c
->notify_size
= (len
> 0) ? len
: 0;
1794 bgp_schedule_packet(c
, NULL
, PKT_NOTIFICATION
);
1798 bgp_update_startup_delay(p
);
1804 * bgp_store_error - store last error for status report
1807 * @class: error class (BE_xxx constants)
1808 * @code: error code (class specific)
1810 * bgp_store_error() decides whether the given error is interesting enough
1811 * and stores that error in the last_error variables of @p.
/*
 * Implementation notes for bgp_store_error():
 *  - c may be NULL; the secondary connection test only applies when a
 *    connection is given and it differs from p->conn;
 *  - when the error is kept, class and code are written to
 *    p->last_error_class and p->last_error_code.
 *
 * NOTE(review): the statements guarded by the two tests below are not
 * present in this listing (original lines 1818, 1824); per the comments
 * they make the function ignore the error.
 */
1814 bgp_store_error(struct bgp_proto
*p
, struct bgp_conn
*c
, u8
class, u32 code
)
1816 /* During PS_UP, we ignore errors on secondary connection */
1817 if ((p
->p
.proto_state
== PS_UP
) && c
&& (c
!= p
->conn
))
1820 /* During PS_STOP, we ignore any errors, as we want to report
1821 * the error that caused transition to PS_STOP
1823 if (p
->p
.proto_state
== PS_STOP
)
1826 p
->last_error_class
= class;
1827 p
->last_error_code
= code
;
/*
 * Message tables used by the status reporting functions below:
 *  - bgp_state_names is indexed by connection state in bgp_state_dsc();
 *  - bgp_err_classes is indexed by p->last_error_class and supplies the
 *    prefix printed before the error message;
 *  - bgp_misc_errors and bgp_auto_errors are indexed by p->last_error_code
 *    in bgp_last_errmsg().
 * None of these lookups is range-checked in the code visible here, so the
 * tables must stay in step with the constants used as indices.
 */
1830 static char *bgp_state_names
[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
1831 static char *bgp_err_classes
[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
1832 static char *bgp_misc_errors
[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};
1833 static char *bgp_auto_errors
[] = { "", "Route limit exceeded"};
/*
 * bgp_last_errmsg - textual description of the last stored error
 * @p: BGP instance
 *
 * Dispatches on p->last_error_class. The visible results are:
 *  - an entry of bgp_misc_errors indexed by the error code;
 *  - "Connection closed" for code 0, otherwise strerror() of the code;
 *  - bgp_error_dsc() called with the code shifted right by 16 and the low 8
 *    bits of the code;
 *  - an entry of bgp_auto_errors indexed by the error code.
 *
 * NOTE(review): the case labels and the default branch are not present in
 * this listing (original lines 1840, 1842, 1844-1845, 1847, 1849 onward),
 * so the class selecting each result cannot be confirmed from here.
 */
1836 bgp_last_errmsg(struct bgp_proto
*p
)
1838 switch (p
->last_error_class
)
1841 return bgp_misc_errors
[p
->last_error_code
];
1843 return (p
->last_error_code
== 0) ? "Connection closed" : strerror(p
->last_error_code
);
1846 return bgp_error_dsc(p
->last_error_code
>> 16, p
->last_error_code
& 0xFF);
1848 return bgp_auto_errors
[p
->last_error_code
];
/*
 * bgp_state_dsc - name of the current BGP state of an instance
 * @p: BGP instance
 *
 * The reported state is the larger of the incoming and outgoing connection
 * states, looked up in bgp_state_names.
 *
 * NOTE(review): the values returned for a protocol in PS_DOWN and for an
 * idle, passive instance that reached BSS_CONNECT are not present in this
 * listing (original lines 1858, 1862).
 */
1855 bgp_state_dsc(struct bgp_proto
*p
)
1857 if (p
->p
.proto_state
== PS_DOWN
)
1860 int state
= MAX(p
->incoming_conn
.state
, p
->outgoing_conn
.state
);
1861 if ((state
== BS_IDLE
) && (p
->start_state
>= BSS_CONNECT
) && p
->cf
->passive
)
1864 return bgp_state_names
[state
];
/*
 * bgp_get_status - get_status hook: format a one-line protocol status
 * @P: generic protocol structure, cast to struct bgp_proto
 * @buf: output buffer written with bsprintf()
 *
 * The text consists of the error class prefix and the last error message.
 * When the protocol is in PS_DOWN only those two parts are printed; the
 * second format additionally starts with the BGP state name left-aligned in
 * a 14 character field.
 *
 * NOTE(review): the else between the two bsprintf() calls (original line
 * 1877) is not present in this listing.
 */
1868 bgp_get_status(struct proto
*P
, byte
*buf
)
1870 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1872 const char *err1
= bgp_err_classes
[p
->last_error_class
];
1873 const char *err2
= bgp_last_errmsg(p
);
1875 if (P
->proto_state
== PS_DOWN
)
1876 bsprintf(buf
, "%s%s", err1
, err2
);
1878 bsprintf(buf
, "%-14s%s%s", bgp_state_dsc(p
), err1
, err2
);
/*
 * bgp_show_afis - print a list of address families as one CLI line
 * @code: CLI message code passed to cli_msg()
 * @s: leading text of the line (its use is not visible in this listing)
 * @afis: array of AFI values
 * @count: number of entries in @afis
 *
 * Each entry is looked up with bgp_get_af_desc() and appended to the buffer
 * b either by name or as "<afi/safi>" numbers. The text " ... <too long>"
 * is written 32 bytes before the end of the buffer, and the line is emitted
 * with cli_msg().
 *
 * NOTE(review): the declaration and setup of b, the test choosing between
 * the two print forms and the overflow test are not present in this listing
 * (original lines 1884-1888, 1892, 1894, 1898).
 */
1882 bgp_show_afis(int code
, char *s
, u32
*afis
, uint count
)
1889 for (u32
*af
= afis
; af
< (afis
+ count
); af
++)
1891 const struct bgp_af_desc
*desc
= bgp_get_af_desc(*af
);
1893 buffer_print(&b
, " %s", desc
->name
);
1895 buffer_print(&b
, " <%u/%u>", BGP_AFI(*af
), BGP_SAFI(*af
));
1899 strcpy(b
.end
- 32, " ... <too long>");
1901 cli_msg(code
, b
.start
);
/*
 * bgp_show_capabilities - print one side's BGP capabilities to the CLI
 * @p: BGP instance (marked UNUSED)
 * @caps: capability set to print
 *
 * A first pass over the per-AF capabilities ORs together the ready,
 * gr_able, add_path and ext_next_hop fields so that whole sections can be
 * gated on "any AF has it". Two scratch arrays sized by caps->af_count are
 * taken from the stack with alloca() and reused to collect the AFIs listed
 * under each section, which bgp_show_afis() then prints.
 *
 * Sections printed (all with CLI code -1006): Multiprotocol, Route refresh,
 * Extended next hop, Extended message, Graceful restart (restart time,
 * recovery flag, supported and preserved AFs), 4-octet AS numbers, ADD-PATH
 * (RX and TX lists) and Enhanced refresh.
 *
 * NOTE(review): the declarations and resets of afn1 / afn2 and several
 * gating conditions (for example before "Multiprotocol", "Graceful restart"
 * and "ADD-PATH") are not present in this listing.
 */
1905 bgp_show_capabilities(struct bgp_proto
*p UNUSED
, struct bgp_caps
*caps
)
1907 struct bgp_af_caps
*ac
;
1908 uint any_mp_bgp
= 0;
1909 uint any_gr_able
= 0;
1910 uint any_add_path
= 0;
1911 uint any_ext_next_hop
= 0;
1912 u32
*afl1
= alloca(caps
->af_count
* sizeof(u32
));
1913 u32
*afl2
= alloca(caps
->af_count
* sizeof(u32
));
1916 WALK_AF_CAPS(caps
, ac
)
1918 any_mp_bgp
|= ac
->ready
;
1919 any_gr_able
|= ac
->gr_able
;
1920 any_add_path
|= ac
->add_path
;
1921 any_ext_next_hop
|= ac
->ext_next_hop
;
1926 cli_msg(-1006, " Multiprotocol");
1929 WALK_AF_CAPS(caps
, ac
)
1931 afl1
[afn1
++] = ac
->afi
;
1933 bgp_show_afis(-1006, " AF announced:", afl1
, afn1
);
1936 if (caps
->route_refresh
)
1937 cli_msg(-1006, " Route refresh");
1939 if (any_ext_next_hop
)
1941 cli_msg(-1006, " Extended next hop");
1944 WALK_AF_CAPS(caps
, ac
)
1945 if (ac
->ext_next_hop
)
1946 afl1
[afn1
++] = ac
->afi
;
1948 bgp_show_afis(-1006, " IPv6 nexthop:", afl1
, afn1
);
1951 if (caps
->ext_messages
)
1952 cli_msg(-1006, " Extended message");
1955 cli_msg(-1006, " Graceful restart");
1959 /* Continues from gr_aware */
1960 cli_msg(-1006, " Restart time: %u", caps
->gr_time
);
1961 if (caps
->gr_flags
& BGP_GRF_RESTART
)
1962 cli_msg(-1006, " Restart recovery");
1965 WALK_AF_CAPS(caps
, ac
)
1968 afl1
[afn1
++] = ac
->afi
;
1970 if (ac
->gr_af_flags
& BGP_GRF_FORWARDING
)
1971 afl2
[afn2
++] = ac
->afi
;
1974 bgp_show_afis(-1006, " AF supported:", afl1
, afn1
);
1975 bgp_show_afis(-1006, " AF preserved:", afl2
, afn2
);
1978 if (caps
->as4_support
)
1979 cli_msg(-1006, " 4-octet AS numbers");
1983 cli_msg(-1006, " ADD-PATH");
1986 WALK_AF_CAPS(caps
, ac
)
1988 if (ac
->add_path
& BGP_ADD_PATH_RX
)
1989 afl1
[afn1
++] = ac
->afi
;
1991 if (ac
->add_path
& BGP_ADD_PATH_TX
)
1992 afl2
[afn2
++] = ac
->afi
;
1995 bgp_show_afis(-1006, " RX:", afl1
, afn1
);
1996 bgp_show_afis(-1006, " TX:", afl2
, afn2
);
1999 if (caps
->enhanced_refresh
)
2000 cli_msg(-1006, " Enhanced refresh");
/*
 * bgp_show_proto_info - show_proto_info hook: detailed CLI status output
 * @P: generic protocol structure, cast to struct bgp_proto
 *
 * Always prints the BGP state, the neighbor address (with interface) and
 * the neighbor AS, plus a note when a neighbor graceful restart is active.
 *
 * In PS_START the running timers are shown as remaining/total: the error
 * wait (startup timer, only before BSS_CONNECT), the connect delay (only
 * while the outgoing connection is in BS_ACTIVE) and the restart timer.
 * Remaining times are computed as the timer expiry minus now.
 *
 * In PS_UP it prints the neighbor ID, the local and neighbor capabilities
 * of p->conn, the session flags, the source address and the hold and
 * keepalive timers (remaining time from tm_remains()).
 *
 * The last error is printed unless its class is BE_NONE or BE_MAN_DOWN, and
 * finally every channel is shown together with its IGP tables.
 */
2004 bgp_show_proto_info(struct proto
*P
)
2006 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
2008 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p
));
2009 cli_msg(-1006, " Neighbor address: %I%J", p
->cf
->remote_ip
, p
->cf
->iface
);
2010 cli_msg(-1006, " Neighbor AS: %u", p
->remote_as
);
2012 if (p
->gr_active_num
)
2013 cli_msg(-1006, " Neighbor graceful restart active");
2015 if (P
->proto_state
== PS_START
)
2017 struct bgp_conn
*oc
= &p
->outgoing_conn
;
2019 if ((p
->start_state
< BSS_CONNECT
) &&
2020 (p
->startup_timer
->expires
))
2021 cli_msg(-1006, " Error wait: %d/%d",
2022 p
->startup_timer
->expires
- now
, p
->startup_delay
);
2024 if ((oc
->state
== BS_ACTIVE
) &&
2025 (oc
->connect_timer
->expires
))
2026 cli_msg(-1006, " Connect delay: %d/%d",
2027 oc
->connect_timer
->expires
- now
, p
->cf
->connect_delay_time
);
2029 if (p
->gr_active_num
&& p
->gr_timer
->expires
)
2030 cli_msg(-1006, " Restart timer: %d/-", p
->gr_timer
->expires
- now
);
2032 else if (P
->proto_state
== PS_UP
)
2034 cli_msg(-1006, " Neighbor ID: %R", p
->remote_id
);
2035 cli_msg(-1006, " Local capabilities");
2036 bgp_show_capabilities(p
, p
->conn
->local_caps
);
2037 cli_msg(-1006, " Neighbor capabilities");
2038 bgp_show_capabilities(p
, p
->conn
->remote_caps
);
/* One %s per optional session flag; unset flags contribute an empty string */
2040 cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s",
2041 p->is_internal ? "internal" : "external",
2042 p->cf->multihop ? " multihop" : "",
2043 p->rr_client ? " route-reflector" : "",
2044 p->rs_client ? " route-server" : "",
2045 p->as4_session ? " AS4" : "",
2046 p->add_path_rx ? " add-path-rx" : "",
2047 p->add_path_tx ? " add-path-tx" : "",
2048 p->ext_messages ? " ext-messages" : "");
2050 cli_msg(-1006, " Source address: %I", p
->source_addr
);
2051 cli_msg(-1006, " Hold timer: %d/%d",
2052 tm_remains(p
->conn
->hold_timer
), p
->conn
->hold_time
);
2053 cli_msg(-1006, " Keepalive timer: %d/%d",
2054 tm_remains(p
->conn
->keepalive_timer
), p
->conn
->keepalive_time
);
2057 if ((p
->last_error_class
!= BE_NONE
) &&
2058 (p
->last_error_class
!= BE_MAN_DOWN
))
2060 const char *err1
= bgp_err_classes
[p
->last_error_class
];
2061 const char *err2
= bgp_last_errmsg(p
);
2062 cli_msg(-1006, " Last error: %s%s", err1
, err2
);
2067 struct bgp_channel
*c
;
2068 WALK_LIST(c
, p
->p
.channels
)
2070 channel_show_info(&c
->c
);
2072 if (c
->igp_table_ip4
)
2073 cli_msg(-1006, " IGP IPv4 table: %s", c
->igp_table_ip4
->name
);
2075 if (c
->igp_table_ip6
)
2076 cli_msg(-1006, " IGP IPv6 table: %s", c
->igp_table_ip6
->name
);
/*
 * Channel class descriptor for BGP channels: gives the sizes of the BGP
 * specific channel and channel configuration structures and wires the
 * init, start, shutdown, cleanup and reconfigure hooks to the
 * bgp_channel_*() functions defined in this file.
 */
2081 struct channel_class channel_bgp
= {
2082 .channel_size
= sizeof(struct bgp_channel
),
2083 .config_size
= sizeof(struct bgp_channel_config
),
2084 .init
= bgp_channel_init
,
2085 .start
= bgp_channel_start
,
2086 .shutdown
= bgp_channel_shutdown
,
2087 .cleanup
= bgp_channel_cleanup
,
2088 .reconfigure
= bgp_channel_reconfigure
,
2091 struct protocol proto_bgp
= {
2093 .template = "bgp%d",
2094 .attr_class
= EAP_BGP
,
2095 .preference
= DEF_PREF_BGP
,
2096 .channel_mask
= NB_IP
| NB_VPN
| NB_FLOW
,
2097 .proto_size
= sizeof(struct bgp_proto
),
2098 .config_size
= sizeof(struct bgp_config
),
2099 .postconfig
= bgp_postconfig
,
2102 .shutdown
= bgp_shutdown
,
2103 .reconfigure
= bgp_reconfigure
,
2104 .copy_config
= bgp_copy_config
,
2105 .get_status
= bgp_get_status
,
2106 .get_attr
= bgp_get_attr
,
2107 .get_route_info
= bgp_get_route_info
,
2108 .show_proto_info
= bgp_show_proto_info