]>
git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/bgp.c
2 * BIRD -- The Border Gateway Protocol
4 * (c) 2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
10 * DOC: Border Gateway Protocol
12 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13 * connection and most of the interface with BIRD core, |packets.c| handling
14 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15 * manipulation with BGP attribute lists.
17 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18 * architecture which is able to keep all the information needed by BGP in the
19 * primary routing table, therefore no complex data structures like a central
20 * BGP table are needed. This increases memory footprint of a BGP router with
21 * many connections, but not too much and, which is more important, it makes
22 * BGP much easier to implement.
24 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
25 * structure to which are attached individual connections represented by &bgp_connection
26 * (usually, there exists only one connection, but during BGP session setup, there
27 * can be more of them). The connections are handled according to the BGP state machine
28 * defined in the RFC with all the timers and all the parameters configurable.
30 * In incoming direction, we listen on the connection's socket and each time we receive
31 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32 * passes complete packets to bgp_rx_packet() which distributes the packet according
35 * In outgoing direction, we gather all the routing updates and sort them to buckets
36 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37 * of &rta's and a &fib which helps us to find if we already have another route for
38 * the same destination queued for sending, so that we can replace it with the new one
39 * immediately instead of sending both updates). There also exists a special bucket holding
40 * all the route withdrawals which cannot be queued anywhere else as they don't have any
41 * attributes. If we have any packet to send (due to either new routes or the connection
42 * tracking code wanting to send an Open, Keepalive or Notification message), we call
43 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
44 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47 * type if we have more data of the same type to send.
49 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50 * of the attribute blocks and translating them to the language of BIRD's extended attributes
51 * and bgp_encode_attrs() which does the converse. Both functions are built around a
52 * @bgp_attr_table array describing all important characteristics of all known attributes.
53 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
58 #include "nest/bird.h"
59 #include "nest/iface.h"
60 #include "nest/protocol.h"
61 #include "nest/route.h"
63 #include "nest/locks.h"
64 #include "conf/conf.h"
65 #include "lib/socket.h"
66 #include "lib/resource.h"
67 #include "lib/string.h"
71 struct linpool
*bgp_linpool
; /* Global temporary pool */
72 static sock
*bgp_listen_sk
; /* Global listening socket */
73 static int bgp_counter
; /* Number of protocol instances using the listening socket */
75 static void bgp_close(struct bgp_proto
*p
, int apply_md5
);
76 static void bgp_connect(struct bgp_proto
*p
);
77 static void bgp_active(struct bgp_proto
*p
);
78 static sock
*bgp_setup_listen_sk(ip_addr addr
, unsigned port
, u32 flags
);
79 static void bgp_update_bfd(struct bgp_proto
*p
, int use_bfd
);
83 * bgp_open - open a BGP instance
86 * This function allocates and configures shared BGP resources.
87 * Should be called as the last step during initialization
88 * (when lock is acquired and neighbor is ready).
89 * On error, the state is changed to PS_DOWN, -1 is returned and the caller
90 * should return immediately.
93 bgp_open(struct bgp_proto
*p
)
95 struct config
*cfg
= p
->cf
->c
.global
;
101 bgp_listen_sk
= bgp_setup_listen_sk(cfg
->listen_bgp_addr
, cfg
->listen_bgp_port
, cfg
->listen_bgp_flags
);
106 errcode
= BEM_NO_SOCKET
;
111 bgp_linpool
= lp_new(&root_pool
, 4080);
115 int rv
= sk_set_md5_auth(bgp_listen_sk
, p
->cf
->remote_ip
, p
->cf
->iface
, p
->cf
->password
);
119 errcode
= BEM_INVALID_MD5
;
128 bgp_store_error(p
, NULL
, BE_MISC
, errcode
);
129 proto_notify_state(&p
->p
, PS_DOWN
);
134 bgp_startup(struct bgp_proto
*p
)
136 BGP_TRACE(D_EVENTS
, "Started");
137 p
->start_state
= p
->cf
->capabilities
? BSS_CONNECT
: BSS_CONNECT_NOCAP
;
144 bgp_startup_timeout(timer
*t
)
146 bgp_startup(t
->data
);
151 bgp_initiate(struct bgp_proto
*p
)
153 int rv
= bgp_open(p
);
158 bgp_update_bfd(p
, p
->cf
->bfd
);
160 if (p
->startup_delay
)
162 p
->start_state
= BSS_DELAY
;
163 BGP_TRACE(D_EVENTS
, "Startup delayed by %d seconds", p
->startup_delay
);
164 bgp_start_timer(p
->startup_timer
, p
->startup_delay
);
171 * bgp_close - close a BGP instance
173 * @apply_md5: 0 to disable unsetting MD5 auth
175 * This function frees and deconfigures shared BGP resources.
176 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
177 * from failed bgp_open().
180 bgp_close(struct bgp_proto
*p
, int apply_md5
)
185 if (p
->cf
->password
&& apply_md5
)
186 sk_set_md5_auth(bgp_listen_sk
, p
->cf
->remote_ip
, p
->cf
->iface
, NULL
);
190 rfree(bgp_listen_sk
);
191 bgp_listen_sk
= NULL
;
198 * bgp_start_timer - start a BGP timer
200 * @value: time to fire (0 to disable the timer)
202 * This function calls tm_start() on @t with time @value and the
203 * amount of randomization suggested by the BGP standard. Please use
204 * it for all BGP timers.
207 bgp_start_timer(timer
*t
, int value
)
211 /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
212 t
->randomize
= value
/ 4;
213 tm_start(t
, value
- t
->randomize
);
220 * bgp_close_conn - close a BGP connection
221 * @conn: connection to close
223 * This function takes a connection described by the &bgp_conn structure,
224 * closes its socket and frees all resources associated with it.
227 bgp_close_conn(struct bgp_conn
*conn
)
229 // struct bgp_proto *p = conn->bgp;
231 DBG("BGP: Closing connection\n");
232 conn
->packets_to_send
= 0;
233 rfree(conn
->connect_retry_timer
);
234 conn
->connect_retry_timer
= NULL
;
235 rfree(conn
->keepalive_timer
);
236 conn
->keepalive_timer
= NULL
;
237 rfree(conn
->hold_timer
);
238 conn
->hold_timer
= NULL
;
247 * bgp_update_startup_delay - update a startup delay
250 * This function updates a startup delay that is used to postpone next BGP connect.
251 * It also handles disable_after_error and might stop BGP instance when error
252 * happened and disable_after_error is on.
254 * It should be called when a BGP protocol error has happened.
257 bgp_update_startup_delay(struct bgp_proto
*p
)
259 struct bgp_config
*cf
= p
->cf
;
261 DBG("BGP: Updating startup delay\n");
263 if (p
->last_proto_error
&& ((now
- p
->last_proto_error
) >= (int) cf
->error_amnesia_time
))
264 p
->startup_delay
= 0;
266 p
->last_proto_error
= now
;
268 if (cf
->disable_after_error
)
270 p
->startup_delay
= 0;
275 if (!p
->startup_delay
)
276 p
->startup_delay
= cf
->error_delay_time_min
;
278 p
->startup_delay
= MIN(2 * p
->startup_delay
, cf
->error_delay_time_max
);
282 bgp_graceful_close_conn(struct bgp_conn
*conn
, unsigned subcode
)
291 bgp_conn_enter_idle_state(conn
);
296 bgp_error(conn
, 6, subcode
, NULL
, 0);
299 bug("bgp_graceful_close_conn: Unknown state %d", conn
->state
);
304 bgp_down(struct bgp_proto
*p
)
306 if (p
->start_state
> BSS_PREPARE
)
309 BGP_TRACE(D_EVENTS
, "Down");
310 proto_notify_state(&p
->p
, PS_DOWN
);
314 bgp_decision(void *vp
)
316 struct bgp_proto
*p
= vp
;
318 DBG("BGP: Decision start\n");
319 if ((p
->p
.proto_state
== PS_START
)
320 && (p
->outgoing_conn
.state
== BS_IDLE
)
321 && (!p
->cf
->passive
))
324 if ((p
->p
.proto_state
== PS_STOP
)
325 && (p
->outgoing_conn
.state
== BS_IDLE
)
326 && (p
->incoming_conn
.state
== BS_IDLE
))
331 bgp_stop(struct bgp_proto
*p
, unsigned subcode
)
333 proto_notify_state(&p
->p
, PS_STOP
);
334 bgp_graceful_close_conn(&p
->outgoing_conn
, subcode
);
335 bgp_graceful_close_conn(&p
->incoming_conn
, subcode
);
336 ev_schedule(p
->event
);
340 bgp_conn_set_state(struct bgp_conn
*conn
, unsigned new_state
)
342 if (conn
->bgp
->p
.mrtdump
& MD_STATES
)
343 mrt_dump_bgp_state_change(conn
, conn
->state
, new_state
);
345 conn
->state
= new_state
;
349 bgp_conn_enter_openconfirm_state(struct bgp_conn
*conn
)
351 /* Really, most of the work is done in bgp_rx_open(). */
352 bgp_conn_set_state(conn
, BS_OPENCONFIRM
);
356 bgp_conn_enter_established_state(struct bgp_conn
*conn
)
358 struct bgp_proto
*p
= conn
->bgp
;
360 BGP_TRACE(D_EVENTS
, "BGP session established");
363 /* For multi-hop BGP sessions */
364 if (ipa_zero(p
->source_addr
))
365 p
->source_addr
= conn
->sk
->saddr
;
368 p
->last_error_class
= 0;
369 p
->last_error_code
= 0;
370 bgp_init_bucket_table(p
);
371 bgp_init_prefix_table(p
, 8);
373 bgp_conn_set_state(conn
, BS_ESTABLISHED
);
374 proto_notify_state(&p
->p
, PS_UP
);
378 bgp_conn_leave_established_state(struct bgp_proto
*p
)
380 BGP_TRACE(D_EVENTS
, "BGP session closed");
383 if (p
->p
.proto_state
== PS_UP
)
388 bgp_conn_enter_close_state(struct bgp_conn
*conn
)
390 struct bgp_proto
*p
= conn
->bgp
;
391 int os
= conn
->state
;
393 bgp_conn_set_state(conn
, BS_CLOSE
);
394 tm_stop(conn
->keepalive_timer
);
395 conn
->sk
->rx_hook
= NULL
;
397 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
398 bgp_start_timer(conn
->hold_timer
, 10);
400 if (os
== BS_ESTABLISHED
)
401 bgp_conn_leave_established_state(p
);
405 bgp_conn_enter_idle_state(struct bgp_conn
*conn
)
407 struct bgp_proto
*p
= conn
->bgp
;
408 int os
= conn
->state
;
410 bgp_close_conn(conn
);
411 bgp_conn_set_state(conn
, BS_IDLE
);
412 ev_schedule(p
->event
);
414 if (os
== BS_ESTABLISHED
)
415 bgp_conn_leave_established_state(p
);
419 bgp_send_open(struct bgp_conn
*conn
)
421 conn
->start_state
= conn
->bgp
->start_state
;
423 // Default values, possibly changed by receiving capabilities.
424 conn
->peer_refresh_support
= 0;
425 conn
->peer_as4_support
= 0;
426 conn
->peer_add_path
= 0;
427 conn
->advertised_as
= 0;
429 DBG("BGP: Sending open\n");
430 conn
->sk
->rx_hook
= bgp_rx
;
431 conn
->sk
->tx_hook
= bgp_tx
;
432 tm_stop(conn
->connect_retry_timer
);
433 bgp_schedule_packet(conn
, PKT_OPEN
);
434 bgp_conn_set_state(conn
, BS_OPENSENT
);
435 bgp_start_timer(conn
->hold_timer
, conn
->bgp
->cf
->initial_hold_time
);
439 bgp_connected(sock
*sk
)
441 struct bgp_conn
*conn
= sk
->data
;
442 struct bgp_proto
*p
= conn
->bgp
;
444 BGP_TRACE(D_EVENTS
, "Connected");
449 bgp_connect_timeout(timer
*t
)
451 struct bgp_conn
*conn
= t
->data
;
452 struct bgp_proto
*p
= conn
->bgp
;
454 DBG("BGP: connect_timeout\n");
455 if (p
->p
.proto_state
== PS_START
)
457 bgp_close_conn(conn
);
461 bgp_conn_enter_idle_state(conn
);
465 bgp_sock_err(sock
*sk
, int err
)
467 struct bgp_conn
*conn
= sk
->data
;
468 struct bgp_proto
*p
= conn
->bgp
;
471 * This error hook may be called either asynchronously from main
472 * loop, or synchronously from sk_send(). But sk_send() is called
473 * only from bgp_tx() and bgp_kick_tx(), which are both called
474 * asynchronously from main loop. Moreover, they end if err hook is
475 * called. Therefore, we could suppose that it is always called
479 bgp_store_error(p
, conn
, BE_SOCKET
, err
);
482 BGP_TRACE(D_EVENTS
, "Connection lost (%M)", err
);
484 BGP_TRACE(D_EVENTS
, "Connection closed");
486 bgp_conn_enter_idle_state(conn
);
490 bgp_hold_timeout(timer
*t
)
492 struct bgp_conn
*conn
= t
->data
;
493 struct bgp_proto
*p
= conn
->bgp
;
495 DBG("BGP: Hold timeout\n");
497 /* We are already closing the connection - just do hangup */
498 if (conn
->state
== BS_CLOSE
)
500 BGP_TRACE(D_EVENTS
, "Connection stalled");
501 bgp_conn_enter_idle_state(conn
);
505 /* If there is something in the input queue, we are probably congested
506 and perhaps just have not processed BGP packets in time. */
508 if (sk_rx_ready(conn
->sk
) > 0)
509 bgp_start_timer(conn
->hold_timer
, 10);
511 bgp_error(conn
, 4, 0, NULL
, 0);
515 bgp_keepalive_timeout(timer
*t
)
517 struct bgp_conn
*conn
= t
->data
;
519 DBG("BGP: Keepalive timer\n");
520 bgp_schedule_packet(conn
, PKT_KEEPALIVE
);
524 bgp_setup_conn(struct bgp_proto
*p
, struct bgp_conn
*conn
)
530 conn
->packets_to_send
= 0;
532 t
= conn
->connect_retry_timer
= tm_new(p
->p
.pool
);
533 t
->hook
= bgp_connect_timeout
;
535 t
= conn
->hold_timer
= tm_new(p
->p
.pool
);
536 t
->hook
= bgp_hold_timeout
;
538 t
= conn
->keepalive_timer
= tm_new(p
->p
.pool
);
539 t
->hook
= bgp_keepalive_timeout
;
541 conn
->tx_ev
= ev_new(p
->p
.pool
);
542 conn
->tx_ev
->hook
= bgp_kick_tx
;
543 conn
->tx_ev
->data
= conn
;
547 bgp_setup_sk(struct bgp_conn
*conn
, sock
*s
)
550 s
->err_hook
= bgp_sock_err
;
555 bgp_active(struct bgp_proto
*p
)
557 int delay
= MAX(1, p
->cf
->start_delay_time
);
558 struct bgp_conn
*conn
= &p
->outgoing_conn
;
560 BGP_TRACE(D_EVENTS
, "Connect delayed by %d seconds", delay
);
561 bgp_setup_conn(p
, conn
);
562 bgp_conn_set_state(conn
, BS_ACTIVE
);
563 bgp_start_timer(conn
->connect_retry_timer
, delay
);
567 * bgp_connect - initiate an outgoing connection
570 * The bgp_connect() function creates a new &bgp_conn and initiates
571 * a TCP connection to the peer. The rest of connection setup is governed
572 * by the BGP state machine as described in the standard.
575 bgp_connect(struct bgp_proto
*p
) /* Enter Connect state and start establishing connection */
578 struct bgp_conn
*conn
= &p
->outgoing_conn
;
579 int hops
= p
->cf
->multihop
? : 1;
581 DBG("BGP: Connecting\n");
582 s
= sk_new(p
->p
.pool
);
583 s
->type
= SK_TCP_ACTIVE
;
584 s
->saddr
= p
->source_addr
;
585 s
->daddr
= p
->cf
->remote_ip
;
586 s
->iface
= p
->neigh
? p
->neigh
->iface
: NULL
;
588 s
->ttl
= p
->cf
->ttl_security
? 255 : hops
;
589 s
->rbsize
= BGP_RX_BUFFER_SIZE
;
590 s
->tbsize
= BGP_TX_BUFFER_SIZE
;
591 s
->tos
= IP_PREC_INTERNET_CONTROL
;
592 s
->password
= p
->cf
->password
;
593 s
->tx_hook
= bgp_connected
;
594 BGP_TRACE(D_EVENTS
, "Connecting to %I%J from local address %I%J", s
->daddr
, p
->cf
->iface
,
595 s
->saddr
, ipa_has_link_scope(s
->saddr
) ? s
->iface
: NULL
);
596 bgp_setup_conn(p
, conn
);
597 bgp_setup_sk(conn
, s
);
598 bgp_conn_set_state(conn
, BS_CONNECT
);
606 /* Set minimal receive TTL if needed */
607 if (p
->cf
->ttl_security
)
609 DBG("Setting minimum received TTL to %d", 256 - hops
);
610 if (sk_set_min_ttl(s
, 256 - hops
) < 0)
612 log(L_ERR
"TTL security configuration failed, closing session");
618 DBG("BGP: Waiting for connect success\n");
619 bgp_start_timer(conn
->connect_retry_timer
, p
->cf
->connect_retry_time
);
623 * bgp_incoming_connection - handle an incoming connection
627 * This function serves as a socket hook for accepting new BGP
628 * connections. It searches for a BGP instance corresponding to the peer
629 * which has connected and if such an instance exists, it creates a
630 * &bgp_conn structure, attaches it to the instance and either sends
631 * an Open message or (if there already is an active connection) it
632 * closes the new connection by sending a Notification message.
635 bgp_incoming_connection(sock
*sk
, int dummy UNUSED
)
637 struct proto_config
*pc
;
639 DBG("BGP: Incoming connection from %I port %d\n", sk
->daddr
, sk
->dport
);
640 WALK_LIST(pc
, config
->protos
)
641 if (pc
->protocol
== &proto_bgp
&& pc
->proto
)
643 struct bgp_proto
*p
= (struct bgp_proto
*) pc
->proto
;
644 if (ipa_equal(p
->cf
->remote_ip
, sk
->daddr
) &&
645 (!ipa_has_link_scope(sk
->daddr
) || (p
->cf
->iface
== sk
->iface
)))
647 /* We are in proper state and there is no other incoming connection */
648 int acc
= (p
->p
.proto_state
== PS_START
|| p
->p
.proto_state
== PS_UP
) &&
649 (p
->start_state
>= BSS_CONNECT
) && (!p
->incoming_conn
.sk
);
651 BGP_TRACE(D_EVENTS
, "Incoming connection from %I%J (port %d) %s",
652 sk
->daddr
, ipa_has_link_scope(sk
->daddr
) ? sk
->iface
: NULL
,
653 sk
->dport
, acc
? "accepted" : "rejected");
658 int hops
= p
->cf
->multihop
? : 1;
659 if (p
->cf
->ttl_security
)
661 /* TTL security support */
662 if ((sk_set_ttl(sk
, 255) < 0) ||
663 (sk_set_min_ttl(sk
, 256 - hops
) < 0))
665 log(L_ERR
"TTL security configuration failed, closing session");
670 sk_set_ttl(sk
, hops
);
672 bgp_setup_conn(p
, &p
->incoming_conn
);
673 bgp_setup_sk(&p
->incoming_conn
, sk
);
674 bgp_send_open(&p
->incoming_conn
);
679 log(L_WARN
"BGP: Unexpected connect from unknown address %I%J (port %d)",
680 sk
->daddr
, ipa_has_link_scope(sk
->daddr
) ? sk
->iface
: NULL
, sk
->dport
);
687 bgp_listen_sock_err(sock
*sk UNUSED
, int err
)
689 if (err
== ECONNABORTED
)
690 log(L_WARN
"BGP: Incoming connection aborted");
692 log(L_ERR
"BGP: Error on listening socket: %M", err
);
696 bgp_setup_listen_sk(ip_addr addr
, unsigned port
, u32 flags
)
698 sock
*s
= sk_new(&root_pool
);
699 DBG("BGP: Creating listening socket\n");
700 s
->type
= SK_TCP_PASSIVE
;
703 s
->sport
= port
? port
: BGP_PORT
;
704 s
->flags
= flags
? 0 : SKF_V6ONLY
;
705 s
->tos
= IP_PREC_INTERNET_CONTROL
;
706 s
->rbsize
= BGP_RX_BUFFER_SIZE
;
707 s
->tbsize
= BGP_TX_BUFFER_SIZE
;
708 s
->rx_hook
= bgp_incoming_connection
;
709 s
->err_hook
= bgp_listen_sock_err
;
713 log(L_ERR
"BGP: Unable to open listening socket");
722 bgp_start_neighbor(struct bgp_proto
*p
)
724 /* Called only for single-hop BGP sessions */
726 if (ipa_zero(p
->source_addr
))
727 p
->source_addr
= p
->neigh
->ifa
->ip
;
732 p
->local_link
= IPA_NONE
;
733 WALK_LIST(a
, p
->neigh
->iface
->addrs
)
734 if (a
->scope
== SCOPE_LINK
)
736 p
->local_link
= a
->ip
;
740 if (! ipa_nonzero(p
->local_link
))
741 log(L_WARN
"%s: Missing link local address on interface %s", p
->p
.name
, p
->neigh
->iface
->name
);
743 DBG("BGP: Selected link-level address %I\n", p
->local_link
);
751 bgp_neigh_notify(neighbor
*n
)
753 struct bgp_proto
*p
= (struct bgp_proto
*) n
->proto
;
755 if (! (n
->flags
& NEF_STICKY
))
760 if ((p
->p
.proto_state
== PS_START
) && (p
->start_state
== BSS_PREPARE
))
762 BGP_TRACE(D_EVENTS
, "Neighbor found");
763 bgp_start_neighbor(p
);
768 if ((p
->p
.proto_state
== PS_START
) || (p
->p
.proto_state
== PS_UP
))
770 BGP_TRACE(D_EVENTS
, "Neighbor lost");
771 bgp_store_error(p
, NULL
, BE_MISC
, BEM_NEIGHBOR_LOST
);
778 bgp_bfd_notify(struct bfd_request
*req
)
780 struct bgp_proto
*p
= req
->data
;
781 int ps
= p
->p
.proto_state
;
783 if (req
->down
&& ((ps
== PS_START
) || (ps
== PS_UP
)))
785 BGP_TRACE(D_EVENTS
, "BFD session down");
786 bgp_store_error(p
, NULL
, BE_MISC
, BEM_BFD_DOWN
);
788 bgp_update_startup_delay(p
);
794 bgp_update_bfd(struct bgp_proto
*p
, int use_bfd
)
796 if (use_bfd
&& !p
->bfd_req
)
797 p
->bfd_req
= bfd_request_session(p
->p
.pool
, p
->cf
->remote_ip
, p
->source_addr
,
798 p
->cf
->multihop
? NULL
: p
->neigh
->iface
,
801 if (!use_bfd
&& p
->bfd_req
)
809 bgp_reload_routes(struct proto
*P
)
811 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
812 if (!p
->conn
|| !p
->conn
->peer_refresh_support
)
815 bgp_schedule_packet(p
->conn
, PKT_ROUTE_REFRESH
);
820 bgp_start_locked(struct object_lock
*lock
)
822 struct bgp_proto
*p
= lock
->data
;
823 struct bgp_config
*cf
= p
->cf
;
825 if (p
->p
.proto_state
!= PS_START
)
827 DBG("BGP: Got lock in different state %d\n", p
->p
.proto_state
);
831 DBG("BGP: Got lock\n");
835 /* Multi-hop sessions do not use neighbor entries */
840 p
->neigh
= neigh_find2(&p
->p
, &cf
->remote_ip
, cf
->iface
, NEF_STICKY
);
841 if (!p
->neigh
|| (p
->neigh
->scope
== SCOPE_HOST
))
843 log(L_ERR
"%s: Invalid remote address %I%J", p
->p
.name
, cf
->remote_ip
, cf
->iface
);
844 /* As we do not start yet, we can just disable protocol */
846 bgp_store_error(p
, NULL
, BE_MISC
, BEM_INVALID_NEXT_HOP
);
847 proto_notify_state(&p
->p
, PS_DOWN
);
851 if (p
->neigh
->scope
> 0)
852 bgp_start_neighbor(p
);
854 BGP_TRACE(D_EVENTS
, "Waiting for %I%J to become my neighbor", cf
->remote_ip
, cf
->iface
);
858 bgp_start(struct proto
*P
)
860 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
861 struct object_lock
*lock
;
863 DBG("BGP: Startup.\n");
864 p
->start_state
= BSS_PREPARE
;
865 p
->outgoing_conn
.state
= BS_IDLE
;
866 p
->incoming_conn
.state
= BS_IDLE
;
870 rt_lock_table(p
->igp_table
);
872 p
->event
= ev_new(p
->p
.pool
);
873 p
->event
->hook
= bgp_decision
;
876 p
->startup_timer
= tm_new(p
->p
.pool
);
877 p
->startup_timer
->hook
= bgp_startup_timeout
;
878 p
->startup_timer
->data
= p
;
880 p
->local_id
= proto_get_router_id(P
->cf
);
882 p
->rr_cluster_id
= p
->cf
->rr_cluster_id
? p
->cf
->rr_cluster_id
: p
->local_id
;
885 p
->source_addr
= p
->cf
->source_addr
;
888 * Before attempting to create the connection, we need to lock the
889 * port, so that we are sure we're the only instance attempting to talk
890 * with that neighbor.
893 lock
= p
->lock
= olock_new(P
->pool
);
894 lock
->addr
= p
->cf
->remote_ip
;
895 lock
->iface
= p
->cf
->iface
;
896 lock
->type
= OBJLOCK_TCP
;
897 lock
->port
= BGP_PORT
;
898 lock
->hook
= bgp_start_locked
;
905 extern int proto_restart
;
908 bgp_shutdown(struct proto
*P
)
910 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
911 unsigned subcode
= 0;
913 BGP_TRACE(D_EVENTS
, "Shutdown requested");
915 switch (P
->down_code
)
919 subcode
= 3; // Errcode 6, 3 - peer de-configured
923 subcode
= 6; // Errcode 6, 6 - other configuration change
926 case PDC_CMD_DISABLE
:
927 case PDC_CMD_SHUTDOWN
:
928 subcode
= 2; // Errcode 6, 2 - administrative shutdown
931 case PDC_CMD_RESTART
:
932 subcode
= 4; // Errcode 6, 4 - administrative reset
935 case PDC_RX_LIMIT_HIT
:
936 case PDC_IN_LIMIT_HIT
:
937 subcode
= 1; // Errcode 6, 1 - max number of prefixes reached
938 /* log message for compatibility */
939 log(L_WARN
"%s: Route limit exceeded, shutting down", p
->p
.name
);
942 case PDC_OUT_LIMIT_HIT
:
943 subcode
= proto_restart
? 4 : 2; // Administrative reset or shutdown
946 bgp_store_error(p
, NULL
, BE_AUTO_DOWN
, BEA_ROUTE_LIMIT_EXCEEDED
);
948 bgp_update_startup_delay(p
);
950 p
->startup_delay
= 0;
954 bgp_store_error(p
, NULL
, BE_MAN_DOWN
, 0);
955 p
->startup_delay
= 0;
958 bgp_stop(p
, subcode
);
959 return p
->p
.proto_state
;
963 bgp_cleanup(struct proto
*P
)
965 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
966 rt_unlock_table(p
->igp_table
);
970 get_igp_table(struct bgp_config
*cf
)
972 return cf
->igp_table
? cf
->igp_table
->table
: cf
->c
.table
->table
;
975 static struct proto
*
976 bgp_init(struct proto_config
*C
)
978 struct proto
*P
= proto_new(C
, sizeof(struct bgp_proto
));
979 struct bgp_config
*c
= (struct bgp_config
*) C
;
980 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
982 P
->accept_ra_types
= c
->secondary
? RA_ACCEPTED
: RA_OPTIMAL
;
983 P
->rt_notify
= bgp_rt_notify
;
984 P
->import_control
= bgp_import_control
;
985 P
->neigh_notify
= bgp_neigh_notify
;
986 P
->reload_routes
= bgp_reload_routes
;
987 P
->rte_better
= bgp_rte_better
;
988 P
->rte_recalculate
= c
->deterministic_med
? bgp_rte_recalculate
: NULL
;
991 p
->local_as
= c
->local_as
;
992 p
->remote_as
= c
->remote_as
;
993 p
->is_internal
= (c
->local_as
== c
->remote_as
);
994 p
->rs_client
= c
->rs_client
;
995 p
->rr_client
= c
->rr_client
;
996 p
->igp_table
= get_igp_table(c
);
1003 bgp_check_config(struct bgp_config
*c
)
1005 int internal
= (c
->local_as
== c
->remote_as
);
1007 /* Do not check templates at all */
1008 if (c
->c
.class == SYM_TEMPLATE
)
1012 /* EBGP direct by default, IBGP multihop by default */
1013 if (c
->multihop
< 0)
1014 c
->multihop
= internal
? 64 : 0;
1016 /* Different default for gw_mode */
1018 c
->gw_mode
= c
->multihop
? GW_RECURSIVE
: GW_DIRECT
;
1020 /* Different default based on rs_client */
1021 if (!c
->missing_lladdr
)
1022 c
->missing_lladdr
= c
->rs_client
? MLL_IGNORE
: MLL_SELF
;
1024 /* Disable after error incompatible with restart limit action */
1025 if (c
->c
.in_limit
&& (c
->c
.in_limit
->action
== PLA_RESTART
) && c
->disable_after_error
)
1026 c
->c
.in_limit
->action
= PLA_DISABLE
;
1030 cf_error("Local AS number must be set");
1033 cf_error("Neighbor must be configured");
1035 if (!(c
->capabilities
&& c
->enable_as4
) && (c
->remote_as
> 0xFFFF))
1036 cf_error("Neighbor AS number out of range (AS4 not available)");
1038 if (!internal
&& c
->rr_client
)
1039 cf_error("Only internal neighbor can be RR client");
1041 if (internal
&& c
->rs_client
)
1042 cf_error("Only external neighbor can be RS client");
1044 if (c
->multihop
&& (c
->gw_mode
== GW_DIRECT
))
1045 cf_error("Multihop BGP cannot use direct gateway mode");
1047 if (c
->multihop
&& (ipa_has_link_scope(c
->remote_ip
) ||
1048 ipa_has_link_scope(c
->source_addr
)))
1049 cf_error("Multihop BGP cannot be used with link-local addresses");
1051 if (c
->multihop
&& c
->bfd
&& ipa_zero(c
->source_addr
))
1052 cf_error("Multihop BGP with BFD requires specified source address");
1054 if ((c
->gw_mode
== GW_RECURSIVE
) && c
->c
.table
->sorted
)
1055 cf_error("BGP in recursive mode prohibits sorted table");
1057 if (c
->deterministic_med
&& c
->c
.table
->sorted
)
1058 cf_error("BGP with deterministic MED prohibits sorted table");
1060 if (c
->secondary
&& !c
->c
.table
->sorted
)
1061 cf_error("BGP with secondary option requires sorted table");
1065 bgp_reconfigure(struct proto
*P
, struct proto_config
*C
)
1067 struct bgp_config
*new = (struct bgp_config
*) C
;
1068 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1069 struct bgp_config
*old
= p
->cf
;
1071 if (proto_get_router_id(C
) != p
->local_id
)
1074 int same
= !memcmp(((byte
*) old
) + sizeof(struct proto_config
),
1075 ((byte
*) new) + sizeof(struct proto_config
),
1076 // password item is last and must be checked separately
1077 OFFSETOF(struct bgp_config
, password
) - sizeof(struct proto_config
))
1078 && ((!old
->password
&& !new->password
)
1079 || (old
->password
&& new->password
&& !strcmp(old
->password
, new->password
)))
1080 && (get_igp_table(old
) == get_igp_table(new));
1082 if (same
&& (p
->start_state
> BSS_PREPARE
))
1083 bgp_update_bfd(p
, new->bfd
);
1085 /* We should update our copy of configuration ptr as old configuration will be freed */
1093 bgp_copy_config(struct proto_config
*dest
, struct proto_config
*src
)
1095 /* Just a shallow copy */
1096 proto_copy_rest(dest
, src
, sizeof(struct bgp_config
));
1101 * bgp_error - report a protocol error
1103 * @code: error code (according to the RFC)
1104 * @subcode: error sub-code
1105 * @data: data to be passed in the Notification message
1106 * @len: length of the data
1108 * bgp_error() sends a notification packet to tell the other side that a protocol
1109 * error has occurred (including the data considered erroneous if possible) and
1110 * closes the connection.
1113 bgp_error(struct bgp_conn
*c
, unsigned code
, unsigned subcode
, byte
*data
, int len
)
1115 struct bgp_proto
*p
= c
->bgp
;
1117 if (c
->state
== BS_CLOSE
)
1120 bgp_log_error(p
, BE_BGP_TX
, "Error", code
, subcode
, data
, (len
> 0) ? len
: -len
);
1121 bgp_store_error(p
, c
, BE_BGP_TX
, (code
<< 16) | subcode
);
1122 bgp_conn_enter_close_state(c
);
1124 c
->notify_code
= code
;
1125 c
->notify_subcode
= subcode
;
1126 c
->notify_data
= data
;
1127 c
->notify_size
= (len
> 0) ? len
: 0;
1128 bgp_schedule_packet(c
, PKT_NOTIFICATION
);
1132 bgp_update_startup_delay(p
);
1138 * bgp_store_error - store last error for status report
1141 * @class: error class (BE_xxx constants)
1142 * @code: error code (class specific)
1144 * bgp_store_error() decides whether given error is interesting enough
1145 * and stores that error in the last_error variables of @p
1148 bgp_store_error(struct bgp_proto
*p
, struct bgp_conn
*c
, u8
class, u32 code
)
1150 /* During PS_UP, we ignore errors on secondary connection */
1151 if ((p
->p
.proto_state
== PS_UP
) && c
&& (c
!= p
->conn
))
1154 /* During PS_STOP, we ignore any errors, as we want to report
1155 * the error that caused transition to PS_STOP
1157 if (p
->p
.proto_state
== PS_STOP
)
1160 p
->last_error_class
= class;
1161 p
->last_error_code
= code
;
1164 static char *bgp_state_names
[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
1165 static char *bgp_err_classes
[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
1166 static char *bgp_misc_errors
[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" };
1167 static char *bgp_auto_errors
[] = { "", "Route limit exceeded"};
1170 bgp_last_errmsg(struct bgp_proto
*p
)
1172 switch (p
->last_error_class
)
1175 return bgp_misc_errors
[p
->last_error_code
];
1177 return (p
->last_error_code
== 0) ? "Connection closed" : strerror(p
->last_error_code
);
1180 return bgp_error_dsc(p
->last_error_code
>> 16, p
->last_error_code
& 0xFF);
1182 return bgp_auto_errors
[p
->last_error_code
];
1189 bgp_state_dsc(struct bgp_proto
*p
)
1191 if (p
->p
.proto_state
== PS_DOWN
)
1194 int state
= MAX(p
->incoming_conn
.state
, p
->outgoing_conn
.state
);
1195 if ((state
== BS_IDLE
) && (p
->start_state
>= BSS_CONNECT
) && p
->cf
->passive
)
1198 return bgp_state_names
[state
];
1202 bgp_get_status(struct proto
*P
, byte
*buf
)
1204 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1206 const char *err1
= bgp_err_classes
[p
->last_error_class
];
1207 const char *err2
= bgp_last_errmsg(p
);
1209 if (P
->proto_state
== PS_DOWN
)
1210 bsprintf(buf
, "%s%s", err1
, err2
);
1212 bsprintf(buf
, "%-14s%s%s", bgp_state_dsc(p
), err1
, err2
);
1216 bgp_show_proto_info(struct proto
*P
)
1218 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1219 struct bgp_conn
*c
= p
->conn
;
1221 proto_show_basic_info(P
);
1223 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p
));
1224 cli_msg(-1006, " Neighbor address: %I%J", p
->cf
->remote_ip
, p
->cf
->iface
);
1225 cli_msg(-1006, " Neighbor AS: %u", p
->remote_as
);
1227 if (P
->proto_state
== PS_START
)
1229 struct bgp_conn
*oc
= &p
->outgoing_conn
;
1231 if ((p
->start_state
< BSS_CONNECT
) &&
1232 (p
->startup_timer
->expires
))
1233 cli_msg(-1006, " Error wait: %d/%d",
1234 p
->startup_timer
->expires
- now
, p
->startup_delay
);
1236 if ((oc
->state
== BS_ACTIVE
) &&
1237 (oc
->connect_retry_timer
->expires
))
1238 cli_msg(-1006, " Start delay: %d/%d",
1239 oc
->connect_retry_timer
->expires
- now
, p
->cf
->start_delay_time
);
1241 else if (P
->proto_state
== PS_UP
)
1243 cli_msg(-1006, " Neighbor ID: %R", p
->remote_id
);
1244 cli_msg(-1006, " Neighbor caps: %s%s%s%s",
1245 c
->peer_refresh_support
? " refresh" : "",
1246 c
->peer_as4_support
? " AS4" : "",
1247 (c
->peer_add_path
& ADD_PATH_RX
) ? " add-path-rx" : "",
1248 (c
->peer_add_path
& ADD_PATH_TX
) ? " add-path-tx" : "");
1249 cli_msg(-1006, " Session: %s%s%s%s%s%s%s",
1250 p
->is_internal
? "internal" : "external",
1251 p
->cf
->multihop
? " multihop" : "",
1252 p
->rr_client
? " route-reflector" : "",
1253 p
->rs_client
? " route-server" : "",
1254 p
->as4_session
? " AS4" : "",
1255 p
->add_path_rx
? " add-path-rx" : "",
1256 p
->add_path_tx
? " add-path-tx" : "");
1257 cli_msg(-1006, " Source address: %I", p
->source_addr
);
1258 if (P
->cf
->in_limit
)
1259 cli_msg(-1006, " Route limit: %d/%d",
1260 p
->p
.stats
.imp_routes
+ p
->p
.stats
.filt_routes
, P
->cf
->in_limit
->limit
);
1261 cli_msg(-1006, " Hold timer: %d/%d",
1262 tm_remains(c
->hold_timer
), c
->hold_time
);
1263 cli_msg(-1006, " Keepalive timer: %d/%d",
1264 tm_remains(c
->keepalive_timer
), c
->keepalive_time
);
1267 if ((p
->last_error_class
!= BE_NONE
) &&
1268 (p
->last_error_class
!= BE_MAN_DOWN
))
1270 const char *err1
= bgp_err_classes
[p
->last_error_class
];
1271 const char *err2
= bgp_last_errmsg(p
);
1272 cli_msg(-1006, " Last error: %s%s", err1
, err2
);
1276 struct protocol proto_bgp
= {
1279 attr_class
: EAP_BGP
,
1280 preference
: DEF_PREF_BGP
,
1283 shutdown
: bgp_shutdown
,
1284 cleanup
: bgp_cleanup
,
1285 reconfigure
: bgp_reconfigure
,
1286 copy_config
: bgp_copy_config
,
1287 get_status
: bgp_get_status
,
1288 get_attr
: bgp_get_attr
,
1289 get_route_info
: bgp_get_route_info
,
1290 show_proto_info
: bgp_show_proto_info