]> git.ipfire.org Git - thirdparty/bird.git/blame - proto/bgp/bgp.c
BGP: Add MPLS support
[thirdparty/bird.git] / proto / bgp / bgp.c
CommitLineData
2638249d
MM
1/*
2 * BIRD -- The Border Gateway Protocol
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
d15b0b0a
OZ
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
2638249d
MM
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
54e55169
MM
11/**
12 * DOC: Border Gateway Protocol
13 *
d15b0b0a
OZ
14 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
15 * the connection and most of the interface with BIRD core, |packets.c| handling
54e55169
MM
16 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
17 * manipulation with BGP attribute lists.
18 *
d15b0b0a
OZ
19 * As opposed to the other existing routing daemons, BIRD has a sophisticated
20 * core architecture which is able to keep all the information needed by BGP in
21 * the primary routing table, therefore no complex data structures like a
22 * central BGP table are needed. This increases memory footprint of a BGP router
23 * with many connections, but not too much and, which is more important, it
24 * makes BGP much easier to implement.
54e55169 25 *
d15b0b0a
OZ
26 * Each instance of BGP (corresponding to a single BGP peer) is described by a
27 * &bgp_proto structure to which are attached individual connections represented
28 * by &bgp_connection (usually, there exists only one connection, but during BGP
29 * session setup, there can be more of them). The connections are handled
30 * according to the BGP state machine defined in the RFC with all the timers and
31 * all the parameters configurable.
54e55169 32 *
d15b0b0a
OZ
33 * In incoming direction, we listen on the connection's socket and each time we
34 * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
35 * markers and passes complete packets to bgp_rx_packet() which distributes the
36 * packet according to its type.
54e55169 37 *
d15b0b0a
OZ
38 * In outgoing direction, we gather all the routing updates and sort them to
39 * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
40 * fast comparison of &rta's and a &fib which helps us to find if we already
41 * have another route for the same destination queued for sending, so that we
42 * can replace it with the new one immediately instead of sending both
43 * updates). There also exists a special bucket holding all the route
44 * withdrawals which cannot be queued anywhere else as they don't have any
45 * attributes. If we have any packet to send (due to either new routes or the
46 * connection tracking code wanting to send an Open, Keepalive or Notification
47 * message), we call bgp_schedule_packet() which sets the corresponding bit in a
48 * @packets_to_send bit field in &bgp_conn and as soon as the transmit socket
49 * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
50 * packet type bits and calls the corresponding bgp_create_xx() functions,
51 * eventually rescheduling the same packet type if we have more data of the same
52 * type to send.
54e55169 53 *
d15b0b0a
OZ
54 * The processing of attributes consists of two functions: bgp_decode_attrs()
55 * for checking of the attribute blocks and translating them to the language of
56 * BIRD's extended attributes and bgp_encode_attrs() which does the
57 * converse. Both functions are built around a @bgp_attr_table array describing
58 * all important characteristics of all known attributes. Unknown transitive
59 * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
6eda3f13
OZ
60 *
61 * BGP protocol implements graceful restart in both restarting (local restart)
62 * and receiving (neighbor restart) roles. The first is handled mostly by the
63 * graceful restart code in the nest, BGP protocol just handles capabilities,
64 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
65 * The second is implemented by internal restart of the BGP state to %BS_IDLE
66 * and protocol state to %PS_START, but keeping the protocol up from the core
67 * point of view and therefore maintaining received routes. Routing table
68 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
69 * stale routes after reestablishment of BGP session during graceful restart.
c49e4a65
OZ
70 *
71 * Supported standards:
0f40405f
OZ
72 * RFC 4271 - Border Gateway Protocol 4 (BGP)
73 * RFC 1997 - BGP Communities Attribute
74 * RFC 2385 - Protection of BGP Sessions via TCP MD5 Signature
75 * RFC 2545 - Use of BGP Multiprotocol Extensions for IPv6
76 * RFC 2918 - Route Refresh Capability
77 * RFC 3107 - Carrying Label Information in BGP
78 * RFC 4360 - BGP Extended Communities Attribute
79 * RFC 4364 - BGP/MPLS IPv4 Virtual Private Networks
80 * RFC 4456 - BGP Route Reflection
81 * RFC 4486 - Subcodes for BGP Cease Notification Message
82 * RFC 4659 - BGP/MPLS IPv6 Virtual Private Networks
83 * RFC 4724 - Graceful Restart Mechanism for BGP
84 * RFC 4760 - Multiprotocol extensions for BGP
85 * RFC 4798 - Connecting IPv6 Islands over IPv4 MPLS
86 * RFC 5065 - AS confederations for BGP
87 * RFC 5082 - Generalized TTL Security Mechanism
88 * RFC 5492 - Capabilities Advertisement with BGP
0f40405f
OZ
89 * RFC 5575 - Dissemination of Flow Specification Rules
90 * RFC 5668 - 4-Octet AS Specific BGP Extended Community
91 * RFC 6286 - AS-Wide Unique BGP Identifier
92 * RFC 6608 - Subcodes for BGP Finite State Machine Error
93 * RFC 6793 - BGP Support for 4-Octet AS Numbers
09ee846d 94 * RFC 7311 - Accumulated IGP Metric Attribute for BGP
0f40405f
OZ
95 * RFC 7313 - Enhanced Route Refresh Capability for BGP
96 * RFC 7606 - Revised Error Handling for BGP UPDATE Messages
97 * RFC 7911 - Advertisement of Multiple Paths in BGP
98 * RFC 7947 - Internet Exchange BGP Route Server
99 * RFC 8092 - BGP Large Communities Attribute
100 * RFC 8203 - BGP Administrative Shutdown Communication
101 * RFC 8212 - Default EBGP Route Propagation Behavior without Policies
be7c1aef 102 * RFC 8654 - Extended Message Support for BGP
9bf20484 103 * RFC 8950 - Advertising IPv4 NLRI with an IPv6 Next Hop
913ec57f 104 * RFC 9072 - Extended Optional Parameters Length for BGP OPEN Message
1f2eb2ac 105 * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications
c73b5d2d 106 * RFC 9234 - Route Leak Prevention and Detection Using Roles
0f40405f 107 * draft-uttaro-idr-bgp-persistence-04
71423871 108 * draft-walton-bgp-hostname-capability-02
0f40405f 109 */
54e55169 110
48d79d52 111#undef LOCAL_DEBUG
2638249d 112
02552526
OZ
113#include <stdlib.h>
114
2638249d
MM
115#include "nest/bird.h"
116#include "nest/iface.h"
117#include "nest/protocol.h"
118#include "nest/route.h"
b8113a5e 119#include "nest/cli.h"
1ec52253 120#include "nest/locks.h"
2638249d 121#include "conf/conf.h"
3831b619 122#include "filter/filter.h"
c01e3741 123#include "lib/socket.h"
973399ae 124#include "lib/resource.h"
7d875e09 125#include "lib/string.h"
2638249d
MM
126
127#include "bgp.h"
a848dad4 128#include "proto/bmp/bmp.h"
2638249d 129
e7d2ac44 130
06ece326 131static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */
d15b0b0a 132
c01e3741 133
c01e3741 134static void bgp_connect(struct bgp_proto *p);
dd91e467 135static void bgp_active(struct bgp_proto *p);
e0835db4
OZ
136static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn);
137static void bgp_setup_sk(struct bgp_conn *conn, sock *s);
138static void bgp_send_open(struct bgp_conn *conn);
9d3fc306 139static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd);
2638249d 140
d15b0b0a
OZ
141static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
142static void bgp_listen_sock_err(sock *sk UNUSED, int err);
11cb6202 143
11b32d91
OZ
144/**
145 * bgp_open - open a BGP instance
146 * @p: BGP instance
147 *
d15b0b0a
OZ
148 * This function allocates and configures shared BGP resources, mainly listening
149 * sockets. Should be called as the last step during initialization (when lock
150 * is acquired and neighbor is ready). When error, caller should change state to
151 * PS_DOWN and return immediately.
11b32d91
OZ
152 */
153static int
154bgp_open(struct bgp_proto *p)
155{
d15b0b0a
OZ
156 struct bgp_socket *bs = NULL;
157 struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
158 ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
e0835db4 159 (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
d15b0b0a 160 uint port = p->cf->local_port;
60e9def9
OZ
161 uint flags = p->cf->free_bind ? SKF_FREEBIND : 0;
162 uint flag_mask = SKF_FREEBIND;
d15b0b0a 163
d15b0b0a 164 /* We assume that cf->iface is defined iff cf->local_ip is link-local */
11b32d91 165
d15b0b0a 166 WALK_LIST(bs, bgp_sockets)
60e9def9
OZ
167 if (ipa_equal(bs->sk->saddr, addr) &&
168 (bs->sk->sport == port) &&
169 (bs->sk->iface == ifa) &&
170 (bs->sk->vrf == p->p.vrf) &&
171 ((bs->sk->flags & flag_mask) == flags))
a34b0934 172 {
d15b0b0a
OZ
173 bs->uc++;
174 p->sock = bs;
175 return 0;
a34b0934
OZ
176 }
177
d15b0b0a
OZ
178 sock *sk = sk_new(proto_pool);
179 sk->type = SK_TCP_PASSIVE;
180 sk->ttl = 255;
181 sk->saddr = addr;
182 sk->sport = port;
e19d0805
OZ
183 sk->iface = ifa;
184 sk->vrf = p->p.vrf;
60e9def9 185 sk->flags = flags;
d15b0b0a
OZ
186 sk->tos = IP_PREC_INTERNET_CONTROL;
187 sk->rbsize = BGP_RX_BUFFER_SIZE;
188 sk->tbsize = BGP_TX_BUFFER_SIZE;
189 sk->rx_hook = bgp_incoming_connection;
190 sk->err_hook = bgp_listen_sock_err;
191
192 if (sk_open(sk) < 0)
193 goto err;
11b32d91 194
d15b0b0a
OZ
195 bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
196 bs->sk = sk;
197 bs->uc = 1;
198 p->sock = bs;
470740f9 199 sk->data = bs;
05476c4d 200
d15b0b0a
OZ
201 add_tail(&bgp_sockets, &bs->n);
202
11b32d91 203 return 0;
b1b19433
OZ
204
205err:
d15b0b0a
OZ
206 sk_log_error(sk, p->p.name);
207 log(L_ERR "%s: Cannot open listening socket", p->p.name);
208 rfree(sk);
b1b19433 209 return -1;
11b32d91
OZ
210}
211
d15b0b0a
OZ
212/**
213 * bgp_close - close a BGP instance
214 * @p: BGP instance
215 *
216 * This function frees and deconfigures shared BGP resources.
217 */
218static void
219bgp_close(struct bgp_proto *p)
220{
221 struct bgp_socket *bs = p->sock;
222
223 ASSERT(bs && bs->uc);
224
225 if (--bs->uc)
226 return;
227
228 rfree(bs->sk);
229 rem_node(&bs->n);
230 mb_free(bs);
d15b0b0a
OZ
231}
232
233static inline int
234bgp_setup_auth(struct bgp_proto *p, int enable)
235{
236 if (p->cf->password)
237 {
757cab18
OZ
238 ip_addr prefix = p->cf->remote_ip;
239 int pxlen = -1;
240
241 if (p->cf->remote_range)
242 {
243 prefix = net_prefix(p->cf->remote_range);
244 pxlen = net_pxlen(p->cf->remote_range);
245 }
246
d15b0b0a 247 int rv = sk_set_md5_auth(p->sock->sk,
757cab18 248 p->cf->local_ip, prefix, pxlen, p->cf->iface,
d15b0b0a
OZ
249 enable ? p->cf->password : NULL, p->cf->setkey);
250
251 if (rv < 0)
252 sk_log_error(p->sock->sk, p->p.name);
253
254 return rv;
255 }
256 else
257 return 0;
258}
259
260static inline struct bgp_channel *
261bgp_find_channel(struct bgp_proto *p, u32 afi)
262{
263 struct bgp_channel *c;
54430df9 264 BGP_WALK_CHANNELS(p, c)
d15b0b0a
OZ
265 if (c->afi == afi)
266 return c;
267
268 return NULL;
269}
270
dd91e467
OZ
271static void
272bgp_startup(struct bgp_proto *p)
273{
274 BGP_TRACE(D_EVENTS, "Started");
d15b0b0a 275 p->start_state = BSS_CONNECT;
be6e39eb 276
e0835db4 277 if (!p->passive)
be6e39eb 278 bgp_active(p);
e0835db4
OZ
279
280 if (p->postponed_sk)
281 {
282 /* Apply postponed incoming connection */
283 bgp_setup_conn(p, &p->incoming_conn);
284 bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
285 bgp_send_open(&p->incoming_conn);
286 p->postponed_sk = NULL;
287 }
dd91e467
OZ
288}
289
290static void
291bgp_startup_timeout(timer *t)
292{
293 bgp_startup(t->data);
294}
295
296
297static void
298bgp_initiate(struct bgp_proto *p)
299{
d15b0b0a
OZ
300 int err_val;
301
302 if (bgp_open(p) < 0)
303 { err_val = BEM_NO_SOCKET; goto err1; }
304
305 if (bgp_setup_auth(p, 1) < 0)
306 { err_val = BEM_INVALID_MD5; goto err2; }
9be9a264 307
1ec52253
OZ
308 if (p->cf->bfd)
309 bgp_update_bfd(p, p->cf->bfd);
310
dd91e467 311 if (p->startup_delay)
d15b0b0a
OZ
312 {
313 p->start_state = BSS_DELAY;
314 BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
315 bgp_start_timer(p->startup_timer, p->startup_delay);
316 }
dd91e467
OZ
317 else
318 bgp_startup(p);
dd91e467 319
d15b0b0a 320 return;
d51aa281 321
d15b0b0a
OZ
322err2:
323 bgp_close(p);
324err1:
325 p->p.disabled = 1;
326 bgp_store_error(p, NULL, BE_MISC, err_val);
91d04583
OZ
327
328 p->neigh = NULL;
d15b0b0a 329 proto_notify_state(&p->p, PS_DOWN);
d51aa281 330
d15b0b0a 331 return;
c01e3741
MM
332}
333
54e55169
MM
334/**
335 * bgp_start_timer - start a BGP timer
336 * @t: timer
cc881bd1 337 * @value: time (in seconds) to fire (0 to disable the timer)
54e55169 338 *
d15b0b0a
OZ
339 * This functions calls tm_start() on @t with time @value and the amount of
340 * randomization suggested by the BGP standard. Please use it for all BGP
341 * timers.
54e55169 342 */
3fdbafb6 343void
cc881bd1 344bgp_start_timer(timer *t, uint value)
c01e3741 345{
3fdbafb6 346 if (value)
d15b0b0a 347 {
cc881bd1
OZ
348 /* The randomization procedure is specified in RFC 4271 section 10 */
349 btime time = value S;
350 btime randomize = random() % ((time / 4) + 1);
a6f79ca5 351 tm_start(t, time - randomize);
d15b0b0a 352 }
b552ecc4 353 else
a6f79ca5 354 tm_stop(t);
b552ecc4
MM
355}
356
54e55169
MM
357/**
358 * bgp_close_conn - close a BGP connection
359 * @conn: connection to close
360 *
d15b0b0a
OZ
361 * This function takes a connection described by the &bgp_conn structure, closes
362 * its socket and frees all resources associated with it.
54e55169 363 */
b552ecc4
MM
364void
365bgp_close_conn(struct bgp_conn *conn)
366{
e81b440f 367 // struct bgp_proto *p = conn->bgp;
b552ecc4
MM
368
369 DBG("BGP: Closing connection\n");
370 conn->packets_to_send = 0;
d15b0b0a
OZ
371 conn->channels_to_send = 0;
372 rfree(conn->connect_timer);
373 conn->connect_timer = NULL;
b552ecc4
MM
374 rfree(conn->keepalive_timer);
375 conn->keepalive_timer = NULL;
376 rfree(conn->hold_timer);
377 conn->hold_timer = NULL;
11b32d91
OZ
378 rfree(conn->tx_ev);
379 conn->tx_ev = NULL;
d15b0b0a
OZ
380 rfree(conn->sk);
381 conn->sk = NULL;
382
1be0be1b
OZ
383 mb_free(conn->local_open_msg);
384 conn->local_open_msg = NULL;
385 mb_free(conn->remote_open_msg);
386 conn->remote_open_msg = NULL;
387 conn->local_open_length = 0;
388 conn->remote_open_length = 0;
389
d15b0b0a
OZ
390 mb_free(conn->local_caps);
391 conn->local_caps = NULL;
392 mb_free(conn->remote_caps);
393 conn->remote_caps = NULL;
4558adab
OZ
394
395 conn->notify_data = NULL;
396 conn->notify_size = 0;
11b32d91
OZ
397}
398
399
400/**
401 * bgp_update_startup_delay - update a startup delay
402 * @p: BGP instance
11b32d91 403 *
d15b0b0a
OZ
404 * This function updates a startup delay that is used to postpone next BGP
405 * connect. It also handles disable_after_error and might stop BGP instance
406 * when error happened and disable_after_error is on.
11b32d91
OZ
407 *
408 * It should be called when BGP protocol error happened.
409 */
410void
b99d3786 411bgp_update_startup_delay(struct bgp_proto *p)
11b32d91 412{
a22c3e59 413 const struct bgp_config *cf = p->cf;
11b32d91 414
b99d3786 415 DBG("BGP: Updating startup delay\n");
11b32d91 416
cc881bd1 417 if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
72382626
OZ
418 p->startup_delay = 0;
419
cc881bd1 420 p->last_proto_error = current_time();
11b32d91
OZ
421
422 if (cf->disable_after_error)
d15b0b0a
OZ
423 {
424 p->startup_delay = 0;
425 p->p.disabled = 1;
426 return;
427 }
11b32d91 428
11b32d91
OZ
429 if (!p->startup_delay)
430 p->startup_delay = cf->error_delay_time_min;
431 else
b99d3786 432 p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
c01e3741
MM
433}
434
11b32d91 435static void
8a68316e 436bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len)
48e842cc 437{
11b32d91 438 switch (conn->state)
d15b0b0a
OZ
439 {
440 case BS_IDLE:
441 case BS_CLOSE:
442 return;
443
444 case BS_CONNECT:
445 case BS_ACTIVE:
446 bgp_conn_enter_idle_state(conn);
447 return;
448
449 case BS_OPENSENT:
450 case BS_OPENCONFIRM:
451 case BS_ESTABLISHED:
8a68316e
OZ
452 if (subcode < 0)
453 {
454 bgp_conn_enter_close_state(conn);
455 bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
456 }
457 else
458 bgp_error(conn, 6, subcode, data, len);
d15b0b0a
OZ
459 return;
460
461 default:
462 bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
463 }
48e842cc
MM
464}
465
11b32d91
OZ
466static void
467bgp_down(struct bgp_proto *p)
468{
469 if (p->start_state > BSS_PREPARE)
d15b0b0a
OZ
470 {
471 bgp_setup_auth(p, 0);
472 bgp_close(p);
473 }
11b32d91 474
91d04583
OZ
475 p->neigh = NULL;
476
b99d3786 477 BGP_TRACE(D_EVENTS, "Down");
11b32d91
OZ
478 proto_notify_state(&p->p, PS_DOWN);
479}
480
481static void
482bgp_decision(void *vp)
483{
484 struct bgp_proto *p = vp;
485
486 DBG("BGP: Decision start\n");
d15b0b0a
OZ
487 if ((p->p.proto_state == PS_START) &&
488 (p->outgoing_conn.state == BS_IDLE) &&
489 (p->incoming_conn.state != BS_OPENCONFIRM) &&
e0835db4 490 !p->passive)
dd91e467 491 bgp_active(p);
11b32d91 492
d15b0b0a
OZ
493 if ((p->p.proto_state == PS_STOP) &&
494 (p->outgoing_conn.state == BS_IDLE) &&
495 (p->incoming_conn.state == BS_IDLE))
11b32d91
OZ
496 bgp_down(p);
497}
498
e0835db4
OZ
499static struct bgp_proto *
500bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
501{
502 struct symbol *sym;
503 char fmt[SYM_MAX_LEN];
504
505 bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits);
506
507 /* This is hack, we would like to share config, but we need to copy it now */
508 new_config = config;
509 cfg_mem = config->mem;
51f2e7af
MM
510 config->current_scope = config->root_scope;
511 sym = cf_default_name(config, fmt, &(pp->dynamic_name_counter));
e0835db4
OZ
512 proto_clone_config(sym, pp->p.cf);
513 new_config = NULL;
514 cfg_mem = NULL;
515
516 /* Just pass remote_ip to bgp_init() */
eac9250f 517 ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
e0835db4 518
eac9250f 519 return (void *) proto_spawn(sym->proto, 0);
e0835db4
OZ
520}
521
b99d3786 522void
8a68316e 523bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
11b32d91 524{
9d456d53
OZ
525 proto_shutdown_mpls_map(&p->p, 1);
526
11b32d91 527 proto_notify_state(&p->p, PS_STOP);
cd1d9961
OZ
528 bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
529 bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
11b32d91
OZ
530 ev_schedule(p->event);
531}
532
cf31112f 533static inline void
d15b0b0a 534bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
cf31112f
OZ
535{
536 if (conn->bgp->p.mrtdump & MD_STATES)
863ecfc7 537 bgp_dump_state_change(conn, conn->state, new_state);
cf31112f
OZ
538
539 conn->state = new_state;
540}
541
542void
543bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
544{
545 /* Really, most of the work is done in bgp_rx_open(). */
546 bgp_conn_set_state(conn, BS_OPENCONFIRM);
547}
548
d15b0b0a 549static const struct bgp_af_caps dummy_af_caps = { };
7e5f769d 550static const struct bgp_af_caps basic_af_caps = { .ready = 1 };
d15b0b0a 551
11b32d91
OZ
552void
553bgp_conn_enter_established_state(struct bgp_conn *conn)
554{
555 struct bgp_proto *p = conn->bgp;
d15b0b0a
OZ
556 struct bgp_caps *local = conn->local_caps;
557 struct bgp_caps *peer = conn->remote_caps;
558 struct bgp_channel *c;
523f020b 559
11b32d91 560 BGP_TRACE(D_EVENTS, "BGP session established");
21d09632
OZ
561 p->last_established = current_time();
562 p->stats.fsm_established_transitions++;
11b32d91 563
9be9a264 564 /* For multi-hop BGP sessions */
a22c3e59
OZ
565 if (ipa_zero(p->local_ip))
566 p->local_ip = conn->sk->saddr;
9be9a264 567
23ee6b1c
OZ
568 /* For promiscuous sessions */
569 if (!p->remote_as)
570 p->remote_as = conn->received_as;
571
e16b0aef
OZ
572 /* In case of LLv6 is not valid during BGP start */
573 if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6)
574 p->link_addr = p->neigh->iface->llv6->ip;
575
9e7b3ebd
OZ
576 conn->sk->fast_rx = 0;
577
11b32d91
OZ
578 p->conn = conn;
579 p->last_error_class = 0;
580 p->last_error_code = 0;
094d2bdb 581
d15b0b0a
OZ
582 p->as4_session = conn->as4_session;
583
584 p->route_refresh = peer->route_refresh;
585 p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
0c791f87 586
5bd73431
OZ
587 /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
588 p->gr_ready = p->llgr_ready = 0; /* Updated later */
0c791f87 589
d15b0b0a
OZ
590 /* Whether peer is ready to handle our GR recovery */
591 int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
0c791f87 592
d15b0b0a 593 if (p->gr_active_num)
a6f79ca5 594 tm_stop(p->gr_timer);
0c791f87 595
d15b0b0a
OZ
596 /* Number of active channels */
597 int num = 0;
598
863ecfc7
OZ
599 /* Summary state of ADD_PATH RX for active channels */
600 uint summary_add_path_rx = 0;
601
54430df9 602 BGP_WALK_CHANNELS(p, c)
d15b0b0a
OZ
603 {
604 const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
605 const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
606
7e5f769d
OZ
607 /* Use default if capabilities were not announced */
608 if (!local->length && (c->afi == BGP_AF_IPV4))
609 loc = &basic_af_caps;
610
611 if (!peer->length && (c->afi == BGP_AF_IPV4))
612 rem = &basic_af_caps;
613
d15b0b0a
OZ
614 /* Ignore AFIs that were not announced in multiprotocol capability */
615 if (!loc || !loc->ready)
616 loc = &dummy_af_caps;
617
618 if (!rem || !rem->ready)
619 rem = &dummy_af_caps;
620
621 int active = loc->ready && rem->ready;
622 c->c.disabled = !active;
682d3f7d 623 c->c.reloadable = p->route_refresh || c->cf->import_table;
d15b0b0a
OZ
624
625 c->index = active ? num++ : 0;
626
627 c->feed_state = BFS_NONE;
628 c->load_state = BFS_NONE;
629
630 /* Channels where peer may do GR */
5bd73431
OZ
631 uint gr_ready = active && local->gr_aware && rem->gr_able;
632 uint llgr_ready = active && local->llgr_aware && rem->llgr_able;
633
634 c->gr_ready = gr_ready || llgr_ready;
d15b0b0a 635 p->gr_ready = p->gr_ready || c->gr_ready;
5bd73431
OZ
636 p->llgr_ready = p->llgr_ready || llgr_ready;
637
638 /* Remember last LLGR stale time */
639 c->stale_time = local->llgr_aware ? rem->llgr_time : 0;
0c791f87 640
d15b0b0a
OZ
641 /* Channels not able to recover gracefully */
642 if (p->p.gr_recovery && (!active || !peer_gr_ready))
643 channel_graceful_restart_unlock(&c->c);
9aed29e6 644
d15b0b0a
OZ
645 /* Channels waiting for local convergence */
646 if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
647 c->c.gr_wait = 1;
648
5bd73431
OZ
649 /* Channels where regular graceful restart failed */
650 if ((c->gr_active == BGP_GRS_ACTIVE) &&
651 !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
652 bgp_graceful_restart_done(c);
653
654 /* Channels where regular long-lived restart failed */
655 if ((c->gr_active == BGP_GRS_LLGR) &&
656 !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING)))
d15b0b0a
OZ
657 bgp_graceful_restart_done(c);
658
659 /* GR capability implies that neighbor will send End-of-RIB */
660 if (peer->gr_aware)
661 c->load_state = BFS_LOADING;
662
d8022d26 663 c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
d15b0b0a
OZ
664 c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
665 c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
666
863ecfc7
OZ
667 if (active)
668 summary_add_path_rx |= !c->add_path_rx ? 1 : 2;
669
f8aad5d5 670 /* Update RA mode */
d15b0b0a
OZ
671 if (c->add_path_tx)
672 c->c.ra_mode = RA_ANY;
f8aad5d5
OZ
673 else if (c->cf->secondary)
674 c->c.ra_mode = RA_ACCEPTED;
675 else
676 c->c.ra_mode = RA_OPTIMAL;
d15b0b0a
OZ
677 }
678
679 p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
680 p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
681 p->channel_count = num;
863ecfc7 682 p->summary_add_path_rx = summary_add_path_rx;
d15b0b0a 683
54430df9 684 BGP_WALK_CHANNELS(p, c)
d15b0b0a
OZ
685 {
686 if (c->c.disabled)
687 continue;
688
689 p->afi_map[c->index] = c->afi;
690 p->channel_map[c->index] = c;
691 }
692
693 /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
9aed29e6 694
cf31112f 695 bgp_conn_set_state(conn, BS_ESTABLISHED);
11b32d91 696 proto_notify_state(&p->p, PS_UP);
aa3c3549
OZ
697 bmp_peer_up(p, conn->local_open_msg, conn->local_open_length,
698 conn->remote_open_msg, conn->remote_open_length);
11b32d91
OZ
699}
700
701static void
4558adab 702bgp_conn_leave_established_state(struct bgp_conn *conn, struct bgp_proto *p)
11b32d91
OZ
703{
704 BGP_TRACE(D_EVENTS, "BGP session closed");
21d09632 705 p->last_established = current_time();
11b32d91
OZ
706 p->conn = NULL;
707
708 if (p->p.proto_state == PS_UP)
cd1d9961 709 bgp_stop(p, 0, NULL, 0);
4558adab
OZ
710
711 bmp_peer_down(p, p->last_error_class,
712 conn->notify_code, conn->notify_subcode,
713 conn->notify_data, conn->notify_size);
11b32d91
OZ
714}
715
716void
717bgp_conn_enter_close_state(struct bgp_conn *conn)
718{
719 struct bgp_proto *p = conn->bgp;
720 int os = conn->state;
721
cf31112f 722 bgp_conn_set_state(conn, BS_CLOSE);
a6f79ca5 723 tm_stop(conn->keepalive_timer);
11b32d91
OZ
724 conn->sk->rx_hook = NULL;
725
48b15ef1
OZ
726 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
727 bgp_start_timer(conn->hold_timer, 10);
728
11b32d91 729 if (os == BS_ESTABLISHED)
4558adab 730 bgp_conn_leave_established_state(conn, p);
11b32d91
OZ
731}
732
733void
734bgp_conn_enter_idle_state(struct bgp_conn *conn)
735{
736 struct bgp_proto *p = conn->bgp;
737 int os = conn->state;
738
739 bgp_close_conn(conn);
cf31112f 740 bgp_conn_set_state(conn, BS_IDLE);
11b32d91
OZ
741 ev_schedule(p->event);
742
743 if (os == BS_ESTABLISHED)
4558adab 744 bgp_conn_leave_established_state(conn, p);
11b32d91
OZ
745}
746
6eda3f13
OZ
747/**
748 * bgp_handle_graceful_restart - handle detected BGP graceful restart
749 * @p: BGP instance
750 *
751 * This function is called when a BGP graceful restart of the neighbor is
752 * detected (when the TCP connection fails or when a new TCP connection
753 * appears). The function activates processing of the restart - starts routing
754 * table refresh cycle and activates BGP restart timer. The protocol state goes
755 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
756 * caller.
757 */
0c791f87
OZ
758void
759bgp_handle_graceful_restart(struct bgp_proto *p)
760{
761 ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
762
763 BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
d15b0b0a
OZ
764 p->gr_active_num ? " - already pending" : "");
765
766 p->gr_active_num = 0;
0c791f87 767
d15b0b0a 768 struct bgp_channel *c;
54430df9 769 BGP_WALK_CHANNELS(p, c)
d15b0b0a 770 {
7fc55925
OZ
771 /* FIXME: perhaps check for channel state instead of disabled flag? */
772 if (c->c.disabled)
773 continue;
774
d15b0b0a
OZ
775 if (c->gr_ready)
776 {
5bd73431
OZ
777 p->gr_active_num++;
778
779 switch (c->gr_active)
780 {
781 case BGP_GRS_NONE:
782 c->gr_active = BGP_GRS_ACTIVE;
783 rt_refresh_begin(c->c.table, &c->c);
784 break;
785
786 case BGP_GRS_ACTIVE:
d15b0b0a 787 rt_refresh_end(c->c.table, &c->c);
5bd73431
OZ
788 rt_refresh_begin(c->c.table, &c->c);
789 break;
0c791f87 790
5bd73431
OZ
791 case BGP_GRS_LLGR:
792 rt_refresh_begin(c->c.table, &c->c);
793 rt_modify_stale(c->c.table, &c->c);
794 break;
795 }
d15b0b0a
OZ
796 }
797 else
798 {
799 /* Just flush the routes */
800 rt_refresh_begin(c->c.table, &c->c);
801 rt_refresh_end(c->c.table, &c->c);
802 }
7fc55925
OZ
803
804 /* Reset bucket and prefix tables */
805 bgp_free_bucket_table(c);
806 bgp_free_prefix_table(c);
807 bgp_init_bucket_table(c);
808 bgp_init_prefix_table(c);
809 c->packets_to_send = 0;
d15b0b0a
OZ
810 }
811
e62cd033
OZ
812 /* p->gr_ready -> at least one active channel is c->gr_ready */
813 ASSERT(p->gr_active_num > 0);
814
d15b0b0a 815 proto_notify_state(&p->p, PS_START);
5bd73431 816 tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
0c791f87
OZ
817}
818
6eda3f13
OZ
819/**
820 * bgp_graceful_restart_done - finish active BGP graceful restart
d15b0b0a 821 * @c: BGP channel
6eda3f13
OZ
822 *
823 * This function is called when the active BGP graceful restart of the neighbor
d15b0b0a
OZ
824 * should be finished for channel @c - either successfully (the neighbor sends
825 * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
826 * unsuccessfully (the neighbor does not support BGP graceful restart on the new
827 * session). The function ends the routing table refresh cycle.
6eda3f13 828 */
0c791f87 829void
d15b0b0a 830bgp_graceful_restart_done(struct bgp_channel *c)
0c791f87 831{
d15b0b0a
OZ
832 struct bgp_proto *p = (void *) c->c.proto;
833
834 ASSERT(c->gr_active);
835 c->gr_active = 0;
836 p->gr_active_num--;
837
838 if (!p->gr_active_num)
839 BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
840
5bd73431 841 tm_stop(c->stale_timer);
d15b0b0a 842 rt_refresh_end(c->c.table, &c->c);
0c791f87
OZ
843}
844
6eda3f13
OZ
845/**
846 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
847 * @t: timer
848 *
849 * This function is a timeout hook for @gr_timer, implementing BGP restart time
850 * limit for reestablisment of the BGP session after the graceful restart. When
851 * fired, we just proceed with the usual protocol restart.
852 */
853
0c791f87
OZ
854static void
855bgp_graceful_restart_timeout(timer *t)
856{
857 struct bgp_proto *p = t->data;
858
859 BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
5bd73431
OZ
860
861 if (p->llgr_ready)
862 {
863 struct bgp_channel *c;
54430df9 864 BGP_WALK_CHANNELS(p, c)
5bd73431
OZ
865 {
866 /* Channel is not in GR and is already flushed */
867 if (!c->gr_active)
868 continue;
869
870 /* Channel is already in LLGR from past restart */
871 if (c->gr_active == BGP_GRS_LLGR)
872 continue;
873
874 /* Channel is in GR, but does not support LLGR -> stop GR */
875 if (!c->stale_time)
876 {
877 bgp_graceful_restart_done(c);
878 continue;
879 }
880
881 /* Channel is in GR, and supports LLGR -> start LLGR */
882 c->gr_active = BGP_GRS_LLGR;
883 tm_start(c->stale_timer, c->stale_time S);
884 rt_modify_stale(c->c.table, &c->c);
885 }
886 }
887 else
888 bgp_stop(p, 0, NULL, 0);
889}
890
891static void
892bgp_long_lived_stale_timeout(timer *t)
893{
894 struct bgp_channel *c = t->data;
895 struct bgp_proto *p = (void *) c->c.proto;
896
897 BGP_TRACE(D_EVENTS, "Long-lived stale timeout");
898
899 bgp_graceful_restart_done(c);
0c791f87
OZ
900}
901
9aed29e6
OZ
902
903/**
904 * bgp_refresh_begin - start incoming enhanced route refresh sequence
d15b0b0a 905 * @c: BGP channel
9aed29e6
OZ
906 *
907 * This function is called when an incoming enhanced route refresh sequence is
908 * started by the neighbor, demarcated by the BoRR packet. The function updates
909 * the load state and starts the routing table refresh cycle. Note that graceful
910 * restart also uses routing table refresh cycle, but RFC 7313 and load states
911 * ensure that these two sequences do not overlap.
912 */
913void
d15b0b0a 914bgp_refresh_begin(struct bgp_channel *c)
9aed29e6 915{
d15b0b0a
OZ
916 struct bgp_proto *p = (void *) c->c.proto;
917
918 if (c->load_state == BFS_LOADING)
919 { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
9aed29e6 920
d15b0b0a
OZ
921 c->load_state = BFS_REFRESHING;
922 rt_refresh_begin(c->c.table, &c->c);
682d3f7d
OZ
923
924 if (c->c.in_table)
925 rt_refresh_begin(c->c.in_table, &c->c);
9aed29e6
OZ
926}
927
928/**
929 * bgp_refresh_end - finish incoming enhanced route refresh sequence
d15b0b0a 930 * @c: BGP channel
9aed29e6
OZ
931 *
932 * This function is called when an incoming enhanced route refresh sequence is
933 * finished by the neighbor, demarcated by the EoRR packet. The function updates
934 * the load state and ends the routing table refresh cycle. Routes not received
935 * during the sequence are removed by the nest.
936 */
937void
d15b0b0a 938bgp_refresh_end(struct bgp_channel *c)
9aed29e6 939{
d15b0b0a 940 struct bgp_proto *p = (void *) c->c.proto;
9aed29e6 941
d15b0b0a
OZ
942 if (c->load_state != BFS_REFRESHING)
943 { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
944
945 c->load_state = BFS_NONE;
946 rt_refresh_end(c->c.table, &c->c);
682d3f7d
OZ
947
948 if (c->c.in_table)
949 rt_prune_sync(c->c.in_table, 0);
9aed29e6
OZ
950}
951
952
c01e3741
MM
953static void
954bgp_send_open(struct bgp_conn *conn)
955{
956 DBG("BGP: Sending open\n");
957 conn->sk->rx_hook = bgp_rx;
b552ecc4 958 conn->sk->tx_hook = bgp_tx;
a6f79ca5 959 tm_stop(conn->connect_timer);
4a50c8bd 960 bgp_prepare_capabilities(conn);
d15b0b0a 961 bgp_schedule_packet(conn, NULL, PKT_OPEN);
cf31112f 962 bgp_conn_set_state(conn, BS_OPENSENT);
3fdbafb6 963 bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
c01e3741
MM
964}
965
3fdbafb6
MM
966static void
967bgp_connected(sock *sk)
c01e3741
MM
968{
969 struct bgp_conn *conn = sk->data;
85368cd4 970 struct bgp_proto *p = conn->bgp;
c01e3741 971
85368cd4 972 BGP_TRACE(D_EVENTS, "Connected");
c01e3741 973 bgp_send_open(conn);
c01e3741
MM
974}
975
976static void
977bgp_connect_timeout(timer *t)
978{
3fdbafb6 979 struct bgp_conn *conn = t->data;
85368cd4 980 struct bgp_proto *p = conn->bgp;
c01e3741 981
85368cd4 982 DBG("BGP: connect_timeout\n");
11b32d91 983 if (p->p.proto_state == PS_START)
d15b0b0a
OZ
984 {
985 bgp_close_conn(conn);
986 bgp_connect(p);
987 }
11b32d91
OZ
988 else
989 bgp_conn_enter_idle_state(conn);
c01e3741
MM
990}
991
992static void
3fdbafb6 993bgp_sock_err(sock *sk, int err)
c01e3741
MM
994{
995 struct bgp_conn *conn = sk->data;
85368cd4 996 struct bgp_proto *p = conn->bgp;
c01e3741 997
47597724
OZ
998 /*
999 * This error hook may be called either asynchronously from main
1000 * loop, or synchronously from sk_send(). But sk_send() is called
1001 * only from bgp_tx() and bgp_kick_tx(), which are both called
1002 * asynchronously from main loop. Moreover, they end if err hook is
1003 * called. Therefore, we could suppose that it is always called
1004 * asynchronously.
1005 */
1006
11b32d91
OZ
1007 bgp_store_error(p, conn, BE_SOCKET, err);
1008
53943a00
MM
1009 if (err)
1010 BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
1011 else
1012 BGP_TRACE(D_EVENTS, "Connection closed");
11b32d91 1013
0c791f87
OZ
1014 if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
1015 bgp_handle_graceful_restart(p);
1016
11b32d91 1017 bgp_conn_enter_idle_state(conn);
c01e3741
MM
1018}
1019
3fdbafb6
MM
1020static void
1021bgp_hold_timeout(timer *t)
1022{
1023 struct bgp_conn *conn = t->data;
48b15ef1 1024 struct bgp_proto *p = conn->bgp;
3fdbafb6 1025
ea89da38
OZ
1026 DBG("BGP: Hold timeout\n");
1027
48b15ef1
OZ
1028 /* We are already closing the connection - just do hangup */
1029 if (conn->state == BS_CLOSE)
1030 {
1031 BGP_TRACE(D_EVENTS, "Connection stalled");
1032 bgp_conn_enter_idle_state(conn);
1033 return;
1034 }
1035
ea89da38
OZ
1036 /* If there is something in input queue, we are probably congested
1037 and perhaps just not processed BGP packets in time. */
1038
1039 if (sk_rx_ready(conn->sk) > 0)
1040 bgp_start_timer(conn->hold_timer, 10);
5bd73431
OZ
1041 else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
1042 {
1043 BGP_TRACE(D_EVENTS, "Hold timer expired");
1044 bgp_handle_graceful_restart(p);
1045 bgp_conn_enter_idle_state(conn);
1046 }
ea89da38
OZ
1047 else
1048 bgp_error(conn, 4, 0, NULL, 0);
3fdbafb6
MM
1049}
1050
1051static void
1052bgp_keepalive_timeout(timer *t)
1053{
1054 struct bgp_conn *conn = t->data;
1055
1056 DBG("BGP: Keepalive timer\n");
d15b0b0a 1057 bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
bd22d7f4
OZ
1058
1059 /* Kick TX a bit faster */
1060 if (ev_active(conn->tx_ev))
1061 ev_run(conn->tx_ev);
3fdbafb6
MM
1062}
1063
c01e3741 1064static void
6fd766c1 1065bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
c01e3741 1066{
6fd766c1 1067 conn->sk = NULL;
c01e3741 1068 conn->bgp = p;
d15b0b0a 1069
72a6ef11 1070 conn->packets_to_send = 0;
d15b0b0a
OZ
1071 conn->channels_to_send = 0;
1072 conn->last_channel = 0;
1073 conn->last_channel_count = 0;
1074
a6f79ca5
OZ
1075 conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0);
1076 conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0);
1077 conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
c01e3741 1078
961671c0 1079 conn->tx_ev = ev_new_init(p->p.pool, bgp_kick_tx, conn);
c01e3741
MM
1080}
1081
6fd766c1 1082static void
e81b440f 1083bgp_setup_sk(struct bgp_conn *conn, sock *s)
6fd766c1
MM
1084{
1085 s->data = conn;
6fd766c1 1086 s->err_hook = bgp_sock_err;
9e7b3ebd 1087 s->fast_rx = 1;
6fd766c1
MM
1088 conn->sk = s;
1089}
1090
11b32d91 1091static void
dd91e467 1092bgp_active(struct bgp_proto *p)
11b32d91 1093{
6cf72d7a 1094 int delay = MAX(1, p->cf->connect_delay_time);
11b32d91
OZ
1095 struct bgp_conn *conn = &p->outgoing_conn;
1096
1097 BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
1098 bgp_setup_conn(p, conn);
cf31112f 1099 bgp_conn_set_state(conn, BS_ACTIVE);
d15b0b0a 1100 bgp_start_timer(conn->connect_timer, delay);
11b32d91
OZ
1101}
1102
54e55169
MM
1103/**
1104 * bgp_connect - initiate an outgoing connection
1105 * @p: BGP instance
1106 *
1107 * The bgp_connect() function creates a new &bgp_conn and initiates
1108 * a TCP connection to the peer. The rest of connection setup is governed
1109 * by the BGP state machine as described in the standard.
1110 */
c01e3741
MM
1111static void
1112bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
1113{
b552ecc4 1114 struct bgp_conn *conn = &p->outgoing_conn;
b1b19433 1115 int hops = p->cf->multihop ? : 1;
c01e3741
MM
1116
1117 DBG("BGP: Connecting\n");
d15b0b0a 1118 sock *s = sk_new(p->p.pool);
c01e3741 1119 s->type = SK_TCP_ACTIVE;
a22c3e59
OZ
1120 s->saddr = p->local_ip;
1121 s->daddr = p->remote_ip;
dcde7ae5 1122 s->dport = p->cf->remote_port;
53ffbff3 1123 s->iface = p->neigh ? p->neigh->iface : NULL;
943478b0 1124 s->vrf = p->p.vrf;
b1b19433 1125 s->ttl = p->cf->ttl_security ? 255 : hops;
06e0d1b6
OZ
1126 s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
1127 s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
a39b165e
OZ
1128 s->tos = IP_PREC_INTERNET_CONTROL;
1129 s->password = p->cf->password;
1130 s->tx_hook = bgp_connected;
2b712554 1131 s->flags = p->cf->free_bind ? SKF_FREEBIND : 0;
470740f9
OZ
1132 BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
1133 s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
88a183c6 1134 s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
6fd766c1 1135 bgp_setup_conn(p, conn);
e81b440f 1136 bgp_setup_sk(conn, s);
cf31112f 1137 bgp_conn_set_state(conn, BS_CONNECT);
b1b19433
OZ
1138
1139 if (sk_open(s) < 0)
05476c4d 1140 goto err;
b1b19433
OZ
1141
1142 /* Set minimal receive TTL if needed */
1143 if (p->cf->ttl_security)
b1b19433 1144 if (sk_set_min_ttl(s, 256 - hops) < 0)
05476c4d 1145 goto err;
b1b19433 1146
c01e3741 1147 DBG("BGP: Waiting for connect success\n");
d15b0b0a 1148 bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
05476c4d
OZ
1149 return;
1150
d15b0b0a 1151err:
05476c4d
OZ
1152 sk_log_error(s, p->p.name);
1153 bgp_sock_err(s, 0);
1154 return;
c01e3741
MM
1155}
1156
e0835db4
OZ
1157static inline int bgp_is_dynamic(struct bgp_proto *p)
1158{ return ipa_zero(p->remote_ip); }
1159
374917ad
OZ
1160/**
1161 * bgp_find_proto - find existing proto for incoming connection
1162 * @sk: TCP socket
1163 *
1164 */
1165static struct bgp_proto *
1166bgp_find_proto(sock *sk)
1167{
e0835db4 1168 struct bgp_proto *best = NULL;
d15b0b0a 1169 struct bgp_proto *p;
374917ad 1170
470740f9
OZ
1171 /* sk->iface is valid only if src or dst address is link-local */
1172 int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
1173
d15b0b0a
OZ
1174 WALK_LIST(p, proto_list)
1175 if ((p->p.proto == &proto_bgp) &&
e0835db4
OZ
1176 (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
1177 (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
1178 (p->p.vrf == sk->vrf) &&
1179 (p->cf->local_port == sk->sport) &&
470740f9
OZ
1180 (!link || (p->cf->iface == sk->iface)) &&
1181 (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)))
e0835db4
OZ
1182 {
1183 best = p;
374917ad 1184
e0835db4
OZ
1185 if (!bgp_is_dynamic(p))
1186 break;
1187 }
1188
1189 return best;
374917ad
OZ
1190}
1191
54e55169
MM
1192/**
1193 * bgp_incoming_connection - handle an incoming connection
1194 * @sk: TCP socket
1195 * @dummy: unused
1196 *
1197 * This function serves as a socket hook for accepting of new BGP
1198 * connections. It searches a BGP instance corresponding to the peer
1199 * which has connected and if such an instance exists, it creates a
1200 * &bgp_conn structure, attaches it to the instance and either sends
1201 * an Open message or (if there already is an active connection) it
1202 * closes the new connection by sending a Notification message.
1203 */
48e842cc 1204static int
3e236955 1205bgp_incoming_connection(sock *sk, uint dummy UNUSED)
c01e3741 1206{
374917ad
OZ
1207 struct bgp_proto *p;
1208 int acc, hops;
c01e3741 1209
48e842cc 1210 DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
374917ad
OZ
1211 p = bgp_find_proto(sk);
1212 if (!p)
d15b0b0a
OZ
1213 {
1214 log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
1215 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
1216 rfree(sk);
1217 return 0;
1218 }
374917ad 1219
487c6961
OZ
1220 /*
1221 * BIRD should keep multiple incoming connections in OpenSent state (for
1222 * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
1223 * connections are rejected istead. The exception is the case where an
1224 * incoming connection triggers a graceful restart.
1225 */
1226
374917ad
OZ
1227 acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
1228 (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
dd91e467 1229
374917ad 1230 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
d15b0b0a
OZ
1231 {
1232 bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
1233 bgp_handle_graceful_restart(p);
1234 bgp_conn_enter_idle_state(p->conn);
1235 acc = 1;
1236
1237 /* There might be separate incoming connection in OpenSent state */
1238 if (p->incoming_conn.state > BS_ACTIVE)
1239 bgp_close_conn(&p->incoming_conn);
1240 }
374917ad
OZ
1241
1242 BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
1243 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
1244 sk->dport, acc ? "accepted" : "rejected");
1245
1246 if (!acc)
d15b0b0a
OZ
1247 {
1248 rfree(sk);
1249 return 0;
1250 }
374917ad
OZ
1251
1252 hops = p->cf->multihop ? : 1;
1253
1254 if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
1255 goto err;
1256
1257 if (p->cf->ttl_security)
1258 if (sk_set_min_ttl(sk, 256 - hops) < 0)
1259 goto err;
1260
06e0d1b6 1261 if (p->cf->enable_extended_messages)
d15b0b0a
OZ
1262 {
1263 sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
1264 sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
1265 sk_reallocate(sk);
1266 }
06e0d1b6 1267
e0835db4
OZ
1268 /* For dynamic BGP, spawn new instance and postpone the socket */
1269 if (bgp_is_dynamic(p))
1270 {
1271 p = bgp_spawn(p, sk->daddr);
1272 p->postponed_sk = sk;
1273 rmove(sk, p->p.pool);
1274 return 0;
1275 }
1276
1277 rmove(sk, p->p.pool);
374917ad
OZ
1278 bgp_setup_conn(p, &p->incoming_conn);
1279 bgp_setup_sk(&p->incoming_conn, sk);
1280 bgp_send_open(&p->incoming_conn);
1281 return 0;
1282
1283err:
1284 sk_log_error(sk, p->p.name);
1285 log(L_ERR "%s: Incoming connection aborted", p->p.name);
48e842cc
MM
1286 rfree(sk);
1287 return 0;
1288}
1289
2af25a97 1290static void
e81b440f 1291bgp_listen_sock_err(sock *sk UNUSED, int err)
2af25a97
OZ
1292{
1293 if (err == ECONNABORTED)
1294 log(L_WARN "BGP: Incoming connection aborted");
1295 else
a34b0934 1296 log(L_ERR "BGP: Error on listening socket: %M", err);
2af25a97
OZ
1297}
1298
acfce55c
MM
1299static void
1300bgp_start_neighbor(struct bgp_proto *p)
1301{
9be9a264
OZ
1302 /* Called only for single-hop BGP sessions */
1303
a22c3e59
OZ
1304 if (ipa_zero(p->local_ip))
1305 p->local_ip = p->neigh->ifa->ip;
ad440a57 1306
a22c3e59
OZ
1307 if (ipa_is_link_local(p->local_ip))
1308 p->link_addr = p->local_ip;
153f02da
OZ
1309 else if (p->neigh->iface->llv6)
1310 p->link_addr = p->neigh->iface->llv6->ip;
11b32d91 1311
6fd766c1 1312 bgp_initiate(p);
48e842cc
MM
1313}
1314
1315static void
1316bgp_neigh_notify(neighbor *n)
1317{
1318 struct bgp_proto *p = (struct bgp_proto *) n->proto;
523f020b
OZ
1319 int ps = p->p.proto_state;
1320
1321 if (n != p->neigh)
1322 return;
48e842cc 1323
523f020b 1324 if ((ps == PS_DOWN) || (ps == PS_STOP))
b21955e0
OZ
1325 return;
1326
523f020b
OZ
1327 int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1328
1329 if (n->scope <= 0)
d15b0b0a
OZ
1330 {
1331 if (!prepare)
48e842cc 1332 {
d15b0b0a
OZ
1333 BGP_TRACE(D_EVENTS, "Neighbor lost");
1334 bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1335 /* Perhaps also run bgp_update_startup_delay(p)? */
830ba75e 1336 bgp_stop(p, 0, NULL, 0);
523f020b 1337 }
d15b0b0a 1338 }
523f020b 1339 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
d15b0b0a
OZ
1340 {
1341 if (!prepare)
523f020b 1342 {
d15b0b0a
OZ
1343 BGP_TRACE(D_EVENTS, "Link down");
1344 bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1345 if (ps == PS_UP)
1346 bgp_update_startup_delay(p);
830ba75e 1347 bgp_stop(p, 0, NULL, 0);
48e842cc 1348 }
d15b0b0a 1349 }
48e842cc 1350 else
d15b0b0a
OZ
1351 {
1352 if (prepare)
48e842cc 1353 {
d15b0b0a
OZ
1354 BGP_TRACE(D_EVENTS, "Neighbor ready");
1355 bgp_start_neighbor(p);
48e842cc 1356 }
d15b0b0a 1357 }
48e842cc
MM
1358}
1359
1ec52253
OZ
1360static void
1361bgp_bfd_notify(struct bfd_request *req)
1362{
1363 struct bgp_proto *p = req->data;
1364 int ps = p->p.proto_state;
1365
1366 if (req->down && ((ps == PS_START) || (ps == PS_UP)))
d15b0b0a
OZ
1367 {
1368 BGP_TRACE(D_EVENTS, "BFD session down");
1369 bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
5bd73431 1370
9d3fc306 1371 if (req->opts.mode == BGP_BFD_GRACEFUL)
5bd73431
OZ
1372 {
1373 /* Trigger graceful restart */
1374 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1375 bgp_handle_graceful_restart(p);
1376
1377 if (p->incoming_conn.state > BS_IDLE)
1378 bgp_conn_enter_idle_state(&p->incoming_conn);
1379
1380 if (p->outgoing_conn.state > BS_IDLE)
1381 bgp_conn_enter_idle_state(&p->outgoing_conn);
1382 }
1383 else
1384 {
1385 /* Trigger session down */
1386 if (ps == PS_UP)
1387 bgp_update_startup_delay(p);
1388 bgp_stop(p, 0, NULL, 0);
1389 }
d15b0b0a 1390 }
1ec52253
OZ
1391}
1392
1393static void
9d3fc306 1394bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd)
1ec52253 1395{
9d3fc306
OZ
1396 if (bfd && p->bfd_req)
1397 bfd_update_request(p->bfd_req, bfd);
1398
1399 if (bfd && !p->bfd_req && !bgp_is_dynamic(p))
a22c3e59 1400 p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
1ec52253 1401 p->cf->multihop ? NULL : p->neigh->iface,
9d3fc306 1402 p->p.vrf, bgp_bfd_notify, p, bfd);
1ec52253 1403
9d3fc306 1404 if (!bfd && p->bfd_req)
d15b0b0a
OZ
1405 {
1406 rfree(p->bfd_req);
1407 p->bfd_req = NULL;
1408 }
1ec52253
OZ
1409}
1410
d15b0b0a
OZ
1411static void
1412bgp_reload_routes(struct channel *C)
bf47fe4b 1413{
d15b0b0a
OZ
1414 struct bgp_proto *p = (void *) C->proto;
1415 struct bgp_channel *c = (void *) C;
bf47fe4b 1416
54430df9
OZ
1417 /* Ignore non-BGP channels */
1418 if (C->channel != &channel_bgp)
1419 return;
1420
682d3f7d 1421 ASSERT(p->conn && (p->route_refresh || c->c.in_table));
d15b0b0a 1422
682d3f7d
OZ
1423 if (c->c.in_table)
1424 channel_schedule_reload(C);
1425 else
1426 bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
bf47fe4b
OZ
1427}
1428
0c791f87 1429static void
d15b0b0a 1430bgp_feed_begin(struct channel *C, int initial)
0c791f87 1431{
d15b0b0a
OZ
1432 struct bgp_proto *p = (void *) C->proto;
1433 struct bgp_channel *c = (void *) C;
9aed29e6 1434
54430df9
OZ
1435 /* Ignore non-BGP channels */
1436 if (C->channel != &channel_bgp)
1437 return;
1438
9aed29e6
OZ
1439 /* This should not happen */
1440 if (!p->conn)
0c791f87
OZ
1441 return;
1442
9aed29e6 1443 if (initial && p->cf->gr_mode)
d15b0b0a 1444 c->feed_state = BFS_LOADING;
9aed29e6
OZ
1445
1446 /* It is refeed and both sides support enhanced route refresh */
d15b0b0a
OZ
1447 if (!initial && p->enhanced_refresh)
1448 {
1449 /* BoRR must not be sent before End-of-RIB */
1450 if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
1451 return;
9aed29e6 1452
d15b0b0a
OZ
1453 c->feed_state = BFS_REFRESHING;
1454 bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
1455 }
9aed29e6
OZ
1456}
1457
1458static void
d15b0b0a 1459bgp_feed_end(struct channel *C)
9aed29e6 1460{
d15b0b0a
OZ
1461 struct bgp_proto *p = (void *) C->proto;
1462 struct bgp_channel *c = (void *) C;
9aed29e6 1463
54430df9
OZ
1464 /* Ignore non-BGP channels */
1465 if (C->channel != &channel_bgp)
1466 return;
1467
9aed29e6
OZ
1468 /* This should not happen */
1469 if (!p->conn)
1470 return;
1471
1472 /* Non-demarcated feed ended, nothing to do */
d15b0b0a 1473 if (c->feed_state == BFS_NONE)
9aed29e6
OZ
1474 return;
1475
1476 /* Schedule End-of-RIB packet */
d15b0b0a
OZ
1477 if (c->feed_state == BFS_LOADING)
1478 c->feed_state = BFS_LOADED;
9aed29e6
OZ
1479
1480 /* Schedule EoRR packet */
d15b0b0a
OZ
1481 if (c->feed_state == BFS_REFRESHING)
1482 c->feed_state = BFS_REFRESHED;
9aed29e6
OZ
1483
1484 /* Kick TX hook */
d15b0b0a 1485 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
0c791f87
OZ
1486}
1487
9aed29e6 1488
48e842cc
MM
1489static void
1490bgp_start_locked(struct object_lock *lock)
1491{
1492 struct bgp_proto *p = lock->data;
a22c3e59 1493 const struct bgp_config *cf = p->cf;
48e842cc 1494
11b32d91 1495 if (p->p.proto_state != PS_START)
d15b0b0a
OZ
1496 {
1497 DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1498 return;
1499 }
11b32d91 1500
48e842cc 1501 DBG("BGP: Got lock\n");
4847a894 1502
e0835db4 1503 if (cf->multihop || bgp_is_dynamic(p))
d15b0b0a
OZ
1504 {
1505 /* Multi-hop sessions do not use neighbor entries */
1506 bgp_initiate(p);
1507 return;
1508 }
4847a894 1509
a22c3e59 1510 neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
523f020b 1511 if (!n)
d15b0b0a 1512 {
a22c3e59 1513 log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
d15b0b0a
OZ
1514 /* As we do not start yet, we can just disable protocol */
1515 p->p.disabled = 1;
1516 bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1517 proto_notify_state(&p->p, PS_DOWN);
1518 return;
1519 }
523f020b
OZ
1520
1521 p->neigh = n;
1522
1523 if (n->scope <= 0)
a22c3e59 1524 BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface);
523f020b
OZ
1525 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1526 BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1527 else
1528 bgp_start_neighbor(p);
c01e3741
MM
1529}
1530
2638249d
MM
1531static int
1532bgp_start(struct proto *P)
1533{
c01e3741 1534 struct bgp_proto *p = (struct bgp_proto *) P;
a22c3e59
OZ
1535 const struct bgp_config *cf = p->cf;
1536
1537 p->local_ip = cf->local_ip;
a22c3e59
OZ
1538 p->local_as = cf->local_as;
1539 p->remote_as = cf->remote_as;
1540 p->public_as = cf->local_as;
1541
e0835db4
OZ
1542 /* For dynamic BGP childs, remote_ip is already set */
1543 if (ipa_nonzero(cf->remote_ip))
1544 p->remote_ip = cf->remote_ip;
1545
a22c3e59
OZ
1546 /* Confederation ID is used for truly external peers */
1547 if (p->cf->confederation && !p->is_interior)
1548 p->public_as = cf->confederation;
c01e3741 1549
e0835db4
OZ
1550 p->passive = cf->passive || bgp_is_dynamic(p);
1551
11b32d91 1552 p->start_state = BSS_PREPARE;
b552ecc4
MM
1553 p->outgoing_conn.state = BS_IDLE;
1554 p->incoming_conn.state = BS_IDLE;
bcbdcbb6 1555 p->neigh = NULL;
1ec52253 1556 p->bfd_req = NULL;
e0835db4 1557 p->postponed_sk = NULL;
0c791f87 1558 p->gr_ready = 0;
d15b0b0a 1559 p->gr_active_num = 0;
cfe34a31 1560
21d09632
OZ
1561 /* Reset some stats */
1562 p->stats.rx_messages = p->stats.tx_messages = 0;
1563 p->stats.rx_updates = p->stats.tx_updates = 0;
1564 p->stats.rx_bytes = p->stats.tx_bytes = 0;
1565 p->last_rx_update = 0;
1566
961671c0 1567 p->event = ev_new_init(p->p.pool, bgp_decision, p);
a6f79ca5
OZ
1568 p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
1569 p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
0c791f87 1570
4ef09506
OZ
1571 p->local_id = proto_get_router_id(P->cf);
1572 if (p->rr_client)
1573 p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1574
9be9a264 1575 p->remote_id = 0;
ef57b70f 1576 p->link_addr = IPA_NONE;
9be9a264 1577
9d456d53
OZ
1578 proto_setup_mpls_map(P, RTS_BGP, 1);
1579
7fc55925 1580 /* Lock all channels when in GR recovery mode */
6eda3f13 1581 if (p->p.gr_recovery && p->cf->gr_mode)
d15b0b0a
OZ
1582 {
1583 struct bgp_channel *c;
54430df9 1584 BGP_WALK_CHANNELS(p, c)
d15b0b0a
OZ
1585 channel_graceful_restart_lock(&c->c);
1586 }
0c791f87 1587
c01e3741 1588 /*
d15b0b0a
OZ
1589 * Before attempting to create the connection, we need to lock the port,
1590 * so that we are the only instance attempting to talk with that neighbor.
c01e3741 1591 */
a22c3e59 1592 struct object_lock *lock;
c01e3741 1593 lock = p->lock = olock_new(P->pool);
a22c3e59 1594 lock->addr = p->remote_ip;
dcde7ae5 1595 lock->port = p->cf->remote_port;
53ffbff3 1596 lock->iface = p->cf->iface;
9f4908fe 1597 lock->vrf = p->cf->iface ? NULL : p->p.vrf;
c01e3741 1598 lock->type = OBJLOCK_TCP;
c01e3741
MM
1599 lock->hook = bgp_start_locked;
1600 lock->data = p;
eb1e43a9
OZ
1601
1602 /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */
1603 if (bgp_is_dynamic(p))
1604 {
1605 lock->addr = net_prefix(p->cf->remote_range);
1606 lock->inst = 1;
1607 }
1608
c01e3741 1609 olock_acquire(lock);
d51aa281 1610
c01e3741 1611 return PS_START;
2638249d
MM
1612}
1613
d9b77cc2
OZ
1614extern int proto_restart;
1615
2638249d
MM
1616static int
1617bgp_shutdown(struct proto *P)
1618{
c01e3741 1619 struct bgp_proto *p = (struct bgp_proto *) P;
8a68316e 1620 int subcode = 0;
c01e3741 1621
cd1d9961
OZ
1622 char *message = NULL;
1623 byte *data = NULL;
1624 uint len = 0;
c01e3741 1625
85368cd4 1626 BGP_TRACE(D_EVENTS, "Shutdown requested");
b99d3786 1627
ebecb6f6 1628 switch (P->down_code)
d15b0b0a
OZ
1629 {
1630 case PDC_CF_REMOVE:
1631 case PDC_CF_DISABLE:
1632 subcode = 3; // Errcode 6, 3 - peer de-configured
1633 break;
1634
1635 case PDC_CF_RESTART:
1636 subcode = 6; // Errcode 6, 6 - other configuration change
1637 break;
1638
1639 case PDC_CMD_DISABLE:
1640 case PDC_CMD_SHUTDOWN:
8a68316e 1641 shutdown:
d15b0b0a 1642 subcode = 2; // Errcode 6, 2 - administrative shutdown
830ba75e 1643 message = P->message;
d15b0b0a
OZ
1644 break;
1645
1646 case PDC_CMD_RESTART:
1647 subcode = 4; // Errcode 6, 4 - administrative reset
830ba75e 1648 message = P->message;
d15b0b0a
OZ
1649 break;
1650
8a68316e
OZ
1651 case PDC_CMD_GR_DOWN:
1652 if ((p->cf->gr_mode != BGP_GR_ABLE) &&
1653 (p->cf->llgr_mode != BGP_LLGR_ABLE))
1654 goto shutdown;
1655
1656 subcode = -1; // Do not send NOTIFICATION, just close the connection
1657 break;
1658
d15b0b0a
OZ
1659 case PDC_RX_LIMIT_HIT:
1660 case PDC_IN_LIMIT_HIT:
1661 subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1662 /* log message for compatibility */
1663 log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1664 goto limit;
1665
1666 case PDC_OUT_LIMIT_HIT:
1667 subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1668
1669 limit:
1670 bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1671 if (proto_restart)
1672 bgp_update_startup_delay(p);
1673 else
1674 p->startup_delay = 0;
1675 goto done;
1676 }
b99d3786 1677
ebecb6f6 1678 bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
11b32d91 1679 p->startup_delay = 0;
c01e3741 1680
cd1d9961
OZ
1681 /* RFC 8203 - shutdown communication */
1682 if (message)
1683 {
1684 uint msg_len = strlen(message);
7ff34ca2 1685 msg_len = MIN(msg_len, 255);
cd1d9961
OZ
1686
1687 /* Buffer will be freed automatically by protocol shutdown */
1688 data = mb_alloc(p->p.pool, msg_len + 1);
1689 len = msg_len + 1;
1690
1691 data[0] = msg_len;
1692 memcpy(data+1, message, msg_len);
1693 }
1694
d15b0b0a 1695done:
cd1d9961 1696 bgp_stop(p, subcode, data, len);
11b32d91 1697 return p->p.proto_state;
2638249d
MM
1698}
1699
48e842cc 1700static struct proto *
d15b0b0a 1701bgp_init(struct proto_config *CF)
48e842cc 1702{
d15b0b0a 1703 struct proto *P = proto_new(CF);
48e842cc 1704 struct bgp_proto *p = (struct bgp_proto *) P;
d15b0b0a 1705 struct bgp_config *cf = (struct bgp_config *) CF;
48e842cc
MM
1706
1707 P->rt_notify = bgp_rt_notify;
14375237 1708 P->preexport = bgp_preexport;
48e842cc 1709 P->neigh_notify = bgp_neigh_notify;
bf47fe4b 1710 P->reload_routes = bgp_reload_routes;
9aed29e6
OZ
1711 P->feed_begin = bgp_feed_begin;
1712 P->feed_end = bgp_feed_end;
094d2bdb 1713 P->rte_better = bgp_rte_better;
8d9eef17 1714 P->rte_mergable = bgp_rte_mergable;
d15b0b0a 1715 P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
5bd73431 1716 P->rte_modify = bgp_rte_modify_stale;
d471d5fc 1717 P->rte_igp_metric = bgp_rte_igp_metric;
d15b0b0a
OZ
1718
1719 p->cf = cf;
d15b0b0a
OZ
1720 p->is_internal = (cf->local_as == cf->remote_as);
1721 p->is_interior = p->is_internal || cf->confederation_member;
1722 p->rs_client = cf->rs_client;
1723 p->rr_client = cf->rr_client;
1724
e0835db4
OZ
1725 p->ipv4 = ipa_nonzero(cf->remote_ip) ?
1726 ipa_is_ip4(cf->remote_ip) :
1727 (cf->remote_range && (cf->remote_range->type == NET_IP4));
1728
1729 p->remote_ip = cf->remote_ip;
1730 p->remote_as = cf->remote_as;
1731
1732 /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
1733 if (cf->c.parent)
1734 cf->remote_ip = IPA_NONE;
1735
9d456d53 1736 /* Add all BGP channels */
d15b0b0a 1737 struct bgp_channel_config *cc;
54430df9 1738 BGP_CF_WALK_CHANNELS(cf, cc)
d15b0b0a 1739 proto_add_channel(P, &cc->c);
9be9a264 1740
9d456d53
OZ
1741 /* Add MPLS channel */
1742 proto_configure_channel(P, &P->mpls_channel, proto_cf_mpls_channel(CF));
1743
48e842cc
MM
1744 return P;
1745}
1746
d15b0b0a
OZ
1747static void
1748bgp_channel_init(struct channel *C, struct channel_config *CF)
1749{
1750 struct bgp_channel *c = (void *) C;
1751 struct bgp_channel_config *cf = (void *) CF;
1752
d15b0b0a
OZ
1753 c->cf = cf;
1754 c->afi = cf->afi;
ef57b70f
OZ
1755 c->desc = cf->desc;
1756
1757 if (cf->igp_table_ip4)
1758 c->igp_table_ip4 = cf->igp_table_ip4->table;
1759
1760 if (cf->igp_table_ip6)
1761 c->igp_table_ip6 = cf->igp_table_ip6->table;
1f2eb2ac
OZ
1762
1763 if (cf->base_table)
1764 c->base_table = cf->base_table->table;
d15b0b0a
OZ
1765}
1766
1767static int
1768bgp_channel_start(struct channel *C)
1769{
1770 struct bgp_proto *p = (void *) C->proto;
1771 struct bgp_channel *c = (void *) C;
a22c3e59 1772 ip_addr src = p->local_ip;
d15b0b0a 1773
ef57b70f
OZ
1774 if (c->igp_table_ip4)
1775 rt_lock_table(c->igp_table_ip4);
1776
1777 if (c->igp_table_ip6)
1778 rt_lock_table(c->igp_table_ip6);
d15b0b0a 1779
1f2eb2ac
OZ
1780 if (c->base_table)
1781 {
1782 rt_lock_table(c->base_table);
1783 rt_flowspec_link(c->base_table, c->c.table);
1784 }
1785
d15b0b0a
OZ
1786 c->pool = p->p.pool; // XXXX
1787 bgp_init_bucket_table(c);
1788 bgp_init_prefix_table(c);
1789
682d3f7d
OZ
1790 if (c->cf->import_table)
1791 channel_setup_in_table(C);
1792
b7d7599c
OZ
1793 if (c->cf->export_table)
1794 channel_setup_out_table(C);
1795
5bd73431
OZ
1796 c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
1797
d15b0b0a
OZ
1798 c->next_hop_addr = c->cf->next_hop_addr;
1799 c->link_addr = IPA_NONE;
1800 c->packets_to_send = 0;
1801
1802 /* Try to use source address as next hop address */
1803 if (ipa_zero(c->next_hop_addr))
1804 {
ef57b70f 1805 if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
d15b0b0a
OZ
1806 c->next_hop_addr = src;
1807
ef57b70f 1808 if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
d15b0b0a
OZ
1809 c->next_hop_addr = src;
1810 }
1811
ccee67ca
OZ
1812 /* Use preferred addresses associated with interface / source address */
1813 if (ipa_zero(c->next_hop_addr))
1814 {
1815 /* We know the iface for single-hop, we make lookup for multihop */
586c1800 1816 struct neighbor *nbr = p->neigh ?: neigh_find(&p->p, src, NULL, 0);
ccee67ca
OZ
1817 struct iface *iface = nbr ? nbr->iface : NULL;
1818
1819 if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
1820 c->next_hop_addr = iface->addr4->ip;
1821
1822 if (bgp_channel_is_ipv6(c) && iface && iface->addr6)
1823 c->next_hop_addr = iface->addr6->ip;
1824 }
1825
ef57b70f
OZ
1826 /* Exit if no feasible next hop address is found */
1827 if (ipa_zero(c->next_hop_addr))
1828 {
1829 log(L_WARN "%s: Missing next hop address", p->p.name);
1830 return 0;
1831 }
1832
d15b0b0a 1833 /* Set link-local address for IPv6 single-hop BGP */
ef57b70f 1834 if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
d15b0b0a
OZ
1835 {
1836 c->link_addr = p->link_addr;
1837
1838 if (ipa_zero(c->link_addr))
1839 log(L_WARN "%s: Missing link-local address", p->p.name);
1840 }
1841
ef57b70f
OZ
1842 /* Link local address is already in c->link_addr */
1843 if (ipa_is_link_local(c->next_hop_addr))
1844 c->next_hop_addr = IPA_NONE;
d15b0b0a
OZ
1845
1846 return 0; /* XXXX: Currently undefined */
1847}
1848
1849static void
1850bgp_channel_shutdown(struct channel *C)
1851{
1852 struct bgp_channel *c = (void *) C;
1853
d15b0b0a
OZ
1854 c->next_hop_addr = IPA_NONE;
1855 c->link_addr = IPA_NONE;
7fc55925 1856 c->packets_to_send = 0;
d15b0b0a
OZ
1857}
1858
1859static void
1860bgp_channel_cleanup(struct channel *C)
1861{
1862 struct bgp_channel *c = (void *) C;
1863
ef57b70f
OZ
1864 if (c->igp_table_ip4)
1865 rt_unlock_table(c->igp_table_ip4);
1866
1867 if (c->igp_table_ip6)
1868 rt_unlock_table(c->igp_table_ip6);
b8a3608a 1869
1f2eb2ac
OZ
1870 if (c->base_table)
1871 {
1872 rt_flowspec_unlink(c->base_table, c->c.table);
1873 rt_unlock_table(c->base_table);
1874 }
1875
b8a3608a
OZ
1876 c->index = 0;
1877
1878 /* Cleanup rest of bgp_channel starting at pool field */
1879 memset(&(c->pool), 0, sizeof(struct bgp_channel) - OFFSETOF(struct bgp_channel, pool));
ef57b70f
OZ
1880}
1881
1882static inline struct bgp_channel_config *
1883bgp_find_channel_config(struct bgp_config *cf, u32 afi)
1884{
1885 struct bgp_channel_config *cc;
1886
54430df9 1887 BGP_CF_WALK_CHANNELS(cf, cc)
ef57b70f
OZ
1888 if (cc->afi == afi)
1889 return cc;
1890
1891 return NULL;
d15b0b0a 1892}
a7f23f58 1893
ef57b70f
OZ
1894struct rtable_config *
1895bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
1896{
1897 struct bgp_channel_config *cc2;
1898 struct rtable_config *tab;
1899
1900 /* First, try table connected by the channel */
1901 if (cc->c.table->addr_type == type)
1902 return cc->c.table;
1903
1904 /* Find paired channel with the same SAFI but the other AFI */
1905 u32 afi2 = cc->afi ^ 0x30000;
1906 cc2 = bgp_find_channel_config(cf, afi2);
1907
1908 /* Second, try IGP table configured in the paired channel */
1909 if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
1910 return tab;
1911
1912 /* Third, try table connected by the paired channel */
1913 if (cc2 && (cc2->c.table->addr_type == type))
1914 return cc2->c.table;
1915
1916 /* Last, try default table of given type */
1917 if (tab = cf->c.global->def_tables[type])
1918 return tab;
1919
1920 cf_error("Undefined IGP table");
1921}
1922
1f2eb2ac
OZ
1923static struct rtable_config *
1924bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc)
1925{
1926 /* Expected table type */
1927 u32 type = (cc->afi == BGP_AF_FLOW4) ? NET_IP4 : NET_IP6;
1928
1929 /* First, try appropriate IP channel */
1930 u32 afi2 = BGP_AF(BGP_AFI(cc->afi), BGP_SAFI_UNICAST);
1931 struct bgp_channel_config *cc2 = bgp_find_channel_config(cf, afi2);
1932 if (cc2 && (cc2->c.table->addr_type == type))
1933 return cc2->c.table;
1934
1935 /* Last, try default table of given type */
1936 struct rtable_config *tab = cf->c.global->def_tables[type];
1937 if (tab)
1938 return tab;
1939
1940 cf_error("Undefined base table");
1941}
ef57b70f 1942
a7f23f58 1943void
d15b0b0a 1944bgp_postconfig(struct proto_config *CF)
a7f23f58 1945{
d15b0b0a 1946 struct bgp_config *cf = (void *) CF;
a7f23f58
OZ
1947
1948 /* Do not check templates at all */
d15b0b0a 1949 if (cf->c.class == SYM_TEMPLATE)
a7f23f58
OZ
1950 return;
1951
f3e59178 1952
23ee6b1c
OZ
1953 /* Handle undefined remote_as, zero should mean unspecified external */
1954 if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL))
1955 cf->remote_as = cf->local_as;
1956
1957 int internal = (cf->local_as == cf->remote_as);
1958 int interior = internal || cf->confederation_member;
1959
f3e59178 1960 /* EBGP direct by default, IBGP multihop by default */
d15b0b0a
OZ
1961 if (cf->multihop < 0)
1962 cf->multihop = internal ? 64 : 0;
f3e59178 1963
5bd73431
OZ
1964 /* LLGR mode default based on GR mode */
1965 if (cf->llgr_mode < 0)
1966 cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0;
1967
dea98864
OZ
1968 /* Link check for single-hop BGP by default */
1969 if (cf->check_link < 0)
1970 cf->check_link = !cf->multihop;
1971
f3e59178 1972
d15b0b0a 1973 if (!cf->local_as)
a7f23f58
OZ
1974 cf_error("Local AS number must be set");
1975
e0835db4 1976 if (ipa_zero(cf->remote_ip) && !cf->remote_range)
a7f23f58
OZ
1977 cf_error("Neighbor must be configured");
1978
e0835db4
OZ
1979 if (ipa_zero(cf->local_ip) && cf->strict_bind)
1980 cf_error("Local address must be configured for strict bind");
1981
23ee6b1c
OZ
1982 if (!cf->remote_as && !cf->peer_type)
1983 cf_error("Remote AS number (or peer type) must be set");
1984
1985 if ((cf->peer_type == BGP_PT_INTERNAL) && !internal)
1986 cf_error("IBGP cannot have different ASNs");
1987
1988 if ((cf->peer_type == BGP_PT_EXTERNAL) && internal)
1989 cf_error("EBGP cannot have the same ASNs");
a1beb8f3 1990
470740f9
OZ
1991 if (!cf->iface && (ipa_is_link_local(cf->local_ip) ||
1992 ipa_is_link_local(cf->remote_ip)))
1993 cf_error("Link-local addresses require defined interface");
a1beb8f3 1994
d15b0b0a 1995 if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
a7f23f58
OZ
1996 cf_error("Neighbor AS number out of range (AS4 not available)");
1997
d15b0b0a 1998 if (!internal && cf->rr_client)
a7f23f58
OZ
1999 cf_error("Only internal neighbor can be RR client");
2000
d15b0b0a 2001 if (internal && cf->rs_client)
a7f23f58
OZ
2002 cf_error("Only external neighbor can be RS client");
2003
c73b5d2d
EB
2004 if (internal && (cf->local_role != BGP_ROLE_UNDEFINED))
2005 cf_error("Local role cannot be set on IBGP sessions");
2006
971721c9
OZ
2007 if (interior && (cf->local_role != BGP_ROLE_UNDEFINED))
2008 log(L_WARN "BGP roles are not recommended to be used within AS confederations");
2009
c73b5d2d
EB
2010 if (cf->require_roles && (cf->local_role == BGP_ROLE_UNDEFINED))
2011 cf_error("Local role must be set if roles are required");
2012
d15b0b0a
OZ
2013 if (!cf->confederation && cf->confederation_member)
2014 cf_error("Confederation ID must be set for member sessions");
a7f23f58 2015
d15b0b0a
OZ
2016 if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
2017 ipa_is_link_local(cf->remote_ip)))
53ffbff3
OZ
2018 cf_error("Multihop BGP cannot be used with link-local addresses");
2019
e919601a 2020 if (cf->multihop && cf->iface)
33b6c292
OZ
2021 cf_error("Multihop BGP cannot be bound to interface");
2022
d15b0b0a 2023 if (cf->multihop && cf->check_link)
523f020b
OZ
2024 cf_error("Multihop BGP cannot depend on link state");
2025
d15b0b0a
OZ
2026 if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
2027 cf_error("Multihop BGP with BFD requires specified local address");
2028
5bd73431
OZ
2029 if (!cf->gr_mode && cf->llgr_mode)
2030 cf_error("Long-lived graceful restart requires basic graceful restart");
2031
0b228fca
OZ
2032 if (internal && cf->enforce_first_as)
2033 cf_error("Enforce first AS check is requires EBGP sessions");
2034
3859e4ef
OZ
2035 if (cf->keepalive_time > cf->hold_time)
2036 cf_error("Keepalive time must be at most hold time");
2037
2038 if (cf->keepalive_time > (cf->hold_time / 2))
2039 log(L_WARN "Keepalive time should be at most 1/2 of hold time");
2040
2041 if (cf->min_hold_time > cf->hold_time)
2042 cf_error("Min hold time (%u) exceeds hold time (%u)",
2043 cf->min_hold_time, cf->hold_time);
2044
2045 uint keepalive_time = cf->keepalive_time ?: cf->hold_time / 3;
2046 if (cf->min_keepalive_time > keepalive_time)
2047 cf_error("Min keepalive time (%u) exceeds keepalive time (%u)",
2048 cf->min_keepalive_time, keepalive_time);
2049
d15b0b0a
OZ
2050
2051 struct bgp_channel_config *cc;
54430df9 2052 BGP_CF_WALK_CHANNELS(cf, cc)
d15b0b0a 2053 {
3831b619
OZ
2054 /* Handle undefined import filter */
2055 if (cc->c.in_filter == FILTER_UNDEF)
2056 if (interior)
2057 cc->c.in_filter = FILTER_ACCEPT;
2058 else
2059 cf_error("EBGP requires explicit import policy");
2060
2061 /* Handle undefined export filter */
2062 if (cc->c.out_filter == FILTER_UNDEF)
2063 if (interior)
2064 cc->c.out_filter = FILTER_REJECT;
2065 else
2066 cf_error("EBGP requires explicit export policy");
2067
d15b0b0a
OZ
2068 /* Disable after error incompatible with restart limit action */
2069 if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
2070 cc->c.in_limit.action = PLA_DISABLE;
2071
1cab2b4a
OZ
2072 /* Different default based on rr_client, rs_client */
2073 if (cc->next_hop_keep == 0xff)
2074 cc->next_hop_keep = cf->rr_client ? NH_IBGP : (cf->rs_client ? NH_ALL : NH_NO);
2075
d15b0b0a
OZ
2076 /* Different default for gw_mode */
2077 if (!cc->gw_mode)
2078 cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
1ec52253 2079
8f79e6b9
OZ
2080 /* Different default for next_hop_prefer */
2081 if (!cc->next_hop_prefer)
2082 cc->next_hop_prefer = (cc->gw_mode == GW_DIRECT) ? NHP_GLOBAL : NHP_LOCAL;
2083
5bd73431 2084 /* Defaults based on proto config */
d15b0b0a
OZ
2085 if (cc->gr_able == 0xff)
2086 cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
26822d8f 2087
5bd73431
OZ
2088 if (cc->llgr_able == 0xff)
2089 cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE);
2090
2091 if (cc->llgr_time == ~0U)
2092 cc->llgr_time = cf->llgr_time;
2093
09ee846d
OZ
2094 /* AIGP enabled by default on interior sessions */
2095 if (cc->aigp == 0xff)
2096 cc->aigp = interior;
2097
6fe11c99 2098 /* Default values of IGP tables */
ef57b70f
OZ
2099 if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
2100 {
2101 if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
2102 cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
2103
2104 if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
2105 cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
6fe11c99
OZ
2106
2107 if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
2108 cf_error("Mismatched IGP table type");
2109
2110 if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
2111 cf_error("Mismatched IGP table type");
ef57b70f
OZ
2112 }
2113
1f2eb2ac
OZ
2114 /* Default value of base table */
2115 if ((BGP_SAFI(cc->afi) == BGP_SAFI_FLOW) && cc->validate && !cc->base_table)
2116 cc->base_table = bgp_default_base_table(cf, cc);
2117
2118 if (cc->base_table && !cc->base_table->trie_used)
2119 cf_error("Flowspec validation requires base table (%s) with trie",
2120 cc->base_table->name);
2121
d15b0b0a
OZ
2122 if (cf->multihop && (cc->gw_mode == GW_DIRECT))
2123 cf_error("Multihop BGP cannot use direct gateway mode");
26822d8f 2124
d15b0b0a
OZ
2125 if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
2126 cf_error("BGP in recursive mode prohibits sorted table");
2127
2128 if (cf->deterministic_med && cc->c.table->sorted)
2129 cf_error("BGP with deterministic MED prohibits sorted table");
2130
2131 if (cc->secondary && !cc->c.table->sorted)
2132 cf_error("BGP with secondary option requires sorted table");
2133 }
a7f23f58
OZ
2134}
2135
2136static int
d15b0b0a 2137bgp_reconfigure(struct proto *P, struct proto_config *CF)
a7f23f58 2138{
d15b0b0a 2139 struct bgp_proto *p = (void *) P;
a22c3e59
OZ
2140 const struct bgp_config *new = (void *) CF;
2141 const struct bgp_config *old = p->cf;
a7f23f58 2142
d15b0b0a 2143 if (proto_get_router_id(CF) != p->local_id)
79b4e12e
OZ
2144 return 0;
2145
a7f23f58
OZ
2146 int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
2147 ((byte *) new) + sizeof(struct proto_config),
2148 // password item is last and must be checked separately
2149 OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
15b0a922 2150 && !bstrcmp(old->password, new->password)
d35fb9d7
OZ
2151 && ((!old->remote_range && !new->remote_range)
2152 || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range)))
15b0a922 2153 && !bstrcmp(old->dynamic_name, new->dynamic_name)
e0835db4 2154 && (old->dynamic_name_digits == new->dynamic_name_digits);
d15b0b0a
OZ
2155
2156 /* FIXME: Move channel reconfiguration to generic protocol code ? */
2157 struct channel *C, *C2;
2158 struct bgp_channel_config *cc;
2159
2160 WALK_LIST(C, p->p.channels)
2161 C->stale = 1;
2162
9d456d53 2163 /* Reconfigure BGP channels */
54430df9 2164 BGP_CF_WALK_CHANNELS(new, cc)
d15b0b0a
OZ
2165 {
2166 C = (struct channel *) bgp_find_channel(p, cc->afi);
2167 same = proto_configure_channel(P, &C, &cc->c) && same;
d15b0b0a
OZ
2168 }
2169
9d456d53
OZ
2170 /* Reconfigure MPLS channel */
2171 same = proto_configure_channel(P, &P->mpls_channel, proto_cf_mpls_channel(CF)) && same;
2172
d15b0b0a
OZ
2173 WALK_LIST_DELSAFE(C, C2, p->p.channels)
2174 if (C->stale)
2175 same = proto_configure_channel(P, &C, NULL) && same;
2176
9d456d53
OZ
2177 if (same)
2178 proto_setup_mpls_map(P, RTS_BGP, 1);
2179
1ec52253
OZ
2180 if (same && (p->start_state > BSS_PREPARE))
2181 bgp_update_bfd(p, new->bfd);
2182
a7f23f58
OZ
2183 /* We should update our copy of configuration ptr as old configuration will be freed */
2184 if (same)
2185 p->cf = new;
2186
e0835db4
OZ
2187 /* Reset name counter */
2188 p->dynamic_name_counter = 0;
2189
a7f23f58
OZ
2190 return same;
2191}
2192
1f2eb2ac 2193#define TABLE(cf, NAME) ((cf)->NAME ? (cf)->NAME->table : NULL )
ffb38dfb 2194
d15b0b0a 2195static int
e2b530aa 2196bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed)
d15b0b0a 2197{
6c9cda6f 2198 struct bgp_proto *p = (void *) C->proto;
d15b0b0a
OZ
2199 struct bgp_channel *c = (void *) C;
2200 struct bgp_channel_config *new = (void *) CC;
2201 struct bgp_channel_config *old = c->cf;
2202
e2b530aa 2203 if ((new->secondary != old->secondary) ||
1f2eb2ac 2204 (new->validate != old->validate) ||
e2b530aa
OZ
2205 (new->gr_able != old->gr_able) ||
2206 (new->llgr_able != old->llgr_able) ||
2207 (new->llgr_time != old->llgr_time) ||
2208 (new->ext_next_hop != old->ext_next_hop) ||
2209 (new->add_path != old->add_path) ||
2210 (new->import_table != old->import_table) ||
b7d7599c 2211 (new->export_table != old->export_table) ||
1f2eb2ac
OZ
2212 (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) ||
2213 (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) ||
2214 (TABLE(new, base_table) != TABLE(old, base_table)))
d15b0b0a
OZ
2215 return 0;
2216
e2b530aa 2217 if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP))
d15b0b0a
OZ
2218 return 0;
2219
09ee846d 2220 if ((new->gw_mode != old->gw_mode) ||
8f79e6b9 2221 (new->next_hop_prefer != old->next_hop_prefer) ||
09ee846d
OZ
2222 (new->aigp != old->aigp) ||
2223 (new->cost != old->cost))
6c9cda6f
OZ
2224 {
2225 /* import_changed itself does not force ROUTE_REFRESH when import_table is active */
2226 if (c->c.in_table && (c->c.channel_state == CS_UP))
2227 bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
2228
e2b530aa 2229 *import_changed = 1;
6c9cda6f 2230 }
e2b530aa
OZ
2231
2232 if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
2233 (new->next_hop_self != old->next_hop_self) ||
2234 (new->next_hop_keep != old->next_hop_keep) ||
09ee846d
OZ
2235 (new->aigp != old->aigp) ||
2236 (new->aigp_originate != old->aigp_originate))
e2b530aa
OZ
2237 *export_changed = 1;
2238
d15b0b0a
OZ
2239 c->cf = new;
2240 return 1;
2241}
2242
a7f23f58 2243static void
9d3fc306 2244bgp_copy_config(struct proto_config *dest, struct proto_config *src)
a7f23f58 2245{
9d3fc306
OZ
2246 struct bgp_config *d = (void *) dest;
2247 struct bgp_config *s = (void *) src;
2248
2249 /* Copy BFD options */
2250 if (s->bfd)
2251 {
2252 struct bfd_options *opts = cfg_alloc(sizeof(struct bfd_options));
2253 memcpy(opts, s->bfd, sizeof(struct bfd_options));
2254 d->bfd = opts;
2255 }
a7f23f58
OZ
2256}
2257
2258
54e55169
MM
2259/**
2260 * bgp_error - report a protocol error
2261 * @c: connection
2262 * @code: error code (according to the RFC)
2e9b2421 2263 * @subcode: error sub-code
54e55169
MM
2264 * @data: data to be passed in the Notification message
2265 * @len: length of the data
2266 *
2267 * bgp_error() sends a notification packet to tell the other side that a protocol
2e9b2421 2268 * error has occurred (including the data considered erroneous if possible) and
54e55169
MM
2269 * closes the connection.
2270 */
3fdbafb6 2271void
d15b0b0a 2272bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
3fdbafb6 2273{
b99d3786
OZ
2274 struct bgp_proto *p = c->bgp;
2275
11b32d91 2276 if (c->state == BS_CLOSE)
3fdbafb6 2277 return;
11b32d91 2278
d15b0b0a 2279 bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
b99d3786 2280 bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
11b32d91 2281
3fdbafb6
MM
2282 c->notify_code = code;
2283 c->notify_subcode = subcode;
efcece2d
MM
2284 c->notify_data = data;
2285 c->notify_size = (len > 0) ? len : 0;
4558adab
OZ
2286
2287 bgp_conn_enter_close_state(c);
d15b0b0a 2288 bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
b99d3786
OZ
2289
2290 if (code != 6)
d15b0b0a
OZ
2291 {
2292 bgp_update_startup_delay(p);
830ba75e 2293 bgp_stop(p, 0, NULL, 0);
d15b0b0a 2294 }
3fdbafb6
MM
2295}
2296
11b32d91
OZ
2297/**
2298 * bgp_store_error - store last error for status report
2299 * @p: BGP instance
2300 * @c: connection
2301 * @class: error class (BE_xxx constants)
2302 * @code: error code (class specific)
2303 *
2304 * bgp_store_error() decides whether given error is interesting enough
2305 * and store that error to last_error variables of @p
2306 */
2307void
2308bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
2309{
2310 /* During PS_UP, we ignore errors on secondary connection */
2311 if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
2312 return;
2313
2314 /* During PS_STOP, we ignore any errors, as we want to report
2315 * the error that caused transition to PS_STOP
2316 */
2317 if (p->p.proto_state == PS_STOP)
2318 return;
2319
2320 p->last_error_class = class;
2321 p->last_error_code = code;
2322}
2323
/* Names of connection states, indexed by the connection state value */
static char *bgp_state_names[] = {
  "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close"
};

/* Message prefixes, indexed by the last error class */
static char *bgp_err_classes[] = {
  "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""
};

/* Messages for the BE_MISC class, indexed by the last error code */
static char *bgp_misc_errors[] = {
  "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"
};

/* Messages for the BE_AUTO_DOWN class, indexed by the last error code */
static char *bgp_auto_errors[] = {
  "", "Route limit exceeded"
};

/* Names of per-channel neighbor graceful restart states, indexed by gr_active */
static char *bgp_gr_states[] = {
  "None", "Regular", "Long-lived"
};
11b32d91 2329
b8113a5e
OZ
2330static const char *
2331bgp_last_errmsg(struct bgp_proto *p)
973399ae 2332{
11b32d91 2333 switch (p->last_error_class)
d15b0b0a
OZ
2334 {
2335 case BE_MISC:
2336 return bgp_misc_errors[p->last_error_code];
2337 case BE_SOCKET:
2338 return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
2339 case BE_BGP_RX:
2340 case BE_BGP_TX:
2341 return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
2342 case BE_AUTO_DOWN:
2343 return bgp_auto_errors[p->last_error_code];
2344 default:
2345 return "";
2346 }
b8113a5e
OZ
2347}
2348
2349static const char *
2350bgp_state_dsc(struct bgp_proto *p)
2351{
51947659
OZ
2352 if (p->p.proto_state == PS_DOWN)
2353 return "Down";
b8113a5e
OZ
2354
2355 int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
e0835db4 2356 if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive)
b8113a5e
OZ
2357 return "Passive";
2358
2359 return bgp_state_names[state];
2360}
2361
2362static void
2363bgp_get_status(struct proto *P, byte *buf)
2364{
2365 struct bgp_proto *p = (struct bgp_proto *) P;
2366
2367 const char *err1 = bgp_err_classes[p->last_error_class];
2368 const char *err2 = bgp_last_errmsg(p);
11b32d91 2369
f4ab2317 2370 if (P->proto_state == PS_DOWN)
11b32d91 2371 bsprintf(buf, "%s%s", err1, err2);
f4ab2317 2372 else
b8113a5e
OZ
2373 bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
2374}
2375
256cc8ee
OZ
2376static void
2377bgp_show_afis(int code, char *s, u32 *afis, uint count)
2378{
2379 buffer b;
2380 LOG_BUFFER_INIT(b);
2381
2382 buffer_puts(&b, s);
2383
2384 for (u32 *af = afis; af < (afis + count); af++)
2385 {
2386 const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
2387 if (desc)
2388 buffer_print(&b, " %s", desc->name);
2389 else
2390 buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
2391 }
2392
2393 if (b.pos == b.end)
2394 strcpy(b.end - 32, " ... <too long>");
2395
2396 cli_msg(code, b.start);
2397}
2398
af611f93 2399const char *
c73b5d2d
EB
2400bgp_format_role_name(u8 role)
2401{
2402 static const char *bgp_role_names[] = { "provider", "rs_server", "rs_client", "customer", "peer" };
2403 if (role == BGP_ROLE_UNDEFINED) return "undefined";
971721c9 2404 if (role < ARRAY_SIZE(bgp_role_names)) return bgp_role_names[role];
c73b5d2d
EB
2405 return "?";
2406}
2407
256cc8ee
OZ
2408static void
2409bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
2410{
2411 struct bgp_af_caps *ac;
2412 uint any_mp_bgp = 0;
2413 uint any_gr_able = 0;
2414 uint any_add_path = 0;
d8022d26 2415 uint any_ext_next_hop = 0;
5bd73431 2416 uint any_llgr_able = 0;
256cc8ee
OZ
2417 u32 *afl1 = alloca(caps->af_count * sizeof(u32));
2418 u32 *afl2 = alloca(caps->af_count * sizeof(u32));
2419 uint afn1, afn2;
2420
2421 WALK_AF_CAPS(caps, ac)
2422 {
2423 any_mp_bgp |= ac->ready;
2424 any_gr_able |= ac->gr_able;
2425 any_add_path |= ac->add_path;
d8022d26 2426 any_ext_next_hop |= ac->ext_next_hop;
5bd73431 2427 any_llgr_able |= ac->llgr_able;
256cc8ee
OZ
2428 }
2429
2430 if (any_mp_bgp)
2431 {
2432 cli_msg(-1006, " Multiprotocol");
2433
2434 afn1 = 0;
2435 WALK_AF_CAPS(caps, ac)
2436 if (ac->ready)
2437 afl1[afn1++] = ac->afi;
2438
2439 bgp_show_afis(-1006, " AF announced:", afl1, afn1);
2440 }
2441
2442 if (caps->route_refresh)
2443 cli_msg(-1006, " Route refresh");
2444
d8022d26
OZ
2445 if (any_ext_next_hop)
2446 {
2447 cli_msg(-1006, " Extended next hop");
2448
2449 afn1 = 0;
2450 WALK_AF_CAPS(caps, ac)
2451 if (ac->ext_next_hop)
2452 afl1[afn1++] = ac->afi;
2453
2454 bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1);
2455 }
2456
256cc8ee
OZ
2457 if (caps->ext_messages)
2458 cli_msg(-1006, " Extended message");
2459
2460 if (caps->gr_aware)
2461 cli_msg(-1006, " Graceful restart");
2462
2463 if (any_gr_able)
2464 {
2465 /* Continues from gr_aware */
2466 cli_msg(-1006, " Restart time: %u", caps->gr_time);
2467 if (caps->gr_flags & BGP_GRF_RESTART)
2468 cli_msg(-1006, " Restart recovery");
2469
2470 afn1 = afn2 = 0;
2471 WALK_AF_CAPS(caps, ac)
2472 {
2473 if (ac->gr_able)
2474 afl1[afn1++] = ac->afi;
2475
2476 if (ac->gr_af_flags & BGP_GRF_FORWARDING)
2477 afl2[afn2++] = ac->afi;
2478 }
2479
2480 bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2481 bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2482 }
2483
2484 if (caps->as4_support)
2485 cli_msg(-1006, " 4-octet AS numbers");
2486
2487 if (any_add_path)
2488 {
2489 cli_msg(-1006, " ADD-PATH");
2490
2491 afn1 = afn2 = 0;
2492 WALK_AF_CAPS(caps, ac)
2493 {
2494 if (ac->add_path & BGP_ADD_PATH_RX)
2495 afl1[afn1++] = ac->afi;
2496
2497 if (ac->add_path & BGP_ADD_PATH_TX)
2498 afl2[afn2++] = ac->afi;
2499 }
2500
2501 bgp_show_afis(-1006, " RX:", afl1, afn1);
2502 bgp_show_afis(-1006, " TX:", afl2, afn2);
2503 }
2504
2505 if (caps->enhanced_refresh)
2506 cli_msg(-1006, " Enhanced refresh");
5bd73431
OZ
2507
2508 if (caps->llgr_aware)
2509 cli_msg(-1006, " Long-lived graceful restart");
2510
2511 if (any_llgr_able)
2512 {
2513 u32 stale_time = 0;
2514
2515 afn1 = afn2 = 0;
2516 WALK_AF_CAPS(caps, ac)
2517 {
2518 stale_time = MAX(stale_time, ac->llgr_time);
2519
2520 if (ac->llgr_able && ac->llgr_time)
2521 afl1[afn1++] = ac->afi;
2522
2523 if (ac->llgr_flags & BGP_GRF_FORWARDING)
2524 afl2[afn2++] = ac->afi;
2525 }
2526
2527 /* Continues from llgr_aware */
2528 cli_msg(-1006, " LL stale time: %u", stale_time);
2529
2530 bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2531 bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2532 }
71423871
VB
2533
2534 if (caps->hostname)
2535 cli_msg(-1006, " Hostname: %s", caps->hostname);
c73b5d2d
EB
2536
2537 if (caps->role != BGP_ROLE_UNDEFINED)
2538 cli_msg(-1006, " Role: %s", bgp_format_role_name(caps->role));
256cc8ee
OZ
2539}
2540
b8113a5e
OZ
2541static void
2542bgp_show_proto_info(struct proto *P)
2543{
2544 struct bgp_proto *p = (struct bgp_proto *) P;
b8113a5e 2545
b8113a5e 2546 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
e0835db4
OZ
2547
2548 if (bgp_is_dynamic(p) && p->cf->remote_range)
2549 cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range);
2550 else
2551 cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface);
2552
a9c19b92
OZ
2553 if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT))
2554 cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port);
2555
e0835db4 2556 cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
0b1e1e1a 2557 cli_msg(-1006, " Local AS: %u", p->cf->local_as);
b8113a5e 2558
d15b0b0a 2559 if (p->gr_active_num)
0c791f87
OZ
2560 cli_msg(-1006, " Neighbor graceful restart active");
2561
b8113a5e 2562 if (P->proto_state == PS_START)
d15b0b0a
OZ
2563 {
2564 struct bgp_conn *oc = &p->outgoing_conn;
b8113a5e 2565
d15b0b0a 2566 if ((p->start_state < BSS_CONNECT) &&
a6f79ca5 2567 (tm_active(p->startup_timer)))
d3fa9e84 2568 cli_msg(-1006, " Error wait: %t/%u",
a6f79ca5 2569 tm_remains(p->startup_timer), p->startup_delay);
b8113a5e 2570
d15b0b0a 2571 if ((oc->state == BS_ACTIVE) &&
a6f79ca5 2572 (tm_active(oc->connect_timer)))
d3fa9e84 2573 cli_msg(-1006, " Connect delay: %t/%u",
a6f79ca5 2574 tm_remains(oc->connect_timer), p->cf->connect_delay_time);
0c791f87 2575
a6f79ca5 2576 if (p->gr_active_num && tm_active(p->gr_timer))
d3fa9e84 2577 cli_msg(-1006, " Restart timer: %t/-",
a6f79ca5 2578 tm_remains(p->gr_timer));
d15b0b0a 2579 }
b8113a5e 2580 else if (P->proto_state == PS_UP)
d15b0b0a
OZ
2581 {
2582 cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
256cc8ee
OZ
2583 cli_msg(-1006, " Local capabilities");
2584 bgp_show_capabilities(p, p->conn->local_caps);
2585 cli_msg(-1006, " Neighbor capabilities");
2586 bgp_show_capabilities(p, p->conn->remote_caps);
7fc55925
OZ
2587 cli_msg(-1006, " Session: %s%s%s%s%s",
2588 p->is_internal ? "internal" : "external",
2589 p->cf->multihop ? " multihop" : "",
2590 p->rr_client ? " route-reflector" : "",
2591 p->rs_client ? " route-server" : "",
2592 p->as4_session ? " AS4" : "");
a22c3e59 2593 cli_msg(-1006, " Source address: %I", p->local_ip);
d3fa9e84 2594 cli_msg(-1006, " Hold timer: %t/%u",
a6f79ca5 2595 tm_remains(p->conn->hold_timer), p->conn->hold_time);
d3fa9e84 2596 cli_msg(-1006, " Keepalive timer: %t/%u",
a6f79ca5 2597 tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
d15b0b0a 2598 }
b8113a5e 2599
5a6e8380 2600#if 0
21d09632
OZ
2601 struct bgp_stats *s = &p->stats;
2602 cli_msg(-1006, " FSM established transitions: %u",
2603 s->fsm_established_transitions);
2604 cli_msg(-1006, " Rcvd messages: %u total / %u updates / %lu bytes",
2605 s->rx_messages, s->rx_updates, s->rx_bytes);
2606 cli_msg(-1006, " Sent messages: %u total / %u updates / %lu bytes",
2607 s->tx_messages, s->tx_updates, s->tx_bytes);
2608 cli_msg(-1006, " Last rcvd update elapsed time: %t s",
2609 p->last_rx_update ? (current_time() - p->last_rx_update) : 0);
5a6e8380 2610#endif
21d09632 2611
523f020b 2612 if ((p->last_error_class != BE_NONE) &&
b8113a5e 2613 (p->last_error_class != BE_MAN_DOWN))
d15b0b0a
OZ
2614 {
2615 const char *err1 = bgp_err_classes[p->last_error_class];
2616 const char *err2 = bgp_last_errmsg(p);
2617 cli_msg(-1006, " Last error: %s%s", err1, err2);
2618 }
2619
2620 {
ef57b70f 2621 struct bgp_channel *c;
d15b0b0a 2622 WALK_LIST(c, p->p.channels)
ef57b70f
OZ
2623 {
2624 channel_show_info(&c->c);
2625
54430df9
OZ
2626 if (c->c.channel != &channel_bgp)
2627 continue;
2628
5bd73431
OZ
2629 if (p->gr_active_num)
2630 cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
2631
0db7a1d6 2632 if (c->stale_timer && tm_active(c->stale_timer))
5bd73431
OZ
2633 cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer));
2634
7fc55925
OZ
2635 if (c->c.channel_state == CS_UP)
2636 {
2637 if (ipa_zero(c->link_addr))
2638 cli_msg(-1006, " BGP Next hop: %I", c->next_hop_addr);
2639 else
2640 cli_msg(-1006, " BGP Next hop: %I %I", c->next_hop_addr, c->link_addr);
2641 }
ccee67ca 2642
ef57b70f
OZ
2643 if (c->igp_table_ip4)
2644 cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name);
2645
2646 if (c->igp_table_ip6)
2647 cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
1f2eb2ac
OZ
2648
2649 if (c->base_table)
2650 cli_msg(-1006, " Base table: %s", c->base_table->name);
ef57b70f 2651 }
d15b0b0a 2652 }
973399ae
MM
2653}
2654
f4deef89 2655const struct channel_class channel_bgp = {
d15b0b0a
OZ
2656 .channel_size = sizeof(struct bgp_channel),
2657 .config_size = sizeof(struct bgp_channel_config),
2658 .init = bgp_channel_init,
2659 .start = bgp_channel_start,
2660 .shutdown = bgp_channel_shutdown,
2661 .cleanup = bgp_channel_cleanup,
2662 .reconfigure = bgp_channel_reconfigure,
2663};
2664
2638249d 2665struct protocol proto_bgp = {
4a591d4b
PT
2666 .name = "BGP",
2667 .template = "bgp%d",
ee7e2ffd 2668 .class = PROTOCOL_BGP,
4a591d4b 2669 .preference = DEF_PREF_BGP,
9d456d53 2670 .channel_mask = NB_IP | NB_VPN | NB_FLOW | NB_MPLS,
d15b0b0a 2671 .proto_size = sizeof(struct bgp_proto),
2bbc3083 2672 .config_size = sizeof(struct bgp_config),
d15b0b0a 2673 .postconfig = bgp_postconfig,
4a591d4b
PT
2674 .init = bgp_init,
2675 .start = bgp_start,
2676 .shutdown = bgp_shutdown,
4a591d4b
PT
2677 .reconfigure = bgp_reconfigure,
2678 .copy_config = bgp_copy_config,
2679 .get_status = bgp_get_status,
2680 .get_attr = bgp_get_attr,
2681 .get_route_info = bgp_get_route_info,
2682 .show_proto_info = bgp_show_proto_info
2638249d 2683};
4a23ede2
MM
2684
2685void bgp_build(void)
2686{
2687 proto_build(&proto_bgp);
2688}