]> git.ipfire.org Git - thirdparty/bird.git/blame - proto/bgp/bgp.c
BMP: Refactor route monitoring
[thirdparty/bird.git] / proto / bgp / bgp.c
CommitLineData
2638249d
MM
1/*
2 * BIRD -- The Border Gateway Protocol
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
d15b0b0a
OZ
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
2638249d
MM
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
54e55169
MM
11/**
12 * DOC: Border Gateway Protocol
13 *
d15b0b0a
OZ
14 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
15 * the connection and most of the interface with BIRD core, |packets.c| handling
54e55169
MM
16 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
17 * manipulating BGP attribute lists.
18 *
d15b0b0a
OZ
19 * As opposed to the other existing routing daemons, BIRD has a sophisticated
20 * core architecture which is able to keep all the information needed by BGP in
21 * the primary routing table, therefore no complex data structures like a
22 * central BGP table are needed. This increases memory footprint of a BGP router
23 * with many connections, but not too much and, which is more important, it
24 * makes BGP much easier to implement.
54e55169 25 *
d15b0b0a
OZ
26 * Each instance of BGP (corresponding to a single BGP peer) is described by a
27 * &bgp_proto structure to which are attached individual connections represented
28 * by &bgp_connection (usually, there exists only one connection, but during BGP
29 * session setup, there can be more of them). The connections are handled
30 * according to the BGP state machine defined in the RFC with all the timers and
31 * all the parameters configurable.
54e55169 32 *
d15b0b0a
OZ
33 * In incoming direction, we listen on the connection's socket and each time we
34 * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
35 * markers and passes complete packets to bgp_rx_packet() which distributes the
36 * packet according to its type.
54e55169 37 *
d15b0b0a
OZ
38 * In outgoing direction, we gather all the routing updates and sort them to
39 * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
40 * fast comparison of &rta's and a &fib which helps us to find if we already
41 * have another route for the same destination queued for sending, so that we
42 * can replace it with the new one immediately instead of sending both
43 * updates). There also exists a special bucket holding all the route
44 * withdrawals which cannot be queued anywhere else as they don't have any
45 * attributes. If we have any packet to send (due to either new routes or the
46 * connection tracking code wanting to send an Open, Keepalive or Notification
47 * message), we call bgp_schedule_packet() which sets the corresponding bit in a
48 * @packet_to_send bit field in &bgp_conn and as soon as the transmit socket
49 * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
50 * packet type bits and calls the corresponding bgp_create_xx() functions,
51 * eventually rescheduling the same packet type if we have more data of the same
52 * type to send.
54e55169 53 *
d15b0b0a
OZ
54 * The processing of attributes consists of two functions: bgp_decode_attrs()
55 * for checking of the attribute blocks and translating them to the language of
56 * BIRD's extended attributes and bgp_encode_attrs() which does the
57 * converse. Both functions are built around a @bgp_attr_table array describing
58 * all important characteristics of all known attributes. Unknown transitive
59 * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
6eda3f13
OZ
60 *
61 * BGP protocol implements graceful restart in both restarting (local restart)
62 * and receiving (neighbor restart) roles. The first is handled mostly by the
63 * graceful restart code in the nest, BGP protocol just handles capabilities,
64 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
65 * The second is implemented by internal restart of the BGP state to %BS_IDLE
66 * and protocol state to %PS_START, but keeping the protocol up from the core
67 * point of view and therefore maintaining received routes. Routing table
68 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
69 * stale routes after reestablishment of BGP session during graceful restart.
c49e4a65
OZ
70 *
71 * Supported standards:
0f40405f
OZ
72 * RFC 4271 - Border Gateway Protocol 4 (BGP)
73 * RFC 1997 - BGP Communities Attribute
74 * RFC 2385 - Protection of BGP Sessions via TCP MD5 Signature
75 * RFC 2545 - Use of BGP Multiprotocol Extensions for IPv6
76 * RFC 2918 - Route Refresh Capability
77 * RFC 3107 - Carrying Label Information in BGP
78 * RFC 4360 - BGP Extended Communities Attribute
79 * RFC 4364 - BGP/MPLS IPv4 Virtual Private Networks
80 * RFC 4456 - BGP Route Reflection
81 * RFC 4486 - Subcodes for BGP Cease Notification Message
82 * RFC 4659 - BGP/MPLS IPv6 Virtual Private Networks
83 * RFC 4724 - Graceful Restart Mechanism for BGP
84 * RFC 4760 - Multiprotocol extensions for BGP
85 * RFC 4798 - Connecting IPv6 Islands over IPv4 MPLS
86 * RFC 5065 - AS confederations for BGP
87 * RFC 5082 - Generalized TTL Security Mechanism
88 * RFC 5492 - Capabilities Advertisement with BGP
89 * RFC 5549 - Advertising IPv4 NLRI with an IPv6 Next Hop
90 * RFC 5575 - Dissemination of Flow Specification Rules
91 * RFC 5668 - 4-Octet AS Specific BGP Extended Community
92 * RFC 6286 - AS-Wide Unique BGP Identifier
93 * RFC 6608 - Subcodes for BGP Finite State Machine Error
94 * RFC 6793 - BGP Support for 4-Octet AS Numbers
09ee846d 95 * RFC 7311 - Accumulated IGP Metric Attribute for BGP
0f40405f
OZ
96 * RFC 7313 - Enhanced Route Refresh Capability for BGP
97 * RFC 7606 - Revised Error Handling for BGP UPDATE Messages
98 * RFC 7911 - Advertisement of Multiple Paths in BGP
99 * RFC 7947 - Internet Exchange BGP Route Server
100 * RFC 8092 - BGP Large Communities Attribute
101 * RFC 8203 - BGP Administrative Shutdown Communication
102 * RFC 8212 - Default EBGP Route Propagation Behavior without Policies
be7c1aef 103 * RFC 8654 - Extended Message Support for BGP
913ec57f 104 * RFC 9072 - Extended Optional Parameters Length for BGP OPEN Message
1f2eb2ac 105 * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications
c73b5d2d 106 * RFC 9234 - Route Leak Prevention and Detection Using Roles
0f40405f 107 * draft-uttaro-idr-bgp-persistence-04
71423871 108 * draft-walton-bgp-hostname-capability-02
0f40405f 109 */
54e55169 110
48d79d52 111#undef LOCAL_DEBUG
2638249d 112
02552526
OZ
113#include <stdlib.h>
114
2638249d
MM
115#include "nest/bird.h"
116#include "nest/iface.h"
117#include "nest/protocol.h"
118#include "nest/route.h"
b8113a5e 119#include "nest/cli.h"
1ec52253 120#include "nest/locks.h"
2638249d 121#include "conf/conf.h"
3831b619 122#include "filter/filter.h"
c01e3741 123#include "lib/socket.h"
973399ae 124#include "lib/resource.h"
7d875e09 125#include "lib/string.h"
2638249d
MM
126
127#include "bgp.h"
a848dad4 128#include "proto/bmp/bmp.h"
2638249d 129
e7d2ac44 130
06ece326 131static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */
d15b0b0a 132
c01e3741 133
c01e3741 134static void bgp_connect(struct bgp_proto *p);
dd91e467 135static void bgp_active(struct bgp_proto *p);
e0835db4
OZ
136static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn);
137static void bgp_setup_sk(struct bgp_conn *conn, sock *s);
138static void bgp_send_open(struct bgp_conn *conn);
9d3fc306 139static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd);
2638249d 140
d15b0b0a
OZ
141static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
142static void bgp_listen_sock_err(sock *sk UNUSED, int err);
11cb6202 143
11b32d91
OZ
144/**
145 * bgp_open - open a BGP instance
146 * @p: BGP instance
147 *
d15b0b0a
OZ
148 * This function allocates and configures shared BGP resources, mainly listening
149 * sockets. Should be called as the last step during initialization (when lock
150 * is acquired and neighbor is ready). When error, caller should change state to
151 * PS_DOWN and return immediately.
11b32d91
OZ
152 */
153static int
154bgp_open(struct bgp_proto *p)
155{
d15b0b0a
OZ
156 struct bgp_socket *bs = NULL;
157 struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
158 ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
e0835db4 159 (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
d15b0b0a 160 uint port = p->cf->local_port;
60e9def9
OZ
161 uint flags = p->cf->free_bind ? SKF_FREEBIND : 0;
162 uint flag_mask = SKF_FREEBIND;
d15b0b0a 163
d15b0b0a 164 /* We assume that cf->iface is defined iff cf->local_ip is link-local */
11b32d91 165
d15b0b0a 166 WALK_LIST(bs, bgp_sockets)
60e9def9
OZ
167 if (ipa_equal(bs->sk->saddr, addr) &&
168 (bs->sk->sport == port) &&
169 (bs->sk->iface == ifa) &&
170 (bs->sk->vrf == p->p.vrf) &&
171 ((bs->sk->flags & flag_mask) == flags))
a34b0934 172 {
d15b0b0a
OZ
173 bs->uc++;
174 p->sock = bs;
175 return 0;
a34b0934
OZ
176 }
177
d15b0b0a
OZ
178 sock *sk = sk_new(proto_pool);
179 sk->type = SK_TCP_PASSIVE;
180 sk->ttl = 255;
181 sk->saddr = addr;
182 sk->sport = port;
e19d0805
OZ
183 sk->iface = ifa;
184 sk->vrf = p->p.vrf;
60e9def9 185 sk->flags = flags;
d15b0b0a
OZ
186 sk->tos = IP_PREC_INTERNET_CONTROL;
187 sk->rbsize = BGP_RX_BUFFER_SIZE;
188 sk->tbsize = BGP_TX_BUFFER_SIZE;
189 sk->rx_hook = bgp_incoming_connection;
190 sk->err_hook = bgp_listen_sock_err;
191
192 if (sk_open(sk) < 0)
193 goto err;
11b32d91 194
d15b0b0a
OZ
195 bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
196 bs->sk = sk;
197 bs->uc = 1;
198 p->sock = bs;
470740f9 199 sk->data = bs;
05476c4d 200
d15b0b0a
OZ
201 add_tail(&bgp_sockets, &bs->n);
202
11b32d91 203 return 0;
b1b19433
OZ
204
205err:
d15b0b0a
OZ
206 sk_log_error(sk, p->p.name);
207 log(L_ERR "%s: Cannot open listening socket", p->p.name);
208 rfree(sk);
b1b19433 209 return -1;
11b32d91
OZ
210}
211
d15b0b0a
OZ
212/**
213 * bgp_close - close a BGP instance
214 * @p: BGP instance
215 *
216 * This function frees and deconfigures shared BGP resources.
217 */
218static void
219bgp_close(struct bgp_proto *p)
220{
221 struct bgp_socket *bs = p->sock;
222
223 ASSERT(bs && bs->uc);
224
225 if (--bs->uc)
226 return;
227
228 rfree(bs->sk);
229 rem_node(&bs->n);
230 mb_free(bs);
d15b0b0a
OZ
231}
232
233static inline int
234bgp_setup_auth(struct bgp_proto *p, int enable)
235{
236 if (p->cf->password)
237 {
757cab18
OZ
238 ip_addr prefix = p->cf->remote_ip;
239 int pxlen = -1;
240
241 if (p->cf->remote_range)
242 {
243 prefix = net_prefix(p->cf->remote_range);
244 pxlen = net_pxlen(p->cf->remote_range);
245 }
246
d15b0b0a 247 int rv = sk_set_md5_auth(p->sock->sk,
757cab18 248 p->cf->local_ip, prefix, pxlen, p->cf->iface,
d15b0b0a
OZ
249 enable ? p->cf->password : NULL, p->cf->setkey);
250
251 if (rv < 0)
252 sk_log_error(p->sock->sk, p->p.name);
253
254 return rv;
255 }
256 else
257 return 0;
258}
259
260static inline struct bgp_channel *
261bgp_find_channel(struct bgp_proto *p, u32 afi)
262{
263 struct bgp_channel *c;
54430df9 264 BGP_WALK_CHANNELS(p, c)
d15b0b0a
OZ
265 if (c->afi == afi)
266 return c;
267
268 return NULL;
269}
270
dd91e467
OZ
271static void
272bgp_startup(struct bgp_proto *p)
273{
274 BGP_TRACE(D_EVENTS, "Started");
d15b0b0a 275 p->start_state = BSS_CONNECT;
be6e39eb 276
e0835db4 277 if (!p->passive)
be6e39eb 278 bgp_active(p);
e0835db4
OZ
279
280 if (p->postponed_sk)
281 {
282 /* Apply postponed incoming connection */
283 bgp_setup_conn(p, &p->incoming_conn);
284 bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
285 bgp_send_open(&p->incoming_conn);
286 p->postponed_sk = NULL;
287 }
dd91e467
OZ
288}
289
290static void
291bgp_startup_timeout(timer *t)
292{
293 bgp_startup(t->data);
294}
295
296
297static void
298bgp_initiate(struct bgp_proto *p)
299{
d15b0b0a
OZ
300 int err_val;
301
302 if (bgp_open(p) < 0)
303 { err_val = BEM_NO_SOCKET; goto err1; }
304
305 if (bgp_setup_auth(p, 1) < 0)
306 { err_val = BEM_INVALID_MD5; goto err2; }
9be9a264 307
1ec52253
OZ
308 if (p->cf->bfd)
309 bgp_update_bfd(p, p->cf->bfd);
310
dd91e467 311 if (p->startup_delay)
d15b0b0a
OZ
312 {
313 p->start_state = BSS_DELAY;
314 BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
315 bgp_start_timer(p->startup_timer, p->startup_delay);
316 }
dd91e467
OZ
317 else
318 bgp_startup(p);
dd91e467 319
d15b0b0a 320 return;
d51aa281 321
d15b0b0a
OZ
322err2:
323 bgp_close(p);
324err1:
325 p->p.disabled = 1;
326 bgp_store_error(p, NULL, BE_MISC, err_val);
91d04583
OZ
327
328 p->neigh = NULL;
d15b0b0a 329 proto_notify_state(&p->p, PS_DOWN);
d51aa281 330
d15b0b0a 331 return;
c01e3741
MM
332}
333
54e55169
MM
334/**
335 * bgp_start_timer - start a BGP timer
336 * @t: timer
cc881bd1 337 * @value: time (in seconds) to fire (0 to disable the timer)
54e55169 338 *
d15b0b0a
OZ
339 * This functions calls tm_start() on @t with time @value and the amount of
340 * randomization suggested by the BGP standard. Please use it for all BGP
341 * timers.
54e55169 342 */
3fdbafb6 343void
cc881bd1 344bgp_start_timer(timer *t, uint value)
c01e3741 345{
3fdbafb6 346 if (value)
d15b0b0a 347 {
cc881bd1
OZ
348 /* The randomization procedure is specified in RFC 4271 section 10 */
349 btime time = value S;
350 btime randomize = random() % ((time / 4) + 1);
a6f79ca5 351 tm_start(t, time - randomize);
d15b0b0a 352 }
b552ecc4 353 else
a6f79ca5 354 tm_stop(t);
b552ecc4
MM
355}
356
54e55169
MM
357/**
358 * bgp_close_conn - close a BGP connection
359 * @conn: connection to close
360 *
d15b0b0a
OZ
361 * This function takes a connection described by the &bgp_conn structure, closes
362 * its socket and frees all resources associated with it.
54e55169 363 */
b552ecc4
MM
364void
365bgp_close_conn(struct bgp_conn *conn)
366{
e81b440f 367 // struct bgp_proto *p = conn->bgp;
b552ecc4
MM
368
369 DBG("BGP: Closing connection\n");
370 conn->packets_to_send = 0;
d15b0b0a
OZ
371 conn->channels_to_send = 0;
372 rfree(conn->connect_timer);
373 conn->connect_timer = NULL;
b552ecc4
MM
374 rfree(conn->keepalive_timer);
375 conn->keepalive_timer = NULL;
376 rfree(conn->hold_timer);
377 conn->hold_timer = NULL;
11b32d91
OZ
378 rfree(conn->tx_ev);
379 conn->tx_ev = NULL;
d15b0b0a
OZ
380 rfree(conn->sk);
381 conn->sk = NULL;
382
1be0be1b
OZ
383 mb_free(conn->local_open_msg);
384 conn->local_open_msg = NULL;
385 mb_free(conn->remote_open_msg);
386 conn->remote_open_msg = NULL;
387 conn->local_open_length = 0;
388 conn->remote_open_length = 0;
389
d15b0b0a
OZ
390 mb_free(conn->local_caps);
391 conn->local_caps = NULL;
392 mb_free(conn->remote_caps);
393 conn->remote_caps = NULL;
11b32d91
OZ
394}
395
396
397/**
398 * bgp_update_startup_delay - update a startup delay
399 * @p: BGP instance
11b32d91 400 *
d15b0b0a
OZ
401 * This function updates a startup delay that is used to postpone next BGP
402 * connect. It also handles disable_after_error and might stop BGP instance
403 * when error happened and disable_after_error is on.
11b32d91
OZ
404 *
405 * It should be called when BGP protocol error happened.
406 */
407void
b99d3786 408bgp_update_startup_delay(struct bgp_proto *p)
11b32d91 409{
a22c3e59 410 const struct bgp_config *cf = p->cf;
11b32d91 411
b99d3786 412 DBG("BGP: Updating startup delay\n");
11b32d91 413
cc881bd1 414 if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
72382626
OZ
415 p->startup_delay = 0;
416
cc881bd1 417 p->last_proto_error = current_time();
11b32d91
OZ
418
419 if (cf->disable_after_error)
d15b0b0a
OZ
420 {
421 p->startup_delay = 0;
422 p->p.disabled = 1;
423 return;
424 }
11b32d91 425
11b32d91
OZ
426 if (!p->startup_delay)
427 p->startup_delay = cf->error_delay_time_min;
428 else
b99d3786 429 p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
c01e3741
MM
430}
431
11b32d91 432static void
8a68316e 433bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len)
48e842cc 434{
11b32d91 435 switch (conn->state)
d15b0b0a
OZ
436 {
437 case BS_IDLE:
438 case BS_CLOSE:
439 return;
440
441 case BS_CONNECT:
442 case BS_ACTIVE:
443 bgp_conn_enter_idle_state(conn);
444 return;
445
446 case BS_OPENSENT:
447 case BS_OPENCONFIRM:
448 case BS_ESTABLISHED:
8a68316e
OZ
449 if (subcode < 0)
450 {
451 bgp_conn_enter_close_state(conn);
452 bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
453 }
454 else
455 bgp_error(conn, 6, subcode, data, len);
d15b0b0a
OZ
456 return;
457
458 default:
459 bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
460 }
48e842cc
MM
461}
462
11b32d91
OZ
463static void
464bgp_down(struct bgp_proto *p)
465{
466 if (p->start_state > BSS_PREPARE)
d15b0b0a
OZ
467 {
468 bgp_setup_auth(p, 0);
469 bgp_close(p);
470 }
11b32d91 471
91d04583
OZ
472 p->neigh = NULL;
473
b99d3786 474 BGP_TRACE(D_EVENTS, "Down");
11b32d91
OZ
475 proto_notify_state(&p->p, PS_DOWN);
476}
477
478static void
479bgp_decision(void *vp)
480{
481 struct bgp_proto *p = vp;
482
483 DBG("BGP: Decision start\n");
d15b0b0a
OZ
484 if ((p->p.proto_state == PS_START) &&
485 (p->outgoing_conn.state == BS_IDLE) &&
486 (p->incoming_conn.state != BS_OPENCONFIRM) &&
e0835db4 487 !p->passive)
dd91e467 488 bgp_active(p);
11b32d91 489
d15b0b0a
OZ
490 if ((p->p.proto_state == PS_STOP) &&
491 (p->outgoing_conn.state == BS_IDLE) &&
492 (p->incoming_conn.state == BS_IDLE))
11b32d91
OZ
493 bgp_down(p);
494}
495
e0835db4
OZ
496static struct bgp_proto *
497bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
498{
499 struct symbol *sym;
500 char fmt[SYM_MAX_LEN];
501
502 bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits);
503
504 /* This is hack, we would like to share config, but we need to copy it now */
505 new_config = config;
506 cfg_mem = config->mem;
507 conf_this_scope = config->root_scope;
508 sym = cf_default_name(fmt, &(pp->dynamic_name_counter));
509 proto_clone_config(sym, pp->p.cf);
510 new_config = NULL;
511 cfg_mem = NULL;
512
513 /* Just pass remote_ip to bgp_init() */
eac9250f 514 ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
e0835db4 515
eac9250f 516 return (void *) proto_spawn(sym->proto, 0);
e0835db4
OZ
517}
518
b99d3786 519void
8a68316e 520bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
11b32d91
OZ
521{
522 proto_notify_state(&p->p, PS_STOP);
cd1d9961
OZ
523 bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
524 bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
11b32d91
OZ
525 ev_schedule(p->event);
526}
527
cf31112f 528static inline void
d15b0b0a 529bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
cf31112f
OZ
530{
531 if (conn->bgp->p.mrtdump & MD_STATES)
863ecfc7 532 bgp_dump_state_change(conn, conn->state, new_state);
cf31112f
OZ
533
534 conn->state = new_state;
535}
536
537void
538bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
539{
540 /* Really, most of the work is done in bgp_rx_open(). */
541 bgp_conn_set_state(conn, BS_OPENCONFIRM);
542}
543
d15b0b0a 544static const struct bgp_af_caps dummy_af_caps = { };
7e5f769d 545static const struct bgp_af_caps basic_af_caps = { .ready = 1 };
d15b0b0a 546
11b32d91
OZ
547void
548bgp_conn_enter_established_state(struct bgp_conn *conn)
549{
550 struct bgp_proto *p = conn->bgp;
d15b0b0a
OZ
551 struct bgp_caps *local = conn->local_caps;
552 struct bgp_caps *peer = conn->remote_caps;
553 struct bgp_channel *c;
523f020b 554
11b32d91 555 BGP_TRACE(D_EVENTS, "BGP session established");
21d09632
OZ
556 p->last_established = current_time();
557 p->stats.fsm_established_transitions++;
11b32d91 558
9be9a264 559 /* For multi-hop BGP sessions */
a22c3e59
OZ
560 if (ipa_zero(p->local_ip))
561 p->local_ip = conn->sk->saddr;
9be9a264 562
23ee6b1c
OZ
563 /* For promiscuous sessions */
564 if (!p->remote_as)
565 p->remote_as = conn->received_as;
566
e16b0aef
OZ
567 /* In case of LLv6 is not valid during BGP start */
568 if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6)
569 p->link_addr = p->neigh->iface->llv6->ip;
570
9e7b3ebd
OZ
571 conn->sk->fast_rx = 0;
572
11b32d91
OZ
573 p->conn = conn;
574 p->last_error_class = 0;
575 p->last_error_code = 0;
094d2bdb 576
d15b0b0a
OZ
577 p->as4_session = conn->as4_session;
578
579 p->route_refresh = peer->route_refresh;
580 p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
0c791f87 581
5bd73431
OZ
582 /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
583 p->gr_ready = p->llgr_ready = 0; /* Updated later */
0c791f87 584
d15b0b0a
OZ
585 /* Whether peer is ready to handle our GR recovery */
586 int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
0c791f87 587
d15b0b0a 588 if (p->gr_active_num)
a6f79ca5 589 tm_stop(p->gr_timer);
0c791f87 590
d15b0b0a
OZ
591 /* Number of active channels */
592 int num = 0;
593
863ecfc7
OZ
594 /* Summary state of ADD_PATH RX for active channels */
595 uint summary_add_path_rx = 0;
596
54430df9 597 BGP_WALK_CHANNELS(p, c)
d15b0b0a
OZ
598 {
599 const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
600 const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
601
7e5f769d
OZ
602 /* Use default if capabilities were not announced */
603 if (!local->length && (c->afi == BGP_AF_IPV4))
604 loc = &basic_af_caps;
605
606 if (!peer->length && (c->afi == BGP_AF_IPV4))
607 rem = &basic_af_caps;
608
d15b0b0a
OZ
609 /* Ignore AFIs that were not announced in multiprotocol capability */
610 if (!loc || !loc->ready)
611 loc = &dummy_af_caps;
612
613 if (!rem || !rem->ready)
614 rem = &dummy_af_caps;
615
616 int active = loc->ready && rem->ready;
617 c->c.disabled = !active;
682d3f7d 618 c->c.reloadable = p->route_refresh || c->cf->import_table;
d15b0b0a
OZ
619
620 c->index = active ? num++ : 0;
621
622 c->feed_state = BFS_NONE;
623 c->load_state = BFS_NONE;
624
625 /* Channels where peer may do GR */
5bd73431
OZ
626 uint gr_ready = active && local->gr_aware && rem->gr_able;
627 uint llgr_ready = active && local->llgr_aware && rem->llgr_able;
628
629 c->gr_ready = gr_ready || llgr_ready;
d15b0b0a 630 p->gr_ready = p->gr_ready || c->gr_ready;
5bd73431
OZ
631 p->llgr_ready = p->llgr_ready || llgr_ready;
632
633 /* Remember last LLGR stale time */
634 c->stale_time = local->llgr_aware ? rem->llgr_time : 0;
0c791f87 635
d15b0b0a
OZ
636 /* Channels not able to recover gracefully */
637 if (p->p.gr_recovery && (!active || !peer_gr_ready))
638 channel_graceful_restart_unlock(&c->c);
9aed29e6 639
d15b0b0a
OZ
640 /* Channels waiting for local convergence */
641 if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
642 c->c.gr_wait = 1;
643
5bd73431
OZ
644 /* Channels where regular graceful restart failed */
645 if ((c->gr_active == BGP_GRS_ACTIVE) &&
646 !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
647 bgp_graceful_restart_done(c);
648
649 /* Channels where regular long-lived restart failed */
650 if ((c->gr_active == BGP_GRS_LLGR) &&
651 !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING)))
d15b0b0a
OZ
652 bgp_graceful_restart_done(c);
653
654 /* GR capability implies that neighbor will send End-of-RIB */
655 if (peer->gr_aware)
656 c->load_state = BFS_LOADING;
657
d8022d26 658 c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
d15b0b0a
OZ
659 c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
660 c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
661
863ecfc7
OZ
662 if (active)
663 summary_add_path_rx |= !c->add_path_rx ? 1 : 2;
664
f8aad5d5 665 /* Update RA mode */
d15b0b0a
OZ
666 if (c->add_path_tx)
667 c->c.ra_mode = RA_ANY;
f8aad5d5
OZ
668 else if (c->cf->secondary)
669 c->c.ra_mode = RA_ACCEPTED;
670 else
671 c->c.ra_mode = RA_OPTIMAL;
d15b0b0a
OZ
672 }
673
674 p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
675 p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
676 p->channel_count = num;
863ecfc7 677 p->summary_add_path_rx = summary_add_path_rx;
d15b0b0a 678
54430df9 679 BGP_WALK_CHANNELS(p, c)
d15b0b0a
OZ
680 {
681 if (c->c.disabled)
682 continue;
683
684 p->afi_map[c->index] = c->afi;
685 p->channel_map[c->index] = c;
686 }
687
688 /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
9aed29e6 689
cf31112f 690 bgp_conn_set_state(conn, BS_ESTABLISHED);
11b32d91 691 proto_notify_state(&p->p, PS_UP);
aa3c3549
OZ
692 bmp_peer_up(p, conn->local_open_msg, conn->local_open_length,
693 conn->remote_open_msg, conn->remote_open_length);
11b32d91
OZ
694}
695
696static void
697bgp_conn_leave_established_state(struct bgp_proto *p)
698{
699 BGP_TRACE(D_EVENTS, "BGP session closed");
21d09632 700 p->last_established = current_time();
11b32d91
OZ
701 p->conn = NULL;
702
703 if (p->p.proto_state == PS_UP)
cd1d9961 704 bgp_stop(p, 0, NULL, 0);
11b32d91
OZ
705}
706
707void
708bgp_conn_enter_close_state(struct bgp_conn *conn)
709{
710 struct bgp_proto *p = conn->bgp;
711 int os = conn->state;
712
cf31112f 713 bgp_conn_set_state(conn, BS_CLOSE);
a6f79ca5 714 tm_stop(conn->keepalive_timer);
11b32d91
OZ
715 conn->sk->rx_hook = NULL;
716
48b15ef1
OZ
717 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
718 bgp_start_timer(conn->hold_timer, 10);
719
11b32d91
OZ
720 if (os == BS_ESTABLISHED)
721 bgp_conn_leave_established_state(p);
722}
723
724void
725bgp_conn_enter_idle_state(struct bgp_conn *conn)
726{
727 struct bgp_proto *p = conn->bgp;
728 int os = conn->state;
729
730 bgp_close_conn(conn);
cf31112f 731 bgp_conn_set_state(conn, BS_IDLE);
11b32d91
OZ
732 ev_schedule(p->event);
733
734 if (os == BS_ESTABLISHED)
735 bgp_conn_leave_established_state(p);
736}
737
6eda3f13
OZ
738/**
739 * bgp_handle_graceful_restart - handle detected BGP graceful restart
740 * @p: BGP instance
741 *
742 * This function is called when a BGP graceful restart of the neighbor is
743 * detected (when the TCP connection fails or when a new TCP connection
744 * appears). The function activates processing of the restart - starts routing
745 * table refresh cycle and activates BGP restart timer. The protocol state goes
746 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
747 * caller.
748 */
0c791f87
OZ
749void
750bgp_handle_graceful_restart(struct bgp_proto *p)
751{
752 ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
753
754 BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
d15b0b0a
OZ
755 p->gr_active_num ? " - already pending" : "");
756
757 p->gr_active_num = 0;
0c791f87 758
d15b0b0a 759 struct bgp_channel *c;
54430df9 760 BGP_WALK_CHANNELS(p, c)
d15b0b0a 761 {
7fc55925
OZ
762 /* FIXME: perhaps check for channel state instead of disabled flag? */
763 if (c->c.disabled)
764 continue;
765
d15b0b0a
OZ
766 if (c->gr_ready)
767 {
5bd73431
OZ
768 p->gr_active_num++;
769
770 switch (c->gr_active)
771 {
772 case BGP_GRS_NONE:
773 c->gr_active = BGP_GRS_ACTIVE;
774 rt_refresh_begin(c->c.table, &c->c);
775 break;
776
777 case BGP_GRS_ACTIVE:
d15b0b0a 778 rt_refresh_end(c->c.table, &c->c);
5bd73431
OZ
779 rt_refresh_begin(c->c.table, &c->c);
780 break;
0c791f87 781
5bd73431
OZ
782 case BGP_GRS_LLGR:
783 rt_refresh_begin(c->c.table, &c->c);
784 rt_modify_stale(c->c.table, &c->c);
785 break;
786 }
d15b0b0a
OZ
787 }
788 else
789 {
790 /* Just flush the routes */
791 rt_refresh_begin(c->c.table, &c->c);
792 rt_refresh_end(c->c.table, &c->c);
793 }
7fc55925
OZ
794
795 /* Reset bucket and prefix tables */
796 bgp_free_bucket_table(c);
797 bgp_free_prefix_table(c);
798 bgp_init_bucket_table(c);
799 bgp_init_prefix_table(c);
800 c->packets_to_send = 0;
d15b0b0a
OZ
801 }
802
e62cd033
OZ
803 /* p->gr_ready -> at least one active channel is c->gr_ready */
804 ASSERT(p->gr_active_num > 0);
805
d15b0b0a 806 proto_notify_state(&p->p, PS_START);
5bd73431 807 tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
0c791f87
OZ
808}
809
6eda3f13
OZ
810/**
811 * bgp_graceful_restart_done - finish active BGP graceful restart
d15b0b0a 812 * @c: BGP channel
6eda3f13
OZ
813 *
814 * This function is called when the active BGP graceful restart of the neighbor
d15b0b0a
OZ
815 * should be finished for channel @c - either successfully (the neighbor sends
816 * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
817 * unsuccessfully (the neighbor does not support BGP graceful restart on the new
818 * session). The function ends the routing table refresh cycle.
6eda3f13 819 */
0c791f87 820void
d15b0b0a 821bgp_graceful_restart_done(struct bgp_channel *c)
0c791f87 822{
d15b0b0a
OZ
823 struct bgp_proto *p = (void *) c->c.proto;
824
825 ASSERT(c->gr_active);
826 c->gr_active = 0;
827 p->gr_active_num--;
828
829 if (!p->gr_active_num)
830 BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
831
5bd73431 832 tm_stop(c->stale_timer);
d15b0b0a 833 rt_refresh_end(c->c.table, &c->c);
0c791f87
OZ
834}
835
6eda3f13
OZ
836/**
837 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
838 * @t: timer
839 *
840 * This function is a timeout hook for @gr_timer, implementing BGP restart time
841 * limit for reestablisment of the BGP session after the graceful restart. When
842 * fired, we just proceed with the usual protocol restart.
843 */
844
0c791f87
OZ
845static void
846bgp_graceful_restart_timeout(timer *t)
847{
848 struct bgp_proto *p = t->data;
849
850 BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
5bd73431
OZ
851
852 if (p->llgr_ready)
853 {
854 struct bgp_channel *c;
54430df9 855 BGP_WALK_CHANNELS(p, c)
5bd73431
OZ
856 {
857 /* Channel is not in GR and is already flushed */
858 if (!c->gr_active)
859 continue;
860
861 /* Channel is already in LLGR from past restart */
862 if (c->gr_active == BGP_GRS_LLGR)
863 continue;
864
865 /* Channel is in GR, but does not support LLGR -> stop GR */
866 if (!c->stale_time)
867 {
868 bgp_graceful_restart_done(c);
869 continue;
870 }
871
872 /* Channel is in GR, and supports LLGR -> start LLGR */
873 c->gr_active = BGP_GRS_LLGR;
874 tm_start(c->stale_timer, c->stale_time S);
875 rt_modify_stale(c->c.table, &c->c);
876 }
877 }
878 else
a848dad4 879 {
5bd73431 880 bgp_stop(p, 0, NULL, 0);
4adebdf1 881 bmp_peer_down(p, BE_NONE, NULL, 0);
a848dad4 882 }
5bd73431
OZ
883}
884
885static void
886bgp_long_lived_stale_timeout(timer *t)
887{
888 struct bgp_channel *c = t->data;
889 struct bgp_proto *p = (void *) c->c.proto;
890
891 BGP_TRACE(D_EVENTS, "Long-lived stale timeout");
892
893 bgp_graceful_restart_done(c);
0c791f87
OZ
894}
895
9aed29e6
OZ
896
897/**
898 * bgp_refresh_begin - start incoming enhanced route refresh sequence
d15b0b0a 899 * @c: BGP channel
9aed29e6
OZ
900 *
901 * This function is called when an incoming enhanced route refresh sequence is
902 * started by the neighbor, demarcated by the BoRR packet. The function updates
903 * the load state and starts the routing table refresh cycle. Note that graceful
904 * restart also uses routing table refresh cycle, but RFC 7313 and load states
905 * ensure that these two sequences do not overlap.
906 */
907void
d15b0b0a 908bgp_refresh_begin(struct bgp_channel *c)
9aed29e6 909{
d15b0b0a
OZ
910 struct bgp_proto *p = (void *) c->c.proto;
911
912 if (c->load_state == BFS_LOADING)
913 { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
9aed29e6 914
d15b0b0a
OZ
915 c->load_state = BFS_REFRESHING;
916 rt_refresh_begin(c->c.table, &c->c);
682d3f7d
OZ
917
918 if (c->c.in_table)
919 rt_refresh_begin(c->c.in_table, &c->c);
9aed29e6
OZ
920}
921
922/**
923 * bgp_refresh_end - finish incoming enhanced route refresh sequence
d15b0b0a 924 * @c: BGP channel
9aed29e6
OZ
925 *
926 * This function is called when an incoming enhanced route refresh sequence is
927 * finished by the neighbor, demarcated by the EoRR packet. The function updates
928 * the load state and ends the routing table refresh cycle. Routes not received
929 * during the sequence are removed by the nest.
930 */
931void
d15b0b0a 932bgp_refresh_end(struct bgp_channel *c)
9aed29e6 933{
d15b0b0a 934 struct bgp_proto *p = (void *) c->c.proto;
9aed29e6 935
d15b0b0a
OZ
936 if (c->load_state != BFS_REFRESHING)
937 { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
938
939 c->load_state = BFS_NONE;
940 rt_refresh_end(c->c.table, &c->c);
682d3f7d
OZ
941
942 if (c->c.in_table)
943 rt_prune_sync(c->c.in_table, 0);
9aed29e6
OZ
944}
945
946
c01e3741
MM
947static void
948bgp_send_open(struct bgp_conn *conn)
949{
950 DBG("BGP: Sending open\n");
951 conn->sk->rx_hook = bgp_rx;
b552ecc4 952 conn->sk->tx_hook = bgp_tx;
a6f79ca5 953 tm_stop(conn->connect_timer);
4a50c8bd 954 bgp_prepare_capabilities(conn);
d15b0b0a 955 bgp_schedule_packet(conn, NULL, PKT_OPEN);
cf31112f 956 bgp_conn_set_state(conn, BS_OPENSENT);
3fdbafb6 957 bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
c01e3741
MM
958}
959
3fdbafb6
MM
960static void
961bgp_connected(sock *sk)
c01e3741
MM
962{
963 struct bgp_conn *conn = sk->data;
85368cd4 964 struct bgp_proto *p = conn->bgp;
c01e3741 965
85368cd4 966 BGP_TRACE(D_EVENTS, "Connected");
c01e3741 967 bgp_send_open(conn);
c01e3741
MM
968}
969
970static void
971bgp_connect_timeout(timer *t)
972{
3fdbafb6 973 struct bgp_conn *conn = t->data;
85368cd4 974 struct bgp_proto *p = conn->bgp;
c01e3741 975
85368cd4 976 DBG("BGP: connect_timeout\n");
11b32d91 977 if (p->p.proto_state == PS_START)
d15b0b0a
OZ
978 {
979 bgp_close_conn(conn);
980 bgp_connect(p);
981 }
11b32d91
OZ
982 else
983 bgp_conn_enter_idle_state(conn);
c01e3741
MM
984}
985
986static void
3fdbafb6 987bgp_sock_err(sock *sk, int err)
c01e3741
MM
988{
989 struct bgp_conn *conn = sk->data;
85368cd4 990 struct bgp_proto *p = conn->bgp;
c01e3741 991
47597724
OZ
992 /*
993 * This error hook may be called either asynchronously from main
994 * loop, or synchronously from sk_send(). But sk_send() is called
995 * only from bgp_tx() and bgp_kick_tx(), which are both called
996 * asynchronously from main loop. Moreover, they end if err hook is
997 * called. Therefore, we could suppose that it is always called
998 * asynchronously.
999 */
1000
11b32d91
OZ
1001 bgp_store_error(p, conn, BE_SOCKET, err);
1002
53943a00
MM
1003 if (err)
1004 BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
1005 else
a848dad4 1006 {
53943a00 1007 BGP_TRACE(D_EVENTS, "Connection closed");
4adebdf1 1008 bmp_peer_down(p, BE_SOCKET, NULL, 0);
a848dad4 1009 }
11b32d91 1010
0c791f87
OZ
1011 if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
1012 bgp_handle_graceful_restart(p);
1013
11b32d91 1014 bgp_conn_enter_idle_state(conn);
c01e3741
MM
1015}
1016
3fdbafb6
MM
1017static void
1018bgp_hold_timeout(timer *t)
1019{
1020 struct bgp_conn *conn = t->data;
48b15ef1 1021 struct bgp_proto *p = conn->bgp;
3fdbafb6 1022
ea89da38
OZ
1023 DBG("BGP: Hold timeout\n");
1024
48b15ef1
OZ
1025 /* We are already closing the connection - just do hangup */
1026 if (conn->state == BS_CLOSE)
1027 {
1028 BGP_TRACE(D_EVENTS, "Connection stalled");
1029 bgp_conn_enter_idle_state(conn);
1030 return;
1031 }
1032
ea89da38
OZ
1033 /* If there is something in input queue, we are probably congested
1034 and perhaps just not processed BGP packets in time. */
1035
1036 if (sk_rx_ready(conn->sk) > 0)
1037 bgp_start_timer(conn->hold_timer, 10);
5bd73431
OZ
1038 else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
1039 {
1040 BGP_TRACE(D_EVENTS, "Hold timer expired");
1041 bgp_handle_graceful_restart(p);
1042 bgp_conn_enter_idle_state(conn);
1043 }
ea89da38
OZ
1044 else
1045 bgp_error(conn, 4, 0, NULL, 0);
3fdbafb6
MM
1046}
1047
1048static void
1049bgp_keepalive_timeout(timer *t)
1050{
1051 struct bgp_conn *conn = t->data;
1052
1053 DBG("BGP: Keepalive timer\n");
d15b0b0a 1054 bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
bd22d7f4
OZ
1055
1056 /* Kick TX a bit faster */
1057 if (ev_active(conn->tx_ev))
1058 ev_run(conn->tx_ev);
3fdbafb6
MM
1059}
1060
c01e3741 1061static void
6fd766c1 1062bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
c01e3741 1063{
6fd766c1 1064 conn->sk = NULL;
c01e3741 1065 conn->bgp = p;
d15b0b0a 1066
72a6ef11 1067 conn->packets_to_send = 0;
d15b0b0a
OZ
1068 conn->channels_to_send = 0;
1069 conn->last_channel = 0;
1070 conn->last_channel_count = 0;
1071
a6f79ca5
OZ
1072 conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0);
1073 conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0);
1074 conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
c01e3741 1075
961671c0 1076 conn->tx_ev = ev_new_init(p->p.pool, bgp_kick_tx, conn);
c01e3741
MM
1077}
1078
6fd766c1 1079static void
e81b440f 1080bgp_setup_sk(struct bgp_conn *conn, sock *s)
6fd766c1
MM
1081{
1082 s->data = conn;
6fd766c1 1083 s->err_hook = bgp_sock_err;
9e7b3ebd 1084 s->fast_rx = 1;
6fd766c1
MM
1085 conn->sk = s;
1086}
1087
11b32d91 1088static void
dd91e467 1089bgp_active(struct bgp_proto *p)
11b32d91 1090{
6cf72d7a 1091 int delay = MAX(1, p->cf->connect_delay_time);
11b32d91
OZ
1092 struct bgp_conn *conn = &p->outgoing_conn;
1093
1094 BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
1095 bgp_setup_conn(p, conn);
cf31112f 1096 bgp_conn_set_state(conn, BS_ACTIVE);
d15b0b0a 1097 bgp_start_timer(conn->connect_timer, delay);
11b32d91
OZ
1098}
1099
54e55169
MM
1100/**
1101 * bgp_connect - initiate an outgoing connection
1102 * @p: BGP instance
1103 *
1104 * The bgp_connect() function creates a new &bgp_conn and initiates
1105 * a TCP connection to the peer. The rest of connection setup is governed
1106 * by the BGP state machine as described in the standard.
1107 */
c01e3741
MM
1108static void
1109bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
1110{
b552ecc4 1111 struct bgp_conn *conn = &p->outgoing_conn;
b1b19433 1112 int hops = p->cf->multihop ? : 1;
c01e3741
MM
1113
1114 DBG("BGP: Connecting\n");
d15b0b0a 1115 sock *s = sk_new(p->p.pool);
c01e3741 1116 s->type = SK_TCP_ACTIVE;
a22c3e59
OZ
1117 s->saddr = p->local_ip;
1118 s->daddr = p->remote_ip;
dcde7ae5 1119 s->dport = p->cf->remote_port;
53ffbff3 1120 s->iface = p->neigh ? p->neigh->iface : NULL;
943478b0 1121 s->vrf = p->p.vrf;
b1b19433 1122 s->ttl = p->cf->ttl_security ? 255 : hops;
06e0d1b6
OZ
1123 s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
1124 s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
a39b165e
OZ
1125 s->tos = IP_PREC_INTERNET_CONTROL;
1126 s->password = p->cf->password;
1127 s->tx_hook = bgp_connected;
2b712554 1128 s->flags = p->cf->free_bind ? SKF_FREEBIND : 0;
470740f9
OZ
1129 BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
1130 s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
88a183c6 1131 s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
6fd766c1 1132 bgp_setup_conn(p, conn);
e81b440f 1133 bgp_setup_sk(conn, s);
cf31112f 1134 bgp_conn_set_state(conn, BS_CONNECT);
b1b19433
OZ
1135
1136 if (sk_open(s) < 0)
05476c4d 1137 goto err;
b1b19433
OZ
1138
1139 /* Set minimal receive TTL if needed */
1140 if (p->cf->ttl_security)
b1b19433 1141 if (sk_set_min_ttl(s, 256 - hops) < 0)
05476c4d 1142 goto err;
b1b19433 1143
c01e3741 1144 DBG("BGP: Waiting for connect success\n");
d15b0b0a 1145 bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
05476c4d
OZ
1146 return;
1147
d15b0b0a 1148err:
05476c4d
OZ
1149 sk_log_error(s, p->p.name);
1150 bgp_sock_err(s, 0);
1151 return;
c01e3741
MM
1152}
1153
e0835db4
OZ
1154static inline int bgp_is_dynamic(struct bgp_proto *p)
1155{ return ipa_zero(p->remote_ip); }
1156
374917ad
OZ
1157/**
1158 * bgp_find_proto - find existing proto for incoming connection
1159 * @sk: TCP socket
1160 *
1161 */
1162static struct bgp_proto *
1163bgp_find_proto(sock *sk)
1164{
e0835db4 1165 struct bgp_proto *best = NULL;
d15b0b0a 1166 struct bgp_proto *p;
374917ad 1167
470740f9
OZ
1168 /* sk->iface is valid only if src or dst address is link-local */
1169 int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
1170
d15b0b0a
OZ
1171 WALK_LIST(p, proto_list)
1172 if ((p->p.proto == &proto_bgp) &&
e0835db4
OZ
1173 (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
1174 (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
1175 (p->p.vrf == sk->vrf) &&
1176 (p->cf->local_port == sk->sport) &&
470740f9
OZ
1177 (!link || (p->cf->iface == sk->iface)) &&
1178 (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)))
e0835db4
OZ
1179 {
1180 best = p;
374917ad 1181
e0835db4
OZ
1182 if (!bgp_is_dynamic(p))
1183 break;
1184 }
1185
1186 return best;
374917ad
OZ
1187}
1188
54e55169
MM
1189/**
1190 * bgp_incoming_connection - handle an incoming connection
1191 * @sk: TCP socket
1192 * @dummy: unused
1193 *
1194 * This function serves as a socket hook for accepting of new BGP
1195 * connections. It searches a BGP instance corresponding to the peer
1196 * which has connected and if such an instance exists, it creates a
1197 * &bgp_conn structure, attaches it to the instance and either sends
1198 * an Open message or (if there already is an active connection) it
1199 * closes the new connection by sending a Notification message.
1200 */
48e842cc 1201static int
3e236955 1202bgp_incoming_connection(sock *sk, uint dummy UNUSED)
c01e3741 1203{
374917ad
OZ
1204 struct bgp_proto *p;
1205 int acc, hops;
c01e3741 1206
48e842cc 1207 DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
374917ad
OZ
1208 p = bgp_find_proto(sk);
1209 if (!p)
d15b0b0a
OZ
1210 {
1211 log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
1212 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
1213 rfree(sk);
1214 return 0;
1215 }
374917ad 1216
487c6961
OZ
1217 /*
1218 * BIRD should keep multiple incoming connections in OpenSent state (for
1219 * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
1220 * connections are rejected istead. The exception is the case where an
1221 * incoming connection triggers a graceful restart.
1222 */
1223
374917ad
OZ
1224 acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
1225 (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
dd91e467 1226
374917ad 1227 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
d15b0b0a
OZ
1228 {
1229 bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
1230 bgp_handle_graceful_restart(p);
1231 bgp_conn_enter_idle_state(p->conn);
1232 acc = 1;
1233
1234 /* There might be separate incoming connection in OpenSent state */
1235 if (p->incoming_conn.state > BS_ACTIVE)
1236 bgp_close_conn(&p->incoming_conn);
1237 }
374917ad
OZ
1238
1239 BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
1240 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
1241 sk->dport, acc ? "accepted" : "rejected");
1242
1243 if (!acc)
d15b0b0a
OZ
1244 {
1245 rfree(sk);
1246 return 0;
1247 }
374917ad
OZ
1248
1249 hops = p->cf->multihop ? : 1;
1250
1251 if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
1252 goto err;
1253
1254 if (p->cf->ttl_security)
1255 if (sk_set_min_ttl(sk, 256 - hops) < 0)
1256 goto err;
1257
06e0d1b6 1258 if (p->cf->enable_extended_messages)
d15b0b0a
OZ
1259 {
1260 sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
1261 sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
1262 sk_reallocate(sk);
1263 }
06e0d1b6 1264
e0835db4
OZ
1265 /* For dynamic BGP, spawn new instance and postpone the socket */
1266 if (bgp_is_dynamic(p))
1267 {
1268 p = bgp_spawn(p, sk->daddr);
1269 p->postponed_sk = sk;
1270 rmove(sk, p->p.pool);
1271 return 0;
1272 }
1273
1274 rmove(sk, p->p.pool);
374917ad
OZ
1275 bgp_setup_conn(p, &p->incoming_conn);
1276 bgp_setup_sk(&p->incoming_conn, sk);
1277 bgp_send_open(&p->incoming_conn);
1278 return 0;
1279
1280err:
1281 sk_log_error(sk, p->p.name);
1282 log(L_ERR "%s: Incoming connection aborted", p->p.name);
48e842cc
MM
1283 rfree(sk);
1284 return 0;
1285}
1286
2af25a97 1287static void
e81b440f 1288bgp_listen_sock_err(sock *sk UNUSED, int err)
2af25a97
OZ
1289{
1290 if (err == ECONNABORTED)
1291 log(L_WARN "BGP: Incoming connection aborted");
1292 else
a34b0934 1293 log(L_ERR "BGP: Error on listening socket: %M", err);
2af25a97
OZ
1294}
1295
acfce55c
MM
1296static void
1297bgp_start_neighbor(struct bgp_proto *p)
1298{
9be9a264
OZ
1299 /* Called only for single-hop BGP sessions */
1300
a22c3e59
OZ
1301 if (ipa_zero(p->local_ip))
1302 p->local_ip = p->neigh->ifa->ip;
ad440a57 1303
a22c3e59
OZ
1304 if (ipa_is_link_local(p->local_ip))
1305 p->link_addr = p->local_ip;
153f02da
OZ
1306 else if (p->neigh->iface->llv6)
1307 p->link_addr = p->neigh->iface->llv6->ip;
11b32d91 1308
6fd766c1 1309 bgp_initiate(p);
48e842cc
MM
1310}
1311
1312static void
1313bgp_neigh_notify(neighbor *n)
1314{
1315 struct bgp_proto *p = (struct bgp_proto *) n->proto;
523f020b
OZ
1316 int ps = p->p.proto_state;
1317
1318 if (n != p->neigh)
1319 return;
48e842cc 1320
523f020b 1321 if ((ps == PS_DOWN) || (ps == PS_STOP))
b21955e0
OZ
1322 return;
1323
523f020b
OZ
1324 int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1325
1326 if (n->scope <= 0)
d15b0b0a
OZ
1327 {
1328 if (!prepare)
48e842cc 1329 {
d15b0b0a
OZ
1330 BGP_TRACE(D_EVENTS, "Neighbor lost");
1331 bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1332 /* Perhaps also run bgp_update_startup_delay(p)? */
830ba75e 1333 bgp_stop(p, 0, NULL, 0);
4adebdf1 1334 bmp_peer_down(p, BE_MISC, NULL, 0);
523f020b 1335 }
d15b0b0a 1336 }
523f020b 1337 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
d15b0b0a
OZ
1338 {
1339 if (!prepare)
523f020b 1340 {
d15b0b0a
OZ
1341 BGP_TRACE(D_EVENTS, "Link down");
1342 bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1343 if (ps == PS_UP)
1344 bgp_update_startup_delay(p);
830ba75e 1345 bgp_stop(p, 0, NULL, 0);
4adebdf1 1346 bmp_peer_down(p, BE_MISC, NULL, 0);
48e842cc 1347 }
d15b0b0a 1348 }
48e842cc 1349 else
d15b0b0a
OZ
1350 {
1351 if (prepare)
48e842cc 1352 {
d15b0b0a
OZ
1353 BGP_TRACE(D_EVENTS, "Neighbor ready");
1354 bgp_start_neighbor(p);
48e842cc 1355 }
d15b0b0a 1356 }
48e842cc
MM
1357}
1358
1ec52253
OZ
1359static void
1360bgp_bfd_notify(struct bfd_request *req)
1361{
1362 struct bgp_proto *p = req->data;
1363 int ps = p->p.proto_state;
1364
1365 if (req->down && ((ps == PS_START) || (ps == PS_UP)))
d15b0b0a
OZ
1366 {
1367 BGP_TRACE(D_EVENTS, "BFD session down");
1368 bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
5bd73431 1369
9d3fc306 1370 if (req->opts.mode == BGP_BFD_GRACEFUL)
5bd73431
OZ
1371 {
1372 /* Trigger graceful restart */
1373 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1374 bgp_handle_graceful_restart(p);
1375
1376 if (p->incoming_conn.state > BS_IDLE)
1377 bgp_conn_enter_idle_state(&p->incoming_conn);
1378
1379 if (p->outgoing_conn.state > BS_IDLE)
1380 bgp_conn_enter_idle_state(&p->outgoing_conn);
1381 }
1382 else
1383 {
1384 /* Trigger session down */
1385 if (ps == PS_UP)
1386 bgp_update_startup_delay(p);
1387 bgp_stop(p, 0, NULL, 0);
4adebdf1 1388 bmp_peer_down(p, BE_MISC, NULL, 0);
5bd73431 1389 }
d15b0b0a 1390 }
1ec52253
OZ
1391}
1392
1393static void
9d3fc306 1394bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd)
1ec52253 1395{
9d3fc306
OZ
1396 if (bfd && p->bfd_req)
1397 bfd_update_request(p->bfd_req, bfd);
1398
1399 if (bfd && !p->bfd_req && !bgp_is_dynamic(p))
a22c3e59 1400 p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
1ec52253 1401 p->cf->multihop ? NULL : p->neigh->iface,
9d3fc306 1402 p->p.vrf, bgp_bfd_notify, p, bfd);
1ec52253 1403
9d3fc306 1404 if (!bfd && p->bfd_req)
d15b0b0a
OZ
1405 {
1406 rfree(p->bfd_req);
1407 p->bfd_req = NULL;
1408 }
1ec52253
OZ
1409}
1410
d15b0b0a
OZ
1411static void
1412bgp_reload_routes(struct channel *C)
bf47fe4b 1413{
d15b0b0a
OZ
1414 struct bgp_proto *p = (void *) C->proto;
1415 struct bgp_channel *c = (void *) C;
bf47fe4b 1416
54430df9
OZ
1417 /* Ignore non-BGP channels */
1418 if (C->channel != &channel_bgp)
1419 return;
1420
682d3f7d 1421 ASSERT(p->conn && (p->route_refresh || c->c.in_table));
d15b0b0a 1422
682d3f7d
OZ
1423 if (c->c.in_table)
1424 channel_schedule_reload(C);
1425 else
1426 bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
bf47fe4b
OZ
1427}
1428
0c791f87 1429static void
d15b0b0a 1430bgp_feed_begin(struct channel *C, int initial)
0c791f87 1431{
d15b0b0a
OZ
1432 struct bgp_proto *p = (void *) C->proto;
1433 struct bgp_channel *c = (void *) C;
9aed29e6 1434
54430df9
OZ
1435 /* Ignore non-BGP channels */
1436 if (C->channel != &channel_bgp)
1437 return;
1438
9aed29e6
OZ
1439 /* This should not happen */
1440 if (!p->conn)
0c791f87
OZ
1441 return;
1442
9aed29e6 1443 if (initial && p->cf->gr_mode)
d15b0b0a 1444 c->feed_state = BFS_LOADING;
9aed29e6
OZ
1445
1446 /* It is refeed and both sides support enhanced route refresh */
d15b0b0a
OZ
1447 if (!initial && p->enhanced_refresh)
1448 {
1449 /* BoRR must not be sent before End-of-RIB */
1450 if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
1451 return;
9aed29e6 1452
d15b0b0a
OZ
1453 c->feed_state = BFS_REFRESHING;
1454 bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
1455 }
9aed29e6
OZ
1456}
1457
1458static void
d15b0b0a 1459bgp_feed_end(struct channel *C)
9aed29e6 1460{
d15b0b0a
OZ
1461 struct bgp_proto *p = (void *) C->proto;
1462 struct bgp_channel *c = (void *) C;
9aed29e6 1463
54430df9
OZ
1464 /* Ignore non-BGP channels */
1465 if (C->channel != &channel_bgp)
1466 return;
1467
9aed29e6
OZ
1468 /* This should not happen */
1469 if (!p->conn)
1470 return;
1471
1472 /* Non-demarcated feed ended, nothing to do */
d15b0b0a 1473 if (c->feed_state == BFS_NONE)
9aed29e6
OZ
1474 return;
1475
1476 /* Schedule End-of-RIB packet */
d15b0b0a
OZ
1477 if (c->feed_state == BFS_LOADING)
1478 c->feed_state = BFS_LOADED;
9aed29e6
OZ
1479
1480 /* Schedule EoRR packet */
d15b0b0a
OZ
1481 if (c->feed_state == BFS_REFRESHING)
1482 c->feed_state = BFS_REFRESHED;
9aed29e6
OZ
1483
1484 /* Kick TX hook */
d15b0b0a 1485 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
0c791f87
OZ
1486}
1487
9aed29e6 1488
48e842cc
MM
1489static void
1490bgp_start_locked(struct object_lock *lock)
1491{
1492 struct bgp_proto *p = lock->data;
a22c3e59 1493 const struct bgp_config *cf = p->cf;
48e842cc 1494
11b32d91 1495 if (p->p.proto_state != PS_START)
d15b0b0a
OZ
1496 {
1497 DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1498 return;
1499 }
11b32d91 1500
48e842cc 1501 DBG("BGP: Got lock\n");
4847a894 1502
e0835db4 1503 if (cf->multihop || bgp_is_dynamic(p))
d15b0b0a
OZ
1504 {
1505 /* Multi-hop sessions do not use neighbor entries */
1506 bgp_initiate(p);
1507 return;
1508 }
4847a894 1509
a22c3e59 1510 neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
523f020b 1511 if (!n)
d15b0b0a 1512 {
a22c3e59 1513 log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
d15b0b0a
OZ
1514 /* As we do not start yet, we can just disable protocol */
1515 p->p.disabled = 1;
1516 bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1517 proto_notify_state(&p->p, PS_DOWN);
1518 return;
1519 }
523f020b
OZ
1520
1521 p->neigh = n;
1522
1523 if (n->scope <= 0)
a22c3e59 1524 BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface);
523f020b
OZ
1525 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1526 BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1527 else
1528 bgp_start_neighbor(p);
c01e3741
MM
1529}
1530
2638249d
MM
1531static int
1532bgp_start(struct proto *P)
1533{
c01e3741 1534 struct bgp_proto *p = (struct bgp_proto *) P;
a22c3e59
OZ
1535 const struct bgp_config *cf = p->cf;
1536
1537 p->local_ip = cf->local_ip;
a22c3e59
OZ
1538 p->local_as = cf->local_as;
1539 p->remote_as = cf->remote_as;
1540 p->public_as = cf->local_as;
1541
e0835db4
OZ
1542 /* For dynamic BGP childs, remote_ip is already set */
1543 if (ipa_nonzero(cf->remote_ip))
1544 p->remote_ip = cf->remote_ip;
1545
a22c3e59
OZ
1546 /* Confederation ID is used for truly external peers */
1547 if (p->cf->confederation && !p->is_interior)
1548 p->public_as = cf->confederation;
c01e3741 1549
e0835db4
OZ
1550 p->passive = cf->passive || bgp_is_dynamic(p);
1551
11b32d91 1552 p->start_state = BSS_PREPARE;
b552ecc4
MM
1553 p->outgoing_conn.state = BS_IDLE;
1554 p->incoming_conn.state = BS_IDLE;
bcbdcbb6 1555 p->neigh = NULL;
1ec52253 1556 p->bfd_req = NULL;
e0835db4 1557 p->postponed_sk = NULL;
0c791f87 1558 p->gr_ready = 0;
d15b0b0a 1559 p->gr_active_num = 0;
cfe34a31 1560
21d09632
OZ
1561 /* Reset some stats */
1562 p->stats.rx_messages = p->stats.tx_messages = 0;
1563 p->stats.rx_updates = p->stats.tx_updates = 0;
1564 p->stats.rx_bytes = p->stats.tx_bytes = 0;
1565 p->last_rx_update = 0;
1566
961671c0 1567 p->event = ev_new_init(p->p.pool, bgp_decision, p);
a6f79ca5
OZ
1568 p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
1569 p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
0c791f87 1570
4ef09506
OZ
1571 p->local_id = proto_get_router_id(P->cf);
1572 if (p->rr_client)
1573 p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1574
9be9a264 1575 p->remote_id = 0;
ef57b70f 1576 p->link_addr = IPA_NONE;
9be9a264 1577
7fc55925 1578 /* Lock all channels when in GR recovery mode */
6eda3f13 1579 if (p->p.gr_recovery && p->cf->gr_mode)
d15b0b0a
OZ
1580 {
1581 struct bgp_channel *c;
54430df9 1582 BGP_WALK_CHANNELS(p, c)
d15b0b0a
OZ
1583 channel_graceful_restart_lock(&c->c);
1584 }
0c791f87 1585
c01e3741 1586 /*
d15b0b0a
OZ
1587 * Before attempting to create the connection, we need to lock the port,
1588 * so that we are the only instance attempting to talk with that neighbor.
c01e3741 1589 */
a22c3e59 1590 struct object_lock *lock;
c01e3741 1591 lock = p->lock = olock_new(P->pool);
a22c3e59 1592 lock->addr = p->remote_ip;
dcde7ae5 1593 lock->port = p->cf->remote_port;
53ffbff3 1594 lock->iface = p->cf->iface;
9f4908fe 1595 lock->vrf = p->cf->iface ? NULL : p->p.vrf;
c01e3741 1596 lock->type = OBJLOCK_TCP;
c01e3741
MM
1597 lock->hook = bgp_start_locked;
1598 lock->data = p;
eb1e43a9
OZ
1599
1600 /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */
1601 if (bgp_is_dynamic(p))
1602 {
1603 lock->addr = net_prefix(p->cf->remote_range);
1604 lock->inst = 1;
1605 }
1606
c01e3741 1607 olock_acquire(lock);
d51aa281 1608
c01e3741 1609 return PS_START;
2638249d
MM
1610}
1611
d9b77cc2
OZ
1612extern int proto_restart;
1613
2638249d
MM
1614static int
1615bgp_shutdown(struct proto *P)
1616{
c01e3741 1617 struct bgp_proto *p = (struct bgp_proto *) P;
8a68316e 1618 int subcode = 0;
c01e3741 1619
cd1d9961
OZ
1620 char *message = NULL;
1621 byte *data = NULL;
1622 uint len = 0;
c01e3741 1623
85368cd4 1624 BGP_TRACE(D_EVENTS, "Shutdown requested");
b99d3786 1625
ebecb6f6 1626 switch (P->down_code)
d15b0b0a
OZ
1627 {
1628 case PDC_CF_REMOVE:
1629 case PDC_CF_DISABLE:
1630 subcode = 3; // Errcode 6, 3 - peer de-configured
1631 break;
1632
1633 case PDC_CF_RESTART:
1634 subcode = 6; // Errcode 6, 6 - other configuration change
1635 break;
1636
1637 case PDC_CMD_DISABLE:
1638 case PDC_CMD_SHUTDOWN:
8a68316e 1639 shutdown:
d15b0b0a 1640 subcode = 2; // Errcode 6, 2 - administrative shutdown
830ba75e 1641 message = P->message;
d15b0b0a
OZ
1642 break;
1643
1644 case PDC_CMD_RESTART:
1645 subcode = 4; // Errcode 6, 4 - administrative reset
830ba75e 1646 message = P->message;
d15b0b0a
OZ
1647 break;
1648
8a68316e
OZ
1649 case PDC_CMD_GR_DOWN:
1650 if ((p->cf->gr_mode != BGP_GR_ABLE) &&
1651 (p->cf->llgr_mode != BGP_LLGR_ABLE))
1652 goto shutdown;
1653
1654 subcode = -1; // Do not send NOTIFICATION, just close the connection
1655 break;
1656
d15b0b0a
OZ
1657 case PDC_RX_LIMIT_HIT:
1658 case PDC_IN_LIMIT_HIT:
1659 subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1660 /* log message for compatibility */
1661 log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1662 goto limit;
1663
1664 case PDC_OUT_LIMIT_HIT:
1665 subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1666
1667 limit:
1668 bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1669 if (proto_restart)
1670 bgp_update_startup_delay(p);
1671 else
1672 p->startup_delay = 0;
1673 goto done;
1674 }
b99d3786 1675
ebecb6f6 1676 bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
11b32d91 1677 p->startup_delay = 0;
c01e3741 1678
cd1d9961
OZ
1679 /* RFC 8203 - shutdown communication */
1680 if (message)
1681 {
1682 uint msg_len = strlen(message);
7ff34ca2 1683 msg_len = MIN(msg_len, 255);
cd1d9961
OZ
1684
1685 /* Buffer will be freed automatically by protocol shutdown */
1686 data = mb_alloc(p->p.pool, msg_len + 1);
1687 len = msg_len + 1;
1688
1689 data[0] = msg_len;
1690 memcpy(data+1, message, msg_len);
1691 }
1692
d15b0b0a 1693done:
cd1d9961 1694 bgp_stop(p, subcode, data, len);
11b32d91 1695 return p->p.proto_state;
2638249d
MM
1696}
1697
48e842cc 1698static struct proto *
d15b0b0a 1699bgp_init(struct proto_config *CF)
48e842cc 1700{
d15b0b0a 1701 struct proto *P = proto_new(CF);
48e842cc 1702 struct bgp_proto *p = (struct bgp_proto *) P;
d15b0b0a 1703 struct bgp_config *cf = (struct bgp_config *) CF;
48e842cc
MM
1704
1705 P->rt_notify = bgp_rt_notify;
14375237 1706 P->preexport = bgp_preexport;
48e842cc 1707 P->neigh_notify = bgp_neigh_notify;
bf47fe4b 1708 P->reload_routes = bgp_reload_routes;
9aed29e6
OZ
1709 P->feed_begin = bgp_feed_begin;
1710 P->feed_end = bgp_feed_end;
094d2bdb 1711 P->rte_better = bgp_rte_better;
8d9eef17 1712 P->rte_mergable = bgp_rte_mergable;
d15b0b0a 1713 P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
5bd73431 1714 P->rte_modify = bgp_rte_modify_stale;
d471d5fc 1715 P->rte_igp_metric = bgp_rte_igp_metric;
d15b0b0a
OZ
1716
1717 p->cf = cf;
d15b0b0a
OZ
1718 p->is_internal = (cf->local_as == cf->remote_as);
1719 p->is_interior = p->is_internal || cf->confederation_member;
1720 p->rs_client = cf->rs_client;
1721 p->rr_client = cf->rr_client;
1722
e0835db4
OZ
1723 p->ipv4 = ipa_nonzero(cf->remote_ip) ?
1724 ipa_is_ip4(cf->remote_ip) :
1725 (cf->remote_range && (cf->remote_range->type == NET_IP4));
1726
1727 p->remote_ip = cf->remote_ip;
1728 p->remote_as = cf->remote_as;
1729
1730 /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
1731 if (cf->c.parent)
1732 cf->remote_ip = IPA_NONE;
1733
d15b0b0a
OZ
1734 /* Add all channels */
1735 struct bgp_channel_config *cc;
54430df9 1736 BGP_CF_WALK_CHANNELS(cf, cc)
d15b0b0a 1737 proto_add_channel(P, &cc->c);
9be9a264 1738
48e842cc
MM
1739 return P;
1740}
1741
d15b0b0a
OZ
1742static void
1743bgp_channel_init(struct channel *C, struct channel_config *CF)
1744{
1745 struct bgp_channel *c = (void *) C;
1746 struct bgp_channel_config *cf = (void *) CF;
1747
d15b0b0a
OZ
1748 c->cf = cf;
1749 c->afi = cf->afi;
ef57b70f
OZ
1750 c->desc = cf->desc;
1751
1752 if (cf->igp_table_ip4)
1753 c->igp_table_ip4 = cf->igp_table_ip4->table;
1754
1755 if (cf->igp_table_ip6)
1756 c->igp_table_ip6 = cf->igp_table_ip6->table;
1f2eb2ac
OZ
1757
1758 if (cf->base_table)
1759 c->base_table = cf->base_table->table;
d15b0b0a
OZ
1760}
1761
1762static int
1763bgp_channel_start(struct channel *C)
1764{
1765 struct bgp_proto *p = (void *) C->proto;
1766 struct bgp_channel *c = (void *) C;
a22c3e59 1767 ip_addr src = p->local_ip;
d15b0b0a 1768
ef57b70f
OZ
1769 if (c->igp_table_ip4)
1770 rt_lock_table(c->igp_table_ip4);
1771
1772 if (c->igp_table_ip6)
1773 rt_lock_table(c->igp_table_ip6);
d15b0b0a 1774
1f2eb2ac
OZ
1775 if (c->base_table)
1776 {
1777 rt_lock_table(c->base_table);
1778 rt_flowspec_link(c->base_table, c->c.table);
1779 }
1780
d15b0b0a
OZ
1781 c->pool = p->p.pool; // XXXX
1782 bgp_init_bucket_table(c);
1783 bgp_init_prefix_table(c);
1784
682d3f7d
OZ
1785 if (c->cf->import_table)
1786 channel_setup_in_table(C);
1787
b7d7599c
OZ
1788 if (c->cf->export_table)
1789 channel_setup_out_table(C);
1790
5bd73431
OZ
1791 c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
1792
d15b0b0a
OZ
1793 c->next_hop_addr = c->cf->next_hop_addr;
1794 c->link_addr = IPA_NONE;
1795 c->packets_to_send = 0;
1796
1797 /* Try to use source address as next hop address */
1798 if (ipa_zero(c->next_hop_addr))
1799 {
ef57b70f 1800 if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
d15b0b0a
OZ
1801 c->next_hop_addr = src;
1802
ef57b70f 1803 if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
d15b0b0a
OZ
1804 c->next_hop_addr = src;
1805 }
1806
ccee67ca
OZ
1807 /* Use preferred addresses associated with interface / source address */
1808 if (ipa_zero(c->next_hop_addr))
1809 {
1810 /* We know the iface for single-hop, we make lookup for multihop */
586c1800 1811 struct neighbor *nbr = p->neigh ?: neigh_find(&p->p, src, NULL, 0);
ccee67ca
OZ
1812 struct iface *iface = nbr ? nbr->iface : NULL;
1813
1814 if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
1815 c->next_hop_addr = iface->addr4->ip;
1816
1817 if (bgp_channel_is_ipv6(c) && iface && iface->addr6)
1818 c->next_hop_addr = iface->addr6->ip;
1819 }
1820
ef57b70f
OZ
1821 /* Exit if no feasible next hop address is found */
1822 if (ipa_zero(c->next_hop_addr))
1823 {
1824 log(L_WARN "%s: Missing next hop address", p->p.name);
1825 return 0;
1826 }
1827
d15b0b0a 1828 /* Set link-local address for IPv6 single-hop BGP */
ef57b70f 1829 if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
d15b0b0a
OZ
1830 {
1831 c->link_addr = p->link_addr;
1832
1833 if (ipa_zero(c->link_addr))
1834 log(L_WARN "%s: Missing link-local address", p->p.name);
1835 }
1836
ef57b70f
OZ
1837 /* Link local address is already in c->link_addr */
1838 if (ipa_is_link_local(c->next_hop_addr))
1839 c->next_hop_addr = IPA_NONE;
d15b0b0a
OZ
1840
1841 return 0; /* XXXX: Currently undefined */
1842}
1843
1844static void
1845bgp_channel_shutdown(struct channel *C)
1846{
1847 struct bgp_channel *c = (void *) C;
1848
d15b0b0a
OZ
1849 c->next_hop_addr = IPA_NONE;
1850 c->link_addr = IPA_NONE;
7fc55925 1851 c->packets_to_send = 0;
d15b0b0a
OZ
1852}
1853
1854static void
1855bgp_channel_cleanup(struct channel *C)
1856{
1857 struct bgp_channel *c = (void *) C;
1858
ef57b70f
OZ
1859 if (c->igp_table_ip4)
1860 rt_unlock_table(c->igp_table_ip4);
1861
1862 if (c->igp_table_ip6)
1863 rt_unlock_table(c->igp_table_ip6);
b8a3608a 1864
1f2eb2ac
OZ
1865 if (c->base_table)
1866 {
1867 rt_flowspec_unlink(c->base_table, c->c.table);
1868 rt_unlock_table(c->base_table);
1869 }
1870
b8a3608a
OZ
1871 c->index = 0;
1872
1873 /* Cleanup rest of bgp_channel starting at pool field */
1874 memset(&(c->pool), 0, sizeof(struct bgp_channel) - OFFSETOF(struct bgp_channel, pool));
ef57b70f
OZ
1875}
1876
1877static inline struct bgp_channel_config *
1878bgp_find_channel_config(struct bgp_config *cf, u32 afi)
1879{
1880 struct bgp_channel_config *cc;
1881
54430df9 1882 BGP_CF_WALK_CHANNELS(cf, cc)
ef57b70f
OZ
1883 if (cc->afi == afi)
1884 return cc;
1885
1886 return NULL;
d15b0b0a 1887}
a7f23f58 1888
ef57b70f
OZ
1889struct rtable_config *
1890bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
1891{
1892 struct bgp_channel_config *cc2;
1893 struct rtable_config *tab;
1894
1895 /* First, try table connected by the channel */
1896 if (cc->c.table->addr_type == type)
1897 return cc->c.table;
1898
1899 /* Find paired channel with the same SAFI but the other AFI */
1900 u32 afi2 = cc->afi ^ 0x30000;
1901 cc2 = bgp_find_channel_config(cf, afi2);
1902
1903 /* Second, try IGP table configured in the paired channel */
1904 if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
1905 return tab;
1906
1907 /* Third, try table connected by the paired channel */
1908 if (cc2 && (cc2->c.table->addr_type == type))
1909 return cc2->c.table;
1910
1911 /* Last, try default table of given type */
1912 if (tab = cf->c.global->def_tables[type])
1913 return tab;
1914
1915 cf_error("Undefined IGP table");
1916}
1917
1f2eb2ac
OZ
1918static struct rtable_config *
1919bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc)
1920{
1921 /* Expected table type */
1922 u32 type = (cc->afi == BGP_AF_FLOW4) ? NET_IP4 : NET_IP6;
1923
1924 /* First, try appropriate IP channel */
1925 u32 afi2 = BGP_AF(BGP_AFI(cc->afi), BGP_SAFI_UNICAST);
1926 struct bgp_channel_config *cc2 = bgp_find_channel_config(cf, afi2);
1927 if (cc2 && (cc2->c.table->addr_type == type))
1928 return cc2->c.table;
1929
1930 /* Last, try default table of given type */
1931 struct rtable_config *tab = cf->c.global->def_tables[type];
1932 if (tab)
1933 return tab;
1934
1935 cf_error("Undefined base table");
1936}
ef57b70f 1937
a7f23f58 1938void
d15b0b0a 1939bgp_postconfig(struct proto_config *CF)
a7f23f58 1940{
d15b0b0a 1941 struct bgp_config *cf = (void *) CF;
a7f23f58
OZ
1942
1943 /* Do not check templates at all */
d15b0b0a 1944 if (cf->c.class == SYM_TEMPLATE)
a7f23f58
OZ
1945 return;
1946
f3e59178 1947
23ee6b1c
OZ
1948 /* Handle undefined remote_as, zero should mean unspecified external */
1949 if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL))
1950 cf->remote_as = cf->local_as;
1951
1952 int internal = (cf->local_as == cf->remote_as);
1953 int interior = internal || cf->confederation_member;
1954
f3e59178 1955 /* EBGP direct by default, IBGP multihop by default */
d15b0b0a
OZ
1956 if (cf->multihop < 0)
1957 cf->multihop = internal ? 64 : 0;
f3e59178 1958
5bd73431
OZ
1959 /* LLGR mode default based on GR mode */
1960 if (cf->llgr_mode < 0)
1961 cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0;
1962
dea98864
OZ
1963 /* Link check for single-hop BGP by default */
1964 if (cf->check_link < 0)
1965 cf->check_link = !cf->multihop;
1966
f3e59178 1967
d15b0b0a 1968 if (!cf->local_as)
a7f23f58
OZ
1969 cf_error("Local AS number must be set");
1970
e0835db4 1971 if (ipa_zero(cf->remote_ip) && !cf->remote_range)
a7f23f58
OZ
1972 cf_error("Neighbor must be configured");
1973
e0835db4
OZ
1974 if (ipa_zero(cf->local_ip) && cf->strict_bind)
1975 cf_error("Local address must be configured for strict bind");
1976
23ee6b1c
OZ
1977 if (!cf->remote_as && !cf->peer_type)
1978 cf_error("Remote AS number (or peer type) must be set");
1979
1980 if ((cf->peer_type == BGP_PT_INTERNAL) && !internal)
1981 cf_error("IBGP cannot have different ASNs");
1982
1983 if ((cf->peer_type == BGP_PT_EXTERNAL) && internal)
1984 cf_error("EBGP cannot have the same ASNs");
a1beb8f3 1985
470740f9
OZ
1986 if (!cf->iface && (ipa_is_link_local(cf->local_ip) ||
1987 ipa_is_link_local(cf->remote_ip)))
1988 cf_error("Link-local addresses require defined interface");
a1beb8f3 1989
d15b0b0a 1990 if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
a7f23f58
OZ
1991 cf_error("Neighbor AS number out of range (AS4 not available)");
1992
d15b0b0a 1993 if (!internal && cf->rr_client)
a7f23f58
OZ
1994 cf_error("Only internal neighbor can be RR client");
1995
d15b0b0a 1996 if (internal && cf->rs_client)
a7f23f58
OZ
1997 cf_error("Only external neighbor can be RS client");
1998
c73b5d2d
EB
1999 if (internal && (cf->local_role != BGP_ROLE_UNDEFINED))
2000 cf_error("Local role cannot be set on IBGP sessions");
2001
971721c9
OZ
2002 if (interior && (cf->local_role != BGP_ROLE_UNDEFINED))
2003 log(L_WARN "BGP roles are not recommended to be used within AS confederations");
2004
c73b5d2d
EB
2005 if (cf->require_roles && (cf->local_role == BGP_ROLE_UNDEFINED))
2006 cf_error("Local role must be set if roles are required");
2007
d15b0b0a
OZ
2008 if (!cf->confederation && cf->confederation_member)
2009 cf_error("Confederation ID must be set for member sessions");
a7f23f58 2010
d15b0b0a
OZ
2011 if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
2012 ipa_is_link_local(cf->remote_ip)))
53ffbff3
OZ
2013 cf_error("Multihop BGP cannot be used with link-local addresses");
2014
e919601a 2015 if (cf->multihop && cf->iface)
33b6c292
OZ
2016 cf_error("Multihop BGP cannot be bound to interface");
2017
d15b0b0a 2018 if (cf->multihop && cf->check_link)
523f020b
OZ
2019 cf_error("Multihop BGP cannot depend on link state");
2020
d15b0b0a
OZ
2021 if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
2022 cf_error("Multihop BGP with BFD requires specified local address");
2023
5bd73431
OZ
2024 if (!cf->gr_mode && cf->llgr_mode)
2025 cf_error("Long-lived graceful restart requires basic graceful restart");
2026
0b228fca
OZ
2027 if (internal && cf->enforce_first_as)
2028 cf_error("Enforce first AS check is requires EBGP sessions");
2029
3859e4ef
OZ
2030 if (cf->keepalive_time > cf->hold_time)
2031 cf_error("Keepalive time must be at most hold time");
2032
2033 if (cf->keepalive_time > (cf->hold_time / 2))
2034 log(L_WARN "Keepalive time should be at most 1/2 of hold time");
2035
2036 if (cf->min_hold_time > cf->hold_time)
2037 cf_error("Min hold time (%u) exceeds hold time (%u)",
2038 cf->min_hold_time, cf->hold_time);
2039
2040 uint keepalive_time = cf->keepalive_time ?: cf->hold_time / 3;
2041 if (cf->min_keepalive_time > keepalive_time)
2042 cf_error("Min keepalive time (%u) exceeds keepalive time (%u)",
2043 cf->min_keepalive_time, keepalive_time);
2044
d15b0b0a
OZ
2045
2046 struct bgp_channel_config *cc;
54430df9 2047 BGP_CF_WALK_CHANNELS(cf, cc)
d15b0b0a 2048 {
3831b619
OZ
2049 /* Handle undefined import filter */
2050 if (cc->c.in_filter == FILTER_UNDEF)
2051 if (interior)
2052 cc->c.in_filter = FILTER_ACCEPT;
2053 else
2054 cf_error("EBGP requires explicit import policy");
2055
2056 /* Handle undefined export filter */
2057 if (cc->c.out_filter == FILTER_UNDEF)
2058 if (interior)
2059 cc->c.out_filter = FILTER_REJECT;
2060 else
2061 cf_error("EBGP requires explicit export policy");
2062
d15b0b0a
OZ
2063 /* Disable after error incompatible with restart limit action */
2064 if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
2065 cc->c.in_limit.action = PLA_DISABLE;
2066
1cab2b4a
OZ
2067 /* Different default based on rr_client, rs_client */
2068 if (cc->next_hop_keep == 0xff)
2069 cc->next_hop_keep = cf->rr_client ? NH_IBGP : (cf->rs_client ? NH_ALL : NH_NO);
2070
d15b0b0a
OZ
2071 /* Different default for gw_mode */
2072 if (!cc->gw_mode)
2073 cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
1ec52253 2074
8f79e6b9
OZ
2075 /* Different default for next_hop_prefer */
2076 if (!cc->next_hop_prefer)
2077 cc->next_hop_prefer = (cc->gw_mode == GW_DIRECT) ? NHP_GLOBAL : NHP_LOCAL;
2078
5bd73431 2079 /* Defaults based on proto config */
d15b0b0a
OZ
2080 if (cc->gr_able == 0xff)
2081 cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
26822d8f 2082
5bd73431
OZ
2083 if (cc->llgr_able == 0xff)
2084 cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE);
2085
2086 if (cc->llgr_time == ~0U)
2087 cc->llgr_time = cf->llgr_time;
2088
09ee846d
OZ
2089 /* AIGP enabled by default on interior sessions */
2090 if (cc->aigp == 0xff)
2091 cc->aigp = interior;
2092
6fe11c99 2093 /* Default values of IGP tables */
ef57b70f
OZ
2094 if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
2095 {
2096 if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
2097 cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
2098
2099 if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
2100 cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
6fe11c99
OZ
2101
2102 if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
2103 cf_error("Mismatched IGP table type");
2104
2105 if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
2106 cf_error("Mismatched IGP table type");
ef57b70f
OZ
2107 }
2108
1f2eb2ac
OZ
2109 /* Default value of base table */
2110 if ((BGP_SAFI(cc->afi) == BGP_SAFI_FLOW) && cc->validate && !cc->base_table)
2111 cc->base_table = bgp_default_base_table(cf, cc);
2112
2113 if (cc->base_table && !cc->base_table->trie_used)
2114 cf_error("Flowspec validation requires base table (%s) with trie",
2115 cc->base_table->name);
2116
d15b0b0a
OZ
2117 if (cf->multihop && (cc->gw_mode == GW_DIRECT))
2118 cf_error("Multihop BGP cannot use direct gateway mode");
26822d8f 2119
d15b0b0a
OZ
2120 if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
2121 cf_error("BGP in recursive mode prohibits sorted table");
2122
2123 if (cf->deterministic_med && cc->c.table->sorted)
2124 cf_error("BGP with deterministic MED prohibits sorted table");
2125
2126 if (cc->secondary && !cc->c.table->sorted)
2127 cf_error("BGP with secondary option requires sorted table");
2128 }
a7f23f58
OZ
2129}
2130
2131static int
d15b0b0a 2132bgp_reconfigure(struct proto *P, struct proto_config *CF)
a7f23f58 2133{
d15b0b0a 2134 struct bgp_proto *p = (void *) P;
a22c3e59
OZ
2135 const struct bgp_config *new = (void *) CF;
2136 const struct bgp_config *old = p->cf;
a7f23f58 2137
d15b0b0a 2138 if (proto_get_router_id(CF) != p->local_id)
79b4e12e
OZ
2139 return 0;
2140
a7f23f58
OZ
2141 int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
2142 ((byte *) new) + sizeof(struct proto_config),
2143 // password item is last and must be checked separately
2144 OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
15b0a922 2145 && !bstrcmp(old->password, new->password)
d35fb9d7
OZ
2146 && ((!old->remote_range && !new->remote_range)
2147 || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range)))
15b0a922 2148 && !bstrcmp(old->dynamic_name, new->dynamic_name)
e0835db4 2149 && (old->dynamic_name_digits == new->dynamic_name_digits);
d15b0b0a
OZ
2150
2151 /* FIXME: Move channel reconfiguration to generic protocol code ? */
2152 struct channel *C, *C2;
2153 struct bgp_channel_config *cc;
2154
2155 WALK_LIST(C, p->p.channels)
2156 C->stale = 1;
2157
54430df9 2158 BGP_CF_WALK_CHANNELS(new, cc)
d15b0b0a
OZ
2159 {
2160 C = (struct channel *) bgp_find_channel(p, cc->afi);
2161 same = proto_configure_channel(P, &C, &cc->c) && same;
d15b0b0a
OZ
2162 }
2163
2164 WALK_LIST_DELSAFE(C, C2, p->p.channels)
2165 if (C->stale)
2166 same = proto_configure_channel(P, &C, NULL) && same;
2167
1ec52253
OZ
2168 if (same && (p->start_state > BSS_PREPARE))
2169 bgp_update_bfd(p, new->bfd);
2170
a7f23f58
OZ
2171 /* We should update our copy of configuration ptr as old configuration will be freed */
2172 if (same)
2173 p->cf = new;
2174
e0835db4
OZ
2175 /* Reset name counter */
2176 p->dynamic_name_counter = 0;
2177
a7f23f58
OZ
2178 return same;
2179}
2180
1f2eb2ac 2181#define TABLE(cf, NAME) ((cf)->NAME ? (cf)->NAME->table : NULL )
ffb38dfb 2182
d15b0b0a 2183static int
e2b530aa 2184bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed)
d15b0b0a 2185{
6c9cda6f 2186 struct bgp_proto *p = (void *) C->proto;
d15b0b0a
OZ
2187 struct bgp_channel *c = (void *) C;
2188 struct bgp_channel_config *new = (void *) CC;
2189 struct bgp_channel_config *old = c->cf;
2190
e2b530aa 2191 if ((new->secondary != old->secondary) ||
1f2eb2ac 2192 (new->validate != old->validate) ||
e2b530aa
OZ
2193 (new->gr_able != old->gr_able) ||
2194 (new->llgr_able != old->llgr_able) ||
2195 (new->llgr_time != old->llgr_time) ||
2196 (new->ext_next_hop != old->ext_next_hop) ||
2197 (new->add_path != old->add_path) ||
2198 (new->import_table != old->import_table) ||
b7d7599c 2199 (new->export_table != old->export_table) ||
1f2eb2ac
OZ
2200 (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) ||
2201 (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) ||
2202 (TABLE(new, base_table) != TABLE(old, base_table)))
d15b0b0a
OZ
2203 return 0;
2204
e2b530aa 2205 if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP))
d15b0b0a
OZ
2206 return 0;
2207
09ee846d 2208 if ((new->gw_mode != old->gw_mode) ||
8f79e6b9 2209 (new->next_hop_prefer != old->next_hop_prefer) ||
09ee846d
OZ
2210 (new->aigp != old->aigp) ||
2211 (new->cost != old->cost))
6c9cda6f
OZ
2212 {
2213 /* import_changed itself does not force ROUTE_REFRESH when import_table is active */
2214 if (c->c.in_table && (c->c.channel_state == CS_UP))
2215 bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
2216
e2b530aa 2217 *import_changed = 1;
6c9cda6f 2218 }
e2b530aa
OZ
2219
2220 if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
2221 (new->next_hop_self != old->next_hop_self) ||
2222 (new->next_hop_keep != old->next_hop_keep) ||
09ee846d
OZ
2223 (new->aigp != old->aigp) ||
2224 (new->aigp_originate != old->aigp_originate))
e2b530aa
OZ
2225 *export_changed = 1;
2226
d15b0b0a
OZ
2227 c->cf = new;
2228 return 1;
2229}
2230
a7f23f58 2231static void
9d3fc306 2232bgp_copy_config(struct proto_config *dest, struct proto_config *src)
a7f23f58 2233{
9d3fc306
OZ
2234 struct bgp_config *d = (void *) dest;
2235 struct bgp_config *s = (void *) src;
2236
2237 /* Copy BFD options */
2238 if (s->bfd)
2239 {
2240 struct bfd_options *opts = cfg_alloc(sizeof(struct bfd_options));
2241 memcpy(opts, s->bfd, sizeof(struct bfd_options));
2242 d->bfd = opts;
2243 }
a7f23f58
OZ
2244}
2245
2246
54e55169
MM
2247/**
2248 * bgp_error - report a protocol error
2249 * @c: connection
2250 * @code: error code (according to the RFC)
2e9b2421 2251 * @subcode: error sub-code
54e55169
MM
2252 * @data: data to be passed in the Notification message
2253 * @len: length of the data
2254 *
2255 * bgp_error() sends a notification packet to tell the other side that a protocol
2e9b2421 2256 * error has occurred (including the data considered erroneous if possible) and
54e55169
MM
2257 * closes the connection.
2258 */
3fdbafb6 2259void
d15b0b0a 2260bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
3fdbafb6 2261{
b99d3786
OZ
2262 struct bgp_proto *p = c->bgp;
2263
11b32d91 2264 if (c->state == BS_CLOSE)
3fdbafb6 2265 return;
11b32d91 2266
d15b0b0a 2267 bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
b99d3786 2268 bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
11b32d91
OZ
2269 bgp_conn_enter_close_state(c);
2270
3fdbafb6
MM
2271 c->notify_code = code;
2272 c->notify_subcode = subcode;
efcece2d
MM
2273 c->notify_data = data;
2274 c->notify_size = (len > 0) ? len : 0;
d15b0b0a 2275 bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
b99d3786
OZ
2276
2277 if (code != 6)
d15b0b0a
OZ
2278 {
2279 bgp_update_startup_delay(p);
830ba75e 2280 bgp_stop(p, 0, NULL, 0);
d15b0b0a 2281 }
3fdbafb6
MM
2282}
2283
11b32d91
OZ
2284/**
2285 * bgp_store_error - store last error for status report
2286 * @p: BGP instance
2287 * @c: connection
2288 * @class: error class (BE_xxx constants)
2289 * @code: error code (class specific)
2290 *
2291 * bgp_store_error() decides whether given error is interesting enough
2292 * and store that error to last_error variables of @p
2293 */
2294void
2295bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
2296{
2297 /* During PS_UP, we ignore errors on secondary connection */
2298 if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
2299 return;
2300
2301 /* During PS_STOP, we ignore any errors, as we want to report
2302 * the error that caused transition to PS_STOP
2303 */
2304 if (p->p.proto_state == PS_STOP)
2305 return;
2306
2307 p->last_error_class = class;
2308 p->last_error_code = code;
2309}
2310
/* Names of connection states, indexed by the connection state value */
static char *bgp_state_names[] = {
  "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close"
};

/* Status prefixes, indexed by the stored error class */
static char *bgp_err_classes[] = {
  "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""
};

/* Messages for errors of class BE_MISC, indexed by the stored error code */
static char *bgp_misc_errors[] = {
  "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"
};

/* Messages for errors of class BE_AUTO_DOWN, indexed by the stored error code */
static char *bgp_auto_errors[] = {
  "", "Route limit exceeded"
};

/* Names of per-channel graceful restart states, indexed by gr_active */
static char *bgp_gr_states[] = {
  "None", "Regular", "Long-lived"
};
11b32d91 2316
b8113a5e
OZ
2317static const char *
2318bgp_last_errmsg(struct bgp_proto *p)
973399ae 2319{
11b32d91 2320 switch (p->last_error_class)
d15b0b0a
OZ
2321 {
2322 case BE_MISC:
2323 return bgp_misc_errors[p->last_error_code];
2324 case BE_SOCKET:
2325 return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
2326 case BE_BGP_RX:
2327 case BE_BGP_TX:
2328 return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
2329 case BE_AUTO_DOWN:
2330 return bgp_auto_errors[p->last_error_code];
2331 default:
2332 return "";
2333 }
b8113a5e
OZ
2334}
2335
2336static const char *
2337bgp_state_dsc(struct bgp_proto *p)
2338{
51947659
OZ
2339 if (p->p.proto_state == PS_DOWN)
2340 return "Down";
b8113a5e
OZ
2341
2342 int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
e0835db4 2343 if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive)
b8113a5e
OZ
2344 return "Passive";
2345
2346 return bgp_state_names[state];
2347}
2348
2349static void
2350bgp_get_status(struct proto *P, byte *buf)
2351{
2352 struct bgp_proto *p = (struct bgp_proto *) P;
2353
2354 const char *err1 = bgp_err_classes[p->last_error_class];
2355 const char *err2 = bgp_last_errmsg(p);
11b32d91 2356
f4ab2317 2357 if (P->proto_state == PS_DOWN)
11b32d91 2358 bsprintf(buf, "%s%s", err1, err2);
f4ab2317 2359 else
b8113a5e
OZ
2360 bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
2361}
2362
256cc8ee
OZ
2363static void
2364bgp_show_afis(int code, char *s, u32 *afis, uint count)
2365{
2366 buffer b;
2367 LOG_BUFFER_INIT(b);
2368
2369 buffer_puts(&b, s);
2370
2371 for (u32 *af = afis; af < (afis + count); af++)
2372 {
2373 const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
2374 if (desc)
2375 buffer_print(&b, " %s", desc->name);
2376 else
2377 buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
2378 }
2379
2380 if (b.pos == b.end)
2381 strcpy(b.end - 32, " ... <too long>");
2382
2383 cli_msg(code, b.start);
2384}
2385
af611f93 2386const char *
c73b5d2d
EB
2387bgp_format_role_name(u8 role)
2388{
2389 static const char *bgp_role_names[] = { "provider", "rs_server", "rs_client", "customer", "peer" };
2390 if (role == BGP_ROLE_UNDEFINED) return "undefined";
971721c9 2391 if (role < ARRAY_SIZE(bgp_role_names)) return bgp_role_names[role];
c73b5d2d
EB
2392 return "?";
2393}
2394
256cc8ee
OZ
2395static void
2396bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
2397{
2398 struct bgp_af_caps *ac;
2399 uint any_mp_bgp = 0;
2400 uint any_gr_able = 0;
2401 uint any_add_path = 0;
d8022d26 2402 uint any_ext_next_hop = 0;
5bd73431 2403 uint any_llgr_able = 0;
256cc8ee
OZ
2404 u32 *afl1 = alloca(caps->af_count * sizeof(u32));
2405 u32 *afl2 = alloca(caps->af_count * sizeof(u32));
2406 uint afn1, afn2;
2407
2408 WALK_AF_CAPS(caps, ac)
2409 {
2410 any_mp_bgp |= ac->ready;
2411 any_gr_able |= ac->gr_able;
2412 any_add_path |= ac->add_path;
d8022d26 2413 any_ext_next_hop |= ac->ext_next_hop;
5bd73431 2414 any_llgr_able |= ac->llgr_able;
256cc8ee
OZ
2415 }
2416
2417 if (any_mp_bgp)
2418 {
2419 cli_msg(-1006, " Multiprotocol");
2420
2421 afn1 = 0;
2422 WALK_AF_CAPS(caps, ac)
2423 if (ac->ready)
2424 afl1[afn1++] = ac->afi;
2425
2426 bgp_show_afis(-1006, " AF announced:", afl1, afn1);
2427 }
2428
2429 if (caps->route_refresh)
2430 cli_msg(-1006, " Route refresh");
2431
d8022d26
OZ
2432 if (any_ext_next_hop)
2433 {
2434 cli_msg(-1006, " Extended next hop");
2435
2436 afn1 = 0;
2437 WALK_AF_CAPS(caps, ac)
2438 if (ac->ext_next_hop)
2439 afl1[afn1++] = ac->afi;
2440
2441 bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1);
2442 }
2443
256cc8ee
OZ
2444 if (caps->ext_messages)
2445 cli_msg(-1006, " Extended message");
2446
2447 if (caps->gr_aware)
2448 cli_msg(-1006, " Graceful restart");
2449
2450 if (any_gr_able)
2451 {
2452 /* Continues from gr_aware */
2453 cli_msg(-1006, " Restart time: %u", caps->gr_time);
2454 if (caps->gr_flags & BGP_GRF_RESTART)
2455 cli_msg(-1006, " Restart recovery");
2456
2457 afn1 = afn2 = 0;
2458 WALK_AF_CAPS(caps, ac)
2459 {
2460 if (ac->gr_able)
2461 afl1[afn1++] = ac->afi;
2462
2463 if (ac->gr_af_flags & BGP_GRF_FORWARDING)
2464 afl2[afn2++] = ac->afi;
2465 }
2466
2467 bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2468 bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2469 }
2470
2471 if (caps->as4_support)
2472 cli_msg(-1006, " 4-octet AS numbers");
2473
2474 if (any_add_path)
2475 {
2476 cli_msg(-1006, " ADD-PATH");
2477
2478 afn1 = afn2 = 0;
2479 WALK_AF_CAPS(caps, ac)
2480 {
2481 if (ac->add_path & BGP_ADD_PATH_RX)
2482 afl1[afn1++] = ac->afi;
2483
2484 if (ac->add_path & BGP_ADD_PATH_TX)
2485 afl2[afn2++] = ac->afi;
2486 }
2487
2488 bgp_show_afis(-1006, " RX:", afl1, afn1);
2489 bgp_show_afis(-1006, " TX:", afl2, afn2);
2490 }
2491
2492 if (caps->enhanced_refresh)
2493 cli_msg(-1006, " Enhanced refresh");
5bd73431
OZ
2494
2495 if (caps->llgr_aware)
2496 cli_msg(-1006, " Long-lived graceful restart");
2497
2498 if (any_llgr_able)
2499 {
2500 u32 stale_time = 0;
2501
2502 afn1 = afn2 = 0;
2503 WALK_AF_CAPS(caps, ac)
2504 {
2505 stale_time = MAX(stale_time, ac->llgr_time);
2506
2507 if (ac->llgr_able && ac->llgr_time)
2508 afl1[afn1++] = ac->afi;
2509
2510 if (ac->llgr_flags & BGP_GRF_FORWARDING)
2511 afl2[afn2++] = ac->afi;
2512 }
2513
2514 /* Continues from llgr_aware */
2515 cli_msg(-1006, " LL stale time: %u", stale_time);
2516
2517 bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2518 bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2519 }
71423871
VB
2520
2521 if (caps->hostname)
2522 cli_msg(-1006, " Hostname: %s", caps->hostname);
c73b5d2d
EB
2523
2524 if (caps->role != BGP_ROLE_UNDEFINED)
2525 cli_msg(-1006, " Role: %s", bgp_format_role_name(caps->role));
256cc8ee
OZ
2526}
2527
b8113a5e
OZ
2528static void
2529bgp_show_proto_info(struct proto *P)
2530{
2531 struct bgp_proto *p = (struct bgp_proto *) P;
b8113a5e 2532
b8113a5e 2533 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
e0835db4
OZ
2534
2535 if (bgp_is_dynamic(p) && p->cf->remote_range)
2536 cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range);
2537 else
2538 cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface);
2539
a9c19b92
OZ
2540 if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT))
2541 cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port);
2542
e0835db4 2543 cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
0b1e1e1a 2544 cli_msg(-1006, " Local AS: %u", p->cf->local_as);
b8113a5e 2545
d15b0b0a 2546 if (p->gr_active_num)
0c791f87
OZ
2547 cli_msg(-1006, " Neighbor graceful restart active");
2548
b8113a5e 2549 if (P->proto_state == PS_START)
d15b0b0a
OZ
2550 {
2551 struct bgp_conn *oc = &p->outgoing_conn;
b8113a5e 2552
d15b0b0a 2553 if ((p->start_state < BSS_CONNECT) &&
a6f79ca5 2554 (tm_active(p->startup_timer)))
d3fa9e84 2555 cli_msg(-1006, " Error wait: %t/%u",
a6f79ca5 2556 tm_remains(p->startup_timer), p->startup_delay);
b8113a5e 2557
d15b0b0a 2558 if ((oc->state == BS_ACTIVE) &&
a6f79ca5 2559 (tm_active(oc->connect_timer)))
d3fa9e84 2560 cli_msg(-1006, " Connect delay: %t/%u",
a6f79ca5 2561 tm_remains(oc->connect_timer), p->cf->connect_delay_time);
0c791f87 2562
a6f79ca5 2563 if (p->gr_active_num && tm_active(p->gr_timer))
d3fa9e84 2564 cli_msg(-1006, " Restart timer: %t/-",
a6f79ca5 2565 tm_remains(p->gr_timer));
d15b0b0a 2566 }
b8113a5e 2567 else if (P->proto_state == PS_UP)
d15b0b0a
OZ
2568 {
2569 cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
256cc8ee
OZ
2570 cli_msg(-1006, " Local capabilities");
2571 bgp_show_capabilities(p, p->conn->local_caps);
2572 cli_msg(-1006, " Neighbor capabilities");
2573 bgp_show_capabilities(p, p->conn->remote_caps);
7fc55925
OZ
2574 cli_msg(-1006, " Session: %s%s%s%s%s",
2575 p->is_internal ? "internal" : "external",
2576 p->cf->multihop ? " multihop" : "",
2577 p->rr_client ? " route-reflector" : "",
2578 p->rs_client ? " route-server" : "",
2579 p->as4_session ? " AS4" : "");
a22c3e59 2580 cli_msg(-1006, " Source address: %I", p->local_ip);
d3fa9e84 2581 cli_msg(-1006, " Hold timer: %t/%u",
a6f79ca5 2582 tm_remains(p->conn->hold_timer), p->conn->hold_time);
d3fa9e84 2583 cli_msg(-1006, " Keepalive timer: %t/%u",
a6f79ca5 2584 tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
d15b0b0a 2585 }
b8113a5e 2586
5a6e8380 2587#if 0
21d09632
OZ
2588 struct bgp_stats *s = &p->stats;
2589 cli_msg(-1006, " FSM established transitions: %u",
2590 s->fsm_established_transitions);
2591 cli_msg(-1006, " Rcvd messages: %u total / %u updates / %lu bytes",
2592 s->rx_messages, s->rx_updates, s->rx_bytes);
2593 cli_msg(-1006, " Sent messages: %u total / %u updates / %lu bytes",
2594 s->tx_messages, s->tx_updates, s->tx_bytes);
2595 cli_msg(-1006, " Last rcvd update elapsed time: %t s",
2596 p->last_rx_update ? (current_time() - p->last_rx_update) : 0);
5a6e8380 2597#endif
21d09632 2598
523f020b 2599 if ((p->last_error_class != BE_NONE) &&
b8113a5e 2600 (p->last_error_class != BE_MAN_DOWN))
d15b0b0a
OZ
2601 {
2602 const char *err1 = bgp_err_classes[p->last_error_class];
2603 const char *err2 = bgp_last_errmsg(p);
2604 cli_msg(-1006, " Last error: %s%s", err1, err2);
2605 }
2606
2607 {
ef57b70f 2608 struct bgp_channel *c;
d15b0b0a 2609 WALK_LIST(c, p->p.channels)
ef57b70f
OZ
2610 {
2611 channel_show_info(&c->c);
2612
54430df9
OZ
2613 if (c->c.channel != &channel_bgp)
2614 continue;
2615
5bd73431
OZ
2616 if (p->gr_active_num)
2617 cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
2618
0db7a1d6 2619 if (c->stale_timer && tm_active(c->stale_timer))
5bd73431
OZ
2620 cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer));
2621
7fc55925
OZ
2622 if (c->c.channel_state == CS_UP)
2623 {
2624 if (ipa_zero(c->link_addr))
2625 cli_msg(-1006, " BGP Next hop: %I", c->next_hop_addr);
2626 else
2627 cli_msg(-1006, " BGP Next hop: %I %I", c->next_hop_addr, c->link_addr);
2628 }
ccee67ca 2629
ef57b70f
OZ
2630 if (c->igp_table_ip4)
2631 cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name);
2632
2633 if (c->igp_table_ip6)
2634 cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
1f2eb2ac
OZ
2635
2636 if (c->base_table)
2637 cli_msg(-1006, " Base table: %s", c->base_table->name);
ef57b70f 2638 }
d15b0b0a 2639 }
973399ae
MM
2640}
2641
f4deef89 2642const struct channel_class channel_bgp = {
d15b0b0a
OZ
2643 .channel_size = sizeof(struct bgp_channel),
2644 .config_size = sizeof(struct bgp_channel_config),
2645 .init = bgp_channel_init,
2646 .start = bgp_channel_start,
2647 .shutdown = bgp_channel_shutdown,
2648 .cleanup = bgp_channel_cleanup,
2649 .reconfigure = bgp_channel_reconfigure,
2650};
2651
2638249d 2652struct protocol proto_bgp = {
4a591d4b
PT
2653 .name = "BGP",
2654 .template = "bgp%d",
ee7e2ffd 2655 .class = PROTOCOL_BGP,
4a591d4b 2656 .preference = DEF_PREF_BGP,
1e37e35c 2657 .channel_mask = NB_IP | NB_VPN | NB_FLOW,
d15b0b0a 2658 .proto_size = sizeof(struct bgp_proto),
2bbc3083 2659 .config_size = sizeof(struct bgp_config),
d15b0b0a 2660 .postconfig = bgp_postconfig,
4a591d4b
PT
2661 .init = bgp_init,
2662 .start = bgp_start,
2663 .shutdown = bgp_shutdown,
4a591d4b
PT
2664 .reconfigure = bgp_reconfigure,
2665 .copy_config = bgp_copy_config,
2666 .get_status = bgp_get_status,
2667 .get_attr = bgp_get_attr,
2668 .get_route_info = bgp_get_route_info,
2669 .show_proto_info = bgp_show_proto_info
2638249d 2670};
4a23ede2
MM
2671
2672void bgp_build(void)
2673{
2674 proto_build(&proto_bgp);
2675}