]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/bgp.c
BGP: Better dispatch of incoming connections
[thirdparty/bird.git] / proto / bgp / bgp.c
1 /*
2 * BIRD -- The Border Gateway Protocol
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 /**
12 * DOC: Border Gateway Protocol
13 *
14 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
15 * the connection and most of the interface with BIRD core, |packets.c| handling
16 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
17 * manipulation with BGP attribute lists.
18 *
19 * As opposed to the other existing routing daemons, BIRD has a sophisticated
20 * core architecture which is able to keep all the information needed by BGP in
21 * the primary routing table, therefore no complex data structures like a
22 * central BGP table are needed. This increases memory footprint of a BGP router
23 * with many connections, but not too much and, which is more important, it
24 * makes BGP much easier to implement.
25 *
26 * Each instance of BGP (corresponding to a single BGP peer) is described by a
27 * &bgp_proto structure to which are attached individual connections represented
28 * by &bgp_connection (usually, there exists only one connection, but during BGP
29 * session setup, there can be more of them). The connections are handled
30 * according to the BGP state machine defined in the RFC with all the timers and
31 * all the parameters configurable.
32 *
33 * In incoming direction, we listen on the connection's socket and each time we
34 * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
35 * markers and passes complete packets to bgp_rx_packet() which distributes the
36 * packet according to its type.
37 *
38 * In outgoing direction, we gather all the routing updates and sort them to
39 * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
40 * fast comparison of &rta's and a &fib which helps us to find if we already
41 * have another route for the same destination queued for sending, so that we
42 * can replace it with the new one immediately instead of sending both
43 * updates). There also exists a special bucket holding all the route
44 * withdrawals which cannot be queued anywhere else as they don't have any
45 * attributes. If we have any packet to send (due to either new routes or the
46 * connection tracking code wanting to send an Open, Keepalive or Notification
47 * message), we call bgp_schedule_packet() which sets the corresponding bit in a
48 * @packets_to_send bit field in &bgp_conn and as soon as the transmit socket
49 * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
50 * packet type bits and calls the corresponding bgp_create_xx() functions,
51 * eventually rescheduling the same packet type if we have more data of the same
52 * type to send.
53 *
54 * The processing of attributes consists of two functions: bgp_decode_attrs()
55 * for checking of the attribute blocks and translating them to the language of
56 * BIRD's extended attributes and bgp_encode_attrs() which does the
57 * converse. Both functions are built around a @bgp_attr_table array describing
58 * all important characteristics of all known attributes. Unknown transitive
59 * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
60 *
61 * BGP protocol implements graceful restart in both restarting (local restart)
62 * and receiving (neighbor restart) roles. The first is handled mostly by the
63 * graceful restart code in the nest, BGP protocol just handles capabilities,
64 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
65 * The second is implemented by internal restart of the BGP state to %BS_IDLE
66 * and protocol state to %PS_START, but keeping the protocol up from the core
67 * point of view and therefore maintaining received routes. Routing table
68 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
69 * stale routes after reestablishment of BGP session during graceful restart.
70 *
71 * Supported standards:
72 * RFC 4271 - Border Gateway Protocol 4 (BGP)
73 * RFC 1997 - BGP Communities Attribute
74 * RFC 2385 - Protection of BGP Sessions via TCP MD5 Signature
75 * RFC 2545 - Use of BGP Multiprotocol Extensions for IPv6
76 * RFC 2918 - Route Refresh Capability
77 * RFC 3107 - Carrying Label Information in BGP
78 * RFC 4360 - BGP Extended Communities Attribute
79 * RFC 4364 - BGP/MPLS IPv4 Virtual Private Networks
80 * RFC 4456 - BGP Route Reflection
81 * RFC 4486 - Subcodes for BGP Cease Notification Message
82 * RFC 4659 - BGP/MPLS IPv6 Virtual Private Networks
83 * RFC 4724 - Graceful Restart Mechanism for BGP
84 * RFC 4760 - Multiprotocol extensions for BGP
85 * RFC 4798 - Connecting IPv6 Islands over IPv4 MPLS
86 * RFC 5065 - AS confederations for BGP
87 * RFC 5082 - Generalized TTL Security Mechanism
88 * RFC 5492 - Capabilities Advertisement with BGP
89 * RFC 5549 - Advertising IPv4 NLRI with an IPv6 Next Hop
90 * RFC 5575 - Dissemination of Flow Specification Rules
91 * RFC 5668 - 4-Octet AS Specific BGP Extended Community
92 * RFC 6286 - AS-Wide Unique BGP Identifier
93 * RFC 6608 - Subcodes for BGP Finite State Machine Error
94 * RFC 6793 - BGP Support for 4-Octet AS Numbers
95 * RFC 7313 - Enhanced Route Refresh Capability for BGP
96 * RFC 7606 - Revised Error Handling for BGP UPDATE Messages
97 * RFC 7911 - Advertisement of Multiple Paths in BGP
98 * RFC 7947 - Internet Exchange BGP Route Server
99 * RFC 8092 - BGP Large Communities Attribute
100 * RFC 8203 - BGP Administrative Shutdown Communication
101 * RFC 8212 - Default EBGP Route Propagation Behavior without Policies
102 * draft-ietf-idr-bgp-extended-messages-27
103 * draft-uttaro-idr-bgp-persistence-04
104 */
105
106 #undef LOCAL_DEBUG
107
108 #include <stdlib.h>
109
110 #include "nest/bird.h"
111 #include "nest/iface.h"
112 #include "nest/protocol.h"
113 #include "nest/route.h"
114 #include "nest/cli.h"
115 #include "nest/locks.h"
116 #include "conf/conf.h"
117 #include "filter/filter.h"
118 #include "lib/socket.h"
119 #include "lib/resource.h"
120 #include "lib/string.h"
121
122 #include "bgp.h"
123
124
125 struct linpool *bgp_linpool; /* Global temporary pool */
126 struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */
127 static list bgp_sockets; /* Global list of listening sockets */
128
129
130 static void bgp_connect(struct bgp_proto *p);
131 static void bgp_active(struct bgp_proto *p);
132 static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
133
134 static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
135 static void bgp_listen_sock_err(sock *sk UNUSED, int err);
136
137 /**
138 * bgp_open - open a BGP instance
139 * @p: BGP instance
140 *
141 * This function allocates and configures shared BGP resources, mainly listening
142 * sockets. Should be called as the last step during initialization (when lock
143 * is acquired and neighbor is ready). When error, caller should change state to
144 * PS_DOWN and return immediately.
145 */
146 static int
147 bgp_open(struct bgp_proto *p)
148 {
149 struct bgp_socket *bs = NULL;
150 struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
151 ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
152 (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6);
153 uint port = p->cf->local_port;
154
155 /* FIXME: Add some global init? */
156 if (!bgp_linpool)
157 init_list(&bgp_sockets);
158
159 /* We assume that cf->iface is defined iff cf->local_ip is link-local */
160
161 WALK_LIST(bs, bgp_sockets)
162 if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->sport == port) &&
163 (bs->sk->iface == ifa) && (bs->sk->vrf == p->p.vrf))
164 {
165 bs->uc++;
166 p->sock = bs;
167 return 0;
168 }
169
170 sock *sk = sk_new(proto_pool);
171 sk->type = SK_TCP_PASSIVE;
172 sk->ttl = 255;
173 sk->saddr = addr;
174 sk->sport = port;
175 sk->iface = ifa;
176 sk->vrf = p->p.vrf;
177 sk->flags = 0;
178 sk->tos = IP_PREC_INTERNET_CONTROL;
179 sk->rbsize = BGP_RX_BUFFER_SIZE;
180 sk->tbsize = BGP_TX_BUFFER_SIZE;
181 sk->rx_hook = bgp_incoming_connection;
182 sk->err_hook = bgp_listen_sock_err;
183
184 if (sk_open(sk) < 0)
185 goto err;
186
187 bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
188 bs->sk = sk;
189 bs->uc = 1;
190 p->sock = bs;
191 sk->data = bs;
192
193 add_tail(&bgp_sockets, &bs->n);
194
195 if (!bgp_linpool)
196 {
197 bgp_linpool = lp_new_default(proto_pool);
198 bgp_linpool2 = lp_new_default(proto_pool);
199 }
200
201 return 0;
202
203 err:
204 sk_log_error(sk, p->p.name);
205 log(L_ERR "%s: Cannot open listening socket", p->p.name);
206 rfree(sk);
207 return -1;
208 }
209
210 /**
211 * bgp_close - close a BGP instance
212 * @p: BGP instance
213 *
214 * This function frees and deconfigures shared BGP resources.
215 */
216 static void
217 bgp_close(struct bgp_proto *p)
218 {
219 struct bgp_socket *bs = p->sock;
220
221 ASSERT(bs && bs->uc);
222
223 if (--bs->uc)
224 return;
225
226 rfree(bs->sk);
227 rem_node(&bs->n);
228 mb_free(bs);
229
230 if (!EMPTY_LIST(bgp_sockets))
231 return;
232
233 rfree(bgp_linpool);
234 bgp_linpool = NULL;
235
236 rfree(bgp_linpool2);
237 bgp_linpool2 = NULL;
238 }
239
240 static inline int
241 bgp_setup_auth(struct bgp_proto *p, int enable)
242 {
243 if (p->cf->password)
244 {
245 int rv = sk_set_md5_auth(p->sock->sk,
246 p->cf->local_ip, p->cf->remote_ip, p->cf->iface,
247 enable ? p->cf->password : NULL, p->cf->setkey);
248
249 if (rv < 0)
250 sk_log_error(p->sock->sk, p->p.name);
251
252 return rv;
253 }
254 else
255 return 0;
256 }
257
258 static inline struct bgp_channel *
259 bgp_find_channel(struct bgp_proto *p, u32 afi)
260 {
261 struct bgp_channel *c;
262 WALK_LIST(c, p->p.channels)
263 if (c->afi == afi)
264 return c;
265
266 return NULL;
267 }
268
269 static void
270 bgp_startup(struct bgp_proto *p)
271 {
272 BGP_TRACE(D_EVENTS, "Started");
273 p->start_state = BSS_CONNECT;
274
275 if (!p->cf->passive)
276 bgp_active(p);
277 }
278
279 static void
280 bgp_startup_timeout(timer *t)
281 {
282 bgp_startup(t->data);
283 }
284
285
286 static void
287 bgp_initiate(struct bgp_proto *p)
288 {
289 int err_val;
290
291 if (bgp_open(p) < 0)
292 { err_val = BEM_NO_SOCKET; goto err1; }
293
294 if (bgp_setup_auth(p, 1) < 0)
295 { err_val = BEM_INVALID_MD5; goto err2; }
296
297 if (p->cf->bfd)
298 bgp_update_bfd(p, p->cf->bfd);
299
300 if (p->startup_delay)
301 {
302 p->start_state = BSS_DELAY;
303 BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
304 bgp_start_timer(p->startup_timer, p->startup_delay);
305 }
306 else
307 bgp_startup(p);
308
309 return;
310
311 err2:
312 bgp_close(p);
313 err1:
314 p->p.disabled = 1;
315 bgp_store_error(p, NULL, BE_MISC, err_val);
316 proto_notify_state(&p->p, PS_DOWN);
317
318 return;
319 }
320
321 /**
322 * bgp_start_timer - start a BGP timer
323 * @t: timer
324 * @value: time (in seconds) to fire (0 to disable the timer)
325 *
326 * This functions calls tm_start() on @t with time @value and the amount of
327 * randomization suggested by the BGP standard. Please use it for all BGP
328 * timers.
329 */
330 void
331 bgp_start_timer(timer *t, uint value)
332 {
333 if (value)
334 {
335 /* The randomization procedure is specified in RFC 4271 section 10 */
336 btime time = value S;
337 btime randomize = random() % ((time / 4) + 1);
338 tm_start(t, time - randomize);
339 }
340 else
341 tm_stop(t);
342 }
343
344 /**
345 * bgp_close_conn - close a BGP connection
346 * @conn: connection to close
347 *
348 * This function takes a connection described by the &bgp_conn structure, closes
349 * its socket and frees all resources associated with it.
350 */
351 void
352 bgp_close_conn(struct bgp_conn *conn)
353 {
354 // struct bgp_proto *p = conn->bgp;
355
356 DBG("BGP: Closing connection\n");
357 conn->packets_to_send = 0;
358 conn->channels_to_send = 0;
359 rfree(conn->connect_timer);
360 conn->connect_timer = NULL;
361 rfree(conn->keepalive_timer);
362 conn->keepalive_timer = NULL;
363 rfree(conn->hold_timer);
364 conn->hold_timer = NULL;
365 rfree(conn->tx_ev);
366 conn->tx_ev = NULL;
367 rfree(conn->sk);
368 conn->sk = NULL;
369
370 mb_free(conn->local_caps);
371 conn->local_caps = NULL;
372 mb_free(conn->remote_caps);
373 conn->remote_caps = NULL;
374 }
375
376
377 /**
378 * bgp_update_startup_delay - update a startup delay
379 * @p: BGP instance
380 *
381 * This function updates a startup delay that is used to postpone next BGP
382 * connect. It also handles disable_after_error and might stop BGP instance
383 * when error happened and disable_after_error is on.
384 *
385 * It should be called when BGP protocol error happened.
386 */
387 void
388 bgp_update_startup_delay(struct bgp_proto *p)
389 {
390 struct bgp_config *cf = p->cf;
391
392 DBG("BGP: Updating startup delay\n");
393
394 if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
395 p->startup_delay = 0;
396
397 p->last_proto_error = current_time();
398
399 if (cf->disable_after_error)
400 {
401 p->startup_delay = 0;
402 p->p.disabled = 1;
403 return;
404 }
405
406 if (!p->startup_delay)
407 p->startup_delay = cf->error_delay_time_min;
408 else
409 p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
410 }
411
412 static void
413 bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
414 {
415 switch (conn->state)
416 {
417 case BS_IDLE:
418 case BS_CLOSE:
419 return;
420
421 case BS_CONNECT:
422 case BS_ACTIVE:
423 bgp_conn_enter_idle_state(conn);
424 return;
425
426 case BS_OPENSENT:
427 case BS_OPENCONFIRM:
428 case BS_ESTABLISHED:
429 bgp_error(conn, 6, subcode, data, len);
430 return;
431
432 default:
433 bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
434 }
435 }
436
437 static void
438 bgp_down(struct bgp_proto *p)
439 {
440 if (p->start_state > BSS_PREPARE)
441 {
442 bgp_setup_auth(p, 0);
443 bgp_close(p);
444 }
445
446 BGP_TRACE(D_EVENTS, "Down");
447 proto_notify_state(&p->p, PS_DOWN);
448 }
449
450 static void
451 bgp_decision(void *vp)
452 {
453 struct bgp_proto *p = vp;
454
455 DBG("BGP: Decision start\n");
456 if ((p->p.proto_state == PS_START) &&
457 (p->outgoing_conn.state == BS_IDLE) &&
458 (p->incoming_conn.state != BS_OPENCONFIRM) &&
459 !p->cf->passive)
460 bgp_active(p);
461
462 if ((p->p.proto_state == PS_STOP) &&
463 (p->outgoing_conn.state == BS_IDLE) &&
464 (p->incoming_conn.state == BS_IDLE))
465 bgp_down(p);
466 }
467
468 void
469 bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
470 {
471 proto_notify_state(&p->p, PS_STOP);
472 bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
473 bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
474 ev_schedule(p->event);
475 }
476
477 static inline void
478 bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
479 {
480 if (conn->bgp->p.mrtdump & MD_STATES)
481 bgp_dump_state_change(conn, conn->state, new_state);
482
483 conn->state = new_state;
484 }
485
486 void
487 bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
488 {
489 /* Really, most of the work is done in bgp_rx_open(). */
490 bgp_conn_set_state(conn, BS_OPENCONFIRM);
491 }
492
493 static const struct bgp_af_caps dummy_af_caps = { };
494
495 void
496 bgp_conn_enter_established_state(struct bgp_conn *conn)
497 {
498 struct bgp_proto *p = conn->bgp;
499 struct bgp_caps *local = conn->local_caps;
500 struct bgp_caps *peer = conn->remote_caps;
501 struct bgp_channel *c;
502
503 BGP_TRACE(D_EVENTS, "BGP session established");
504
505 /* For multi-hop BGP sessions */
506 if (ipa_zero(p->source_addr))
507 p->source_addr = conn->sk->saddr;
508
509 /* In case of LLv6 is not valid during BGP start */
510 if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6)
511 p->link_addr = p->neigh->iface->llv6->ip;
512
513 conn->sk->fast_rx = 0;
514
515 p->conn = conn;
516 p->last_error_class = 0;
517 p->last_error_code = 0;
518
519 p->as4_session = conn->as4_session;
520
521 p->route_refresh = peer->route_refresh;
522 p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
523
524 /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
525 p->gr_ready = p->llgr_ready = 0; /* Updated later */
526
527 /* Whether peer is ready to handle our GR recovery */
528 int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
529
530 if (p->gr_active_num)
531 tm_stop(p->gr_timer);
532
533 /* Number of active channels */
534 int num = 0;
535
536 /* Summary state of ADD_PATH RX for active channels */
537 uint summary_add_path_rx = 0;
538
539 WALK_LIST(c, p->p.channels)
540 {
541 const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
542 const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
543
544 /* Ignore AFIs that were not announced in multiprotocol capability */
545 if (!loc || !loc->ready)
546 loc = &dummy_af_caps;
547
548 if (!rem || !rem->ready)
549 rem = &dummy_af_caps;
550
551 int active = loc->ready && rem->ready;
552 c->c.disabled = !active;
553 c->c.reloadable = p->route_refresh || c->cf->import_table;
554
555 c->index = active ? num++ : 0;
556
557 c->feed_state = BFS_NONE;
558 c->load_state = BFS_NONE;
559
560 /* Channels where peer may do GR */
561 uint gr_ready = active && local->gr_aware && rem->gr_able;
562 uint llgr_ready = active && local->llgr_aware && rem->llgr_able;
563
564 c->gr_ready = gr_ready || llgr_ready;
565 p->gr_ready = p->gr_ready || c->gr_ready;
566 p->llgr_ready = p->llgr_ready || llgr_ready;
567
568 /* Remember last LLGR stale time */
569 c->stale_time = local->llgr_aware ? rem->llgr_time : 0;
570
571 /* Channels not able to recover gracefully */
572 if (p->p.gr_recovery && (!active || !peer_gr_ready))
573 channel_graceful_restart_unlock(&c->c);
574
575 /* Channels waiting for local convergence */
576 if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
577 c->c.gr_wait = 1;
578
579 /* Channels where regular graceful restart failed */
580 if ((c->gr_active == BGP_GRS_ACTIVE) &&
581 !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
582 bgp_graceful_restart_done(c);
583
584 /* Channels where regular long-lived restart failed */
585 if ((c->gr_active == BGP_GRS_LLGR) &&
586 !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING)))
587 bgp_graceful_restart_done(c);
588
589 /* GR capability implies that neighbor will send End-of-RIB */
590 if (peer->gr_aware)
591 c->load_state = BFS_LOADING;
592
593 c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
594 c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
595 c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
596
597 if (active)
598 summary_add_path_rx |= !c->add_path_rx ? 1 : 2;
599
600 /* Update RA mode */
601 if (c->add_path_tx)
602 c->c.ra_mode = RA_ANY;
603 else if (c->cf->secondary)
604 c->c.ra_mode = RA_ACCEPTED;
605 else
606 c->c.ra_mode = RA_OPTIMAL;
607 }
608
609 p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
610 p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
611 p->channel_count = num;
612 p->summary_add_path_rx = summary_add_path_rx;
613
614 WALK_LIST(c, p->p.channels)
615 {
616 if (c->c.disabled)
617 continue;
618
619 p->afi_map[c->index] = c->afi;
620 p->channel_map[c->index] = c;
621 }
622
623 /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
624
625 bgp_conn_set_state(conn, BS_ESTABLISHED);
626 proto_notify_state(&p->p, PS_UP);
627 }
628
629 static void
630 bgp_conn_leave_established_state(struct bgp_proto *p)
631 {
632 BGP_TRACE(D_EVENTS, "BGP session closed");
633 p->conn = NULL;
634
635 if (p->p.proto_state == PS_UP)
636 bgp_stop(p, 0, NULL, 0);
637 }
638
639 void
640 bgp_conn_enter_close_state(struct bgp_conn *conn)
641 {
642 struct bgp_proto *p = conn->bgp;
643 int os = conn->state;
644
645 bgp_conn_set_state(conn, BS_CLOSE);
646 tm_stop(conn->keepalive_timer);
647 conn->sk->rx_hook = NULL;
648
649 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
650 bgp_start_timer(conn->hold_timer, 10);
651
652 if (os == BS_ESTABLISHED)
653 bgp_conn_leave_established_state(p);
654 }
655
656 void
657 bgp_conn_enter_idle_state(struct bgp_conn *conn)
658 {
659 struct bgp_proto *p = conn->bgp;
660 int os = conn->state;
661
662 bgp_close_conn(conn);
663 bgp_conn_set_state(conn, BS_IDLE);
664 ev_schedule(p->event);
665
666 if (os == BS_ESTABLISHED)
667 bgp_conn_leave_established_state(p);
668 }
669
670 /**
671 * bgp_handle_graceful_restart - handle detected BGP graceful restart
672 * @p: BGP instance
673 *
674 * This function is called when a BGP graceful restart of the neighbor is
675 * detected (when the TCP connection fails or when a new TCP connection
676 * appears). The function activates processing of the restart - starts routing
677 * table refresh cycle and activates BGP restart timer. The protocol state goes
678 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
679 * caller.
680 */
681 void
682 bgp_handle_graceful_restart(struct bgp_proto *p)
683 {
684 ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
685
686 BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
687 p->gr_active_num ? " - already pending" : "");
688
689 p->gr_active_num = 0;
690
691 struct bgp_channel *c;
692 WALK_LIST(c, p->p.channels)
693 {
694 /* FIXME: perhaps check for channel state instead of disabled flag? */
695 if (c->c.disabled)
696 continue;
697
698 if (c->gr_ready)
699 {
700 p->gr_active_num++;
701
702 switch (c->gr_active)
703 {
704 case BGP_GRS_NONE:
705 c->gr_active = BGP_GRS_ACTIVE;
706 rt_refresh_begin(c->c.table, &c->c);
707 break;
708
709 case BGP_GRS_ACTIVE:
710 rt_refresh_end(c->c.table, &c->c);
711 rt_refresh_begin(c->c.table, &c->c);
712 break;
713
714 case BGP_GRS_LLGR:
715 rt_refresh_begin(c->c.table, &c->c);
716 rt_modify_stale(c->c.table, &c->c);
717 break;
718 }
719 }
720 else
721 {
722 /* Just flush the routes */
723 rt_refresh_begin(c->c.table, &c->c);
724 rt_refresh_end(c->c.table, &c->c);
725 }
726
727 /* Reset bucket and prefix tables */
728 bgp_free_bucket_table(c);
729 bgp_free_prefix_table(c);
730 bgp_init_bucket_table(c);
731 bgp_init_prefix_table(c);
732 c->packets_to_send = 0;
733 }
734
735 /* p->gr_ready -> at least one active channel is c->gr_ready */
736 ASSERT(p->gr_active_num > 0);
737
738 proto_notify_state(&p->p, PS_START);
739 tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
740 }
741
742 /**
743 * bgp_graceful_restart_done - finish active BGP graceful restart
744 * @c: BGP channel
745 *
746 * This function is called when the active BGP graceful restart of the neighbor
747 * should be finished for channel @c - either successfully (the neighbor sends
748 * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
749 * unsuccessfully (the neighbor does not support BGP graceful restart on the new
750 * session). The function ends the routing table refresh cycle.
751 */
752 void
753 bgp_graceful_restart_done(struct bgp_channel *c)
754 {
755 struct bgp_proto *p = (void *) c->c.proto;
756
757 ASSERT(c->gr_active);
758 c->gr_active = 0;
759 p->gr_active_num--;
760
761 if (!p->gr_active_num)
762 BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
763
764 tm_stop(c->stale_timer);
765 rt_refresh_end(c->c.table, &c->c);
766 }
767
768 /**
769 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
770 * @t: timer
771 *
772 * This function is a timeout hook for @gr_timer, implementing BGP restart time
773 * limit for reestablisment of the BGP session after the graceful restart. When
774 * fired, we just proceed with the usual protocol restart.
775 */
776
777 static void
778 bgp_graceful_restart_timeout(timer *t)
779 {
780 struct bgp_proto *p = t->data;
781
782 BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
783
784 if (p->llgr_ready)
785 {
786 struct bgp_channel *c;
787 WALK_LIST(c, p->p.channels)
788 {
789 /* Channel is not in GR and is already flushed */
790 if (!c->gr_active)
791 continue;
792
793 /* Channel is already in LLGR from past restart */
794 if (c->gr_active == BGP_GRS_LLGR)
795 continue;
796
797 /* Channel is in GR, but does not support LLGR -> stop GR */
798 if (!c->stale_time)
799 {
800 bgp_graceful_restart_done(c);
801 continue;
802 }
803
804 /* Channel is in GR, and supports LLGR -> start LLGR */
805 c->gr_active = BGP_GRS_LLGR;
806 tm_start(c->stale_timer, c->stale_time S);
807 rt_modify_stale(c->c.table, &c->c);
808 }
809 }
810 else
811 bgp_stop(p, 0, NULL, 0);
812 }
813
814 static void
815 bgp_long_lived_stale_timeout(timer *t)
816 {
817 struct bgp_channel *c = t->data;
818 struct bgp_proto *p = (void *) c->c.proto;
819
820 BGP_TRACE(D_EVENTS, "Long-lived stale timeout");
821
822 bgp_graceful_restart_done(c);
823 }
824
825
826 /**
827 * bgp_refresh_begin - start incoming enhanced route refresh sequence
828 * @c: BGP channel
829 *
830 * This function is called when an incoming enhanced route refresh sequence is
831 * started by the neighbor, demarcated by the BoRR packet. The function updates
832 * the load state and starts the routing table refresh cycle. Note that graceful
833 * restart also uses routing table refresh cycle, but RFC 7313 and load states
834 * ensure that these two sequences do not overlap.
835 */
836 void
837 bgp_refresh_begin(struct bgp_channel *c)
838 {
839 struct bgp_proto *p = (void *) c->c.proto;
840
841 if (c->load_state == BFS_LOADING)
842 { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
843
844 c->load_state = BFS_REFRESHING;
845 rt_refresh_begin(c->c.table, &c->c);
846
847 if (c->c.in_table)
848 rt_refresh_begin(c->c.in_table, &c->c);
849 }
850
851 /**
852 * bgp_refresh_end - finish incoming enhanced route refresh sequence
853 * @c: BGP channel
854 *
855 * This function is called when an incoming enhanced route refresh sequence is
856 * finished by the neighbor, demarcated by the EoRR packet. The function updates
857 * the load state and ends the routing table refresh cycle. Routes not received
858 * during the sequence are removed by the nest.
859 */
860 void
861 bgp_refresh_end(struct bgp_channel *c)
862 {
863 struct bgp_proto *p = (void *) c->c.proto;
864
865 if (c->load_state != BFS_REFRESHING)
866 { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
867
868 c->load_state = BFS_NONE;
869 rt_refresh_end(c->c.table, &c->c);
870
871 if (c->c.in_table)
872 rt_prune_sync(c->c.in_table, 0);
873 }
874
875
876 static void
877 bgp_send_open(struct bgp_conn *conn)
878 {
879 DBG("BGP: Sending open\n");
880 conn->sk->rx_hook = bgp_rx;
881 conn->sk->tx_hook = bgp_tx;
882 tm_stop(conn->connect_timer);
883 bgp_schedule_packet(conn, NULL, PKT_OPEN);
884 bgp_conn_set_state(conn, BS_OPENSENT);
885 bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
886 }
887
888 static void
889 bgp_connected(sock *sk)
890 {
891 struct bgp_conn *conn = sk->data;
892 struct bgp_proto *p = conn->bgp;
893
894 BGP_TRACE(D_EVENTS, "Connected");
895 bgp_send_open(conn);
896 }
897
898 static void
899 bgp_connect_timeout(timer *t)
900 {
901 struct bgp_conn *conn = t->data;
902 struct bgp_proto *p = conn->bgp;
903
904 DBG("BGP: connect_timeout\n");
905 if (p->p.proto_state == PS_START)
906 {
907 bgp_close_conn(conn);
908 bgp_connect(p);
909 }
910 else
911 bgp_conn_enter_idle_state(conn);
912 }
913
914 static void
915 bgp_sock_err(sock *sk, int err)
916 {
917 struct bgp_conn *conn = sk->data;
918 struct bgp_proto *p = conn->bgp;
919
920 /*
921 * This error hook may be called either asynchronously from main
922 * loop, or synchronously from sk_send(). But sk_send() is called
923 * only from bgp_tx() and bgp_kick_tx(), which are both called
924 * asynchronously from main loop. Moreover, they end if err hook is
925 * called. Therefore, we could suppose that it is always called
926 * asynchronously.
927 */
928
929 bgp_store_error(p, conn, BE_SOCKET, err);
930
931 if (err)
932 BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
933 else
934 BGP_TRACE(D_EVENTS, "Connection closed");
935
936 if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
937 bgp_handle_graceful_restart(p);
938
939 bgp_conn_enter_idle_state(conn);
940 }
941
942 static void
943 bgp_hold_timeout(timer *t)
944 {
945 struct bgp_conn *conn = t->data;
946 struct bgp_proto *p = conn->bgp;
947
948 DBG("BGP: Hold timeout\n");
949
950 /* We are already closing the connection - just do hangup */
951 if (conn->state == BS_CLOSE)
952 {
953 BGP_TRACE(D_EVENTS, "Connection stalled");
954 bgp_conn_enter_idle_state(conn);
955 return;
956 }
957
958 /* If there is something in input queue, we are probably congested
959 and perhaps just not processed BGP packets in time. */
960
961 if (sk_rx_ready(conn->sk) > 0)
962 bgp_start_timer(conn->hold_timer, 10);
963 else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
964 {
965 BGP_TRACE(D_EVENTS, "Hold timer expired");
966 bgp_handle_graceful_restart(p);
967 bgp_conn_enter_idle_state(conn);
968 }
969 else
970 bgp_error(conn, 4, 0, NULL, 0);
971 }
972
973 static void
974 bgp_keepalive_timeout(timer *t)
975 {
976 struct bgp_conn *conn = t->data;
977
978 DBG("BGP: Keepalive timer\n");
979 bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
980
981 /* Kick TX a bit faster */
982 if (ev_active(conn->tx_ev))
983 ev_run(conn->tx_ev);
984 }
985
986 static void
987 bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
988 {
989 conn->sk = NULL;
990 conn->bgp = p;
991
992 conn->packets_to_send = 0;
993 conn->channels_to_send = 0;
994 conn->last_channel = 0;
995 conn->last_channel_count = 0;
996
997 conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0);
998 conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0);
999 conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
1000
1001 conn->tx_ev = ev_new_init(p->p.pool, bgp_kick_tx, conn);
1002 }
1003
1004 static void
1005 bgp_setup_sk(struct bgp_conn *conn, sock *s)
1006 {
1007 s->data = conn;
1008 s->err_hook = bgp_sock_err;
1009 s->fast_rx = 1;
1010 conn->sk = s;
1011 }
1012
1013 static void
1014 bgp_active(struct bgp_proto *p)
1015 {
1016 int delay = MAX(1, p->cf->connect_delay_time);
1017 struct bgp_conn *conn = &p->outgoing_conn;
1018
1019 BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
1020 bgp_setup_conn(p, conn);
1021 bgp_conn_set_state(conn, BS_ACTIVE);
1022 bgp_start_timer(conn->connect_timer, delay);
1023 }
1024
1025 /**
1026 * bgp_connect - initiate an outgoing connection
1027 * @p: BGP instance
1028 *
1029 * The bgp_connect() function creates a new &bgp_conn and initiates
1030 * a TCP connection to the peer. The rest of connection setup is governed
1031 * by the BGP state machine as described in the standard.
1032 */
1033 static void
1034 bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
1035 {
1036 struct bgp_conn *conn = &p->outgoing_conn;
1037 int hops = p->cf->multihop ? : 1;
1038
1039 DBG("BGP: Connecting\n");
1040 sock *s = sk_new(p->p.pool);
1041 s->type = SK_TCP_ACTIVE;
1042 s->saddr = p->source_addr;
1043 s->daddr = p->cf->remote_ip;
1044 s->dport = p->cf->remote_port;
1045 s->iface = p->neigh ? p->neigh->iface : NULL;
1046 s->vrf = p->p.vrf;
1047 s->ttl = p->cf->ttl_security ? 255 : hops;
1048 s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
1049 s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
1050 s->tos = IP_PREC_INTERNET_CONTROL;
1051 s->password = p->cf->password;
1052 s->tx_hook = bgp_connected;
1053 BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
1054 s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
1055 s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
1056 bgp_setup_conn(p, conn);
1057 bgp_setup_sk(conn, s);
1058 bgp_conn_set_state(conn, BS_CONNECT);
1059
1060 if (sk_open(s) < 0)
1061 goto err;
1062
1063 /* Set minimal receive TTL if needed */
1064 if (p->cf->ttl_security)
1065 if (sk_set_min_ttl(s, 256 - hops) < 0)
1066 goto err;
1067
1068 DBG("BGP: Waiting for connect success\n");
1069 bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
1070 return;
1071
1072 err:
1073 sk_log_error(s, p->p.name);
1074 bgp_sock_err(s, 0);
1075 return;
1076 }
1077
1078 /**
1079 * bgp_find_proto - find existing proto for incoming connection
1080 * @sk: TCP socket
1081 *
1082 */
1083 static struct bgp_proto *
1084 bgp_find_proto(sock *sk)
1085 {
1086 struct bgp_proto *p;
1087
1088 /* sk->iface is valid only if src or dst address is link-local */
1089 int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
1090
1091 WALK_LIST(p, proto_list)
1092 if ((p->p.proto == &proto_bgp) &&
1093 (p->sock == sk->data) &&
1094 ipa_equal(p->cf->remote_ip, sk->daddr) &&
1095 (!link || (p->cf->iface == sk->iface)) &&
1096 (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)))
1097 return p;
1098
1099 return NULL;
1100 }
1101
1102 /**
1103 * bgp_incoming_connection - handle an incoming connection
1104 * @sk: TCP socket
1105 * @dummy: unused
1106 *
1107 * This function serves as a socket hook for accepting of new BGP
1108 * connections. It searches a BGP instance corresponding to the peer
1109 * which has connected and if such an instance exists, it creates a
1110 * &bgp_conn structure, attaches it to the instance and either sends
1111 * an Open message or (if there already is an active connection) it
1112 * closes the new connection by sending a Notification message.
1113 */
1114 static int
1115 bgp_incoming_connection(sock *sk, uint dummy UNUSED)
1116 {
1117 struct bgp_proto *p;
1118 int acc, hops;
1119
1120 DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
1121 p = bgp_find_proto(sk);
1122 if (!p)
1123 {
1124 log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
1125 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
1126 rfree(sk);
1127 return 0;
1128 }
1129
1130 /*
1131 * BIRD should keep multiple incoming connections in OpenSent state (for
1132 * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
1133 * connections are rejected istead. The exception is the case where an
1134 * incoming connection triggers a graceful restart.
1135 */
1136
1137 acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
1138 (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
1139
1140 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1141 {
1142 bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
1143 bgp_handle_graceful_restart(p);
1144 bgp_conn_enter_idle_state(p->conn);
1145 acc = 1;
1146
1147 /* There might be separate incoming connection in OpenSent state */
1148 if (p->incoming_conn.state > BS_ACTIVE)
1149 bgp_close_conn(&p->incoming_conn);
1150 }
1151
1152 BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
1153 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
1154 sk->dport, acc ? "accepted" : "rejected");
1155
1156 if (!acc)
1157 {
1158 rfree(sk);
1159 return 0;
1160 }
1161
1162 hops = p->cf->multihop ? : 1;
1163
1164 if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
1165 goto err;
1166
1167 if (p->cf->ttl_security)
1168 if (sk_set_min_ttl(sk, 256 - hops) < 0)
1169 goto err;
1170
1171 if (p->cf->enable_extended_messages)
1172 {
1173 sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
1174 sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
1175 sk_reallocate(sk);
1176 }
1177
1178 bgp_setup_conn(p, &p->incoming_conn);
1179 bgp_setup_sk(&p->incoming_conn, sk);
1180 bgp_send_open(&p->incoming_conn);
1181 return 0;
1182
1183 err:
1184 sk_log_error(sk, p->p.name);
1185 log(L_ERR "%s: Incoming connection aborted", p->p.name);
1186 rfree(sk);
1187 return 0;
1188 }
1189
1190 static void
1191 bgp_listen_sock_err(sock *sk UNUSED, int err)
1192 {
1193 if (err == ECONNABORTED)
1194 log(L_WARN "BGP: Incoming connection aborted");
1195 else
1196 log(L_ERR "BGP: Error on listening socket: %M", err);
1197 }
1198
1199 static void
1200 bgp_start_neighbor(struct bgp_proto *p)
1201 {
1202 /* Called only for single-hop BGP sessions */
1203
1204 if (ipa_zero(p->source_addr))
1205 p->source_addr = p->neigh->ifa->ip;
1206
1207 if (ipa_is_link_local(p->source_addr))
1208 p->link_addr = p->source_addr;
1209 else if (p->neigh->iface->llv6)
1210 p->link_addr = p->neigh->iface->llv6->ip;
1211
1212 bgp_initiate(p);
1213 }
1214
1215 static void
1216 bgp_neigh_notify(neighbor *n)
1217 {
1218 struct bgp_proto *p = (struct bgp_proto *) n->proto;
1219 int ps = p->p.proto_state;
1220
1221 if (n != p->neigh)
1222 return;
1223
1224 if ((ps == PS_DOWN) || (ps == PS_STOP))
1225 return;
1226
1227 int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1228
1229 if (n->scope <= 0)
1230 {
1231 if (!prepare)
1232 {
1233 BGP_TRACE(D_EVENTS, "Neighbor lost");
1234 bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1235 /* Perhaps also run bgp_update_startup_delay(p)? */
1236 bgp_stop(p, 0, NULL, 0);
1237 }
1238 }
1239 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1240 {
1241 if (!prepare)
1242 {
1243 BGP_TRACE(D_EVENTS, "Link down");
1244 bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1245 if (ps == PS_UP)
1246 bgp_update_startup_delay(p);
1247 bgp_stop(p, 0, NULL, 0);
1248 }
1249 }
1250 else
1251 {
1252 if (prepare)
1253 {
1254 BGP_TRACE(D_EVENTS, "Neighbor ready");
1255 bgp_start_neighbor(p);
1256 }
1257 }
1258 }
1259
1260 static void
1261 bgp_bfd_notify(struct bfd_request *req)
1262 {
1263 struct bgp_proto *p = req->data;
1264 int ps = p->p.proto_state;
1265
1266 if (req->down && ((ps == PS_START) || (ps == PS_UP)))
1267 {
1268 BGP_TRACE(D_EVENTS, "BFD session down");
1269 bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
1270
1271 if (p->cf->bfd == BGP_BFD_GRACEFUL)
1272 {
1273 /* Trigger graceful restart */
1274 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1275 bgp_handle_graceful_restart(p);
1276
1277 if (p->incoming_conn.state > BS_IDLE)
1278 bgp_conn_enter_idle_state(&p->incoming_conn);
1279
1280 if (p->outgoing_conn.state > BS_IDLE)
1281 bgp_conn_enter_idle_state(&p->outgoing_conn);
1282 }
1283 else
1284 {
1285 /* Trigger session down */
1286 if (ps == PS_UP)
1287 bgp_update_startup_delay(p);
1288 bgp_stop(p, 0, NULL, 0);
1289 }
1290 }
1291 }
1292
1293 static void
1294 bgp_update_bfd(struct bgp_proto *p, int use_bfd)
1295 {
1296 if (use_bfd && !p->bfd_req)
1297 p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
1298 p->cf->multihop ? NULL : p->neigh->iface,
1299 bgp_bfd_notify, p);
1300
1301 if (!use_bfd && p->bfd_req)
1302 {
1303 rfree(p->bfd_req);
1304 p->bfd_req = NULL;
1305 }
1306 }
1307
1308 static void
1309 bgp_reload_routes(struct channel *C)
1310 {
1311 struct bgp_proto *p = (void *) C->proto;
1312 struct bgp_channel *c = (void *) C;
1313
1314 ASSERT(p->conn && (p->route_refresh || c->c.in_table));
1315
1316 if (c->c.in_table)
1317 channel_schedule_reload(C);
1318 else
1319 bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
1320 }
1321
1322 static void
1323 bgp_feed_begin(struct channel *C, int initial)
1324 {
1325 struct bgp_proto *p = (void *) C->proto;
1326 struct bgp_channel *c = (void *) C;
1327
1328 /* This should not happen */
1329 if (!p->conn)
1330 return;
1331
1332 if (initial && p->cf->gr_mode)
1333 c->feed_state = BFS_LOADING;
1334
1335 /* It is refeed and both sides support enhanced route refresh */
1336 if (!initial && p->enhanced_refresh)
1337 {
1338 /* BoRR must not be sent before End-of-RIB */
1339 if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
1340 return;
1341
1342 c->feed_state = BFS_REFRESHING;
1343 bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
1344 }
1345 }
1346
1347 static void
1348 bgp_feed_end(struct channel *C)
1349 {
1350 struct bgp_proto *p = (void *) C->proto;
1351 struct bgp_channel *c = (void *) C;
1352
1353 /* This should not happen */
1354 if (!p->conn)
1355 return;
1356
1357 /* Non-demarcated feed ended, nothing to do */
1358 if (c->feed_state == BFS_NONE)
1359 return;
1360
1361 /* Schedule End-of-RIB packet */
1362 if (c->feed_state == BFS_LOADING)
1363 c->feed_state = BFS_LOADED;
1364
1365 /* Schedule EoRR packet */
1366 if (c->feed_state == BFS_REFRESHING)
1367 c->feed_state = BFS_REFRESHED;
1368
1369 /* Kick TX hook */
1370 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1371 }
1372
1373
1374 static void
1375 bgp_start_locked(struct object_lock *lock)
1376 {
1377 struct bgp_proto *p = lock->data;
1378 struct bgp_config *cf = p->cf;
1379
1380 if (p->p.proto_state != PS_START)
1381 {
1382 DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1383 return;
1384 }
1385
1386 DBG("BGP: Got lock\n");
1387
1388 if (cf->multihop)
1389 {
1390 /* Multi-hop sessions do not use neighbor entries */
1391 bgp_initiate(p);
1392 return;
1393 }
1394
1395 neighbor *n = neigh_find(&p->p, cf->remote_ip, cf->iface, NEF_STICKY);
1396 if (!n)
1397 {
1398 log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
1399 /* As we do not start yet, we can just disable protocol */
1400 p->p.disabled = 1;
1401 bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1402 proto_notify_state(&p->p, PS_DOWN);
1403 return;
1404 }
1405
1406 p->neigh = n;
1407
1408 if (n->scope <= 0)
1409 BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1410 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1411 BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1412 else
1413 bgp_start_neighbor(p);
1414 }
1415
1416 static int
1417 bgp_start(struct proto *P)
1418 {
1419 struct bgp_proto *p = (struct bgp_proto *) P;
1420 struct object_lock *lock;
1421
1422 DBG("BGP: Startup.\n");
1423 p->start_state = BSS_PREPARE;
1424 p->outgoing_conn.state = BS_IDLE;
1425 p->incoming_conn.state = BS_IDLE;
1426 p->neigh = NULL;
1427 p->bfd_req = NULL;
1428 p->gr_ready = 0;
1429 p->gr_active_num = 0;
1430
1431 p->event = ev_new_init(p->p.pool, bgp_decision, p);
1432 p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
1433 p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
1434
1435 p->local_id = proto_get_router_id(P->cf);
1436 if (p->rr_client)
1437 p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1438
1439 p->remote_id = 0;
1440 p->source_addr = p->cf->local_ip;
1441 p->link_addr = IPA_NONE;
1442
1443 /* Lock all channels when in GR recovery mode */
1444 if (p->p.gr_recovery && p->cf->gr_mode)
1445 {
1446 struct bgp_channel *c;
1447 WALK_LIST(c, p->p.channels)
1448 channel_graceful_restart_lock(&c->c);
1449 }
1450
1451 /*
1452 * Before attempting to create the connection, we need to lock the port,
1453 * so that we are the only instance attempting to talk with that neighbor.
1454 */
1455
1456 lock = p->lock = olock_new(P->pool);
1457 lock->addr = p->cf->remote_ip;
1458 lock->port = p->cf->remote_port;
1459 lock->iface = p->cf->iface;
1460 lock->vrf = p->cf->iface ? NULL : p->p.vrf;
1461 lock->type = OBJLOCK_TCP;
1462 lock->hook = bgp_start_locked;
1463 lock->data = p;
1464 olock_acquire(lock);
1465
1466 return PS_START;
1467 }
1468
1469 extern int proto_restart;
1470
1471 static int
1472 bgp_shutdown(struct proto *P)
1473 {
1474 struct bgp_proto *p = (struct bgp_proto *) P;
1475 uint subcode = 0;
1476
1477 char *message = NULL;
1478 byte *data = NULL;
1479 uint len = 0;
1480
1481 BGP_TRACE(D_EVENTS, "Shutdown requested");
1482
1483 switch (P->down_code)
1484 {
1485 case PDC_CF_REMOVE:
1486 case PDC_CF_DISABLE:
1487 subcode = 3; // Errcode 6, 3 - peer de-configured
1488 break;
1489
1490 case PDC_CF_RESTART:
1491 subcode = 6; // Errcode 6, 6 - other configuration change
1492 break;
1493
1494 case PDC_CMD_DISABLE:
1495 case PDC_CMD_SHUTDOWN:
1496 subcode = 2; // Errcode 6, 2 - administrative shutdown
1497 message = P->message;
1498 break;
1499
1500 case PDC_CMD_RESTART:
1501 subcode = 4; // Errcode 6, 4 - administrative reset
1502 message = P->message;
1503 break;
1504
1505 case PDC_RX_LIMIT_HIT:
1506 case PDC_IN_LIMIT_HIT:
1507 subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1508 /* log message for compatibility */
1509 log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1510 goto limit;
1511
1512 case PDC_OUT_LIMIT_HIT:
1513 subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1514
1515 limit:
1516 bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1517 if (proto_restart)
1518 bgp_update_startup_delay(p);
1519 else
1520 p->startup_delay = 0;
1521 goto done;
1522 }
1523
1524 bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1525 p->startup_delay = 0;
1526
1527 /* RFC 8203 - shutdown communication */
1528 if (message)
1529 {
1530 uint msg_len = strlen(message);
1531 msg_len = MIN(msg_len, 128);
1532
1533 /* Buffer will be freed automatically by protocol shutdown */
1534 data = mb_alloc(p->p.pool, msg_len + 1);
1535 len = msg_len + 1;
1536
1537 data[0] = msg_len;
1538 memcpy(data+1, message, msg_len);
1539 }
1540
1541 done:
1542 bgp_stop(p, subcode, data, len);
1543 return p->p.proto_state;
1544 }
1545
1546 static struct proto *
1547 bgp_init(struct proto_config *CF)
1548 {
1549 struct proto *P = proto_new(CF);
1550 struct bgp_proto *p = (struct bgp_proto *) P;
1551 struct bgp_config *cf = (struct bgp_config *) CF;
1552
1553 P->rt_notify = bgp_rt_notify;
1554 P->preexport = bgp_preexport;
1555 P->neigh_notify = bgp_neigh_notify;
1556 P->reload_routes = bgp_reload_routes;
1557 P->feed_begin = bgp_feed_begin;
1558 P->feed_end = bgp_feed_end;
1559 P->rte_better = bgp_rte_better;
1560 P->rte_mergable = bgp_rte_mergable;
1561 P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
1562 P->rte_modify = bgp_rte_modify_stale;
1563
1564 p->cf = cf;
1565 p->local_as = cf->local_as;
1566 p->remote_as = cf->remote_as;
1567 p->public_as = cf->local_as;
1568 p->is_internal = (cf->local_as == cf->remote_as);
1569 p->is_interior = p->is_internal || cf->confederation_member;
1570 p->rs_client = cf->rs_client;
1571 p->rr_client = cf->rr_client;
1572
1573 /* Confederation ID is used for truly external peers */
1574 if (cf->confederation && !p->is_interior)
1575 p->public_as = cf->confederation;
1576
1577 /* Add all channels */
1578 struct bgp_channel_config *cc;
1579 WALK_LIST(cc, CF->channels)
1580 proto_add_channel(P, &cc->c);
1581
1582 return P;
1583 }
1584
1585 static void
1586 bgp_channel_init(struct channel *C, struct channel_config *CF)
1587 {
1588 struct bgp_channel *c = (void *) C;
1589 struct bgp_channel_config *cf = (void *) CF;
1590
1591 c->cf = cf;
1592 c->afi = cf->afi;
1593 c->desc = cf->desc;
1594
1595 if (cf->igp_table_ip4)
1596 c->igp_table_ip4 = cf->igp_table_ip4->table;
1597
1598 if (cf->igp_table_ip6)
1599 c->igp_table_ip6 = cf->igp_table_ip6->table;
1600 }
1601
1602 static int
1603 bgp_channel_start(struct channel *C)
1604 {
1605 struct bgp_proto *p = (void *) C->proto;
1606 struct bgp_channel *c = (void *) C;
1607 ip_addr src = p->source_addr;
1608
1609 if (c->igp_table_ip4)
1610 rt_lock_table(c->igp_table_ip4);
1611
1612 if (c->igp_table_ip6)
1613 rt_lock_table(c->igp_table_ip6);
1614
1615 c->pool = p->p.pool; // XXXX
1616 bgp_init_bucket_table(c);
1617 bgp_init_prefix_table(c);
1618
1619 if (c->cf->import_table)
1620 channel_setup_in_table(C);
1621
1622 c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
1623
1624 c->next_hop_addr = c->cf->next_hop_addr;
1625 c->link_addr = IPA_NONE;
1626 c->packets_to_send = 0;
1627
1628 /* Try to use source address as next hop address */
1629 if (ipa_zero(c->next_hop_addr))
1630 {
1631 if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
1632 c->next_hop_addr = src;
1633
1634 if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
1635 c->next_hop_addr = src;
1636 }
1637
1638 /* Use preferred addresses associated with interface / source address */
1639 if (ipa_zero(c->next_hop_addr))
1640 {
1641 /* We know the iface for single-hop, we make lookup for multihop */
1642 struct neighbor *nbr = p->neigh ?: neigh_find(&p->p, src, NULL, 0);
1643 struct iface *iface = nbr ? nbr->iface : NULL;
1644
1645 if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
1646 c->next_hop_addr = iface->addr4->ip;
1647
1648 if (bgp_channel_is_ipv6(c) && iface && iface->addr6)
1649 c->next_hop_addr = iface->addr6->ip;
1650 }
1651
1652 /* Exit if no feasible next hop address is found */
1653 if (ipa_zero(c->next_hop_addr))
1654 {
1655 log(L_WARN "%s: Missing next hop address", p->p.name);
1656 return 0;
1657 }
1658
1659 /* Set link-local address for IPv6 single-hop BGP */
1660 if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
1661 {
1662 c->link_addr = p->link_addr;
1663
1664 if (ipa_zero(c->link_addr))
1665 log(L_WARN "%s: Missing link-local address", p->p.name);
1666 }
1667
1668 /* Link local address is already in c->link_addr */
1669 if (ipa_is_link_local(c->next_hop_addr))
1670 c->next_hop_addr = IPA_NONE;
1671
1672 return 0; /* XXXX: Currently undefined */
1673 }
1674
1675 static void
1676 bgp_channel_shutdown(struct channel *C)
1677 {
1678 struct bgp_channel *c = (void *) C;
1679
1680 c->next_hop_addr = IPA_NONE;
1681 c->link_addr = IPA_NONE;
1682 c->packets_to_send = 0;
1683 }
1684
1685 static void
1686 bgp_channel_cleanup(struct channel *C)
1687 {
1688 struct bgp_channel *c = (void *) C;
1689
1690 if (c->igp_table_ip4)
1691 rt_unlock_table(c->igp_table_ip4);
1692
1693 if (c->igp_table_ip6)
1694 rt_unlock_table(c->igp_table_ip6);
1695 }
1696
1697 static inline struct bgp_channel_config *
1698 bgp_find_channel_config(struct bgp_config *cf, u32 afi)
1699 {
1700 struct bgp_channel_config *cc;
1701
1702 WALK_LIST(cc, cf->c.channels)
1703 if (cc->afi == afi)
1704 return cc;
1705
1706 return NULL;
1707 }
1708
1709 struct rtable_config *
1710 bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
1711 {
1712 struct bgp_channel_config *cc2;
1713 struct rtable_config *tab;
1714
1715 /* First, try table connected by the channel */
1716 if (cc->c.table->addr_type == type)
1717 return cc->c.table;
1718
1719 /* Find paired channel with the same SAFI but the other AFI */
1720 u32 afi2 = cc->afi ^ 0x30000;
1721 cc2 = bgp_find_channel_config(cf, afi2);
1722
1723 /* Second, try IGP table configured in the paired channel */
1724 if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
1725 return tab;
1726
1727 /* Third, try table connected by the paired channel */
1728 if (cc2 && (cc2->c.table->addr_type == type))
1729 return cc2->c.table;
1730
1731 /* Last, try default table of given type */
1732 if (tab = cf->c.global->def_tables[type])
1733 return tab;
1734
1735 cf_error("Undefined IGP table");
1736 }
1737
1738
1739 void
1740 bgp_postconfig(struct proto_config *CF)
1741 {
1742 struct bgp_config *cf = (void *) CF;
1743 int internal = (cf->local_as == cf->remote_as);
1744 int interior = internal || cf->confederation_member;
1745
1746 /* Do not check templates at all */
1747 if (cf->c.class == SYM_TEMPLATE)
1748 return;
1749
1750
1751 /* EBGP direct by default, IBGP multihop by default */
1752 if (cf->multihop < 0)
1753 cf->multihop = internal ? 64 : 0;
1754
1755 /* LLGR mode default based on GR mode */
1756 if (cf->llgr_mode < 0)
1757 cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0;
1758
1759 /* Link check for single-hop BGP by default */
1760 if (cf->check_link < 0)
1761 cf->check_link = !cf->multihop;
1762
1763
1764 if (!cf->local_as)
1765 cf_error("Local AS number must be set");
1766
1767 if (ipa_zero(cf->remote_ip))
1768 cf_error("Neighbor must be configured");
1769
1770 if (!cf->remote_as)
1771 cf_error("Remote AS number must be set");
1772
1773 if (!cf->iface && (ipa_is_link_local(cf->local_ip) ||
1774 ipa_is_link_local(cf->remote_ip)))
1775 cf_error("Link-local addresses require defined interface");
1776
1777 if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
1778 cf_error("Neighbor AS number out of range (AS4 not available)");
1779
1780 if (!internal && cf->rr_client)
1781 cf_error("Only internal neighbor can be RR client");
1782
1783 if (internal && cf->rs_client)
1784 cf_error("Only external neighbor can be RS client");
1785
1786 if (!cf->confederation && cf->confederation_member)
1787 cf_error("Confederation ID must be set for member sessions");
1788
1789 if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
1790 ipa_is_link_local(cf->remote_ip)))
1791 cf_error("Multihop BGP cannot be used with link-local addresses");
1792
1793 if (cf->multihop && cf->iface)
1794 cf_error("Multihop BGP cannot be bound to interface");
1795
1796 if (cf->multihop && cf->check_link)
1797 cf_error("Multihop BGP cannot depend on link state");
1798
1799 if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
1800 cf_error("Multihop BGP with BFD requires specified local address");
1801
1802 if (!cf->gr_mode && cf->llgr_mode)
1803 cf_error("Long-lived graceful restart requires basic graceful restart");
1804
1805
1806 struct bgp_channel_config *cc;
1807 WALK_LIST(cc, CF->channels)
1808 {
1809 /* Handle undefined import filter */
1810 if (cc->c.in_filter == FILTER_UNDEF)
1811 if (interior)
1812 cc->c.in_filter = FILTER_ACCEPT;
1813 else
1814 cf_error("EBGP requires explicit import policy");
1815
1816 /* Handle undefined export filter */
1817 if (cc->c.out_filter == FILTER_UNDEF)
1818 if (interior)
1819 cc->c.out_filter = FILTER_REJECT;
1820 else
1821 cf_error("EBGP requires explicit export policy");
1822
1823 /* Disable after error incompatible with restart limit action */
1824 if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
1825 cc->c.in_limit.action = PLA_DISABLE;
1826
1827 /* Different default based on rr_client, rs_client */
1828 if (cc->next_hop_keep == 0xff)
1829 cc->next_hop_keep = cf->rr_client ? NH_IBGP : (cf->rs_client ? NH_ALL : NH_NO);
1830
1831 /* Different default based on rs_client */
1832 if (!cc->missing_lladdr)
1833 cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF;
1834
1835 /* Different default for gw_mode */
1836 if (!cc->gw_mode)
1837 cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
1838
1839 /* Defaults based on proto config */
1840 if (cc->gr_able == 0xff)
1841 cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
1842
1843 if (cc->llgr_able == 0xff)
1844 cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE);
1845
1846 if (cc->llgr_time == ~0U)
1847 cc->llgr_time = cf->llgr_time;
1848
1849 /* Default values of IGP tables */
1850 if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
1851 {
1852 if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
1853 cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
1854
1855 if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
1856 cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
1857
1858 if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
1859 cf_error("Mismatched IGP table type");
1860
1861 if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
1862 cf_error("Mismatched IGP table type");
1863 }
1864
1865 if (cf->multihop && (cc->gw_mode == GW_DIRECT))
1866 cf_error("Multihop BGP cannot use direct gateway mode");
1867
1868 if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
1869 cf_error("BGP in recursive mode prohibits sorted table");
1870
1871 if (cf->deterministic_med && cc->c.table->sorted)
1872 cf_error("BGP with deterministic MED prohibits sorted table");
1873
1874 if (cc->secondary && !cc->c.table->sorted)
1875 cf_error("BGP with secondary option requires sorted table");
1876 }
1877 }
1878
1879 static int
1880 bgp_reconfigure(struct proto *P, struct proto_config *CF)
1881 {
1882 struct bgp_proto *p = (void *) P;
1883 struct bgp_config *new = (void *) CF;
1884 struct bgp_config *old = p->cf;
1885
1886 if (proto_get_router_id(CF) != p->local_id)
1887 return 0;
1888
1889 int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1890 ((byte *) new) + sizeof(struct proto_config),
1891 // password item is last and must be checked separately
1892 OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1893 && ((!old->password && !new->password)
1894 || (old->password && new->password && !strcmp(old->password, new->password)));
1895
1896 /* FIXME: Move channel reconfiguration to generic protocol code ? */
1897 struct channel *C, *C2;
1898 struct bgp_channel_config *cc;
1899
1900 WALK_LIST(C, p->p.channels)
1901 C->stale = 1;
1902
1903 WALK_LIST(cc, new->c.channels)
1904 {
1905 C = (struct channel *) bgp_find_channel(p, cc->afi);
1906 same = proto_configure_channel(P, &C, &cc->c) && same;
1907
1908 if (C)
1909 C->stale = 0;
1910 }
1911
1912 WALK_LIST_DELSAFE(C, C2, p->p.channels)
1913 if (C->stale)
1914 same = proto_configure_channel(P, &C, NULL) && same;
1915
1916
1917 if (same && (p->start_state > BSS_PREPARE))
1918 bgp_update_bfd(p, new->bfd);
1919
1920 /* We should update our copy of configuration ptr as old configuration will be freed */
1921 if (same)
1922 p->cf = new;
1923
1924 return same;
1925 }
1926
1927 #define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
1928
1929 static int
1930 bgp_channel_reconfigure(struct channel *C, struct channel_config *CC)
1931 {
1932 struct bgp_channel *c = (void *) C;
1933 struct bgp_channel_config *new = (void *) CC;
1934 struct bgp_channel_config *old = c->cf;
1935
1936 if (memcmp(((byte *) old) + sizeof(struct channel_config),
1937 ((byte *) new) + sizeof(struct channel_config),
1938 /* Remaining items must be checked separately */
1939 OFFSETOF(struct bgp_channel_config, rest) - sizeof(struct channel_config)))
1940 return 0;
1941
1942 /* Check change in IGP tables */
1943 if ((IGP_TABLE(old, ip4) != IGP_TABLE(new, ip4)) ||
1944 (IGP_TABLE(old, ip6) != IGP_TABLE(new, ip6)))
1945 return 0;
1946
1947 c->cf = new;
1948 return 1;
1949 }
1950
1951 static void
1952 bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
1953 {
1954 /* Just a shallow copy */
1955 }
1956
1957
1958 /**
1959 * bgp_error - report a protocol error
1960 * @c: connection
1961 * @code: error code (according to the RFC)
1962 * @subcode: error sub-code
1963 * @data: data to be passed in the Notification message
1964 * @len: length of the data
1965 *
1966 * bgp_error() sends a notification packet to tell the other side that a protocol
1967 * error has occurred (including the data considered erroneous if possible) and
1968 * closes the connection.
1969 */
1970 void
1971 bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
1972 {
1973 struct bgp_proto *p = c->bgp;
1974
1975 if (c->state == BS_CLOSE)
1976 return;
1977
1978 bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
1979 bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1980 bgp_conn_enter_close_state(c);
1981
1982 c->notify_code = code;
1983 c->notify_subcode = subcode;
1984 c->notify_data = data;
1985 c->notify_size = (len > 0) ? len : 0;
1986 bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
1987
1988 if (code != 6)
1989 {
1990 bgp_update_startup_delay(p);
1991 bgp_stop(p, 0, NULL, 0);
1992 }
1993 }
1994
1995 /**
1996 * bgp_store_error - store last error for status report
1997 * @p: BGP instance
1998 * @c: connection
1999 * @class: error class (BE_xxx constants)
2000 * @code: error code (class specific)
2001 *
2002 * bgp_store_error() decides whether given error is interesting enough
2003 * and store that error to last_error variables of @p
2004 */
2005 void
2006 bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
2007 {
2008 /* During PS_UP, we ignore errors on secondary connection */
2009 if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
2010 return;
2011
2012 /* During PS_STOP, we ignore any errors, as we want to report
2013 * the error that caused transition to PS_STOP
2014 */
2015 if (p->p.proto_state == PS_STOP)
2016 return;
2017
2018 p->last_error_class = class;
2019 p->last_error_code = code;
2020 }
2021
/* Connection state names, indexed by the connection state in bgp_state_dsc() */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Status prefixes, indexed by the last error class in bgp_get_status() */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for BE_MISC errors, indexed by the last error code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Messages for BE_AUTO_DOWN errors, indexed by the last error code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};

/* Graceful restart state names (NOTE(review): indexing constant not visible here) */
static char *bgp_gr_states[] = {
  "None",
  "Regular",
  "Long-lived"
};
2027
2028 static const char *
2029 bgp_last_errmsg(struct bgp_proto *p)
2030 {
2031 switch (p->last_error_class)
2032 {
2033 case BE_MISC:
2034 return bgp_misc_errors[p->last_error_code];
2035 case BE_SOCKET:
2036 return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
2037 case BE_BGP_RX:
2038 case BE_BGP_TX:
2039 return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
2040 case BE_AUTO_DOWN:
2041 return bgp_auto_errors[p->last_error_code];
2042 default:
2043 return "";
2044 }
2045 }
2046
2047 static const char *
2048 bgp_state_dsc(struct bgp_proto *p)
2049 {
2050 if (p->p.proto_state == PS_DOWN)
2051 return "Down";
2052
2053 int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
2054 if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
2055 return "Passive";
2056
2057 return bgp_state_names[state];
2058 }
2059
2060 static void
2061 bgp_get_status(struct proto *P, byte *buf)
2062 {
2063 struct bgp_proto *p = (struct bgp_proto *) P;
2064
2065 const char *err1 = bgp_err_classes[p->last_error_class];
2066 const char *err2 = bgp_last_errmsg(p);
2067
2068 if (P->proto_state == PS_DOWN)
2069 bsprintf(buf, "%s%s", err1, err2);
2070 else
2071 bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
2072 }
2073
2074 static void
2075 bgp_show_afis(int code, char *s, u32 *afis, uint count)
2076 {
2077 buffer b;
2078 LOG_BUFFER_INIT(b);
2079
2080 buffer_puts(&b, s);
2081
2082 for (u32 *af = afis; af < (afis + count); af++)
2083 {
2084 const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
2085 if (desc)
2086 buffer_print(&b, " %s", desc->name);
2087 else
2088 buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
2089 }
2090
2091 if (b.pos == b.end)
2092 strcpy(b.end - 32, " ... <too long>");
2093
2094 cli_msg(code, b.start);
2095 }
2096
2097 static void
2098 bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
2099 {
2100 struct bgp_af_caps *ac;
2101 uint any_mp_bgp = 0;
2102 uint any_gr_able = 0;
2103 uint any_add_path = 0;
2104 uint any_ext_next_hop = 0;
2105 uint any_llgr_able = 0;
2106 u32 *afl1 = alloca(caps->af_count * sizeof(u32));
2107 u32 *afl2 = alloca(caps->af_count * sizeof(u32));
2108 uint afn1, afn2;
2109
2110 WALK_AF_CAPS(caps, ac)
2111 {
2112 any_mp_bgp |= ac->ready;
2113 any_gr_able |= ac->gr_able;
2114 any_add_path |= ac->add_path;
2115 any_ext_next_hop |= ac->ext_next_hop;
2116 any_llgr_able |= ac->llgr_able;
2117 }
2118
2119 if (any_mp_bgp)
2120 {
2121 cli_msg(-1006, " Multiprotocol");
2122
2123 afn1 = 0;
2124 WALK_AF_CAPS(caps, ac)
2125 if (ac->ready)
2126 afl1[afn1++] = ac->afi;
2127
2128 bgp_show_afis(-1006, " AF announced:", afl1, afn1);
2129 }
2130
2131 if (caps->route_refresh)
2132 cli_msg(-1006, " Route refresh");
2133
2134 if (any_ext_next_hop)
2135 {
2136 cli_msg(-1006, " Extended next hop");
2137
2138 afn1 = 0;
2139 WALK_AF_CAPS(caps, ac)
2140 if (ac->ext_next_hop)
2141 afl1[afn1++] = ac->afi;
2142
2143 bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1);
2144 }
2145
2146 if (caps->ext_messages)
2147 cli_msg(-1006, " Extended message");
2148
2149 if (caps->gr_aware)
2150 cli_msg(-1006, " Graceful restart");
2151
2152 if (any_gr_able)
2153 {
2154 /* Continues from gr_aware */
2155 cli_msg(-1006, " Restart time: %u", caps->gr_time);
2156 if (caps->gr_flags & BGP_GRF_RESTART)
2157 cli_msg(-1006, " Restart recovery");
2158
2159 afn1 = afn2 = 0;
2160 WALK_AF_CAPS(caps, ac)
2161 {
2162 if (ac->gr_able)
2163 afl1[afn1++] = ac->afi;
2164
2165 if (ac->gr_af_flags & BGP_GRF_FORWARDING)
2166 afl2[afn2++] = ac->afi;
2167 }
2168
2169 bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2170 bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2171 }
2172
2173 if (caps->as4_support)
2174 cli_msg(-1006, " 4-octet AS numbers");
2175
2176 if (any_add_path)
2177 {
2178 cli_msg(-1006, " ADD-PATH");
2179
2180 afn1 = afn2 = 0;
2181 WALK_AF_CAPS(caps, ac)
2182 {
2183 if (ac->add_path & BGP_ADD_PATH_RX)
2184 afl1[afn1++] = ac->afi;
2185
2186 if (ac->add_path & BGP_ADD_PATH_TX)
2187 afl2[afn2++] = ac->afi;
2188 }
2189
2190 bgp_show_afis(-1006, " RX:", afl1, afn1);
2191 bgp_show_afis(-1006, " TX:", afl2, afn2);
2192 }
2193
2194 if (caps->enhanced_refresh)
2195 cli_msg(-1006, " Enhanced refresh");
2196
2197 if (caps->llgr_aware)
2198 cli_msg(-1006, " Long-lived graceful restart");
2199
2200 if (any_llgr_able)
2201 {
2202 u32 stale_time = 0;
2203
2204 afn1 = afn2 = 0;
2205 WALK_AF_CAPS(caps, ac)
2206 {
2207 stale_time = MAX(stale_time, ac->llgr_time);
2208
2209 if (ac->llgr_able && ac->llgr_time)
2210 afl1[afn1++] = ac->afi;
2211
2212 if (ac->llgr_flags & BGP_GRF_FORWARDING)
2213 afl2[afn2++] = ac->afi;
2214 }
2215
2216 /* Continues from llgr_aware */
2217 cli_msg(-1006, " LL stale time: %u", stale_time);
2218
2219 bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2220 bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2221 }
2222 }
2223
2224 static void
2225 bgp_show_proto_info(struct proto *P)
2226 {
2227 struct bgp_proto *p = (struct bgp_proto *) P;
2228
2229 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
2230 cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
2231 cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
2232
2233 if (p->gr_active_num)
2234 cli_msg(-1006, " Neighbor graceful restart active");
2235
2236 if (P->proto_state == PS_START)
2237 {
2238 struct bgp_conn *oc = &p->outgoing_conn;
2239
2240 if ((p->start_state < BSS_CONNECT) &&
2241 (tm_active(p->startup_timer)))
2242 cli_msg(-1006, " Error wait: %t/%u",
2243 tm_remains(p->startup_timer), p->startup_delay);
2244
2245 if ((oc->state == BS_ACTIVE) &&
2246 (tm_active(oc->connect_timer)))
2247 cli_msg(-1006, " Connect delay: %t/%u",
2248 tm_remains(oc->connect_timer), p->cf->connect_delay_time);
2249
2250 if (p->gr_active_num && tm_active(p->gr_timer))
2251 cli_msg(-1006, " Restart timer: %t/-",
2252 tm_remains(p->gr_timer));
2253 }
2254 else if (P->proto_state == PS_UP)
2255 {
2256 cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
2257 cli_msg(-1006, " Local capabilities");
2258 bgp_show_capabilities(p, p->conn->local_caps);
2259 cli_msg(-1006, " Neighbor capabilities");
2260 bgp_show_capabilities(p, p->conn->remote_caps);
2261 cli_msg(-1006, " Session: %s%s%s%s%s",
2262 p->is_internal ? "internal" : "external",
2263 p->cf->multihop ? " multihop" : "",
2264 p->rr_client ? " route-reflector" : "",
2265 p->rs_client ? " route-server" : "",
2266 p->as4_session ? " AS4" : "");
2267 cli_msg(-1006, " Source address: %I", p->source_addr);
2268 cli_msg(-1006, " Hold timer: %t/%u",
2269 tm_remains(p->conn->hold_timer), p->conn->hold_time);
2270 cli_msg(-1006, " Keepalive timer: %t/%u",
2271 tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
2272 }
2273
2274 if ((p->last_error_class != BE_NONE) &&
2275 (p->last_error_class != BE_MAN_DOWN))
2276 {
2277 const char *err1 = bgp_err_classes[p->last_error_class];
2278 const char *err2 = bgp_last_errmsg(p);
2279 cli_msg(-1006, " Last error: %s%s", err1, err2);
2280 }
2281
2282 {
2283 struct bgp_channel *c;
2284 WALK_LIST(c, p->p.channels)
2285 {
2286 channel_show_info(&c->c);
2287
2288 if (p->gr_active_num)
2289 cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
2290
2291 if (c->stale_timer && tm_active(c->stale_timer))
2292 cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer));
2293
2294 if (c->c.channel_state == CS_UP)
2295 {
2296 if (ipa_zero(c->link_addr))
2297 cli_msg(-1006, " BGP Next hop: %I", c->next_hop_addr);
2298 else
2299 cli_msg(-1006, " BGP Next hop: %I %I", c->next_hop_addr, c->link_addr);
2300 }
2301
2302 if (c->igp_table_ip4)
2303 cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name);
2304
2305 if (c->igp_table_ip6)
2306 cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
2307 }
2308 }
2309 }
2310
2311 struct channel_class channel_bgp = {
2312 .channel_size = sizeof(struct bgp_channel),
2313 .config_size = sizeof(struct bgp_channel_config),
2314 .init = bgp_channel_init,
2315 .start = bgp_channel_start,
2316 .shutdown = bgp_channel_shutdown,
2317 .cleanup = bgp_channel_cleanup,
2318 .reconfigure = bgp_channel_reconfigure,
2319 };
2320
2321 struct protocol proto_bgp = {
2322 .name = "BGP",
2323 .template = "bgp%d",
2324 .class = PROTOCOL_BGP,
2325 .preference = DEF_PREF_BGP,
2326 .channel_mask = NB_IP | NB_VPN | NB_FLOW,
2327 .proto_size = sizeof(struct bgp_proto),
2328 .config_size = sizeof(struct bgp_config),
2329 .postconfig = bgp_postconfig,
2330 .init = bgp_init,
2331 .start = bgp_start,
2332 .shutdown = bgp_shutdown,
2333 .reconfigure = bgp_reconfigure,
2334 .copy_config = bgp_copy_config,
2335 .get_status = bgp_get_status,
2336 .get_attr = bgp_get_attr,
2337 .get_route_info = bgp_get_route_info,
2338 .show_proto_info = bgp_show_proto_info
2339 };