]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/bgp.c
Lib: Add and use ev_new_init()
[thirdparty/bird.git] / proto / bgp / bgp.c
1 /*
2 * BIRD -- The Border Gateway Protocol
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 /**
12 * DOC: Border Gateway Protocol
13 *
14 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
15 * the connection and most of the interface with BIRD core, |packets.c| handling
16 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
17 * manipulation with BGP attribute lists.
18 *
19 * As opposed to the other existing routing daemons, BIRD has a sophisticated
20 * core architecture which is able to keep all the information needed by BGP in
21 * the primary routing table, therefore no complex data structures like a
22 * central BGP table are needed. This increases memory footprint of a BGP router
23 * with many connections, but not too much and, which is more important, it
24 * makes BGP much easier to implement.
25 *
26 * Each instance of BGP (corresponding to a single BGP peer) is described by a
27 * &bgp_proto structure to which are attached individual connections represented
28 * by &bgp_connection (usually, there exists only one connection, but during BGP
29 * session setup, there can be more of them). The connections are handled
30 * according to the BGP state machine defined in the RFC with all the timers and
31 * all the parameters configurable.
32 *
33 * In incoming direction, we listen on the connection's socket and each time we
34 * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
35 * markers and passes complete packets to bgp_rx_packet() which distributes the
36 * packet according to its type.
37 *
38 * In outgoing direction, we gather all the routing updates and sort them to
39 * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
40 * fast comparison of &rta's and a &fib which helps us to find if we already
41 * have another route for the same destination queued for sending, so that we
42 * can replace it with the new one immediately instead of sending both
43 * updates). There also exists a special bucket holding all the route
44 * withdrawals which cannot be queued anywhere else as they don't have any
45 * attributes. If we have any packet to send (due to either new routes or the
46 * connection tracking code wanting to send an Open, Keepalive or Notification
47 * message), we call bgp_schedule_packet() which sets the corresponding bit in a
48 * @packets_to_send bit field in &bgp_conn and as soon as the transmit socket
49 * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
50 * packet type bits and calls the corresponding bgp_create_xx() functions,
51 * eventually rescheduling the same packet type if we have more data of the same
52 * type to send.
53 *
54 * The processing of attributes consists of two functions: bgp_decode_attrs()
55 * for checking of the attribute blocks and translating them to the language of
56 * BIRD's extended attributes and bgp_encode_attrs() which does the
57 * converse. Both functions are built around a @bgp_attr_table array describing
58 * all important characteristics of all known attributes. Unknown transitive
59 * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
60 *
61 * BGP protocol implements graceful restart in both restarting (local restart)
62 * and receiving (neighbor restart) roles. The first is handled mostly by the
63 * graceful restart code in the nest, BGP protocol just handles capabilities,
64 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
65 * The second is implemented by internal restart of the BGP state to %BS_IDLE
66 * and protocol state to %PS_START, but keeping the protocol up from the core
67 * point of view and therefore maintaining received routes. Routing table
68 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
69 * stale routes after reestablishment of BGP session during graceful restart.
70 *
71 * Supported standards:
72 * <itemize>
73 * <item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP)
74 * <item> <rfc id="1997"> - BGP Communities Attribute
75 * <item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature
76 * <item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6
77 * <item> <rfc id="2918"> - Route Refresh Capability
78 * <item> <rfc id="3107"> - Carrying Label Information in BGP
79 * <item> <rfc id="4360"> - BGP Extended Communities Attribute
80 * <item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks
81 * <item> <rfc id="4456"> - BGP Route Reflection
82 * <item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message
83 * <item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks
84 * <item> <rfc id="4724"> - Graceful Restart Mechanism for BGP
85 * <item> <rfc id="4760"> - Multiprotocol extensions for BGP
86 * <item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS
87 * <item> <rfc id="5065"> - AS confederations for BGP
88 * <item> <rfc id="5082"> - Generalized TTL Security Mechanism
89 * <item> <rfc id="5492"> - Capabilities Advertisement with BGP
90 * <item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop
91 * <item> <rfc id="5575"> - Dissemination of Flow Specification Rules
92 * <item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community
93 * <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
94 * <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
95 * <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
96 * <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
97 * <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
98 * <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
99 * <item> <rfc id="7947"> - Internet Exchange BGP Route Server
100 * <item> <rfc id="8092"> - BGP Large Communities Attribute
101 * <item> <rfc id="8203"> - BGP Administrative Shutdown Communication
102 * <item> <rfc id="8212"> - Default EBGP Route Propagation Behavior without Policies
103 * </itemize>
104 */
105
106 #undef LOCAL_DEBUG
107
108 #include <stdlib.h>
109
110 #include "nest/bird.h"
111 #include "nest/iface.h"
112 #include "nest/protocol.h"
113 #include "nest/route.h"
114 #include "nest/cli.h"
115 #include "nest/locks.h"
116 #include "conf/conf.h"
117 #include "filter/filter.h"
118 #include "lib/socket.h"
119 #include "lib/resource.h"
120 #include "lib/string.h"
121
122 #include "bgp.h"
123
124
125 struct linpool *bgp_linpool; /* Global temporary pool */
126 struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */
127 static list bgp_sockets; /* Global list of listening sockets */
128
129
130 static void bgp_connect(struct bgp_proto *p);
131 static void bgp_active(struct bgp_proto *p);
132 static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
133
134 static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
135 static void bgp_listen_sock_err(sock *sk UNUSED, int err);
136
137 /**
138 * bgp_open - open a BGP instance
139 * @p: BGP instance
140 *
141 * This function allocates and configures shared BGP resources, mainly listening
142 * sockets. Should be called as the last step during initialization (when lock
143 * is acquired and neighbor is ready). When error, caller should change state to
144 * PS_DOWN and return immediately.
145 */
146 static int
147 bgp_open(struct bgp_proto *p)
148 {
149 struct bgp_socket *bs = NULL;
150 struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
151 ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
152 (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6);
153 uint port = p->cf->local_port;
154
155 /* FIXME: Add some global init? */
156 if (!bgp_linpool)
157 init_list(&bgp_sockets);
158
159 /* We assume that cf->iface is defined iff cf->local_ip is link-local */
160
161 WALK_LIST(bs, bgp_sockets)
162 if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->iface == ifa) && (bs->sk->sport == port))
163 {
164 bs->uc++;
165 p->sock = bs;
166 return 0;
167 }
168
169 sock *sk = sk_new(proto_pool);
170 sk->type = SK_TCP_PASSIVE;
171 sk->ttl = 255;
172 sk->saddr = addr;
173 sk->sport = port;
174 sk->flags = 0;
175 sk->tos = IP_PREC_INTERNET_CONTROL;
176 sk->rbsize = BGP_RX_BUFFER_SIZE;
177 sk->tbsize = BGP_TX_BUFFER_SIZE;
178 sk->rx_hook = bgp_incoming_connection;
179 sk->err_hook = bgp_listen_sock_err;
180
181 if (sk_open(sk) < 0)
182 goto err;
183
184 bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
185 bs->sk = sk;
186 bs->uc = 1;
187 p->sock = bs;
188
189 add_tail(&bgp_sockets, &bs->n);
190
191 if (!bgp_linpool)
192 {
193 bgp_linpool = lp_new_default(proto_pool);
194 bgp_linpool2 = lp_new_default(proto_pool);
195 }
196
197 return 0;
198
199 err:
200 sk_log_error(sk, p->p.name);
201 log(L_ERR "%s: Cannot open listening socket", p->p.name);
202 rfree(sk);
203 return -1;
204 }
205
206 /**
207 * bgp_close - close a BGP instance
208 * @p: BGP instance
209 *
210 * This function frees and deconfigures shared BGP resources.
211 */
212 static void
213 bgp_close(struct bgp_proto *p)
214 {
215 struct bgp_socket *bs = p->sock;
216
217 ASSERT(bs && bs->uc);
218
219 if (--bs->uc)
220 return;
221
222 rfree(bs->sk);
223 rem_node(&bs->n);
224 mb_free(bs);
225
226 if (!EMPTY_LIST(bgp_sockets))
227 return;
228
229 rfree(bgp_linpool);
230 bgp_linpool = NULL;
231
232 rfree(bgp_linpool2);
233 bgp_linpool2 = NULL;
234 }
235
236 static inline int
237 bgp_setup_auth(struct bgp_proto *p, int enable)
238 {
239 if (p->cf->password)
240 {
241 int rv = sk_set_md5_auth(p->sock->sk,
242 p->cf->local_ip, p->cf->remote_ip, p->cf->iface,
243 enable ? p->cf->password : NULL, p->cf->setkey);
244
245 if (rv < 0)
246 sk_log_error(p->sock->sk, p->p.name);
247
248 return rv;
249 }
250 else
251 return 0;
252 }
253
254 static inline struct bgp_channel *
255 bgp_find_channel(struct bgp_proto *p, u32 afi)
256 {
257 struct bgp_channel *c;
258 WALK_LIST(c, p->p.channels)
259 if (c->afi == afi)
260 return c;
261
262 return NULL;
263 }
264
265 static void
266 bgp_startup(struct bgp_proto *p)
267 {
268 BGP_TRACE(D_EVENTS, "Started");
269 p->start_state = BSS_CONNECT;
270
271 if (!p->cf->passive)
272 bgp_active(p);
273 }
274
275 static void
276 bgp_startup_timeout(timer *t)
277 {
278 bgp_startup(t->data);
279 }
280
281
282 static void
283 bgp_initiate(struct bgp_proto *p)
284 {
285 int err_val;
286
287 if (bgp_open(p) < 0)
288 { err_val = BEM_NO_SOCKET; goto err1; }
289
290 if (bgp_setup_auth(p, 1) < 0)
291 { err_val = BEM_INVALID_MD5; goto err2; }
292
293 if (p->cf->bfd)
294 bgp_update_bfd(p, p->cf->bfd);
295
296 if (p->startup_delay)
297 {
298 p->start_state = BSS_DELAY;
299 BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
300 bgp_start_timer(p->startup_timer, p->startup_delay);
301 }
302 else
303 bgp_startup(p);
304
305 return;
306
307 err2:
308 bgp_close(p);
309 err1:
310 p->p.disabled = 1;
311 bgp_store_error(p, NULL, BE_MISC, err_val);
312 proto_notify_state(&p->p, PS_DOWN);
313
314 return;
315 }
316
317 /**
318 * bgp_start_timer - start a BGP timer
319 * @t: timer
320 * @value: time (in seconds) to fire (0 to disable the timer)
321 *
322 * This functions calls tm_start() on @t with time @value and the amount of
323 * randomization suggested by the BGP standard. Please use it for all BGP
324 * timers.
325 */
326 void
327 bgp_start_timer(timer *t, uint value)
328 {
329 if (value)
330 {
331 /* The randomization procedure is specified in RFC 4271 section 10 */
332 btime time = value S;
333 btime randomize = random() % ((time / 4) + 1);
334 tm_start(t, time - randomize);
335 }
336 else
337 tm_stop(t);
338 }
339
340 /**
341 * bgp_close_conn - close a BGP connection
342 * @conn: connection to close
343 *
344 * This function takes a connection described by the &bgp_conn structure, closes
345 * its socket and frees all resources associated with it.
346 */
347 void
348 bgp_close_conn(struct bgp_conn *conn)
349 {
350 // struct bgp_proto *p = conn->bgp;
351
352 DBG("BGP: Closing connection\n");
353 conn->packets_to_send = 0;
354 conn->channels_to_send = 0;
355 rfree(conn->connect_timer);
356 conn->connect_timer = NULL;
357 rfree(conn->keepalive_timer);
358 conn->keepalive_timer = NULL;
359 rfree(conn->hold_timer);
360 conn->hold_timer = NULL;
361 rfree(conn->tx_ev);
362 conn->tx_ev = NULL;
363 rfree(conn->sk);
364 conn->sk = NULL;
365
366 mb_free(conn->local_caps);
367 conn->local_caps = NULL;
368 mb_free(conn->remote_caps);
369 conn->remote_caps = NULL;
370 }
371
372
373 /**
374 * bgp_update_startup_delay - update a startup delay
375 * @p: BGP instance
376 *
377 * This function updates a startup delay that is used to postpone next BGP
378 * connect. It also handles disable_after_error and might stop BGP instance
379 * when error happened and disable_after_error is on.
380 *
381 * It should be called when BGP protocol error happened.
382 */
383 void
384 bgp_update_startup_delay(struct bgp_proto *p)
385 {
386 struct bgp_config *cf = p->cf;
387
388 DBG("BGP: Updating startup delay\n");
389
390 if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
391 p->startup_delay = 0;
392
393 p->last_proto_error = current_time();
394
395 if (cf->disable_after_error)
396 {
397 p->startup_delay = 0;
398 p->p.disabled = 1;
399 return;
400 }
401
402 if (!p->startup_delay)
403 p->startup_delay = cf->error_delay_time_min;
404 else
405 p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
406 }
407
408 static void
409 bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
410 {
411 switch (conn->state)
412 {
413 case BS_IDLE:
414 case BS_CLOSE:
415 return;
416
417 case BS_CONNECT:
418 case BS_ACTIVE:
419 bgp_conn_enter_idle_state(conn);
420 return;
421
422 case BS_OPENSENT:
423 case BS_OPENCONFIRM:
424 case BS_ESTABLISHED:
425 bgp_error(conn, 6, subcode, data, len);
426 return;
427
428 default:
429 bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
430 }
431 }
432
433 static void
434 bgp_down(struct bgp_proto *p)
435 {
436 if (p->start_state > BSS_PREPARE)
437 {
438 bgp_setup_auth(p, 0);
439 bgp_close(p);
440 }
441
442 BGP_TRACE(D_EVENTS, "Down");
443 proto_notify_state(&p->p, PS_DOWN);
444 }
445
446 static void
447 bgp_decision(void *vp)
448 {
449 struct bgp_proto *p = vp;
450
451 DBG("BGP: Decision start\n");
452 if ((p->p.proto_state == PS_START) &&
453 (p->outgoing_conn.state == BS_IDLE) &&
454 (p->incoming_conn.state != BS_OPENCONFIRM) &&
455 !p->cf->passive)
456 bgp_active(p);
457
458 if ((p->p.proto_state == PS_STOP) &&
459 (p->outgoing_conn.state == BS_IDLE) &&
460 (p->incoming_conn.state == BS_IDLE))
461 bgp_down(p);
462 }
463
464 void
465 bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
466 {
467 proto_notify_state(&p->p, PS_STOP);
468 bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
469 bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
470 ev_schedule(p->event);
471 }
472
473 static inline void
474 bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
475 {
476 if (conn->bgp->p.mrtdump & MD_STATES)
477 mrt_dump_bgp_state_change(conn, conn->state, new_state);
478
479 conn->state = new_state;
480 }
481
482 void
483 bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
484 {
485 /* Really, most of the work is done in bgp_rx_open(). */
486 bgp_conn_set_state(conn, BS_OPENCONFIRM);
487 }
488
489 static const struct bgp_af_caps dummy_af_caps = { };
490
491 void
492 bgp_conn_enter_established_state(struct bgp_conn *conn)
493 {
494 struct bgp_proto *p = conn->bgp;
495 struct bgp_caps *local = conn->local_caps;
496 struct bgp_caps *peer = conn->remote_caps;
497 struct bgp_channel *c;
498
499 BGP_TRACE(D_EVENTS, "BGP session established");
500
501 /* For multi-hop BGP sessions */
502 if (ipa_zero(p->source_addr))
503 p->source_addr = conn->sk->saddr;
504
505 conn->sk->fast_rx = 0;
506
507 p->conn = conn;
508 p->last_error_class = 0;
509 p->last_error_code = 0;
510
511 p->as4_session = conn->as4_session;
512
513 p->route_refresh = peer->route_refresh;
514 p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
515
516 /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
517 p->gr_ready = p->llgr_ready = 0; /* Updated later */
518
519 /* Whether peer is ready to handle our GR recovery */
520 int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
521
522 if (p->gr_active_num)
523 tm_stop(p->gr_timer);
524
525 /* Number of active channels */
526 int num = 0;
527
528 WALK_LIST(c, p->p.channels)
529 {
530 const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
531 const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
532
533 /* Ignore AFIs that were not announced in multiprotocol capability */
534 if (!loc || !loc->ready)
535 loc = &dummy_af_caps;
536
537 if (!rem || !rem->ready)
538 rem = &dummy_af_caps;
539
540 int active = loc->ready && rem->ready;
541 c->c.disabled = !active;
542 c->c.reloadable = p->route_refresh;
543
544 c->index = active ? num++ : 0;
545
546 c->feed_state = BFS_NONE;
547 c->load_state = BFS_NONE;
548
549 /* Channels where peer may do GR */
550 uint gr_ready = active && local->gr_aware && rem->gr_able;
551 uint llgr_ready = active && local->llgr_aware && rem->llgr_able;
552
553 c->gr_ready = gr_ready || llgr_ready;
554 p->gr_ready = p->gr_ready || c->gr_ready;
555 p->llgr_ready = p->llgr_ready || llgr_ready;
556
557 /* Remember last LLGR stale time */
558 c->stale_time = local->llgr_aware ? rem->llgr_time : 0;
559
560 /* Channels not able to recover gracefully */
561 if (p->p.gr_recovery && (!active || !peer_gr_ready))
562 channel_graceful_restart_unlock(&c->c);
563
564 /* Channels waiting for local convergence */
565 if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
566 c->c.gr_wait = 1;
567
568 /* Channels where regular graceful restart failed */
569 if ((c->gr_active == BGP_GRS_ACTIVE) &&
570 !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
571 bgp_graceful_restart_done(c);
572
573 /* Channels where regular long-lived restart failed */
574 if ((c->gr_active == BGP_GRS_LLGR) &&
575 !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING)))
576 bgp_graceful_restart_done(c);
577
578 /* GR capability implies that neighbor will send End-of-RIB */
579 if (peer->gr_aware)
580 c->load_state = BFS_LOADING;
581
582 c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
583 c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
584 c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
585
586 /* Update RA mode */
587 if (c->add_path_tx)
588 c->c.ra_mode = RA_ANY;
589 else if (c->cf->secondary)
590 c->c.ra_mode = RA_ACCEPTED;
591 else
592 c->c.ra_mode = RA_OPTIMAL;
593 }
594
595 p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
596 p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
597 p->channel_count = num;
598
599 WALK_LIST(c, p->p.channels)
600 {
601 if (c->c.disabled)
602 continue;
603
604 p->afi_map[c->index] = c->afi;
605 p->channel_map[c->index] = c;
606 }
607
608 /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
609
610 bgp_conn_set_state(conn, BS_ESTABLISHED);
611 proto_notify_state(&p->p, PS_UP);
612 }
613
614 static void
615 bgp_conn_leave_established_state(struct bgp_proto *p)
616 {
617 BGP_TRACE(D_EVENTS, "BGP session closed");
618 p->conn = NULL;
619
620 if (p->p.proto_state == PS_UP)
621 bgp_stop(p, 0, NULL, 0);
622 }
623
624 void
625 bgp_conn_enter_close_state(struct bgp_conn *conn)
626 {
627 struct bgp_proto *p = conn->bgp;
628 int os = conn->state;
629
630 bgp_conn_set_state(conn, BS_CLOSE);
631 tm_stop(conn->keepalive_timer);
632 conn->sk->rx_hook = NULL;
633
634 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
635 bgp_start_timer(conn->hold_timer, 10);
636
637 if (os == BS_ESTABLISHED)
638 bgp_conn_leave_established_state(p);
639 }
640
641 void
642 bgp_conn_enter_idle_state(struct bgp_conn *conn)
643 {
644 struct bgp_proto *p = conn->bgp;
645 int os = conn->state;
646
647 bgp_close_conn(conn);
648 bgp_conn_set_state(conn, BS_IDLE);
649 ev_schedule(p->event);
650
651 if (os == BS_ESTABLISHED)
652 bgp_conn_leave_established_state(p);
653 }
654
655 /**
656 * bgp_handle_graceful_restart - handle detected BGP graceful restart
657 * @p: BGP instance
658 *
659 * This function is called when a BGP graceful restart of the neighbor is
660 * detected (when the TCP connection fails or when a new TCP connection
661 * appears). The function activates processing of the restart - starts routing
662 * table refresh cycle and activates BGP restart timer. The protocol state goes
663 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
664 * caller.
665 */
666 void
667 bgp_handle_graceful_restart(struct bgp_proto *p)
668 {
669 ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
670
671 BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
672 p->gr_active_num ? " - already pending" : "");
673
674 p->gr_active_num = 0;
675
676 struct bgp_channel *c;
677 WALK_LIST(c, p->p.channels)
678 {
679 /* FIXME: perhaps check for channel state instead of disabled flag? */
680 if (c->c.disabled)
681 continue;
682
683 if (c->gr_ready)
684 {
685 p->gr_active_num++;
686
687 switch (c->gr_active)
688 {
689 case BGP_GRS_NONE:
690 c->gr_active = BGP_GRS_ACTIVE;
691 rt_refresh_begin(c->c.table, &c->c);
692 break;
693
694 case BGP_GRS_ACTIVE:
695 rt_refresh_end(c->c.table, &c->c);
696 rt_refresh_begin(c->c.table, &c->c);
697 break;
698
699 case BGP_GRS_LLGR:
700 rt_refresh_begin(c->c.table, &c->c);
701 rt_modify_stale(c->c.table, &c->c);
702 break;
703 }
704 }
705 else
706 {
707 /* Just flush the routes */
708 rt_refresh_begin(c->c.table, &c->c);
709 rt_refresh_end(c->c.table, &c->c);
710 }
711
712 /* Reset bucket and prefix tables */
713 bgp_free_bucket_table(c);
714 bgp_free_prefix_table(c);
715 bgp_init_bucket_table(c);
716 bgp_init_prefix_table(c);
717 c->packets_to_send = 0;
718 }
719
720 /* p->gr_ready -> at least one active channel is c->gr_ready */
721 ASSERT(p->gr_active_num > 0);
722
723 proto_notify_state(&p->p, PS_START);
724 tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
725 }
726
727 /**
728 * bgp_graceful_restart_done - finish active BGP graceful restart
729 * @c: BGP channel
730 *
731 * This function is called when the active BGP graceful restart of the neighbor
732 * should be finished for channel @c - either successfully (the neighbor sends
733 * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
734 * unsuccessfully (the neighbor does not support BGP graceful restart on the new
735 * session). The function ends the routing table refresh cycle.
736 */
737 void
738 bgp_graceful_restart_done(struct bgp_channel *c)
739 {
740 struct bgp_proto *p = (void *) c->c.proto;
741
742 ASSERT(c->gr_active);
743 c->gr_active = 0;
744 p->gr_active_num--;
745
746 if (!p->gr_active_num)
747 BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
748
749 tm_stop(c->stale_timer);
750 rt_refresh_end(c->c.table, &c->c);
751 }
752
753 /**
754 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
755 * @t: timer
756 *
757 * This function is a timeout hook for @gr_timer, implementing BGP restart time
758 * limit for reestablisment of the BGP session after the graceful restart. When
759 * fired, we just proceed with the usual protocol restart.
760 */
761
762 static void
763 bgp_graceful_restart_timeout(timer *t)
764 {
765 struct bgp_proto *p = t->data;
766
767 BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
768
769 if (p->llgr_ready)
770 {
771 struct bgp_channel *c;
772 WALK_LIST(c, p->p.channels)
773 {
774 /* Channel is not in GR and is already flushed */
775 if (!c->gr_active)
776 continue;
777
778 /* Channel is already in LLGR from past restart */
779 if (c->gr_active == BGP_GRS_LLGR)
780 continue;
781
782 /* Channel is in GR, but does not support LLGR -> stop GR */
783 if (!c->stale_time)
784 {
785 bgp_graceful_restart_done(c);
786 continue;
787 }
788
789 /* Channel is in GR, and supports LLGR -> start LLGR */
790 c->gr_active = BGP_GRS_LLGR;
791 tm_start(c->stale_timer, c->stale_time S);
792 rt_modify_stale(c->c.table, &c->c);
793 }
794 }
795 else
796 bgp_stop(p, 0, NULL, 0);
797 }
798
799 static void
800 bgp_long_lived_stale_timeout(timer *t)
801 {
802 struct bgp_channel *c = t->data;
803 struct bgp_proto *p = (void *) c->c.proto;
804
805 BGP_TRACE(D_EVENTS, "Long-lived stale timeout");
806
807 bgp_graceful_restart_done(c);
808 }
809
810
811 /**
812 * bgp_refresh_begin - start incoming enhanced route refresh sequence
813 * @c: BGP channel
814 *
815 * This function is called when an incoming enhanced route refresh sequence is
816 * started by the neighbor, demarcated by the BoRR packet. The function updates
817 * the load state and starts the routing table refresh cycle. Note that graceful
818 * restart also uses routing table refresh cycle, but RFC 7313 and load states
819 * ensure that these two sequences do not overlap.
820 */
821 void
822 bgp_refresh_begin(struct bgp_channel *c)
823 {
824 struct bgp_proto *p = (void *) c->c.proto;
825
826 if (c->load_state == BFS_LOADING)
827 { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
828
829 c->load_state = BFS_REFRESHING;
830 rt_refresh_begin(c->c.table, &c->c);
831 }
832
833 /**
834 * bgp_refresh_end - finish incoming enhanced route refresh sequence
835 * @c: BGP channel
836 *
837 * This function is called when an incoming enhanced route refresh sequence is
838 * finished by the neighbor, demarcated by the EoRR packet. The function updates
839 * the load state and ends the routing table refresh cycle. Routes not received
840 * during the sequence are removed by the nest.
841 */
842 void
843 bgp_refresh_end(struct bgp_channel *c)
844 {
845 struct bgp_proto *p = (void *) c->c.proto;
846
847 if (c->load_state != BFS_REFRESHING)
848 { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
849
850 c->load_state = BFS_NONE;
851 rt_refresh_end(c->c.table, &c->c);
852 }
853
854
855 static void
856 bgp_send_open(struct bgp_conn *conn)
857 {
858 DBG("BGP: Sending open\n");
859 conn->sk->rx_hook = bgp_rx;
860 conn->sk->tx_hook = bgp_tx;
861 tm_stop(conn->connect_timer);
862 bgp_schedule_packet(conn, NULL, PKT_OPEN);
863 bgp_conn_set_state(conn, BS_OPENSENT);
864 bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
865 }
866
867 static void
868 bgp_connected(sock *sk)
869 {
870 struct bgp_conn *conn = sk->data;
871 struct bgp_proto *p = conn->bgp;
872
873 BGP_TRACE(D_EVENTS, "Connected");
874 bgp_send_open(conn);
875 }
876
877 static void
878 bgp_connect_timeout(timer *t)
879 {
880 struct bgp_conn *conn = t->data;
881 struct bgp_proto *p = conn->bgp;
882
883 DBG("BGP: connect_timeout\n");
884 if (p->p.proto_state == PS_START)
885 {
886 bgp_close_conn(conn);
887 bgp_connect(p);
888 }
889 else
890 bgp_conn_enter_idle_state(conn);
891 }
892
893 static void
894 bgp_sock_err(sock *sk, int err)
895 {
896 struct bgp_conn *conn = sk->data;
897 struct bgp_proto *p = conn->bgp;
898
899 /*
900 * This error hook may be called either asynchronously from main
901 * loop, or synchronously from sk_send(). But sk_send() is called
902 * only from bgp_tx() and bgp_kick_tx(), which are both called
903 * asynchronously from main loop. Moreover, they end if err hook is
904 * called. Therefore, we could suppose that it is always called
905 * asynchronously.
906 */
907
908 bgp_store_error(p, conn, BE_SOCKET, err);
909
910 if (err)
911 BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
912 else
913 BGP_TRACE(D_EVENTS, "Connection closed");
914
915 if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
916 bgp_handle_graceful_restart(p);
917
918 bgp_conn_enter_idle_state(conn);
919 }
920
921 static void
922 bgp_hold_timeout(timer *t)
923 {
924 struct bgp_conn *conn = t->data;
925 struct bgp_proto *p = conn->bgp;
926
927 DBG("BGP: Hold timeout\n");
928
929 /* We are already closing the connection - just do hangup */
930 if (conn->state == BS_CLOSE)
931 {
932 BGP_TRACE(D_EVENTS, "Connection stalled");
933 bgp_conn_enter_idle_state(conn);
934 return;
935 }
936
937 /* If there is something in input queue, we are probably congested
938 and perhaps just not processed BGP packets in time. */
939
940 if (sk_rx_ready(conn->sk) > 0)
941 bgp_start_timer(conn->hold_timer, 10);
942 else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
943 {
944 BGP_TRACE(D_EVENTS, "Hold timer expired");
945 bgp_handle_graceful_restart(p);
946 bgp_conn_enter_idle_state(conn);
947 }
948 else
949 bgp_error(conn, 4, 0, NULL, 0);
950 }
951
952 static void
953 bgp_keepalive_timeout(timer *t)
954 {
955 struct bgp_conn *conn = t->data;
956
957 DBG("BGP: Keepalive timer\n");
958 bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
959
960 /* Kick TX a bit faster */
961 if (ev_active(conn->tx_ev))
962 ev_run(conn->tx_ev);
963 }
964
965 static void
966 bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
967 {
968 conn->sk = NULL;
969 conn->bgp = p;
970
971 conn->packets_to_send = 0;
972 conn->channels_to_send = 0;
973 conn->last_channel = 0;
974 conn->last_channel_count = 0;
975
976 conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0);
977 conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0);
978 conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
979
980 conn->tx_ev = ev_new_init(p->p.pool, bgp_kick_tx, conn);
981 }
982
983 static void
984 bgp_setup_sk(struct bgp_conn *conn, sock *s)
985 {
986 s->data = conn;
987 s->err_hook = bgp_sock_err;
988 s->fast_rx = 1;
989 conn->sk = s;
990 }
991
992 static void
993 bgp_active(struct bgp_proto *p)
994 {
995 int delay = MAX(1, p->cf->connect_delay_time);
996 struct bgp_conn *conn = &p->outgoing_conn;
997
998 BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
999 bgp_setup_conn(p, conn);
1000 bgp_conn_set_state(conn, BS_ACTIVE);
1001 bgp_start_timer(conn->connect_timer, delay);
1002 }
1003
1004 /**
1005 * bgp_connect - initiate an outgoing connection
1006 * @p: BGP instance
1007 *
1008 * The bgp_connect() function creates a new &bgp_conn and initiates
1009 * a TCP connection to the peer. The rest of connection setup is governed
1010 * by the BGP state machine as described in the standard.
1011 */
1012 static void
1013 bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
1014 {
1015 struct bgp_conn *conn = &p->outgoing_conn;
1016 int hops = p->cf->multihop ? : 1;
1017
1018 DBG("BGP: Connecting\n");
1019 sock *s = sk_new(p->p.pool);
1020 s->type = SK_TCP_ACTIVE;
1021 s->saddr = p->source_addr;
1022 s->daddr = p->cf->remote_ip;
1023 s->dport = p->cf->remote_port;
1024 s->iface = p->neigh ? p->neigh->iface : NULL;
1025 s->vrf = p->p.vrf;
1026 s->ttl = p->cf->ttl_security ? 255 : hops;
1027 s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
1028 s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
1029 s->tos = IP_PREC_INTERNET_CONTROL;
1030 s->password = p->cf->password;
1031 s->tx_hook = bgp_connected;
1032 BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
1033 s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
1034 bgp_setup_conn(p, conn);
1035 bgp_setup_sk(conn, s);
1036 bgp_conn_set_state(conn, BS_CONNECT);
1037
1038 if (sk_open(s) < 0)
1039 goto err;
1040
1041 /* Set minimal receive TTL if needed */
1042 if (p->cf->ttl_security)
1043 if (sk_set_min_ttl(s, 256 - hops) < 0)
1044 goto err;
1045
1046 DBG("BGP: Waiting for connect success\n");
1047 bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
1048 return;
1049
1050 err:
1051 sk_log_error(s, p->p.name);
1052 bgp_sock_err(s, 0);
1053 return;
1054 }
1055
1056 /**
1057 * bgp_find_proto - find existing proto for incoming connection
1058 * @sk: TCP socket
1059 *
1060 */
1061 static struct bgp_proto *
1062 bgp_find_proto(sock *sk)
1063 {
1064 struct bgp_proto *p;
1065
1066 WALK_LIST(p, proto_list)
1067 if ((p->p.proto == &proto_bgp) &&
1068 ipa_equal(p->cf->remote_ip, sk->daddr) &&
1069 (!p->cf->iface || (p->cf->iface == sk->iface)) &&
1070 (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)) &&
1071 (p->cf->local_port == sk->sport))
1072 return p;
1073
1074 return NULL;
1075 }
1076
1077 /**
1078 * bgp_incoming_connection - handle an incoming connection
1079 * @sk: TCP socket
1080 * @dummy: unused
1081 *
1082 * This function serves as a socket hook for accepting of new BGP
1083 * connections. It searches a BGP instance corresponding to the peer
1084 * which has connected and if such an instance exists, it creates a
1085 * &bgp_conn structure, attaches it to the instance and either sends
1086 * an Open message or (if there already is an active connection) it
1087 * closes the new connection by sending a Notification message.
1088 */
1089 static int
1090 bgp_incoming_connection(sock *sk, uint dummy UNUSED)
1091 {
1092 struct bgp_proto *p;
1093 int acc, hops;
1094
1095 DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
1096 p = bgp_find_proto(sk);
1097 if (!p)
1098 {
1099 log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
1100 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
1101 rfree(sk);
1102 return 0;
1103 }
1104
1105 /*
1106 * BIRD should keep multiple incoming connections in OpenSent state (for
1107 * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
1108 * connections are rejected istead. The exception is the case where an
1109 * incoming connection triggers a graceful restart.
1110 */
1111
1112 acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
1113 (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
1114
1115 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1116 {
1117 bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
1118 bgp_handle_graceful_restart(p);
1119 bgp_conn_enter_idle_state(p->conn);
1120 acc = 1;
1121
1122 /* There might be separate incoming connection in OpenSent state */
1123 if (p->incoming_conn.state > BS_ACTIVE)
1124 bgp_close_conn(&p->incoming_conn);
1125 }
1126
1127 BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
1128 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
1129 sk->dport, acc ? "accepted" : "rejected");
1130
1131 if (!acc)
1132 {
1133 rfree(sk);
1134 return 0;
1135 }
1136
1137 hops = p->cf->multihop ? : 1;
1138
1139 if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
1140 goto err;
1141
1142 if (p->cf->ttl_security)
1143 if (sk_set_min_ttl(sk, 256 - hops) < 0)
1144 goto err;
1145
1146 if (p->cf->enable_extended_messages)
1147 {
1148 sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
1149 sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
1150 sk_reallocate(sk);
1151 }
1152
1153 bgp_setup_conn(p, &p->incoming_conn);
1154 bgp_setup_sk(&p->incoming_conn, sk);
1155 bgp_send_open(&p->incoming_conn);
1156 return 0;
1157
1158 err:
1159 sk_log_error(sk, p->p.name);
1160 log(L_ERR "%s: Incoming connection aborted", p->p.name);
1161 rfree(sk);
1162 return 0;
1163 }
1164
1165 static void
1166 bgp_listen_sock_err(sock *sk UNUSED, int err)
1167 {
1168 if (err == ECONNABORTED)
1169 log(L_WARN "BGP: Incoming connection aborted");
1170 else
1171 log(L_ERR "BGP: Error on listening socket: %M", err);
1172 }
1173
1174 static void
1175 bgp_start_neighbor(struct bgp_proto *p)
1176 {
1177 /* Called only for single-hop BGP sessions */
1178
1179 if (ipa_zero(p->source_addr))
1180 p->source_addr = p->neigh->ifa->ip;
1181
1182 if (ipa_is_link_local(p->source_addr))
1183 p->link_addr = p->source_addr;
1184 else if (p->neigh->iface->llv6)
1185 p->link_addr = p->neigh->iface->llv6->ip;
1186
1187 bgp_initiate(p);
1188 }
1189
1190 static void
1191 bgp_neigh_notify(neighbor *n)
1192 {
1193 struct bgp_proto *p = (struct bgp_proto *) n->proto;
1194 int ps = p->p.proto_state;
1195
1196 if (n != p->neigh)
1197 return;
1198
1199 if ((ps == PS_DOWN) || (ps == PS_STOP))
1200 return;
1201
1202 int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1203
1204 if (n->scope <= 0)
1205 {
1206 if (!prepare)
1207 {
1208 BGP_TRACE(D_EVENTS, "Neighbor lost");
1209 bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1210 /* Perhaps also run bgp_update_startup_delay(p)? */
1211 bgp_stop(p, 0, NULL, 0);
1212 }
1213 }
1214 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1215 {
1216 if (!prepare)
1217 {
1218 BGP_TRACE(D_EVENTS, "Link down");
1219 bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1220 if (ps == PS_UP)
1221 bgp_update_startup_delay(p);
1222 bgp_stop(p, 0, NULL, 0);
1223 }
1224 }
1225 else
1226 {
1227 if (prepare)
1228 {
1229 BGP_TRACE(D_EVENTS, "Neighbor ready");
1230 bgp_start_neighbor(p);
1231 }
1232 }
1233 }
1234
1235 static void
1236 bgp_bfd_notify(struct bfd_request *req)
1237 {
1238 struct bgp_proto *p = req->data;
1239 int ps = p->p.proto_state;
1240
1241 if (req->down && ((ps == PS_START) || (ps == PS_UP)))
1242 {
1243 BGP_TRACE(D_EVENTS, "BFD session down");
1244 bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
1245
1246 if (p->cf->bfd == BGP_BFD_GRACEFUL)
1247 {
1248 /* Trigger graceful restart */
1249 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1250 bgp_handle_graceful_restart(p);
1251
1252 if (p->incoming_conn.state > BS_IDLE)
1253 bgp_conn_enter_idle_state(&p->incoming_conn);
1254
1255 if (p->outgoing_conn.state > BS_IDLE)
1256 bgp_conn_enter_idle_state(&p->outgoing_conn);
1257 }
1258 else
1259 {
1260 /* Trigger session down */
1261 if (ps == PS_UP)
1262 bgp_update_startup_delay(p);
1263 bgp_stop(p, 0, NULL, 0);
1264 }
1265 }
1266 }
1267
1268 static void
1269 bgp_update_bfd(struct bgp_proto *p, int use_bfd)
1270 {
1271 if (use_bfd && !p->bfd_req)
1272 p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
1273 p->cf->multihop ? NULL : p->neigh->iface,
1274 bgp_bfd_notify, p);
1275
1276 if (!use_bfd && p->bfd_req)
1277 {
1278 rfree(p->bfd_req);
1279 p->bfd_req = NULL;
1280 }
1281 }
1282
1283 static void
1284 bgp_reload_routes(struct channel *C)
1285 {
1286 struct bgp_proto *p = (void *) C->proto;
1287 struct bgp_channel *c = (void *) C;
1288
1289 ASSERT(p->conn && p->route_refresh);
1290
1291 bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
1292 }
1293
1294 static void
1295 bgp_feed_begin(struct channel *C, int initial)
1296 {
1297 struct bgp_proto *p = (void *) C->proto;
1298 struct bgp_channel *c = (void *) C;
1299
1300 /* This should not happen */
1301 if (!p->conn)
1302 return;
1303
1304 if (initial && p->cf->gr_mode)
1305 c->feed_state = BFS_LOADING;
1306
1307 /* It is refeed and both sides support enhanced route refresh */
1308 if (!initial && p->enhanced_refresh)
1309 {
1310 /* BoRR must not be sent before End-of-RIB */
1311 if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
1312 return;
1313
1314 c->feed_state = BFS_REFRESHING;
1315 bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
1316 }
1317 }
1318
1319 static void
1320 bgp_feed_end(struct channel *C)
1321 {
1322 struct bgp_proto *p = (void *) C->proto;
1323 struct bgp_channel *c = (void *) C;
1324
1325 /* This should not happen */
1326 if (!p->conn)
1327 return;
1328
1329 /* Non-demarcated feed ended, nothing to do */
1330 if (c->feed_state == BFS_NONE)
1331 return;
1332
1333 /* Schedule End-of-RIB packet */
1334 if (c->feed_state == BFS_LOADING)
1335 c->feed_state = BFS_LOADED;
1336
1337 /* Schedule EoRR packet */
1338 if (c->feed_state == BFS_REFRESHING)
1339 c->feed_state = BFS_REFRESHED;
1340
1341 /* Kick TX hook */
1342 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1343 }
1344
1345
1346 static void
1347 bgp_start_locked(struct object_lock *lock)
1348 {
1349 struct bgp_proto *p = lock->data;
1350 struct bgp_config *cf = p->cf;
1351
1352 if (p->p.proto_state != PS_START)
1353 {
1354 DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1355 return;
1356 }
1357
1358 DBG("BGP: Got lock\n");
1359
1360 if (cf->multihop)
1361 {
1362 /* Multi-hop sessions do not use neighbor entries */
1363 bgp_initiate(p);
1364 return;
1365 }
1366
1367 neighbor *n = neigh_find(&p->p, cf->remote_ip, cf->iface, NEF_STICKY);
1368 if (!n)
1369 {
1370 log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
1371 /* As we do not start yet, we can just disable protocol */
1372 p->p.disabled = 1;
1373 bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1374 proto_notify_state(&p->p, PS_DOWN);
1375 return;
1376 }
1377
1378 p->neigh = n;
1379
1380 if (n->scope <= 0)
1381 BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1382 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1383 BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1384 else
1385 bgp_start_neighbor(p);
1386 }
1387
1388 static int
1389 bgp_start(struct proto *P)
1390 {
1391 struct bgp_proto *p = (struct bgp_proto *) P;
1392 struct object_lock *lock;
1393
1394 DBG("BGP: Startup.\n");
1395 p->start_state = BSS_PREPARE;
1396 p->outgoing_conn.state = BS_IDLE;
1397 p->incoming_conn.state = BS_IDLE;
1398 p->neigh = NULL;
1399 p->bfd_req = NULL;
1400 p->gr_ready = 0;
1401 p->gr_active_num = 0;
1402
1403 p->event = ev_new_init(p->p.pool, bgp_decision, p);
1404 p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
1405 p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
1406
1407 p->local_id = proto_get_router_id(P->cf);
1408 if (p->rr_client)
1409 p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1410
1411 p->remote_id = 0;
1412 p->source_addr = p->cf->local_ip;
1413 p->link_addr = IPA_NONE;
1414
1415 /* Lock all channels when in GR recovery mode */
1416 if (p->p.gr_recovery && p->cf->gr_mode)
1417 {
1418 struct bgp_channel *c;
1419 WALK_LIST(c, p->p.channels)
1420 channel_graceful_restart_lock(&c->c);
1421 }
1422
1423 /*
1424 * Before attempting to create the connection, we need to lock the port,
1425 * so that we are the only instance attempting to talk with that neighbor.
1426 */
1427
1428 lock = p->lock = olock_new(P->pool);
1429 lock->addr = p->cf->remote_ip;
1430 lock->port = p->cf->remote_port;
1431 lock->iface = p->cf->iface;
1432 lock->vrf = p->cf->iface ? NULL : p->p.vrf;
1433 lock->type = OBJLOCK_TCP;
1434 lock->hook = bgp_start_locked;
1435 lock->data = p;
1436 olock_acquire(lock);
1437
1438 return PS_START;
1439 }
1440
1441 extern int proto_restart;
1442
1443 static int
1444 bgp_shutdown(struct proto *P)
1445 {
1446 struct bgp_proto *p = (struct bgp_proto *) P;
1447 uint subcode = 0;
1448
1449 char *message = NULL;
1450 byte *data = NULL;
1451 uint len = 0;
1452
1453 BGP_TRACE(D_EVENTS, "Shutdown requested");
1454
1455 switch (P->down_code)
1456 {
1457 case PDC_CF_REMOVE:
1458 case PDC_CF_DISABLE:
1459 subcode = 3; // Errcode 6, 3 - peer de-configured
1460 break;
1461
1462 case PDC_CF_RESTART:
1463 subcode = 6; // Errcode 6, 6 - other configuration change
1464 break;
1465
1466 case PDC_CMD_DISABLE:
1467 case PDC_CMD_SHUTDOWN:
1468 subcode = 2; // Errcode 6, 2 - administrative shutdown
1469 message = P->message;
1470 break;
1471
1472 case PDC_CMD_RESTART:
1473 subcode = 4; // Errcode 6, 4 - administrative reset
1474 message = P->message;
1475 break;
1476
1477 case PDC_RX_LIMIT_HIT:
1478 case PDC_IN_LIMIT_HIT:
1479 subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1480 /* log message for compatibility */
1481 log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1482 goto limit;
1483
1484 case PDC_OUT_LIMIT_HIT:
1485 subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1486
1487 limit:
1488 bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1489 if (proto_restart)
1490 bgp_update_startup_delay(p);
1491 else
1492 p->startup_delay = 0;
1493 goto done;
1494 }
1495
1496 bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1497 p->startup_delay = 0;
1498
1499 /* RFC 8203 - shutdown communication */
1500 if (message)
1501 {
1502 uint msg_len = strlen(message);
1503 msg_len = MIN(msg_len, 128);
1504
1505 /* Buffer will be freed automatically by protocol shutdown */
1506 data = mb_alloc(p->p.pool, msg_len + 1);
1507 len = msg_len + 1;
1508
1509 data[0] = msg_len;
1510 memcpy(data+1, message, msg_len);
1511 }
1512
1513 done:
1514 bgp_stop(p, subcode, data, len);
1515 return p->p.proto_state;
1516 }
1517
1518 static struct proto *
1519 bgp_init(struct proto_config *CF)
1520 {
1521 struct proto *P = proto_new(CF);
1522 struct bgp_proto *p = (struct bgp_proto *) P;
1523 struct bgp_config *cf = (struct bgp_config *) CF;
1524
1525 P->rt_notify = bgp_rt_notify;
1526 P->import_control = bgp_import_control;
1527 P->neigh_notify = bgp_neigh_notify;
1528 P->reload_routes = bgp_reload_routes;
1529 P->feed_begin = bgp_feed_begin;
1530 P->feed_end = bgp_feed_end;
1531 P->rte_better = bgp_rte_better;
1532 P->rte_mergable = bgp_rte_mergable;
1533 P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
1534 P->rte_modify = bgp_rte_modify_stale;
1535
1536 p->cf = cf;
1537 p->local_as = cf->local_as;
1538 p->remote_as = cf->remote_as;
1539 p->public_as = cf->local_as;
1540 p->is_internal = (cf->local_as == cf->remote_as);
1541 p->is_interior = p->is_internal || cf->confederation_member;
1542 p->rs_client = cf->rs_client;
1543 p->rr_client = cf->rr_client;
1544
1545 /* Confederation ID is used for truly external peers */
1546 if (cf->confederation && !p->is_interior)
1547 p->public_as = cf->confederation;
1548
1549 /* Add all channels */
1550 struct bgp_channel_config *cc;
1551 WALK_LIST(cc, CF->channels)
1552 proto_add_channel(P, &cc->c);
1553
1554 return P;
1555 }
1556
1557 static void
1558 bgp_channel_init(struct channel *C, struct channel_config *CF)
1559 {
1560 struct bgp_channel *c = (void *) C;
1561 struct bgp_channel_config *cf = (void *) CF;
1562
1563 c->cf = cf;
1564 c->afi = cf->afi;
1565 c->desc = cf->desc;
1566
1567 if (cf->igp_table_ip4)
1568 c->igp_table_ip4 = cf->igp_table_ip4->table;
1569
1570 if (cf->igp_table_ip6)
1571 c->igp_table_ip6 = cf->igp_table_ip6->table;
1572 }
1573
1574 static int
1575 bgp_channel_start(struct channel *C)
1576 {
1577 struct bgp_proto *p = (void *) C->proto;
1578 struct bgp_channel *c = (void *) C;
1579 ip_addr src = p->source_addr;
1580
1581 if (c->igp_table_ip4)
1582 rt_lock_table(c->igp_table_ip4);
1583
1584 if (c->igp_table_ip6)
1585 rt_lock_table(c->igp_table_ip6);
1586
1587 c->pool = p->p.pool; // XXXX
1588 bgp_init_bucket_table(c);
1589 bgp_init_prefix_table(c);
1590
1591 c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
1592
1593 c->next_hop_addr = c->cf->next_hop_addr;
1594 c->link_addr = IPA_NONE;
1595 c->packets_to_send = 0;
1596
1597 /* Try to use source address as next hop address */
1598 if (ipa_zero(c->next_hop_addr))
1599 {
1600 if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
1601 c->next_hop_addr = src;
1602
1603 if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
1604 c->next_hop_addr = src;
1605 }
1606
1607 /* Use preferred addresses associated with interface / source address */
1608 if (ipa_zero(c->next_hop_addr))
1609 {
1610 /* We know the iface for single-hop, we make lookup for multihop */
1611 struct neighbor *nbr = p->neigh ?: neigh_find(&p->p, src, NULL, 0);
1612 struct iface *iface = nbr ? nbr->iface : NULL;
1613
1614 if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
1615 c->next_hop_addr = iface->addr4->ip;
1616
1617 if (bgp_channel_is_ipv6(c) && iface && iface->addr6)
1618 c->next_hop_addr = iface->addr6->ip;
1619 }
1620
1621 /* Exit if no feasible next hop address is found */
1622 if (ipa_zero(c->next_hop_addr))
1623 {
1624 log(L_WARN "%s: Missing next hop address", p->p.name);
1625 return 0;
1626 }
1627
1628 /* Set link-local address for IPv6 single-hop BGP */
1629 if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
1630 {
1631 c->link_addr = p->link_addr;
1632
1633 if (ipa_zero(c->link_addr))
1634 log(L_WARN "%s: Missing link-local address", p->p.name);
1635 }
1636
1637 /* Link local address is already in c->link_addr */
1638 if (ipa_is_link_local(c->next_hop_addr))
1639 c->next_hop_addr = IPA_NONE;
1640
1641 return 0; /* XXXX: Currently undefined */
1642 }
1643
1644 static void
1645 bgp_channel_shutdown(struct channel *C)
1646 {
1647 struct bgp_channel *c = (void *) C;
1648
1649 c->next_hop_addr = IPA_NONE;
1650 c->link_addr = IPA_NONE;
1651 c->packets_to_send = 0;
1652 }
1653
1654 static void
1655 bgp_channel_cleanup(struct channel *C)
1656 {
1657 struct bgp_channel *c = (void *) C;
1658
1659 if (c->igp_table_ip4)
1660 rt_unlock_table(c->igp_table_ip4);
1661
1662 if (c->igp_table_ip6)
1663 rt_unlock_table(c->igp_table_ip6);
1664 }
1665
1666 static inline struct bgp_channel_config *
1667 bgp_find_channel_config(struct bgp_config *cf, u32 afi)
1668 {
1669 struct bgp_channel_config *cc;
1670
1671 WALK_LIST(cc, cf->c.channels)
1672 if (cc->afi == afi)
1673 return cc;
1674
1675 return NULL;
1676 }
1677
1678 struct rtable_config *
1679 bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
1680 {
1681 struct bgp_channel_config *cc2;
1682 struct rtable_config *tab;
1683
1684 /* First, try table connected by the channel */
1685 if (cc->c.table->addr_type == type)
1686 return cc->c.table;
1687
1688 /* Find paired channel with the same SAFI but the other AFI */
1689 u32 afi2 = cc->afi ^ 0x30000;
1690 cc2 = bgp_find_channel_config(cf, afi2);
1691
1692 /* Second, try IGP table configured in the paired channel */
1693 if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
1694 return tab;
1695
1696 /* Third, try table connected by the paired channel */
1697 if (cc2 && (cc2->c.table->addr_type == type))
1698 return cc2->c.table;
1699
1700 /* Last, try default table of given type */
1701 if (tab = cf->c.global->def_tables[type])
1702 return tab;
1703
1704 cf_error("Undefined IGP table");
1705 }
1706
1707
1708 void
1709 bgp_postconfig(struct proto_config *CF)
1710 {
1711 struct bgp_config *cf = (void *) CF;
1712 int internal = (cf->local_as == cf->remote_as);
1713 int interior = internal || cf->confederation_member;
1714
1715 /* Do not check templates at all */
1716 if (cf->c.class == SYM_TEMPLATE)
1717 return;
1718
1719
1720 /* EBGP direct by default, IBGP multihop by default */
1721 if (cf->multihop < 0)
1722 cf->multihop = internal ? 64 : 0;
1723
1724 /* LLGR mode default based on GR mode */
1725 if (cf->llgr_mode < 0)
1726 cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0;
1727
1728 /* Link check for single-hop BGP by default */
1729 if (cf->check_link < 0)
1730 cf->check_link = !cf->multihop;
1731
1732
1733 if (!cf->local_as)
1734 cf_error("Local AS number must be set");
1735
1736 if (ipa_zero(cf->remote_ip))
1737 cf_error("Neighbor must be configured");
1738
1739 if (!cf->remote_as)
1740 cf_error("Remote AS number must be set");
1741
1742 if (ipa_is_link_local(cf->remote_ip) && !cf->iface)
1743 cf_error("Link-local neighbor address requires specified interface");
1744
1745 if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
1746 cf_error("Neighbor AS number out of range (AS4 not available)");
1747
1748 if (!internal && cf->rr_client)
1749 cf_error("Only internal neighbor can be RR client");
1750
1751 if (internal && cf->rs_client)
1752 cf_error("Only external neighbor can be RS client");
1753
1754 if (!cf->confederation && cf->confederation_member)
1755 cf_error("Confederation ID must be set for member sessions");
1756
1757 if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
1758 ipa_is_link_local(cf->remote_ip)))
1759 cf_error("Multihop BGP cannot be used with link-local addresses");
1760
1761 if (cf->multihop && cf->iface)
1762 cf_error("Multihop BGP cannot be bound to interface");
1763
1764 if (cf->multihop && cf->check_link)
1765 cf_error("Multihop BGP cannot depend on link state");
1766
1767 if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
1768 cf_error("Multihop BGP with BFD requires specified local address");
1769
1770 if (!cf->gr_mode && cf->llgr_mode)
1771 cf_error("Long-lived graceful restart requires basic graceful restart");
1772
1773
1774 struct bgp_channel_config *cc;
1775 WALK_LIST(cc, CF->channels)
1776 {
1777 /* Handle undefined import filter */
1778 if (cc->c.in_filter == FILTER_UNDEF)
1779 if (interior)
1780 cc->c.in_filter = FILTER_ACCEPT;
1781 else
1782 cf_error("EBGP requires explicit import policy");
1783
1784 /* Handle undefined export filter */
1785 if (cc->c.out_filter == FILTER_UNDEF)
1786 if (interior)
1787 cc->c.out_filter = FILTER_REJECT;
1788 else
1789 cf_error("EBGP requires explicit export policy");
1790
1791 /* Disable after error incompatible with restart limit action */
1792 if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
1793 cc->c.in_limit.action = PLA_DISABLE;
1794
1795 /* Different default based on rs_client */
1796 if (!cc->missing_lladdr)
1797 cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF;
1798
1799 /* Different default for gw_mode */
1800 if (!cc->gw_mode)
1801 cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
1802
1803 /* Defaults based on proto config */
1804 if (cc->gr_able == 0xff)
1805 cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
1806
1807 if (cc->llgr_able == 0xff)
1808 cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE);
1809
1810 if (cc->llgr_time == ~0U)
1811 cc->llgr_time = cf->llgr_time;
1812
1813 /* Default values of IGP tables */
1814 if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
1815 {
1816 if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
1817 cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
1818
1819 if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
1820 cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
1821
1822 if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
1823 cf_error("Mismatched IGP table type");
1824
1825 if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
1826 cf_error("Mismatched IGP table type");
1827 }
1828
1829 if (cf->multihop && (cc->gw_mode == GW_DIRECT))
1830 cf_error("Multihop BGP cannot use direct gateway mode");
1831
1832 if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
1833 cf_error("BGP in recursive mode prohibits sorted table");
1834
1835 if (cf->deterministic_med && cc->c.table->sorted)
1836 cf_error("BGP with deterministic MED prohibits sorted table");
1837
1838 if (cc->secondary && !cc->c.table->sorted)
1839 cf_error("BGP with secondary option requires sorted table");
1840 }
1841 }
1842
1843 static int
1844 bgp_reconfigure(struct proto *P, struct proto_config *CF)
1845 {
1846 struct bgp_proto *p = (void *) P;
1847 struct bgp_config *new = (void *) CF;
1848 struct bgp_config *old = p->cf;
1849
1850 if (proto_get_router_id(CF) != p->local_id)
1851 return 0;
1852
1853 int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1854 ((byte *) new) + sizeof(struct proto_config),
1855 // password item is last and must be checked separately
1856 OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1857 && ((!old->password && !new->password)
1858 || (old->password && new->password && !strcmp(old->password, new->password)));
1859
1860 /* FIXME: Move channel reconfiguration to generic protocol code ? */
1861 struct channel *C, *C2;
1862 struct bgp_channel_config *cc;
1863
1864 WALK_LIST(C, p->p.channels)
1865 C->stale = 1;
1866
1867 WALK_LIST(cc, new->c.channels)
1868 {
1869 C = (struct channel *) bgp_find_channel(p, cc->afi);
1870 same = proto_configure_channel(P, &C, &cc->c) && same;
1871 C->stale = 0;
1872 }
1873
1874 WALK_LIST_DELSAFE(C, C2, p->p.channels)
1875 if (C->stale)
1876 same = proto_configure_channel(P, &C, NULL) && same;
1877
1878
1879 if (same && (p->start_state > BSS_PREPARE))
1880 bgp_update_bfd(p, new->bfd);
1881
1882 /* We should update our copy of configuration ptr as old configuration will be freed */
1883 if (same)
1884 p->cf = new;
1885
1886 return same;
1887 }
1888
1889 #define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
1890
1891 static int
1892 bgp_channel_reconfigure(struct channel *C, struct channel_config *CC)
1893 {
1894 struct bgp_channel *c = (void *) C;
1895 struct bgp_channel_config *new = (void *) CC;
1896 struct bgp_channel_config *old = c->cf;
1897
1898 if (memcmp(((byte *) old) + sizeof(struct channel_config),
1899 ((byte *) new) + sizeof(struct channel_config),
1900 /* Remaining items must be checked separately */
1901 OFFSETOF(struct bgp_channel_config, rest) - sizeof(struct channel_config)))
1902 return 0;
1903
1904 /* Check change in IGP tables */
1905 if ((IGP_TABLE(old, ip4) != IGP_TABLE(new, ip4)) ||
1906 (IGP_TABLE(old, ip6) != IGP_TABLE(new, ip6)))
1907 return 0;
1908
1909 c->cf = new;
1910 return 1;
1911 }
1912
1913 static void
1914 bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
1915 {
1916 /* Just a shallow copy */
1917 }
1918
1919
1920 /**
1921 * bgp_error - report a protocol error
1922 * @c: connection
1923 * @code: error code (according to the RFC)
1924 * @subcode: error sub-code
1925 * @data: data to be passed in the Notification message
1926 * @len: length of the data
1927 *
1928 * bgp_error() sends a notification packet to tell the other side that a protocol
1929 * error has occurred (including the data considered erroneous if possible) and
1930 * closes the connection.
1931 */
1932 void
1933 bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
1934 {
1935 struct bgp_proto *p = c->bgp;
1936
1937 if (c->state == BS_CLOSE)
1938 return;
1939
1940 bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
1941 bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1942 bgp_conn_enter_close_state(c);
1943
1944 c->notify_code = code;
1945 c->notify_subcode = subcode;
1946 c->notify_data = data;
1947 c->notify_size = (len > 0) ? len : 0;
1948 bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
1949
1950 if (code != 6)
1951 {
1952 bgp_update_startup_delay(p);
1953 bgp_stop(p, 0, NULL, 0);
1954 }
1955 }
1956
1957 /**
1958 * bgp_store_error - store last error for status report
1959 * @p: BGP instance
1960 * @c: connection
1961 * @class: error class (BE_xxx constants)
1962 * @code: error code (class specific)
1963 *
1964 * bgp_store_error() decides whether given error is interesting enough
1965 * and store that error to last_error variables of @p
1966 */
1967 void
1968 bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1969 {
1970 /* During PS_UP, we ignore errors on secondary connection */
1971 if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1972 return;
1973
1974 /* During PS_STOP, we ignore any errors, as we want to report
1975 * the error that caused transition to PS_STOP
1976 */
1977 if (p->p.proto_state == PS_STOP)
1978 return;
1979
1980 p->last_error_class = class;
1981 p->last_error_code = code;
1982 }
1983
/* Connection state names, indexed by the connection state value */
static char *bgp_state_names[] = {
  "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close"
};

/* Status prefixes, indexed by the stored last error class */
static char *bgp_err_classes[] = {
  "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""
};

/* Messages for errors of class BE_MISC, indexed by the stored error code */
static char *bgp_misc_errors[] = {
  "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed",
  "No listening socket", "Link down", "BFD session down", "Graceful restart"
};

/* Messages for errors of class BE_AUTO_DOWN, indexed by the stored error code */
static char *bgp_auto_errors[] = {
  "", "Route limit exceeded"
};

/* Graceful restart state names (presumably indexed by the channel GR state - TODO confirm) */
static char *bgp_gr_states[] = {
  "None", "Regular", "Long-lived"
};
1989
1990 static const char *
1991 bgp_last_errmsg(struct bgp_proto *p)
1992 {
1993 switch (p->last_error_class)
1994 {
1995 case BE_MISC:
1996 return bgp_misc_errors[p->last_error_code];
1997 case BE_SOCKET:
1998 return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1999 case BE_BGP_RX:
2000 case BE_BGP_TX:
2001 return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
2002 case BE_AUTO_DOWN:
2003 return bgp_auto_errors[p->last_error_code];
2004 default:
2005 return "";
2006 }
2007 }
2008
2009 static const char *
2010 bgp_state_dsc(struct bgp_proto *p)
2011 {
2012 if (p->p.proto_state == PS_DOWN)
2013 return "Down";
2014
2015 int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
2016 if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
2017 return "Passive";
2018
2019 return bgp_state_names[state];
2020 }
2021
2022 static void
2023 bgp_get_status(struct proto *P, byte *buf)
2024 {
2025 struct bgp_proto *p = (struct bgp_proto *) P;
2026
2027 const char *err1 = bgp_err_classes[p->last_error_class];
2028 const char *err2 = bgp_last_errmsg(p);
2029
2030 if (P->proto_state == PS_DOWN)
2031 bsprintf(buf, "%s%s", err1, err2);
2032 else
2033 bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
2034 }
2035
2036 static void
2037 bgp_show_afis(int code, char *s, u32 *afis, uint count)
2038 {
2039 buffer b;
2040 LOG_BUFFER_INIT(b);
2041
2042 buffer_puts(&b, s);
2043
2044 for (u32 *af = afis; af < (afis + count); af++)
2045 {
2046 const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
2047 if (desc)
2048 buffer_print(&b, " %s", desc->name);
2049 else
2050 buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
2051 }
2052
2053 if (b.pos == b.end)
2054 strcpy(b.end - 32, " ... <too long>");
2055
2056 cli_msg(code, b.start);
2057 }
2058
2059 static void
2060 bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
2061 {
2062 struct bgp_af_caps *ac;
2063 uint any_mp_bgp = 0;
2064 uint any_gr_able = 0;
2065 uint any_add_path = 0;
2066 uint any_ext_next_hop = 0;
2067 uint any_llgr_able = 0;
2068 u32 *afl1 = alloca(caps->af_count * sizeof(u32));
2069 u32 *afl2 = alloca(caps->af_count * sizeof(u32));
2070 uint afn1, afn2;
2071
2072 WALK_AF_CAPS(caps, ac)
2073 {
2074 any_mp_bgp |= ac->ready;
2075 any_gr_able |= ac->gr_able;
2076 any_add_path |= ac->add_path;
2077 any_ext_next_hop |= ac->ext_next_hop;
2078 any_llgr_able |= ac->llgr_able;
2079 }
2080
2081 if (any_mp_bgp)
2082 {
2083 cli_msg(-1006, " Multiprotocol");
2084
2085 afn1 = 0;
2086 WALK_AF_CAPS(caps, ac)
2087 if (ac->ready)
2088 afl1[afn1++] = ac->afi;
2089
2090 bgp_show_afis(-1006, " AF announced:", afl1, afn1);
2091 }
2092
2093 if (caps->route_refresh)
2094 cli_msg(-1006, " Route refresh");
2095
2096 if (any_ext_next_hop)
2097 {
2098 cli_msg(-1006, " Extended next hop");
2099
2100 afn1 = 0;
2101 WALK_AF_CAPS(caps, ac)
2102 if (ac->ext_next_hop)
2103 afl1[afn1++] = ac->afi;
2104
2105 bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1);
2106 }
2107
2108 if (caps->ext_messages)
2109 cli_msg(-1006, " Extended message");
2110
2111 if (caps->gr_aware)
2112 cli_msg(-1006, " Graceful restart");
2113
2114 if (any_gr_able)
2115 {
2116 /* Continues from gr_aware */
2117 cli_msg(-1006, " Restart time: %u", caps->gr_time);
2118 if (caps->gr_flags & BGP_GRF_RESTART)
2119 cli_msg(-1006, " Restart recovery");
2120
2121 afn1 = afn2 = 0;
2122 WALK_AF_CAPS(caps, ac)
2123 {
2124 if (ac->gr_able)
2125 afl1[afn1++] = ac->afi;
2126
2127 if (ac->gr_af_flags & BGP_GRF_FORWARDING)
2128 afl2[afn2++] = ac->afi;
2129 }
2130
2131 bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2132 bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2133 }
2134
2135 if (caps->as4_support)
2136 cli_msg(-1006, " 4-octet AS numbers");
2137
2138 if (any_add_path)
2139 {
2140 cli_msg(-1006, " ADD-PATH");
2141
2142 afn1 = afn2 = 0;
2143 WALK_AF_CAPS(caps, ac)
2144 {
2145 if (ac->add_path & BGP_ADD_PATH_RX)
2146 afl1[afn1++] = ac->afi;
2147
2148 if (ac->add_path & BGP_ADD_PATH_TX)
2149 afl2[afn2++] = ac->afi;
2150 }
2151
2152 bgp_show_afis(-1006, " RX:", afl1, afn1);
2153 bgp_show_afis(-1006, " TX:", afl2, afn2);
2154 }
2155
2156 if (caps->enhanced_refresh)
2157 cli_msg(-1006, " Enhanced refresh");
2158
2159 if (caps->llgr_aware)
2160 cli_msg(-1006, " Long-lived graceful restart");
2161
2162 if (any_llgr_able)
2163 {
2164 u32 stale_time = 0;
2165
2166 afn1 = afn2 = 0;
2167 WALK_AF_CAPS(caps, ac)
2168 {
2169 stale_time = MAX(stale_time, ac->llgr_time);
2170
2171 if (ac->llgr_able && ac->llgr_time)
2172 afl1[afn1++] = ac->afi;
2173
2174 if (ac->llgr_flags & BGP_GRF_FORWARDING)
2175 afl2[afn2++] = ac->afi;
2176 }
2177
2178 /* Continues from llgr_aware */
2179 cli_msg(-1006, " LL stale time: %u", stale_time);
2180
2181 bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2182 bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2183 }
2184 }
2185
2186 static void
2187 bgp_show_proto_info(struct proto *P)
2188 {
2189 struct bgp_proto *p = (struct bgp_proto *) P;
2190
2191 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
2192 cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
2193 cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
2194
2195 if (p->gr_active_num)
2196 cli_msg(-1006, " Neighbor graceful restart active");
2197
2198 if (P->proto_state == PS_START)
2199 {
2200 struct bgp_conn *oc = &p->outgoing_conn;
2201
2202 if ((p->start_state < BSS_CONNECT) &&
2203 (tm_active(p->startup_timer)))
2204 cli_msg(-1006, " Error wait: %t/%u",
2205 tm_remains(p->startup_timer), p->startup_delay);
2206
2207 if ((oc->state == BS_ACTIVE) &&
2208 (tm_active(oc->connect_timer)))
2209 cli_msg(-1006, " Connect delay: %t/%u",
2210 tm_remains(oc->connect_timer), p->cf->connect_delay_time);
2211
2212 if (p->gr_active_num && tm_active(p->gr_timer))
2213 cli_msg(-1006, " Restart timer: %t/-",
2214 tm_remains(p->gr_timer));
2215 }
2216 else if (P->proto_state == PS_UP)
2217 {
2218 cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
2219 cli_msg(-1006, " Local capabilities");
2220 bgp_show_capabilities(p, p->conn->local_caps);
2221 cli_msg(-1006, " Neighbor capabilities");
2222 bgp_show_capabilities(p, p->conn->remote_caps);
2223 cli_msg(-1006, " Session: %s%s%s%s%s",
2224 p->is_internal ? "internal" : "external",
2225 p->cf->multihop ? " multihop" : "",
2226 p->rr_client ? " route-reflector" : "",
2227 p->rs_client ? " route-server" : "",
2228 p->as4_session ? " AS4" : "");
2229 cli_msg(-1006, " Source address: %I", p->source_addr);
2230 cli_msg(-1006, " Hold timer: %t/%u",
2231 tm_remains(p->conn->hold_timer), p->conn->hold_time);
2232 cli_msg(-1006, " Keepalive timer: %t/%u",
2233 tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
2234 }
2235
2236 if ((p->last_error_class != BE_NONE) &&
2237 (p->last_error_class != BE_MAN_DOWN))
2238 {
2239 const char *err1 = bgp_err_classes[p->last_error_class];
2240 const char *err2 = bgp_last_errmsg(p);
2241 cli_msg(-1006, " Last error: %s%s", err1, err2);
2242 }
2243
2244 {
2245 struct bgp_channel *c;
2246 WALK_LIST(c, p->p.channels)
2247 {
2248 channel_show_info(&c->c);
2249
2250 if (p->gr_active_num)
2251 cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
2252
2253 if (c->stale_timer && tm_active(c->stale_timer))
2254 cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer));
2255
2256 if (c->c.channel_state == CS_UP)
2257 {
2258 if (ipa_zero(c->link_addr))
2259 cli_msg(-1006, " BGP Next hop: %I", c->next_hop_addr);
2260 else
2261 cli_msg(-1006, " BGP Next hop: %I %I", c->next_hop_addr, c->link_addr);
2262 }
2263
2264 if (c->igp_table_ip4)
2265 cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name);
2266
2267 if (c->igp_table_ip6)
2268 cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
2269 }
2270 }
2271 }
2272
2273 struct channel_class channel_bgp = {
2274 .channel_size = sizeof(struct bgp_channel),
2275 .config_size = sizeof(struct bgp_channel_config),
2276 .init = bgp_channel_init,
2277 .start = bgp_channel_start,
2278 .shutdown = bgp_channel_shutdown,
2279 .cleanup = bgp_channel_cleanup,
2280 .reconfigure = bgp_channel_reconfigure,
2281 };
2282
2283 struct protocol proto_bgp = {
2284 .name = "BGP",
2285 .template = "bgp%d",
2286 .class = PROTOCOL_BGP,
2287 .preference = DEF_PREF_BGP,
2288 .channel_mask = NB_IP | NB_VPN | NB_FLOW,
2289 .proto_size = sizeof(struct bgp_proto),
2290 .config_size = sizeof(struct bgp_config),
2291 .postconfig = bgp_postconfig,
2292 .init = bgp_init,
2293 .start = bgp_start,
2294 .shutdown = bgp_shutdown,
2295 .reconfigure = bgp_reconfigure,
2296 .copy_config = bgp_copy_config,
2297 .get_status = bgp_get_status,
2298 .get_attr = bgp_get_attr,
2299 .get_route_info = bgp_get_route_info,
2300 .show_proto_info = bgp_show_proto_info
2301 };