]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/bgp.c
IO: Avoid multiple event cycles in one loop cycle.
[thirdparty/bird.git] / proto / bgp / bgp.c
1 /*
2 * BIRD -- The Border Gateway Protocol
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Border Gateway Protocol
11 *
12 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13 * connection and most of the interface with BIRD core, |packets.c| handling
14 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15 * manipulation with BGP attribute lists.
16 *
17 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18 * architecture which is able to keep all the information needed by BGP in the
19 * primary routing table, therefore no complex data structures like a central
20 * BGP table are needed. This increases memory footprint of a BGP router with
21 * many connections, but not too much and, which is more important, it makes
22 * BGP much easier to implement.
23 *
24 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
25 * structure to which are attached individual connections represented by &bgp_connection
26 * (usually, there exists only one connection, but during BGP session setup, there
27 * can be more of them). The connections are handled according to the BGP state machine
28 * defined in the RFC with all the timers and all the parameters configurable.
29 *
30 * In incoming direction, we listen on the connection's socket and each time we receive
31 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32 * passes complete packets to bgp_rx_packet() which distributes the packet according
33 * to its type.
34 *
35 * In outgoing direction, we gather all the routing updates and sort them to buckets
36 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37 * of &rta's and a &fib which helps us to find if we already have another route for
38 * the same destination queued for sending, so that we can replace it with the new one
39 * immediately instead of sending both updates). There also exists a special bucket holding
40 * all the route withdrawals which cannot be queued anywhere else as they don't have any
41 * attributes. If we have any packet to send (due to either new routes or the connection
42 * tracking code wanting to send an Open, Keepalive or Notification message), we call
43 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
44 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47 * type if we have more data of the same type to send.
48 *
49 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50 * of the attribute blocks and translating them to the language of BIRD's extended attributes
51 * and bgp_encode_attrs() which does the converse. Both functions are built around a
52 * @bgp_attr_table array describing all important characteristics of all known attributes.
53 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54 *
55 * BGP protocol implements graceful restart in both restarting (local restart)
56 * and receiving (neighbor restart) roles. The first is handled mostly by the
57 * graceful restart code in the nest, BGP protocol just handles capabilities,
58 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
59 * The second is implemented by internal restart of the BGP state to %BS_IDLE
60 * and protocol state to %PS_START, but keeping the protocol up from the core
61 * point of view and therefore maintaining received routes. Routing table
62 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
63 * stale routes after reestablishment of BGP session during graceful restart.
64 */
65
66 #undef LOCAL_DEBUG
67
68 #include "nest/bird.h"
69 #include "nest/iface.h"
70 #include "nest/protocol.h"
71 #include "nest/route.h"
72 #include "nest/cli.h"
73 #include "nest/locks.h"
74 #include "conf/conf.h"
75 #include "lib/socket.h"
76 #include "lib/resource.h"
77 #include "lib/string.h"
78
79 #include "bgp.h"
80
81
82 struct linpool *bgp_linpool; /* Global temporary pool */
83 static sock *bgp_listen_sk; /* Global listening socket */
84 static int bgp_counter; /* Number of protocol instances using the listening socket */
85
86 static void bgp_close(struct bgp_proto *p, int apply_md5);
87 static void bgp_connect(struct bgp_proto *p);
88 static void bgp_active(struct bgp_proto *p);
89 static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
90 static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
91
92
93 /**
94 * bgp_open - open a BGP instance
95 * @p: BGP instance
96 *
97 * This function allocates and configures shared BGP resources.
98 * Should be called as the last step during initialization
99 * (when lock is acquired and neighbor is ready).
100 * When error, state changed to PS_DOWN, -1 is returned and caller
101 * should return immediately.
102 */
103 static int
104 bgp_open(struct bgp_proto *p)
105 {
106 struct config *cfg = p->cf->c.global;
107 int errcode;
108
109 if (!bgp_listen_sk)
110 bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
111
112 if (!bgp_listen_sk)
113 {
114 errcode = BEM_NO_SOCKET;
115 goto err;
116 }
117
118 if (!bgp_linpool)
119 bgp_linpool = lp_new(&root_pool, 4080);
120
121 bgp_counter++;
122
123 if (p->cf->password)
124 if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, p->cf->password) < 0)
125 {
126 sk_log_error(bgp_listen_sk, p->p.name);
127 bgp_close(p, 0);
128 errcode = BEM_INVALID_MD5;
129 goto err;
130 }
131
132 return 0;
133
134 err:
135 p->p.disabled = 1;
136 bgp_store_error(p, NULL, BE_MISC, errcode);
137 proto_notify_state(&p->p, PS_DOWN);
138 return -1;
139 }
140
141 static void
142 bgp_startup(struct bgp_proto *p)
143 {
144 BGP_TRACE(D_EVENTS, "Started");
145 p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
146
147 if (!p->cf->passive)
148 bgp_active(p);
149 }
150
151 static void
152 bgp_startup_timeout(timer *t)
153 {
154 bgp_startup(t->data);
155 }
156
157
158 static void
159 bgp_initiate(struct bgp_proto *p)
160 {
161 int rv = bgp_open(p);
162 if (rv < 0)
163 return;
164
165 if (p->cf->bfd)
166 bgp_update_bfd(p, p->cf->bfd);
167
168 if (p->startup_delay)
169 {
170 p->start_state = BSS_DELAY;
171 BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
172 bgp_start_timer(p->startup_timer, p->startup_delay);
173 }
174 else
175 bgp_startup(p);
176 }
177
178 /**
179 * bgp_close - close a BGP instance
180 * @p: BGP instance
181 * @apply_md5: 0 to disable unsetting MD5 auth
182 *
183 * This function frees and deconfigures shared BGP resources.
184 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
185 * from failed bgp_open().
186 */
187 static void
188 bgp_close(struct bgp_proto *p, int apply_md5)
189 {
190 ASSERT(bgp_counter);
191 bgp_counter--;
192
193 if (p->cf->password && apply_md5)
194 if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, NULL) < 0)
195 sk_log_error(bgp_listen_sk, p->p.name);
196
197 if (!bgp_counter)
198 {
199 rfree(bgp_listen_sk);
200 bgp_listen_sk = NULL;
201 rfree(bgp_linpool);
202 bgp_linpool = NULL;
203 }
204 }
205
206 /**
207 * bgp_start_timer - start a BGP timer
208 * @t: timer
209 * @value: time to fire (0 to disable the timer)
210 *
211 * This functions calls tm_start() on @t with time @value and the
212 * amount of randomization suggested by the BGP standard. Please use
213 * it for all BGP timers.
214 */
215 void
216 bgp_start_timer(timer *t, int value)
217 {
218 if (value)
219 {
220 /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
221 t->randomize = value / 4;
222 tm_start(t, value - t->randomize);
223 }
224 else
225 tm_stop(t);
226 }
227
228 /**
229 * bgp_close_conn - close a BGP connection
230 * @conn: connection to close
231 *
232 * This function takes a connection described by the &bgp_conn structure,
233 * closes its socket and frees all resources associated with it.
234 */
235 void
236 bgp_close_conn(struct bgp_conn *conn)
237 {
238 // struct bgp_proto *p = conn->bgp;
239
240 DBG("BGP: Closing connection\n");
241 conn->packets_to_send = 0;
242 rfree(conn->connect_retry_timer);
243 conn->connect_retry_timer = NULL;
244 rfree(conn->keepalive_timer);
245 conn->keepalive_timer = NULL;
246 rfree(conn->hold_timer);
247 conn->hold_timer = NULL;
248 rfree(conn->sk);
249 conn->sk = NULL;
250 rfree(conn->tx_ev);
251 conn->tx_ev = NULL;
252 }
253
254
255 /**
256 * bgp_update_startup_delay - update a startup delay
257 * @p: BGP instance
258 *
259 * This function updates a startup delay that is used to postpone next BGP connect.
260 * It also handles disable_after_error and might stop BGP instance when error
261 * happened and disable_after_error is on.
262 *
263 * It should be called when BGP protocol error happened.
264 */
265 void
266 bgp_update_startup_delay(struct bgp_proto *p)
267 {
268 struct bgp_config *cf = p->cf;
269
270 DBG("BGP: Updating startup delay\n");
271
272 if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
273 p->startup_delay = 0;
274
275 p->last_proto_error = now;
276
277 if (cf->disable_after_error)
278 {
279 p->startup_delay = 0;
280 p->p.disabled = 1;
281 return;
282 }
283
284 if (!p->startup_delay)
285 p->startup_delay = cf->error_delay_time_min;
286 else
287 p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
288 }
289
290 static void
291 bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
292 {
293 switch (conn->state)
294 {
295 case BS_IDLE:
296 case BS_CLOSE:
297 return;
298 case BS_CONNECT:
299 case BS_ACTIVE:
300 bgp_conn_enter_idle_state(conn);
301 return;
302 case BS_OPENSENT:
303 case BS_OPENCONFIRM:
304 case BS_ESTABLISHED:
305 bgp_error(conn, 6, subcode, NULL, 0);
306 return;
307 default:
308 bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
309 }
310 }
311
312 static void
313 bgp_down(struct bgp_proto *p)
314 {
315 if (p->start_state > BSS_PREPARE)
316 bgp_close(p, 1);
317
318 BGP_TRACE(D_EVENTS, "Down");
319 proto_notify_state(&p->p, PS_DOWN);
320 }
321
322 static void
323 bgp_decision(void *vp)
324 {
325 struct bgp_proto *p = vp;
326
327 DBG("BGP: Decision start\n");
328 if ((p->p.proto_state == PS_START)
329 && (p->outgoing_conn.state == BS_IDLE)
330 && (p->incoming_conn.state != BS_OPENCONFIRM)
331 && (!p->cf->passive))
332 bgp_active(p);
333
334 if ((p->p.proto_state == PS_STOP)
335 && (p->outgoing_conn.state == BS_IDLE)
336 && (p->incoming_conn.state == BS_IDLE))
337 bgp_down(p);
338 }
339
340 void
341 bgp_stop(struct bgp_proto *p, unsigned subcode)
342 {
343 proto_notify_state(&p->p, PS_STOP);
344 bgp_graceful_close_conn(&p->outgoing_conn, subcode);
345 bgp_graceful_close_conn(&p->incoming_conn, subcode);
346 ev_schedule(p->event);
347 }
348
349 static inline void
350 bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
351 {
352 if (conn->bgp->p.mrtdump & MD_STATES)
353 mrt_dump_bgp_state_change(conn, conn->state, new_state);
354
355 conn->state = new_state;
356 }
357
358 void
359 bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
360 {
361 /* Really, most of the work is done in bgp_rx_open(). */
362 bgp_conn_set_state(conn, BS_OPENCONFIRM);
363 }
364
365 void
366 bgp_conn_enter_established_state(struct bgp_conn *conn)
367 {
368 struct bgp_proto *p = conn->bgp;
369
370 BGP_TRACE(D_EVENTS, "BGP session established");
371 DBG("BGP: UP!!!\n");
372
373 /* For multi-hop BGP sessions */
374 if (ipa_zero(p->source_addr))
375 p->source_addr = conn->sk->saddr;
376
377 conn->sk->fast_rx = 0;
378
379 p->conn = conn;
380 p->last_error_class = 0;
381 p->last_error_code = 0;
382 p->feed_state = BFS_NONE;
383 p->load_state = BFS_NONE;
384 bgp_init_bucket_table(p);
385 bgp_init_prefix_table(p, 8);
386
387 int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
388
389 if (p->p.gr_recovery && !peer_gr_ready)
390 proto_graceful_restart_unlock(&p->p);
391
392 if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
393 p->p.gr_wait = 1;
394
395 if (p->gr_active)
396 tm_stop(p->gr_timer);
397
398 if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
399 bgp_graceful_restart_done(p);
400
401 /* GR capability implies that neighbor will send End-of-RIB */
402 if (conn->peer_gr_aware)
403 p->load_state = BFS_LOADING;
404
405 /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
406
407 bgp_conn_set_state(conn, BS_ESTABLISHED);
408 proto_notify_state(&p->p, PS_UP);
409 }
410
411 static void
412 bgp_conn_leave_established_state(struct bgp_proto *p)
413 {
414 BGP_TRACE(D_EVENTS, "BGP session closed");
415 p->conn = NULL;
416
417 if (p->p.proto_state == PS_UP)
418 bgp_stop(p, 0);
419 }
420
421 void
422 bgp_conn_enter_close_state(struct bgp_conn *conn)
423 {
424 struct bgp_proto *p = conn->bgp;
425 int os = conn->state;
426
427 bgp_conn_set_state(conn, BS_CLOSE);
428 tm_stop(conn->keepalive_timer);
429 conn->sk->rx_hook = NULL;
430
431 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
432 bgp_start_timer(conn->hold_timer, 10);
433
434 if (os == BS_ESTABLISHED)
435 bgp_conn_leave_established_state(p);
436 }
437
438 void
439 bgp_conn_enter_idle_state(struct bgp_conn *conn)
440 {
441 struct bgp_proto *p = conn->bgp;
442 int os = conn->state;
443
444 bgp_close_conn(conn);
445 bgp_conn_set_state(conn, BS_IDLE);
446 ev_schedule(p->event);
447
448 if (os == BS_ESTABLISHED)
449 bgp_conn_leave_established_state(p);
450 }
451
452 /**
453 * bgp_handle_graceful_restart - handle detected BGP graceful restart
454 * @p: BGP instance
455 *
456 * This function is called when a BGP graceful restart of the neighbor is
457 * detected (when the TCP connection fails or when a new TCP connection
458 * appears). The function activates processing of the restart - starts routing
459 * table refresh cycle and activates BGP restart timer. The protocol state goes
460 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
461 * caller.
462 */
463 void
464 bgp_handle_graceful_restart(struct bgp_proto *p)
465 {
466 ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
467
468 BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
469 p->gr_active ? " - already pending" : "");
470 proto_notify_state(&p->p, PS_START);
471
472 if (p->gr_active)
473 rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
474
475 p->gr_active = 1;
476 bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
477 rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
478 }
479
480 /**
481 * bgp_graceful_restart_done - finish active BGP graceful restart
482 * @p: BGP instance
483 *
484 * This function is called when the active BGP graceful restart of the neighbor
485 * should be finished - either successfully (the neighbor sends all paths and
486 * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
487 * not support BGP graceful restart on the new session). The function ends
488 * routing table refresh cycle and stops BGP restart timer.
489 */
490 void
491 bgp_graceful_restart_done(struct bgp_proto *p)
492 {
493 BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
494 p->gr_active = 0;
495 tm_stop(p->gr_timer);
496 rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
497 }
498
499 /**
500 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
501 * @t: timer
502 *
503 * This function is a timeout hook for @gr_timer, implementing BGP restart time
504 * limit for reestablisment of the BGP session after the graceful restart. When
505 * fired, we just proceed with the usual protocol restart.
506 */
507
508 static void
509 bgp_graceful_restart_timeout(timer *t)
510 {
511 struct bgp_proto *p = t->data;
512
513 BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
514 bgp_stop(p, 0);
515 }
516
517
518 /**
519 * bgp_refresh_begin - start incoming enhanced route refresh sequence
520 * @p: BGP instance
521 *
522 * This function is called when an incoming enhanced route refresh sequence is
523 * started by the neighbor, demarcated by the BoRR packet. The function updates
524 * the load state and starts the routing table refresh cycle. Note that graceful
525 * restart also uses routing table refresh cycle, but RFC 7313 and load states
526 * ensure that these two sequences do not overlap.
527 */
528 void
529 bgp_refresh_begin(struct bgp_proto *p)
530 {
531 if (p->load_state == BFS_LOADING)
532 { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
533
534 p->load_state = BFS_REFRESHING;
535 rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
536 }
537
538 /**
539 * bgp_refresh_end - finish incoming enhanced route refresh sequence
540 * @p: BGP instance
541 *
542 * This function is called when an incoming enhanced route refresh sequence is
543 * finished by the neighbor, demarcated by the EoRR packet. The function updates
544 * the load state and ends the routing table refresh cycle. Routes not received
545 * during the sequence are removed by the nest.
546 */
547 void
548 bgp_refresh_end(struct bgp_proto *p)
549 {
550 if (p->load_state != BFS_REFRESHING)
551 { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
552
553 p->load_state = BFS_NONE;
554 rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
555 }
556
557
558 static void
559 bgp_send_open(struct bgp_conn *conn)
560 {
561 conn->start_state = conn->bgp->start_state;
562
563 // Default values, possibly changed by receiving capabilities.
564 conn->advertised_as = 0;
565 conn->peer_refresh_support = 0;
566 conn->peer_as4_support = 0;
567 conn->peer_add_path = 0;
568 conn->peer_enhanced_refresh_support = 0;
569 conn->peer_gr_aware = 0;
570 conn->peer_gr_able = 0;
571 conn->peer_gr_time = 0;
572 conn->peer_gr_flags = 0;
573 conn->peer_gr_aflags = 0;
574 conn->peer_ext_messages_support = 0;
575
576 DBG("BGP: Sending open\n");
577 conn->sk->rx_hook = bgp_rx;
578 conn->sk->tx_hook = bgp_tx;
579 tm_stop(conn->connect_retry_timer);
580 bgp_schedule_packet(conn, PKT_OPEN);
581 bgp_conn_set_state(conn, BS_OPENSENT);
582 bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
583 }
584
585 static void
586 bgp_connected(sock *sk)
587 {
588 struct bgp_conn *conn = sk->data;
589 struct bgp_proto *p = conn->bgp;
590
591 BGP_TRACE(D_EVENTS, "Connected");
592 bgp_send_open(conn);
593 }
594
595 static void
596 bgp_connect_timeout(timer *t)
597 {
598 struct bgp_conn *conn = t->data;
599 struct bgp_proto *p = conn->bgp;
600
601 DBG("BGP: connect_timeout\n");
602 if (p->p.proto_state == PS_START)
603 {
604 bgp_close_conn(conn);
605 bgp_connect(p);
606 }
607 else
608 bgp_conn_enter_idle_state(conn);
609 }
610
611 static void
612 bgp_sock_err(sock *sk, int err)
613 {
614 struct bgp_conn *conn = sk->data;
615 struct bgp_proto *p = conn->bgp;
616
617 /*
618 * This error hook may be called either asynchronously from main
619 * loop, or synchronously from sk_send(). But sk_send() is called
620 * only from bgp_tx() and bgp_kick_tx(), which are both called
621 * asynchronously from main loop. Moreover, they end if err hook is
622 * called. Therefore, we could suppose that it is always called
623 * asynchronously.
624 */
625
626 bgp_store_error(p, conn, BE_SOCKET, err);
627
628 if (err)
629 BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
630 else
631 BGP_TRACE(D_EVENTS, "Connection closed");
632
633 if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
634 bgp_handle_graceful_restart(p);
635
636 bgp_conn_enter_idle_state(conn);
637 }
638
639 static void
640 bgp_hold_timeout(timer *t)
641 {
642 struct bgp_conn *conn = t->data;
643 struct bgp_proto *p = conn->bgp;
644
645 DBG("BGP: Hold timeout\n");
646
647 /* We are already closing the connection - just do hangup */
648 if (conn->state == BS_CLOSE)
649 {
650 BGP_TRACE(D_EVENTS, "Connection stalled");
651 bgp_conn_enter_idle_state(conn);
652 return;
653 }
654
655 /* If there is something in input queue, we are probably congested
656 and perhaps just not processed BGP packets in time. */
657
658 if (sk_rx_ready(conn->sk) > 0)
659 bgp_start_timer(conn->hold_timer, 10);
660 else
661 bgp_error(conn, 4, 0, NULL, 0);
662 }
663
664 static void
665 bgp_keepalive_timeout(timer *t)
666 {
667 struct bgp_conn *conn = t->data;
668
669 DBG("BGP: Keepalive timer\n");
670 bgp_schedule_packet(conn, PKT_KEEPALIVE);
671
672 /* Kick TX a bit faster */
673 if (ev_active(conn->tx_ev))
674 ev_run(conn->tx_ev);
675 }
676
677 static void
678 bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
679 {
680 timer *t;
681
682 conn->sk = NULL;
683 conn->bgp = p;
684 conn->packets_to_send = 0;
685
686 t = conn->connect_retry_timer = tm_new(p->p.pool);
687 t->hook = bgp_connect_timeout;
688 t->data = conn;
689 t = conn->hold_timer = tm_new(p->p.pool);
690 t->hook = bgp_hold_timeout;
691 t->data = conn;
692 t = conn->keepalive_timer = tm_new(p->p.pool);
693 t->hook = bgp_keepalive_timeout;
694 t->data = conn;
695 conn->tx_ev = ev_new(p->p.pool);
696 conn->tx_ev->hook = bgp_kick_tx;
697 conn->tx_ev->data = conn;
698 }
699
700 static void
701 bgp_setup_sk(struct bgp_conn *conn, sock *s)
702 {
703 s->data = conn;
704 s->err_hook = bgp_sock_err;
705 s->fast_rx = 1;
706 conn->sk = s;
707 }
708
709 static void
710 bgp_active(struct bgp_proto *p)
711 {
712 int delay = MAX(1, p->cf->connect_delay_time);
713 struct bgp_conn *conn = &p->outgoing_conn;
714
715 BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
716 bgp_setup_conn(p, conn);
717 bgp_conn_set_state(conn, BS_ACTIVE);
718 bgp_start_timer(conn->connect_retry_timer, delay);
719 }
720
721 /**
722 * bgp_connect - initiate an outgoing connection
723 * @p: BGP instance
724 *
725 * The bgp_connect() function creates a new &bgp_conn and initiates
726 * a TCP connection to the peer. The rest of connection setup is governed
727 * by the BGP state machine as described in the standard.
728 */
729 static void
730 bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
731 {
732 sock *s;
733 struct bgp_conn *conn = &p->outgoing_conn;
734 int hops = p->cf->multihop ? : 1;
735
736 DBG("BGP: Connecting\n");
737 s = sk_new(p->p.pool);
738 s->type = SK_TCP_ACTIVE;
739 s->saddr = p->source_addr;
740 s->daddr = p->cf->remote_ip;
741 s->dport = p->cf->remote_port;
742 s->iface = p->neigh ? p->neigh->iface : NULL;
743 s->ttl = p->cf->ttl_security ? 255 : hops;
744 s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
745 s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
746 s->tos = IP_PREC_INTERNET_CONTROL;
747 s->password = p->cf->password;
748 s->tx_hook = bgp_connected;
749 BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
750 s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
751 bgp_setup_conn(p, conn);
752 bgp_setup_sk(conn, s);
753 bgp_conn_set_state(conn, BS_CONNECT);
754
755 if (sk_open(s) < 0)
756 goto err;
757
758 /* Set minimal receive TTL if needed */
759 if (p->cf->ttl_security)
760 if (sk_set_min_ttl(s, 256 - hops) < 0)
761 goto err;
762
763 DBG("BGP: Waiting for connect success\n");
764 bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
765 return;
766
767 err:
768 sk_log_error(s, p->p.name);
769 bgp_sock_err(s, 0);
770 return;
771 }
772
773 /**
774 * bgp_find_proto - find existing proto for incoming connection
775 * @sk: TCP socket
776 *
777 */
778 static struct bgp_proto *
779 bgp_find_proto(sock *sk)
780 {
781 struct proto_config *pc;
782
783 WALK_LIST(pc, config->protos)
784 if ((pc->protocol == &proto_bgp) && pc->proto)
785 {
786 struct bgp_proto *p = (struct bgp_proto *) pc->proto;
787 if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
788 (!ipa_is_link_local(sk->daddr) || (p->cf->iface == sk->iface)))
789 return p;
790 }
791
792 return NULL;
793 }
794
795 /**
796 * bgp_incoming_connection - handle an incoming connection
797 * @sk: TCP socket
798 * @dummy: unused
799 *
800 * This function serves as a socket hook for accepting of new BGP
801 * connections. It searches a BGP instance corresponding to the peer
802 * which has connected and if such an instance exists, it creates a
803 * &bgp_conn structure, attaches it to the instance and either sends
804 * an Open message or (if there already is an active connection) it
805 * closes the new connection by sending a Notification message.
806 */
807 static int
808 bgp_incoming_connection(sock *sk, int dummy UNUSED)
809 {
810 struct bgp_proto *p;
811 int acc, hops;
812
813 DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
814 p = bgp_find_proto(sk);
815 if (!p)
816 {
817 log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
818 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
819 rfree(sk);
820 return 0;
821 }
822
823 /*
824 * BIRD should keep multiple incoming connections in OpenSent state (for
825 * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
826 * connections are rejected istead. The exception is the case where an
827 * incoming connection triggers a graceful restart.
828 */
829
830 acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
831 (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
832
833 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
834 {
835 bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
836 bgp_handle_graceful_restart(p);
837 bgp_conn_enter_idle_state(p->conn);
838 acc = 1;
839
840 /* There might be separate incoming connection in OpenSent state */
841 if (p->incoming_conn.state > BS_ACTIVE)
842 bgp_close_conn(&p->incoming_conn);
843 }
844
845 BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
846 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
847 sk->dport, acc ? "accepted" : "rejected");
848
849 if (!acc)
850 {
851 rfree(sk);
852 return 0;
853 }
854
855 hops = p->cf->multihop ? : 1;
856
857 if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
858 goto err;
859
860 if (p->cf->ttl_security)
861 if (sk_set_min_ttl(sk, 256 - hops) < 0)
862 goto err;
863
864 if (p->cf->enable_extended_messages)
865 {
866 sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
867 sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
868 sk_reallocate(sk);
869 }
870
871 bgp_setup_conn(p, &p->incoming_conn);
872 bgp_setup_sk(&p->incoming_conn, sk);
873 bgp_send_open(&p->incoming_conn);
874 return 0;
875
876 err:
877 sk_log_error(sk, p->p.name);
878 log(L_ERR "%s: Incoming connection aborted", p->p.name);
879 rfree(sk);
880 return 0;
881 }
882
883 static void
884 bgp_listen_sock_err(sock *sk UNUSED, int err)
885 {
886 if (err == ECONNABORTED)
887 log(L_WARN "BGP: Incoming connection aborted");
888 else
889 log(L_ERR "BGP: Error on listening socket: %M", err);
890 }
891
892 static sock *
893 bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
894 {
895 sock *s = sk_new(&root_pool);
896 DBG("BGP: Creating listening socket\n");
897 s->type = SK_TCP_PASSIVE;
898 s->ttl = 255;
899 s->saddr = addr;
900 s->sport = port ? port : BGP_PORT;
901 s->flags = flags ? 0 : SKF_V6ONLY;
902 s->tos = IP_PREC_INTERNET_CONTROL;
903 s->rbsize = BGP_RX_BUFFER_SIZE;
904 s->tbsize = BGP_TX_BUFFER_SIZE;
905 s->rx_hook = bgp_incoming_connection;
906 s->err_hook = bgp_listen_sock_err;
907
908 if (sk_open(s) < 0)
909 goto err;
910
911 return s;
912
913 err:
914 sk_log_error(s, "BGP");
915 log(L_ERR "BGP: Cannot open listening socket");
916 rfree(s);
917 return NULL;
918 }
919
920 static void
921 bgp_start_neighbor(struct bgp_proto *p)
922 {
923 /* Called only for single-hop BGP sessions */
924
925 if (ipa_zero(p->source_addr))
926 p->source_addr = p->neigh->ifa->ip;
927
928 #ifdef IPV6
929 {
930 struct ifa *a;
931 p->local_link = IPA_NONE;
932 WALK_LIST(a, p->neigh->iface->addrs)
933 if (a->scope == SCOPE_LINK)
934 {
935 p->local_link = a->ip;
936 break;
937 }
938
939 if (! ipa_nonzero(p->local_link))
940 log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name);
941
942 DBG("BGP: Selected link-level address %I\n", p->local_link);
943 }
944 #endif
945
946 bgp_initiate(p);
947 }
948
949 static void
950 bgp_neigh_notify(neighbor *n)
951 {
952 struct bgp_proto *p = (struct bgp_proto *) n->proto;
953 int ps = p->p.proto_state;
954
955 if (n != p->neigh)
956 return;
957
958 if ((ps == PS_DOWN) || (ps == PS_STOP))
959 return;
960
961 int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
962
963 if (n->scope <= 0)
964 {
965 if (!prepare)
966 {
967 BGP_TRACE(D_EVENTS, "Neighbor lost");
968 bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
969 /* Perhaps also run bgp_update_startup_delay(p)? */
970 bgp_stop(p, 0);
971 }
972 }
973 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
974 {
975 if (!prepare)
976 {
977 BGP_TRACE(D_EVENTS, "Link down");
978 bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
979 if (ps == PS_UP)
980 bgp_update_startup_delay(p);
981 bgp_stop(p, 0);
982 }
983 }
984 else
985 {
986 if (prepare)
987 {
988 BGP_TRACE(D_EVENTS, "Neighbor ready");
989 bgp_start_neighbor(p);
990 }
991 }
992 }
993
994 static void
995 bgp_bfd_notify(struct bfd_request *req)
996 {
997 struct bgp_proto *p = req->data;
998 int ps = p->p.proto_state;
999
1000 if (req->down && ((ps == PS_START) || (ps == PS_UP)))
1001 {
1002 BGP_TRACE(D_EVENTS, "BFD session down");
1003 bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
1004 if (ps == PS_UP)
1005 bgp_update_startup_delay(p);
1006 bgp_stop(p, 0);
1007 }
1008 }
1009
1010 static void
1011 bgp_update_bfd(struct bgp_proto *p, int use_bfd)
1012 {
1013 if (use_bfd && !p->bfd_req)
1014 p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
1015 p->cf->multihop ? NULL : p->neigh->iface,
1016 bgp_bfd_notify, p);
1017
1018 if (!use_bfd && p->bfd_req)
1019 {
1020 rfree(p->bfd_req);
1021 p->bfd_req = NULL;
1022 }
1023 }
1024
1025 static int
1026 bgp_reload_routes(struct proto *P)
1027 {
1028 struct bgp_proto *p = (struct bgp_proto *) P;
1029 if (!p->conn || !p->conn->peer_refresh_support)
1030 return 0;
1031
1032 bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
1033 return 1;
1034 }
1035
1036 static void
1037 bgp_feed_begin(struct proto *P, int initial)
1038 {
1039 struct bgp_proto *p = (struct bgp_proto *) P;
1040
1041 /* This should not happen */
1042 if (!p->conn)
1043 return;
1044
1045 if (initial && p->cf->gr_mode)
1046 p->feed_state = BFS_LOADING;
1047
1048 /* It is refeed and both sides support enhanced route refresh */
1049 if (!initial && p->cf->enable_refresh &&
1050 p->conn->peer_enhanced_refresh_support)
1051 {
1052 /* BoRR must not be sent before End-of-RIB */
1053 if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
1054 return;
1055
1056 p->feed_state = BFS_REFRESHING;
1057 bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
1058 }
1059 }
1060
1061 static void
1062 bgp_feed_end(struct proto *P)
1063 {
1064 struct bgp_proto *p = (struct bgp_proto *) P;
1065
1066 /* This should not happen */
1067 if (!p->conn)
1068 return;
1069
1070 /* Non-demarcated feed ended, nothing to do */
1071 if (p->feed_state == BFS_NONE)
1072 return;
1073
1074 /* Schedule End-of-RIB packet */
1075 if (p->feed_state == BFS_LOADING)
1076 p->feed_state = BFS_LOADED;
1077
1078 /* Schedule EoRR packet */
1079 if (p->feed_state == BFS_REFRESHING)
1080 p->feed_state = BFS_REFRESHED;
1081
1082 /* Kick TX hook */
1083 bgp_schedule_packet(p->conn, PKT_UPDATE);
1084 }
1085
1086
1087 static void
1088 bgp_start_locked(struct object_lock *lock)
1089 {
1090 struct bgp_proto *p = lock->data;
1091 struct bgp_config *cf = p->cf;
1092
1093 if (p->p.proto_state != PS_START)
1094 {
1095 DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1096 return;
1097 }
1098
1099 DBG("BGP: Got lock\n");
1100
1101 if (cf->multihop)
1102 {
1103 /* Multi-hop sessions do not use neighbor entries */
1104 bgp_initiate(p);
1105 return;
1106 }
1107
1108 neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
1109 if (!n)
1110 {
1111 log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
1112 /* As we do not start yet, we can just disable protocol */
1113 p->p.disabled = 1;
1114 bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1115 proto_notify_state(&p->p, PS_DOWN);
1116 return;
1117 }
1118
1119 p->neigh = n;
1120
1121 if (n->scope <= 0)
1122 BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1123 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1124 BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1125 else
1126 bgp_start_neighbor(p);
1127 }
1128
1129 static int
1130 bgp_start(struct proto *P)
1131 {
1132 struct bgp_proto *p = (struct bgp_proto *) P;
1133 struct object_lock *lock;
1134
1135 DBG("BGP: Startup.\n");
1136 p->start_state = BSS_PREPARE;
1137 p->outgoing_conn.state = BS_IDLE;
1138 p->incoming_conn.state = BS_IDLE;
1139 p->neigh = NULL;
1140 p->bfd_req = NULL;
1141 p->gr_ready = 0;
1142 p->gr_active = 0;
1143
1144 rt_lock_table(p->igp_table);
1145
1146 p->event = ev_new(p->p.pool);
1147 p->event->hook = bgp_decision;
1148 p->event->data = p;
1149
1150 p->startup_timer = tm_new(p->p.pool);
1151 p->startup_timer->hook = bgp_startup_timeout;
1152 p->startup_timer->data = p;
1153
1154 p->gr_timer = tm_new(p->p.pool);
1155 p->gr_timer->hook = bgp_graceful_restart_timeout;
1156 p->gr_timer->data = p;
1157
1158 p->local_id = proto_get_router_id(P->cf);
1159 if (p->rr_client)
1160 p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1161
1162 p->remote_id = 0;
1163 p->source_addr = p->cf->source_addr;
1164
1165 if (p->p.gr_recovery && p->cf->gr_mode)
1166 proto_graceful_restart_lock(P);
1167
1168 /*
1169 * Before attempting to create the connection, we need to lock the
1170 * port, so that are sure we're the only instance attempting to talk
1171 * with that neighbor.
1172 */
1173
1174 lock = p->lock = olock_new(P->pool);
1175 lock->addr = p->cf->remote_ip;
1176 lock->port = p->cf->remote_port;
1177 lock->iface = p->cf->iface;
1178 lock->type = OBJLOCK_TCP;
1179 lock->hook = bgp_start_locked;
1180 lock->data = p;
1181 olock_acquire(lock);
1182
1183 return PS_START;
1184 }
1185
1186 extern int proto_restart;
1187
1188 static int
1189 bgp_shutdown(struct proto *P)
1190 {
1191 struct bgp_proto *p = (struct bgp_proto *) P;
1192 unsigned subcode = 0;
1193
1194 BGP_TRACE(D_EVENTS, "Shutdown requested");
1195
1196 switch (P->down_code)
1197 {
1198 case PDC_CF_REMOVE:
1199 case PDC_CF_DISABLE:
1200 subcode = 3; // Errcode 6, 3 - peer de-configured
1201 break;
1202
1203 case PDC_CF_RESTART:
1204 subcode = 6; // Errcode 6, 6 - other configuration change
1205 break;
1206
1207 case PDC_CMD_DISABLE:
1208 case PDC_CMD_SHUTDOWN:
1209 subcode = 2; // Errcode 6, 2 - administrative shutdown
1210 break;
1211
1212 case PDC_CMD_RESTART:
1213 subcode = 4; // Errcode 6, 4 - administrative reset
1214 break;
1215
1216 case PDC_RX_LIMIT_HIT:
1217 case PDC_IN_LIMIT_HIT:
1218 subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1219 /* log message for compatibility */
1220 log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1221 goto limit;
1222
1223 case PDC_OUT_LIMIT_HIT:
1224 subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1225
1226 limit:
1227 bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1228 if (proto_restart)
1229 bgp_update_startup_delay(p);
1230 else
1231 p->startup_delay = 0;
1232 goto done;
1233 }
1234
1235 bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1236 p->startup_delay = 0;
1237
1238 done:
1239 bgp_stop(p, subcode);
1240 return p->p.proto_state;
1241 }
1242
1243 static void
1244 bgp_cleanup(struct proto *P)
1245 {
1246 struct bgp_proto *p = (struct bgp_proto *) P;
1247 rt_unlock_table(p->igp_table);
1248 }
1249
1250 static rtable *
1251 get_igp_table(struct bgp_config *cf)
1252 {
1253 return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
1254 }
1255
1256 static struct proto *
1257 bgp_init(struct proto_config *C)
1258 {
1259 struct proto *P = proto_new(C, sizeof(struct bgp_proto));
1260 struct bgp_config *c = (struct bgp_config *) C;
1261 struct bgp_proto *p = (struct bgp_proto *) P;
1262
1263 P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
1264 P->rt_notify = bgp_rt_notify;
1265 P->import_control = bgp_import_control;
1266 P->neigh_notify = bgp_neigh_notify;
1267 P->reload_routes = bgp_reload_routes;
1268 P->feed_begin = bgp_feed_begin;
1269 P->feed_end = bgp_feed_end;
1270 P->rte_better = bgp_rte_better;
1271 P->rte_mergable = bgp_rte_mergable;
1272 P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
1273
1274 p->cf = c;
1275 p->local_as = c->local_as;
1276 p->remote_as = c->remote_as;
1277 p->is_internal = (c->local_as == c->remote_as);
1278 p->rs_client = c->rs_client;
1279 p->rr_client = c->rr_client;
1280 p->igp_table = get_igp_table(c);
1281
1282 return P;
1283 }
1284
1285
1286 void
1287 bgp_check_config(struct bgp_config *c)
1288 {
1289 int internal = (c->local_as == c->remote_as);
1290
1291 /* Do not check templates at all */
1292 if (c->c.class == SYM_TEMPLATE)
1293 return;
1294
1295
1296 /* EBGP direct by default, IBGP multihop by default */
1297 if (c->multihop < 0)
1298 c->multihop = internal ? 64 : 0;
1299
1300 /* Different default for gw_mode */
1301 if (!c->gw_mode)
1302 c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
1303
1304 /* Different default based on rs_client */
1305 if (!c->missing_lladdr)
1306 c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
1307
1308 /* Disable after error incompatible with restart limit action */
1309 if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
1310 c->c.in_limit->action = PLA_DISABLE;
1311
1312
1313 if (!c->local_as)
1314 cf_error("Local AS number must be set");
1315
1316 if (ipa_zero(c->remote_ip))
1317 cf_error("Neighbor must be configured");
1318
1319 if (!c->remote_as)
1320 cf_error("Remote AS number must be set");
1321
1322 // if (ipa_is_link_local(c->remote_ip) && !c->iface)
1323 // cf_error("Link-local neighbor address requires specified interface");
1324
1325 if (!ipa_is_link_local(c->remote_ip) != !c->iface)
1326 cf_error("Link-local address and interface scope must be used together");
1327
1328 if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
1329 cf_error("Neighbor AS number out of range (AS4 not available)");
1330
1331 if (!internal && c->rr_client)
1332 cf_error("Only internal neighbor can be RR client");
1333
1334 if (internal && c->rs_client)
1335 cf_error("Only external neighbor can be RS client");
1336
1337 if (c->multihop && (c->gw_mode == GW_DIRECT))
1338 cf_error("Multihop BGP cannot use direct gateway mode");
1339
1340 if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
1341 ipa_is_link_local(c->source_addr)))
1342 cf_error("Multihop BGP cannot be used with link-local addresses");
1343
1344 if (c->multihop && c->check_link)
1345 cf_error("Multihop BGP cannot depend on link state");
1346
1347 if (c->multihop && c->bfd && ipa_zero(c->source_addr))
1348 cf_error("Multihop BGP with BFD requires specified source address");
1349
1350 if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
1351 cf_error("BGP in recursive mode prohibits sorted table");
1352
1353 if (c->deterministic_med && c->c.table->sorted)
1354 cf_error("BGP with deterministic MED prohibits sorted table");
1355
1356 if (c->secondary && !c->c.table->sorted)
1357 cf_error("BGP with secondary option requires sorted table");
1358 }
1359
1360 static int
1361 bgp_reconfigure(struct proto *P, struct proto_config *C)
1362 {
1363 struct bgp_config *new = (struct bgp_config *) C;
1364 struct bgp_proto *p = (struct bgp_proto *) P;
1365 struct bgp_config *old = p->cf;
1366
1367 if (proto_get_router_id(C) != p->local_id)
1368 return 0;
1369
1370 int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1371 ((byte *) new) + sizeof(struct proto_config),
1372 // password item is last and must be checked separately
1373 OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1374 && ((!old->password && !new->password)
1375 || (old->password && new->password && !strcmp(old->password, new->password)))
1376 && (get_igp_table(old) == get_igp_table(new));
1377
1378 if (same && (p->start_state > BSS_PREPARE))
1379 bgp_update_bfd(p, new->bfd);
1380
1381 /* We should update our copy of configuration ptr as old configuration will be freed */
1382 if (same)
1383 p->cf = new;
1384
1385 return same;
1386 }
1387
1388 static void
1389 bgp_copy_config(struct proto_config *dest, struct proto_config *src)
1390 {
1391 /* Just a shallow copy */
1392 proto_copy_rest(dest, src, sizeof(struct bgp_config));
1393 }
1394
1395
1396 /**
1397 * bgp_error - report a protocol error
1398 * @c: connection
1399 * @code: error code (according to the RFC)
1400 * @subcode: error sub-code
1401 * @data: data to be passed in the Notification message
1402 * @len: length of the data
1403 *
1404 * bgp_error() sends a notification packet to tell the other side that a protocol
1405 * error has occurred (including the data considered erroneous if possible) and
1406 * closes the connection.
1407 */
1408 void
1409 bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
1410 {
1411 struct bgp_proto *p = c->bgp;
1412
1413 if (c->state == BS_CLOSE)
1414 return;
1415
1416 bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
1417 bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1418 bgp_conn_enter_close_state(c);
1419
1420 c->notify_code = code;
1421 c->notify_subcode = subcode;
1422 c->notify_data = data;
1423 c->notify_size = (len > 0) ? len : 0;
1424 bgp_schedule_packet(c, PKT_NOTIFICATION);
1425
1426 if (code != 6)
1427 {
1428 bgp_update_startup_delay(p);
1429 bgp_stop(p, 0);
1430 }
1431 }
1432
1433 /**
1434 * bgp_store_error - store last error for status report
1435 * @p: BGP instance
1436 * @c: connection
1437 * @class: error class (BE_xxx constants)
1438 * @code: error code (class specific)
1439 *
1440 * bgp_store_error() decides whether given error is interesting enough
1441 * and store that error to last_error variables of @p
1442 */
1443 void
1444 bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1445 {
1446 /* During PS_UP, we ignore errors on secondary connection */
1447 if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1448 return;
1449
1450 /* During PS_STOP, we ignore any errors, as we want to report
1451 * the error that caused transition to PS_STOP
1452 */
1453 if (p->p.proto_state == PS_STOP)
1454 return;
1455
1456 p->last_error_class = class;
1457 p->last_error_code = code;
1458 }
1459
/*
 * Read-only message tables used by the status reporting functions below.
 * Both the pointers and the strings are constant, so the tables cannot be
 * modified by accident.
 */

/* Connection state names, indexed by connection state in bgp_state_dsc() */
static const char * const bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };

/* Message prefixes, indexed by last_error_class */
static const char * const bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};

/* Messages for class BE_MISC, indexed by last_error_code */
static const char * const bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};

/* Messages for class BE_AUTO_DOWN, indexed by last_error_code */
static const char * const bgp_auto_errors[] = { "", "Route limit exceeded"};
1464
1465 static const char *
1466 bgp_last_errmsg(struct bgp_proto *p)
1467 {
1468 switch (p->last_error_class)
1469 {
1470 case BE_MISC:
1471 return bgp_misc_errors[p->last_error_code];
1472 case BE_SOCKET:
1473 return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1474 case BE_BGP_RX:
1475 case BE_BGP_TX:
1476 return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1477 case BE_AUTO_DOWN:
1478 return bgp_auto_errors[p->last_error_code];
1479 default:
1480 return "";
1481 }
1482 }
1483
1484 static const char *
1485 bgp_state_dsc(struct bgp_proto *p)
1486 {
1487 if (p->p.proto_state == PS_DOWN)
1488 return "Down";
1489
1490 int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1491 if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1492 return "Passive";
1493
1494 return bgp_state_names[state];
1495 }
1496
1497 static void
1498 bgp_get_status(struct proto *P, byte *buf)
1499 {
1500 struct bgp_proto *p = (struct bgp_proto *) P;
1501
1502 const char *err1 = bgp_err_classes[p->last_error_class];
1503 const char *err2 = bgp_last_errmsg(p);
1504
1505 if (P->proto_state == PS_DOWN)
1506 bsprintf(buf, "%s%s", err1, err2);
1507 else
1508 bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1509 }
1510
1511 static void
1512 bgp_show_proto_info(struct proto *P)
1513 {
1514 struct bgp_proto *p = (struct bgp_proto *) P;
1515 struct bgp_conn *c = p->conn;
1516
1517 proto_show_basic_info(P);
1518
1519 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
1520 cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
1521 cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
1522
1523 if (p->gr_active)
1524 cli_msg(-1006, " Neighbor graceful restart active");
1525
1526 if (P->proto_state == PS_START)
1527 {
1528 struct bgp_conn *oc = &p->outgoing_conn;
1529
1530 if ((p->start_state < BSS_CONNECT) &&
1531 (p->startup_timer->expires))
1532 cli_msg(-1006, " Error wait: %d/%d",
1533 p->startup_timer->expires - now, p->startup_delay);
1534
1535 if ((oc->state == BS_ACTIVE) &&
1536 (oc->connect_retry_timer->expires))
1537 cli_msg(-1006, " Connect delay: %d/%d",
1538 oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
1539
1540 if (p->gr_active && p->gr_timer->expires)
1541 cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now);
1542 }
1543 else if (P->proto_state == PS_UP)
1544 {
1545 cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
1546 cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s",
1547 c->peer_refresh_support ? " refresh" : "",
1548 c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
1549 c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
1550 c->peer_as4_support ? " AS4" : "",
1551 (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
1552 (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
1553 c->peer_ext_messages_support ? " ext-messages" : "");
1554 cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s",
1555 p->is_internal ? "internal" : "external",
1556 p->cf->multihop ? " multihop" : "",
1557 p->rr_client ? " route-reflector" : "",
1558 p->rs_client ? " route-server" : "",
1559 p->as4_session ? " AS4" : "",
1560 p->add_path_rx ? " add-path-rx" : "",
1561 p->add_path_tx ? " add-path-tx" : "",
1562 p->ext_messages ? " ext-messages" : "");
1563 cli_msg(-1006, " Source address: %I", p->source_addr);
1564 if (P->cf->in_limit)
1565 cli_msg(-1006, " Route limit: %d/%d",
1566 p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
1567 cli_msg(-1006, " Hold timer: %d/%d",
1568 tm_remains(c->hold_timer), c->hold_time);
1569 cli_msg(-1006, " Keepalive timer: %d/%d",
1570 tm_remains(c->keepalive_timer), c->keepalive_time);
1571 }
1572
1573 if ((p->last_error_class != BE_NONE) &&
1574 (p->last_error_class != BE_MAN_DOWN))
1575 {
1576 const char *err1 = bgp_err_classes[p->last_error_class];
1577 const char *err2 = bgp_last_errmsg(p);
1578 cli_msg(-1006, " Last error: %s%s", err1, err2);
1579 }
1580 }
1581
1582 struct protocol proto_bgp = {
1583 .name = "BGP",
1584 .template = "bgp%d",
1585 .attr_class = EAP_BGP,
1586 .preference = DEF_PREF_BGP,
1587 .config_size = sizeof(struct bgp_config),
1588 .init = bgp_init,
1589 .start = bgp_start,
1590 .shutdown = bgp_shutdown,
1591 .cleanup = bgp_cleanup,
1592 .reconfigure = bgp_reconfigure,
1593 .copy_config = bgp_copy_config,
1594 .get_status = bgp_get_status,
1595 .get_attr = bgp_get_attr,
1596 .get_route_info = bgp_get_route_info,
1597 .show_proto_info = bgp_show_proto_info
1598 };