]>
Commit | Line | Data |
---|---|---|
2638249d MM |
1 | /* |
2 | * BIRD -- The Border Gateway Protocol | |
3 | * | |
4 | * (c) 2000 Martin Mares <mj@ucw.cz> | |
d15b0b0a OZ |
5 | * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> |
6 | * (c) 2008--2016 CZ.NIC z.s.p.o. | |
2638249d MM |
7 | * |
8 | * Can be freely distributed and used under the terms of the GNU GPL. | |
9 | */ | |
10 | ||
54e55169 MM |
11 | /** |
12 | * DOC: Border Gateway Protocol | |
13 | * | |
d15b0b0a OZ |
14 | * The BGP protocol is implemented in three parts: |bgp.c| which takes care of |
15 | * the connection and most of the interface with BIRD core, |packets.c| handling | |
54e55169 MM |
16 | * both incoming and outgoing BGP packets and |attrs.c| containing functions for |
17 | * manipulation with BGP attribute lists. | |
18 | * | |
d15b0b0a OZ |
19 | * As opposed to the other existing routing daemons, BIRD has a sophisticated |
20 | * core architecture which is able to keep all the information needed by BGP in | |
21 | * the primary routing table, therefore no complex data structures like a | |
22 | * central BGP table are needed. This increases memory footprint of a BGP router | |
23 | * with many connections, but not too much and, which is more important, it | |
24 | * makes BGP much easier to implement. | |
54e55169 | 25 | * |
d15b0b0a OZ |
26 | * Each instance of BGP (corresponding to a single BGP peer) is described by a |
27 | * &bgp_proto structure to which are attached individual connections represented | |
28 | * by &bgp_conn (usually, there exists only one connection, but during BGP | |
29 | * session setup, there can be more of them). The connections are handled | |
30 | * according to the BGP state machine defined in the RFC with all the timers and | |
31 | * all the parameters configurable. | |
54e55169 | 32 | * |
d15b0b0a OZ |
33 | * In incoming direction, we listen on the connection's socket and each time we |
34 | * receive some input, we pass it to bgp_rx(). It decodes packet headers and the | |
35 | * markers and passes complete packets to bgp_rx_packet() which distributes the | |
36 | * packet according to its type. | |
54e55169 | 37 | * |
d15b0b0a OZ |
38 | * In outgoing direction, we gather all the routing updates and sort them to |
39 | * buckets (&bgp_bucket) according to their attributes (we keep a hash table for | |
40 | * fast comparison of &rta's and a &fib which helps us to find if we already | |
41 | * have another route for the same destination queued for sending, so that we | |
42 | * can replace it with the new one immediately instead of sending both | |
43 | * updates). There also exists a special bucket holding all the route | |
44 | * withdrawals which cannot be queued anywhere else as they don't have any | |
45 | * attributes. If we have any packet to send (due to either new routes or the | |
46 | * connection tracking code wanting to send an Open, Keepalive or Notification | |
47 | * message), we call bgp_schedule_packet() which sets the corresponding bit in a | |
48 | * @packets_to_send bit field in &bgp_conn and as soon as the transmit socket | |
49 | * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the | |
50 | * packet type bits and calls the corresponding bgp_create_xx() functions, | |
51 | * eventually rescheduling the same packet type if we have more data of the same | |
52 | * type to send. | |
54e55169 | 53 | * |
d15b0b0a OZ |
54 | * The processing of attributes consists of two functions: bgp_decode_attrs() |
55 | * for checking of the attribute blocks and translating them to the language of | |
56 | * BIRD's extended attributes and bgp_encode_attrs() which does the | |
57 | * converse. Both functions are built around a @bgp_attr_table array describing | |
58 | * all important characteristics of all known attributes. Unknown transitive | |
59 | * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. | |
6eda3f13 OZ |
60 | * |
61 | * BGP protocol implements graceful restart in both restarting (local restart) | |
62 | * and receiving (neighbor restart) roles. The first is handled mostly by the | |
63 | * graceful restart code in the nest, BGP protocol just handles capabilities, | |
64 | * sets @gr_wait and locks graceful restart until end-of-RIB mark is received. | |
65 | * The second is implemented by internal restart of the BGP state to %BS_IDLE | |
66 | * and protocol state to %PS_START, but keeping the protocol up from the core | |
67 | * point of view and therefore maintaining received routes. Routing table | |
68 | * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing | |
69 | * stale routes after reestablishment of BGP session during graceful restart. | |
c49e4a65 OZ |
70 | * |
71 | * Supported standards: | |
0f40405f OZ |
72 | * RFC 4271 - Border Gateway Protocol 4 (BGP) |
73 | * RFC 1997 - BGP Communities Attribute | |
74 | * RFC 2385 - Protection of BGP Sessions via TCP MD5 Signature | |
75 | * RFC 2545 - Use of BGP Multiprotocol Extensions for IPv6 | |
76 | * RFC 2918 - Route Refresh Capability | |
77 | * RFC 3107 - Carrying Label Information in BGP | |
78 | * RFC 4360 - BGP Extended Communities Attribute | |
79 | * RFC 4364 - BGP/MPLS IPv4 Virtual Private Networks | |
80 | * RFC 4456 - BGP Route Reflection | |
81 | * RFC 4486 - Subcodes for BGP Cease Notification Message | |
82 | * RFC 4659 - BGP/MPLS IPv6 Virtual Private Networks | |
83 | * RFC 4724 - Graceful Restart Mechanism for BGP | |
84 | * RFC 4760 - Multiprotocol extensions for BGP | |
85 | * RFC 4798 - Connecting IPv6 Islands over IPv4 MPLS | |
86 | * RFC 5065 - AS confederations for BGP | |
87 | * RFC 5082 - Generalized TTL Security Mechanism | |
88 | * RFC 5492 - Capabilities Advertisement with BGP | |
89 | * RFC 5549 - Advertising IPv4 NLRI with an IPv6 Next Hop | |
90 | * RFC 5575 - Dissemination of Flow Specification Rules | |
91 | * RFC 5668 - 4-Octet AS Specific BGP Extended Community | |
92 | * RFC 6286 - AS-Wide Unique BGP Identifier | |
93 | * RFC 6608 - Subcodes for BGP Finite State Machine Error | |
94 | * RFC 6793 - BGP Support for 4-Octet AS Numbers | |
09ee846d | 95 | * RFC 7311 - Accumulated IGP Metric Attribute for BGP |
0f40405f OZ |
96 | * RFC 7313 - Enhanced Route Refresh Capability for BGP |
97 | * RFC 7606 - Revised Error Handling for BGP UPDATE Messages | |
98 | * RFC 7911 - Advertisement of Multiple Paths in BGP | |
99 | * RFC 7947 - Internet Exchange BGP Route Server | |
100 | * RFC 8092 - BGP Large Communities Attribute | |
101 | * RFC 8203 - BGP Administrative Shutdown Communication | |
102 | * RFC 8212 - Default EBGP Route Propagation Behavior without Policies | |
be7c1aef | 103 | * RFC 8654 - Extended Message Support for BGP |
913ec57f | 104 | * RFC 9072 - Extended Optional Parameters Length for BGP OPEN Message |
1f2eb2ac | 105 | * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications |
c73b5d2d | 106 | * RFC 9234 - Route Leak Prevention and Detection Using Roles |
0f40405f | 107 | * draft-uttaro-idr-bgp-persistence-04 |
71423871 | 108 | * draft-walton-bgp-hostname-capability-02 |
0f40405f | 109 | */ |
54e55169 | 110 | |
48d79d52 | 111 | #undef LOCAL_DEBUG |
2638249d | 112 | |
02552526 OZ |
113 | #include <stdlib.h> |
114 | ||
2638249d MM |
115 | #include "nest/bird.h" |
116 | #include "nest/iface.h" | |
117 | #include "nest/protocol.h" | |
118 | #include "nest/route.h" | |
b8113a5e | 119 | #include "nest/cli.h" |
1ec52253 | 120 | #include "nest/locks.h" |
2638249d | 121 | #include "conf/conf.h" |
3831b619 | 122 | #include "filter/filter.h" |
c01e3741 | 123 | #include "lib/socket.h" |
973399ae | 124 | #include "lib/resource.h" |
7d875e09 | 125 | #include "lib/string.h" |
2638249d MM |
126 | |
127 | #include "bgp.h" | |
a848dad4 | 128 | #include "proto/bmp/bmp.h" |
2638249d | 129 | |
e7d2ac44 | 130 | |
06ece326 | 131 | static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */ |
d15b0b0a | 132 | |
c01e3741 | 133 | |
c01e3741 | 134 | static void bgp_connect(struct bgp_proto *p); |
dd91e467 | 135 | static void bgp_active(struct bgp_proto *p); |
e0835db4 OZ |
136 | static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn); |
137 | static void bgp_setup_sk(struct bgp_conn *conn, sock *s); | |
138 | static void bgp_send_open(struct bgp_conn *conn); | |
9d3fc306 | 139 | static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); |
2638249d | 140 | |
d15b0b0a OZ |
141 | static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); |
142 | static void bgp_listen_sock_err(sock *sk UNUSED, int err); | |
11cb6202 | 143 | |
11b32d91 OZ |
144 | /** |
145 | * bgp_open - open a BGP instance | |
146 | * @p: BGP instance | |
147 | * | |
d15b0b0a OZ |
148 | * This function allocates and configures shared BGP resources, mainly listening |
149 | * sockets. Should be called as the last step during initialization (when lock | |
150 | * is acquired and neighbor is ready). When error, caller should change state to | |
151 | * PS_DOWN and return immediately. | |
11b32d91 OZ |
152 | */ |
153 | static int | |
154 | bgp_open(struct bgp_proto *p) | |
155 | { | |
d15b0b0a OZ |
156 | struct bgp_socket *bs = NULL; |
157 | struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; | |
158 | ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : | |
e0835db4 | 159 | (p->ipv4 ? IPA_NONE4 : IPA_NONE6); |
d15b0b0a | 160 | uint port = p->cf->local_port; |
60e9def9 OZ |
161 | uint flags = p->cf->free_bind ? SKF_FREEBIND : 0; |
162 | uint flag_mask = SKF_FREEBIND; | |
d15b0b0a | 163 | |
d15b0b0a | 164 | /* We assume that cf->iface is defined iff cf->local_ip is link-local */ |
11b32d91 | 165 | |
d15b0b0a | 166 | WALK_LIST(bs, bgp_sockets) |
60e9def9 OZ |
167 | if (ipa_equal(bs->sk->saddr, addr) && |
168 | (bs->sk->sport == port) && | |
169 | (bs->sk->iface == ifa) && | |
170 | (bs->sk->vrf == p->p.vrf) && | |
171 | ((bs->sk->flags & flag_mask) == flags)) | |
a34b0934 | 172 | { |
d15b0b0a OZ |
173 | bs->uc++; |
174 | p->sock = bs; | |
175 | return 0; | |
a34b0934 OZ |
176 | } |
177 | ||
d15b0b0a OZ |
178 | sock *sk = sk_new(proto_pool); |
179 | sk->type = SK_TCP_PASSIVE; | |
180 | sk->ttl = 255; | |
181 | sk->saddr = addr; | |
182 | sk->sport = port; | |
e19d0805 OZ |
183 | sk->iface = ifa; |
184 | sk->vrf = p->p.vrf; | |
60e9def9 | 185 | sk->flags = flags; |
d15b0b0a OZ |
186 | sk->tos = IP_PREC_INTERNET_CONTROL; |
187 | sk->rbsize = BGP_RX_BUFFER_SIZE; | |
188 | sk->tbsize = BGP_TX_BUFFER_SIZE; | |
189 | sk->rx_hook = bgp_incoming_connection; | |
190 | sk->err_hook = bgp_listen_sock_err; | |
191 | ||
192 | if (sk_open(sk) < 0) | |
193 | goto err; | |
11b32d91 | 194 | |
d15b0b0a OZ |
195 | bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); |
196 | bs->sk = sk; | |
197 | bs->uc = 1; | |
198 | p->sock = bs; | |
470740f9 | 199 | sk->data = bs; |
05476c4d | 200 | |
d15b0b0a OZ |
201 | add_tail(&bgp_sockets, &bs->n); |
202 | ||
11b32d91 | 203 | return 0; |
b1b19433 OZ |
204 | |
205 | err: | |
d15b0b0a OZ |
206 | sk_log_error(sk, p->p.name); |
207 | log(L_ERR "%s: Cannot open listening socket", p->p.name); | |
208 | rfree(sk); | |
b1b19433 | 209 | return -1; |
11b32d91 OZ |
210 | } |
211 | ||
d15b0b0a OZ |
212 | /** |
213 | * bgp_close - close a BGP instance | |
214 | * @p: BGP instance | |
215 | * | |
216 | * This function frees and deconfigures shared BGP resources. | |
217 | */ | |
218 | static void | |
219 | bgp_close(struct bgp_proto *p) | |
220 | { | |
221 | struct bgp_socket *bs = p->sock; | |
222 | ||
223 | ASSERT(bs && bs->uc); | |
224 | ||
225 | if (--bs->uc) | |
226 | return; | |
227 | ||
228 | rfree(bs->sk); | |
229 | rem_node(&bs->n); | |
230 | mb_free(bs); | |
d15b0b0a OZ |
231 | } |
232 | ||
233 | static inline int | |
234 | bgp_setup_auth(struct bgp_proto *p, int enable) | |
235 | { | |
236 | if (p->cf->password) | |
237 | { | |
757cab18 OZ |
238 | ip_addr prefix = p->cf->remote_ip; |
239 | int pxlen = -1; | |
240 | ||
241 | if (p->cf->remote_range) | |
242 | { | |
243 | prefix = net_prefix(p->cf->remote_range); | |
244 | pxlen = net_pxlen(p->cf->remote_range); | |
245 | } | |
246 | ||
d15b0b0a | 247 | int rv = sk_set_md5_auth(p->sock->sk, |
757cab18 | 248 | p->cf->local_ip, prefix, pxlen, p->cf->iface, |
d15b0b0a OZ |
249 | enable ? p->cf->password : NULL, p->cf->setkey); |
250 | ||
251 | if (rv < 0) | |
252 | sk_log_error(p->sock->sk, p->p.name); | |
253 | ||
254 | return rv; | |
255 | } | |
256 | else | |
257 | return 0; | |
258 | } | |
259 | ||
260 | static inline struct bgp_channel * | |
261 | bgp_find_channel(struct bgp_proto *p, u32 afi) | |
262 | { | |
263 | struct bgp_channel *c; | |
54430df9 | 264 | BGP_WALK_CHANNELS(p, c) |
d15b0b0a OZ |
265 | if (c->afi == afi) |
266 | return c; | |
267 | ||
268 | return NULL; | |
269 | } | |
270 | ||
dd91e467 OZ |
271 | static void |
272 | bgp_startup(struct bgp_proto *p) | |
273 | { | |
274 | BGP_TRACE(D_EVENTS, "Started"); | |
d15b0b0a | 275 | p->start_state = BSS_CONNECT; |
be6e39eb | 276 | |
e0835db4 | 277 | if (!p->passive) |
be6e39eb | 278 | bgp_active(p); |
e0835db4 OZ |
279 | |
280 | if (p->postponed_sk) | |
281 | { | |
282 | /* Apply postponed incoming connection */ | |
283 | bgp_setup_conn(p, &p->incoming_conn); | |
284 | bgp_setup_sk(&p->incoming_conn, p->postponed_sk); | |
285 | bgp_send_open(&p->incoming_conn); | |
286 | p->postponed_sk = NULL; | |
287 | } | |
dd91e467 OZ |
288 | } |
289 | ||
290 | static void | |
291 | bgp_startup_timeout(timer *t) | |
292 | { | |
293 | bgp_startup(t->data); | |
294 | } | |
295 | ||
296 | ||
297 | static void | |
298 | bgp_initiate(struct bgp_proto *p) | |
299 | { | |
d15b0b0a OZ |
300 | int err_val; |
301 | ||
302 | if (bgp_open(p) < 0) | |
303 | { err_val = BEM_NO_SOCKET; goto err1; } | |
304 | ||
305 | if (bgp_setup_auth(p, 1) < 0) | |
306 | { err_val = BEM_INVALID_MD5; goto err2; } | |
9be9a264 | 307 | |
1ec52253 OZ |
308 | if (p->cf->bfd) |
309 | bgp_update_bfd(p, p->cf->bfd); | |
310 | ||
dd91e467 | 311 | if (p->startup_delay) |
d15b0b0a OZ |
312 | { |
313 | p->start_state = BSS_DELAY; | |
314 | BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay); | |
315 | bgp_start_timer(p->startup_timer, p->startup_delay); | |
316 | } | |
dd91e467 OZ |
317 | else |
318 | bgp_startup(p); | |
dd91e467 | 319 | |
d15b0b0a | 320 | return; |
d51aa281 | 321 | |
d15b0b0a OZ |
322 | err2: |
323 | bgp_close(p); | |
324 | err1: | |
325 | p->p.disabled = 1; | |
326 | bgp_store_error(p, NULL, BE_MISC, err_val); | |
91d04583 OZ |
327 | |
328 | p->neigh = NULL; | |
d15b0b0a | 329 | proto_notify_state(&p->p, PS_DOWN); |
d51aa281 | 330 | |
d15b0b0a | 331 | return; |
c01e3741 MM |
332 | } |
333 | ||
54e55169 MM |
334 | /** |
335 | * bgp_start_timer - start a BGP timer | |
336 | * @t: timer | |
cc881bd1 | 337 | * @value: time (in seconds) to fire (0 to disable the timer) |
54e55169 | 338 | * |
d15b0b0a OZ |
339 | * This functions calls tm_start() on @t with time @value and the amount of |
340 | * randomization suggested by the BGP standard. Please use it for all BGP | |
341 | * timers. | |
54e55169 | 342 | */ |
3fdbafb6 | 343 | void |
cc881bd1 | 344 | bgp_start_timer(timer *t, uint value) |
c01e3741 | 345 | { |
3fdbafb6 | 346 | if (value) |
d15b0b0a | 347 | { |
cc881bd1 OZ |
348 | /* The randomization procedure is specified in RFC 4271 section 10 */ |
349 | btime time = value S; | |
350 | btime randomize = random() % ((time / 4) + 1); | |
a6f79ca5 | 351 | tm_start(t, time - randomize); |
d15b0b0a | 352 | } |
b552ecc4 | 353 | else |
a6f79ca5 | 354 | tm_stop(t); |
b552ecc4 MM |
355 | } |
356 | ||
54e55169 MM |
357 | /** |
358 | * bgp_close_conn - close a BGP connection | |
359 | * @conn: connection to close | |
360 | * | |
d15b0b0a OZ |
361 | * This function takes a connection described by the &bgp_conn structure, closes |
362 | * its socket and frees all resources associated with it. | |
54e55169 | 363 | */ |
b552ecc4 MM |
364 | void |
365 | bgp_close_conn(struct bgp_conn *conn) | |
366 | { | |
e81b440f | 367 | // struct bgp_proto *p = conn->bgp; |
b552ecc4 MM |
368 | |
369 | DBG("BGP: Closing connection\n"); | |
370 | conn->packets_to_send = 0; | |
d15b0b0a OZ |
371 | conn->channels_to_send = 0; |
372 | rfree(conn->connect_timer); | |
373 | conn->connect_timer = NULL; | |
b552ecc4 MM |
374 | rfree(conn->keepalive_timer); |
375 | conn->keepalive_timer = NULL; | |
376 | rfree(conn->hold_timer); | |
377 | conn->hold_timer = NULL; | |
11b32d91 OZ |
378 | rfree(conn->tx_ev); |
379 | conn->tx_ev = NULL; | |
d15b0b0a OZ |
380 | rfree(conn->sk); |
381 | conn->sk = NULL; | |
382 | ||
1be0be1b OZ |
383 | mb_free(conn->local_open_msg); |
384 | conn->local_open_msg = NULL; | |
385 | mb_free(conn->remote_open_msg); | |
386 | conn->remote_open_msg = NULL; | |
387 | conn->local_open_length = 0; | |
388 | conn->remote_open_length = 0; | |
389 | ||
d15b0b0a OZ |
390 | mb_free(conn->local_caps); |
391 | conn->local_caps = NULL; | |
392 | mb_free(conn->remote_caps); | |
393 | conn->remote_caps = NULL; | |
11b32d91 OZ |
394 | } |
395 | ||
396 | ||
397 | /** | |
398 | * bgp_update_startup_delay - update a startup delay | |
399 | * @p: BGP instance | |
11b32d91 | 400 | * |
d15b0b0a OZ |
401 | * This function updates a startup delay that is used to postpone next BGP |
402 | * connect. It also handles disable_after_error and might stop BGP instance | |
403 | * when error happened and disable_after_error is on. | |
11b32d91 OZ |
404 | * |
405 | * It should be called when BGP protocol error happened. | |
406 | */ | |
407 | void | |
b99d3786 | 408 | bgp_update_startup_delay(struct bgp_proto *p) |
11b32d91 | 409 | { |
a22c3e59 | 410 | const struct bgp_config *cf = p->cf; |
11b32d91 | 411 | |
b99d3786 | 412 | DBG("BGP: Updating startup delay\n"); |
11b32d91 | 413 | |
cc881bd1 | 414 | if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S)) |
72382626 OZ |
415 | p->startup_delay = 0; |
416 | ||
cc881bd1 | 417 | p->last_proto_error = current_time(); |
11b32d91 OZ |
418 | |
419 | if (cf->disable_after_error) | |
d15b0b0a OZ |
420 | { |
421 | p->startup_delay = 0; | |
422 | p->p.disabled = 1; | |
423 | return; | |
424 | } | |
11b32d91 | 425 | |
11b32d91 OZ |
426 | if (!p->startup_delay) |
427 | p->startup_delay = cf->error_delay_time_min; | |
428 | else | |
b99d3786 | 429 | p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max); |
c01e3741 MM |
430 | } |
431 | ||
11b32d91 | 432 | static void |
8a68316e | 433 | bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len) |
48e842cc | 434 | { |
11b32d91 | 435 | switch (conn->state) |
d15b0b0a OZ |
436 | { |
437 | case BS_IDLE: | |
438 | case BS_CLOSE: | |
439 | return; | |
440 | ||
441 | case BS_CONNECT: | |
442 | case BS_ACTIVE: | |
443 | bgp_conn_enter_idle_state(conn); | |
444 | return; | |
445 | ||
446 | case BS_OPENSENT: | |
447 | case BS_OPENCONFIRM: | |
448 | case BS_ESTABLISHED: | |
8a68316e OZ |
449 | if (subcode < 0) |
450 | { | |
451 | bgp_conn_enter_close_state(conn); | |
452 | bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE); | |
453 | } | |
454 | else | |
455 | bgp_error(conn, 6, subcode, data, len); | |
d15b0b0a OZ |
456 | return; |
457 | ||
458 | default: | |
459 | bug("bgp_graceful_close_conn: Unknown state %d", conn->state); | |
460 | } | |
48e842cc MM |
461 | } |
462 | ||
11b32d91 OZ |
463 | static void |
464 | bgp_down(struct bgp_proto *p) | |
465 | { | |
466 | if (p->start_state > BSS_PREPARE) | |
d15b0b0a OZ |
467 | { |
468 | bgp_setup_auth(p, 0); | |
469 | bgp_close(p); | |
470 | } | |
11b32d91 | 471 | |
91d04583 OZ |
472 | p->neigh = NULL; |
473 | ||
b99d3786 | 474 | BGP_TRACE(D_EVENTS, "Down"); |
11b32d91 OZ |
475 | proto_notify_state(&p->p, PS_DOWN); |
476 | } | |
477 | ||
478 | static void | |
479 | bgp_decision(void *vp) | |
480 | { | |
481 | struct bgp_proto *p = vp; | |
482 | ||
483 | DBG("BGP: Decision start\n"); | |
d15b0b0a OZ |
484 | if ((p->p.proto_state == PS_START) && |
485 | (p->outgoing_conn.state == BS_IDLE) && | |
486 | (p->incoming_conn.state != BS_OPENCONFIRM) && | |
e0835db4 | 487 | !p->passive) |
dd91e467 | 488 | bgp_active(p); |
11b32d91 | 489 | |
d15b0b0a OZ |
490 | if ((p->p.proto_state == PS_STOP) && |
491 | (p->outgoing_conn.state == BS_IDLE) && | |
492 | (p->incoming_conn.state == BS_IDLE)) | |
11b32d91 OZ |
493 | bgp_down(p); |
494 | } | |
495 | ||
e0835db4 OZ |
496 | static struct bgp_proto * |
497 | bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip) | |
498 | { | |
499 | struct symbol *sym; | |
500 | char fmt[SYM_MAX_LEN]; | |
501 | ||
502 | bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits); | |
503 | ||
504 | /* This is hack, we would like to share config, but we need to copy it now */ | |
505 | new_config = config; | |
506 | cfg_mem = config->mem; | |
507 | conf_this_scope = config->root_scope; | |
508 | sym = cf_default_name(fmt, &(pp->dynamic_name_counter)); | |
509 | proto_clone_config(sym, pp->p.cf); | |
510 | new_config = NULL; | |
511 | cfg_mem = NULL; | |
512 | ||
513 | /* Just pass remote_ip to bgp_init() */ | |
eac9250f | 514 | ((struct bgp_config *) sym->proto)->remote_ip = remote_ip; |
e0835db4 | 515 | |
eac9250f | 516 | return (void *) proto_spawn(sym->proto, 0); |
e0835db4 OZ |
517 | } |
518 | ||
b99d3786 | 519 | void |
8a68316e | 520 | bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) |
11b32d91 OZ |
521 | { |
522 | proto_notify_state(&p->p, PS_STOP); | |
cd1d9961 OZ |
523 | bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); |
524 | bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len); | |
11b32d91 OZ |
525 | ev_schedule(p->event); |
526 | } | |
527 | ||
cf31112f | 528 | static inline void |
d15b0b0a | 529 | bgp_conn_set_state(struct bgp_conn *conn, uint new_state) |
cf31112f OZ |
530 | { |
531 | if (conn->bgp->p.mrtdump & MD_STATES) | |
863ecfc7 | 532 | bgp_dump_state_change(conn, conn->state, new_state); |
cf31112f OZ |
533 | |
534 | conn->state = new_state; | |
535 | } | |
536 | ||
537 | void | |
538 | bgp_conn_enter_openconfirm_state(struct bgp_conn *conn) | |
539 | { | |
540 | /* Really, most of the work is done in bgp_rx_open(). */ | |
541 | bgp_conn_set_state(conn, BS_OPENCONFIRM); | |
542 | } | |
543 | ||
d15b0b0a | 544 | static const struct bgp_af_caps dummy_af_caps = { }; |
7e5f769d | 545 | static const struct bgp_af_caps basic_af_caps = { .ready = 1 }; |
d15b0b0a | 546 | |
11b32d91 OZ |
547 | void |
548 | bgp_conn_enter_established_state(struct bgp_conn *conn) | |
549 | { | |
550 | struct bgp_proto *p = conn->bgp; | |
d15b0b0a OZ |
551 | struct bgp_caps *local = conn->local_caps; |
552 | struct bgp_caps *peer = conn->remote_caps; | |
553 | struct bgp_channel *c; | |
523f020b | 554 | |
11b32d91 | 555 | BGP_TRACE(D_EVENTS, "BGP session established"); |
21d09632 OZ |
556 | p->last_established = current_time(); |
557 | p->stats.fsm_established_transitions++; | |
11b32d91 | 558 | |
9be9a264 | 559 | /* For multi-hop BGP sessions */ |
a22c3e59 OZ |
560 | if (ipa_zero(p->local_ip)) |
561 | p->local_ip = conn->sk->saddr; | |
9be9a264 | 562 | |
23ee6b1c OZ |
563 | /* For promiscuous sessions */ |
564 | if (!p->remote_as) | |
565 | p->remote_as = conn->received_as; | |
566 | ||
e16b0aef OZ |
567 | /* In case of LLv6 is not valid during BGP start */ |
568 | if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6) | |
569 | p->link_addr = p->neigh->iface->llv6->ip; | |
570 | ||
9e7b3ebd OZ |
571 | conn->sk->fast_rx = 0; |
572 | ||
11b32d91 OZ |
573 | p->conn = conn; |
574 | p->last_error_class = 0; | |
575 | p->last_error_code = 0; | |
094d2bdb | 576 | |
d15b0b0a OZ |
577 | p->as4_session = conn->as4_session; |
578 | ||
579 | p->route_refresh = peer->route_refresh; | |
580 | p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh; | |
0c791f87 | 581 | |
5bd73431 OZ |
582 | /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */ |
583 | p->gr_ready = p->llgr_ready = 0; /* Updated later */ | |
0c791f87 | 584 | |
d15b0b0a OZ |
585 | /* Whether peer is ready to handle our GR recovery */ |
586 | int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART); | |
0c791f87 | 587 | |
d15b0b0a | 588 | if (p->gr_active_num) |
a6f79ca5 | 589 | tm_stop(p->gr_timer); |
0c791f87 | 590 | |
d15b0b0a OZ |
591 | /* Number of active channels */ |
592 | int num = 0; | |
593 | ||
863ecfc7 OZ |
594 | /* Summary state of ADD_PATH RX for active channels */ |
595 | uint summary_add_path_rx = 0; | |
596 | ||
54430df9 | 597 | BGP_WALK_CHANNELS(p, c) |
d15b0b0a OZ |
598 | { |
599 | const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); | |
600 | const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi); | |
601 | ||
7e5f769d OZ |
602 | /* Use default if capabilities were not announced */ |
603 | if (!local->length && (c->afi == BGP_AF_IPV4)) | |
604 | loc = &basic_af_caps; | |
605 | ||
606 | if (!peer->length && (c->afi == BGP_AF_IPV4)) | |
607 | rem = &basic_af_caps; | |
608 | ||
d15b0b0a OZ |
609 | /* Ignore AFIs that were not announced in multiprotocol capability */ |
610 | if (!loc || !loc->ready) | |
611 | loc = &dummy_af_caps; | |
612 | ||
613 | if (!rem || !rem->ready) | |
614 | rem = &dummy_af_caps; | |
615 | ||
616 | int active = loc->ready && rem->ready; | |
617 | c->c.disabled = !active; | |
682d3f7d | 618 | c->c.reloadable = p->route_refresh || c->cf->import_table; |
d15b0b0a OZ |
619 | |
620 | c->index = active ? num++ : 0; | |
621 | ||
622 | c->feed_state = BFS_NONE; | |
623 | c->load_state = BFS_NONE; | |
624 | ||
625 | /* Channels where peer may do GR */ | |
5bd73431 OZ |
626 | uint gr_ready = active && local->gr_aware && rem->gr_able; |
627 | uint llgr_ready = active && local->llgr_aware && rem->llgr_able; | |
628 | ||
629 | c->gr_ready = gr_ready || llgr_ready; | |
d15b0b0a | 630 | p->gr_ready = p->gr_ready || c->gr_ready; |
5bd73431 OZ |
631 | p->llgr_ready = p->llgr_ready || llgr_ready; |
632 | ||
633 | /* Remember last LLGR stale time */ | |
634 | c->stale_time = local->llgr_aware ? rem->llgr_time : 0; | |
0c791f87 | 635 | |
d15b0b0a OZ |
636 | /* Channels not able to recover gracefully */ |
637 | if (p->p.gr_recovery && (!active || !peer_gr_ready)) | |
638 | channel_graceful_restart_unlock(&c->c); | |
9aed29e6 | 639 | |
d15b0b0a OZ |
640 | /* Channels waiting for local convergence */ |
641 | if (p->p.gr_recovery && loc->gr_able && peer_gr_ready) | |
642 | c->c.gr_wait = 1; | |
643 | ||
5bd73431 OZ |
644 | /* Channels where regular graceful restart failed */ |
645 | if ((c->gr_active == BGP_GRS_ACTIVE) && | |
646 | !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING))) | |
647 | bgp_graceful_restart_done(c); | |
648 | ||
649 | /* Channels where regular long-lived restart failed */ | |
650 | if ((c->gr_active == BGP_GRS_LLGR) && | |
651 | !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING))) | |
d15b0b0a OZ |
652 | bgp_graceful_restart_done(c); |
653 | ||
654 | /* GR capability implies that neighbor will send End-of-RIB */ | |
655 | if (peer->gr_aware) | |
656 | c->load_state = BFS_LOADING; | |
657 | ||
d8022d26 | 658 | c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop); |
d15b0b0a OZ |
659 | c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX); |
660 | c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX); | |
661 | ||
863ecfc7 OZ |
662 | if (active) |
663 | summary_add_path_rx |= !c->add_path_rx ? 1 : 2; | |
664 | ||
f8aad5d5 | 665 | /* Update RA mode */ |
d15b0b0a OZ |
666 | if (c->add_path_tx) |
667 | c->c.ra_mode = RA_ANY; | |
f8aad5d5 OZ |
668 | else if (c->cf->secondary) |
669 | c->c.ra_mode = RA_ACCEPTED; | |
670 | else | |
671 | c->c.ra_mode = RA_OPTIMAL; | |
d15b0b0a OZ |
672 | } |
673 | ||
674 | p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32)); | |
675 | p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *)); | |
676 | p->channel_count = num; | |
863ecfc7 | 677 | p->summary_add_path_rx = summary_add_path_rx; |
d15b0b0a | 678 | |
54430df9 | 679 | BGP_WALK_CHANNELS(p, c) |
d15b0b0a OZ |
680 | { |
681 | if (c->c.disabled) | |
682 | continue; | |
683 | ||
684 | p->afi_map[c->index] = c->afi; | |
685 | p->channel_map[c->index] = c; | |
686 | } | |
687 | ||
688 | /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */ | |
9aed29e6 | 689 | |
cf31112f | 690 | bgp_conn_set_state(conn, BS_ESTABLISHED); |
11b32d91 | 691 | proto_notify_state(&p->p, PS_UP); |
aa3c3549 OZ |
692 | bmp_peer_up(p, conn->local_open_msg, conn->local_open_length, |
693 | conn->remote_open_msg, conn->remote_open_length); | |
11b32d91 OZ |
694 | } |
695 | ||
696 | static void | |
697 | bgp_conn_leave_established_state(struct bgp_proto *p) | |
698 | { | |
699 | BGP_TRACE(D_EVENTS, "BGP session closed"); | |
21d09632 | 700 | p->last_established = current_time(); |
11b32d91 OZ |
701 | p->conn = NULL; |
702 | ||
703 | if (p->p.proto_state == PS_UP) | |
cd1d9961 | 704 | bgp_stop(p, 0, NULL, 0); |
11b32d91 OZ |
705 | } |
706 | ||
707 | void | |
708 | bgp_conn_enter_close_state(struct bgp_conn *conn) | |
709 | { | |
710 | struct bgp_proto *p = conn->bgp; | |
711 | int os = conn->state; | |
712 | ||
cf31112f | 713 | bgp_conn_set_state(conn, BS_CLOSE); |
a6f79ca5 | 714 | tm_stop(conn->keepalive_timer); |
11b32d91 OZ |
715 | conn->sk->rx_hook = NULL; |
716 | ||
48b15ef1 OZ |
717 | /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */ |
718 | bgp_start_timer(conn->hold_timer, 10); | |
719 | ||
11b32d91 OZ |
720 | if (os == BS_ESTABLISHED) |
721 | bgp_conn_leave_established_state(p); | |
722 | } | |
723 | ||
724 | void | |
725 | bgp_conn_enter_idle_state(struct bgp_conn *conn) | |
726 | { | |
727 | struct bgp_proto *p = conn->bgp; | |
728 | int os = conn->state; | |
729 | ||
730 | bgp_close_conn(conn); | |
cf31112f | 731 | bgp_conn_set_state(conn, BS_IDLE); |
11b32d91 OZ |
732 | ev_schedule(p->event); |
733 | ||
734 | if (os == BS_ESTABLISHED) | |
735 | bgp_conn_leave_established_state(p); | |
736 | } | |
737 | ||
6eda3f13 OZ |
738 | /** |
739 | * bgp_handle_graceful_restart - handle detected BGP graceful restart | |
740 | * @p: BGP instance | |
741 | * | |
742 | * This function is called when a BGP graceful restart of the neighbor is | |
743 | * detected (when the TCP connection fails or when a new TCP connection | |
744 | * appears). The function activates processing of the restart - starts routing | |
745 | * table refresh cycle and activates BGP restart timer. The protocol state goes | |
746 | * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the | |
747 | * caller. | |
748 | */ | |
0c791f87 OZ |
749 | void |
750 | bgp_handle_graceful_restart(struct bgp_proto *p) | |
751 | { | |
752 | ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready); | |
753 | ||
754 | BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s", | |
d15b0b0a OZ |
755 | p->gr_active_num ? " - already pending" : ""); |
756 | ||
757 | p->gr_active_num = 0; | |
0c791f87 | 758 | |
d15b0b0a | 759 | struct bgp_channel *c; |
54430df9 | 760 | BGP_WALK_CHANNELS(p, c) |
d15b0b0a | 761 | { |
7fc55925 OZ |
762 | /* FIXME: perhaps check for channel state instead of disabled flag? */ |
763 | if (c->c.disabled) | |
764 | continue; | |
765 | ||
d15b0b0a OZ |
766 | if (c->gr_ready) |
767 | { | |
5bd73431 OZ |
768 | p->gr_active_num++; |
769 | ||
770 | switch (c->gr_active) | |
771 | { | |
772 | case BGP_GRS_NONE: | |
773 | c->gr_active = BGP_GRS_ACTIVE; | |
774 | rt_refresh_begin(c->c.table, &c->c); | |
775 | break; | |
776 | ||
777 | case BGP_GRS_ACTIVE: | |
d15b0b0a | 778 | rt_refresh_end(c->c.table, &c->c); |
5bd73431 OZ |
779 | rt_refresh_begin(c->c.table, &c->c); |
780 | break; | |
0c791f87 | 781 | |
5bd73431 OZ |
782 | case BGP_GRS_LLGR: |
783 | rt_refresh_begin(c->c.table, &c->c); | |
784 | rt_modify_stale(c->c.table, &c->c); | |
785 | break; | |
786 | } | |
d15b0b0a OZ |
787 | } |
788 | else | |
789 | { | |
790 | /* Just flush the routes */ | |
791 | rt_refresh_begin(c->c.table, &c->c); | |
792 | rt_refresh_end(c->c.table, &c->c); | |
793 | } | |
7fc55925 OZ |
794 | |
795 | /* Reset bucket and prefix tables */ | |
796 | bgp_free_bucket_table(c); | |
797 | bgp_free_prefix_table(c); | |
798 | bgp_init_bucket_table(c); | |
799 | bgp_init_prefix_table(c); | |
800 | c->packets_to_send = 0; | |
d15b0b0a OZ |
801 | } |
802 | ||
e62cd033 OZ |
803 | /* p->gr_ready -> at least one active channel is c->gr_ready */ |
804 | ASSERT(p->gr_active_num > 0); | |
805 | ||
d15b0b0a | 806 | proto_notify_state(&p->p, PS_START); |
5bd73431 | 807 | tm_start(p->gr_timer, p->conn->remote_caps->gr_time S); |
0c791f87 OZ |
808 | } |
809 | ||
6eda3f13 OZ |
810 | /** |
811 | * bgp_graceful_restart_done - finish active BGP graceful restart | |
d15b0b0a | 812 | * @c: BGP channel |
6eda3f13 OZ |
813 | * |
814 | * This function is called when the active BGP graceful restart of the neighbor | |
d15b0b0a OZ |
815 | * should be finished for channel @c - either successfully (the neighbor sends |
816 | * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or | |
817 | * unsuccessfully (the neighbor does not support BGP graceful restart on the new | |
818 | * session). The function ends the routing table refresh cycle. | |
6eda3f13 | 819 | */ |
0c791f87 | 820 | void |
d15b0b0a | 821 | bgp_graceful_restart_done(struct bgp_channel *c) |
0c791f87 | 822 | { |
d15b0b0a OZ |
823 | struct bgp_proto *p = (void *) c->c.proto; |
824 | ||
825 | ASSERT(c->gr_active); | |
826 | c->gr_active = 0; | |
827 | p->gr_active_num--; | |
828 | ||
829 | if (!p->gr_active_num) | |
830 | BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); | |
831 | ||
5bd73431 | 832 | tm_stop(c->stale_timer); |
d15b0b0a | 833 | rt_refresh_end(c->c.table, &c->c); |
0c791f87 OZ |
834 | } |
835 | ||
6eda3f13 OZ |
836 | /** |
837 | * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer' | |
838 | * @t: timer | |
839 | * | |
840 | * This function is a timeout hook for @gr_timer, implementing BGP restart time | |
841 | * limit for reestablisment of the BGP session after the graceful restart. When | |
842 | * fired, we just proceed with the usual protocol restart. | |
843 | */ | |
844 | ||
0c791f87 OZ |
845 | static void |
846 | bgp_graceful_restart_timeout(timer *t) | |
847 | { | |
848 | struct bgp_proto *p = t->data; | |
849 | ||
850 | BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout"); | |
5bd73431 OZ |
851 | |
852 | if (p->llgr_ready) | |
853 | { | |
854 | struct bgp_channel *c; | |
54430df9 | 855 | BGP_WALK_CHANNELS(p, c) |
5bd73431 OZ |
856 | { |
857 | /* Channel is not in GR and is already flushed */ | |
858 | if (!c->gr_active) | |
859 | continue; | |
860 | ||
861 | /* Channel is already in LLGR from past restart */ | |
862 | if (c->gr_active == BGP_GRS_LLGR) | |
863 | continue; | |
864 | ||
865 | /* Channel is in GR, but does not support LLGR -> stop GR */ | |
866 | if (!c->stale_time) | |
867 | { | |
868 | bgp_graceful_restart_done(c); | |
869 | continue; | |
870 | } | |
871 | ||
872 | /* Channel is in GR, and supports LLGR -> start LLGR */ | |
873 | c->gr_active = BGP_GRS_LLGR; | |
874 | tm_start(c->stale_timer, c->stale_time S); | |
875 | rt_modify_stale(c->c.table, &c->c); | |
876 | } | |
877 | } | |
878 | else | |
a848dad4 | 879 | { |
5bd73431 | 880 | bgp_stop(p, 0, NULL, 0); |
4adebdf1 | 881 | bmp_peer_down(p, BE_NONE, NULL, 0); |
a848dad4 | 882 | } |
5bd73431 OZ |
883 | } |
884 | ||
885 | static void | |
886 | bgp_long_lived_stale_timeout(timer *t) | |
887 | { | |
888 | struct bgp_channel *c = t->data; | |
889 | struct bgp_proto *p = (void *) c->c.proto; | |
890 | ||
891 | BGP_TRACE(D_EVENTS, "Long-lived stale timeout"); | |
892 | ||
893 | bgp_graceful_restart_done(c); | |
0c791f87 OZ |
894 | } |
895 | ||
9aed29e6 OZ |
896 | |
897 | /** | |
898 | * bgp_refresh_begin - start incoming enhanced route refresh sequence | |
d15b0b0a | 899 | * @c: BGP channel |
9aed29e6 OZ |
900 | * |
901 | * This function is called when an incoming enhanced route refresh sequence is | |
902 | * started by the neighbor, demarcated by the BoRR packet. The function updates | |
903 | * the load state and starts the routing table refresh cycle. Note that graceful | |
904 | * restart also uses routing table refresh cycle, but RFC 7313 and load states | |
905 | * ensure that these two sequences do not overlap. | |
906 | */ | |
907 | void | |
d15b0b0a | 908 | bgp_refresh_begin(struct bgp_channel *c) |
9aed29e6 | 909 | { |
d15b0b0a OZ |
910 | struct bgp_proto *p = (void *) c->c.proto; |
911 | ||
912 | if (c->load_state == BFS_LOADING) | |
913 | { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } | |
9aed29e6 | 914 | |
d15b0b0a OZ |
915 | c->load_state = BFS_REFRESHING; |
916 | rt_refresh_begin(c->c.table, &c->c); | |
682d3f7d OZ |
917 | |
918 | if (c->c.in_table) | |
919 | rt_refresh_begin(c->c.in_table, &c->c); | |
9aed29e6 OZ |
920 | } |
921 | ||
922 | /** | |
923 | * bgp_refresh_end - finish incoming enhanced route refresh sequence | |
d15b0b0a | 924 | * @c: BGP channel |
9aed29e6 OZ |
925 | * |
926 | * This function is called when an incoming enhanced route refresh sequence is | |
927 | * finished by the neighbor, demarcated by the EoRR packet. The function updates | |
928 | * the load state and ends the routing table refresh cycle. Routes not received | |
929 | * during the sequence are removed by the nest. | |
930 | */ | |
931 | void | |
d15b0b0a | 932 | bgp_refresh_end(struct bgp_channel *c) |
9aed29e6 | 933 | { |
d15b0b0a | 934 | struct bgp_proto *p = (void *) c->c.proto; |
9aed29e6 | 935 | |
d15b0b0a OZ |
936 | if (c->load_state != BFS_REFRESHING) |
937 | { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } | |
938 | ||
939 | c->load_state = BFS_NONE; | |
940 | rt_refresh_end(c->c.table, &c->c); | |
682d3f7d OZ |
941 | |
942 | if (c->c.in_table) | |
943 | rt_prune_sync(c->c.in_table, 0); | |
9aed29e6 OZ |
944 | } |
945 | ||
946 | ||
c01e3741 MM |
947 | static void |
948 | bgp_send_open(struct bgp_conn *conn) | |
949 | { | |
950 | DBG("BGP: Sending open\n"); | |
951 | conn->sk->rx_hook = bgp_rx; | |
b552ecc4 | 952 | conn->sk->tx_hook = bgp_tx; |
a6f79ca5 | 953 | tm_stop(conn->connect_timer); |
4a50c8bd | 954 | bgp_prepare_capabilities(conn); |
d15b0b0a | 955 | bgp_schedule_packet(conn, NULL, PKT_OPEN); |
cf31112f | 956 | bgp_conn_set_state(conn, BS_OPENSENT); |
3fdbafb6 | 957 | bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); |
c01e3741 MM |
958 | } |
959 | ||
3fdbafb6 MM |
960 | static void |
961 | bgp_connected(sock *sk) | |
c01e3741 MM |
962 | { |
963 | struct bgp_conn *conn = sk->data; | |
85368cd4 | 964 | struct bgp_proto *p = conn->bgp; |
c01e3741 | 965 | |
85368cd4 | 966 | BGP_TRACE(D_EVENTS, "Connected"); |
c01e3741 | 967 | bgp_send_open(conn); |
c01e3741 MM |
968 | } |
969 | ||
970 | static void | |
971 | bgp_connect_timeout(timer *t) | |
972 | { | |
3fdbafb6 | 973 | struct bgp_conn *conn = t->data; |
85368cd4 | 974 | struct bgp_proto *p = conn->bgp; |
c01e3741 | 975 | |
85368cd4 | 976 | DBG("BGP: connect_timeout\n"); |
11b32d91 | 977 | if (p->p.proto_state == PS_START) |
d15b0b0a OZ |
978 | { |
979 | bgp_close_conn(conn); | |
980 | bgp_connect(p); | |
981 | } | |
11b32d91 OZ |
982 | else |
983 | bgp_conn_enter_idle_state(conn); | |
c01e3741 MM |
984 | } |
985 | ||
986 | static void | |
3fdbafb6 | 987 | bgp_sock_err(sock *sk, int err) |
c01e3741 MM |
988 | { |
989 | struct bgp_conn *conn = sk->data; | |
85368cd4 | 990 | struct bgp_proto *p = conn->bgp; |
c01e3741 | 991 | |
47597724 OZ |
992 | /* |
993 | * This error hook may be called either asynchronously from main | |
994 | * loop, or synchronously from sk_send(). But sk_send() is called | |
995 | * only from bgp_tx() and bgp_kick_tx(), which are both called | |
996 | * asynchronously from main loop. Moreover, they end if err hook is | |
997 | * called. Therefore, we could suppose that it is always called | |
998 | * asynchronously. | |
999 | */ | |
1000 | ||
11b32d91 OZ |
1001 | bgp_store_error(p, conn, BE_SOCKET, err); |
1002 | ||
53943a00 MM |
1003 | if (err) |
1004 | BGP_TRACE(D_EVENTS, "Connection lost (%M)", err); | |
1005 | else | |
a848dad4 | 1006 | { |
53943a00 | 1007 | BGP_TRACE(D_EVENTS, "Connection closed"); |
4adebdf1 | 1008 | bmp_peer_down(p, BE_SOCKET, NULL, 0); |
a848dad4 | 1009 | } |
11b32d91 | 1010 | |
0c791f87 OZ |
1011 | if ((conn->state == BS_ESTABLISHED) && p->gr_ready) |
1012 | bgp_handle_graceful_restart(p); | |
1013 | ||
11b32d91 | 1014 | bgp_conn_enter_idle_state(conn); |
c01e3741 MM |
1015 | } |
1016 | ||
3fdbafb6 MM |
1017 | static void |
1018 | bgp_hold_timeout(timer *t) | |
1019 | { | |
1020 | struct bgp_conn *conn = t->data; | |
48b15ef1 | 1021 | struct bgp_proto *p = conn->bgp; |
3fdbafb6 | 1022 | |
ea89da38 OZ |
1023 | DBG("BGP: Hold timeout\n"); |
1024 | ||
48b15ef1 OZ |
1025 | /* We are already closing the connection - just do hangup */ |
1026 | if (conn->state == BS_CLOSE) | |
1027 | { | |
1028 | BGP_TRACE(D_EVENTS, "Connection stalled"); | |
1029 | bgp_conn_enter_idle_state(conn); | |
1030 | return; | |
1031 | } | |
1032 | ||
ea89da38 OZ |
1033 | /* If there is something in input queue, we are probably congested |
1034 | and perhaps just not processed BGP packets in time. */ | |
1035 | ||
1036 | if (sk_rx_ready(conn->sk) > 0) | |
1037 | bgp_start_timer(conn->hold_timer, 10); | |
5bd73431 OZ |
1038 | else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready) |
1039 | { | |
1040 | BGP_TRACE(D_EVENTS, "Hold timer expired"); | |
1041 | bgp_handle_graceful_restart(p); | |
1042 | bgp_conn_enter_idle_state(conn); | |
1043 | } | |
ea89da38 OZ |
1044 | else |
1045 | bgp_error(conn, 4, 0, NULL, 0); | |
3fdbafb6 MM |
1046 | } |
1047 | ||
1048 | static void | |
1049 | bgp_keepalive_timeout(timer *t) | |
1050 | { | |
1051 | struct bgp_conn *conn = t->data; | |
1052 | ||
1053 | DBG("BGP: Keepalive timer\n"); | |
d15b0b0a | 1054 | bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE); |
bd22d7f4 OZ |
1055 | |
1056 | /* Kick TX a bit faster */ | |
1057 | if (ev_active(conn->tx_ev)) | |
1058 | ev_run(conn->tx_ev); | |
3fdbafb6 MM |
1059 | } |
1060 | ||
c01e3741 | 1061 | static void |
6fd766c1 | 1062 | bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) |
c01e3741 | 1063 | { |
6fd766c1 | 1064 | conn->sk = NULL; |
c01e3741 | 1065 | conn->bgp = p; |
d15b0b0a | 1066 | |
72a6ef11 | 1067 | conn->packets_to_send = 0; |
d15b0b0a OZ |
1068 | conn->channels_to_send = 0; |
1069 | conn->last_channel = 0; | |
1070 | conn->last_channel_count = 0; | |
1071 | ||
a6f79ca5 OZ |
1072 | conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0); |
1073 | conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0); | |
1074 | conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0); | |
c01e3741 | 1075 | |
961671c0 | 1076 | conn->tx_ev = ev_new_init(p->p.pool, bgp_kick_tx, conn); |
c01e3741 MM |
1077 | } |
1078 | ||
6fd766c1 | 1079 | static void |
e81b440f | 1080 | bgp_setup_sk(struct bgp_conn *conn, sock *s) |
6fd766c1 MM |
1081 | { |
1082 | s->data = conn; | |
6fd766c1 | 1083 | s->err_hook = bgp_sock_err; |
9e7b3ebd | 1084 | s->fast_rx = 1; |
6fd766c1 MM |
1085 | conn->sk = s; |
1086 | } | |
1087 | ||
11b32d91 | 1088 | static void |
dd91e467 | 1089 | bgp_active(struct bgp_proto *p) |
11b32d91 | 1090 | { |
6cf72d7a | 1091 | int delay = MAX(1, p->cf->connect_delay_time); |
11b32d91 OZ |
1092 | struct bgp_conn *conn = &p->outgoing_conn; |
1093 | ||
1094 | BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); | |
1095 | bgp_setup_conn(p, conn); | |
cf31112f | 1096 | bgp_conn_set_state(conn, BS_ACTIVE); |
d15b0b0a | 1097 | bgp_start_timer(conn->connect_timer, delay); |
11b32d91 OZ |
1098 | } |
1099 | ||
54e55169 MM |
1100 | /** |
1101 | * bgp_connect - initiate an outgoing connection | |
1102 | * @p: BGP instance | |
1103 | * | |
1104 | * The bgp_connect() function creates a new &bgp_conn and initiates | |
1105 | * a TCP connection to the peer. The rest of connection setup is governed | |
1106 | * by the BGP state machine as described in the standard. | |
1107 | */ | |
c01e3741 MM |
1108 | static void |
1109 | bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */ | |
1110 | { | |
b552ecc4 | 1111 | struct bgp_conn *conn = &p->outgoing_conn; |
b1b19433 | 1112 | int hops = p->cf->multihop ? : 1; |
c01e3741 MM |
1113 | |
1114 | DBG("BGP: Connecting\n"); | |
d15b0b0a | 1115 | sock *s = sk_new(p->p.pool); |
c01e3741 | 1116 | s->type = SK_TCP_ACTIVE; |
a22c3e59 OZ |
1117 | s->saddr = p->local_ip; |
1118 | s->daddr = p->remote_ip; | |
dcde7ae5 | 1119 | s->dport = p->cf->remote_port; |
53ffbff3 | 1120 | s->iface = p->neigh ? p->neigh->iface : NULL; |
943478b0 | 1121 | s->vrf = p->p.vrf; |
b1b19433 | 1122 | s->ttl = p->cf->ttl_security ? 255 : hops; |
06e0d1b6 OZ |
1123 | s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE; |
1124 | s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE; | |
a39b165e OZ |
1125 | s->tos = IP_PREC_INTERNET_CONTROL; |
1126 | s->password = p->cf->password; | |
1127 | s->tx_hook = bgp_connected; | |
2b712554 | 1128 | s->flags = p->cf->free_bind ? SKF_FREEBIND : 0; |
470740f9 OZ |
1129 | BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", |
1130 | s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL, | |
88a183c6 | 1131 | s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL); |
6fd766c1 | 1132 | bgp_setup_conn(p, conn); |
e81b440f | 1133 | bgp_setup_sk(conn, s); |
cf31112f | 1134 | bgp_conn_set_state(conn, BS_CONNECT); |
b1b19433 OZ |
1135 | |
1136 | if (sk_open(s) < 0) | |
05476c4d | 1137 | goto err; |
b1b19433 OZ |
1138 | |
1139 | /* Set minimal receive TTL if needed */ | |
1140 | if (p->cf->ttl_security) | |
b1b19433 | 1141 | if (sk_set_min_ttl(s, 256 - hops) < 0) |
05476c4d | 1142 | goto err; |
b1b19433 | 1143 | |
c01e3741 | 1144 | DBG("BGP: Waiting for connect success\n"); |
d15b0b0a | 1145 | bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time); |
05476c4d OZ |
1146 | return; |
1147 | ||
d15b0b0a | 1148 | err: |
05476c4d OZ |
1149 | sk_log_error(s, p->p.name); |
1150 | bgp_sock_err(s, 0); | |
1151 | return; | |
c01e3741 MM |
1152 | } |
1153 | ||
e0835db4 OZ |
1154 | static inline int bgp_is_dynamic(struct bgp_proto *p) |
1155 | { return ipa_zero(p->remote_ip); } | |
1156 | ||
374917ad OZ |
1157 | /** |
1158 | * bgp_find_proto - find existing proto for incoming connection | |
1159 | * @sk: TCP socket | |
1160 | * | |
1161 | */ | |
1162 | static struct bgp_proto * | |
1163 | bgp_find_proto(sock *sk) | |
1164 | { | |
e0835db4 | 1165 | struct bgp_proto *best = NULL; |
d15b0b0a | 1166 | struct bgp_proto *p; |
374917ad | 1167 | |
470740f9 OZ |
1168 | /* sk->iface is valid only if src or dst address is link-local */ |
1169 | int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr); | |
1170 | ||
d15b0b0a OZ |
1171 | WALK_LIST(p, proto_list) |
1172 | if ((p->p.proto == &proto_bgp) && | |
e0835db4 OZ |
1173 | (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) && |
1174 | (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) && | |
1175 | (p->p.vrf == sk->vrf) && | |
1176 | (p->cf->local_port == sk->sport) && | |
470740f9 OZ |
1177 | (!link || (p->cf->iface == sk->iface)) && |
1178 | (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr))) | |
e0835db4 OZ |
1179 | { |
1180 | best = p; | |
374917ad | 1181 | |
e0835db4 OZ |
1182 | if (!bgp_is_dynamic(p)) |
1183 | break; | |
1184 | } | |
1185 | ||
1186 | return best; | |
374917ad OZ |
1187 | } |
1188 | ||
54e55169 MM |
1189 | /** |
1190 | * bgp_incoming_connection - handle an incoming connection | |
1191 | * @sk: TCP socket | |
1192 | * @dummy: unused | |
1193 | * | |
1194 | * This function serves as a socket hook for accepting of new BGP | |
1195 | * connections. It searches a BGP instance corresponding to the peer | |
1196 | * which has connected and if such an instance exists, it creates a | |
1197 | * &bgp_conn structure, attaches it to the instance and either sends | |
1198 | * an Open message or (if there already is an active connection) it | |
1199 | * closes the new connection by sending a Notification message. | |
1200 | */ | |
48e842cc | 1201 | static int |
3e236955 | 1202 | bgp_incoming_connection(sock *sk, uint dummy UNUSED) |
c01e3741 | 1203 | { |
374917ad OZ |
1204 | struct bgp_proto *p; |
1205 | int acc, hops; | |
c01e3741 | 1206 | |
48e842cc | 1207 | DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport); |
374917ad OZ |
1208 | p = bgp_find_proto(sk); |
1209 | if (!p) | |
d15b0b0a OZ |
1210 | { |
1211 | log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)", | |
1212 | sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport); | |
1213 | rfree(sk); | |
1214 | return 0; | |
1215 | } | |
374917ad | 1216 | |
487c6961 OZ |
1217 | /* |
1218 | * BIRD should keep multiple incoming connections in OpenSent state (for | |
1219 | * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming | |
1220 | * connections are rejected istead. The exception is the case where an | |
1221 | * incoming connection triggers a graceful restart. | |
1222 | */ | |
1223 | ||
374917ad OZ |
1224 | acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && |
1225 | (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk); | |
dd91e467 | 1226 | |
374917ad | 1227 | if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready) |
d15b0b0a OZ |
1228 | { |
1229 | bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART); | |
1230 | bgp_handle_graceful_restart(p); | |
1231 | bgp_conn_enter_idle_state(p->conn); | |
1232 | acc = 1; | |
1233 | ||
1234 | /* There might be separate incoming connection in OpenSent state */ | |
1235 | if (p->incoming_conn.state > BS_ACTIVE) | |
1236 | bgp_close_conn(&p->incoming_conn); | |
1237 | } | |
374917ad OZ |
1238 | |
1239 | BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s", | |
1240 | sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, | |
1241 | sk->dport, acc ? "accepted" : "rejected"); | |
1242 | ||
1243 | if (!acc) | |
d15b0b0a OZ |
1244 | { |
1245 | rfree(sk); | |
1246 | return 0; | |
1247 | } | |
374917ad OZ |
1248 | |
1249 | hops = p->cf->multihop ? : 1; | |
1250 | ||
1251 | if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0) | |
1252 | goto err; | |
1253 | ||
1254 | if (p->cf->ttl_security) | |
1255 | if (sk_set_min_ttl(sk, 256 - hops) < 0) | |
1256 | goto err; | |
1257 | ||
06e0d1b6 | 1258 | if (p->cf->enable_extended_messages) |
d15b0b0a OZ |
1259 | { |
1260 | sk->rbsize = BGP_RX_BUFFER_EXT_SIZE; | |
1261 | sk->tbsize = BGP_TX_BUFFER_EXT_SIZE; | |
1262 | sk_reallocate(sk); | |
1263 | } | |
06e0d1b6 | 1264 | |
e0835db4 OZ |
1265 | /* For dynamic BGP, spawn new instance and postpone the socket */ |
1266 | if (bgp_is_dynamic(p)) | |
1267 | { | |
1268 | p = bgp_spawn(p, sk->daddr); | |
1269 | p->postponed_sk = sk; | |
1270 | rmove(sk, p->p.pool); | |
1271 | return 0; | |
1272 | } | |
1273 | ||
1274 | rmove(sk, p->p.pool); | |
374917ad OZ |
1275 | bgp_setup_conn(p, &p->incoming_conn); |
1276 | bgp_setup_sk(&p->incoming_conn, sk); | |
1277 | bgp_send_open(&p->incoming_conn); | |
1278 | return 0; | |
1279 | ||
1280 | err: | |
1281 | sk_log_error(sk, p->p.name); | |
1282 | log(L_ERR "%s: Incoming connection aborted", p->p.name); | |
48e842cc MM |
1283 | rfree(sk); |
1284 | return 0; | |
1285 | } | |
1286 | ||
2af25a97 | 1287 | static void |
e81b440f | 1288 | bgp_listen_sock_err(sock *sk UNUSED, int err) |
2af25a97 OZ |
1289 | { |
1290 | if (err == ECONNABORTED) | |
1291 | log(L_WARN "BGP: Incoming connection aborted"); | |
1292 | else | |
a34b0934 | 1293 | log(L_ERR "BGP: Error on listening socket: %M", err); |
2af25a97 OZ |
1294 | } |
1295 | ||
acfce55c MM |
1296 | static void |
1297 | bgp_start_neighbor(struct bgp_proto *p) | |
1298 | { | |
9be9a264 OZ |
1299 | /* Called only for single-hop BGP sessions */ |
1300 | ||
a22c3e59 OZ |
1301 | if (ipa_zero(p->local_ip)) |
1302 | p->local_ip = p->neigh->ifa->ip; | |
ad440a57 | 1303 | |
a22c3e59 OZ |
1304 | if (ipa_is_link_local(p->local_ip)) |
1305 | p->link_addr = p->local_ip; | |
153f02da OZ |
1306 | else if (p->neigh->iface->llv6) |
1307 | p->link_addr = p->neigh->iface->llv6->ip; | |
11b32d91 | 1308 | |
6fd766c1 | 1309 | bgp_initiate(p); |
48e842cc MM |
1310 | } |
1311 | ||
1312 | static void | |
1313 | bgp_neigh_notify(neighbor *n) | |
1314 | { | |
1315 | struct bgp_proto *p = (struct bgp_proto *) n->proto; | |
523f020b OZ |
1316 | int ps = p->p.proto_state; |
1317 | ||
1318 | if (n != p->neigh) | |
1319 | return; | |
48e842cc | 1320 | |
523f020b | 1321 | if ((ps == PS_DOWN) || (ps == PS_STOP)) |
b21955e0 OZ |
1322 | return; |
1323 | ||
523f020b OZ |
1324 | int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE); |
1325 | ||
1326 | if (n->scope <= 0) | |
d15b0b0a OZ |
1327 | { |
1328 | if (!prepare) | |
48e842cc | 1329 | { |
d15b0b0a OZ |
1330 | BGP_TRACE(D_EVENTS, "Neighbor lost"); |
1331 | bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); | |
1332 | /* Perhaps also run bgp_update_startup_delay(p)? */ | |
830ba75e | 1333 | bgp_stop(p, 0, NULL, 0); |
4adebdf1 | 1334 | bmp_peer_down(p, BE_MISC, NULL, 0); |
523f020b | 1335 | } |
d15b0b0a | 1336 | } |
523f020b | 1337 | else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP)) |
d15b0b0a OZ |
1338 | { |
1339 | if (!prepare) | |
523f020b | 1340 | { |
d15b0b0a OZ |
1341 | BGP_TRACE(D_EVENTS, "Link down"); |
1342 | bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN); | |
1343 | if (ps == PS_UP) | |
1344 | bgp_update_startup_delay(p); | |
830ba75e | 1345 | bgp_stop(p, 0, NULL, 0); |
4adebdf1 | 1346 | bmp_peer_down(p, BE_MISC, NULL, 0); |
48e842cc | 1347 | } |
d15b0b0a | 1348 | } |
48e842cc | 1349 | else |
d15b0b0a OZ |
1350 | { |
1351 | if (prepare) | |
48e842cc | 1352 | { |
d15b0b0a OZ |
1353 | BGP_TRACE(D_EVENTS, "Neighbor ready"); |
1354 | bgp_start_neighbor(p); | |
48e842cc | 1355 | } |
d15b0b0a | 1356 | } |
48e842cc MM |
1357 | } |
1358 | ||
1ec52253 OZ |
1359 | static void |
1360 | bgp_bfd_notify(struct bfd_request *req) | |
1361 | { | |
1362 | struct bgp_proto *p = req->data; | |
1363 | int ps = p->p.proto_state; | |
1364 | ||
1365 | if (req->down && ((ps == PS_START) || (ps == PS_UP))) | |
d15b0b0a OZ |
1366 | { |
1367 | BGP_TRACE(D_EVENTS, "BFD session down"); | |
1368 | bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN); | |
5bd73431 | 1369 | |
9d3fc306 | 1370 | if (req->opts.mode == BGP_BFD_GRACEFUL) |
5bd73431 OZ |
1371 | { |
1372 | /* Trigger graceful restart */ | |
1373 | if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready) | |
1374 | bgp_handle_graceful_restart(p); | |
1375 | ||
1376 | if (p->incoming_conn.state > BS_IDLE) | |
1377 | bgp_conn_enter_idle_state(&p->incoming_conn); | |
1378 | ||
1379 | if (p->outgoing_conn.state > BS_IDLE) | |
1380 | bgp_conn_enter_idle_state(&p->outgoing_conn); | |
1381 | } | |
1382 | else | |
1383 | { | |
1384 | /* Trigger session down */ | |
1385 | if (ps == PS_UP) | |
1386 | bgp_update_startup_delay(p); | |
1387 | bgp_stop(p, 0, NULL, 0); | |
4adebdf1 | 1388 | bmp_peer_down(p, BE_MISC, NULL, 0); |
5bd73431 | 1389 | } |
d15b0b0a | 1390 | } |
1ec52253 OZ |
1391 | } |
1392 | ||
1393 | static void | |
9d3fc306 | 1394 | bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd) |
1ec52253 | 1395 | { |
9d3fc306 OZ |
1396 | if (bfd && p->bfd_req) |
1397 | bfd_update_request(p->bfd_req, bfd); | |
1398 | ||
1399 | if (bfd && !p->bfd_req && !bgp_is_dynamic(p)) | |
a22c3e59 | 1400 | p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip, |
1ec52253 | 1401 | p->cf->multihop ? NULL : p->neigh->iface, |
9d3fc306 | 1402 | p->p.vrf, bgp_bfd_notify, p, bfd); |
1ec52253 | 1403 | |
9d3fc306 | 1404 | if (!bfd && p->bfd_req) |
d15b0b0a OZ |
1405 | { |
1406 | rfree(p->bfd_req); | |
1407 | p->bfd_req = NULL; | |
1408 | } | |
1ec52253 OZ |
1409 | } |
1410 | ||
d15b0b0a OZ |
1411 | static void |
1412 | bgp_reload_routes(struct channel *C) | |
bf47fe4b | 1413 | { |
d15b0b0a OZ |
1414 | struct bgp_proto *p = (void *) C->proto; |
1415 | struct bgp_channel *c = (void *) C; | |
bf47fe4b | 1416 | |
54430df9 OZ |
1417 | /* Ignore non-BGP channels */ |
1418 | if (C->channel != &channel_bgp) | |
1419 | return; | |
1420 | ||
682d3f7d | 1421 | ASSERT(p->conn && (p->route_refresh || c->c.in_table)); |
d15b0b0a | 1422 | |
682d3f7d OZ |
1423 | if (c->c.in_table) |
1424 | channel_schedule_reload(C); | |
1425 | else | |
1426 | bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); | |
bf47fe4b OZ |
1427 | } |
1428 | ||
0c791f87 | 1429 | static void |
d15b0b0a | 1430 | bgp_feed_begin(struct channel *C, int initial) |
0c791f87 | 1431 | { |
d15b0b0a OZ |
1432 | struct bgp_proto *p = (void *) C->proto; |
1433 | struct bgp_channel *c = (void *) C; | |
9aed29e6 | 1434 | |
54430df9 OZ |
1435 | /* Ignore non-BGP channels */ |
1436 | if (C->channel != &channel_bgp) | |
1437 | return; | |
1438 | ||
9aed29e6 OZ |
1439 | /* This should not happen */ |
1440 | if (!p->conn) | |
0c791f87 OZ |
1441 | return; |
1442 | ||
9aed29e6 | 1443 | if (initial && p->cf->gr_mode) |
d15b0b0a | 1444 | c->feed_state = BFS_LOADING; |
9aed29e6 OZ |
1445 | |
1446 | /* It is refeed and both sides support enhanced route refresh */ | |
d15b0b0a OZ |
1447 | if (!initial && p->enhanced_refresh) |
1448 | { | |
1449 | /* BoRR must not be sent before End-of-RIB */ | |
1450 | if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED) | |
1451 | return; | |
9aed29e6 | 1452 | |
d15b0b0a OZ |
1453 | c->feed_state = BFS_REFRESHING; |
1454 | bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH); | |
1455 | } | |
9aed29e6 OZ |
1456 | } |
1457 | ||
1458 | static void | |
d15b0b0a | 1459 | bgp_feed_end(struct channel *C) |
9aed29e6 | 1460 | { |
d15b0b0a OZ |
1461 | struct bgp_proto *p = (void *) C->proto; |
1462 | struct bgp_channel *c = (void *) C; | |
9aed29e6 | 1463 | |
54430df9 OZ |
1464 | /* Ignore non-BGP channels */ |
1465 | if (C->channel != &channel_bgp) | |
1466 | return; | |
1467 | ||
9aed29e6 OZ |
1468 | /* This should not happen */ |
1469 | if (!p->conn) | |
1470 | return; | |
1471 | ||
1472 | /* Non-demarcated feed ended, nothing to do */ | |
d15b0b0a | 1473 | if (c->feed_state == BFS_NONE) |
9aed29e6 OZ |
1474 | return; |
1475 | ||
1476 | /* Schedule End-of-RIB packet */ | |
d15b0b0a OZ |
1477 | if (c->feed_state == BFS_LOADING) |
1478 | c->feed_state = BFS_LOADED; | |
9aed29e6 OZ |
1479 | |
1480 | /* Schedule EoRR packet */ | |
d15b0b0a OZ |
1481 | if (c->feed_state == BFS_REFRESHING) |
1482 | c->feed_state = BFS_REFRESHED; | |
9aed29e6 OZ |
1483 | |
1484 | /* Kick TX hook */ | |
d15b0b0a | 1485 | bgp_schedule_packet(p->conn, c, PKT_UPDATE); |
0c791f87 OZ |
1486 | } |
1487 | ||
9aed29e6 | 1488 | |
48e842cc MM |
1489 | static void |
1490 | bgp_start_locked(struct object_lock *lock) | |
1491 | { | |
1492 | struct bgp_proto *p = lock->data; | |
a22c3e59 | 1493 | const struct bgp_config *cf = p->cf; |
48e842cc | 1494 | |
11b32d91 | 1495 | if (p->p.proto_state != PS_START) |
d15b0b0a OZ |
1496 | { |
1497 | DBG("BGP: Got lock in different state %d\n", p->p.proto_state); | |
1498 | return; | |
1499 | } | |
11b32d91 | 1500 | |
48e842cc | 1501 | DBG("BGP: Got lock\n"); |
4847a894 | 1502 | |
e0835db4 | 1503 | if (cf->multihop || bgp_is_dynamic(p)) |
d15b0b0a OZ |
1504 | { |
1505 | /* Multi-hop sessions do not use neighbor entries */ | |
1506 | bgp_initiate(p); | |
1507 | return; | |
1508 | } | |
4847a894 | 1509 | |
a22c3e59 | 1510 | neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY); |
523f020b | 1511 | if (!n) |
d15b0b0a | 1512 | { |
a22c3e59 | 1513 | log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface); |
d15b0b0a OZ |
1514 | /* As we do not start yet, we can just disable protocol */ |
1515 | p->p.disabled = 1; | |
1516 | bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); | |
1517 | proto_notify_state(&p->p, PS_DOWN); | |
1518 | return; | |
1519 | } | |
523f020b OZ |
1520 | |
1521 | p->neigh = n; | |
1522 | ||
1523 | if (n->scope <= 0) | |
a22c3e59 | 1524 | BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface); |
523f020b OZ |
1525 | else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP)) |
1526 | BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name); | |
1527 | else | |
1528 | bgp_start_neighbor(p); | |
c01e3741 MM |
1529 | } |
1530 | ||
2638249d MM |
1531 | static int |
1532 | bgp_start(struct proto *P) | |
1533 | { | |
c01e3741 | 1534 | struct bgp_proto *p = (struct bgp_proto *) P; |
a22c3e59 OZ |
1535 | const struct bgp_config *cf = p->cf; |
1536 | ||
1537 | p->local_ip = cf->local_ip; | |
a22c3e59 OZ |
1538 | p->local_as = cf->local_as; |
1539 | p->remote_as = cf->remote_as; | |
1540 | p->public_as = cf->local_as; | |
1541 | ||
e0835db4 OZ |
1542 | /* For dynamic BGP childs, remote_ip is already set */ |
1543 | if (ipa_nonzero(cf->remote_ip)) | |
1544 | p->remote_ip = cf->remote_ip; | |
1545 | ||
a22c3e59 OZ |
1546 | /* Confederation ID is used for truly external peers */ |
1547 | if (p->cf->confederation && !p->is_interior) | |
1548 | p->public_as = cf->confederation; | |
c01e3741 | 1549 | |
e0835db4 OZ |
1550 | p->passive = cf->passive || bgp_is_dynamic(p); |
1551 | ||
11b32d91 | 1552 | p->start_state = BSS_PREPARE; |
b552ecc4 MM |
1553 | p->outgoing_conn.state = BS_IDLE; |
1554 | p->incoming_conn.state = BS_IDLE; | |
bcbdcbb6 | 1555 | p->neigh = NULL; |
1ec52253 | 1556 | p->bfd_req = NULL; |
e0835db4 | 1557 | p->postponed_sk = NULL; |
0c791f87 | 1558 | p->gr_ready = 0; |
d15b0b0a | 1559 | p->gr_active_num = 0; |
cfe34a31 | 1560 | |
21d09632 OZ |
1561 | /* Reset some stats */ |
1562 | p->stats.rx_messages = p->stats.tx_messages = 0; | |
1563 | p->stats.rx_updates = p->stats.tx_updates = 0; | |
1564 | p->stats.rx_bytes = p->stats.tx_bytes = 0; | |
1565 | p->last_rx_update = 0; | |
1566 | ||
961671c0 | 1567 | p->event = ev_new_init(p->p.pool, bgp_decision, p); |
a6f79ca5 OZ |
1568 | p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0); |
1569 | p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0); | |
0c791f87 | 1570 | |
4ef09506 OZ |
1571 | p->local_id = proto_get_router_id(P->cf); |
1572 | if (p->rr_client) | |
1573 | p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id; | |
1574 | ||
9be9a264 | 1575 | p->remote_id = 0; |
ef57b70f | 1576 | p->link_addr = IPA_NONE; |
9be9a264 | 1577 | |
7fc55925 | 1578 | /* Lock all channels when in GR recovery mode */ |
6eda3f13 | 1579 | if (p->p.gr_recovery && p->cf->gr_mode) |
d15b0b0a OZ |
1580 | { |
1581 | struct bgp_channel *c; | |
54430df9 | 1582 | BGP_WALK_CHANNELS(p, c) |
d15b0b0a OZ |
1583 | channel_graceful_restart_lock(&c->c); |
1584 | } | |
0c791f87 | 1585 | |
c01e3741 | 1586 | /* |
d15b0b0a OZ |
1587 | * Before attempting to create the connection, we need to lock the port, |
1588 | * so that we are the only instance attempting to talk with that neighbor. | |
c01e3741 | 1589 | */ |
a22c3e59 | 1590 | struct object_lock *lock; |
c01e3741 | 1591 | lock = p->lock = olock_new(P->pool); |
a22c3e59 | 1592 | lock->addr = p->remote_ip; |
dcde7ae5 | 1593 | lock->port = p->cf->remote_port; |
53ffbff3 | 1594 | lock->iface = p->cf->iface; |
9f4908fe | 1595 | lock->vrf = p->cf->iface ? NULL : p->p.vrf; |
c01e3741 | 1596 | lock->type = OBJLOCK_TCP; |
c01e3741 MM |
1597 | lock->hook = bgp_start_locked; |
1598 | lock->data = p; | |
eb1e43a9 OZ |
1599 | |
1600 | /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */ | |
1601 | if (bgp_is_dynamic(p)) | |
1602 | { | |
1603 | lock->addr = net_prefix(p->cf->remote_range); | |
1604 | lock->inst = 1; | |
1605 | } | |
1606 | ||
c01e3741 | 1607 | olock_acquire(lock); |
d51aa281 | 1608 | |
c01e3741 | 1609 | return PS_START; |
2638249d MM |
1610 | } |
1611 | ||
d9b77cc2 OZ |
1612 | extern int proto_restart; |
1613 | ||
2638249d MM |
1614 | static int |
1615 | bgp_shutdown(struct proto *P) | |
1616 | { | |
c01e3741 | 1617 | struct bgp_proto *p = (struct bgp_proto *) P; |
8a68316e | 1618 | int subcode = 0; |
c01e3741 | 1619 | |
cd1d9961 OZ |
1620 | char *message = NULL; |
1621 | byte *data = NULL; | |
1622 | uint len = 0; | |
c01e3741 | 1623 | |
85368cd4 | 1624 | BGP_TRACE(D_EVENTS, "Shutdown requested"); |
b99d3786 | 1625 | |
ebecb6f6 | 1626 | switch (P->down_code) |
d15b0b0a OZ |
1627 | { |
1628 | case PDC_CF_REMOVE: | |
1629 | case PDC_CF_DISABLE: | |
1630 | subcode = 3; // Errcode 6, 3 - peer de-configured | |
1631 | break; | |
1632 | ||
1633 | case PDC_CF_RESTART: | |
1634 | subcode = 6; // Errcode 6, 6 - other configuration change | |
1635 | break; | |
1636 | ||
1637 | case PDC_CMD_DISABLE: | |
1638 | case PDC_CMD_SHUTDOWN: | |
8a68316e | 1639 | shutdown: |
d15b0b0a | 1640 | subcode = 2; // Errcode 6, 2 - administrative shutdown |
830ba75e | 1641 | message = P->message; |
d15b0b0a OZ |
1642 | break; |
1643 | ||
1644 | case PDC_CMD_RESTART: | |
1645 | subcode = 4; // Errcode 6, 4 - administrative reset | |
830ba75e | 1646 | message = P->message; |
d15b0b0a OZ |
1647 | break; |
1648 | ||
8a68316e OZ |
1649 | case PDC_CMD_GR_DOWN: |
1650 | if ((p->cf->gr_mode != BGP_GR_ABLE) && | |
1651 | (p->cf->llgr_mode != BGP_LLGR_ABLE)) | |
1652 | goto shutdown; | |
1653 | ||
1654 | subcode = -1; // Do not send NOTIFICATION, just close the connection | |
1655 | break; | |
1656 | ||
d15b0b0a OZ |
1657 | case PDC_RX_LIMIT_HIT: |
1658 | case PDC_IN_LIMIT_HIT: | |
1659 | subcode = 1; // Errcode 6, 1 - max number of prefixes reached | |
1660 | /* log message for compatibility */ | |
1661 | log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name); | |
1662 | goto limit; | |
1663 | ||
1664 | case PDC_OUT_LIMIT_HIT: | |
1665 | subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown | |
1666 | ||
1667 | limit: | |
1668 | bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED); | |
1669 | if (proto_restart) | |
1670 | bgp_update_startup_delay(p); | |
1671 | else | |
1672 | p->startup_delay = 0; | |
1673 | goto done; | |
1674 | } | |
b99d3786 | 1675 | |
ebecb6f6 | 1676 | bgp_store_error(p, NULL, BE_MAN_DOWN, 0); |
11b32d91 | 1677 | p->startup_delay = 0; |
c01e3741 | 1678 | |
cd1d9961 OZ |
1679 | /* RFC 8203 - shutdown communication */ |
1680 | if (message) | |
1681 | { | |
1682 | uint msg_len = strlen(message); | |
7ff34ca2 | 1683 | msg_len = MIN(msg_len, 255); |
cd1d9961 OZ |
1684 | |
1685 | /* Buffer will be freed automatically by protocol shutdown */ | |
1686 | data = mb_alloc(p->p.pool, msg_len + 1); | |
1687 | len = msg_len + 1; | |
1688 | ||
1689 | data[0] = msg_len; | |
1690 | memcpy(data+1, message, msg_len); | |
1691 | } | |
1692 | ||
d15b0b0a | 1693 | done: |
cd1d9961 | 1694 | bgp_stop(p, subcode, data, len); |
11b32d91 | 1695 | return p->p.proto_state; |
2638249d MM |
1696 | } |
1697 | ||
48e842cc | 1698 | static struct proto * |
d15b0b0a | 1699 | bgp_init(struct proto_config *CF) |
48e842cc | 1700 | { |
d15b0b0a | 1701 | struct proto *P = proto_new(CF); |
48e842cc | 1702 | struct bgp_proto *p = (struct bgp_proto *) P; |
d15b0b0a | 1703 | struct bgp_config *cf = (struct bgp_config *) CF; |
48e842cc MM |
1704 | |
1705 | P->rt_notify = bgp_rt_notify; | |
14375237 | 1706 | P->preexport = bgp_preexport; |
48e842cc | 1707 | P->neigh_notify = bgp_neigh_notify; |
bf47fe4b | 1708 | P->reload_routes = bgp_reload_routes; |
9aed29e6 OZ |
1709 | P->feed_begin = bgp_feed_begin; |
1710 | P->feed_end = bgp_feed_end; | |
094d2bdb | 1711 | P->rte_better = bgp_rte_better; |
8d9eef17 | 1712 | P->rte_mergable = bgp_rte_mergable; |
d15b0b0a | 1713 | P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; |
5bd73431 | 1714 | P->rte_modify = bgp_rte_modify_stale; |
d471d5fc | 1715 | P->rte_igp_metric = bgp_rte_igp_metric; |
d15b0b0a OZ |
1716 | |
1717 | p->cf = cf; | |
d15b0b0a OZ |
1718 | p->is_internal = (cf->local_as == cf->remote_as); |
1719 | p->is_interior = p->is_internal || cf->confederation_member; | |
1720 | p->rs_client = cf->rs_client; | |
1721 | p->rr_client = cf->rr_client; | |
1722 | ||
e0835db4 OZ |
1723 | p->ipv4 = ipa_nonzero(cf->remote_ip) ? |
1724 | ipa_is_ip4(cf->remote_ip) : | |
1725 | (cf->remote_range && (cf->remote_range->type == NET_IP4)); | |
1726 | ||
1727 | p->remote_ip = cf->remote_ip; | |
1728 | p->remote_as = cf->remote_as; | |
1729 | ||
1730 | /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */ | |
1731 | if (cf->c.parent) | |
1732 | cf->remote_ip = IPA_NONE; | |
1733 | ||
d15b0b0a OZ |
1734 | /* Add all channels */ |
1735 | struct bgp_channel_config *cc; | |
54430df9 | 1736 | BGP_CF_WALK_CHANNELS(cf, cc) |
d15b0b0a | 1737 | proto_add_channel(P, &cc->c); |
9be9a264 | 1738 | |
48e842cc MM |
1739 | return P; |
1740 | } | |
1741 | ||
d15b0b0a OZ |
1742 | static void |
1743 | bgp_channel_init(struct channel *C, struct channel_config *CF) | |
1744 | { | |
1745 | struct bgp_channel *c = (void *) C; | |
1746 | struct bgp_channel_config *cf = (void *) CF; | |
1747 | ||
d15b0b0a OZ |
1748 | c->cf = cf; |
1749 | c->afi = cf->afi; | |
ef57b70f OZ |
1750 | c->desc = cf->desc; |
1751 | ||
1752 | if (cf->igp_table_ip4) | |
1753 | c->igp_table_ip4 = cf->igp_table_ip4->table; | |
1754 | ||
1755 | if (cf->igp_table_ip6) | |
1756 | c->igp_table_ip6 = cf->igp_table_ip6->table; | |
1f2eb2ac OZ |
1757 | |
1758 | if (cf->base_table) | |
1759 | c->base_table = cf->base_table->table; | |
d15b0b0a OZ |
1760 | } |
1761 | ||
1762 | static int | |
1763 | bgp_channel_start(struct channel *C) | |
1764 | { | |
1765 | struct bgp_proto *p = (void *) C->proto; | |
1766 | struct bgp_channel *c = (void *) C; | |
a22c3e59 | 1767 | ip_addr src = p->local_ip; |
d15b0b0a | 1768 | |
ef57b70f OZ |
1769 | if (c->igp_table_ip4) |
1770 | rt_lock_table(c->igp_table_ip4); | |
1771 | ||
1772 | if (c->igp_table_ip6) | |
1773 | rt_lock_table(c->igp_table_ip6); | |
d15b0b0a | 1774 | |
1f2eb2ac OZ |
1775 | if (c->base_table) |
1776 | { | |
1777 | rt_lock_table(c->base_table); | |
1778 | rt_flowspec_link(c->base_table, c->c.table); | |
1779 | } | |
1780 | ||
d15b0b0a OZ |
1781 | c->pool = p->p.pool; // XXXX |
1782 | bgp_init_bucket_table(c); | |
1783 | bgp_init_prefix_table(c); | |
1784 | ||
682d3f7d OZ |
1785 | if (c->cf->import_table) |
1786 | channel_setup_in_table(C); | |
1787 | ||
b7d7599c OZ |
1788 | if (c->cf->export_table) |
1789 | channel_setup_out_table(C); | |
1790 | ||
5bd73431 OZ |
1791 | c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0); |
1792 | ||
d15b0b0a OZ |
1793 | c->next_hop_addr = c->cf->next_hop_addr; |
1794 | c->link_addr = IPA_NONE; | |
1795 | c->packets_to_send = 0; | |
1796 | ||
1797 | /* Try to use source address as next hop address */ | |
1798 | if (ipa_zero(c->next_hop_addr)) | |
1799 | { | |
ef57b70f | 1800 | if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop)) |
d15b0b0a OZ |
1801 | c->next_hop_addr = src; |
1802 | ||
ef57b70f | 1803 | if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop)) |
d15b0b0a OZ |
1804 | c->next_hop_addr = src; |
1805 | } | |
1806 | ||
ccee67ca OZ |
1807 | /* Use preferred addresses associated with interface / source address */ |
1808 | if (ipa_zero(c->next_hop_addr)) | |
1809 | { | |
1810 | /* We know the iface for single-hop, we make lookup for multihop */ | |
586c1800 | 1811 | struct neighbor *nbr = p->neigh ?: neigh_find(&p->p, src, NULL, 0); |
ccee67ca OZ |
1812 | struct iface *iface = nbr ? nbr->iface : NULL; |
1813 | ||
1814 | if (bgp_channel_is_ipv4(c) && iface && iface->addr4) | |
1815 | c->next_hop_addr = iface->addr4->ip; | |
1816 | ||
1817 | if (bgp_channel_is_ipv6(c) && iface && iface->addr6) | |
1818 | c->next_hop_addr = iface->addr6->ip; | |
1819 | } | |
1820 | ||
ef57b70f OZ |
1821 | /* Exit if no feasible next hop address is found */ |
1822 | if (ipa_zero(c->next_hop_addr)) | |
1823 | { | |
1824 | log(L_WARN "%s: Missing next hop address", p->p.name); | |
1825 | return 0; | |
1826 | } | |
1827 | ||
d15b0b0a | 1828 | /* Set link-local address for IPv6 single-hop BGP */ |
ef57b70f | 1829 | if (ipa_is_ip6(c->next_hop_addr) && p->neigh) |
d15b0b0a OZ |
1830 | { |
1831 | c->link_addr = p->link_addr; | |
1832 | ||
1833 | if (ipa_zero(c->link_addr)) | |
1834 | log(L_WARN "%s: Missing link-local address", p->p.name); | |
1835 | } | |
1836 | ||
ef57b70f OZ |
1837 | /* Link local address is already in c->link_addr */ |
1838 | if (ipa_is_link_local(c->next_hop_addr)) | |
1839 | c->next_hop_addr = IPA_NONE; | |
d15b0b0a OZ |
1840 | |
1841 | return 0; /* XXXX: Currently undefined */ | |
1842 | } | |
1843 | ||
1844 | static void | |
1845 | bgp_channel_shutdown(struct channel *C) | |
1846 | { | |
1847 | struct bgp_channel *c = (void *) C; | |
1848 | ||
d15b0b0a OZ |
1849 | c->next_hop_addr = IPA_NONE; |
1850 | c->link_addr = IPA_NONE; | |
7fc55925 | 1851 | c->packets_to_send = 0; |
d15b0b0a OZ |
1852 | } |
1853 | ||
1854 | static void | |
1855 | bgp_channel_cleanup(struct channel *C) | |
1856 | { | |
1857 | struct bgp_channel *c = (void *) C; | |
1858 | ||
ef57b70f OZ |
1859 | if (c->igp_table_ip4) |
1860 | rt_unlock_table(c->igp_table_ip4); | |
1861 | ||
1862 | if (c->igp_table_ip6) | |
1863 | rt_unlock_table(c->igp_table_ip6); | |
b8a3608a | 1864 | |
1f2eb2ac OZ |
1865 | if (c->base_table) |
1866 | { | |
1867 | rt_flowspec_unlink(c->base_table, c->c.table); | |
1868 | rt_unlock_table(c->base_table); | |
1869 | } | |
1870 | ||
b8a3608a OZ |
1871 | c->index = 0; |
1872 | ||
1873 | /* Cleanup rest of bgp_channel starting at pool field */ | |
1874 | memset(&(c->pool), 0, sizeof(struct bgp_channel) - OFFSETOF(struct bgp_channel, pool)); | |
ef57b70f OZ |
1875 | } |
1876 | ||
1877 | static inline struct bgp_channel_config * | |
1878 | bgp_find_channel_config(struct bgp_config *cf, u32 afi) | |
1879 | { | |
1880 | struct bgp_channel_config *cc; | |
1881 | ||
54430df9 | 1882 | BGP_CF_WALK_CHANNELS(cf, cc) |
ef57b70f OZ |
1883 | if (cc->afi == afi) |
1884 | return cc; | |
1885 | ||
1886 | return NULL; | |
d15b0b0a | 1887 | } |
a7f23f58 | 1888 | |
ef57b70f OZ |
1889 | struct rtable_config * |
1890 | bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type) | |
1891 | { | |
1892 | struct bgp_channel_config *cc2; | |
1893 | struct rtable_config *tab; | |
1894 | ||
1895 | /* First, try table connected by the channel */ | |
1896 | if (cc->c.table->addr_type == type) | |
1897 | return cc->c.table; | |
1898 | ||
1899 | /* Find paired channel with the same SAFI but the other AFI */ | |
1900 | u32 afi2 = cc->afi ^ 0x30000; | |
1901 | cc2 = bgp_find_channel_config(cf, afi2); | |
1902 | ||
1903 | /* Second, try IGP table configured in the paired channel */ | |
1904 | if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6)) | |
1905 | return tab; | |
1906 | ||
1907 | /* Third, try table connected by the paired channel */ | |
1908 | if (cc2 && (cc2->c.table->addr_type == type)) | |
1909 | return cc2->c.table; | |
1910 | ||
1911 | /* Last, try default table of given type */ | |
1912 | if (tab = cf->c.global->def_tables[type]) | |
1913 | return tab; | |
1914 | ||
1915 | cf_error("Undefined IGP table"); | |
1916 | } | |
1917 | ||
1f2eb2ac OZ |
1918 | static struct rtable_config * |
1919 | bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc) | |
1920 | { | |
1921 | /* Expected table type */ | |
1922 | u32 type = (cc->afi == BGP_AF_FLOW4) ? NET_IP4 : NET_IP6; | |
1923 | ||
1924 | /* First, try appropriate IP channel */ | |
1925 | u32 afi2 = BGP_AF(BGP_AFI(cc->afi), BGP_SAFI_UNICAST); | |
1926 | struct bgp_channel_config *cc2 = bgp_find_channel_config(cf, afi2); | |
1927 | if (cc2 && (cc2->c.table->addr_type == type)) | |
1928 | return cc2->c.table; | |
1929 | ||
1930 | /* Last, try default table of given type */ | |
1931 | struct rtable_config *tab = cf->c.global->def_tables[type]; | |
1932 | if (tab) | |
1933 | return tab; | |
1934 | ||
1935 | cf_error("Undefined base table"); | |
1936 | } | |
ef57b70f | 1937 | |
a7f23f58 | 1938 | void |
d15b0b0a | 1939 | bgp_postconfig(struct proto_config *CF) |
a7f23f58 | 1940 | { |
d15b0b0a | 1941 | struct bgp_config *cf = (void *) CF; |
a7f23f58 OZ |
1942 | |
1943 | /* Do not check templates at all */ | |
d15b0b0a | 1944 | if (cf->c.class == SYM_TEMPLATE) |
a7f23f58 OZ |
1945 | return; |
1946 | ||
f3e59178 | 1947 | |
23ee6b1c OZ |
1948 | /* Handle undefined remote_as, zero should mean unspecified external */ |
1949 | if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL)) | |
1950 | cf->remote_as = cf->local_as; | |
1951 | ||
1952 | int internal = (cf->local_as == cf->remote_as); | |
1953 | int interior = internal || cf->confederation_member; | |
1954 | ||
f3e59178 | 1955 | /* EBGP direct by default, IBGP multihop by default */ |
d15b0b0a OZ |
1956 | if (cf->multihop < 0) |
1957 | cf->multihop = internal ? 64 : 0; | |
f3e59178 | 1958 | |
5bd73431 OZ |
1959 | /* LLGR mode default based on GR mode */ |
1960 | if (cf->llgr_mode < 0) | |
1961 | cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0; | |
1962 | ||
dea98864 OZ |
1963 | /* Link check for single-hop BGP by default */ |
1964 | if (cf->check_link < 0) | |
1965 | cf->check_link = !cf->multihop; | |
1966 | ||
f3e59178 | 1967 | |
d15b0b0a | 1968 | if (!cf->local_as) |
a7f23f58 OZ |
1969 | cf_error("Local AS number must be set"); |
1970 | ||
e0835db4 | 1971 | if (ipa_zero(cf->remote_ip) && !cf->remote_range) |
a7f23f58 OZ |
1972 | cf_error("Neighbor must be configured"); |
1973 | ||
e0835db4 OZ |
1974 | if (ipa_zero(cf->local_ip) && cf->strict_bind) |
1975 | cf_error("Local address must be configured for strict bind"); | |
1976 | ||
23ee6b1c OZ |
1977 | if (!cf->remote_as && !cf->peer_type) |
1978 | cf_error("Remote AS number (or peer type) must be set"); | |
1979 | ||
1980 | if ((cf->peer_type == BGP_PT_INTERNAL) && !internal) | |
1981 | cf_error("IBGP cannot have different ASNs"); | |
1982 | ||
1983 | if ((cf->peer_type == BGP_PT_EXTERNAL) && internal) | |
1984 | cf_error("EBGP cannot have the same ASNs"); | |
a1beb8f3 | 1985 | |
470740f9 OZ |
1986 | if (!cf->iface && (ipa_is_link_local(cf->local_ip) || |
1987 | ipa_is_link_local(cf->remote_ip))) | |
1988 | cf_error("Link-local addresses require defined interface"); | |
a1beb8f3 | 1989 | |
d15b0b0a | 1990 | if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF)) |
a7f23f58 OZ |
1991 | cf_error("Neighbor AS number out of range (AS4 not available)"); |
1992 | ||
d15b0b0a | 1993 | if (!internal && cf->rr_client) |
a7f23f58 OZ |
1994 | cf_error("Only internal neighbor can be RR client"); |
1995 | ||
d15b0b0a | 1996 | if (internal && cf->rs_client) |
a7f23f58 OZ |
1997 | cf_error("Only external neighbor can be RS client"); |
1998 | ||
c73b5d2d EB |
1999 | if (internal && (cf->local_role != BGP_ROLE_UNDEFINED)) |
2000 | cf_error("Local role cannot be set on IBGP sessions"); | |
2001 | ||
971721c9 OZ |
2002 | if (interior && (cf->local_role != BGP_ROLE_UNDEFINED)) |
2003 | log(L_WARN "BGP roles are not recommended to be used within AS confederations"); | |
2004 | ||
c73b5d2d EB |
2005 | if (cf->require_roles && (cf->local_role == BGP_ROLE_UNDEFINED)) |
2006 | cf_error("Local role must be set if roles are required"); | |
2007 | ||
d15b0b0a OZ |
2008 | if (!cf->confederation && cf->confederation_member) |
2009 | cf_error("Confederation ID must be set for member sessions"); | |
a7f23f58 | 2010 | |
d15b0b0a OZ |
2011 | if (cf->multihop && (ipa_is_link_local(cf->local_ip) || |
2012 | ipa_is_link_local(cf->remote_ip))) | |
53ffbff3 OZ |
2013 | cf_error("Multihop BGP cannot be used with link-local addresses"); |
2014 | ||
e919601a | 2015 | if (cf->multihop && cf->iface) |
33b6c292 OZ |
2016 | cf_error("Multihop BGP cannot be bound to interface"); |
2017 | ||
d15b0b0a | 2018 | if (cf->multihop && cf->check_link) |
523f020b OZ |
2019 | cf_error("Multihop BGP cannot depend on link state"); |
2020 | ||
d15b0b0a OZ |
2021 | if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip)) |
2022 | cf_error("Multihop BGP with BFD requires specified local address"); | |
2023 | ||
5bd73431 OZ |
2024 | if (!cf->gr_mode && cf->llgr_mode) |
2025 | cf_error("Long-lived graceful restart requires basic graceful restart"); | |
2026 | ||
0b228fca OZ |
2027 | if (internal && cf->enforce_first_as) |
2028 | cf_error("Enforce first AS check is requires EBGP sessions"); | |
2029 | ||
3859e4ef OZ |
2030 | if (cf->keepalive_time > cf->hold_time) |
2031 | cf_error("Keepalive time must be at most hold time"); | |
2032 | ||
2033 | if (cf->keepalive_time > (cf->hold_time / 2)) | |
2034 | log(L_WARN "Keepalive time should be at most 1/2 of hold time"); | |
2035 | ||
2036 | if (cf->min_hold_time > cf->hold_time) | |
2037 | cf_error("Min hold time (%u) exceeds hold time (%u)", | |
2038 | cf->min_hold_time, cf->hold_time); | |
2039 | ||
2040 | uint keepalive_time = cf->keepalive_time ?: cf->hold_time / 3; | |
2041 | if (cf->min_keepalive_time > keepalive_time) | |
2042 | cf_error("Min keepalive time (%u) exceeds keepalive time (%u)", | |
2043 | cf->min_keepalive_time, keepalive_time); | |
2044 | ||
d15b0b0a OZ |
2045 | |
2046 | struct bgp_channel_config *cc; | |
54430df9 | 2047 | BGP_CF_WALK_CHANNELS(cf, cc) |
d15b0b0a | 2048 | { |
3831b619 OZ |
2049 | /* Handle undefined import filter */ |
2050 | if (cc->c.in_filter == FILTER_UNDEF) | |
2051 | if (interior) | |
2052 | cc->c.in_filter = FILTER_ACCEPT; | |
2053 | else | |
2054 | cf_error("EBGP requires explicit import policy"); | |
2055 | ||
2056 | /* Handle undefined export filter */ | |
2057 | if (cc->c.out_filter == FILTER_UNDEF) | |
2058 | if (interior) | |
2059 | cc->c.out_filter = FILTER_REJECT; | |
2060 | else | |
2061 | cf_error("EBGP requires explicit export policy"); | |
2062 | ||
d15b0b0a OZ |
2063 | /* Disable after error incompatible with restart limit action */ |
2064 | if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error) | |
2065 | cc->c.in_limit.action = PLA_DISABLE; | |
2066 | ||
1cab2b4a OZ |
2067 | /* Different default based on rr_client, rs_client */ |
2068 | if (cc->next_hop_keep == 0xff) | |
2069 | cc->next_hop_keep = cf->rr_client ? NH_IBGP : (cf->rs_client ? NH_ALL : NH_NO); | |
2070 | ||
d15b0b0a OZ |
2071 | /* Different default for gw_mode */ |
2072 | if (!cc->gw_mode) | |
2073 | cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT; | |
1ec52253 | 2074 | |
8f79e6b9 OZ |
2075 | /* Different default for next_hop_prefer */ |
2076 | if (!cc->next_hop_prefer) | |
2077 | cc->next_hop_prefer = (cc->gw_mode == GW_DIRECT) ? NHP_GLOBAL : NHP_LOCAL; | |
2078 | ||
5bd73431 | 2079 | /* Defaults based on proto config */ |
d15b0b0a OZ |
2080 | if (cc->gr_able == 0xff) |
2081 | cc->gr_able = (cf->gr_mode == BGP_GR_ABLE); | |
26822d8f | 2082 | |
5bd73431 OZ |
2083 | if (cc->llgr_able == 0xff) |
2084 | cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE); | |
2085 | ||
2086 | if (cc->llgr_time == ~0U) | |
2087 | cc->llgr_time = cf->llgr_time; | |
2088 | ||
09ee846d OZ |
2089 | /* AIGP enabled by default on interior sessions */ |
2090 | if (cc->aigp == 0xff) | |
2091 | cc->aigp = interior; | |
2092 | ||
6fe11c99 | 2093 | /* Default values of IGP tables */ |
ef57b70f OZ |
2094 | if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp) |
2095 | { | |
2096 | if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop)) | |
2097 | cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4); | |
2098 | ||
2099 | if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop)) | |
2100 | cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6); | |
6fe11c99 OZ |
2101 | |
2102 | if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop) | |
2103 | cf_error("Mismatched IGP table type"); | |
2104 | ||
2105 | if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop) | |
2106 | cf_error("Mismatched IGP table type"); | |
ef57b70f OZ |
2107 | } |
2108 | ||
1f2eb2ac OZ |
2109 | /* Default value of base table */ |
2110 | if ((BGP_SAFI(cc->afi) == BGP_SAFI_FLOW) && cc->validate && !cc->base_table) | |
2111 | cc->base_table = bgp_default_base_table(cf, cc); | |
2112 | ||
2113 | if (cc->base_table && !cc->base_table->trie_used) | |
2114 | cf_error("Flowspec validation requires base table (%s) with trie", | |
2115 | cc->base_table->name); | |
2116 | ||
d15b0b0a OZ |
2117 | if (cf->multihop && (cc->gw_mode == GW_DIRECT)) |
2118 | cf_error("Multihop BGP cannot use direct gateway mode"); | |
26822d8f | 2119 | |
d15b0b0a OZ |
2120 | if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted) |
2121 | cf_error("BGP in recursive mode prohibits sorted table"); | |
2122 | ||
2123 | if (cf->deterministic_med && cc->c.table->sorted) | |
2124 | cf_error("BGP with deterministic MED prohibits sorted table"); | |
2125 | ||
2126 | if (cc->secondary && !cc->c.table->sorted) | |
2127 | cf_error("BGP with secondary option requires sorted table"); | |
2128 | } | |
a7f23f58 OZ |
2129 | } |
2130 | ||
2131 | static int | |
d15b0b0a | 2132 | bgp_reconfigure(struct proto *P, struct proto_config *CF) |
a7f23f58 | 2133 | { |
d15b0b0a | 2134 | struct bgp_proto *p = (void *) P; |
a22c3e59 OZ |
2135 | const struct bgp_config *new = (void *) CF; |
2136 | const struct bgp_config *old = p->cf; | |
a7f23f58 | 2137 | |
d15b0b0a | 2138 | if (proto_get_router_id(CF) != p->local_id) |
79b4e12e OZ |
2139 | return 0; |
2140 | ||
a7f23f58 OZ |
2141 | int same = !memcmp(((byte *) old) + sizeof(struct proto_config), |
2142 | ((byte *) new) + sizeof(struct proto_config), | |
2143 | // password item is last and must be checked separately | |
2144 | OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config)) | |
15b0a922 | 2145 | && !bstrcmp(old->password, new->password) |
d35fb9d7 OZ |
2146 | && ((!old->remote_range && !new->remote_range) |
2147 | || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range))) | |
15b0a922 | 2148 | && !bstrcmp(old->dynamic_name, new->dynamic_name) |
e0835db4 | 2149 | && (old->dynamic_name_digits == new->dynamic_name_digits); |
d15b0b0a OZ |
2150 | |
2151 | /* FIXME: Move channel reconfiguration to generic protocol code ? */ | |
2152 | struct channel *C, *C2; | |
2153 | struct bgp_channel_config *cc; | |
2154 | ||
2155 | WALK_LIST(C, p->p.channels) | |
2156 | C->stale = 1; | |
2157 | ||
54430df9 | 2158 | BGP_CF_WALK_CHANNELS(new, cc) |
d15b0b0a OZ |
2159 | { |
2160 | C = (struct channel *) bgp_find_channel(p, cc->afi); | |
2161 | same = proto_configure_channel(P, &C, &cc->c) && same; | |
d15b0b0a OZ |
2162 | } |
2163 | ||
2164 | WALK_LIST_DELSAFE(C, C2, p->p.channels) | |
2165 | if (C->stale) | |
2166 | same = proto_configure_channel(P, &C, NULL) && same; | |
2167 | ||
1ec52253 OZ |
2168 | if (same && (p->start_state > BSS_PREPARE)) |
2169 | bgp_update_bfd(p, new->bfd); | |
2170 | ||
a7f23f58 OZ |
2171 | /* We should update our copy of configuration ptr as old configuration will be freed */ |
2172 | if (same) | |
2173 | p->cf = new; | |
2174 | ||
e0835db4 OZ |
2175 | /* Reset name counter */ |
2176 | p->dynamic_name_counter = 0; | |
2177 | ||
a7f23f58 OZ |
2178 | return same; |
2179 | } | |
2180 | ||
1f2eb2ac | 2181 | #define TABLE(cf, NAME) ((cf)->NAME ? (cf)->NAME->table : NULL ) |
ffb38dfb | 2182 | |
d15b0b0a | 2183 | static int |
e2b530aa | 2184 | bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed) |
d15b0b0a | 2185 | { |
6c9cda6f | 2186 | struct bgp_proto *p = (void *) C->proto; |
d15b0b0a OZ |
2187 | struct bgp_channel *c = (void *) C; |
2188 | struct bgp_channel_config *new = (void *) CC; | |
2189 | struct bgp_channel_config *old = c->cf; | |
2190 | ||
e2b530aa | 2191 | if ((new->secondary != old->secondary) || |
1f2eb2ac | 2192 | (new->validate != old->validate) || |
e2b530aa OZ |
2193 | (new->gr_able != old->gr_able) || |
2194 | (new->llgr_able != old->llgr_able) || | |
2195 | (new->llgr_time != old->llgr_time) || | |
2196 | (new->ext_next_hop != old->ext_next_hop) || | |
2197 | (new->add_path != old->add_path) || | |
2198 | (new->import_table != old->import_table) || | |
b7d7599c | 2199 | (new->export_table != old->export_table) || |
1f2eb2ac OZ |
2200 | (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) || |
2201 | (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) || | |
2202 | (TABLE(new, base_table) != TABLE(old, base_table))) | |
d15b0b0a OZ |
2203 | return 0; |
2204 | ||
e2b530aa | 2205 | if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP)) |
d15b0b0a OZ |
2206 | return 0; |
2207 | ||
09ee846d | 2208 | if ((new->gw_mode != old->gw_mode) || |
8f79e6b9 | 2209 | (new->next_hop_prefer != old->next_hop_prefer) || |
09ee846d OZ |
2210 | (new->aigp != old->aigp) || |
2211 | (new->cost != old->cost)) | |
6c9cda6f OZ |
2212 | { |
2213 | /* import_changed itself does not force ROUTE_REFRESH when import_table is active */ | |
2214 | if (c->c.in_table && (c->c.channel_state == CS_UP)) | |
2215 | bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); | |
2216 | ||
e2b530aa | 2217 | *import_changed = 1; |
6c9cda6f | 2218 | } |
e2b530aa OZ |
2219 | |
2220 | if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) || | |
2221 | (new->next_hop_self != old->next_hop_self) || | |
2222 | (new->next_hop_keep != old->next_hop_keep) || | |
09ee846d OZ |
2223 | (new->aigp != old->aigp) || |
2224 | (new->aigp_originate != old->aigp_originate)) | |
e2b530aa OZ |
2225 | *export_changed = 1; |
2226 | ||
d15b0b0a OZ |
2227 | c->cf = new; |
2228 | return 1; | |
2229 | } | |
2230 | ||
a7f23f58 | 2231 | static void |
9d3fc306 | 2232 | bgp_copy_config(struct proto_config *dest, struct proto_config *src) |
a7f23f58 | 2233 | { |
9d3fc306 OZ |
2234 | struct bgp_config *d = (void *) dest; |
2235 | struct bgp_config *s = (void *) src; | |
2236 | ||
2237 | /* Copy BFD options */ | |
2238 | if (s->bfd) | |
2239 | { | |
2240 | struct bfd_options *opts = cfg_alloc(sizeof(struct bfd_options)); | |
2241 | memcpy(opts, s->bfd, sizeof(struct bfd_options)); | |
2242 | d->bfd = opts; | |
2243 | } | |
a7f23f58 OZ |
2244 | } |
2245 | ||
2246 | ||
54e55169 MM |
2247 | /** |
2248 | * bgp_error - report a protocol error | |
2249 | * @c: connection | |
2250 | * @code: error code (according to the RFC) | |
2e9b2421 | 2251 | * @subcode: error sub-code |
54e55169 MM |
2252 | * @data: data to be passed in the Notification message |
2253 | * @len: length of the data | |
2254 | * | |
2255 | * bgp_error() sends a notification packet to tell the other side that a protocol | |
2e9b2421 | 2256 | * error has occurred (including the data considered erroneous if possible) and |
54e55169 MM |
2257 | * closes the connection. |
2258 | */ | |
3fdbafb6 | 2259 | void |
d15b0b0a | 2260 | bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len) |
3fdbafb6 | 2261 | { |
b99d3786 OZ |
2262 | struct bgp_proto *p = c->bgp; |
2263 | ||
11b32d91 | 2264 | if (c->state == BS_CLOSE) |
3fdbafb6 | 2265 | return; |
11b32d91 | 2266 | |
d15b0b0a | 2267 | bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len)); |
b99d3786 | 2268 | bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode); |
11b32d91 OZ |
2269 | bgp_conn_enter_close_state(c); |
2270 | ||
3fdbafb6 MM |
2271 | c->notify_code = code; |
2272 | c->notify_subcode = subcode; | |
efcece2d MM |
2273 | c->notify_data = data; |
2274 | c->notify_size = (len > 0) ? len : 0; | |
d15b0b0a | 2275 | bgp_schedule_packet(c, NULL, PKT_NOTIFICATION); |
b99d3786 OZ |
2276 | |
2277 | if (code != 6) | |
d15b0b0a OZ |
2278 | { |
2279 | bgp_update_startup_delay(p); | |
830ba75e | 2280 | bgp_stop(p, 0, NULL, 0); |
d15b0b0a | 2281 | } |
3fdbafb6 MM |
2282 | } |
2283 | ||
11b32d91 OZ |
2284 | /** |
2285 | * bgp_store_error - store last error for status report | |
2286 | * @p: BGP instance | |
2287 | * @c: connection | |
2288 | * @class: error class (BE_xxx constants) | |
2289 | * @code: error code (class specific) | |
2290 | * | |
2291 | * bgp_store_error() decides whether given error is interesting enough | |
2292 | * and store that error to last_error variables of @p | |
2293 | */ | |
2294 | void | |
2295 | bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code) | |
2296 | { | |
2297 | /* During PS_UP, we ignore errors on secondary connection */ | |
2298 | if ((p->p.proto_state == PS_UP) && c && (c != p->conn)) | |
2299 | return; | |
2300 | ||
2301 | /* During PS_STOP, we ignore any errors, as we want to report | |
2302 | * the error that caused transition to PS_STOP | |
2303 | */ | |
2304 | if (p->p.proto_state == PS_STOP) | |
2305 | return; | |
2306 | ||
2307 | p->last_error_class = class; | |
2308 | p->last_error_code = code; | |
2309 | } | |
2310 | ||
/* Names of connection states, indexed by the state in bgp_state_dsc() */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Prefixes of the status message, indexed by the last error class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for errors of class BE_MISC, indexed by the last error code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Messages for errors of class BE_AUTO_DOWN, indexed by the last error code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};

/* Names of graceful restart states (presumably indexed by the GR state) */
static char *bgp_gr_states[] = {
  "None",
  "Regular",
  "Long-lived"
};
11b32d91 | 2316 | |
b8113a5e OZ |
2317 | static const char * |
2318 | bgp_last_errmsg(struct bgp_proto *p) | |
973399ae | 2319 | { |
11b32d91 | 2320 | switch (p->last_error_class) |
d15b0b0a OZ |
2321 | { |
2322 | case BE_MISC: | |
2323 | return bgp_misc_errors[p->last_error_code]; | |
2324 | case BE_SOCKET: | |
2325 | return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code); | |
2326 | case BE_BGP_RX: | |
2327 | case BE_BGP_TX: | |
2328 | return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF); | |
2329 | case BE_AUTO_DOWN: | |
2330 | return bgp_auto_errors[p->last_error_code]; | |
2331 | default: | |
2332 | return ""; | |
2333 | } | |
b8113a5e OZ |
2334 | } |
2335 | ||
2336 | static const char * | |
2337 | bgp_state_dsc(struct bgp_proto *p) | |
2338 | { | |
51947659 OZ |
2339 | if (p->p.proto_state == PS_DOWN) |
2340 | return "Down"; | |
b8113a5e OZ |
2341 | |
2342 | int state = MAX(p->incoming_conn.state, p->outgoing_conn.state); | |
e0835db4 | 2343 | if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive) |
b8113a5e OZ |
2344 | return "Passive"; |
2345 | ||
2346 | return bgp_state_names[state]; | |
2347 | } | |
2348 | ||
2349 | static void | |
2350 | bgp_get_status(struct proto *P, byte *buf) | |
2351 | { | |
2352 | struct bgp_proto *p = (struct bgp_proto *) P; | |
2353 | ||
2354 | const char *err1 = bgp_err_classes[p->last_error_class]; | |
2355 | const char *err2 = bgp_last_errmsg(p); | |
11b32d91 | 2356 | |
f4ab2317 | 2357 | if (P->proto_state == PS_DOWN) |
11b32d91 | 2358 | bsprintf(buf, "%s%s", err1, err2); |
f4ab2317 | 2359 | else |
b8113a5e OZ |
2360 | bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2); |
2361 | } | |
2362 | ||
256cc8ee OZ |
2363 | static void |
2364 | bgp_show_afis(int code, char *s, u32 *afis, uint count) | |
2365 | { | |
2366 | buffer b; | |
2367 | LOG_BUFFER_INIT(b); | |
2368 | ||
2369 | buffer_puts(&b, s); | |
2370 | ||
2371 | for (u32 *af = afis; af < (afis + count); af++) | |
2372 | { | |
2373 | const struct bgp_af_desc *desc = bgp_get_af_desc(*af); | |
2374 | if (desc) | |
2375 | buffer_print(&b, " %s", desc->name); | |
2376 | else | |
2377 | buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af)); | |
2378 | } | |
2379 | ||
2380 | if (b.pos == b.end) | |
2381 | strcpy(b.end - 32, " ... <too long>"); | |
2382 | ||
2383 | cli_msg(code, b.start); | |
2384 | } | |
2385 | ||
af611f93 | 2386 | const char * |
c73b5d2d EB |
2387 | bgp_format_role_name(u8 role) |
2388 | { | |
2389 | static const char *bgp_role_names[] = { "provider", "rs_server", "rs_client", "customer", "peer" }; | |
2390 | if (role == BGP_ROLE_UNDEFINED) return "undefined"; | |
971721c9 | 2391 | if (role < ARRAY_SIZE(bgp_role_names)) return bgp_role_names[role]; |
c73b5d2d EB |
2392 | return "?"; |
2393 | } | |
2394 | ||
256cc8ee OZ |
2395 | static void |
2396 | bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps) | |
2397 | { | |
2398 | struct bgp_af_caps *ac; | |
2399 | uint any_mp_bgp = 0; | |
2400 | uint any_gr_able = 0; | |
2401 | uint any_add_path = 0; | |
d8022d26 | 2402 | uint any_ext_next_hop = 0; |
5bd73431 | 2403 | uint any_llgr_able = 0; |
256cc8ee OZ |
2404 | u32 *afl1 = alloca(caps->af_count * sizeof(u32)); |
2405 | u32 *afl2 = alloca(caps->af_count * sizeof(u32)); | |
2406 | uint afn1, afn2; | |
2407 | ||
2408 | WALK_AF_CAPS(caps, ac) | |
2409 | { | |
2410 | any_mp_bgp |= ac->ready; | |
2411 | any_gr_able |= ac->gr_able; | |
2412 | any_add_path |= ac->add_path; | |
d8022d26 | 2413 | any_ext_next_hop |= ac->ext_next_hop; |
5bd73431 | 2414 | any_llgr_able |= ac->llgr_able; |
256cc8ee OZ |
2415 | } |
2416 | ||
2417 | if (any_mp_bgp) | |
2418 | { | |
2419 | cli_msg(-1006, " Multiprotocol"); | |
2420 | ||
2421 | afn1 = 0; | |
2422 | WALK_AF_CAPS(caps, ac) | |
2423 | if (ac->ready) | |
2424 | afl1[afn1++] = ac->afi; | |
2425 | ||
2426 | bgp_show_afis(-1006, " AF announced:", afl1, afn1); | |
2427 | } | |
2428 | ||
2429 | if (caps->route_refresh) | |
2430 | cli_msg(-1006, " Route refresh"); | |
2431 | ||
d8022d26 OZ |
2432 | if (any_ext_next_hop) |
2433 | { | |
2434 | cli_msg(-1006, " Extended next hop"); | |
2435 | ||
2436 | afn1 = 0; | |
2437 | WALK_AF_CAPS(caps, ac) | |
2438 | if (ac->ext_next_hop) | |
2439 | afl1[afn1++] = ac->afi; | |
2440 | ||
2441 | bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1); | |
2442 | } | |
2443 | ||
256cc8ee OZ |
2444 | if (caps->ext_messages) |
2445 | cli_msg(-1006, " Extended message"); | |
2446 | ||
2447 | if (caps->gr_aware) | |
2448 | cli_msg(-1006, " Graceful restart"); | |
2449 | ||
2450 | if (any_gr_able) | |
2451 | { | |
2452 | /* Continues from gr_aware */ | |
2453 | cli_msg(-1006, " Restart time: %u", caps->gr_time); | |
2454 | if (caps->gr_flags & BGP_GRF_RESTART) | |
2455 | cli_msg(-1006, " Restart recovery"); | |
2456 | ||
2457 | afn1 = afn2 = 0; | |
2458 | WALK_AF_CAPS(caps, ac) | |
2459 | { | |
2460 | if (ac->gr_able) | |
2461 | afl1[afn1++] = ac->afi; | |
2462 | ||
2463 | if (ac->gr_af_flags & BGP_GRF_FORWARDING) | |
2464 | afl2[afn2++] = ac->afi; | |
2465 | } | |
2466 | ||
2467 | bgp_show_afis(-1006, " AF supported:", afl1, afn1); | |
2468 | bgp_show_afis(-1006, " AF preserved:", afl2, afn2); | |
2469 | } | |
2470 | ||
2471 | if (caps->as4_support) | |
2472 | cli_msg(-1006, " 4-octet AS numbers"); | |
2473 | ||
2474 | if (any_add_path) | |
2475 | { | |
2476 | cli_msg(-1006, " ADD-PATH"); | |
2477 | ||
2478 | afn1 = afn2 = 0; | |
2479 | WALK_AF_CAPS(caps, ac) | |
2480 | { | |
2481 | if (ac->add_path & BGP_ADD_PATH_RX) | |
2482 | afl1[afn1++] = ac->afi; | |
2483 | ||
2484 | if (ac->add_path & BGP_ADD_PATH_TX) | |
2485 | afl2[afn2++] = ac->afi; | |
2486 | } | |
2487 | ||
2488 | bgp_show_afis(-1006, " RX:", afl1, afn1); | |
2489 | bgp_show_afis(-1006, " TX:", afl2, afn2); | |
2490 | } | |
2491 | ||
2492 | if (caps->enhanced_refresh) | |
2493 | cli_msg(-1006, " Enhanced refresh"); | |
5bd73431 OZ |
2494 | |
2495 | if (caps->llgr_aware) | |
2496 | cli_msg(-1006, " Long-lived graceful restart"); | |
2497 | ||
2498 | if (any_llgr_able) | |
2499 | { | |
2500 | u32 stale_time = 0; | |
2501 | ||
2502 | afn1 = afn2 = 0; | |
2503 | WALK_AF_CAPS(caps, ac) | |
2504 | { | |
2505 | stale_time = MAX(stale_time, ac->llgr_time); | |
2506 | ||
2507 | if (ac->llgr_able && ac->llgr_time) | |
2508 | afl1[afn1++] = ac->afi; | |
2509 | ||
2510 | if (ac->llgr_flags & BGP_GRF_FORWARDING) | |
2511 | afl2[afn2++] = ac->afi; | |
2512 | } | |
2513 | ||
2514 | /* Continues from llgr_aware */ | |
2515 | cli_msg(-1006, " LL stale time: %u", stale_time); | |
2516 | ||
2517 | bgp_show_afis(-1006, " AF supported:", afl1, afn1); | |
2518 | bgp_show_afis(-1006, " AF preserved:", afl2, afn2); | |
2519 | } | |
71423871 VB |
2520 | |
2521 | if (caps->hostname) | |
2522 | cli_msg(-1006, " Hostname: %s", caps->hostname); | |
c73b5d2d EB |
2523 | |
2524 | if (caps->role != BGP_ROLE_UNDEFINED) | |
2525 | cli_msg(-1006, " Role: %s", bgp_format_role_name(caps->role)); | |
256cc8ee OZ |
2526 | } |
2527 | ||
b8113a5e OZ |
2528 | static void |
2529 | bgp_show_proto_info(struct proto *P) | |
2530 | { | |
2531 | struct bgp_proto *p = (struct bgp_proto *) P; | |
b8113a5e | 2532 | |
b8113a5e | 2533 | cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); |
e0835db4 OZ |
2534 | |
2535 | if (bgp_is_dynamic(p) && p->cf->remote_range) | |
2536 | cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range); | |
2537 | else | |
2538 | cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); | |
2539 | ||
a9c19b92 OZ |
2540 | if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT)) |
2541 | cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port); | |
2542 | ||
e0835db4 | 2543 | cli_msg(-1006, " Neighbor AS: %u", p->remote_as); |
0b1e1e1a | 2544 | cli_msg(-1006, " Local AS: %u", p->cf->local_as); |
b8113a5e | 2545 | |
d15b0b0a | 2546 | if (p->gr_active_num) |
0c791f87 OZ |
2547 | cli_msg(-1006, " Neighbor graceful restart active"); |
2548 | ||
b8113a5e | 2549 | if (P->proto_state == PS_START) |
d15b0b0a OZ |
2550 | { |
2551 | struct bgp_conn *oc = &p->outgoing_conn; | |
b8113a5e | 2552 | |
d15b0b0a | 2553 | if ((p->start_state < BSS_CONNECT) && |
a6f79ca5 | 2554 | (tm_active(p->startup_timer))) |
d3fa9e84 | 2555 | cli_msg(-1006, " Error wait: %t/%u", |
a6f79ca5 | 2556 | tm_remains(p->startup_timer), p->startup_delay); |
b8113a5e | 2557 | |
d15b0b0a | 2558 | if ((oc->state == BS_ACTIVE) && |
a6f79ca5 | 2559 | (tm_active(oc->connect_timer))) |
d3fa9e84 | 2560 | cli_msg(-1006, " Connect delay: %t/%u", |
a6f79ca5 | 2561 | tm_remains(oc->connect_timer), p->cf->connect_delay_time); |
0c791f87 | 2562 | |
a6f79ca5 | 2563 | if (p->gr_active_num && tm_active(p->gr_timer)) |
d3fa9e84 | 2564 | cli_msg(-1006, " Restart timer: %t/-", |
a6f79ca5 | 2565 | tm_remains(p->gr_timer)); |
d15b0b0a | 2566 | } |
b8113a5e | 2567 | else if (P->proto_state == PS_UP) |
d15b0b0a OZ |
2568 | { |
2569 | cli_msg(-1006, " Neighbor ID: %R", p->remote_id); | |
256cc8ee OZ |
2570 | cli_msg(-1006, " Local capabilities"); |
2571 | bgp_show_capabilities(p, p->conn->local_caps); | |
2572 | cli_msg(-1006, " Neighbor capabilities"); | |
2573 | bgp_show_capabilities(p, p->conn->remote_caps); | |
7fc55925 OZ |
2574 | cli_msg(-1006, " Session: %s%s%s%s%s", |
2575 | p->is_internal ? "internal" : "external", | |
2576 | p->cf->multihop ? " multihop" : "", | |
2577 | p->rr_client ? " route-reflector" : "", | |
2578 | p->rs_client ? " route-server" : "", | |
2579 | p->as4_session ? " AS4" : ""); | |
a22c3e59 | 2580 | cli_msg(-1006, " Source address: %I", p->local_ip); |
d3fa9e84 | 2581 | cli_msg(-1006, " Hold timer: %t/%u", |
a6f79ca5 | 2582 | tm_remains(p->conn->hold_timer), p->conn->hold_time); |
d3fa9e84 | 2583 | cli_msg(-1006, " Keepalive timer: %t/%u", |
a6f79ca5 | 2584 | tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time); |
d15b0b0a | 2585 | } |
b8113a5e | 2586 | |
5a6e8380 | 2587 | #if 0 |
21d09632 OZ |
2588 | struct bgp_stats *s = &p->stats; |
2589 | cli_msg(-1006, " FSM established transitions: %u", | |
2590 | s->fsm_established_transitions); | |
2591 | cli_msg(-1006, " Rcvd messages: %u total / %u updates / %lu bytes", | |
2592 | s->rx_messages, s->rx_updates, s->rx_bytes); | |
2593 | cli_msg(-1006, " Sent messages: %u total / %u updates / %lu bytes", | |
2594 | s->tx_messages, s->tx_updates, s->tx_bytes); | |
2595 | cli_msg(-1006, " Last rcvd update elapsed time: %t s", | |
2596 | p->last_rx_update ? (current_time() - p->last_rx_update) : 0); | |
5a6e8380 | 2597 | #endif |
21d09632 | 2598 | |
523f020b | 2599 | if ((p->last_error_class != BE_NONE) && |
b8113a5e | 2600 | (p->last_error_class != BE_MAN_DOWN)) |
d15b0b0a OZ |
2601 | { |
2602 | const char *err1 = bgp_err_classes[p->last_error_class]; | |
2603 | const char *err2 = bgp_last_errmsg(p); | |
2604 | cli_msg(-1006, " Last error: %s%s", err1, err2); | |
2605 | } | |
2606 | ||
2607 | { | |
ef57b70f | 2608 | struct bgp_channel *c; |
d15b0b0a | 2609 | WALK_LIST(c, p->p.channels) |
ef57b70f OZ |
2610 | { |
2611 | channel_show_info(&c->c); | |
2612 | ||
54430df9 OZ |
2613 | if (c->c.channel != &channel_bgp) |
2614 | continue; | |
2615 | ||
5bd73431 OZ |
2616 | if (p->gr_active_num) |
2617 | cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]); | |
2618 | ||
0db7a1d6 | 2619 | if (c->stale_timer && tm_active(c->stale_timer)) |
5bd73431 OZ |
2620 | cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer)); |
2621 | ||
7fc55925 OZ |
2622 | if (c->c.channel_state == CS_UP) |
2623 | { | |
2624 | if (ipa_zero(c->link_addr)) | |
2625 | cli_msg(-1006, " BGP Next hop: %I", c->next_hop_addr); | |
2626 | else | |
2627 | cli_msg(-1006, " BGP Next hop: %I %I", c->next_hop_addr, c->link_addr); | |
2628 | } | |
ccee67ca | 2629 | |
ef57b70f OZ |
2630 | if (c->igp_table_ip4) |
2631 | cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name); | |
2632 | ||
2633 | if (c->igp_table_ip6) | |
2634 | cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name); | |
1f2eb2ac OZ |
2635 | |
2636 | if (c->base_table) | |
2637 | cli_msg(-1006, " Base table: %s", c->base_table->name); | |
ef57b70f | 2638 | } |
d15b0b0a | 2639 | } |
973399ae MM |
2640 | } |
2641 | ||
f4deef89 | 2642 | const struct channel_class channel_bgp = { |
d15b0b0a OZ |
2643 | .channel_size = sizeof(struct bgp_channel), |
2644 | .config_size = sizeof(struct bgp_channel_config), | |
2645 | .init = bgp_channel_init, | |
2646 | .start = bgp_channel_start, | |
2647 | .shutdown = bgp_channel_shutdown, | |
2648 | .cleanup = bgp_channel_cleanup, | |
2649 | .reconfigure = bgp_channel_reconfigure, | |
2650 | }; | |
2651 | ||
2638249d | 2652 | struct protocol proto_bgp = { |
4a591d4b PT |
2653 | .name = "BGP", |
2654 | .template = "bgp%d", | |
ee7e2ffd | 2655 | .class = PROTOCOL_BGP, |
4a591d4b | 2656 | .preference = DEF_PREF_BGP, |
1e37e35c | 2657 | .channel_mask = NB_IP | NB_VPN | NB_FLOW, |
d15b0b0a | 2658 | .proto_size = sizeof(struct bgp_proto), |
2bbc3083 | 2659 | .config_size = sizeof(struct bgp_config), |
d15b0b0a | 2660 | .postconfig = bgp_postconfig, |
4a591d4b PT |
2661 | .init = bgp_init, |
2662 | .start = bgp_start, | |
2663 | .shutdown = bgp_shutdown, | |
4a591d4b PT |
2664 | .reconfigure = bgp_reconfigure, |
2665 | .copy_config = bgp_copy_config, | |
2666 | .get_status = bgp_get_status, | |
2667 | .get_attr = bgp_get_attr, | |
2668 | .get_route_info = bgp_get_route_info, | |
2669 | .show_proto_info = bgp_show_proto_info | |
2638249d | 2670 | }; |
4a23ede2 MM |
2671 | |
2672 | void bgp_build(void) | |
2673 | { | |
2674 | proto_build(&proto_bgp); | |
2675 | } |