]>
Commit | Line | Data |
---|---|---|
2638249d MM |
1 | /* |
2 | * BIRD -- The Border Gateway Protocol | |
3 | * | |
4 | * (c) 2000 Martin Mares <mj@ucw.cz> | |
5 | * | |
6 | * Can be freely distributed and used under the terms of the GNU GPL. | |
7 | */ | |
8 | ||
54e55169 MM |
9 | /** |
10 | * DOC: Border Gateway Protocol | |
11 | * | |
12 | * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the | |
13 | * connection and most of the interface with BIRD core, |packets.c| handling | |
14 | * both incoming and outgoing BGP packets and |attrs.c| containing functions for | |
15 | * manipulation with BGP attribute lists. | |
16 | * | |
17 | * As opposed to the other existing routing daemons, BIRD has a sophisticated core | |
18 | * architecture which is able to keep all the information needed by BGP in the | |
19 | * primary routing table, therefore no complex data structures like a central | |
20 | * BGP table are needed. This increases memory footprint of a BGP router with | |
21 | * many connections, but not too much and, which is more important, it makes | |
22 | * BGP much easier to implement. | |
23 | * | |
58f7d004 | 24 | * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto |
54e55169 MM |
25 | * structure to which are attached individual connections represented by &bgp_conn |
26 | * (usually, there exists only one connection, but during BGP session setup, there | |
27 | * can be more of them). The connections are handled according to the BGP state machine | |
28 | * defined in the RFC with all the timers and all the parameters configurable. | |
29 | * | |
30 | * In incoming direction, we listen on the connection's socket and each time we receive | |
31 | * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and | |
32 | * passes complete packets to bgp_rx_packet() which distributes the packet according | |
33 | * to its type. | |
34 | * | |
35 | * In outgoing direction, we gather all the routing updates and sort them to buckets | |
36 | * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison | |
37 | * of &rta's and a &fib which helps us to find if we already have another route for | |
38 | * the same destination queued for sending, so that we can replace it with the new one | |
39 | * immediately instead of sending both updates). There also exists a special bucket holding | |
40 | * all the route withdrawals which cannot be queued anywhere else as they don't have any | |
41 | * attributes. If we have any packet to send (due to either new routes or the connection | |
58f7d004 | 42 | * tracking code wanting to send an Open, Keepalive or Notification message), we call |
54e55169 MM |
43 | * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send |
44 | * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty, | |
45 | * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls | |
46 | * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet | |
47 | * type if we have more data of the same type to send. | |
48 | * | |
49 | * The processing of attributes consists of two functions: bgp_decode_attrs() for checking | |
50 | * of the attribute blocks and translating them to the language of BIRD's extended attributes | |
51 | * and bgp_encode_attrs() which does the converse. Both functions are built around a | |
52 | * @bgp_attr_table array describing all important characteristics of all known attributes. | |
53 | * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. | |
54 | */ | |
55 | ||
48d79d52 | 56 | #undef LOCAL_DEBUG |
2638249d MM |
57 | |
58 | #include "nest/bird.h" | |
59 | #include "nest/iface.h" | |
60 | #include "nest/protocol.h" | |
61 | #include "nest/route.h" | |
c01e3741 | 62 | #include "nest/locks.h" |
2638249d | 63 | #include "conf/conf.h" |
c01e3741 | 64 | #include "lib/socket.h" |
973399ae | 65 | #include "lib/resource.h" |
7d875e09 | 66 | #include "lib/string.h" |
2638249d MM |
67 | |
68 | #include "bgp.h" | |
69 | ||
973399ae | 70 | struct linpool *bgp_linpool; /* Global temporary pool */ |
c01e3741 MM |
71 | static sock *bgp_listen_sk; /* Global listening socket */ |
72 | static int bgp_counter; /* Number of protocol instances using the listening socket */ | |
c01e3741 | 73 | |
11b32d91 | 74 | static void bgp_close(struct bgp_proto *p, int apply_md5); |
c01e3741 | 75 | static void bgp_connect(struct bgp_proto *p); |
dd91e467 | 76 | static void bgp_active(struct bgp_proto *p); |
789772ed | 77 | static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags); |
2638249d | 78 | |
11cb6202 | 79 | |
11b32d91 OZ |
80 | /** |
81 | * bgp_open - open a BGP instance | |
82 | * @p: BGP instance | |
83 | * | |
84 | * This function allocates and configures shared BGP resources. | |
85 | * Should be called as the last step during initialization | |
86 | * (when lock is acquired and neighbor is ready). | |
87 | * When error, state changed to PS_DOWN, -1 is returned and caller | |
88 | * should return immediately. | |
89 | */ | |
90 | static int | |
91 | bgp_open(struct bgp_proto *p) | |
92 | { | |
789772ed | 93 | struct config *cfg = p->cf->c.global; |
11b32d91 OZ |
94 | bgp_counter++; |
95 | ||
96 | if (!bgp_listen_sk) | |
d72cdff4 | 97 | bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags); |
11b32d91 OZ |
98 | |
99 | if (!bgp_linpool) | |
100 | bgp_linpool = lp_new(&root_pool, 4080); | |
101 | ||
102 | if (p->cf->password) | |
103 | { | |
104 | int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password); | |
105 | if (rv < 0) | |
106 | { | |
107 | bgp_close(p, 0); | |
108 | p->p.disabled = 1; | |
109 | bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_MD5); | |
110 | proto_notify_state(&p->p, PS_DOWN); | |
111 | return -1; | |
112 | } | |
113 | } | |
114 | ||
11b32d91 OZ |
115 | return 0; |
116 | } | |
117 | ||
dd91e467 OZ |
118 | static void |
119 | bgp_startup(struct bgp_proto *p) | |
120 | { | |
121 | BGP_TRACE(D_EVENTS, "Started"); | |
122 | p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP; | |
be6e39eb OZ |
123 | |
124 | if (!p->cf->passive) | |
125 | bgp_active(p); | |
dd91e467 OZ |
126 | } |
127 | ||
128 | static void | |
129 | bgp_startup_timeout(timer *t) | |
130 | { | |
131 | bgp_startup(t->data); | |
132 | } | |
133 | ||
134 | ||
135 | static void | |
136 | bgp_initiate(struct bgp_proto *p) | |
137 | { | |
138 | if (p->startup_delay) | |
139 | { | |
140 | BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay); | |
141 | bgp_start_timer(p->startup_timer, p->startup_delay); | |
142 | } | |
143 | else | |
144 | bgp_startup(p); | |
145 | } | |
146 | ||
11b32d91 OZ |
147 | /** |
148 | * bgp_close - close a BGP instance | |
149 | * @p: BGP instance | |
150 | * @apply_md5: 0 to disable unsetting MD5 auth | |
151 | * | |
152 | * This function frees and deconfigures shared BGP resources. | |
153 | * @apply_md5 is set to 0 when bgp_close is called as a cleanup | |
154 | * from failed bgp_open(). | |
155 | */ | |
9831e591 | 156 | static void |
11b32d91 | 157 | bgp_close(struct bgp_proto *p, int apply_md5) |
c01e3741 | 158 | { |
c01e3741 MM |
159 | ASSERT(bgp_counter); |
160 | bgp_counter--; | |
d51aa281 | 161 | |
11b32d91 | 162 | if (p->cf->password && apply_md5) |
d51aa281 OZ |
163 | sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL); |
164 | ||
c01e3741 MM |
165 | if (!bgp_counter) |
166 | { | |
167 | rfree(bgp_listen_sk); | |
168 | bgp_listen_sk = NULL; | |
973399ae MM |
169 | rfree(bgp_linpool); |
170 | bgp_linpool = NULL; | |
c01e3741 | 171 | } |
c01e3741 MM |
172 | } |
173 | ||
54e55169 MM |
174 | /** |
175 | * bgp_start_timer - start a BGP timer | |
176 | * @t: timer | |
177 | * @value: time to fire (0 to disable the timer) | |
178 | * | |
179 | * This functions calls tm_start() on @t with time @value and the | |
180 | * amount of randomization suggested by the BGP standard. Please use | |
181 | * it for all BGP timers. | |
182 | */ | |
3fdbafb6 | 183 | void |
c01e3741 MM |
184 | bgp_start_timer(timer *t, int value) |
185 | { | |
3fdbafb6 | 186 | if (value) |
b3155b33 MM |
187 | { |
188 | /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */ | |
189 | t->randomize = value / 4; | |
190 | tm_start(t, value - t->randomize); | |
191 | } | |
b552ecc4 MM |
192 | else |
193 | tm_stop(t); | |
194 | } | |
195 | ||
54e55169 MM |
196 | /** |
197 | * bgp_close_conn - close a BGP connection | |
198 | * @conn: connection to close | |
199 | * | |
200 | * This function takes a connection described by the &bgp_conn structure, | |
201 | * closes its socket and frees all resources associated with it. | |
54e55169 | 202 | */ |
b552ecc4 MM |
203 | void |
204 | bgp_close_conn(struct bgp_conn *conn) | |
205 | { | |
206 | struct bgp_proto *p = conn->bgp; | |
207 | ||
208 | DBG("BGP: Closing connection\n"); | |
209 | conn->packets_to_send = 0; | |
210 | rfree(conn->connect_retry_timer); | |
211 | conn->connect_retry_timer = NULL; | |
212 | rfree(conn->keepalive_timer); | |
213 | conn->keepalive_timer = NULL; | |
214 | rfree(conn->hold_timer); | |
215 | conn->hold_timer = NULL; | |
38a608c5 | 216 | rfree(conn->sk); |
b552ecc4 | 217 | conn->sk = NULL; |
11b32d91 OZ |
218 | rfree(conn->tx_ev); |
219 | conn->tx_ev = NULL; | |
220 | } | |
221 | ||
222 | ||
223 | /** | |
224 | * bgp_update_startup_delay - update a startup delay | |
225 | * @p: BGP instance | |
11b32d91 OZ |
226 | * |
227 | * This function updates a startup delay that is used to postpone next BGP connect. | |
228 | * It also handles disable_after_error and might stop BGP instance when error | |
229 | * happened and disable_after_error is on. | |
230 | * | |
231 | * It should be called when BGP protocol error happened. | |
232 | */ | |
233 | void | |
b99d3786 | 234 | bgp_update_startup_delay(struct bgp_proto *p) |
11b32d91 OZ |
235 | { |
236 | struct bgp_config *cf = p->cf; | |
237 | ||
b99d3786 | 238 | DBG("BGP: Updating startup delay\n"); |
11b32d91 | 239 | |
72382626 OZ |
240 | if (p->last_proto_error && ((now - p->last_proto_error) >= cf->error_amnesia_time)) |
241 | p->startup_delay = 0; | |
242 | ||
11b32d91 OZ |
243 | p->last_proto_error = now; |
244 | ||
245 | if (cf->disable_after_error) | |
246 | { | |
247 | p->startup_delay = 0; | |
248 | p->p.disabled = 1; | |
11b32d91 | 249 | return; |
6fd766c1 | 250 | } |
11b32d91 | 251 | |
11b32d91 OZ |
252 | if (!p->startup_delay) |
253 | p->startup_delay = cf->error_delay_time_min; | |
254 | else | |
b99d3786 | 255 | p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max); |
c01e3741 MM |
256 | } |
257 | ||
11b32d91 | 258 | static void |
b99d3786 | 259 | bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode) |
48e842cc | 260 | { |
11b32d91 | 261 | switch (conn->state) |
48e842cc MM |
262 | { |
263 | case BS_IDLE: | |
11b32d91 OZ |
264 | case BS_CLOSE: |
265 | return; | |
48e842cc MM |
266 | case BS_CONNECT: |
267 | case BS_ACTIVE: | |
11b32d91 OZ |
268 | bgp_conn_enter_idle_state(conn); |
269 | return; | |
48e842cc MM |
270 | case BS_OPENSENT: |
271 | case BS_OPENCONFIRM: | |
272 | case BS_ESTABLISHED: | |
b99d3786 | 273 | bgp_error(conn, 6, subcode, NULL, 0); |
11b32d91 | 274 | return; |
48e842cc | 275 | default: |
11b32d91 | 276 | bug("bgp_graceful_close_conn: Unknown state %d", conn->state); |
48e842cc MM |
277 | } |
278 | } | |
279 | ||
11b32d91 OZ |
280 | static void |
281 | bgp_down(struct bgp_proto *p) | |
282 | { | |
283 | if (p->start_state > BSS_PREPARE) | |
284 | bgp_close(p, 1); | |
285 | ||
b99d3786 | 286 | BGP_TRACE(D_EVENTS, "Down"); |
11b32d91 OZ |
287 | proto_notify_state(&p->p, PS_DOWN); |
288 | } | |
289 | ||
290 | static void | |
291 | bgp_decision(void *vp) | |
292 | { | |
293 | struct bgp_proto *p = vp; | |
294 | ||
295 | DBG("BGP: Decision start\n"); | |
296 | if ((p->p.proto_state == PS_START) | |
be6e39eb OZ |
297 | && (p->outgoing_conn.state == BS_IDLE) |
298 | && (!p->cf->passive)) | |
dd91e467 | 299 | bgp_active(p); |
11b32d91 OZ |
300 | |
301 | if ((p->p.proto_state == PS_STOP) | |
302 | && (p->outgoing_conn.state == BS_IDLE) | |
303 | && (p->incoming_conn.state == BS_IDLE)) | |
304 | bgp_down(p); | |
305 | } | |
306 | ||
b99d3786 OZ |
307 | void |
308 | bgp_stop(struct bgp_proto *p, unsigned subcode) | |
11b32d91 OZ |
309 | { |
310 | proto_notify_state(&p->p, PS_STOP); | |
b99d3786 OZ |
311 | bgp_graceful_close_conn(&p->outgoing_conn, subcode); |
312 | bgp_graceful_close_conn(&p->incoming_conn, subcode); | |
11b32d91 OZ |
313 | ev_schedule(p->event); |
314 | } | |
315 | ||
316 | void | |
317 | bgp_conn_enter_established_state(struct bgp_conn *conn) | |
318 | { | |
319 | struct bgp_proto *p = conn->bgp; | |
320 | ||
321 | BGP_TRACE(D_EVENTS, "BGP session established"); | |
322 | DBG("BGP: UP!!!\n"); | |
323 | ||
324 | p->conn = conn; | |
325 | p->last_error_class = 0; | |
326 | p->last_error_code = 0; | |
327 | bgp_attr_init(conn->bgp); | |
328 | conn->state = BS_ESTABLISHED; | |
329 | proto_notify_state(&p->p, PS_UP); | |
330 | } | |
331 | ||
332 | static void | |
333 | bgp_conn_leave_established_state(struct bgp_proto *p) | |
334 | { | |
335 | BGP_TRACE(D_EVENTS, "BGP session closed"); | |
336 | p->conn = NULL; | |
337 | ||
338 | if (p->p.proto_state == PS_UP) | |
b99d3786 | 339 | bgp_stop(p, 0); |
11b32d91 OZ |
340 | } |
341 | ||
342 | void | |
343 | bgp_conn_enter_close_state(struct bgp_conn *conn) | |
344 | { | |
345 | struct bgp_proto *p = conn->bgp; | |
346 | int os = conn->state; | |
347 | ||
348 | conn->state = BS_CLOSE; | |
349 | tm_stop(conn->hold_timer); | |
350 | tm_stop(conn->keepalive_timer); | |
351 | conn->sk->rx_hook = NULL; | |
352 | ||
353 | if (os == BS_ESTABLISHED) | |
354 | bgp_conn_leave_established_state(p); | |
355 | } | |
356 | ||
357 | void | |
358 | bgp_conn_enter_idle_state(struct bgp_conn *conn) | |
359 | { | |
360 | struct bgp_proto *p = conn->bgp; | |
361 | int os = conn->state; | |
362 | ||
363 | bgp_close_conn(conn); | |
364 | conn->state = BS_IDLE; | |
365 | ev_schedule(p->event); | |
366 | ||
367 | if (os == BS_ESTABLISHED) | |
368 | bgp_conn_leave_established_state(p); | |
369 | } | |
370 | ||
c01e3741 MM |
371 | static void |
372 | bgp_send_open(struct bgp_conn *conn) | |
373 | { | |
165a6227 OZ |
374 | conn->start_state = conn->bgp->start_state; |
375 | conn->want_as4_support = conn->bgp->cf->enable_as4 && (conn->start_state != BSS_CONNECT_NOCAP); | |
376 | conn->peer_as4_support = 0; // Default value, possibly changed by receiving capability. | |
377 | ||
c01e3741 MM |
378 | DBG("BGP: Sending open\n"); |
379 | conn->sk->rx_hook = bgp_rx; | |
b552ecc4 | 380 | conn->sk->tx_hook = bgp_tx; |
c01e3741 | 381 | tm_stop(conn->connect_retry_timer); |
72a6ef11 | 382 | bgp_schedule_packet(conn, PKT_OPEN); |
c01e3741 | 383 | conn->state = BS_OPENSENT; |
3fdbafb6 | 384 | bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); |
c01e3741 MM |
385 | } |
386 | ||
3fdbafb6 MM |
387 | static void |
388 | bgp_connected(sock *sk) | |
c01e3741 MM |
389 | { |
390 | struct bgp_conn *conn = sk->data; | |
85368cd4 | 391 | struct bgp_proto *p = conn->bgp; |
c01e3741 | 392 | |
85368cd4 | 393 | BGP_TRACE(D_EVENTS, "Connected"); |
c01e3741 | 394 | bgp_send_open(conn); |
c01e3741 MM |
395 | } |
396 | ||
397 | static void | |
398 | bgp_connect_timeout(timer *t) | |
399 | { | |
3fdbafb6 | 400 | struct bgp_conn *conn = t->data; |
85368cd4 | 401 | struct bgp_proto *p = conn->bgp; |
c01e3741 | 402 | |
85368cd4 | 403 | DBG("BGP: connect_timeout\n"); |
11b32d91 OZ |
404 | if (p->p.proto_state == PS_START) |
405 | { | |
406 | bgp_close_conn(conn); | |
407 | bgp_connect(p); | |
408 | } | |
409 | else | |
410 | bgp_conn_enter_idle_state(conn); | |
c01e3741 MM |
411 | } |
412 | ||
413 | static void | |
3fdbafb6 | 414 | bgp_sock_err(sock *sk, int err) |
c01e3741 MM |
415 | { |
416 | struct bgp_conn *conn = sk->data; | |
85368cd4 | 417 | struct bgp_proto *p = conn->bgp; |
c01e3741 | 418 | |
11b32d91 OZ |
419 | bgp_store_error(p, conn, BE_SOCKET, err); |
420 | ||
53943a00 MM |
421 | if (err) |
422 | BGP_TRACE(D_EVENTS, "Connection lost (%M)", err); | |
423 | else | |
424 | BGP_TRACE(D_EVENTS, "Connection closed"); | |
11b32d91 OZ |
425 | |
426 | bgp_conn_enter_idle_state(conn); | |
c01e3741 MM |
427 | } |
428 | ||
3fdbafb6 MM |
429 | static void |
430 | bgp_hold_timeout(timer *t) | |
431 | { | |
432 | struct bgp_conn *conn = t->data; | |
433 | ||
b552ecc4 | 434 | DBG("BGP: Hold timeout, closing connection\n"); |
efcece2d | 435 | bgp_error(conn, 4, 0, NULL, 0); |
3fdbafb6 MM |
436 | } |
437 | ||
438 | static void | |
439 | bgp_keepalive_timeout(timer *t) | |
440 | { | |
441 | struct bgp_conn *conn = t->data; | |
442 | ||
443 | DBG("BGP: Keepalive timer\n"); | |
444 | bgp_schedule_packet(conn, PKT_KEEPALIVE); | |
445 | } | |
446 | ||
c01e3741 | 447 | static void |
6fd766c1 | 448 | bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) |
c01e3741 MM |
449 | { |
450 | timer *t; | |
451 | ||
6fd766c1 | 452 | conn->sk = NULL; |
c01e3741 | 453 | conn->bgp = p; |
72a6ef11 | 454 | conn->packets_to_send = 0; |
c01e3741 MM |
455 | |
456 | t = conn->connect_retry_timer = tm_new(p->p.pool); | |
457 | t->hook = bgp_connect_timeout; | |
3fdbafb6 MM |
458 | t->data = conn; |
459 | t = conn->hold_timer = tm_new(p->p.pool); | |
c01e3741 | 460 | t->hook = bgp_hold_timeout; |
3fdbafb6 MM |
461 | t->data = conn; |
462 | t = conn->keepalive_timer = tm_new(p->p.pool); | |
c01e3741 | 463 | t->hook = bgp_keepalive_timeout; |
3fdbafb6 | 464 | t->data = conn; |
11b32d91 OZ |
465 | conn->tx_ev = ev_new(p->p.pool); |
466 | conn->tx_ev->hook = bgp_kick_tx; | |
467 | conn->tx_ev->data = conn; | |
c01e3741 MM |
468 | } |
469 | ||
6fd766c1 MM |
470 | static void |
471 | bgp_setup_sk(struct bgp_proto *p, struct bgp_conn *conn, sock *s) | |
472 | { | |
473 | s->data = conn; | |
6fd766c1 | 474 | s->err_hook = bgp_sock_err; |
6fd766c1 MM |
475 | conn->sk = s; |
476 | } | |
477 | ||
11b32d91 | 478 | static void |
dd91e467 | 479 | bgp_active(struct bgp_proto *p) |
11b32d91 | 480 | { |
b99d3786 | 481 | int delay = MAX(1, p->cf->start_delay_time); |
11b32d91 OZ |
482 | struct bgp_conn *conn = &p->outgoing_conn; |
483 | ||
484 | BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); | |
485 | bgp_setup_conn(p, conn); | |
486 | conn->state = BS_ACTIVE; | |
487 | bgp_start_timer(conn->connect_retry_timer, delay); | |
488 | } | |
489 | ||
72b28a04 OZ |
490 | int |
491 | bgp_apply_limits(struct bgp_proto *p) | |
492 | { | |
493 | if (p->cf->route_limit && (p->p.stats.imp_routes > p->cf->route_limit)) | |
494 | { | |
495 | log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name); | |
496 | bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED); | |
b99d3786 OZ |
497 | bgp_update_startup_delay(p); |
498 | bgp_stop(p, 1); // Errcode 6, 1 - max number of prefixes reached | |
72b28a04 OZ |
499 | return -1; |
500 | } | |
501 | ||
502 | return 0; | |
503 | } | |
504 | ||
505 | ||
54e55169 MM |
506 | /** |
507 | * bgp_connect - initiate an outgoing connection | |
508 | * @p: BGP instance | |
509 | * | |
510 | * The bgp_connect() function creates a new &bgp_conn and initiates | |
511 | * a TCP connection to the peer. The rest of connection setup is governed | |
512 | * by the BGP state machine as described in the standard. | |
513 | */ | |
c01e3741 MM |
514 | static void |
515 | bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */ | |
516 | { | |
517 | sock *s; | |
b552ecc4 | 518 | struct bgp_conn *conn = &p->outgoing_conn; |
c01e3741 MM |
519 | |
520 | DBG("BGP: Connecting\n"); | |
521 | s = sk_new(p->p.pool); | |
522 | s->type = SK_TCP_ACTIVE; | |
ad440a57 | 523 | s->saddr = p->source_addr; |
c01e3741 | 524 | s->daddr = p->cf->remote_ip; |
c01e3741 | 525 | s->dport = BGP_PORT; |
a39b165e OZ |
526 | s->ttl = p->cf->multihop ? : 1; |
527 | s->rbsize = BGP_RX_BUFFER_SIZE; | |
528 | s->tbsize = BGP_TX_BUFFER_SIZE; | |
529 | s->tos = IP_PREC_INTERNET_CONTROL; | |
530 | s->password = p->cf->password; | |
531 | s->tx_hook = bgp_connected; | |
85368cd4 | 532 | BGP_TRACE(D_EVENTS, "Connecting to %I from local address %I", s->daddr, s->saddr); |
6fd766c1 | 533 | bgp_setup_conn(p, conn); |
c01e3741 MM |
534 | bgp_setup_sk(p, conn, s); |
535 | conn->state = BS_CONNECT; | |
536 | if (sk_open(s)) | |
537 | { | |
3fdbafb6 | 538 | bgp_sock_err(s, 0); |
c01e3741 MM |
539 | return; |
540 | } | |
541 | DBG("BGP: Waiting for connect success\n"); | |
542 | bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time); | |
543 | } | |
544 | ||
54e55169 MM |
545 | /** |
546 | * bgp_incoming_connection - handle an incoming connection | |
547 | * @sk: TCP socket | |
548 | * @dummy: unused | |
549 | * | |
550 | * This function serves as a socket hook for accepting of new BGP | |
551 | * connections. It searches a BGP instance corresponding to the peer | |
552 | * which has connected and if such an instance exists, it creates a | |
553 | * &bgp_conn structure, attaches it to the instance and either sends | |
554 | * an Open message or (if there already is an active connection) it | |
555 | * closes the new connection by sending a Notification message. | |
556 | */ | |
48e842cc | 557 | static int |
e21423ba | 558 | bgp_incoming_connection(sock *sk, int dummy UNUSED) |
c01e3741 | 559 | { |
93d6bf38 | 560 | struct proto_config *pc; |
c01e3741 | 561 | |
48e842cc | 562 | DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport); |
93d6bf38 MM |
563 | WALK_LIST(pc, config->protos) |
564 | if (pc->protocol == &proto_bgp && pc->proto) | |
565 | { | |
566 | struct bgp_proto *p = (struct bgp_proto *) pc->proto; | |
567 | if (ipa_equal(p->cf->remote_ip, sk->daddr)) | |
568 | { | |
dd91e467 OZ |
569 | /* We are in proper state and there is no other incoming connection */ |
570 | int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && | |
571 | (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk); | |
572 | ||
573 | BGP_TRACE(D_EVENTS, "Incoming connection from %I (port %d) %s", | |
574 | sk->daddr, sk->dport, acc ? "accepted" : "rejected"); | |
575 | ||
576 | if (!acc) | |
577 | goto err; | |
578 | ||
579 | bgp_setup_conn(p, &p->incoming_conn); | |
580 | bgp_setup_sk(p, &p->incoming_conn, sk); | |
581 | sk_set_ttl(sk, p->cf->multihop ? : 1); | |
582 | bgp_send_open(&p->incoming_conn); | |
583 | return 0; | |
93d6bf38 MM |
584 | } |
585 | } | |
dd91e467 OZ |
586 | |
587 | log(L_WARN "BGP: Unexpected connect from unknown address %I (port %d)", sk->daddr, sk->dport); | |
588 | err: | |
48e842cc MM |
589 | rfree(sk); |
590 | return 0; | |
591 | } | |
592 | ||
11b32d91 | 593 | static sock * |
789772ed | 594 | bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags) |
48e842cc | 595 | { |
11b32d91 OZ |
596 | sock *s = sk_new(&root_pool); |
597 | DBG("BGP: Creating incoming socket\n"); | |
598 | s->type = SK_TCP_PASSIVE; | |
789772ed OZ |
599 | s->saddr = addr; |
600 | s->sport = port ? port : BGP_PORT; | |
601 | s->flags = flags; | |
11b32d91 OZ |
602 | s->tos = IP_PREC_INTERNET_CONTROL; |
603 | s->rbsize = BGP_RX_BUFFER_SIZE; | |
604 | s->tbsize = BGP_TX_BUFFER_SIZE; | |
605 | s->rx_hook = bgp_incoming_connection; | |
606 | if (sk_open(s)) | |
c01e3741 | 607 | { |
11b32d91 OZ |
608 | log(L_ERR "Unable to open incoming BGP socket"); |
609 | rfree(s); | |
610 | return NULL; | |
c01e3741 | 611 | } |
11b32d91 OZ |
612 | else |
613 | return s; | |
acfce55c MM |
614 | } |
615 | ||
616 | static void | |
617 | bgp_start_neighbor(struct bgp_proto *p) | |
618 | { | |
619 | p->local_addr = p->neigh->iface->addr->ip; | |
ad440a57 OZ |
620 | p->source_addr = ipa_nonzero(p->cf->source_addr) ? p->cf->source_addr : p->local_addr; |
621 | ||
622 | DBG("BGP: local=%I remote=%I\n", p->source_addr, p->next_hop); | |
11d4474c MM |
623 | #ifdef IPV6 |
624 | { | |
625 | struct ifa *a; | |
4827b69f | 626 | p->local_link = IPA_NONE; |
11d4474c MM |
627 | WALK_LIST(a, p->neigh->iface->addrs) |
628 | if (a->scope == SCOPE_LINK) | |
629 | { | |
630 | p->local_link = a->ip; | |
631 | break; | |
632 | } | |
4827b69f OZ |
633 | |
634 | if (! ipa_nonzero(p->local_link)) | |
635 | log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name); | |
636 | ||
11d4474c MM |
637 | DBG("BGP: Selected link-level address %I\n", p->local_link); |
638 | } | |
639 | #endif | |
11b32d91 OZ |
640 | |
641 | int rv = bgp_open(p); | |
642 | if (rv < 0) | |
643 | return; | |
644 | ||
6fd766c1 | 645 | bgp_initiate(p); |
48e842cc MM |
646 | } |
647 | ||
648 | static void | |
649 | bgp_neigh_notify(neighbor *n) | |
650 | { | |
651 | struct bgp_proto *p = (struct bgp_proto *) n->proto; | |
652 | ||
653 | if (n->iface) | |
654 | { | |
11b32d91 OZ |
655 | if ((p->p.proto_state == PS_START) && (p->start_state == BSS_PREPARE)) |
656 | { | |
657 | BGP_TRACE(D_EVENTS, "Neighbor found"); | |
658 | bgp_start_neighbor(p); | |
659 | } | |
48e842cc MM |
660 | } |
661 | else | |
662 | { | |
11b32d91 OZ |
663 | if ((p->p.proto_state == PS_START) || (p->p.proto_state == PS_UP)) |
664 | { | |
665 | BGP_TRACE(D_EVENTS, "Neighbor lost"); | |
666 | bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); | |
b99d3786 | 667 | bgp_stop(p, 0); |
11b32d91 | 668 | } |
48e842cc MM |
669 | } |
670 | } | |
671 | ||
672 | static void | |
673 | bgp_start_locked(struct object_lock *lock) | |
674 | { | |
675 | struct bgp_proto *p = lock->data; | |
676 | struct bgp_config *cf = p->cf; | |
677 | ||
11b32d91 OZ |
678 | if (p->p.proto_state != PS_START) |
679 | { | |
680 | DBG("BGP: Got lock in different state %d\n", p->p.proto_state); | |
681 | return; | |
682 | } | |
683 | ||
48e842cc | 684 | DBG("BGP: Got lock\n"); |
e0d6a7bd | 685 | p->local_id = cf->c.global->router_id; |
48e842cc MM |
686 | p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip; |
687 | p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY); | |
4847a894 OZ |
688 | |
689 | if (cf->rr_client) | |
690 | { | |
691 | p->rr_cluster_id = cf->rr_cluster_id ? cf->rr_cluster_id : p->local_id; | |
692 | p->rr_client = cf->rr_client; | |
693 | } | |
694 | ||
a92fe607 OZ |
695 | p->rs_client = cf->rs_client; |
696 | ||
48e842cc MM |
697 | if (!p->neigh) |
698 | { | |
699 | log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop); | |
11b32d91 | 700 | /* As we do not start yet, we can just disable protocol */ |
48e842cc | 701 | p->p.disabled = 1; |
11b32d91 | 702 | bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); |
48e842cc | 703 | proto_notify_state(&p->p, PS_DOWN); |
11b32d91 | 704 | return; |
48e842cc | 705 | } |
11b32d91 OZ |
706 | |
707 | if (p->neigh->iface) | |
48e842cc MM |
708 | bgp_start_neighbor(p); |
709 | else | |
85368cd4 | 710 | BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", p->next_hop); |
c01e3741 MM |
711 | } |
712 | ||
2638249d MM |
713 | static int |
714 | bgp_start(struct proto *P) | |
715 | { | |
c01e3741 MM |
716 | struct bgp_proto *p = (struct bgp_proto *) P; |
717 | struct object_lock *lock; | |
718 | ||
b552ecc4 | 719 | DBG("BGP: Startup.\n"); |
11b32d91 | 720 | p->start_state = BSS_PREPARE; |
b552ecc4 MM |
721 | p->outgoing_conn.state = BS_IDLE; |
722 | p->incoming_conn.state = BS_IDLE; | |
bcbdcbb6 | 723 | p->neigh = NULL; |
b552ecc4 | 724 | |
11b32d91 OZ |
725 | p->event = ev_new(p->p.pool); |
726 | p->event->hook = bgp_decision; | |
727 | p->event->data = p; | |
79681f4a | 728 | |
dd91e467 OZ |
729 | p->startup_timer = tm_new(p->p.pool); |
730 | p->startup_timer->hook = bgp_startup_timeout; | |
731 | p->startup_timer->data = p; | |
732 | ||
c01e3741 MM |
733 | /* |
734 | * Before attempting to create the connection, we need to lock the | |
735 | * port, so that are sure we're the only instance attempting to talk | |
736 | * with that neighbor. | |
737 | */ | |
738 | ||
c01e3741 MM |
739 | lock = p->lock = olock_new(P->pool); |
740 | lock->addr = p->cf->remote_ip; | |
741 | lock->type = OBJLOCK_TCP; | |
742 | lock->port = BGP_PORT; | |
743 | lock->iface = NULL; | |
744 | lock->hook = bgp_start_locked; | |
745 | lock->data = p; | |
746 | olock_acquire(lock); | |
d51aa281 | 747 | |
c01e3741 | 748 | return PS_START; |
2638249d MM |
749 | } |
750 | ||
751 | static int | |
752 | bgp_shutdown(struct proto *P) | |
753 | { | |
c01e3741 | 754 | struct bgp_proto *p = (struct bgp_proto *) P; |
b99d3786 | 755 | unsigned subcode; |
c01e3741 | 756 | |
85368cd4 | 757 | BGP_TRACE(D_EVENTS, "Shutdown requested"); |
11b32d91 | 758 | bgp_store_error(p, NULL, BE_MAN_DOWN, 0); |
b99d3786 OZ |
759 | |
760 | if (P->reconfiguring) | |
761 | { | |
762 | if (P->cf_new) | |
763 | subcode = 6; // Errcode 6, 6 - other configuration change | |
764 | else | |
765 | subcode = 3; // Errcode 6, 3 - peer de-configured | |
766 | } | |
767 | else | |
768 | subcode = 2; // Errcode 6, 2 - administrative shutdown | |
769 | ||
11b32d91 | 770 | p->startup_delay = 0; |
b99d3786 | 771 | bgp_stop(p, subcode); |
c01e3741 | 772 | |
11b32d91 | 773 | return p->p.proto_state; |
2638249d MM |
774 | } |
775 | ||
48e842cc MM |
776 | static struct proto * |
777 | bgp_init(struct proto_config *C) | |
778 | { | |
779 | struct bgp_config *c = (struct bgp_config *) C; | |
780 | struct proto *P = proto_new(C, sizeof(struct bgp_proto)); | |
781 | struct bgp_proto *p = (struct bgp_proto *) P; | |
782 | ||
23ac9e9a | 783 | P->accept_ra_types = RA_OPTIMAL; |
48e842cc MM |
784 | P->rt_notify = bgp_rt_notify; |
785 | P->rte_better = bgp_rte_better; | |
786 | P->import_control = bgp_import_control; | |
787 | P->neigh_notify = bgp_neigh_notify; | |
788 | p->cf = c; | |
789 | p->local_as = c->local_as; | |
790 | p->remote_as = c->remote_as; | |
791 | p->is_internal = (c->local_as == c->remote_as); | |
48e842cc MM |
792 | return P; |
793 | } | |
794 | ||
54e55169 MM |
795 | /** |
796 | * bgp_error - report a protocol error | |
797 | * @c: connection | |
798 | * @code: error code (according to the RFC) | |
2e9b2421 | 799 | * @subcode: error sub-code |
54e55169 MM |
800 | * @data: data to be passed in the Notification message |
801 | * @len: length of the data | |
802 | * | |
803 | * bgp_error() sends a notification packet to tell the other side that a protocol | |
2e9b2421 | 804 | * error has occurred (including the data considered erroneous if possible) and |
54e55169 MM |
805 | * closes the connection. |
806 | */ | |
3fdbafb6 | 807 | void |
efcece2d | 808 | bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len) |
3fdbafb6 | 809 | { |
b99d3786 OZ |
810 | struct bgp_proto *p = c->bgp; |
811 | ||
11b32d91 | 812 | if (c->state == BS_CLOSE) |
3fdbafb6 | 813 | return; |
11b32d91 | 814 | |
b99d3786 OZ |
815 | bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len); |
816 | bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode); | |
11b32d91 OZ |
817 | bgp_conn_enter_close_state(c); |
818 | ||
3fdbafb6 MM |
819 | c->notify_code = code; |
820 | c->notify_subcode = subcode; | |
efcece2d MM |
821 | c->notify_data = data; |
822 | c->notify_size = (len > 0) ? len : 0; | |
3fdbafb6 | 823 | bgp_schedule_packet(c, PKT_NOTIFICATION); |
b99d3786 OZ |
824 | |
825 | if (code != 6) | |
826 | { | |
827 | bgp_update_startup_delay(p); | |
828 | bgp_stop(p, 0); | |
829 | } | |
3fdbafb6 MM |
830 | } |
831 | ||
11b32d91 OZ |
832 | /** |
833 | * bgp_store_error - store last error for status report | |
834 | * @p: BGP instance | |
835 | * @c: connection | |
836 | * @class: error class (BE_xxx constants) | |
837 | * @code: error code (class specific) | |
838 | * | |
839 | * bgp_store_error() decides whether given error is interesting enough | |
840 | * and store that error to last_error variables of @p | |
841 | */ | |
842 | void | |
843 | bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code) | |
844 | { | |
845 | /* During PS_UP, we ignore errors on secondary connection */ | |
846 | if ((p->p.proto_state == PS_UP) && c && (c != p->conn)) | |
847 | return; | |
848 | ||
849 | /* During PS_STOP, we ignore any errors, as we want to report | |
850 | * the error that caused transition to PS_STOP | |
851 | */ | |
852 | if (p->p.proto_state == PS_STOP) | |
853 | return; | |
854 | ||
855 | p->last_error_class = class; | |
856 | p->last_error_code = code; | |
857 | } | |
858 | ||
2638249d MM |
859 | void |
860 | bgp_check(struct bgp_config *c) | |
861 | { | |
862 | if (!c->local_as) | |
863 | cf_error("Local AS number must be set"); | |
e8ba557c | 864 | |
2638249d MM |
865 | if (!c->remote_as) |
866 | cf_error("Neighbor must be configured"); | |
e8ba557c | 867 | |
ba5ed6f3 | 868 | if (!bgp_as4_support && c->enable_as4) |
11b32d91 | 869 | cf_error("AS4 support disabled globally"); |
e8ba557c OZ |
870 | |
871 | if (!bgp_as4_support && (c->local_as > 0xFFFF)) | |
11cb6202 | 872 | cf_error("Local AS number out of range"); |
e8ba557c OZ |
873 | |
874 | if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF)) | |
875 | cf_error("Neighbor AS number out of range (AS4 not available)"); | |
876 | ||
4847a894 OZ |
877 | if ((c->local_as != c->remote_as) && (c->rr_client)) |
878 | cf_error("Only internal neighbor can be RR client"); | |
e8ba557c | 879 | |
a92fe607 OZ |
880 | if ((c->local_as == c->remote_as) && (c->rs_client)) |
881 | cf_error("Only external neighbor can be RS client"); | |
2638249d MM |
882 | } |
883 | ||
/* Names of connection states, indexed by the state of a connection */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Prefixes of status messages, indexed by the last error class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Descriptions of BE_MISC errors, indexed by the last error code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed"
};

/* Descriptions of BE_AUTO_DOWN errors, indexed by the last error code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
11b32d91 OZ |
888 | |
889 | ||
42532f08 | 890 | static void |
973399ae MM |
891 | bgp_get_status(struct proto *P, byte *buf) |
892 | { | |
893 | struct bgp_proto *p = (struct bgp_proto *) P; | |
894 | ||
11b32d91 OZ |
895 | const byte *err1 = bgp_err_classes[p->last_error_class]; |
896 | const byte *err2 = ""; | |
897 | byte errbuf[32]; | |
898 | ||
899 | switch (p->last_error_class) | |
900 | { | |
901 | case BE_MISC: | |
902 | err2 = bgp_misc_errors[p->last_error_code]; | |
903 | break; | |
904 | case BE_SOCKET: | |
905 | err2 = (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code); | |
906 | break; | |
907 | case BE_BGP_RX: | |
908 | case BE_BGP_TX: | |
909 | err2 = bgp_error_dsc(errbuf, p->last_error_code >> 16, p->last_error_code & 0xFF); | |
910 | break; | |
72b28a04 OZ |
911 | case BE_AUTO_DOWN: |
912 | err2 = bgp_auto_errors[p->last_error_code]; | |
913 | break; | |
11b32d91 OZ |
914 | } |
915 | ||
f4ab2317 | 916 | if (P->proto_state == PS_DOWN) |
11b32d91 | 917 | bsprintf(buf, "%s%s", err1, err2); |
f4ab2317 | 918 | else |
11b32d91 OZ |
919 | bsprintf(buf, "%-14s%s%s", |
920 | bgp_state_names[MAX(p->incoming_conn.state, p->outgoing_conn.state)], | |
921 | err1, err2); | |
973399ae MM |
922 | } |
923 | ||
42532f08 MM |
924 | static int |
925 | bgp_reconfigure(struct proto *P, struct proto_config *C) | |
926 | { | |
927 | struct bgp_config *new = (struct bgp_config *) C; | |
928 | struct bgp_proto *p = (struct bgp_proto *) P; | |
929 | struct bgp_config *old = p->cf; | |
930 | ||
59121155 OZ |
931 | int same = !memcmp(((byte *) old) + sizeof(struct proto_config), |
932 | ((byte *) new) + sizeof(struct proto_config), | |
70670bf3 OZ |
933 | // password item is last and must be checked separately |
934 | OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config)) | |
935 | && ((!old->password && !new->password) | |
936 | || (old->password && new->password && !strcmp(old->password, new->password))); | |
59121155 OZ |
937 | |
938 | /* We should update our copy of configuration ptr as old configuration will be freed */ | |
939 | if (same) | |
940 | p->cf = new; | |
941 | ||
942 | return same; | |
42532f08 MM |
943 | } |
944 | ||
2638249d MM |
945 | struct protocol proto_bgp = { |
946 | name: "BGP", | |
947 | template: "bgp%d", | |
10be74da | 948 | attr_class: EAP_BGP, |
2638249d MM |
949 | init: bgp_init, |
950 | start: bgp_start, | |
951 | shutdown: bgp_shutdown, | |
973399ae | 952 | get_status: bgp_get_status, |
10be74da | 953 | get_attr: bgp_get_attr, |
42532f08 | 954 | reconfigure: bgp_reconfigure, |
2638249d | 955 | get_route_info: bgp_get_route_info, |
2638249d | 956 | }; |