]> git.ipfire.org Git - thirdparty/bird.git/blame - proto/bgp/packets.c
BGP: Mandatory option for channels
[thirdparty/bird.git] / proto / bgp / packets.c
CommitLineData
c01e3741
MM
1/*
2 * BIRD -- BGP Packet Processing
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
d15b0b0a
OZ
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
c01e3741
MM
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
85368cd4 11#undef LOCAL_DEBUG
72a6ef11 12
d15b0b0a
OZ
13#include <stdlib.h>
14
c01e3741
MM
15#include "nest/bird.h"
16#include "nest/iface.h"
17#include "nest/protocol.h"
18#include "nest/route.h"
11cb6202 19#include "nest/attrs.h"
863ecfc7 20#include "proto/mrt/mrt.h"
c01e3741 21#include "conf/conf.h"
72a6ef11 22#include "lib/unaligned.h"
ac3ad139 23#include "lib/flowspec.h"
72a6ef11 24#include "lib/socket.h"
c01e3741 25
11b32d91
OZ
26#include "nest/cli.h"
27
c01e3741 28#include "bgp.h"
72a6ef11 29
9aed29e6
OZ
30
/* BGP_RR_* subtype values (presumably route refresh message subtypes -- confirm against users) */
#define BGP_RR_REQUEST		0
#define BGP_RR_BEGIN		1
#define BGP_RR_END		2

/* Upper bound used for NLRI buffers: 4 + 1 + 32 bytes */
#define BGP_NLRI_MAX		(4 + 1 + 32)

#define BGP_MPLS_BOS		1	/* Bottom-of-stack bit */
#define BGP_MPLS_MAX		10	/* Max number of labels that 24*n <= 255 */
#define BGP_MPLS_NULL		3	/* Implicit NULL label */
#define BGP_MPLS_MAGIC		0x800000 /* Magic withdraw label value, RFC 3107 3 */
41
9aed29e6 42
1123e707
OZ
43static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
44static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
cb530392 45
ac574513
OZ
46/* Table for state -> RFC 6608 FSM error subcodes */
47static byte fsm_err_subcode[BS_MAX] = {
48 [BS_OPENSENT] = 1,
49 [BS_OPENCONFIRM] = 2,
50 [BS_ESTABLISHED] = 3
51};
52
d15b0b0a
OZ
53
54static struct bgp_channel *
55bgp_get_channel(struct bgp_proto *p, u32 afi)
56{
57 uint i;
58
59 for (i = 0; i < p->channel_count; i++)
60 if (p->afi_map[i] == afi)
61 return p->channel_map[i];
62
63 return NULL;
64}
65
66static inline void
67put_af3(byte *buf, u32 id)
68{
69 put_u16(buf, id >> 16);
70 buf[2] = id & 0xff;
71}
72
73static inline void
74put_af4(byte *buf, u32 id)
75{
76 put_u16(buf, id >> 16);
77 buf[2] = 0;
78 buf[3] = id & 0xff;
79}
80
81static inline u32
82get_af3(byte *buf)
83{
84 return (get_u16(buf) << 16) | buf[2];
85}
86
87static inline u32
88get_af4(byte *buf)
89{
90 return (get_u16(buf) << 16) | buf[3];
91}
92
863ecfc7
OZ
93static void
94init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
cf31112f
OZ
95{
96 struct bgp_proto *p = conn->bgp;
863ecfc7 97 int p_ok = conn->state >= BS_OPENCONFIRM;
cf31112f 98
863ecfc7
OZ
99 memset(d, 0, sizeof(struct mrt_bgp_data));
100 d->peer_as = p->remote_as;
101 d->local_as = p->local_as;
102 d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
103 d->af = ipa_is_ip4(p->cf->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
104 d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
105 d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
106 d->as4 = p_ok ? p->as4_session : 0;
107}
cf31112f 108
863ecfc7 109static uint bgp_find_update_afi(byte *pos, uint len);
d15b0b0a 110
863ecfc7
OZ
111static int
112bgp_estimate_add_path(struct bgp_proto *p, byte *pkt, uint len)
113{
114 /* No need to estimate it for other messages than UPDATE */
115 if (pkt[18] != PKT_UPDATE)
116 return 0;
117
118 /* 1 -> no channel, 2 -> all channels, 3 -> some channels */
119 if (p->summary_add_path_rx < 3)
120 return p->summary_add_path_rx == 2;
121
122 uint afi = bgp_find_update_afi(pkt, len);
123 struct bgp_channel *c = bgp_get_channel(p, afi);
124 if (!c)
d15b0b0a 125 {
863ecfc7
OZ
126 /* Either frame error (if !afi) or unknown AFI/SAFI,
127 will be reported later in regular parsing */
128 BGP_TRACE(D_PACKETS, "MRT processing noticed invalid packet");
129 return 0;
d15b0b0a 130 }
cf31112f 131
863ecfc7 132 return c->add_path_rx;
cf31112f
OZ
133}
134
135static void
863ecfc7 136bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len)
cf31112f 137{
863ecfc7
OZ
138 struct mrt_bgp_data d;
139 init_mrt_bgp_data(conn, &d);
cf31112f 140
863ecfc7
OZ
141 d.message = pkt;
142 d.msg_len = len;
143 d.add_path = bgp_estimate_add_path(conn->bgp, pkt, len);
cf31112f 144
863ecfc7 145 mrt_dump_bgp_message(&d);
cf31112f
OZ
146}
147
148void
863ecfc7 149bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new)
cf31112f 150{
863ecfc7
OZ
151 struct mrt_bgp_data d;
152 init_mrt_bgp_data(conn, &d);
153
154 d.old_state = old;
155 d.new_state = new;
cf31112f 156
863ecfc7 157 mrt_dump_bgp_state_change(&d);
cf31112f
OZ
158}
159
72a6ef11
MM
160static byte *
161bgp_create_notification(struct bgp_conn *conn, byte *buf)
162{
85368cd4
MM
163 struct bgp_proto *p = conn->bgp;
164
165 BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
72a6ef11
MM
166 buf[0] = conn->notify_code;
167 buf[1] = conn->notify_subcode;
efcece2d
MM
168 memcpy(buf+2, conn->notify_data, conn->notify_size);
169 return buf + 2 + conn->notify_size;
72a6ef11
MM
170}
171
e3299ab1 172
d15b0b0a 173/* Capability negotiation as per RFC 5492 */
e3299ab1 174
d15b0b0a
OZ
175const struct bgp_af_caps *
176bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
bf47fe4b 177{
d15b0b0a 178 struct bgp_af_caps *ac;
bf47fe4b 179
d15b0b0a
OZ
180 WALK_AF_CAPS(caps, ac)
181 if (ac->afi == afi)
182 return ac;
183
184 return NULL;
52e21323
OZ
185}
186
d15b0b0a
OZ
187static struct bgp_af_caps *
188bgp_get_af_caps(struct bgp_caps *caps, u32 afi)
0c791f87 189{
d15b0b0a 190 struct bgp_af_caps *ac;
0c791f87 191
d15b0b0a
OZ
192 WALK_AF_CAPS(caps, ac)
193 if (ac->afi == afi)
194 return ac;
0c791f87 195
d15b0b0a
OZ
196 ac = &caps->af_data[caps->af_count++];
197 memset(ac, 0, sizeof(struct bgp_af_caps));
198 ac->afi = afi;
0c791f87 199
d15b0b0a 200 return ac;
0c791f87
OZ
201}
202
d15b0b0a
OZ
203static int
204bgp_af_caps_cmp(const void *X, const void *Y)
0c791f87 205{
d15b0b0a
OZ
206 const struct bgp_af_caps *x = X, *y = Y;
207 return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
0c791f87
OZ
208}
209
11cb6202 210
094d2bdb 211static byte *
d15b0b0a 212bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
094d2bdb 213{
d15b0b0a
OZ
214 struct bgp_proto *p = conn->bgp;
215 struct bgp_channel *c;
216 struct bgp_caps *caps;
217 struct bgp_af_caps *ac;
d8022d26 218 uint any_ext_next_hop = 0;
d15b0b0a 219 uint any_add_path = 0;
7e5f769d 220 byte *buf_head = buf;
d15b0b0a 221 byte *data;
094d2bdb 222
d15b0b0a 223 /* Prepare bgp_caps structure */
094d2bdb 224
d15b0b0a
OZ
225 int n = list_length(&p->p.channels);
226 caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
227 conn->local_caps = caps;
094d2bdb 228
d15b0b0a
OZ
229 caps->as4_support = p->cf->enable_as4;
230 caps->ext_messages = p->cf->enable_extended_messages;
231 caps->route_refresh = p->cf->enable_refresh;
232 caps->enhanced_refresh = p->cf->enable_refresh;
094d2bdb 233
d15b0b0a
OZ
234 if (caps->as4_support)
235 caps->as4_number = p->public_as;
9aed29e6 236
d15b0b0a
OZ
237 if (p->cf->gr_mode)
238 {
239 caps->gr_aware = 1;
240 caps->gr_time = p->cf->gr_time;
241 caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
242 }
9aed29e6 243
5bd73431
OZ
244 if (p->cf->llgr_mode)
245 caps->llgr_aware = 1;
246
d15b0b0a
OZ
247 /* Allocate and fill per-AF fields */
248 WALK_LIST(c, p->p.channels)
249 {
250 ac = &caps->af_data[caps->af_count++];
251 ac->afi = c->afi;
252 ac->ready = 1;
85368cd4 253
d8022d26
OZ
254 ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
255 any_ext_next_hop |= ac->ext_next_hop;
256
d15b0b0a
OZ
257 ac->add_path = c->cf->add_path;
258 any_add_path |= ac->add_path;
165a6227 259
d15b0b0a 260 if (c->cf->gr_able)
165a6227 261 {
d15b0b0a
OZ
262 ac->gr_able = 1;
263
264 if (p->p.gr_recovery)
265 ac->gr_af_flags |= BGP_GRF_FORWARDING;
165a6227 266 }
5bd73431
OZ
267
268 if (c->cf->llgr_able)
269 {
270 ac->llgr_able = 1;
271 ac->llgr_time = c->cf->llgr_time;
272
273 if (p->p.gr_recovery)
274 ac->llgr_flags |= BGP_LLGRF_FORWARDING;
275 }
d15b0b0a 276 }
165a6227 277
d15b0b0a
OZ
278 /* Sort capability fields by AFI/SAFI */
279 qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
11cb6202 280
e3299ab1 281
d15b0b0a 282 /* Create capability list in buffer */
e8ba557c 283
f8aad5d5 284 /*
5bd73431
OZ
285 * Note that max length is ~ 22+21*af_count. With max 12 channels that is
286 * 274. Option limit is 253 and buffer size is 4096, so we cannot overflow
287 * unless we add new capabilities or more AFs. XXXXX
f8aad5d5
OZ
288 */
289
d15b0b0a
OZ
290 WALK_AF_CAPS(caps, ac)
291 if (ac->ready)
292 {
293 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
294 *buf++ = 4; /* Capability data length */
295 put_af4(buf, ac->afi);
296 buf += 4;
297 }
0c791f87 298
d15b0b0a
OZ
299 if (caps->route_refresh)
300 {
301 *buf++ = 2; /* Capability 2: Support for route refresh */
302 *buf++ = 0; /* Capability data length */
303 }
bf47fe4b 304
d8022d26
OZ
305 if (any_ext_next_hop)
306 {
307 *buf++ = 5; /* Capability 5: Support for extended next hop */
308 *buf++ = 0; /* Capability data length, will be fixed later */
309 data = buf;
310
311 WALK_AF_CAPS(caps, ac)
312 if (ac->ext_next_hop)
313 {
314 put_af4(buf, ac->afi);
315 put_u16(buf+4, BGP_AFI_IPV6);
316 buf += 6;
317 }
318
319 data[-1] = buf - data;
320 }
321
d15b0b0a
OZ
322 if (caps->ext_messages)
323 {
324 *buf++ = 6; /* Capability 6: Support for extended messages */
325 *buf++ = 0; /* Capability data length */
326 }
11cb6202 327
d15b0b0a
OZ
328 if (caps->gr_aware)
329 {
330 *buf++ = 64; /* Capability 64: Support for graceful restart */
331 *buf++ = 0; /* Capability data length, will be fixed later */
332 data = buf;
094d2bdb 333
d15b0b0a
OZ
334 put_u16(buf, caps->gr_time);
335 buf[0] |= caps->gr_flags;
336 buf += 2;
9aed29e6 337
d15b0b0a
OZ
338 WALK_AF_CAPS(caps, ac)
339 if (ac->gr_able)
340 {
341 put_af3(buf, ac->afi);
342 buf[3] = ac->gr_af_flags;
343 buf += 4;
344 }
06e0d1b6 345
d15b0b0a
OZ
346 data[-1] = buf - data;
347 }
72a6ef11 348
d15b0b0a
OZ
349 if (caps->as4_support)
350 {
351 *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
352 *buf++ = 4; /* Capability data length */
353 put_u32(buf, p->public_as);
354 buf += 4;
355 }
f421cfdd 356
d15b0b0a
OZ
357 if (any_add_path)
358 {
359 *buf++ = 69; /* Capability 69: Support for ADD-PATH */
360 *buf++ = 0; /* Capability data length, will be fixed later */
361 data = buf;
094d2bdb 362
d15b0b0a
OZ
363 WALK_AF_CAPS(caps, ac)
364 if (ac->add_path)
365 {
366 put_af3(buf, ac->afi);
367 buf[3] = ac->add_path;
368 buf += 4;
369 }
094d2bdb 370
d15b0b0a
OZ
371 data[-1] = buf - data;
372 }
373
374 if (caps->enhanced_refresh)
375 {
376 *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
377 *buf++ = 0; /* Capability data length */
378 }
379
5bd73431
OZ
380 if (caps->llgr_aware)
381 {
382 *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */
383 *buf++ = 0; /* Capability data length, will be fixed later */
384 data = buf;
385
386 WALK_AF_CAPS(caps, ac)
387 if (ac->llgr_able)
388 {
389 put_af3(buf, ac->afi);
390 buf[3] = ac->llgr_flags;
391 put_u24(buf+4, ac->llgr_time);
392 buf += 7;
393 }
394
395 data[-1] = buf - data;
396 }
397
7e5f769d
OZ
398 caps->length = buf - buf_head;
399
d15b0b0a 400 return buf;
f421cfdd
MM
401}
402
82a79586 403static void
d15b0b0a 404bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len)
82a79586 405{
d15b0b0a
OZ
406 struct bgp_proto *p = conn->bgp;
407 struct bgp_af_caps *ac;
408 int i, cl;
409 u32 af;
82a79586 410
7e5f769d
OZ
411 caps->length += len;
412
d15b0b0a
OZ
413 while (len > 0)
414 {
415 if (len < 2 || len < (2 + pos[1]))
416 goto err;
1c1da87b 417
d15b0b0a
OZ
418 /* Capability length */
419 cl = pos[1];
3fdbafb6 420
d15b0b0a
OZ
421 /* Capability type */
422 switch (pos[0])
f421cfdd 423 {
d15b0b0a
OZ
424 case 1: /* Multiprotocol capability, RFC 4760 */
425 if (cl != 4)
426 goto err;
427
428 af = get_af4(pos+2);
429 ac = bgp_get_af_caps(caps, af);
430 ac->ready = 1;
431 break;
432
433 case 2: /* Route refresh capability, RFC 2918 */
434 if (cl != 0)
435 goto err;
436
437 caps->route_refresh = 1;
438 break;
439
d8022d26
OZ
440 case 5: /* Extended next hop encoding capability, RFC 5549 */
441 if (cl % 6)
442 goto err;
443
444 for (i = 0; i < cl; i += 6)
445 {
446 /* Specified only for IPv4 prefixes with IPv6 next hops */
447 if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) ||
448 (get_u16(pos+2+i+4) != BGP_AFI_IPV6))
449 continue;
450
451 af = get_af4(pos+2+i);
452 ac = bgp_get_af_caps(caps, af);
453 ac->ext_next_hop = 1;
454 }
455 break;
456
d15b0b0a
OZ
457 case 6: /* Extended message length capability, RFC draft */
458 if (cl != 0)
459 goto err;
460
461 caps->ext_messages = 1;
462 break;
463
464 case 64: /* Graceful restart capability, RFC 4724 */
465 if (cl % 4 != 2)
466 goto err;
467
468 /* Only the last instance is valid */
469 WALK_AF_CAPS(caps, ac)
470 {
471 ac->gr_able = 0;
472 ac->gr_af_flags = 0;
473 }
474
475 caps->gr_aware = 1;
476 caps->gr_flags = pos[2] & 0xf0;
477 caps->gr_time = get_u16(pos + 2) & 0x0fff;
478
479 for (i = 2; i < cl; i += 4)
480 {
481 af = get_af3(pos+2+i);
482 ac = bgp_get_af_caps(caps, af);
483 ac->gr_able = 1;
484 ac->gr_af_flags = pos[2+i+3];
485 }
486 break;
487
c49e4a65 488 case 65: /* AS4 capability, RFC 6793 */
d15b0b0a
OZ
489 if (cl != 4)
490 goto err;
491
492 caps->as4_support = 1;
493 caps->as4_number = get_u32(pos + 2);
494 break;
495
496 case 69: /* ADD-PATH capability, RFC 7911 */
497 if (cl % 4)
498 goto err;
499
500 for (i = 0; i < cl; i += 4)
501 {
502 byte val = pos[2+i+3];
503 if (!val || (val > BGP_ADD_PATH_FULL))
f421cfdd 504 {
d15b0b0a
OZ
505 log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
506 p->p.name, val);
f421cfdd
MM
507 break;
508 }
d15b0b0a 509 }
72a6ef11 510
d15b0b0a
OZ
511 for (i = 0; i < cl; i += 4)
512 {
513 af = get_af3(pos+2+i);
514 ac = bgp_get_af_caps(caps, af);
515 ac->add_path = pos[2+i+3];
516 }
517 break;
0c791f87 518
d15b0b0a
OZ
519 case 70: /* Enhanced route refresh capability, RFC 7313 */
520 if (cl != 0)
521 goto err;
0c791f87 522
d15b0b0a
OZ
523 caps->enhanced_refresh = 1;
524 break;
1c1da87b 525
5bd73431
OZ
526 case 71: /* Long lived graceful restart capability, RFC draft */
527 if (cl % 7)
528 goto err;
529
530 /* Presumably, only the last instance is valid */
531 WALK_AF_CAPS(caps, ac)
532 {
533 ac->llgr_able = 0;
534 ac->llgr_flags = 0;
535 ac->llgr_time = 0;
536 }
537
538 caps->llgr_aware = 1;
539
540 for (i = 0; i < cl; i += 7)
541 {
542 af = get_af3(pos+2+i);
543 ac = bgp_get_af_caps(caps, af);
544 ac->llgr_able = 1;
545 ac->llgr_flags = pos[2+i+3];
546 ac->llgr_time = get_u24(pos + 2+i+4);
547 }
548 break;
549
d15b0b0a
OZ
550 /* We can safely ignore all other capabilities */
551 }
552
553 ADVANCE(pos, len, 2 + cl);
554 }
5bd73431
OZ
555
556 /* The LLGR capability must be advertised together with the GR capability,
557 otherwise it must be disregarded */
558 if (!caps->gr_aware && caps->llgr_aware)
559 {
560 caps->llgr_aware = 0;
561 WALK_AF_CAPS(caps, ac)
562 {
563 ac->llgr_able = 0;
564 ac->llgr_flags = 0;
565 ac->llgr_time = 0;
566 }
567 }
568
d15b0b0a
OZ
569 return;
570
571err:
572 bgp_error(conn, 2, 0, NULL, 0);
573 return;
53ffbff3
OZ
574}
575
3c360581
OZ
576static int
577bgp_check_capabilities(struct bgp_conn *conn)
578{
579 struct bgp_proto *p = conn->bgp;
580 struct bgp_caps *local = conn->local_caps;
581 struct bgp_caps *remote = conn->remote_caps;
582 struct bgp_channel *c;
583 int count = 0;
584
585 /* This is partially overlapping with bgp_conn_enter_established_state(),
586 but we need to run this just after we receive OPEN message */
587
588 WALK_LIST(c, p->p.channels)
589 {
590 const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
591 const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi);
592
593 /* Find out whether this channel will be active */
594 int active = loc && loc->ready &&
595 ((rem && rem->ready) || (!remote->length && (c->afi == BGP_AF_IPV4)));
596
597 /* Mandatory must be active */
598 if (c->cf->mandatory && !active)
599 return 0;
600
601 if (active)
602 count++;
603 }
604
605 /* We need at least one channel active */
606 if (!count)
607 return 0;
608
609 return 1;
610}
611
d15b0b0a
OZ
612static int
613bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
1c1da87b 614{
cf3d6470 615 struct bgp_proto *p = conn->bgp;
d15b0b0a
OZ
616 struct bgp_caps *caps;
617 int ol;
cf3d6470 618
d15b0b0a
OZ
619 /* Max number of announced AFIs is limited by max option length (255) */
620 caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps));
621 memset(caps, 0, sizeof(struct bgp_caps));
cf3d6470 622
d15b0b0a
OZ
623 while (len > 0)
624 {
625 if ((len < 2) || (len < (2 + pos[1])))
626 { bgp_error(conn, 2, 0, NULL, 0); return -1; }
627
628 ol = pos[1];
629 if (pos[0] == 2)
cf3d6470 630 {
d15b0b0a
OZ
631 /* BGP capabilities, RFC 5492 */
632 if (p->cf->capabilities)
633 bgp_read_capabilities(conn, caps, pos + 2, ol);
cf3d6470 634 }
d15b0b0a 635 else
cf3d6470 636 {
d15b0b0a
OZ
637 /* Unknown option */
638 bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */
639 return -1;
cf3d6470
MM
640 }
641
d15b0b0a
OZ
642 ADVANCE(pos, len, 2 + ol);
643 }
644
645 uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps);
646 conn->remote_caps = mb_allocz(p->p.pool, n);
647 memcpy(conn->remote_caps, caps, n);
648
649 return 0;
1c1da87b
MM
650}
651
0c791f87 652static byte *
d15b0b0a 653bgp_create_open(struct bgp_conn *conn, byte *buf)
0c791f87
OZ
654{
655 struct bgp_proto *p = conn->bgp;
0c791f87 656
d15b0b0a
OZ
657 BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
658 BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);
0c791f87 659
d15b0b0a
OZ
660 buf[0] = BGP_VERSION;
661 put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
662 put_u16(buf+3, p->cf->hold_time);
663 put_u32(buf+5, p->local_id);
0c791f87 664
d15b0b0a
OZ
665 if (p->cf->capabilities)
666 {
667 /* Prepare local_caps and write capabilities to buffer */
668 byte *end = bgp_write_capabilities(conn, buf+12);
669 uint len = end - (buf+12);
1c1da87b 670
d15b0b0a
OZ
671 buf[9] = len + 2; /* Optional parameters length */
672 buf[10] = 2; /* Option 2: Capability list */
673 buf[11] = len; /* Option data length */
bf47fe4b 674
d15b0b0a
OZ
675 return end;
676 }
677 else
678 {
679 /* Prepare empty local_caps */
680 conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
bf47fe4b 681
d15b0b0a
OZ
682 buf[9] = 0; /* No optional parameters */
683 return buf + 10;
684 }
9aed29e6 685
9aed29e6
OZ
686 return buf;
687}
688
d15b0b0a
OZ
689static void
690bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
9aed29e6
OZ
691{
692 struct bgp_proto *p = conn->bgp;
d15b0b0a
OZ
693 struct bgp_conn *other;
694 u32 asn, hold, id;
9aed29e6 695
d15b0b0a
OZ
696 /* Check state */
697 if (conn->state != BS_OPENSENT)
698 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
9aed29e6 699
d15b0b0a
OZ
700 /* Check message contents */
701 if (len < 29 || len != 29 + (uint) pkt[28])
702 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
9aed29e6 703
d15b0b0a
OZ
704 if (pkt[19] != BGP_VERSION)
705 { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; }
72a6ef11 706
d15b0b0a
OZ
707 asn = get_u16(pkt+20);
708 hold = get_u16(pkt+22);
709 id = get_u32(pkt+24);
710 BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);
711
712 if (bgp_read_options(conn, pkt+29, pkt[28]) < 0)
713 return;
714
715 if (hold > 0 && hold < 3)
716 { bgp_error(conn, 2, 6, pkt+22, 2); return; }
72a6ef11 717
d15b0b0a
OZ
718 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
719 if (!id || (p->is_internal && id == p->local_id))
720 { bgp_error(conn, 2, 3, pkt+24, -4); return; }
721
3c360581
OZ
722 /* RFC 5492 4 - check for required capabilities */
723 if (p->cf->capabilities && !bgp_check_capabilities(conn))
724 { bgp_error(conn, 2, 7, NULL, 0); return; }
725
d15b0b0a
OZ
726 struct bgp_caps *caps = conn->remote_caps;
727
728 if (caps->as4_support)
729 {
730 u32 as4 = caps->as4_number;
731
732 if ((as4 != asn) && (asn != AS_TRANS))
733 log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
734
735 if (as4 != p->remote_as)
736 { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
737 }
738 else
739 {
740 if (asn != p->remote_as)
741 { bgp_error(conn, 2, 2, pkt+20, 2); return; }
742 }
743
744 /* Check the other connection */
745 other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
746 switch (other->state)
747 {
748 case BS_CONNECT:
749 case BS_ACTIVE:
750 /* Stop outgoing connection attempts */
751 bgp_conn_enter_idle_state(other);
752 break;
753
754 case BS_IDLE:
755 case BS_OPENSENT:
756 case BS_CLOSE:
757 break;
758
759 case BS_OPENCONFIRM:
760 /*
761 * Description of collision detection rules in RFC 4271 is confusing and
762 * contradictory, but it is essentially:
763 *
764 * 1. Router with higher ID is dominant
765 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
766 * 3. When both connections are in OpenConfirm state, one initiated by
767 * the dominant router is kept.
768 *
769 * The first line in the expression below evaluates whether the neighbor
770 * is dominant, the second line whether the new connection was initiated
771 * by the neighbor. If both are true (or both are false), we keep the new
772 * connection, otherwise we keep the old one.
773 */
774 if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
775 == (conn == &p->incoming_conn))
0c3588bf 776 {
d15b0b0a
OZ
777 /* Should close the other connection */
778 BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
779 bgp_error(other, 6, 7, NULL, 0);
780 break;
0c3588bf 781 }
d15b0b0a
OZ
782 /* Fall thru */
783 case BS_ESTABLISHED:
784 /* Should close this connection */
785 BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
786 bgp_error(conn, 6, 7, NULL, 0);
787 return;
0c3588bf 788
d15b0b0a
OZ
789 default:
790 bug("bgp_rx_open: Unknown state");
791 }
792
793 /* Update our local variables */
794 conn->hold_time = MIN(hold, p->cf->hold_time);
795 conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
796 conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
797 conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
798 p->remote_id = id;
799
800 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
801 conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
802
803 bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
804 bgp_start_timer(conn->hold_timer, conn->hold_time);
805 bgp_conn_enter_openconfirm_state(conn);
806}
807
808
809/*
810 * Next hop handling
811 */
812
/*
 * Error reporting helpers for next hop / NLRI processing. All of them expect
 * a variable named 's' in scope with s->proto set. DISCARD and WITHDRAW
 * contain a bare 'return', so they can be used only in functions returning
 * void.
 */

/* Log a message prefixed with the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log a message and leave the calling function */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log a message, set s->err_withdraw and leave the calling function */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Message texts used with the macros above */
#define BAD_AFI		"Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP	"Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP	"Missing NEXT_HOP attribute"
#define NO_LABEL_STACK	"Missing MPLS stack"
d15b0b0a
OZ
826
827
828static void
829bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
830{
831 struct bgp_proto *p = s->proto;
832 struct bgp_channel *c = s->channel;
833
834 if (c->cf->gw_mode == GW_DIRECT)
835 {
836 neighbor *nbr = NULL;
837
838 /* GW_DIRECT -> single_hop -> p->neigh != NULL */
839 if (ipa_nonzero(gw))
586c1800 840 nbr = neigh_find(&p->p, gw, NULL, 0);
d15b0b0a 841 else if (ipa_nonzero(ll))
586c1800 842 nbr = neigh_find(&p->p, ll, p->neigh->iface, 0);
d15b0b0a
OZ
843
844 if (!nbr || (nbr->scope == SCOPE_HOST))
845 WITHDRAW(BAD_NEXT_HOP);
846
4e276a89 847 a->dest = RTD_UNICAST;
1e37e35c
OZ
848 a->nh.gw = nbr->addr;
849 a->nh.iface = nbr->iface;
d15b0b0a
OZ
850 }
851 else /* GW_RECURSIVE */
852 {
853 if (ipa_zero(gw))
854 WITHDRAW(BAD_NEXT_HOP);
855
ef57b70f
OZ
856 rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
857 s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
1e37e35c
OZ
858
859 if (!s->mpls)
860 rta_apply_hostentry(a, s->hostentry, NULL);
861
862 /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
863 }
864}
865
866static void
867bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
868{
869 if (lnum > MPLS_MAX_LABEL_STACK)
870 {
871 REPORT("Too many MPLS labels ($u)", lnum);
872
873 a->dest = RTD_UNREACHABLE;
874 a->hostentry = NULL;
875 a->nh = (struct nexthop) { };
876 return;
877 }
878
879 /* Handle implicit NULL as empty MPLS stack */
880 if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
881 lnum = 0;
882
883 if (s->channel->cf->gw_mode == GW_DIRECT)
884 {
885 a->nh.labels = lnum;
886 memcpy(a->nh.label, labels, 4*lnum);
887 }
888 else /* GW_RECURSIVE */
889 {
890 mpls_label_stack ms;
891
892 ms.len = lnum;
893 memcpy(ms.stack, labels, 4*lnum);
894 rta_apply_hostentry(a, s->hostentry, &ms);
d15b0b0a
OZ
895 }
896}
897
1e37e35c 898
1cab2b4a
OZ
899static int
900bgp_match_src(struct bgp_export_state *s, int mode)
901{
902 switch (mode)
903 {
904 case NH_NO: return 0;
905 case NH_ALL: return 1;
906 case NH_IBGP: return s->src && s->src->is_internal;
907 case NH_EBGP: return s->src && !s->src->is_internal;
908 default: return 0;
909 }
910}
911
d15b0b0a
OZ
912static inline int
913bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
914{
915 struct bgp_proto *p = s->proto;
cb311b44 916 struct bgp_channel *c = s->channel;
d15b0b0a
OZ
917 ip_addr *nh = (void *) a->u.ptr->data;
918
1cab2b4a
OZ
919 /* Handle next hop self option */
920 if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
d15b0b0a
OZ
921 return 0;
922
1cab2b4a
OZ
923 /* Handle next hop keep option */
924 if (c->cf->next_hop_keep && bgp_match_src(s, c->cf->next_hop_keep))
d15b0b0a
OZ
925 return 1;
926
927 /* Keep it when explicitly set in export filter */
928 if (a->type & EAF_FRESH)
929 return 1;
930
cb311b44
OZ
931 /* Check for non-matching AF */
932 if ((ipa_is_ip4(*nh) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
933 return 0;
934
d15b0b0a
OZ
935 /* Keep it when exported to internal peers */
936 if (p->is_interior && ipa_nonzero(*nh))
937 return 1;
938
939 /* Keep it when forwarded between single-hop BGPs on the same iface */
940 struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL;
941 return p->neigh && (p->neigh->iface == ifa);
942}
943
944static inline int
945bgp_use_gateway(struct bgp_export_state *s)
946{
947 struct bgp_proto *p = s->proto;
cb311b44 948 struct bgp_channel *c = s->channel;
d15b0b0a
OZ
949 rta *ra = s->route->attrs;
950
1cab2b4a
OZ
951 /* Handle next hop self option - also applies to gateway */
952 if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
d15b0b0a
OZ
953 return 0;
954
62e64905
OZ
955 /* We need one valid global gateway */
956 if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
d15b0b0a
OZ
957 return 0;
958
cb311b44
OZ
959 /* Check for non-matching AF */
960 if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
961 return 0;
962
d15b0b0a
OZ
963 /* Use it when exported to internal peers */
964 if (p->is_interior)
965 return 1;
966
967 /* Use it when forwarded to single-hop BGP peer on on the same iface */
4e276a89 968 return p->neigh && (p->neigh->iface == ra->nh.iface);
d15b0b0a
OZ
969}
970
971static void
972bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
973{
974 if (!a || !bgp_use_next_hop(s, a))
975 {
976 if (bgp_use_gateway(s))
72a6ef11 977 {
1e37e35c
OZ
978 rta *ra = s->route->attrs;
979 ip_addr nh[1] = { ra->nh.gw };
d15b0b0a 980 bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
1e37e35c
OZ
981
982 if (s->mpls)
983 {
984 u32 implicit_null = BGP_MPLS_NULL;
985 u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
986 uint lnum = ra->nh.labels ? ra->nh.labels : 1;
987 bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
988 }
72a6ef11 989 }
d15b0b0a 990 else
72a6ef11 991 {
d15b0b0a
OZ
992 ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
993 bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
1e37e35c
OZ
994
995 /* TODO: Use local MPLS assigned label */
996 if (s->mpls)
8e86ffce
OZ
997 {
998 u32 implicit_null = BGP_MPLS_NULL;
999 bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
1000 }
72a6ef11 1001 }
d15b0b0a
OZ
1002 }
1003
1004 /* Check if next hop is valid */
1005 a = bgp_find_attr(*to, BA_NEXT_HOP);
1006 if (!a)
1007 WITHDRAW(NO_NEXT_HOP);
1008
1009 ip_addr *nh = (void *) a->u.ptr->data;
1010 ip_addr peer = s->proto->cf->remote_ip;
1011 uint len = a->u.ptr->length;
1012
ef57b70f 1013 /* Forbid zero next hop */
d15b0b0a
OZ
1014 if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
1015 WITHDRAW(BAD_NEXT_HOP);
1016
ef57b70f 1017 /* Forbid next hop equal to neighbor IP */
d15b0b0a
OZ
1018 if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
1019 WITHDRAW(BAD_NEXT_HOP);
1e37e35c 1020
ef57b70f
OZ
1021 /* Forbid next hop with non-matching AF */
1022 if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
1023 !s->channel->ext_next_hop)
1024 WITHDRAW(BAD_NEXT_HOP);
1025
1e37e35c
OZ
1026 /* Just check if MPLS stack */
1027 if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
1028 WITHDRAW(NO_LABEL_STACK);
d15b0b0a
OZ
1029}
1030
ef57b70f
OZ
1031static uint
1032bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
1033{
1034 /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
1035 ip_addr *nh = (void *) a->u.ptr->data;
1036 uint len = a->u.ptr->length;
1037
1038 ASSERT((len == 16) || (len == 32));
1039
1040 /*
1041 * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
1042 * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
1043 * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
1044 * IPv6 address with IPv6 NLRI.
1045 */
1046
1047 if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
1048 {
1049 put_ip4(buf, ipa_to_ip4(nh[0]));
1050 return 4;
1051 }
1052
1053 put_ip6(buf, ipa_to_ip6(nh[0]));
1054
1055 if (len == 32)
1056 put_ip6(buf+16, ipa_to_ip6(nh[1]));
1057
1058 return len;
1059}
1060
1061static void
1062bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
1063{
1064 struct bgp_channel *c = s->channel;
1065 struct adata *ad = lp_alloc_adata(s->pool, 32);
1066 ip_addr *nh = (void *) ad->data;
1067
1068 if (len == 4)
1069 {
1070 nh[0] = ipa_from_ip4(get_ip4(data));
1071 nh[1] = IPA_NONE;
1072 }
1073 else if (len == 16)
1074 {
1075 nh[0] = ipa_from_ip6(get_ip6(data));
1076 nh[1] = IPA_NONE;
1077
1078 if (ipa_is_link_local(nh[0]))
1079 { nh[1] = nh[0]; nh[0] = IPA_NONE; }
1080 }
1081 else if (len == 32)
1082 {
1083 nh[0] = ipa_from_ip6(get_ip6(data));
1084 nh[1] = ipa_from_ip6(get_ip6(data+16));
1085
1086 if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
1087 nh[1] = IPA_NONE;
1088 }
1089 else
1090 bgp_parse_error(s, 9);
1091
1092 if (ipa_zero(nh[1]))
1093 ad->length = 16;
1094
1095 if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
1096 WITHDRAW(BAD_NEXT_HOP);
1097
1098 // XXXX validate next hop
1099
1100 bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
1101 bgp_apply_next_hop(s, a, nh[0], nh[1]);
1102}
1103
1104static uint
1105bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
1106{
1107 ip_addr *nh = (void *) a->u.ptr->data;
1108 uint len = a->u.ptr->length;
1109
1110 ASSERT((len == 16) || (len == 32));
1111
1112 /*
1113 * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
1114 * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
1115 * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
1116 * IPv6 address with VPNv6 NLRI.
1117 */
1118
1119 if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
1120 {
1121 put_u64(buf, 0); /* VPN RD is 0 */
1122 put_ip4(buf+8, ipa_to_ip4(nh[0]));
1123 return 12;
1124 }
1125
1126 put_u64(buf, 0); /* VPN RD is 0 */
1127 put_ip6(buf+8, ipa_to_ip6(nh[0]));
1128
1129 if (len == 16)
1130 return 24;
1131
1132 put_u64(buf+24, 0); /* VPN RD is 0 */
1133 put_ip6(buf+32, ipa_to_ip6(nh[1]));
1134
1135 return 48;
1136}
1137
1138static void
1139bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
1140{
1141 struct bgp_channel *c = s->channel;
1142 struct adata *ad = lp_alloc_adata(s->pool, 32);
1143 ip_addr *nh = (void *) ad->data;
1144
1145 if (len == 12)
1146 {
1147 nh[0] = ipa_from_ip4(get_ip4(data+8));
1148 nh[1] = IPA_NONE;
1149 }
1150 else if (len == 24)
1151 {
1152 nh[0] = ipa_from_ip6(get_ip6(data+8));
1153 nh[1] = IPA_NONE;
1154
1155 if (ipa_is_link_local(nh[0]))
1156 { nh[1] = nh[0]; nh[0] = IPA_NONE; }
1157 }
1158 else if (len == 48)
1159 {
1160 nh[0] = ipa_from_ip6(get_ip6(data+8));
1161 nh[1] = ipa_from_ip6(get_ip6(data+32));
1162
1163 if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
1164 nh[1] = IPA_NONE;
1165 }
1166 else
1167 bgp_parse_error(s, 9);
1168
1169 if (ipa_zero(nh[1]))
1170 ad->length = 16;
1171
1172 /* XXXX which error */
1173 if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
1174 bgp_parse_error(s, 9);
1175
1176 if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
1177 WITHDRAW(BAD_NEXT_HOP);
1178
1179 // XXXX validate next hop
1180
1181 bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
1182 bgp_apply_next_hop(s, a, nh[0], nh[1]);
1183}
1184
1185
1186
ac3ad139
OZ
1187static uint
1188bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
1189{
ac3ad139
OZ
1190 return 0;
1191}
1192
1193static void
1194bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
1195{
82f42ea0
OZ
1196 /*
1197 * Although we expect no next hop and RFC 7606 7.11 states that attribute
1198 * MP_REACH_NLRI with unexpected next hop length is considered malformed,
1199 * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
1200 */
1201
ac3ad139
OZ
1202 return;
1203}
1204
1205static void
82f42ea0 1206bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
ac3ad139 1207{
82f42ea0
OZ
1208 /* NEXT_HOP shall not pass */
1209 if (a)
1210 bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
ac3ad139
OZ
1211}
1212
d15b0b0a
OZ
1213
1214/*
1215 * UPDATE
1216 */
1217
1218static void
1219bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
1220{
1221 if (path_id != s->last_id)
1222 {
1223 s->last_src = rt_get_source(&s->proto->p, path_id);
1224 s->last_id = path_id;
1225
1226 rta_free(s->cached_rta);
1227 s->cached_rta = NULL;
1228 }
1229
1230 if (!a0)
1231 {
1232 /* Route withdraw */
682d3f7d 1233 rte_update3(&s->channel->c, n, NULL, s->last_src);
d15b0b0a
OZ
1234 return;
1235 }
1236
1237 /* Prepare cached route attributes */
1238 if (s->cached_rta == NULL)
1239 {
1240 a0->src = s->last_src;
1241
1242 /* Workaround for rta_lookup() breaking eattrs */
1243 ea_list *ea = a0->eattrs;
1244 s->cached_rta = rta_lookup(a0);
1245 a0->eattrs = ea;
1246 }
1247
1248 rta *a = rta_clone(s->cached_rta);
1249 rte *e = rte_get_temp(a);
1250
1251 e->pflags = 0;
1252 e->u.bgp.suppressed = 0;
5bd73431 1253 e->u.bgp.stale = -1;
682d3f7d 1254 rte_update3(&s->channel->c, n, e, s->last_src);
d15b0b0a
OZ
1255}
1256
1e37e35c
OZ
1257static void
1258bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen)
1259{
1260 u32 dummy = 0;
1261 u32 *labels = mpls ? (u32 *) mpls->data : &dummy;
1262 uint lnum = mpls ? (mpls->length / 4) : 1;
1263
1264 for (uint i = 0; i < lnum; i++)
1265 {
1266 put_u24(*pos, labels[i] << 4);
1267 ADVANCE(*pos, *size, 3);
1268 }
1269
1270 /* Add bottom-of-stack flag */
1271 (*pos)[-1] |= BGP_MPLS_BOS;
1272
1273 *pxlen += 24 * lnum;
1274}
1275
1276static void
1277bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
1278{
1279 u32 labels[BGP_MPLS_MAX], label;
1280 uint lnum = 0;
1281
1282 do {
1283 if (*pxlen < 24)
1284 bgp_parse_error(s, 1);
1285
1286 label = get_u24(*pos);
1287 labels[lnum++] = label >> 4;
1288 ADVANCE(*pos, *len, 3);
1289 *pxlen -= 24;
d15b0b0a 1290
93c1defd
OZ
1291 /* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but
1292 fixed-size 24-bit Compatibility field, which MUST be ignored */
1293 if (!a && !s->err_withdraw)
1294 return;
1e37e35c
OZ
1295 }
1296 while (!(label & BGP_MPLS_BOS));
1297
1298 if (!a)
1299 return;
1300
1301 /* Attach MPLS attribute unless we already have one */
1302 if (!s->mpls_labels)
1303 {
1304 s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
1305 bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
1306 }
1307
1308 /* Overwrite data in the attribute */
1309 s->mpls_labels->length = 4*lnum;
1310 memcpy(s->mpls_labels->data, labels, 4*lnum);
1311
1312 /* Update next hop entry in rta */
1313 bgp_apply_mpls_labels(s, a, labels, lnum);
1314
1315 /* Attributes were changed, invalidate cached entry */
1316 rta_free(s->cached_rta);
1317 s->cached_rta = NULL;
1318
1319 return;
1320}
d15b0b0a
OZ
1321
1322static uint
1323bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1324{
1325 byte *pos = buf;
1326
1e37e35c 1327 while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
d15b0b0a
OZ
1328 {
1329 struct bgp_prefix *px = HEAD(buck->prefixes);
1330 struct net_addr_ip4 *net = (void *) px->net;
1331
1332 /* Encode path ID */
1333 if (s->add_path)
72a6ef11 1334 {
d15b0b0a
OZ
1335 put_u32(pos, px->path_id);
1336 ADVANCE(pos, size, 4);
72a6ef11 1337 }
d15b0b0a 1338
d15b0b0a
OZ
1339 /* Encode prefix length */
1340 *pos = net->pxlen;
1341 ADVANCE(pos, size, 1);
1342
1e37e35c
OZ
1343 /* Encode MPLS labels */
1344 if (s->mpls)
1345 bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1346
d15b0b0a 1347 /* Encode prefix body */
1e37e35c
OZ
1348 ip4_addr a = ip4_hton(net->prefix);
1349 uint b = (net->pxlen + 7) / 8;
d15b0b0a
OZ
1350 memcpy(pos, &a, b);
1351 ADVANCE(pos, size, b);
1352
1353 bgp_free_prefix(s->channel, px);
1354 }
1355
1356 return pos - buf;
1357}
1358
1359static void
1360bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1361{
1362 while (len)
1363 {
1364 net_addr_ip4 net;
1365 u32 path_id = 0;
1366
1367 /* Decode path ID */
1368 if (s->add_path)
72a6ef11 1369 {
d15b0b0a
OZ
1370 if (len < 5)
1371 bgp_parse_error(s, 1);
1372
1373 path_id = get_u32(pos);
1374 ADVANCE(pos, len, 4);
72a6ef11 1375 }
d15b0b0a
OZ
1376
1377 /* Decode prefix length */
1378 uint l = *pos;
d15b0b0a
OZ
1379 ADVANCE(pos, len, 1);
1380
1e37e35c
OZ
1381 if (len < ((l + 7) / 8))
1382 bgp_parse_error(s, 1);
1383
1384 /* Decode MPLS labels */
1385 if (s->mpls)
1386 bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1387
d15b0b0a
OZ
1388 if (l > IP4_MAX_PREFIX_LENGTH)
1389 bgp_parse_error(s, 10);
1390
d15b0b0a
OZ
1391 /* Decode prefix body */
1392 ip4_addr addr = IP4_NONE;
1e37e35c 1393 uint b = (l + 7) / 8;
d15b0b0a
OZ
1394 memcpy(&addr, pos, b);
1395 ADVANCE(pos, len, b);
1396
1397 net = NET_ADDR_IP4(ip4_ntoh(addr), l);
1398 net_normalize_ip4(&net);
1399
1400 // XXXX validate prefix
1401
1402 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1403 }
1404}
1405
d15b0b0a
OZ
1406
1407static uint
1408bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1409{
1410 byte *pos = buf;
1411
1e37e35c 1412 while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
d15b0b0a
OZ
1413 {
1414 struct bgp_prefix *px = HEAD(buck->prefixes);
1415 struct net_addr_ip6 *net = (void *) px->net;
1416
1417 /* Encode path ID */
1418 if (s->add_path)
bf47fe4b 1419 {
d15b0b0a
OZ
1420 put_u32(pos, px->path_id);
1421 ADVANCE(pos, size, 4);
bf47fe4b 1422 }
d15b0b0a 1423
d15b0b0a
OZ
1424 /* Encode prefix length */
1425 *pos = net->pxlen;
1426 ADVANCE(pos, size, 1);
1427
1e37e35c
OZ
1428 /* Encode MPLS labels */
1429 if (s->mpls)
1430 bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1431
d15b0b0a 1432 /* Encode prefix body */
1e37e35c
OZ
1433 ip6_addr a = ip6_hton(net->prefix);
1434 uint b = (net->pxlen + 7) / 8;
d15b0b0a
OZ
1435 memcpy(pos, &a, b);
1436 ADVANCE(pos, size, b);
1437
1438 bgp_free_prefix(s->channel, px);
1439 }
1440
1441 return pos - buf;
1442}
1443
1444static void
1445bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1446{
1447 while (len)
1448 {
1449 net_addr_ip6 net;
1450 u32 path_id = 0;
1451
1452 /* Decode path ID */
1453 if (s->add_path)
9aed29e6 1454 {
d15b0b0a
OZ
1455 if (len < 5)
1456 bgp_parse_error(s, 1);
1457
1458 path_id = get_u32(pos);
1459 ADVANCE(pos, len, 4);
9aed29e6 1460 }
0c791f87 1461
d15b0b0a
OZ
1462 /* Decode prefix length */
1463 uint l = *pos;
d15b0b0a 1464 ADVANCE(pos, len, 1);
9aed29e6 1465
1e37e35c
OZ
1466 if (len < ((l + 7) / 8))
1467 bgp_parse_error(s, 1);
1468
1469 /* Decode MPLS labels */
1470 if (s->mpls)
1471 bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1472
d15b0b0a
OZ
1473 if (l > IP6_MAX_PREFIX_LENGTH)
1474 bgp_parse_error(s, 10);
0c791f87 1475
d15b0b0a
OZ
1476 /* Decode prefix body */
1477 ip6_addr addr = IP6_NONE;
1e37e35c 1478 uint b = (l + 7) / 8;
d15b0b0a
OZ
1479 memcpy(&addr, pos, b);
1480 ADVANCE(pos, len, b);
9aed29e6 1481
d15b0b0a
OZ
1482 net = NET_ADDR_IP6(ip6_ntoh(addr), l);
1483 net_normalize_ip6(&net);
0c791f87 1484
d15b0b0a 1485 // XXXX validate prefix
9aed29e6 1486
d15b0b0a
OZ
1487 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1488 }
72a6ef11
MM
1489}
1490
1e37e35c
OZ
1491static uint
1492bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1493{
1494 byte *pos = buf;
1495
1496 while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
1497 {
1498 struct bgp_prefix *px = HEAD(buck->prefixes);
1499 struct net_addr_vpn4 *net = (void *) px->net;
1500
1501 /* Encode path ID */
1502 if (s->add_path)
1503 {
1504 put_u32(pos, px->path_id);
1505 ADVANCE(pos, size, 4);
1506 }
1507
1508 /* Encode prefix length */
01111fc4 1509 *pos = 64 + net->pxlen;
1e37e35c
OZ
1510 ADVANCE(pos, size, 1);
1511
1512 /* Encode MPLS labels */
711d617d
OZ
1513 if (s->mpls)
1514 bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1e37e35c
OZ
1515
1516 /* Encode route distinguisher */
1517 put_u64(pos, net->rd);
1518 ADVANCE(pos, size, 8);
1519
1520 /* Encode prefix body */
1521 ip4_addr a = ip4_hton(net->prefix);
1522 uint b = (net->pxlen + 7) / 8;
1523 memcpy(pos, &a, b);
1524 ADVANCE(pos, size, b);
1525
1526 bgp_free_prefix(s->channel, px);
1527 }
1528
1529 return pos - buf;
1530}
1531
1532static void
1533bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1534{
1535 while (len)
1536 {
1537 net_addr_vpn4 net;
1538 u32 path_id = 0;
1539
1540 /* Decode path ID */
1541 if (s->add_path)
1542 {
1543 if (len < 5)
1544 bgp_parse_error(s, 1);
1545
1546 path_id = get_u32(pos);
1547 ADVANCE(pos, len, 4);
1548 }
1549
1550 /* Decode prefix length */
1551 uint l = *pos;
1552 ADVANCE(pos, len, 1);
1553
1554 if (len < ((l + 7) / 8))
1555 bgp_parse_error(s, 1);
1556
1557 /* Decode MPLS labels */
711d617d
OZ
1558 if (s->mpls)
1559 bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1e37e35c
OZ
1560
1561 /* Decode route distinguisher */
1562 if (l < 64)
1563 bgp_parse_error(s, 1);
1564
1565 u64 rd = get_u64(pos);
1566 ADVANCE(pos, len, 8);
1567 l -= 64;
1568
1569 if (l > IP4_MAX_PREFIX_LENGTH)
1570 bgp_parse_error(s, 10);
1571
1572 /* Decode prefix body */
1573 ip4_addr addr = IP4_NONE;
1574 uint b = (l + 7) / 8;
1575 memcpy(&addr, pos, b);
1576 ADVANCE(pos, len, b);
1577
1578 net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd);
1579 net_normalize_vpn4(&net);
1580
1581 // XXXX validate prefix
1582
1583 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1584 }
1585}
1586
1e37e35c
OZ
1587
1588static uint
1589bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1590{
1591 byte *pos = buf;
1592
1593 while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
1594 {
1595 struct bgp_prefix *px = HEAD(buck->prefixes);
1596 struct net_addr_vpn6 *net = (void *) px->net;
1597
1598 /* Encode path ID */
1599 if (s->add_path)
1600 {
1601 put_u32(pos, px->path_id);
1602 ADVANCE(pos, size, 4);
1603 }
1604
1605 /* Encode prefix length */
01111fc4 1606 *pos = 64 + net->pxlen;
1e37e35c
OZ
1607 ADVANCE(pos, size, 1);
1608
1609 /* Encode MPLS labels */
49c7ef3b
OZ
1610 if (s->mpls)
1611 bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1e37e35c
OZ
1612
1613 /* Encode route distinguisher */
1614 put_u64(pos, net->rd);
1615 ADVANCE(pos, size, 8);
1616
1617 /* Encode prefix body */
1618 ip6_addr a = ip6_hton(net->prefix);
1619 uint b = (net->pxlen + 7) / 8;
1620 memcpy(pos, &a, b);
1621 ADVANCE(pos, size, b);
1622
1623 bgp_free_prefix(s->channel, px);
1624 }
1625
1626 return pos - buf;
1627}
1628
1629static void
1630bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1631{
1632 while (len)
1633 {
1634 net_addr_vpn6 net;
1635 u32 path_id = 0;
1636
1637 /* Decode path ID */
1638 if (s->add_path)
1639 {
1640 if (len < 5)
1641 bgp_parse_error(s, 1);
1642
1643 path_id = get_u32(pos);
1644 ADVANCE(pos, len, 4);
1645 }
1646
1647 /* Decode prefix length */
1648 uint l = *pos;
1649 ADVANCE(pos, len, 1);
1650
1651 if (len < ((l + 7) / 8))
1652 bgp_parse_error(s, 1);
1653
1654 /* Decode MPLS labels */
1655 if (s->mpls)
1656 bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1657
1658 /* Decode route distinguisher */
1659 if (l < 64)
1660 bgp_parse_error(s, 1);
1661
1662 u64 rd = get_u64(pos);
1663 ADVANCE(pos, len, 8);
1664 l -= 64;
1665
1666 if (l > IP6_MAX_PREFIX_LENGTH)
1667 bgp_parse_error(s, 10);
1668
1669 /* Decode prefix body */
1670 ip6_addr addr = IP6_NONE;
1671 uint b = (l + 7) / 8;
1672 memcpy(&addr, pos, b);
1673 ADVANCE(pos, len, b);
1674
1675 net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd);
1676 net_normalize_vpn6(&net);
1677
1678 // XXXX validate prefix
1679
1680 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1681 }
1682}
1683
1e37e35c 1684
ac3ad139
OZ
1685static uint
1686bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1687{
1688 byte *pos = buf;
1689
1690 while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
1691 {
1692 struct bgp_prefix *px = HEAD(buck->prefixes);
1693 struct net_addr_flow4 *net = (void *) px->net;
1694 uint flen = net->length - sizeof(net_addr_flow4);
1695
1696 /* Encode path ID */
1697 if (s->add_path)
1698 {
1699 put_u32(pos, px->path_id);
1700 ADVANCE(pos, size, 4);
1701 }
1702
1703 if (flen > size)
1704 break;
1705
1706 /* Copy whole flow data including length */
1707 memcpy(pos, net->data, flen);
1708 ADVANCE(pos, size, flen);
1709
1710 bgp_free_prefix(s->channel, px);
1711 }
1712
1713 return pos - buf;
1714}
1715
1716static void
1717bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1718{
1719 while (len)
1720 {
1721 u32 path_id = 0;
1722
1723 /* Decode path ID */
1724 if (s->add_path)
1725 {
1726 if (len < 4)
1727 bgp_parse_error(s, 1);
1728
1729 path_id = get_u32(pos);
1730 ADVANCE(pos, len, 4);
1731 }
1732
1733 if (len < 2)
1734 bgp_parse_error(s, 1);
1735
1736 /* Decode flow length */
1737 uint hlen = flow_hdr_length(pos);
1738 uint dlen = flow_read_length(pos);
1739 uint flen = hlen + dlen;
1740 byte *data = pos + hlen;
1741
1742 if (len < flen)
1743 bgp_parse_error(s, 1);
1744
1745 /* Validate flow data */
1746 enum flow_validated_state r = flow4_validate(data, dlen);
1747 if (r != FLOW_ST_VALID)
1748 {
1749 log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
1750 bgp_parse_error(s, 1);
1751 }
1752
1753 if (data[0] != FLOW_TYPE_DST_PREFIX)
1754 {
1755 log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
1756 bgp_parse_error(s, 1);
1757 }
1758
1759 /* Decode dst prefix */
1760 ip4_addr px = IP4_NONE;
1761 uint pxlen = data[1];
1762
1763 // FIXME: Use some generic function
7fc55925
OZ
1764 memcpy(&px, data+2, BYTES(pxlen));
1765 px = ip4_and(ip4_ntoh(px), ip4_mkmask(pxlen));
ac3ad139
OZ
1766
1767 /* Prepare the flow */
1768 net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
1769 net_fill_flow4(n, px, pxlen, pos, flen);
1770 ADVANCE(pos, len, flen);
1771
1772 bgp_rte_update(s, n, path_id, a);
1773 }
1774}
1775
1776
1777static uint
1778bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1779{
1780 byte *pos = buf;
1781
1782 while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
1783 {
1784 struct bgp_prefix *px = HEAD(buck->prefixes);
1785 struct net_addr_flow6 *net = (void *) px->net;
1786 uint flen = net->length - sizeof(net_addr_flow6);
1787
1788 /* Encode path ID */
1789 if (s->add_path)
1790 {
1791 put_u32(pos, px->path_id);
1792 ADVANCE(pos, size, 4);
1793 }
1794
1795 if (flen > size)
1796 break;
1797
1798 /* Copy whole flow data including length */
1799 memcpy(pos, net->data, flen);
1800 ADVANCE(pos, size, flen);
1801
1802 bgp_free_prefix(s->channel, px);
1803 }
1804
1805 return pos - buf;
1806}
1807
1808static void
1809bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1810{
1811 while (len)
1812 {
1813 u32 path_id = 0;
1814
1815 /* Decode path ID */
1816 if (s->add_path)
1817 {
1818 if (len < 4)
1819 bgp_parse_error(s, 1);
1820
1821 path_id = get_u32(pos);
1822 ADVANCE(pos, len, 4);
1823 }
1824
1825 if (len < 2)
1826 bgp_parse_error(s, 1);
1827
1828 /* Decode flow length */
1829 uint hlen = flow_hdr_length(pos);
1830 uint dlen = flow_read_length(pos);
1831 uint flen = hlen + dlen;
1832 byte *data = pos + hlen;
1833
1834 if (len < flen)
1835 bgp_parse_error(s, 1);
1836
1837 /* Validate flow data */
1838 enum flow_validated_state r = flow6_validate(data, dlen);
1839 if (r != FLOW_ST_VALID)
1840 {
1841 log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
1842 bgp_parse_error(s, 1);
1843 }
1844
1845 if (data[0] != FLOW_TYPE_DST_PREFIX)
1846 {
1847 log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
1848 bgp_parse_error(s, 1);
1849 }
1850
1851 /* Decode dst prefix */
1852 ip6_addr px = IP6_NONE;
1853 uint pxlen = data[1];
1854
1855 // FIXME: Use some generic function
7fc55925
OZ
1856 memcpy(&px, data+2, BYTES(pxlen));
1857 px = ip6_and(ip6_ntoh(px), ip6_mkmask(pxlen));
ac3ad139
OZ
1858
1859 /* Prepare the flow */
1860 net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
1861 net_fill_flow6(n, px, pxlen, pos, flen);
1862 ADVANCE(pos, len, flen);
1863
1864 bgp_rte_update(s, n, path_id, a);
1865 }
1866}
1867
1868
d15b0b0a
OZ
1869static const struct bgp_af_desc bgp_af_table[] = {
1870 {
1871 .afi = BGP_AF_IPV4,
1872 .net = NET_IP4,
1873 .name = "ipv4",
1874 .encode_nlri = bgp_encode_nlri_ip4,
1875 .decode_nlri = bgp_decode_nlri_ip4,
ef57b70f
OZ
1876 .encode_next_hop = bgp_encode_next_hop_ip,
1877 .decode_next_hop = bgp_decode_next_hop_ip,
d15b0b0a
OZ
1878 .update_next_hop = bgp_update_next_hop_ip,
1879 },
1880 {
1881 .afi = BGP_AF_IPV4_MC,
1882 .net = NET_IP4,
1883 .name = "ipv4-mc",
1884 .encode_nlri = bgp_encode_nlri_ip4,
1885 .decode_nlri = bgp_decode_nlri_ip4,
ef57b70f
OZ
1886 .encode_next_hop = bgp_encode_next_hop_ip,
1887 .decode_next_hop = bgp_decode_next_hop_ip,
d15b0b0a
OZ
1888 .update_next_hop = bgp_update_next_hop_ip,
1889 },
ac3ad139 1890 {
1e37e35c
OZ
1891 .afi = BGP_AF_IPV4_MPLS,
1892 .net = NET_IP4,
1893 .mpls = 1,
1894 .name = "ipv4-mpls",
1895 .encode_nlri = bgp_encode_nlri_ip4,
1896 .decode_nlri = bgp_decode_nlri_ip4,
ef57b70f
OZ
1897 .encode_next_hop = bgp_encode_next_hop_ip,
1898 .decode_next_hop = bgp_decode_next_hop_ip,
1e37e35c 1899 .update_next_hop = bgp_update_next_hop_ip,
ac3ad139 1900 },
d15b0b0a
OZ
1901 {
1902 .afi = BGP_AF_IPV6,
1903 .net = NET_IP6,
1904 .name = "ipv6",
1905 .encode_nlri = bgp_encode_nlri_ip6,
1906 .decode_nlri = bgp_decode_nlri_ip6,
ef57b70f
OZ
1907 .encode_next_hop = bgp_encode_next_hop_ip,
1908 .decode_next_hop = bgp_decode_next_hop_ip,
d15b0b0a
OZ
1909 .update_next_hop = bgp_update_next_hop_ip,
1910 },
1911 {
1912 .afi = BGP_AF_IPV6_MC,
1913 .net = NET_IP6,
1914 .name = "ipv6-mc",
1915 .encode_nlri = bgp_encode_nlri_ip6,
1916 .decode_nlri = bgp_decode_nlri_ip6,
ef57b70f
OZ
1917 .encode_next_hop = bgp_encode_next_hop_ip,
1918 .decode_next_hop = bgp_decode_next_hop_ip,
d15b0b0a
OZ
1919 .update_next_hop = bgp_update_next_hop_ip,
1920 },
1e37e35c
OZ
1921 {
1922 .afi = BGP_AF_IPV6_MPLS,
1923 .net = NET_IP6,
1924 .mpls = 1,
1925 .name = "ipv6-mpls",
1926 .encode_nlri = bgp_encode_nlri_ip6,
1927 .decode_nlri = bgp_decode_nlri_ip6,
ef57b70f
OZ
1928 .encode_next_hop = bgp_encode_next_hop_ip,
1929 .decode_next_hop = bgp_decode_next_hop_ip,
1e37e35c
OZ
1930 .update_next_hop = bgp_update_next_hop_ip,
1931 },
1932 {
1933 .afi = BGP_AF_VPN4_MPLS,
1934 .net = NET_VPN4,
1935 .mpls = 1,
1936 .name = "vpn4-mpls",
1937 .encode_nlri = bgp_encode_nlri_vpn4,
1938 .decode_nlri = bgp_decode_nlri_vpn4,
ef57b70f
OZ
1939 .encode_next_hop = bgp_encode_next_hop_vpn,
1940 .decode_next_hop = bgp_decode_next_hop_vpn,
1e37e35c
OZ
1941 .update_next_hop = bgp_update_next_hop_ip,
1942 },
1943 {
1944 .afi = BGP_AF_VPN6_MPLS,
1945 .net = NET_VPN6,
1946 .mpls = 1,
1947 .name = "vpn6-mpls",
1948 .encode_nlri = bgp_encode_nlri_vpn6,
1949 .decode_nlri = bgp_decode_nlri_vpn6,
ef57b70f
OZ
1950 .encode_next_hop = bgp_encode_next_hop_vpn,
1951 .decode_next_hop = bgp_decode_next_hop_vpn,
1e37e35c
OZ
1952 .update_next_hop = bgp_update_next_hop_ip,
1953 },
711d617d
OZ
1954 {
1955 .afi = BGP_AF_VPN4_MC,
1956 .net = NET_VPN4,
1957 .name = "vpn4-mc",
1958 .encode_nlri = bgp_encode_nlri_vpn4,
1959 .decode_nlri = bgp_decode_nlri_vpn4,
1960 .encode_next_hop = bgp_encode_next_hop_vpn,
1961 .decode_next_hop = bgp_decode_next_hop_vpn,
1962 .update_next_hop = bgp_update_next_hop_ip,
1963 },
1964 {
1965 .afi = BGP_AF_VPN6_MC,
1966 .net = NET_VPN6,
1967 .name = "vpn6-mc",
1968 .encode_nlri = bgp_encode_nlri_vpn6,
1969 .decode_nlri = bgp_decode_nlri_vpn6,
1970 .encode_next_hop = bgp_encode_next_hop_vpn,
1971 .decode_next_hop = bgp_decode_next_hop_vpn,
1972 .update_next_hop = bgp_update_next_hop_ip,
1973 },
1e37e35c
OZ
1974 {
1975 .afi = BGP_AF_FLOW4,
1976 .net = NET_FLOW4,
ef57b70f 1977 .no_igp = 1,
1e37e35c
OZ
1978 .name = "flow4",
1979 .encode_nlri = bgp_encode_nlri_flow4,
1980 .decode_nlri = bgp_decode_nlri_flow4,
1981 .encode_next_hop = bgp_encode_next_hop_none,
1982 .decode_next_hop = bgp_decode_next_hop_none,
1983 .update_next_hop = bgp_update_next_hop_none,
1984 },
ac3ad139
OZ
1985 {
1986 .afi = BGP_AF_FLOW6,
1987 .net = NET_FLOW6,
ef57b70f 1988 .no_igp = 1,
ac3ad139
OZ
1989 .name = "flow6",
1990 .encode_nlri = bgp_encode_nlri_flow6,
1991 .decode_nlri = bgp_decode_nlri_flow6,
1992 .encode_next_hop = bgp_encode_next_hop_none,
1993 .decode_next_hop = bgp_decode_next_hop_none,
1994 .update_next_hop = bgp_update_next_hop_none,
1995 },
d15b0b0a
OZ
1996};
1997
1998const struct bgp_af_desc *
1999bgp_get_af_desc(u32 afi)
72a6ef11 2000{
d15b0b0a
OZ
2001 uint i;
2002 for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++)
2003 if (bgp_af_table[i].afi == afi)
2004 return &bgp_af_table[i];
72a6ef11 2005
d15b0b0a 2006 return NULL;
3fdbafb6
MM
2007}
2008
d15b0b0a
OZ
2009static inline uint
2010bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
2011{
2012 return s->channel->desc->encode_nlri(s, buck, buf, end - buf);
2013}
2014
2015static inline uint
2016bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf)
2017{
2018 return s->channel->desc->encode_next_hop(s, nh, buf, 255);
2019}
11cb6202
OZ
2020
2021void
d15b0b0a 2022bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to)
11cb6202 2023{
d15b0b0a
OZ
2024 s->channel->desc->update_next_hop(s, a, to);
2025}
11cb6202 2026
d15b0b0a
OZ
2027#define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
2028
2029static byte *
2030bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
2031{
2032 /*
2033 * 2 B Withdrawn Routes Length (zero)
2034 * --- IPv4 Withdrawn Routes NLRI (unused)
2035 * 2 B Total Path Attribute Length
2036 * var Path Attributes
2037 * var IPv4 Network Layer Reachability Information
2038 */
2039
2040 int lr, la;
2041
2042 la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
2043 if (la < 0)
2044 {
2045 /* Attribute list too long */
2046 bgp_withdraw_bucket(s->channel, buck);
2047 return NULL;
2048 }
9aed29e6 2049
d15b0b0a
OZ
2050 put_u16(buf+0, 0);
2051 put_u16(buf+2, la);
11cb6202 2052
d15b0b0a 2053 lr = bgp_encode_nlri(s, buck, buf+4+la, end);
cf31112f 2054
d15b0b0a
OZ
2055 return buf+4+la+lr;
2056}
52e21323 2057
d15b0b0a
OZ
2058static byte *
2059bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
2060{
2061 /*
2062 * 2 B IPv4 Withdrawn Routes Length (zero)
2063 * --- IPv4 Withdrawn Routes NLRI (unused)
2064 * 2 B Total Path Attribute Length
2065 * 1 B MP_REACH_NLRI hdr - Attribute Flags
2066 * 1 B MP_REACH_NLRI hdr - Attribute Type Code
2067 * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
2068 * 2 B MP_REACH_NLRI data - Address Family Identifier
2069 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
2070 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
2071 * var MP_REACH_NLRI data - Network Address of Next Hop
2072 * 1 B MP_REACH_NLRI data - Reserved (zero)
2073 * var MP_REACH_NLRI data - Network Layer Reachability Information
2074 * var Rest of Path Attributes
2075 * --- IPv4 Network Layer Reachability Information (unused)
2076 */
2077
2078 int lh, lr, la; /* Lengths of next hop, NLRI and attributes */
2079
2080 /* Begin of MP_REACH_NLRI atribute */
2081 buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
2082 buf[5] = BA_MP_REACH_NLRI;
2083 put_u16(buf+6, 0); /* Will be fixed later */
2084 put_af3(buf+8, s->channel->afi);
2085 byte *pos = buf+11;
2086
2087 /* Encode attributes to temporary buffer */
2088 byte *abuf = alloca(MAX_ATTRS_LENGTH);
2089 la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH);
2090 if (la < 0)
2091 {
2092 /* Attribute list too long */
2093 bgp_withdraw_bucket(s->channel, buck);
2094 return NULL;
2095 }
0c791f87 2096
d15b0b0a
OZ
2097 /* Encode the next hop */
2098 lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1);
2099 *pos = lh;
2100 pos += 1+lh;
11cb6202 2101
d15b0b0a
OZ
2102 /* Reserved field */
2103 *pos++ = 0;
094d2bdb 2104
d15b0b0a
OZ
2105 /* Encode the NLRI */
2106 lr = bgp_encode_nlri(s, buck, pos, end - la);
2107 pos += lr;
094d2bdb 2108
d15b0b0a
OZ
2109 /* End of MP_REACH_NLRI atribute, update data length */
2110 put_u16(buf+6, pos-buf-8);
11cb6202 2111
d15b0b0a
OZ
2112 /* Copy remaining attributes */
2113 memcpy(pos, abuf, la);
2114 pos += la;
2115
2116 /* Initial UPDATE fields */
2117 put_u16(buf+0, 0);
2118 put_u16(buf+2, pos-buf-4);
2119
2120 return pos;
11cb6202
OZ
2121}
2122
d15b0b0a
OZ
2123#undef MAX_ATTRS_LENGTH
2124
2125static byte *
2126bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
a47a0108 2127{
d15b0b0a
OZ
2128 /*
2129 * 2 B Withdrawn Routes Length
2130 * var IPv4 Withdrawn Routes NLRI
2131 * 2 B Total Path Attribute Length (zero)
2132 * --- Path Attributes (unused)
2133 * --- IPv4 Network Layer Reachability Information (unused)
2134 */
11cb6202 2135
d15b0b0a 2136 uint len = bgp_encode_nlri(s, buck, buf+2, end);
11cb6202 2137
d15b0b0a
OZ
2138 put_u16(buf+0, len);
2139 put_u16(buf+2+len, 0);
11cb6202 2140
d15b0b0a 2141 return buf+4+len;
a47a0108
MM
2142}
2143
d15b0b0a
OZ
2144static byte *
2145bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
3fdbafb6 2146{
d15b0b0a
OZ
2147 /*
2148 * 2 B Withdrawn Routes Length (zero)
2149 * --- IPv4 Withdrawn Routes NLRI (unused)
2150 * 2 B Total Path Attribute Length
2151 * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
2152 * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
2153 * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
2154 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
2155 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
2156 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
2157 * --- IPv4 Network Layer Reachability Information (unused)
2158 */
2159
2160 uint len = bgp_encode_nlri(s, buck, buf+11, end);
3fdbafb6 2161
d15b0b0a
OZ
2162 put_u16(buf+0, 0);
2163 put_u16(buf+2, 7+len);
3fdbafb6 2164
d15b0b0a
OZ
2165 /* Begin of MP_UNREACH_NLRI atribute */
2166 buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
2167 buf[5] = BA_MP_UNREACH_NLRI;
2168 put_u16(buf+6, 3+len);
2169 put_af3(buf+8, s->channel->afi);
11cb6202 2170
d15b0b0a
OZ
2171 return buf+11+len;
2172}
11cb6202 2173
d15b0b0a
OZ
2174static byte *
2175bgp_create_update(struct bgp_channel *c, byte *buf)
2176{
2177 struct bgp_proto *p = (void *) c->c.proto;
2178 struct bgp_bucket *buck;
2179 byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
2180 byte *res = NULL;
2181
1e37e35c
OZ
2182again: ;
2183
d15b0b0a
OZ
2184 /* Initialize write state */
2185 struct bgp_write_state s = {
2186 .proto = p,
2187 .channel = c,
2188 .pool = bgp_linpool,
863ecfc7 2189 .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop,
d15b0b0a
OZ
2190 .as4_session = p->as4_session,
2191 .add_path = c->add_path_tx,
1e37e35c 2192 .mpls = c->desc->mpls,
d15b0b0a
OZ
2193 };
2194
d15b0b0a
OZ
2195 /* Try unreachable bucket */
2196 if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
2197 {
ef57b70f 2198 res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
d15b0b0a
OZ
2199 bgp_create_ip_unreach(&s, buck, buf, end):
2200 bgp_create_mp_unreach(&s, buck, buf, end);
11cb6202 2201
d15b0b0a
OZ
2202 goto done;
2203 }
3fdbafb6 2204
d15b0b0a
OZ
2205 /* Try reachable buckets */
2206 if (!EMPTY_LIST(c->bucket_queue))
2207 {
2208 buck = HEAD(c->bucket_queue);
e8ba557c 2209
d15b0b0a
OZ
2210 /* Cleanup empty buckets */
2211 if (EMPTY_LIST(buck->prefixes))
a5bf5f78 2212 {
d15b0b0a
OZ
2213 bgp_free_bucket(c, buck);
2214 goto again;
a5bf5f78 2215 }
11cb6202 2216
863ecfc7 2217 res = !s.mp_reach ?
d15b0b0a
OZ
2218 bgp_create_ip_reach(&s, buck, buf, end):
2219 bgp_create_mp_reach(&s, buck, buf, end);
0c791f87 2220
d15b0b0a
OZ
2221 if (EMPTY_LIST(buck->prefixes))
2222 bgp_free_bucket(c, buck);
2223 else
2224 bgp_defer_bucket(c, buck);
0c791f87 2225
d15b0b0a
OZ
2226 if (!res)
2227 goto again;
b552ecc4 2228
d15b0b0a
OZ
2229 goto done;
2230 }
094d2bdb 2231
d15b0b0a
OZ
2232 /* No more prefixes to send */
2233 return NULL;
165a6227 2234
d15b0b0a
OZ
2235done:
2236 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
2237 lp_flush(s.pool);
3fdbafb6 2238
d15b0b0a 2239 return res;
3fdbafb6
MM
2240}
2241
d15b0b0a
OZ
2242static byte *
2243bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf)
2244{
2245 /* Empty update packet */
2246 put_u32(buf, 0);
2247
2248 return buf+4;
2249}
2250
2251static byte *
2252bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf)
2253{
2254 put_u16(buf+0, 0);
2255 put_u16(buf+2, 6); /* length 4--9 */
2256
2257 /* Empty MP_UNREACH_NLRI atribute */
2258 buf[4] = BAF_OPTIONAL;
2259 buf[5] = BA_MP_UNREACH_NLRI;
2260 buf[6] = 3; /* Length 7--9 */
2261 put_af3(buf+7, c->afi);
2262
2263 return buf+10;
2264}
2265
2266static byte *
2267bgp_create_end_mark(struct bgp_channel *c, byte *buf)
2268{
2269 struct bgp_proto *p = (void *) c->c.proto;
2270
2271 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
2272
2273 return (c->afi == BGP_AF_IPV4) ?
2274 bgp_create_ip_end_mark(c, buf):
2275 bgp_create_mp_end_mark(c, buf);
2276}
0c791f87
OZ
2277
2278static inline void
82f42ea0 2279bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi)
0c791f87 2280{
82f42ea0 2281 struct bgp_proto *p = s->proto;
d15b0b0a
OZ
2282 struct bgp_channel *c = bgp_get_channel(p, afi);
2283
9aed29e6
OZ
2284 BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
2285
d15b0b0a 2286 if (!c)
82f42ea0 2287 DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
d15b0b0a
OZ
2288
2289 if (c->load_state == BFS_LOADING)
2290 c->load_state = BFS_NONE;
0c791f87
OZ
2291
2292 if (p->p.gr_recovery)
d15b0b0a
OZ
2293 channel_graceful_restart_unlock(&c->c);
2294
2295 if (c->gr_active)
2296 bgp_graceful_restart_done(c);
2297}
2298
2299static inline void
2300bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
2301{
2302 struct bgp_channel *c = bgp_get_channel(s->proto, afi);
2303 rta *a = NULL;
2304
d15b0b0a 2305 if (!c)
82f42ea0 2306 DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
d15b0b0a
OZ
2307
2308 s->channel = c;
2309 s->add_path = c->add_path_rx;
1e37e35c 2310 s->mpls = c->desc->mpls;
d15b0b0a
OZ
2311
2312 s->last_id = 0;
2313 s->last_src = s->proto->p.main_source;
2314
2315 /*
2316 * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
2317 * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
2318 * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
2319 * decode_next_hop hooks) by restoring a->eattrs afterwards.
2320 */
2321
2322 if (ea)
2323 {
039a65d0 2324 a = allocz(RTA_MAX_SIZE);
d15b0b0a
OZ
2325
2326 a->source = RTS_BGP;
2327 a->scope = SCOPE_UNIVERSE;
d15b0b0a
OZ
2328 a->from = s->proto->cf->remote_ip;
2329 a->eattrs = ea;
2330
2331 c->desc->decode_next_hop(s, nh, nh_len, a);
2332
2333 /* Handle withdraw during next hop decoding */
2334 if (s->err_withdraw)
2335 a = NULL;
2336 }
2337
2338 c->desc->decode_nlri(s, nlri, len, a);
2339
2340 rta_free(s->cached_rta);
2341 s->cached_rta = NULL;
2342}
2343
2344static void
2345bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
2346{
2347 struct bgp_proto *p = conn->bgp;
2348 ea_list *ea = NULL;
2349
2350 BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
2351
2352 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
2353 if (conn->state == BS_OPENCONFIRM)
2354 bgp_conn_enter_established_state(conn);
2355
2356 if (conn->state != BS_ESTABLISHED)
2357 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
2358
2359 bgp_start_timer(conn->hold_timer, conn->hold_time);
2360
2361 /* Initialize parse state */
2362 struct bgp_parse_state s = {
2363 .proto = p,
2364 .pool = bgp_linpool,
2365 .as4_session = p->as4_session,
2366 };
2367
2368 /* Parse error handler */
2369 if (setjmp(s.err_jmpbuf))
2370 {
2371 bgp_error(conn, 3, s.err_subcode, NULL, 0);
2372 goto done;
2373 }
2374
2375 /* Check minimal length */
2376 if (len < 23)
2377 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
2378
2379 /* Skip fixed header */
2380 uint pos = 19;
2381
2382 /*
2383 * UPDATE message format
2384 *
2385 * 2 B IPv4 Withdrawn Routes Length
2386 * var IPv4 Withdrawn Routes NLRI
2387 * 2 B Total Path Attribute Length
2388 * var Path Attributes
2389 * var IPv4 Reachable Routes NLRI
2390 */
2391
2392 s.ip_unreach_len = get_u16(pkt + pos);
2393 s.ip_unreach_nlri = pkt + pos + 2;
2394 pos += 2 + s.ip_unreach_len;
2395
2396 if (pos + 2 > len)
2397 bgp_parse_error(&s, 1);
2398
2399 s.attr_len = get_u16(pkt + pos);
2400 s.attrs = pkt + pos + 2;
2401 pos += 2 + s.attr_len;
2402
2403 if (pos > len)
2404 bgp_parse_error(&s, 1);
2405
2406 s.ip_reach_len = len - pos;
2407 s.ip_reach_nlri = pkt + pos;
0c791f87 2408
0c791f87 2409
d15b0b0a
OZ
2410 if (s.attr_len)
2411 ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);
d493d0f1
OZ
2412 else
2413 ea = NULL;
0c791f87 2414
d15b0b0a
OZ
2415 /* Check for End-of-RIB marker */
2416 if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
82f42ea0 2417 { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; }
973399ae 2418
d15b0b0a
OZ
2419 /* Check for MP End-of-RIB marker */
2420 if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
82f42ea0
OZ
2421 !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af)
2422 { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; }
094d2bdb 2423
d15b0b0a
OZ
2424 if (s.ip_unreach_len)
2425 bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);
094d2bdb 2426
d15b0b0a
OZ
2427 if (s.mp_unreach_len)
2428 bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);
094d2bdb 2429
d15b0b0a
OZ
2430 if (s.ip_reach_len)
2431 bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
2432 ea, s.ip_next_hop_data, s.ip_next_hop_len);
509aab5d 2433
d15b0b0a
OZ
2434 if (s.mp_reach_len)
2435 bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len,
2436 ea, s.mp_next_hop_data, s.mp_next_hop_len);
094d2bdb 2437
d15b0b0a
OZ
2438done:
2439 rta_free(s.cached_rta);
2440 lp_flush(s.pool);
2441 return;
094d2bdb
OZ
2442}
2443
863ecfc7
OZ
2444static uint
2445bgp_find_update_afi(byte *pos, uint len)
2446{
2447 /*
2448 * This is stripped-down version of bgp_rx_update(), bgp_decode_attrs() and
2449 * bgp_decode_mp_[un]reach_nlri() used by MRT code in order to find out which
2450 * AFI/SAFI is associated with incoming UPDATE. Returns 0 for framing errors.
2451 */
2452 if (len < 23)
2453 return 0;
2454
2455 /* Assume there is no withrawn NLRI, read lengths and move to attribute list */
2456 uint wlen = get_u16(pos + 19);
2457 uint alen = get_u16(pos + 21);
2458 ADVANCE(pos, len, 23);
2459
2460 /* Either non-zero withdrawn NLRI, non-zero reachable NLRI, or IPv4 End-of-RIB */
2461 if ((wlen != 0) || (alen < len) || !alen)
2462 return BGP_AF_IPV4;
2463
2464 if (alen > len)
2465 return 0;
2466
2467 /* Process attribute list (alen == len) */
2468 while (len)
2469 {
2470 if (len < 2)
2471 return 0;
2472
2473 uint flags = pos[0];
2474 uint code = pos[1];
2475 ADVANCE(pos, len, 2);
2476
2477 uint ll = !(flags & BAF_EXT_LEN) ? 1 : 2;
2478 if (len < ll)
2479 return 0;
2480
2481 /* Read attribute length and move to attribute body */
2482 alen = (ll == 1) ? get_u8(pos) : get_u16(pos);
2483 ADVANCE(pos, len, ll);
2484
2485 if (len < alen)
2486 return 0;
2487
2488 /* Found MP NLRI */
2489 if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
2490 {
2491 if (alen < 3)
2492 return 0;
2493
2494 return BGP_AF(get_u16(pos), pos[2]);
2495 }
2496
2497 /* Move to the next attribute */
2498 ADVANCE(pos, len, alen);
2499 }
2500
2501 /* No basic or MP NLRI, but there are some attributes -> error */
2502 return 0;
2503}
2504
094d2bdb 2505
d15b0b0a
OZ
2506/*
2507 * ROUTE-REFRESH
2508 */
094d2bdb 2509
d15b0b0a
OZ
2510static inline byte *
2511bgp_create_route_refresh(struct bgp_channel *c, byte *buf)
1c1da87b 2512{
d15b0b0a 2513 struct bgp_proto *p = (void *) c->c.proto;
1b180121 2514
d15b0b0a 2515 BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
53ffbff3 2516
d15b0b0a
OZ
2517 /* Original route refresh request, RFC 2918 */
2518 put_af4(buf, c->afi);
2519 buf[2] = BGP_RR_REQUEST;
cfe34a31 2520
d15b0b0a
OZ
2521 return buf+4;
2522}
53ffbff3 2523
d15b0b0a
OZ
2524static inline byte *
2525bgp_create_begin_refresh(struct bgp_channel *c, byte *buf)
2526{
2527 struct bgp_proto *p = (void *) c->c.proto;
53ffbff3 2528
d15b0b0a 2529 BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
53ffbff3 2530
d15b0b0a
OZ
2531 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
2532 put_af4(buf, c->afi);
2533 buf[2] = BGP_RR_BEGIN;
cfe34a31 2534
d15b0b0a
OZ
2535 return buf+4;
2536}
53ffbff3 2537
d15b0b0a
OZ
2538static inline byte *
2539bgp_create_end_refresh(struct bgp_channel *c, byte *buf)
2540{
2541 struct bgp_proto *p = (void *) c->c.proto;
cfe34a31 2542
d15b0b0a
OZ
2543 BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
2544
2545 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
2546 put_af4(buf, c->afi);
2547 buf[2] = BGP_RR_END;
1c1da87b 2548
d15b0b0a
OZ
2549 return buf+4;
2550}
1c1da87b 2551
3fdbafb6 2552static void
d15b0b0a 2553bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
3fdbafb6 2554{
85368cd4 2555 struct bgp_proto *p = conn->bgp;
973399ae 2556
d15b0b0a
OZ
2557 if (conn->state != BS_ESTABLISHED)
2558 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
0c791f87 2559
d15b0b0a
OZ
2560 if (!conn->local_caps->route_refresh)
2561 { bgp_error(conn, 1, 3, pkt+18, 1); return; }
094d2bdb 2562
d15b0b0a
OZ
2563 if (len < (BGP_HEADER_LENGTH + 4))
2564 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
2565
2566 if (len > (BGP_HEADER_LENGTH + 4))
2567 { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
973399ae 2568
d15b0b0a
OZ
2569 struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19));
2570 if (!c)
2571 {
2572 log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
2573 p->p.name, pkt[21], get_u16(pkt+19), pkt[22]);
f94557de 2574 return;
d15b0b0a 2575 }
f94557de 2576
d15b0b0a
OZ
2577 /* RFC 7313 redefined reserved field as RR message subtype */
2578 uint subtype = p->enhanced_refresh ? pkt[21] : BGP_RR_REQUEST;
06fb60c4 2579
d15b0b0a
OZ
2580 switch (subtype)
2581 {
2582 case BGP_RR_REQUEST:
2583 BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
2584 channel_request_feeding(&c->c);
2585 break;
06fb60c4 2586
d15b0b0a
OZ
2587 case BGP_RR_BEGIN:
2588 BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
2589 bgp_refresh_begin(c);
2590 break;
094d2bdb 2591
d15b0b0a
OZ
2592 case BGP_RR_END:
2593 BGP_TRACE(D_PACKETS, "Got END-OF-RR");
2594 bgp_refresh_end(c);
2595 break;
06fb60c4 2596
d15b0b0a
OZ
2597 default:
2598 log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
2599 p->p.name, subtype);
2600 break;
2601 }
2602}
06fb60c4 2603
d15b0b0a
OZ
2604static inline struct bgp_channel *
2605bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn)
2606{
2607 uint i = conn->last_channel;
72b28a04 2608
d15b0b0a
OZ
2609 /* Try the last channel, but at most several times */
2610 if ((conn->channels_to_send & (1 << i)) &&
2611 (conn->last_channel_count < 16))
2612 goto found;
06fb60c4 2613
d15b0b0a
OZ
2614 /* Find channel with non-zero channels_to_send */
2615 do
2616 {
2617 i++;
2618 if (i >= p->channel_count)
2619 i = 0;
2620 }
2621 while (! (conn->channels_to_send & (1 << i)));
06fb60c4 2622
d15b0b0a
OZ
2623 /* Use that channel */
2624 conn->last_channel = i;
2625 conn->last_channel_count = 0;
f8809249 2626
d15b0b0a
OZ
2627found:
2628 conn->last_channel_count++;
2629 return p->channel_map[i];
2630}
1c1da87b 2631
d15b0b0a
OZ
2632static inline int
2633bgp_send(struct bgp_conn *conn, uint type, uint len)
06fb60c4 2634{
d15b0b0a
OZ
2635 sock *sk = conn->sk;
2636 byte *buf = sk->tbuf;
06fb60c4 2637
d15b0b0a
OZ
2638 memset(buf, 0xff, 16); /* Marker */
2639 put_u16(buf+16, len);
2640 buf[18] = type;
06fb60c4 2641
d15b0b0a
OZ
2642 return sk_send(sk, len);
2643}
06fb60c4 2644
d15b0b0a
OZ
2645/**
2646 * bgp_fire_tx - transmit packets
2647 * @conn: connection
2648 *
2649 * Whenever the transmit buffers of the underlying TCP connection
2650 * are free and we have any packets queued for sending, the socket functions
2651 * call bgp_fire_tx() which takes care of selecting the highest priority packet
2652 * queued (Notification > Keepalive > Open > Update), assembling its header
2653 * and body and sending it to the connection.
2654 */
2655static int
2656bgp_fire_tx(struct bgp_conn *conn)
1c1da87b
MM
2657{
2658 struct bgp_proto *p = conn->bgp;
d15b0b0a
OZ
2659 struct bgp_channel *c;
2660 byte *buf, *pkt, *end;
2661 uint s;
1c1da87b 2662
d15b0b0a
OZ
2663 if (!conn->sk)
2664 return 0;
2665
2666 buf = conn->sk->tbuf;
2667 pkt = buf + BGP_HEADER_LENGTH;
2668 s = conn->packets_to_send;
2669
2670 if (s & (1 << PKT_SCHEDULE_CLOSE))
2671 {
2672 /* We can finally close connection and enter idle state */
2673 bgp_conn_enter_idle_state(conn);
2674 return 0;
2675 }
2676 if (s & (1 << PKT_NOTIFICATION))
2677 {
2678 conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE;
2679 end = bgp_create_notification(conn, pkt);
2680 return bgp_send(conn, PKT_NOTIFICATION, end - buf);
2681 }
2682 else if (s & (1 << PKT_KEEPALIVE))
2683 {
2684 conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
2685 BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
2686 bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
2687 return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
2688 }
2689 else if (s & (1 << PKT_OPEN))
2690 {
2691 conn->packets_to_send &= ~(1 << PKT_OPEN);
2692 end = bgp_create_open(conn, pkt);
2693 return bgp_send(conn, PKT_OPEN, end - buf);
2694 }
2695 else while (conn->channels_to_send)
2696 {
2697 c = bgp_get_channel_to_send(p, conn);
2698 s = c->packets_to_send;
2699
2700 if (s & (1 << PKT_ROUTE_REFRESH))
0c791f87 2701 {
d15b0b0a
OZ
2702 c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH);
2703 end = bgp_create_route_refresh(c, pkt);
2704 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
0c791f87 2705 }
d15b0b0a 2706 else if (s & (1 << PKT_BEGIN_REFRESH))
1c1da87b 2707 {
d15b0b0a
OZ
2708 /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
2709 c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH);
2710 end = bgp_create_begin_refresh(c, pkt);
2711 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
1c1da87b 2712 }
d15b0b0a 2713 else if (s & (1 << PKT_UPDATE))
1c1da87b 2714 {
d15b0b0a
OZ
2715 end = bgp_create_update(c, pkt);
2716 if (end)
2717 return bgp_send(conn, PKT_UPDATE, end - buf);
1c1da87b 2718
d15b0b0a
OZ
2719 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
2720 c->packets_to_send = 0;
2721 conn->channels_to_send &= ~(1 << c->index);
64cf11f5 2722
d15b0b0a
OZ
2723 if (c->feed_state == BFS_LOADED)
2724 {
2725 c->feed_state = BFS_NONE;
2726 end = bgp_create_end_mark(c, pkt);
2727 return bgp_send(conn, PKT_UPDATE, end - buf);
2728 }
1c1da87b 2729
d15b0b0a
OZ
2730 else if (c->feed_state == BFS_REFRESHED)
2731 {
2732 c->feed_state = BFS_NONE;
2733 end = bgp_create_end_refresh(c, pkt);
2734 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
2735 }
2736 }
2737 else if (s)
2738 bug("Channel packets_to_send: %x", s);
094d2bdb 2739
d15b0b0a
OZ
2740 c->packets_to_send = 0;
2741 conn->channels_to_send &= ~(1 << c->index);
2742 }
06fb60c4 2743
d15b0b0a
OZ
2744 return 0;
2745}
06fb60c4 2746
d15b0b0a
OZ
2747/**
2748 * bgp_schedule_packet - schedule a packet for transmission
2749 * @conn: connection
2750 * @c: channel
2751 * @type: packet type
2752 *
2753 * Schedule a packet of type @type to be sent as soon as possible.
2754 */
2755void
2756bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
2757{
2758 ASSERT(conn->sk);
1c1da87b 2759
d15b0b0a 2760 DBG("BGP: Scheduling packet type %d\n", type);
06fb60c4 2761
d15b0b0a
OZ
2762 if (c)
2763 {
2764 if (! conn->channels_to_send)
2765 {
2766 conn->last_channel = c->index;
2767 conn->last_channel_count = 0;
2768 }
06fb60c4 2769
d15b0b0a
OZ
2770 c->packets_to_send |= 1 << type;
2771 conn->channels_to_send |= 1 << c->index;
2772 }
2773 else
2774 conn->packets_to_send |= 1 << type;
1c1da87b 2775
d15b0b0a
OZ
2776 if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
2777 ev_schedule(conn->tx_ev);
2778}
1c1da87b 2779
d15b0b0a
OZ
/*
 * TX event hook: @vconn is the BGP connection. Keeps calling bgp_fire_tx()
 * for as long as it returns a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;
  int rv;

  DBG("BGP: kicking TX\n");

  do
    rv = bgp_fire_tx(conn);
  while (rv > 0);
}
1c1da87b 2789
d15b0b0a
OZ
2790void
2791bgp_tx(sock *sk)
2792{
2793 struct bgp_conn *conn = sk->data;
1c1da87b 2794
d15b0b0a
OZ
2795 DBG("BGP: TX hook\n");
2796 while (bgp_fire_tx(conn) > 0)
2797 ;
efcece2d
MM
2798}
2799
d15b0b0a 2800
efcece2d
MM
2801static struct {
2802 byte major, minor;
2803 byte *msg;
2804} bgp_msg_table[] = {
2805 { 1, 0, "Invalid message header" },
2806 { 1, 1, "Connection not synchronized" },
2807 { 1, 2, "Bad message length" },
2808 { 1, 3, "Bad message type" },
2809 { 2, 0, "Invalid OPEN message" },
2810 { 2, 1, "Unsupported version number" },
2811 { 2, 2, "Bad peer AS" },
2812 { 2, 3, "Bad BGP identifier" },
2813 { 2, 4, "Unsupported optional parameter" },
2814 { 2, 5, "Authentication failure" },
2815 { 2, 6, "Unacceptable hold time" },
06e0d1b6 2816 { 2, 7, "Required capability missing" }, /* [RFC5492] */
506fa1a7 2817 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
efcece2d
MM
2818 { 3, 0, "Invalid UPDATE message" },
2819 { 3, 1, "Malformed attribute list" },
2820 { 3, 2, "Unrecognized well-known attribute" },
2821 { 3, 3, "Missing mandatory attribute" },
2822 { 3, 4, "Invalid attribute flags" },
2823 { 3, 5, "Invalid attribute length" },
2824 { 3, 6, "Invalid ORIGIN attribute" },
2825 { 3, 7, "AS routing loop" }, /* Deprecated */
2826 { 3, 8, "Invalid NEXT_HOP attribute" },
2827 { 3, 9, "Optional attribute error" },
2828 { 3, 10, "Invalid network field" },
2829 { 3, 11, "Malformed AS_PATH" },
2830 { 4, 0, "Hold timer expired" },
ac574513
OZ
2831 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
2832 { 5, 1, "Unexpected message in OpenSent state" },
2833 { 5, 2, "Unexpected message in OpenConfirm state" },
2834 { 5, 3, "Unexpected message in Established state" },
165a6227
OZ
2835 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
2836 { 6, 1, "Maximum number of prefixes reached" },
2837 { 6, 2, "Administrative shutdown" },
2838 { 6, 3, "Peer de-configured" },
2839 { 6, 4, "Administrative reset" },
2840 { 6, 5, "Connection rejected" },
2841 { 6, 6, "Other configuration change" },
2842 { 6, 7, "Connection collision resolution" },
9aed29e6
OZ
2843 { 6, 8, "Out of Resources" },
2844 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
2845 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
efcece2d
MM
2846};
2847
11b32d91
OZ
2848/**
2849 * bgp_error_dsc - return BGP error description
11b32d91
OZ
2850 * @code: BGP error code
2851 * @subcode: BGP error subcode
2852 *
2853 * bgp_error_dsc() returns error description for BGP errors
2854 * which might be static string or given temporary buffer.
2855 */
b8113a5e 2856const char *
d15b0b0a 2857bgp_error_dsc(uint code, uint subcode)
11b32d91 2858{
b8113a5e 2859 static char buff[32];
d15b0b0a
OZ
2860 uint i;
2861
11b32d91
OZ
2862 for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
2863 if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
d15b0b0a 2864 return bgp_msg_table[i].msg;
11b32d91 2865
d15b0b0a 2866 bsprintf(buff, "Unknown error %u.%u", code, subcode);
11b32d91
OZ
2867 return buff;
2868}
2869
cd1d9961
OZ
2870/* RFC 8203 - shutdown communication message */
2871static int
2872bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
2873{
2874 byte *msg = data + 1;
2875 uint msg_len = data[0];
2876 uint i;
2877
2878 /* Handle zero length message */
2879 if (msg_len == 0)
2880 return 1;
2881
2882 /* Handle proper message */
2883 if ((msg_len > 128) && (msg_len + 1 > len))
2884 return 0;
2885
2886 /* Some elementary cleanup */
2887 for (i = 0; i < msg_len; i++)
2888 if (msg[i] < ' ')
2889 msg[i] = ' ';
2890
2891 proto_set_message(&p->p, msg, msg_len);
2892 *bp += bsprintf(*bp, ": \"%s\"", p->p.message);
2893 return 1;
2894}
2895
efcece2d 2896void
d15b0b0a 2897bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
efcece2d 2898{
cd1d9961 2899 byte argbuf[256], *t = argbuf;
d15b0b0a 2900 uint i;
efcece2d 2901
b99d3786
OZ
2902 /* Don't report Cease messages generated by myself */
2903 if (code == 6 && class == BE_BGP_TX)
85733143
MM
2904 return;
2905
cd1d9961
OZ
2906 /* Reset shutdown message */
2907 if ((code == 6) && ((subcode == 2) || (subcode == 4)))
2908 proto_set_message(&p->p, NULL, 0);
2909
efcece2d
MM
2910 if (len)
2911 {
cd1d9961 2912 /* Bad peer AS - we would like to print the AS */
a5bf5f78
OZ
2913 if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
2914 {
cd1d9961 2915 t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
a5bf5f78
OZ
2916 goto done;
2917 }
cd1d9961
OZ
2918
2919 /* RFC 8203 - shutdown communication */
2920 if (((code == 6) && ((subcode == 2) || (subcode == 4))))
2921 if (bgp_handle_message(p, data, len, &t))
2922 goto done;
2923
2924 *t++ = ':';
2925 *t++ = ' ';
efcece2d
MM
2926 if (len > 16)
2927 len = 16;
2928 for (i=0; i<len; i++)
2929 t += bsprintf(t, "%02x", data[i]);
2930 }
cd1d9961
OZ
2931
2932done:
efcece2d 2933 *t = 0;
cd1d9961
OZ
2934 const byte *dsc = bgp_error_dsc(code, subcode);
2935 log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, dsc, argbuf);
3fdbafb6
MM
2936}
2937
2938static void
3e236955 2939bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
3fdbafb6 2940{
165a6227 2941 struct bgp_proto *p = conn->bgp;
d15b0b0a 2942
3fdbafb6 2943 if (len < 21)
d15b0b0a 2944 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
11b32d91 2945
d15b0b0a
OZ
2946 uint code = pkt[19];
2947 uint subcode = pkt[20];
b99d3786 2948 int err = (code != 6);
165a6227 2949
b99d3786
OZ
2950 bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
2951 bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
e3299ab1 2952
11b32d91 2953 bgp_conn_enter_close_state(conn);
d15b0b0a 2954 bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
b99d3786 2955
d15b0b0a
OZ
2956 if (err)
2957 {
2958 bgp_update_startup_delay(p);
830ba75e 2959 bgp_stop(p, 0, NULL, 0);
d15b0b0a 2960 }
63472779 2961 else
75d98b60
OZ
2962 {
2963 uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0);
2964 if (p->cf->disable_after_cease & subcode_bit)
63472779 2965 {
75d98b60
OZ
2966 log(L_INFO "%s: Disabled after Cease notification", p->p.name);
2967 p->startup_delay = 0;
2968 p->p.disabled = 1;
63472779 2969 }
75d98b60 2970 }
3fdbafb6
MM
2971}
2972
2973static void
e21423ba 2974bgp_rx_keepalive(struct bgp_conn *conn)
3fdbafb6 2975{
85368cd4
MM
2976 struct bgp_proto *p = conn->bgp;
2977
2978 BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
3fdbafb6 2979 bgp_start_timer(conn->hold_timer, conn->hold_time);
3fdbafb6 2980
d15b0b0a
OZ
2981 if (conn->state == BS_OPENCONFIRM)
2982 { bgp_conn_enter_established_state(conn); return; }
bf47fe4b 2983
bf47fe4b 2984 if (conn->state != BS_ESTABLISHED)
d15b0b0a 2985 bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
bf47fe4b
OZ
2986}
2987
2988
54e55169
MM
2989/**
2990 * bgp_rx_packet - handle a received packet
2991 * @conn: BGP connection
2992 * @pkt: start of the packet
2993 * @len: packet size
2994 *
2995 * bgp_rx_packet() takes a newly received packet and calls the corresponding
2996 * packet handler according to the packet type.
2997 */
3fdbafb6 2998static void
d15b0b0a 2999bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
3fdbafb6 3000{
cf31112f
OZ
3001 byte type = pkt[18];
3002
3003 DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
3004
3005 if (conn->bgp->p.mrtdump & MD_MESSAGES)
863ecfc7 3006 bgp_dump_message(conn, pkt, len);
cf31112f
OZ
3007
3008 switch (type)
d15b0b0a
OZ
3009 {
3010 case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
3011 case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
3012 case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
3013 case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
3014 case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
3015 default: bgp_error(conn, 1, 3, pkt+18, 1);
3016 }
72a6ef11
MM
3017}
3018
54e55169
MM
3019/**
3020 * bgp_rx - handle received data
3021 * @sk: socket
3022 * @size: amount of data received
3023 *
3024 * bgp_rx() is called by the socket layer whenever new data arrive from
3025 * the underlying TCP connection. It assembles the data fragments to packets,
3026 * checks their headers and framing and passes complete packets to
3027 * bgp_rx_packet().
3028 */
72a6ef11 3029int
3e236955 3030bgp_rx(sock *sk, uint size)
72a6ef11
MM
3031{
3032 struct bgp_conn *conn = sk->data;
3033 byte *pkt_start = sk->rbuf;
3034 byte *end = pkt_start + size;
d15b0b0a 3035 uint i, len;
72a6ef11
MM
3036
3037 DBG("BGP: RX hook: Got %d bytes\n", size);
3038 while (end >= pkt_start + BGP_HEADER_LENGTH)
3039 {
11b32d91
OZ
3040 if ((conn->state == BS_CLOSE) || (conn->sk != sk))
3041 return 0;
3fdbafb6
MM
3042 for(i=0; i<16; i++)
3043 if (pkt_start[i] != 0xff)
3044 {
efcece2d 3045 bgp_error(conn, 1, 1, NULL, 0);
3fdbafb6
MM
3046 break;
3047 }
3048 len = get_u16(pkt_start+16);
d15b0b0a 3049 if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn)))
3fdbafb6 3050 {
efcece2d 3051 bgp_error(conn, 1, 2, pkt_start+16, 2);
3fdbafb6
MM
3052 break;
3053 }
5f532add
MM
3054 if (end < pkt_start + len)
3055 break;
3056 bgp_rx_packet(conn, pkt_start, len);
3057 pkt_start += len;
72a6ef11
MM
3058 }
3059 if (pkt_start != sk->rbuf)
3060 {
3061 memmove(sk->rbuf, pkt_start, end - pkt_start);
3062 sk->rpos = sk->rbuf + (end - pkt_start);
3063 }
3064 return 0;
3065}