]> git.ipfire.org Git - thirdparty/bird.git/blame - proto/bgp/packets.c
Autotools: updated config.guess and config.sub
[thirdparty/bird.git] / proto / bgp / packets.c
CommitLineData
c01e3741
MM
1/*
2 * BIRD -- BGP Packet Processing
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
d15b0b0a
OZ
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
c01e3741
MM
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
85368cd4 11#undef LOCAL_DEBUG
72a6ef11 12
d15b0b0a
OZ
13#include <stdlib.h>
14
c01e3741
MM
15#include "nest/bird.h"
16#include "nest/iface.h"
17#include "nest/protocol.h"
18#include "nest/route.h"
11cb6202 19#include "nest/attrs.h"
cf31112f 20#include "nest/mrtdump.h"
c01e3741 21#include "conf/conf.h"
72a6ef11 22#include "lib/unaligned.h"
ac3ad139 23#include "lib/flowspec.h"
72a6ef11 24#include "lib/socket.h"
c01e3741 25
11b32d91
OZ
26#include "nest/cli.h"
27
c01e3741 28#include "bgp.h"
72a6ef11 29
9aed29e6
OZ
30
31#define BGP_RR_REQUEST 0
32#define BGP_RR_BEGIN 1
33#define BGP_RR_END 2
34
1e37e35c
OZ
35#define BGP_NLRI_MAX (4 + 1 + 32)
36
37#define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */
38#define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */
39#define BGP_MPLS_NULL 3 /* Implicit NULL label */
40#define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */
41
9aed29e6 42
1123e707
OZ
43static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
44static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
cb530392 45
ac574513
OZ
46/* Table for state -> RFC 6608 FSM error subcodes */
47static byte fsm_err_subcode[BS_MAX] = {
48 [BS_OPENSENT] = 1,
49 [BS_OPENCONFIRM] = 2,
50 [BS_ESTABLISHED] = 3
51};
52
d15b0b0a
OZ
53
54static struct bgp_channel *
55bgp_get_channel(struct bgp_proto *p, u32 afi)
56{
57 uint i;
58
59 for (i = 0; i < p->channel_count; i++)
60 if (p->afi_map[i] == afi)
61 return p->channel_map[i];
62
63 return NULL;
64}
65
66static inline void
67put_af3(byte *buf, u32 id)
68{
69 put_u16(buf, id >> 16);
70 buf[2] = id & 0xff;
71}
72
73static inline void
74put_af4(byte *buf, u32 id)
75{
76 put_u16(buf, id >> 16);
77 buf[2] = 0;
78 buf[3] = id & 0xff;
79}
80
81static inline u32
82get_af3(byte *buf)
83{
84 return (get_u16(buf) << 16) | buf[2];
85}
86
87static inline u32
88get_af4(byte *buf)
89{
90 return (get_u16(buf) << 16) | buf[3];
91}
92
cf31112f
OZ
93/*
94 * MRT Dump format is not semantically specified.
95 * We will use these values in appropriate fields:
96 *
97 * Local AS, Remote AS - configured AS numbers for given BGP instance.
98 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
99 *
100 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
101 * changes) and MESSAGE (for received BGP messages).
102 *
103 * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
104 * only when AS4 session is established and even in that case MESSAGE
105 * does not use AS4 variant for initial OPEN message. This strange
106 * behavior is here for compatibility with Quagga and Bgpdump.
107 */
108
109static byte *
110mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
111{
112 struct bgp_proto *p = conn->bgp;
d15b0b0a 113 uint v4 = ipa_is_ip4(p->cf->remote_ip);
cf31112f
OZ
114
115 if (as4)
d15b0b0a
OZ
116 {
117 put_u32(buf+0, p->remote_as);
118 put_u32(buf+4, p->public_as);
119 buf+=8;
120 }
cf31112f 121 else
d15b0b0a
OZ
122 {
123 put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
124 put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS);
125 buf+=4;
126 }
cf31112f 127
572c6440 128 put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
d15b0b0a 129 put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6);
cf31112f 130 buf+=4;
d15b0b0a
OZ
131
132 if (v4)
133 {
134 buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE);
135 buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE);
136 }
137 else
138 {
139 buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE);
140 buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE);
141 }
cf31112f
OZ
142
143 return buf;
144}
145
146static void
d15b0b0a 147mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len)
cf31112f 148{
06e0d1b6 149 byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
cf31112f
OZ
150 byte *bp = buf + MRTDUMP_HDR_LENGTH;
151 int as4 = conn->bgp->as4_session;
152
153 bp = mrt_put_bgp4_hdr(bp, conn, as4);
154 memcpy(bp, pkt, len);
155 bp += len;
156 mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
157 buf, bp-buf);
158}
159
160static inline u16
d15b0b0a 161convert_state(uint state)
cf31112f
OZ
162{
163 /* Convert state from our BS_* values to values used in MRTDump */
164 return (state == BS_CLOSE) ? 1 : state + 1;
165}
166
167void
d15b0b0a 168mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new)
cf31112f
OZ
169{
170 byte buf[128];
171 byte *bp = buf + MRTDUMP_HDR_LENGTH;
172
173 bp = mrt_put_bgp4_hdr(bp, conn, 1);
174 put_u16(bp+0, convert_state(old));
175 put_u16(bp+2, convert_state(new));
176 bp += 4;
177 mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
178}
179
72a6ef11
MM
180static byte *
181bgp_create_notification(struct bgp_conn *conn, byte *buf)
182{
85368cd4
MM
183 struct bgp_proto *p = conn->bgp;
184
185 BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
72a6ef11
MM
186 buf[0] = conn->notify_code;
187 buf[1] = conn->notify_subcode;
efcece2d
MM
188 memcpy(buf+2, conn->notify_data, conn->notify_size);
189 return buf + 2 + conn->notify_size;
72a6ef11
MM
190}
191
e3299ab1 192
d15b0b0a 193/* Capability negotiation as per RFC 5492 */
e3299ab1 194
d15b0b0a
OZ
195const struct bgp_af_caps *
196bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
bf47fe4b 197{
d15b0b0a 198 struct bgp_af_caps *ac;
bf47fe4b 199
d15b0b0a
OZ
200 WALK_AF_CAPS(caps, ac)
201 if (ac->afi == afi)
202 return ac;
203
204 return NULL;
52e21323
OZ
205}
206
d15b0b0a
OZ
207static struct bgp_af_caps *
208bgp_get_af_caps(struct bgp_caps *caps, u32 afi)
0c791f87 209{
d15b0b0a 210 struct bgp_af_caps *ac;
0c791f87 211
d15b0b0a
OZ
212 WALK_AF_CAPS(caps, ac)
213 if (ac->afi == afi)
214 return ac;
0c791f87 215
d15b0b0a
OZ
216 ac = &caps->af_data[caps->af_count++];
217 memset(ac, 0, sizeof(struct bgp_af_caps));
218 ac->afi = afi;
0c791f87 219
d15b0b0a 220 return ac;
0c791f87
OZ
221}
222
d15b0b0a
OZ
223static int
224bgp_af_caps_cmp(const void *X, const void *Y)
0c791f87 225{
d15b0b0a
OZ
226 const struct bgp_af_caps *x = X, *y = Y;
227 return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
0c791f87
OZ
228}
229
11cb6202 230
094d2bdb 231static byte *
d15b0b0a 232bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
094d2bdb 233{
d15b0b0a
OZ
234 struct bgp_proto *p = conn->bgp;
235 struct bgp_channel *c;
236 struct bgp_caps *caps;
237 struct bgp_af_caps *ac;
d8022d26 238 uint any_ext_next_hop = 0;
d15b0b0a
OZ
239 uint any_add_path = 0;
240 byte *data;
094d2bdb 241
d15b0b0a 242 /* Prepare bgp_caps structure */
094d2bdb 243
d15b0b0a
OZ
244 int n = list_length(&p->p.channels);
245 caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
246 conn->local_caps = caps;
094d2bdb 247
d15b0b0a
OZ
248 caps->as4_support = p->cf->enable_as4;
249 caps->ext_messages = p->cf->enable_extended_messages;
250 caps->route_refresh = p->cf->enable_refresh;
251 caps->enhanced_refresh = p->cf->enable_refresh;
094d2bdb 252
d15b0b0a
OZ
253 if (caps->as4_support)
254 caps->as4_number = p->public_as;
9aed29e6 255
d15b0b0a
OZ
256 if (p->cf->gr_mode)
257 {
258 caps->gr_aware = 1;
259 caps->gr_time = p->cf->gr_time;
260 caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
261 }
9aed29e6 262
d15b0b0a
OZ
263 /* Allocate and fill per-AF fields */
264 WALK_LIST(c, p->p.channels)
265 {
266 ac = &caps->af_data[caps->af_count++];
267 ac->afi = c->afi;
268 ac->ready = 1;
85368cd4 269
d8022d26
OZ
270 ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
271 any_ext_next_hop |= ac->ext_next_hop;
272
d15b0b0a
OZ
273 ac->add_path = c->cf->add_path;
274 any_add_path |= ac->add_path;
165a6227 275
d15b0b0a 276 if (c->cf->gr_able)
165a6227 277 {
d15b0b0a
OZ
278 ac->gr_able = 1;
279
280 if (p->p.gr_recovery)
281 ac->gr_af_flags |= BGP_GRF_FORWARDING;
165a6227 282 }
d15b0b0a 283 }
165a6227 284
d15b0b0a
OZ
285 /* Sort capability fields by AFI/SAFI */
286 qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
11cb6202 287
e3299ab1 288
d15b0b0a 289 /* Create capability list in buffer */
e8ba557c 290
f8aad5d5 291 /*
711d617d
OZ
292 * Note that max length is ~ 20+14*af_count. With max 12 channels that is
293 * 188. Option limit is 253 and buffer size is 4096, so we cannot overflow
f8aad5d5
OZ
294 * unless we add new capabilities or more AFs.
295 */
296
d15b0b0a
OZ
297 WALK_AF_CAPS(caps, ac)
298 if (ac->ready)
299 {
300 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
301 *buf++ = 4; /* Capability data length */
302 put_af4(buf, ac->afi);
303 buf += 4;
304 }
0c791f87 305
d15b0b0a
OZ
306 if (caps->route_refresh)
307 {
308 *buf++ = 2; /* Capability 2: Support for route refresh */
309 *buf++ = 0; /* Capability data length */
310 }
bf47fe4b 311
d8022d26
OZ
312 if (any_ext_next_hop)
313 {
314 *buf++ = 5; /* Capability 5: Support for extended next hop */
315 *buf++ = 0; /* Capability data length, will be fixed later */
316 data = buf;
317
318 WALK_AF_CAPS(caps, ac)
319 if (ac->ext_next_hop)
320 {
321 put_af4(buf, ac->afi);
322 put_u16(buf+4, BGP_AFI_IPV6);
323 buf += 6;
324 }
325
326 data[-1] = buf - data;
327 }
328
d15b0b0a
OZ
329 if (caps->ext_messages)
330 {
331 *buf++ = 6; /* Capability 6: Support for extended messages */
332 *buf++ = 0; /* Capability data length */
333 }
11cb6202 334
d15b0b0a
OZ
335 if (caps->gr_aware)
336 {
337 *buf++ = 64; /* Capability 64: Support for graceful restart */
338 *buf++ = 0; /* Capability data length, will be fixed later */
339 data = buf;
094d2bdb 340
d15b0b0a
OZ
341 put_u16(buf, caps->gr_time);
342 buf[0] |= caps->gr_flags;
343 buf += 2;
9aed29e6 344
d15b0b0a
OZ
345 WALK_AF_CAPS(caps, ac)
346 if (ac->gr_able)
347 {
348 put_af3(buf, ac->afi);
349 buf[3] = ac->gr_af_flags;
350 buf += 4;
351 }
06e0d1b6 352
d15b0b0a
OZ
353 data[-1] = buf - data;
354 }
72a6ef11 355
d15b0b0a
OZ
356 if (caps->as4_support)
357 {
358 *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
359 *buf++ = 4; /* Capability data length */
360 put_u32(buf, p->public_as);
361 buf += 4;
362 }
f421cfdd 363
d15b0b0a
OZ
364 if (any_add_path)
365 {
366 *buf++ = 69; /* Capability 69: Support for ADD-PATH */
367 *buf++ = 0; /* Capability data length, will be fixed later */
368 data = buf;
094d2bdb 369
d15b0b0a
OZ
370 WALK_AF_CAPS(caps, ac)
371 if (ac->add_path)
372 {
373 put_af3(buf, ac->afi);
374 buf[3] = ac->add_path;
375 buf += 4;
376 }
094d2bdb 377
d15b0b0a
OZ
378 data[-1] = buf - data;
379 }
380
381 if (caps->enhanced_refresh)
382 {
383 *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
384 *buf++ = 0; /* Capability data length */
385 }
386
d15b0b0a 387 return buf;
f421cfdd
MM
388}
389
82a79586 390static void
d15b0b0a 391bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len)
82a79586 392{
d15b0b0a
OZ
393 struct bgp_proto *p = conn->bgp;
394 struct bgp_af_caps *ac;
395 int i, cl;
396 u32 af;
82a79586 397
d15b0b0a
OZ
398 while (len > 0)
399 {
400 if (len < 2 || len < (2 + pos[1]))
401 goto err;
1c1da87b 402
d15b0b0a
OZ
403 /* Capability length */
404 cl = pos[1];
3fdbafb6 405
d15b0b0a
OZ
406 /* Capability type */
407 switch (pos[0])
f421cfdd 408 {
d15b0b0a
OZ
409 case 1: /* Multiprotocol capability, RFC 4760 */
410 if (cl != 4)
411 goto err;
412
413 af = get_af4(pos+2);
414 ac = bgp_get_af_caps(caps, af);
415 ac->ready = 1;
416 break;
417
418 case 2: /* Route refresh capability, RFC 2918 */
419 if (cl != 0)
420 goto err;
421
422 caps->route_refresh = 1;
423 break;
424
d8022d26
OZ
425 case 5: /* Extended next hop encoding capability, RFC 5549 */
426 if (cl % 6)
427 goto err;
428
429 for (i = 0; i < cl; i += 6)
430 {
431 /* Specified only for IPv4 prefixes with IPv6 next hops */
432 if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) ||
433 (get_u16(pos+2+i+4) != BGP_AFI_IPV6))
434 continue;
435
436 af = get_af4(pos+2+i);
437 ac = bgp_get_af_caps(caps, af);
438 ac->ext_next_hop = 1;
439 }
440 break;
441
d15b0b0a
OZ
442 case 6: /* Extended message length capability, RFC draft */
443 if (cl != 0)
444 goto err;
445
446 caps->ext_messages = 1;
447 break;
448
449 case 64: /* Graceful restart capability, RFC 4724 */
450 if (cl % 4 != 2)
451 goto err;
452
453 /* Only the last instance is valid */
454 WALK_AF_CAPS(caps, ac)
455 {
456 ac->gr_able = 0;
457 ac->gr_af_flags = 0;
458 }
459
460 caps->gr_aware = 1;
461 caps->gr_flags = pos[2] & 0xf0;
462 caps->gr_time = get_u16(pos + 2) & 0x0fff;
463
464 for (i = 2; i < cl; i += 4)
465 {
466 af = get_af3(pos+2+i);
467 ac = bgp_get_af_caps(caps, af);
468 ac->gr_able = 1;
469 ac->gr_af_flags = pos[2+i+3];
470 }
471 break;
472
c49e4a65 473 case 65: /* AS4 capability, RFC 6793 */
d15b0b0a
OZ
474 if (cl != 4)
475 goto err;
476
477 caps->as4_support = 1;
478 caps->as4_number = get_u32(pos + 2);
479 break;
480
481 case 69: /* ADD-PATH capability, RFC 7911 */
482 if (cl % 4)
483 goto err;
484
485 for (i = 0; i < cl; i += 4)
486 {
487 byte val = pos[2+i+3];
488 if (!val || (val > BGP_ADD_PATH_FULL))
f421cfdd 489 {
d15b0b0a
OZ
490 log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
491 p->p.name, val);
f421cfdd
MM
492 break;
493 }
d15b0b0a 494 }
72a6ef11 495
d15b0b0a
OZ
496 for (i = 0; i < cl; i += 4)
497 {
498 af = get_af3(pos+2+i);
499 ac = bgp_get_af_caps(caps, af);
500 ac->add_path = pos[2+i+3];
501 }
502 break;
0c791f87 503
d15b0b0a
OZ
504 case 70: /* Enhanced route refresh capability, RFC 7313 */
505 if (cl != 0)
506 goto err;
0c791f87 507
d15b0b0a
OZ
508 caps->enhanced_refresh = 1;
509 break;
1c1da87b 510
d15b0b0a
OZ
511 /* We can safely ignore all other capabilities */
512 }
513
514 ADVANCE(pos, len, 2 + cl);
515 }
516 return;
517
518err:
519 bgp_error(conn, 2, 0, NULL, 0);
520 return;
53ffbff3
OZ
521}
522
d15b0b0a
OZ
523static int
524bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
1c1da87b 525{
cf3d6470 526 struct bgp_proto *p = conn->bgp;
d15b0b0a
OZ
527 struct bgp_caps *caps;
528 int ol;
cf3d6470 529
d15b0b0a
OZ
530 /* Max number of announced AFIs is limited by max option length (255) */
531 caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps));
532 memset(caps, 0, sizeof(struct bgp_caps));
cf3d6470 533
d15b0b0a
OZ
534 while (len > 0)
535 {
536 if ((len < 2) || (len < (2 + pos[1])))
537 { bgp_error(conn, 2, 0, NULL, 0); return -1; }
538
539 ol = pos[1];
540 if (pos[0] == 2)
cf3d6470 541 {
d15b0b0a
OZ
542 /* BGP capabilities, RFC 5492 */
543 if (p->cf->capabilities)
544 bgp_read_capabilities(conn, caps, pos + 2, ol);
cf3d6470 545 }
d15b0b0a 546 else
cf3d6470 547 {
d15b0b0a
OZ
548 /* Unknown option */
549 bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */
550 return -1;
cf3d6470
MM
551 }
552
d15b0b0a
OZ
553 ADVANCE(pos, len, 2 + ol);
554 }
555
556 uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps);
557 conn->remote_caps = mb_allocz(p->p.pool, n);
558 memcpy(conn->remote_caps, caps, n);
559
560 return 0;
1c1da87b
MM
561}
562
0c791f87 563static byte *
d15b0b0a 564bgp_create_open(struct bgp_conn *conn, byte *buf)
0c791f87
OZ
565{
566 struct bgp_proto *p = conn->bgp;
0c791f87 567
d15b0b0a
OZ
568 BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
569 BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);
0c791f87 570
d15b0b0a
OZ
571 buf[0] = BGP_VERSION;
572 put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
573 put_u16(buf+3, p->cf->hold_time);
574 put_u32(buf+5, p->local_id);
0c791f87 575
d15b0b0a
OZ
576 if (p->cf->capabilities)
577 {
578 /* Prepare local_caps and write capabilities to buffer */
579 byte *end = bgp_write_capabilities(conn, buf+12);
580 uint len = end - (buf+12);
1c1da87b 581
d15b0b0a
OZ
582 buf[9] = len + 2; /* Optional parameters length */
583 buf[10] = 2; /* Option 2: Capability list */
584 buf[11] = len; /* Option data length */
bf47fe4b 585
d15b0b0a
OZ
586 return end;
587 }
588 else
589 {
590 /* Prepare empty local_caps */
591 conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
bf47fe4b 592
d15b0b0a
OZ
593 buf[9] = 0; /* No optional parameters */
594 return buf + 10;
595 }
9aed29e6 596
9aed29e6
OZ
597 return buf;
598}
599
d15b0b0a
OZ
600static void
601bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
9aed29e6
OZ
602{
603 struct bgp_proto *p = conn->bgp;
d15b0b0a
OZ
604 struct bgp_conn *other;
605 u32 asn, hold, id;
9aed29e6 606
d15b0b0a
OZ
607 /* Check state */
608 if (conn->state != BS_OPENSENT)
609 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
9aed29e6 610
d15b0b0a
OZ
611 /* Check message contents */
612 if (len < 29 || len != 29 + (uint) pkt[28])
613 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
9aed29e6 614
d15b0b0a
OZ
615 if (pkt[19] != BGP_VERSION)
616 { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; }
72a6ef11 617
d15b0b0a
OZ
618 asn = get_u16(pkt+20);
619 hold = get_u16(pkt+22);
620 id = get_u32(pkt+24);
621 BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);
622
623 if (bgp_read_options(conn, pkt+29, pkt[28]) < 0)
624 return;
625
626 if (hold > 0 && hold < 3)
627 { bgp_error(conn, 2, 6, pkt+22, 2); return; }
72a6ef11 628
d15b0b0a
OZ
629 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
630 if (!id || (p->is_internal && id == p->local_id))
631 { bgp_error(conn, 2, 3, pkt+24, -4); return; }
632
633 struct bgp_caps *caps = conn->remote_caps;
634
635 if (caps->as4_support)
636 {
637 u32 as4 = caps->as4_number;
638
639 if ((as4 != asn) && (asn != AS_TRANS))
640 log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
641
642 if (as4 != p->remote_as)
643 { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
644 }
645 else
646 {
647 if (asn != p->remote_as)
648 { bgp_error(conn, 2, 2, pkt+20, 2); return; }
649 }
650
651 /* Check the other connection */
652 other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
653 switch (other->state)
654 {
655 case BS_CONNECT:
656 case BS_ACTIVE:
657 /* Stop outgoing connection attempts */
658 bgp_conn_enter_idle_state(other);
659 break;
660
661 case BS_IDLE:
662 case BS_OPENSENT:
663 case BS_CLOSE:
664 break;
665
666 case BS_OPENCONFIRM:
667 /*
668 * Description of collision detection rules in RFC 4271 is confusing and
669 * contradictory, but it is essentially:
670 *
671 * 1. Router with higher ID is dominant
672 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
673 * 3. When both connections are in OpenConfirm state, one initiated by
674 * the dominant router is kept.
675 *
676 * The first line in the expression below evaluates whether the neighbor
677 * is dominant, the second line whether the new connection was initiated
678 * by the neighbor. If both are true (or both are false), we keep the new
679 * connection, otherwise we keep the old one.
680 */
681 if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
682 == (conn == &p->incoming_conn))
0c3588bf 683 {
d15b0b0a
OZ
684 /* Should close the other connection */
685 BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
686 bgp_error(other, 6, 7, NULL, 0);
687 break;
0c3588bf 688 }
d15b0b0a
OZ
689 /* Fall thru */
690 case BS_ESTABLISHED:
691 /* Should close this connection */
692 BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
693 bgp_error(conn, 6, 7, NULL, 0);
694 return;
0c3588bf 695
d15b0b0a
OZ
696 default:
697 bug("bgp_rx_open: Unknown state");
698 }
699
700 /* Update our local variables */
701 conn->hold_time = MIN(hold, p->cf->hold_time);
702 conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
703 conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
704 conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
705 p->remote_id = id;
706
707 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
708 conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
709
710 bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
711 bgp_start_timer(conn->hold_timer, conn->hold_time);
712 bgp_conn_enter_openconfirm_state(conn);
713}
714
715
716/*
717 * Next hop handling
718 */
719
/*
 * Helper macros for attribute processing. All of them expect a variable 's'
 * with members 'proto' (and 'err_withdraw' for WITHDRAW) in the scope where
 * they are used.
 */

/* Log a message prefixed by the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log a message and return from the calling function */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log a message, set s->err_withdraw and return from the calling function */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Messages used with the macros above */
#define BAD_AFI		"Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP	"Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP	"Missing NEXT_HOP attribute"
#define NO_LABEL_STACK	"Missing MPLS stack"
d15b0b0a
OZ
733
734
735static void
736bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
737{
738 struct bgp_proto *p = s->proto;
739 struct bgp_channel *c = s->channel;
740
741 if (c->cf->gw_mode == GW_DIRECT)
742 {
743 neighbor *nbr = NULL;
744
745 /* GW_DIRECT -> single_hop -> p->neigh != NULL */
746 if (ipa_nonzero(gw))
747 nbr = neigh_find2(&p->p, &gw, NULL, 0);
748 else if (ipa_nonzero(ll))
749 nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0);
750
751 if (!nbr || (nbr->scope == SCOPE_HOST))
752 WITHDRAW(BAD_NEXT_HOP);
753
4e276a89 754 a->dest = RTD_UNICAST;
1e37e35c
OZ
755 a->nh.gw = nbr->addr;
756 a->nh.iface = nbr->iface;
d15b0b0a
OZ
757 }
758 else /* GW_RECURSIVE */
759 {
760 if (ipa_zero(gw))
761 WITHDRAW(BAD_NEXT_HOP);
762
ef57b70f
OZ
763 rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
764 s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
1e37e35c
OZ
765
766 if (!s->mpls)
767 rta_apply_hostentry(a, s->hostentry, NULL);
768
769 /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
770 }
771}
772
773static void
774bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
775{
776 if (lnum > MPLS_MAX_LABEL_STACK)
777 {
778 REPORT("Too many MPLS labels ($u)", lnum);
779
780 a->dest = RTD_UNREACHABLE;
781 a->hostentry = NULL;
782 a->nh = (struct nexthop) { };
783 return;
784 }
785
786 /* Handle implicit NULL as empty MPLS stack */
787 if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
788 lnum = 0;
789
790 if (s->channel->cf->gw_mode == GW_DIRECT)
791 {
792 a->nh.labels = lnum;
793 memcpy(a->nh.label, labels, 4*lnum);
794 }
795 else /* GW_RECURSIVE */
796 {
797 mpls_label_stack ms;
798
799 ms.len = lnum;
800 memcpy(ms.stack, labels, 4*lnum);
801 rta_apply_hostentry(a, s->hostentry, &ms);
d15b0b0a
OZ
802 }
803}
804
1e37e35c 805
d15b0b0a
OZ
806static inline int
807bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
808{
809 struct bgp_proto *p = s->proto;
810 ip_addr *nh = (void *) a->u.ptr->data;
811
812 if (s->channel->cf->next_hop_self)
813 return 0;
814
815 if (s->channel->cf->next_hop_keep)
816 return 1;
817
818 /* Keep it when explicitly set in export filter */
819 if (a->type & EAF_FRESH)
820 return 1;
821
822 /* Keep it when exported to internal peers */
823 if (p->is_interior && ipa_nonzero(*nh))
824 return 1;
825
826 /* Keep it when forwarded between single-hop BGPs on the same iface */
827 struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL;
828 return p->neigh && (p->neigh->iface == ifa);
829}
830
831static inline int
832bgp_use_gateway(struct bgp_export_state *s)
833{
834 struct bgp_proto *p = s->proto;
835 rta *ra = s->route->attrs;
836
837 if (s->channel->cf->next_hop_self)
838 return 0;
839
62e64905
OZ
840 /* We need one valid global gateway */
841 if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
d15b0b0a
OZ
842 return 0;
843
844 /* Use it when exported to internal peers */
845 if (p->is_interior)
846 return 1;
847
848 /* Use it when forwarded to single-hop BGP peer on on the same iface */
4e276a89 849 return p->neigh && (p->neigh->iface == ra->nh.iface);
d15b0b0a
OZ
850}
851
852static void
853bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
854{
855 if (!a || !bgp_use_next_hop(s, a))
856 {
857 if (bgp_use_gateway(s))
72a6ef11 858 {
1e37e35c
OZ
859 rta *ra = s->route->attrs;
860 ip_addr nh[1] = { ra->nh.gw };
d15b0b0a 861 bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
1e37e35c
OZ
862
863 if (s->mpls)
864 {
865 u32 implicit_null = BGP_MPLS_NULL;
866 u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
867 uint lnum = ra->nh.labels ? ra->nh.labels : 1;
868 bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
869 }
72a6ef11 870 }
d15b0b0a 871 else
72a6ef11 872 {
d15b0b0a
OZ
873 ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
874 bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
1e37e35c
OZ
875
876 /* TODO: Use local MPLS assigned label */
877 if (s->mpls)
878 bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK);
72a6ef11 879 }
d15b0b0a
OZ
880 }
881
882 /* Check if next hop is valid */
883 a = bgp_find_attr(*to, BA_NEXT_HOP);
884 if (!a)
885 WITHDRAW(NO_NEXT_HOP);
886
887 ip_addr *nh = (void *) a->u.ptr->data;
888 ip_addr peer = s->proto->cf->remote_ip;
889 uint len = a->u.ptr->length;
890
ef57b70f 891 /* Forbid zero next hop */
d15b0b0a
OZ
892 if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
893 WITHDRAW(BAD_NEXT_HOP);
894
ef57b70f 895 /* Forbid next hop equal to neighbor IP */
d15b0b0a
OZ
896 if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
897 WITHDRAW(BAD_NEXT_HOP);
1e37e35c 898
ef57b70f
OZ
899 /* Forbid next hop with non-matching AF */
900 if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
901 !s->channel->ext_next_hop)
902 WITHDRAW(BAD_NEXT_HOP);
903
1e37e35c
OZ
904 /* Just check if MPLS stack */
905 if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
906 WITHDRAW(NO_LABEL_STACK);
d15b0b0a
OZ
907}
908
ef57b70f
OZ
909static uint
910bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
911{
912 /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
913 ip_addr *nh = (void *) a->u.ptr->data;
914 uint len = a->u.ptr->length;
915
916 ASSERT((len == 16) || (len == 32));
917
918 /*
919 * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
920 * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
921 * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
922 * IPv6 address with IPv6 NLRI.
923 */
924
925 if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
926 {
927 put_ip4(buf, ipa_to_ip4(nh[0]));
928 return 4;
929 }
930
931 put_ip6(buf, ipa_to_ip6(nh[0]));
932
933 if (len == 32)
934 put_ip6(buf+16, ipa_to_ip6(nh[1]));
935
936 return len;
937}
938
939static void
940bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
941{
942 struct bgp_channel *c = s->channel;
943 struct adata *ad = lp_alloc_adata(s->pool, 32);
944 ip_addr *nh = (void *) ad->data;
945
946 if (len == 4)
947 {
948 nh[0] = ipa_from_ip4(get_ip4(data));
949 nh[1] = IPA_NONE;
950 }
951 else if (len == 16)
952 {
953 nh[0] = ipa_from_ip6(get_ip6(data));
954 nh[1] = IPA_NONE;
955
956 if (ipa_is_link_local(nh[0]))
957 { nh[1] = nh[0]; nh[0] = IPA_NONE; }
958 }
959 else if (len == 32)
960 {
961 nh[0] = ipa_from_ip6(get_ip6(data));
962 nh[1] = ipa_from_ip6(get_ip6(data+16));
963
964 if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
965 nh[1] = IPA_NONE;
966 }
967 else
968 bgp_parse_error(s, 9);
969
970 if (ipa_zero(nh[1]))
971 ad->length = 16;
972
973 if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
974 WITHDRAW(BAD_NEXT_HOP);
975
976 // XXXX validate next hop
977
978 bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
979 bgp_apply_next_hop(s, a, nh[0], nh[1]);
980}
981
982static uint
983bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
984{
985 ip_addr *nh = (void *) a->u.ptr->data;
986 uint len = a->u.ptr->length;
987
988 ASSERT((len == 16) || (len == 32));
989
990 /*
991 * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
992 * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
993 * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
994 * IPv6 address with VPNv6 NLRI.
995 */
996
997 if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
998 {
999 put_u64(buf, 0); /* VPN RD is 0 */
1000 put_ip4(buf+8, ipa_to_ip4(nh[0]));
1001 return 12;
1002 }
1003
1004 put_u64(buf, 0); /* VPN RD is 0 */
1005 put_ip6(buf+8, ipa_to_ip6(nh[0]));
1006
1007 if (len == 16)
1008 return 24;
1009
1010 put_u64(buf+24, 0); /* VPN RD is 0 */
1011 put_ip6(buf+32, ipa_to_ip6(nh[1]));
1012
1013 return 48;
1014}
1015
1016static void
1017bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
1018{
1019 struct bgp_channel *c = s->channel;
1020 struct adata *ad = lp_alloc_adata(s->pool, 32);
1021 ip_addr *nh = (void *) ad->data;
1022
1023 if (len == 12)
1024 {
1025 nh[0] = ipa_from_ip4(get_ip4(data+8));
1026 nh[1] = IPA_NONE;
1027 }
1028 else if (len == 24)
1029 {
1030 nh[0] = ipa_from_ip6(get_ip6(data+8));
1031 nh[1] = IPA_NONE;
1032
1033 if (ipa_is_link_local(nh[0]))
1034 { nh[1] = nh[0]; nh[0] = IPA_NONE; }
1035 }
1036 else if (len == 48)
1037 {
1038 nh[0] = ipa_from_ip6(get_ip6(data+8));
1039 nh[1] = ipa_from_ip6(get_ip6(data+32));
1040
1041 if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
1042 nh[1] = IPA_NONE;
1043 }
1044 else
1045 bgp_parse_error(s, 9);
1046
1047 if (ipa_zero(nh[1]))
1048 ad->length = 16;
1049
1050 /* XXXX which error */
1051 if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
1052 bgp_parse_error(s, 9);
1053
1054 if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
1055 WITHDRAW(BAD_NEXT_HOP);
1056
1057 // XXXX validate next hop
1058
1059 bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
1060 bgp_apply_next_hop(s, a, nh[0], nh[1]);
1061}
1062
1063
1064
ac3ad139
OZ
1065static uint
1066bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
1067{
ac3ad139
OZ
1068 return 0;
1069}
1070
1071static void
1072bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
1073{
82f42ea0
OZ
1074 /*
1075 * Although we expect no next hop and RFC 7606 7.11 states that attribute
1076 * MP_REACH_NLRI with unexpected next hop length is considered malformed,
1077 * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
1078 */
1079
ac3ad139
OZ
1080 return;
1081}
1082
1083static void
82f42ea0 1084bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
ac3ad139 1085{
82f42ea0
OZ
1086 /* NEXT_HOP shall not pass */
1087 if (a)
1088 bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
ac3ad139
OZ
1089}
1090
d15b0b0a
OZ
1091
1092/*
1093 * UPDATE
1094 */
1095
1096static void
1097bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
1098{
1099 if (path_id != s->last_id)
1100 {
1101 s->last_src = rt_get_source(&s->proto->p, path_id);
1102 s->last_id = path_id;
1103
1104 rta_free(s->cached_rta);
1105 s->cached_rta = NULL;
1106 }
1107
1108 if (!a0)
1109 {
1110 /* Route withdraw */
1111 rte_update2(&s->channel->c, n, NULL, s->last_src);
1112 return;
1113 }
1114
1115 /* Prepare cached route attributes */
1116 if (s->cached_rta == NULL)
1117 {
1118 a0->src = s->last_src;
1119
1120 /* Workaround for rta_lookup() breaking eattrs */
1121 ea_list *ea = a0->eattrs;
1122 s->cached_rta = rta_lookup(a0);
1123 a0->eattrs = ea;
1124 }
1125
1126 rta *a = rta_clone(s->cached_rta);
1127 rte *e = rte_get_temp(a);
1128
1129 e->pflags = 0;
1130 e->u.bgp.suppressed = 0;
1131 rte_update2(&s->channel->c, n, e, s->last_src);
1132}
1133
1e37e35c
OZ
1134static void
1135bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen)
1136{
1137 u32 dummy = 0;
1138 u32 *labels = mpls ? (u32 *) mpls->data : &dummy;
1139 uint lnum = mpls ? (mpls->length / 4) : 1;
1140
1141 for (uint i = 0; i < lnum; i++)
1142 {
1143 put_u24(*pos, labels[i] << 4);
1144 ADVANCE(*pos, *size, 3);
1145 }
1146
1147 /* Add bottom-of-stack flag */
1148 (*pos)[-1] |= BGP_MPLS_BOS;
1149
1150 *pxlen += 24 * lnum;
1151}
1152
1153static void
1154bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
1155{
1156 u32 labels[BGP_MPLS_MAX], label;
1157 uint lnum = 0;
1158
1159 do {
1160 if (*pxlen < 24)
1161 bgp_parse_error(s, 1);
1162
1163 label = get_u24(*pos);
1164 labels[lnum++] = label >> 4;
1165 ADVANCE(*pos, *len, 3);
1166 *pxlen -= 24;
d15b0b0a 1167
1e37e35c
OZ
1168 /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */
1169 if (!a && !s->err_withdraw && (lnum == 1) && (label == BGP_MPLS_MAGIC))
1170 break;
1171 }
1172 while (!(label & BGP_MPLS_BOS));
1173
1174 if (!a)
1175 return;
1176
1177 /* Attach MPLS attribute unless we already have one */
1178 if (!s->mpls_labels)
1179 {
1180 s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
1181 bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
1182 }
1183
1184 /* Overwrite data in the attribute */
1185 s->mpls_labels->length = 4*lnum;
1186 memcpy(s->mpls_labels->data, labels, 4*lnum);
1187
1188 /* Update next hop entry in rta */
1189 bgp_apply_mpls_labels(s, a, labels, lnum);
1190
1191 /* Attributes were changed, invalidate cached entry */
1192 rta_free(s->cached_rta);
1193 s->cached_rta = NULL;
1194
1195 return;
1196}
d15b0b0a
OZ
1197
1198static uint
1199bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1200{
1201 byte *pos = buf;
1202
1e37e35c 1203 while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
d15b0b0a
OZ
1204 {
1205 struct bgp_prefix *px = HEAD(buck->prefixes);
1206 struct net_addr_ip4 *net = (void *) px->net;
1207
1208 /* Encode path ID */
1209 if (s->add_path)
72a6ef11 1210 {
d15b0b0a
OZ
1211 put_u32(pos, px->path_id);
1212 ADVANCE(pos, size, 4);
72a6ef11 1213 }
d15b0b0a 1214
d15b0b0a
OZ
1215 /* Encode prefix length */
1216 *pos = net->pxlen;
1217 ADVANCE(pos, size, 1);
1218
1e37e35c
OZ
1219 /* Encode MPLS labels */
1220 if (s->mpls)
1221 bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1222
d15b0b0a 1223 /* Encode prefix body */
1e37e35c
OZ
1224 ip4_addr a = ip4_hton(net->prefix);
1225 uint b = (net->pxlen + 7) / 8;
d15b0b0a
OZ
1226 memcpy(pos, &a, b);
1227 ADVANCE(pos, size, b);
1228
1229 bgp_free_prefix(s->channel, px);
1230 }
1231
1232 return pos - buf;
1233}
1234
1235static void
1236bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1237{
1238 while (len)
1239 {
1240 net_addr_ip4 net;
1241 u32 path_id = 0;
1242
1243 /* Decode path ID */
1244 if (s->add_path)
72a6ef11 1245 {
d15b0b0a
OZ
1246 if (len < 5)
1247 bgp_parse_error(s, 1);
1248
1249 path_id = get_u32(pos);
1250 ADVANCE(pos, len, 4);
72a6ef11 1251 }
d15b0b0a
OZ
1252
1253 /* Decode prefix length */
1254 uint l = *pos;
d15b0b0a
OZ
1255 ADVANCE(pos, len, 1);
1256
1e37e35c
OZ
1257 if (len < ((l + 7) / 8))
1258 bgp_parse_error(s, 1);
1259
1260 /* Decode MPLS labels */
1261 if (s->mpls)
1262 bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1263
d15b0b0a
OZ
1264 if (l > IP4_MAX_PREFIX_LENGTH)
1265 bgp_parse_error(s, 10);
1266
d15b0b0a
OZ
1267 /* Decode prefix body */
1268 ip4_addr addr = IP4_NONE;
1e37e35c 1269 uint b = (l + 7) / 8;
d15b0b0a
OZ
1270 memcpy(&addr, pos, b);
1271 ADVANCE(pos, len, b);
1272
1273 net = NET_ADDR_IP4(ip4_ntoh(addr), l);
1274 net_normalize_ip4(&net);
1275
1276 // XXXX validate prefix
1277
1278 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1279 }
1280}
1281
d15b0b0a
OZ
1282
1283static uint
1284bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1285{
1286 byte *pos = buf;
1287
1e37e35c 1288 while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
d15b0b0a
OZ
1289 {
1290 struct bgp_prefix *px = HEAD(buck->prefixes);
1291 struct net_addr_ip6 *net = (void *) px->net;
1292
1293 /* Encode path ID */
1294 if (s->add_path)
bf47fe4b 1295 {
d15b0b0a
OZ
1296 put_u32(pos, px->path_id);
1297 ADVANCE(pos, size, 4);
bf47fe4b 1298 }
d15b0b0a 1299
d15b0b0a
OZ
1300 /* Encode prefix length */
1301 *pos = net->pxlen;
1302 ADVANCE(pos, size, 1);
1303
1e37e35c
OZ
1304 /* Encode MPLS labels */
1305 if (s->mpls)
1306 bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1307
d15b0b0a 1308 /* Encode prefix body */
1e37e35c
OZ
1309 ip6_addr a = ip6_hton(net->prefix);
1310 uint b = (net->pxlen + 7) / 8;
d15b0b0a
OZ
1311 memcpy(pos, &a, b);
1312 ADVANCE(pos, size, b);
1313
1314 bgp_free_prefix(s->channel, px);
1315 }
1316
1317 return pos - buf;
1318}
1319
1320static void
1321bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1322{
1323 while (len)
1324 {
1325 net_addr_ip6 net;
1326 u32 path_id = 0;
1327
1328 /* Decode path ID */
1329 if (s->add_path)
9aed29e6 1330 {
d15b0b0a
OZ
1331 if (len < 5)
1332 bgp_parse_error(s, 1);
1333
1334 path_id = get_u32(pos);
1335 ADVANCE(pos, len, 4);
9aed29e6 1336 }
0c791f87 1337
d15b0b0a
OZ
1338 /* Decode prefix length */
1339 uint l = *pos;
d15b0b0a 1340 ADVANCE(pos, len, 1);
9aed29e6 1341
1e37e35c
OZ
1342 if (len < ((l + 7) / 8))
1343 bgp_parse_error(s, 1);
1344
1345 /* Decode MPLS labels */
1346 if (s->mpls)
1347 bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1348
d15b0b0a
OZ
1349 if (l > IP6_MAX_PREFIX_LENGTH)
1350 bgp_parse_error(s, 10);
0c791f87 1351
d15b0b0a
OZ
1352 /* Decode prefix body */
1353 ip6_addr addr = IP6_NONE;
1e37e35c 1354 uint b = (l + 7) / 8;
d15b0b0a
OZ
1355 memcpy(&addr, pos, b);
1356 ADVANCE(pos, len, b);
9aed29e6 1357
d15b0b0a
OZ
1358 net = NET_ADDR_IP6(ip6_ntoh(addr), l);
1359 net_normalize_ip6(&net);
0c791f87 1360
d15b0b0a 1361 // XXXX validate prefix
9aed29e6 1362
d15b0b0a
OZ
1363 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1364 }
72a6ef11
MM
1365}
1366
1e37e35c
OZ
1367static uint
1368bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1369{
1370 byte *pos = buf;
1371
1372 while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
1373 {
1374 struct bgp_prefix *px = HEAD(buck->prefixes);
1375 struct net_addr_vpn4 *net = (void *) px->net;
1376
1377 /* Encode path ID */
1378 if (s->add_path)
1379 {
1380 put_u32(pos, px->path_id);
1381 ADVANCE(pos, size, 4);
1382 }
1383
1384 /* Encode prefix length */
01111fc4 1385 *pos = 64 + net->pxlen;
1e37e35c
OZ
1386 ADVANCE(pos, size, 1);
1387
1388 /* Encode MPLS labels */
711d617d
OZ
1389 if (s->mpls)
1390 bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1e37e35c
OZ
1391
1392 /* Encode route distinguisher */
1393 put_u64(pos, net->rd);
1394 ADVANCE(pos, size, 8);
1395
1396 /* Encode prefix body */
1397 ip4_addr a = ip4_hton(net->prefix);
1398 uint b = (net->pxlen + 7) / 8;
1399 memcpy(pos, &a, b);
1400 ADVANCE(pos, size, b);
1401
1402 bgp_free_prefix(s->channel, px);
1403 }
1404
1405 return pos - buf;
1406}
1407
1408static void
1409bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1410{
1411 while (len)
1412 {
1413 net_addr_vpn4 net;
1414 u32 path_id = 0;
1415
1416 /* Decode path ID */
1417 if (s->add_path)
1418 {
1419 if (len < 5)
1420 bgp_parse_error(s, 1);
1421
1422 path_id = get_u32(pos);
1423 ADVANCE(pos, len, 4);
1424 }
1425
1426 /* Decode prefix length */
1427 uint l = *pos;
1428 ADVANCE(pos, len, 1);
1429
1430 if (len < ((l + 7) / 8))
1431 bgp_parse_error(s, 1);
1432
1433 /* Decode MPLS labels */
711d617d
OZ
1434 if (s->mpls)
1435 bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1e37e35c
OZ
1436
1437 /* Decode route distinguisher */
1438 if (l < 64)
1439 bgp_parse_error(s, 1);
1440
1441 u64 rd = get_u64(pos);
1442 ADVANCE(pos, len, 8);
1443 l -= 64;
1444
1445 if (l > IP4_MAX_PREFIX_LENGTH)
1446 bgp_parse_error(s, 10);
1447
1448 /* Decode prefix body */
1449 ip4_addr addr = IP4_NONE;
1450 uint b = (l + 7) / 8;
1451 memcpy(&addr, pos, b);
1452 ADVANCE(pos, len, b);
1453
1454 net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd);
1455 net_normalize_vpn4(&net);
1456
1457 // XXXX validate prefix
1458
1459 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1460 }
1461}
1462
1e37e35c
OZ
1463
1464static uint
1465bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1466{
1467 byte *pos = buf;
1468
1469 while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
1470 {
1471 struct bgp_prefix *px = HEAD(buck->prefixes);
1472 struct net_addr_vpn6 *net = (void *) px->net;
1473
1474 /* Encode path ID */
1475 if (s->add_path)
1476 {
1477 put_u32(pos, px->path_id);
1478 ADVANCE(pos, size, 4);
1479 }
1480
1481 /* Encode prefix length */
01111fc4 1482 *pos = 64 + net->pxlen;
1e37e35c
OZ
1483 ADVANCE(pos, size, 1);
1484
1485 /* Encode MPLS labels */
49c7ef3b
OZ
1486 if (s->mpls)
1487 bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1e37e35c
OZ
1488
1489 /* Encode route distinguisher */
1490 put_u64(pos, net->rd);
1491 ADVANCE(pos, size, 8);
1492
1493 /* Encode prefix body */
1494 ip6_addr a = ip6_hton(net->prefix);
1495 uint b = (net->pxlen + 7) / 8;
1496 memcpy(pos, &a, b);
1497 ADVANCE(pos, size, b);
1498
1499 bgp_free_prefix(s->channel, px);
1500 }
1501
1502 return pos - buf;
1503}
1504
1505static void
1506bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1507{
1508 while (len)
1509 {
1510 net_addr_vpn6 net;
1511 u32 path_id = 0;
1512
1513 /* Decode path ID */
1514 if (s->add_path)
1515 {
1516 if (len < 5)
1517 bgp_parse_error(s, 1);
1518
1519 path_id = get_u32(pos);
1520 ADVANCE(pos, len, 4);
1521 }
1522
1523 /* Decode prefix length */
1524 uint l = *pos;
1525 ADVANCE(pos, len, 1);
1526
1527 if (len < ((l + 7) / 8))
1528 bgp_parse_error(s, 1);
1529
1530 /* Decode MPLS labels */
1531 if (s->mpls)
1532 bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1533
1534 /* Decode route distinguisher */
1535 if (l < 64)
1536 bgp_parse_error(s, 1);
1537
1538 u64 rd = get_u64(pos);
1539 ADVANCE(pos, len, 8);
1540 l -= 64;
1541
1542 if (l > IP6_MAX_PREFIX_LENGTH)
1543 bgp_parse_error(s, 10);
1544
1545 /* Decode prefix body */
1546 ip6_addr addr = IP6_NONE;
1547 uint b = (l + 7) / 8;
1548 memcpy(&addr, pos, b);
1549 ADVANCE(pos, len, b);
1550
1551 net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd);
1552 net_normalize_vpn6(&net);
1553
1554 // XXXX validate prefix
1555
1556 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1557 }
1558}
1559
1e37e35c 1560
ac3ad139
OZ
1561static uint
1562bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1563{
1564 byte *pos = buf;
1565
1566 while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
1567 {
1568 struct bgp_prefix *px = HEAD(buck->prefixes);
1569 struct net_addr_flow4 *net = (void *) px->net;
1570 uint flen = net->length - sizeof(net_addr_flow4);
1571
1572 /* Encode path ID */
1573 if (s->add_path)
1574 {
1575 put_u32(pos, px->path_id);
1576 ADVANCE(pos, size, 4);
1577 }
1578
1579 if (flen > size)
1580 break;
1581
1582 /* Copy whole flow data including length */
1583 memcpy(pos, net->data, flen);
1584 ADVANCE(pos, size, flen);
1585
1586 bgp_free_prefix(s->channel, px);
1587 }
1588
1589 return pos - buf;
1590}
1591
1592static void
1593bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1594{
1595 while (len)
1596 {
1597 u32 path_id = 0;
1598
1599 /* Decode path ID */
1600 if (s->add_path)
1601 {
1602 if (len < 4)
1603 bgp_parse_error(s, 1);
1604
1605 path_id = get_u32(pos);
1606 ADVANCE(pos, len, 4);
1607 }
1608
1609 if (len < 2)
1610 bgp_parse_error(s, 1);
1611
1612 /* Decode flow length */
1613 uint hlen = flow_hdr_length(pos);
1614 uint dlen = flow_read_length(pos);
1615 uint flen = hlen + dlen;
1616 byte *data = pos + hlen;
1617
1618 if (len < flen)
1619 bgp_parse_error(s, 1);
1620
1621 /* Validate flow data */
1622 enum flow_validated_state r = flow4_validate(data, dlen);
1623 if (r != FLOW_ST_VALID)
1624 {
1625 log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
1626 bgp_parse_error(s, 1);
1627 }
1628
1629 if (data[0] != FLOW_TYPE_DST_PREFIX)
1630 {
1631 log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
1632 bgp_parse_error(s, 1);
1633 }
1634
1635 /* Decode dst prefix */
1636 ip4_addr px = IP4_NONE;
1637 uint pxlen = data[1];
1638
1639 // FIXME: Use some generic function
7fc55925
OZ
1640 memcpy(&px, data+2, BYTES(pxlen));
1641 px = ip4_and(ip4_ntoh(px), ip4_mkmask(pxlen));
ac3ad139
OZ
1642
1643 /* Prepare the flow */
1644 net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
1645 net_fill_flow4(n, px, pxlen, pos, flen);
1646 ADVANCE(pos, len, flen);
1647
1648 bgp_rte_update(s, n, path_id, a);
1649 }
1650}
1651
1652
1653static uint
1654bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1655{
1656 byte *pos = buf;
1657
1658 while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
1659 {
1660 struct bgp_prefix *px = HEAD(buck->prefixes);
1661 struct net_addr_flow6 *net = (void *) px->net;
1662 uint flen = net->length - sizeof(net_addr_flow6);
1663
1664 /* Encode path ID */
1665 if (s->add_path)
1666 {
1667 put_u32(pos, px->path_id);
1668 ADVANCE(pos, size, 4);
1669 }
1670
1671 if (flen > size)
1672 break;
1673
1674 /* Copy whole flow data including length */
1675 memcpy(pos, net->data, flen);
1676 ADVANCE(pos, size, flen);
1677
1678 bgp_free_prefix(s->channel, px);
1679 }
1680
1681 return pos - buf;
1682}
1683
1684static void
1685bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1686{
1687 while (len)
1688 {
1689 u32 path_id = 0;
1690
1691 /* Decode path ID */
1692 if (s->add_path)
1693 {
1694 if (len < 4)
1695 bgp_parse_error(s, 1);
1696
1697 path_id = get_u32(pos);
1698 ADVANCE(pos, len, 4);
1699 }
1700
1701 if (len < 2)
1702 bgp_parse_error(s, 1);
1703
1704 /* Decode flow length */
1705 uint hlen = flow_hdr_length(pos);
1706 uint dlen = flow_read_length(pos);
1707 uint flen = hlen + dlen;
1708 byte *data = pos + hlen;
1709
1710 if (len < flen)
1711 bgp_parse_error(s, 1);
1712
1713 /* Validate flow data */
1714 enum flow_validated_state r = flow6_validate(data, dlen);
1715 if (r != FLOW_ST_VALID)
1716 {
1717 log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
1718 bgp_parse_error(s, 1);
1719 }
1720
1721 if (data[0] != FLOW_TYPE_DST_PREFIX)
1722 {
1723 log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
1724 bgp_parse_error(s, 1);
1725 }
1726
1727 /* Decode dst prefix */
1728 ip6_addr px = IP6_NONE;
1729 uint pxlen = data[1];
1730
1731 // FIXME: Use some generic function
7fc55925
OZ
1732 memcpy(&px, data+2, BYTES(pxlen));
1733 px = ip6_and(ip6_ntoh(px), ip6_mkmask(pxlen));
ac3ad139
OZ
1734
1735 /* Prepare the flow */
1736 net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
1737 net_fill_flow6(n, px, pxlen, pos, flen);
1738 ADVANCE(pos, len, flen);
1739
1740 bgp_rte_update(s, n, path_id, a);
1741 }
1742}
1743
1744
d15b0b0a
OZ
1745static const struct bgp_af_desc bgp_af_table[] = {
1746 {
1747 .afi = BGP_AF_IPV4,
1748 .net = NET_IP4,
1749 .name = "ipv4",
1750 .encode_nlri = bgp_encode_nlri_ip4,
1751 .decode_nlri = bgp_decode_nlri_ip4,
ef57b70f
OZ
1752 .encode_next_hop = bgp_encode_next_hop_ip,
1753 .decode_next_hop = bgp_decode_next_hop_ip,
d15b0b0a
OZ
1754 .update_next_hop = bgp_update_next_hop_ip,
1755 },
1756 {
1757 .afi = BGP_AF_IPV4_MC,
1758 .net = NET_IP4,
1759 .name = "ipv4-mc",
1760 .encode_nlri = bgp_encode_nlri_ip4,
1761 .decode_nlri = bgp_decode_nlri_ip4,
ef57b70f
OZ
1762 .encode_next_hop = bgp_encode_next_hop_ip,
1763 .decode_next_hop = bgp_decode_next_hop_ip,
d15b0b0a
OZ
1764 .update_next_hop = bgp_update_next_hop_ip,
1765 },
ac3ad139 1766 {
1e37e35c
OZ
1767 .afi = BGP_AF_IPV4_MPLS,
1768 .net = NET_IP4,
1769 .mpls = 1,
1770 .name = "ipv4-mpls",
1771 .encode_nlri = bgp_encode_nlri_ip4,
1772 .decode_nlri = bgp_decode_nlri_ip4,
ef57b70f
OZ
1773 .encode_next_hop = bgp_encode_next_hop_ip,
1774 .decode_next_hop = bgp_decode_next_hop_ip,
1e37e35c 1775 .update_next_hop = bgp_update_next_hop_ip,
ac3ad139 1776 },
d15b0b0a
OZ
1777 {
1778 .afi = BGP_AF_IPV6,
1779 .net = NET_IP6,
1780 .name = "ipv6",
1781 .encode_nlri = bgp_encode_nlri_ip6,
1782 .decode_nlri = bgp_decode_nlri_ip6,
ef57b70f
OZ
1783 .encode_next_hop = bgp_encode_next_hop_ip,
1784 .decode_next_hop = bgp_decode_next_hop_ip,
d15b0b0a
OZ
1785 .update_next_hop = bgp_update_next_hop_ip,
1786 },
1787 {
1788 .afi = BGP_AF_IPV6_MC,
1789 .net = NET_IP6,
1790 .name = "ipv6-mc",
1791 .encode_nlri = bgp_encode_nlri_ip6,
1792 .decode_nlri = bgp_decode_nlri_ip6,
ef57b70f
OZ
1793 .encode_next_hop = bgp_encode_next_hop_ip,
1794 .decode_next_hop = bgp_decode_next_hop_ip,
d15b0b0a
OZ
1795 .update_next_hop = bgp_update_next_hop_ip,
1796 },
1e37e35c
OZ
1797 {
1798 .afi = BGP_AF_IPV6_MPLS,
1799 .net = NET_IP6,
1800 .mpls = 1,
1801 .name = "ipv6-mpls",
1802 .encode_nlri = bgp_encode_nlri_ip6,
1803 .decode_nlri = bgp_decode_nlri_ip6,
ef57b70f
OZ
1804 .encode_next_hop = bgp_encode_next_hop_ip,
1805 .decode_next_hop = bgp_decode_next_hop_ip,
1e37e35c
OZ
1806 .update_next_hop = bgp_update_next_hop_ip,
1807 },
1808 {
1809 .afi = BGP_AF_VPN4_MPLS,
1810 .net = NET_VPN4,
1811 .mpls = 1,
1812 .name = "vpn4-mpls",
1813 .encode_nlri = bgp_encode_nlri_vpn4,
1814 .decode_nlri = bgp_decode_nlri_vpn4,
ef57b70f
OZ
1815 .encode_next_hop = bgp_encode_next_hop_vpn,
1816 .decode_next_hop = bgp_decode_next_hop_vpn,
1e37e35c
OZ
1817 .update_next_hop = bgp_update_next_hop_ip,
1818 },
1819 {
1820 .afi = BGP_AF_VPN6_MPLS,
1821 .net = NET_VPN6,
1822 .mpls = 1,
1823 .name = "vpn6-mpls",
1824 .encode_nlri = bgp_encode_nlri_vpn6,
1825 .decode_nlri = bgp_decode_nlri_vpn6,
ef57b70f
OZ
1826 .encode_next_hop = bgp_encode_next_hop_vpn,
1827 .decode_next_hop = bgp_decode_next_hop_vpn,
1e37e35c
OZ
1828 .update_next_hop = bgp_update_next_hop_ip,
1829 },
711d617d
OZ
1830 {
1831 .afi = BGP_AF_VPN4_MC,
1832 .net = NET_VPN4,
1833 .name = "vpn4-mc",
1834 .encode_nlri = bgp_encode_nlri_vpn4,
1835 .decode_nlri = bgp_decode_nlri_vpn4,
1836 .encode_next_hop = bgp_encode_next_hop_vpn,
1837 .decode_next_hop = bgp_decode_next_hop_vpn,
1838 .update_next_hop = bgp_update_next_hop_ip,
1839 },
1840 {
1841 .afi = BGP_AF_VPN6_MC,
1842 .net = NET_VPN6,
1843 .name = "vpn6-mc",
1844 .encode_nlri = bgp_encode_nlri_vpn6,
1845 .decode_nlri = bgp_decode_nlri_vpn6,
1846 .encode_next_hop = bgp_encode_next_hop_vpn,
1847 .decode_next_hop = bgp_decode_next_hop_vpn,
1848 .update_next_hop = bgp_update_next_hop_ip,
1849 },
1e37e35c
OZ
1850 {
1851 .afi = BGP_AF_FLOW4,
1852 .net = NET_FLOW4,
ef57b70f 1853 .no_igp = 1,
1e37e35c
OZ
1854 .name = "flow4",
1855 .encode_nlri = bgp_encode_nlri_flow4,
1856 .decode_nlri = bgp_decode_nlri_flow4,
1857 .encode_next_hop = bgp_encode_next_hop_none,
1858 .decode_next_hop = bgp_decode_next_hop_none,
1859 .update_next_hop = bgp_update_next_hop_none,
1860 },
ac3ad139
OZ
1861 {
1862 .afi = BGP_AF_FLOW6,
1863 .net = NET_FLOW6,
ef57b70f 1864 .no_igp = 1,
ac3ad139
OZ
1865 .name = "flow6",
1866 .encode_nlri = bgp_encode_nlri_flow6,
1867 .decode_nlri = bgp_decode_nlri_flow6,
1868 .encode_next_hop = bgp_encode_next_hop_none,
1869 .decode_next_hop = bgp_decode_next_hop_none,
1870 .update_next_hop = bgp_update_next_hop_none,
1871 },
d15b0b0a
OZ
1872};
1873
1874const struct bgp_af_desc *
1875bgp_get_af_desc(u32 afi)
72a6ef11 1876{
d15b0b0a
OZ
1877 uint i;
1878 for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++)
1879 if (bgp_af_table[i].afi == afi)
1880 return &bgp_af_table[i];
72a6ef11 1881
d15b0b0a 1882 return NULL;
3fdbafb6
MM
1883}
1884
d15b0b0a
OZ
1885static inline uint
1886bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1887{
1888 return s->channel->desc->encode_nlri(s, buck, buf, end - buf);
1889}
1890
1891static inline uint
1892bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf)
1893{
1894 return s->channel->desc->encode_next_hop(s, nh, buf, 255);
1895}
11cb6202
OZ
1896
1897void
d15b0b0a 1898bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to)
11cb6202 1899{
d15b0b0a
OZ
1900 s->channel->desc->update_next_hop(s, a, to);
1901}
11cb6202 1902
d15b0b0a
OZ
1903#define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
1904
1905static byte *
1906bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1907{
1908 /*
1909 * 2 B Withdrawn Routes Length (zero)
1910 * --- IPv4 Withdrawn Routes NLRI (unused)
1911 * 2 B Total Path Attribute Length
1912 * var Path Attributes
1913 * var IPv4 Network Layer Reachability Information
1914 */
1915
1916 int lr, la;
1917
1918 la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
1919 if (la < 0)
1920 {
1921 /* Attribute list too long */
1922 bgp_withdraw_bucket(s->channel, buck);
1923 return NULL;
1924 }
9aed29e6 1925
d15b0b0a
OZ
1926 put_u16(buf+0, 0);
1927 put_u16(buf+2, la);
11cb6202 1928
d15b0b0a 1929 lr = bgp_encode_nlri(s, buck, buf+4+la, end);
cf31112f 1930
d15b0b0a
OZ
1931 return buf+4+la+lr;
1932}
52e21323 1933
d15b0b0a
OZ
1934static byte *
1935bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1936{
1937 /*
1938 * 2 B IPv4 Withdrawn Routes Length (zero)
1939 * --- IPv4 Withdrawn Routes NLRI (unused)
1940 * 2 B Total Path Attribute Length
1941 * 1 B MP_REACH_NLRI hdr - Attribute Flags
1942 * 1 B MP_REACH_NLRI hdr - Attribute Type Code
1943 * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
1944 * 2 B MP_REACH_NLRI data - Address Family Identifier
1945 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
1946 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
1947 * var MP_REACH_NLRI data - Network Address of Next Hop
1948 * 1 B MP_REACH_NLRI data - Reserved (zero)
1949 * var MP_REACH_NLRI data - Network Layer Reachability Information
1950 * var Rest of Path Attributes
1951 * --- IPv4 Network Layer Reachability Information (unused)
1952 */
1953
1954 int lh, lr, la; /* Lengths of next hop, NLRI and attributes */
1955
1956 /* Begin of MP_REACH_NLRI atribute */
1957 buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
1958 buf[5] = BA_MP_REACH_NLRI;
1959 put_u16(buf+6, 0); /* Will be fixed later */
1960 put_af3(buf+8, s->channel->afi);
1961 byte *pos = buf+11;
1962
1963 /* Encode attributes to temporary buffer */
1964 byte *abuf = alloca(MAX_ATTRS_LENGTH);
1965 la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH);
1966 if (la < 0)
1967 {
1968 /* Attribute list too long */
1969 bgp_withdraw_bucket(s->channel, buck);
1970 return NULL;
1971 }
0c791f87 1972
d15b0b0a
OZ
1973 /* Encode the next hop */
1974 lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1);
1975 *pos = lh;
1976 pos += 1+lh;
11cb6202 1977
d15b0b0a
OZ
1978 /* Reserved field */
1979 *pos++ = 0;
094d2bdb 1980
d15b0b0a
OZ
1981 /* Encode the NLRI */
1982 lr = bgp_encode_nlri(s, buck, pos, end - la);
1983 pos += lr;
094d2bdb 1984
d15b0b0a
OZ
1985 /* End of MP_REACH_NLRI atribute, update data length */
1986 put_u16(buf+6, pos-buf-8);
11cb6202 1987
d15b0b0a
OZ
1988 /* Copy remaining attributes */
1989 memcpy(pos, abuf, la);
1990 pos += la;
1991
1992 /* Initial UPDATE fields */
1993 put_u16(buf+0, 0);
1994 put_u16(buf+2, pos-buf-4);
1995
1996 return pos;
11cb6202
OZ
1997}
1998
d15b0b0a
OZ
1999#undef MAX_ATTRS_LENGTH
2000
2001static byte *
2002bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
a47a0108 2003{
d15b0b0a
OZ
2004 /*
2005 * 2 B Withdrawn Routes Length
2006 * var IPv4 Withdrawn Routes NLRI
2007 * 2 B Total Path Attribute Length (zero)
2008 * --- Path Attributes (unused)
2009 * --- IPv4 Network Layer Reachability Information (unused)
2010 */
11cb6202 2011
d15b0b0a 2012 uint len = bgp_encode_nlri(s, buck, buf+2, end);
11cb6202 2013
d15b0b0a
OZ
2014 put_u16(buf+0, len);
2015 put_u16(buf+2+len, 0);
11cb6202 2016
d15b0b0a 2017 return buf+4+len;
a47a0108
MM
2018}
2019
d15b0b0a
OZ
2020static byte *
2021bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
3fdbafb6 2022{
d15b0b0a
OZ
2023 /*
2024 * 2 B Withdrawn Routes Length (zero)
2025 * --- IPv4 Withdrawn Routes NLRI (unused)
2026 * 2 B Total Path Attribute Length
2027 * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
2028 * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
2029 * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
2030 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
2031 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
2032 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
2033 * --- IPv4 Network Layer Reachability Information (unused)
2034 */
2035
2036 uint len = bgp_encode_nlri(s, buck, buf+11, end);
3fdbafb6 2037
d15b0b0a
OZ
2038 put_u16(buf+0, 0);
2039 put_u16(buf+2, 7+len);
3fdbafb6 2040
d15b0b0a
OZ
2041 /* Begin of MP_UNREACH_NLRI atribute */
2042 buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
2043 buf[5] = BA_MP_UNREACH_NLRI;
2044 put_u16(buf+6, 3+len);
2045 put_af3(buf+8, s->channel->afi);
11cb6202 2046
d15b0b0a
OZ
2047 return buf+11+len;
2048}
11cb6202 2049
d15b0b0a
OZ
2050static byte *
2051bgp_create_update(struct bgp_channel *c, byte *buf)
2052{
2053 struct bgp_proto *p = (void *) c->c.proto;
2054 struct bgp_bucket *buck;
2055 byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
2056 byte *res = NULL;
2057
1e37e35c
OZ
2058again: ;
2059
d15b0b0a
OZ
2060 /* Initialize write state */
2061 struct bgp_write_state s = {
2062 .proto = p,
2063 .channel = c,
2064 .pool = bgp_linpool,
2065 .as4_session = p->as4_session,
2066 .add_path = c->add_path_tx,
1e37e35c 2067 .mpls = c->desc->mpls,
d15b0b0a
OZ
2068 };
2069
d15b0b0a
OZ
2070 /* Try unreachable bucket */
2071 if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
2072 {
ef57b70f 2073 res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
d15b0b0a
OZ
2074 bgp_create_ip_unreach(&s, buck, buf, end):
2075 bgp_create_mp_unreach(&s, buck, buf, end);
11cb6202 2076
d15b0b0a
OZ
2077 goto done;
2078 }
3fdbafb6 2079
d15b0b0a
OZ
2080 /* Try reachable buckets */
2081 if (!EMPTY_LIST(c->bucket_queue))
2082 {
2083 buck = HEAD(c->bucket_queue);
e8ba557c 2084
d15b0b0a
OZ
2085 /* Cleanup empty buckets */
2086 if (EMPTY_LIST(buck->prefixes))
a5bf5f78 2087 {
d15b0b0a
OZ
2088 bgp_free_bucket(c, buck);
2089 goto again;
a5bf5f78 2090 }
11cb6202 2091
ef57b70f 2092 res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
d15b0b0a
OZ
2093 bgp_create_ip_reach(&s, buck, buf, end):
2094 bgp_create_mp_reach(&s, buck, buf, end);
0c791f87 2095
d15b0b0a
OZ
2096 if (EMPTY_LIST(buck->prefixes))
2097 bgp_free_bucket(c, buck);
2098 else
2099 bgp_defer_bucket(c, buck);
0c791f87 2100
d15b0b0a
OZ
2101 if (!res)
2102 goto again;
b552ecc4 2103
d15b0b0a
OZ
2104 goto done;
2105 }
094d2bdb 2106
d15b0b0a
OZ
2107 /* No more prefixes to send */
2108 return NULL;
165a6227 2109
d15b0b0a
OZ
2110done:
2111 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
2112 lp_flush(s.pool);
3fdbafb6 2113
d15b0b0a 2114 return res;
3fdbafb6
MM
2115}
2116
d15b0b0a
OZ
2117static byte *
2118bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf)
2119{
2120 /* Empty update packet */
2121 put_u32(buf, 0);
2122
2123 return buf+4;
2124}
2125
2126static byte *
2127bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf)
2128{
2129 put_u16(buf+0, 0);
2130 put_u16(buf+2, 6); /* length 4--9 */
2131
2132 /* Empty MP_UNREACH_NLRI atribute */
2133 buf[4] = BAF_OPTIONAL;
2134 buf[5] = BA_MP_UNREACH_NLRI;
2135 buf[6] = 3; /* Length 7--9 */
2136 put_af3(buf+7, c->afi);
2137
2138 return buf+10;
2139}
2140
2141static byte *
2142bgp_create_end_mark(struct bgp_channel *c, byte *buf)
2143{
2144 struct bgp_proto *p = (void *) c->c.proto;
2145
2146 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
2147
2148 return (c->afi == BGP_AF_IPV4) ?
2149 bgp_create_ip_end_mark(c, buf):
2150 bgp_create_mp_end_mark(c, buf);
2151}
0c791f87
OZ
2152
2153static inline void
82f42ea0 2154bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi)
0c791f87 2155{
82f42ea0 2156 struct bgp_proto *p = s->proto;
d15b0b0a
OZ
2157 struct bgp_channel *c = bgp_get_channel(p, afi);
2158
9aed29e6
OZ
2159 BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
2160
d15b0b0a 2161 if (!c)
82f42ea0 2162 DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
d15b0b0a
OZ
2163
2164 if (c->load_state == BFS_LOADING)
2165 c->load_state = BFS_NONE;
0c791f87
OZ
2166
2167 if (p->p.gr_recovery)
d15b0b0a
OZ
2168 channel_graceful_restart_unlock(&c->c);
2169
2170 if (c->gr_active)
2171 bgp_graceful_restart_done(c);
2172}
2173
2174static inline void
2175bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
2176{
2177 struct bgp_channel *c = bgp_get_channel(s->proto, afi);
2178 rta *a = NULL;
2179
d15b0b0a 2180 if (!c)
82f42ea0 2181 DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
d15b0b0a
OZ
2182
2183 s->channel = c;
2184 s->add_path = c->add_path_rx;
1e37e35c 2185 s->mpls = c->desc->mpls;
d15b0b0a
OZ
2186
2187 s->last_id = 0;
2188 s->last_src = s->proto->p.main_source;
2189
2190 /*
2191 * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
2192 * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
2193 * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
2194 * decode_next_hop hooks) by restoring a->eattrs afterwards.
2195 */
2196
2197 if (ea)
2198 {
039a65d0 2199 a = allocz(RTA_MAX_SIZE);
d15b0b0a
OZ
2200
2201 a->source = RTS_BGP;
2202 a->scope = SCOPE_UNIVERSE;
d15b0b0a
OZ
2203 a->from = s->proto->cf->remote_ip;
2204 a->eattrs = ea;
2205
2206 c->desc->decode_next_hop(s, nh, nh_len, a);
2207
2208 /* Handle withdraw during next hop decoding */
2209 if (s->err_withdraw)
2210 a = NULL;
2211 }
2212
2213 c->desc->decode_nlri(s, nlri, len, a);
2214
2215 rta_free(s->cached_rta);
2216 s->cached_rta = NULL;
2217}
2218
2219static void
2220bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
2221{
2222 struct bgp_proto *p = conn->bgp;
2223 ea_list *ea = NULL;
2224
2225 BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
2226
2227 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
2228 if (conn->state == BS_OPENCONFIRM)
2229 bgp_conn_enter_established_state(conn);
2230
2231 if (conn->state != BS_ESTABLISHED)
2232 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
2233
2234 bgp_start_timer(conn->hold_timer, conn->hold_time);
2235
2236 /* Initialize parse state */
2237 struct bgp_parse_state s = {
2238 .proto = p,
2239 .pool = bgp_linpool,
2240 .as4_session = p->as4_session,
2241 };
2242
2243 /* Parse error handler */
2244 if (setjmp(s.err_jmpbuf))
2245 {
2246 bgp_error(conn, 3, s.err_subcode, NULL, 0);
2247 goto done;
2248 }
2249
2250 /* Check minimal length */
2251 if (len < 23)
2252 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
2253
2254 /* Skip fixed header */
2255 uint pos = 19;
2256
2257 /*
2258 * UPDATE message format
2259 *
2260 * 2 B IPv4 Withdrawn Routes Length
2261 * var IPv4 Withdrawn Routes NLRI
2262 * 2 B Total Path Attribute Length
2263 * var Path Attributes
2264 * var IPv4 Reachable Routes NLRI
2265 */
2266
2267 s.ip_unreach_len = get_u16(pkt + pos);
2268 s.ip_unreach_nlri = pkt + pos + 2;
2269 pos += 2 + s.ip_unreach_len;
2270
2271 if (pos + 2 > len)
2272 bgp_parse_error(&s, 1);
2273
2274 s.attr_len = get_u16(pkt + pos);
2275 s.attrs = pkt + pos + 2;
2276 pos += 2 + s.attr_len;
2277
2278 if (pos > len)
2279 bgp_parse_error(&s, 1);
2280
2281 s.ip_reach_len = len - pos;
2282 s.ip_reach_nlri = pkt + pos;
0c791f87 2283
0c791f87 2284
d15b0b0a
OZ
2285 if (s.attr_len)
2286 ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);
d493d0f1
OZ
2287 else
2288 ea = NULL;
0c791f87 2289
d15b0b0a
OZ
2290 /* Check for End-of-RIB marker */
2291 if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
82f42ea0 2292 { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; }
973399ae 2293
d15b0b0a
OZ
2294 /* Check for MP End-of-RIB marker */
2295 if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
82f42ea0
OZ
2296 !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af)
2297 { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; }
094d2bdb 2298
d15b0b0a
OZ
2299 if (s.ip_unreach_len)
2300 bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);
094d2bdb 2301
d15b0b0a
OZ
2302 if (s.mp_unreach_len)
2303 bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);
094d2bdb 2304
d15b0b0a
OZ
2305 if (s.ip_reach_len)
2306 bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
2307 ea, s.ip_next_hop_data, s.ip_next_hop_len);
509aab5d 2308
d15b0b0a
OZ
2309 if (s.mp_reach_len)
2310 bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len,
2311 ea, s.mp_next_hop_data, s.mp_next_hop_len);
094d2bdb 2312
d15b0b0a
OZ
2313done:
2314 rta_free(s.cached_rta);
2315 lp_flush(s.pool);
2316 return;
094d2bdb
OZ
2317}
2318
094d2bdb 2319
d15b0b0a
OZ
2320/*
2321 * ROUTE-REFRESH
2322 */
094d2bdb 2323
d15b0b0a
OZ
2324static inline byte *
2325bgp_create_route_refresh(struct bgp_channel *c, byte *buf)
1c1da87b 2326{
d15b0b0a 2327 struct bgp_proto *p = (void *) c->c.proto;
1b180121 2328
d15b0b0a 2329 BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
53ffbff3 2330
d15b0b0a
OZ
2331 /* Original route refresh request, RFC 2918 */
2332 put_af4(buf, c->afi);
2333 buf[2] = BGP_RR_REQUEST;
cfe34a31 2334
d15b0b0a
OZ
2335 return buf+4;
2336}
53ffbff3 2337
d15b0b0a
OZ
2338static inline byte *
2339bgp_create_begin_refresh(struct bgp_channel *c, byte *buf)
2340{
2341 struct bgp_proto *p = (void *) c->c.proto;
53ffbff3 2342
d15b0b0a 2343 BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
53ffbff3 2344
d15b0b0a
OZ
2345 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
2346 put_af4(buf, c->afi);
2347 buf[2] = BGP_RR_BEGIN;
cfe34a31 2348
d15b0b0a
OZ
2349 return buf+4;
2350}
53ffbff3 2351
d15b0b0a
OZ
2352static inline byte *
2353bgp_create_end_refresh(struct bgp_channel *c, byte *buf)
2354{
2355 struct bgp_proto *p = (void *) c->c.proto;
cfe34a31 2356
d15b0b0a
OZ
2357 BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
2358
2359 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
2360 put_af4(buf, c->afi);
2361 buf[2] = BGP_RR_END;
1c1da87b 2362
d15b0b0a
OZ
2363 return buf+4;
2364}
1c1da87b 2365
3fdbafb6 2366static void
d15b0b0a 2367bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
3fdbafb6 2368{
85368cd4 2369 struct bgp_proto *p = conn->bgp;
973399ae 2370
d15b0b0a
OZ
2371 if (conn->state != BS_ESTABLISHED)
2372 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
0c791f87 2373
d15b0b0a
OZ
2374 if (!conn->local_caps->route_refresh)
2375 { bgp_error(conn, 1, 3, pkt+18, 1); return; }
094d2bdb 2376
d15b0b0a
OZ
2377 if (len < (BGP_HEADER_LENGTH + 4))
2378 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
2379
2380 if (len > (BGP_HEADER_LENGTH + 4))
2381 { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
973399ae 2382
d15b0b0a
OZ
2383 struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19));
2384 if (!c)
2385 {
2386 log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
2387 p->p.name, pkt[21], get_u16(pkt+19), pkt[22]);
f94557de 2388 return;
d15b0b0a 2389 }
f94557de 2390
d15b0b0a
OZ
2391 /* RFC 7313 redefined reserved field as RR message subtype */
2392 uint subtype = p->enhanced_refresh ? pkt[21] : BGP_RR_REQUEST;
06fb60c4 2393
d15b0b0a
OZ
2394 switch (subtype)
2395 {
2396 case BGP_RR_REQUEST:
2397 BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
2398 channel_request_feeding(&c->c);
2399 break;
06fb60c4 2400
d15b0b0a
OZ
2401 case BGP_RR_BEGIN:
2402 BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
2403 bgp_refresh_begin(c);
2404 break;
094d2bdb 2405
d15b0b0a
OZ
2406 case BGP_RR_END:
2407 BGP_TRACE(D_PACKETS, "Got END-OF-RR");
2408 bgp_refresh_end(c);
2409 break;
06fb60c4 2410
d15b0b0a
OZ
2411 default:
2412 log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
2413 p->p.name, subtype);
2414 break;
2415 }
2416}
06fb60c4 2417
d15b0b0a
OZ
2418static inline struct bgp_channel *
2419bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn)
2420{
2421 uint i = conn->last_channel;
72b28a04 2422
d15b0b0a
OZ
2423 /* Try the last channel, but at most several times */
2424 if ((conn->channels_to_send & (1 << i)) &&
2425 (conn->last_channel_count < 16))
2426 goto found;
06fb60c4 2427
d15b0b0a
OZ
2428 /* Find channel with non-zero channels_to_send */
2429 do
2430 {
2431 i++;
2432 if (i >= p->channel_count)
2433 i = 0;
2434 }
2435 while (! (conn->channels_to_send & (1 << i)));
06fb60c4 2436
d15b0b0a
OZ
2437 /* Use that channel */
2438 conn->last_channel = i;
2439 conn->last_channel_count = 0;
f8809249 2440
d15b0b0a
OZ
2441found:
2442 conn->last_channel_count++;
2443 return p->channel_map[i];
2444}
1c1da87b 2445
d15b0b0a
OZ
2446static inline int
2447bgp_send(struct bgp_conn *conn, uint type, uint len)
06fb60c4 2448{
d15b0b0a
OZ
2449 sock *sk = conn->sk;
2450 byte *buf = sk->tbuf;
06fb60c4 2451
d15b0b0a
OZ
2452 memset(buf, 0xff, 16); /* Marker */
2453 put_u16(buf+16, len);
2454 buf[18] = type;
06fb60c4 2455
d15b0b0a
OZ
2456 return sk_send(sk, len);
2457}
06fb60c4 2458
d15b0b0a
OZ
2459/**
2460 * bgp_fire_tx - transmit packets
2461 * @conn: connection
2462 *
2463 * Whenever the transmit buffers of the underlying TCP connection
2464 * are free and we have any packets queued for sending, the socket functions
2465 * call bgp_fire_tx() which takes care of selecting the highest priority packet
2466 * queued (Notification > Keepalive > Open > Update), assembling its header
2467 * and body and sending it to the connection.
2468 */
2469static int
2470bgp_fire_tx(struct bgp_conn *conn)
1c1da87b
MM
2471{
2472 struct bgp_proto *p = conn->bgp;
d15b0b0a
OZ
2473 struct bgp_channel *c;
2474 byte *buf, *pkt, *end;
2475 uint s;
1c1da87b 2476
d15b0b0a
OZ
2477 if (!conn->sk)
2478 return 0;
2479
2480 buf = conn->sk->tbuf;
2481 pkt = buf + BGP_HEADER_LENGTH;
2482 s = conn->packets_to_send;
2483
2484 if (s & (1 << PKT_SCHEDULE_CLOSE))
2485 {
2486 /* We can finally close connection and enter idle state */
2487 bgp_conn_enter_idle_state(conn);
2488 return 0;
2489 }
2490 if (s & (1 << PKT_NOTIFICATION))
2491 {
2492 conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE;
2493 end = bgp_create_notification(conn, pkt);
2494 return bgp_send(conn, PKT_NOTIFICATION, end - buf);
2495 }
2496 else if (s & (1 << PKT_KEEPALIVE))
2497 {
2498 conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
2499 BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
2500 bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
2501 return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
2502 }
2503 else if (s & (1 << PKT_OPEN))
2504 {
2505 conn->packets_to_send &= ~(1 << PKT_OPEN);
2506 end = bgp_create_open(conn, pkt);
2507 return bgp_send(conn, PKT_OPEN, end - buf);
2508 }
2509 else while (conn->channels_to_send)
2510 {
2511 c = bgp_get_channel_to_send(p, conn);
2512 s = c->packets_to_send;
2513
2514 if (s & (1 << PKT_ROUTE_REFRESH))
0c791f87 2515 {
d15b0b0a
OZ
2516 c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH);
2517 end = bgp_create_route_refresh(c, pkt);
2518 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
0c791f87 2519 }
d15b0b0a 2520 else if (s & (1 << PKT_BEGIN_REFRESH))
1c1da87b 2521 {
d15b0b0a
OZ
2522 /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
2523 c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH);
2524 end = bgp_create_begin_refresh(c, pkt);
2525 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
1c1da87b 2526 }
d15b0b0a 2527 else if (s & (1 << PKT_UPDATE))
1c1da87b 2528 {
d15b0b0a
OZ
2529 end = bgp_create_update(c, pkt);
2530 if (end)
2531 return bgp_send(conn, PKT_UPDATE, end - buf);
1c1da87b 2532
d15b0b0a
OZ
2533 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
2534 c->packets_to_send = 0;
2535 conn->channels_to_send &= ~(1 << c->index);
64cf11f5 2536
d15b0b0a
OZ
2537 if (c->feed_state == BFS_LOADED)
2538 {
2539 c->feed_state = BFS_NONE;
2540 end = bgp_create_end_mark(c, pkt);
2541 return bgp_send(conn, PKT_UPDATE, end - buf);
2542 }
1c1da87b 2543
d15b0b0a
OZ
2544 else if (c->feed_state == BFS_REFRESHED)
2545 {
2546 c->feed_state = BFS_NONE;
2547 end = bgp_create_end_refresh(c, pkt);
2548 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
2549 }
2550 }
2551 else if (s)
2552 bug("Channel packets_to_send: %x", s);
094d2bdb 2553
d15b0b0a
OZ
2554 c->packets_to_send = 0;
2555 conn->channels_to_send &= ~(1 << c->index);
2556 }
06fb60c4 2557
d15b0b0a
OZ
2558 return 0;
2559}
06fb60c4 2560
d15b0b0a
OZ
2561/**
2562 * bgp_schedule_packet - schedule a packet for transmission
2563 * @conn: connection
2564 * @c: channel
2565 * @type: packet type
2566 *
2567 * Schedule a packet of type @type to be sent as soon as possible.
2568 */
2569void
2570bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
2571{
2572 ASSERT(conn->sk);
1c1da87b 2573
d15b0b0a 2574 DBG("BGP: Scheduling packet type %d\n", type);
06fb60c4 2575
d15b0b0a
OZ
2576 if (c)
2577 {
2578 if (! conn->channels_to_send)
2579 {
2580 conn->last_channel = c->index;
2581 conn->last_channel_count = 0;
2582 }
06fb60c4 2583
d15b0b0a
OZ
2584 c->packets_to_send |= 1 << type;
2585 conn->channels_to_send |= 1 << c->index;
2586 }
2587 else
2588 conn->packets_to_send |= 1 << type;
1c1da87b 2589
d15b0b0a
OZ
2590 if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
2591 ev_schedule(conn->tx_ev);
2592}
1c1da87b 2593
d15b0b0a
OZ
/*
 * Event hook (takes the connection as an untyped pointer): keep sending
 * queued packets as long as bgp_fire_tx() reports a positive result.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;
  int rv;

  DBG("BGP: kicking TX\n");

  do
    rv = bgp_fire_tx(conn);
  while (rv > 0);
}
1c1da87b 2603
d15b0b0a
OZ
2604void
2605bgp_tx(sock *sk)
2606{
2607 struct bgp_conn *conn = sk->data;
1c1da87b 2608
d15b0b0a
OZ
2609 DBG("BGP: TX hook\n");
2610 while (bgp_fire_tx(conn) > 0)
2611 ;
efcece2d
MM
2612}
2613
d15b0b0a 2614
efcece2d
MM
2615static struct {
2616 byte major, minor;
2617 byte *msg;
2618} bgp_msg_table[] = {
2619 { 1, 0, "Invalid message header" },
2620 { 1, 1, "Connection not synchronized" },
2621 { 1, 2, "Bad message length" },
2622 { 1, 3, "Bad message type" },
2623 { 2, 0, "Invalid OPEN message" },
2624 { 2, 1, "Unsupported version number" },
2625 { 2, 2, "Bad peer AS" },
2626 { 2, 3, "Bad BGP identifier" },
2627 { 2, 4, "Unsupported optional parameter" },
2628 { 2, 5, "Authentication failure" },
2629 { 2, 6, "Unacceptable hold time" },
06e0d1b6 2630 { 2, 7, "Required capability missing" }, /* [RFC5492] */
506fa1a7 2631 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
efcece2d
MM
2632 { 3, 0, "Invalid UPDATE message" },
2633 { 3, 1, "Malformed attribute list" },
2634 { 3, 2, "Unrecognized well-known attribute" },
2635 { 3, 3, "Missing mandatory attribute" },
2636 { 3, 4, "Invalid attribute flags" },
2637 { 3, 5, "Invalid attribute length" },
2638 { 3, 6, "Invalid ORIGIN attribute" },
2639 { 3, 7, "AS routing loop" }, /* Deprecated */
2640 { 3, 8, "Invalid NEXT_HOP attribute" },
2641 { 3, 9, "Optional attribute error" },
2642 { 3, 10, "Invalid network field" },
2643 { 3, 11, "Malformed AS_PATH" },
2644 { 4, 0, "Hold timer expired" },
ac574513
OZ
2645 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
2646 { 5, 1, "Unexpected message in OpenSent state" },
2647 { 5, 2, "Unexpected message in OpenConfirm state" },
2648 { 5, 3, "Unexpected message in Established state" },
165a6227
OZ
2649 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
2650 { 6, 1, "Maximum number of prefixes reached" },
2651 { 6, 2, "Administrative shutdown" },
2652 { 6, 3, "Peer de-configured" },
2653 { 6, 4, "Administrative reset" },
2654 { 6, 5, "Connection rejected" },
2655 { 6, 6, "Other configuration change" },
2656 { 6, 7, "Connection collision resolution" },
9aed29e6
OZ
2657 { 6, 8, "Out of Resources" },
2658 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
2659 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
efcece2d
MM
2660};
2661
11b32d91
OZ
2662/**
2663 * bgp_error_dsc - return BGP error description
11b32d91
OZ
2664 * @code: BGP error code
2665 * @subcode: BGP error subcode
2666 *
2667 * bgp_error_dsc() returns error description for BGP errors
2668 * which might be static string or given temporary buffer.
2669 */
b8113a5e 2670const char *
d15b0b0a 2671bgp_error_dsc(uint code, uint subcode)
11b32d91 2672{
b8113a5e 2673 static char buff[32];
d15b0b0a
OZ
2674 uint i;
2675
11b32d91
OZ
2676 for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
2677 if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
d15b0b0a 2678 return bgp_msg_table[i].msg;
11b32d91 2679
d15b0b0a 2680 bsprintf(buff, "Unknown error %u.%u", code, subcode);
11b32d91
OZ
2681 return buff;
2682}
2683
cd1d9961
OZ
2684/* RFC 8203 - shutdown communication message */
2685static int
2686bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
2687{
2688 byte *msg = data + 1;
2689 uint msg_len = data[0];
2690 uint i;
2691
2692 /* Handle zero length message */
2693 if (msg_len == 0)
2694 return 1;
2695
2696 /* Handle proper message */
2697 if ((msg_len > 128) && (msg_len + 1 > len))
2698 return 0;
2699
2700 /* Some elementary cleanup */
2701 for (i = 0; i < msg_len; i++)
2702 if (msg[i] < ' ')
2703 msg[i] = ' ';
2704
2705 proto_set_message(&p->p, msg, msg_len);
2706 *bp += bsprintf(*bp, ": \"%s\"", p->p.message);
2707 return 1;
2708}
2709
efcece2d 2710void
d15b0b0a 2711bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
efcece2d 2712{
cd1d9961 2713 byte argbuf[256], *t = argbuf;
d15b0b0a 2714 uint i;
efcece2d 2715
b99d3786
OZ
2716 /* Don't report Cease messages generated by myself */
2717 if (code == 6 && class == BE_BGP_TX)
85733143
MM
2718 return;
2719
cd1d9961
OZ
2720 /* Reset shutdown message */
2721 if ((code == 6) && ((subcode == 2) || (subcode == 4)))
2722 proto_set_message(&p->p, NULL, 0);
2723
efcece2d
MM
2724 if (len)
2725 {
cd1d9961 2726 /* Bad peer AS - we would like to print the AS */
a5bf5f78
OZ
2727 if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
2728 {
cd1d9961 2729 t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
a5bf5f78
OZ
2730 goto done;
2731 }
cd1d9961
OZ
2732
2733 /* RFC 8203 - shutdown communication */
2734 if (((code == 6) && ((subcode == 2) || (subcode == 4))))
2735 if (bgp_handle_message(p, data, len, &t))
2736 goto done;
2737
2738 *t++ = ':';
2739 *t++ = ' ';
efcece2d
MM
2740 if (len > 16)
2741 len = 16;
2742 for (i=0; i<len; i++)
2743 t += bsprintf(t, "%02x", data[i]);
2744 }
cd1d9961
OZ
2745
2746done:
efcece2d 2747 *t = 0;
cd1d9961
OZ
2748 const byte *dsc = bgp_error_dsc(code, subcode);
2749 log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, dsc, argbuf);
3fdbafb6
MM
2750}
2751
2752static void
3e236955 2753bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
3fdbafb6 2754{
165a6227 2755 struct bgp_proto *p = conn->bgp;
d15b0b0a 2756
3fdbafb6 2757 if (len < 21)
d15b0b0a 2758 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
11b32d91 2759
d15b0b0a
OZ
2760 uint code = pkt[19];
2761 uint subcode = pkt[20];
b99d3786 2762 int err = (code != 6);
165a6227 2763
b99d3786
OZ
2764 bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
2765 bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
e3299ab1 2766
11b32d91 2767 bgp_conn_enter_close_state(conn);
d15b0b0a 2768 bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
b99d3786 2769
d15b0b0a
OZ
2770 if (err)
2771 {
2772 bgp_update_startup_delay(p);
830ba75e 2773 bgp_stop(p, 0, NULL, 0);
d15b0b0a 2774 }
63472779 2775 else
75d98b60
OZ
2776 {
2777 uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0);
2778 if (p->cf->disable_after_cease & subcode_bit)
63472779 2779 {
75d98b60
OZ
2780 log(L_INFO "%s: Disabled after Cease notification", p->p.name);
2781 p->startup_delay = 0;
2782 p->p.disabled = 1;
63472779 2783 }
75d98b60 2784 }
3fdbafb6
MM
2785}
2786
2787static void
e21423ba 2788bgp_rx_keepalive(struct bgp_conn *conn)
3fdbafb6 2789{
85368cd4
MM
2790 struct bgp_proto *p = conn->bgp;
2791
2792 BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
3fdbafb6 2793 bgp_start_timer(conn->hold_timer, conn->hold_time);
3fdbafb6 2794
d15b0b0a
OZ
2795 if (conn->state == BS_OPENCONFIRM)
2796 { bgp_conn_enter_established_state(conn); return; }
bf47fe4b 2797
bf47fe4b 2798 if (conn->state != BS_ESTABLISHED)
d15b0b0a 2799 bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
bf47fe4b
OZ
2800}
2801
2802
54e55169
MM
2803/**
2804 * bgp_rx_packet - handle a received packet
2805 * @conn: BGP connection
2806 * @pkt: start of the packet
2807 * @len: packet size
2808 *
2809 * bgp_rx_packet() takes a newly received packet and calls the corresponding
2810 * packet handler according to the packet type.
2811 */
3fdbafb6 2812static void
d15b0b0a 2813bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
3fdbafb6 2814{
cf31112f
OZ
2815 byte type = pkt[18];
2816
2817 DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
2818
2819 if (conn->bgp->p.mrtdump & MD_MESSAGES)
2820 mrt_dump_bgp_packet(conn, pkt, len);
2821
2822 switch (type)
d15b0b0a
OZ
2823 {
2824 case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
2825 case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
2826 case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
2827 case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
2828 case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
2829 default: bgp_error(conn, 1, 3, pkt+18, 1);
2830 }
72a6ef11
MM
2831}
2832
54e55169
MM
2833/**
2834 * bgp_rx - handle received data
2835 * @sk: socket
2836 * @size: amount of data received
2837 *
2838 * bgp_rx() is called by the socket layer whenever new data arrive from
2839 * the underlying TCP connection. It assembles the data fragments to packets,
2840 * checks their headers and framing and passes complete packets to
2841 * bgp_rx_packet().
2842 */
72a6ef11 2843int
3e236955 2844bgp_rx(sock *sk, uint size)
72a6ef11
MM
2845{
2846 struct bgp_conn *conn = sk->data;
2847 byte *pkt_start = sk->rbuf;
2848 byte *end = pkt_start + size;
d15b0b0a 2849 uint i, len;
72a6ef11
MM
2850
2851 DBG("BGP: RX hook: Got %d bytes\n", size);
2852 while (end >= pkt_start + BGP_HEADER_LENGTH)
2853 {
11b32d91
OZ
2854 if ((conn->state == BS_CLOSE) || (conn->sk != sk))
2855 return 0;
3fdbafb6
MM
2856 for(i=0; i<16; i++)
2857 if (pkt_start[i] != 0xff)
2858 {
efcece2d 2859 bgp_error(conn, 1, 1, NULL, 0);
3fdbafb6
MM
2860 break;
2861 }
2862 len = get_u16(pkt_start+16);
d15b0b0a 2863 if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn)))
3fdbafb6 2864 {
efcece2d 2865 bgp_error(conn, 1, 2, pkt_start+16, 2);
3fdbafb6
MM
2866 break;
2867 }
5f532add
MM
2868 if (end < pkt_start + len)
2869 break;
2870 bgp_rx_packet(conn, pkt_start, len);
2871 pkt_start += len;
72a6ef11
MM
2872 }
2873 if (pkt_start != sk->rbuf)
2874 {
2875 memmove(sk->rbuf, pkt_start, end - pkt_start);
2876 sk->rpos = sk->rbuf + (end - pkt_start);
2877 }
2878 return 0;
2879}