]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
Minor cleanups
[thirdparty/bird.git] / proto / bgp / packets.c
1 /*
2 * BIRD -- BGP Packet Processing
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 #undef LOCAL_DEBUG
12
13 #include <stdlib.h>
14
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "nest/mrtdump.h"
21 #include "conf/conf.h"
22 #include "lib/unaligned.h"
23 #include "lib/flowspec.h"
24 #include "lib/socket.h"
25
26 #include "nest/cli.h"
27
28 #include "bgp.h"
29
30
31 #define BGP_RR_REQUEST 0
32 #define BGP_RR_BEGIN 1
33 #define BGP_RR_END 2
34
35
36 static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
37 static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
38
39 /* Table for state -> RFC 6608 FSM error subcodes */
40 static byte fsm_err_subcode[BS_MAX] = {
41 [BS_OPENSENT] = 1,
42 [BS_OPENCONFIRM] = 2,
43 [BS_ESTABLISHED] = 3
44 };
45
46
47 static struct bgp_channel *
48 bgp_get_channel(struct bgp_proto *p, u32 afi)
49 {
50 uint i;
51
52 for (i = 0; i < p->channel_count; i++)
53 if (p->afi_map[i] == afi)
54 return p->channel_map[i];
55
56 return NULL;
57 }
58
59 static inline void
60 put_af3(byte *buf, u32 id)
61 {
62 put_u16(buf, id >> 16);
63 buf[2] = id & 0xff;
64 }
65
66 static inline void
67 put_af4(byte *buf, u32 id)
68 {
69 put_u16(buf, id >> 16);
70 buf[2] = 0;
71 buf[3] = id & 0xff;
72 }
73
74 static inline u32
75 get_af3(byte *buf)
76 {
77 return (get_u16(buf) << 16) | buf[2];
78 }
79
80 static inline u32
81 get_af4(byte *buf)
82 {
83 return (get_u16(buf) << 16) | buf[3];
84 }
85
86 /*
87 * MRT Dump format is not semantically specified.
88 * We will use these values in appropriate fields:
89 *
90 * Local AS, Remote AS - configured AS numbers for given BGP instance.
91 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
92 *
93 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
94 * changes) and MESSAGE (for received BGP messages).
95 *
96 * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
97 * only when AS4 session is established and even in that case MESSAGE
98 * does not use AS4 variant for initial OPEN message. This strange
99 * behavior is here for compatibility with Quagga and Bgpdump.
100 */
101
102 static byte *
103 mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
104 {
105 struct bgp_proto *p = conn->bgp;
106 uint v4 = ipa_is_ip4(p->cf->remote_ip);
107
108 if (as4)
109 {
110 put_u32(buf+0, p->remote_as);
111 put_u32(buf+4, p->public_as);
112 buf+=8;
113 }
114 else
115 {
116 put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
117 put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS);
118 buf+=4;
119 }
120
121 put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
122 put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6);
123 buf+=4;
124
125 if (v4)
126 {
127 buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE);
128 buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE);
129 }
130 else
131 {
132 buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE);
133 buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE);
134 }
135
136 return buf;
137 }
138
139 static void
140 mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len)
141 {
142 byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
143 byte *bp = buf + MRTDUMP_HDR_LENGTH;
144 int as4 = conn->bgp->as4_session;
145
146 bp = mrt_put_bgp4_hdr(bp, conn, as4);
147 memcpy(bp, pkt, len);
148 bp += len;
149 mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
150 buf, bp-buf);
151 }
152
153 static inline u16
154 convert_state(uint state)
155 {
156 /* Convert state from our BS_* values to values used in MRTDump */
157 return (state == BS_CLOSE) ? 1 : state + 1;
158 }
159
160 void
161 mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new)
162 {
163 byte buf[128];
164 byte *bp = buf + MRTDUMP_HDR_LENGTH;
165
166 bp = mrt_put_bgp4_hdr(bp, conn, 1);
167 put_u16(bp+0, convert_state(old));
168 put_u16(bp+2, convert_state(new));
169 bp += 4;
170 mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
171 }
172
173 static byte *
174 bgp_create_notification(struct bgp_conn *conn, byte *buf)
175 {
176 struct bgp_proto *p = conn->bgp;
177
178 BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
179 buf[0] = conn->notify_code;
180 buf[1] = conn->notify_subcode;
181 memcpy(buf+2, conn->notify_data, conn->notify_size);
182 return buf + 2 + conn->notify_size;
183 }
184
185
186 /* Capability negotiation as per RFC 5492 */
187
188 const struct bgp_af_caps *
189 bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
190 {
191 struct bgp_af_caps *ac;
192
193 WALK_AF_CAPS(caps, ac)
194 if (ac->afi == afi)
195 return ac;
196
197 return NULL;
198 }
199
200 static struct bgp_af_caps *
201 bgp_get_af_caps(struct bgp_caps *caps, u32 afi)
202 {
203 struct bgp_af_caps *ac;
204
205 WALK_AF_CAPS(caps, ac)
206 if (ac->afi == afi)
207 return ac;
208
209 ac = &caps->af_data[caps->af_count++];
210 memset(ac, 0, sizeof(struct bgp_af_caps));
211 ac->afi = afi;
212
213 return ac;
214 }
215
216 static int
217 bgp_af_caps_cmp(const void *X, const void *Y)
218 {
219 const struct bgp_af_caps *x = X, *y = Y;
220 return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
221 }
222
223
224 static byte *
225 bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
226 {
227 struct bgp_proto *p = conn->bgp;
228 struct bgp_channel *c;
229 struct bgp_caps *caps;
230 struct bgp_af_caps *ac;
231 uint any_add_path = 0;
232 byte *data;
233
234 /* Prepare bgp_caps structure */
235
236 int n = list_length(&p->p.channels);
237 caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
238 conn->local_caps = caps;
239
240 caps->as4_support = p->cf->enable_as4;
241 caps->ext_messages = p->cf->enable_extended_messages;
242 caps->route_refresh = p->cf->enable_refresh;
243 caps->enhanced_refresh = p->cf->enable_refresh;
244
245 if (caps->as4_support)
246 caps->as4_number = p->public_as;
247
248 if (p->cf->gr_mode)
249 {
250 caps->gr_aware = 1;
251 caps->gr_time = p->cf->gr_time;
252 caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
253 }
254
255 /* Allocate and fill per-AF fields */
256 WALK_LIST(c, p->p.channels)
257 {
258 ac = &caps->af_data[caps->af_count++];
259 ac->afi = c->afi;
260 ac->ready = 1;
261
262 ac->add_path = c->cf->add_path;
263 any_add_path |= ac->add_path;
264
265 if (c->cf->gr_able)
266 {
267 ac->gr_able = 1;
268
269 if (p->p.gr_recovery)
270 ac->gr_af_flags |= BGP_GRF_FORWARDING;
271 }
272 }
273
274 /* Sort capability fields by AFI/SAFI */
275 qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
276
277
278 /* Create capability list in buffer */
279
280 /*
281 * Note that max length is ~ 20+14*af_count. With max 6 channels that is
282 * 104. Option limit is 253 and buffer size is 4096, so we cannot overflow
283 * unless we add new capabilities or more AFs.
284 */
285
286 WALK_AF_CAPS(caps, ac)
287 if (ac->ready)
288 {
289 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
290 *buf++ = 4; /* Capability data length */
291 put_af4(buf, ac->afi);
292 buf += 4;
293 }
294
295 if (caps->route_refresh)
296 {
297 *buf++ = 2; /* Capability 2: Support for route refresh */
298 *buf++ = 0; /* Capability data length */
299 }
300
301 if (caps->ext_messages)
302 {
303 *buf++ = 6; /* Capability 6: Support for extended messages */
304 *buf++ = 0; /* Capability data length */
305 }
306
307 if (caps->gr_aware)
308 {
309 *buf++ = 64; /* Capability 64: Support for graceful restart */
310 *buf++ = 0; /* Capability data length, will be fixed later */
311 data = buf;
312
313 put_u16(buf, caps->gr_time);
314 buf[0] |= caps->gr_flags;
315 buf += 2;
316
317 WALK_AF_CAPS(caps, ac)
318 if (ac->gr_able)
319 {
320 put_af3(buf, ac->afi);
321 buf[3] = ac->gr_af_flags;
322 buf += 4;
323 }
324
325 data[-1] = buf - data;
326 }
327
328 if (caps->as4_support)
329 {
330 *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
331 *buf++ = 4; /* Capability data length */
332 put_u32(buf, p->public_as);
333 buf += 4;
334 }
335
336 if (any_add_path)
337 {
338 *buf++ = 69; /* Capability 69: Support for ADD-PATH */
339 *buf++ = 0; /* Capability data length, will be fixed later */
340 data = buf;
341
342 WALK_AF_CAPS(caps, ac)
343 if (ac->add_path)
344 {
345 put_af3(buf, ac->afi);
346 buf[3] = ac->add_path;
347 buf += 4;
348 }
349
350 data[-1] = buf - data;
351 }
352
353 if (caps->enhanced_refresh)
354 {
355 *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
356 *buf++ = 0; /* Capability data length */
357 }
358
359 return buf;
360 }
361
362 static void
363 bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len)
364 {
365 struct bgp_proto *p = conn->bgp;
366 struct bgp_af_caps *ac;
367 int i, cl;
368 u32 af;
369
370 while (len > 0)
371 {
372 if (len < 2 || len < (2 + pos[1]))
373 goto err;
374
375 /* Capability length */
376 cl = pos[1];
377
378 /* Capability type */
379 switch (pos[0])
380 {
381 case 1: /* Multiprotocol capability, RFC 4760 */
382 if (cl != 4)
383 goto err;
384
385 af = get_af4(pos+2);
386 ac = bgp_get_af_caps(caps, af);
387 ac->ready = 1;
388 break;
389
390 case 2: /* Route refresh capability, RFC 2918 */
391 if (cl != 0)
392 goto err;
393
394 caps->route_refresh = 1;
395 break;
396
397 case 6: /* Extended message length capability, RFC draft */
398 if (cl != 0)
399 goto err;
400
401 caps->ext_messages = 1;
402 break;
403
404 case 64: /* Graceful restart capability, RFC 4724 */
405 if (cl % 4 != 2)
406 goto err;
407
408 /* Only the last instance is valid */
409 WALK_AF_CAPS(caps, ac)
410 {
411 ac->gr_able = 0;
412 ac->gr_af_flags = 0;
413 }
414
415 caps->gr_aware = 1;
416 caps->gr_flags = pos[2] & 0xf0;
417 caps->gr_time = get_u16(pos + 2) & 0x0fff;
418
419 for (i = 2; i < cl; i += 4)
420 {
421 af = get_af3(pos+2+i);
422 ac = bgp_get_af_caps(caps, af);
423 ac->gr_able = 1;
424 ac->gr_af_flags = pos[2+i+3];
425 }
426 break;
427
428 case 65: /* AS4 capability, RFC 4893 */
429 if (cl != 4)
430 goto err;
431
432 caps->as4_support = 1;
433 caps->as4_number = get_u32(pos + 2);
434 break;
435
436 case 69: /* ADD-PATH capability, RFC 7911 */
437 if (cl % 4)
438 goto err;
439
440 for (i = 0; i < cl; i += 4)
441 {
442 byte val = pos[2+i+3];
443 if (!val || (val > BGP_ADD_PATH_FULL))
444 {
445 log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
446 p->p.name, val);
447 break;
448 }
449 }
450
451 for (i = 0; i < cl; i += 4)
452 {
453 af = get_af3(pos+2+i);
454 ac = bgp_get_af_caps(caps, af);
455 ac->add_path = pos[2+i+3];
456 }
457 break;
458
459 case 70: /* Enhanced route refresh capability, RFC 7313 */
460 if (cl != 0)
461 goto err;
462
463 caps->enhanced_refresh = 1;
464 break;
465
466 /* We can safely ignore all other capabilities */
467 }
468
469 ADVANCE(pos, len, 2 + cl);
470 }
471 return;
472
473 err:
474 bgp_error(conn, 2, 0, NULL, 0);
475 return;
476 }
477
478 static int
479 bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
480 {
481 struct bgp_proto *p = conn->bgp;
482 struct bgp_caps *caps;
483 int ol;
484
485 /* Max number of announced AFIs is limited by max option length (255) */
486 caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps));
487 memset(caps, 0, sizeof(struct bgp_caps));
488
489 while (len > 0)
490 {
491 if ((len < 2) || (len < (2 + pos[1])))
492 { bgp_error(conn, 2, 0, NULL, 0); return -1; }
493
494 ol = pos[1];
495 if (pos[0] == 2)
496 {
497 /* BGP capabilities, RFC 5492 */
498 if (p->cf->capabilities)
499 bgp_read_capabilities(conn, caps, pos + 2, ol);
500 }
501 else
502 {
503 /* Unknown option */
504 bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */
505 return -1;
506 }
507
508 ADVANCE(pos, len, 2 + ol);
509 }
510
511 uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps);
512 conn->remote_caps = mb_allocz(p->p.pool, n);
513 memcpy(conn->remote_caps, caps, n);
514
515 return 0;
516 }
517
518 static byte *
519 bgp_create_open(struct bgp_conn *conn, byte *buf)
520 {
521 struct bgp_proto *p = conn->bgp;
522
523 BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
524 BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);
525
526 buf[0] = BGP_VERSION;
527 put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
528 put_u16(buf+3, p->cf->hold_time);
529 put_u32(buf+5, p->local_id);
530
531 if (p->cf->capabilities)
532 {
533 /* Prepare local_caps and write capabilities to buffer */
534 byte *end = bgp_write_capabilities(conn, buf+12);
535 uint len = end - (buf+12);
536
537 buf[9] = len + 2; /* Optional parameters length */
538 buf[10] = 2; /* Option 2: Capability list */
539 buf[11] = len; /* Option data length */
540
541 return end;
542 }
543 else
544 {
545 /* Prepare empty local_caps */
546 conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
547
548 buf[9] = 0; /* No optional parameters */
549 return buf + 10;
550 }
551
552 return buf;
553 }
554
555 static void
556 bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
557 {
558 struct bgp_proto *p = conn->bgp;
559 struct bgp_conn *other;
560 u32 asn, hold, id;
561
562 /* Check state */
563 if (conn->state != BS_OPENSENT)
564 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
565
566 /* Check message contents */
567 if (len < 29 || len != 29 + (uint) pkt[28])
568 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
569
570 if (pkt[19] != BGP_VERSION)
571 { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; }
572
573 asn = get_u16(pkt+20);
574 hold = get_u16(pkt+22);
575 id = get_u32(pkt+24);
576 BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);
577
578 if (bgp_read_options(conn, pkt+29, pkt[28]) < 0)
579 return;
580
581 if (hold > 0 && hold < 3)
582 { bgp_error(conn, 2, 6, pkt+22, 2); return; }
583
584 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
585 if (!id || (p->is_internal && id == p->local_id))
586 { bgp_error(conn, 2, 3, pkt+24, -4); return; }
587
588 struct bgp_caps *caps = conn->remote_caps;
589
590 if (caps->as4_support)
591 {
592 u32 as4 = caps->as4_number;
593
594 if ((as4 != asn) && (asn != AS_TRANS))
595 log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
596
597 if (as4 != p->remote_as)
598 { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
599 }
600 else
601 {
602 if (asn != p->remote_as)
603 { bgp_error(conn, 2, 2, pkt+20, 2); return; }
604 }
605
606 /* Check the other connection */
607 other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
608 switch (other->state)
609 {
610 case BS_CONNECT:
611 case BS_ACTIVE:
612 /* Stop outgoing connection attempts */
613 bgp_conn_enter_idle_state(other);
614 break;
615
616 case BS_IDLE:
617 case BS_OPENSENT:
618 case BS_CLOSE:
619 break;
620
621 case BS_OPENCONFIRM:
622 /*
623 * Description of collision detection rules in RFC 4271 is confusing and
624 * contradictory, but it is essentially:
625 *
626 * 1. Router with higher ID is dominant
627 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
628 * 3. When both connections are in OpenConfirm state, one initiated by
629 * the dominant router is kept.
630 *
631 * The first line in the expression below evaluates whether the neighbor
632 * is dominant, the second line whether the new connection was initiated
633 * by the neighbor. If both are true (or both are false), we keep the new
634 * connection, otherwise we keep the old one.
635 */
636 if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
637 == (conn == &p->incoming_conn))
638 {
639 /* Should close the other connection */
640 BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
641 bgp_error(other, 6, 7, NULL, 0);
642 break;
643 }
644 /* Fall thru */
645 case BS_ESTABLISHED:
646 /* Should close this connection */
647 BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
648 bgp_error(conn, 6, 7, NULL, 0);
649 return;
650
651 default:
652 bug("bgp_rx_open: Unknown state");
653 }
654
655 /* Update our local variables */
656 conn->hold_time = MIN(hold, p->cf->hold_time);
657 conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
658 conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
659 conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
660 p->remote_id = id;
661
662 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
663 conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
664
665 bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
666 bgp_start_timer(conn->hold_timer, conn->hold_time);
667 bgp_conn_enter_openconfirm_state(conn);
668 }
669
670
671 /*
672 * Next hop handling
673 */
674
/*
 * Helpers for next hop processing. Both macros expect a variable `s` in
 * scope providing s->proto (REPORT) and s->err_withdraw (WITHDRAW).
 *
 * REPORT logs a message prefixed with the protocol name.
 * WITHDRAW logs the message, sets s->err_withdraw and returns from the
 * enclosing function, so it is usable only in functions returning void.
 */
#define REPORT(msg, args...)						\
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

#define WITHDRAW(msg, args...)						\
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Messages used with the macros above */
#define BAD_NEXT_HOP	"Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP	"Missing NEXT_HOP attribute"
683
684
685 static void
686 bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
687 {
688 struct bgp_proto *p = s->proto;
689 struct bgp_channel *c = s->channel;
690
691 if (c->cf->gw_mode == GW_DIRECT)
692 {
693 neighbor *nbr = NULL;
694
695 /* GW_DIRECT -> single_hop -> p->neigh != NULL */
696 if (ipa_nonzero(gw))
697 nbr = neigh_find2(&p->p, &gw, NULL, 0);
698 else if (ipa_nonzero(ll))
699 nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0);
700
701 if (!nbr || (nbr->scope == SCOPE_HOST))
702 WITHDRAW(BAD_NEXT_HOP);
703
704 a->dest = RTD_ROUTER;
705 a->gw = nbr->addr;
706 a->iface = nbr->iface;
707 a->hostentry = NULL;
708 a->igp_metric = 0;
709 }
710 else /* GW_RECURSIVE */
711 {
712 if (ipa_zero(gw))
713 WITHDRAW(BAD_NEXT_HOP);
714
715 rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll);
716 }
717 }
718
719 static inline int
720 bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
721 {
722 struct bgp_proto *p = s->proto;
723 ip_addr *nh = (void *) a->u.ptr->data;
724
725 if (s->channel->cf->next_hop_self)
726 return 0;
727
728 if (s->channel->cf->next_hop_keep)
729 return 1;
730
731 /* Keep it when explicitly set in export filter */
732 if (a->type & EAF_FRESH)
733 return 1;
734
735 /* Keep it when exported to internal peers */
736 if (p->is_interior && ipa_nonzero(*nh))
737 return 1;
738
739 /* Keep it when forwarded between single-hop BGPs on the same iface */
740 struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL;
741 return p->neigh && (p->neigh->iface == ifa);
742 }
743
744 static inline int
745 bgp_use_gateway(struct bgp_export_state *s)
746 {
747 struct bgp_proto *p = s->proto;
748 rta *ra = s->route->attrs;
749
750 if (s->channel->cf->next_hop_self)
751 return 0;
752
753 /* We need valid global gateway */
754 if ((ra->dest != RTD_ROUTER) || ipa_zero(ra->gw) || ipa_is_link_local(ra->gw))
755 return 0;
756
757 /* Use it when exported to internal peers */
758 if (p->is_interior)
759 return 1;
760
761 /* Use it when forwarded to single-hop BGP peer on on the same iface */
762 return p->neigh && (p->neigh->iface == ra->iface);
763 }
764
765 static void
766 bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
767 {
768 if (!a || !bgp_use_next_hop(s, a))
769 {
770 if (bgp_use_gateway(s))
771 {
772 ip_addr nh[1] = { s->route->attrs->gw };
773 bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
774 }
775 else
776 {
777 ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
778 bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
779 }
780 }
781
782 /* Check if next hop is valid */
783 a = bgp_find_attr(*to, BA_NEXT_HOP);
784 if (!a)
785 WITHDRAW(NO_NEXT_HOP);
786
787 ip_addr *nh = (void *) a->u.ptr->data;
788 ip_addr peer = s->proto->cf->remote_ip;
789 uint len = a->u.ptr->length;
790
791 if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
792 WITHDRAW(BAD_NEXT_HOP);
793
794 if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
795 WITHDRAW(BAD_NEXT_HOP);
796 }
797
798 static uint
799 bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
800 {
801 // FIXME
802 return 0;
803 }
804
805 static void
806 bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
807 {
808 // FIXME
809 return;
810 }
811
812 static void
813 bgp_update_next_hop_none(struct bgp_export_state *s UNUSED, eattr *a UNUSED, ea_list **to UNUSED)
814 {
815 // FIXME
816 }
817
818
819 /*
820 * UPDATE
821 */
822
823 static void
824 bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
825 {
826 if (path_id != s->last_id)
827 {
828 s->last_src = rt_get_source(&s->proto->p, path_id);
829 s->last_id = path_id;
830
831 rta_free(s->cached_rta);
832 s->cached_rta = NULL;
833 }
834
835 if (!a0)
836 {
837 /* Route withdraw */
838 rte_update2(&s->channel->c, n, NULL, s->last_src);
839 return;
840 }
841
842 /* Prepare cached route attributes */
843 if (s->cached_rta == NULL)
844 {
845 a0->src = s->last_src;
846
847 /* Workaround for rta_lookup() breaking eattrs */
848 ea_list *ea = a0->eattrs;
849 s->cached_rta = rta_lookup(a0);
850 a0->eattrs = ea;
851 }
852
853 rta *a = rta_clone(s->cached_rta);
854 rte *e = rte_get_temp(a);
855
856 e->pflags = 0;
857 e->u.bgp.suppressed = 0;
858 rte_update2(&s->channel->c, n, e, s->last_src);
859 }
860
861
862
863 static uint
864 bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
865 {
866 byte *pos = buf;
867
868 while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip4_addr))))
869 {
870 struct bgp_prefix *px = HEAD(buck->prefixes);
871 struct net_addr_ip4 *net = (void *) px->net;
872
873 /* Encode path ID */
874 if (s->add_path)
875 {
876 put_u32(pos, px->path_id);
877 ADVANCE(pos, size, 4);
878 }
879
880 ip4_addr a = ip4_hton(net->prefix);
881 uint b = (net->pxlen + 7) / 8;
882
883 /* Encode prefix length */
884 *pos = net->pxlen;
885 ADVANCE(pos, size, 1);
886
887 /* Encode prefix body */
888 memcpy(pos, &a, b);
889 ADVANCE(pos, size, b);
890
891 bgp_free_prefix(s->channel, px);
892 }
893
894 return pos - buf;
895 }
896
897 static void
898 bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
899 {
900 while (len)
901 {
902 net_addr_ip4 net;
903 u32 path_id = 0;
904
905 /* Decode path ID */
906 if (s->add_path)
907 {
908 if (len < 5)
909 bgp_parse_error(s, 1);
910
911 path_id = get_u32(pos);
912 ADVANCE(pos, len, 4);
913 }
914
915 /* Decode prefix length */
916 uint l = *pos;
917 uint b = (l + 7) / 8;
918 ADVANCE(pos, len, 1);
919
920 if (l > IP4_MAX_PREFIX_LENGTH)
921 bgp_parse_error(s, 10);
922
923 if (len < b)
924 bgp_parse_error(s, 1);
925
926 /* Decode prefix body */
927 ip4_addr addr = IP4_NONE;
928 memcpy(&addr, pos, b);
929 ADVANCE(pos, len, b);
930
931 net = NET_ADDR_IP4(ip4_ntoh(addr), l);
932 net_normalize_ip4(&net);
933
934 // XXXX validate prefix
935
936 bgp_rte_update(s, (net_addr *) &net, path_id, a);
937 }
938 }
939
940 static uint
941 bgp_encode_next_hop_ip4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
942 {
943 /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
944
945 ASSERT(a->u.ptr->length == sizeof(ip_addr));
946
947 put_ip4(buf, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
948
949 return 4;
950 }
951
952 static void
953 bgp_decode_next_hop_ip4(struct bgp_parse_state *s, byte *data, uint len, rta *a)
954 {
955 if (len != 4)
956 bgp_parse_error(s, 9);
957
958 ip_addr nh = ipa_from_ip4(get_ip4(data));
959
960 // XXXX validate next hop
961
962 bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &nh, sizeof(nh));
963 bgp_apply_next_hop(s, a, nh, IPA_NONE);
964 }
965
966
967 static uint
968 bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
969 {
970 byte *pos = buf;
971
972 while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip6_addr))))
973 {
974 struct bgp_prefix *px = HEAD(buck->prefixes);
975 struct net_addr_ip6 *net = (void *) px->net;
976
977 /* Encode path ID */
978 if (s->add_path)
979 {
980 put_u32(pos, px->path_id);
981 ADVANCE(pos, size, 4);
982 }
983
984 ip6_addr a = ip6_hton(net->prefix);
985 uint b = (net->pxlen + 7) / 8;
986
987 /* Encode prefix length */
988 *pos = net->pxlen;
989 ADVANCE(pos, size, 1);
990
991 /* Encode prefix body */
992 memcpy(pos, &a, b);
993 ADVANCE(pos, size, b);
994
995 bgp_free_prefix(s->channel, px);
996 }
997
998 return pos - buf;
999 }
1000
1001 static void
1002 bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1003 {
1004 while (len)
1005 {
1006 net_addr_ip6 net;
1007 u32 path_id = 0;
1008
1009 /* Decode path ID */
1010 if (s->add_path)
1011 {
1012 if (len < 5)
1013 bgp_parse_error(s, 1);
1014
1015 path_id = get_u32(pos);
1016 ADVANCE(pos, len, 4);
1017 }
1018
1019 /* Decode prefix length */
1020 uint l = *pos;
1021 uint b = (l + 7) / 8;
1022 ADVANCE(pos, len, 1);
1023
1024 if (l > IP6_MAX_PREFIX_LENGTH)
1025 bgp_parse_error(s, 10);
1026
1027 if (len < b)
1028 bgp_parse_error(s, 1);
1029
1030 /* Decode prefix body */
1031 ip6_addr addr = IP6_NONE;
1032 memcpy(&addr, pos, b);
1033 ADVANCE(pos, len, b);
1034
1035 net = NET_ADDR_IP6(ip6_ntoh(addr), l);
1036 net_normalize_ip6(&net);
1037
1038 // XXXX validate prefix
1039
1040 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1041 }
1042 }
1043
1044 static uint
1045 bgp_encode_next_hop_ip6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
1046 {
1047 ip_addr *nh = (void *) a->u.ptr->data;
1048 uint len = a->u.ptr->length;
1049
1050 ASSERT((len == 16) || (len == 32));
1051
1052 put_ip6(buf, ipa_to_ip6(nh[0]));
1053
1054 if (len == 32)
1055 put_ip6(buf+16, ipa_to_ip6(nh[1]));
1056
1057 return len;
1058 }
1059
1060 static void
1061 bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
1062 {
1063 struct adata *ad = lp_alloc_adata(s->pool, 32);
1064 ip_addr *nh = (void *) ad->data;
1065
1066 if ((len != 16) && (len != 32))
1067 bgp_parse_error(s, 9);
1068
1069 nh[0] = ipa_from_ip6(get_ip6(data));
1070 nh[1] = (len == 32) ? ipa_from_ip6(get_ip6(data+16)) : IPA_NONE;
1071
1072 if (ip6_is_link_local(nh[0]))
1073 {
1074 nh[1] = nh[0];
1075 nh[0] = IPA_NONE;
1076 }
1077
1078 if (!ip6_is_link_local(nh[1]))
1079 nh[1] = IPA_NONE;
1080
1081 if (ipa_zero(nh[1]))
1082 ad->length = 16;
1083
1084 // XXXX validate next hop
1085
1086 bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
1087 bgp_apply_next_hop(s, a, nh[0], nh[1]);
1088 }
1089
1090
1091 static uint
1092 bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1093 {
1094 byte *pos = buf;
1095
1096 while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
1097 {
1098 struct bgp_prefix *px = HEAD(buck->prefixes);
1099 struct net_addr_flow4 *net = (void *) px->net;
1100 uint flen = net->length - sizeof(net_addr_flow4);
1101
1102 /* Encode path ID */
1103 if (s->add_path)
1104 {
1105 put_u32(pos, px->path_id);
1106 ADVANCE(pos, size, 4);
1107 }
1108
1109 if (flen > size)
1110 break;
1111
1112 /* Copy whole flow data including length */
1113 memcpy(pos, net->data, flen);
1114 ADVANCE(pos, size, flen);
1115
1116 bgp_free_prefix(s->channel, px);
1117 }
1118
1119 return pos - buf;
1120 }
1121
1122 static void
1123 bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1124 {
1125 while (len)
1126 {
1127 u32 path_id = 0;
1128
1129 /* Decode path ID */
1130 if (s->add_path)
1131 {
1132 if (len < 4)
1133 bgp_parse_error(s, 1);
1134
1135 path_id = get_u32(pos);
1136 ADVANCE(pos, len, 4);
1137 }
1138
1139 if (len < 2)
1140 bgp_parse_error(s, 1);
1141
1142 /* Decode flow length */
1143 uint hlen = flow_hdr_length(pos);
1144 uint dlen = flow_read_length(pos);
1145 uint flen = hlen + dlen;
1146 byte *data = pos + hlen;
1147
1148 if (len < flen)
1149 bgp_parse_error(s, 1);
1150
1151 /* Validate flow data */
1152 enum flow_validated_state r = flow4_validate(data, dlen);
1153 if (r != FLOW_ST_VALID)
1154 {
1155 log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
1156 bgp_parse_error(s, 1);
1157 }
1158
1159 if (data[0] != FLOW_TYPE_DST_PREFIX)
1160 {
1161 log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
1162 bgp_parse_error(s, 1);
1163 }
1164
1165 /* Decode dst prefix */
1166 ip4_addr px = IP4_NONE;
1167 uint pxlen = data[1];
1168
1169 // FIXME: Use some generic function
1170 memcpy(&px, data, BYTES(pxlen));
1171 px = ip4_and(px, ip4_mkmask(pxlen));
1172
1173 /* Prepare the flow */
1174 net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
1175 net_fill_flow4(n, px, pxlen, pos, flen);
1176 ADVANCE(pos, len, flen);
1177
1178 bgp_rte_update(s, n, path_id, a);
1179 }
1180 }
1181
1182
1183 static uint
1184 bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1185 {
1186 byte *pos = buf;
1187
1188 while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
1189 {
1190 struct bgp_prefix *px = HEAD(buck->prefixes);
1191 struct net_addr_flow6 *net = (void *) px->net;
1192 uint flen = net->length - sizeof(net_addr_flow6);
1193
1194 /* Encode path ID */
1195 if (s->add_path)
1196 {
1197 put_u32(pos, px->path_id);
1198 ADVANCE(pos, size, 4);
1199 }
1200
1201 if (flen > size)
1202 break;
1203
1204 /* Copy whole flow data including length */
1205 memcpy(pos, net->data, flen);
1206 ADVANCE(pos, size, flen);
1207
1208 bgp_free_prefix(s->channel, px);
1209 }
1210
1211 return pos - buf;
1212 }
1213
1214 static void
1215 bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1216 {
1217 while (len)
1218 {
1219 u32 path_id = 0;
1220
1221 /* Decode path ID */
1222 if (s->add_path)
1223 {
1224 if (len < 4)
1225 bgp_parse_error(s, 1);
1226
1227 path_id = get_u32(pos);
1228 ADVANCE(pos, len, 4);
1229 }
1230
1231 if (len < 2)
1232 bgp_parse_error(s, 1);
1233
1234 /* Decode flow length */
1235 uint hlen = flow_hdr_length(pos);
1236 uint dlen = flow_read_length(pos);
1237 uint flen = hlen + dlen;
1238 byte *data = pos + hlen;
1239
1240 if (len < flen)
1241 bgp_parse_error(s, 1);
1242
1243 /* Validate flow data */
1244 enum flow_validated_state r = flow6_validate(data, dlen);
1245 if (r != FLOW_ST_VALID)
1246 {
1247 log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
1248 bgp_parse_error(s, 1);
1249 }
1250
1251 if (data[0] != FLOW_TYPE_DST_PREFIX)
1252 {
1253 log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
1254 bgp_parse_error(s, 1);
1255 }
1256
1257 /* Decode dst prefix */
1258 ip6_addr px = IP6_NONE;
1259 uint pxlen = data[1];
1260
1261 // FIXME: Use some generic function
1262 memcpy(&px, data, BYTES(pxlen));
1263 px = ip6_and(px, ip6_mkmask(pxlen));
1264
1265 /* Prepare the flow */
1266 net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
1267 net_fill_flow6(n, px, pxlen, pos, flen);
1268 ADVANCE(pos, len, flen);
1269
1270 bgp_rte_update(s, n, path_id, a);
1271 }
1272 }
1273
1274
/*
 * Table of supported address families. Each entry binds an AFI value to its
 * net type, its name and the hooks for encoding/decoding NLRI and next hops.
 * Entries are looked up by their .afi field in bgp_get_af_desc().
 */
static const struct bgp_af_desc bgp_af_table[] = {
  {
    .afi = BGP_AF_IPV4,
    .net = NET_IP4,
    .name = "ipv4",
    .encode_nlri = bgp_encode_nlri_ip4,
    .decode_nlri = bgp_decode_nlri_ip4,
    .encode_next_hop = bgp_encode_next_hop_ip4,
    .decode_next_hop = bgp_decode_next_hop_ip4,
    .update_next_hop = bgp_update_next_hop_ip,
  },
  {
    .afi = BGP_AF_IPV4_MC,
    .net = NET_IP4,
    .name = "ipv4-mc",
    .encode_nlri = bgp_encode_nlri_ip4,
    .decode_nlri = bgp_decode_nlri_ip4,
    .encode_next_hop = bgp_encode_next_hop_ip4,
    .decode_next_hop = bgp_decode_next_hop_ip4,
    .update_next_hop = bgp_update_next_hop_ip,
  },
  {
    /* Flowspec families use the *_none next hop hooks */
    .afi = BGP_AF_FLOW4,
    .net = NET_FLOW4,
    .name = "flow4",
    .encode_nlri = bgp_encode_nlri_flow4,
    .decode_nlri = bgp_decode_nlri_flow4,
    .encode_next_hop = bgp_encode_next_hop_none,
    .decode_next_hop = bgp_decode_next_hop_none,
    .update_next_hop = bgp_update_next_hop_none,
  },
  {
    .afi = BGP_AF_IPV6,
    .net = NET_IP6,
    .name = "ipv6",
    .encode_nlri = bgp_encode_nlri_ip6,
    .decode_nlri = bgp_decode_nlri_ip6,
    .encode_next_hop = bgp_encode_next_hop_ip6,
    .decode_next_hop = bgp_decode_next_hop_ip6,
    .update_next_hop = bgp_update_next_hop_ip,
  },
  {
    .afi = BGP_AF_IPV6_MC,
    .net = NET_IP6,
    .name = "ipv6-mc",
    .encode_nlri = bgp_encode_nlri_ip6,
    .decode_nlri = bgp_decode_nlri_ip6,
    .encode_next_hop = bgp_encode_next_hop_ip6,
    .decode_next_hop = bgp_decode_next_hop_ip6,
    .update_next_hop = bgp_update_next_hop_ip,
  },
  {
    /* Flowspec families use the *_none next hop hooks */
    .afi = BGP_AF_FLOW6,
    .net = NET_FLOW6,
    .name = "flow6",
    .encode_nlri = bgp_encode_nlri_flow6,
    .decode_nlri = bgp_decode_nlri_flow6,
    .encode_next_hop = bgp_encode_next_hop_none,
    .decode_next_hop = bgp_decode_next_hop_none,
    .update_next_hop = bgp_update_next_hop_none,
  },
};
1337
1338 const struct bgp_af_desc *
1339 bgp_get_af_desc(u32 afi)
1340 {
1341 uint i;
1342 for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++)
1343 if (bgp_af_table[i].afi == afi)
1344 return &bgp_af_table[i];
1345
1346 return NULL;
1347 }
1348
1349 static inline uint
1350 bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1351 {
1352 return s->channel->desc->encode_nlri(s, buck, buf, end - buf);
1353 }
1354
1355 static inline uint
1356 bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf)
1357 {
1358 return s->channel->desc->encode_next_hop(s, nh, buf, 255);
1359 }
1360
/*
 * Dispatch to the update_next_hop hook of the AF descriptor attached to the
 * channel of export state @s, passing @a and @to through unchanged.
 */
void
bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to)
{
  s->channel->desc->update_next_hop(s, a, to);
}
1366
1367 #define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
1368
1369 static byte *
1370 bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1371 {
1372 /*
1373 * 2 B Withdrawn Routes Length (zero)
1374 * --- IPv4 Withdrawn Routes NLRI (unused)
1375 * 2 B Total Path Attribute Length
1376 * var Path Attributes
1377 * var IPv4 Network Layer Reachability Information
1378 */
1379
1380 int lr, la;
1381
1382 la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
1383 if (la < 0)
1384 {
1385 /* Attribute list too long */
1386 bgp_withdraw_bucket(s->channel, buck);
1387 return NULL;
1388 }
1389
1390 put_u16(buf+0, 0);
1391 put_u16(buf+2, la);
1392
1393 lr = bgp_encode_nlri(s, buck, buf+4+la, end);
1394
1395 return buf+4+la+lr;
1396 }
1397
1398 static byte *
1399 bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1400 {
1401 /*
1402 * 2 B IPv4 Withdrawn Routes Length (zero)
1403 * --- IPv4 Withdrawn Routes NLRI (unused)
1404 * 2 B Total Path Attribute Length
1405 * 1 B MP_REACH_NLRI hdr - Attribute Flags
1406 * 1 B MP_REACH_NLRI hdr - Attribute Type Code
1407 * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
1408 * 2 B MP_REACH_NLRI data - Address Family Identifier
1409 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
1410 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
1411 * var MP_REACH_NLRI data - Network Address of Next Hop
1412 * 1 B MP_REACH_NLRI data - Reserved (zero)
1413 * var MP_REACH_NLRI data - Network Layer Reachability Information
1414 * var Rest of Path Attributes
1415 * --- IPv4 Network Layer Reachability Information (unused)
1416 */
1417
1418 int lh, lr, la; /* Lengths of next hop, NLRI and attributes */
1419
1420 /* Begin of MP_REACH_NLRI atribute */
1421 buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
1422 buf[5] = BA_MP_REACH_NLRI;
1423 put_u16(buf+6, 0); /* Will be fixed later */
1424 put_af3(buf+8, s->channel->afi);
1425 byte *pos = buf+11;
1426
1427 /* Encode attributes to temporary buffer */
1428 byte *abuf = alloca(MAX_ATTRS_LENGTH);
1429 la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH);
1430 if (la < 0)
1431 {
1432 /* Attribute list too long */
1433 bgp_withdraw_bucket(s->channel, buck);
1434 return NULL;
1435 }
1436
1437 /* Encode the next hop */
1438 lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1);
1439 *pos = lh;
1440 pos += 1+lh;
1441
1442 /* Reserved field */
1443 *pos++ = 0;
1444
1445 /* Encode the NLRI */
1446 lr = bgp_encode_nlri(s, buck, pos, end - la);
1447 pos += lr;
1448
1449 /* End of MP_REACH_NLRI atribute, update data length */
1450 put_u16(buf+6, pos-buf-8);
1451
1452 /* Copy remaining attributes */
1453 memcpy(pos, abuf, la);
1454 pos += la;
1455
1456 /* Initial UPDATE fields */
1457 put_u16(buf+0, 0);
1458 put_u16(buf+2, pos-buf-4);
1459
1460 return pos;
1461 }
1462
1463 #undef MAX_ATTRS_LENGTH
1464
1465 static byte *
1466 bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1467 {
1468 /*
1469 * 2 B Withdrawn Routes Length
1470 * var IPv4 Withdrawn Routes NLRI
1471 * 2 B Total Path Attribute Length (zero)
1472 * --- Path Attributes (unused)
1473 * --- IPv4 Network Layer Reachability Information (unused)
1474 */
1475
1476 uint len = bgp_encode_nlri(s, buck, buf+2, end);
1477
1478 put_u16(buf+0, len);
1479 put_u16(buf+2+len, 0);
1480
1481 return buf+4+len;
1482 }
1483
1484 static byte *
1485 bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1486 {
1487 /*
1488 * 2 B Withdrawn Routes Length (zero)
1489 * --- IPv4 Withdrawn Routes NLRI (unused)
1490 * 2 B Total Path Attribute Length
1491 * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
1492 * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
1493 * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
1494 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
1495 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
1496 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
1497 * --- IPv4 Network Layer Reachability Information (unused)
1498 */
1499
1500 uint len = bgp_encode_nlri(s, buck, buf+11, end);
1501
1502 put_u16(buf+0, 0);
1503 put_u16(buf+2, 7+len);
1504
1505 /* Begin of MP_UNREACH_NLRI atribute */
1506 buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
1507 buf[5] = BA_MP_UNREACH_NLRI;
1508 put_u16(buf+6, 3+len);
1509 put_af3(buf+8, s->channel->afi);
1510
1511 return buf+11+len;
1512 }
1513
1514 static byte *
1515 bgp_create_update(struct bgp_channel *c, byte *buf)
1516 {
1517 struct bgp_proto *p = (void *) c->c.proto;
1518 struct bgp_bucket *buck;
1519 byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
1520 byte *res = NULL;
1521
1522 /* Initialize write state */
1523 struct bgp_write_state s = {
1524 .proto = p,
1525 .channel = c,
1526 .pool = bgp_linpool,
1527 .as4_session = p->as4_session,
1528 .add_path = c->add_path_tx,
1529 };
1530
1531 again:
1532
1533 /* Try unreachable bucket */
1534 if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
1535 {
1536 res = (c->afi == BGP_AF_IPV4) ?
1537 bgp_create_ip_unreach(&s, buck, buf, end):
1538 bgp_create_mp_unreach(&s, buck, buf, end);
1539
1540 goto done;
1541 }
1542
1543 /* Try reachable buckets */
1544 if (!EMPTY_LIST(c->bucket_queue))
1545 {
1546 buck = HEAD(c->bucket_queue);
1547
1548 /* Cleanup empty buckets */
1549 if (EMPTY_LIST(buck->prefixes))
1550 {
1551 bgp_free_bucket(c, buck);
1552 goto again;
1553 }
1554
1555 res = (c->afi == BGP_AF_IPV4) ?
1556 bgp_create_ip_reach(&s, buck, buf, end):
1557 bgp_create_mp_reach(&s, buck, buf, end);
1558
1559 if (EMPTY_LIST(buck->prefixes))
1560 bgp_free_bucket(c, buck);
1561 else
1562 bgp_defer_bucket(c, buck);
1563
1564 if (!res)
1565 goto again;
1566
1567 goto done;
1568 }
1569
1570 /* No more prefixes to send */
1571 return NULL;
1572
1573 done:
1574 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
1575 lp_flush(s.pool);
1576
1577 return res;
1578 }
1579
1580 static byte *
1581 bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf)
1582 {
1583 /* Empty update packet */
1584 put_u32(buf, 0);
1585
1586 return buf+4;
1587 }
1588
1589 static byte *
1590 bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf)
1591 {
1592 put_u16(buf+0, 0);
1593 put_u16(buf+2, 6); /* length 4--9 */
1594
1595 /* Empty MP_UNREACH_NLRI atribute */
1596 buf[4] = BAF_OPTIONAL;
1597 buf[5] = BA_MP_UNREACH_NLRI;
1598 buf[6] = 3; /* Length 7--9 */
1599 put_af3(buf+7, c->afi);
1600
1601 return buf+10;
1602 }
1603
1604 static byte *
1605 bgp_create_end_mark(struct bgp_channel *c, byte *buf)
1606 {
1607 struct bgp_proto *p = (void *) c->c.proto;
1608
1609 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
1610
1611 return (c->afi == BGP_AF_IPV4) ?
1612 bgp_create_ip_end_mark(c, buf):
1613 bgp_create_mp_end_mark(c, buf);
1614 }
1615
1616 static inline void
1617 bgp_rx_end_mark(struct bgp_proto *p, u32 afi)
1618 {
1619 struct bgp_channel *c = bgp_get_channel(p, afi);
1620
1621 BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
1622
1623 /* XXXX handle unknown AF in MP_*_NLRI */
1624 if (!c)
1625 return;
1626
1627 if (c->load_state == BFS_LOADING)
1628 c->load_state = BFS_NONE;
1629
1630 if (p->p.gr_recovery)
1631 channel_graceful_restart_unlock(&c->c);
1632
1633 if (c->gr_active)
1634 bgp_graceful_restart_done(c);
1635 }
1636
1637 static inline void
1638 bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
1639 {
1640 struct bgp_channel *c = bgp_get_channel(s->proto, afi);
1641 rta *a = NULL;
1642
1643 /* XXXX handle unknown AF in MP_*_NLRI */
1644 if (!c)
1645 return;
1646
1647 s->channel = c;
1648 s->add_path = c->add_path_rx;
1649
1650 s->last_id = 0;
1651 s->last_src = s->proto->p.main_source;
1652
1653 /*
1654 * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
1655 * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
1656 * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
1657 * decode_next_hop hooks) by restoring a->eattrs afterwards.
1658 */
1659
1660 if (ea)
1661 {
1662 a = alloca(sizeof(struct rta));
1663 memset(a, 0, sizeof(struct rta));
1664
1665 a->source = RTS_BGP;
1666 a->scope = SCOPE_UNIVERSE;
1667 a->cast = RTC_UNICAST;
1668 a->dest = RTD_UNREACHABLE;
1669 a->from = s->proto->cf->remote_ip;
1670 a->eattrs = ea;
1671
1672 c->desc->decode_next_hop(s, nh, nh_len, a);
1673
1674 /* Handle withdraw during next hop decoding */
1675 if (s->err_withdraw)
1676 a = NULL;
1677 }
1678
1679 c->desc->decode_nlri(s, nlri, len, a);
1680
1681 rta_free(s->cached_rta);
1682 s->cached_rta = NULL;
1683 }
1684
/*
 * Handle a received UPDATE message: check the connection state, split the
 * message into its withdrawn / attribute / reachable parts, decode the
 * attributes and then process End-of-RIB markers or the individual NLRI
 * blocks (IPv4 and MP, unreachable before reachable).
 */
static void
bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
{
  struct bgp_proto *p = conn->bgp;
  ea_list *ea = NULL;

  BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");

  /* Workaround for some BGP implementations that skip initial KEEPALIVE */
  if (conn->state == BS_OPENCONFIRM)
    bgp_conn_enter_established_state(conn);

  /* UPDATE in any other state is an FSM error (code 5) */
  if (conn->state != BS_ESTABLISHED)
  { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }

  /* Received UPDATE restarts the hold timer */
  bgp_start_timer(conn->hold_timer, conn->hold_time);

  /* Initialize parse state */
  struct bgp_parse_state s = {
    .proto = p,
    .pool = bgp_linpool,
    .as4_session = p->as4_session,
  };

  /*
   * Parse error handler. Presumably bgp_parse_error() longjmps back here
   * with s.err_subcode set -- TODO confirm. The error is reported as an
   * UPDATE message error (code 3) and the parse state is cleaned up.
   */
  if (setjmp(s.err_jmpbuf))
  {
    bgp_error(conn, 3, s.err_subcode, NULL, 0);
    goto done;
  }

  /* Check minimal length: 19 B header + two 2 B length fields */
  if (len < 23)
  { bgp_error(conn, 1, 2, pkt+16, 2); return; }

  /* Skip fixed header */
  uint pos = 19;

  /*
   *	UPDATE message format
   *
   *	2 B	IPv4 Withdrawn Routes Length
   *	var	IPv4 Withdrawn Routes NLRI
   *	2 B	Total Path Attribute Length
   *	var	Path Attributes
   *	var	IPv4 Reachable Routes NLRI
   */

  s.ip_unreach_len = get_u16(pkt + pos);
  s.ip_unreach_nlri = pkt + pos + 2;
  pos += 2 + s.ip_unreach_len;

  /* The attribute length field has to fit into the message */
  if (pos + 2 > len)
    bgp_parse_error(&s, 1);

  s.attr_len = get_u16(pkt + pos);
  s.attrs = pkt + pos + 2;
  pos += 2 + s.attr_len;

  /* The attributes have to fit into the message */
  if (pos > len)
    bgp_parse_error(&s, 1);

  /* Whatever remains is the IPv4 reachable NLRI */
  s.ip_reach_len = len - pos;
  s.ip_reach_nlri = pkt + pos;


  if (s.attr_len)
    ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);

  /* Check for End-of-RIB marker */
  if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
  { bgp_rx_end_mark(p, BGP_AF_IPV4); goto done; }

  /* Check for MP End-of-RIB marker */
  if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
      !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af) /* XXXX See RFC 7606 5.2 */
  { bgp_rx_end_mark(p, s.mp_unreach_af); goto done; }

  /* Withdraws carry no attributes and no next hop */
  if (s.ip_unreach_len)
    bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);

  if (s.mp_unreach_len)
    bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);

  /* Announcements share the decoded attributes, each with its own next hop */
  if (s.ip_reach_len)
    bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
		    ea, s.ip_next_hop_data, s.ip_next_hop_len);

  if (s.mp_reach_len)
    bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len,
		    ea, s.mp_next_hop_data, s.mp_next_hop_len);

done:
  /* Common cleanup for both the regular and the parse error path */
  rta_free(s.cached_rta);
  lp_flush(s.pool);
  return;
}
1782
1783
1784 /*
1785 * ROUTE-REFRESH
1786 */
1787
1788 static inline byte *
1789 bgp_create_route_refresh(struct bgp_channel *c, byte *buf)
1790 {
1791 struct bgp_proto *p = (void *) c->c.proto;
1792
1793 BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
1794
1795 /* Original route refresh request, RFC 2918 */
1796 put_af4(buf, c->afi);
1797 buf[2] = BGP_RR_REQUEST;
1798
1799 return buf+4;
1800 }
1801
1802 static inline byte *
1803 bgp_create_begin_refresh(struct bgp_channel *c, byte *buf)
1804 {
1805 struct bgp_proto *p = (void *) c->c.proto;
1806
1807 BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
1808
1809 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
1810 put_af4(buf, c->afi);
1811 buf[2] = BGP_RR_BEGIN;
1812
1813 return buf+4;
1814 }
1815
1816 static inline byte *
1817 bgp_create_end_refresh(struct bgp_channel *c, byte *buf)
1818 {
1819 struct bgp_proto *p = (void *) c->c.proto;
1820
1821 BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
1822
1823 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
1824 put_af4(buf, c->afi);
1825 buf[2] = BGP_RR_END;
1826
1827 return buf+4;
1828 }
1829
1830 static void
1831 bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
1832 {
1833 struct bgp_proto *p = conn->bgp;
1834
1835 if (conn->state != BS_ESTABLISHED)
1836 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1837
1838 if (!conn->local_caps->route_refresh)
1839 { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1840
1841 if (len < (BGP_HEADER_LENGTH + 4))
1842 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1843
1844 if (len > (BGP_HEADER_LENGTH + 4))
1845 { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
1846
1847 struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19));
1848 if (!c)
1849 {
1850 log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
1851 p->p.name, pkt[21], get_u16(pkt+19), pkt[22]);
1852 return;
1853 }
1854
1855 /* RFC 7313 redefined reserved field as RR message subtype */
1856 uint subtype = p->enhanced_refresh ? pkt[21] : BGP_RR_REQUEST;
1857
1858 switch (subtype)
1859 {
1860 case BGP_RR_REQUEST:
1861 BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1862 channel_request_feeding(&c->c);
1863 break;
1864
1865 case BGP_RR_BEGIN:
1866 BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
1867 bgp_refresh_begin(c);
1868 break;
1869
1870 case BGP_RR_END:
1871 BGP_TRACE(D_PACKETS, "Got END-OF-RR");
1872 bgp_refresh_end(c);
1873 break;
1874
1875 default:
1876 log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
1877 p->p.name, subtype);
1878 break;
1879 }
1880 }
1881
1882 static inline struct bgp_channel *
1883 bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn)
1884 {
1885 uint i = conn->last_channel;
1886
1887 /* Try the last channel, but at most several times */
1888 if ((conn->channels_to_send & (1 << i)) &&
1889 (conn->last_channel_count < 16))
1890 goto found;
1891
1892 /* Find channel with non-zero channels_to_send */
1893 do
1894 {
1895 i++;
1896 if (i >= p->channel_count)
1897 i = 0;
1898 }
1899 while (! (conn->channels_to_send & (1 << i)));
1900
1901 /* Use that channel */
1902 conn->last_channel = i;
1903 conn->last_channel_count = 0;
1904
1905 found:
1906 conn->last_channel_count++;
1907 return p->channel_map[i];
1908 }
1909
1910 static inline int
1911 bgp_send(struct bgp_conn *conn, uint type, uint len)
1912 {
1913 sock *sk = conn->sk;
1914 byte *buf = sk->tbuf;
1915
1916 memset(buf, 0xff, 16); /* Marker */
1917 put_u16(buf+16, len);
1918 buf[18] = type;
1919
1920 return sk_send(sk, len);
1921 }
1922
1923 /**
1924 * bgp_fire_tx - transmit packets
1925 * @conn: connection
1926 *
1927 * Whenever the transmit buffers of the underlying TCP connection
1928 * are free and we have any packets queued for sending, the socket functions
1929 * call bgp_fire_tx() which takes care of selecting the highest priority packet
1930 * queued (Notification > Keepalive > Open > Update), assembling its header
1931 * and body and sending it to the connection.
1932 */
1933 static int
1934 bgp_fire_tx(struct bgp_conn *conn)
1935 {
1936 struct bgp_proto *p = conn->bgp;
1937 struct bgp_channel *c;
1938 byte *buf, *pkt, *end;
1939 uint s;
1940
1941 if (!conn->sk)
1942 return 0;
1943
1944 buf = conn->sk->tbuf;
1945 pkt = buf + BGP_HEADER_LENGTH;
1946 s = conn->packets_to_send;
1947
1948 if (s & (1 << PKT_SCHEDULE_CLOSE))
1949 {
1950 /* We can finally close connection and enter idle state */
1951 bgp_conn_enter_idle_state(conn);
1952 return 0;
1953 }
1954 if (s & (1 << PKT_NOTIFICATION))
1955 {
1956 conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE;
1957 end = bgp_create_notification(conn, pkt);
1958 return bgp_send(conn, PKT_NOTIFICATION, end - buf);
1959 }
1960 else if (s & (1 << PKT_KEEPALIVE))
1961 {
1962 conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
1963 BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
1964 bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
1965 return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
1966 }
1967 else if (s & (1 << PKT_OPEN))
1968 {
1969 conn->packets_to_send &= ~(1 << PKT_OPEN);
1970 end = bgp_create_open(conn, pkt);
1971 return bgp_send(conn, PKT_OPEN, end - buf);
1972 }
1973 else while (conn->channels_to_send)
1974 {
1975 c = bgp_get_channel_to_send(p, conn);
1976 s = c->packets_to_send;
1977
1978 if (s & (1 << PKT_ROUTE_REFRESH))
1979 {
1980 c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH);
1981 end = bgp_create_route_refresh(c, pkt);
1982 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
1983 }
1984 else if (s & (1 << PKT_BEGIN_REFRESH))
1985 {
1986 /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
1987 c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH);
1988 end = bgp_create_begin_refresh(c, pkt);
1989 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
1990 }
1991 else if (s & (1 << PKT_UPDATE))
1992 {
1993 end = bgp_create_update(c, pkt);
1994 if (end)
1995 return bgp_send(conn, PKT_UPDATE, end - buf);
1996
1997 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
1998 c->packets_to_send = 0;
1999 conn->channels_to_send &= ~(1 << c->index);
2000
2001 if (c->feed_state == BFS_LOADED)
2002 {
2003 c->feed_state = BFS_NONE;
2004 end = bgp_create_end_mark(c, pkt);
2005 return bgp_send(conn, PKT_UPDATE, end - buf);
2006 }
2007
2008 else if (c->feed_state == BFS_REFRESHED)
2009 {
2010 c->feed_state = BFS_NONE;
2011 end = bgp_create_end_refresh(c, pkt);
2012 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
2013 }
2014 }
2015 else if (s)
2016 bug("Channel packets_to_send: %x", s);
2017
2018 c->packets_to_send = 0;
2019 conn->channels_to_send &= ~(1 << c->index);
2020 }
2021
2022 return 0;
2023 }
2024
2025 /**
2026 * bgp_schedule_packet - schedule a packet for transmission
2027 * @conn: connection
2028 * @c: channel
2029 * @type: packet type
2030 *
2031 * Schedule a packet of type @type to be sent as soon as possible.
2032 */
2033 void
2034 bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
2035 {
2036 ASSERT(conn->sk);
2037
2038 DBG("BGP: Scheduling packet type %d\n", type);
2039
2040 if (c)
2041 {
2042 if (! conn->channels_to_send)
2043 {
2044 conn->last_channel = c->index;
2045 conn->last_channel_count = 0;
2046 }
2047
2048 c->packets_to_send |= 1 << type;
2049 conn->channels_to_send |= 1 << c->index;
2050 }
2051 else
2052 conn->packets_to_send |= 1 << type;
2053
2054 if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
2055 ev_schedule(conn->tx_ev);
2056 }
2057
/*
 * Event hook: keep calling bgp_fire_tx() on connection @vconn for as long as
 * it returns a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");

  for (;;)
    if (bgp_fire_tx(conn) <= 0)
      break;
}
2067
2068 void
2069 bgp_tx(sock *sk)
2070 {
2071 struct bgp_conn *conn = sk->data;
2072
2073 DBG("BGP: TX hook\n");
2074 while (bgp_fire_tx(conn) > 0)
2075 ;
2076 }
2077
2078
2079 static struct {
2080 byte major, minor;
2081 byte *msg;
2082 } bgp_msg_table[] = {
2083 { 1, 0, "Invalid message header" },
2084 { 1, 1, "Connection not synchronized" },
2085 { 1, 2, "Bad message length" },
2086 { 1, 3, "Bad message type" },
2087 { 2, 0, "Invalid OPEN message" },
2088 { 2, 1, "Unsupported version number" },
2089 { 2, 2, "Bad peer AS" },
2090 { 2, 3, "Bad BGP identifier" },
2091 { 2, 4, "Unsupported optional parameter" },
2092 { 2, 5, "Authentication failure" },
2093 { 2, 6, "Unacceptable hold time" },
2094 { 2, 7, "Required capability missing" }, /* [RFC5492] */
2095 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
2096 { 3, 0, "Invalid UPDATE message" },
2097 { 3, 1, "Malformed attribute list" },
2098 { 3, 2, "Unrecognized well-known attribute" },
2099 { 3, 3, "Missing mandatory attribute" },
2100 { 3, 4, "Invalid attribute flags" },
2101 { 3, 5, "Invalid attribute length" },
2102 { 3, 6, "Invalid ORIGIN attribute" },
2103 { 3, 7, "AS routing loop" }, /* Deprecated */
2104 { 3, 8, "Invalid NEXT_HOP attribute" },
2105 { 3, 9, "Optional attribute error" },
2106 { 3, 10, "Invalid network field" },
2107 { 3, 11, "Malformed AS_PATH" },
2108 { 4, 0, "Hold timer expired" },
2109 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
2110 { 5, 1, "Unexpected message in OpenSent state" },
2111 { 5, 2, "Unexpected message in OpenConfirm state" },
2112 { 5, 3, "Unexpected message in Established state" },
2113 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
2114 { 6, 1, "Maximum number of prefixes reached" },
2115 { 6, 2, "Administrative shutdown" },
2116 { 6, 3, "Peer de-configured" },
2117 { 6, 4, "Administrative reset" },
2118 { 6, 5, "Connection rejected" },
2119 { 6, 6, "Other configuration change" },
2120 { 6, 7, "Connection collision resolution" },
2121 { 6, 8, "Out of Resources" },
2122 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
2123 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
2124 };
2125
2126 /**
2127 * bgp_error_dsc - return BGP error description
2128 * @code: BGP error code
2129 * @subcode: BGP error subcode
2130 *
2131 * bgp_error_dsc() returns error description for BGP errors
2132 * which might be static string or given temporary buffer.
2133 */
2134 const char *
2135 bgp_error_dsc(uint code, uint subcode)
2136 {
2137 static char buff[32];
2138 uint i;
2139
2140 for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
2141 if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
2142 return bgp_msg_table[i].msg;
2143
2144 bsprintf(buff, "Unknown error %u.%u", code, subcode);
2145 return buff;
2146 }
2147
2148 void
2149 bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
2150 {
2151 const byte *name;
2152 byte *t, argbuf[36];
2153 uint i;
2154
2155 /* Don't report Cease messages generated by myself */
2156 if (code == 6 && class == BE_BGP_TX)
2157 return;
2158
2159 name = bgp_error_dsc(code, subcode);
2160 t = argbuf;
2161 if (len)
2162 {
2163 *t++ = ':';
2164 *t++ = ' ';
2165
2166 if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
2167 {
2168 /* Bad peer AS - we would like to print the AS */
2169 t += bsprintf(t, "%u", (len == 2) ? get_u16(data) : get_u32(data));
2170 goto done;
2171 }
2172 if (len > 16)
2173 len = 16;
2174 for (i=0; i<len; i++)
2175 t += bsprintf(t, "%02x", data[i]);
2176 }
2177 done:
2178 *t = 0;
2179 log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
2180 }
2181
2182 static void
2183 bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
2184 {
2185 struct bgp_proto *p = conn->bgp;
2186
2187 if (len < 21)
2188 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
2189
2190 uint code = pkt[19];
2191 uint subcode = pkt[20];
2192 int err = (code != 6);
2193
2194 bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
2195 bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
2196
2197 bgp_conn_enter_close_state(conn);
2198 bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
2199
2200 if (err)
2201 {
2202 bgp_update_startup_delay(p);
2203 bgp_stop(p, 0);
2204 }
2205 }
2206
2207 static void
2208 bgp_rx_keepalive(struct bgp_conn *conn)
2209 {
2210 struct bgp_proto *p = conn->bgp;
2211
2212 BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
2213 bgp_start_timer(conn->hold_timer, conn->hold_time);
2214
2215 if (conn->state == BS_OPENCONFIRM)
2216 { bgp_conn_enter_established_state(conn); return; }
2217
2218 if (conn->state != BS_ESTABLISHED)
2219 bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
2220 }
2221
2222
2223 /**
2224 * bgp_rx_packet - handle a received packet
2225 * @conn: BGP connection
2226 * @pkt: start of the packet
2227 * @len: packet size
2228 *
2229 * bgp_rx_packet() takes a newly received packet and calls the corresponding
2230 * packet handler according to the packet type.
2231 */
2232 static void
2233 bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
2234 {
2235 byte type = pkt[18];
2236
2237 DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
2238
2239 if (conn->bgp->p.mrtdump & MD_MESSAGES)
2240 mrt_dump_bgp_packet(conn, pkt, len);
2241
2242 switch (type)
2243 {
2244 case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
2245 case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
2246 case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
2247 case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
2248 case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
2249 default: bgp_error(conn, 1, 3, pkt+18, 1);
2250 }
2251 }
2252
2253 /**
2254 * bgp_rx - handle received data
2255 * @sk: socket
2256 * @size: amount of data received
2257 *
2258 * bgp_rx() is called by the socket layer whenever new data arrive from
2259 * the underlying TCP connection. It assembles the data fragments to packets,
2260 * checks their headers and framing and passes complete packets to
2261 * bgp_rx_packet().
2262 */
2263 int
2264 bgp_rx(sock *sk, uint size)
2265 {
2266 struct bgp_conn *conn = sk->data;
2267 byte *pkt_start = sk->rbuf;
2268 byte *end = pkt_start + size;
2269 uint i, len;
2270
2271 DBG("BGP: RX hook: Got %d bytes\n", size);
2272 while (end >= pkt_start + BGP_HEADER_LENGTH)
2273 {
2274 if ((conn->state == BS_CLOSE) || (conn->sk != sk))
2275 return 0;
2276 for(i=0; i<16; i++)
2277 if (pkt_start[i] != 0xff)
2278 {
2279 bgp_error(conn, 1, 1, NULL, 0);
2280 break;
2281 }
2282 len = get_u16(pkt_start+16);
2283 if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn)))
2284 {
2285 bgp_error(conn, 1, 2, pkt_start+16, 2);
2286 break;
2287 }
2288 if (end < pkt_start + len)
2289 break;
2290 bgp_rx_packet(conn, pkt_start, len);
2291 pkt_start += len;
2292 }
2293 if (pkt_start != sk->rbuf)
2294 {
2295 memmove(sk->rbuf, pkt_start, end - pkt_start);
2296 sk->rpos = sk->rbuf + (end - pkt_start);
2297 }
2298 return 0;
2299 }