]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
The MRT protocol
[thirdparty/bird.git] / proto / bgp / packets.c
1 /*
2 * BIRD -- BGP Packet Processing
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 #undef LOCAL_DEBUG
10
11 #include "nest/bird.h"
12 #include "nest/iface.h"
13 #include "nest/protocol.h"
14 #include "nest/route.h"
15 #include "nest/attrs.h"
16 #include "proto/mrt/mrt.h"
17 #include "conf/conf.h"
18 #include "lib/unaligned.h"
19 #include "lib/socket.h"
20
21 #include "nest/cli.h"
22
23 #include "bgp.h"
24
25
/* Subtype values written into the third octet of a ROUTE-REFRESH message */
#define BGP_RR_REQUEST		0	/* Plain route refresh request, RFC 2918 */
#define BGP_RR_BEGIN		1	/* Beginning of route refresh (BoRR), RFC 7313 */
#define BGP_RR_END		2	/* End of route refresh (EoRR), RFC 7313 */
29
30
31 static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
32 static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
33
34 /* Table for state -> RFC 6608 FSM error subcodes */
35 static byte fsm_err_subcode[BS_MAX] = {
36 [BS_OPENSENT] = 1,
37 [BS_OPENCONFIRM] = 2,
38 [BS_ESTABLISHED] = 3
39 };
40
41 static void
42 init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
43 {
44 struct bgp_proto *p = conn->bgp;
45 int p_ok = conn->state >= BS_OPENCONFIRM;
46
47 memset(d, 0, sizeof(struct mrt_bgp_data));
48 d->peer_as = p->remote_as;
49 d->local_as = p->local_as;
50 d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
51 d->af = BGP_AF;
52 d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
53 d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
54 d->as4 = p_ok ? p->as4_session : 0;
55 d->add_path = p_ok ? p->add_path_rx : 0;
56 }
57
58 static void
59 bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len)
60 {
61 struct mrt_bgp_data d;
62 init_mrt_bgp_data(conn, &d);
63
64 d.message = pkt;
65 d.msg_len = len;
66
67 mrt_dump_bgp_message(&d);
68 }
69
70 void
71 bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new)
72 {
73 struct mrt_bgp_data d;
74 init_mrt_bgp_data(conn, &d);
75
76 d.old_state = old;
77 d.new_state = new;
78
79 mrt_dump_bgp_state_change(&d);
80 }
81
82 static byte *
83 bgp_create_notification(struct bgp_conn *conn, byte *buf)
84 {
85 struct bgp_proto *p = conn->bgp;
86
87 BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
88 buf[0] = conn->notify_code;
89 buf[1] = conn->notify_subcode;
90 memcpy(buf+2, conn->notify_data, conn->notify_size);
91 return buf + 2 + conn->notify_size;
92 }
93
94 #ifdef IPV6
95 static byte *
96 bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
97 {
98 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
99 *buf++ = 4; /* Capability data length */
100 *buf++ = 0; /* We support AF IPv6 */
101 *buf++ = BGP_AF_IPV6;
102 *buf++ = 0; /* RFU */
103 *buf++ = 1; /* and SAFI 1 */
104 return buf;
105 }
106
107 #else
108
109 static byte *
110 bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
111 {
112 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
113 *buf++ = 4; /* Capability data length */
114 *buf++ = 0; /* We support AF IPv4 */
115 *buf++ = BGP_AF_IPV4;
116 *buf++ = 0; /* RFU */
117 *buf++ = 1; /* and SAFI 1 */
118 return buf;
119 }
120 #endif
121
122 static byte *
123 bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
124 {
125 *buf++ = 2; /* Capability 2: Support for route refresh */
126 *buf++ = 0; /* Capability data length */
127 return buf;
128 }
129
130 static byte *
131 bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf)
132 {
133 *buf++ = 6; /* Capability 6: Support for extended messages */
134 *buf++ = 0; /* Capability data length */
135 return buf;
136 }
137
138 static byte *
139 bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
140 {
141 *buf++ = 64; /* Capability 64: Support for graceful restart */
142 *buf++ = 6; /* Capability data length */
143
144 put_u16(buf, p->cf->gr_time);
145 if (p->p.gr_recovery)
146 buf[0] |= BGP_GRF_RESTART;
147 buf += 2;
148
149 *buf++ = 0; /* Appropriate AF */
150 *buf++ = BGP_AF;
151 *buf++ = 1; /* and SAFI 1 */
152 *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0;
153
154 return buf;
155 }
156
157 static byte *
158 bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf)
159 {
160 *buf++ = 64; /* Capability 64: Support for graceful restart */
161 *buf++ = 2; /* Capability data length */
162 put_u16(buf, 0);
163 return buf + 2;
164 }
165
166 static byte *
167 bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
168 {
169 *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
170 *buf++ = 4; /* Capability data length */
171 put_u32(buf, p->local_as);
172 return buf + 4;
173 }
174
175 static byte *
176 bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
177 {
178 *buf++ = 69; /* Capability 69: Support for ADD-PATH */
179 *buf++ = 4; /* Capability data length */
180
181 *buf++ = 0; /* Appropriate AF */
182 *buf++ = BGP_AF;
183 *buf++ = 1; /* SAFI 1 */
184
185 *buf++ = p->cf->add_path;
186
187 return buf;
188 }
189
190 static byte *
191 bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
192 {
193 *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
194 *buf++ = 0; /* Capability data length */
195 return buf;
196 }
197
198 static byte *
199 bgp_put_cap_llgr1(struct bgp_proto *p, byte *buf)
200 {
201 *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */
202 *buf++ = 7; /* Capability data length */
203
204 *buf++ = 0; /* Appropriate AF */
205 *buf++ = BGP_AF;
206 *buf++ = 1; /* and SAFI 1 */
207
208 /* Next is 8bit flags and 24bit time */
209 put_u32(buf, p->cf->llgr_time);
210 buf[0] = p->p.gr_recovery ? BGP_LLGRF_FORWARDING : 0;
211 buf += 4;
212
213 return buf;
214 }
215
216 static byte *
217 bgp_put_cap_llgr2(struct bgp_proto *p UNUSED, byte *buf)
218 {
219 *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */
220 *buf++ = 0; /* Capability data length */
221 return buf;
222 }
223
224
225 static byte *
226 bgp_create_open(struct bgp_conn *conn, byte *buf)
227 {
228 struct bgp_proto *p = conn->bgp;
229 byte *cap;
230 int cap_len;
231
232 BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
233 BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
234 buf[0] = BGP_VERSION;
235 put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
236 put_u16(buf+3, p->cf->hold_time);
237 put_u32(buf+5, p->local_id);
238
239 if (conn->start_state == BSS_CONNECT_NOCAP)
240 {
241 BGP_TRACE(D_PACKETS, "Skipping capabilities");
242 buf[9] = 0;
243 return buf + 10;
244 }
245
246 /* Skipped 3 B for length field and Capabilities parameter header */
247 cap = buf + 12;
248
249 #ifndef IPV6
250 if (p->cf->advertise_ipv4)
251 cap = bgp_put_cap_ipv4(p, cap);
252 #endif
253
254 #ifdef IPV6
255 cap = bgp_put_cap_ipv6(p, cap);
256 #endif
257
258 if (p->cf->enable_refresh)
259 cap = bgp_put_cap_rr(p, cap);
260
261 if (p->cf->gr_mode == BGP_GR_ABLE)
262 cap = bgp_put_cap_gr1(p, cap);
263 else if (p->cf->gr_mode == BGP_GR_AWARE)
264 cap = bgp_put_cap_gr2(p, cap);
265
266 if (p->cf->enable_as4)
267 cap = bgp_put_cap_as4(p, cap);
268
269 if (p->cf->add_path)
270 cap = bgp_put_cap_add_path(p, cap);
271
272 if (p->cf->enable_refresh)
273 cap = bgp_put_cap_err(p, cap);
274
275 if (p->cf->enable_extended_messages)
276 cap = bgp_put_cap_ext_msg(p, cap);
277
278 if (p->cf->llgr_mode == BGP_LLGR_ABLE)
279 cap = bgp_put_cap_llgr1(p, cap);
280 else if (p->cf->llgr_mode == BGP_LLGR_AWARE)
281 cap = bgp_put_cap_llgr2(p, cap);
282
283 cap_len = cap - buf - 12;
284 if (cap_len > 0)
285 {
286 buf[9] = cap_len + 2; /* Optional params len */
287 buf[10] = 2; /* Option: Capability list */
288 buf[11] = cap_len; /* Option length */
289 return cap;
290 }
291 else
292 {
293 buf[9] = 0; /* No optional parameters */
294 return buf + 10;
295 }
296 }
297
298 static uint
299 bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains)
300 {
301 byte *start = w;
302 ip_addr a;
303 int bytes;
304
305 while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr))))
306 {
307 struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
308 DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
309
310 if (p->add_path_tx)
311 {
312 put_u32(w, px->path_id);
313 w += 4;
314 remains -= 4;
315 }
316
317 *w++ = px->n.pxlen;
318 bytes = (px->n.pxlen + 7) / 8;
319 a = px->n.prefix;
320 ipa_hton(a);
321 memcpy(w, &a, bytes);
322 w += bytes;
323 remains -= bytes + 1;
324 rem_node(&px->bucket_node);
325 bgp_free_prefix(p, px);
326 // fib_delete(&p->prefix_fib, px);
327 }
328 return w - start;
329 }
330
331 static void
332 bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
333 {
334 while (!EMPTY_LIST(buck->prefixes))
335 {
336 struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
337 log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
338 rem_node(&px->bucket_node);
339 bgp_free_prefix(p, px);
340 // fib_delete(&p->prefix_fib, px);
341 }
342 }
343
344 #ifndef IPV6 /* IPv4 version */
345
346 static byte *
347 bgp_create_update(struct bgp_conn *conn, byte *buf)
348 {
349 struct bgp_proto *p = conn->bgp;
350 struct bgp_bucket *buck;
351 int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
352 byte *w;
353 int wd_size = 0;
354 int r_size = 0;
355 int a_size = 0;
356
357 w = buf+2;
358 if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
359 {
360 DBG("Withdrawn routes:\n");
361 wd_size = bgp_encode_prefixes(p, w, buck, remains);
362 w += wd_size;
363 remains -= wd_size;
364 }
365 put_u16(buf, wd_size);
366
367 if (!wd_size)
368 {
369 while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
370 {
371 if (EMPTY_LIST(buck->prefixes))
372 {
373 DBG("Deleting empty bucket %p\n", buck);
374 rem_node(&buck->send_node);
375 bgp_free_bucket(p, buck);
376 continue;
377 }
378
379 DBG("Processing bucket %p\n", buck);
380 a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024);
381
382 if (a_size < 0)
383 {
384 log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
385 bgp_flush_prefixes(p, buck);
386 rem_node(&buck->send_node);
387 bgp_free_bucket(p, buck);
388 continue;
389 }
390
391 put_u16(w, a_size);
392 w += a_size + 2;
393 r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
394 w += r_size;
395 break;
396 }
397 }
398 if (!a_size) /* Attributes not already encoded */
399 {
400 put_u16(w, 0);
401 w += 2;
402 }
403 if (wd_size || r_size)
404 {
405 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
406 return w;
407 }
408 else
409 return NULL;
410 }
411
412 static byte *
413 bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
414 {
415 struct bgp_proto *p = conn->bgp;
416 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
417
418 put_u32(buf, 0);
419 return buf+4;
420 }
421
422 #else /* IPv6 version */
423
424 static inline int
425 same_iface(struct bgp_proto *p, ip_addr *ip)
426 {
427 neighbor *n = neigh_find(&p->p, ip, 0);
428 return n && p->neigh && n->iface == p->neigh->iface;
429 }
430
431 static byte *
432 bgp_create_update(struct bgp_conn *conn, byte *buf)
433 {
434 struct bgp_proto *p = conn->bgp;
435 struct bgp_bucket *buck;
436 int size, second, rem_stored;
437 int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
438 byte *w, *w_stored, *tmp, *tstart;
439 ip_addr *ipp, ip, ip_ll;
440 ea_list *ea;
441 eattr *nh;
442
443 put_u16(buf, 0);
444 w = buf+4;
445
446 if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
447 {
448 DBG("Withdrawn routes:\n");
449 tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
450 *tmp++ = 0;
451 *tmp++ = BGP_AF_IPV6;
452 *tmp++ = 1;
453 ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
454 size = bgp_encode_attrs(p, w, ea, remains);
455 ASSERT(size >= 0);
456 w += size;
457 remains -= size;
458 }
459 else
460 {
461 while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
462 {
463 if (EMPTY_LIST(buck->prefixes))
464 {
465 DBG("Deleting empty bucket %p\n", buck);
466 rem_node(&buck->send_node);
467 bgp_free_bucket(p, buck);
468 continue;
469 }
470
471 DBG("Processing bucket %p\n", buck);
472 rem_stored = remains;
473 w_stored = w;
474
475 size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024);
476 if (size < 0)
477 {
478 log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
479 bgp_flush_prefixes(p, buck);
480 rem_node(&buck->send_node);
481 bgp_free_bucket(p, buck);
482 continue;
483 }
484 w += size;
485 remains -= size;
486
487 /* We have two addresses here in NEXT_HOP eattr. Really.
488 Unless NEXT_HOP was modified by filter */
489 nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
490 ASSERT(nh);
491 second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
492 ipp = (ip_addr *) nh->u.ptr->data;
493 ip = ipp[0];
494 ip_ll = IPA_NONE;
495
496 if (ipa_equal(ip, p->source_addr))
497 ip_ll = p->local_link;
498 else
499 {
500 /* If we send a route with 'third party' next hop destinated
501 * in the same interface, we should also send a link local
502 * next hop address. We use the received one (stored in the
503 * other part of BA_NEXT_HOP eattr). If we didn't received
504 * it (for example it is a static route), we can't use
505 * 'third party' next hop and we have to use local IP address
506 * as next hop. Sending original next hop address without
507 * link local address seems to be a natural way to solve that
508 * problem, but it is contrary to RFC 2545 and Quagga does not
509 * accept such routes.
510 *
511 * There are two cases, either we have global IP, or
512 * IPA_NONE if the neighbor is link-local. For IPA_NONE,
513 * we suppose it is on the same iface, see bgp_update_attrs().
514 */
515
516 if (ipa_zero(ip) || same_iface(p, &ip))
517 {
518 if (second && ipa_nonzero(ipp[1]))
519 ip_ll = ipp[1];
520 else
521 {
522 switch (p->cf->missing_lladdr)
523 {
524 case MLL_SELF:
525 ip = p->source_addr;
526 ip_ll = p->local_link;
527 break;
528 case MLL_DROP:
529 log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
530 w = w_stored;
531 remains = rem_stored;
532 bgp_flush_prefixes(p, buck);
533 rem_node(&buck->send_node);
534 bgp_free_bucket(p, buck);
535 continue;
536 case MLL_IGNORE:
537 break;
538 }
539 }
540 }
541 }
542
543 tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
544 *tmp++ = 0;
545 *tmp++ = BGP_AF_IPV6;
546 *tmp++ = 1;
547
548 if (ipa_is_link_local(ip))
549 ip = IPA_NONE;
550
551 if (ipa_nonzero(ip_ll))
552 {
553 *tmp++ = 32;
554 ipa_hton(ip);
555 memcpy(tmp, &ip, 16);
556 ipa_hton(ip_ll);
557 memcpy(tmp+16, &ip_ll, 16);
558 tmp += 32;
559 }
560 else
561 {
562 *tmp++ = 16;
563 ipa_hton(ip);
564 memcpy(tmp, &ip, 16);
565 tmp += 16;
566 }
567
568 *tmp++ = 0; /* No SNPA information */
569 tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
570 ea->attrs[0].u.ptr->length = tmp - tstart;
571 size = bgp_encode_attrs(p, w, ea, remains);
572 ASSERT(size >= 0);
573 w += size;
574 break;
575 }
576 }
577
578 size = w - (buf+4);
579 put_u16(buf+2, size);
580 lp_flush(bgp_linpool);
581 if (size)
582 {
583 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
584 return w;
585 }
586 else
587 return NULL;
588 }
589
590 static byte *
591 bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
592 {
593 struct bgp_proto *p = conn->bgp;
594 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
595
596 put_u16(buf+0, 0);
597 put_u16(buf+2, 6); /* length 4-9 */
598 buf += 4;
599
600 /* Empty MP_UNREACH_NLRI atribute */
601 *buf++ = BAF_OPTIONAL;
602 *buf++ = BA_MP_UNREACH_NLRI;
603 *buf++ = 3; /* Length 7-9 */
604 *buf++ = 0; /* AFI */
605 *buf++ = BGP_AF_IPV6;
606 *buf++ = 1; /* SAFI */
607 return buf;
608 }
609
610 #endif
611
612 static inline byte *
613 bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
614 {
615 struct bgp_proto *p = conn->bgp;
616 BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
617
618 /* Original original route refresh request, RFC 2918 */
619 *buf++ = 0;
620 *buf++ = BGP_AF;
621 *buf++ = BGP_RR_REQUEST;
622 *buf++ = 1; /* SAFI */
623 return buf;
624 }
625
626 static inline byte *
627 bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf)
628 {
629 struct bgp_proto *p = conn->bgp;
630 BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
631
632 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
633 *buf++ = 0;
634 *buf++ = BGP_AF;
635 *buf++ = BGP_RR_BEGIN;
636 *buf++ = 1; /* SAFI */
637 return buf;
638 }
639
640 static inline byte *
641 bgp_create_end_refresh(struct bgp_conn *conn, byte *buf)
642 {
643 struct bgp_proto *p = conn->bgp;
644 BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
645
646 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
647 *buf++ = 0;
648 *buf++ = BGP_AF;
649 *buf++ = BGP_RR_END;
650 *buf++ = 1; /* SAFI */
651 return buf;
652 }
653
654
655 static void
656 bgp_create_header(byte *buf, uint len, uint type)
657 {
658 memset(buf, 0xff, 16); /* Marker */
659 put_u16(buf+16, len);
660 buf[18] = type;
661 }
662
663 /**
664 * bgp_fire_tx - transmit packets
665 * @conn: connection
666 *
667 * Whenever the transmit buffers of the underlying TCP connection
668 * are free and we have any packets queued for sending, the socket functions
669 * call bgp_fire_tx() which takes care of selecting the highest priority packet
670 * queued (Notification > Keepalive > Open > Update), assembling its header
671 * and body and sending it to the connection.
672 */
673 static int
674 bgp_fire_tx(struct bgp_conn *conn)
675 {
676 struct bgp_proto *p = conn->bgp;
677 uint s = conn->packets_to_send;
678 sock *sk = conn->sk;
679 byte *buf, *pkt, *end;
680 int type;
681
682 if (!sk)
683 {
684 conn->packets_to_send = 0;
685 return 0;
686 }
687 buf = sk->tbuf;
688 pkt = buf + BGP_HEADER_LENGTH;
689
690 if (s & (1 << PKT_SCHEDULE_CLOSE))
691 {
692 /* We can finally close connection and enter idle state */
693 bgp_conn_enter_idle_state(conn);
694 return 0;
695 }
696 if (s & (1 << PKT_NOTIFICATION))
697 {
698 s = 1 << PKT_SCHEDULE_CLOSE;
699 type = PKT_NOTIFICATION;
700 end = bgp_create_notification(conn, pkt);
701 }
702 else if (s & (1 << PKT_KEEPALIVE))
703 {
704 s &= ~(1 << PKT_KEEPALIVE);
705 type = PKT_KEEPALIVE;
706 end = pkt; /* Keepalives carry no data */
707 BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
708 bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
709 }
710 else if (s & (1 << PKT_OPEN))
711 {
712 s &= ~(1 << PKT_OPEN);
713 type = PKT_OPEN;
714 end = bgp_create_open(conn, pkt);
715 }
716 else if (s & (1 << PKT_ROUTE_REFRESH))
717 {
718 s &= ~(1 << PKT_ROUTE_REFRESH);
719 type = PKT_ROUTE_REFRESH;
720 end = bgp_create_route_refresh(conn, pkt);
721 }
722 else if (s & (1 << PKT_BEGIN_REFRESH))
723 {
724 s &= ~(1 << PKT_BEGIN_REFRESH);
725 type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */
726 end = bgp_create_begin_refresh(conn, pkt);
727 }
728 else if (s & (1 << PKT_UPDATE))
729 {
730 type = PKT_UPDATE;
731 end = bgp_create_update(conn, pkt);
732
733 if (!end)
734 {
735 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
736
737 conn->packets_to_send = 0;
738
739 if (p->feed_state == BFS_LOADED)
740 {
741 type = PKT_UPDATE;
742 end = bgp_create_end_mark(conn, pkt);
743 }
744
745 else if (p->feed_state == BFS_REFRESHED)
746 {
747 type = PKT_ROUTE_REFRESH;
748 end = bgp_create_end_refresh(conn, pkt);
749 }
750
751 else /* Really nothing to send */
752 return 0;
753
754 p->feed_state = BFS_NONE;
755 }
756 }
757 else
758 return 0;
759
760 conn->packets_to_send = s;
761 bgp_create_header(buf, end - buf, type);
762 return sk_send(sk, end - buf);
763 }
764
765 /**
766 * bgp_schedule_packet - schedule a packet for transmission
767 * @conn: connection
768 * @type: packet type
769 *
770 * Schedule a packet of type @type to be sent as soon as possible.
771 */
772 void
773 bgp_schedule_packet(struct bgp_conn *conn, int type)
774 {
775 DBG("BGP: Scheduling packet type %d\n", type);
776 conn->packets_to_send |= 1 << type;
777 if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev))
778 ev_schedule(conn->tx_ev);
779 }
780
/*
 * Transmit hook taking the connection as an opaque pointer @vconn.
 * Calls bgp_fire_tx() repeatedly for as long as it returns a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");

  for (;;)
    if (bgp_fire_tx(conn) <= 0)
      break;
}
790
791 void
792 bgp_tx(sock *sk)
793 {
794 struct bgp_conn *conn = sk->data;
795
796 DBG("BGP: TX hook\n");
797 while (bgp_fire_tx(conn) > 0)
798 ;
799 }
800
/* Capability negotiation as per RFC 2842 */
802
803 void
804 bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
805 {
806 // struct bgp_proto *p = conn->bgp;
807 int i, cl;
808
809 while (len > 0)
810 {
811 if (len < 2 || len < 2 + opt[1])
812 goto err;
813
814 cl = opt[1];
815
816 switch (opt[0])
817 {
818 case 2: /* Route refresh capability, RFC 2918 */
819 if (cl != 0)
820 goto err;
821 conn->peer_refresh_support = 1;
822 break;
823
824 case 6: /* Extended message length capability, draft */
825 if (cl != 0)
826 goto err;
827 conn->peer_ext_messages_support = 1;
828 break;
829
830 case 64: /* Graceful restart capability, RFC 4724 */
831 if (cl % 4 != 2)
832 goto err;
833 conn->peer_gr_aware = 1;
834 conn->peer_gr_able = 0;
835 conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
836 conn->peer_gr_flags = opt[2] & 0xf0;
837 conn->peer_gr_aflags = 0;
838 for (i = 2; i < cl; i += 4)
839 if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
840 {
841 conn->peer_gr_able = 1;
842 conn->peer_gr_aflags = opt[2+i+3];
843 }
844 break;
845
846 case 65: /* AS4 capability, RFC 4893 */
847 if (cl != 4)
848 goto err;
849 conn->peer_as4_support = 1;
850 if (conn->bgp->cf->enable_as4)
851 conn->advertised_as = get_u32(opt + 2);
852 break;
853
854 case 69: /* ADD-PATH capability, RFC 7911 */
855 if (cl % 4)
856 goto err;
857 for (i = 0; i < cl; i += 4)
858 if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
859 conn->peer_add_path = opt[2+i+3];
860 if (conn->peer_add_path > ADD_PATH_FULL)
861 goto err;
862 break;
863
864 case 70: /* Enhanced route refresh capability, RFC 7313 */
865 if (cl != 0)
866 goto err;
867 conn->peer_enhanced_refresh_support = 1;
868 break;
869
870 case 71: /* Long-lived graceful restart capability, RFC draft */
871 if (cl % 7)
872 goto err;
873 conn->peer_llgr_aware = 1;
874 conn->peer_llgr_able = 0;
875 conn->peer_llgr_time = 0;
876 conn->peer_llgr_aflags = 0;
877 for (i = 0; i < cl; i += 7)
878 if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
879 {
880 conn->peer_llgr_able = 1;
881 conn->peer_llgr_time = get_u32(opt + 2+i+3) & 0xffffff;
882 conn->peer_llgr_aflags = opt[2+i+3];
883 }
884 break;
885
886 /* We can safely ignore all other capabilities */
887 }
888 len -= 2 + cl;
889 opt += 2 + cl;
890 }
891
892 /* The LLGR capability must be advertised together with the GR capability,
893 otherwise it must be disregarded */
894 if (!conn->peer_gr_aware && conn->peer_llgr_aware)
895 {
896 conn->peer_llgr_aware = 0;
897 conn->peer_llgr_able = 0;
898 conn->peer_llgr_time = 0;
899 conn->peer_llgr_aflags = 0;
900 }
901
902 return;
903
904 err:
905 bgp_error(conn, 2, 0, NULL, 0);
906 return;
907 }
908
909 static int
910 bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
911 {
912 struct bgp_proto *p = conn->bgp;
913 int ol;
914
915 while (len > 0)
916 {
917 if (len < 2 || len < 2 + opt[1])
918 { bgp_error(conn, 2, 0, NULL, 0); return 0; }
919 #ifdef LOCAL_DEBUG
920 {
921 int i;
922 DBG("\tOption %02x:", opt[0]);
923 for(i=0; i<opt[1]; i++)
924 DBG(" %02x", opt[2+i]);
925 DBG("\n");
926 }
927 #endif
928
929 ol = opt[1];
930 switch (opt[0])
931 {
932 case 2:
933 if (conn->start_state == BSS_CONNECT_NOCAP)
934 BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
935 else
936 bgp_parse_capabilities(conn, opt + 2, ol);
937 break;
938
939 default:
940 /*
941 * BGP specs don't tell us to send which option
942 * we didn't recognize, but it's common practice
943 * to do so. Also, capability negotiation with
944 * Cisco routers doesn't work without that.
945 */
946 bgp_error(conn, 2, 4, opt, ol);
947 return 0;
948 }
949 len -= 2 + ol;
950 opt += 2 + ol;
951 }
952 return 0;
953 }
954
955 static void
956 bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
957 {
958 struct bgp_conn *other;
959 struct bgp_proto *p = conn->bgp;
960 unsigned hold;
961 u16 base_as;
962 u32 id;
963
964 /* Check state */
965 if (conn->state != BS_OPENSENT)
966 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
967
968 /* Check message contents */
969 if (len < 29 || len != 29U + pkt[28])
970 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
971 if (pkt[19] != BGP_VERSION)
972 { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
973 conn->advertised_as = base_as = get_u16(pkt+20);
974 hold = get_u16(pkt+22);
975 id = get_u32(pkt+24);
976 BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
977
978 if (bgp_parse_options(conn, pkt+29, pkt[28]))
979 return;
980
981 if (hold > 0 && hold < 3)
982 { bgp_error(conn, 2, 6, pkt+22, 2); return; }
983
984 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
985 if (!id || (p->is_internal && id == p->local_id))
986 { bgp_error(conn, 2, 3, pkt+24, -4); return; }
987
988 if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
989 log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
990
991 if (conn->advertised_as != p->remote_as)
992 {
993 if (conn->peer_as4_support)
994 {
995 u32 val = htonl(conn->advertised_as);
996 bgp_error(conn, 2, 2, (byte *) &val, 4);
997 }
998 else
999 bgp_error(conn, 2, 2, pkt+20, 2);
1000
1001 return;
1002 }
1003
1004 /* Check the other connection */
1005 other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
1006 switch (other->state)
1007 {
1008 case BS_CONNECT:
1009 case BS_ACTIVE:
1010 /* Stop outgoing connection attempts */
1011 bgp_conn_enter_idle_state(other);
1012 break;
1013
1014 case BS_IDLE:
1015 case BS_OPENSENT:
1016 case BS_CLOSE:
1017 break;
1018
1019 case BS_OPENCONFIRM:
1020 /*
1021 * Description of collision detection rules in RFC 4271 is confusing and
1022 * contradictory, but it is essentially:
1023 *
1024 * 1. Router with higher ID is dominant
1025 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
1026 * 3. When both connections are in OpenConfirm state, one initiated by
1027 * the dominant router is kept.
1028 *
1029 * The first line in the expression below evaluates whether the neighbor
1030 * is dominant, the second line whether the new connection was initiated
1031 * by the neighbor. If both are true (or both are false), we keep the new
1032 * connection, otherwise we keep the old one.
1033 */
1034 if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as)))
1035 == (conn == &p->incoming_conn))
1036 {
1037 /* Should close the other connection */
1038 BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
1039 bgp_error(other, 6, 7, NULL, 0);
1040 break;
1041 }
1042 /* Fall thru */
1043 case BS_ESTABLISHED:
1044 /* Should close this connection */
1045 BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
1046 bgp_error(conn, 6, 7, NULL, 0);
1047 return;
1048 default:
1049 bug("bgp_rx_open: Unknown state");
1050 }
1051
1052 /* Update our local variables */
1053 conn->hold_time = MIN(hold, p->cf->hold_time);
1054 conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
1055 p->remote_id = id;
1056 p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
1057 p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
1058 p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
1059 p->gr_ready = (p->cf->gr_mode && conn->peer_gr_able) ||
1060 (p->cf->llgr_mode && conn->peer_llgr_able);
1061 p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support;
1062
1063 /* Update RA mode */
1064 if (p->add_path_tx)
1065 p->p.accept_ra_types = RA_ANY;
1066 else if (p->cf->secondary)
1067 p->p.accept_ra_types = RA_ACCEPTED;
1068 else
1069 p->p.accept_ra_types = RA_OPTIMAL;
1070
1071 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
1072
1073 bgp_schedule_packet(conn, PKT_KEEPALIVE);
1074 bgp_start_timer(conn->hold_timer, conn->hold_time);
1075 bgp_conn_enter_openconfirm_state(conn);
1076 }
1077
1078
1079 static inline void
1080 bgp_rx_end_mark(struct bgp_proto *p)
1081 {
1082 BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
1083
1084 if (p->load_state == BFS_LOADING)
1085 p->load_state = BFS_NONE;
1086
1087 if (p->p.gr_recovery)
1088 proto_graceful_restart_unlock(&p->p);
1089
1090 if (p->gr_active)
1091 bgp_graceful_restart_done(p);
1092 }
1093
1094
/*
 * Decode one prefix from the buffer @pp of remaining length @ll, advancing
 * both. Expects the variables p, path_id, prefix, pxlen and err and the label
 * done in the enclosing function. With ADD-PATH RX active, a 4-byte path ID
 * precedes each prefix. On malformed input, err is set (1 for a truncated
 * entry, 10 for a prefix length above BITS_PER_IP_ADDRESS) and control jumps
 * to done. Bits beyond the prefix length are masked out of the result.
 */
#define DECODE_PREFIX(pp, ll) do {				\
  if (p->add_path_rx)						\
  {								\
    if (ll < 5) { err=1; goto done; }				\
    path_id = get_u32(pp);					\
    pp += 4;							\
    ll -= 4;							\
  }								\
  int px_bits = *pp++;						\
  int px_bytes;							\
  ll--;								\
  if (px_bits > BITS_PER_IP_ADDRESS) { err=10; goto done; }	\
  px_bytes = (px_bits+7) / 8;					\
  if (ll < px_bytes) { err=1; goto done; }			\
  memcpy(&prefix, pp, px_bytes);				\
  pp += px_bytes;						\
  ll -= px_bytes;						\
  ipa_ntoh(prefix);						\
  prefix = ipa_and(prefix, ipa_mkmask(px_bits));		\
  pxlen = px_bits;						\
} while (0)
1116
1117
1118 static inline void
1119 bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
1120 u32 path_id, u32 *last_id, struct rte_src **src,
1121 rta *a0, rta **a)
1122 {
1123 if (path_id != *last_id)
1124 {
1125 *src = rt_get_source(&p->p, path_id);
1126 *last_id = path_id;
1127
1128 if (*a)
1129 {
1130 rta_free(*a);
1131 *a = NULL;
1132 }
1133 }
1134
1135 /* Prepare cached route attributes */
1136 if (!*a)
1137 {
1138 a0->src = *src;
1139
1140 /* Workaround for rta_lookup() breaking eattrs */
1141 ea_list *ea = a0->eattrs;
1142 *a = rta_lookup(a0);
1143 a0->eattrs = ea;
1144 }
1145
1146 net *n = net_get(p->p.table, prefix, pxlen);
1147 rte *e = rte_get_temp(rta_clone(*a));
1148 e->net = n;
1149 e->pflags = 0;
1150 e->u.bgp.suppressed = 0;
1151 e->u.bgp.stale = -1;
1152 rte_update2(p->p.main_ahook, n, e, *src);
1153 }
1154
1155 static inline void
1156 bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
1157 u32 path_id, u32 *last_id, struct rte_src **src)
1158 {
1159 if (path_id != *last_id)
1160 {
1161 *src = rt_find_source(&p->p, path_id);
1162 *last_id = path_id;
1163 }
1164
1165 net *n = net_find(p->p.table, prefix, pxlen);
1166 rte_update2( p->p.main_ahook, n, NULL, *src);
1167 }
1168
1169 static inline int
1170 bgp_set_next_hop(struct bgp_proto *p, rta *a)
1171 {
1172 struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
1173 ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
1174
1175 #ifdef IPV6
1176 int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
1177
1178 /* First address should not be link-local, but may be zero in direct mode */
1179 if (ipa_is_link_local(*nexthop))
1180 *nexthop = IPA_NONE;
1181 #else
1182 int second = 0;
1183 #endif
1184
1185 if (p->cf->gw_mode == GW_DIRECT)
1186 {
1187 neighbor *ng = NULL;
1188
1189 if (ipa_nonzero(*nexthop))
1190 ng = neigh_find(&p->p, nexthop, 0);
1191 else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
1192 ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0);
1193
1194 /* Fallback */
1195 if (!ng)
1196 ng = p->neigh;
1197
1198 if (ng->scope == SCOPE_HOST)
1199 return 0;
1200
1201 a->dest = RTD_ROUTER;
1202 a->gw = ng->addr;
1203 a->iface = ng->iface;
1204 a->hostentry = NULL;
1205 a->igp_metric = 0;
1206 }
1207 else /* GW_RECURSIVE */
1208 {
1209 if (ipa_zero(*nexthop))
1210 return 0;
1211
1212 rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second);
1213 }
1214
1215 return 1;
1216 }
1217
1218 #ifndef IPV6 /* IPv4 version */
1219
1220 static void
1221 bgp_do_rx_update(struct bgp_conn *conn,
1222 byte *withdrawn, int withdrawn_len,
1223 byte *nlri, int nlri_len,
1224 byte *attrs, int attr_len)
1225 {
1226 struct bgp_proto *p = conn->bgp;
1227 struct rte_src *src = p->p.main_source;
1228 rta *a0, *a = NULL;
1229 ip_addr prefix;
1230 int pxlen, err = 0;
1231 u32 path_id = 0;
1232 u32 last_id = 0;
1233
1234 /* Check for End-of-RIB marker */
1235 if (!withdrawn_len && !attr_len && !nlri_len)
1236 {
1237 bgp_rx_end_mark(p);
1238 return;
1239 }
1240
1241 /* Withdraw routes */
1242 while (withdrawn_len)
1243 {
1244 DECODE_PREFIX(withdrawn, withdrawn_len);
1245 DBG("Withdraw %I/%d\n", prefix, pxlen);
1246
1247 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1248 }
1249
1250 if (!attr_len && !nlri_len) /* shortcut */
1251 return;
1252
1253 a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
1254
1255 if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1256 return;
1257
1258 if (a0 && nlri_len && !bgp_set_next_hop(p, a0))
1259 a0 = NULL;
1260
1261 last_id = 0;
1262 src = p->p.main_source;
1263
1264 while (nlri_len)
1265 {
1266 DECODE_PREFIX(nlri, nlri_len);
1267 DBG("Add %I/%d\n", prefix, pxlen);
1268
1269 if (a0)
1270 bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1271 else /* Forced withdraw as a result of soft error */
1272 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1273 }
1274
1275 done:
1276 if (a)
1277 rta_free(a);
1278
1279 if (err)
1280 bgp_error(conn, 3, err, NULL, 0);
1281
1282 return;
1283 }
1284
1285 #else /* IPv6 version */
1286
/*
 * DO_NLRI(name) - open the p->name##_start / p->name##_len byte range as a
 * multiprotocol NLRI block and guard the statement that follows.
 *
 * Expects these names in the enclosing function: p, x, len, len0, af, err
 * and a done label. For a non-empty range it reads the 16-bit AF, skips
 * three bytes in total and leaves x/len at the remaining data; a range
 * shorter than three bytes sets err to 9 and jumps to done. An empty range
 * yields af == 0. The macro ends in an unterminated
 * "if (af == BGP_AF_IPV6)", so the statement or block written right after
 * the invocation runs only for the IPv6 address family.
 */
#define DO_NLRI(name)                                           \
  x = p->name##_start;                                          \
  len0 = p->name##_len;                                         \
  len = len0;                                                   \
  if (len == 0)                                                 \
    af = 0;                                                     \
  else                                                          \
  {                                                             \
    if (len < 3)                                                \
    {                                                           \
      err = 9;                                                  \
      goto done;                                                \
    }                                                           \
    af = get_u16(x);                                            \
    len -= 3;                                                   \
    x += 3;                                                     \
    DBG("\tNLRI AF=%d sub=%d len=%d\n", af, x[-1], len);        \
  }                                                             \
  if (af == BGP_AF_IPV6)
1301
1302 static void
1303 bgp_attach_next_hop(rta *a0, byte *x)
1304 {
1305 ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
1306 memcpy(nh, x+1, 16);
1307 ipa_ntoh(nh[0]);
1308
1309 /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
1310 if (*x == 32)
1311 {
1312 memcpy(nh+1, x+17, 16);
1313 ipa_ntoh(nh[1]);
1314 }
1315 else
1316 nh[1] = IPA_NONE;
1317 }
1318
1319
1320 static void
1321 bgp_do_rx_update(struct bgp_conn *conn,
1322 byte *withdrawn UNUSED, int withdrawn_len,
1323 byte *nlri UNUSED, int nlri_len,
1324 byte *attrs, int attr_len)
1325 {
1326 struct bgp_proto *p = conn->bgp;
1327 struct rte_src *src = p->p.main_source;
1328 byte *x;
1329 int len, len0;
1330 unsigned af;
1331 rta *a0, *a = NULL;
1332 ip_addr prefix;
1333 int pxlen, err = 0;
1334 u32 path_id = 0;
1335 u32 last_id = 0;
1336
1337 p->mp_reach_len = 0;
1338 p->mp_unreach_len = 0;
1339 a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
1340
1341 if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1342 return;
1343
1344 /* Check for End-of-RIB marker */
1345 if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len &&
1346 (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
1347 {
1348 bgp_rx_end_mark(p);
1349 return;
1350 }
1351
1352 DO_NLRI(mp_unreach)
1353 {
1354 while (len)
1355 {
1356 DECODE_PREFIX(x, len);
1357 DBG("Withdraw %I/%d\n", prefix, pxlen);
1358 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1359 }
1360 }
1361
1362 DO_NLRI(mp_reach)
1363 {
1364 /* Create fake NEXT_HOP attribute */
1365 if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
1366 { err = 9; goto done; }
1367
1368 if (a0)
1369 bgp_attach_next_hop(a0, x);
1370
1371 /* Also ignore one reserved byte */
1372 len -= *x + 2;
1373 x += *x + 2;
1374
1375 if (a0 && ! bgp_set_next_hop(p, a0))
1376 a0 = NULL;
1377
1378 last_id = 0;
1379 src = p->p.main_source;
1380
1381 while (len)
1382 {
1383 DECODE_PREFIX(x, len);
1384 DBG("Add %I/%d\n", prefix, pxlen);
1385
1386 if (a0)
1387 bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1388 else /* Forced withdraw as a result of soft error */
1389 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1390 }
1391 }
1392
1393 done:
1394 if (a)
1395 rta_free(a);
1396
1397 if (err) /* Use subcode 9, not err */
1398 bgp_error(conn, 3, 9, NULL, 0);
1399
1400 return;
1401 }
1402
1403 #endif
1404
1405 static void
1406 bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
1407 {
1408 struct bgp_proto *p = conn->bgp;
1409 byte *withdrawn, *attrs, *nlri;
1410 uint withdrawn_len, attr_len, nlri_len;
1411
1412 BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
1413
1414 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1415 if (conn->state == BS_OPENCONFIRM)
1416 bgp_conn_enter_established_state(conn);
1417
1418 if (conn->state != BS_ESTABLISHED)
1419 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1420 bgp_start_timer(conn->hold_timer, conn->hold_time);
1421
1422 /* Find parts of the packet and check sizes */
1423 if (len < 23)
1424 {
1425 bgp_error(conn, 1, 2, pkt+16, 2);
1426 return;
1427 }
1428 withdrawn = pkt + 21;
1429 withdrawn_len = get_u16(pkt + 19);
1430 if (withdrawn_len + 23 > len)
1431 goto malformed;
1432 attrs = withdrawn + withdrawn_len + 2;
1433 attr_len = get_u16(attrs - 2);
1434 if (withdrawn_len + attr_len + 23 > len)
1435 goto malformed;
1436 nlri = attrs + attr_len;
1437 nlri_len = len - withdrawn_len - attr_len - 23;
1438 if (!attr_len && nlri_len)
1439 goto malformed;
1440 DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
1441
1442 lp_flush(bgp_linpool);
1443
1444 bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
1445 return;
1446
1447 malformed:
1448 bgp_error(conn, 3, 1, NULL, 0);
1449 }
1450
1451 static struct {
1452 byte major, minor;
1453 byte *msg;
1454 } bgp_msg_table[] = {
1455 { 1, 0, "Invalid message header" },
1456 { 1, 1, "Connection not synchronized" },
1457 { 1, 2, "Bad message length" },
1458 { 1, 3, "Bad message type" },
1459 { 2, 0, "Invalid OPEN message" },
1460 { 2, 1, "Unsupported version number" },
1461 { 2, 2, "Bad peer AS" },
1462 { 2, 3, "Bad BGP identifier" },
1463 { 2, 4, "Unsupported optional parameter" },
1464 { 2, 5, "Authentication failure" },
1465 { 2, 6, "Unacceptable hold time" },
1466 { 2, 7, "Required capability missing" }, /* [RFC5492] */
1467 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1468 { 3, 0, "Invalid UPDATE message" },
1469 { 3, 1, "Malformed attribute list" },
1470 { 3, 2, "Unrecognized well-known attribute" },
1471 { 3, 3, "Missing mandatory attribute" },
1472 { 3, 4, "Invalid attribute flags" },
1473 { 3, 5, "Invalid attribute length" },
1474 { 3, 6, "Invalid ORIGIN attribute" },
1475 { 3, 7, "AS routing loop" }, /* Deprecated */
1476 { 3, 8, "Invalid NEXT_HOP attribute" },
1477 { 3, 9, "Optional attribute error" },
1478 { 3, 10, "Invalid network field" },
1479 { 3, 11, "Malformed AS_PATH" },
1480 { 4, 0, "Hold timer expired" },
1481 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1482 { 5, 1, "Unexpected message in OpenSent state" },
1483 { 5, 2, "Unexpected message in OpenConfirm state" },
1484 { 5, 3, "Unexpected message in Established state" },
1485 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1486 { 6, 1, "Maximum number of prefixes reached" },
1487 { 6, 2, "Administrative shutdown" },
1488 { 6, 3, "Peer de-configured" },
1489 { 6, 4, "Administrative reset" },
1490 { 6, 5, "Connection rejected" },
1491 { 6, 6, "Other configuration change" },
1492 { 6, 7, "Connection collision resolution" },
1493 { 6, 8, "Out of Resources" },
1494 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
1495 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
1496 };
1497
1498 /**
1499 * bgp_error_dsc - return BGP error description
1500 * @code: BGP error code
1501 * @subcode: BGP error subcode
1502 *
1503 * bgp_error_dsc() returns error description for BGP errors
1504 * which might be static string or given temporary buffer.
1505 */
1506 const char *
1507 bgp_error_dsc(unsigned code, unsigned subcode)
1508 {
1509 static char buff[32];
1510 unsigned i;
1511 for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1512 if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1513 {
1514 return bgp_msg_table[i].msg;
1515 }
1516
1517 bsprintf(buff, "Unknown error %d.%d", code, subcode);
1518 return buff;
1519 }
1520
1521 /* RFC 8203 - shutdown communication message */
1522 static int
1523 bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
1524 {
1525 byte *msg = data + 1;
1526 uint msg_len = data[0];
1527 uint i;
1528
1529 /* Handle zero length message */
1530 if (msg_len == 0)
1531 return 1;
1532
1533 /* Handle proper message */
1534 if ((msg_len > 128) && (msg_len + 1 > len))
1535 return 0;
1536
1537 /* Some elementary cleanup */
1538 for (i = 0; i < msg_len; i++)
1539 if (msg[i] < ' ')
1540 msg[i] = ' ';
1541
1542 proto_set_message(&p->p, msg, msg_len);
1543 *bp += bsprintf(*bp, ": \"%s\"", p->p.message);
1544 return 1;
1545 }
1546
1547 void
1548 bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
1549 {
1550 byte argbuf[256], *t = argbuf;
1551 unsigned i;
1552
1553 /* Don't report Cease messages generated by myself */
1554 if (code == 6 && class == BE_BGP_TX)
1555 return;
1556
1557 /* Reset shutdown message */
1558 if ((code == 6) && ((subcode == 2) || (subcode == 4)))
1559 proto_set_message(&p->p, NULL, 0);
1560
1561 if (len)
1562 {
1563 /* Bad peer AS - we would like to print the AS */
1564 if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1565 {
1566 t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
1567 goto done;
1568 }
1569
1570 /* RFC 8203 - shutdown communication */
1571 if (((code == 6) && ((subcode == 2) || (subcode == 4))))
1572 if (bgp_handle_message(p, data, len, &t))
1573 goto done;
1574
1575 *t++ = ':';
1576 *t++ = ' ';
1577 if (len > 16)
1578 len = 16;
1579 for (i=0; i<len; i++)
1580 t += bsprintf(t, "%02x", data[i]);
1581 }
1582
1583 done:
1584 *t = 0;
1585 const byte *dsc = bgp_error_dsc(code, subcode);
1586 log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, dsc, argbuf);
1587 }
1588
1589 static void
1590 bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
1591 {
1592 struct bgp_proto *p = conn->bgp;
1593 if (len < 21)
1594 {
1595 bgp_error(conn, 1, 2, pkt+16, 2);
1596 return;
1597 }
1598
1599 unsigned code = pkt[19];
1600 unsigned subcode = pkt[20];
1601 int err = (code != 6);
1602
1603 bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1604 bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1605
1606 #ifndef IPV6
1607 if ((code == 2) && ((subcode == 4) || (subcode == 7))
1608 /* Error related to capability:
1609 * 4 - Peer does not support capabilities at all.
1610 * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
1611 */
1612 && (p->cf->capabilities == 2)
1613 /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1614 && (conn->start_state == BSS_CONNECT)
1615 /* Failed connection attempt have used capabilities */
1616 && (p->cf->remote_as <= 0xFFFF))
1617 /* Not possible with disabled capabilities */
1618 {
1619 /* We try connect without capabilities */
1620 log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
1621 p->start_state = BSS_CONNECT_NOCAP;
1622 err = 0;
1623 }
1624 #endif
1625
1626 bgp_conn_enter_close_state(conn);
1627 bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1628
1629 if (err)
1630 {
1631 bgp_update_startup_delay(p);
1632 bgp_stop(p, 0, NULL, 0);
1633 }
1634 else
1635 {
1636 uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0);
1637 if (p->cf->disable_after_cease & subcode_bit)
1638 {
1639 log(L_INFO "%s: Disabled after Cease notification", p->p.name);
1640 p->startup_delay = 0;
1641 p->p.disabled = 1;
1642 }
1643 }
1644 }
1645
1646 static void
1647 bgp_rx_keepalive(struct bgp_conn *conn)
1648 {
1649 struct bgp_proto *p = conn->bgp;
1650
1651 BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1652 bgp_start_timer(conn->hold_timer, conn->hold_time);
1653 switch (conn->state)
1654 {
1655 case BS_OPENCONFIRM:
1656 bgp_conn_enter_established_state(conn);
1657 break;
1658 case BS_ESTABLISHED:
1659 break;
1660 default:
1661 bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
1662 }
1663 }
1664
1665 static void
1666 bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
1667 {
1668 struct bgp_proto *p = conn->bgp;
1669
1670 if (conn->state != BS_ESTABLISHED)
1671 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1672
1673 if (!p->cf->enable_refresh)
1674 { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1675
1676 if (len < (BGP_HEADER_LENGTH + 4))
1677 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1678
1679 if (len > (BGP_HEADER_LENGTH + 4))
1680 { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
1681
1682 /* FIXME - we ignore AFI/SAFI values, as we support
1683 just one value and even an error code for an invalid
1684 request is not defined */
1685
1686 /* RFC 7313 redefined reserved field as RR message subtype */
1687 uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST;
1688
1689 switch (subtype)
1690 {
1691 case BGP_RR_REQUEST:
1692 BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1693 proto_request_feeding(&p->p);
1694 break;
1695
1696 case BGP_RR_BEGIN:
1697 BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
1698 bgp_refresh_begin(p);
1699 break;
1700
1701 case BGP_RR_END:
1702 BGP_TRACE(D_PACKETS, "Got END-OF-RR");
1703 bgp_refresh_end(p);
1704 break;
1705
1706 default:
1707 log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
1708 p->p.name, subtype);
1709 break;
1710 }
1711 }
1712
1713
1714 /**
1715 * bgp_rx_packet - handle a received packet
1716 * @conn: BGP connection
1717 * @pkt: start of the packet
1718 * @len: packet size
1719 *
1720 * bgp_rx_packet() takes a newly received packet and calls the corresponding
1721 * packet handler according to the packet type.
1722 */
1723 static void
1724 bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1725 {
1726 byte type = pkt[18];
1727
1728 DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
1729
1730 if (conn->bgp->p.mrtdump & MD_MESSAGES)
1731 bgp_dump_message(conn, pkt, len);
1732
1733 switch (type)
1734 {
1735 case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
1736 case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
1737 case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
1738 case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
1739 case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
1740 default: bgp_error(conn, 1, 3, pkt+18, 1);
1741 }
1742 }
1743
1744 /**
1745 * bgp_rx - handle received data
1746 * @sk: socket
1747 * @size: amount of data received
1748 *
1749 * bgp_rx() is called by the socket layer whenever new data arrive from
1750 * the underlying TCP connection. It assembles the data fragments to packets,
1751 * checks their headers and framing and passes complete packets to
1752 * bgp_rx_packet().
1753 */
1754 int
1755 bgp_rx(sock *sk, uint size)
1756 {
1757 struct bgp_conn *conn = sk->data;
1758 struct bgp_proto *p = conn->bgp;
1759 byte *pkt_start = sk->rbuf;
1760 byte *end = pkt_start + size;
1761 unsigned i, len;
1762
1763 DBG("BGP: RX hook: Got %d bytes\n", size);
1764 while (end >= pkt_start + BGP_HEADER_LENGTH)
1765 {
1766 if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1767 return 0;
1768 for(i=0; i<16; i++)
1769 if (pkt_start[i] != 0xff)
1770 {
1771 bgp_error(conn, 1, 1, NULL, 0);
1772 break;
1773 }
1774 len = get_u16(pkt_start+16);
1775 if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p))
1776 {
1777 bgp_error(conn, 1, 2, pkt_start+16, 2);
1778 break;
1779 }
1780 if (end < pkt_start + len)
1781 break;
1782 bgp_rx_packet(conn, pkt_start, len);
1783 pkt_start += len;
1784 }
1785 if (pkt_start != sk->rbuf)
1786 {
1787 memmove(sk->rbuf, pkt_start, end - pkt_start);
1788 sk->rpos = sk->rbuf + (end - pkt_start);
1789 }
1790 return 0;
1791 }