]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
ab87bdcc6d0438cf6a223844388c189ad44b04f1
[thirdparty/bird.git] / proto / bgp / packets.c
1 /*
2 * BIRD -- BGP Packet Processing
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 #undef LOCAL_DEBUG
10
11 #include "nest/bird.h"
12 #include "nest/iface.h"
13 #include "nest/protocol.h"
14 #include "nest/route.h"
15 #include "nest/attrs.h"
16 #include "nest/mrtdump.h"
17 #include "conf/conf.h"
18 #include "lib/unaligned.h"
19 #include "lib/socket.h"
20
21 #include "nest/cli.h"
22
23 #include "bgp.h"
24
25
/* Subtype values written into the third byte of a ROUTE-REFRESH message body */
#define BGP_RR_REQUEST  0   /* route refresh request (RFC 2918) */
#define BGP_RR_BEGIN    1   /* beginning of route refresh, BoRR (RFC 7313) */
#define BGP_RR_END      2   /* end of route refresh, EoRR (RFC 7313) */
29
30
31 static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
32 static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
33
34 /* Table for state -> RFC 6608 FSM error subcodes */
35 static byte fsm_err_subcode[BS_MAX] = {
36 [BS_OPENSENT] = 1,
37 [BS_OPENCONFIRM] = 2,
38 [BS_ESTABLISHED] = 3
39 };
40
41 /*
42 * MRT Dump format is not semantically specified.
43 * We will use these values in appropriate fields:
44 *
45 * Local AS, Remote AS - configured AS numbers for given BGP instance.
46 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
47 *
48 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
49 * changes) and MESSAGE (for received BGP messages).
50 *
51 * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
52 * only when AS4 session is established and even in that case MESSAGE
53 * does not use AS4 variant for initial OPEN message. This strange
54 * behavior is here for compatibility with Quagga and Bgpdump.
55 */
56
57 static byte *
58 mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
59 {
60 struct bgp_proto *p = conn->bgp;
61
62 if (as4)
63 {
64 put_u32(buf+0, p->remote_as);
65 put_u32(buf+4, p->local_as);
66 buf+=8;
67 }
68 else
69 {
70 put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
71 put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS);
72 buf+=4;
73 }
74
75 put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
76 put_u16(buf+2, BGP_AF);
77 buf+=4;
78 buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
79 buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE);
80
81 return buf;
82 }
83
84 static void
85 mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
86 {
87 byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
88 byte *bp = buf + MRTDUMP_HDR_LENGTH;
89 int as4 = conn->bgp->as4_session;
90
91 bp = mrt_put_bgp4_hdr(bp, conn, as4);
92 memcpy(bp, pkt, len);
93 bp += len;
94 mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
95 buf, bp-buf);
96 }
97
98 static inline u16
99 convert_state(unsigned state)
100 {
101 /* Convert state from our BS_* values to values used in MRTDump */
102 return (state == BS_CLOSE) ? 1 : state + 1;
103 }
104
105 void
106 mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new)
107 {
108 byte buf[128];
109 byte *bp = buf + MRTDUMP_HDR_LENGTH;
110
111 bp = mrt_put_bgp4_hdr(bp, conn, 1);
112 put_u16(bp+0, convert_state(old));
113 put_u16(bp+2, convert_state(new));
114 bp += 4;
115 mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
116 }
117
118 static byte *
119 bgp_create_notification(struct bgp_conn *conn, byte *buf)
120 {
121 struct bgp_proto *p = conn->bgp;
122
123 BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
124 buf[0] = conn->notify_code;
125 buf[1] = conn->notify_subcode;
126 memcpy(buf+2, conn->notify_data, conn->notify_size);
127 return buf + 2 + conn->notify_size;
128 }
129
130 #ifdef IPV6
131 static byte *
132 bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
133 {
134 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
135 *buf++ = 4; /* Capability data length */
136 *buf++ = 0; /* We support AF IPv6 */
137 *buf++ = BGP_AF_IPV6;
138 *buf++ = 0; /* RFU */
139 *buf++ = 1; /* and SAFI 1 */
140 return buf;
141 }
142
143 #else
144
145 static byte *
146 bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
147 {
148 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
149 *buf++ = 4; /* Capability data length */
150 *buf++ = 0; /* We support AF IPv4 */
151 *buf++ = BGP_AF_IPV4;
152 *buf++ = 0; /* RFU */
153 *buf++ = 1; /* and SAFI 1 */
154 return buf;
155 }
156 #endif
157
158 static byte *
159 bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
160 {
161 *buf++ = 2; /* Capability 2: Support for route refresh */
162 *buf++ = 0; /* Capability data length */
163 return buf;
164 }
165
166 static byte *
167 bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf)
168 {
169 *buf++ = 6; /* Capability 6: Support for extended messages */
170 *buf++ = 0; /* Capability data length */
171 return buf;
172 }
173
174 static byte *
175 bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
176 {
177 *buf++ = 64; /* Capability 64: Support for graceful restart */
178 *buf++ = 6; /* Capability data length */
179
180 put_u16(buf, p->cf->gr_time);
181 if (p->p.gr_recovery)
182 buf[0] |= BGP_GRF_RESTART;
183 buf += 2;
184
185 *buf++ = 0; /* Appropriate AF */
186 *buf++ = BGP_AF;
187 *buf++ = 1; /* and SAFI 1 */
188 *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0;
189
190 return buf;
191 }
192
193 static byte *
194 bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf)
195 {
196 *buf++ = 64; /* Capability 64: Support for graceful restart */
197 *buf++ = 2; /* Capability data length */
198 put_u16(buf, 0);
199 return buf + 2;
200 }
201
202 static byte *
203 bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
204 {
205 *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
206 *buf++ = 4; /* Capability data length */
207 put_u32(buf, p->local_as);
208 return buf + 4;
209 }
210
211 static byte *
212 bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
213 {
214 *buf++ = 69; /* Capability 69: Support for ADD-PATH */
215 *buf++ = 4; /* Capability data length */
216
217 *buf++ = 0; /* Appropriate AF */
218 *buf++ = BGP_AF;
219 *buf++ = 1; /* SAFI 1 */
220
221 *buf++ = p->cf->add_path;
222
223 return buf;
224 }
225
226 static byte *
227 bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
228 {
229 *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
230 *buf++ = 0; /* Capability data length */
231 return buf;
232 }
233
234
235 static byte *
236 bgp_create_open(struct bgp_conn *conn, byte *buf)
237 {
238 struct bgp_proto *p = conn->bgp;
239 byte *cap;
240 int cap_len;
241
242 BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
243 BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
244 buf[0] = BGP_VERSION;
245 put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
246 put_u16(buf+3, p->cf->hold_time);
247 put_u32(buf+5, p->local_id);
248
249 if (conn->start_state == BSS_CONNECT_NOCAP)
250 {
251 BGP_TRACE(D_PACKETS, "Skipping capabilities");
252 buf[9] = 0;
253 return buf + 10;
254 }
255
256 /* Skipped 3 B for length field and Capabilities parameter header */
257 cap = buf + 12;
258
259 #ifndef IPV6
260 if (p->cf->advertise_ipv4)
261 cap = bgp_put_cap_ipv4(p, cap);
262 #endif
263
264 #ifdef IPV6
265 cap = bgp_put_cap_ipv6(p, cap);
266 #endif
267
268 if (p->cf->enable_refresh)
269 cap = bgp_put_cap_rr(p, cap);
270
271 if (p->cf->gr_mode == BGP_GR_ABLE)
272 cap = bgp_put_cap_gr1(p, cap);
273 else if (p->cf->gr_mode == BGP_GR_AWARE)
274 cap = bgp_put_cap_gr2(p, cap);
275
276 if (p->cf->enable_as4)
277 cap = bgp_put_cap_as4(p, cap);
278
279 if (p->cf->add_path)
280 cap = bgp_put_cap_add_path(p, cap);
281
282 if (p->cf->enable_refresh)
283 cap = bgp_put_cap_err(p, cap);
284
285 if (p->cf->enable_extended_messages)
286 cap = bgp_put_cap_ext_msg(p, cap);
287
288 cap_len = cap - buf - 12;
289 if (cap_len > 0)
290 {
291 buf[9] = cap_len + 2; /* Optional params len */
292 buf[10] = 2; /* Option: Capability list */
293 buf[11] = cap_len; /* Option length */
294 return cap;
295 }
296 else
297 {
298 buf[9] = 0; /* No optional parameters */
299 return buf + 10;
300 }
301 }
302
303 static uint
304 bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains)
305 {
306 byte *start = w;
307 ip_addr a;
308 int bytes;
309
310 while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr))))
311 {
312 struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
313 DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
314
315 if (p->add_path_tx)
316 {
317 put_u32(w, px->path_id);
318 w += 4;
319 remains -= 4;
320 }
321
322 *w++ = px->n.pxlen;
323 bytes = (px->n.pxlen + 7) / 8;
324 a = px->n.prefix;
325 ipa_hton(a);
326 memcpy(w, &a, bytes);
327 w += bytes;
328 remains -= bytes + 1;
329 rem_node(&px->bucket_node);
330 bgp_free_prefix(p, px);
331 // fib_delete(&p->prefix_fib, px);
332 }
333 return w - start;
334 }
335
336 static void
337 bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
338 {
339 while (!EMPTY_LIST(buck->prefixes))
340 {
341 struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
342 log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
343 rem_node(&px->bucket_node);
344 bgp_free_prefix(p, px);
345 // fib_delete(&p->prefix_fib, px);
346 }
347 }
348
349 #ifndef IPV6 /* IPv4 version */
350
351 static byte *
352 bgp_create_update(struct bgp_conn *conn, byte *buf)
353 {
354 struct bgp_proto *p = conn->bgp;
355 struct bgp_bucket *buck;
356 int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
357 byte *w;
358 int wd_size = 0;
359 int r_size = 0;
360 int a_size = 0;
361
362 w = buf+2;
363 if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
364 {
365 DBG("Withdrawn routes:\n");
366 wd_size = bgp_encode_prefixes(p, w, buck, remains);
367 w += wd_size;
368 remains -= wd_size;
369 }
370 put_u16(buf, wd_size);
371
372 if (!wd_size)
373 {
374 while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
375 {
376 if (EMPTY_LIST(buck->prefixes))
377 {
378 DBG("Deleting empty bucket %p\n", buck);
379 rem_node(&buck->send_node);
380 bgp_free_bucket(p, buck);
381 continue;
382 }
383
384 DBG("Processing bucket %p\n", buck);
385 a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024);
386
387 if (a_size < 0)
388 {
389 log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
390 bgp_flush_prefixes(p, buck);
391 rem_node(&buck->send_node);
392 bgp_free_bucket(p, buck);
393 continue;
394 }
395
396 put_u16(w, a_size);
397 w += a_size + 2;
398 r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
399 w += r_size;
400 break;
401 }
402 }
403 if (!a_size) /* Attributes not already encoded */
404 {
405 put_u16(w, 0);
406 w += 2;
407 }
408 if (wd_size || r_size)
409 {
410 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
411 return w;
412 }
413 else
414 return NULL;
415 }
416
417 static byte *
418 bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
419 {
420 struct bgp_proto *p = conn->bgp;
421 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
422
423 put_u32(buf, 0);
424 return buf+4;
425 }
426
427 #else /* IPv6 version */
428
429 static inline int
430 same_iface(struct bgp_proto *p, ip_addr *ip)
431 {
432 neighbor *n = neigh_find(&p->p, ip, 0);
433 return n && p->neigh && n->iface == p->neigh->iface;
434 }
435
436 static byte *
437 bgp_create_update(struct bgp_conn *conn, byte *buf)
438 {
439 struct bgp_proto *p = conn->bgp;
440 struct bgp_bucket *buck;
441 int size, second, rem_stored;
442 int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
443 byte *w, *w_stored, *tmp, *tstart;
444 ip_addr *ipp, ip, ip_ll;
445 ea_list *ea;
446 eattr *nh;
447
448 put_u16(buf, 0);
449 w = buf+4;
450
451 if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
452 {
453 DBG("Withdrawn routes:\n");
454 tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
455 *tmp++ = 0;
456 *tmp++ = BGP_AF_IPV6;
457 *tmp++ = 1;
458 ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
459 size = bgp_encode_attrs(p, w, ea, remains);
460 ASSERT(size >= 0);
461 w += size;
462 remains -= size;
463 }
464 else
465 {
466 while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
467 {
468 if (EMPTY_LIST(buck->prefixes))
469 {
470 DBG("Deleting empty bucket %p\n", buck);
471 rem_node(&buck->send_node);
472 bgp_free_bucket(p, buck);
473 continue;
474 }
475
476 DBG("Processing bucket %p\n", buck);
477 rem_stored = remains;
478 w_stored = w;
479
480 size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024);
481 if (size < 0)
482 {
483 log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
484 bgp_flush_prefixes(p, buck);
485 rem_node(&buck->send_node);
486 bgp_free_bucket(p, buck);
487 continue;
488 }
489 w += size;
490 remains -= size;
491
492 /* We have two addresses here in NEXT_HOP eattr. Really.
493 Unless NEXT_HOP was modified by filter */
494 nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
495 ASSERT(nh);
496 second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
497 ipp = (ip_addr *) nh->u.ptr->data;
498 ip = ipp[0];
499 ip_ll = IPA_NONE;
500
501 if (ipa_equal(ip, p->source_addr))
502 ip_ll = p->local_link;
503 else
504 {
505 /* If we send a route with 'third party' next hop destinated
506 * in the same interface, we should also send a link local
507 * next hop address. We use the received one (stored in the
508 * other part of BA_NEXT_HOP eattr). If we didn't received
509 * it (for example it is a static route), we can't use
510 * 'third party' next hop and we have to use local IP address
511 * as next hop. Sending original next hop address without
512 * link local address seems to be a natural way to solve that
513 * problem, but it is contrary to RFC 2545 and Quagga does not
514 * accept such routes.
515 *
516 * There are two cases, either we have global IP, or
517 * IPA_NONE if the neighbor is link-local. For IPA_NONE,
518 * we suppose it is on the same iface, see bgp_update_attrs().
519 */
520
521 if (ipa_zero(ip) || same_iface(p, &ip))
522 {
523 if (second && ipa_nonzero(ipp[1]))
524 ip_ll = ipp[1];
525 else
526 {
527 switch (p->cf->missing_lladdr)
528 {
529 case MLL_SELF:
530 ip = p->source_addr;
531 ip_ll = p->local_link;
532 break;
533 case MLL_DROP:
534 log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
535 w = w_stored;
536 remains = rem_stored;
537 bgp_flush_prefixes(p, buck);
538 rem_node(&buck->send_node);
539 bgp_free_bucket(p, buck);
540 continue;
541 case MLL_IGNORE:
542 break;
543 }
544 }
545 }
546 }
547
548 tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
549 *tmp++ = 0;
550 *tmp++ = BGP_AF_IPV6;
551 *tmp++ = 1;
552
553 if (ipa_is_link_local(ip))
554 ip = IPA_NONE;
555
556 if (ipa_nonzero(ip_ll))
557 {
558 *tmp++ = 32;
559 ipa_hton(ip);
560 memcpy(tmp, &ip, 16);
561 ipa_hton(ip_ll);
562 memcpy(tmp+16, &ip_ll, 16);
563 tmp += 32;
564 }
565 else
566 {
567 *tmp++ = 16;
568 ipa_hton(ip);
569 memcpy(tmp, &ip, 16);
570 tmp += 16;
571 }
572
573 *tmp++ = 0; /* No SNPA information */
574 tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
575 ea->attrs[0].u.ptr->length = tmp - tstart;
576 size = bgp_encode_attrs(p, w, ea, remains);
577 ASSERT(size >= 0);
578 w += size;
579 break;
580 }
581 }
582
583 size = w - (buf+4);
584 put_u16(buf+2, size);
585 lp_flush(bgp_linpool);
586 if (size)
587 {
588 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
589 return w;
590 }
591 else
592 return NULL;
593 }
594
595 static byte *
596 bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
597 {
598 struct bgp_proto *p = conn->bgp;
599 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
600
601 put_u16(buf+0, 0);
602 put_u16(buf+2, 6); /* length 4-9 */
603 buf += 4;
604
605 /* Empty MP_UNREACH_NLRI atribute */
606 *buf++ = BAF_OPTIONAL;
607 *buf++ = BA_MP_UNREACH_NLRI;
608 *buf++ = 3; /* Length 7-9 */
609 *buf++ = 0; /* AFI */
610 *buf++ = BGP_AF_IPV6;
611 *buf++ = 1; /* SAFI */
612 return buf;
613 }
614
615 #endif
616
617 static inline byte *
618 bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
619 {
620 struct bgp_proto *p = conn->bgp;
621 BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
622
623 /* Original original route refresh request, RFC 2918 */
624 *buf++ = 0;
625 *buf++ = BGP_AF;
626 *buf++ = BGP_RR_REQUEST;
627 *buf++ = 1; /* SAFI */
628 return buf;
629 }
630
631 static inline byte *
632 bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf)
633 {
634 struct bgp_proto *p = conn->bgp;
635 BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
636
637 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
638 *buf++ = 0;
639 *buf++ = BGP_AF;
640 *buf++ = BGP_RR_BEGIN;
641 *buf++ = 1; /* SAFI */
642 return buf;
643 }
644
645 static inline byte *
646 bgp_create_end_refresh(struct bgp_conn *conn, byte *buf)
647 {
648 struct bgp_proto *p = conn->bgp;
649 BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
650
651 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
652 *buf++ = 0;
653 *buf++ = BGP_AF;
654 *buf++ = BGP_RR_END;
655 *buf++ = 1; /* SAFI */
656 return buf;
657 }
658
659
660 static void
661 bgp_create_header(byte *buf, uint len, uint type)
662 {
663 memset(buf, 0xff, 16); /* Marker */
664 put_u16(buf+16, len);
665 buf[18] = type;
666 }
667
668 /**
669 * bgp_fire_tx - transmit packets
670 * @conn: connection
671 *
672 * Whenever the transmit buffers of the underlying TCP connection
673 * are free and we have any packets queued for sending, the socket functions
674 * call bgp_fire_tx() which takes care of selecting the highest priority packet
675 * queued (Notification > Keepalive > Open > Update), assembling its header
676 * and body and sending it to the connection.
677 */
678 static int
679 bgp_fire_tx(struct bgp_conn *conn)
680 {
681 struct bgp_proto *p = conn->bgp;
682 uint s = conn->packets_to_send;
683 sock *sk = conn->sk;
684 byte *buf, *pkt, *end;
685 int type;
686
687 if (!sk)
688 {
689 conn->packets_to_send = 0;
690 return 0;
691 }
692 buf = sk->tbuf;
693 pkt = buf + BGP_HEADER_LENGTH;
694
695 if (s & (1 << PKT_SCHEDULE_CLOSE))
696 {
697 /* We can finally close connection and enter idle state */
698 bgp_conn_enter_idle_state(conn);
699 return 0;
700 }
701 if (s & (1 << PKT_NOTIFICATION))
702 {
703 s = 1 << PKT_SCHEDULE_CLOSE;
704 type = PKT_NOTIFICATION;
705 end = bgp_create_notification(conn, pkt);
706 }
707 else if (s & (1 << PKT_KEEPALIVE))
708 {
709 s &= ~(1 << PKT_KEEPALIVE);
710 type = PKT_KEEPALIVE;
711 end = pkt; /* Keepalives carry no data */
712 BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
713 bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
714 }
715 else if (s & (1 << PKT_OPEN))
716 {
717 s &= ~(1 << PKT_OPEN);
718 type = PKT_OPEN;
719 end = bgp_create_open(conn, pkt);
720 }
721 else if (s & (1 << PKT_ROUTE_REFRESH))
722 {
723 s &= ~(1 << PKT_ROUTE_REFRESH);
724 type = PKT_ROUTE_REFRESH;
725 end = bgp_create_route_refresh(conn, pkt);
726 }
727 else if (s & (1 << PKT_BEGIN_REFRESH))
728 {
729 s &= ~(1 << PKT_BEGIN_REFRESH);
730 type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */
731 end = bgp_create_begin_refresh(conn, pkt);
732 }
733 else if (s & (1 << PKT_UPDATE))
734 {
735 type = PKT_UPDATE;
736 end = bgp_create_update(conn, pkt);
737
738 if (!end)
739 {
740 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
741
742 conn->packets_to_send = 0;
743
744 if (p->feed_state == BFS_LOADED)
745 {
746 type = PKT_UPDATE;
747 end = bgp_create_end_mark(conn, pkt);
748 }
749
750 else if (p->feed_state == BFS_REFRESHED)
751 {
752 type = PKT_ROUTE_REFRESH;
753 end = bgp_create_end_refresh(conn, pkt);
754 }
755
756 else /* Really nothing to send */
757 return 0;
758
759 p->feed_state = BFS_NONE;
760 }
761 }
762 else
763 return 0;
764
765 conn->packets_to_send = s;
766 bgp_create_header(buf, end - buf, type);
767 return sk_send(sk, end - buf);
768 }
769
770 /**
771 * bgp_schedule_packet - schedule a packet for transmission
772 * @conn: connection
773 * @type: packet type
774 *
775 * Schedule a packet of type @type to be sent as soon as possible.
776 */
777 void
778 bgp_schedule_packet(struct bgp_conn *conn, int type)
779 {
780 DBG("BGP: Scheduling packet type %d\n", type);
781 conn->packets_to_send |= 1 << type;
782 if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev))
783 ev_schedule(conn->tx_ev);
784 }
785
/*
 * Event hook: keep sending queued packets on the connection passed as
 * @vconn until bgp_fire_tx() stops returning a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  for (;;) {
    if (bgp_fire_tx(conn) <= 0)
      break;
  }
}
795
796 void
797 bgp_tx(sock *sk)
798 {
799 struct bgp_conn *conn = sk->data;
800
801 DBG("BGP: TX hook\n");
802 while (bgp_fire_tx(conn) > 0)
803 ;
804 }
805
806 /* Capability negotiation as per RFC 2842 */
807
808 void
809 bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
810 {
811 // struct bgp_proto *p = conn->bgp;
812 int i, cl;
813
814 while (len > 0)
815 {
816 if (len < 2 || len < 2 + opt[1])
817 goto err;
818
819 cl = opt[1];
820
821 switch (opt[0])
822 {
823 case 2: /* Route refresh capability, RFC 2918 */
824 if (cl != 0)
825 goto err;
826 conn->peer_refresh_support = 1;
827 break;
828
829 case 6: /* Extended message length capability, draft */
830 if (cl != 0)
831 goto err;
832 conn->peer_ext_messages_support = 1;
833 break;
834
835 case 64: /* Graceful restart capability, RFC 4724 */
836 if (cl % 4 != 2)
837 goto err;
838 conn->peer_gr_aware = 1;
839 conn->peer_gr_able = 0;
840 conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
841 conn->peer_gr_flags = opt[2] & 0xf0;
842 conn->peer_gr_aflags = 0;
843 for (i = 2; i < cl; i += 4)
844 if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
845 {
846 conn->peer_gr_able = 1;
847 conn->peer_gr_aflags = opt[2+i+3];
848 }
849 break;
850
851 case 65: /* AS4 capability, RFC 4893 */
852 if (cl != 4)
853 goto err;
854 conn->peer_as4_support = 1;
855 if (conn->bgp->cf->enable_as4)
856 conn->advertised_as = get_u32(opt + 2);
857 break;
858
859 case 69: /* ADD-PATH capability, RFC 7911 */
860 if (cl % 4)
861 goto err;
862 for (i = 0; i < cl; i += 4)
863 if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
864 conn->peer_add_path = opt[2+i+3];
865 if (conn->peer_add_path > ADD_PATH_FULL)
866 goto err;
867 break;
868
869 case 70: /* Enhanced route refresh capability, RFC 7313 */
870 if (cl != 0)
871 goto err;
872 conn->peer_enhanced_refresh_support = 1;
873 break;
874
875 /* We can safely ignore all other capabilities */
876 }
877 len -= 2 + cl;
878 opt += 2 + cl;
879 }
880 return;
881
882 err:
883 bgp_error(conn, 2, 0, NULL, 0);
884 return;
885 }
886
887 static int
888 bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
889 {
890 struct bgp_proto *p = conn->bgp;
891 int ol;
892
893 while (len > 0)
894 {
895 if (len < 2 || len < 2 + opt[1])
896 { bgp_error(conn, 2, 0, NULL, 0); return 0; }
897 #ifdef LOCAL_DEBUG
898 {
899 int i;
900 DBG("\tOption %02x:", opt[0]);
901 for(i=0; i<opt[1]; i++)
902 DBG(" %02x", opt[2+i]);
903 DBG("\n");
904 }
905 #endif
906
907 ol = opt[1];
908 switch (opt[0])
909 {
910 case 2:
911 if (conn->start_state == BSS_CONNECT_NOCAP)
912 BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
913 else
914 bgp_parse_capabilities(conn, opt + 2, ol);
915 break;
916
917 default:
918 /*
919 * BGP specs don't tell us to send which option
920 * we didn't recognize, but it's common practice
921 * to do so. Also, capability negotiation with
922 * Cisco routers doesn't work without that.
923 */
924 bgp_error(conn, 2, 4, opt, ol);
925 return 0;
926 }
927 len -= 2 + ol;
928 opt += 2 + ol;
929 }
930 return 0;
931 }
932
933 static void
934 bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
935 {
936 struct bgp_conn *other;
937 struct bgp_proto *p = conn->bgp;
938 unsigned hold;
939 u16 base_as;
940 u32 id;
941
942 /* Check state */
943 if (conn->state != BS_OPENSENT)
944 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
945
946 /* Check message contents */
947 if (len < 29 || len != 29U + pkt[28])
948 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
949 if (pkt[19] != BGP_VERSION)
950 { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
951 conn->advertised_as = base_as = get_u16(pkt+20);
952 hold = get_u16(pkt+22);
953 id = get_u32(pkt+24);
954 BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
955
956 if (bgp_parse_options(conn, pkt+29, pkt[28]))
957 return;
958
959 if (hold > 0 && hold < 3)
960 { bgp_error(conn, 2, 6, pkt+22, 2); return; }
961
962 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
963 if (!id || (p->is_internal && id == p->local_id))
964 { bgp_error(conn, 2, 3, pkt+24, -4); return; }
965
966 if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
967 log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
968
969 if (conn->advertised_as != p->remote_as)
970 {
971 if (conn->peer_as4_support)
972 {
973 u32 val = htonl(conn->advertised_as);
974 bgp_error(conn, 2, 2, (byte *) &val, 4);
975 }
976 else
977 bgp_error(conn, 2, 2, pkt+20, 2);
978
979 return;
980 }
981
982 /* Check the other connection */
983 other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
984 switch (other->state)
985 {
986 case BS_CONNECT:
987 case BS_ACTIVE:
988 /* Stop outgoing connection attempts */
989 bgp_conn_enter_idle_state(other);
990 break;
991
992 case BS_IDLE:
993 case BS_OPENSENT:
994 case BS_CLOSE:
995 break;
996
997 case BS_OPENCONFIRM:
998 /*
999 * Description of collision detection rules in RFC 4271 is confusing and
1000 * contradictory, but it is essentially:
1001 *
1002 * 1. Router with higher ID is dominant
1003 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
1004 * 3. When both connections are in OpenConfirm state, one initiated by
1005 * the dominant router is kept.
1006 *
1007 * The first line in the expression below evaluates whether the neighbor
1008 * is dominant, the second line whether the new connection was initiated
1009 * by the neighbor. If both are true (or both are false), we keep the new
1010 * connection, otherwise we keep the old one.
1011 */
1012 if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as)))
1013 == (conn == &p->incoming_conn))
1014 {
1015 /* Should close the other connection */
1016 BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
1017 bgp_error(other, 6, 7, NULL, 0);
1018 break;
1019 }
1020 /* Fall thru */
1021 case BS_ESTABLISHED:
1022 /* Should close this connection */
1023 BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
1024 bgp_error(conn, 6, 7, NULL, 0);
1025 return;
1026 default:
1027 bug("bgp_rx_open: Unknown state");
1028 }
1029
1030 /* Update our local variables */
1031 conn->hold_time = MIN(hold, p->cf->hold_time);
1032 conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
1033 p->remote_id = id;
1034 p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
1035 p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
1036 p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
1037 p->gr_ready = p->cf->gr_mode && conn->peer_gr_able;
1038 p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support;
1039
1040 /* Update RA mode */
1041 if (p->add_path_tx)
1042 p->p.accept_ra_types = RA_ANY;
1043 else if (p->cf->secondary)
1044 p->p.accept_ra_types = RA_ACCEPTED;
1045 else
1046 p->p.accept_ra_types = RA_OPTIMAL;
1047
1048 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
1049
1050 bgp_schedule_packet(conn, PKT_KEEPALIVE);
1051 bgp_start_timer(conn->hold_timer, conn->hold_time);
1052 bgp_conn_enter_openconfirm_state(conn);
1053 }
1054
1055
1056 static inline void
1057 bgp_rx_end_mark(struct bgp_proto *p)
1058 {
1059 BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
1060
1061 if (p->load_state == BFS_LOADING)
1062 p->load_state = BFS_NONE;
1063
1064 if (p->p.gr_recovery)
1065 proto_graceful_restart_unlock(&p->p);
1066
1067 if (p->gr_active)
1068 bgp_graceful_restart_done(p);
1069 }
1070
1071
/*
 * DECODE_PREFIX(pp, ll) - consume one NLRI entry from the front of a buffer
 *
 * @pp is the read pointer and @ll the number of bytes left; both are
 * advanced past the entry. The macro relies on these names being in scope
 * at the point of use: p (protocol), path_id, prefix, pxlen, err, and a
 * label done. With add_path_rx enabled a 4-byte path ID precedes the
 * prefix. On malformed input err is set (1 for truncated data, 10 for a
 * prefix length above BITS_PER_IP_ADDRESS) and control jumps to done.
 * Bits beyond the prefix length are masked off the decoded address.
 */
#define DECODE_PREFIX(pp, ll) do { \
  if (p->add_path_rx) { \
    if (ll < 5) { err=1; goto done; } \
    path_id = get_u32(pp); \
    pp += 4; \
    ll -= 4; \
  } \
  int bits_ = *pp++; \
  int octets_; \
  ll--; \
  if (bits_ > BITS_PER_IP_ADDRESS) { err=10; goto done; } \
  octets_ = (bits_+7) / 8; \
  if (ll < octets_) { err=1; goto done; } \
  memcpy(&prefix, pp, octets_); \
  pp += octets_; \
  ll -= octets_; \
  ipa_ntoh(prefix); \
  prefix = ipa_and(prefix, ipa_mkmask(bits_)); \
  pxlen = bits_; \
} while (0)
1093
1094
1095 static inline void
1096 bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
1097 u32 path_id, u32 *last_id, struct rte_src **src,
1098 rta *a0, rta **a)
1099 {
1100 if (path_id != *last_id)
1101 {
1102 *src = rt_get_source(&p->p, path_id);
1103 *last_id = path_id;
1104
1105 if (*a)
1106 {
1107 rta_free(*a);
1108 *a = NULL;
1109 }
1110 }
1111
1112 /* Prepare cached route attributes */
1113 if (!*a)
1114 {
1115 a0->src = *src;
1116
1117 /* Workaround for rta_lookup() breaking eattrs */
1118 ea_list *ea = a0->eattrs;
1119 *a = rta_lookup(a0);
1120 a0->eattrs = ea;
1121 }
1122
1123 net *n = net_get(p->p.table, prefix, pxlen);
1124 rte *e = rte_get_temp(rta_clone(*a));
1125 e->net = n;
1126 e->pflags = 0;
1127 e->u.bgp.suppressed = 0;
1128 rte_update2(p->p.main_ahook, n, e, *src);
1129 }
1130
1131 static inline void
1132 bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
1133 u32 path_id, u32 *last_id, struct rte_src **src)
1134 {
1135 if (path_id != *last_id)
1136 {
1137 *src = rt_find_source(&p->p, path_id);
1138 *last_id = path_id;
1139 }
1140
1141 net *n = net_find(p->p.table, prefix, pxlen);
1142 rte_update2( p->p.main_ahook, n, NULL, *src);
1143 }
1144
1145 static inline int
1146 bgp_set_next_hop(struct bgp_proto *p, rta *a)
1147 {
1148 struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
1149 ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
1150
1151 #ifdef IPV6
1152 int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
1153
1154 /* First address should not be link-local, but may be zero in direct mode */
1155 if (ipa_is_link_local(*nexthop))
1156 *nexthop = IPA_NONE;
1157 #else
1158 int second = 0;
1159 #endif
1160
1161 if (p->cf->gw_mode == GW_DIRECT)
1162 {
1163 neighbor *ng = NULL;
1164
1165 if (ipa_nonzero(*nexthop))
1166 ng = neigh_find(&p->p, nexthop, 0);
1167 else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
1168 ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0);
1169
1170 /* Fallback */
1171 if (!ng)
1172 ng = p->neigh;
1173
1174 if (ng->scope == SCOPE_HOST)
1175 return 0;
1176
1177 a->dest = RTD_ROUTER;
1178 a->gw = ng->addr;
1179 a->iface = ng->iface;
1180 a->hostentry = NULL;
1181 a->igp_metric = 0;
1182 }
1183 else /* GW_RECURSIVE */
1184 {
1185 if (ipa_zero(*nexthop))
1186 return 0;
1187
1188 rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second);
1189 }
1190
1191 return 1;
1192 }
1193
1194 #ifndef IPV6 /* IPv4 version */
1195
1196 static void
1197 bgp_do_rx_update(struct bgp_conn *conn,
1198 byte *withdrawn, int withdrawn_len,
1199 byte *nlri, int nlri_len,
1200 byte *attrs, int attr_len)
1201 {
1202 struct bgp_proto *p = conn->bgp;
1203 struct rte_src *src = p->p.main_source;
1204 rta *a0, *a = NULL;
1205 ip_addr prefix;
1206 int pxlen, err = 0;
1207 u32 path_id = 0;
1208 u32 last_id = 0;
1209
1210 /* Check for End-of-RIB marker */
1211 if (!withdrawn_len && !attr_len && !nlri_len)
1212 {
1213 bgp_rx_end_mark(p);
1214 return;
1215 }
1216
1217 /* Withdraw routes */
1218 while (withdrawn_len)
1219 {
1220 DECODE_PREFIX(withdrawn, withdrawn_len);
1221 DBG("Withdraw %I/%d\n", prefix, pxlen);
1222
1223 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1224 }
1225
1226 if (!attr_len && !nlri_len) /* shortcut */
1227 return;
1228
1229 a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
1230
1231 if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1232 return;
1233
1234 if (a0 && nlri_len && !bgp_set_next_hop(p, a0))
1235 a0 = NULL;
1236
1237 last_id = 0;
1238 src = p->p.main_source;
1239
1240 while (nlri_len)
1241 {
1242 DECODE_PREFIX(nlri, nlri_len);
1243 DBG("Add %I/%d\n", prefix, pxlen);
1244
1245 if (a0)
1246 bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1247 else /* Forced withdraw as a result of soft error */
1248 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1249 }
1250
1251 done:
1252 if (a)
1253 rta_free(a);
1254
1255 if (err)
1256 bgp_error(conn, 3, err, NULL, 0);
1257
1258 return;
1259 }
1260
1261 #else /* IPv6 version */
1262
/*
 * DO_NLRI(name) - open the processing of p->name##_start / p->name##_len
 *
 * Loads the stored region into x and len (len0 keeps the initial length).
 * A non-empty region has to begin with a three-byte header: a 16-bit value
 * read into af and one more byte that is only printed by the debug message.
 * A shorter region sets err to 9 and jumps to the done label. An empty
 * region yields af = 0.
 *
 * The expansion ends with an unterminated "if (af == BGP_AF_IPV6)", so the
 * statement or block written right after the macro invocation becomes the
 * body of that condition.
 *
 * Relies on p, x, len, len0, af, err and the label done existing at the
 * place of use.
 */
#define DO_NLRI(name)					\
  x = p->name##_start;					\
  len = len0 = p->name##_len;				\
  if (!len)						\
    af = 0;						\
  else							\
    {							\
      if (len < 3) { err=9; goto done; }		\
      af = get_u16(x);					\
      len -= 3;						\
      x += 3;						\
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, x[-1], len);\
    }							\
  if (af == BGP_AF_IPV6)
1277
1278 static void
1279 bgp_attach_next_hop(rta *a0, byte *x)
1280 {
1281 ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
1282 memcpy(nh, x+1, 16);
1283 ipa_ntoh(nh[0]);
1284
1285 /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
1286 if (*x == 32)
1287 {
1288 memcpy(nh+1, x+17, 16);
1289 ipa_ntoh(nh[1]);
1290 }
1291 else
1292 nh[1] = IPA_NONE;
1293 }
1294
1295
1296 static void
1297 bgp_do_rx_update(struct bgp_conn *conn,
1298 byte *withdrawn UNUSED, int withdrawn_len,
1299 byte *nlri UNUSED, int nlri_len,
1300 byte *attrs, int attr_len)
1301 {
1302 struct bgp_proto *p = conn->bgp;
1303 struct rte_src *src = p->p.main_source;
1304 byte *x;
1305 int len, len0;
1306 unsigned af;
1307 rta *a0, *a = NULL;
1308 ip_addr prefix;
1309 int pxlen, err = 0;
1310 u32 path_id = 0;
1311 u32 last_id = 0;
1312
1313 p->mp_reach_len = 0;
1314 p->mp_unreach_len = 0;
1315 a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
1316
1317 if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1318 return;
1319
1320 /* Check for End-of-RIB marker */
1321 if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len &&
1322 (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
1323 {
1324 bgp_rx_end_mark(p);
1325 return;
1326 }
1327
1328 DO_NLRI(mp_unreach)
1329 {
1330 while (len)
1331 {
1332 DECODE_PREFIX(x, len);
1333 DBG("Withdraw %I/%d\n", prefix, pxlen);
1334 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1335 }
1336 }
1337
1338 DO_NLRI(mp_reach)
1339 {
1340 /* Create fake NEXT_HOP attribute */
1341 if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
1342 { err = 9; goto done; }
1343
1344 if (a0)
1345 bgp_attach_next_hop(a0, x);
1346
1347 /* Also ignore one reserved byte */
1348 len -= *x + 2;
1349 x += *x + 2;
1350
1351 if (a0 && ! bgp_set_next_hop(p, a0))
1352 a0 = NULL;
1353
1354 last_id = 0;
1355 src = p->p.main_source;
1356
1357 while (len)
1358 {
1359 DECODE_PREFIX(x, len);
1360 DBG("Add %I/%d\n", prefix, pxlen);
1361
1362 if (a0)
1363 bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1364 else /* Forced withdraw as a result of soft error */
1365 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1366 }
1367 }
1368
1369 done:
1370 if (a)
1371 rta_free(a);
1372
1373 if (err) /* Use subcode 9, not err */
1374 bgp_error(conn, 3, 9, NULL, 0);
1375
1376 return;
1377 }
1378
1379 #endif
1380
1381 static void
1382 bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
1383 {
1384 struct bgp_proto *p = conn->bgp;
1385 byte *withdrawn, *attrs, *nlri;
1386 uint withdrawn_len, attr_len, nlri_len;
1387
1388 BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
1389
1390 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1391 if (conn->state == BS_OPENCONFIRM)
1392 bgp_conn_enter_established_state(conn);
1393
1394 if (conn->state != BS_ESTABLISHED)
1395 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1396 bgp_start_timer(conn->hold_timer, conn->hold_time);
1397
1398 /* Find parts of the packet and check sizes */
1399 if (len < 23)
1400 {
1401 bgp_error(conn, 1, 2, pkt+16, 2);
1402 return;
1403 }
1404 withdrawn = pkt + 21;
1405 withdrawn_len = get_u16(pkt + 19);
1406 if (withdrawn_len + 23 > len)
1407 goto malformed;
1408 attrs = withdrawn + withdrawn_len + 2;
1409 attr_len = get_u16(attrs - 2);
1410 if (withdrawn_len + attr_len + 23 > len)
1411 goto malformed;
1412 nlri = attrs + attr_len;
1413 nlri_len = len - withdrawn_len - attr_len - 23;
1414 if (!attr_len && nlri_len)
1415 goto malformed;
1416 DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
1417
1418 lp_flush(bgp_linpool);
1419
1420 bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
1421 return;
1422
1423 malformed:
1424 bgp_error(conn, 3, 1, NULL, 0);
1425 }
1426
1427 static struct {
1428 byte major, minor;
1429 byte *msg;
1430 } bgp_msg_table[] = {
1431 { 1, 0, "Invalid message header" },
1432 { 1, 1, "Connection not synchronized" },
1433 { 1, 2, "Bad message length" },
1434 { 1, 3, "Bad message type" },
1435 { 2, 0, "Invalid OPEN message" },
1436 { 2, 1, "Unsupported version number" },
1437 { 2, 2, "Bad peer AS" },
1438 { 2, 3, "Bad BGP identifier" },
1439 { 2, 4, "Unsupported optional parameter" },
1440 { 2, 5, "Authentication failure" },
1441 { 2, 6, "Unacceptable hold time" },
1442 { 2, 7, "Required capability missing" }, /* [RFC5492] */
1443 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1444 { 3, 0, "Invalid UPDATE message" },
1445 { 3, 1, "Malformed attribute list" },
1446 { 3, 2, "Unrecognized well-known attribute" },
1447 { 3, 3, "Missing mandatory attribute" },
1448 { 3, 4, "Invalid attribute flags" },
1449 { 3, 5, "Invalid attribute length" },
1450 { 3, 6, "Invalid ORIGIN attribute" },
1451 { 3, 7, "AS routing loop" }, /* Deprecated */
1452 { 3, 8, "Invalid NEXT_HOP attribute" },
1453 { 3, 9, "Optional attribute error" },
1454 { 3, 10, "Invalid network field" },
1455 { 3, 11, "Malformed AS_PATH" },
1456 { 4, 0, "Hold timer expired" },
1457 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1458 { 5, 1, "Unexpected message in OpenSent state" },
1459 { 5, 2, "Unexpected message in OpenConfirm state" },
1460 { 5, 3, "Unexpected message in Established state" },
1461 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1462 { 6, 1, "Maximum number of prefixes reached" },
1463 { 6, 2, "Administrative shutdown" },
1464 { 6, 3, "Peer de-configured" },
1465 { 6, 4, "Administrative reset" },
1466 { 6, 5, "Connection rejected" },
1467 { 6, 6, "Other configuration change" },
1468 { 6, 7, "Connection collision resolution" },
1469 { 6, 8, "Out of Resources" },
1470 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
1471 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
1472 };
1473
1474 /**
1475 * bgp_error_dsc - return BGP error description
1476 * @code: BGP error code
1477 * @subcode: BGP error subcode
1478 *
1479 * bgp_error_dsc() returns error description for BGP errors
1480 * which might be static string or given temporary buffer.
1481 */
1482 const char *
1483 bgp_error_dsc(unsigned code, unsigned subcode)
1484 {
1485 static char buff[32];
1486 unsigned i;
1487 for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1488 if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1489 {
1490 return bgp_msg_table[i].msg;
1491 }
1492
1493 bsprintf(buff, "Unknown error %d.%d", code, subcode);
1494 return buff;
1495 }
1496
1497 void
1498 bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
1499 {
1500 const byte *name;
1501 byte *t, argbuf[36];
1502 unsigned i;
1503
1504 /* Don't report Cease messages generated by myself */
1505 if (code == 6 && class == BE_BGP_TX)
1506 return;
1507
1508 name = bgp_error_dsc(code, subcode);
1509 t = argbuf;
1510 if (len)
1511 {
1512 *t++ = ':';
1513 *t++ = ' ';
1514
1515 if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1516 {
1517 /* Bad peer AS - we would like to print the AS */
1518 t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data));
1519 goto done;
1520 }
1521 if (len > 16)
1522 len = 16;
1523 for (i=0; i<len; i++)
1524 t += bsprintf(t, "%02x", data[i]);
1525 }
1526 done:
1527 *t = 0;
1528 log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
1529 }
1530
1531 static void
1532 bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
1533 {
1534 struct bgp_proto *p = conn->bgp;
1535 if (len < 21)
1536 {
1537 bgp_error(conn, 1, 2, pkt+16, 2);
1538 return;
1539 }
1540
1541 unsigned code = pkt[19];
1542 unsigned subcode = pkt[20];
1543 int err = (code != 6);
1544
1545 bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1546 bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1547
1548 #ifndef IPV6
1549 if ((code == 2) && ((subcode == 4) || (subcode == 7))
1550 /* Error related to capability:
1551 * 4 - Peer does not support capabilities at all.
1552 * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
1553 */
1554 && (p->cf->capabilities == 2)
1555 /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1556 && (conn->start_state == BSS_CONNECT)
1557 /* Failed connection attempt have used capabilities */
1558 && (p->cf->remote_as <= 0xFFFF))
1559 /* Not possible with disabled capabilities */
1560 {
1561 /* We try connect without capabilities */
1562 log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
1563 p->start_state = BSS_CONNECT_NOCAP;
1564 err = 0;
1565 }
1566 #endif
1567
1568 bgp_conn_enter_close_state(conn);
1569 bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1570
1571 if (err)
1572 {
1573 bgp_update_startup_delay(p);
1574 bgp_stop(p, 0);
1575 }
1576 }
1577
1578 static void
1579 bgp_rx_keepalive(struct bgp_conn *conn)
1580 {
1581 struct bgp_proto *p = conn->bgp;
1582
1583 BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1584 bgp_start_timer(conn->hold_timer, conn->hold_time);
1585 switch (conn->state)
1586 {
1587 case BS_OPENCONFIRM:
1588 bgp_conn_enter_established_state(conn);
1589 break;
1590 case BS_ESTABLISHED:
1591 break;
1592 default:
1593 bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
1594 }
1595 }
1596
1597 static void
1598 bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
1599 {
1600 struct bgp_proto *p = conn->bgp;
1601
1602 if (conn->state != BS_ESTABLISHED)
1603 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1604
1605 if (!p->cf->enable_refresh)
1606 { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1607
1608 if (len < (BGP_HEADER_LENGTH + 4))
1609 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1610
1611 if (len > (BGP_HEADER_LENGTH + 4))
1612 { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
1613
1614 /* FIXME - we ignore AFI/SAFI values, as we support
1615 just one value and even an error code for an invalid
1616 request is not defined */
1617
1618 /* RFC 7313 redefined reserved field as RR message subtype */
1619 uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST;
1620
1621 switch (subtype)
1622 {
1623 case BGP_RR_REQUEST:
1624 BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1625 proto_request_feeding(&p->p);
1626 break;
1627
1628 case BGP_RR_BEGIN:
1629 BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
1630 bgp_refresh_begin(p);
1631 break;
1632
1633 case BGP_RR_END:
1634 BGP_TRACE(D_PACKETS, "Got END-OF-RR");
1635 bgp_refresh_end(p);
1636 break;
1637
1638 default:
1639 log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
1640 p->p.name, subtype);
1641 break;
1642 }
1643 }
1644
1645
1646 /**
1647 * bgp_rx_packet - handle a received packet
1648 * @conn: BGP connection
1649 * @pkt: start of the packet
1650 * @len: packet size
1651 *
1652 * bgp_rx_packet() takes a newly received packet and calls the corresponding
1653 * packet handler according to the packet type.
1654 */
1655 static void
1656 bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1657 {
1658 byte type = pkt[18];
1659
1660 DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
1661
1662 if (conn->bgp->p.mrtdump & MD_MESSAGES)
1663 mrt_dump_bgp_packet(conn, pkt, len);
1664
1665 switch (type)
1666 {
1667 case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
1668 case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
1669 case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
1670 case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
1671 case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
1672 default: bgp_error(conn, 1, 3, pkt+18, 1);
1673 }
1674 }
1675
1676 /**
1677 * bgp_rx - handle received data
1678 * @sk: socket
1679 * @size: amount of data received
1680 *
1681 * bgp_rx() is called by the socket layer whenever new data arrive from
1682 * the underlying TCP connection. It assembles the data fragments to packets,
1683 * checks their headers and framing and passes complete packets to
1684 * bgp_rx_packet().
1685 */
1686 int
1687 bgp_rx(sock *sk, uint size)
1688 {
1689 struct bgp_conn *conn = sk->data;
1690 struct bgp_proto *p = conn->bgp;
1691 byte *pkt_start = sk->rbuf;
1692 byte *end = pkt_start + size;
1693 unsigned i, len;
1694
1695 DBG("BGP: RX hook: Got %d bytes\n", size);
1696 while (end >= pkt_start + BGP_HEADER_LENGTH)
1697 {
1698 if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1699 return 0;
1700 for(i=0; i<16; i++)
1701 if (pkt_start[i] != 0xff)
1702 {
1703 bgp_error(conn, 1, 1, NULL, 0);
1704 break;
1705 }
1706 len = get_u16(pkt_start+16);
1707 if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p))
1708 {
1709 bgp_error(conn, 1, 2, pkt_start+16, 2);
1710 break;
1711 }
1712 if (end < pkt_start + len)
1713 break;
1714 bgp_rx_packet(conn, pkt_start, len);
1715 pkt_start += len;
1716 }
1717 if (pkt_start != sk->rbuf)
1718 {
1719 memmove(sk->rbuf, pkt_start, end - pkt_start);
1720 sk->rpos = sk->rbuf + (end - pkt_start);
1721 }
1722 return 0;
1723 }