]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
unsigned [int] -> uint
[thirdparty/bird.git] / proto / bgp / packets.c
1 /*
2 * BIRD -- BGP Packet Processing
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 #undef LOCAL_DEBUG
10
11 #include "nest/bird.h"
12 #include "nest/iface.h"
13 #include "nest/protocol.h"
14 #include "nest/route.h"
15 #include "nest/attrs.h"
16 #include "nest/mrtdump.h"
17 #include "conf/conf.h"
18 #include "lib/unaligned.h"
19 #include "lib/socket.h"
20
21 #include "nest/cli.h"
22
23 #include "bgp.h"
24
25
26 #define BGP_RR_REQUEST 0
27 #define BGP_RR_BEGIN 1
28 #define BGP_RR_END 2
29
30
31 static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
32 static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
33
34 /* Table for state -> RFC 6608 FSM error subcodes */
35 static byte fsm_err_subcode[BS_MAX] = {
36 [BS_OPENSENT] = 1,
37 [BS_OPENCONFIRM] = 2,
38 [BS_ESTABLISHED] = 3
39 };
40
/*
 * MRT Dump format is not semantically specified.
 * We will use these values in appropriate fields:
 *
 * Local AS, Remote AS - configured AS numbers for given BGP instance.
 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
 *
 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
 * changes) and MESSAGE (for received BGP messages).
 *
 * STATE_CHANGE always uses the AS4 variant, but MESSAGE uses the AS4 variant
 * only when an AS4 session is established, and even in that case MESSAGE
 * does not use the AS4 variant for the initial OPEN message. This strange
 * behavior is here for compatibility with Quagga and Bgpdump.
 */
56
57 static byte *
58 mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
59 {
60 struct bgp_proto *p = conn->bgp;
61
62 if (as4)
63 {
64 put_u32(buf+0, p->remote_as);
65 put_u32(buf+4, p->local_as);
66 buf+=8;
67 }
68 else
69 {
70 put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
71 put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS);
72 buf+=4;
73 }
74
75 put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
76 put_u16(buf+2, BGP_AF);
77 buf+=4;
78 buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
79 buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE);
80
81 return buf;
82 }
83
84 static void
85 mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
86 {
87 byte buf[BGP_MAX_PACKET_LENGTH + 128];
88 byte *bp = buf + MRTDUMP_HDR_LENGTH;
89 int as4 = conn->bgp->as4_session;
90
91 bp = mrt_put_bgp4_hdr(bp, conn, as4);
92 memcpy(bp, pkt, len);
93 bp += len;
94 mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
95 buf, bp-buf);
96 }
97
98 static inline u16
99 convert_state(unsigned state)
100 {
101 /* Convert state from our BS_* values to values used in MRTDump */
102 return (state == BS_CLOSE) ? 1 : state + 1;
103 }
104
105 void
106 mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new)
107 {
108 byte buf[128];
109 byte *bp = buf + MRTDUMP_HDR_LENGTH;
110
111 bp = mrt_put_bgp4_hdr(bp, conn, 1);
112 put_u16(bp+0, convert_state(old));
113 put_u16(bp+2, convert_state(new));
114 bp += 4;
115 mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
116 }
117
118 static byte *
119 bgp_create_notification(struct bgp_conn *conn, byte *buf)
120 {
121 struct bgp_proto *p = conn->bgp;
122
123 BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
124 buf[0] = conn->notify_code;
125 buf[1] = conn->notify_subcode;
126 memcpy(buf+2, conn->notify_data, conn->notify_size);
127 return buf + 2 + conn->notify_size;
128 }
129
130 #ifdef IPV6
131 static byte *
132 bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
133 {
134 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
135 *buf++ = 4; /* Capability data length */
136 *buf++ = 0; /* We support AF IPv6 */
137 *buf++ = BGP_AF_IPV6;
138 *buf++ = 0; /* RFU */
139 *buf++ = 1; /* and SAFI 1 */
140 return buf;
141 }
142
143 #else
144
145 static byte *
146 bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
147 {
148 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
149 *buf++ = 4; /* Capability data length */
150 *buf++ = 0; /* We support AF IPv4 */
151 *buf++ = BGP_AF_IPV4;
152 *buf++ = 0; /* RFU */
153 *buf++ = 1; /* and SAFI 1 */
154 return buf;
155 }
156 #endif
157
158 static byte *
159 bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
160 {
161 *buf++ = 2; /* Capability 2: Support for route refresh */
162 *buf++ = 0; /* Capability data length */
163 return buf;
164 }
165
166 static byte *
167 bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
168 {
169 *buf++ = 64; /* Capability 64: Support for graceful restart */
170 *buf++ = 6; /* Capability data length */
171
172 put_u16(buf, p->cf->gr_time);
173 if (p->p.gr_recovery)
174 buf[0] |= BGP_GRF_RESTART;
175 buf += 2;
176
177 *buf++ = 0; /* Appropriate AF */
178 *buf++ = BGP_AF;
179 *buf++ = 1; /* and SAFI 1 */
180 *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0;
181
182 return buf;
183 }
184
185 static byte *
186 bgp_put_cap_gr2(struct bgp_proto *p, byte *buf)
187 {
188 *buf++ = 64; /* Capability 64: Support for graceful restart */
189 *buf++ = 2; /* Capability data length */
190 put_u16(buf, 0);
191 return buf + 2;
192 }
193
194 static byte *
195 bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
196 {
197 *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
198 *buf++ = 4; /* Capability data length */
199 put_u32(buf, p->local_as);
200 return buf + 4;
201 }
202
203 static byte *
204 bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
205 {
206 *buf++ = 69; /* Capability 69: Support for ADD-PATH */
207 *buf++ = 4; /* Capability data length */
208
209 *buf++ = 0; /* Appropriate AF */
210 *buf++ = BGP_AF;
211 *buf++ = 1; /* SAFI 1 */
212
213 *buf++ = p->cf->add_path;
214
215 return buf;
216 }
217
218 static byte *
219 bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
220 {
221 *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
222 *buf++ = 0; /* Capability data length */
223 return buf;
224 }
225
226
227 static byte *
228 bgp_create_open(struct bgp_conn *conn, byte *buf)
229 {
230 struct bgp_proto *p = conn->bgp;
231 byte *cap;
232 int cap_len;
233
234 BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
235 BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
236 buf[0] = BGP_VERSION;
237 put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
238 put_u16(buf+3, p->cf->hold_time);
239 put_u32(buf+5, p->local_id);
240
241 if (conn->start_state == BSS_CONNECT_NOCAP)
242 {
243 BGP_TRACE(D_PACKETS, "Skipping capabilities");
244 buf[9] = 0;
245 return buf + 10;
246 }
247
248 /* Skipped 3 B for length field and Capabilities parameter header */
249 cap = buf + 12;
250
251 #ifndef IPV6
252 if (p->cf->advertise_ipv4)
253 cap = bgp_put_cap_ipv4(p, cap);
254 #endif
255
256 #ifdef IPV6
257 cap = bgp_put_cap_ipv6(p, cap);
258 #endif
259
260 if (p->cf->enable_refresh)
261 cap = bgp_put_cap_rr(p, cap);
262
263 if (p->cf->gr_mode == BGP_GR_ABLE)
264 cap = bgp_put_cap_gr1(p, cap);
265 else if (p->cf->gr_mode == BGP_GR_AWARE)
266 cap = bgp_put_cap_gr2(p, cap);
267
268 if (p->cf->enable_as4)
269 cap = bgp_put_cap_as4(p, cap);
270
271 if (p->cf->add_path)
272 cap = bgp_put_cap_add_path(p, cap);
273
274 if (p->cf->enable_refresh)
275 cap = bgp_put_cap_err(p, cap);
276
277 cap_len = cap - buf - 12;
278 if (cap_len > 0)
279 {
280 buf[9] = cap_len + 2; /* Optional params len */
281 buf[10] = 2; /* Option: Capability list */
282 buf[11] = cap_len; /* Option length */
283 return cap;
284 }
285 else
286 {
287 buf[9] = 0; /* No optional parameters */
288 return buf + 10;
289 }
290 }
291
292 static uint
293 bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains)
294 {
295 byte *start = w;
296 ip_addr a;
297 int bytes;
298
299 while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr))))
300 {
301 struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
302 DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
303
304 if (p->add_path_tx)
305 {
306 put_u32(w, px->path_id);
307 w += 4;
308 remains -= 4;
309 }
310
311 *w++ = px->n.pxlen;
312 bytes = (px->n.pxlen + 7) / 8;
313 a = px->n.prefix;
314 ipa_hton(a);
315 memcpy(w, &a, bytes);
316 w += bytes;
317 remains -= bytes + 1;
318 rem_node(&px->bucket_node);
319 bgp_free_prefix(p, px);
320 // fib_delete(&p->prefix_fib, px);
321 }
322 return w - start;
323 }
324
325 static void
326 bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
327 {
328 while (!EMPTY_LIST(buck->prefixes))
329 {
330 struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
331 log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
332 rem_node(&px->bucket_node);
333 bgp_free_prefix(p, px);
334 // fib_delete(&p->prefix_fib, px);
335 }
336 }
337
338 #ifndef IPV6 /* IPv4 version */
339
340 static byte *
341 bgp_create_update(struct bgp_conn *conn, byte *buf)
342 {
343 struct bgp_proto *p = conn->bgp;
344 struct bgp_bucket *buck;
345 int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
346 byte *w;
347 int wd_size = 0;
348 int r_size = 0;
349 int a_size = 0;
350
351 w = buf+2;
352 if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
353 {
354 DBG("Withdrawn routes:\n");
355 wd_size = bgp_encode_prefixes(p, w, buck, remains);
356 w += wd_size;
357 remains -= wd_size;
358 }
359 put_u16(buf, wd_size);
360
361 if (remains >= 3072)
362 {
363 while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
364 {
365 if (EMPTY_LIST(buck->prefixes))
366 {
367 DBG("Deleting empty bucket %p\n", buck);
368 rem_node(&buck->send_node);
369 bgp_free_bucket(p, buck);
370 continue;
371 }
372
373 DBG("Processing bucket %p\n", buck);
374 a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048);
375
376 if (a_size < 0)
377 {
378 log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
379 bgp_flush_prefixes(p, buck);
380 rem_node(&buck->send_node);
381 bgp_free_bucket(p, buck);
382 continue;
383 }
384
385 put_u16(w, a_size);
386 w += a_size + 2;
387 r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
388 w += r_size;
389 break;
390 }
391 }
392 if (!a_size) /* Attributes not already encoded */
393 {
394 put_u16(w, 0);
395 w += 2;
396 }
397 if (wd_size || r_size)
398 {
399 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
400 return w;
401 }
402 else
403 return NULL;
404 }
405
406 static byte *
407 bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
408 {
409 struct bgp_proto *p = conn->bgp;
410 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
411
412 put_u32(buf, 0);
413 return buf+4;
414 }
415
416 #else /* IPv6 version */
417
418 static inline int
419 same_iface(struct bgp_proto *p, ip_addr *ip)
420 {
421 neighbor *n = neigh_find(&p->p, ip, 0);
422 return n && p->neigh && n->iface == p->neigh->iface;
423 }
424
425 static byte *
426 bgp_create_update(struct bgp_conn *conn, byte *buf)
427 {
428 struct bgp_proto *p = conn->bgp;
429 struct bgp_bucket *buck;
430 int size, second, rem_stored;
431 int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
432 byte *w, *w_stored, *tmp, *tstart;
433 ip_addr *ipp, ip, ip_ll;
434 ea_list *ea;
435 eattr *nh;
436
437 put_u16(buf, 0);
438 w = buf+4;
439
440 if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
441 {
442 DBG("Withdrawn routes:\n");
443 tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
444 *tmp++ = 0;
445 *tmp++ = BGP_AF_IPV6;
446 *tmp++ = 1;
447 ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
448 size = bgp_encode_attrs(p, w, ea, remains);
449 ASSERT(size >= 0);
450 w += size;
451 remains -= size;
452 }
453
454 if (remains >= 3072)
455 {
456 while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
457 {
458 if (EMPTY_LIST(buck->prefixes))
459 {
460 DBG("Deleting empty bucket %p\n", buck);
461 rem_node(&buck->send_node);
462 bgp_free_bucket(p, buck);
463 continue;
464 }
465
466 DBG("Processing bucket %p\n", buck);
467 rem_stored = remains;
468 w_stored = w;
469
470 size = bgp_encode_attrs(p, w, buck->eattrs, 2048);
471 if (size < 0)
472 {
473 log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
474 bgp_flush_prefixes(p, buck);
475 rem_node(&buck->send_node);
476 bgp_free_bucket(p, buck);
477 continue;
478 }
479 w += size;
480 remains -= size;
481
482 /* We have two addresses here in NEXT_HOP eattr. Really.
483 Unless NEXT_HOP was modified by filter */
484 nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
485 ASSERT(nh);
486 second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
487 ipp = (ip_addr *) nh->u.ptr->data;
488 ip = ipp[0];
489 ip_ll = IPA_NONE;
490
491 if (ipa_equal(ip, p->source_addr))
492 ip_ll = p->local_link;
493 else
494 {
495 /* If we send a route with 'third party' next hop destinated
496 * in the same interface, we should also send a link local
497 * next hop address. We use the received one (stored in the
498 * other part of BA_NEXT_HOP eattr). If we didn't received
499 * it (for example it is a static route), we can't use
500 * 'third party' next hop and we have to use local IP address
501 * as next hop. Sending original next hop address without
502 * link local address seems to be a natural way to solve that
503 * problem, but it is contrary to RFC 2545 and Quagga does not
504 * accept such routes.
505 *
506 * There are two cases, either we have global IP, or
507 * IPA_NONE if the neighbor is link-local. For IPA_NONE,
508 * we suppose it is on the same iface, see bgp_update_attrs().
509 */
510
511 if (ipa_zero(ip) || same_iface(p, &ip))
512 {
513 if (second && ipa_nonzero(ipp[1]))
514 ip_ll = ipp[1];
515 else
516 {
517 switch (p->cf->missing_lladdr)
518 {
519 case MLL_SELF:
520 ip = p->source_addr;
521 ip_ll = p->local_link;
522 break;
523 case MLL_DROP:
524 log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
525 w = w_stored;
526 remains = rem_stored;
527 bgp_flush_prefixes(p, buck);
528 rem_node(&buck->send_node);
529 bgp_free_bucket(p, buck);
530 continue;
531 case MLL_IGNORE:
532 break;
533 }
534 }
535 }
536 }
537
538 tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
539 *tmp++ = 0;
540 *tmp++ = BGP_AF_IPV6;
541 *tmp++ = 1;
542
543 if (ipa_is_link_local(ip))
544 ip = IPA_NONE;
545
546 if (ipa_nonzero(ip_ll))
547 {
548 *tmp++ = 32;
549 ipa_hton(ip);
550 memcpy(tmp, &ip, 16);
551 ipa_hton(ip_ll);
552 memcpy(tmp+16, &ip_ll, 16);
553 tmp += 32;
554 }
555 else
556 {
557 *tmp++ = 16;
558 ipa_hton(ip);
559 memcpy(tmp, &ip, 16);
560 tmp += 16;
561 }
562
563 *tmp++ = 0; /* No SNPA information */
564 tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
565 ea->attrs[0].u.ptr->length = tmp - tstart;
566 size = bgp_encode_attrs(p, w, ea, remains);
567 ASSERT(size >= 0);
568 w += size;
569 break;
570 }
571 }
572
573 size = w - (buf+4);
574 put_u16(buf+2, size);
575 lp_flush(bgp_linpool);
576 if (size)
577 {
578 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
579 return w;
580 }
581 else
582 return NULL;
583 }
584
585 static byte *
586 bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
587 {
588 struct bgp_proto *p = conn->bgp;
589 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
590
591 put_u16(buf+0, 0);
592 put_u16(buf+2, 6); /* length 4-9 */
593 buf += 4;
594
595 /* Empty MP_UNREACH_NLRI atribute */
596 *buf++ = BAF_OPTIONAL;
597 *buf++ = BA_MP_UNREACH_NLRI;
598 *buf++ = 3; /* Length 7-9 */
599 *buf++ = 0; /* AFI */
600 *buf++ = BGP_AF_IPV6;
601 *buf++ = 1; /* SAFI */
602 return buf;
603 }
604
605 #endif
606
607 static inline byte *
608 bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
609 {
610 struct bgp_proto *p = conn->bgp;
611 BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
612
613 /* Original original route refresh request, RFC 2918 */
614 *buf++ = 0;
615 *buf++ = BGP_AF;
616 *buf++ = BGP_RR_REQUEST;
617 *buf++ = 1; /* SAFI */
618 return buf;
619 }
620
621 static inline byte *
622 bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf)
623 {
624 struct bgp_proto *p = conn->bgp;
625 BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
626
627 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
628 *buf++ = 0;
629 *buf++ = BGP_AF;
630 *buf++ = BGP_RR_BEGIN;
631 *buf++ = 1; /* SAFI */
632 return buf;
633 }
634
635 static inline byte *
636 bgp_create_end_refresh(struct bgp_conn *conn, byte *buf)
637 {
638 struct bgp_proto *p = conn->bgp;
639 BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
640
641 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
642 *buf++ = 0;
643 *buf++ = BGP_AF;
644 *buf++ = BGP_RR_END;
645 *buf++ = 1; /* SAFI */
646 return buf;
647 }
648
649
650 static void
651 bgp_create_header(byte *buf, uint len, uint type)
652 {
653 memset(buf, 0xff, 16); /* Marker */
654 put_u16(buf+16, len);
655 buf[18] = type;
656 }
657
658 /**
659 * bgp_fire_tx - transmit packets
660 * @conn: connection
661 *
662 * Whenever the transmit buffers of the underlying TCP connection
663 * are free and we have any packets queued for sending, the socket functions
664 * call bgp_fire_tx() which takes care of selecting the highest priority packet
665 * queued (Notification > Keepalive > Open > Update), assembling its header
666 * and body and sending it to the connection.
667 */
668 static int
669 bgp_fire_tx(struct bgp_conn *conn)
670 {
671 struct bgp_proto *p = conn->bgp;
672 uint s = conn->packets_to_send;
673 sock *sk = conn->sk;
674 byte *buf, *pkt, *end;
675 int type;
676
677 if (!sk)
678 {
679 conn->packets_to_send = 0;
680 return 0;
681 }
682 buf = sk->tbuf;
683 pkt = buf + BGP_HEADER_LENGTH;
684
685 if (s & (1 << PKT_SCHEDULE_CLOSE))
686 {
687 /* We can finally close connection and enter idle state */
688 bgp_conn_enter_idle_state(conn);
689 return 0;
690 }
691 if (s & (1 << PKT_NOTIFICATION))
692 {
693 s = 1 << PKT_SCHEDULE_CLOSE;
694 type = PKT_NOTIFICATION;
695 end = bgp_create_notification(conn, pkt);
696 }
697 else if (s & (1 << PKT_KEEPALIVE))
698 {
699 s &= ~(1 << PKT_KEEPALIVE);
700 type = PKT_KEEPALIVE;
701 end = pkt; /* Keepalives carry no data */
702 BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
703 bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
704 }
705 else if (s & (1 << PKT_OPEN))
706 {
707 s &= ~(1 << PKT_OPEN);
708 type = PKT_OPEN;
709 end = bgp_create_open(conn, pkt);
710 }
711 else if (s & (1 << PKT_ROUTE_REFRESH))
712 {
713 s &= ~(1 << PKT_ROUTE_REFRESH);
714 type = PKT_ROUTE_REFRESH;
715 end = bgp_create_route_refresh(conn, pkt);
716 }
717 else if (s & (1 << PKT_BEGIN_REFRESH))
718 {
719 s &= ~(1 << PKT_BEGIN_REFRESH);
720 type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */
721 end = bgp_create_begin_refresh(conn, pkt);
722 }
723 else if (s & (1 << PKT_UPDATE))
724 {
725 type = PKT_UPDATE;
726 end = bgp_create_update(conn, pkt);
727
728 if (!end)
729 {
730 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
731
732 conn->packets_to_send = 0;
733
734 if (p->feed_state == BFS_LOADED)
735 {
736 type = PKT_UPDATE;
737 end = bgp_create_end_mark(conn, pkt);
738 }
739
740 else if (p->feed_state == BFS_REFRESHED)
741 {
742 type = PKT_ROUTE_REFRESH;
743 end = bgp_create_end_refresh(conn, pkt);
744 }
745
746 else /* Really nothing to send */
747 return 0;
748
749 p->feed_state = BFS_NONE;
750 }
751 }
752 else
753 return 0;
754
755 conn->packets_to_send = s;
756 bgp_create_header(buf, end - buf, type);
757 return sk_send(sk, end - buf);
758 }
759
760 /**
761 * bgp_schedule_packet - schedule a packet for transmission
762 * @conn: connection
763 * @type: packet type
764 *
765 * Schedule a packet of type @type to be sent as soon as possible.
766 */
767 void
768 bgp_schedule_packet(struct bgp_conn *conn, int type)
769 {
770 DBG("BGP: Scheduling packet type %d\n", type);
771 conn->packets_to_send |= 1 << type;
772 if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev))
773 ev_schedule(conn->tx_ev);
774 }
775
/* Event hook: keep sending queued packets while bgp_fire_tx() returns > 0 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  for (;;)
    {
      if (bgp_fire_tx(conn) <= 0)
	break;
    }
}
785
786 void
787 bgp_tx(sock *sk)
788 {
789 struct bgp_conn *conn = sk->data;
790
791 DBG("BGP: TX hook\n");
792 while (bgp_fire_tx(conn) > 0)
793 ;
794 }
795
796 /* Capatibility negotiation as per RFC 2842 */
797
798 void
799 bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
800 {
801 // struct bgp_proto *p = conn->bgp;
802 int i, cl;
803
804 while (len > 0)
805 {
806 if (len < 2 || len < 2 + opt[1])
807 goto err;
808
809 cl = opt[1];
810
811 switch (opt[0])
812 {
813 case 2: /* Route refresh capability, RFC 2918 */
814 if (cl != 0)
815 goto err;
816 conn->peer_refresh_support = 1;
817 break;
818
819 case 64: /* Graceful restart capability, RFC 4724 */
820 if (cl % 4 != 2)
821 goto err;
822 conn->peer_gr_aware = 1;
823 conn->peer_gr_able = 0;
824 conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
825 conn->peer_gr_flags = opt[2] & 0xf0;
826 conn->peer_gr_aflags = 0;
827 for (i = 2; i < cl; i += 4)
828 if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
829 {
830 conn->peer_gr_able = 1;
831 conn->peer_gr_aflags = opt[2+i+3];
832 }
833 break;
834
835 case 65: /* AS4 capability, RFC 4893 */
836 if (cl != 4)
837 goto err;
838 conn->peer_as4_support = 1;
839 if (conn->bgp->cf->enable_as4)
840 conn->advertised_as = get_u32(opt + 2);
841 break;
842
843 case 69: /* ADD-PATH capability, draft */
844 if (cl % 4)
845 goto err;
846 for (i = 0; i < cl; i += 4)
847 if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
848 conn->peer_add_path = opt[2+i+3];
849 if (conn->peer_add_path > ADD_PATH_FULL)
850 goto err;
851 break;
852
853 case 70: /* Enhanced route refresh capability, RFC 7313 */
854 if (cl != 0)
855 goto err;
856 conn->peer_enhanced_refresh_support = 1;
857 break;
858
859 /* We can safely ignore all other capabilities */
860 }
861 len -= 2 + cl;
862 opt += 2 + cl;
863 }
864 return;
865
866 err:
867 bgp_error(conn, 2, 0, NULL, 0);
868 return;
869 }
870
871 static int
872 bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
873 {
874 struct bgp_proto *p = conn->bgp;
875 int ol;
876
877 while (len > 0)
878 {
879 if (len < 2 || len < 2 + opt[1])
880 { bgp_error(conn, 2, 0, NULL, 0); return 0; }
881 #ifdef LOCAL_DEBUG
882 {
883 int i;
884 DBG("\tOption %02x:", opt[0]);
885 for(i=0; i<opt[1]; i++)
886 DBG(" %02x", opt[2+i]);
887 DBG("\n");
888 }
889 #endif
890
891 ol = opt[1];
892 switch (opt[0])
893 {
894 case 2:
895 if (conn->start_state == BSS_CONNECT_NOCAP)
896 BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
897 else
898 bgp_parse_capabilities(conn, opt + 2, ol);
899 break;
900
901 default:
902 /*
903 * BGP specs don't tell us to send which option
904 * we didn't recognize, but it's common practice
905 * to do so. Also, capability negotiation with
906 * Cisco routers doesn't work without that.
907 */
908 bgp_error(conn, 2, 4, opt, ol);
909 return 0;
910 }
911 len -= 2 + ol;
912 opt += 2 + ol;
913 }
914 return 0;
915 }
916
917 static void
918 bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
919 {
920 struct bgp_conn *other;
921 struct bgp_proto *p = conn->bgp;
922 unsigned hold;
923 u16 base_as;
924 u32 id;
925
926 /* Check state */
927 if (conn->state != BS_OPENSENT)
928 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
929
930 /* Check message contents */
931 if (len < 29 || len != 29 + pkt[28])
932 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
933 if (pkt[19] != BGP_VERSION)
934 { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
935 conn->advertised_as = base_as = get_u16(pkt+20);
936 hold = get_u16(pkt+22);
937 id = get_u32(pkt+24);
938 BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
939
940 if (bgp_parse_options(conn, pkt+29, pkt[28]))
941 return;
942
943 if (hold > 0 && hold < 3)
944 { bgp_error(conn, 2, 6, pkt+22, 2); return; }
945
946 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
947 if (!id || (p->is_internal && id == p->local_id))
948 { bgp_error(conn, 2, 3, pkt+24, -4); return; }
949
950 if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
951 log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
952
953 if (conn->advertised_as != p->remote_as)
954 {
955 if (conn->peer_as4_support)
956 {
957 u32 val = htonl(conn->advertised_as);
958 bgp_error(conn, 2, 2, (byte *) &val, 4);
959 }
960 else
961 bgp_error(conn, 2, 2, pkt+20, 2);
962
963 return;
964 }
965
966 /* Check the other connection */
967 other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
968 switch (other->state)
969 {
970 case BS_CONNECT:
971 case BS_ACTIVE:
972 /* Stop outgoing connection attempts */
973 bgp_conn_enter_idle_state(other);
974 break;
975
976 case BS_IDLE:
977 case BS_OPENSENT:
978 case BS_CLOSE:
979 break;
980
981 case BS_OPENCONFIRM:
982 /*
983 * Description of collision detection rules in RFC 4271 is confusing and
984 * contradictory, but it is essentially:
985 *
986 * 1. Router with higher ID is dominant
987 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
988 * 3. When both connections are in OpenConfirm state, one initiated by
989 * the dominant router is kept.
990 *
991 * The first line in the expression below evaluates whether the neighbor
992 * is dominant, the second line whether the new connection was initiated
993 * by the neighbor. If both are true (or both are false), we keep the new
994 * connection, otherwise we keep the old one.
995 */
996 if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as)))
997 == (conn == &p->incoming_conn))
998 {
999 /* Should close the other connection */
1000 BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
1001 bgp_error(other, 6, 7, NULL, 0);
1002 break;
1003 }
1004 /* Fall thru */
1005 case BS_ESTABLISHED:
1006 /* Should close this connection */
1007 BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
1008 bgp_error(conn, 6, 7, NULL, 0);
1009 return;
1010 default:
1011 bug("bgp_rx_open: Unknown state");
1012 }
1013
1014 /* Update our local variables */
1015 conn->hold_time = MIN(hold, p->cf->hold_time);
1016 conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
1017 p->remote_id = id;
1018 p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
1019 p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
1020 p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
1021 p->gr_ready = p->cf->gr_mode && conn->peer_gr_able;
1022
1023 if (p->add_path_tx)
1024 p->p.accept_ra_types = RA_ANY;
1025
1026 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
1027
1028 bgp_schedule_packet(conn, PKT_KEEPALIVE);
1029 bgp_start_timer(conn->hold_timer, conn->hold_time);
1030 bgp_conn_enter_openconfirm_state(conn);
1031 }
1032
1033
1034 static inline void
1035 bgp_rx_end_mark(struct bgp_proto *p)
1036 {
1037 BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
1038
1039 if (p->load_state == BFS_LOADING)
1040 p->load_state = BFS_NONE;
1041
1042 if (p->p.gr_recovery)
1043 proto_graceful_restart_unlock(&p->p);
1044
1045 if (p->gr_active)
1046 bgp_graceful_restart_done(p);
1047 }
1048
1049
/*
 * Decode one prefix from the buffer pp with ll bytes left, advancing both.
 * Expects p, err, path_id, prefix and pxlen in the caller's scope and a
 * label 'done' to jump to on error (err is set to 1 when data are
 * truncated and to 10 when the prefix length exceeds BITS_PER_IP_ADDRESS).
 * With add_path_rx set, a 4-byte path ID precedes each prefix.
 */
#define DECODE_PREFIX(pp, ll) do {					\
  if (p->add_path_rx)							\
  {									\
    if (ll < 5) { err=1; goto done; }					\
    path_id = get_u32(pp);						\
    pp += 4;								\
    ll -= 4;								\
  }									\
  int dp_bits = *pp++;							\
  ll--;									\
  if (dp_bits > BITS_PER_IP_ADDRESS) { err=10; goto done; }		\
  int dp_bytes = (dp_bits+7) / 8;					\
  if (ll < dp_bytes) { err=1; goto done; }				\
  memcpy(&prefix, pp, dp_bytes);					\
  pp += dp_bytes;							\
  ll -= dp_bytes;							\
  ipa_ntoh(prefix);							\
  prefix = ipa_and(prefix, ipa_mkmask(dp_bits));			\
  pxlen = dp_bits;							\
} while (0)
1071
1072
1073 static inline void
1074 bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
1075 u32 path_id, u32 *last_id, struct rte_src **src,
1076 rta *a0, rta **a)
1077 {
1078 if (path_id != *last_id)
1079 {
1080 *src = rt_get_source(&p->p, path_id);
1081 *last_id = path_id;
1082
1083 if (*a)
1084 {
1085 rta_free(*a);
1086 *a = NULL;
1087 }
1088 }
1089
1090 /* Prepare cached route attributes */
1091 if (!*a)
1092 {
1093 a0->src = *src;
1094
1095 /* Workaround for rta_lookup() breaking eattrs */
1096 ea_list *ea = a0->eattrs;
1097 *a = rta_lookup(a0);
1098 a0->eattrs = ea;
1099 }
1100
1101 net *n = net_get(p->p.table, prefix, pxlen);
1102 rte *e = rte_get_temp(rta_clone(*a));
1103 e->net = n;
1104 e->pflags = 0;
1105 e->u.bgp.suppressed = 0;
1106 rte_update2(p->p.main_ahook, n, e, *src);
1107 }
1108
1109 static inline void
1110 bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
1111 u32 path_id, u32 *last_id, struct rte_src **src)
1112 {
1113 if (path_id != *last_id)
1114 {
1115 *src = rt_find_source(&p->p, path_id);
1116 *last_id = path_id;
1117 }
1118
1119 net *n = net_find(p->p.table, prefix, pxlen);
1120 rte_update2( p->p.main_ahook, n, NULL, *src);
1121 }
1122
1123 static inline int
1124 bgp_set_next_hop(struct bgp_proto *p, rta *a)
1125 {
1126 struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
1127 ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
1128
1129 #ifdef IPV6
1130 int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
1131
1132 /* First address should not be link-local, but may be zero in direct mode */
1133 if (ipa_is_link_local(*nexthop))
1134 *nexthop = IPA_NONE;
1135 #else
1136 int second = 0;
1137 #endif
1138
1139 if (p->cf->gw_mode == GW_DIRECT)
1140 {
1141 neighbor *ng = NULL;
1142
1143 if (ipa_nonzero(*nexthop))
1144 ng = neigh_find(&p->p, nexthop, 0);
1145 else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
1146 ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0);
1147
1148 /* Fallback */
1149 if (!ng)
1150 ng = p->neigh;
1151
1152 if (ng->scope == SCOPE_HOST)
1153 return 0;
1154
1155 a->dest = RTD_ROUTER;
1156 a->gw = ng->addr;
1157 a->iface = ng->iface;
1158 a->hostentry = NULL;
1159 a->igp_metric = 0;
1160 }
1161 else /* GW_RECURSIVE */
1162 {
1163 if (ipa_zero(*nexthop))
1164 return 0;
1165
1166 rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second);
1167 }
1168
1169 return 1;
1170 }
1171
1172 #ifndef IPV6 /* IPv4 version */
1173
1174 static void
1175 bgp_do_rx_update(struct bgp_conn *conn,
1176 byte *withdrawn, int withdrawn_len,
1177 byte *nlri, int nlri_len,
1178 byte *attrs, int attr_len)
1179 {
1180 struct bgp_proto *p = conn->bgp;
1181 struct rte_src *src = p->p.main_source;
1182 rta *a0, *a = NULL;
1183 ip_addr prefix;
1184 int pxlen, err = 0;
1185 u32 path_id = 0;
1186 u32 last_id = 0;
1187
1188 /* Check for End-of-RIB marker */
1189 if (!withdrawn_len && !attr_len && !nlri_len)
1190 {
1191 bgp_rx_end_mark(p);
1192 return;
1193 }
1194
1195 /* Withdraw routes */
1196 while (withdrawn_len)
1197 {
1198 DECODE_PREFIX(withdrawn, withdrawn_len);
1199 DBG("Withdraw %I/%d\n", prefix, pxlen);
1200
1201 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1202 }
1203
1204 if (!attr_len && !nlri_len) /* shortcut */
1205 return;
1206
1207 a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
1208
1209 if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1210 return;
1211
1212 if (a0 && nlri_len && !bgp_set_next_hop(p, a0))
1213 a0 = NULL;
1214
1215 last_id = 0;
1216 src = p->p.main_source;
1217
1218 while (nlri_len)
1219 {
1220 DECODE_PREFIX(nlri, nlri_len);
1221 DBG("Add %I/%d\n", prefix, pxlen);
1222
1223 if (a0)
1224 bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1225 else /* Forced withdraw as a result of soft error */
1226 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1227 }
1228
1229 done:
1230 if (a)
1231 rta_free(a);
1232
1233 if (err)
1234 bgp_error(conn, 3, err, NULL, 0);
1235
1236 return;
1237 }
1238
1239 #else /* IPv6 version */
1240
/*
 * DO_NLRI(name) - prologue for walking p->name##_start / p->name##_len.
 *
 * Loads the region into the caller's start/x/len/len0 variables, reads the
 * three-byte AFI/SAFI prefix into af/sub (af is 0 for an empty region) and
 * ends with a dangling `if', so the statement that follows the macro runs
 * only for the IPv6 address family. A non-empty region shorter than three
 * bytes sets err to 9 and jumps to the caller's `done' label.
 */
#define DO_NLRI(name)                                     \
  start = x = p->name##_start;                            \
  len = len0 = p->name##_len;                             \
  af = 0;                                                 \
  if (len)                                                \
    {                                                     \
      if (len < 3)                                        \
        { err = 9; goto done; }                           \
      af = get_u16(x);                                    \
      sub = x[2];                                         \
      x += 3;                                             \
      len -= 3;                                           \
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);  \
    }                                                     \
  if (af == BGP_AF_IPV6)
1256
1257 static void
1258 bgp_attach_next_hop(rta *a0, byte *x)
1259 {
1260 ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
1261 memcpy(nh, x+1, 16);
1262 ipa_ntoh(nh[0]);
1263
1264 /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
1265 if (*x == 32)
1266 {
1267 memcpy(nh+1, x+17, 16);
1268 ipa_ntoh(nh[1]);
1269 }
1270 else
1271 nh[1] = IPA_NONE;
1272 }
1273
1274
1275 static void
1276 bgp_do_rx_update(struct bgp_conn *conn,
1277 byte *withdrawn, int withdrawn_len,
1278 byte *nlri, int nlri_len,
1279 byte *attrs, int attr_len)
1280 {
1281 struct bgp_proto *p = conn->bgp;
1282 struct rte_src *src = p->p.main_source;
1283 byte *start, *x;
1284 int len, len0;
1285 unsigned af, sub;
1286 rta *a0, *a = NULL;
1287 ip_addr prefix;
1288 int pxlen, err = 0;
1289 u32 path_id = 0;
1290 u32 last_id = 0;
1291
1292 p->mp_reach_len = 0;
1293 p->mp_unreach_len = 0;
1294 a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
1295
1296 if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1297 return;
1298
1299 /* Check for End-of-RIB marker */
1300 if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len &&
1301 (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
1302 {
1303 bgp_rx_end_mark(p);
1304 return;
1305 }
1306
1307 DO_NLRI(mp_unreach)
1308 {
1309 while (len)
1310 {
1311 DECODE_PREFIX(x, len);
1312 DBG("Withdraw %I/%d\n", prefix, pxlen);
1313 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1314 }
1315 }
1316
1317 DO_NLRI(mp_reach)
1318 {
1319 /* Create fake NEXT_HOP attribute */
1320 if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
1321 { err = 9; goto done; }
1322
1323 if (a0)
1324 bgp_attach_next_hop(a0, x);
1325
1326 /* Also ignore one reserved byte */
1327 len -= *x + 2;
1328 x += *x + 2;
1329
1330 if (a0 && ! bgp_set_next_hop(p, a0))
1331 a0 = NULL;
1332
1333 last_id = 0;
1334 src = p->p.main_source;
1335
1336 while (len)
1337 {
1338 DECODE_PREFIX(x, len);
1339 DBG("Add %I/%d\n", prefix, pxlen);
1340
1341 if (a0)
1342 bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1343 else /* Forced withdraw as a result of soft error */
1344 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1345 }
1346 }
1347
1348 done:
1349 if (a)
1350 rta_free(a);
1351
1352 if (err) /* Use subcode 9, not err */
1353 bgp_error(conn, 3, 9, NULL, 0);
1354
1355 return;
1356 }
1357
1358 #endif
1359
1360 static void
1361 bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
1362 {
1363 struct bgp_proto *p = conn->bgp;
1364 byte *withdrawn, *attrs, *nlri;
1365 int withdrawn_len, attr_len, nlri_len;
1366
1367 BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
1368
1369 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1370 if (conn->state == BS_OPENCONFIRM)
1371 bgp_conn_enter_established_state(conn);
1372
1373 if (conn->state != BS_ESTABLISHED)
1374 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1375 bgp_start_timer(conn->hold_timer, conn->hold_time);
1376
1377 /* Find parts of the packet and check sizes */
1378 if (len < 23)
1379 {
1380 bgp_error(conn, 1, 2, pkt+16, 2);
1381 return;
1382 }
1383 withdrawn = pkt + 21;
1384 withdrawn_len = get_u16(pkt + 19);
1385 if (withdrawn_len + 23 > len)
1386 goto malformed;
1387 attrs = withdrawn + withdrawn_len + 2;
1388 attr_len = get_u16(attrs - 2);
1389 if (withdrawn_len + attr_len + 23 > len)
1390 goto malformed;
1391 nlri = attrs + attr_len;
1392 nlri_len = len - withdrawn_len - attr_len - 23;
1393 if (!attr_len && nlri_len)
1394 goto malformed;
1395 DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
1396
1397 lp_flush(bgp_linpool);
1398
1399 bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
1400 return;
1401
1402 malformed:
1403 bgp_error(conn, 3, 1, NULL, 0);
1404 }
1405
1406 static struct {
1407 byte major, minor;
1408 byte *msg;
1409 } bgp_msg_table[] = {
1410 { 1, 0, "Invalid message header" },
1411 { 1, 1, "Connection not synchronized" },
1412 { 1, 2, "Bad message length" },
1413 { 1, 3, "Bad message type" },
1414 { 2, 0, "Invalid OPEN message" },
1415 { 2, 1, "Unsupported version number" },
1416 { 2, 2, "Bad peer AS" },
1417 { 2, 3, "Bad BGP identifier" },
1418 { 2, 4, "Unsupported optional parameter" },
1419 { 2, 5, "Authentication failure" },
1420 { 2, 6, "Unacceptable hold time" },
1421 { 2, 7, "Required capability missing" }, /* [RFC3392] */
1422 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1423 { 3, 0, "Invalid UPDATE message" },
1424 { 3, 1, "Malformed attribute list" },
1425 { 3, 2, "Unrecognized well-known attribute" },
1426 { 3, 3, "Missing mandatory attribute" },
1427 { 3, 4, "Invalid attribute flags" },
1428 { 3, 5, "Invalid attribute length" },
1429 { 3, 6, "Invalid ORIGIN attribute" },
1430 { 3, 7, "AS routing loop" }, /* Deprecated */
1431 { 3, 8, "Invalid NEXT_HOP attribute" },
1432 { 3, 9, "Optional attribute error" },
1433 { 3, 10, "Invalid network field" },
1434 { 3, 11, "Malformed AS_PATH" },
1435 { 4, 0, "Hold timer expired" },
1436 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1437 { 5, 1, "Unexpected message in OpenSent state" },
1438 { 5, 2, "Unexpected message in OpenConfirm state" },
1439 { 5, 3, "Unexpected message in Established state" },
1440 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1441 { 6, 1, "Maximum number of prefixes reached" },
1442 { 6, 2, "Administrative shutdown" },
1443 { 6, 3, "Peer de-configured" },
1444 { 6, 4, "Administrative reset" },
1445 { 6, 5, "Connection rejected" },
1446 { 6, 6, "Other configuration change" },
1447 { 6, 7, "Connection collision resolution" },
1448 { 6, 8, "Out of Resources" },
1449 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
1450 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
1451 };
1452
1453 /**
1454 * bgp_error_dsc - return BGP error description
1455 * @code: BGP error code
1456 * @subcode: BGP error subcode
1457 *
1458 * bgp_error_dsc() returns error description for BGP errors
1459 * which might be static string or given temporary buffer.
1460 */
1461 const char *
1462 bgp_error_dsc(unsigned code, unsigned subcode)
1463 {
1464 static char buff[32];
1465 unsigned i;
1466 for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1467 if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1468 {
1469 return bgp_msg_table[i].msg;
1470 }
1471
1472 bsprintf(buff, "Unknown error %d.%d", code, subcode);
1473 return buff;
1474 }
1475
1476 void
1477 bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
1478 {
1479 const byte *name;
1480 byte *t, argbuf[36];
1481 unsigned i;
1482
1483 /* Don't report Cease messages generated by myself */
1484 if (code == 6 && class == BE_BGP_TX)
1485 return;
1486
1487 name = bgp_error_dsc(code, subcode);
1488 t = argbuf;
1489 if (len)
1490 {
1491 *t++ = ':';
1492 *t++ = ' ';
1493
1494 if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1495 {
1496 /* Bad peer AS - we would like to print the AS */
1497 t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data));
1498 goto done;
1499 }
1500 if (len > 16)
1501 len = 16;
1502 for (i=0; i<len; i++)
1503 t += bsprintf(t, "%02x", data[i]);
1504 }
1505 done:
1506 *t = 0;
1507 log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
1508 }
1509
1510 static void
1511 bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
1512 {
1513 struct bgp_proto *p = conn->bgp;
1514 if (len < 21)
1515 {
1516 bgp_error(conn, 1, 2, pkt+16, 2);
1517 return;
1518 }
1519
1520 unsigned code = pkt[19];
1521 unsigned subcode = pkt[20];
1522 int err = (code != 6);
1523
1524 bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1525 bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1526
1527 #ifndef IPV6
1528 if ((code == 2) && ((subcode == 4) || (subcode == 7))
1529 /* Error related to capability:
1530 * 4 - Peer does not support capabilities at all.
1531 * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
1532 */
1533 && (p->cf->capabilities == 2)
1534 /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1535 && (conn->start_state == BSS_CONNECT)
1536 /* Failed connection attempt have used capabilities */
1537 && (p->cf->remote_as <= 0xFFFF))
1538 /* Not possible with disabled capabilities */
1539 {
1540 /* We try connect without capabilities */
1541 log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
1542 p->start_state = BSS_CONNECT_NOCAP;
1543 err = 0;
1544 }
1545 #endif
1546
1547 bgp_conn_enter_close_state(conn);
1548 bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1549
1550 if (err)
1551 {
1552 bgp_update_startup_delay(p);
1553 bgp_stop(p, 0);
1554 }
1555 }
1556
1557 static void
1558 bgp_rx_keepalive(struct bgp_conn *conn)
1559 {
1560 struct bgp_proto *p = conn->bgp;
1561
1562 BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1563 bgp_start_timer(conn->hold_timer, conn->hold_time);
1564 switch (conn->state)
1565 {
1566 case BS_OPENCONFIRM:
1567 bgp_conn_enter_established_state(conn);
1568 break;
1569 case BS_ESTABLISHED:
1570 break;
1571 default:
1572 bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
1573 }
1574 }
1575
1576 static void
1577 bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, int len)
1578 {
1579 struct bgp_proto *p = conn->bgp;
1580
1581 if (conn->state != BS_ESTABLISHED)
1582 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1583
1584 if (!p->cf->enable_refresh)
1585 { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1586
1587 if (len < (BGP_HEADER_LENGTH + 4))
1588 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1589
1590 if (len > (BGP_HEADER_LENGTH + 4))
1591 { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
1592
1593 /* FIXME - we ignore AFI/SAFI values, as we support
1594 just one value and even an error code for an invalid
1595 request is not defined */
1596
1597 /* RFC 7313 redefined reserved field as RR message subtype */
1598 uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST;
1599
1600 switch (subtype)
1601 {
1602 case BGP_RR_REQUEST:
1603 BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1604 proto_request_feeding(&p->p);
1605 break;
1606
1607 case BGP_RR_BEGIN:
1608 BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
1609 bgp_refresh_begin(p);
1610 break;
1611
1612 case BGP_RR_END:
1613 BGP_TRACE(D_PACKETS, "Got END-OF-RR");
1614 bgp_refresh_end(p);
1615 break;
1616
1617 default:
1618 log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
1619 p->p.name, subtype);
1620 break;
1621 }
1622 }
1623
1624
1625 /**
1626 * bgp_rx_packet - handle a received packet
1627 * @conn: BGP connection
1628 * @pkt: start of the packet
1629 * @len: packet size
1630 *
1631 * bgp_rx_packet() takes a newly received packet and calls the corresponding
1632 * packet handler according to the packet type.
1633 */
1634 static void
1635 bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1636 {
1637 byte type = pkt[18];
1638
1639 DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
1640
1641 if (conn->bgp->p.mrtdump & MD_MESSAGES)
1642 mrt_dump_bgp_packet(conn, pkt, len);
1643
1644 switch (type)
1645 {
1646 case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
1647 case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
1648 case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
1649 case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
1650 case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
1651 default: bgp_error(conn, 1, 3, pkt+18, 1);
1652 }
1653 }
1654
1655 /**
1656 * bgp_rx - handle received data
1657 * @sk: socket
1658 * @size: amount of data received
1659 *
1660 * bgp_rx() is called by the socket layer whenever new data arrive from
1661 * the underlying TCP connection. It assembles the data fragments to packets,
1662 * checks their headers and framing and passes complete packets to
1663 * bgp_rx_packet().
1664 */
1665 int
1666 bgp_rx(sock *sk, int size)
1667 {
1668 struct bgp_conn *conn = sk->data;
1669 byte *pkt_start = sk->rbuf;
1670 byte *end = pkt_start + size;
1671 unsigned i, len;
1672
1673 DBG("BGP: RX hook: Got %d bytes\n", size);
1674 while (end >= pkt_start + BGP_HEADER_LENGTH)
1675 {
1676 if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1677 return 0;
1678 for(i=0; i<16; i++)
1679 if (pkt_start[i] != 0xff)
1680 {
1681 bgp_error(conn, 1, 1, NULL, 0);
1682 break;
1683 }
1684 len = get_u16(pkt_start+16);
1685 if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
1686 {
1687 bgp_error(conn, 1, 2, pkt_start+16, 2);
1688 break;
1689 }
1690 if (end < pkt_start + len)
1691 break;
1692 bgp_rx_packet(conn, pkt_start, len);
1693 pkt_start += len;
1694 }
1695 if (pkt_start != sk->rbuf)
1696 {
1697 memmove(sk->rbuf, pkt_start, end - pkt_start);
1698 sk->rpos = sk->rbuf + (end - pkt_start);
1699 }
1700 return 0;
1701 }