]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
Merged multipath and single-path data structures.
[thirdparty/bird.git] / proto / bgp / packets.c
1 /*
2 * BIRD -- BGP Packet Processing
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 #undef LOCAL_DEBUG
12
13 #include <stdlib.h>
14
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "nest/mrtdump.h"
21 #include "conf/conf.h"
22 #include "lib/unaligned.h"
23 #include "lib/socket.h"
24
25 #include "nest/cli.h"
26
27 #include "bgp.h"
28
29
30 #define BGP_RR_REQUEST 0
31 #define BGP_RR_BEGIN 1
32 #define BGP_RR_END 2
33
34
35 static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
36 static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
37
38 /* Table for state -> RFC 6608 FSM error subcodes */
39 static byte fsm_err_subcode[BS_MAX] = {
40 [BS_OPENSENT] = 1,
41 [BS_OPENCONFIRM] = 2,
42 [BS_ESTABLISHED] = 3
43 };
44
45
46 static struct bgp_channel *
47 bgp_get_channel(struct bgp_proto *p, u32 afi)
48 {
49 uint i;
50
51 for (i = 0; i < p->channel_count; i++)
52 if (p->afi_map[i] == afi)
53 return p->channel_map[i];
54
55 return NULL;
56 }
57
58 static inline void
59 put_af3(byte *buf, u32 id)
60 {
61 put_u16(buf, id >> 16);
62 buf[2] = id & 0xff;
63 }
64
65 static inline void
66 put_af4(byte *buf, u32 id)
67 {
68 put_u16(buf, id >> 16);
69 buf[2] = 0;
70 buf[3] = id & 0xff;
71 }
72
73 static inline u32
74 get_af3(byte *buf)
75 {
76 return (get_u16(buf) << 16) | buf[2];
77 }
78
79 static inline u32
80 get_af4(byte *buf)
81 {
82 return (get_u16(buf) << 16) | buf[3];
83 }
84
/*
 * MRT Dump format is not semantically specified.
 * We will use these values in appropriate fields:
 *
 * Local AS, Remote AS - configured AS numbers for given BGP instance.
 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
 *
 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
 * changes) and MESSAGE (for received BGP messages).
 *
 * STATE_CHANGE always uses the AS4 variant, but MESSAGE uses the AS4 variant
 * only when an AS4 session is established, and even in that case MESSAGE
 * does not use the AS4 variant for the initial OPEN message. This strange
 * behavior is here for compatibility with Quagga and Bgpdump.
 */
100
101 static byte *
102 mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
103 {
104 struct bgp_proto *p = conn->bgp;
105 uint v4 = ipa_is_ip4(p->cf->remote_ip);
106
107 if (as4)
108 {
109 put_u32(buf+0, p->remote_as);
110 put_u32(buf+4, p->public_as);
111 buf+=8;
112 }
113 else
114 {
115 put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
116 put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS);
117 buf+=4;
118 }
119
120 put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
121 put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6);
122 buf+=4;
123
124 if (v4)
125 {
126 buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE);
127 buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE);
128 }
129 else
130 {
131 buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE);
132 buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE);
133 }
134
135 return buf;
136 }
137
138 static void
139 mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len)
140 {
141 byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
142 byte *bp = buf + MRTDUMP_HDR_LENGTH;
143 int as4 = conn->bgp->as4_session;
144
145 bp = mrt_put_bgp4_hdr(bp, conn, as4);
146 memcpy(bp, pkt, len);
147 bp += len;
148 mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
149 buf, bp-buf);
150 }
151
152 static inline u16
153 convert_state(uint state)
154 {
155 /* Convert state from our BS_* values to values used in MRTDump */
156 return (state == BS_CLOSE) ? 1 : state + 1;
157 }
158
159 void
160 mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new)
161 {
162 byte buf[128];
163 byte *bp = buf + MRTDUMP_HDR_LENGTH;
164
165 bp = mrt_put_bgp4_hdr(bp, conn, 1);
166 put_u16(bp+0, convert_state(old));
167 put_u16(bp+2, convert_state(new));
168 bp += 4;
169 mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
170 }
171
172 static byte *
173 bgp_create_notification(struct bgp_conn *conn, byte *buf)
174 {
175 struct bgp_proto *p = conn->bgp;
176
177 BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
178 buf[0] = conn->notify_code;
179 buf[1] = conn->notify_subcode;
180 memcpy(buf+2, conn->notify_data, conn->notify_size);
181 return buf + 2 + conn->notify_size;
182 }
183
184
185 /* Capability negotiation as per RFC 5492 */
186
/*
 * Iterate @ac over the first (caps)->af_count entries of (caps)->af_data[].
 * Both arguments are parenthesized so that expressions such as `&local_caps`
 * can be passed, not just plain identifiers.
 */
#define WALK_AF_CAPS(caps,ac) \
  for ((ac) = (caps)->af_data; (ac) < &(caps)->af_data[(caps)->af_count]; (ac)++)
189
190 const struct bgp_af_caps *
191 bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
192 {
193 struct bgp_af_caps *ac;
194
195 WALK_AF_CAPS(caps, ac)
196 if (ac->afi == afi)
197 return ac;
198
199 return NULL;
200 }
201
202 static struct bgp_af_caps *
203 bgp_get_af_caps(struct bgp_caps *caps, u32 afi)
204 {
205 struct bgp_af_caps *ac;
206
207 WALK_AF_CAPS(caps, ac)
208 if (ac->afi == afi)
209 return ac;
210
211 ac = &caps->af_data[caps->af_count++];
212 memset(ac, 0, sizeof(struct bgp_af_caps));
213 ac->afi = afi;
214
215 return ac;
216 }
217
218 static int
219 bgp_af_caps_cmp(const void *X, const void *Y)
220 {
221 const struct bgp_af_caps *x = X, *y = Y;
222 return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
223 }
224
225
226 static byte *
227 bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
228 {
229 struct bgp_proto *p = conn->bgp;
230 struct bgp_channel *c;
231 struct bgp_caps *caps;
232 struct bgp_af_caps *ac;
233 uint any_add_path = 0;
234 byte *data;
235
236 /* Prepare bgp_caps structure */
237
238 int n = list_length(&p->p.channels);
239 caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
240 conn->local_caps = caps;
241
242 caps->as4_support = p->cf->enable_as4;
243 caps->ext_messages = p->cf->enable_extended_messages;
244 caps->route_refresh = p->cf->enable_refresh;
245 caps->enhanced_refresh = p->cf->enable_refresh;
246
247 if (caps->as4_support)
248 caps->as4_number = p->public_as;
249
250 if (p->cf->gr_mode)
251 {
252 caps->gr_aware = 1;
253 caps->gr_time = p->cf->gr_time;
254 caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
255 }
256
257 /* Allocate and fill per-AF fields */
258 WALK_LIST(c, p->p.channels)
259 {
260 ac = &caps->af_data[caps->af_count++];
261 ac->afi = c->afi;
262 ac->ready = 1;
263
264 ac->add_path = c->cf->add_path;
265 any_add_path |= ac->add_path;
266
267 if (c->cf->gr_able)
268 {
269 ac->gr_able = 1;
270
271 if (p->p.gr_recovery)
272 ac->gr_af_flags |= BGP_GRF_FORWARDING;
273 }
274 }
275
276 /* Sort capability fields by AFI/SAFI */
277 qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
278
279
280 /* Create capability list in buffer */
281
282 WALK_AF_CAPS(caps, ac)
283 if (ac->ready)
284 {
285 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
286 *buf++ = 4; /* Capability data length */
287 put_af4(buf, ac->afi);
288 buf += 4;
289 }
290
291 if (caps->route_refresh)
292 {
293 *buf++ = 2; /* Capability 2: Support for route refresh */
294 *buf++ = 0; /* Capability data length */
295 }
296
297 if (caps->ext_messages)
298 {
299 *buf++ = 6; /* Capability 6: Support for extended messages */
300 *buf++ = 0; /* Capability data length */
301 }
302
303 if (caps->gr_aware)
304 {
305 *buf++ = 64; /* Capability 64: Support for graceful restart */
306 *buf++ = 0; /* Capability data length, will be fixed later */
307 data = buf;
308
309 put_u16(buf, caps->gr_time);
310 buf[0] |= caps->gr_flags;
311 buf += 2;
312
313 WALK_AF_CAPS(caps, ac)
314 if (ac->gr_able)
315 {
316 put_af3(buf, ac->afi);
317 buf[3] = ac->gr_af_flags;
318 buf += 4;
319 }
320
321 data[-1] = buf - data;
322 }
323
324 if (caps->as4_support)
325 {
326 *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
327 *buf++ = 4; /* Capability data length */
328 put_u32(buf, p->public_as);
329 buf += 4;
330 }
331
332 if (any_add_path)
333 {
334 *buf++ = 69; /* Capability 69: Support for ADD-PATH */
335 *buf++ = 0; /* Capability data length, will be fixed later */
336 data = buf;
337
338 WALK_AF_CAPS(caps, ac)
339 if (ac->add_path)
340 {
341 put_af3(buf, ac->afi);
342 buf[3] = ac->add_path;
343 buf += 4;
344 }
345
346 data[-1] = buf - data;
347 }
348
349 if (caps->enhanced_refresh)
350 {
351 *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
352 *buf++ = 0; /* Capability data length */
353 }
354
355 /* FIXME: Should not XXXX 255 */
356
357 return buf;
358 }
359
360 static void
361 bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len)
362 {
363 struct bgp_proto *p = conn->bgp;
364 struct bgp_af_caps *ac;
365 int i, cl;
366 u32 af;
367
368 while (len > 0)
369 {
370 if (len < 2 || len < (2 + pos[1]))
371 goto err;
372
373 /* Capability length */
374 cl = pos[1];
375
376 /* Capability type */
377 switch (pos[0])
378 {
379 case 1: /* Multiprotocol capability, RFC 4760 */
380 if (cl != 4)
381 goto err;
382
383 af = get_af4(pos+2);
384 ac = bgp_get_af_caps(caps, af);
385 ac->ready = 1;
386 break;
387
388 case 2: /* Route refresh capability, RFC 2918 */
389 if (cl != 0)
390 goto err;
391
392 caps->route_refresh = 1;
393 break;
394
395 case 6: /* Extended message length capability, RFC draft */
396 if (cl != 0)
397 goto err;
398
399 caps->ext_messages = 1;
400 break;
401
402 case 64: /* Graceful restart capability, RFC 4724 */
403 if (cl % 4 != 2)
404 goto err;
405
406 /* Only the last instance is valid */
407 WALK_AF_CAPS(caps, ac)
408 {
409 ac->gr_able = 0;
410 ac->gr_af_flags = 0;
411 }
412
413 caps->gr_aware = 1;
414 caps->gr_flags = pos[2] & 0xf0;
415 caps->gr_time = get_u16(pos + 2) & 0x0fff;
416
417 for (i = 2; i < cl; i += 4)
418 {
419 af = get_af3(pos+2+i);
420 ac = bgp_get_af_caps(caps, af);
421 ac->gr_able = 1;
422 ac->gr_af_flags = pos[2+i+3];
423 }
424 break;
425
426 case 65: /* AS4 capability, RFC 4893 */
427 if (cl != 4)
428 goto err;
429
430 caps->as4_support = 1;
431 caps->as4_number = get_u32(pos + 2);
432 break;
433
434 case 69: /* ADD-PATH capability, RFC 7911 */
435 if (cl % 4)
436 goto err;
437
438 for (i = 0; i < cl; i += 4)
439 {
440 byte val = pos[2+i+3];
441 if (!val || (val > BGP_ADD_PATH_FULL))
442 {
443 log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
444 p->p.name, val);
445 break;
446 }
447 }
448
449 for (i = 0; i < cl; i += 4)
450 {
451 af = get_af3(pos+2+i);
452 ac = bgp_get_af_caps(caps, af);
453 ac->add_path = pos[2+i+3];
454 }
455 break;
456
457 case 70: /* Enhanced route refresh capability, RFC 7313 */
458 if (cl != 0)
459 goto err;
460
461 caps->enhanced_refresh = 1;
462 break;
463
464 /* We can safely ignore all other capabilities */
465 }
466
467 ADVANCE(pos, len, 2 + cl);
468 }
469 return;
470
471 err:
472 bgp_error(conn, 2, 0, NULL, 0);
473 return;
474 }
475
476 static int
477 bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
478 {
479 struct bgp_proto *p = conn->bgp;
480 struct bgp_caps *caps;
481 int ol;
482
483 /* Max number of announced AFIs is limited by max option length (255) */
484 caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps));
485 memset(caps, 0, sizeof(struct bgp_caps));
486
487 while (len > 0)
488 {
489 if ((len < 2) || (len < (2 + pos[1])))
490 { bgp_error(conn, 2, 0, NULL, 0); return -1; }
491
492 ol = pos[1];
493 if (pos[0] == 2)
494 {
495 /* BGP capabilities, RFC 5492 */
496 if (p->cf->capabilities)
497 bgp_read_capabilities(conn, caps, pos + 2, ol);
498 }
499 else
500 {
501 /* Unknown option */
502 bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */
503 return -1;
504 }
505
506 ADVANCE(pos, len, 2 + ol);
507 }
508
509 uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps);
510 conn->remote_caps = mb_allocz(p->p.pool, n);
511 memcpy(conn->remote_caps, caps, n);
512
513 return 0;
514 }
515
516 static byte *
517 bgp_create_open(struct bgp_conn *conn, byte *buf)
518 {
519 struct bgp_proto *p = conn->bgp;
520
521 BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
522 BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);
523
524 buf[0] = BGP_VERSION;
525 put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
526 put_u16(buf+3, p->cf->hold_time);
527 put_u32(buf+5, p->local_id);
528
529 if (p->cf->capabilities)
530 {
531 /* Prepare local_caps and write capabilities to buffer */
532 byte *end = bgp_write_capabilities(conn, buf+12);
533 uint len = end - (buf+12);
534
535 buf[9] = len + 2; /* Optional parameters length */
536 buf[10] = 2; /* Option 2: Capability list */
537 buf[11] = len; /* Option data length */
538
539 return end;
540 }
541 else
542 {
543 /* Prepare empty local_caps */
544 conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
545
546 buf[9] = 0; /* No optional parameters */
547 return buf + 10;
548 }
549
550 return buf;
551 }
552
553 static void
554 bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
555 {
556 struct bgp_proto *p = conn->bgp;
557 struct bgp_conn *other;
558 u32 asn, hold, id;
559
560 /* Check state */
561 if (conn->state != BS_OPENSENT)
562 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
563
564 /* Check message contents */
565 if (len < 29 || len != 29 + (uint) pkt[28])
566 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
567
568 if (pkt[19] != BGP_VERSION)
569 { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; }
570
571 asn = get_u16(pkt+20);
572 hold = get_u16(pkt+22);
573 id = get_u32(pkt+24);
574 BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);
575
576 if (bgp_read_options(conn, pkt+29, pkt[28]) < 0)
577 return;
578
579 if (hold > 0 && hold < 3)
580 { bgp_error(conn, 2, 6, pkt+22, 2); return; }
581
582 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
583 if (!id || (p->is_internal && id == p->local_id))
584 { bgp_error(conn, 2, 3, pkt+24, -4); return; }
585
586 struct bgp_caps *caps = conn->remote_caps;
587
588 if (caps->as4_support)
589 {
590 u32 as4 = caps->as4_number;
591
592 if ((as4 != asn) && (asn != AS_TRANS))
593 log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
594
595 if (as4 != p->remote_as)
596 { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
597 }
598 else
599 {
600 if (asn != p->remote_as)
601 { bgp_error(conn, 2, 2, pkt+20, 2); return; }
602 }
603
604 /* Check the other connection */
605 other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
606 switch (other->state)
607 {
608 case BS_CONNECT:
609 case BS_ACTIVE:
610 /* Stop outgoing connection attempts */
611 bgp_conn_enter_idle_state(other);
612 break;
613
614 case BS_IDLE:
615 case BS_OPENSENT:
616 case BS_CLOSE:
617 break;
618
619 case BS_OPENCONFIRM:
620 /*
621 * Description of collision detection rules in RFC 4271 is confusing and
622 * contradictory, but it is essentially:
623 *
624 * 1. Router with higher ID is dominant
625 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
626 * 3. When both connections are in OpenConfirm state, one initiated by
627 * the dominant router is kept.
628 *
629 * The first line in the expression below evaluates whether the neighbor
630 * is dominant, the second line whether the new connection was initiated
631 * by the neighbor. If both are true (or both are false), we keep the new
632 * connection, otherwise we keep the old one.
633 */
634 if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
635 == (conn == &p->incoming_conn))
636 {
637 /* Should close the other connection */
638 BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
639 bgp_error(other, 6, 7, NULL, 0);
640 break;
641 }
642 /* Fall thru */
643 case BS_ESTABLISHED:
644 /* Should close this connection */
645 BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
646 bgp_error(conn, 6, 7, NULL, 0);
647 return;
648
649 default:
650 bug("bgp_rx_open: Unknown state");
651 }
652
653 /* Update our local variables */
654 conn->hold_time = MIN(hold, p->cf->hold_time);
655 conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
656 conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
657 conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
658 p->remote_id = id;
659
660 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
661 conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
662
663 bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
664 bgp_start_timer(conn->hold_timer, conn->hold_time);
665 bgp_conn_enter_openconfirm_state(conn);
666 }
667
668
669 /*
670 * Next hop handling
671 */
672
/*
 * Helpers for next hop processing. Both expect a variable `s` in scope that
 * has the members proto and (for WITHDRAW) err_withdraw.
 *
 * REPORT() logs a message prefixed by the protocol name.
 * WITHDRAW() does the same, sets s->err_withdraw and returns from the
 * enclosing function, so it is usable only in functions returning void.
 */
#define REPORT(msg, args...)						\
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

#define WITHDRAW(msg, args...)						\
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Messages used with the macros above */
#define BAD_NEXT_HOP	"Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP	"Missing NEXT_HOP attribute"
681
682
683 static void
684 bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
685 {
686 struct bgp_proto *p = s->proto;
687 struct bgp_channel *c = s->channel;
688
689 if (c->cf->gw_mode == GW_DIRECT)
690 {
691 neighbor *nbr = NULL;
692
693 /* GW_DIRECT -> single_hop -> p->neigh != NULL */
694 if (ipa_nonzero(gw))
695 nbr = neigh_find2(&p->p, &gw, NULL, 0);
696 else if (ipa_nonzero(ll))
697 nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0);
698
699 if (!nbr || (nbr->scope == SCOPE_HOST))
700 WITHDRAW(BAD_NEXT_HOP);
701
702 a->dest = RTD_UNICAST;
703 a->nh.gw = nbr->addr;
704 a->nh.iface = nbr->iface;
705 a->nh.next = NULL;
706 a->hostentry = NULL;
707 a->igp_metric = 0;
708 }
709 else /* GW_RECURSIVE */
710 {
711 if (ipa_zero(gw))
712 WITHDRAW(BAD_NEXT_HOP);
713
714 rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll);
715 }
716 }
717
718 static inline int
719 bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
720 {
721 struct bgp_proto *p = s->proto;
722 ip_addr *nh = (void *) a->u.ptr->data;
723
724 if (s->channel->cf->next_hop_self)
725 return 0;
726
727 if (s->channel->cf->next_hop_keep)
728 return 1;
729
730 /* Keep it when explicitly set in export filter */
731 if (a->type & EAF_FRESH)
732 return 1;
733
734 /* Keep it when exported to internal peers */
735 if (p->is_interior && ipa_nonzero(*nh))
736 return 1;
737
738 /* Keep it when forwarded between single-hop BGPs on the same iface */
739 struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL;
740 return p->neigh && (p->neigh->iface == ifa);
741 }
742
743 static inline int
744 bgp_use_gateway(struct bgp_export_state *s)
745 {
746 struct bgp_proto *p = s->proto;
747 rta *ra = s->route->attrs;
748
749 if (s->channel->cf->next_hop_self)
750 return 0;
751
752 /* We need valid global gateway */
753 if ((ra->dest != RTD_UNICAST) || (ra->nh.next) || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
754 return 0;
755
756 /* Use it when exported to internal peers */
757 if (p->is_interior)
758 return 1;
759
760 /* Use it when forwarded to single-hop BGP peer on on the same iface */
761 return p->neigh && (p->neigh->iface == ra->nh.iface);
762 }
763
764 static void
765 bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
766 {
767 if (!a || !bgp_use_next_hop(s, a))
768 {
769 if (bgp_use_gateway(s))
770 {
771 ip_addr nh[1] = { s->route->attrs->nh.gw };
772 bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
773 }
774 else
775 {
776 ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
777 bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
778 }
779 }
780
781 /* Check if next hop is valid */
782 a = bgp_find_attr(*to, BA_NEXT_HOP);
783 if (!a)
784 WITHDRAW(NO_NEXT_HOP);
785
786 ip_addr *nh = (void *) a->u.ptr->data;
787 ip_addr peer = s->proto->cf->remote_ip;
788 uint len = a->u.ptr->length;
789
790 if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
791 WITHDRAW(BAD_NEXT_HOP);
792
793 if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
794 WITHDRAW(BAD_NEXT_HOP);
795 }
796
797
798 /*
799 * UPDATE
800 */
801
802 static void
803 bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
804 {
805 if (path_id != s->last_id)
806 {
807 s->last_src = rt_get_source(&s->proto->p, path_id);
808 s->last_id = path_id;
809
810 rta_free(s->cached_rta);
811 s->cached_rta = NULL;
812 }
813
814 if (!a0)
815 {
816 /* Route withdraw */
817 rte_update2(&s->channel->c, n, NULL, s->last_src);
818 return;
819 }
820
821 /* Prepare cached route attributes */
822 if (s->cached_rta == NULL)
823 {
824 a0->src = s->last_src;
825
826 /* Workaround for rta_lookup() breaking eattrs */
827 ea_list *ea = a0->eattrs;
828 s->cached_rta = rta_lookup(a0);
829 a0->eattrs = ea;
830 }
831
832 rta *a = rta_clone(s->cached_rta);
833 rte *e = rte_get_temp(a);
834
835 e->pflags = 0;
836 e->u.bgp.suppressed = 0;
837 rte_update2(&s->channel->c, n, e, s->last_src);
838 }
839
840
841
842 static uint
843 bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
844 {
845 byte *pos = buf;
846
847 while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip4_addr))))
848 {
849 struct bgp_prefix *px = HEAD(buck->prefixes);
850 struct net_addr_ip4 *net = (void *) px->net;
851
852 /* Encode path ID */
853 if (s->add_path)
854 {
855 put_u32(pos, px->path_id);
856 ADVANCE(pos, size, 4);
857 }
858
859 ip4_addr a = ip4_hton(net->prefix);
860 uint b = (net->pxlen + 7) / 8;
861
862 /* Encode prefix length */
863 *pos = net->pxlen;
864 ADVANCE(pos, size, 1);
865
866 /* Encode prefix body */
867 memcpy(pos, &a, b);
868 ADVANCE(pos, size, b);
869
870 bgp_free_prefix(s->channel, px);
871 }
872
873 return pos - buf;
874 }
875
876 static void
877 bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
878 {
879 while (len)
880 {
881 net_addr_ip4 net;
882 u32 path_id = 0;
883
884 /* Decode path ID */
885 if (s->add_path)
886 {
887 if (len < 5)
888 bgp_parse_error(s, 1);
889
890 path_id = get_u32(pos);
891 ADVANCE(pos, len, 4);
892 }
893
894 /* Decode prefix length */
895 uint l = *pos;
896 uint b = (l + 7) / 8;
897 ADVANCE(pos, len, 1);
898
899 if (l > IP4_MAX_PREFIX_LENGTH)
900 bgp_parse_error(s, 10);
901
902 if (len < b)
903 bgp_parse_error(s, 1);
904
905 /* Decode prefix body */
906 ip4_addr addr = IP4_NONE;
907 memcpy(&addr, pos, b);
908 ADVANCE(pos, len, b);
909
910 net = NET_ADDR_IP4(ip4_ntoh(addr), l);
911 net_normalize_ip4(&net);
912
913 // XXXX validate prefix
914
915 bgp_rte_update(s, (net_addr *) &net, path_id, a);
916 }
917 }
918
919 static uint
920 bgp_encode_next_hop_ip4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
921 {
922 /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
923
924 ASSERT(a->u.ptr->length == sizeof(ip_addr));
925
926 put_ip4(buf, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
927
928 return 4;
929 }
930
931 static void
932 bgp_decode_next_hop_ip4(struct bgp_parse_state *s, byte *data, uint len, rta *a)
933 {
934 if (len != 4)
935 bgp_parse_error(s, 9);
936
937 ip_addr nh = ipa_from_ip4(get_ip4(data));
938
939 // XXXX validate next hop
940
941 bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &nh, sizeof(nh));
942 bgp_apply_next_hop(s, a, nh, IPA_NONE);
943 }
944
945
946 static uint
947 bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
948 {
949 byte *pos = buf;
950
951 while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip6_addr))))
952 {
953 struct bgp_prefix *px = HEAD(buck->prefixes);
954 struct net_addr_ip6 *net = (void *) px->net;
955
956 /* Encode path ID */
957 if (s->add_path)
958 {
959 put_u32(pos, px->path_id);
960 ADVANCE(pos, size, 4);
961 }
962
963 ip6_addr a = ip6_hton(net->prefix);
964 uint b = (net->pxlen + 7) / 8;
965
966 /* Encode prefix length */
967 *pos = net->pxlen;
968 ADVANCE(pos, size, 1);
969
970 /* Encode prefix body */
971 memcpy(pos, &a, b);
972 ADVANCE(pos, size, b);
973
974 bgp_free_prefix(s->channel, px);
975 }
976
977 return pos - buf;
978 }
979
980 static void
981 bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
982 {
983 while (len)
984 {
985 net_addr_ip6 net;
986 u32 path_id = 0;
987
988 /* Decode path ID */
989 if (s->add_path)
990 {
991 if (len < 5)
992 bgp_parse_error(s, 1);
993
994 path_id = get_u32(pos);
995 ADVANCE(pos, len, 4);
996 }
997
998 /* Decode prefix length */
999 uint l = *pos;
1000 uint b = (l + 7) / 8;
1001 ADVANCE(pos, len, 1);
1002
1003 if (l > IP6_MAX_PREFIX_LENGTH)
1004 bgp_parse_error(s, 10);
1005
1006 if (len < b)
1007 bgp_parse_error(s, 1);
1008
1009 /* Decode prefix body */
1010 ip6_addr addr = IP6_NONE;
1011 memcpy(&addr, pos, b);
1012 ADVANCE(pos, len, b);
1013
1014 net = NET_ADDR_IP6(ip6_ntoh(addr), l);
1015 net_normalize_ip6(&net);
1016
1017 // XXXX validate prefix
1018
1019 bgp_rte_update(s, (net_addr *) &net, path_id, a);
1020 }
1021 }
1022
1023 static uint
1024 bgp_encode_next_hop_ip6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
1025 {
1026 ip_addr *nh = (void *) a->u.ptr->data;
1027 uint len = a->u.ptr->length;
1028
1029 ASSERT((len == 16) || (len == 32));
1030
1031 put_ip6(buf, ipa_to_ip6(nh[0]));
1032
1033 if (len == 32)
1034 put_ip6(buf+16, ipa_to_ip6(nh[1]));
1035
1036 return len;
1037 }
1038
1039 static void
1040 bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
1041 {
1042 struct adata *ad = lp_alloc_adata(s->pool, 32);
1043 ip_addr *nh = (void *) ad->data;
1044
1045 if ((len != 16) && (len != 32))
1046 bgp_parse_error(s, 9);
1047
1048 nh[0] = ipa_from_ip6(get_ip6(data));
1049 nh[1] = (len == 32) ? ipa_from_ip6(get_ip6(data+16)) : IPA_NONE;
1050
1051 if (ip6_is_link_local(nh[0]))
1052 {
1053 nh[1] = nh[0];
1054 nh[0] = IPA_NONE;
1055 }
1056
1057 if (!ip6_is_link_local(nh[1]))
1058 nh[1] = IPA_NONE;
1059
1060 if (ipa_zero(nh[1]))
1061 ad->length = 16;
1062
1063 // XXXX validate next hop
1064
1065 bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
1066 bgp_apply_next_hop(s, a, nh[0], nh[1]);
1067 }
1068
1069
1070 static const struct bgp_af_desc bgp_af_table[] = {
1071 {
1072 .afi = BGP_AF_IPV4,
1073 .net = NET_IP4,
1074 .name = "ipv4",
1075 .encode_nlri = bgp_encode_nlri_ip4,
1076 .decode_nlri = bgp_decode_nlri_ip4,
1077 .encode_next_hop = bgp_encode_next_hop_ip4,
1078 .decode_next_hop = bgp_decode_next_hop_ip4,
1079 .update_next_hop = bgp_update_next_hop_ip,
1080 },
1081 {
1082 .afi = BGP_AF_IPV4_MC,
1083 .net = NET_IP4,
1084 .name = "ipv4-mc",
1085 .encode_nlri = bgp_encode_nlri_ip4,
1086 .decode_nlri = bgp_decode_nlri_ip4,
1087 .encode_next_hop = bgp_encode_next_hop_ip4,
1088 .decode_next_hop = bgp_decode_next_hop_ip4,
1089 .update_next_hop = bgp_update_next_hop_ip,
1090 },
1091 {
1092 .afi = BGP_AF_IPV6,
1093 .net = NET_IP6,
1094 .name = "ipv6",
1095 .encode_nlri = bgp_encode_nlri_ip6,
1096 .decode_nlri = bgp_decode_nlri_ip6,
1097 .encode_next_hop = bgp_encode_next_hop_ip6,
1098 .decode_next_hop = bgp_decode_next_hop_ip6,
1099 .update_next_hop = bgp_update_next_hop_ip,
1100 },
1101 {
1102 .afi = BGP_AF_IPV6_MC,
1103 .net = NET_IP6,
1104 .name = "ipv6-mc",
1105 .encode_nlri = bgp_encode_nlri_ip6,
1106 .decode_nlri = bgp_decode_nlri_ip6,
1107 .encode_next_hop = bgp_encode_next_hop_ip6,
1108 .decode_next_hop = bgp_decode_next_hop_ip6,
1109 .update_next_hop = bgp_update_next_hop_ip,
1110 },
1111 };
1112
1113 const struct bgp_af_desc *
1114 bgp_get_af_desc(u32 afi)
1115 {
1116 uint i;
1117 for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++)
1118 if (bgp_af_table[i].afi == afi)
1119 return &bgp_af_table[i];
1120
1121 return NULL;
1122 }
1123
1124 static inline uint
1125 bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1126 {
1127 return s->channel->desc->encode_nlri(s, buck, buf, end - buf);
1128 }
1129
1130 static inline uint
1131 bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf)
1132 {
1133 return s->channel->desc->encode_next_hop(s, nh, buf, 255);
1134 }
1135
1136 void
1137 bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to)
1138 {
1139 s->channel->desc->update_next_hop(s, a, to);
1140 }
1141
1142 #define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
1143
1144 static byte *
1145 bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1146 {
1147 /*
1148 * 2 B Withdrawn Routes Length (zero)
1149 * --- IPv4 Withdrawn Routes NLRI (unused)
1150 * 2 B Total Path Attribute Length
1151 * var Path Attributes
1152 * var IPv4 Network Layer Reachability Information
1153 */
1154
1155 int lr, la;
1156
1157 la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
1158 if (la < 0)
1159 {
1160 /* Attribute list too long */
1161 bgp_withdraw_bucket(s->channel, buck);
1162 return NULL;
1163 }
1164
1165 put_u16(buf+0, 0);
1166 put_u16(buf+2, la);
1167
1168 lr = bgp_encode_nlri(s, buck, buf+4+la, end);
1169
1170 return buf+4+la+lr;
1171 }
1172
1173 static byte *
1174 bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1175 {
1176 /*
1177 * 2 B IPv4 Withdrawn Routes Length (zero)
1178 * --- IPv4 Withdrawn Routes NLRI (unused)
1179 * 2 B Total Path Attribute Length
1180 * 1 B MP_REACH_NLRI hdr - Attribute Flags
1181 * 1 B MP_REACH_NLRI hdr - Attribute Type Code
1182 * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
1183 * 2 B MP_REACH_NLRI data - Address Family Identifier
1184 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
1185 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
1186 * var MP_REACH_NLRI data - Network Address of Next Hop
1187 * 1 B MP_REACH_NLRI data - Reserved (zero)
1188 * var MP_REACH_NLRI data - Network Layer Reachability Information
1189 * var Rest of Path Attributes
1190 * --- IPv4 Network Layer Reachability Information (unused)
1191 */
1192
1193 int lh, lr, la; /* Lengths of next hop, NLRI and attributes */
1194
1195 /* Begin of MP_REACH_NLRI atribute */
1196 buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
1197 buf[5] = BA_MP_REACH_NLRI;
1198 put_u16(buf+6, 0); /* Will be fixed later */
1199 put_af3(buf+8, s->channel->afi);
1200 byte *pos = buf+11;
1201
1202 /* Encode attributes to temporary buffer */
1203 byte *abuf = alloca(MAX_ATTRS_LENGTH);
1204 la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH);
1205 if (la < 0)
1206 {
1207 /* Attribute list too long */
1208 bgp_withdraw_bucket(s->channel, buck);
1209 return NULL;
1210 }
1211
1212 /* Encode the next hop */
1213 lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1);
1214 *pos = lh;
1215 pos += 1+lh;
1216
1217 /* Reserved field */
1218 *pos++ = 0;
1219
1220 /* Encode the NLRI */
1221 lr = bgp_encode_nlri(s, buck, pos, end - la);
1222 pos += lr;
1223
1224 /* End of MP_REACH_NLRI atribute, update data length */
1225 put_u16(buf+6, pos-buf-8);
1226
1227 /* Copy remaining attributes */
1228 memcpy(pos, abuf, la);
1229 pos += la;
1230
1231 /* Initial UPDATE fields */
1232 put_u16(buf+0, 0);
1233 put_u16(buf+2, pos-buf-4);
1234
1235 return pos;
1236 }
1237
1238 #undef MAX_ATTRS_LENGTH
1239
1240 static byte *
1241 bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1242 {
1243 /*
1244 * 2 B Withdrawn Routes Length
1245 * var IPv4 Withdrawn Routes NLRI
1246 * 2 B Total Path Attribute Length (zero)
1247 * --- Path Attributes (unused)
1248 * --- IPv4 Network Layer Reachability Information (unused)
1249 */
1250
1251 uint len = bgp_encode_nlri(s, buck, buf+2, end);
1252
1253 put_u16(buf+0, len);
1254 put_u16(buf+2+len, 0);
1255
1256 return buf+4+len;
1257 }
1258
1259 static byte *
1260 bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
1261 {
1262 /*
1263 * 2 B Withdrawn Routes Length (zero)
1264 * --- IPv4 Withdrawn Routes NLRI (unused)
1265 * 2 B Total Path Attribute Length
1266 * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
1267 * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
1268 * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
1269 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
1270 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
1271 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
1272 * --- IPv4 Network Layer Reachability Information (unused)
1273 */
1274
1275 uint len = bgp_encode_nlri(s, buck, buf+11, end);
1276
1277 put_u16(buf+0, 0);
1278 put_u16(buf+2, 7+len);
1279
1280 /* Begin of MP_UNREACH_NLRI atribute */
1281 buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
1282 buf[5] = BA_MP_UNREACH_NLRI;
1283 put_u16(buf+6, 3+len);
1284 put_af3(buf+8, s->channel->afi);
1285
1286 return buf+11+len;
1287 }
1288
1289 static byte *
1290 bgp_create_update(struct bgp_channel *c, byte *buf)
1291 {
1292 struct bgp_proto *p = (void *) c->c.proto;
1293 struct bgp_bucket *buck;
1294 byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
1295 byte *res = NULL;
1296
1297 /* Initialize write state */
1298 struct bgp_write_state s = {
1299 .proto = p,
1300 .channel = c,
1301 .pool = bgp_linpool,
1302 .as4_session = p->as4_session,
1303 .add_path = c->add_path_tx,
1304 };
1305
1306 again:
1307
1308 /* Try unreachable bucket */
1309 if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
1310 {
1311 res = (c->afi == BGP_AF_IPV4) ?
1312 bgp_create_ip_unreach(&s, buck, buf, end):
1313 bgp_create_mp_unreach(&s, buck, buf, end);
1314
1315 goto done;
1316 }
1317
1318 /* Try reachable buckets */
1319 if (!EMPTY_LIST(c->bucket_queue))
1320 {
1321 buck = HEAD(c->bucket_queue);
1322
1323 /* Cleanup empty buckets */
1324 if (EMPTY_LIST(buck->prefixes))
1325 {
1326 bgp_free_bucket(c, buck);
1327 goto again;
1328 }
1329
1330 res = (c->afi == BGP_AF_IPV4) ?
1331 bgp_create_ip_reach(&s, buck, buf, end):
1332 bgp_create_mp_reach(&s, buck, buf, end);
1333
1334 if (EMPTY_LIST(buck->prefixes))
1335 bgp_free_bucket(c, buck);
1336 else
1337 bgp_defer_bucket(c, buck);
1338
1339 if (!res)
1340 goto again;
1341
1342 goto done;
1343 }
1344
1345 /* No more prefixes to send */
1346 return NULL;
1347
1348 done:
1349 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
1350 lp_flush(s.pool);
1351
1352 return res;
1353 }
1354
1355 static byte *
1356 bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf)
1357 {
1358 /* Empty update packet */
1359 put_u32(buf, 0);
1360
1361 return buf+4;
1362 }
1363
1364 static byte *
1365 bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf)
1366 {
1367 put_u16(buf+0, 0);
1368 put_u16(buf+2, 6); /* length 4--9 */
1369
1370 /* Empty MP_UNREACH_NLRI atribute */
1371 buf[4] = BAF_OPTIONAL;
1372 buf[5] = BA_MP_UNREACH_NLRI;
1373 buf[6] = 3; /* Length 7--9 */
1374 put_af3(buf+7, c->afi);
1375
1376 return buf+10;
1377 }
1378
1379 static byte *
1380 bgp_create_end_mark(struct bgp_channel *c, byte *buf)
1381 {
1382 struct bgp_proto *p = (void *) c->c.proto;
1383
1384 BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
1385
1386 return (c->afi == BGP_AF_IPV4) ?
1387 bgp_create_ip_end_mark(c, buf):
1388 bgp_create_mp_end_mark(c, buf);
1389 }
1390
1391 static inline void
1392 bgp_rx_end_mark(struct bgp_proto *p, u32 afi)
1393 {
1394 struct bgp_channel *c = bgp_get_channel(p, afi);
1395
1396 BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
1397
1398 /* XXXX handle unknown AF in MP_*_NLRI */
1399 if (!c)
1400 return;
1401
1402 if (c->load_state == BFS_LOADING)
1403 c->load_state = BFS_NONE;
1404
1405 if (p->p.gr_recovery)
1406 channel_graceful_restart_unlock(&c->c);
1407
1408 if (c->gr_active)
1409 bgp_graceful_restart_done(c);
1410 }
1411
1412 static inline void
1413 bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
1414 {
1415 struct bgp_channel *c = bgp_get_channel(s->proto, afi);
1416 rta *a = NULL;
1417
1418 /* XXXX handle unknown AF in MP_*_NLRI */
1419 if (!c)
1420 return;
1421
1422 s->channel = c;
1423 s->add_path = c->add_path_rx;
1424
1425 s->last_id = 0;
1426 s->last_src = s->proto->p.main_source;
1427
1428 /*
1429 * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
1430 * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
1431 * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
1432 * decode_next_hop hooks) by restoring a->eattrs afterwards.
1433 */
1434
1435 if (ea)
1436 {
1437 a = alloca(sizeof(struct rta));
1438 memset(a, 0, sizeof(struct rta));
1439
1440 a->source = RTS_BGP;
1441 a->scope = SCOPE_UNIVERSE;
1442 a->cast = RTC_UNICAST;
1443 a->dest = RTD_UNREACHABLE;
1444 a->from = s->proto->cf->remote_ip;
1445 a->eattrs = ea;
1446
1447 c->desc->decode_next_hop(s, nh, nh_len, a);
1448
1449 /* Handle withdraw during next hop decoding */
1450 if (s->err_withdraw)
1451 a = NULL;
1452 }
1453
1454 c->desc->decode_nlri(s, nlri, len, a);
1455
1456 rta_free(s->cached_rta);
1457 s->cached_rta = NULL;
1458 }
1459
1460 static void
1461 bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
1462 {
1463 struct bgp_proto *p = conn->bgp;
1464 ea_list *ea = NULL;
1465
1466 BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
1467
1468 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1469 if (conn->state == BS_OPENCONFIRM)
1470 bgp_conn_enter_established_state(conn);
1471
1472 if (conn->state != BS_ESTABLISHED)
1473 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1474
1475 bgp_start_timer(conn->hold_timer, conn->hold_time);
1476
1477 /* Initialize parse state */
1478 struct bgp_parse_state s = {
1479 .proto = p,
1480 .pool = bgp_linpool,
1481 .as4_session = p->as4_session,
1482 };
1483
1484 /* Parse error handler */
1485 if (setjmp(s.err_jmpbuf))
1486 {
1487 bgp_error(conn, 3, s.err_subcode, NULL, 0);
1488 goto done;
1489 }
1490
1491 /* Check minimal length */
1492 if (len < 23)
1493 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1494
1495 /* Skip fixed header */
1496 uint pos = 19;
1497
1498 /*
1499 * UPDATE message format
1500 *
1501 * 2 B IPv4 Withdrawn Routes Length
1502 * var IPv4 Withdrawn Routes NLRI
1503 * 2 B Total Path Attribute Length
1504 * var Path Attributes
1505 * var IPv4 Reachable Routes NLRI
1506 */
1507
1508 s.ip_unreach_len = get_u16(pkt + pos);
1509 s.ip_unreach_nlri = pkt + pos + 2;
1510 pos += 2 + s.ip_unreach_len;
1511
1512 if (pos + 2 > len)
1513 bgp_parse_error(&s, 1);
1514
1515 s.attr_len = get_u16(pkt + pos);
1516 s.attrs = pkt + pos + 2;
1517 pos += 2 + s.attr_len;
1518
1519 if (pos > len)
1520 bgp_parse_error(&s, 1);
1521
1522 s.ip_reach_len = len - pos;
1523 s.ip_reach_nlri = pkt + pos;
1524
1525
1526 if (s.attr_len)
1527 ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);
1528
1529 /* Check for End-of-RIB marker */
1530 if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
1531 { bgp_rx_end_mark(p, BGP_AF_IPV4); goto done; }
1532
1533 /* Check for MP End-of-RIB marker */
1534 if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
1535 !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af) /* XXXX See RFC 7606 5.2 */
1536 { bgp_rx_end_mark(p, s.mp_unreach_af); goto done; }
1537
1538 if (s.ip_unreach_len)
1539 bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);
1540
1541 if (s.mp_unreach_len)
1542 bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);
1543
1544 if (s.ip_reach_len)
1545 bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
1546 ea, s.ip_next_hop_data, s.ip_next_hop_len);
1547
1548 if (s.mp_reach_len)
1549 bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len,
1550 ea, s.mp_next_hop_data, s.mp_next_hop_len);
1551
1552 done:
1553 rta_free(s.cached_rta);
1554 lp_flush(s.pool);
1555 return;
1556 }
1557
1558
1559 /*
1560 * ROUTE-REFRESH
1561 */
1562
1563 static inline byte *
1564 bgp_create_route_refresh(struct bgp_channel *c, byte *buf)
1565 {
1566 struct bgp_proto *p = (void *) c->c.proto;
1567
1568 BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
1569
1570 /* Original route refresh request, RFC 2918 */
1571 put_af4(buf, c->afi);
1572 buf[2] = BGP_RR_REQUEST;
1573
1574 return buf+4;
1575 }
1576
1577 static inline byte *
1578 bgp_create_begin_refresh(struct bgp_channel *c, byte *buf)
1579 {
1580 struct bgp_proto *p = (void *) c->c.proto;
1581
1582 BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
1583
1584 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
1585 put_af4(buf, c->afi);
1586 buf[2] = BGP_RR_BEGIN;
1587
1588 return buf+4;
1589 }
1590
1591 static inline byte *
1592 bgp_create_end_refresh(struct bgp_channel *c, byte *buf)
1593 {
1594 struct bgp_proto *p = (void *) c->c.proto;
1595
1596 BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
1597
1598 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
1599 put_af4(buf, c->afi);
1600 buf[2] = BGP_RR_END;
1601
1602 return buf+4;
1603 }
1604
1605 static void
1606 bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
1607 {
1608 struct bgp_proto *p = conn->bgp;
1609
1610 if (conn->state != BS_ESTABLISHED)
1611 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1612
1613 if (!conn->local_caps->route_refresh)
1614 { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1615
1616 if (len < (BGP_HEADER_LENGTH + 4))
1617 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1618
1619 if (len > (BGP_HEADER_LENGTH + 4))
1620 { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
1621
1622 struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19));
1623 if (!c)
1624 {
1625 log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
1626 p->p.name, pkt[21], get_u16(pkt+19), pkt[22]);
1627 return;
1628 }
1629
1630 /* RFC 7313 redefined reserved field as RR message subtype */
1631 uint subtype = p->enhanced_refresh ? pkt[21] : BGP_RR_REQUEST;
1632
1633 switch (subtype)
1634 {
1635 case BGP_RR_REQUEST:
1636 BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1637 channel_request_feeding(&c->c);
1638 break;
1639
1640 case BGP_RR_BEGIN:
1641 BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
1642 bgp_refresh_begin(c);
1643 break;
1644
1645 case BGP_RR_END:
1646 BGP_TRACE(D_PACKETS, "Got END-OF-RR");
1647 bgp_refresh_end(c);
1648 break;
1649
1650 default:
1651 log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
1652 p->p.name, subtype);
1653 break;
1654 }
1655 }
1656
1657 static inline struct bgp_channel *
1658 bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn)
1659 {
1660 uint i = conn->last_channel;
1661
1662 /* Try the last channel, but at most several times */
1663 if ((conn->channels_to_send & (1 << i)) &&
1664 (conn->last_channel_count < 16))
1665 goto found;
1666
1667 /* Find channel with non-zero channels_to_send */
1668 do
1669 {
1670 i++;
1671 if (i >= p->channel_count)
1672 i = 0;
1673 }
1674 while (! (conn->channels_to_send & (1 << i)));
1675
1676 /* Use that channel */
1677 conn->last_channel = i;
1678 conn->last_channel_count = 0;
1679
1680 found:
1681 conn->last_channel_count++;
1682 return p->channel_map[i];
1683 }
1684
1685 static inline int
1686 bgp_send(struct bgp_conn *conn, uint type, uint len)
1687 {
1688 sock *sk = conn->sk;
1689 byte *buf = sk->tbuf;
1690
1691 memset(buf, 0xff, 16); /* Marker */
1692 put_u16(buf+16, len);
1693 buf[18] = type;
1694
1695 return sk_send(sk, len);
1696 }
1697
1698 /**
1699 * bgp_fire_tx - transmit packets
1700 * @conn: connection
1701 *
1702 * Whenever the transmit buffers of the underlying TCP connection
1703 * are free and we have any packets queued for sending, the socket functions
1704 * call bgp_fire_tx() which takes care of selecting the highest priority packet
1705 * queued (Notification > Keepalive > Open > Update), assembling its header
1706 * and body and sending it to the connection.
1707 */
1708 static int
1709 bgp_fire_tx(struct bgp_conn *conn)
1710 {
1711 struct bgp_proto *p = conn->bgp;
1712 struct bgp_channel *c;
1713 byte *buf, *pkt, *end;
1714 uint s;
1715
1716 if (!conn->sk)
1717 return 0;
1718
1719 buf = conn->sk->tbuf;
1720 pkt = buf + BGP_HEADER_LENGTH;
1721 s = conn->packets_to_send;
1722
1723 if (s & (1 << PKT_SCHEDULE_CLOSE))
1724 {
1725 /* We can finally close connection and enter idle state */
1726 bgp_conn_enter_idle_state(conn);
1727 return 0;
1728 }
1729 if (s & (1 << PKT_NOTIFICATION))
1730 {
1731 conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE;
1732 end = bgp_create_notification(conn, pkt);
1733 return bgp_send(conn, PKT_NOTIFICATION, end - buf);
1734 }
1735 else if (s & (1 << PKT_KEEPALIVE))
1736 {
1737 conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
1738 BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
1739 bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
1740 return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
1741 }
1742 else if (s & (1 << PKT_OPEN))
1743 {
1744 conn->packets_to_send &= ~(1 << PKT_OPEN);
1745 end = bgp_create_open(conn, pkt);
1746 return bgp_send(conn, PKT_OPEN, end - buf);
1747 }
1748 else while (conn->channels_to_send)
1749 {
1750 c = bgp_get_channel_to_send(p, conn);
1751 s = c->packets_to_send;
1752
1753 if (s & (1 << PKT_ROUTE_REFRESH))
1754 {
1755 c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH);
1756 end = bgp_create_route_refresh(c, pkt);
1757 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
1758 }
1759 else if (s & (1 << PKT_BEGIN_REFRESH))
1760 {
1761 /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
1762 c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH);
1763 end = bgp_create_begin_refresh(c, pkt);
1764 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
1765 }
1766 else if (s & (1 << PKT_UPDATE))
1767 {
1768 end = bgp_create_update(c, pkt);
1769 if (end)
1770 return bgp_send(conn, PKT_UPDATE, end - buf);
1771
1772 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
1773 c->packets_to_send = 0;
1774 conn->channels_to_send &= ~(1 << c->index);
1775
1776 if (c->feed_state == BFS_LOADED)
1777 {
1778 c->feed_state = BFS_NONE;
1779 end = bgp_create_end_mark(c, pkt);
1780 return bgp_send(conn, PKT_UPDATE, end - buf);
1781 }
1782
1783 else if (c->feed_state == BFS_REFRESHED)
1784 {
1785 c->feed_state = BFS_NONE;
1786 end = bgp_create_end_refresh(c, pkt);
1787 return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
1788 }
1789 }
1790 else if (s)
1791 bug("Channel packets_to_send: %x", s);
1792
1793 c->packets_to_send = 0;
1794 conn->channels_to_send &= ~(1 << c->index);
1795 }
1796
1797 return 0;
1798 }
1799
1800 /**
1801 * bgp_schedule_packet - schedule a packet for transmission
1802 * @conn: connection
1803 * @c: channel
1804 * @type: packet type
1805 *
1806 * Schedule a packet of type @type to be sent as soon as possible.
1807 */
1808 void
1809 bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
1810 {
1811 ASSERT(conn->sk);
1812
1813 DBG("BGP: Scheduling packet type %d\n", type);
1814
1815 if (c)
1816 {
1817 if (! conn->channels_to_send)
1818 {
1819 conn->last_channel = c->index;
1820 conn->last_channel_count = 0;
1821 }
1822
1823 c->packets_to_send |= 1 << type;
1824 conn->channels_to_send |= 1 << c->index;
1825 }
1826 else
1827 conn->packets_to_send |= 1 << type;
1828
1829 if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
1830 ev_schedule(conn->tx_ev);
1831 }
1832
/*
 * Transmit hook taking the connection as an untyped pointer: calls
 * bgp_fire_tx() repeatedly for as long as it returns a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;
  int rv;

  DBG("BGP: kicking TX\n");

  do
    rv = bgp_fire_tx(conn);
  while (rv > 0);
}
1842
1843 void
1844 bgp_tx(sock *sk)
1845 {
1846 struct bgp_conn *conn = sk->data;
1847
1848 DBG("BGP: TX hook\n");
1849 while (bgp_fire_tx(conn) > 0)
1850 ;
1851 }
1852
1853
1854 static struct {
1855 byte major, minor;
1856 byte *msg;
1857 } bgp_msg_table[] = {
1858 { 1, 0, "Invalid message header" },
1859 { 1, 1, "Connection not synchronized" },
1860 { 1, 2, "Bad message length" },
1861 { 1, 3, "Bad message type" },
1862 { 2, 0, "Invalid OPEN message" },
1863 { 2, 1, "Unsupported version number" },
1864 { 2, 2, "Bad peer AS" },
1865 { 2, 3, "Bad BGP identifier" },
1866 { 2, 4, "Unsupported optional parameter" },
1867 { 2, 5, "Authentication failure" },
1868 { 2, 6, "Unacceptable hold time" },
1869 { 2, 7, "Required capability missing" }, /* [RFC5492] */
1870 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1871 { 3, 0, "Invalid UPDATE message" },
1872 { 3, 1, "Malformed attribute list" },
1873 { 3, 2, "Unrecognized well-known attribute" },
1874 { 3, 3, "Missing mandatory attribute" },
1875 { 3, 4, "Invalid attribute flags" },
1876 { 3, 5, "Invalid attribute length" },
1877 { 3, 6, "Invalid ORIGIN attribute" },
1878 { 3, 7, "AS routing loop" }, /* Deprecated */
1879 { 3, 8, "Invalid NEXT_HOP attribute" },
1880 { 3, 9, "Optional attribute error" },
1881 { 3, 10, "Invalid network field" },
1882 { 3, 11, "Malformed AS_PATH" },
1883 { 4, 0, "Hold timer expired" },
1884 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1885 { 5, 1, "Unexpected message in OpenSent state" },
1886 { 5, 2, "Unexpected message in OpenConfirm state" },
1887 { 5, 3, "Unexpected message in Established state" },
1888 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1889 { 6, 1, "Maximum number of prefixes reached" },
1890 { 6, 2, "Administrative shutdown" },
1891 { 6, 3, "Peer de-configured" },
1892 { 6, 4, "Administrative reset" },
1893 { 6, 5, "Connection rejected" },
1894 { 6, 6, "Other configuration change" },
1895 { 6, 7, "Connection collision resolution" },
1896 { 6, 8, "Out of Resources" },
1897 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
1898 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
1899 };
1900
1901 /**
1902 * bgp_error_dsc - return BGP error description
1903 * @code: BGP error code
1904 * @subcode: BGP error subcode
1905 *
1906 * bgp_error_dsc() returns error description for BGP errors
1907 * which might be static string or given temporary buffer.
1908 */
1909 const char *
1910 bgp_error_dsc(uint code, uint subcode)
1911 {
1912 static char buff[32];
1913 uint i;
1914
1915 for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1916 if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1917 return bgp_msg_table[i].msg;
1918
1919 bsprintf(buff, "Unknown error %u.%u", code, subcode);
1920 return buff;
1921 }
1922
1923 void
1924 bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
1925 {
1926 const byte *name;
1927 byte *t, argbuf[36];
1928 uint i;
1929
1930 /* Don't report Cease messages generated by myself */
1931 if (code == 6 && class == BE_BGP_TX)
1932 return;
1933
1934 name = bgp_error_dsc(code, subcode);
1935 t = argbuf;
1936 if (len)
1937 {
1938 *t++ = ':';
1939 *t++ = ' ';
1940
1941 if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1942 {
1943 /* Bad peer AS - we would like to print the AS */
1944 t += bsprintf(t, "%u", (len == 2) ? get_u16(data) : get_u32(data));
1945 goto done;
1946 }
1947 if (len > 16)
1948 len = 16;
1949 for (i=0; i<len; i++)
1950 t += bsprintf(t, "%02x", data[i]);
1951 }
1952 done:
1953 *t = 0;
1954 log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
1955 }
1956
1957 static void
1958 bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
1959 {
1960 struct bgp_proto *p = conn->bgp;
1961
1962 if (len < 21)
1963 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1964
1965 uint code = pkt[19];
1966 uint subcode = pkt[20];
1967 int err = (code != 6);
1968
1969 bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1970 bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1971
1972 bgp_conn_enter_close_state(conn);
1973 bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
1974
1975 if (err)
1976 {
1977 bgp_update_startup_delay(p);
1978 bgp_stop(p, 0);
1979 }
1980 }
1981
1982 static void
1983 bgp_rx_keepalive(struct bgp_conn *conn)
1984 {
1985 struct bgp_proto *p = conn->bgp;
1986
1987 BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1988 bgp_start_timer(conn->hold_timer, conn->hold_time);
1989
1990 if (conn->state == BS_OPENCONFIRM)
1991 { bgp_conn_enter_established_state(conn); return; }
1992
1993 if (conn->state != BS_ESTABLISHED)
1994 bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
1995 }
1996
1997
1998 /**
1999 * bgp_rx_packet - handle a received packet
2000 * @conn: BGP connection
2001 * @pkt: start of the packet
2002 * @len: packet size
2003 *
2004 * bgp_rx_packet() takes a newly received packet and calls the corresponding
2005 * packet handler according to the packet type.
2006 */
2007 static void
2008 bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
2009 {
2010 byte type = pkt[18];
2011
2012 DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
2013
2014 if (conn->bgp->p.mrtdump & MD_MESSAGES)
2015 mrt_dump_bgp_packet(conn, pkt, len);
2016
2017 switch (type)
2018 {
2019 case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
2020 case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
2021 case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
2022 case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
2023 case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
2024 default: bgp_error(conn, 1, 3, pkt+18, 1);
2025 }
2026 }
2027
2028 /**
2029 * bgp_rx - handle received data
2030 * @sk: socket
2031 * @size: amount of data received
2032 *
2033 * bgp_rx() is called by the socket layer whenever new data arrive from
2034 * the underlying TCP connection. It assembles the data fragments to packets,
2035 * checks their headers and framing and passes complete packets to
2036 * bgp_rx_packet().
2037 */
2038 int
2039 bgp_rx(sock *sk, uint size)
2040 {
2041 struct bgp_conn *conn = sk->data;
2042 byte *pkt_start = sk->rbuf;
2043 byte *end = pkt_start + size;
2044 uint i, len;
2045
2046 DBG("BGP: RX hook: Got %d bytes\n", size);
2047 while (end >= pkt_start + BGP_HEADER_LENGTH)
2048 {
2049 if ((conn->state == BS_CLOSE) || (conn->sk != sk))
2050 return 0;
2051 for(i=0; i<16; i++)
2052 if (pkt_start[i] != 0xff)
2053 {
2054 bgp_error(conn, 1, 1, NULL, 0);
2055 break;
2056 }
2057 len = get_u16(pkt_start+16);
2058 if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn)))
2059 {
2060 bgp_error(conn, 1, 2, pkt_start+16, 2);
2061 break;
2062 }
2063 if (end < pkt_start + len)
2064 break;
2065 bgp_rx_packet(conn, pkt_start, len);
2066 pkt_start += len;
2067 }
2068 if (pkt_start != sk->rbuf)
2069 {
2070 memmove(sk->rbuf, pkt_start, end - pkt_start);
2071 sk->rpos = sk->rbuf + (end - pkt_start);
2072 }
2073 return 0;
2074 }