]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
Finishes add-path.
[thirdparty/bird.git] / proto / bgp / packets.c
1 /*
2 * BIRD -- BGP Packet Processing
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 #undef LOCAL_DEBUG
10
11 #include "nest/bird.h"
12 #include "nest/iface.h"
13 #include "nest/protocol.h"
14 #include "nest/route.h"
15 #include "nest/attrs.h"
16 #include "nest/mrtdump.h"
17 #include "conf/conf.h"
18 #include "lib/unaligned.h"
19 #include "lib/socket.h"
20
21 #include "nest/cli.h"
22
23 #include "bgp.h"
24
25 static struct rate_limit rl_rcv_update, rl_snd_update;
26
27 /* Table for state -> RFC 6608 FSM error subcodes */
28 static byte fsm_err_subcode[BS_MAX] = {
29 [BS_OPENSENT] = 1,
30 [BS_OPENCONFIRM] = 2,
31 [BS_ESTABLISHED] = 3
32 };
33
34 /*
35 * MRT Dump format is not semantically specified.
36 * We will use these values in appropriate fields:
37 *
38 * Local AS, Remote AS - configured AS numbers for given BGP instance.
39 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
40 *
41 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
42 * changes) and MESSAGE (for received BGP messages).
43 *
44 * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
45 * only when AS4 session is established and even in that case MESSAGE
46 * does not use AS4 variant for initial OPEN message. This strange
47 * behavior is here for compatibility with Quagga and Bgpdump.
48 */
49
50 static byte *
51 mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
52 {
53 struct bgp_proto *p = conn->bgp;
54
55 if (as4)
56 {
57 put_u32(buf+0, p->remote_as);
58 put_u32(buf+4, p->local_as);
59 buf+=8;
60 }
61 else
62 {
63 put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
64 put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS);
65 buf+=4;
66 }
67
68 put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
69 put_u16(buf+2, BGP_AF);
70 buf+=4;
71 buf = ipa_put_addr(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
72 buf = ipa_put_addr(buf, conn->sk ? conn->sk->saddr : IPA_NONE);
73
74 return buf;
75 }
76
77 static void
78 mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
79 {
80 byte buf[BGP_MAX_PACKET_LENGTH + 128];
81 byte *bp = buf + MRTDUMP_HDR_LENGTH;
82 int as4 = conn->bgp->as4_session;
83
84 bp = mrt_put_bgp4_hdr(bp, conn, as4);
85 memcpy(bp, pkt, len);
86 bp += len;
87 mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
88 buf, bp-buf);
89 }
90
91 static inline u16
92 convert_state(unsigned state)
93 {
94 /* Convert state from our BS_* values to values used in MRTDump */
95 return (state == BS_CLOSE) ? 1 : state + 1;
96 }
97
98 void
99 mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new)
100 {
101 byte buf[128];
102 byte *bp = buf + MRTDUMP_HDR_LENGTH;
103
104 bp = mrt_put_bgp4_hdr(bp, conn, 1);
105 put_u16(bp+0, convert_state(old));
106 put_u16(bp+2, convert_state(new));
107 bp += 4;
108 mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
109 }
110
111 static byte *
112 bgp_create_notification(struct bgp_conn *conn, byte *buf)
113 {
114 struct bgp_proto *p = conn->bgp;
115
116 BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
117 buf[0] = conn->notify_code;
118 buf[1] = conn->notify_subcode;
119 memcpy(buf+2, conn->notify_data, conn->notify_size);
120 return buf + 2 + conn->notify_size;
121 }
122
123 #ifdef IPV6
124 static byte *
125 bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
126 {
127 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
128 *buf++ = 4; /* Capability data length */
129 *buf++ = 0; /* We support AF IPv6 */
130 *buf++ = BGP_AF_IPV6;
131 *buf++ = 0; /* RFU */
132 *buf++ = 1; /* and SAFI 1 */
133 return buf;
134 }
135
136 #else
137
138 static byte *
139 bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
140 {
141 *buf++ = 1; /* Capability 1: Multiprotocol extensions */
142 *buf++ = 4; /* Capability data length */
143 *buf++ = 0; /* We support AF IPv4 */
144 *buf++ = BGP_AF_IPV4;
145 *buf++ = 0; /* RFU */
146 *buf++ = 1; /* and SAFI 1 */
147 return buf;
148 }
149 #endif
150
151 static byte *
152 bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
153 {
154 *buf++ = 2; /* Capability 2: Support for route refresh */
155 *buf++ = 0; /* Capability data length */
156 return buf;
157 }
158
159 static byte *
160 bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
161 {
162 *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
163 *buf++ = 4; /* Capability data length */
164 put_u32(buf, conn->bgp->local_as);
165 return buf + 4;
166 }
167
168 static byte *
169 bgp_put_cap_add_path(struct bgp_conn *conn, byte *buf)
170 {
171 *buf++ = 69; /* Capability 69: Support for ADD-PATH */
172 *buf++ = 4; /* Capability data length */
173
174 *buf++ = 0; /* Appropriate AF */
175 *buf++ = BGP_AF;
176 *buf++ = 1; /* SAFI 1 */
177
178 *buf++ = conn->bgp->cf->add_path;
179
180 return buf;
181 }
182
183 static byte *
184 bgp_create_open(struct bgp_conn *conn, byte *buf)
185 {
186 struct bgp_proto *p = conn->bgp;
187 byte *cap;
188 int cap_len;
189
190 BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
191 BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
192 buf[0] = BGP_VERSION;
193 put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
194 put_u16(buf+3, p->cf->hold_time);
195 put_u32(buf+5, p->local_id);
196
197 if (conn->start_state == BSS_CONNECT_NOCAP)
198 {
199 BGP_TRACE(D_PACKETS, "Skipping capabilities");
200 buf[9] = 0;
201 return buf + 10;
202 }
203
204 /* Skipped 3 B for length field and Capabilities parameter header */
205 cap = buf + 12;
206
207 #ifndef IPV6
208 if (p->cf->advertise_ipv4)
209 cap = bgp_put_cap_ipv4(conn, cap);
210 #endif
211
212 #ifdef IPV6
213 cap = bgp_put_cap_ipv6(conn, cap);
214 #endif
215
216 if (p->cf->enable_refresh)
217 cap = bgp_put_cap_rr(conn, cap);
218
219 if (p->cf->enable_as4)
220 cap = bgp_put_cap_as4(conn, cap);
221
222 if (p->cf->add_path)
223 cap = bgp_put_cap_add_path(conn, cap);
224
225 cap_len = cap - buf - 12;
226 if (cap_len > 0)
227 {
228 buf[9] = cap_len + 2; /* Optional params len */
229 buf[10] = 2; /* Option: Capability list */
230 buf[11] = cap_len; /* Option length */
231 return cap;
232 }
233 else
234 {
235 buf[9] = 0; /* No optional parameters */
236 return buf + 10;
237 }
238 }
239
240 static unsigned int
241 bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
242 {
243 byte *start = w;
244 ip_addr a;
245 int bytes;
246
247 while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr))))
248 {
249 struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
250 DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
251
252 if (p->add_path_tx)
253 {
254 put_u32(w, px->path_id);
255 w += 4;
256 remains -= 4;
257 }
258
259 *w++ = px->n.pxlen;
260 bytes = (px->n.pxlen + 7) / 8;
261 a = px->n.prefix;
262 ipa_hton(a);
263 memcpy(w, &a, bytes);
264 w += bytes;
265 remains -= bytes + 1;
266 rem_node(&px->bucket_node);
267 bgp_free_prefix(p, px);
268 // fib_delete(&p->prefix_fib, px);
269 }
270 return w - start;
271 }
272
273 static void
274 bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
275 {
276 while (!EMPTY_LIST(buck->prefixes))
277 {
278 struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
279 log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
280 rem_node(&px->bucket_node);
281 bgp_free_prefix(p, px);
282 // fib_delete(&p->prefix_fib, px);
283 }
284 }
285
286 #ifndef IPV6 /* IPv4 version */
287
288 static byte *
289 bgp_create_update(struct bgp_conn *conn, byte *buf)
290 {
291 struct bgp_proto *p = conn->bgp;
292 struct bgp_bucket *buck;
293 int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
294 byte *w;
295 int wd_size = 0;
296 int r_size = 0;
297 int a_size = 0;
298
299 w = buf+2;
300 if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
301 {
302 DBG("Withdrawn routes:\n");
303 wd_size = bgp_encode_prefixes(p, w, buck, remains);
304 w += wd_size;
305 remains -= wd_size;
306 }
307 put_u16(buf, wd_size);
308
309 if (remains >= 3072)
310 {
311 while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
312 {
313 if (EMPTY_LIST(buck->prefixes))
314 {
315 DBG("Deleting empty bucket %p\n", buck);
316 rem_node(&buck->send_node);
317 bgp_free_bucket(p, buck);
318 continue;
319 }
320
321 DBG("Processing bucket %p\n", buck);
322 a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048);
323
324 if (a_size < 0)
325 {
326 log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
327 bgp_flush_prefixes(p, buck);
328 rem_node(&buck->send_node);
329 bgp_free_bucket(p, buck);
330 continue;
331 }
332
333 put_u16(w, a_size);
334 w += a_size + 2;
335 r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
336 w += r_size;
337 break;
338 }
339 }
340 if (!a_size) /* Attributes not already encoded */
341 {
342 put_u16(w, 0);
343 w += 2;
344 }
345 if (wd_size || r_size)
346 {
347 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
348 return w;
349 }
350 else
351 return NULL;
352 }
353
354 #else /* IPv6 version */
355
356 static inline int
357 same_iface(struct bgp_proto *p, ip_addr *ip)
358 {
359 neighbor *n = neigh_find(&p->p, ip, 0);
360 return n && p->neigh && n->iface == p->neigh->iface;
361 }
362
363 static byte *
364 bgp_create_update(struct bgp_conn *conn, byte *buf)
365 {
366 struct bgp_proto *p = conn->bgp;
367 struct bgp_bucket *buck;
368 int size, second, rem_stored;
369 int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
370 byte *w, *w_stored, *tmp, *tstart;
371 ip_addr *ipp, ip, ip_ll;
372 ea_list *ea;
373 eattr *nh;
374
375 put_u16(buf, 0);
376 w = buf+4;
377
378 if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
379 {
380 DBG("Withdrawn routes:\n");
381 tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
382 *tmp++ = 0;
383 *tmp++ = BGP_AF_IPV6;
384 *tmp++ = 1;
385 ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
386 size = bgp_encode_attrs(p, w, ea, remains);
387 ASSERT(size >= 0);
388 w += size;
389 remains -= size;
390 }
391
392 if (remains >= 3072)
393 {
394 while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
395 {
396 if (EMPTY_LIST(buck->prefixes))
397 {
398 DBG("Deleting empty bucket %p\n", buck);
399 rem_node(&buck->send_node);
400 bgp_free_bucket(p, buck);
401 continue;
402 }
403
404 DBG("Processing bucket %p\n", buck);
405 rem_stored = remains;
406 w_stored = w;
407
408 size = bgp_encode_attrs(p, w, buck->eattrs, 2048);
409 if (size < 0)
410 {
411 log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
412 bgp_flush_prefixes(p, buck);
413 rem_node(&buck->send_node);
414 bgp_free_bucket(p, buck);
415 continue;
416 }
417 w += size;
418 remains -= size;
419
420 /* We have two addresses here in NEXT_HOP eattr. Really.
421 Unless NEXT_HOP was modified by filter */
422 nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
423 ASSERT(nh);
424 second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
425 ipp = (ip_addr *) nh->u.ptr->data;
426 ip = ipp[0];
427 ip_ll = IPA_NONE;
428
429 if (ipa_equal(ip, p->source_addr))
430 ip_ll = p->local_link;
431 else
432 {
433 /* If we send a route with 'third party' next hop destinated
434 * in the same interface, we should also send a link local
435 * next hop address. We use the received one (stored in the
436 * other part of BA_NEXT_HOP eattr). If we didn't received
437 * it (for example it is a static route), we can't use
438 * 'third party' next hop and we have to use local IP address
439 * as next hop. Sending original next hop address without
440 * link local address seems to be a natural way to solve that
441 * problem, but it is contrary to RFC 2545 and Quagga does not
442 * accept such routes.
443 *
444 * There are two cases, either we have global IP, or
445 * IPA_NONE if the neighbor is link-local. For IPA_NONE,
446 * we suppose it is on the same iface, see bgp_update_attrs().
447 */
448
449 if (ipa_zero(ip) || same_iface(p, &ip))
450 {
451 if (second && ipa_nonzero(ipp[1]))
452 ip_ll = ipp[1];
453 else
454 {
455 switch (p->cf->missing_lladdr)
456 {
457 case MLL_SELF:
458 ip = p->source_addr;
459 ip_ll = p->local_link;
460 break;
461 case MLL_DROP:
462 log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
463 w = w_stored;
464 remains = rem_stored;
465 bgp_flush_prefixes(p, buck);
466 rem_node(&buck->send_node);
467 bgp_free_bucket(p, buck);
468 continue;
469 case MLL_IGNORE:
470 break;
471 }
472 }
473 }
474 }
475
476 tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
477 *tmp++ = 0;
478 *tmp++ = BGP_AF_IPV6;
479 *tmp++ = 1;
480
481 if (ipa_has_link_scope(ip))
482 ip = IPA_NONE;
483
484 if (ipa_nonzero(ip_ll))
485 {
486 *tmp++ = 32;
487 ipa_hton(ip);
488 memcpy(tmp, &ip, 16);
489 ipa_hton(ip_ll);
490 memcpy(tmp+16, &ip_ll, 16);
491 tmp += 32;
492 }
493 else
494 {
495 *tmp++ = 16;
496 ipa_hton(ip);
497 memcpy(tmp, &ip, 16);
498 tmp += 16;
499 }
500
501 *tmp++ = 0; /* No SNPA information */
502 tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
503 ea->attrs[0].u.ptr->length = tmp - tstart;
504 size = bgp_encode_attrs(p, w, ea, remains);
505 ASSERT(size >= 0);
506 w += size;
507 break;
508 }
509 }
510
511 size = w - (buf+4);
512 put_u16(buf+2, size);
513 lp_flush(bgp_linpool);
514 if (size)
515 {
516 BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
517 return w;
518 }
519 else
520 return NULL;
521 }
522
523 #endif
524
525 static byte *
526 bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
527 {
528 struct bgp_proto *p = conn->bgp;
529 BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
530
531 *buf++ = 0;
532 *buf++ = BGP_AF;
533 *buf++ = 0; /* RFU */
534 *buf++ = 1; /* and SAFI 1 */
535 return buf;
536 }
537
538 static void
539 bgp_create_header(byte *buf, unsigned int len, unsigned int type)
540 {
541 memset(buf, 0xff, 16); /* Marker */
542 put_u16(buf+16, len);
543 buf[18] = type;
544 }
545
546 /**
547 * bgp_fire_tx - transmit packets
548 * @conn: connection
549 *
550 * Whenever the transmit buffers of the underlying TCP connection
551 * are free and we have any packets queued for sending, the socket functions
552 * call bgp_fire_tx() which takes care of selecting the highest priority packet
553 * queued (Notification > Keepalive > Open > Update), assembling its header
554 * and body and sending it to the connection.
555 */
556 static int
557 bgp_fire_tx(struct bgp_conn *conn)
558 {
559 struct bgp_proto *p = conn->bgp;
560 unsigned int s = conn->packets_to_send;
561 sock *sk = conn->sk;
562 byte *buf, *pkt, *end;
563 int type;
564
565 if (!sk)
566 {
567 conn->packets_to_send = 0;
568 return 0;
569 }
570 buf = sk->tbuf;
571 pkt = buf + BGP_HEADER_LENGTH;
572
573 if (s & (1 << PKT_SCHEDULE_CLOSE))
574 {
575 /* We can finally close connection and enter idle state */
576 bgp_conn_enter_idle_state(conn);
577 return 0;
578 }
579 if (s & (1 << PKT_NOTIFICATION))
580 {
581 s = 1 << PKT_SCHEDULE_CLOSE;
582 type = PKT_NOTIFICATION;
583 end = bgp_create_notification(conn, pkt);
584 }
585 else if (s & (1 << PKT_KEEPALIVE))
586 {
587 s &= ~(1 << PKT_KEEPALIVE);
588 type = PKT_KEEPALIVE;
589 end = pkt; /* Keepalives carry no data */
590 BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
591 bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
592 }
593 else if (s & (1 << PKT_OPEN))
594 {
595 s &= ~(1 << PKT_OPEN);
596 type = PKT_OPEN;
597 end = bgp_create_open(conn, pkt);
598 }
599 else if (s & (1 << PKT_ROUTE_REFRESH))
600 {
601 s &= ~(1 << PKT_ROUTE_REFRESH);
602 type = PKT_ROUTE_REFRESH;
603 end = bgp_create_route_refresh(conn, pkt);
604 }
605 else if (s & (1 << PKT_UPDATE))
606 {
607 end = bgp_create_update(conn, pkt);
608 type = PKT_UPDATE;
609 if (!end)
610 {
611 conn->packets_to_send = 0;
612 return 0;
613 }
614 }
615 else
616 return 0;
617 conn->packets_to_send = s;
618 bgp_create_header(buf, end - buf, type);
619 return sk_send(sk, end - buf);
620 }
621
622 /**
623 * bgp_schedule_packet - schedule a packet for transmission
624 * @conn: connection
625 * @type: packet type
626 *
627 * Schedule a packet of type @type to be sent as soon as possible.
628 */
629 void
630 bgp_schedule_packet(struct bgp_conn *conn, int type)
631 {
632 DBG("BGP: Scheduling packet type %d\n", type);
633 conn->packets_to_send |= 1 << type;
634 if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
635 ev_schedule(conn->tx_ev);
636 }
637
/*
 * Event hook: keep calling bgp_fire_tx() on the connection passed as
 * @vconn for as long as it returns a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  for (;;)
    {
      if (bgp_fire_tx(conn) <= 0)
	break;
    }
}
647
648 void
649 bgp_tx(sock *sk)
650 {
651 struct bgp_conn *conn = sk->data;
652
653 DBG("BGP: TX hook\n");
654 while (bgp_fire_tx(conn) > 0)
655 ;
656 }
657
658 /* Capatibility negotiation as per RFC 2842 */
659
660 void
661 bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
662 {
663 // struct bgp_proto *p = conn->bgp;
664 int i, cl;
665
666 while (len > 0)
667 {
668 if (len < 2 || len < 2 + opt[1])
669 goto err;
670
671 cl = opt[1];
672
673 switch (opt[0])
674 {
675 case 2: /* Route refresh capability, RFC 2918 */
676 if (cl != 0)
677 goto err;
678 conn->peer_refresh_support = 1;
679 break;
680
681 case 65: /* AS4 capability, RFC 4893 */
682 if (cl != 4)
683 goto err;
684 conn->peer_as4_support = 1;
685 if (conn->bgp->cf->enable_as4)
686 conn->advertised_as = get_u32(opt + 2);
687 break;
688
689 case 69: /* ADD-PATH capability, draft */
690 if (cl % 4)
691 goto err;
692 for (i = 0; i < cl; i += 4)
693 if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
694 conn->peer_add_path = opt[2+i+3];
695 if (conn->peer_add_path > ADD_PATH_FULL)
696 goto err;
697
698 break;
699
700 /* We can safely ignore all other capabilities */
701 }
702 len -= 2 + cl;
703 opt += 2 + cl;
704 }
705 return;
706
707 err:
708 bgp_error(conn, 2, 0, NULL, 0);
709 return;
710 }
711
712 static int
713 bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
714 {
715 struct bgp_proto *p = conn->bgp;
716 int ol;
717
718 while (len > 0)
719 {
720 if (len < 2 || len < 2 + opt[1])
721 { bgp_error(conn, 2, 0, NULL, 0); return 0; }
722 #ifdef LOCAL_DEBUG
723 {
724 int i;
725 DBG("\tOption %02x:", opt[0]);
726 for(i=0; i<opt[1]; i++)
727 DBG(" %02x", opt[2+i]);
728 DBG("\n");
729 }
730 #endif
731
732 ol = opt[1];
733 switch (opt[0])
734 {
735 case 2:
736 if (conn->start_state == BSS_CONNECT_NOCAP)
737 BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
738 else
739 bgp_parse_capabilities(conn, opt + 2, ol);
740 break;
741
742 default:
743 /*
744 * BGP specs don't tell us to send which option
745 * we didn't recognize, but it's common practice
746 * to do so. Also, capability negotiation with
747 * Cisco routers doesn't work without that.
748 */
749 bgp_error(conn, 2, 4, opt, ol);
750 return 0;
751 }
752 len -= 2 + ol;
753 opt += 2 + ol;
754 }
755 return 0;
756 }
757
758 static void
759 bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
760 {
761 struct bgp_conn *other;
762 struct bgp_proto *p = conn->bgp;
763 unsigned hold;
764 u16 base_as;
765 u32 id;
766
767 /* Check state */
768 if (conn->state != BS_OPENSENT)
769 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
770
771 /* Check message contents */
772 if (len < 29 || len != 29 + pkt[28])
773 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
774 if (pkt[19] != BGP_VERSION)
775 { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
776 conn->advertised_as = base_as = get_u16(pkt+20);
777 hold = get_u16(pkt+22);
778 id = get_u32(pkt+24);
779 BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
780
781 if (bgp_parse_options(conn, pkt+29, pkt[28]))
782 return;
783
784 if (hold > 0 && hold < 3)
785 { bgp_error(conn, 2, 6, pkt+22, 2); return; }
786
787 if (!id || id == 0xffffffff || id == p->local_id)
788 { bgp_error(conn, 2, 3, pkt+24, -4); return; }
789
790 if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
791 log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
792
793 if (conn->advertised_as != p->remote_as)
794 {
795 if (conn->peer_as4_support)
796 {
797 u32 val = htonl(conn->advertised_as);
798 bgp_error(conn, 2, 2, (byte *) &val, 4);
799 }
800 else
801 bgp_error(conn, 2, 2, pkt+20, 2);
802
803 return;
804 }
805
806 /* Check the other connection */
807 other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
808 switch (other->state)
809 {
810 case BS_IDLE:
811 case BS_CONNECT:
812 case BS_ACTIVE:
813 case BS_OPENSENT:
814 case BS_CLOSE:
815 break;
816 case BS_OPENCONFIRM:
817 if ((p->local_id < id) == (conn == &p->incoming_conn))
818 {
819 /* Should close the other connection */
820 BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
821 bgp_error(other, 6, 7, NULL, 0);
822 break;
823 }
824 /* Fall thru */
825 case BS_ESTABLISHED:
826 /* Should close this connection */
827 BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
828 bgp_error(conn, 6, 7, NULL, 0);
829 return;
830 default:
831 bug("bgp_rx_open: Unknown state");
832 }
833
834 /* Update our local variables */
835 conn->hold_time = MIN(hold, p->cf->hold_time);
836 conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
837 p->remote_id = id;
838 p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
839 p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
840 p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
841
842 if (p->add_path_tx)
843 p->p.accept_ra_types = RA_ANY;
844
845 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
846
847 bgp_schedule_packet(conn, PKT_KEEPALIVE);
848 bgp_start_timer(conn->hold_timer, conn->hold_time);
849 bgp_conn_enter_openconfirm_state(conn);
850 }
851
/*
 * DECODE_PREFIX(pp, ll) - decode one NLRI entry from pointer pp with ll
 * bytes left, advancing pp and decreasing ll accordingly.
 *
 * Relies on names from the expansion site: p (protocol), path_id,
 * prefix, pxlen, err and the label done. With add-path RX active a
 * 4-byte path ID precedes the prefix. On malformed input err is set
 * (1 for truncated data, 10 for a too long prefix length) and control
 * jumps to done.
 */
#define DECODE_PREFIX(pp, ll) do {					\
  if (p->add_path_rx)							\
    {									\
      if (ll < 5) { err = 1; goto done; }				\
      path_id = get_u32(pp);						\
      pp += 4;								\
      ll -= 4;								\
    }									\
  int pfx_bits = *pp++;							\
  int pfx_bytes;							\
  ll--;									\
  if (pfx_bits > BITS_PER_IP_ADDRESS) { err = 10; goto done; }		\
  pfx_bytes = (pfx_bits + 7) / 8;					\
  if (ll < pfx_bytes) { err = 1; goto done; }				\
  memcpy(&prefix, pp, pfx_bytes);					\
  pp += pfx_bytes;							\
  ll -= pfx_bytes;							\
  ipa_ntoh(prefix);							\
  prefix = ipa_and(prefix, ipa_mkmask(pfx_bits));			\
  pxlen = pfx_bits;							\
} while (0)
873
874
875 static inline void
876 bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
877 u32 path_id, u32 *last_id, struct rte_src **src,
878 rta *a0, rta **a)
879 {
880 if (path_id != *last_id)
881 {
882 *src = rt_get_source(&p->p, path_id);
883 *last_id = path_id;
884
885 if (*a)
886 {
887 rta_free(*a);
888 *a = NULL;
889 }
890 }
891
892 /* Prepare cached route attributes */
893 if (!*a)
894 {
895 a0->src = *src;
896 *a = rta_lookup(a0);
897 }
898
899 net *n = net_get(p->p.table, prefix, pxlen);
900 rte *e = rte_get_temp(rta_clone(*a));
901 e->net = n;
902 e->pflags = 0;
903 e->u.bgp.suppressed = 0;
904 rte_update2(p->p.main_ahook, n, e, *src);
905 }
906
907 static inline void
908 bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
909 u32 path_id, u32 *last_id, struct rte_src **src)
910 {
911 if (path_id != *last_id)
912 {
913 *src = rt_find_source(&p->p, path_id);
914 *last_id = path_id;
915 }
916
917 net *n = net_find(p->p.table, prefix, pxlen);
918 rte_update2( p->p.main_ahook, n, NULL, *src);
919 }
920
921 static inline int
922 bgp_set_next_hop(struct bgp_proto *p, rta *a)
923 {
924 struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
925 ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
926
927 #ifdef IPV6
928 int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
929
930 /* First address should not be link-local, but may be zero in direct mode */
931 if (ipa_has_link_scope(*nexthop))
932 *nexthop = IPA_NONE;
933 #else
934 int second = 0;
935 #endif
936
937 if (p->cf->gw_mode == GW_DIRECT)
938 {
939 neighbor *ng = NULL;
940
941 if (ipa_nonzero(*nexthop))
942 ng = neigh_find(&p->p, nexthop, 0);
943 else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
944 ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0);
945
946 /* Fallback */
947 if (!ng)
948 ng = p->neigh;
949
950 if (ng->scope == SCOPE_HOST)
951 return 0;
952
953 a->dest = RTD_ROUTER;
954 a->gw = ng->addr;
955 a->iface = ng->iface;
956 a->hostentry = NULL;
957 a->igp_metric = 0;
958 }
959 else /* GW_RECURSIVE */
960 {
961 if (ipa_zero(*nexthop))
962 return 0;
963
964 rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second);
965 }
966
967 return 1;
968 }
969
970 #ifndef IPV6 /* IPv4 version */
971
972 static void
973 bgp_do_rx_update(struct bgp_conn *conn,
974 byte *withdrawn, int withdrawn_len,
975 byte *nlri, int nlri_len,
976 byte *attrs, int attr_len)
977 {
978 struct bgp_proto *p = conn->bgp;
979 struct rte_src *src = p->p.main_source;
980 rta *a0, *a;
981 ip_addr prefix;
982 int pxlen, err = 0;
983 u32 path_id = 0;
984 u32 last_id = 0;
985
986 /* Withdraw routes */
987 while (withdrawn_len)
988 {
989 DECODE_PREFIX(withdrawn, withdrawn_len);
990 DBG("Withdraw %I/%d\n", prefix, pxlen);
991
992 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
993 }
994
995 if (!attr_len && !nlri_len) /* shortcut */
996 return;
997
998 a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
999
1000 if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1001 return;
1002
1003 if (a0 && ! bgp_set_next_hop(p, a0))
1004 a0 = NULL;
1005
1006 a = NULL;
1007 last_id = 0;
1008 src = p->p.main_source;
1009
1010 while (nlri_len)
1011 {
1012 DECODE_PREFIX(nlri, nlri_len);
1013 DBG("Add %I/%d\n", prefix, pxlen);
1014
1015 if (a0)
1016 bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1017 else /* Forced withdraw as a result of soft error */
1018 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1019 }
1020
1021 done:
1022 if (a)
1023 rta_free(a);
1024
1025 if (err)
1026 bgp_error(conn, 3, err, NULL, 0);
1027
1028 return;
1029 }
1030
1031 #else /* IPv6 version */
1032
/*
 * DO_NLRI(name) - prepare parsing of the MP attribute stored in
 * p->name##_start / p->name##_len and open an 'if' that is taken only
 * when the attribute is for IPv6; the statement following the macro
 * becomes the body of that 'if'.
 *
 * Relies on names from the expansion site: p, start, x, len, len0, af,
 * sub, err and the label done. An attribute shorter than its 3-byte
 * AFI/SAFI header sets err to 9 and jumps to done; an absent attribute
 * yields af == 0.
 */
#define DO_NLRI(name)					\
  x = p->name##_start;					\
  start = x;						\
  len0 = p->name##_len;					\
  len = len0;						\
  if (len)						\
    {							\
      if (len < 3) { err = 9; goto done; }		\
      af = get_u16(x);					\
      sub = x[2];					\
      x += 3;						\
      len -= 3;						\
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
    }							\
  else							\
    af = 0;						\
  if (af == BGP_AF_IPV6)
1048
1049 static void
1050 bgp_attach_next_hop(rta *a0, byte *x)
1051 {
1052 ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
1053 memcpy(nh, x+1, 16);
1054 ipa_ntoh(nh[0]);
1055
1056 /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
1057 if (*x == 32)
1058 {
1059 memcpy(nh+1, x+17, 16);
1060 ipa_ntoh(nh[1]);
1061 }
1062 else
1063 nh[1] = IPA_NONE;
1064 }
1065
1066
1067 static void
1068 bgp_do_rx_update(struct bgp_conn *conn,
1069 byte *withdrawn, int withdrawn_len,
1070 byte *nlri, int nlri_len,
1071 byte *attrs, int attr_len)
1072 {
1073 struct bgp_proto *p = conn->bgp;
1074 struct rte_src *src = p->p.main_source;
1075 byte *start, *x;
1076 int len, len0;
1077 unsigned af, sub;
1078 rta *a0, *a;
1079 ip_addr prefix;
1080 int pxlen, err = 0;
1081 u32 path_id = 0;
1082 u32 last_id = 0;
1083
1084 p->mp_reach_len = 0;
1085 p->mp_unreach_len = 0;
1086 a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
1087
1088 if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1089 return;
1090
1091 DO_NLRI(mp_unreach)
1092 {
1093 while (len)
1094 {
1095 DECODE_PREFIX(x, len);
1096 DBG("Withdraw %I/%d\n", prefix, pxlen);
1097 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1098 }
1099 }
1100
1101 DO_NLRI(mp_reach)
1102 {
1103 /* Create fake NEXT_HOP attribute */
1104 if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
1105 { err = 9; goto done; }
1106
1107 if (a0)
1108 bgp_attach_next_hop(a0, x);
1109
1110 /* Also ignore one reserved byte */
1111 len -= *x + 2;
1112 x += *x + 2;
1113
1114 if (a0 && ! bgp_set_next_hop(p, a0))
1115 a0 = NULL;
1116
1117 a = NULL;
1118 last_id = 0;
1119 src = p->p.main_source;
1120
1121 while (len)
1122 {
1123 DECODE_PREFIX(x, len);
1124 DBG("Add %I/%d\n", prefix, pxlen);
1125
1126 if (a0)
1127 bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1128 else /* Forced withdraw as a result of soft error */
1129 bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1130 }
1131 }
1132
1133 done:
1134 if (a)
1135 rta_free(a);
1136
1137 if (err) /* Use subcode 9, not err */
1138 bgp_error(conn, 3, 9, NULL, 0);
1139
1140 return;
1141 }
1142
1143 #endif
1144
1145 static void
1146 bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
1147 {
1148 struct bgp_proto *p = conn->bgp;
1149 byte *withdrawn, *attrs, *nlri;
1150 int withdrawn_len, attr_len, nlri_len;
1151
1152 BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
1153
1154 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1155 if (conn->state == BS_OPENCONFIRM)
1156 bgp_conn_enter_established_state(conn);
1157
1158 if (conn->state != BS_ESTABLISHED)
1159 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1160 bgp_start_timer(conn->hold_timer, conn->hold_time);
1161
1162 /* Find parts of the packet and check sizes */
1163 if (len < 23)
1164 {
1165 bgp_error(conn, 1, 2, pkt+16, 2);
1166 return;
1167 }
1168 withdrawn = pkt + 21;
1169 withdrawn_len = get_u16(pkt + 19);
1170 if (withdrawn_len + 23 > len)
1171 goto malformed;
1172 attrs = withdrawn + withdrawn_len + 2;
1173 attr_len = get_u16(attrs - 2);
1174 if (withdrawn_len + attr_len + 23 > len)
1175 goto malformed;
1176 nlri = attrs + attr_len;
1177 nlri_len = len - withdrawn_len - attr_len - 23;
1178 if (!attr_len && nlri_len)
1179 goto malformed;
1180 DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
1181
1182 lp_flush(bgp_linpool);
1183
1184 bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
1185 return;
1186
1187 malformed:
1188 bgp_error(conn, 3, 1, NULL, 0);
1189 }
1190
1191 static struct {
1192 byte major, minor;
1193 byte *msg;
1194 } bgp_msg_table[] = {
1195 { 1, 0, "Invalid message header" },
1196 { 1, 1, "Connection not synchronized" },
1197 { 1, 2, "Bad message length" },
1198 { 1, 3, "Bad message type" },
1199 { 2, 0, "Invalid OPEN message" },
1200 { 2, 1, "Unsupported version number" },
1201 { 2, 2, "Bad peer AS" },
1202 { 2, 3, "Bad BGP identifier" },
1203 { 2, 4, "Unsupported optional parameter" },
1204 { 2, 5, "Authentication failure" },
1205 { 2, 6, "Unacceptable hold time" },
1206 { 2, 7, "Required capability missing" }, /* [RFC3392] */
1207 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1208 { 3, 0, "Invalid UPDATE message" },
1209 { 3, 1, "Malformed attribute list" },
1210 { 3, 2, "Unrecognized well-known attribute" },
1211 { 3, 3, "Missing mandatory attribute" },
1212 { 3, 4, "Invalid attribute flags" },
1213 { 3, 5, "Invalid attribute length" },
1214 { 3, 6, "Invalid ORIGIN attribute" },
1215 { 3, 7, "AS routing loop" }, /* Deprecated */
1216 { 3, 8, "Invalid NEXT_HOP attribute" },
1217 { 3, 9, "Optional attribute error" },
1218 { 3, 10, "Invalid network field" },
1219 { 3, 11, "Malformed AS_PATH" },
1220 { 4, 0, "Hold timer expired" },
1221 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1222 { 5, 1, "Unexpected message in OpenSent state" },
1223 { 5, 2, "Unexpected message in OpenConfirm state" },
1224 { 5, 3, "Unexpected message in Established state" },
1225 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1226 { 6, 1, "Maximum number of prefixes reached" },
1227 { 6, 2, "Administrative shutdown" },
1228 { 6, 3, "Peer de-configured" },
1229 { 6, 4, "Administrative reset" },
1230 { 6, 5, "Connection rejected" },
1231 { 6, 6, "Other configuration change" },
1232 { 6, 7, "Connection collision resolution" },
1233 { 6, 8, "Out of Resources" }
1234 };
1235
1236 /**
1237 * bgp_error_dsc - return BGP error description
1238 * @code: BGP error code
1239 * @subcode: BGP error subcode
1240 *
1241 * bgp_error_dsc() returns error description for BGP errors
1242 * which might be static string or given temporary buffer.
1243 */
1244 const char *
1245 bgp_error_dsc(unsigned code, unsigned subcode)
1246 {
1247 static char buff[32];
1248 unsigned i;
1249 for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1250 if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1251 {
1252 return bgp_msg_table[i].msg;
1253 }
1254
1255 bsprintf(buff, "Unknown error %d.%d", code, subcode);
1256 return buff;
1257 }
1258
1259 void
1260 bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
1261 {
1262 const byte *name;
1263 byte *t, argbuf[36];
1264 unsigned i;
1265
1266 /* Don't report Cease messages generated by myself */
1267 if (code == 6 && class == BE_BGP_TX)
1268 return;
1269
1270 name = bgp_error_dsc(code, subcode);
1271 t = argbuf;
1272 if (len)
1273 {
1274 *t++ = ':';
1275 *t++ = ' ';
1276
1277 if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1278 {
1279 /* Bad peer AS - we would like to print the AS */
1280 t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data));
1281 goto done;
1282 }
1283 if (len > 16)
1284 len = 16;
1285 for (i=0; i<len; i++)
1286 t += bsprintf(t, "%02x", data[i]);
1287 }
1288 done:
1289 *t = 0;
1290 log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
1291 }
1292
1293 static void
1294 bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
1295 {
1296 struct bgp_proto *p = conn->bgp;
1297 if (len < 21)
1298 {
1299 bgp_error(conn, 1, 2, pkt+16, 2);
1300 return;
1301 }
1302
1303 unsigned code = pkt[19];
1304 unsigned subcode = pkt[20];
1305 int err = (code != 6);
1306
1307 bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1308 bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1309
1310 #ifndef IPV6
1311 if ((code == 2) && ((subcode == 4) || (subcode == 7))
1312 /* Error related to capability:
1313 * 4 - Peer does not support capabilities at all.
1314 * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
1315 */
1316 && (p->cf->capabilities == 2)
1317 /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1318 && (conn->start_state == BSS_CONNECT)
1319 /* Failed connection attempt have used capabilities */
1320 && (p->cf->remote_as <= 0xFFFF))
1321 /* Not possible with disabled capabilities */
1322 {
1323 /* We try connect without capabilities */
1324 log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
1325 p->start_state = BSS_CONNECT_NOCAP;
1326 err = 0;
1327 }
1328 #endif
1329
1330 bgp_conn_enter_close_state(conn);
1331 bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1332
1333 if (err)
1334 {
1335 bgp_update_startup_delay(p);
1336 bgp_stop(p, 0);
1337 }
1338 }
1339
1340 static void
1341 bgp_rx_keepalive(struct bgp_conn *conn)
1342 {
1343 struct bgp_proto *p = conn->bgp;
1344
1345 BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1346 bgp_start_timer(conn->hold_timer, conn->hold_time);
1347 switch (conn->state)
1348 {
1349 case BS_OPENCONFIRM:
1350 bgp_conn_enter_established_state(conn);
1351 break;
1352 case BS_ESTABLISHED:
1353 break;
1354 default:
1355 bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
1356 }
1357 }
1358
1359 static void
1360 bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, int len)
1361 {
1362 struct bgp_proto *p = conn->bgp;
1363
1364 BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1365
1366 if (conn->state != BS_ESTABLISHED)
1367 { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1368
1369 if (!p->cf->enable_refresh)
1370 { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1371
1372 if (len != (BGP_HEADER_LENGTH + 4))
1373 { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1374
1375 /* FIXME - we ignore AFI/SAFI values, as we support
1376 just one value and even an error code for an invalid
1377 request is not defined */
1378
1379 proto_request_feeding(&p->p);
1380 }
1381
1382
1383 /**
1384 * bgp_rx_packet - handle a received packet
1385 * @conn: BGP connection
1386 * @pkt: start of the packet
1387 * @len: packet size
1388 *
1389 * bgp_rx_packet() takes a newly received packet and calls the corresponding
1390 * packet handler according to the packet type.
1391 */
1392 static void
1393 bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1394 {
1395 byte type = pkt[18];
1396
1397 DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
1398
1399 if (conn->bgp->p.mrtdump & MD_MESSAGES)
1400 mrt_dump_bgp_packet(conn, pkt, len);
1401
1402 switch (type)
1403 {
1404 case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
1405 case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
1406 case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
1407 case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
1408 case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
1409 default: bgp_error(conn, 1, 3, pkt+18, 1);
1410 }
1411 }
1412
1413 /**
1414 * bgp_rx - handle received data
1415 * @sk: socket
1416 * @size: amount of data received
1417 *
1418 * bgp_rx() is called by the socket layer whenever new data arrive from
1419 * the underlying TCP connection. It assembles the data fragments to packets,
1420 * checks their headers and framing and passes complete packets to
1421 * bgp_rx_packet().
1422 */
1423 int
1424 bgp_rx(sock *sk, int size)
1425 {
1426 struct bgp_conn *conn = sk->data;
1427 byte *pkt_start = sk->rbuf;
1428 byte *end = pkt_start + size;
1429 unsigned i, len;
1430
1431 DBG("BGP: RX hook: Got %d bytes\n", size);
1432 while (end >= pkt_start + BGP_HEADER_LENGTH)
1433 {
1434 if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1435 return 0;
1436 for(i=0; i<16; i++)
1437 if (pkt_start[i] != 0xff)
1438 {
1439 bgp_error(conn, 1, 1, NULL, 0);
1440 break;
1441 }
1442 len = get_u16(pkt_start+16);
1443 if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
1444 {
1445 bgp_error(conn, 1, 2, pkt_start+16, 2);
1446 break;
1447 }
1448 if (end < pkt_start + len)
1449 break;
1450 bgp_rx_packet(conn, pkt_start, len);
1451 pkt_start += len;
1452 }
1453 if (pkt_start != sk->rbuf)
1454 {
1455 memmove(sk->rbuf, pkt_start, end - pkt_start);
1456 sk->rpos = sk->rbuf + (end - pkt_start);
1457 }
1458 return 0;
1459 }