]>
git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
2 * BIRD -- BGP Packet Processing
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
8 * Can be freely distributed and used under the terms of the GNU GPL.
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "nest/mrtdump.h"
21 #include "conf/conf.h"
22 #include "lib/unaligned.h"
23 #include "lib/socket.h"
30 #define BGP_RR_REQUEST 0
31 #define BGP_RR_BEGIN 1
35 static struct tbf rl_rcv_update
= TBF_DEFAULT_LOG_LIMITS
;
36 static struct tbf rl_snd_update
= TBF_DEFAULT_LOG_LIMITS
;
38 /* Table for state -> RFC 6608 FSM error subcodes */
39 static byte fsm_err_subcode
[BS_MAX
] = {
46 static struct bgp_channel
*
47 bgp_get_channel(struct bgp_proto
*p
, u32 afi
)
51 for (i
= 0; i
< p
->channel_count
; i
++)
52 if (p
->afi_map
[i
] == afi
)
53 return p
->channel_map
[i
];
59 put_af3(byte
*buf
, u32 id
)
61 put_u16(buf
, id
>> 16);
66 put_af4(byte
*buf
, u32 id
)
68 put_u16(buf
, id
>> 16);
76 return (get_u16(buf
) << 16) | buf
[2];
82 return (get_u16(buf
) << 16) | buf
[3];
86 * MRT Dump format is not semantically specified.
87 * We will use these values in appropriate fields:
89 * Local AS, Remote AS - configured AS numbers for given BGP instance.
90 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
92 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
93 * changes) and MESSAGE (for received BGP messages).
95 * STATE_CHANGE always uses the AS4 variant, but MESSAGE uses the AS4 variant
96 * only when an AS4 session is established, and even in that case MESSAGE
97 * does not use the AS4 variant for the initial OPEN message. This strange
98 * behavior is here for compatibility with Quagga and Bgpdump,
102 mrt_put_bgp4_hdr(byte
*buf
, struct bgp_conn
*conn
, int as4
)
104 struct bgp_proto
*p
= conn
->bgp
;
105 uint v4
= ipa_is_ip4(p
->cf
->remote_ip
);
109 put_u32(buf
+0, p
->remote_as
);
110 put_u32(buf
+4, p
->public_as
);
115 put_u16(buf
+0, (p
->remote_as
<= 0xFFFF) ? p
->remote_as
: AS_TRANS
);
116 put_u16(buf
+2, (p
->public_as
<= 0xFFFF) ? p
->public_as
: AS_TRANS
);
120 put_u16(buf
+0, (p
->neigh
&& p
->neigh
->iface
) ? p
->neigh
->iface
->index
: 0);
121 put_u16(buf
+2, v4
? BGP_AFI_IPV4
: BGP_AFI_IPV6
);
126 buf
= put_ip4(buf
, conn
->sk
? ipa_to_ip4(conn
->sk
->daddr
) : IP4_NONE
);
127 buf
= put_ip4(buf
, conn
->sk
? ipa_to_ip4(conn
->sk
->saddr
) : IP4_NONE
);
131 buf
= put_ip6(buf
, conn
->sk
? ipa_to_ip6(conn
->sk
->daddr
) : IP6_NONE
);
132 buf
= put_ip6(buf
, conn
->sk
? ipa_to_ip6(conn
->sk
->saddr
) : IP6_NONE
);
139 mrt_dump_bgp_packet(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
141 byte
*buf
= alloca(128+len
); /* 128 is enough for MRT headers */
142 byte
*bp
= buf
+ MRTDUMP_HDR_LENGTH
;
143 int as4
= conn
->bgp
->as4_session
;
145 bp
= mrt_put_bgp4_hdr(bp
, conn
, as4
);
146 memcpy(bp
, pkt
, len
);
148 mrt_dump_message(&conn
->bgp
->p
, BGP4MP
, as4
? BGP4MP_MESSAGE_AS4
: BGP4MP_MESSAGE
,
153 convert_state(uint state
)
155 /* Convert state from our BS_* values to values used in MRTDump */
156 return (state
== BS_CLOSE
) ? 1 : state
+ 1;
160 mrt_dump_bgp_state_change(struct bgp_conn
*conn
, uint old
, uint
new)
163 byte
*bp
= buf
+ MRTDUMP_HDR_LENGTH
;
165 bp
= mrt_put_bgp4_hdr(bp
, conn
, 1);
166 put_u16(bp
+0, convert_state(old
));
167 put_u16(bp
+2, convert_state(new));
169 mrt_dump_message(&conn
->bgp
->p
, BGP4MP
, BGP4MP_STATE_CHANGE_AS4
, buf
, bp
-buf
);
173 bgp_create_notification(struct bgp_conn
*conn
, byte
*buf
)
175 struct bgp_proto
*p
= conn
->bgp
;
177 BGP_TRACE(D_PACKETS
, "Sending NOTIFICATION(code=%d.%d)", conn
->notify_code
, conn
->notify_subcode
);
178 buf
[0] = conn
->notify_code
;
179 buf
[1] = conn
->notify_subcode
;
180 memcpy(buf
+2, conn
->notify_data
, conn
->notify_size
);
181 return buf
+ 2 + conn
->notify_size
;
185 /* Capability negotiation as per RFC 5492 */
187 #define WALK_AF_CAPS(caps,ac) \
188 for (ac = caps->af_data; ac < &caps->af_data[caps->af_count]; ac++)
190 const struct bgp_af_caps
*
191 bgp_find_af_caps(struct bgp_caps
*caps
, u32 afi
)
193 struct bgp_af_caps
*ac
;
195 WALK_AF_CAPS(caps
, ac
)
202 static struct bgp_af_caps
*
203 bgp_get_af_caps(struct bgp_caps
*caps
, u32 afi
)
205 struct bgp_af_caps
*ac
;
207 WALK_AF_CAPS(caps
, ac
)
211 ac
= &caps
->af_data
[caps
->af_count
++];
212 memset(ac
, 0, sizeof(struct bgp_af_caps
));
219 bgp_af_caps_cmp(const void *X
, const void *Y
)
221 const struct bgp_af_caps
*x
= X
, *y
= Y
;
222 return (x
->afi
< y
->afi
) ? -1 : (x
->afi
> y
->afi
) ? 1 : 0;
227 bgp_write_capabilities(struct bgp_conn
*conn
, byte
*buf
)
229 struct bgp_proto
*p
= conn
->bgp
;
230 struct bgp_channel
*c
;
231 struct bgp_caps
*caps
;
232 struct bgp_af_caps
*ac
;
233 uint any_add_path
= 0;
236 /* Prepare bgp_caps structure */
238 int n
= list_length(&p
->p
.channels
);
239 caps
= mb_allocz(p
->p
.pool
, sizeof(struct bgp_caps
) + n
* sizeof(struct bgp_af_caps
));
240 conn
->local_caps
= caps
;
242 caps
->as4_support
= p
->cf
->enable_as4
;
243 caps
->ext_messages
= p
->cf
->enable_extended_messages
;
244 caps
->route_refresh
= p
->cf
->enable_refresh
;
245 caps
->enhanced_refresh
= p
->cf
->enable_refresh
;
247 if (caps
->as4_support
)
248 caps
->as4_number
= p
->public_as
;
253 caps
->gr_time
= p
->cf
->gr_time
;
254 caps
->gr_flags
= p
->p
.gr_recovery
? BGP_GRF_RESTART
: 0;
257 /* Allocate and fill per-AF fields */
258 WALK_LIST(c
, p
->p
.channels
)
260 ac
= &caps
->af_data
[caps
->af_count
++];
264 ac
->add_path
= c
->cf
->add_path
;
265 any_add_path
|= ac
->add_path
;
271 if (p
->p
.gr_recovery
)
272 ac
->gr_af_flags
|= BGP_GRF_FORWARDING
;
276 /* Sort capability fields by AFI/SAFI */
277 qsort(caps
->af_data
, caps
->af_count
, sizeof(struct bgp_af_caps
), bgp_af_caps_cmp
);
280 /* Create capability list in buffer */
282 WALK_AF_CAPS(caps
, ac
)
285 *buf
++ = 1; /* Capability 1: Multiprotocol extensions */
286 *buf
++ = 4; /* Capability data length */
287 put_af4(buf
, ac
->afi
);
291 if (caps
->route_refresh
)
293 *buf
++ = 2; /* Capability 2: Support for route refresh */
294 *buf
++ = 0; /* Capability data length */
297 if (caps
->ext_messages
)
299 *buf
++ = 6; /* Capability 6: Support for extended messages */
300 *buf
++ = 0; /* Capability data length */
305 *buf
++ = 64; /* Capability 64: Support for graceful restart */
306 *buf
++ = 0; /* Capability data length, will be fixed later */
309 put_u16(buf
, caps
->gr_time
);
310 buf
[0] |= caps
->gr_flags
;
313 WALK_AF_CAPS(caps
, ac
)
316 put_af3(buf
, ac
->afi
);
317 buf
[3] = ac
->gr_af_flags
;
321 data
[-1] = buf
- data
;
324 if (caps
->as4_support
)
326 *buf
++ = 65; /* Capability 65: Support for 4-octet AS number */
327 *buf
++ = 4; /* Capability data length */
328 put_u32(buf
, p
->public_as
);
334 *buf
++ = 69; /* Capability 69: Support for ADD-PATH */
335 *buf
++ = 0; /* Capability data length, will be fixed later */
338 WALK_AF_CAPS(caps
, ac
)
341 put_af3(buf
, ac
->afi
);
342 buf
[3] = ac
->add_path
;
346 data
[-1] = buf
- data
;
349 if (caps
->enhanced_refresh
)
351 *buf
++ = 70; /* Capability 70: Support for enhanced route refresh */
352 *buf
++ = 0; /* Capability data length */
355 /* FIXME: Should not XXXX 255 */
361 bgp_read_capabilities(struct bgp_conn
*conn
, struct bgp_caps
*caps
, byte
*pos
, int len
)
363 struct bgp_proto
*p
= conn
->bgp
;
364 struct bgp_af_caps
*ac
;
370 if (len
< 2 || len
< (2 + pos
[1]))
373 /* Capability length */
376 /* Capability type */
379 case 1: /* Multiprotocol capability, RFC 4760 */
384 ac
= bgp_get_af_caps(caps
, af
);
388 case 2: /* Route refresh capability, RFC 2918 */
392 caps
->route_refresh
= 1;
395 case 6: /* Extended message length capability, RFC draft */
399 caps
->ext_messages
= 1;
402 case 64: /* Graceful restart capability, RFC 4724 */
406 /* Only the last instance is valid */
407 WALK_AF_CAPS(caps
, ac
)
414 caps
->gr_flags
= pos
[2] & 0xf0;
415 caps
->gr_time
= get_u16(pos
+ 2) & 0x0fff;
417 for (i
= 2; i
< cl
; i
+= 4)
419 af
= get_af3(pos
+2+i
);
420 ac
= bgp_get_af_caps(caps
, af
);
422 ac
->gr_af_flags
= pos
[2+i
+3];
426 case 65: /* AS4 capability, RFC 4893 */
430 caps
->as4_support
= 1;
431 caps
->as4_number
= get_u32(pos
+ 2);
434 case 69: /* ADD-PATH capability, RFC 7911 */
438 for (i
= 0; i
< cl
; i
+= 4)
440 byte val
= pos
[2+i
+3];
441 if (!val
|| (val
> BGP_ADD_PATH_FULL
))
443 log(L_WARN
"%s: Got ADD-PATH capability with unknown value %u, ignoring",
449 for (i
= 0; i
< cl
; i
+= 4)
451 af
= get_af3(pos
+2+i
);
452 ac
= bgp_get_af_caps(caps
, af
);
453 ac
->add_path
= pos
[2+i
+3];
457 case 70: /* Enhanced route refresh capability, RFC 7313 */
461 caps
->enhanced_refresh
= 1;
464 /* We can safely ignore all other capabilities */
467 ADVANCE(pos
, len
, 2 + cl
);
472 bgp_error(conn
, 2, 0, NULL
, 0);
477 bgp_read_options(struct bgp_conn
*conn
, byte
*pos
, int len
)
479 struct bgp_proto
*p
= conn
->bgp
;
480 struct bgp_caps
*caps
;
483 /* Max number of announced AFIs is limited by max option length (255) */
484 caps
= alloca(sizeof(struct bgp_caps
) + 64 * sizeof(struct bgp_af_caps
));
485 memset(caps
, 0, sizeof(struct bgp_caps
));
489 if ((len
< 2) || (len
< (2 + pos
[1])))
490 { bgp_error(conn
, 2, 0, NULL
, 0); return -1; }
495 /* BGP capabilities, RFC 5492 */
496 if (p
->cf
->capabilities
)
497 bgp_read_capabilities(conn
, caps
, pos
+ 2, ol
);
502 bgp_error(conn
, 2, 4, pos
, ol
); /* FIXME: ol or ol+2 ? */
506 ADVANCE(pos
, len
, 2 + ol
);
509 uint n
= sizeof(struct bgp_caps
) + caps
->af_count
* sizeof(struct bgp_af_caps
);
510 conn
->remote_caps
= mb_allocz(p
->p
.pool
, n
);
511 memcpy(conn
->remote_caps
, caps
, n
);
517 bgp_create_open(struct bgp_conn
*conn
, byte
*buf
)
519 struct bgp_proto
*p
= conn
->bgp
;
521 BGP_TRACE(D_PACKETS
, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
522 BGP_VERSION
, p
->public_as
, p
->cf
->hold_time
, p
->local_id
);
524 buf
[0] = BGP_VERSION
;
525 put_u16(buf
+1, (p
->public_as
< 0xFFFF) ? p
->public_as
: AS_TRANS
);
526 put_u16(buf
+3, p
->cf
->hold_time
);
527 put_u32(buf
+5, p
->local_id
);
529 if (p
->cf
->capabilities
)
531 /* Prepare local_caps and write capabilities to buffer */
532 byte
*end
= bgp_write_capabilities(conn
, buf
+12);
533 uint len
= end
- (buf
+12);
535 buf
[9] = len
+ 2; /* Optional parameters length */
536 buf
[10] = 2; /* Option 2: Capability list */
537 buf
[11] = len
; /* Option data length */
543 /* Prepare empty local_caps */
544 conn
->local_caps
= mb_allocz(p
->p
.pool
, sizeof(struct bgp_caps
));
546 buf
[9] = 0; /* No optional parameters */
554 bgp_rx_open(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
556 struct bgp_proto
*p
= conn
->bgp
;
557 struct bgp_conn
*other
;
561 if (conn
->state
!= BS_OPENSENT
)
562 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
564 /* Check message contents */
565 if (len
< 29 || len
!= 29 + (uint
) pkt
[28])
566 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
568 if (pkt
[19] != BGP_VERSION
)
569 { u16 val
= BGP_VERSION
; bgp_error(conn
, 2, 1, (byte
*) &val
, 2); return; }
571 asn
= get_u16(pkt
+20);
572 hold
= get_u16(pkt
+22);
573 id
= get_u32(pkt
+24);
574 BGP_TRACE(D_PACKETS
, "Got OPEN(as=%d,hold=%d,id=%R)", asn
, hold
, id
);
576 if (bgp_read_options(conn
, pkt
+29, pkt
[28]) < 0)
579 if (hold
> 0 && hold
< 3)
580 { bgp_error(conn
, 2, 6, pkt
+22, 2); return; }
582 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
583 if (!id
|| (p
->is_internal
&& id
== p
->local_id
))
584 { bgp_error(conn
, 2, 3, pkt
+24, -4); return; }
586 struct bgp_caps
*caps
= conn
->remote_caps
;
588 if (caps
->as4_support
)
590 u32 as4
= caps
->as4_number
;
592 if ((as4
!= asn
) && (asn
!= AS_TRANS
))
593 log(L_WARN
"%s: Peer advertised inconsistent AS numbers", p
->p
.name
);
595 if (as4
!= p
->remote_as
)
596 { as4
= htonl(as4
); bgp_error(conn
, 2, 2, (byte
*) &as4
, 4); return; }
600 if (asn
!= p
->remote_as
)
601 { bgp_error(conn
, 2, 2, pkt
+20, 2); return; }
604 /* Check the other connection */
605 other
= (conn
== &p
->outgoing_conn
) ? &p
->incoming_conn
: &p
->outgoing_conn
;
606 switch (other
->state
)
610 /* Stop outgoing connection attempts */
611 bgp_conn_enter_idle_state(other
);
621 * Description of collision detection rules in RFC 4271 is confusing and
622 * contradictory, but it is essentially:
624 * 1. Router with higher ID is dominant
625 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
626 * 3. When both connections are in OpenConfirm state, one initiated by
627 * the dominant router is kept.
629 * The first line in the expression below evaluates whether the neighbor
630 * is dominant, the second line whether the new connection was initiated
631 * by the neighbor. If both are true (or both are false), we keep the new
632 * connection, otherwise we keep the old one.
634 if (((p
->local_id
< id
) || ((p
->local_id
== id
) && (p
->public_as
< p
->remote_as
)))
635 == (conn
== &p
->incoming_conn
))
637 /* Should close the other connection */
638 BGP_TRACE(D_EVENTS
, "Connection collision, giving up the other connection");
639 bgp_error(other
, 6, 7, NULL
, 0);
644 /* Should close this connection */
645 BGP_TRACE(D_EVENTS
, "Connection collision, giving up this connection");
646 bgp_error(conn
, 6, 7, NULL
, 0);
650 bug("bgp_rx_open: Unknown state");
653 /* Update our local variables */
654 conn
->hold_time
= MIN(hold
, p
->cf
->hold_time
);
655 conn
->keepalive_time
= p
->cf
->keepalive_time
? : conn
->hold_time
/ 3;
656 conn
->as4_session
= conn
->local_caps
->as4_support
&& caps
->as4_support
;
657 conn
->ext_messages
= conn
->local_caps
->ext_messages
&& caps
->ext_messages
;
660 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
661 conn
->hold_time
, conn
->keepalive_time
, p
->remote_as
, p
->remote_id
, conn
->as4_session
);
663 bgp_schedule_packet(conn
, NULL
, PKT_KEEPALIVE
);
664 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
665 bgp_conn_enter_openconfirm_state(conn
);
673 #define REPORT(msg, args...) \
674 ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
676 #define WITHDRAW(msg, args...) \
677 ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
679 #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
680 #define NO_NEXT_HOP "Missing NEXT_HOP attribute"
684 bgp_apply_next_hop(struct bgp_parse_state
*s
, rta
*a
, ip_addr gw
, ip_addr ll
)
686 struct bgp_proto
*p
= s
->proto
;
687 struct bgp_channel
*c
= s
->channel
;
689 if (c
->cf
->gw_mode
== GW_DIRECT
)
691 neighbor
*nbr
= NULL
;
693 /* GW_DIRECT -> single_hop -> p->neigh != NULL */
695 nbr
= neigh_find2(&p
->p
, &gw
, NULL
, 0);
696 else if (ipa_nonzero(ll
))
697 nbr
= neigh_find2(&p
->p
, &ll
, p
->neigh
->iface
, 0);
699 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
700 WITHDRAW(BAD_NEXT_HOP
);
702 a
->dest
= RTD_UNICAST
;
703 a
->nh
.gw
= nbr
->addr
;
704 a
->nh
.iface
= nbr
->iface
;
709 else /* GW_RECURSIVE */
712 WITHDRAW(BAD_NEXT_HOP
);
714 rta_set_recursive_next_hop(c
->c
.table
, a
, c
->igp_table
, gw
, ll
);
719 bgp_use_next_hop(struct bgp_export_state
*s
, eattr
*a
)
721 struct bgp_proto
*p
= s
->proto
;
722 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
724 if (s
->channel
->cf
->next_hop_self
)
727 if (s
->channel
->cf
->next_hop_keep
)
730 /* Keep it when explicitly set in export filter */
731 if (a
->type
& EAF_FRESH
)
734 /* Keep it when exported to internal peers */
735 if (p
->is_interior
&& ipa_nonzero(*nh
))
738 /* Keep it when forwarded between single-hop BGPs on the same iface */
739 struct iface
*ifa
= (s
->src
&& s
->src
->neigh
) ? s
->src
->neigh
->iface
: NULL
;
740 return p
->neigh
&& (p
->neigh
->iface
== ifa
);
744 bgp_use_gateway(struct bgp_export_state
*s
)
746 struct bgp_proto
*p
= s
->proto
;
747 rta
*ra
= s
->route
->attrs
;
749 if (s
->channel
->cf
->next_hop_self
)
752 /* We need valid global gateway */
753 if ((ra
->dest
!= RTD_UNICAST
) || (ra
->nh
.next
) || ipa_zero(ra
->nh
.gw
) || ipa_is_link_local(ra
->nh
.gw
))
756 /* Use it when exported to internal peers */
760 /* Use it when forwarded to single-hop BGP peer on the same iface */
761 return p
->neigh
&& (p
->neigh
->iface
== ra
->nh
.iface
);
765 bgp_update_next_hop_ip(struct bgp_export_state
*s
, eattr
*a
, ea_list
**to
)
767 if (!a
|| !bgp_use_next_hop(s
, a
))
769 if (bgp_use_gateway(s
))
771 ip_addr nh
[1] = { s
->route
->attrs
->nh
.gw
};
772 bgp_set_attr_data(to
, s
->pool
, BA_NEXT_HOP
, 0, nh
, 16);
776 ip_addr nh
[2] = { s
->channel
->next_hop_addr
, s
->channel
->link_addr
};
777 bgp_set_attr_data(to
, s
->pool
, BA_NEXT_HOP
, 0, nh
, ipa_nonzero(nh
[1]) ? 32 : 16);
781 /* Check if next hop is valid */
782 a
= bgp_find_attr(*to
, BA_NEXT_HOP
);
784 WITHDRAW(NO_NEXT_HOP
);
786 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
787 ip_addr peer
= s
->proto
->cf
->remote_ip
;
788 uint len
= a
->u
.ptr
->length
;
790 if (ipa_zero(nh
[0]) && ((len
!= 32) || ipa_zero(nh
[1])))
791 WITHDRAW(BAD_NEXT_HOP
);
793 if (ipa_equal(peer
, nh
[0]) || ((len
== 32) && ipa_equal(peer
, nh
[1])))
794 WITHDRAW(BAD_NEXT_HOP
);
803 bgp_rte_update(struct bgp_parse_state
*s
, net_addr
*n
, u32 path_id
, rta
*a0
)
805 if (path_id
!= s
->last_id
)
807 s
->last_src
= rt_get_source(&s
->proto
->p
, path_id
);
808 s
->last_id
= path_id
;
810 rta_free(s
->cached_rta
);
811 s
->cached_rta
= NULL
;
817 rte_update2(&s
->channel
->c
, n
, NULL
, s
->last_src
);
821 /* Prepare cached route attributes */
822 if (s
->cached_rta
== NULL
)
824 a0
->src
= s
->last_src
;
826 /* Workaround for rta_lookup() breaking eattrs */
827 ea_list
*ea
= a0
->eattrs
;
828 s
->cached_rta
= rta_lookup(a0
);
832 rta
*a
= rta_clone(s
->cached_rta
);
833 rte
*e
= rte_get_temp(a
);
836 e
->u
.bgp
.suppressed
= 0;
837 rte_update2(&s
->channel
->c
, n
, e
, s
->last_src
);
843 bgp_encode_nlri_ip4(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
847 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= (5 + sizeof(ip4_addr
))))
849 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
850 struct net_addr_ip4
*net
= (void *) px
->net
;
855 put_u32(pos
, px
->path_id
);
856 ADVANCE(pos
, size
, 4);
859 ip4_addr a
= ip4_hton(net
->prefix
);
860 uint b
= (net
->pxlen
+ 7) / 8;
862 /* Encode prefix length */
864 ADVANCE(pos
, size
, 1);
866 /* Encode prefix body */
868 ADVANCE(pos
, size
, b
);
870 bgp_free_prefix(s
->channel
, px
);
877 bgp_decode_nlri_ip4(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
888 bgp_parse_error(s
, 1);
890 path_id
= get_u32(pos
);
891 ADVANCE(pos
, len
, 4);
894 /* Decode prefix length */
896 uint b
= (l
+ 7) / 8;
897 ADVANCE(pos
, len
, 1);
899 if (l
> IP4_MAX_PREFIX_LENGTH
)
900 bgp_parse_error(s
, 10);
903 bgp_parse_error(s
, 1);
905 /* Decode prefix body */
906 ip4_addr addr
= IP4_NONE
;
907 memcpy(&addr
, pos
, b
);
908 ADVANCE(pos
, len
, b
);
910 net
= NET_ADDR_IP4(ip4_ntoh(addr
), l
);
911 net_normalize_ip4(&net
);
913 // XXXX validate prefix
915 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
920 bgp_encode_next_hop_ip4(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size UNUSED
)
922 /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
924 ASSERT(a
->u
.ptr
->length
== sizeof(ip_addr
));
926 put_ip4(buf
, ipa_to_ip4( *(ip_addr
*) a
->u
.ptr
->data
));
932 bgp_decode_next_hop_ip4(struct bgp_parse_state
*s
, byte
*data
, uint len
, rta
*a
)
935 bgp_parse_error(s
, 9);
937 ip_addr nh
= ipa_from_ip4(get_ip4(data
));
939 // XXXX validate next hop
941 bgp_set_attr_data(&(a
->eattrs
), s
->pool
, BA_NEXT_HOP
, 0, &nh
, sizeof(nh
));
942 bgp_apply_next_hop(s
, a
, nh
, IPA_NONE
);
947 bgp_encode_nlri_ip6(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
951 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= (5 + sizeof(ip6_addr
))))
953 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
954 struct net_addr_ip6
*net
= (void *) px
->net
;
959 put_u32(pos
, px
->path_id
);
960 ADVANCE(pos
, size
, 4);
963 ip6_addr a
= ip6_hton(net
->prefix
);
964 uint b
= (net
->pxlen
+ 7) / 8;
966 /* Encode prefix length */
968 ADVANCE(pos
, size
, 1);
970 /* Encode prefix body */
972 ADVANCE(pos
, size
, b
);
974 bgp_free_prefix(s
->channel
, px
);
981 bgp_decode_nlri_ip6(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
992 bgp_parse_error(s
, 1);
994 path_id
= get_u32(pos
);
995 ADVANCE(pos
, len
, 4);
998 /* Decode prefix length */
1000 uint b
= (l
+ 7) / 8;
1001 ADVANCE(pos
, len
, 1);
1003 if (l
> IP6_MAX_PREFIX_LENGTH
)
1004 bgp_parse_error(s
, 10);
1007 bgp_parse_error(s
, 1);
1009 /* Decode prefix body */
1010 ip6_addr addr
= IP6_NONE
;
1011 memcpy(&addr
, pos
, b
);
1012 ADVANCE(pos
, len
, b
);
1014 net
= NET_ADDR_IP6(ip6_ntoh(addr
), l
);
1015 net_normalize_ip6(&net
);
1017 // XXXX validate prefix
1019 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1024 bgp_encode_next_hop_ip6(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size UNUSED
)
1026 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
1027 uint len
= a
->u
.ptr
->length
;
1029 ASSERT((len
== 16) || (len
== 32));
1031 put_ip6(buf
, ipa_to_ip6(nh
[0]));
1034 put_ip6(buf
+16, ipa_to_ip6(nh
[1]));
1040 bgp_decode_next_hop_ip6(struct bgp_parse_state
*s
, byte
*data
, uint len
, rta
*a
)
1042 struct adata
*ad
= lp_alloc_adata(s
->pool
, 32);
1043 ip_addr
*nh
= (void *) ad
->data
;
1045 if ((len
!= 16) && (len
!= 32))
1046 bgp_parse_error(s
, 9);
1048 nh
[0] = ipa_from_ip6(get_ip6(data
));
1049 nh
[1] = (len
== 32) ? ipa_from_ip6(get_ip6(data
+16)) : IPA_NONE
;
1051 if (ip6_is_link_local(nh
[0]))
1057 if (!ip6_is_link_local(nh
[1]))
1060 if (ipa_zero(nh
[1]))
1063 // XXXX validate next hop
1065 bgp_set_attr_ptr(&(a
->eattrs
), s
->pool
, BA_NEXT_HOP
, 0, ad
);
1066 bgp_apply_next_hop(s
, a
, nh
[0], nh
[1]);
1070 static const struct bgp_af_desc bgp_af_table
[] = {
1075 .encode_nlri
= bgp_encode_nlri_ip4
,
1076 .decode_nlri
= bgp_decode_nlri_ip4
,
1077 .encode_next_hop
= bgp_encode_next_hop_ip4
,
1078 .decode_next_hop
= bgp_decode_next_hop_ip4
,
1079 .update_next_hop
= bgp_update_next_hop_ip
,
1082 .afi
= BGP_AF_IPV4_MC
,
1085 .encode_nlri
= bgp_encode_nlri_ip4
,
1086 .decode_nlri
= bgp_decode_nlri_ip4
,
1087 .encode_next_hop
= bgp_encode_next_hop_ip4
,
1088 .decode_next_hop
= bgp_decode_next_hop_ip4
,
1089 .update_next_hop
= bgp_update_next_hop_ip
,
1095 .encode_nlri
= bgp_encode_nlri_ip6
,
1096 .decode_nlri
= bgp_decode_nlri_ip6
,
1097 .encode_next_hop
= bgp_encode_next_hop_ip6
,
1098 .decode_next_hop
= bgp_decode_next_hop_ip6
,
1099 .update_next_hop
= bgp_update_next_hop_ip
,
1102 .afi
= BGP_AF_IPV6_MC
,
1105 .encode_nlri
= bgp_encode_nlri_ip6
,
1106 .decode_nlri
= bgp_decode_nlri_ip6
,
1107 .encode_next_hop
= bgp_encode_next_hop_ip6
,
1108 .decode_next_hop
= bgp_decode_next_hop_ip6
,
1109 .update_next_hop
= bgp_update_next_hop_ip
,
1113 const struct bgp_af_desc
*
1114 bgp_get_af_desc(u32 afi
)
1117 for (i
= 0; i
< ARRAY_SIZE(bgp_af_table
); i
++)
1118 if (bgp_af_table
[i
].afi
== afi
)
1119 return &bgp_af_table
[i
];
1125 bgp_encode_nlri(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
1127 return s
->channel
->desc
->encode_nlri(s
, buck
, buf
, end
- buf
);
1131 bgp_encode_next_hop(struct bgp_write_state
*s
, eattr
*nh
, byte
*buf
)
1133 return s
->channel
->desc
->encode_next_hop(s
, nh
, buf
, 255);
1137 bgp_update_next_hop(struct bgp_export_state
*s
, eattr
*a
, ea_list
**to
)
1139 s
->channel
->desc
->update_next_hop(s
, a
, to
);
1142 #define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
1145 bgp_create_ip_reach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
1148 * 2 B Withdrawn Routes Length (zero)
1149 * --- IPv4 Withdrawn Routes NLRI (unused)
1150 * 2 B Total Path Attribute Length
1151 * var Path Attributes
1152 * var IPv4 Network Layer Reachability Information
1157 la
= bgp_encode_attrs(s
, buck
->eattrs
, buf
+4, buf
+ MAX_ATTRS_LENGTH
);
1160 /* Attribute list too long */
1161 bgp_withdraw_bucket(s
->channel
, buck
);
1168 lr
= bgp_encode_nlri(s
, buck
, buf
+4+la
, end
);
1174 bgp_create_mp_reach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
1177 * 2 B IPv4 Withdrawn Routes Length (zero)
1178 * --- IPv4 Withdrawn Routes NLRI (unused)
1179 * 2 B Total Path Attribute Length
1180 * 1 B MP_REACH_NLRI hdr - Attribute Flags
1181 * 1 B MP_REACH_NLRI hdr - Attribute Type Code
1182 * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
1183 * 2 B MP_REACH_NLRI data - Address Family Identifier
1184 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
1185 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
1186 * var MP_REACH_NLRI data - Network Address of Next Hop
1187 * 1 B MP_REACH_NLRI data - Reserved (zero)
1188 * var MP_REACH_NLRI data - Network Layer Reachability Information
1189 * var Rest of Path Attributes
1190 * --- IPv4 Network Layer Reachability Information (unused)
1193 int lh
, lr
, la
; /* Lengths of next hop, NLRI and attributes */
1195 /* Begin of MP_REACH_NLRI attribute */
1196 buf
[4] = BAF_OPTIONAL
| BAF_EXT_LEN
;
1197 buf
[5] = BA_MP_REACH_NLRI
;
1198 put_u16(buf
+6, 0); /* Will be fixed later */
1199 put_af3(buf
+8, s
->channel
->afi
);
1202 /* Encode attributes to temporary buffer */
1203 byte
*abuf
= alloca(MAX_ATTRS_LENGTH
);
1204 la
= bgp_encode_attrs(s
, buck
->eattrs
, abuf
, abuf
+ MAX_ATTRS_LENGTH
);
1207 /* Attribute list too long */
1208 bgp_withdraw_bucket(s
->channel
, buck
);
1212 /* Encode the next hop */
1213 lh
= bgp_encode_next_hop(s
, s
->mp_next_hop
, pos
+1);
1217 /* Reserved field */
1220 /* Encode the NLRI */
1221 lr
= bgp_encode_nlri(s
, buck
, pos
, end
- la
);
1224 /* End of MP_REACH_NLRI attribute, update data length */
1225 put_u16(buf
+6, pos
-buf
-8);
1227 /* Copy remaining attributes */
1228 memcpy(pos
, abuf
, la
);
1231 /* Initial UPDATE fields */
1233 put_u16(buf
+2, pos
-buf
-4);
1238 #undef MAX_ATTRS_LENGTH
1241 bgp_create_ip_unreach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
1244 * 2 B Withdrawn Routes Length
1245 * var IPv4 Withdrawn Routes NLRI
1246 * 2 B Total Path Attribute Length (zero)
1247 * --- Path Attributes (unused)
1248 * --- IPv4 Network Layer Reachability Information (unused)
1251 uint len
= bgp_encode_nlri(s
, buck
, buf
+2, end
);
1253 put_u16(buf
+0, len
);
1254 put_u16(buf
+2+len
, 0);
1260 bgp_create_mp_unreach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
1263 * 2 B Withdrawn Routes Length (zero)
1264 * --- IPv4 Withdrawn Routes NLRI (unused)
1265 * 2 B Total Path Attribute Length
1266 * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
1267 * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
1268 * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
1269 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
1270 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
1271 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
1272 * --- IPv4 Network Layer Reachability Information (unused)
1275 uint len
= bgp_encode_nlri(s
, buck
, buf
+11, end
);
1278 put_u16(buf
+2, 7+len
);
1280 /* Begin of MP_UNREACH_NLRI attribute */
1281 buf
[4] = BAF_OPTIONAL
| BAF_EXT_LEN
;
1282 buf
[5] = BA_MP_UNREACH_NLRI
;
1283 put_u16(buf
+6, 3+len
);
1284 put_af3(buf
+8, s
->channel
->afi
);
1290 bgp_create_update(struct bgp_channel
*c
, byte
*buf
)
1292 struct bgp_proto
*p
= (void *) c
->c
.proto
;
1293 struct bgp_bucket
*buck
;
1294 byte
*end
= buf
+ (bgp_max_packet_length(p
->conn
) - BGP_HEADER_LENGTH
);
1297 /* Initialize write state */
1298 struct bgp_write_state s
= {
1301 .pool
= bgp_linpool
,
1302 .as4_session
= p
->as4_session
,
1303 .add_path
= c
->add_path_tx
,
1308 /* Try unreachable bucket */
1309 if ((buck
= c
->withdraw_bucket
) && !EMPTY_LIST(buck
->prefixes
))
1311 res
= (c
->afi
== BGP_AF_IPV4
) ?
1312 bgp_create_ip_unreach(&s
, buck
, buf
, end
):
1313 bgp_create_mp_unreach(&s
, buck
, buf
, end
);
1318 /* Try reachable buckets */
1319 if (!EMPTY_LIST(c
->bucket_queue
))
1321 buck
= HEAD(c
->bucket_queue
);
1323 /* Cleanup empty buckets */
1324 if (EMPTY_LIST(buck
->prefixes
))
1326 bgp_free_bucket(c
, buck
);
1330 res
= (c
->afi
== BGP_AF_IPV4
) ?
1331 bgp_create_ip_reach(&s
, buck
, buf
, end
):
1332 bgp_create_mp_reach(&s
, buck
, buf
, end
);
1334 if (EMPTY_LIST(buck
->prefixes
))
1335 bgp_free_bucket(c
, buck
);
1337 bgp_defer_bucket(c
, buck
);
1345 /* No more prefixes to send */
1349 BGP_TRACE_RL(&rl_snd_update
, D_PACKETS
, "Sending UPDATE");
1356 bgp_create_ip_end_mark(struct bgp_channel
*c UNUSED
, byte
*buf
)
1358 /* Empty update packet */
1365 bgp_create_mp_end_mark(struct bgp_channel
*c
, byte
*buf
)
1368 put_u16(buf
+2, 6); /* length 4--9 */
1370 /* Empty MP_UNREACH_NLRI attribute */
1371 buf
[4] = BAF_OPTIONAL
;
1372 buf
[5] = BA_MP_UNREACH_NLRI
;
1373 buf
[6] = 3; /* Length 7--9 */
1374 put_af3(buf
+7, c
->afi
);
1380 bgp_create_end_mark(struct bgp_channel
*c
, byte
*buf
)
1382 struct bgp_proto
*p
= (void *) c
->c
.proto
;
1384 BGP_TRACE(D_PACKETS
, "Sending END-OF-RIB");
1386 return (c
->afi
== BGP_AF_IPV4
) ?
1387 bgp_create_ip_end_mark(c
, buf
):
1388 bgp_create_mp_end_mark(c
, buf
);
1392 bgp_rx_end_mark(struct bgp_proto
*p
, u32 afi
)
1394 struct bgp_channel
*c
= bgp_get_channel(p
, afi
);
1396 BGP_TRACE(D_PACKETS
, "Got END-OF-RIB");
1398 /* XXXX handle unknown AF in MP_*_NLRI */
1402 if (c
->load_state
== BFS_LOADING
)
1403 c
->load_state
= BFS_NONE
;
1405 if (p
->p
.gr_recovery
)
1406 channel_graceful_restart_unlock(&c
->c
);
1409 bgp_graceful_restart_done(c
);
1413 bgp_decode_nlri(struct bgp_parse_state
*s
, u32 afi
, byte
*nlri
, uint len
, ea_list
*ea
, byte
*nh
, uint nh_len
)
1415 struct bgp_channel
*c
= bgp_get_channel(s
->proto
, afi
);
1418 /* XXXX handle unknown AF in MP_*_NLRI */
1423 s
->add_path
= c
->add_path_rx
;
1426 s
->last_src
= s
->proto
->p
.main_source
;
1429 * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
1430 * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
1431 * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
1432 * decode_next_hop hooks) by restoring a->eattrs afterwards.
1437 a
= alloca(sizeof(struct rta
));
1438 memset(a
, 0, sizeof(struct rta
));
1440 a
->source
= RTS_BGP
;
1441 a
->scope
= SCOPE_UNIVERSE
;
1442 a
->cast
= RTC_UNICAST
;
1443 a
->dest
= RTD_UNREACHABLE
;
1444 a
->from
= s
->proto
->cf
->remote_ip
;
1447 c
->desc
->decode_next_hop(s
, nh
, nh_len
, a
);
1449 /* Handle withdraw during next hop decoding */
1450 if (s
->err_withdraw
)
1454 c
->desc
->decode_nlri(s
, nlri
, len
, a
);
1456 rta_free(s
->cached_rta
);
1457 s
->cached_rta
= NULL
;
1461 bgp_rx_update(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
1463 struct bgp_proto
*p
= conn
->bgp
;
1466 BGP_TRACE_RL(&rl_rcv_update
, D_PACKETS
, "Got UPDATE");
1468 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1469 if (conn
->state
== BS_OPENCONFIRM
)
1470 bgp_conn_enter_established_state(conn
);
1472 if (conn
->state
!= BS_ESTABLISHED
)
1473 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
1475 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
1477 /* Initialize parse state */
1478 struct bgp_parse_state s
= {
1480 .pool
= bgp_linpool
,
1481 .as4_session
= p
->as4_session
,
1484 /* Parse error handler */
1485 if (setjmp(s
.err_jmpbuf
))
1487 bgp_error(conn
, 3, s
.err_subcode
, NULL
, 0);
1491 /* Check minimal length */
1493 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
1495 /* Skip fixed header */
1499 * UPDATE message format
1501 * 2 B IPv4 Withdrawn Routes Length
1502 * var IPv4 Withdrawn Routes NLRI
1503 * 2 B Total Path Attribute Length
1504 * var Path Attributes
1505 * var IPv4 Reachable Routes NLRI
1508 s
.ip_unreach_len
= get_u16(pkt
+ pos
);
1509 s
.ip_unreach_nlri
= pkt
+ pos
+ 2;
1510 pos
+= 2 + s
.ip_unreach_len
;
1513 bgp_parse_error(&s
, 1);
1515 s
.attr_len
= get_u16(pkt
+ pos
);
1516 s
.attrs
= pkt
+ pos
+ 2;
1517 pos
+= 2 + s
.attr_len
;
1520 bgp_parse_error(&s
, 1);
1522 s
.ip_reach_len
= len
- pos
;
1523 s
.ip_reach_nlri
= pkt
+ pos
;
1527 ea
= bgp_decode_attrs(&s
, s
.attrs
, s
.attr_len
);
1529 /* Check for End-of-RIB marker */
1530 if (!s
.attr_len
&& !s
.ip_unreach_len
&& !s
.ip_reach_len
)
1531 { bgp_rx_end_mark(p
, BGP_AF_IPV4
); goto done
; }
1533 /* Check for MP End-of-RIB marker */
1534 if ((s
.attr_len
< 8) && !s
.ip_unreach_len
&& !s
.ip_reach_len
&&
1535 !s
.mp_reach_len
&& !s
.mp_unreach_len
&& s
.mp_unreach_af
) /* XXXX See RFC 7606 5.2 */
1536 { bgp_rx_end_mark(p
, s
.mp_unreach_af
); goto done
; }
1538 if (s
.ip_unreach_len
)
1539 bgp_decode_nlri(&s
, BGP_AF_IPV4
, s
.ip_unreach_nlri
, s
.ip_unreach_len
, NULL
, NULL
, 0);
1541 if (s
.mp_unreach_len
)
1542 bgp_decode_nlri(&s
, s
.mp_unreach_af
, s
.mp_unreach_nlri
, s
.mp_unreach_len
, NULL
, NULL
, 0);
1545 bgp_decode_nlri(&s
, BGP_AF_IPV4
, s
.ip_reach_nlri
, s
.ip_reach_len
,
1546 ea
, s
.ip_next_hop_data
, s
.ip_next_hop_len
);
1549 bgp_decode_nlri(&s
, s
.mp_reach_af
, s
.mp_reach_nlri
, s
.mp_reach_len
,
1550 ea
, s
.mp_next_hop_data
, s
.mp_next_hop_len
);
1553 rta_free(s
.cached_rta
);
1563 static inline byte
*
1564 bgp_create_route_refresh(struct bgp_channel
*c
, byte
*buf
)
1566 struct bgp_proto
*p
= (void *) c
->c
.proto
;
1568 BGP_TRACE(D_PACKETS
, "Sending ROUTE-REFRESH");
1570 /* Original route refresh request, RFC 2918 */
1571 put_af4(buf
, c
->afi
);
1572 buf
[2] = BGP_RR_REQUEST
;
1577 static inline byte
*
1578 bgp_create_begin_refresh(struct bgp_channel
*c
, byte
*buf
)
1580 struct bgp_proto
*p
= (void *) c
->c
.proto
;
1582 BGP_TRACE(D_PACKETS
, "Sending BEGIN-OF-RR");
1584 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
1585 put_af4(buf
, c
->afi
);
1586 buf
[2] = BGP_RR_BEGIN
;
1591 static inline byte
*
1592 bgp_create_end_refresh(struct bgp_channel
*c
, byte
*buf
)
1594 struct bgp_proto
*p
= (void *) c
->c
.proto
;
1596 BGP_TRACE(D_PACKETS
, "Sending END-OF-RR");
1598 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
1599 put_af4(buf
, c
->afi
);
1600 buf
[2] = BGP_RR_END
;
1606 bgp_rx_route_refresh(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
1608 struct bgp_proto
*p
= conn
->bgp
;
1610 if (conn
->state
!= BS_ESTABLISHED
)
1611 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
1613 if (!conn
->local_caps
->route_refresh
)
1614 { bgp_error(conn
, 1, 3, pkt
+18, 1); return; }
1616 if (len
< (BGP_HEADER_LENGTH
+ 4))
1617 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
1619 if (len
> (BGP_HEADER_LENGTH
+ 4))
1620 { bgp_error(conn
, 7, 1, pkt
, MIN(len
, 2048)); return; }
1622 struct bgp_channel
*c
= bgp_get_channel(p
, get_af4(pkt
+19));
1625 log(L_WARN
"%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
1626 p
->p
.name
, pkt
[21], get_u16(pkt
+19), pkt
[22]);
1630 /* RFC 7313 redefined reserved field as RR message subtype */
1631 uint subtype
= p
->enhanced_refresh
? pkt
[21] : BGP_RR_REQUEST
;
1635 case BGP_RR_REQUEST
:
1636 BGP_TRACE(D_PACKETS
, "Got ROUTE-REFRESH");
1637 channel_request_feeding(&c
->c
);
1641 BGP_TRACE(D_PACKETS
, "Got BEGIN-OF-RR");
1642 bgp_refresh_begin(c
);
1646 BGP_TRACE(D_PACKETS
, "Got END-OF-RR");
1651 log(L_WARN
"%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
1652 p
->p
.name
, subtype
);
1657 static inline struct bgp_channel
*
1658 bgp_get_channel_to_send(struct bgp_proto
*p
, struct bgp_conn
*conn
)
1660 uint i
= conn
->last_channel
;
1662 /* Try the last channel, but at most several times */
1663 if ((conn
->channels_to_send
& (1 << i
)) &&
1664 (conn
->last_channel_count
< 16))
1667 /* Find channel with non-zero channels_to_send */
1671 if (i
>= p
->channel_count
)
1674 while (! (conn
->channels_to_send
& (1 << i
)));
1676 /* Use that channel */
1677 conn
->last_channel
= i
;
1678 conn
->last_channel_count
= 0;
1681 conn
->last_channel_count
++;
1682 return p
->channel_map
[i
];
1686 bgp_send(struct bgp_conn
*conn
, uint type
, uint len
)
1688 sock
*sk
= conn
->sk
;
1689 byte
*buf
= sk
->tbuf
;
1691 memset(buf
, 0xff, 16); /* Marker */
1692 put_u16(buf
+16, len
);
1695 return sk_send(sk
, len
);
1699 * bgp_fire_tx - transmit packets
1702 * Whenever the transmit buffers of the underlying TCP connection
1703 * are free and we have any packets queued for sending, the socket functions
1704 * call bgp_fire_tx() which takes care of selecting the highest priority packet
1705 * queued (Notification > Keepalive > Open > Update), assembling its header
1706 * and body and sending it to the connection.
1709 bgp_fire_tx(struct bgp_conn
*conn
)
1711 struct bgp_proto
*p
= conn
->bgp
;
1712 struct bgp_channel
*c
;
1713 byte
*buf
, *pkt
, *end
;
1719 buf
= conn
->sk
->tbuf
;
1720 pkt
= buf
+ BGP_HEADER_LENGTH
;
1721 s
= conn
->packets_to_send
;
1723 if (s
& (1 << PKT_SCHEDULE_CLOSE
))
1725 /* We can finally close connection and enter idle state */
1726 bgp_conn_enter_idle_state(conn
);
1729 if (s
& (1 << PKT_NOTIFICATION
))
1731 conn
->packets_to_send
= 1 << PKT_SCHEDULE_CLOSE
;
1732 end
= bgp_create_notification(conn
, pkt
);
1733 return bgp_send(conn
, PKT_NOTIFICATION
, end
- buf
);
1735 else if (s
& (1 << PKT_KEEPALIVE
))
1737 conn
->packets_to_send
&= ~(1 << PKT_KEEPALIVE
);
1738 BGP_TRACE(D_PACKETS
, "Sending KEEPALIVE");
1739 bgp_start_timer(conn
->keepalive_timer
, conn
->keepalive_time
);
1740 return bgp_send(conn
, PKT_KEEPALIVE
, BGP_HEADER_LENGTH
);
1742 else if (s
& (1 << PKT_OPEN
))
1744 conn
->packets_to_send
&= ~(1 << PKT_OPEN
);
1745 end
= bgp_create_open(conn
, pkt
);
1746 return bgp_send(conn
, PKT_OPEN
, end
- buf
);
1748 else while (conn
->channels_to_send
)
1750 c
= bgp_get_channel_to_send(p
, conn
);
1751 s
= c
->packets_to_send
;
1753 if (s
& (1 << PKT_ROUTE_REFRESH
))
1755 c
->packets_to_send
&= ~(1 << PKT_ROUTE_REFRESH
);
1756 end
= bgp_create_route_refresh(c
, pkt
);
1757 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
1759 else if (s
& (1 << PKT_BEGIN_REFRESH
))
1761 /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
1762 c
->packets_to_send
&= ~(1 << PKT_BEGIN_REFRESH
);
1763 end
= bgp_create_begin_refresh(c
, pkt
);
1764 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
1766 else if (s
& (1 << PKT_UPDATE
))
1768 end
= bgp_create_update(c
, pkt
);
1770 return bgp_send(conn
, PKT_UPDATE
, end
- buf
);
1772 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
1773 c
->packets_to_send
= 0;
1774 conn
->channels_to_send
&= ~(1 << c
->index
);
1776 if (c
->feed_state
== BFS_LOADED
)
1778 c
->feed_state
= BFS_NONE
;
1779 end
= bgp_create_end_mark(c
, pkt
);
1780 return bgp_send(conn
, PKT_UPDATE
, end
- buf
);
1783 else if (c
->feed_state
== BFS_REFRESHED
)
1785 c
->feed_state
= BFS_NONE
;
1786 end
= bgp_create_end_refresh(c
, pkt
);
1787 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
1791 bug("Channel packets_to_send: %x", s
);
1793 c
->packets_to_send
= 0;
1794 conn
->channels_to_send
&= ~(1 << c
->index
);
1801 * bgp_schedule_packet - schedule a packet for transmission
1804 * @type: packet type
1806 * Schedule a packet of type @type to be sent as soon as possible.
1809 bgp_schedule_packet(struct bgp_conn
*conn
, struct bgp_channel
*c
, int type
)
1813 DBG("BGP: Scheduling packet type %d\n", type
);
1817 if (! conn
->channels_to_send
)
1819 conn
->last_channel
= c
->index
;
1820 conn
->last_channel_count
= 0;
1823 c
->packets_to_send
|= 1 << type
;
1824 conn
->channels_to_send
|= 1 << c
->index
;
1827 conn
->packets_to_send
|= 1 << type
;
1829 if ((conn
->sk
->tpos
== conn
->sk
->tbuf
) && !ev_active(conn
->tx_ev
))
1830 ev_schedule(conn
->tx_ev
);
/*
 * bgp_kick_tx - event hook that drains the transmit queue
 * @vconn: the BGP connection, passed as an untyped pointer
 *
 * Calls bgp_fire_tx() repeatedly for as long as it returns a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  while (bgp_fire_tx(conn) > 0)
    ;
}
1846 struct bgp_conn
*conn
= sk
->data
;
1848 DBG("BGP: TX hook\n");
1849 while (bgp_fire_tx(conn
) > 0)
1857 } bgp_msg_table
[] = {
1858 { 1, 0, "Invalid message header" },
1859 { 1, 1, "Connection not synchronized" },
1860 { 1, 2, "Bad message length" },
1861 { 1, 3, "Bad message type" },
1862 { 2, 0, "Invalid OPEN message" },
1863 { 2, 1, "Unsupported version number" },
1864 { 2, 2, "Bad peer AS" },
1865 { 2, 3, "Bad BGP identifier" },
1866 { 2, 4, "Unsupported optional parameter" },
1867 { 2, 5, "Authentication failure" },
1868 { 2, 6, "Unacceptable hold time" },
1869 { 2, 7, "Required capability missing" }, /* [RFC5492] */
1870 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1871 { 3, 0, "Invalid UPDATE message" },
1872 { 3, 1, "Malformed attribute list" },
1873 { 3, 2, "Unrecognized well-known attribute" },
1874 { 3, 3, "Missing mandatory attribute" },
1875 { 3, 4, "Invalid attribute flags" },
1876 { 3, 5, "Invalid attribute length" },
1877 { 3, 6, "Invalid ORIGIN attribute" },
1878 { 3, 7, "AS routing loop" }, /* Deprecated */
1879 { 3, 8, "Invalid NEXT_HOP attribute" },
1880 { 3, 9, "Optional attribute error" },
1881 { 3, 10, "Invalid network field" },
1882 { 3, 11, "Malformed AS_PATH" },
1883 { 4, 0, "Hold timer expired" },
1884 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1885 { 5, 1, "Unexpected message in OpenSent state" },
1886 { 5, 2, "Unexpected message in OpenConfirm state" },
1887 { 5, 3, "Unexpected message in Established state" },
1888 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1889 { 6, 1, "Maximum number of prefixes reached" },
1890 { 6, 2, "Administrative shutdown" },
1891 { 6, 3, "Peer de-configured" },
1892 { 6, 4, "Administrative reset" },
1893 { 6, 5, "Connection rejected" },
1894 { 6, 6, "Other configuration change" },
1895 { 6, 7, "Connection collision resolution" },
1896 { 6, 8, "Out of Resources" },
1897 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
1898 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
1902 * bgp_error_dsc - return BGP error description
1903 * @code: BGP error code
1904 * @subcode: BGP error subcode
1906 * bgp_error_dsc() returns error description for BGP errors
1907 * which might be static string or given temporary buffer.
1910 bgp_error_dsc(uint code
, uint subcode
)
1912 static char buff
[32];
1915 for (i
=0; i
< ARRAY_SIZE(bgp_msg_table
); i
++)
1916 if (bgp_msg_table
[i
].major
== code
&& bgp_msg_table
[i
].minor
== subcode
)
1917 return bgp_msg_table
[i
].msg
;
1919 bsprintf(buff
, "Unknown error %u.%u", code
, subcode
);
1924 bgp_log_error(struct bgp_proto
*p
, u8
class, char *msg
, uint code
, uint subcode
, byte
*data
, uint len
)
1927 byte
*t
, argbuf
[36];
1930 /* Don't report Cease messages generated by myself */
1931 if (code
== 6 && class == BE_BGP_TX
)
1934 name
= bgp_error_dsc(code
, subcode
);
1941 if ((code
== 2) && (subcode
== 2) && ((len
== 2) || (len
== 4)))
1943 /* Bad peer AS - we would like to print the AS */
1944 t
+= bsprintf(t
, "%u", (len
== 2) ? get_u16(data
) : get_u32(data
));
1949 for (i
=0; i
<len
; i
++)
1950 t
+= bsprintf(t
, "%02x", data
[i
]);
1954 log(L_REMOTE
"%s: %s: %s%s", p
->p
.name
, msg
, name
, argbuf
);
1958 bgp_rx_notification(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
1960 struct bgp_proto
*p
= conn
->bgp
;
1963 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
1965 uint code
= pkt
[19];
1966 uint subcode
= pkt
[20];
1967 int err
= (code
!= 6);
1969 bgp_log_error(p
, BE_BGP_RX
, "Received", code
, subcode
, pkt
+21, len
-21);
1970 bgp_store_error(p
, conn
, BE_BGP_RX
, (code
<< 16) | subcode
);
1972 bgp_conn_enter_close_state(conn
);
1973 bgp_schedule_packet(conn
, NULL
, PKT_SCHEDULE_CLOSE
);
1977 bgp_update_startup_delay(p
);
1983 bgp_rx_keepalive(struct bgp_conn
*conn
)
1985 struct bgp_proto
*p
= conn
->bgp
;
1987 BGP_TRACE(D_PACKETS
, "Got KEEPALIVE");
1988 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
1990 if (conn
->state
== BS_OPENCONFIRM
)
1991 { bgp_conn_enter_established_state(conn
); return; }
1993 if (conn
->state
!= BS_ESTABLISHED
)
1994 bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0);
1999 * bgp_rx_packet - handle a received packet
2000 * @conn: BGP connection
2001 * @pkt: start of the packet
2004 * bgp_rx_packet() takes a newly received packet and calls the corresponding
2005 * packet handler according to the packet type.
2008 bgp_rx_packet(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
2010 byte type
= pkt
[18];
2012 DBG("BGP: Got packet %02x (%d bytes)\n", type
, len
);
2014 if (conn
->bgp
->p
.mrtdump
& MD_MESSAGES
)
2015 mrt_dump_bgp_packet(conn
, pkt
, len
);
2019 case PKT_OPEN
: return bgp_rx_open(conn
, pkt
, len
);
2020 case PKT_UPDATE
: return bgp_rx_update(conn
, pkt
, len
);
2021 case PKT_NOTIFICATION
: return bgp_rx_notification(conn
, pkt
, len
);
2022 case PKT_KEEPALIVE
: return bgp_rx_keepalive(conn
);
2023 case PKT_ROUTE_REFRESH
: return bgp_rx_route_refresh(conn
, pkt
, len
);
2024 default: bgp_error(conn
, 1, 3, pkt
+18, 1);
2029 * bgp_rx - handle received data
2031 * @size: amount of data received
2033 * bgp_rx() is called by the socket layer whenever new data arrive from
2034 * the underlying TCP connection. It assembles the data fragments to packets,
2035 * checks their headers and framing and passes complete packets to
2039 bgp_rx(sock
*sk
, uint size
)
2041 struct bgp_conn
*conn
= sk
->data
;
2042 byte
*pkt_start
= sk
->rbuf
;
2043 byte
*end
= pkt_start
+ size
;
2046 DBG("BGP: RX hook: Got %d bytes\n", size
);
2047 while (end
>= pkt_start
+ BGP_HEADER_LENGTH
)
2049 if ((conn
->state
== BS_CLOSE
) || (conn
->sk
!= sk
))
2052 if (pkt_start
[i
] != 0xff)
2054 bgp_error(conn
, 1, 1, NULL
, 0);
2057 len
= get_u16(pkt_start
+16);
2058 if ((len
< BGP_HEADER_LENGTH
) || (len
> bgp_max_packet_length(conn
)))
2060 bgp_error(conn
, 1, 2, pkt_start
+16, 2);
2063 if (end
< pkt_start
+ len
)
2065 bgp_rx_packet(conn
, pkt_start
, len
);
2068 if (pkt_start
!= sk
->rbuf
)
2070 memmove(sk
->rbuf
, pkt_start
, end
- pkt_start
);
2071 sk
->rpos
= sk
->rbuf
+ (end
- pkt_start
);