]>
git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
2 * BIRD -- BGP Packet Processing
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
8 * Can be freely distributed and used under the terms of the GNU GPL.
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "nest/mrtdump.h"
21 #include "conf/conf.h"
22 #include "lib/unaligned.h"
23 #include "lib/flowspec.h"
24 #include "lib/socket.h"
31 #define BGP_RR_REQUEST 0
32 #define BGP_RR_BEGIN 1
35 #define BGP_NLRI_MAX (4 + 1 + 32)
37 #define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */
38 #define BGP_MPLS_MAX 10 /* Max number of labels such that 24*n <= 255 */
39 #define BGP_MPLS_NULL 3 /* Implicit NULL label */
40 #define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107, section 3 */
43 static struct tbf rl_rcv_update
= TBF_DEFAULT_LOG_LIMITS
;
44 static struct tbf rl_snd_update
= TBF_DEFAULT_LOG_LIMITS
;
46 /* Table for state -> RFC 6608 FSM error subcodes */
47 static byte fsm_err_subcode
[BS_MAX
] = {
54 static struct bgp_channel
*
55 bgp_get_channel(struct bgp_proto
*p
, u32 afi
)
59 for (i
= 0; i
< p
->channel_count
; i
++)
60 if (p
->afi_map
[i
] == afi
)
61 return p
->channel_map
[i
];
67 put_af3(byte
*buf
, u32 id
)
69 put_u16(buf
, id
>> 16);
74 put_af4(byte
*buf
, u32 id
)
76 put_u16(buf
, id
>> 16);
84 return (get_u16(buf
) << 16) | buf
[2];
90 return (get_u16(buf
) << 16) | buf
[3];
94 * MRT Dump format is not semantically specified.
95 * We will use these values in appropriate fields:
97 * Local AS, Remote AS - configured AS numbers for given BGP instance.
98 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
100 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
101 * changes) and MESSAGE (for received BGP messages).
103 * STATE_CHANGE always uses the AS4 variant, but MESSAGE uses the AS4 variant
104 * only when AS4 session is established and even in that case MESSAGE
105 * does not use AS4 variant for initial OPEN message. This strange
106 * behavior is here for compatibility with Quagga and Bgpdump,
110 mrt_put_bgp4_hdr(byte
*buf
, struct bgp_conn
*conn
, int as4
)
112 struct bgp_proto
*p
= conn
->bgp
;
113 uint v4
= ipa_is_ip4(p
->cf
->remote_ip
);
117 put_u32(buf
+0, p
->remote_as
);
118 put_u32(buf
+4, p
->public_as
);
123 put_u16(buf
+0, (p
->remote_as
<= 0xFFFF) ? p
->remote_as
: AS_TRANS
);
124 put_u16(buf
+2, (p
->public_as
<= 0xFFFF) ? p
->public_as
: AS_TRANS
);
128 put_u16(buf
+0, (p
->neigh
&& p
->neigh
->iface
) ? p
->neigh
->iface
->index
: 0);
129 put_u16(buf
+2, v4
? BGP_AFI_IPV4
: BGP_AFI_IPV6
);
134 buf
= put_ip4(buf
, conn
->sk
? ipa_to_ip4(conn
->sk
->daddr
) : IP4_NONE
);
135 buf
= put_ip4(buf
, conn
->sk
? ipa_to_ip4(conn
->sk
->saddr
) : IP4_NONE
);
139 buf
= put_ip6(buf
, conn
->sk
? ipa_to_ip6(conn
->sk
->daddr
) : IP6_NONE
);
140 buf
= put_ip6(buf
, conn
->sk
? ipa_to_ip6(conn
->sk
->saddr
) : IP6_NONE
);
147 mrt_dump_bgp_packet(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
149 byte
*buf
= alloca(128+len
); /* 128 is enough for MRT headers */
150 byte
*bp
= buf
+ MRTDUMP_HDR_LENGTH
;
151 int as4
= conn
->bgp
->as4_session
;
153 bp
= mrt_put_bgp4_hdr(bp
, conn
, as4
);
154 memcpy(bp
, pkt
, len
);
156 mrt_dump_message(&conn
->bgp
->p
, BGP4MP
, as4
? BGP4MP_MESSAGE_AS4
: BGP4MP_MESSAGE
,
161 convert_state(uint state
)
163 /* Convert state from our BS_* values to values used in MRTDump */
164 return (state
== BS_CLOSE
) ? 1 : state
+ 1;
168 mrt_dump_bgp_state_change(struct bgp_conn
*conn
, uint old
, uint
new)
171 byte
*bp
= buf
+ MRTDUMP_HDR_LENGTH
;
173 bp
= mrt_put_bgp4_hdr(bp
, conn
, 1);
174 put_u16(bp
+0, convert_state(old
));
175 put_u16(bp
+2, convert_state(new));
177 mrt_dump_message(&conn
->bgp
->p
, BGP4MP
, BGP4MP_STATE_CHANGE_AS4
, buf
, bp
-buf
);
181 bgp_create_notification(struct bgp_conn
*conn
, byte
*buf
)
183 struct bgp_proto
*p
= conn
->bgp
;
185 BGP_TRACE(D_PACKETS
, "Sending NOTIFICATION(code=%d.%d)", conn
->notify_code
, conn
->notify_subcode
);
186 buf
[0] = conn
->notify_code
;
187 buf
[1] = conn
->notify_subcode
;
188 memcpy(buf
+2, conn
->notify_data
, conn
->notify_size
);
189 return buf
+ 2 + conn
->notify_size
;
193 /* Capability negotiation as per RFC 5492 */
195 const struct bgp_af_caps
*
196 bgp_find_af_caps(struct bgp_caps
*caps
, u32 afi
)
198 struct bgp_af_caps
*ac
;
200 WALK_AF_CAPS(caps
, ac
)
207 static struct bgp_af_caps
*
208 bgp_get_af_caps(struct bgp_caps
*caps
, u32 afi
)
210 struct bgp_af_caps
*ac
;
212 WALK_AF_CAPS(caps
, ac
)
216 ac
= &caps
->af_data
[caps
->af_count
++];
217 memset(ac
, 0, sizeof(struct bgp_af_caps
));
224 bgp_af_caps_cmp(const void *X
, const void *Y
)
226 const struct bgp_af_caps
*x
= X
, *y
= Y
;
227 return (x
->afi
< y
->afi
) ? -1 : (x
->afi
> y
->afi
) ? 1 : 0;
232 bgp_write_capabilities(struct bgp_conn
*conn
, byte
*buf
)
234 struct bgp_proto
*p
= conn
->bgp
;
235 struct bgp_channel
*c
;
236 struct bgp_caps
*caps
;
237 struct bgp_af_caps
*ac
;
238 uint any_ext_next_hop
= 0;
239 uint any_add_path
= 0;
242 /* Prepare bgp_caps structure */
244 int n
= list_length(&p
->p
.channels
);
245 caps
= mb_allocz(p
->p
.pool
, sizeof(struct bgp_caps
) + n
* sizeof(struct bgp_af_caps
));
246 conn
->local_caps
= caps
;
248 caps
->as4_support
= p
->cf
->enable_as4
;
249 caps
->ext_messages
= p
->cf
->enable_extended_messages
;
250 caps
->route_refresh
= p
->cf
->enable_refresh
;
251 caps
->enhanced_refresh
= p
->cf
->enable_refresh
;
253 if (caps
->as4_support
)
254 caps
->as4_number
= p
->public_as
;
259 caps
->gr_time
= p
->cf
->gr_time
;
260 caps
->gr_flags
= p
->p
.gr_recovery
? BGP_GRF_RESTART
: 0;
263 /* Allocate and fill per-AF fields */
264 WALK_LIST(c
, p
->p
.channels
)
266 ac
= &caps
->af_data
[caps
->af_count
++];
270 ac
->ext_next_hop
= bgp_channel_is_ipv4(c
) && c
->cf
->ext_next_hop
;
271 any_ext_next_hop
|= ac
->ext_next_hop
;
273 ac
->add_path
= c
->cf
->add_path
;
274 any_add_path
|= ac
->add_path
;
280 if (p
->p
.gr_recovery
)
281 ac
->gr_af_flags
|= BGP_GRF_FORWARDING
;
285 /* Sort capability fields by AFI/SAFI */
286 qsort(caps
->af_data
, caps
->af_count
, sizeof(struct bgp_af_caps
), bgp_af_caps_cmp
);
289 /* Create capability list in buffer */
292 * Note that max length is ~ 20+14*af_count. With max 12 channels that is
293 * 188. Option limit is 253 and buffer size is 4096, so we cannot overflow
294 * unless we add new capabilities or more AFs.
297 WALK_AF_CAPS(caps
, ac
)
300 *buf
++ = 1; /* Capability 1: Multiprotocol extensions */
301 *buf
++ = 4; /* Capability data length */
302 put_af4(buf
, ac
->afi
);
306 if (caps
->route_refresh
)
308 *buf
++ = 2; /* Capability 2: Support for route refresh */
309 *buf
++ = 0; /* Capability data length */
312 if (any_ext_next_hop
)
314 *buf
++ = 5; /* Capability 5: Support for extended next hop */
315 *buf
++ = 0; /* Capability data length, will be fixed later */
318 WALK_AF_CAPS(caps
, ac
)
319 if (ac
->ext_next_hop
)
321 put_af4(buf
, ac
->afi
);
322 put_u16(buf
+4, BGP_AFI_IPV6
);
326 data
[-1] = buf
- data
;
329 if (caps
->ext_messages
)
331 *buf
++ = 6; /* Capability 6: Support for extended messages */
332 *buf
++ = 0; /* Capability data length */
337 *buf
++ = 64; /* Capability 64: Support for graceful restart */
338 *buf
++ = 0; /* Capability data length, will be fixed later */
341 put_u16(buf
, caps
->gr_time
);
342 buf
[0] |= caps
->gr_flags
;
345 WALK_AF_CAPS(caps
, ac
)
348 put_af3(buf
, ac
->afi
);
349 buf
[3] = ac
->gr_af_flags
;
353 data
[-1] = buf
- data
;
356 if (caps
->as4_support
)
358 *buf
++ = 65; /* Capability 65: Support for 4-octet AS number */
359 *buf
++ = 4; /* Capability data length */
360 put_u32(buf
, p
->public_as
);
366 *buf
++ = 69; /* Capability 69: Support for ADD-PATH */
367 *buf
++ = 0; /* Capability data length, will be fixed later */
370 WALK_AF_CAPS(caps
, ac
)
373 put_af3(buf
, ac
->afi
);
374 buf
[3] = ac
->add_path
;
378 data
[-1] = buf
- data
;
381 if (caps
->enhanced_refresh
)
383 *buf
++ = 70; /* Capability 70: Support for enhanced route refresh */
384 *buf
++ = 0; /* Capability data length */
391 bgp_read_capabilities(struct bgp_conn
*conn
, struct bgp_caps
*caps
, byte
*pos
, int len
)
393 struct bgp_proto
*p
= conn
->bgp
;
394 struct bgp_af_caps
*ac
;
400 if (len
< 2 || len
< (2 + pos
[1]))
403 /* Capability length */
406 /* Capability type */
409 case 1: /* Multiprotocol capability, RFC 4760 */
414 ac
= bgp_get_af_caps(caps
, af
);
418 case 2: /* Route refresh capability, RFC 2918 */
422 caps
->route_refresh
= 1;
425 case 5: /* Extended next hop encoding capability, RFC 5549 */
429 for (i
= 0; i
< cl
; i
+= 6)
431 /* Specified only for IPv4 prefixes with IPv6 next hops */
432 if ((get_u16(pos
+2+i
+0) != BGP_AFI_IPV4
) ||
433 (get_u16(pos
+2+i
+4) != BGP_AFI_IPV6
))
436 af
= get_af4(pos
+2+i
);
437 ac
= bgp_get_af_caps(caps
, af
);
438 ac
->ext_next_hop
= 1;
442 case 6: /* Extended message length capability, RFC draft */
446 caps
->ext_messages
= 1;
449 case 64: /* Graceful restart capability, RFC 4724 */
453 /* Only the last instance is valid */
454 WALK_AF_CAPS(caps
, ac
)
461 caps
->gr_flags
= pos
[2] & 0xf0;
462 caps
->gr_time
= get_u16(pos
+ 2) & 0x0fff;
464 for (i
= 2; i
< cl
; i
+= 4)
466 af
= get_af3(pos
+2+i
);
467 ac
= bgp_get_af_caps(caps
, af
);
469 ac
->gr_af_flags
= pos
[2+i
+3];
473 case 65: /* AS4 capability, RFC 6793 */
477 caps
->as4_support
= 1;
478 caps
->as4_number
= get_u32(pos
+ 2);
481 case 69: /* ADD-PATH capability, RFC 7911 */
485 for (i
= 0; i
< cl
; i
+= 4)
487 byte val
= pos
[2+i
+3];
488 if (!val
|| (val
> BGP_ADD_PATH_FULL
))
490 log(L_WARN
"%s: Got ADD-PATH capability with unknown value %u, ignoring",
496 for (i
= 0; i
< cl
; i
+= 4)
498 af
= get_af3(pos
+2+i
);
499 ac
= bgp_get_af_caps(caps
, af
);
500 ac
->add_path
= pos
[2+i
+3];
504 case 70: /* Enhanced route refresh capability, RFC 7313 */
508 caps
->enhanced_refresh
= 1;
511 /* We can safely ignore all other capabilities */
514 ADVANCE(pos
, len
, 2 + cl
);
519 bgp_error(conn
, 2, 0, NULL
, 0);
524 bgp_read_options(struct bgp_conn
*conn
, byte
*pos
, int len
)
526 struct bgp_proto
*p
= conn
->bgp
;
527 struct bgp_caps
*caps
;
530 /* Max number of announced AFIs is limited by max option length (255) */
531 caps
= alloca(sizeof(struct bgp_caps
) + 64 * sizeof(struct bgp_af_caps
));
532 memset(caps
, 0, sizeof(struct bgp_caps
));
536 if ((len
< 2) || (len
< (2 + pos
[1])))
537 { bgp_error(conn
, 2, 0, NULL
, 0); return -1; }
542 /* BGP capabilities, RFC 5492 */
543 if (p
->cf
->capabilities
)
544 bgp_read_capabilities(conn
, caps
, pos
+ 2, ol
);
549 bgp_error(conn
, 2, 4, pos
, ol
); /* FIXME: ol or ol+2 ? */
553 ADVANCE(pos
, len
, 2 + ol
);
556 uint n
= sizeof(struct bgp_caps
) + caps
->af_count
* sizeof(struct bgp_af_caps
);
557 conn
->remote_caps
= mb_allocz(p
->p
.pool
, n
);
558 memcpy(conn
->remote_caps
, caps
, n
);
564 bgp_create_open(struct bgp_conn
*conn
, byte
*buf
)
566 struct bgp_proto
*p
= conn
->bgp
;
568 BGP_TRACE(D_PACKETS
, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
569 BGP_VERSION
, p
->public_as
, p
->cf
->hold_time
, p
->local_id
);
571 buf
[0] = BGP_VERSION
;
572 put_u16(buf
+1, (p
->public_as
< 0xFFFF) ? p
->public_as
: AS_TRANS
);
573 put_u16(buf
+3, p
->cf
->hold_time
);
574 put_u32(buf
+5, p
->local_id
);
576 if (p
->cf
->capabilities
)
578 /* Prepare local_caps and write capabilities to buffer */
579 byte
*end
= bgp_write_capabilities(conn
, buf
+12);
580 uint len
= end
- (buf
+12);
582 buf
[9] = len
+ 2; /* Optional parameters length */
583 buf
[10] = 2; /* Option 2: Capability list */
584 buf
[11] = len
; /* Option data length */
590 /* Prepare empty local_caps */
591 conn
->local_caps
= mb_allocz(p
->p
.pool
, sizeof(struct bgp_caps
));
593 buf
[9] = 0; /* No optional parameters */
601 bgp_rx_open(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
603 struct bgp_proto
*p
= conn
->bgp
;
604 struct bgp_conn
*other
;
608 if (conn
->state
!= BS_OPENSENT
)
609 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
611 /* Check message contents */
612 if (len
< 29 || len
!= 29 + (uint
) pkt
[28])
613 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
615 if (pkt
[19] != BGP_VERSION
)
616 { u16 val
= BGP_VERSION
; bgp_error(conn
, 2, 1, (byte
*) &val
, 2); return; }
618 asn
= get_u16(pkt
+20);
619 hold
= get_u16(pkt
+22);
620 id
= get_u32(pkt
+24);
621 BGP_TRACE(D_PACKETS
, "Got OPEN(as=%d,hold=%d,id=%R)", asn
, hold
, id
);
623 if (bgp_read_options(conn
, pkt
+29, pkt
[28]) < 0)
626 if (hold
> 0 && hold
< 3)
627 { bgp_error(conn
, 2, 6, pkt
+22, 2); return; }
629 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
630 if (!id
|| (p
->is_internal
&& id
== p
->local_id
))
631 { bgp_error(conn
, 2, 3, pkt
+24, -4); return; }
633 struct bgp_caps
*caps
= conn
->remote_caps
;
635 if (caps
->as4_support
)
637 u32 as4
= caps
->as4_number
;
639 if ((as4
!= asn
) && (asn
!= AS_TRANS
))
640 log(L_WARN
"%s: Peer advertised inconsistent AS numbers", p
->p
.name
);
642 if (as4
!= p
->remote_as
)
643 { as4
= htonl(as4
); bgp_error(conn
, 2, 2, (byte
*) &as4
, 4); return; }
647 if (asn
!= p
->remote_as
)
648 { bgp_error(conn
, 2, 2, pkt
+20, 2); return; }
651 /* Check the other connection */
652 other
= (conn
== &p
->outgoing_conn
) ? &p
->incoming_conn
: &p
->outgoing_conn
;
653 switch (other
->state
)
657 /* Stop outgoing connection attempts */
658 bgp_conn_enter_idle_state(other
);
668 * Description of collision detection rules in RFC 4271 is confusing and
669 * contradictory, but it is essentially:
671 * 1. Router with higher ID is dominant
672 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
673 * 3. When both connections are in OpenConfirm state, one initiated by
674 * the dominant router is kept.
676 * The first line in the expression below evaluates whether the neighbor
677 * is dominant, the second line whether the new connection was initiated
678 * by the neighbor. If both are true (or both are false), we keep the new
679 * connection, otherwise we keep the old one.
681 if (((p
->local_id
< id
) || ((p
->local_id
== id
) && (p
->public_as
< p
->remote_as
)))
682 == (conn
== &p
->incoming_conn
))
684 /* Should close the other connection */
685 BGP_TRACE(D_EVENTS
, "Connection collision, giving up the other connection");
686 bgp_error(other
, 6, 7, NULL
, 0);
691 /* Should close this connection */
692 BGP_TRACE(D_EVENTS
, "Connection collision, giving up this connection");
693 bgp_error(conn
, 6, 7, NULL
, 0);
697 bug("bgp_rx_open: Unknown state");
700 /* Update our local variables */
701 conn
->hold_time
= MIN(hold
, p
->cf
->hold_time
);
702 conn
->keepalive_time
= p
->cf
->keepalive_time
? : conn
->hold_time
/ 3;
703 conn
->as4_session
= conn
->local_caps
->as4_support
&& caps
->as4_support
;
704 conn
->ext_messages
= conn
->local_caps
->ext_messages
&& caps
->ext_messages
;
707 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
708 conn
->hold_time
, conn
->keepalive_time
, p
->remote_as
, p
->remote_id
, conn
->as4_session
);
710 bgp_schedule_packet(conn
, NULL
, PKT_KEEPALIVE
);
711 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
712 bgp_conn_enter_openconfirm_state(conn
);
720 #define REPORT(msg, args...) \
721 ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
723 #define DISCARD(msg, args...) \
724 ({ REPORT(msg, ## args); return; })
726 #define WITHDRAW(msg, args...) \
727 ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
729 #define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
730 #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
731 #define NO_NEXT_HOP "Missing NEXT_HOP attribute"
732 #define NO_LABEL_STACK "Missing MPLS stack"
736 bgp_apply_next_hop(struct bgp_parse_state
*s
, rta
*a
, ip_addr gw
, ip_addr ll
)
738 struct bgp_proto
*p
= s
->proto
;
739 struct bgp_channel
*c
= s
->channel
;
741 if (c
->cf
->gw_mode
== GW_DIRECT
)
743 neighbor
*nbr
= NULL
;
745 /* GW_DIRECT -> single_hop -> p->neigh != NULL */
747 nbr
= neigh_find2(&p
->p
, &gw
, NULL
, 0);
748 else if (ipa_nonzero(ll
))
749 nbr
= neigh_find2(&p
->p
, &ll
, p
->neigh
->iface
, 0);
751 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
752 WITHDRAW(BAD_NEXT_HOP
);
754 a
->dest
= RTD_UNICAST
;
755 a
->nh
.gw
= nbr
->addr
;
756 a
->nh
.iface
= nbr
->iface
;
758 else /* GW_RECURSIVE */
761 WITHDRAW(BAD_NEXT_HOP
);
763 rtable
*tab
= ipa_is_ip4(gw
) ? c
->igp_table_ip4
: c
->igp_table_ip6
;
764 s
->hostentry
= rt_get_hostentry(tab
, gw
, ll
, c
->c
.table
);
767 rta_apply_hostentry(a
, s
->hostentry
, NULL
);
769 /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
774 bgp_apply_mpls_labels(struct bgp_parse_state
*s
, rta
*a
, u32
*labels
, uint lnum
)
776 if (lnum
> MPLS_MAX_LABEL_STACK
)
778 REPORT("Too many MPLS labels ($u)", lnum
);
780 a
->dest
= RTD_UNREACHABLE
;
782 a
->nh
= (struct nexthop
) { };
786 /* Handle implicit NULL as empty MPLS stack */
787 if ((lnum
== 1) && (labels
[0] == BGP_MPLS_NULL
))
790 if (s
->channel
->cf
->gw_mode
== GW_DIRECT
)
793 memcpy(a
->nh
.label
, labels
, 4*lnum
);
795 else /* GW_RECURSIVE */
800 memcpy(ms
.stack
, labels
, 4*lnum
);
801 rta_apply_hostentry(a
, s
->hostentry
, &ms
);
807 bgp_use_next_hop(struct bgp_export_state
*s
, eattr
*a
)
809 struct bgp_proto
*p
= s
->proto
;
810 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
812 if (s
->channel
->cf
->next_hop_self
)
815 if (s
->channel
->cf
->next_hop_keep
)
818 /* Keep it when explicitly set in export filter */
819 if (a
->type
& EAF_FRESH
)
822 /* Keep it when exported to internal peers */
823 if (p
->is_interior
&& ipa_nonzero(*nh
))
826 /* Keep it when forwarded between single-hop BGPs on the same iface */
827 struct iface
*ifa
= (s
->src
&& s
->src
->neigh
) ? s
->src
->neigh
->iface
: NULL
;
828 return p
->neigh
&& (p
->neigh
->iface
== ifa
);
832 bgp_use_gateway(struct bgp_export_state
*s
)
834 struct bgp_proto
*p
= s
->proto
;
835 rta
*ra
= s
->route
->attrs
;
837 if (s
->channel
->cf
->next_hop_self
)
840 /* We need one valid global gateway */
841 if ((ra
->dest
!= RTD_UNICAST
) || ra
->nh
.next
|| ipa_zero(ra
->nh
.gw
) || ipa_is_link_local(ra
->nh
.gw
))
844 /* Use it when exported to internal peers */
848 /* Use it when forwarded to single-hop BGP peer on the same iface */
849 return p
->neigh
&& (p
->neigh
->iface
== ra
->nh
.iface
);
853 bgp_update_next_hop_ip(struct bgp_export_state
*s
, eattr
*a
, ea_list
**to
)
855 if (!a
|| !bgp_use_next_hop(s
, a
))
857 if (bgp_use_gateway(s
))
859 rta
*ra
= s
->route
->attrs
;
860 ip_addr nh
[1] = { ra
->nh
.gw
};
861 bgp_set_attr_data(to
, s
->pool
, BA_NEXT_HOP
, 0, nh
, 16);
865 u32 implicit_null
= BGP_MPLS_NULL
;
866 u32
*labels
= ra
->nh
.labels
? ra
->nh
.label
: &implicit_null
;
867 uint lnum
= ra
->nh
.labels
? ra
->nh
.labels
: 1;
868 bgp_set_attr_data(to
, s
->pool
, BA_MPLS_LABEL_STACK
, 0, labels
, lnum
* 4);
873 ip_addr nh
[2] = { s
->channel
->next_hop_addr
, s
->channel
->link_addr
};
874 bgp_set_attr_data(to
, s
->pool
, BA_NEXT_HOP
, 0, nh
, ipa_nonzero(nh
[1]) ? 32 : 16);
876 /* TODO: Use local MPLS assigned label */
878 bgp_unset_attr(to
, s
->pool
, BA_MPLS_LABEL_STACK
);
882 /* Check if next hop is valid */
883 a
= bgp_find_attr(*to
, BA_NEXT_HOP
);
885 WITHDRAW(NO_NEXT_HOP
);
887 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
888 ip_addr peer
= s
->proto
->cf
->remote_ip
;
889 uint len
= a
->u
.ptr
->length
;
891 /* Forbid zero next hop */
892 if (ipa_zero(nh
[0]) && ((len
!= 32) || ipa_zero(nh
[1])))
893 WITHDRAW(BAD_NEXT_HOP
);
895 /* Forbid next hop equal to neighbor IP */
896 if (ipa_equal(peer
, nh
[0]) || ((len
== 32) && ipa_equal(peer
, nh
[1])))
897 WITHDRAW(BAD_NEXT_HOP
);
899 /* Forbid next hop with non-matching AF */
900 if ((ipa_is_ip4(nh
[0]) != bgp_channel_is_ipv4(s
->channel
)) &&
901 !s
->channel
->ext_next_hop
)
902 WITHDRAW(BAD_NEXT_HOP
);
904 /* Just check that the MPLS label stack is present */
905 if (s
->mpls
&& !bgp_find_attr(*to
, BA_MPLS_LABEL_STACK
))
906 WITHDRAW(NO_LABEL_STACK
);
910 bgp_encode_next_hop_ip(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size UNUSED
)
912 /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
913 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
914 uint len
= a
->u
.ptr
->length
;
916 ASSERT((len
== 16) || (len
== 32));
919 * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
920 * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
921 * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
922 * IPv6 address with IPv6 NLRI.
925 if (bgp_channel_is_ipv4(s
->channel
) && ipa_is_ip4(nh
[0]))
927 put_ip4(buf
, ipa_to_ip4(nh
[0]));
931 put_ip6(buf
, ipa_to_ip6(nh
[0]));
934 put_ip6(buf
+16, ipa_to_ip6(nh
[1]));
940 bgp_decode_next_hop_ip(struct bgp_parse_state
*s
, byte
*data
, uint len
, rta
*a
)
942 struct bgp_channel
*c
= s
->channel
;
943 struct adata
*ad
= lp_alloc_adata(s
->pool
, 32);
944 ip_addr
*nh
= (void *) ad
->data
;
948 nh
[0] = ipa_from_ip4(get_ip4(data
));
953 nh
[0] = ipa_from_ip6(get_ip6(data
));
956 if (ipa_is_link_local(nh
[0]))
957 { nh
[1] = nh
[0]; nh
[0] = IPA_NONE
; }
961 nh
[0] = ipa_from_ip6(get_ip6(data
));
962 nh
[1] = ipa_from_ip6(get_ip6(data
+16));
964 if (ipa_is_ip4(nh
[0]) || !ip6_is_link_local(nh
[1]))
968 bgp_parse_error(s
, 9);
973 if ((bgp_channel_is_ipv4(c
) != ipa_is_ip4(nh
[0])) && !c
->ext_next_hop
)
974 WITHDRAW(BAD_NEXT_HOP
);
976 // XXXX validate next hop
978 bgp_set_attr_ptr(&(a
->eattrs
), s
->pool
, BA_NEXT_HOP
, 0, ad
);
979 bgp_apply_next_hop(s
, a
, nh
[0], nh
[1]);
983 bgp_encode_next_hop_vpn(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size UNUSED
)
985 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
986 uint len
= a
->u
.ptr
->length
;
988 ASSERT((len
== 16) || (len
== 32));
991 * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
992 * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
993 * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
994 * IPv6 address with VPNv6 NLRI.
997 if (bgp_channel_is_ipv4(s
->channel
) && ipa_is_ip4(nh
[0]))
999 put_u64(buf
, 0); /* VPN RD is 0 */
1000 put_ip4(buf
+8, ipa_to_ip4(nh
[0]));
1004 put_u64(buf
, 0); /* VPN RD is 0 */
1005 put_ip6(buf
+8, ipa_to_ip6(nh
[0]));
1010 put_u64(buf
+24, 0); /* VPN RD is 0 */
1011 put_ip6(buf
+32, ipa_to_ip6(nh
[1]));
1017 bgp_decode_next_hop_vpn(struct bgp_parse_state
*s
, byte
*data
, uint len
, rta
*a
)
1019 struct bgp_channel
*c
= s
->channel
;
1020 struct adata
*ad
= lp_alloc_adata(s
->pool
, 32);
1021 ip_addr
*nh
= (void *) ad
->data
;
1025 nh
[0] = ipa_from_ip4(get_ip4(data
+8));
1030 nh
[0] = ipa_from_ip6(get_ip6(data
+8));
1033 if (ipa_is_link_local(nh
[0]))
1034 { nh
[1] = nh
[0]; nh
[0] = IPA_NONE
; }
1038 nh
[0] = ipa_from_ip6(get_ip6(data
+8));
1039 nh
[1] = ipa_from_ip6(get_ip6(data
+32));
1041 if (ipa_is_ip4(nh
[0]) || !ip6_is_link_local(nh
[1]))
1045 bgp_parse_error(s
, 9);
1047 if (ipa_zero(nh
[1]))
1050 /* XXXX which error */
1051 if ((get_u64(data
) != 0) || ((len
== 48) && (get_u64(data
+24) != 0)))
1052 bgp_parse_error(s
, 9);
1054 if ((bgp_channel_is_ipv4(c
) != ipa_is_ip4(nh
[0])) && !c
->ext_next_hop
)
1055 WITHDRAW(BAD_NEXT_HOP
);
1057 // XXXX validate next hop
1059 bgp_set_attr_ptr(&(a
->eattrs
), s
->pool
, BA_NEXT_HOP
, 0, ad
);
1060 bgp_apply_next_hop(s
, a
, nh
[0], nh
[1]);
1066 bgp_encode_next_hop_none(struct bgp_write_state
*s UNUSED
, eattr
*a UNUSED
, byte
*buf UNUSED
, uint size UNUSED
)
1072 bgp_decode_next_hop_none(struct bgp_parse_state
*s UNUSED
, byte
*data UNUSED
, uint len UNUSED
, rta
*a UNUSED
)
1075 * Although we expect no next hop and RFC 7606, section 7.11, states that attribute
1076 * MP_REACH_NLRI with unexpected next hop length is considered malformed,
1077 * FlowSpec RFC 5575, section 4, states that next hop shall be ignored on receipt.
1084 bgp_update_next_hop_none(struct bgp_export_state
*s
, eattr
*a
, ea_list
**to
)
1086 /* NEXT_HOP shall not pass */
1088 bgp_unset_attr(to
, s
->pool
, BA_NEXT_HOP
);
1097 bgp_rte_update(struct bgp_parse_state
*s
, net_addr
*n
, u32 path_id
, rta
*a0
)
1099 if (path_id
!= s
->last_id
)
1101 s
->last_src
= rt_get_source(&s
->proto
->p
, path_id
);
1102 s
->last_id
= path_id
;
1104 rta_free(s
->cached_rta
);
1105 s
->cached_rta
= NULL
;
1110 /* Route withdraw */
1111 rte_update2(&s
->channel
->c
, n
, NULL
, s
->last_src
);
1115 /* Prepare cached route attributes */
1116 if (s
->cached_rta
== NULL
)
1118 a0
->src
= s
->last_src
;
1120 /* Workaround for rta_lookup() breaking eattrs */
1121 ea_list
*ea
= a0
->eattrs
;
1122 s
->cached_rta
= rta_lookup(a0
);
1126 rta
*a
= rta_clone(s
->cached_rta
);
1127 rte
*e
= rte_get_temp(a
);
1130 e
->u
.bgp
.suppressed
= 0;
1131 rte_update2(&s
->channel
->c
, n
, e
, s
->last_src
);
1135 bgp_encode_mpls_labels(struct bgp_write_state
*s UNUSED
, adata
*mpls
, byte
**pos
, uint
*size
, byte
*pxlen
)
1138 u32
*labels
= mpls
? (u32
*) mpls
->data
: &dummy
;
1139 uint lnum
= mpls
? (mpls
->length
/ 4) : 1;
1141 for (uint i
= 0; i
< lnum
; i
++)
1143 put_u24(*pos
, labels
[i
] << 4);
1144 ADVANCE(*pos
, *size
, 3);
1147 /* Add bottom-of-stack flag */
1148 (*pos
)[-1] |= BGP_MPLS_BOS
;
1150 *pxlen
+= 24 * lnum
;
1154 bgp_decode_mpls_labels(struct bgp_parse_state
*s
, byte
**pos
, uint
*len
, uint
*pxlen
, rta
*a
)
1156 u32 labels
[BGP_MPLS_MAX
], label
;
1161 bgp_parse_error(s
, 1);
1163 label
= get_u24(*pos
);
1164 labels
[lnum
++] = label
>> 4;
1165 ADVANCE(*pos
, *len
, 3);
1168 /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */
1169 if (!a
&& !s
->err_withdraw
&& (lnum
== 1) && (label
== BGP_MPLS_MAGIC
))
1172 while (!(label
& BGP_MPLS_BOS
));
1177 /* Attach MPLS attribute unless we already have one */
1178 if (!s
->mpls_labels
)
1180 s
->mpls_labels
= lp_alloc_adata(s
->pool
, 4*BGP_MPLS_MAX
);
1181 bgp_set_attr_ptr(&(a
->eattrs
), s
->pool
, BA_MPLS_LABEL_STACK
, 0, s
->mpls_labels
);
1184 /* Overwrite data in the attribute */
1185 s
->mpls_labels
->length
= 4*lnum
;
1186 memcpy(s
->mpls_labels
->data
, labels
, 4*lnum
);
1188 /* Update next hop entry in rta */
1189 bgp_apply_mpls_labels(s
, a
, labels
, lnum
);
1191 /* Attributes were changed, invalidate cached entry */
1192 rta_free(s
->cached_rta
);
1193 s
->cached_rta
= NULL
;
1199 bgp_encode_nlri_ip4(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1203 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= BGP_NLRI_MAX
))
1205 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1206 struct net_addr_ip4
*net
= (void *) px
->net
;
1208 /* Encode path ID */
1211 put_u32(pos
, px
->path_id
);
1212 ADVANCE(pos
, size
, 4);
1215 /* Encode prefix length */
1217 ADVANCE(pos
, size
, 1);
1219 /* Encode MPLS labels */
1221 bgp_encode_mpls_labels(s
, s
->mpls_labels
, &pos
, &size
, pos
- 1);
1223 /* Encode prefix body */
1224 ip4_addr a
= ip4_hton(net
->prefix
);
1225 uint b
= (net
->pxlen
+ 7) / 8;
1227 ADVANCE(pos
, size
, b
);
1229 bgp_free_prefix(s
->channel
, px
);
1236 bgp_decode_nlri_ip4(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1243 /* Decode path ID */
1247 bgp_parse_error(s
, 1);
1249 path_id
= get_u32(pos
);
1250 ADVANCE(pos
, len
, 4);
1253 /* Decode prefix length */
1255 ADVANCE(pos
, len
, 1);
1257 if (len
< ((l
+ 7) / 8))
1258 bgp_parse_error(s
, 1);
1260 /* Decode MPLS labels */
1262 bgp_decode_mpls_labels(s
, &pos
, &len
, &l
, a
);
1264 if (l
> IP4_MAX_PREFIX_LENGTH
)
1265 bgp_parse_error(s
, 10);
1267 /* Decode prefix body */
1268 ip4_addr addr
= IP4_NONE
;
1269 uint b
= (l
+ 7) / 8;
1270 memcpy(&addr
, pos
, b
);
1271 ADVANCE(pos
, len
, b
);
1273 net
= NET_ADDR_IP4(ip4_ntoh(addr
), l
);
1274 net_normalize_ip4(&net
);
1276 // XXXX validate prefix
1278 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1284 bgp_encode_nlri_ip6(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1288 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= BGP_NLRI_MAX
))
1290 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1291 struct net_addr_ip6
*net
= (void *) px
->net
;
1293 /* Encode path ID */
1296 put_u32(pos
, px
->path_id
);
1297 ADVANCE(pos
, size
, 4);
1300 /* Encode prefix length */
1302 ADVANCE(pos
, size
, 1);
1304 /* Encode MPLS labels */
1306 bgp_encode_mpls_labels(s
, s
->mpls_labels
, &pos
, &size
, pos
- 1);
1308 /* Encode prefix body */
1309 ip6_addr a
= ip6_hton(net
->prefix
);
1310 uint b
= (net
->pxlen
+ 7) / 8;
1312 ADVANCE(pos
, size
, b
);
1314 bgp_free_prefix(s
->channel
, px
);
1321 bgp_decode_nlri_ip6(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1328 /* Decode path ID */
1332 bgp_parse_error(s
, 1);
1334 path_id
= get_u32(pos
);
1335 ADVANCE(pos
, len
, 4);
1338 /* Decode prefix length */
1340 ADVANCE(pos
, len
, 1);
1342 if (len
< ((l
+ 7) / 8))
1343 bgp_parse_error(s
, 1);
1345 /* Decode MPLS labels */
1347 bgp_decode_mpls_labels(s
, &pos
, &len
, &l
, a
);
1349 if (l
> IP6_MAX_PREFIX_LENGTH
)
1350 bgp_parse_error(s
, 10);
1352 /* Decode prefix body */
1353 ip6_addr addr
= IP6_NONE
;
1354 uint b
= (l
+ 7) / 8;
1355 memcpy(&addr
, pos
, b
);
1356 ADVANCE(pos
, len
, b
);
1358 net
= NET_ADDR_IP6(ip6_ntoh(addr
), l
);
1359 net_normalize_ip6(&net
);
1361 // XXXX validate prefix
1363 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1368 bgp_encode_nlri_vpn4(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1372 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= BGP_NLRI_MAX
))
1374 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1375 struct net_addr_vpn4
*net
= (void *) px
->net
;
1377 /* Encode path ID */
1380 put_u32(pos
, px
->path_id
);
1381 ADVANCE(pos
, size
, 4);
1384 /* Encode prefix length */
1385 *pos
= 64 + net
->pxlen
;
1386 ADVANCE(pos
, size
, 1);
1388 /* Encode MPLS labels */
1390 bgp_encode_mpls_labels(s
, s
->mpls_labels
, &pos
, &size
, pos
- 1);
1392 /* Encode route distinguisher */
1393 put_u64(pos
, net
->rd
);
1394 ADVANCE(pos
, size
, 8);
1396 /* Encode prefix body */
1397 ip4_addr a
= ip4_hton(net
->prefix
);
1398 uint b
= (net
->pxlen
+ 7) / 8;
1400 ADVANCE(pos
, size
, b
);
1402 bgp_free_prefix(s
->channel
, px
);
1409 bgp_decode_nlri_vpn4(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1416 /* Decode path ID */
1420 bgp_parse_error(s
, 1);
1422 path_id
= get_u32(pos
);
1423 ADVANCE(pos
, len
, 4);
1426 /* Decode prefix length */
1428 ADVANCE(pos
, len
, 1);
1430 if (len
< ((l
+ 7) / 8))
1431 bgp_parse_error(s
, 1);
1433 /* Decode MPLS labels */
1435 bgp_decode_mpls_labels(s
, &pos
, &len
, &l
, a
);
1437 /* Decode route distinguisher */
1439 bgp_parse_error(s
, 1);
1441 u64 rd
= get_u64(pos
);
1442 ADVANCE(pos
, len
, 8);
1445 if (l
> IP4_MAX_PREFIX_LENGTH
)
1446 bgp_parse_error(s
, 10);
1448 /* Decode prefix body */
1449 ip4_addr addr
= IP4_NONE
;
1450 uint b
= (l
+ 7) / 8;
1451 memcpy(&addr
, pos
, b
);
1452 ADVANCE(pos
, len
, b
);
1454 net
= NET_ADDR_VPN4(ip4_ntoh(addr
), l
, rd
);
1455 net_normalize_vpn4(&net
);
1457 // XXXX validate prefix
1459 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1465 bgp_encode_nlri_vpn6(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1469 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= BGP_NLRI_MAX
))
1471 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1472 struct net_addr_vpn6
*net
= (void *) px
->net
;
1474 /* Encode path ID */
1477 put_u32(pos
, px
->path_id
);
1478 ADVANCE(pos
, size
, 4);
1481 /* Encode prefix length */
1482 *pos
= 64 + net
->pxlen
;
1483 ADVANCE(pos
, size
, 1);
1485 /* Encode MPLS labels */
1486 bgp_encode_mpls_labels(s
, s
->mpls_labels
, &pos
, &size
, pos
- 1);
1488 /* Encode route distinguisher */
1489 put_u64(pos
, net
->rd
);
1490 ADVANCE(pos
, size
, 8);
1492 /* Encode prefix body */
1493 ip6_addr a
= ip6_hton(net
->prefix
);
1494 uint b
= (net
->pxlen
+ 7) / 8;
1496 ADVANCE(pos
, size
, b
);
1498 bgp_free_prefix(s
->channel
, px
);
1505 bgp_decode_nlri_vpn6(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1512 /* Decode path ID */
1516 bgp_parse_error(s
, 1);
1518 path_id
= get_u32(pos
);
1519 ADVANCE(pos
, len
, 4);
1522 /* Decode prefix length */
1524 ADVANCE(pos
, len
, 1);
1526 if (len
< ((l
+ 7) / 8))
1527 bgp_parse_error(s
, 1);
1529 /* Decode MPLS labels */
1531 bgp_decode_mpls_labels(s
, &pos
, &len
, &l
, a
);
1533 /* Decode route distinguisher */
1535 bgp_parse_error(s
, 1);
1537 u64 rd
= get_u64(pos
);
1538 ADVANCE(pos
, len
, 8);
1541 if (l
> IP6_MAX_PREFIX_LENGTH
)
1542 bgp_parse_error(s
, 10);
1544 /* Decode prefix body */
1545 ip6_addr addr
= IP6_NONE
;
1546 uint b
= (l
+ 7) / 8;
1547 memcpy(&addr
, pos
, b
);
1548 ADVANCE(pos
, len
, b
);
1550 net
= NET_ADDR_VPN6(ip6_ntoh(addr
), l
, rd
);
1551 net_normalize_vpn6(&net
);
1553 // XXXX validate prefix
1555 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1561 bgp_encode_nlri_flow4(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1565 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= 4))
1567 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1568 struct net_addr_flow4
*net
= (void *) px
->net
;
1569 uint flen
= net
->length
- sizeof(net_addr_flow4
);
1571 /* Encode path ID */
1574 put_u32(pos
, px
->path_id
);
1575 ADVANCE(pos
, size
, 4);
1581 /* Copy whole flow data including length */
1582 memcpy(pos
, net
->data
, flen
);
1583 ADVANCE(pos
, size
, flen
);
1585 bgp_free_prefix(s
->channel
, px
);
1592 bgp_decode_nlri_flow4(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1598 /* Decode path ID */
1602 bgp_parse_error(s
, 1);
1604 path_id
= get_u32(pos
);
1605 ADVANCE(pos
, len
, 4);
1609 bgp_parse_error(s
, 1);
1611 /* Decode flow length */
1612 uint hlen
= flow_hdr_length(pos
);
1613 uint dlen
= flow_read_length(pos
);
1614 uint flen
= hlen
+ dlen
;
1615 byte
*data
= pos
+ hlen
;
1618 bgp_parse_error(s
, 1);
1620 /* Validate flow data */
1621 enum flow_validated_state r
= flow4_validate(data
, dlen
);
1622 if (r
!= FLOW_ST_VALID
)
1624 log(L_REMOTE
"%s: Invalid flow route: %s", s
->proto
->p
.name
, flow_validated_state_str(r
));
1625 bgp_parse_error(s
, 1);
1628 if (data
[0] != FLOW_TYPE_DST_PREFIX
)
1630 log(L_REMOTE
"%s: No dst prefix at first pos", s
->proto
->p
.name
);
1631 bgp_parse_error(s
, 1);
1634 /* Decode dst prefix */
1635 ip4_addr px
= IP4_NONE
;
1636 uint pxlen
= data
[1];
1638 // FIXME: Use some generic function
1639 memcpy(&px
, data
, BYTES(pxlen
));
1640 px
= ip4_and(px
, ip4_mkmask(pxlen
));
1642 /* Prepare the flow */
1643 net_addr
*n
= alloca(sizeof(struct net_addr_flow4
) + flen
);
1644 net_fill_flow4(n
, px
, pxlen
, pos
, flen
);
1645 ADVANCE(pos
, len
, flen
);
1647 bgp_rte_update(s
, n
, path_id
, a
);
1653 bgp_encode_nlri_flow6(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1657 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= 4))
1659 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1660 struct net_addr_flow6
*net
= (void *) px
->net
;
1661 uint flen
= net
->length
- sizeof(net_addr_flow6
);
1663 /* Encode path ID */
1666 put_u32(pos
, px
->path_id
);
1667 ADVANCE(pos
, size
, 4);
1673 /* Copy whole flow data including length */
1674 memcpy(pos
, net
->data
, flen
);
1675 ADVANCE(pos
, size
, flen
);
1677 bgp_free_prefix(s
->channel
, px
);
1684 bgp_decode_nlri_flow6(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1690 /* Decode path ID */
1694 bgp_parse_error(s
, 1);
1696 path_id
= get_u32(pos
);
1697 ADVANCE(pos
, len
, 4);
1701 bgp_parse_error(s
, 1);
1703 /* Decode flow length */
1704 uint hlen
= flow_hdr_length(pos
);
1705 uint dlen
= flow_read_length(pos
);
1706 uint flen
= hlen
+ dlen
;
1707 byte
*data
= pos
+ hlen
;
1710 bgp_parse_error(s
, 1);
1712 /* Validate flow data */
1713 enum flow_validated_state r
= flow6_validate(data
, dlen
);
1714 if (r
!= FLOW_ST_VALID
)
1716 log(L_REMOTE
"%s: Invalid flow route: %s", s
->proto
->p
.name
, flow_validated_state_str(r
));
1717 bgp_parse_error(s
, 1);
1720 if (data
[0] != FLOW_TYPE_DST_PREFIX
)
1722 log(L_REMOTE
"%s: No dst prefix at first pos", s
->proto
->p
.name
);
1723 bgp_parse_error(s
, 1);
1726 /* Decode dst prefix */
1727 ip6_addr px
= IP6_NONE
;
1728 uint pxlen
= data
[1];
1730 // FIXME: Use some generic function
1731 memcpy(&px
, data
, BYTES(pxlen
));
1732 px
= ip6_and(px
, ip6_mkmask(pxlen
));
1734 /* Prepare the flow */
1735 net_addr
*n
= alloca(sizeof(struct net_addr_flow6
) + flen
);
1736 net_fill_flow6(n
, px
, pxlen
, pos
, flen
);
1737 ADVANCE(pos
, len
, flen
);
1739 bgp_rte_update(s
, n
, path_id
, a
);
1744 static const struct bgp_af_desc bgp_af_table
[] = {
1749 .encode_nlri
= bgp_encode_nlri_ip4
,
1750 .decode_nlri
= bgp_decode_nlri_ip4
,
1751 .encode_next_hop
= bgp_encode_next_hop_ip
,
1752 .decode_next_hop
= bgp_decode_next_hop_ip
,
1753 .update_next_hop
= bgp_update_next_hop_ip
,
1756 .afi
= BGP_AF_IPV4_MC
,
1759 .encode_nlri
= bgp_encode_nlri_ip4
,
1760 .decode_nlri
= bgp_decode_nlri_ip4
,
1761 .encode_next_hop
= bgp_encode_next_hop_ip
,
1762 .decode_next_hop
= bgp_decode_next_hop_ip
,
1763 .update_next_hop
= bgp_update_next_hop_ip
,
1766 .afi
= BGP_AF_IPV4_MPLS
,
1769 .name
= "ipv4-mpls",
1770 .encode_nlri
= bgp_encode_nlri_ip4
,
1771 .decode_nlri
= bgp_decode_nlri_ip4
,
1772 .encode_next_hop
= bgp_encode_next_hop_ip
,
1773 .decode_next_hop
= bgp_decode_next_hop_ip
,
1774 .update_next_hop
= bgp_update_next_hop_ip
,
1780 .encode_nlri
= bgp_encode_nlri_ip6
,
1781 .decode_nlri
= bgp_decode_nlri_ip6
,
1782 .encode_next_hop
= bgp_encode_next_hop_ip
,
1783 .decode_next_hop
= bgp_decode_next_hop_ip
,
1784 .update_next_hop
= bgp_update_next_hop_ip
,
1787 .afi
= BGP_AF_IPV6_MC
,
1790 .encode_nlri
= bgp_encode_nlri_ip6
,
1791 .decode_nlri
= bgp_decode_nlri_ip6
,
1792 .encode_next_hop
= bgp_encode_next_hop_ip
,
1793 .decode_next_hop
= bgp_decode_next_hop_ip
,
1794 .update_next_hop
= bgp_update_next_hop_ip
,
1797 .afi
= BGP_AF_IPV6_MPLS
,
1800 .name
= "ipv6-mpls",
1801 .encode_nlri
= bgp_encode_nlri_ip6
,
1802 .decode_nlri
= bgp_decode_nlri_ip6
,
1803 .encode_next_hop
= bgp_encode_next_hop_ip
,
1804 .decode_next_hop
= bgp_decode_next_hop_ip
,
1805 .update_next_hop
= bgp_update_next_hop_ip
,
1808 .afi
= BGP_AF_VPN4_MPLS
,
1811 .name
= "vpn4-mpls",
1812 .encode_nlri
= bgp_encode_nlri_vpn4
,
1813 .decode_nlri
= bgp_decode_nlri_vpn4
,
1814 .encode_next_hop
= bgp_encode_next_hop_vpn
,
1815 .decode_next_hop
= bgp_decode_next_hop_vpn
,
1816 .update_next_hop
= bgp_update_next_hop_ip
,
1819 .afi
= BGP_AF_VPN6_MPLS
,
1822 .name
= "vpn6-mpls",
1823 .encode_nlri
= bgp_encode_nlri_vpn6
,
1824 .decode_nlri
= bgp_decode_nlri_vpn6
,
1825 .encode_next_hop
= bgp_encode_next_hop_vpn
,
1826 .decode_next_hop
= bgp_decode_next_hop_vpn
,
1827 .update_next_hop
= bgp_update_next_hop_ip
,
1830 .afi
= BGP_AF_VPN4_MC
,
1833 .encode_nlri
= bgp_encode_nlri_vpn4
,
1834 .decode_nlri
= bgp_decode_nlri_vpn4
,
1835 .encode_next_hop
= bgp_encode_next_hop_vpn
,
1836 .decode_next_hop
= bgp_decode_next_hop_vpn
,
1837 .update_next_hop
= bgp_update_next_hop_ip
,
1840 .afi
= BGP_AF_VPN6_MC
,
1843 .encode_nlri
= bgp_encode_nlri_vpn6
,
1844 .decode_nlri
= bgp_decode_nlri_vpn6
,
1845 .encode_next_hop
= bgp_encode_next_hop_vpn
,
1846 .decode_next_hop
= bgp_decode_next_hop_vpn
,
1847 .update_next_hop
= bgp_update_next_hop_ip
,
1850 .afi
= BGP_AF_FLOW4
,
1854 .encode_nlri
= bgp_encode_nlri_flow4
,
1855 .decode_nlri
= bgp_decode_nlri_flow4
,
1856 .encode_next_hop
= bgp_encode_next_hop_none
,
1857 .decode_next_hop
= bgp_decode_next_hop_none
,
1858 .update_next_hop
= bgp_update_next_hop_none
,
1861 .afi
= BGP_AF_FLOW6
,
1865 .encode_nlri
= bgp_encode_nlri_flow6
,
1866 .decode_nlri
= bgp_decode_nlri_flow6
,
1867 .encode_next_hop
= bgp_encode_next_hop_none
,
1868 .decode_next_hop
= bgp_decode_next_hop_none
,
1869 .update_next_hop
= bgp_update_next_hop_none
,
1873 const struct bgp_af_desc
*
1874 bgp_get_af_desc(u32 afi
)
1877 for (i
= 0; i
< ARRAY_SIZE(bgp_af_table
); i
++)
1878 if (bgp_af_table
[i
].afi
== afi
)
1879 return &bgp_af_table
[i
];
1885 bgp_encode_nlri(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
1887 return s
->channel
->desc
->encode_nlri(s
, buck
, buf
, end
- buf
);
1891 bgp_encode_next_hop(struct bgp_write_state
*s
, eattr
*nh
, byte
*buf
)
1893 return s
->channel
->desc
->encode_next_hop(s
, nh
, buf
, 255);
1897 bgp_update_next_hop(struct bgp_export_state
*s
, eattr
*a
, ea_list
**to
)
1899 s
->channel
->desc
->update_next_hop(s
, a
, to
);
1902 #define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
1905 bgp_create_ip_reach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
1908 * 2 B Withdrawn Routes Length (zero)
1909 * --- IPv4 Withdrawn Routes NLRI (unused)
1910 * 2 B Total Path Attribute Length
1911 * var Path Attributes
1912 * var IPv4 Network Layer Reachability Information
1917 la
= bgp_encode_attrs(s
, buck
->eattrs
, buf
+4, buf
+ MAX_ATTRS_LENGTH
);
1920 /* Attribute list too long */
1921 bgp_withdraw_bucket(s
->channel
, buck
);
1928 lr
= bgp_encode_nlri(s
, buck
, buf
+4+la
, end
);
1934 bgp_create_mp_reach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
1937 * 2 B IPv4 Withdrawn Routes Length (zero)
1938 * --- IPv4 Withdrawn Routes NLRI (unused)
1939 * 2 B Total Path Attribute Length
1940 * 1 B MP_REACH_NLRI hdr - Attribute Flags
1941 * 1 B MP_REACH_NLRI hdr - Attribute Type Code
1942 * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
1943 * 2 B MP_REACH_NLRI data - Address Family Identifier
1944 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
1945 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
1946 * var MP_REACH_NLRI data - Network Address of Next Hop
1947 * 1 B MP_REACH_NLRI data - Reserved (zero)
1948 * var MP_REACH_NLRI data - Network Layer Reachability Information
1949 * var Rest of Path Attributes
1950 * --- IPv4 Network Layer Reachability Information (unused)
1953 int lh
, lr
, la
; /* Lengths of next hop, NLRI and attributes */
1955 /* Beginning of the MP_REACH_NLRI attribute */
1956 buf
[4] = BAF_OPTIONAL
| BAF_EXT_LEN
;
1957 buf
[5] = BA_MP_REACH_NLRI
;
1958 put_u16(buf
+6, 0); /* Will be fixed later */
1959 put_af3(buf
+8, s
->channel
->afi
);
1962 /* Encode attributes to temporary buffer */
1963 byte
*abuf
= alloca(MAX_ATTRS_LENGTH
);
1964 la
= bgp_encode_attrs(s
, buck
->eattrs
, abuf
, abuf
+ MAX_ATTRS_LENGTH
);
1967 /* Attribute list too long */
1968 bgp_withdraw_bucket(s
->channel
, buck
);
1972 /* Encode the next hop */
1973 lh
= bgp_encode_next_hop(s
, s
->mp_next_hop
, pos
+1);
1977 /* Reserved field */
1980 /* Encode the NLRI */
1981 lr
= bgp_encode_nlri(s
, buck
, pos
, end
- la
);
1984 /* End of MP_REACH_NLRI attribute, update data length */
1985 put_u16(buf
+6, pos
-buf
-8);
1987 /* Copy remaining attributes */
1988 memcpy(pos
, abuf
, la
);
1991 /* Initial UPDATE fields */
1993 put_u16(buf
+2, pos
-buf
-4);
1998 #undef MAX_ATTRS_LENGTH
2001 bgp_create_ip_unreach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
2004 * 2 B Withdrawn Routes Length
2005 * var IPv4 Withdrawn Routes NLRI
2006 * 2 B Total Path Attribute Length (zero)
2007 * --- Path Attributes (unused)
2008 * --- IPv4 Network Layer Reachability Information (unused)
2011 uint len
= bgp_encode_nlri(s
, buck
, buf
+2, end
);
2013 put_u16(buf
+0, len
);
2014 put_u16(buf
+2+len
, 0);
2020 bgp_create_mp_unreach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
2023 * 2 B Withdrawn Routes Length (zero)
2024 * --- IPv4 Withdrawn Routes NLRI (unused)
2025 * 2 B Total Path Attribute Length
2026 * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
2027 * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
2028 * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
2029 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
2030 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
2031 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
2032 * --- IPv4 Network Layer Reachability Information (unused)
2035 uint len
= bgp_encode_nlri(s
, buck
, buf
+11, end
);
2038 put_u16(buf
+2, 7+len
);
2040 /* Beginning of the MP_UNREACH_NLRI attribute */
2041 buf
[4] = BAF_OPTIONAL
| BAF_EXT_LEN
;
2042 buf
[5] = BA_MP_UNREACH_NLRI
;
2043 put_u16(buf
+6, 3+len
);
2044 put_af3(buf
+8, s
->channel
->afi
);
2050 bgp_create_update(struct bgp_channel
*c
, byte
*buf
)
2052 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2053 struct bgp_bucket
*buck
;
2054 byte
*end
= buf
+ (bgp_max_packet_length(p
->conn
) - BGP_HEADER_LENGTH
);
2059 /* Initialize write state */
2060 struct bgp_write_state s
= {
2063 .pool
= bgp_linpool
,
2064 .as4_session
= p
->as4_session
,
2065 .add_path
= c
->add_path_tx
,
2066 .mpls
= c
->desc
->mpls
,
2069 /* Try unreachable bucket */
2070 if ((buck
= c
->withdraw_bucket
) && !EMPTY_LIST(buck
->prefixes
))
2072 res
= (c
->afi
== BGP_AF_IPV4
) && !c
->ext_next_hop
?
2073 bgp_create_ip_unreach(&s
, buck
, buf
, end
):
2074 bgp_create_mp_unreach(&s
, buck
, buf
, end
);
2079 /* Try reachable buckets */
2080 if (!EMPTY_LIST(c
->bucket_queue
))
2082 buck
= HEAD(c
->bucket_queue
);
2084 /* Cleanup empty buckets */
2085 if (EMPTY_LIST(buck
->prefixes
))
2087 bgp_free_bucket(c
, buck
);
2091 res
= (c
->afi
== BGP_AF_IPV4
) && !c
->ext_next_hop
?
2092 bgp_create_ip_reach(&s
, buck
, buf
, end
):
2093 bgp_create_mp_reach(&s
, buck
, buf
, end
);
2095 if (EMPTY_LIST(buck
->prefixes
))
2096 bgp_free_bucket(c
, buck
);
2098 bgp_defer_bucket(c
, buck
);
2106 /* No more prefixes to send */
2110 BGP_TRACE_RL(&rl_snd_update
, D_PACKETS
, "Sending UPDATE");
2117 bgp_create_ip_end_mark(struct bgp_channel
*c UNUSED
, byte
*buf
)
2119 /* Empty update packet */
2126 bgp_create_mp_end_mark(struct bgp_channel
*c
, byte
*buf
)
2129 put_u16(buf
+2, 6); /* length 4--9 */
2131 /* Empty MP_UNREACH_NLRI attribute */
2132 buf
[4] = BAF_OPTIONAL
;
2133 buf
[5] = BA_MP_UNREACH_NLRI
;
2134 buf
[6] = 3; /* Length 7--9 */
2135 put_af3(buf
+7, c
->afi
);
2141 bgp_create_end_mark(struct bgp_channel
*c
, byte
*buf
)
2143 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2145 BGP_TRACE(D_PACKETS
, "Sending END-OF-RIB");
2147 return (c
->afi
== BGP_AF_IPV4
) ?
2148 bgp_create_ip_end_mark(c
, buf
):
2149 bgp_create_mp_end_mark(c
, buf
);
2153 bgp_rx_end_mark(struct bgp_parse_state
*s
, u32 afi
)
2155 struct bgp_proto
*p
= s
->proto
;
2156 struct bgp_channel
*c
= bgp_get_channel(p
, afi
);
2158 BGP_TRACE(D_PACKETS
, "Got END-OF-RIB");
2161 DISCARD(BAD_AFI
, BGP_AFI(afi
), BGP_SAFI(afi
));
2163 if (c
->load_state
== BFS_LOADING
)
2164 c
->load_state
= BFS_NONE
;
2166 if (p
->p
.gr_recovery
)
2167 channel_graceful_restart_unlock(&c
->c
);
2170 bgp_graceful_restart_done(c
);
2174 bgp_decode_nlri(struct bgp_parse_state
*s
, u32 afi
, byte
*nlri
, uint len
, ea_list
*ea
, byte
*nh
, uint nh_len
)
2176 struct bgp_channel
*c
= bgp_get_channel(s
->proto
, afi
);
2180 DISCARD(BAD_AFI
, BGP_AFI(afi
), BGP_SAFI(afi
));
2183 s
->add_path
= c
->add_path_rx
;
2184 s
->mpls
= c
->desc
->mpls
;
2187 s
->last_src
= s
->proto
->p
.main_source
;
2190 * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
2191 * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
2192 * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
2193 * decode_next_hop hooks) by restoring a->eattrs afterwards.
2198 a
= allocz(RTA_MAX_SIZE
);
2200 a
->source
= RTS_BGP
;
2201 a
->scope
= SCOPE_UNIVERSE
;
2202 a
->from
= s
->proto
->cf
->remote_ip
;
2205 c
->desc
->decode_next_hop(s
, nh
, nh_len
, a
);
2207 /* Handle withdraw during next hop decoding */
2208 if (s
->err_withdraw
)
2212 c
->desc
->decode_nlri(s
, nlri
, len
, a
);
2214 rta_free(s
->cached_rta
);
2215 s
->cached_rta
= NULL
;
2219 bgp_rx_update(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
2221 struct bgp_proto
*p
= conn
->bgp
;
2224 BGP_TRACE_RL(&rl_rcv_update
, D_PACKETS
, "Got UPDATE");
2226 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
2227 if (conn
->state
== BS_OPENCONFIRM
)
2228 bgp_conn_enter_established_state(conn
);
2230 if (conn
->state
!= BS_ESTABLISHED
)
2231 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
2233 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
2235 /* Initialize parse state */
2236 struct bgp_parse_state s
= {
2238 .pool
= bgp_linpool
,
2239 .as4_session
= p
->as4_session
,
2242 /* Parse error handler */
2243 if (setjmp(s
.err_jmpbuf
))
2245 bgp_error(conn
, 3, s
.err_subcode
, NULL
, 0);
2249 /* Check minimal length */
2251 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
2253 /* Skip fixed header */
2257 * UPDATE message format
2259 * 2 B IPv4 Withdrawn Routes Length
2260 * var IPv4 Withdrawn Routes NLRI
2261 * 2 B Total Path Attribute Length
2262 * var Path Attributes
2263 * var IPv4 Reachable Routes NLRI
2266 s
.ip_unreach_len
= get_u16(pkt
+ pos
);
2267 s
.ip_unreach_nlri
= pkt
+ pos
+ 2;
2268 pos
+= 2 + s
.ip_unreach_len
;
2271 bgp_parse_error(&s
, 1);
2273 s
.attr_len
= get_u16(pkt
+ pos
);
2274 s
.attrs
= pkt
+ pos
+ 2;
2275 pos
+= 2 + s
.attr_len
;
2278 bgp_parse_error(&s
, 1);
2280 s
.ip_reach_len
= len
- pos
;
2281 s
.ip_reach_nlri
= pkt
+ pos
;
2285 ea
= bgp_decode_attrs(&s
, s
.attrs
, s
.attr_len
);
2287 /* Check for End-of-RIB marker */
2288 if (!s
.attr_len
&& !s
.ip_unreach_len
&& !s
.ip_reach_len
)
2289 { bgp_rx_end_mark(&s
, BGP_AF_IPV4
); goto done
; }
2291 /* Check for MP End-of-RIB marker */
2292 if ((s
.attr_len
< 8) && !s
.ip_unreach_len
&& !s
.ip_reach_len
&&
2293 !s
.mp_reach_len
&& !s
.mp_unreach_len
&& s
.mp_unreach_af
)
2294 { bgp_rx_end_mark(&s
, s
.mp_unreach_af
); goto done
; }
2296 if (s
.ip_unreach_len
)
2297 bgp_decode_nlri(&s
, BGP_AF_IPV4
, s
.ip_unreach_nlri
, s
.ip_unreach_len
, NULL
, NULL
, 0);
2299 if (s
.mp_unreach_len
)
2300 bgp_decode_nlri(&s
, s
.mp_unreach_af
, s
.mp_unreach_nlri
, s
.mp_unreach_len
, NULL
, NULL
, 0);
2303 bgp_decode_nlri(&s
, BGP_AF_IPV4
, s
.ip_reach_nlri
, s
.ip_reach_len
,
2304 ea
, s
.ip_next_hop_data
, s
.ip_next_hop_len
);
2307 bgp_decode_nlri(&s
, s
.mp_reach_af
, s
.mp_reach_nlri
, s
.mp_reach_len
,
2308 ea
, s
.mp_next_hop_data
, s
.mp_next_hop_len
);
2311 rta_free(s
.cached_rta
);
2321 static inline byte
*
2322 bgp_create_route_refresh(struct bgp_channel
*c
, byte
*buf
)
2324 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2326 BGP_TRACE(D_PACKETS
, "Sending ROUTE-REFRESH");
2328 /* Original route refresh request, RFC 2918 */
2329 put_af4(buf
, c
->afi
);
2330 buf
[2] = BGP_RR_REQUEST
;
2335 static inline byte
*
2336 bgp_create_begin_refresh(struct bgp_channel
*c
, byte
*buf
)
2338 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2340 BGP_TRACE(D_PACKETS
, "Sending BEGIN-OF-RR");
2342 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
2343 put_af4(buf
, c
->afi
);
2344 buf
[2] = BGP_RR_BEGIN
;
2349 static inline byte
*
2350 bgp_create_end_refresh(struct bgp_channel
*c
, byte
*buf
)
2352 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2354 BGP_TRACE(D_PACKETS
, "Sending END-OF-RR");
2356 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
2357 put_af4(buf
, c
->afi
);
2358 buf
[2] = BGP_RR_END
;
2364 bgp_rx_route_refresh(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
2366 struct bgp_proto
*p
= conn
->bgp
;
2368 if (conn
->state
!= BS_ESTABLISHED
)
2369 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
2371 if (!conn
->local_caps
->route_refresh
)
2372 { bgp_error(conn
, 1, 3, pkt
+18, 1); return; }
2374 if (len
< (BGP_HEADER_LENGTH
+ 4))
2375 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
2377 if (len
> (BGP_HEADER_LENGTH
+ 4))
2378 { bgp_error(conn
, 7, 1, pkt
, MIN(len
, 2048)); return; }
2380 struct bgp_channel
*c
= bgp_get_channel(p
, get_af4(pkt
+19));
2383 log(L_WARN
"%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
2384 p
->p
.name
, pkt
[21], get_u16(pkt
+19), pkt
[22]);
2388 /* RFC 7313 redefined reserved field as RR message subtype */
2389 uint subtype
= p
->enhanced_refresh
? pkt
[21] : BGP_RR_REQUEST
;
2393 case BGP_RR_REQUEST
:
2394 BGP_TRACE(D_PACKETS
, "Got ROUTE-REFRESH");
2395 channel_request_feeding(&c
->c
);
2399 BGP_TRACE(D_PACKETS
, "Got BEGIN-OF-RR");
2400 bgp_refresh_begin(c
);
2404 BGP_TRACE(D_PACKETS
, "Got END-OF-RR");
2409 log(L_WARN
"%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
2410 p
->p
.name
, subtype
);
2415 static inline struct bgp_channel
*
2416 bgp_get_channel_to_send(struct bgp_proto
*p
, struct bgp_conn
*conn
)
2418 uint i
= conn
->last_channel
;
2420 /* Try the last channel, but at most several times */
2421 if ((conn
->channels_to_send
& (1 << i
)) &&
2422 (conn
->last_channel_count
< 16))
2425 /* Find channel with non-zero channels_to_send */
2429 if (i
>= p
->channel_count
)
2432 while (! (conn
->channels_to_send
& (1 << i
)));
2434 /* Use that channel */
2435 conn
->last_channel
= i
;
2436 conn
->last_channel_count
= 0;
2439 conn
->last_channel_count
++;
2440 return p
->channel_map
[i
];
2444 bgp_send(struct bgp_conn
*conn
, uint type
, uint len
)
2446 sock
*sk
= conn
->sk
;
2447 byte
*buf
= sk
->tbuf
;
2449 memset(buf
, 0xff, 16); /* Marker */
2450 put_u16(buf
+16, len
);
2453 return sk_send(sk
, len
);
2457 * bgp_fire_tx - transmit packets
2460 * Whenever the transmit buffers of the underlying TCP connection
2461 * are free and we have any packets queued for sending, the socket functions
2462 * call bgp_fire_tx() which takes care of selecting the highest priority packet
2463 * queued (Notification > Keepalive > Open > Update), assembling its header
2464 * and body and sending it to the connection.
2467 bgp_fire_tx(struct bgp_conn
*conn
)
2469 struct bgp_proto
*p
= conn
->bgp
;
2470 struct bgp_channel
*c
;
2471 byte
*buf
, *pkt
, *end
;
2477 buf
= conn
->sk
->tbuf
;
2478 pkt
= buf
+ BGP_HEADER_LENGTH
;
2479 s
= conn
->packets_to_send
;
2481 if (s
& (1 << PKT_SCHEDULE_CLOSE
))
2483 /* We can finally close connection and enter idle state */
2484 bgp_conn_enter_idle_state(conn
);
2487 if (s
& (1 << PKT_NOTIFICATION
))
2489 conn
->packets_to_send
= 1 << PKT_SCHEDULE_CLOSE
;
2490 end
= bgp_create_notification(conn
, pkt
);
2491 return bgp_send(conn
, PKT_NOTIFICATION
, end
- buf
);
2493 else if (s
& (1 << PKT_KEEPALIVE
))
2495 conn
->packets_to_send
&= ~(1 << PKT_KEEPALIVE
);
2496 BGP_TRACE(D_PACKETS
, "Sending KEEPALIVE");
2497 bgp_start_timer(conn
->keepalive_timer
, conn
->keepalive_time
);
2498 return bgp_send(conn
, PKT_KEEPALIVE
, BGP_HEADER_LENGTH
);
2500 else if (s
& (1 << PKT_OPEN
))
2502 conn
->packets_to_send
&= ~(1 << PKT_OPEN
);
2503 end
= bgp_create_open(conn
, pkt
);
2504 return bgp_send(conn
, PKT_OPEN
, end
- buf
);
2506 else while (conn
->channels_to_send
)
2508 c
= bgp_get_channel_to_send(p
, conn
);
2509 s
= c
->packets_to_send
;
2511 if (s
& (1 << PKT_ROUTE_REFRESH
))
2513 c
->packets_to_send
&= ~(1 << PKT_ROUTE_REFRESH
);
2514 end
= bgp_create_route_refresh(c
, pkt
);
2515 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
2517 else if (s
& (1 << PKT_BEGIN_REFRESH
))
2519 /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
2520 c
->packets_to_send
&= ~(1 << PKT_BEGIN_REFRESH
);
2521 end
= bgp_create_begin_refresh(c
, pkt
);
2522 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
2524 else if (s
& (1 << PKT_UPDATE
))
2526 end
= bgp_create_update(c
, pkt
);
2528 return bgp_send(conn
, PKT_UPDATE
, end
- buf
);
2530 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
2531 c
->packets_to_send
= 0;
2532 conn
->channels_to_send
&= ~(1 << c
->index
);
2534 if (c
->feed_state
== BFS_LOADED
)
2536 c
->feed_state
= BFS_NONE
;
2537 end
= bgp_create_end_mark(c
, pkt
);
2538 return bgp_send(conn
, PKT_UPDATE
, end
- buf
);
2541 else if (c
->feed_state
== BFS_REFRESHED
)
2543 c
->feed_state
= BFS_NONE
;
2544 end
= bgp_create_end_refresh(c
, pkt
);
2545 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
2549 bug("Channel packets_to_send: %x", s
);
2551 c
->packets_to_send
= 0;
2552 conn
->channels_to_send
&= ~(1 << c
->index
);
2559 * bgp_schedule_packet - schedule a packet for transmission
2562 * @type: packet type
2564 * Schedule a packet of type @type to be sent as soon as possible.
2567 bgp_schedule_packet(struct bgp_conn
*conn
, struct bgp_channel
*c
, int type
)
2571 DBG("BGP: Scheduling packet type %d\n", type
);
2575 if (! conn
->channels_to_send
)
2577 conn
->last_channel
= c
->index
;
2578 conn
->last_channel_count
= 0;
2581 c
->packets_to_send
|= 1 << type
;
2582 conn
->channels_to_send
|= 1 << c
->index
;
2585 conn
->packets_to_send
|= 1 << type
;
2587 if ((conn
->sk
->tpos
== conn
->sk
->tbuf
) && !ev_active(conn
->tx_ev
))
2588 ev_schedule(conn
->tx_ev
);
2592 bgp_kick_tx(void *vconn
)
2594 struct bgp_conn
*conn
= vconn
;
2596 DBG("BGP: kicking TX\n");
2597 while (bgp_fire_tx(conn
) > 0)
2604 struct bgp_conn
*conn
= sk
->data
;
2606 DBG("BGP: TX hook\n");
2607 while (bgp_fire_tx(conn
) > 0)
2615 } bgp_msg_table
[] = {
2616 { 1, 0, "Invalid message header" },
2617 { 1, 1, "Connection not synchronized" },
2618 { 1, 2, "Bad message length" },
2619 { 1, 3, "Bad message type" },
2620 { 2, 0, "Invalid OPEN message" },
2621 { 2, 1, "Unsupported version number" },
2622 { 2, 2, "Bad peer AS" },
2623 { 2, 3, "Bad BGP identifier" },
2624 { 2, 4, "Unsupported optional parameter" },
2625 { 2, 5, "Authentication failure" },
2626 { 2, 6, "Unacceptable hold time" },
2627 { 2, 7, "Required capability missing" }, /* [RFC5492] */
2628 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
2629 { 3, 0, "Invalid UPDATE message" },
2630 { 3, 1, "Malformed attribute list" },
2631 { 3, 2, "Unrecognized well-known attribute" },
2632 { 3, 3, "Missing mandatory attribute" },
2633 { 3, 4, "Invalid attribute flags" },
2634 { 3, 5, "Invalid attribute length" },
2635 { 3, 6, "Invalid ORIGIN attribute" },
2636 { 3, 7, "AS routing loop" }, /* Deprecated */
2637 { 3, 8, "Invalid NEXT_HOP attribute" },
2638 { 3, 9, "Optional attribute error" },
2639 { 3, 10, "Invalid network field" },
2640 { 3, 11, "Malformed AS_PATH" },
2641 { 4, 0, "Hold timer expired" },
2642 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
2643 { 5, 1, "Unexpected message in OpenSent state" },
2644 { 5, 2, "Unexpected message in OpenConfirm state" },
2645 { 5, 3, "Unexpected message in Established state" },
2646 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
2647 { 6, 1, "Maximum number of prefixes reached" },
2648 { 6, 2, "Administrative shutdown" },
2649 { 6, 3, "Peer de-configured" },
2650 { 6, 4, "Administrative reset" },
2651 { 6, 5, "Connection rejected" },
2652 { 6, 6, "Other configuration change" },
2653 { 6, 7, "Connection collision resolution" },
2654 { 6, 8, "Out of Resources" },
2655 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
2656 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
2660 * bgp_error_dsc - return BGP error description
2661 * @code: BGP error code
2662 * @subcode: BGP error subcode
2664 * bgp_error_dsc() returns a description of the given BGP error, which is
2665 * either a string from the message table or a static temporary buffer.
2668 bgp_error_dsc(uint code
, uint subcode
)
2670 static char buff
[32];
2673 for (i
=0; i
< ARRAY_SIZE(bgp_msg_table
); i
++)
2674 if (bgp_msg_table
[i
].major
== code
&& bgp_msg_table
[i
].minor
== subcode
)
2675 return bgp_msg_table
[i
].msg
;
2677 bsprintf(buff
, "Unknown error %u.%u", code
, subcode
);
2682 bgp_log_error(struct bgp_proto
*p
, u8
class, char *msg
, uint code
, uint subcode
, byte
*data
, uint len
)
2685 byte
*t
, argbuf
[36];
2688 /* Don't report Cease messages generated by myself */
2689 if (code
== 6 && class == BE_BGP_TX
)
2692 name
= bgp_error_dsc(code
, subcode
);
2699 if ((code
== 2) && (subcode
== 2) && ((len
== 2) || (len
== 4)))
2701 /* Bad peer AS - we would like to print the AS */
2702 t
+= bsprintf(t
, "%u", (len
== 2) ? get_u16(data
) : get_u32(data
));
2707 for (i
=0; i
<len
; i
++)
2708 t
+= bsprintf(t
, "%02x", data
[i
]);
2712 log(L_REMOTE
"%s: %s: %s%s", p
->p
.name
, msg
, name
, argbuf
);
2716 bgp_rx_notification(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
2718 struct bgp_proto
*p
= conn
->bgp
;
2721 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
2723 uint code
= pkt
[19];
2724 uint subcode
= pkt
[20];
2725 int err
= (code
!= 6);
2727 bgp_log_error(p
, BE_BGP_RX
, "Received", code
, subcode
, pkt
+21, len
-21);
2728 bgp_store_error(p
, conn
, BE_BGP_RX
, (code
<< 16) | subcode
);
2730 bgp_conn_enter_close_state(conn
);
2731 bgp_schedule_packet(conn
, NULL
, PKT_SCHEDULE_CLOSE
);
2735 bgp_update_startup_delay(p
);
2741 bgp_rx_keepalive(struct bgp_conn
*conn
)
2743 struct bgp_proto
*p
= conn
->bgp
;
2745 BGP_TRACE(D_PACKETS
, "Got KEEPALIVE");
2746 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
2748 if (conn
->state
== BS_OPENCONFIRM
)
2749 { bgp_conn_enter_established_state(conn
); return; }
2751 if (conn
->state
!= BS_ESTABLISHED
)
2752 bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0);
2757 * bgp_rx_packet - handle a received packet
2758 * @conn: BGP connection
2759 * @pkt: start of the packet
2762 * bgp_rx_packet() takes a newly received packet and calls the corresponding
2763 * packet handler according to the packet type.
2766 bgp_rx_packet(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
2768 byte type
= pkt
[18];
2770 DBG("BGP: Got packet %02x (%d bytes)\n", type
, len
);
2772 if (conn
->bgp
->p
.mrtdump
& MD_MESSAGES
)
2773 mrt_dump_bgp_packet(conn
, pkt
, len
);
2777 case PKT_OPEN
: return bgp_rx_open(conn
, pkt
, len
);
2778 case PKT_UPDATE
: return bgp_rx_update(conn
, pkt
, len
);
2779 case PKT_NOTIFICATION
: return bgp_rx_notification(conn
, pkt
, len
);
2780 case PKT_KEEPALIVE
: return bgp_rx_keepalive(conn
);
2781 case PKT_ROUTE_REFRESH
: return bgp_rx_route_refresh(conn
, pkt
, len
);
2782 default: bgp_error(conn
, 1, 3, pkt
+18, 1);
2787 * bgp_rx - handle received data
2789 * @size: amount of data received
2791 * bgp_rx() is called by the socket layer whenever new data arrive from
2792 * the underlying TCP connection. It assembles the data fragments to packets,
2793 * checks their headers and framing and passes complete packets to
2797 bgp_rx(sock
*sk
, uint size
)
2799 struct bgp_conn
*conn
= sk
->data
;
2800 byte
*pkt_start
= sk
->rbuf
;
2801 byte
*end
= pkt_start
+ size
;
2804 DBG("BGP: RX hook: Got %d bytes\n", size
);
2805 while (end
>= pkt_start
+ BGP_HEADER_LENGTH
)
2807 if ((conn
->state
== BS_CLOSE
) || (conn
->sk
!= sk
))
2810 if (pkt_start
[i
] != 0xff)
2812 bgp_error(conn
, 1, 1, NULL
, 0);
2815 len
= get_u16(pkt_start
+16);
2816 if ((len
< BGP_HEADER_LENGTH
) || (len
> bgp_max_packet_length(conn
)))
2818 bgp_error(conn
, 1, 2, pkt_start
+16, 2);
2821 if (end
< pkt_start
+ len
)
2823 bgp_rx_packet(conn
, pkt_start
, len
);
2826 if (pkt_start
!= sk
->rbuf
)
2828 memmove(sk
->rbuf
, pkt_start
, end
- pkt_start
);
2829 sk
->rpos
= sk
->rbuf
+ (end
- pkt_start
);