]>
git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
c3bd600a10d9b0183c505ed293aa87885113c958
2 * BIRD -- BGP Packet Processing
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
8 * Can be freely distributed and used under the terms of the GNU GPL.
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "proto/mrt/mrt.h"
21 #include "conf/conf.h"
22 #include "lib/unaligned.h"
23 #include "lib/flowspec.h"
24 #include "lib/socket.h"
31 #define BGP_RR_REQUEST 0
32 #define BGP_RR_BEGIN 1
35 #define BGP_NLRI_MAX (4 + 1 + 32)
37 #define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */
38 #define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */
39 #define BGP_MPLS_NULL 3 /* Implicit NULL label */
40 #define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */
43 static struct tbf rl_rcv_update
= TBF_DEFAULT_LOG_LIMITS
;
44 static struct tbf rl_snd_update
= TBF_DEFAULT_LOG_LIMITS
;
46 /* Table for state -> RFC 6608 FSM error subcodes */
47 static byte fsm_err_subcode
[BS_MAX
] = {
54 static struct bgp_channel
*
55 bgp_get_channel(struct bgp_proto
*p
, u32 afi
)
59 for (i
= 0; i
< p
->channel_count
; i
++)
60 if (p
->afi_map
[i
] == afi
)
61 return p
->channel_map
[i
];
67 put_af3(byte
*buf
, u32 id
)
69 put_u16(buf
, id
>> 16);
74 put_af4(byte
*buf
, u32 id
)
76 put_u16(buf
, id
>> 16);
84 return (get_u16(buf
) << 16) | buf
[2];
90 return (get_u16(buf
) << 16) | buf
[3];
94 init_mrt_bgp_data(struct bgp_conn
*conn
, struct mrt_bgp_data
*d
)
96 struct bgp_proto
*p
= conn
->bgp
;
97 int p_ok
= conn
->state
>= BS_OPENCONFIRM
;
99 memset(d
, 0, sizeof(struct mrt_bgp_data
));
100 d
->peer_as
= p
->remote_as
;
101 d
->local_as
= p
->local_as
;
102 d
->index
= (p
->neigh
&& p
->neigh
->iface
) ? p
->neigh
->iface
->index
: 0;
103 d
->af
= ipa_is_ip4(p
->remote_ip
) ? BGP_AFI_IPV4
: BGP_AFI_IPV6
;
104 d
->peer_ip
= conn
->sk
? conn
->sk
->daddr
: IPA_NONE
;
105 d
->local_ip
= conn
->sk
? conn
->sk
->saddr
: IPA_NONE
;
106 d
->as4
= p_ok
? p
->as4_session
: 0;
109 static uint
bgp_find_update_afi(byte
*pos
, uint len
);
112 bgp_estimate_add_path(struct bgp_proto
*p
, byte
*pkt
, uint len
)
114 /* No need to estimate it for other messages than UPDATE */
115 if (pkt
[18] != PKT_UPDATE
)
118 /* 1 -> no channel, 2 -> all channels, 3 -> some channels */
119 if (p
->summary_add_path_rx
< 3)
120 return p
->summary_add_path_rx
== 2;
122 uint afi
= bgp_find_update_afi(pkt
, len
);
123 struct bgp_channel
*c
= bgp_get_channel(p
, afi
);
126 /* Either frame error (if !afi) or unknown AFI/SAFI,
127 will be reported later in regular parsing */
128 BGP_TRACE(D_PACKETS
, "MRT processing noticed invalid packet");
132 return c
->add_path_rx
;
136 bgp_dump_message(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
138 struct mrt_bgp_data d
;
139 init_mrt_bgp_data(conn
, &d
);
143 d
.add_path
= bgp_estimate_add_path(conn
->bgp
, pkt
, len
);
145 mrt_dump_bgp_message(&d
);
149 bgp_dump_state_change(struct bgp_conn
*conn
, uint old
, uint
new)
151 struct mrt_bgp_data d
;
152 init_mrt_bgp_data(conn
, &d
);
157 mrt_dump_bgp_state_change(&d
);
161 bgp_create_notification(struct bgp_conn
*conn
, byte
*buf
)
163 struct bgp_proto
*p
= conn
->bgp
;
165 BGP_TRACE(D_PACKETS
, "Sending NOTIFICATION(code=%d.%d)", conn
->notify_code
, conn
->notify_subcode
);
166 buf
[0] = conn
->notify_code
;
167 buf
[1] = conn
->notify_subcode
;
168 memcpy(buf
+2, conn
->notify_data
, conn
->notify_size
);
169 return buf
+ 2 + conn
->notify_size
;
173 /* Capability negotiation as per RFC 5492 */
175 const struct bgp_af_caps
*
176 bgp_find_af_caps(struct bgp_caps
*caps
, u32 afi
)
178 struct bgp_af_caps
*ac
;
180 WALK_AF_CAPS(caps
, ac
)
187 static struct bgp_af_caps
*
188 bgp_get_af_caps(struct bgp_caps
**pcaps
, u32 afi
)
190 struct bgp_caps
*caps
= *pcaps
;
191 struct bgp_af_caps
*ac
;
193 WALK_AF_CAPS(caps
, ac
)
197 uint n
= caps
->af_count
;
199 *pcaps
= caps
= mb_realloc(caps
, sizeof(struct bgp_caps
) +
200 (2 * n
) * sizeof(struct bgp_af_caps
));
202 ac
= &caps
->af_data
[caps
->af_count
++];
203 memset(ac
, 0, sizeof(struct bgp_af_caps
));
210 bgp_af_caps_cmp(const void *X
, const void *Y
)
212 const struct bgp_af_caps
*x
= X
, *y
= Y
;
213 return (x
->afi
< y
->afi
) ? -1 : (x
->afi
> y
->afi
) ? 1 : 0;
218 bgp_prepare_capabilities(struct bgp_conn
*conn
)
220 struct bgp_proto
*p
= conn
->bgp
;
221 struct bgp_channel
*c
;
222 struct bgp_caps
*caps
;
223 struct bgp_af_caps
*ac
;
225 if (!p
->cf
->capabilities
)
227 /* Just prepare empty local_caps */
228 conn
->local_caps
= mb_allocz(p
->p
.pool
, sizeof(struct bgp_caps
));
232 /* Prepare bgp_caps structure */
233 int n
= list_length(&p
->p
.channels
);
234 caps
= mb_allocz(p
->p
.pool
, sizeof(struct bgp_caps
) + n
* sizeof(struct bgp_af_caps
));
235 conn
->local_caps
= caps
;
237 caps
->as4_support
= p
->cf
->enable_as4
;
238 caps
->ext_messages
= p
->cf
->enable_extended_messages
;
239 caps
->route_refresh
= p
->cf
->enable_refresh
;
240 caps
->enhanced_refresh
= p
->cf
->enable_refresh
;
242 if (caps
->as4_support
)
243 caps
->as4_number
= p
->public_as
;
248 caps
->gr_time
= p
->cf
->gr_time
;
249 caps
->gr_flags
= p
->p
.gr_recovery
? BGP_GRF_RESTART
: 0;
252 if (p
->cf
->llgr_mode
)
253 caps
->llgr_aware
= 1;
255 /* Allocate and fill per-AF fields */
256 WALK_LIST(c
, p
->p
.channels
)
258 ac
= &caps
->af_data
[caps
->af_count
++];
262 ac
->ext_next_hop
= bgp_channel_is_ipv4(c
) && c
->cf
->ext_next_hop
;
263 caps
->any_ext_next_hop
|= ac
->ext_next_hop
;
265 ac
->add_path
= c
->cf
->add_path
;
266 caps
->any_add_path
|= ac
->add_path
;
272 if (p
->p
.gr_recovery
)
273 ac
->gr_af_flags
|= BGP_GRF_FORWARDING
;
276 if (c
->cf
->llgr_able
)
279 ac
->llgr_time
= c
->cf
->llgr_time
;
281 if (p
->p
.gr_recovery
)
282 ac
->llgr_flags
|= BGP_LLGRF_FORWARDING
;
286 /* Sort capability fields by AFI/SAFI */
287 qsort(caps
->af_data
, caps
->af_count
, sizeof(struct bgp_af_caps
), bgp_af_caps_cmp
);
291 bgp_write_capabilities(struct bgp_conn
*conn
, byte
*buf
)
293 struct bgp_proto
*p
= conn
->bgp
;
294 struct bgp_caps
*caps
= conn
->local_caps
;
295 struct bgp_af_caps
*ac
;
296 byte
*buf_head
= buf
;
299 /* Create capability list in buffer */
302 * Note that max length is ~ 22+21*af_count. With max 12 channels that is
303 * 274. We are limited just by buffer size (4096, minus header), as we support
304 * extended optional parameters. Therefore, we have enough space for expansion.
307 WALK_AF_CAPS(caps
, ac
)
310 *buf
++ = 1; /* Capability 1: Multiprotocol extensions */
311 *buf
++ = 4; /* Capability data length */
312 put_af4(buf
, ac
->afi
);
316 if (caps
->route_refresh
)
318 *buf
++ = 2; /* Capability 2: Support for route refresh */
319 *buf
++ = 0; /* Capability data length */
322 if (caps
->any_ext_next_hop
)
324 *buf
++ = 5; /* Capability 5: Support for extended next hop */
325 *buf
++ = 0; /* Capability data length, will be fixed later */
328 WALK_AF_CAPS(caps
, ac
)
329 if (ac
->ext_next_hop
)
331 put_af4(buf
, ac
->afi
);
332 put_u16(buf
+4, BGP_AFI_IPV6
);
336 data
[-1] = buf
- data
;
339 if (caps
->ext_messages
)
341 *buf
++ = 6; /* Capability 6: Support for extended messages */
342 *buf
++ = 0; /* Capability data length */
347 *buf
++ = 64; /* Capability 64: Support for graceful restart */
348 *buf
++ = 0; /* Capability data length, will be fixed later */
351 put_u16(buf
, caps
->gr_time
);
352 buf
[0] |= caps
->gr_flags
;
355 WALK_AF_CAPS(caps
, ac
)
358 put_af3(buf
, ac
->afi
);
359 buf
[3] = ac
->gr_af_flags
;
363 data
[-1] = buf
- data
;
366 if (caps
->as4_support
)
368 *buf
++ = 65; /* Capability 65: Support for 4-octet AS number */
369 *buf
++ = 4; /* Capability data length */
370 put_u32(buf
, p
->public_as
);
374 if (caps
->any_add_path
)
376 *buf
++ = 69; /* Capability 69: Support for ADD-PATH */
377 *buf
++ = 0; /* Capability data length, will be fixed later */
380 WALK_AF_CAPS(caps
, ac
)
383 put_af3(buf
, ac
->afi
);
384 buf
[3] = ac
->add_path
;
388 data
[-1] = buf
- data
;
391 if (caps
->enhanced_refresh
)
393 *buf
++ = 70; /* Capability 70: Support for enhanced route refresh */
394 *buf
++ = 0; /* Capability data length */
397 if (caps
->llgr_aware
)
399 *buf
++ = 71; /* Capability 71: Support for long-lived graceful restart */
400 *buf
++ = 0; /* Capability data length, will be fixed later */
403 WALK_AF_CAPS(caps
, ac
)
406 put_af3(buf
, ac
->afi
);
407 buf
[3] = ac
->llgr_flags
;
408 put_u24(buf
+4, ac
->llgr_time
);
412 data
[-1] = buf
- data
;
415 caps
->length
= buf
- buf_head
;
421 bgp_read_capabilities(struct bgp_conn
*conn
, byte
*pos
, int len
)
423 struct bgp_proto
*p
= conn
->bgp
;
424 struct bgp_caps
*caps
;
425 struct bgp_af_caps
*ac
;
429 if (!conn
->remote_caps
)
430 caps
= mb_allocz(p
->p
.pool
, sizeof(struct bgp_caps
) + sizeof(struct bgp_af_caps
));
433 caps
= conn
->remote_caps
;
434 conn
->remote_caps
= NULL
;
441 if (len
< 2 || len
< (2 + pos
[1]))
444 /* Capability length */
447 /* Capability type */
450 case 1: /* Multiprotocol capability, RFC 4760 */
455 ac
= bgp_get_af_caps(&caps
, af
);
459 case 2: /* Route refresh capability, RFC 2918 */
463 caps
->route_refresh
= 1;
466 case 5: /* Extended next hop encoding capability, RFC 5549 */
470 for (i
= 0; i
< cl
; i
+= 6)
472 /* Specified only for IPv4 prefixes with IPv6 next hops */
473 if ((get_u16(pos
+2+i
+0) != BGP_AFI_IPV4
) ||
474 (get_u16(pos
+2+i
+4) != BGP_AFI_IPV6
))
477 af
= get_af4(pos
+2+i
);
478 ac
= bgp_get_af_caps(&caps
, af
);
479 ac
->ext_next_hop
= 1;
483 case 6: /* Extended message length capability, RFC draft */
487 caps
->ext_messages
= 1;
490 case 64: /* Graceful restart capability, RFC 4724 */
494 /* Only the last instance is valid */
495 WALK_AF_CAPS(caps
, ac
)
502 caps
->gr_flags
= pos
[2] & 0xf0;
503 caps
->gr_time
= get_u16(pos
+ 2) & 0x0fff;
505 for (i
= 2; i
< cl
; i
+= 4)
507 af
= get_af3(pos
+2+i
);
508 ac
= bgp_get_af_caps(&caps
, af
);
510 ac
->gr_af_flags
= pos
[2+i
+3];
514 case 65: /* AS4 capability, RFC 6793 */
518 caps
->as4_support
= 1;
519 caps
->as4_number
= get_u32(pos
+ 2);
522 case 69: /* ADD-PATH capability, RFC 7911 */
526 for (i
= 0; i
< cl
; i
+= 4)
528 byte val
= pos
[2+i
+3];
529 if (!val
|| (val
> BGP_ADD_PATH_FULL
))
531 log(L_WARN
"%s: Got ADD-PATH capability with unknown value %u, ignoring",
537 for (i
= 0; i
< cl
; i
+= 4)
539 af
= get_af3(pos
+2+i
);
540 ac
= bgp_get_af_caps(&caps
, af
);
541 ac
->add_path
= pos
[2+i
+3];
545 case 70: /* Enhanced route refresh capability, RFC 7313 */
549 caps
->enhanced_refresh
= 1;
552 case 71: /* Long lived graceful restart capability, RFC draft */
556 /* Presumably, only the last instance is valid */
557 WALK_AF_CAPS(caps
, ac
)
564 caps
->llgr_aware
= 1;
566 for (i
= 0; i
< cl
; i
+= 7)
568 af
= get_af3(pos
+2+i
);
569 ac
= bgp_get_af_caps(&caps
, af
);
571 ac
->llgr_flags
= pos
[2+i
+3];
572 ac
->llgr_time
= get_u24(pos
+ 2+i
+4);
576 /* We can safely ignore all other capabilities */
579 ADVANCE(pos
, len
, 2 + cl
);
582 /* The LLGR capability must be advertised together with the GR capability,
583 otherwise it must be disregarded */
584 if (!caps
->gr_aware
&& caps
->llgr_aware
)
586 caps
->llgr_aware
= 0;
587 WALK_AF_CAPS(caps
, ac
)
595 conn
->remote_caps
= caps
;
600 bgp_error(conn
, 2, 0, NULL
, 0);
605 bgp_check_capabilities(struct bgp_conn
*conn
)
607 struct bgp_proto
*p
= conn
->bgp
;
608 struct bgp_caps
*local
= conn
->local_caps
;
609 struct bgp_caps
*remote
= conn
->remote_caps
;
610 struct bgp_channel
*c
;
613 /* This is partially overlapping with bgp_conn_enter_established_state(),
614 but we need to run this just after we receive OPEN message */
616 WALK_LIST(c
, p
->p
.channels
)
618 const struct bgp_af_caps
*loc
= bgp_find_af_caps(local
, c
->afi
);
619 const struct bgp_af_caps
*rem
= bgp_find_af_caps(remote
, c
->afi
);
621 /* Find out whether this channel will be active */
622 int active
= loc
&& loc
->ready
&&
623 ((rem
&& rem
->ready
) || (!remote
->length
&& (c
->afi
== BGP_AF_IPV4
)));
625 /* Mandatory must be active */
626 if (c
->cf
->mandatory
&& !active
)
633 /* We need at least one channel active */
641 bgp_read_options(struct bgp_conn
*conn
, byte
*pos
, uint len
, uint rest
)
643 struct bgp_proto
*p
= conn
->bgp
;
646 /* Handle extended length (draft-ietf-idr-ext-opt-param-07) */
647 if ((len
> 0) && (rest
> 0) && (pos
[0] == 255))
652 /* Update pos/len to describe optional data */
653 len
= get_u16(pos
+1);
659 /* Verify that optional data fits into OPEN packet */
663 /* Length of option parameter header */
664 uint hlen
= ext
? 3 : 2;
671 uint otype
= get_u8(pos
);
672 uint olen
= ext
? get_u16(pos
+1) : get_u8(pos
+1);
674 if (len
< (hlen
+ olen
))
679 /* BGP capabilities, RFC 5492 */
680 if (p
->cf
->capabilities
)
681 if (bgp_read_capabilities(conn
, pos
+ hlen
, olen
) < 0)
687 bgp_error(conn
, 2, 4, pos
, hlen
+ olen
);
691 ADVANCE(pos
, len
, hlen
+ olen
);
694 /* Prepare empty caps if no capability option was announced */
695 if (!conn
->remote_caps
)
696 conn
->remote_caps
= mb_allocz(p
->p
.pool
, sizeof(struct bgp_caps
));
701 bgp_error(conn
, 2, 0, NULL
, 0);
706 bgp_create_open(struct bgp_conn
*conn
, byte
*buf
)
708 struct bgp_proto
*p
= conn
->bgp
;
710 BGP_TRACE(D_PACKETS
, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
711 BGP_VERSION
, p
->public_as
, p
->cf
->hold_time
, p
->local_id
);
713 buf
[0] = BGP_VERSION
;
714 put_u16(buf
+1, (p
->public_as
< 0xFFFF) ? p
->public_as
: AS_TRANS
);
715 put_u16(buf
+3, p
->cf
->hold_time
);
716 put_u32(buf
+5, p
->local_id
);
718 if (p
->cf
->capabilities
)
720 /* Prepare local_caps and write capabilities to buffer */
722 byte
*end
= bgp_write_capabilities(conn
, pos
);
723 uint len
= end
- pos
;
727 buf
[9] = len
+ 2; /* Optional parameters length */
728 buf
[10] = 2; /* Option 2: Capability list */
729 buf
[11] = len
; /* Option data length */
731 else /* draft-ietf-idr-ext-opt-param-07 */
733 /* Move capabilities 4 B forward */
734 memmove(buf
+ 16, pos
, len
);
738 buf
[9] = 255; /* Non-ext OP length, fake */
739 buf
[10] = 255; /* Non-ext OP type, signals extended length */
740 put_u16(buf
+11, len
+ 3); /* Extended optional parameters length */
741 buf
[13] = 2; /* Option 2: Capability list */
742 put_u16(buf
+14, len
); /* Option extended data length */
749 buf
[9] = 0; /* No optional parameters */
757 bgp_rx_open(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
759 struct bgp_proto
*p
= conn
->bgp
;
760 struct bgp_conn
*other
;
764 if (conn
->state
!= BS_OPENSENT
)
765 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
767 /* Check message length */
769 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
771 if (pkt
[19] != BGP_VERSION
)
772 { u16 val
= BGP_VERSION
; bgp_error(conn
, 2, 1, (byte
*) &val
, 2); return; }
774 asn
= get_u16(pkt
+20);
775 hold
= get_u16(pkt
+22);
776 id
= get_u32(pkt
+24);
777 BGP_TRACE(D_PACKETS
, "Got OPEN(as=%d,hold=%d,id=%R)", asn
, hold
, id
);
779 if (bgp_read_options(conn
, pkt
+29, pkt
[28], len
-29) < 0)
782 if (hold
> 0 && hold
< 3)
783 { bgp_error(conn
, 2, 6, pkt
+22, 2); return; }
785 /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
786 if (!id
|| (p
->is_internal
&& id
== p
->local_id
))
787 { bgp_error(conn
, 2, 3, pkt
+24, -4); return; }
789 /* RFC 5492 4 - check for required capabilities */
790 if (p
->cf
->capabilities
&& !bgp_check_capabilities(conn
))
791 { bgp_error(conn
, 2, 7, NULL
, 0); return; }
793 struct bgp_caps
*caps
= conn
->remote_caps
;
795 if (caps
->as4_support
)
797 u32 as4
= caps
->as4_number
;
799 if ((as4
!= asn
) && (asn
!= AS_TRANS
))
800 log(L_WARN
"%s: Peer advertised inconsistent AS numbers", p
->p
.name
);
802 /* When remote ASN is unspecified, it must be external one */
803 if (p
->remote_as
? (as4
!= p
->remote_as
) : (as4
== p
->local_as
))
804 { as4
= htonl(as4
); bgp_error(conn
, 2, 2, (byte
*) &as4
, 4); return; }
806 conn
->received_as
= as4
;
810 if (p
->remote_as
? (asn
!= p
->remote_as
) : (asn
== p
->local_as
))
811 { bgp_error(conn
, 2, 2, pkt
+20, 2); return; }
813 conn
->received_as
= asn
;
816 /* Check the other connection */
817 other
= (conn
== &p
->outgoing_conn
) ? &p
->incoming_conn
: &p
->outgoing_conn
;
818 switch (other
->state
)
822 /* Stop outgoing connection attempts */
823 bgp_conn_enter_idle_state(other
);
833 * Description of collision detection rules in RFC 4271 is confusing and
834 * contradictory, but it is essentially:
836 * 1. Router with higher ID is dominant
837 * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
838 * 3. When both connections are in OpenConfirm state, one initiated by
839 * the dominant router is kept.
841 * The first line in the expression below evaluates whether the neighbor
842 * is dominant, the second line whether the new connection was initiated
843 * by the neighbor. If both are true (or both are false), we keep the new
844 * connection, otherwise we keep the old one.
846 if (((p
->local_id
< id
) || ((p
->local_id
== id
) && (p
->public_as
< p
->remote_as
)))
847 == (conn
== &p
->incoming_conn
))
849 /* Should close the other connection */
850 BGP_TRACE(D_EVENTS
, "Connection collision, giving up the other connection");
851 bgp_error(other
, 6, 7, NULL
, 0);
856 /* Should close this connection */
857 BGP_TRACE(D_EVENTS
, "Connection collision, giving up this connection");
858 bgp_error(conn
, 6, 7, NULL
, 0);
862 bug("bgp_rx_open: Unknown state");
865 /* Update our local variables */
866 conn
->hold_time
= MIN(hold
, p
->cf
->hold_time
);
867 conn
->keepalive_time
= p
->cf
->keepalive_time
? : conn
->hold_time
/ 3;
868 conn
->as4_session
= conn
->local_caps
->as4_support
&& caps
->as4_support
;
869 conn
->ext_messages
= conn
->local_caps
->ext_messages
&& caps
->ext_messages
;
872 DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
873 conn
->hold_time
, conn
->keepalive_time
, p
->remote_as
, p
->remote_id
, conn
->as4_session
);
875 bgp_schedule_packet(conn
, NULL
, PKT_KEEPALIVE
);
876 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
877 bgp_conn_enter_openconfirm_state(conn
);
885 #define REPORT(msg, args...) \
886 ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
888 #define DISCARD(msg, args...) \
889 ({ REPORT(msg, ## args); return; })
891 #define WITHDRAW(msg, args...) \
892 ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
894 #define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
895 #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
896 #define NO_NEXT_HOP "Missing NEXT_HOP attribute"
897 #define NO_LABEL_STACK "Missing MPLS stack"
901 bgp_apply_next_hop(struct bgp_parse_state
*s
, rta
*a
, ip_addr gw
, ip_addr ll
)
903 struct bgp_proto
*p
= s
->proto
;
904 struct bgp_channel
*c
= s
->channel
;
906 if (c
->cf
->gw_mode
== GW_DIRECT
)
908 neighbor
*nbr
= NULL
;
910 /* GW_DIRECT -> single_hop -> p->neigh != NULL */
912 nbr
= neigh_find(&p
->p
, gw
, NULL
, 0);
913 else if (ipa_nonzero(ll
))
914 nbr
= neigh_find(&p
->p
, ll
, p
->neigh
->iface
, 0);
916 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
917 WITHDRAW(BAD_NEXT_HOP
);
919 a
->dest
= RTD_UNICAST
;
920 a
->nh
.gw
= nbr
->addr
;
921 a
->nh
.iface
= nbr
->iface
;
922 a
->igp_metric
= c
->cf
->cost
;
924 else /* GW_RECURSIVE */
927 WITHDRAW(BAD_NEXT_HOP
);
929 rtable
*tab
= ipa_is_ip4(gw
) ? c
->igp_table_ip4
: c
->igp_table_ip6
;
930 s
->hostentry
= rt_get_hostentry(tab
, gw
, ll
, c
->c
.table
);
933 rta_apply_hostentry(a
, s
->hostentry
, NULL
);
935 /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
940 bgp_apply_mpls_labels(struct bgp_parse_state
*s
, rta
*a
, u32
*labels
, uint lnum
)
942 if (lnum
> MPLS_MAX_LABEL_STACK
)
944 REPORT("Too many MPLS labels (%u)", lnum);
946 a
->dest
= RTD_UNREACHABLE
;
948 a
->nh
= (struct nexthop
) { };
952 /* Handle implicit NULL as empty MPLS stack */
953 if ((lnum
== 1) && (labels
[0] == BGP_MPLS_NULL
))
956 if (s
->channel
->cf
->gw_mode
== GW_DIRECT
)
959 memcpy(a
->nh
.label
, labels
, 4*lnum
);
961 else /* GW_RECURSIVE */
966 memcpy(ms
.stack
, labels
, 4*lnum
);
967 rta_apply_hostentry(a
, s
->hostentry
, &ms
);
973 bgp_match_src(struct bgp_export_state
*s
, int mode
)
977 case NH_NO
: return 0;
978 case NH_ALL
: return 1;
979 case NH_IBGP
: return s
->src
&& s
->src
->is_internal
;
980 case NH_EBGP
: return s
->src
&& !s
->src
->is_internal
;
986 bgp_use_next_hop(struct bgp_export_state
*s
, eattr
*a
)
988 struct bgp_proto
*p
= s
->proto
;
989 struct bgp_channel
*c
= s
->channel
;
990 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
992 /* Handle next hop self option */
993 if (c
->cf
->next_hop_self
&& bgp_match_src(s
, c
->cf
->next_hop_self
))
996 /* Handle next hop keep option */
997 if (c
->cf
->next_hop_keep
&& bgp_match_src(s
, c
->cf
->next_hop_keep
))
1000 /* Keep it when explicitly set in export filter */
1001 if (a
->type
& EAF_FRESH
)
1004 /* Check for non-matching AF */
1005 if ((ipa_is_ip4(*nh
) != bgp_channel_is_ipv4(c
)) && !c
->ext_next_hop
)
1008 /* Keep it when exported to internal peers */
1009 if (p
->is_interior
&& ipa_nonzero(*nh
))
1012 /* Keep it when forwarded between single-hop BGPs on the same iface */
1013 struct iface
*ifa
= (s
->src
&& s
->src
->neigh
) ? s
->src
->neigh
->iface
: NULL
;
1014 return p
->neigh
&& (p
->neigh
->iface
== ifa
);
1018 bgp_use_gateway(struct bgp_export_state
*s
)
1020 struct bgp_proto
*p
= s
->proto
;
1021 struct bgp_channel
*c
= s
->channel
;
1022 rta
*ra
= s
->route
->attrs
;
1024 /* Handle next hop self option - also applies to gateway */
1025 if (c
->cf
->next_hop_self
&& bgp_match_src(s
, c
->cf
->next_hop_self
))
1028 /* We need one valid global gateway */
1029 if ((ra
->dest
!= RTD_UNICAST
) || ra
->nh
.next
|| ipa_zero(ra
->nh
.gw
) || ipa_is_link_local(ra
->nh
.gw
))
1032 /* Check for non-matching AF */
1033 if ((ipa_is_ip4(ra
->nh
.gw
) != bgp_channel_is_ipv4(c
)) && !c
->ext_next_hop
)
1036 /* Use it when exported to internal peers */
1040 /* Use it when forwarded to single-hop BGP peer on the same iface */
1041 return p
->neigh
&& (p
->neigh
->iface
== ra
->nh
.iface
);
1045 bgp_update_next_hop_ip(struct bgp_export_state
*s
, eattr
*a
, ea_list
**to
)
1047 if (!a
|| !bgp_use_next_hop(s
, a
))
1049 if (bgp_use_gateway(s
))
1051 rta
*ra
= s
->route
->attrs
;
1052 ip_addr nh
[1] = { ra
->nh
.gw
};
1053 bgp_set_attr_data(to
, s
->pool
, BA_NEXT_HOP
, 0, nh
, 16);
1057 u32 implicit_null
= BGP_MPLS_NULL
;
1058 u32
*labels
= ra
->nh
.labels
? ra
->nh
.label
: &implicit_null
;
1059 uint lnum
= ra
->nh
.labels
? ra
->nh
.labels
: 1;
1060 bgp_set_attr_data(to
, s
->pool
, BA_MPLS_LABEL_STACK
, 0, labels
, lnum
* 4);
1065 ip_addr nh
[2] = { s
->channel
->next_hop_addr
, s
->channel
->link_addr
};
1066 bgp_set_attr_data(to
, s
->pool
, BA_NEXT_HOP
, 0, nh
, ipa_nonzero(nh
[1]) ? 32 : 16);
1067 s
->local_next_hop
= 1;
1069 /* TODO: Use local MPLS assigned label */
1072 u32 implicit_null
= BGP_MPLS_NULL
;
1073 bgp_set_attr_data(to
, s
->pool
, BA_MPLS_LABEL_STACK
, 0, &implicit_null
, 4);
1078 /* Check if next hop is valid */
1079 a
= bgp_find_attr(*to
, BA_NEXT_HOP
);
1081 WITHDRAW(NO_NEXT_HOP
);
1083 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
1084 ip_addr peer
= s
->proto
->remote_ip
;
1085 uint len
= a
->u
.ptr
->length
;
1087 /* Forbid zero next hop */
1088 if (ipa_zero(nh
[0]) && ((len
!= 32) || ipa_zero(nh
[1])))
1089 WITHDRAW(BAD_NEXT_HOP
);
1091 /* Forbid next hop equal to neighbor IP */
1092 if (ipa_equal(peer
, nh
[0]) || ((len
== 32) && ipa_equal(peer
, nh
[1])))
1093 WITHDRAW(BAD_NEXT_HOP
);
1095 /* Forbid next hop with non-matching AF */
1096 if ((ipa_is_ip4(nh
[0]) != bgp_channel_is_ipv4(s
->channel
)) &&
1097 !s
->channel
->ext_next_hop
)
1098 WITHDRAW(BAD_NEXT_HOP
);
1100 /* Just check that the MPLS label stack is present */
1101 if (s
->mpls
&& !bgp_find_attr(*to
, BA_MPLS_LABEL_STACK
))
1102 WITHDRAW(NO_LABEL_STACK
);
1106 bgp_encode_next_hop_ip(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size UNUSED
)
1108 /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
1109 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
1110 uint len
= a
->u
.ptr
->length
;
1112 ASSERT((len
== 16) || (len
== 32));
1115 * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
1116 * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
1117 * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
1118 * IPv6 address with IPv6 NLRI.
1121 if (bgp_channel_is_ipv4(s
->channel
) && ipa_is_ip4(nh
[0]))
1123 put_ip4(buf
, ipa_to_ip4(nh
[0]));
1127 put_ip6(buf
, ipa_to_ip6(nh
[0]));
1130 put_ip6(buf
+16, ipa_to_ip6(nh
[1]));
1136 bgp_decode_next_hop_ip(struct bgp_parse_state
*s
, byte
*data
, uint len
, rta
*a
)
1138 struct bgp_channel
*c
= s
->channel
;
1139 struct adata
*ad
= lp_alloc_adata(s
->pool
, 32);
1140 ip_addr
*nh
= (void *) ad
->data
;
1144 nh
[0] = ipa_from_ip4(get_ip4(data
));
1149 nh
[0] = ipa_from_ip6(get_ip6(data
));
1152 if (ipa_is_link_local(nh
[0]))
1153 { nh
[1] = nh
[0]; nh
[0] = IPA_NONE
; }
1157 nh
[0] = ipa_from_ip6(get_ip6(data
));
1158 nh
[1] = ipa_from_ip6(get_ip6(data
+16));
1160 if (ipa_is_ip4(nh
[0]) || !ip6_is_link_local(nh
[1]))
1164 bgp_parse_error(s
, 9);
1166 if (ipa_zero(nh
[1]))
1169 if ((bgp_channel_is_ipv4(c
) != ipa_is_ip4(nh
[0])) && !c
->ext_next_hop
)
1170 WITHDRAW(BAD_NEXT_HOP
);
1172 // XXXX validate next hop
1174 bgp_set_attr_ptr(&(a
->eattrs
), s
->pool
, BA_NEXT_HOP
, 0, ad
);
1175 bgp_apply_next_hop(s
, a
, nh
[0], nh
[1]);
1179 bgp_encode_next_hop_vpn(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size UNUSED
)
1181 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
1182 uint len
= a
->u
.ptr
->length
;
1184 ASSERT((len
== 16) || (len
== 32));
1187 * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
1188 * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
1189 * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
1190 * IPv6 address with VPNv6 NLRI.
1193 if (bgp_channel_is_ipv4(s
->channel
) && ipa_is_ip4(nh
[0]))
1195 put_u64(buf
, 0); /* VPN RD is 0 */
1196 put_ip4(buf
+8, ipa_to_ip4(nh
[0]));
1200 put_u64(buf
, 0); /* VPN RD is 0 */
1201 put_ip6(buf
+8, ipa_to_ip6(nh
[0]));
1206 put_u64(buf
+24, 0); /* VPN RD is 0 */
1207 put_ip6(buf
+32, ipa_to_ip6(nh
[1]));
1213 bgp_decode_next_hop_vpn(struct bgp_parse_state
*s
, byte
*data
, uint len
, rta
*a
)
1215 struct bgp_channel
*c
= s
->channel
;
1216 struct adata
*ad
= lp_alloc_adata(s
->pool
, 32);
1217 ip_addr
*nh
= (void *) ad
->data
;
1221 nh
[0] = ipa_from_ip4(get_ip4(data
+8));
1226 nh
[0] = ipa_from_ip6(get_ip6(data
+8));
1229 if (ipa_is_link_local(nh
[0]))
1230 { nh
[1] = nh
[0]; nh
[0] = IPA_NONE
; }
1234 nh
[0] = ipa_from_ip6(get_ip6(data
+8));
1235 nh
[1] = ipa_from_ip6(get_ip6(data
+32));
1237 if (ipa_is_ip4(nh
[0]) || !ip6_is_link_local(nh
[1]))
1241 bgp_parse_error(s
, 9);
1243 if (ipa_zero(nh
[1]))
1246 /* XXXX which error */
1247 if ((get_u64(data
) != 0) || ((len
== 48) && (get_u64(data
+24) != 0)))
1248 bgp_parse_error(s
, 9);
1250 if ((bgp_channel_is_ipv4(c
) != ipa_is_ip4(nh
[0])) && !c
->ext_next_hop
)
1251 WITHDRAW(BAD_NEXT_HOP
);
1253 // XXXX validate next hop
1255 bgp_set_attr_ptr(&(a
->eattrs
), s
->pool
, BA_NEXT_HOP
, 0, ad
);
1256 bgp_apply_next_hop(s
, a
, nh
[0], nh
[1]);
1262 bgp_encode_next_hop_none(struct bgp_write_state
*s UNUSED
, eattr
*a UNUSED
, byte
*buf UNUSED
, uint size UNUSED
)
1268 bgp_decode_next_hop_none(struct bgp_parse_state
*s UNUSED
, byte
*data UNUSED
, uint len UNUSED
, rta
*a UNUSED
)
1271 * Although we expect no next hop and RFC 7606 7.11 states that attribute
1272 * MP_REACH_NLRI with unexpected next hop length is considered malformed,
1273 * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
1280 bgp_update_next_hop_none(struct bgp_export_state
*s
, eattr
*a
, ea_list
**to
)
1282 /* NEXT_HOP shall not pass */
1284 bgp_unset_attr(to
, s
->pool
, BA_NEXT_HOP
);
1293 bgp_rte_update(struct bgp_parse_state
*s
, net_addr
*n
, u32 path_id
, rta
*a0
)
1295 if (path_id
!= s
->last_id
)
1297 s
->last_src
= rt_get_source(&s
->proto
->p
, path_id
);
1298 s
->last_id
= path_id
;
1300 rta_free(s
->cached_rta
);
1301 s
->cached_rta
= NULL
;
1306 /* Route withdraw */
1307 rte_update3(&s
->channel
->c
, n
, NULL
, s
->last_src
);
1311 /* Prepare cached route attributes */
1312 if (s
->cached_rta
== NULL
)
1314 a0
->src
= s
->last_src
;
1316 /* Workaround for rta_lookup() breaking eattrs */
1317 ea_list
*ea
= a0
->eattrs
;
1318 s
->cached_rta
= rta_lookup(a0
);
1322 rta
*a
= rta_clone(s
->cached_rta
);
1323 rte
*e
= rte_get_temp(a
);
1326 e
->u
.bgp
.suppressed
= 0;
1327 e
->u
.bgp
.stale
= -1;
1328 rte_update3(&s
->channel
->c
, n
, e
, s
->last_src
);
1332 bgp_encode_mpls_labels(struct bgp_write_state
*s UNUSED
, const adata
*mpls
, byte
**pos
, uint
*size
, byte
*pxlen
)
1334 const u32 dummy
= 0;
1335 const u32
*labels
= mpls
? (const u32
*) mpls
->data
: &dummy
;
1336 uint lnum
= mpls
? (mpls
->length
/ 4) : 1;
1338 for (uint i
= 0; i
< lnum
; i
++)
1340 put_u24(*pos
, labels
[i
] << 4);
1341 ADVANCE(*pos
, *size
, 3);
1344 /* Add bottom-of-stack flag */
1345 (*pos
)[-1] |= BGP_MPLS_BOS
;
1347 *pxlen
+= 24 * lnum
;
1351 bgp_decode_mpls_labels(struct bgp_parse_state
*s
, byte
**pos
, uint
*len
, uint
*pxlen
, rta
*a
)
1353 u32 labels
[BGP_MPLS_MAX
], label
;
1358 bgp_parse_error(s
, 1);
1360 label
= get_u24(*pos
);
1361 labels
[lnum
++] = label
>> 4;
1362 ADVANCE(*pos
, *len
, 3);
1365 /* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but
1366 fixed-size 24-bit Compatibility field, which MUST be ignored */
1367 if (!a
&& !s
->err_withdraw
)
1370 while (!(label
& BGP_MPLS_BOS
));
1375 /* Attach MPLS attribute unless we already have one */
1376 if (!s
->mpls_labels
)
1378 s
->mpls_labels
= lp_alloc_adata(s
->pool
, 4*BGP_MPLS_MAX
);
1379 bgp_set_attr_ptr(&(a
->eattrs
), s
->pool
, BA_MPLS_LABEL_STACK
, 0, s
->mpls_labels
);
1382 /* Overwrite data in the attribute */
1383 s
->mpls_labels
->length
= 4*lnum
;
1384 memcpy(s
->mpls_labels
->data
, labels
, 4*lnum
);
1386 /* Update next hop entry in rta */
1387 bgp_apply_mpls_labels(s
, a
, labels
, lnum
);
1389 /* Attributes were changed, invalidate cached entry */
1390 rta_free(s
->cached_rta
);
1391 s
->cached_rta
= NULL
;
1397 bgp_encode_nlri_ip4(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1401 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= BGP_NLRI_MAX
))
1403 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1404 struct net_addr_ip4
*net
= (void *) px
->net
;
1406 /* Encode path ID */
1409 put_u32(pos
, px
->path_id
);
1410 ADVANCE(pos
, size
, 4);
1413 /* Encode prefix length */
1415 ADVANCE(pos
, size
, 1);
1417 /* Encode MPLS labels */
1419 bgp_encode_mpls_labels(s
, s
->mpls_labels
, &pos
, &size
, pos
- 1);
1421 /* Encode prefix body */
1422 ip4_addr a
= ip4_hton(net
->prefix
);
1423 uint b
= (net
->pxlen
+ 7) / 8;
1425 ADVANCE(pos
, size
, b
);
1427 bgp_free_prefix(s
->channel
, px
);
1434 bgp_decode_nlri_ip4(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1441 /* Decode path ID */
1445 bgp_parse_error(s
, 1);
1447 path_id
= get_u32(pos
);
1448 ADVANCE(pos
, len
, 4);
1451 /* Decode prefix length */
1453 ADVANCE(pos
, len
, 1);
1455 if (len
< ((l
+ 7) / 8))
1456 bgp_parse_error(s
, 1);
1458 /* Decode MPLS labels */
1460 bgp_decode_mpls_labels(s
, &pos
, &len
, &l
, a
);
1462 if (l
> IP4_MAX_PREFIX_LENGTH
)
1463 bgp_parse_error(s
, 10);
1465 /* Decode prefix body */
1466 ip4_addr addr
= IP4_NONE
;
1467 uint b
= (l
+ 7) / 8;
1468 memcpy(&addr
, pos
, b
);
1469 ADVANCE(pos
, len
, b
);
1471 net
= NET_ADDR_IP4(ip4_ntoh(addr
), l
);
1472 net_normalize_ip4(&net
);
1474 // XXXX validate prefix
1476 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1482 bgp_encode_nlri_ip6(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1486 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= BGP_NLRI_MAX
))
1488 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1489 struct net_addr_ip6
*net
= (void *) px
->net
;
1491 /* Encode path ID */
1494 put_u32(pos
, px
->path_id
);
1495 ADVANCE(pos
, size
, 4);
1498 /* Encode prefix length */
1500 ADVANCE(pos
, size
, 1);
1502 /* Encode MPLS labels */
1504 bgp_encode_mpls_labels(s
, s
->mpls_labels
, &pos
, &size
, pos
- 1);
1506 /* Encode prefix body */
1507 ip6_addr a
= ip6_hton(net
->prefix
);
1508 uint b
= (net
->pxlen
+ 7) / 8;
1510 ADVANCE(pos
, size
, b
);
1512 bgp_free_prefix(s
->channel
, px
);
1519 bgp_decode_nlri_ip6(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1526 /* Decode path ID */
1530 bgp_parse_error(s
, 1);
1532 path_id
= get_u32(pos
);
1533 ADVANCE(pos
, len
, 4);
1536 /* Decode prefix length */
1538 ADVANCE(pos
, len
, 1);
1540 if (len
< ((l
+ 7) / 8))
1541 bgp_parse_error(s
, 1);
1543 /* Decode MPLS labels */
1545 bgp_decode_mpls_labels(s
, &pos
, &len
, &l
, a
);
1547 if (l
> IP6_MAX_PREFIX_LENGTH
)
1548 bgp_parse_error(s
, 10);
1550 /* Decode prefix body */
1551 ip6_addr addr
= IP6_NONE
;
1552 uint b
= (l
+ 7) / 8;
1553 memcpy(&addr
, pos
, b
);
1554 ADVANCE(pos
, len
, b
);
1556 net
= NET_ADDR_IP6(ip6_ntoh(addr
), l
);
1557 net_normalize_ip6(&net
);
1559 // XXXX validate prefix
1561 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1566 bgp_encode_nlri_vpn4(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1570 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= BGP_NLRI_MAX
))
1572 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1573 struct net_addr_vpn4
*net
= (void *) px
->net
;
1575 /* Encode path ID */
1578 put_u32(pos
, px
->path_id
);
1579 ADVANCE(pos
, size
, 4);
1582 /* Encode prefix length */
1583 *pos
= 64 + net
->pxlen
;
1584 ADVANCE(pos
, size
, 1);
1586 /* Encode MPLS labels */
1588 bgp_encode_mpls_labels(s
, s
->mpls_labels
, &pos
, &size
, pos
- 1);
1590 /* Encode route distinguisher */
1591 put_u64(pos
, net
->rd
);
1592 ADVANCE(pos
, size
, 8);
1594 /* Encode prefix body */
1595 ip4_addr a
= ip4_hton(net
->prefix
);
1596 uint b
= (net
->pxlen
+ 7) / 8;
1598 ADVANCE(pos
, size
, b
);
1600 bgp_free_prefix(s
->channel
, px
);
1607 bgp_decode_nlri_vpn4(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1614 /* Decode path ID */
1618 bgp_parse_error(s
, 1);
1620 path_id
= get_u32(pos
);
1621 ADVANCE(pos
, len
, 4);
1624 /* Decode prefix length */
1626 ADVANCE(pos
, len
, 1);
1628 if (len
< ((l
+ 7) / 8))
1629 bgp_parse_error(s
, 1);
1631 /* Decode MPLS labels */
1633 bgp_decode_mpls_labels(s
, &pos
, &len
, &l
, a
);
1635 /* Decode route distinguisher */
1637 bgp_parse_error(s
, 1);
1639 u64 rd
= get_u64(pos
);
1640 ADVANCE(pos
, len
, 8);
1643 if (l
> IP4_MAX_PREFIX_LENGTH
)
1644 bgp_parse_error(s
, 10);
1646 /* Decode prefix body */
1647 ip4_addr addr
= IP4_NONE
;
1648 uint b
= (l
+ 7) / 8;
1649 memcpy(&addr
, pos
, b
);
1650 ADVANCE(pos
, len
, b
);
1652 net
= NET_ADDR_VPN4(ip4_ntoh(addr
), l
, rd
);
1653 net_normalize_vpn4(&net
);
1655 // XXXX validate prefix
1657 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1663 bgp_encode_nlri_vpn6(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1667 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= BGP_NLRI_MAX
))
1669 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1670 struct net_addr_vpn6
*net
= (void *) px
->net
;
1672 /* Encode path ID */
1675 put_u32(pos
, px
->path_id
);
1676 ADVANCE(pos
, size
, 4);
1679 /* Encode prefix length */
1680 *pos
= 64 + net
->pxlen
;
1681 ADVANCE(pos
, size
, 1);
1683 /* Encode MPLS labels */
1685 bgp_encode_mpls_labels(s
, s
->mpls_labels
, &pos
, &size
, pos
- 1);
1687 /* Encode route distinguisher */
1688 put_u64(pos
, net
->rd
);
1689 ADVANCE(pos
, size
, 8);
1691 /* Encode prefix body */
1692 ip6_addr a
= ip6_hton(net
->prefix
);
1693 uint b
= (net
->pxlen
+ 7) / 8;
1695 ADVANCE(pos
, size
, b
);
1697 bgp_free_prefix(s
->channel
, px
);
1704 bgp_decode_nlri_vpn6(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1711 /* Decode path ID */
1715 bgp_parse_error(s
, 1);
1717 path_id
= get_u32(pos
);
1718 ADVANCE(pos
, len
, 4);
1721 /* Decode prefix length */
1723 ADVANCE(pos
, len
, 1);
1725 if (len
< ((l
+ 7) / 8))
1726 bgp_parse_error(s
, 1);
1728 /* Decode MPLS labels */
1730 bgp_decode_mpls_labels(s
, &pos
, &len
, &l
, a
);
1732 /* Decode route distinguisher */
1734 bgp_parse_error(s
, 1);
1736 u64 rd
= get_u64(pos
);
1737 ADVANCE(pos
, len
, 8);
1740 if (l
> IP6_MAX_PREFIX_LENGTH
)
1741 bgp_parse_error(s
, 10);
1743 /* Decode prefix body */
1744 ip6_addr addr
= IP6_NONE
;
1745 uint b
= (l
+ 7) / 8;
1746 memcpy(&addr
, pos
, b
);
1747 ADVANCE(pos
, len
, b
);
1749 net
= NET_ADDR_VPN6(ip6_ntoh(addr
), l
, rd
);
1750 net_normalize_vpn6(&net
);
1752 // XXXX validate prefix
1754 bgp_rte_update(s
, (net_addr
*) &net
, path_id
, a
);
1760 bgp_encode_nlri_flow4(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1764 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= 4))
1766 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1767 struct net_addr_flow4
*net
= (void *) px
->net
;
1768 uint flen
= net
->length
- sizeof(net_addr_flow4
);
1770 /* Encode path ID */
1773 put_u32(pos
, px
->path_id
);
1774 ADVANCE(pos
, size
, 4);
1780 /* Copy whole flow data including length */
1781 memcpy(pos
, net
->data
, flen
);
1782 ADVANCE(pos
, size
, flen
);
1784 bgp_free_prefix(s
->channel
, px
);
1791 bgp_decode_nlri_flow4(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1797 /* Decode path ID */
1801 bgp_parse_error(s
, 1);
1803 path_id
= get_u32(pos
);
1804 ADVANCE(pos
, len
, 4);
1808 bgp_parse_error(s
, 1);
1810 /* Decode flow length */
1811 uint hlen
= flow_hdr_length(pos
);
1812 uint dlen
= flow_read_length(pos
);
1813 uint flen
= hlen
+ dlen
;
1814 byte
*data
= pos
+ hlen
;
1817 bgp_parse_error(s
, 1);
1819 /* Validate flow data */
1820 enum flow_validated_state r
= flow4_validate(data
, dlen
);
1821 if (r
!= FLOW_ST_VALID
)
1823 log(L_REMOTE
"%s: Invalid flow route: %s", s
->proto
->p
.name
, flow_validated_state_str(r
));
1824 bgp_parse_error(s
, 1);
1827 if (data
[0] != FLOW_TYPE_DST_PREFIX
)
1829 log(L_REMOTE
"%s: No dst prefix at first pos", s
->proto
->p
.name
);
1830 bgp_parse_error(s
, 1);
1833 /* Decode dst prefix */
1834 ip4_addr px
= IP4_NONE
;
1835 uint pxlen
= data
[1];
1837 // FIXME: Use some generic function
1838 memcpy(&px
, data
+2, BYTES(pxlen
));
1839 px
= ip4_and(ip4_ntoh(px
), ip4_mkmask(pxlen
));
1841 /* Prepare the flow */
1842 net_addr
*n
= alloca(sizeof(struct net_addr_flow4
) + flen
);
1843 net_fill_flow4(n
, px
, pxlen
, pos
, flen
);
1844 ADVANCE(pos
, len
, flen
);
1846 bgp_rte_update(s
, n
, path_id
, a
);
1852 bgp_encode_nlri_flow6(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, uint size
)
1856 while (!EMPTY_LIST(buck
->prefixes
) && (size
>= 4))
1858 struct bgp_prefix
*px
= HEAD(buck
->prefixes
);
1859 struct net_addr_flow6
*net
= (void *) px
->net
;
1860 uint flen
= net
->length
- sizeof(net_addr_flow6
);
1862 /* Encode path ID */
1865 put_u32(pos
, px
->path_id
);
1866 ADVANCE(pos
, size
, 4);
1872 /* Copy whole flow data including length */
1873 memcpy(pos
, net
->data
, flen
);
1874 ADVANCE(pos
, size
, flen
);
1876 bgp_free_prefix(s
->channel
, px
);
1883 bgp_decode_nlri_flow6(struct bgp_parse_state
*s
, byte
*pos
, uint len
, rta
*a
)
1889 /* Decode path ID */
1893 bgp_parse_error(s
, 1);
1895 path_id
= get_u32(pos
);
1896 ADVANCE(pos
, len
, 4);
1900 bgp_parse_error(s
, 1);
1902 /* Decode flow length */
1903 uint hlen
= flow_hdr_length(pos
);
1904 uint dlen
= flow_read_length(pos
);
1905 uint flen
= hlen
+ dlen
;
1906 byte
*data
= pos
+ hlen
;
1909 bgp_parse_error(s
, 1);
1911 /* Validate flow data */
1912 enum flow_validated_state r
= flow6_validate(data
, dlen
);
1913 if (r
!= FLOW_ST_VALID
)
1915 log(L_REMOTE
"%s: Invalid flow route: %s", s
->proto
->p
.name
, flow_validated_state_str(r
));
1916 bgp_parse_error(s
, 1);
1919 if (data
[0] != FLOW_TYPE_DST_PREFIX
)
1921 log(L_REMOTE
"%s: No dst prefix at first pos", s
->proto
->p
.name
);
1922 bgp_parse_error(s
, 1);
1925 /* Decode dst prefix */
1926 ip6_addr px
= IP6_NONE
;
1927 uint pxlen
= data
[1];
1929 // FIXME: Use some generic function
1930 memcpy(&px
, data
+2, BYTES(pxlen
));
1931 px
= ip6_and(ip6_ntoh(px
), ip6_mkmask(pxlen
));
1933 /* Prepare the flow */
1934 net_addr
*n
= alloca(sizeof(struct net_addr_flow6
) + flen
);
1935 net_fill_flow6(n
, px
, pxlen
, pos
, flen
);
1936 ADVANCE(pos
, len
, flen
);
1938 bgp_rte_update(s
, n
, path_id
, a
);
1943 static const struct bgp_af_desc bgp_af_table
[] = {
1948 .encode_nlri
= bgp_encode_nlri_ip4
,
1949 .decode_nlri
= bgp_decode_nlri_ip4
,
1950 .encode_next_hop
= bgp_encode_next_hop_ip
,
1951 .decode_next_hop
= bgp_decode_next_hop_ip
,
1952 .update_next_hop
= bgp_update_next_hop_ip
,
1955 .afi
= BGP_AF_IPV4_MC
,
1958 .encode_nlri
= bgp_encode_nlri_ip4
,
1959 .decode_nlri
= bgp_decode_nlri_ip4
,
1960 .encode_next_hop
= bgp_encode_next_hop_ip
,
1961 .decode_next_hop
= bgp_decode_next_hop_ip
,
1962 .update_next_hop
= bgp_update_next_hop_ip
,
1965 .afi
= BGP_AF_IPV4_MPLS
,
1968 .name
= "ipv4-mpls",
1969 .encode_nlri
= bgp_encode_nlri_ip4
,
1970 .decode_nlri
= bgp_decode_nlri_ip4
,
1971 .encode_next_hop
= bgp_encode_next_hop_ip
,
1972 .decode_next_hop
= bgp_decode_next_hop_ip
,
1973 .update_next_hop
= bgp_update_next_hop_ip
,
1979 .encode_nlri
= bgp_encode_nlri_ip6
,
1980 .decode_nlri
= bgp_decode_nlri_ip6
,
1981 .encode_next_hop
= bgp_encode_next_hop_ip
,
1982 .decode_next_hop
= bgp_decode_next_hop_ip
,
1983 .update_next_hop
= bgp_update_next_hop_ip
,
1986 .afi
= BGP_AF_IPV6_MC
,
1989 .encode_nlri
= bgp_encode_nlri_ip6
,
1990 .decode_nlri
= bgp_decode_nlri_ip6
,
1991 .encode_next_hop
= bgp_encode_next_hop_ip
,
1992 .decode_next_hop
= bgp_decode_next_hop_ip
,
1993 .update_next_hop
= bgp_update_next_hop_ip
,
1996 .afi
= BGP_AF_IPV6_MPLS
,
1999 .name
= "ipv6-mpls",
2000 .encode_nlri
= bgp_encode_nlri_ip6
,
2001 .decode_nlri
= bgp_decode_nlri_ip6
,
2002 .encode_next_hop
= bgp_encode_next_hop_ip
,
2003 .decode_next_hop
= bgp_decode_next_hop_ip
,
2004 .update_next_hop
= bgp_update_next_hop_ip
,
2007 .afi
= BGP_AF_VPN4_MPLS
,
2010 .name
= "vpn4-mpls",
2011 .encode_nlri
= bgp_encode_nlri_vpn4
,
2012 .decode_nlri
= bgp_decode_nlri_vpn4
,
2013 .encode_next_hop
= bgp_encode_next_hop_vpn
,
2014 .decode_next_hop
= bgp_decode_next_hop_vpn
,
2015 .update_next_hop
= bgp_update_next_hop_ip
,
2018 .afi
= BGP_AF_VPN6_MPLS
,
2021 .name
= "vpn6-mpls",
2022 .encode_nlri
= bgp_encode_nlri_vpn6
,
2023 .decode_nlri
= bgp_decode_nlri_vpn6
,
2024 .encode_next_hop
= bgp_encode_next_hop_vpn
,
2025 .decode_next_hop
= bgp_decode_next_hop_vpn
,
2026 .update_next_hop
= bgp_update_next_hop_ip
,
2029 .afi
= BGP_AF_VPN4_MC
,
2032 .encode_nlri
= bgp_encode_nlri_vpn4
,
2033 .decode_nlri
= bgp_decode_nlri_vpn4
,
2034 .encode_next_hop
= bgp_encode_next_hop_vpn
,
2035 .decode_next_hop
= bgp_decode_next_hop_vpn
,
2036 .update_next_hop
= bgp_update_next_hop_ip
,
2039 .afi
= BGP_AF_VPN6_MC
,
2042 .encode_nlri
= bgp_encode_nlri_vpn6
,
2043 .decode_nlri
= bgp_decode_nlri_vpn6
,
2044 .encode_next_hop
= bgp_encode_next_hop_vpn
,
2045 .decode_next_hop
= bgp_decode_next_hop_vpn
,
2046 .update_next_hop
= bgp_update_next_hop_ip
,
2049 .afi
= BGP_AF_FLOW4
,
2053 .encode_nlri
= bgp_encode_nlri_flow4
,
2054 .decode_nlri
= bgp_decode_nlri_flow4
,
2055 .encode_next_hop
= bgp_encode_next_hop_none
,
2056 .decode_next_hop
= bgp_decode_next_hop_none
,
2057 .update_next_hop
= bgp_update_next_hop_none
,
2060 .afi
= BGP_AF_FLOW6
,
2064 .encode_nlri
= bgp_encode_nlri_flow6
,
2065 .decode_nlri
= bgp_decode_nlri_flow6
,
2066 .encode_next_hop
= bgp_encode_next_hop_none
,
2067 .decode_next_hop
= bgp_decode_next_hop_none
,
2068 .update_next_hop
= bgp_update_next_hop_none
,
2072 const struct bgp_af_desc
*
2073 bgp_get_af_desc(u32 afi
)
2076 for (i
= 0; i
< ARRAY_SIZE(bgp_af_table
); i
++)
2077 if (bgp_af_table
[i
].afi
== afi
)
2078 return &bgp_af_table
[i
];
2084 bgp_encode_nlri(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
2086 return s
->channel
->desc
->encode_nlri(s
, buck
, buf
, end
- buf
);
2090 bgp_encode_next_hop(struct bgp_write_state
*s
, eattr
*nh
, byte
*buf
)
2092 return s
->channel
->desc
->encode_next_hop(s
, nh
, buf
, 255);
2096 bgp_update_next_hop(struct bgp_export_state
*s
, eattr
*a
, ea_list
**to
)
2098 s
->channel
->desc
->update_next_hop(s
, a
, to
);
2101 #define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
2104 bgp_create_ip_reach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
2107 * 2 B Withdrawn Routes Length (zero)
2108 * --- IPv4 Withdrawn Routes NLRI (unused)
2109 * 2 B Total Path Attribute Length
2110 * var Path Attributes
2111 * var IPv4 Network Layer Reachability Information
2116 la
= bgp_encode_attrs(s
, buck
->eattrs
, buf
+4, buf
+ MAX_ATTRS_LENGTH
);
2119 /* Attribute list too long */
2120 bgp_withdraw_bucket(s
->channel
, buck
);
2127 lr
= bgp_encode_nlri(s
, buck
, buf
+4+la
, end
);
2133 bgp_create_mp_reach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
2136 * 2 B IPv4 Withdrawn Routes Length (zero)
2137 * --- IPv4 Withdrawn Routes NLRI (unused)
2138 * 2 B Total Path Attribute Length
2139 * 1 B MP_REACH_NLRI hdr - Attribute Flags
2140 * 1 B MP_REACH_NLRI hdr - Attribute Type Code
2141 * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
2142 * 2 B MP_REACH_NLRI data - Address Family Identifier
2143 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
2144 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
2145 * var MP_REACH_NLRI data - Network Address of Next Hop
2146 * 1 B MP_REACH_NLRI data - Reserved (zero)
2147 * var MP_REACH_NLRI data - Network Layer Reachability Information
2148 * var Rest of Path Attributes
2149 * --- IPv4 Network Layer Reachability Information (unused)
2152 int lh
, lr
, la
; /* Lengths of next hop, NLRI and attributes */
2154 /* Begin of MP_REACH_NLRI atribute */
2155 buf
[4] = BAF_OPTIONAL
| BAF_EXT_LEN
;
2156 buf
[5] = BA_MP_REACH_NLRI
;
2157 put_u16(buf
+6, 0); /* Will be fixed later */
2158 put_af3(buf
+8, s
->channel
->afi
);
2161 /* Encode attributes to temporary buffer */
2162 byte
*abuf
= alloca(MAX_ATTRS_LENGTH
);
2163 la
= bgp_encode_attrs(s
, buck
->eattrs
, abuf
, abuf
+ MAX_ATTRS_LENGTH
);
2166 /* Attribute list too long */
2167 bgp_withdraw_bucket(s
->channel
, buck
);
2171 /* Encode the next hop */
2172 lh
= bgp_encode_next_hop(s
, s
->mp_next_hop
, pos
+1);
2176 /* Reserved field */
2179 /* Encode the NLRI */
2180 lr
= bgp_encode_nlri(s
, buck
, pos
, end
- la
);
2183 /* End of MP_REACH_NLRI atribute, update data length */
2184 put_u16(buf
+6, pos
-buf
-8);
2186 /* Copy remaining attributes */
2187 memcpy(pos
, abuf
, la
);
2190 /* Initial UPDATE fields */
2192 put_u16(buf
+2, pos
-buf
-4);
2197 #undef MAX_ATTRS_LENGTH
2200 bgp_create_ip_unreach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
2203 * 2 B Withdrawn Routes Length
2204 * var IPv4 Withdrawn Routes NLRI
2205 * 2 B Total Path Attribute Length (zero)
2206 * --- Path Attributes (unused)
2207 * --- IPv4 Network Layer Reachability Information (unused)
2210 uint len
= bgp_encode_nlri(s
, buck
, buf
+2, end
);
2212 put_u16(buf
+0, len
);
2213 put_u16(buf
+2+len
, 0);
2219 bgp_create_mp_unreach(struct bgp_write_state
*s
, struct bgp_bucket
*buck
, byte
*buf
, byte
*end
)
2222 * 2 B Withdrawn Routes Length (zero)
2223 * --- IPv4 Withdrawn Routes NLRI (unused)
2224 * 2 B Total Path Attribute Length
2225 * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
2226 * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
2227 * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
2228 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
2229 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
2230 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
2231 * --- IPv4 Network Layer Reachability Information (unused)
2234 uint len
= bgp_encode_nlri(s
, buck
, buf
+11, end
);
2237 put_u16(buf
+2, 7+len
);
2239 /* Begin of MP_UNREACH_NLRI atribute */
2240 buf
[4] = BAF_OPTIONAL
| BAF_EXT_LEN
;
2241 buf
[5] = BA_MP_UNREACH_NLRI
;
2242 put_u16(buf
+6, 3+len
);
2243 put_af3(buf
+8, s
->channel
->afi
);
2249 bgp_create_update(struct bgp_channel
*c
, byte
*buf
)
2251 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2252 struct bgp_bucket
*buck
;
2253 byte
*end
= buf
+ (bgp_max_packet_length(p
->conn
) - BGP_HEADER_LENGTH
);
2258 /* Initialize write state */
2259 struct bgp_write_state s
= {
2262 .pool
= bgp_linpool
,
2263 .mp_reach
= (c
->afi
!= BGP_AF_IPV4
) || c
->ext_next_hop
,
2264 .as4_session
= p
->as4_session
,
2265 .add_path
= c
->add_path_tx
,
2266 .mpls
= c
->desc
->mpls
,
2269 /* Try unreachable bucket */
2270 if ((buck
= c
->withdraw_bucket
) && !EMPTY_LIST(buck
->prefixes
))
2272 res
= (c
->afi
== BGP_AF_IPV4
) && !c
->ext_next_hop
?
2273 bgp_create_ip_unreach(&s
, buck
, buf
, end
):
2274 bgp_create_mp_unreach(&s
, buck
, buf
, end
);
2279 /* Try reachable buckets */
2280 if (!EMPTY_LIST(c
->bucket_queue
))
2282 buck
= HEAD(c
->bucket_queue
);
2284 /* Cleanup empty buckets */
2285 if (EMPTY_LIST(buck
->prefixes
))
2287 bgp_free_bucket(c
, buck
);
2292 bgp_create_ip_reach(&s
, buck
, buf
, end
):
2293 bgp_create_mp_reach(&s
, buck
, buf
, end
);
2295 if (EMPTY_LIST(buck
->prefixes
))
2296 bgp_free_bucket(c
, buck
);
2298 bgp_defer_bucket(c
, buck
);
2306 /* No more prefixes to send */
2310 BGP_TRACE_RL(&rl_snd_update
, D_PACKETS
, "Sending UPDATE");
2317 bgp_create_ip_end_mark(struct bgp_channel
*c UNUSED
, byte
*buf
)
2319 /* Empty update packet */
2326 bgp_create_mp_end_mark(struct bgp_channel
*c
, byte
*buf
)
2329 put_u16(buf
+2, 6); /* length 4--9 */
2331 /* Empty MP_UNREACH_NLRI atribute */
2332 buf
[4] = BAF_OPTIONAL
;
2333 buf
[5] = BA_MP_UNREACH_NLRI
;
2334 buf
[6] = 3; /* Length 7--9 */
2335 put_af3(buf
+7, c
->afi
);
2341 bgp_create_end_mark(struct bgp_channel
*c
, byte
*buf
)
2343 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2345 BGP_TRACE(D_PACKETS
, "Sending END-OF-RIB");
2347 return (c
->afi
== BGP_AF_IPV4
) ?
2348 bgp_create_ip_end_mark(c
, buf
):
2349 bgp_create_mp_end_mark(c
, buf
);
2353 bgp_rx_end_mark(struct bgp_parse_state
*s
, u32 afi
)
2355 struct bgp_proto
*p
= s
->proto
;
2356 struct bgp_channel
*c
= bgp_get_channel(p
, afi
);
2358 BGP_TRACE(D_PACKETS
, "Got END-OF-RIB");
2361 DISCARD(BAD_AFI
, BGP_AFI(afi
), BGP_SAFI(afi
));
2363 if (c
->load_state
== BFS_LOADING
)
2364 c
->load_state
= BFS_NONE
;
2366 if (p
->p
.gr_recovery
)
2367 channel_graceful_restart_unlock(&c
->c
);
2370 bgp_graceful_restart_done(c
);
2374 bgp_decode_nlri(struct bgp_parse_state
*s
, u32 afi
, byte
*nlri
, uint len
, ea_list
*ea
, byte
*nh
, uint nh_len
)
2376 struct bgp_channel
*c
= bgp_get_channel(s
->proto
, afi
);
2380 DISCARD(BAD_AFI
, BGP_AFI(afi
), BGP_SAFI(afi
));
2383 s
->add_path
= c
->add_path_rx
;
2384 s
->mpls
= c
->desc
->mpls
;
2387 s
->last_src
= s
->proto
->p
.main_source
;
2390 * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
2391 * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
2392 * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
2393 * decode_next_hop hooks) by restoring a->eattrs afterwards.
2398 a
= allocz(RTA_MAX_SIZE
);
2400 a
->source
= RTS_BGP
;
2401 a
->scope
= SCOPE_UNIVERSE
;
2402 a
->from
= s
->proto
->remote_ip
;
2405 c
->desc
->decode_next_hop(s
, nh
, nh_len
, a
);
2406 bgp_finish_attrs(s
, a
);
2408 /* Handle withdraw during next hop decoding */
2409 if (s
->err_withdraw
)
2413 c
->desc
->decode_nlri(s
, nlri
, len
, a
);
2415 rta_free(s
->cached_rta
);
2416 s
->cached_rta
= NULL
;
2420 bgp_rx_update(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
2422 struct bgp_proto
*p
= conn
->bgp
;
2425 BGP_TRACE_RL(&rl_rcv_update
, D_PACKETS
, "Got UPDATE");
2427 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
2428 if (conn
->state
== BS_OPENCONFIRM
)
2429 bgp_conn_enter_established_state(conn
);
2431 if (conn
->state
!= BS_ESTABLISHED
)
2432 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
2434 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
2436 /* Initialize parse state */
2437 struct bgp_parse_state s
= {
2439 .pool
= bgp_linpool
,
2440 .as4_session
= p
->as4_session
,
2443 /* Parse error handler */
2444 if (setjmp(s
.err_jmpbuf
))
2446 bgp_error(conn
, 3, s
.err_subcode
, NULL
, 0);
2450 /* Check minimal length */
2452 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
2454 /* Skip fixed header */
2458 * UPDATE message format
2460 * 2 B IPv4 Withdrawn Routes Length
2461 * var IPv4 Withdrawn Routes NLRI
2462 * 2 B Total Path Attribute Length
2463 * var Path Attributes
2464 * var IPv4 Reachable Routes NLRI
2467 s
.ip_unreach_len
= get_u16(pkt
+ pos
);
2468 s
.ip_unreach_nlri
= pkt
+ pos
+ 2;
2469 pos
+= 2 + s
.ip_unreach_len
;
2472 bgp_parse_error(&s
, 1);
2474 s
.attr_len
= get_u16(pkt
+ pos
);
2475 s
.attrs
= pkt
+ pos
+ 2;
2476 pos
+= 2 + s
.attr_len
;
2479 bgp_parse_error(&s
, 1);
2481 s
.ip_reach_len
= len
- pos
;
2482 s
.ip_reach_nlri
= pkt
+ pos
;
2486 ea
= bgp_decode_attrs(&s
, s
.attrs
, s
.attr_len
);
2490 /* Check for End-of-RIB marker */
2491 if (!s
.attr_len
&& !s
.ip_unreach_len
&& !s
.ip_reach_len
)
2492 { bgp_rx_end_mark(&s
, BGP_AF_IPV4
); goto done
; }
2494 /* Check for MP End-of-RIB marker */
2495 if ((s
.attr_len
< 8) && !s
.ip_unreach_len
&& !s
.ip_reach_len
&&
2496 !s
.mp_reach_len
&& !s
.mp_unreach_len
&& s
.mp_unreach_af
)
2497 { bgp_rx_end_mark(&s
, s
.mp_unreach_af
); goto done
; }
2499 if (s
.ip_unreach_len
)
2500 bgp_decode_nlri(&s
, BGP_AF_IPV4
, s
.ip_unreach_nlri
, s
.ip_unreach_len
, NULL
, NULL
, 0);
2502 if (s
.mp_unreach_len
)
2503 bgp_decode_nlri(&s
, s
.mp_unreach_af
, s
.mp_unreach_nlri
, s
.mp_unreach_len
, NULL
, NULL
, 0);
2506 bgp_decode_nlri(&s
, BGP_AF_IPV4
, s
.ip_reach_nlri
, s
.ip_reach_len
,
2507 ea
, s
.ip_next_hop_data
, s
.ip_next_hop_len
);
2510 bgp_decode_nlri(&s
, s
.mp_reach_af
, s
.mp_reach_nlri
, s
.mp_reach_len
,
2511 ea
, s
.mp_next_hop_data
, s
.mp_next_hop_len
);
2514 rta_free(s
.cached_rta
);
2520 bgp_find_update_afi(byte
*pos
, uint len
)
2523 * This is stripped-down version of bgp_rx_update(), bgp_decode_attrs() and
2524 * bgp_decode_mp_[un]reach_nlri() used by MRT code in order to find out which
2525 * AFI/SAFI is associated with incoming UPDATE. Returns 0 for framing errors.
2530 /* Assume there is no withrawn NLRI, read lengths and move to attribute list */
2531 uint wlen
= get_u16(pos
+ 19);
2532 uint alen
= get_u16(pos
+ 21);
2533 ADVANCE(pos
, len
, 23);
2535 /* Either non-zero withdrawn NLRI, non-zero reachable NLRI, or IPv4 End-of-RIB */
2536 if ((wlen
!= 0) || (alen
< len
) || !alen
)
2542 /* Process attribute list (alen == len) */
2548 uint flags
= pos
[0];
2550 ADVANCE(pos
, len
, 2);
2552 uint ll
= !(flags
& BAF_EXT_LEN
) ? 1 : 2;
2556 /* Read attribute length and move to attribute body */
2557 alen
= (ll
== 1) ? get_u8(pos
) : get_u16(pos
);
2558 ADVANCE(pos
, len
, ll
);
2564 if ((code
== BA_MP_REACH_NLRI
) || (code
== BA_MP_UNREACH_NLRI
))
2569 return BGP_AF(get_u16(pos
), pos
[2]);
2572 /* Move to the next attribute */
2573 ADVANCE(pos
, len
, alen
);
2576 /* No basic or MP NLRI, but there are some attributes -> error */
2585 static inline byte
*
2586 bgp_create_route_refresh(struct bgp_channel
*c
, byte
*buf
)
2588 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2590 BGP_TRACE(D_PACKETS
, "Sending ROUTE-REFRESH");
2592 /* Original route refresh request, RFC 2918 */
2593 put_af4(buf
, c
->afi
);
2594 buf
[2] = BGP_RR_REQUEST
;
2599 static inline byte
*
2600 bgp_create_begin_refresh(struct bgp_channel
*c
, byte
*buf
)
2602 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2604 BGP_TRACE(D_PACKETS
, "Sending BEGIN-OF-RR");
2606 /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
2607 put_af4(buf
, c
->afi
);
2608 buf
[2] = BGP_RR_BEGIN
;
2613 static inline byte
*
2614 bgp_create_end_refresh(struct bgp_channel
*c
, byte
*buf
)
2616 struct bgp_proto
*p
= (void *) c
->c
.proto
;
2618 BGP_TRACE(D_PACKETS
, "Sending END-OF-RR");
2620 /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
2621 put_af4(buf
, c
->afi
);
2622 buf
[2] = BGP_RR_END
;
2628 bgp_rx_route_refresh(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
2630 struct bgp_proto
*p
= conn
->bgp
;
2632 if (conn
->state
!= BS_ESTABLISHED
)
2633 { bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0); return; }
2635 if (!conn
->local_caps
->route_refresh
)
2636 { bgp_error(conn
, 1, 3, pkt
+18, 1); return; }
2638 if (len
< (BGP_HEADER_LENGTH
+ 4))
2639 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
2641 if (len
> (BGP_HEADER_LENGTH
+ 4))
2642 { bgp_error(conn
, 7, 1, pkt
, MIN(len
, 2048)); return; }
2644 struct bgp_channel
*c
= bgp_get_channel(p
, get_af4(pkt
+19));
2647 log(L_WARN
"%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
2648 p
->p
.name
, pkt
[21], get_u16(pkt
+19), pkt
[22]);
2652 /* RFC 7313 redefined reserved field as RR message subtype */
2653 uint subtype
= p
->enhanced_refresh
? pkt
[21] : BGP_RR_REQUEST
;
2657 case BGP_RR_REQUEST
:
2658 BGP_TRACE(D_PACKETS
, "Got ROUTE-REFRESH");
2659 channel_request_feeding(&c
->c
);
2663 BGP_TRACE(D_PACKETS
, "Got BEGIN-OF-RR");
2664 bgp_refresh_begin(c
);
2668 BGP_TRACE(D_PACKETS
, "Got END-OF-RR");
2673 log(L_WARN
"%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
2674 p
->p
.name
, subtype
);
2679 static inline struct bgp_channel
*
2680 bgp_get_channel_to_send(struct bgp_proto
*p
, struct bgp_conn
*conn
)
2682 uint i
= conn
->last_channel
;
2684 /* Try the last channel, but at most several times */
2685 if ((conn
->channels_to_send
& (1 << i
)) &&
2686 (conn
->last_channel_count
< 16))
2689 /* Find channel with non-zero channels_to_send */
2693 if (i
>= p
->channel_count
)
2696 while (! (conn
->channels_to_send
& (1 << i
)));
2698 /* Use that channel */
2699 conn
->last_channel
= i
;
2700 conn
->last_channel_count
= 0;
2703 conn
->last_channel_count
++;
2704 return p
->channel_map
[i
];
2708 bgp_send(struct bgp_conn
*conn
, uint type
, uint len
)
2710 sock
*sk
= conn
->sk
;
2711 byte
*buf
= sk
->tbuf
;
2713 memset(buf
, 0xff, 16); /* Marker */
2714 put_u16(buf
+16, len
);
2717 return sk_send(sk
, len
);
2721 * bgp_fire_tx - transmit packets
2724 * Whenever the transmit buffers of the underlying TCP connection
2725 * are free and we have any packets queued for sending, the socket functions
2726 * call bgp_fire_tx() which takes care of selecting the highest priority packet
2727 * queued (Notification > Keepalive > Open > Update), assembling its header
2728 * and body and sending it to the connection.
2731 bgp_fire_tx(struct bgp_conn
*conn
)
2733 struct bgp_proto
*p
= conn
->bgp
;
2734 struct bgp_channel
*c
;
2735 byte
*buf
, *pkt
, *end
;
2741 buf
= conn
->sk
->tbuf
;
2742 pkt
= buf
+ BGP_HEADER_LENGTH
;
2743 s
= conn
->packets_to_send
;
2745 if (s
& (1 << PKT_SCHEDULE_CLOSE
))
2747 /* We can finally close connection and enter idle state */
2748 bgp_conn_enter_idle_state(conn
);
2751 if (s
& (1 << PKT_NOTIFICATION
))
2753 conn
->packets_to_send
= 1 << PKT_SCHEDULE_CLOSE
;
2754 end
= bgp_create_notification(conn
, pkt
);
2755 return bgp_send(conn
, PKT_NOTIFICATION
, end
- buf
);
2757 else if (s
& (1 << PKT_OPEN
))
2759 conn
->packets_to_send
&= ~(1 << PKT_OPEN
);
2760 end
= bgp_create_open(conn
, pkt
);
2761 return bgp_send(conn
, PKT_OPEN
, end
- buf
);
2763 else if (s
& (1 << PKT_KEEPALIVE
))
2765 conn
->packets_to_send
&= ~(1 << PKT_KEEPALIVE
);
2766 BGP_TRACE(D_PACKETS
, "Sending KEEPALIVE");
2767 bgp_start_timer(conn
->keepalive_timer
, conn
->keepalive_time
);
2768 return bgp_send(conn
, PKT_KEEPALIVE
, BGP_HEADER_LENGTH
);
2770 else while (conn
->channels_to_send
)
2772 c
= bgp_get_channel_to_send(p
, conn
);
2773 s
= c
->packets_to_send
;
2775 if (s
& (1 << PKT_ROUTE_REFRESH
))
2777 c
->packets_to_send
&= ~(1 << PKT_ROUTE_REFRESH
);
2778 end
= bgp_create_route_refresh(c
, pkt
);
2779 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
2781 else if (s
& (1 << PKT_BEGIN_REFRESH
))
2783 /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
2784 c
->packets_to_send
&= ~(1 << PKT_BEGIN_REFRESH
);
2785 end
= bgp_create_begin_refresh(c
, pkt
);
2786 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
2788 else if (s
& (1 << PKT_UPDATE
))
2790 end
= bgp_create_update(c
, pkt
);
2792 return bgp_send(conn
, PKT_UPDATE
, end
- buf
);
2794 /* No update to send, perhaps we need to send End-of-RIB or EoRR */
2795 c
->packets_to_send
= 0;
2796 conn
->channels_to_send
&= ~(1 << c
->index
);
2798 if (c
->feed_state
== BFS_LOADED
)
2800 c
->feed_state
= BFS_NONE
;
2801 end
= bgp_create_end_mark(c
, pkt
);
2802 return bgp_send(conn
, PKT_UPDATE
, end
- buf
);
2805 else if (c
->feed_state
== BFS_REFRESHED
)
2807 c
->feed_state
= BFS_NONE
;
2808 end
= bgp_create_end_refresh(c
, pkt
);
2809 return bgp_send(conn
, PKT_ROUTE_REFRESH
, end
- buf
);
2813 bug("Channel packets_to_send: %x", s
);
2815 c
->packets_to_send
= 0;
2816 conn
->channels_to_send
&= ~(1 << c
->index
);
2823 * bgp_schedule_packet - schedule a packet for transmission
2826 * @type: packet type
2828 * Schedule a packet of type @type to be sent as soon as possible.
2831 bgp_schedule_packet(struct bgp_conn
*conn
, struct bgp_channel
*c
, int type
)
2835 DBG("BGP: Scheduling packet type %d\n", type
);
2839 if (! conn
->channels_to_send
)
2841 conn
->last_channel
= c
->index
;
2842 conn
->last_channel_count
= 0;
2845 c
->packets_to_send
|= 1 << type
;
2846 conn
->channels_to_send
|= 1 << c
->index
;
2849 conn
->packets_to_send
|= 1 << type
;
2851 if ((conn
->sk
->tpos
== conn
->sk
->tbuf
) && !ev_active(conn
->tx_ev
))
2852 ev_schedule(conn
->tx_ev
);
2855 bgp_kick_tx(void *vconn
)
2857 struct bgp_conn
*conn
= vconn
;
2859 DBG("BGP: kicking TX\n");
2861 while (--max
&& (bgp_fire_tx(conn
) > 0))
2864 if (!max
&& !ev_active(conn
->tx_ev
))
2865 ev_schedule(conn
->tx_ev
);
2871 struct bgp_conn
*conn
= sk
->data
;
2873 DBG("BGP: TX hook\n");
2875 while (--max
&& (bgp_fire_tx(conn
) > 0))
2878 if (!max
&& !ev_active(conn
->tx_ev
))
2879 ev_schedule(conn
->tx_ev
);
/*
 * Descriptions of BGP errors, one row per (code, subcode) pair in the order
 * { code, subcode, text }. bgp_error_dsc() walks the table from the top and
 * compares the first two columns through the fields major and minor,
 * returning the msg field of the first matching row.
 *
 * NOTE(review): the struct declaration that gives the field types and the
 * closing of the initializer are not visible in this excerpt.
 */
2886 } bgp_msg_table
[] = {
2887 { 1, 0, "Invalid message header" },
2888 { 1, 1, "Connection not synchronized" },
2889 { 1, 2, "Bad message length" },
2890 { 1, 3, "Bad message type" },
2891 { 2, 0, "Invalid OPEN message" },
2892 { 2, 1, "Unsupported version number" },
2893 { 2, 2, "Bad peer AS" },
2894 { 2, 3, "Bad BGP identifier" },
2895 { 2, 4, "Unsupported optional parameter" },
2896 { 2, 5, "Authentication failure" },
2897 { 2, 6, "Unacceptable hold time" },
2898 { 2, 7, "Required capability missing" }, /* [RFC5492] */
2899 { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
2900 { 3, 0, "Invalid UPDATE message" },
2901 { 3, 1, "Malformed attribute list" },
2902 { 3, 2, "Unrecognized well-known attribute" },
2903 { 3, 3, "Missing mandatory attribute" },
2904 { 3, 4, "Invalid attribute flags" },
2905 { 3, 5, "Invalid attribute length" },
2906 { 3, 6, "Invalid ORIGIN attribute" },
2907 { 3, 7, "AS routing loop" }, /* Deprecated */
2908 { 3, 8, "Invalid NEXT_HOP attribute" },
2909 { 3, 9, "Optional attribute error" },
2910 { 3, 10, "Invalid network field" },
2911 { 3, 11, "Malformed AS_PATH" },
2912 { 4, 0, "Hold timer expired" },
2913 { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
2914 { 5, 1, "Unexpected message in OpenSent state" },
2915 { 5, 2, "Unexpected message in OpenConfirm state" },
2916 { 5, 3, "Unexpected message in Established state" },
2917 { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
2918 { 6, 1, "Maximum number of prefixes reached" },
2919 { 6, 2, "Administrative shutdown" },
2920 { 6, 3, "Peer de-configured" },
2921 { 6, 4, "Administrative reset" },
2922 { 6, 5, "Connection rejected" },
2923 { 6, 6, "Other configuration change" },
2924 { 6, 7, "Connection collision resolution" },
2925 { 6, 8, "Out of Resources" },
2926 { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
2927 { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
2931 * bgp_error_dsc - return BGP error description
2932 * @code: BGP error code
2933 * @subcode: BGP error subcode
2935 * bgp_error_dsc() returns error description for BGP errors
2936 * which might be static string or given temporary buffer.
2939 bgp_error_dsc(uint code
, uint subcode
)
2941 static char buff
[32];
2944 for (i
=0; i
< ARRAY_SIZE(bgp_msg_table
); i
++)
2945 if (bgp_msg_table
[i
].major
== code
&& bgp_msg_table
[i
].minor
== subcode
)
2946 return bgp_msg_table
[i
].msg
;
2948 bsprintf(buff
, "Unknown error %u.%u", code
, subcode
);
2952 /* RFC 8203 - shutdown communication message */
/*
 * bgp_handle_message - take the text message out of notification data
 * @p: BGP instance; the message is stored on its embedded protocol (p->p)
 * @data: notification data; byte 0 is read as the message length and the
 *   message text starts at byte 1
 * @len: number of bytes available in @data
 * @bp: cursor into the caller's output buffer, advanced past what is appended
 *
 * The message is stored with proto_set_message() and then appended at *@bp
 * in the form `: "<message>"`, using the stored copy p->p.message.
 *
 * NOTE(review): the return type, the return statements, the declaration of
 * i and the body of the cleanup loop are not visible in this excerpt. The
 * caller bgp_log_error() uses the result as a truth value.
 */
2954 bgp_handle_message(struct bgp_proto
*p
, byte
*data
, uint len
, byte
**bp
)
2956 byte
*msg
= data
+ 1;
/* data[0] is read before any length test shown here; presumably the caller guarantees len >= 1 -- confirm */
2957 uint msg_len
= data
[0];
2960 /* Handle zero length message */
2964 /* Handle proper message */
/* The length byte plus msg_len bytes of text must fit into the len bytes available */
2965 if (msg_len
+ 1 > len
)
2968 /* Some elementary cleanup */
2969 for (i
= 0; i
< msg_len
; i
++)
2973 proto_set_message(&p
->p
, msg
, msg_len
);
/* bsprintf()'s result is added to the cursor, so *bp ends up after the appended text */
2974 *bp
+= bsprintf(*bp
, ": \"%s\"", p
->p
.message
);
/*
 * bgp_log_error - log a BGP error together with its data
 * @p: BGP instance; its name starts the log line
 * @class: error class; BE_BGP_TX is tested together with code 6 below
 * @msg: text placed in front of the error description
 * @code: BGP error code
 * @subcode: BGP error subcode
 * @data: data that accompanies the error
 * @len: length of @data in bytes
 *
 * Builds an optional suffix in argbuf and writes one L_REMOTE log line of
 * the form "<name>: <msg>: <description><suffix>", where the description
 * comes from bgp_error_dsc(). Depending on the error, the suffix is the peer
 * AS number, the shutdown communication text, or a hex dump of @data.
 *
 * NOTE(review): a number of lines of this function are not visible in this
 * excerpt (return type, braces, the declaration of i, the statements guarded
 * by some of the conditions and the termination of argbuf), so the exact
 * flow between the steps below should be confirmed against the full file.
 */
2979 bgp_log_error(struct bgp_proto
*p
, u8
class, char *msg
, uint code
, uint subcode
, byte
*data
, uint len
)
/* t is the write cursor into argbuf */
2981 byte argbuf
[256+16], *t
= argbuf
;
2984 /* Don't report Cease messages generated by myself */
2985 if (code
== 6 && class == BE_BGP_TX
)
2988 /* Reset shutdown message */
/* 6.2 and 6.4 are "Administrative shutdown" and "Administrative reset" in bgp_msg_table */
2989 if ((code
== 6) && ((subcode
== 2) || (subcode
== 4)))
2990 proto_set_message(&p
->p
, NULL
, 0);
2994 /* Bad peer AS - we would like to print the AS */
2995 if ((code
== 2) && (subcode
== 2) && ((len
== 2) || (len
== 4)))
/* Two bytes of data are read with get_u16(), four bytes with get_u32() */
2997 t
+= bsprintf(t
, ": %u", (len
== 2) ? get_u16(data
) : get_u32(data
));
3001 /* RFC 8203 - shutdown communication */
3002 if (((code
== 6) && ((subcode
== 2) || (subcode
== 4))))
/* bgp_handle_message() appends the text through &t when it accepts the data */
3003 if (bgp_handle_message(p
, data
, len
, &t
))
/* Append the data bytes as two hex digits each */
3010 for (i
=0; i
<len
; i
++)
3011 t
+= bsprintf(t
, "%02x", data
[i
]);
3016 const byte
*dsc
= bgp_error_dsc(code
, subcode
);
3017 log(L_REMOTE
"%s: %s: %s%s", p
->p
.name
, msg
, dsc
, argbuf
);
/*
 * bgp_rx_notification - process a received NOTIFICATION message
 * @conn: connection the message arrived on
 * @pkt: start of the packet
 * @len: length of the packet
 *
 * Reads the error code from byte 19 and the subcode from byte 20 of @pkt,
 * logs the error with the remaining len-21 bytes as data, records it with
 * bgp_store_error(), moves the connection to the close state and schedules
 * PKT_SCHEDULE_CLOSE. What follows depends on err, which is non-zero for
 * every code other than 6 ("Cease" in bgp_msg_table).
 *
 * NOTE(review): the return type, the braces, the condition in front of the
 * bgp_error() bail-out and the statements that select between the two
 * trailing groups of calls are not visible in this excerpt.
 */
3021 bgp_rx_notification(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
3023 struct bgp_proto
*p
= conn
->bgp
;
/* Bail out with error 1.2 ("Bad message length"), passing two bytes starting at pkt+16 as data */
3026 { bgp_error(conn
, 1, 2, pkt
+16, 2); return; }
3028 uint code
= pkt
[19];
3029 uint subcode
= pkt
[20];
3030 int err
= (code
!= 6);
/* Everything after the first 21 bytes is passed on as the error data */
3032 bgp_log_error(p
, BE_BGP_RX
, "Received", code
, subcode
, pkt
+21, len
-21);
/* Code and subcode are packed into one value: code in bits 16 and up, subcode below */
3033 bgp_store_error(p
, conn
, BE_BGP_RX
, (code
<< 16) | subcode
);
3035 bgp_conn_enter_close_state(conn
);
3036 bgp_schedule_packet(conn
, NULL
, PKT_SCHEDULE_CLOSE
);
3040 bgp_update_startup_delay(p
);
3041 bgp_stop(p
, 0, NULL
, 0);
/* One bit per subcode 0..8; any larger subcode is folded onto bit 0 */
3045 uint subcode_bit
= 1 << ((subcode
<= 8) ? subcode
: 0);
3046 if (p
->cf
->disable_after_cease
& subcode_bit
)
3048 log(L_INFO
"%s: Disabled after Cease notification", p
->p
.name
);
3049 p
->startup_delay
= 0;
3056 bgp_rx_keepalive(struct bgp_conn
*conn
)
3058 struct bgp_proto
*p
= conn
->bgp
;
3060 BGP_TRACE(D_PACKETS
, "Got KEEPALIVE");
3061 bgp_start_timer(conn
->hold_timer
, conn
->hold_time
);
3063 if (conn
->state
== BS_OPENCONFIRM
)
3064 { bgp_conn_enter_established_state(conn
); return; }
3066 if (conn
->state
!= BS_ESTABLISHED
)
3067 bgp_error(conn
, 5, fsm_err_subcode
[conn
->state
], NULL
, 0);
3072 * bgp_rx_packet - handle a received packet
3073 * @conn: BGP connection
3074 * @pkt: start of the packet
3077 * bgp_rx_packet() takes a newly received packet and calls the corresponding
3078 * packet handler according to the packet type.
3081 bgp_rx_packet(struct bgp_conn
*conn
, byte
*pkt
, uint len
)
3083 byte type
= pkt
[18];
3085 DBG("BGP: Got packet %02x (%d bytes)\n", type
, len
);
3087 if (conn
->bgp
->p
.mrtdump
& MD_MESSAGES
)
3088 bgp_dump_message(conn
, pkt
, len
);
3092 case PKT_OPEN
: return bgp_rx_open(conn
, pkt
, len
);
3093 case PKT_UPDATE
: return bgp_rx_update(conn
, pkt
, len
);
3094 case PKT_NOTIFICATION
: return bgp_rx_notification(conn
, pkt
, len
);
3095 case PKT_KEEPALIVE
: return bgp_rx_keepalive(conn
);
3096 case PKT_ROUTE_REFRESH
: return bgp_rx_route_refresh(conn
, pkt
, len
);
3097 default: bgp_error(conn
, 1, 3, pkt
+18, 1);
3102 * bgp_rx - handle received data
3104 * @size: amount of data received
3106 * bgp_rx() is called by the socket layer whenever new data arrive from
3107 * the underlying TCP connection. It assembles the data fragments to packets,
3108 * checks their headers and framing and passes complete packets to
3112 bgp_rx(sock
*sk
, uint size
)
3114 struct bgp_conn
*conn
= sk
->data
;
3115 byte
*pkt_start
= sk
->rbuf
;
3116 byte
*end
= pkt_start
+ size
;
3119 DBG("BGP: RX hook: Got %d bytes\n", size
);
3120 while (end
>= pkt_start
+ BGP_HEADER_LENGTH
)
3122 if ((conn
->state
== BS_CLOSE
) || (conn
->sk
!= sk
))
3125 if (pkt_start
[i
] != 0xff)
3127 bgp_error(conn
, 1, 1, NULL
, 0);
3130 len
= get_u16(pkt_start
+16);
3131 if ((len
< BGP_HEADER_LENGTH
) || (len
> bgp_max_packet_length(conn
)))
3133 bgp_error(conn
, 1, 2, pkt_start
+16, 2);
3136 if (end
< pkt_start
+ len
)
3138 bgp_rx_packet(conn
, pkt_start
, len
);
3141 if (pkt_start
!= sk
->rbuf
)
3143 memmove(sk
->rbuf
, pkt_start
, end
- pkt_start
);
3144 sk
->rpos
= sk
->rbuf
+ (end
- pkt_start
);