]>
git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/packets.c
2 * BIRD -- BGP Packet Processing
4 * (c) 2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
11 #include "nest/bird.h"
12 #include "nest/iface.h"
13 #include "nest/protocol.h"
14 #include "nest/route.h"
15 #include "nest/attrs.h"
16 #include "nest/mrtdump.h"
17 #include "conf/conf.h"
18 #include "lib/unaligned.h"
19 #include "lib/socket.h"
25 static struct rate_limit rl_rcv_update
, rl_snd_update
;
27 /* Table for state -> RFC 6608 FSM error subcodes */
28 static byte fsm_err_subcode
[ BS_MAX
] = {
35 * MRT Dump format is not semantically specified.
36 * We will use these values in appropriate fields:
38 * Local AS, Remote AS - configured AS numbers for given BGP instance.
39 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
41 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
42 * changes) and MESSAGE (for received BGP messages).
44 * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
45 * only when AS4 session is established and even in that case MESSAGE
46 * does not use AS4 variant for initial OPEN message. This strange
47 * behavior is here for compatibility with Quagga and Bgpdump,
/*
 * mrt_put_bgp4_hdr - write the common BGP4MP header fields for a connection.
 * @buf: output position
 * @conn: BGP connection whose protocol instance and socket are described
 * @as4: selects the AS number encoding
 *
 * The visible statements write the remote and local AS numbers (either as
 * two 32-bit values or as two 16-bit values), the interface index of the
 * neighbor, the address family, and the two addresses of the socket.
 *
 * NOTE(review): the lines testing as4, advancing buf after each group and
 * returning the final position are not present in this extraction; the
 * callers assign the result to their write pointer. Confirm against the
 * original file.
 */
51 mrt_put_bgp4_hdr ( byte
* buf
, struct bgp_conn
* conn
, int as4
)
53 struct bgp_proto
* p
= conn
-> bgp
;
/* 32-bit encoding: remote AS at offset 0, local AS at offset 4 */
57 put_u32 ( buf
+ 0 , p
-> remote_as
);
58 put_u32 ( buf
+ 4 , p
-> local_as
);
/* 16-bit encoding: AS numbers above 0xFFFF are replaced by AS_TRANS */
63 put_u16 ( buf
+ 0 , ( p
-> remote_as
<= 0xFFFF ) ? p
-> remote_as
: AS_TRANS
);
64 put_u16 ( buf
+ 2 , ( p
-> local_as
<= 0xFFFF ) ? p
-> local_as
: AS_TRANS
);
/* Interface index is 0 when there is no neighbor or it has no interface */
68 put_u16 ( buf
+ 0 , ( p
-> neigh
&& p
-> neigh
-> iface
) ? p
-> neigh
-> iface
-> index
: 0 );
69 put_u16 ( buf
+ 2 , BGP_AF
);
/* Socket daddr first, then saddr; IPA_NONE is written when there is no socket */
71 buf
= ipa_put_addr ( buf
, conn
-> sk
? conn
-> sk
-> daddr
: IPA_NONE
);
72 buf
= ipa_put_addr ( buf
, conn
-> sk
? conn
-> sk
-> saddr
: IPA_NONE
);
78 mrt_dump_bgp_packet ( struct bgp_conn
* conn
, byte
* pkt
, unsigned len
)
80 byte buf
[ BGP_MAX_PACKET_LENGTH
+ 128 ];
81 byte
* bp
= buf
+ MRTDUMP_HDR_LENGTH
;
82 int as4
= conn
-> bgp
-> as4_session
;
84 bp
= mrt_put_bgp4_hdr ( bp
, conn
, as4
);
87 mrt_dump_message (& conn
-> bgp
-> p
, BGP4MP
, as4
? BGP4MP_MESSAGE_AS4
: BGP4MP_MESSAGE
,
/*
 * convert_state - translate a BS_* connection state into the number stored
 * in MRT state-change records: BS_CLOSE becomes 1, every other state
 * becomes state + 1.
 */
92 convert_state ( unsigned state
)
94 /* Convert state from our BS_* values to values used in MRTDump */
95 return ( state
== BS_CLOSE
) ? 1 : state
+ 1 ;
/*
 * mrt_dump_bgp_state_change - emit a BGP4MP_STATE_CHANGE_AS4 MRT record.
 * @conn: connection whose state changed
 * @old: previous BS_* state
 * @new: new BS_* state
 *
 * The record body is the BGP4MP header (always requested in its AS4 form,
 * as4 = 1) followed by the old and new states as 16-bit values converted
 * by convert_state().
 *
 * NOTE(review): the declaration of buf is not present in this extraction.
 */
99 mrt_dump_bgp_state_change ( struct bgp_conn
* conn
, unsigned old
, unsigned new )
/* Leave MRTDUMP_HDR_LENGTH bytes free at the start of buf */
102 byte
* bp
= buf
+ MRTDUMP_HDR_LENGTH
;
104 bp
= mrt_put_bgp4_hdr ( bp
, conn
, 1 );
105 put_u16 ( bp
+ 0 , convert_state ( old
));
106 put_u16 ( bp
+ 2 , convert_state ( new ));
/* NOTE(review): the length passed is bp - buf; a "bp += 4" after the two
   state fields is presumably among the missing lines - confirm */
108 mrt_dump_message (& conn
-> bgp
-> p
, BGP4MP
, BGP4MP_STATE_CHANGE_AS4
, buf
, bp
- buf
);
/*
 * bgp_create_notification - fill in the body of a NOTIFICATION message.
 * @conn: connection holding the pending notify_code, notify_subcode and
 *        notify_data / notify_size
 * @buf: start of the message body
 *
 * Writes the error code, the subcode and notify_size bytes of data.
 * Returns the position just past the written body.
 */
112 bgp_create_notification ( struct bgp_conn
* conn
, byte
* buf
)
/* NOTE(review): p is not referenced by any visible statement; presumably
   the BGP_TRACE macro uses it - confirm */
114 struct bgp_proto
* p
= conn
-> bgp
;
116 BGP_TRACE ( D_PACKETS
, "Sending NOTIFICATION(code=%d.%d)" , conn
-> notify_code
, conn
-> notify_subcode
);
117 buf
[ 0 ] = conn
-> notify_code
;
118 buf
[ 1 ] = conn
-> notify_subcode
;
/* Data follows the two-byte code/subcode pair */
119 memcpy ( buf
+ 2 , conn
-> notify_data
, conn
-> notify_size
);
120 return buf
+ 2 + conn
-> notify_size
;
/*
 * bgp_put_cap_ipv6 - append the multiprotocol capability for IPv6.
 * @conn: unused
 * @buf: write position
 *
 * Writes six bytes: capability code 1, data length 4, then the two-byte
 * AFI (0, BGP_AF_IPV6), a reserved byte and SAFI 1.
 *
 * NOTE(review): the return statement is not present in this extraction;
 * bgp_create_open() assigns the result back to its write pointer.
 */
125 bgp_put_cap_ipv6 ( struct bgp_conn
* conn UNUSED
, byte
* buf
)
127 * buf
++ = 1 ; /* Capability 1: Multiprotocol extensions */
128 * buf
++ = 4 ; /* Capability data length */
129 * buf
++ = 0 ; /* We support AF IPv6 */
130 * buf
++ = BGP_AF_IPV6
;
131 * buf
++ = 0 ; /* RFU */
132 * buf
++ = 1 ; /* and SAFI 1 */
/*
 * bgp_put_cap_ipv4 - append the multiprotocol capability for IPv4.
 * @conn: unused
 * @buf: write position
 *
 * Writes six bytes: capability code 1, data length 4, then the two-byte
 * AFI (0, BGP_AF_IPV4), a reserved byte and SAFI 1.
 *
 * NOTE(review): the return statement is not present in this extraction;
 * bgp_create_open() assigns the result back to its write pointer.
 */
139 bgp_put_cap_ipv4 ( struct bgp_conn
* conn UNUSED
, byte
* buf
)
141 * buf
++ = 1 ; /* Capability 1: Multiprotocol extensions */
142 * buf
++ = 4 ; /* Capability data length */
143 * buf
++ = 0 ; /* We support AF IPv4 */
144 * buf
++ = BGP_AF_IPV4
;
145 * buf
++ = 0 ; /* RFU */
146 * buf
++ = 1 ; /* and SAFI 1 */
/*
 * bgp_put_cap_rr - append the route refresh capability.
 * @conn: unused
 * @buf: write position
 *
 * Writes two bytes: capability code 2 and a zero data length.
 *
 * NOTE(review): the return statement is not present in this extraction;
 * bgp_create_open() assigns the result back to its write pointer.
 */
152 bgp_put_cap_rr ( struct bgp_conn
* conn UNUSED
, byte
* buf
)
154 * buf
++ = 2 ; /* Capability 2: Support for route refresh */
155 * buf
++ = 0 ; /* Capability data length */
/*
 * bgp_put_cap_as4 - append the 4-octet AS number capability.
 * @conn: connection; the local AS of its protocol instance is advertised
 * @buf: write position
 *
 * Writes capability code 65, data length 4 and the local AS as a 32-bit
 * value.
 *
 * NOTE(review): the statement returning the position past the 4-byte AS
 * is not present in this extraction - confirm against the original file.
 */
160 bgp_put_cap_as4 ( struct bgp_conn
* conn
, byte
* buf
)
162 * buf
++ = 65 ; /* Capability 65: Support for 4-octet AS number */
163 * buf
++ = 4 ; /* Capability data length */
164 put_u32 ( buf
, conn
-> bgp
-> local_as
);
/*
 * bgp_put_cap_add_path - append the ADD-PATH capability.
 * @conn: connection; the configured add_path value is advertised
 * @buf: write position
 *
 * Writes capability code 69, data length 4, and an AFI/SAFI entry ending
 * with the configured add_path value.
 *
 * NOTE(review): only three of the four data bytes are visible here; the
 * second AFI byte and the return statement appear to be among the lines
 * missing from this extraction - confirm against the original file.
 */
169 bgp_put_cap_add_path ( struct bgp_conn
* conn
, byte
* buf
)
171 * buf
++ = 69 ; /* Capability 69: Support for ADD-PATH */
172 * buf
++ = 4 ; /* Capability data length */
174 * buf
++ = 0 ; /* Appropriate AF */
176 * buf
++ = 1 ; /* SAFI 1 */
/* Last data byte: the add_path setting from the configuration */
178 * buf
++ = conn
-> bgp
-> cf
-> add_path
;
/*
 * bgp_create_open - fill in the body of an OPEN message.
 * @conn: connection the OPEN is sent on
 * @buf: start of the message body
 *
 * Fixed part: version at offset 0, 16-bit AS at offset 1, hold time at
 * offset 3, local router ID at offset 5. Capabilities are then appended
 * according to the configuration, unless the connection was started in
 * BSS_CONNECT_NOCAP state, and the optional-parameter length at offset 9
 * is filled in.
 *
 * NOTE(review): several lines of this function (local declarations, else
 * branches, the conditions guarding the IPv6 and ADD-PATH capabilities,
 * and the return statements) are not present in this extraction.
 */
184 bgp_create_open ( struct bgp_conn
* conn
, byte
* buf
)
186 struct bgp_proto
* p
= conn
-> bgp
;
190 BGP_TRACE ( D_PACKETS
, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)" ,
191 BGP_VERSION
, p
-> local_as
, p
-> cf
-> hold_time
, p
-> local_id
);
192 buf
[ 0 ] = BGP_VERSION
;
/* NOTE(review): this test is "< 0xFFFF" while mrt_put_bgp4_hdr() uses
   "<= 0xFFFF", so a local AS of exactly 0xFFFF is sent as AS_TRANS here -
   confirm whether the difference is intentional */
193 put_u16 ( buf
+ 1 , ( p
-> local_as
< 0xFFFF ) ? p
-> local_as
: AS_TRANS
);
194 put_u16 ( buf
+ 3 , p
-> cf
-> hold_time
);
195 put_u32 ( buf
+ 5 , p
-> local_id
);
197 if ( conn
-> start_state
== BSS_CONNECT_NOCAP
)
199 BGP_TRACE ( D_PACKETS
, "Skipping capabilities" );
204 /* Skipped 3 B for length field and Capabilities parameter header */
/* Each helper appends one capability and its result becomes the new cap */
208 if ( p
-> cf
-> advertise_ipv4
)
209 cap
= bgp_put_cap_ipv4 ( conn
, cap
);
213 cap
= bgp_put_cap_ipv6 ( conn
, cap
);
216 if ( p
-> cf
-> enable_refresh
)
217 cap
= bgp_put_cap_rr ( conn
, cap
);
219 if ( p
-> cf
-> enable_as4
)
220 cap
= bgp_put_cap_as4 ( conn
, cap
);
223 cap
= bgp_put_cap_add_path ( conn
, cap
);
/* Capability bytes start at buf + 12: 9 fixed bytes plus the 3 skipped ones */
225 cap_len
= cap
- buf
- 12 ;
228 buf
[ 9 ] = cap_len
+ 2 ; /* Optional params len */
229 buf
[ 10 ] = 2 ; /* Option: Capability list */
230 buf
[ 11 ] = cap_len
; /* Option length */
235 buf
[ 9 ] = 0 ; /* No optional parameters */
241 bgp_encode_prefixes ( struct bgp_proto
* p
, byte
* w
, struct bgp_bucket
* buck
, unsigned int remains
)
247 while (! EMPTY_LIST ( buck
-> prefixes
) && ( remains
>= ( 5 + sizeof ( ip_addr
))))
249 struct bgp_prefix
* px
= SKIP_BACK ( struct bgp_prefix
, bucket_node
, HEAD ( buck
-> prefixes
));
250 DBG ( " \t Dequeued route %I/%d \n " , px
-> n
. prefix
, px
-> n
. pxlen
);
254 put_u32 ( w
, px
-> path_id
);
260 bytes
= ( px
-> n
. pxlen
+ 7 ) / 8 ;
263 memcpy ( w
, & a
, bytes
);
265 remains
-= bytes
+ 1 ;
266 rem_node (& px
-> bucket_node
);
267 bgp_free_prefix ( p
, px
);
268 // fib_delete(&p->prefix_fib, px);
274 bgp_flush_prefixes ( struct bgp_proto
* p
, struct bgp_bucket
* buck
)
276 while (! EMPTY_LIST ( buck
-> prefixes
))
278 struct bgp_prefix
* px
= SKIP_BACK ( struct bgp_prefix
, bucket_node
, HEAD ( buck
-> prefixes
));
279 log ( L_ERR
"%s: - route %I/%d skipped" , p
-> p
. name
, px
-> n
. prefix
, px
-> n
. pxlen
);
280 rem_node (& px
-> bucket_node
);
281 bgp_free_prefix ( p
, px
);
282 // fib_delete(&p->prefix_fib, px);
286 #ifndef IPV6 /* IPv4 version */
289 bgp_create_update ( struct bgp_conn
* conn
, byte
* buf
)
291 struct bgp_proto
* p
= conn
-> bgp
;
292 struct bgp_bucket
* buck
;
293 int remains
= BGP_MAX_PACKET_LENGTH
- BGP_HEADER_LENGTH
- 4 ;
300 if (( buck
= p
-> withdraw_bucket
) && ! EMPTY_LIST ( buck
-> prefixes
))
302 DBG ( "Withdrawn routes: \n " );
303 wd_size
= bgp_encode_prefixes ( p
, w
, buck
, remains
);
307 put_u16 ( buf
, wd_size
);
311 while (( buck
= ( struct bgp_bucket
*) HEAD ( p
-> bucket_queue
))-> send_node
. next
)
313 if ( EMPTY_LIST ( buck
-> prefixes
))
315 DBG ( "Deleting empty bucket %p \n " , buck
);
316 rem_node (& buck
-> send_node
);
317 bgp_free_bucket ( p
, buck
);
321 DBG ( "Processing bucket %p \n " , buck
);
322 a_size
= bgp_encode_attrs ( p
, w
+ 2 , buck
-> eattrs
, 2048 );
326 log ( L_ERR
"%s: Attribute list too long, skipping corresponding routes" , p
-> p
. name
);
327 bgp_flush_prefixes ( p
, buck
);
328 rem_node (& buck
-> send_node
);
329 bgp_free_bucket ( p
, buck
);
335 r_size
= bgp_encode_prefixes ( p
, w
, buck
, remains
- a_size
);
340 if (! a_size
) /* Attributes not already encoded */
345 if ( wd_size
|| r_size
)
347 BGP_TRACE_RL (& rl_snd_update
, D_PACKETS
, "Sending UPDATE" );
354 #else /* IPv6 version */
/*
 * same_iface - test whether an address is reached over the BGP neighbor's
 * interface.
 * @p: BGP protocol instance
 * @ip: address to look up
 *
 * Returns nonzero only when neigh_find() yields a neighbor entry for @ip,
 * the instance has a neighbor of its own (p->neigh), and both refer to
 * the same iface.
 */
357 same_iface ( struct bgp_proto
* p
, ip_addr
* ip
)
359 neighbor
* n
= neigh_find (& p
-> p
, ip
, 0 );
360 return n
&& p
-> neigh
&& n
-> iface
== p
-> neigh
-> iface
;
364 bgp_create_update ( struct bgp_conn
* conn
, byte
* buf
)
366 struct bgp_proto
* p
= conn
-> bgp
;
367 struct bgp_bucket
* buck
;
368 int size
, second
, rem_stored
;
369 int remains
= BGP_MAX_PACKET_LENGTH
- BGP_HEADER_LENGTH
- 4 ;
370 byte
* w
, * w_stored
, * tmp
, * tstart
;
371 ip_addr
* ipp
, ip
, ip_ll
;
378 if (( buck
= p
-> withdraw_bucket
) && ! EMPTY_LIST ( buck
-> prefixes
))
380 DBG ( "Withdrawn routes: \n " );
381 tmp
= bgp_attach_attr_wa (& ea
, bgp_linpool
, BA_MP_UNREACH_NLRI
, remains
- 8 );
383 * tmp
++ = BGP_AF_IPV6
;
385 ea
-> attrs
[ 0 ]. u
. ptr
-> length
= 3 + bgp_encode_prefixes ( p
, tmp
, buck
, remains
- 11 );
386 size
= bgp_encode_attrs ( p
, w
, ea
, remains
);
394 while (( buck
= ( struct bgp_bucket
*) HEAD ( p
-> bucket_queue
))-> send_node
. next
)
396 if ( EMPTY_LIST ( buck
-> prefixes
))
398 DBG ( "Deleting empty bucket %p \n " , buck
);
399 rem_node (& buck
-> send_node
);
400 bgp_free_bucket ( p
, buck
);
404 DBG ( "Processing bucket %p \n " , buck
);
405 rem_stored
= remains
;
408 size
= bgp_encode_attrs ( p
, w
, buck
-> eattrs
, 2048 );
411 log ( L_ERR
"%s: Attribute list too long, skipping corresponding routes" , p
-> p
. name
);
412 bgp_flush_prefixes ( p
, buck
);
413 rem_node (& buck
-> send_node
);
414 bgp_free_bucket ( p
, buck
);
420 /* We have two addresses here in NEXT_HOP eattr. Really.
421 Unless NEXT_HOP was modified by filter */
422 nh
= ea_find ( buck
-> eattrs
, EA_CODE ( EAP_BGP
, BA_NEXT_HOP
));
424 second
= ( nh
-> u
. ptr
-> length
== NEXT_HOP_LENGTH
);
425 ipp
= ( ip_addr
*) nh
-> u
. ptr
-> data
;
429 if ( ipa_equal ( ip
, p
-> source_addr
))
430 ip_ll
= p
-> local_link
;
433 /* If we send a route with 'third party' next hop located
434 * on the same interface, we should also send a link local
435 * next hop address. We use the received one (stored in the
436 * other part of BA_NEXT_HOP eattr). If we didn't receive
437 * it (for example it is a static route), we can't use
438 * 'third party' next hop and we have to use local IP address
439 * as next hop. Sending original next hop address without
440 * link local address seems to be a natural way to solve that
441 * problem, but it is contrary to RFC 2545 and Quagga does not
442 * accept such routes.
444 * There are two cases, either we have global IP, or
445 * IPA_NONE if the neighbor is link-local. For IPA_NONE,
446 * we suppose it is on the same iface, see bgp_update_attrs().
449 if ( ipa_zero ( ip
) || same_iface ( p
, & ip
))
451 if ( second
&& ipa_nonzero ( ipp
[ 1 ]))
455 switch ( p
-> cf
-> missing_lladdr
)
459 ip_ll
= p
-> local_link
;
462 log ( L_ERR
"%s: Missing link-local next hop address, skipping corresponding routes" , p
-> p
. name
);
464 remains
= rem_stored
;
465 bgp_flush_prefixes ( p
, buck
);
466 rem_node (& buck
-> send_node
);
467 bgp_free_bucket ( p
, buck
);
476 tstart
= tmp
= bgp_attach_attr_wa (& ea
, bgp_linpool
, BA_MP_REACH_NLRI
, remains
- 8 );
478 * tmp
++ = BGP_AF_IPV6
;
481 if ( ipa_has_link_scope ( ip
))
484 if ( ipa_nonzero ( ip_ll
))
488 memcpy ( tmp
, & ip
, 16 );
490 memcpy ( tmp
+ 16 , & ip_ll
, 16 );
497 memcpy ( tmp
, & ip
, 16 );
501 * tmp
++ = 0 ; /* No SNPA information */
502 tmp
+= bgp_encode_prefixes ( p
, tmp
, buck
, remains
- ( 8 + 3 + 32 + 1 ));
503 ea
-> attrs
[ 0 ]. u
. ptr
-> length
= tmp
- tstart
;
504 size
= bgp_encode_attrs ( p
, w
, ea
, remains
);
512 put_u16 ( buf
+ 2 , size
);
513 lp_flush ( bgp_linpool
);
516 BGP_TRACE_RL (& rl_snd_update
, D_PACKETS
, "Sending UPDATE" );
526 bgp_create_route_refresh ( struct bgp_conn
* conn
, byte
* buf
)
528 struct bgp_proto
* p
= conn
-> bgp
;
529 BGP_TRACE ( D_PACKETS
, "Sending ROUTE-REFRESH" );
533 * buf
++ = 0 ; /* RFU */
534 * buf
++ = 1 ; /* and SAFI 1 */
/*
 * bgp_create_header - fill in the fixed header at the start of a message.
 * @buf: start of the message
 * @len: total message length, stored as a 16-bit value at offset 16
 * @type: message type
 *
 * NOTE(review): the statement storing @type is not present in this
 * extraction; bgp_rx_packet() reads the type from offset 18 - confirm
 * against the original file.
 */
539 bgp_create_header ( byte
* buf
, unsigned int len
, unsigned int type
)
541 memset ( buf
, 0xff , 16 ); /* Marker */
542 put_u16 ( buf
+ 16 , len
);
547 * bgp_fire_tx - transmit packets
550 * Whenever the transmit buffers of the underlying TCP connection
551 * are free and we have any packets queued for sending, the socket functions
552 * call bgp_fire_tx() which takes care of selecting the highest priority packet
553 * queued (Notification > Keepalive > Open > Update), assembling its header
554 * and body and sending it to the connection.
557 bgp_fire_tx ( struct bgp_conn
* conn
)
559 struct bgp_proto
* p
= conn
-> bgp
;
560 unsigned int s
= conn
-> packets_to_send
;
562 byte
* buf
, * pkt
, * end
;
567 conn
-> packets_to_send
= 0 ;
571 pkt
= buf
+ BGP_HEADER_LENGTH
;
573 if ( s
& ( 1 << PKT_SCHEDULE_CLOSE
))
575 /* We can finally close connection and enter idle state */
576 bgp_conn_enter_idle_state ( conn
);
579 if ( s
& ( 1 << PKT_NOTIFICATION
))
581 s
= 1 << PKT_SCHEDULE_CLOSE
;
582 type
= PKT_NOTIFICATION
;
583 end
= bgp_create_notification ( conn
, pkt
);
585 else if ( s
& ( 1 << PKT_KEEPALIVE
))
587 s
&= ~( 1 << PKT_KEEPALIVE
);
588 type
= PKT_KEEPALIVE
;
589 end
= pkt
; /* Keepalives carry no data */
590 BGP_TRACE ( D_PACKETS
, "Sending KEEPALIVE" );
591 bgp_start_timer ( conn
-> keepalive_timer
, conn
-> keepalive_time
);
593 else if ( s
& ( 1 << PKT_OPEN
))
595 s
&= ~( 1 << PKT_OPEN
);
597 end
= bgp_create_open ( conn
, pkt
);
599 else if ( s
& ( 1 << PKT_ROUTE_REFRESH
))
601 s
&= ~( 1 << PKT_ROUTE_REFRESH
);
602 type
= PKT_ROUTE_REFRESH
;
603 end
= bgp_create_route_refresh ( conn
, pkt
);
605 else if ( s
& ( 1 << PKT_UPDATE
))
607 end
= bgp_create_update ( conn
, pkt
);
611 conn
-> packets_to_send
= 0 ;
617 conn
-> packets_to_send
= s
;
618 bgp_create_header ( buf
, end
- buf
, type
);
619 return sk_send ( sk
, end
- buf
);
623 * bgp_schedule_packet - schedule a packet for transmission
627 * Schedule a packet of type @type to be sent as soon as possible.
/*
 * Marks packet @type as pending by setting bit (1 << type) in
 * conn->packets_to_send, then schedules the connection's tx_ev event if
 * the connection has a socket whose tpos equals tbuf.
 */
630 bgp_schedule_packet ( struct bgp_conn
* conn
, int type
)
632 DBG ( "BGP: Scheduling packet type %d \n " , type
);
633 conn
-> packets_to_send
|= 1 << type
;
/* NOTE(review): tpos == tbuf presumably means nothing is waiting in the
   socket's transmit buffer - confirm against the socket layer */
634 if ( conn
-> sk
&& conn
-> sk
-> tpos
== conn
-> sk
-> tbuf
)
635 ev_schedule ( conn
-> tx_ev
);
639 bgp_kick_tx ( void * vconn
)
641 struct bgp_conn
* conn
= vconn
;
643 DBG ( "BGP: kicking TX \n " );
644 while ( bgp_fire_tx ( conn
) > 0 )
651 struct bgp_conn
* conn
= sk
-> data
;
653 DBG ( "BGP: TX hook \n " );
654 while ( bgp_fire_tx ( conn
) > 0 )
658 /* Capability negotiation as per RFC 2842 */
661 bgp_parse_capabilities ( struct bgp_conn
* conn
, byte
* opt
, int len
)
663 // struct bgp_proto *p = conn->bgp;
668 if ( len
< 2 || len
< 2 + opt
[ 1 ])
675 case 2 : /* Route refresh capability, RFC 2918 */
678 conn
-> peer_refresh_support
= 1 ;
681 case 65 : /* AS4 capability, RFC 4893 */
684 conn
-> peer_as4_support
= 1 ;
685 if ( conn
-> bgp
-> cf
-> enable_as4
)
686 conn
-> advertised_as
= get_u32 ( opt
+ 2 );
689 case 69 : /* ADD-PATH capability, draft */
692 for ( i
= 0 ; i
< cl
; i
+= 4 )
693 if ( opt
[ 2 + i
+ 0 ] == 0 && opt
[ 2 + i
+ 1 ] == BGP_AF
&& opt
[ 2 + i
+ 2 ] == 1 ) /* Match AFI/SAFI */
694 conn
-> peer_add_path
= opt
[ 2 + i
+ 3 ];
695 if ( conn
-> peer_add_path
> ADD_PATH_FULL
)
700 /* We can safely ignore all other capabilities */
708 bgp_error ( conn
, 2 , 0 , NULL
, 0 );
713 bgp_parse_options ( struct bgp_conn
* conn
, byte
* opt
, int len
)
715 struct bgp_proto
* p
= conn
-> bgp
;
720 if ( len
< 2 || len
< 2 + opt
[ 1 ])
721 { bgp_error ( conn
, 2 , 0 , NULL
, 0 ); return 0 ; }
725 DBG ( " \t Option %02x:" , opt
[ 0 ]);
726 for ( i
= 0 ; i
< opt
[ 1 ]; i
++)
727 DBG ( " %02x" , opt
[ 2 + i
]);
736 if ( conn
-> start_state
== BSS_CONNECT_NOCAP
)
737 BGP_TRACE ( D_PACKETS
, "Ignoring received capabilities" );
739 bgp_parse_capabilities ( conn
, opt
+ 2 , ol
);
744 * BGP specs don't tell us to send back the option
745 * we didn't recognize, but it's common practice
746 * to do so. Also, capability negotiation with
747 * Cisco routers doesn't work without that.
749 bgp_error ( conn
, 2 , 4 , opt
, ol
);
759 bgp_rx_open ( struct bgp_conn
* conn
, byte
* pkt
, int len
)
761 struct bgp_conn
* other
;
762 struct bgp_proto
* p
= conn
-> bgp
;
768 if ( conn
-> state
!= BS_OPENSENT
)
769 { bgp_error ( conn
, 5 , fsm_err_subcode
[ conn
-> state
], NULL
, 0 ); return ; }
771 /* Check message contents */
772 if ( len
< 29 || len
!= 29 + pkt
[ 28 ])
773 { bgp_error ( conn
, 1 , 2 , pkt
+ 16 , 2 ); return ; }
774 if ( pkt
[ 19 ] != BGP_VERSION
)
775 { bgp_error ( conn
, 2 , 1 , pkt
+ 19 , 1 ); return ; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
776 conn
-> advertised_as
= base_as
= get_u16 ( pkt
+ 20 );
777 hold
= get_u16 ( pkt
+ 22 );
778 id
= get_u32 ( pkt
+ 24 );
779 BGP_TRACE ( D_PACKETS
, "Got OPEN(as=%d,hold=%d,id=%08x)" , conn
-> advertised_as
, hold
, id
);
781 if ( bgp_parse_options ( conn
, pkt
+ 29 , pkt
[ 28 ]))
784 if ( hold
> 0 && hold
< 3 )
785 { bgp_error ( conn
, 2 , 6 , pkt
+ 22 , 2 ); return ; }
787 if (! id
|| id
== 0xffffffff || id
== p
-> local_id
)
788 { bgp_error ( conn
, 2 , 3 , pkt
+ 24 , - 4 ); return ; }
790 if (( conn
-> advertised_as
!= base_as
) && ( base_as
!= AS_TRANS
))
791 log ( L_WARN
"%s: Peer advertised inconsistent AS numbers" , p
-> p
. name
);
793 if ( conn
-> advertised_as
!= p
-> remote_as
)
795 if ( conn
-> peer_as4_support
)
797 u32 val
= htonl ( conn
-> advertised_as
);
798 bgp_error ( conn
, 2 , 2 , ( byte
*) & val
, 4 );
801 bgp_error ( conn
, 2 , 2 , pkt
+ 20 , 2 );
806 /* Check the other connection */
807 other
= ( conn
== & p
-> outgoing_conn
) ? & p
-> incoming_conn
: & p
-> outgoing_conn
;
808 switch ( other
-> state
)
817 if (( p
-> local_id
< id
) == ( conn
== & p
-> incoming_conn
))
819 /* Should close the other connection */
820 BGP_TRACE ( D_EVENTS
, "Connection collision, giving up the other connection" );
821 bgp_error ( other
, 6 , 7 , NULL
, 0 );
826 /* Should close this connection */
827 BGP_TRACE ( D_EVENTS
, "Connection collision, giving up this connection" );
828 bgp_error ( conn
, 6 , 7 , NULL
, 0 );
831 bug ( "bgp_rx_open: Unknown state" );
834 /* Update our local variables */
835 conn
-> hold_time
= MIN ( hold
, p
-> cf
-> hold_time
);
836 conn
-> keepalive_time
= p
-> cf
-> keepalive_time
? : conn
-> hold_time
/ 3 ;
838 p
-> as4_session
= p
-> cf
-> enable_as4
&& conn
-> peer_as4_support
;
839 p
-> add_path_rx
= ( p
-> cf
-> add_path
& ADD_PATH_RX
) && ( conn
-> peer_add_path
& ADD_PATH_TX
);
840 p
-> add_path_tx
= ( p
-> cf
-> add_path
& ADD_PATH_TX
) && ( conn
-> peer_add_path
& ADD_PATH_RX
);
843 p
-> p
. accept_ra_types
= RA_ANY
;
845 DBG ( "BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d \n " , conn
-> hold_time
, conn
-> keepalive_time
, p
-> remote_as
, p
-> remote_id
, p
-> as4_session
);
847 bgp_schedule_packet ( conn
, PKT_KEEPALIVE
);
848 bgp_start_timer ( conn
-> hold_timer
, conn
-> hold_time
);
849 bgp_conn_enter_openconfirm_state ( conn
);
852 #define DECODE_PREFIX(pp, ll) do { \
853 if (p->add_path_rx) \
855 if (ll < 5) { err=1; goto done; } \
856 path_id = get_u32(pp); \
863 if (b > BITS_PER_IP_ADDRESS) { err=10; goto done; } \
865 if (ll < q) { err=1; goto done; } \
866 memcpy(&prefix, pp, q); \
870 prefix = ipa_and(prefix, ipa_mkmask(b)); \
876 bgp_rte_update ( struct bgp_proto
* p
, ip_addr prefix
, int pxlen
,
877 u32 path_id
, u32
* last_id
, struct rte_src
** src
,
880 if ( path_id
!= * last_id
)
882 * src
= rt_get_source (& p
-> p
, path_id
);
892 /* Prepare cached route attributes */
899 net
* n
= net_get ( p
-> p
. table
, prefix
, pxlen
);
900 rte
* e
= rte_get_temp ( rta_clone (* a
));
903 e
-> u
. bgp
. suppressed
= 0 ;
904 rte_update2 ( p
-> p
. main_ahook
, n
, e
, * src
);
908 bgp_rte_withdraw ( struct bgp_proto
* p
, ip_addr prefix
, int pxlen
,
909 u32 path_id
, u32
* last_id
, struct rte_src
** src
)
911 if ( path_id
!= * last_id
)
913 * src
= rt_find_source (& p
-> p
, path_id
);
917 net
* n
= net_find ( p
-> p
. table
, prefix
, pxlen
);
918 rte_update2 ( p
-> p
. main_ahook
, n
, NULL
, * src
);
922 bgp_set_next_hop ( struct bgp_proto
* p
, rta
* a
)
924 struct eattr
* nh
= ea_find ( a
-> eattrs
, EA_CODE ( EAP_BGP
, BA_NEXT_HOP
));
925 ip_addr
* nexthop
= ( ip_addr
*) nh
-> u
. ptr
-> data
;
928 int second
= ( nh
-> u
. ptr
-> length
== NEXT_HOP_LENGTH
) && ipa_nonzero ( nexthop
[ 1 ]);
930 /* First address should not be link-local, but may be zero in direct mode */
931 if ( ipa_has_link_scope (* nexthop
))
937 if ( p
-> cf
-> gw_mode
== GW_DIRECT
)
941 if ( ipa_nonzero (* nexthop
))
942 ng
= neigh_find (& p
-> p
, nexthop
, 0 );
943 else if ( second
) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
944 ng
= neigh_find2 (& p
-> p
, nexthop
+ 1 , p
-> neigh
-> iface
, 0 );
950 if ( ng
-> scope
== SCOPE_HOST
)
953 a
-> dest
= RTD_ROUTER
;
955 a
-> iface
= ng
-> iface
;
959 else /* GW_RECURSIVE */
961 if ( ipa_zero (* nexthop
))
964 rta_set_recursive_next_hop ( p
-> p
. table
, a
, p
-> igp_table
, nexthop
, nexthop
+ second
);
970 #ifndef IPV6 /* IPv4 version */
973 bgp_do_rx_update ( struct bgp_conn
* conn
,
974 byte
* withdrawn
, int withdrawn_len
,
975 byte
* nlri
, int nlri_len
,
976 byte
* attrs
, int attr_len
)
978 struct bgp_proto
* p
= conn
-> bgp
;
979 struct rte_src
* src
= p
-> p
. main_source
;
986 /* Withdraw routes */
987 while ( withdrawn_len
)
989 DECODE_PREFIX ( withdrawn
, withdrawn_len
);
990 DBG ( "Withdraw %I/%d \n " , prefix
, pxlen
);
992 bgp_rte_withdraw ( p
, prefix
, pxlen
, path_id
, & last_id
, & src
);
995 if (! attr_len
&& ! nlri_len
) /* shortcut */
998 a0
= bgp_decode_attrs ( conn
, attrs
, attr_len
, bgp_linpool
, nlri_len
);
1000 if ( conn
-> state
!= BS_ESTABLISHED
) /* fatal error during decoding */
1003 if ( a0
&& ! bgp_set_next_hop ( p
, a0
))
1008 src
= p
-> p
. main_source
;
1012 DECODE_PREFIX ( nlri
, nlri_len
);
1013 DBG ( "Add %I/%d \n " , prefix
, pxlen
);
1016 bgp_rte_update ( p
, prefix
, pxlen
, path_id
, & last_id
, & src
, a0
, & a
);
1017 else /* Forced withdraw as a result of soft error */
1018 bgp_rte_withdraw ( p
, prefix
, pxlen
, path_id
, & last_id
, & src
);
1026 bgp_error ( conn
, 3 , err
, NULL
, 0 );
1031 #else /* IPv6 version */
1033 #define DO_NLRI(name) \
1034 start = x = p->name##_start; \
1035 len = len0 = p->name##_len; \
1038 if (len < 3) { err=9; goto done; } \
1043 DBG( " \t NLRI AF=%d sub=%d len=%d \n " , af, sub, len);\
1047 if (af == BGP_AF_IPV6)
1050 bgp_attach_next_hop ( rta
* a0
, byte
* x
)
1052 ip_addr
* nh
= ( ip_addr
*) bgp_attach_attr_wa (& a0
-> eattrs
, bgp_linpool
, BA_NEXT_HOP
, NEXT_HOP_LENGTH
);
1053 memcpy ( nh
, x
+ 1 , 16 );
1056 /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
1059 memcpy ( nh
+ 1 , x
+ 17 , 16 );
1068 bgp_do_rx_update ( struct bgp_conn
* conn
,
1069 byte
* withdrawn
, int withdrawn_len
,
1070 byte
* nlri
, int nlri_len
,
1071 byte
* attrs
, int attr_len
)
1073 struct bgp_proto
* p
= conn
-> bgp
;
1074 struct rte_src
* src
= p
-> p
. main_source
;
1084 p
-> mp_reach_len
= 0 ;
1085 p
-> mp_unreach_len
= 0 ;
1086 a0
= bgp_decode_attrs ( conn
, attrs
, attr_len
, bgp_linpool
, 0 );
1088 if ( conn
-> state
!= BS_ESTABLISHED
) /* fatal error during decoding */
1095 DECODE_PREFIX ( x
, len
);
1096 DBG ( "Withdraw %I/%d \n " , prefix
, pxlen
);
1097 bgp_rte_withdraw ( p
, prefix
, pxlen
, path_id
, & last_id
, & src
);
1103 /* Create fake NEXT_HOP attribute */
1104 if ( len
< 1 || (* x
!= 16 && * x
!= 32 ) || len
< * x
+ 2 )
1105 { err
= 9 ; goto done
; }
1108 bgp_attach_next_hop ( a0
, x
);
1110 /* Also ignore one reserved byte */
1114 if ( a0
&& ! bgp_set_next_hop ( p
, a0
))
1119 src
= p
-> p
. main_source
;
1123 DECODE_PREFIX ( x
, len
);
1124 DBG ( "Add %I/%d \n " , prefix
, pxlen
);
1127 bgp_rte_update ( p
, prefix
, pxlen
, path_id
, & last_id
, & src
, a0
, & a
);
1128 else /* Forced withdraw as a result of soft error */
1129 bgp_rte_withdraw ( p
, prefix
, pxlen
, path_id
, & last_id
, & src
);
1137 if ( err
) /* Use subcode 9, not err */
1138 bgp_error ( conn
, 3 , 9 , NULL
, 0 );
1146 bgp_rx_update ( struct bgp_conn
* conn
, byte
* pkt
, int len
)
1148 struct bgp_proto
* p
= conn
-> bgp
;
1149 byte
* withdrawn
, * attrs
, * nlri
;
1150 int withdrawn_len
, attr_len
, nlri_len
;
1152 BGP_TRACE_RL (& rl_rcv_update
, D_PACKETS
, "Got UPDATE" );
1154 /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1155 if ( conn
-> state
== BS_OPENCONFIRM
)
1156 bgp_conn_enter_established_state ( conn
);
1158 if ( conn
-> state
!= BS_ESTABLISHED
)
1159 { bgp_error ( conn
, 5 , fsm_err_subcode
[ conn
-> state
], NULL
, 0 ); return ; }
1160 bgp_start_timer ( conn
-> hold_timer
, conn
-> hold_time
);
1162 /* Find parts of the packet and check sizes */
1165 bgp_error ( conn
, 1 , 2 , pkt
+ 16 , 2 );
1168 withdrawn
= pkt
+ 21 ;
1169 withdrawn_len
= get_u16 ( pkt
+ 19 );
1170 if ( withdrawn_len
+ 23 > len
)
1172 attrs
= withdrawn
+ withdrawn_len
+ 2 ;
1173 attr_len
= get_u16 ( attrs
- 2 );
1174 if ( withdrawn_len
+ attr_len
+ 23 > len
)
1176 nlri
= attrs
+ attr_len
;
1177 nlri_len
= len
- withdrawn_len
- attr_len
- 23 ;
1178 if (! attr_len
&& nlri_len
)
1180 DBG ( "Sizes: withdrawn=%d, attrs=%d, NLRI=%d \n " , withdrawn_len
, attr_len
, nlri_len
);
1182 lp_flush ( bgp_linpool
);
1184 bgp_do_rx_update ( conn
, withdrawn
, withdrawn_len
, nlri
, nlri_len
, attrs
, attr_len
);
1188 bgp_error ( conn
, 3 , 1 , NULL
, 0 );
1194 } bgp_msg_table
[] = {
1195 { 1 , 0 , "Invalid message header" },
1196 { 1 , 1 , "Connection not synchronized" },
1197 { 1 , 2 , "Bad message length" },
1198 { 1 , 3 , "Bad message type" },
1199 { 2 , 0 , "Invalid OPEN message" },
1200 { 2 , 1 , "Unsupported version number" },
1201 { 2 , 2 , "Bad peer AS" },
1202 { 2 , 3 , "Bad BGP identifier" },
1203 { 2 , 4 , "Unsupported optional parameter" },
1204 { 2 , 5 , "Authentication failure" },
1205 { 2 , 6 , "Unacceptable hold time" },
1206 { 2 , 7 , "Required capability missing" }, /* [RFC3392] */
1207 { 2 , 8 , "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1208 { 3 , 0 , "Invalid UPDATE message" },
1209 { 3 , 1 , "Malformed attribute list" },
1210 { 3 , 2 , "Unrecognized well-known attribute" },
1211 { 3 , 3 , "Missing mandatory attribute" },
1212 { 3 , 4 , "Invalid attribute flags" },
1213 { 3 , 5 , "Invalid attribute length" },
1214 { 3 , 6 , "Invalid ORIGIN attribute" },
1215 { 3 , 7 , "AS routing loop" }, /* Deprecated */
1216 { 3 , 8 , "Invalid NEXT_HOP attribute" },
1217 { 3 , 9 , "Optional attribute error" },
1218 { 3 , 10 , "Invalid network field" },
1219 { 3 , 11 , "Malformed AS_PATH" },
1220 { 4 , 0 , "Hold timer expired" },
1221 { 5 , 0 , "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1222 { 5 , 1 , "Unexpected message in OpenSent state" },
1223 { 5 , 2 , "Unexpected message in OpenConfirm state" },
1224 { 5 , 3 , "Unexpected message in Established state" },
1225 { 6 , 0 , "Cease" }, /* Subcodes are according to [RFC4486] */
1226 { 6 , 1 , "Maximum number of prefixes reached" },
1227 { 6 , 2 , "Administrative shutdown" },
1228 { 6 , 3 , "Peer de-configured" },
1229 { 6 , 4 , "Administrative reset" },
1230 { 6 , 5 , "Connection rejected" },
1231 { 6 , 6 , "Other configuration change" },
1232 { 6 , 7 , "Connection collision resolution" },
1233 { 6 , 8 , "Out of Resources" }
1237 * bgp_error_dsc - return BGP error description
1238 * @code: BGP error code
1239 * @subcode: BGP error subcode
1241 * bgp_error_dsc() returns error description for BGP errors
1242 * which might be static string or given temporary buffer.
/*
 * Looks up (code, subcode) in bgp_msg_table and returns the matching
 * message string. When no entry matches, an "Unknown error" text is
 * formatted into a static buffer, so that result is overwritten by the
 * next such call.
 *
 * NOTE(review): the declaration of i and the final return statement are
 * not present in this extraction.
 */
1245 bgp_error_dsc ( unsigned code
, unsigned subcode
)
/* NOTE(review): 32 bytes hold the formatted text only while code and
   subcode stay small (e.g. byte-sized); two full-width unsigned values
   would need 36 bytes, assuming bsprintf does no bounds checking - confirm */
1247 static char buff
[ 32 ];
/* Linear search for an exact major/minor match */
1249 for ( i
= 0 ; i
< ARRAY_SIZE ( bgp_msg_table
); i
++)
1250 if ( bgp_msg_table
[ i
]. major
== code
&& bgp_msg_table
[ i
]. minor
== subcode
)
1252 return bgp_msg_table
[ i
]. msg
;
1255 bsprintf ( buff
, "Unknown error %d.%d" , code
, subcode
);
1260 bgp_log_error ( struct bgp_proto
* p
, u8
class , char * msg
, unsigned code
, unsigned subcode
, byte
* data
, unsigned len
)
1263 byte
* t
, argbuf
[ 36 ];
1266 /* Don't report Cease messages generated by myself */
1267 if ( code
== 6 && class == BE_BGP_TX
)
1270 name
= bgp_error_dsc ( code
, subcode
);
1277 if (( code
== 2 ) && ( subcode
== 2 ) && (( len
== 2 ) || ( len
== 4 )))
1279 /* Bad peer AS - we would like to print the AS */
1280 t
+= bsprintf ( t
, "%d" , ( len
== 2 ) ? get_u16 ( data
) : get_u32 ( data
));
1285 for ( i
= 0 ; i
< len
; i
++)
1286 t
+= bsprintf ( t
, "%02x" , data
[ i
]);
1290 log ( L_REMOTE
"%s: %s: %s%s" , p
-> p
. name
, msg
, name
, argbuf
);
1294 bgp_rx_notification ( struct bgp_conn
* conn
, byte
* pkt
, int len
)
1296 struct bgp_proto
* p
= conn
-> bgp
;
1299 bgp_error ( conn
, 1 , 2 , pkt
+ 16 , 2 );
1303 unsigned code
= pkt
[ 19 ];
1304 unsigned subcode
= pkt
[ 20 ];
1305 int err
= ( code
!= 6 );
1307 bgp_log_error ( p
, BE_BGP_RX
, "Received" , code
, subcode
, pkt
+ 21 , len
- 21 );
1308 bgp_store_error ( p
, conn
, BE_BGP_RX
, ( code
<< 16 ) | subcode
);
1311 if (( code
== 2 ) && (( subcode
== 4 ) || ( subcode
== 7 ))
1312 /* Error related to capability:
1313 * 4 - Peer does not support capabilities at all.
1314 * 7 - Peer requests some capability. Strange unless it is IPv6 only peer.
1316 && ( p
-> cf
-> capabilities
== 2 )
1317 /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1318 && ( conn
-> start_state
== BSS_CONNECT
)
1319 /* Failed connection attempt has used capabilities */
1320 && ( p
-> cf
-> remote_as
<= 0xFFFF ))
1321 /* Not possible with disabled capabilities */
1323 /* We try connect without capabilities */
1324 log ( L_WARN
"%s: Capability related error received, retry with capabilities disabled" , p
-> p
. name
);
1325 p
-> start_state
= BSS_CONNECT_NOCAP
;
1330 bgp_conn_enter_close_state ( conn
);
1331 bgp_schedule_packet ( conn
, PKT_SCHEDULE_CLOSE
);
1335 bgp_update_startup_delay ( p
);
/*
 * bgp_rx_keepalive - handle a received KEEPALIVE message.
 * @conn: connection the message arrived on
 *
 * Restarts the hold timer with conn->hold_time and then acts on the
 * connection state: in BS_OPENCONFIRM the connection enters the
 * established state.
 *
 * NOTE(review): the break statements and the label in front of the
 * bgp_error() call are not present in this extraction; the error
 * (code 5 with the per-state subcode) is presumably the default branch
 * for all states other than the two listed - confirm.
 */
1341 bgp_rx_keepalive ( struct bgp_conn
* conn
)
1343 struct bgp_proto
* p
= conn
-> bgp
;
1345 BGP_TRACE ( D_PACKETS
, "Got KEEPALIVE" );
1346 bgp_start_timer ( conn
-> hold_timer
, conn
-> hold_time
);
1347 switch ( conn
-> state
)
1349 case BS_OPENCONFIRM
:
1350 bgp_conn_enter_established_state ( conn
);
1352 case BS_ESTABLISHED
:
1355 bgp_error ( conn
, 5 , fsm_err_subcode
[ conn
-> state
], NULL
, 0 );
/*
 * bgp_rx_route_refresh - handle a received ROUTE-REFRESH message.
 * @conn: connection the message arrived on
 * @pkt: start of the message, including its header
 * @len: message length
 *
 * The request is rejected with bgp_error() when the connection is not in
 * BS_ESTABLISHED (code 5), when route refresh is not enabled in the
 * configuration (code 1.3, pointing at the type byte), or when the length
 * is not BGP_HEADER_LENGTH + 4 (code 1.2, pointing at the length field).
 * Otherwise a refeed of routes is requested via proto_request_feeding().
 */
1360 bgp_rx_route_refresh ( struct bgp_conn
* conn
, byte
* pkt
, int len
)
1362 struct bgp_proto
* p
= conn
-> bgp
;
1364 BGP_TRACE ( D_PACKETS
, "Got ROUTE-REFRESH" );
1366 if ( conn
-> state
!= BS_ESTABLISHED
)
1367 { bgp_error ( conn
, 5 , fsm_err_subcode
[ conn
-> state
], NULL
, 0 ); return ; }
1369 if (! p
-> cf
-> enable_refresh
)
1370 { bgp_error ( conn
, 1 , 3 , pkt
+ 18 , 1 ); return ; }
1372 if ( len
!= ( BGP_HEADER_LENGTH
+ 4 ))
1373 { bgp_error ( conn
, 1 , 2 , pkt
+ 16 , 2 ); return ; }
1375 /* FIXME - we ignore AFI/SAFI values, as we support
1376 just one value and even an error code for an invalid
1377 request is not defined */
1379 proto_request_feeding (& p
-> p
);
1384 * bgp_rx_packet - handle a received packet
1385 * @conn: BGP connection
1386 * @pkt: start of the packet
1389 * bgp_rx_packet() takes a newly received packet and calls the corresponding
1390 * packet handler according to the packet type.
/*
 * Reads the message type from offset 18 of the packet, dumps the packet
 * to MRT when MD_MESSAGES is set in the protocol's mrtdump flags, and
 * passes it to the handler for that type. An unrecognized type is
 * answered with bgp_error() code 1.3 carrying the offending type byte.
 *
 * NOTE(review): the switch header is not present in this extraction.
 */
1393 bgp_rx_packet ( struct bgp_conn
* conn
, byte
* pkt
, unsigned len
)
1395 byte type
= pkt
[ 18 ];
1397 DBG ( "BGP: Got packet %02x (%d bytes) \n " , type
, len
);
1399 if ( conn
-> bgp
-> p
. mrtdump
& MD_MESSAGES
)
1400 mrt_dump_bgp_packet ( conn
, pkt
, len
);
1404 case PKT_OPEN
: return bgp_rx_open ( conn
, pkt
, len
);
1405 case PKT_UPDATE
: return bgp_rx_update ( conn
, pkt
, len
);
1406 case PKT_NOTIFICATION
: return bgp_rx_notification ( conn
, pkt
, len
);
1407 case PKT_KEEPALIVE
: return bgp_rx_keepalive ( conn
);
1408 case PKT_ROUTE_REFRESH
: return bgp_rx_route_refresh ( conn
, pkt
, len
);
1409 default : bgp_error ( conn
, 1 , 3 , pkt
+ 18 , 1 );
1414 * bgp_rx - handle received data
1416 * @size: amount of data received
1418 * bgp_rx() is called by the socket layer whenever new data arrive from
1419 * the underlying TCP connection. It assembles the data fragments to packets,
1420 * checks their headers and framing and passes complete packets to
1424 bgp_rx ( sock
* sk
, int size
)
1426 struct bgp_conn
* conn
= sk
-> data
;
1427 byte
* pkt_start
= sk
-> rbuf
;
1428 byte
* end
= pkt_start
+ size
;
1431 DBG ( "BGP: RX hook: Got %d bytes \n " , size
);
1432 while ( end
>= pkt_start
+ BGP_HEADER_LENGTH
)
1434 if (( conn
-> state
== BS_CLOSE
) || ( conn
-> sk
!= sk
))
1437 if ( pkt_start
[ i
] != 0xff )
1439 bgp_error ( conn
, 1 , 1 , NULL
, 0 );
1442 len
= get_u16 ( pkt_start
+ 16 );
1443 if ( len
< BGP_HEADER_LENGTH
|| len
> BGP_MAX_PACKET_LENGTH
)
1445 bgp_error ( conn
, 1 , 2 , pkt_start
+ 16 , 2 );
1448 if ( end
< pkt_start
+ len
)
1450 bgp_rx_packet ( conn
, pkt_start
, len
);
1453 if ( pkt_start
!= sk
-> rbuf
)
1455 memmove ( sk
-> rbuf
, pkt_start
, end
- pkt_start
);
1456 sk
-> rpos
= sk
-> rbuf
+ ( end
- pkt_start
);